diff --git a/CMakeLists.txt b/CMakeLists.txt
index 67c93c2..8433688 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -85,11 +85,11 @@
 # For versions without ABI changes, bump the smallest number in CERES_VERSION,
 # but leave the CERES_ABI_VERSION unchanged.
 SET(CERES_VERSION_MAJOR 1)
-SET(CERES_VERSION_MINOR 9)
+SET(CERES_VERSION_MINOR 10)
 SET(CERES_VERSION_PATCH 0)
 SET(CERES_VERSION
     ${CERES_VERSION_MAJOR}.${CERES_VERSION_MINOR}.${CERES_VERSION_PATCH})
-SET(CERES_ABI_VERSION 1.9.0)
+SET(CERES_ABI_VERSION 1.10.0)
 
 ENABLE_TESTING()
 
diff --git a/docs/source/api.rst b/docs/source/api.rst
new file mode 100644
index 0000000..051f6e7
--- /dev/null
+++ b/docs/source/api.rst
@@ -0,0 +1,12 @@
+.. _chapter-api:
+
+=============
+API Reference
+=============
+
+.. toctree::
+   :maxdepth: 3
+
+   nnls_modeling
+   nnls_solving
+   gradient_solver
diff --git a/docs/source/building.rst b/docs/source/building.rst
index a34c1fc..bbb9676 100644
--- a/docs/source/building.rst
+++ b/docs/source/building.rst
@@ -9,7 +9,7 @@
 .. _section-source:
 
 You can start with the `latest stable release
-<http://ceres-solver.org/ceres-solver-1.9.0.tar.gz>`_ . Or if you want
+<http://ceres-solver.org/ceres-solver-1.10.0.tar.gz>`_ . Or if you want
 the latest version, you can clone the git repository
 
 .. code-block:: bash
@@ -31,7 +31,7 @@
   .. NOTE ::
 
     Ceres can also use Eigen as a sparse linear algebra
-    library. Please see the documentation for ``-DEIGENSPARSE`` for
+    library. Please see the documentation for ``-DEIGENSPARSE`` for`
     more details.
 
 - `CMake <http://www.cmake.org>`_ 2.8.0 or later.
@@ -141,10 +141,10 @@
 
 .. code-block:: bash
 
- tar zxf ceres-solver-1.9.0.tar.gz
+ tar zxf ceres-solver-1.10.0.tar.gz
  mkdir ceres-bin
  cd ceres-bin
- cmake ../ceres-solver-1.9.0
+ cmake ../ceres-solver-1.10.0
  make -j3
  make test
  make install
@@ -155,7 +155,7 @@
 
 .. code-block:: bash
 
- bin/simple_bundle_adjuster ../ceres-solver-1.9.0/data/problem-16-22106-pre.txt
+ bin/simple_bundle_adjuster ../ceres-solver-1.10.0/data/problem-16-22106-pre.txt
 
 This runs Ceres for a maximum of 10 iterations using the
 ``DENSE_SCHUR`` linear solver. The output should look something like
@@ -262,10 +262,10 @@
 
 .. code-block:: bash
 
-   tar zxf ceres-solver-1.9.0.tar.gz
+   tar zxf ceres-solver-1.10.0.tar.gz
    mkdir ceres-bin
    cd ceres-bin
-   cmake ../ceres-solver-1.9.0
+   cmake ../ceres-solver-1.10.0
    make -j3
    make test
    make install
@@ -483,6 +483,10 @@
    header only library, including this code will result in an ``LGPL``
    licensed version of Ceres.
 
+   .. NOTE::
+
+      For good performance, use Eigen version 3.2.2 or later.
+
 #. ``GFLAGS [Default: ON]``: Turn this ``OFF`` to build Ceres without
    ``gflags``. This will also prevent some of the example code from
    building.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 478682f..439d0c0 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -48,9 +48,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '1.9'
+version = '1.10'
 # The full version, including alpha/beta/rc tags.
-release = '1.9.0'
+release = '1.10.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/source/faqs.rst b/docs/source/faqs.rst
index 73ad41d..e9a3ad9 100644
--- a/docs/source/faqs.rst
+++ b/docs/source/faqs.rst
@@ -64,7 +64,7 @@
 
    To this end, Ceres has extensive support for mixing analytic,
    automatic and numeric differentiation. See
-   :class:`NumericDiffFunctor` and :class:`CostFunctionToFunctor`.
+   :class:`CostFunctionToFunctor`.
 
 #. Putting `Inverse Function Theorem
    <http://en.wikipedia.org/wiki/Inverse_function_theorem>`_ to use.
@@ -142,8 +142,12 @@
 
    4. For larger bundle adjustment problems with sparse Schur
       Complement/Reduced camera matrices use ``SPARSE_SCHUR``. This
-      requires that you have ``SuiteSparse`` or ``CXSparse``
-      installed.
+      requires that you build Ceres with support for ``SuiteSparse``,
+      ``CXSparse`` or Eigen's sparse linear algebra libraries.
+
+      If you do not have access to these libraries for whatever
+      reason, ``ITERATIVE_SCHUR`` with ``SCHUR_JACOBI`` is an
+      excellent alternative.
 
    5. For large bundle adjustment problems (a few thousand cameras or
       more) use the ``ITERATIVE_SCHUR`` solver. There are a number of
@@ -153,13 +157,19 @@
       which ``DENSE_SCHUR`` is too slow but ``SuiteSparse`` is not
       available.
 
+      .. NOTE::
+
+        If you are solving small to medium sized problems, consider
+	setting ``Solver::Options::use_explicit_schur_complement`` to
+	``true``, it can result in a substantial performance boost.
+
       If you are not satisfied with ``SCHUR_JACOBI``'s performance try
       ``CLUSTER_JACOBI`` and ``CLUSTER_TRIDIAGONAL`` in that
       order. They require that you have ``SuiteSparse``
       installed. Both of these preconditioners use a clustering
       algorithm. Use ``SINGLE_LINKAGE`` before ``CANONICAL_VIEWS``.
 
-#. Use `Solver::Summary::FullReport` to diagnose performance problems.
+#. Use :function:`Solver::Summary::FullReport` to diagnose performance problems.
 
    When diagnosing Ceres performance issues - runtime and convergence,
    the first place to start is by looking at the output of
diff --git a/docs/source/features.rst b/docs/source/features.rst
index 50f22e7..681876e 100644
--- a/docs/source/features.rst
+++ b/docs/source/features.rst
@@ -4,11 +4,8 @@
 .. _chapter-features:
 
 * **Code Quality** - Ceres Solver has been used in production at
-  Google for more than three years now. It is used to solve a wide
-  variety of problems, both in size and complexity. The code runs on
-  Google's data centers, desktops and on cellphones. It is clean,
-  extensively tested and well documented code that is actively
-  developed and supported.
+  Google for more than four years now. It is clean, extensively tested
+  and well documented code that is actively developed and supported.
 
 * **Modeling API** - It is rarely the case that one starts with the
   exact and complete formulation of the problem that one is trying to
@@ -16,9 +13,7 @@
   easily build and modify the objective function, one term at a
   time. And to do so without worrying about how the solver is going to
   deal with the resulting changes in the sparsity/structure of the
-  underlying problem. Indeed we take great care to separate the
-  modeling of the optimization problem from solving it. The two can be
-  done more or less completely independently of each other.
+  underlying problem.
 
   - **Derivatives** Supplying derivatives is perhaps the most tedious
     and error prone part of using an optimization library.  Ceres
@@ -29,19 +24,18 @@
 
   - **Robust Loss Functions** Most non-linear least squares problems
     involve data. If there is data, there will be outliers. Ceres
-    allows the user to *shape* their residuals using robust loss
-    functions to reduce the influence of outliers.
+    allows the user to *shape* their residuals using a
+    :class:`LossFunction` to reduce the influence of outliers.
 
   - **Local Parameterization** In many cases, some parameters lie on a
     manifold other than Euclidean space, e.g., rotation matrices. In
     such cases, the user can specify the geometry of the local tangent
-    space by specifying a LocalParameterization object.
+    space by specifying a :class:`LocalParameterization` object.
 
 * **Solver Choice** Depending on the size, sparsity structure, time &
   memory budgets, and solution quality requiremnts, different
   optimization algorithms will suit different needs. To this end,
-  Ceres Solver comes with a variety of optimization algorithms, some
-  of them the result of the author's own research.
+  Ceres Solver comes with a variety of optimization algorithms:
 
   - **Trust Region Solvers** - Ceres supports Levenberg-Marquardt,
     Powell's Dogleg, and Subspace dogleg methods. The key
@@ -49,8 +43,8 @@
     linear system. To this end Ceres ships with a variety of linear
     solvers - dense QR and dense Cholesky factorization (using
     `Eigen`_ or `LAPACK`_) for dense problems, sparse Cholesky
-    factorization (`SuiteSparse`_ or `CXSparse`_) for large sparse
-    problems custom Schur complement based dense, sparse, and
+    factorization (`SuiteSparse`_, `CXSparse`_ or `Eigen`_) for large
+    sparse problems custom Schur complement based dense, sparse, and
     iterative linear solvers for `bundle adjustment`_ problems.
 
   - **Line Search Solvers** - When the problem size is so large that
@@ -59,11 +53,11 @@
     line search based algorithms. This includes a number of variants
     of Non-linear Conjugate Gradients, BFGS and LBFGS.
 
-* **Speed** - Ceres code has been extensively optimized, with C++
+* **Speed** - Ceres Solver has been extensively optimized, with C++
   templating, hand written linear algebra routines and OpenMP based
   multithreading of the Jacobian evaluation and the linear solvers.
 
-* **Solution Quality** Ceres is the best performing solver on the NIST
+* **Solution Quality** Ceres is the `best performing`_ solver on the NIST
   problem set used by Mondragon and Borchers for benchmarking
   non-linear least squares solvers.
 
@@ -82,7 +76,7 @@
 * **BSD Licensed** The BSD license offers the flexibility to ship your
   application
 
-.. _solution quality: https://groups.google.com/forum/#!topic/ceres-solver/UcicgMPgbXw
+.. _best performing: https://groups.google.com/forum/#!topic/ceres-solver/UcicgMPgbXw
 .. _bundle adjustment: http://en.wikipedia.org/wiki/Bundle_adjustment
 .. _SuiteSparse: http://www.cise.ufl.edu/research/sparse/SuiteSparse/
 .. _Eigen: http://eigen.tuxfamily.org/
diff --git a/docs/source/gradient_solver.rst b/docs/source/gradient_solver.rst
new file mode 100644
index 0000000..cf06530
--- /dev/null
+++ b/docs/source/gradient_solver.rst
@@ -0,0 +1,484 @@
+.. highlight:: c++
+
+.. default-domain:: cpp
+
+.. _chapter-gradient_problem_solver:
+
+==================================
+General Unconstrained Minimization
+==================================
+
+Modeling
+========
+
+:class:`FirstOrderFunction`
+---------------------------
+
+.. class:: FirstOrderFunction
+
+  Instances of :class:`FirstOrderFunction` implement the evaluation of
+  a function and its gradient.
+
+  .. code-block:: c++
+
+   class FirstOrderFunction {
+     public:
+      virtual ~FirstOrderFunction() {}
+      virtual bool Evaluate(const double* const parameters,
+                            double* cost,
+                            double* gradient) const = 0;
+      virtual int NumParameters() const = 0;
+   };
+
+.. function:: bool FirstOrderFunction::Evaluate(const double* const parameters, double* cost, double* gradient) const
+
+   Evaluate the cost/value of the function. If ``gradient`` is not
+   ``NULL`` then evaluate the gradient too. If evaluation is
+   successful return, ``true`` else return ``false``.
+
+   ``cost`` guaranteed to be never ``NULL``, ``gradient`` can be ``NULL``.
+
+.. function:: int FirstOrderFunction::NumParameters() const
+
+   Number of parameters in the domain of the function.
+
+
+:class:`GradientProblem`
+------------------------
+
+.. class:: GradientProblem
+
+.. code-block:: c++
+
+  class GradientProblem {
+   public:
+    explicit GradientProblem(FirstOrderFunction* function);
+    GradientProblem(FirstOrderFunction* function,
+                    LocalParameterization* parameterization);
+    int NumParameters() const;
+    int NumLocalParameters() const;
+    bool Evaluate(const double* parameters, double* cost, double* gradient) const;
+    bool Plus(const double* x, const double* delta, double* x_plus_delta) const;
+  };
+
+Instances of :class:`GradientProblem` represent general non-linear
+optimization problems that must be solved using just the value of the
+objective function and its gradient. Unlike the :class:`Problem`
+class, which can only be used to model non-linear least squares
+problems, instances of :class:`GradientProblem` not restricted in the
+form of the objective function.
+
+Structurally :class:`GradientProblem` is a composition of a
+:class:`FirstOrderFunction` and optionally a
+:class:`LocalParameterization`.
+
+The :class:`FirstOrderFunction` is responsible for evaluating the cost
+and gradient of the objective function.
+
+The :class:`LocalParameterization` is responsible for going back and
+forth between the ambient space and the local tangent space. When a
+:class:`LocalParameterization` is not provided, then the tangent space
+is assumed to coincide with the ambient Euclidean space that the
+gradient vector lives in.
+
+The constructor takes ownership of the :class:`FirstOrderFunction` and
+:class:`LocalParamterization` objects passed to it.
+
+
+.. function:: void Solve(const GradientProblemSolver::Options& options, const GradientProblem& problem, double* parameters, GradientProblemSolver::Summary* summary)
+
+   Solve the given :class:`GradientProblem` using the values in
+   ``parameters`` as the initial guess of the solution.
+
+
+Solving
+=======
+
+:class:`GradientProblemSolver::Options`
+---------------------------------------
+
+.. class:: GradientProblemSolver::Options
+
+   :class:`GradientProblemSolver::Options` controls the overall
+   behavior of the solver. We list the various settings and their
+   default values below.
+
+.. function:: bool GradientProblemSolver::Options::IsValid(string* error) const
+
+   Validate the values in the options struct and returns true on
+   success. If there is a problem, the method returns false with
+   ``error`` containing a textual description of the cause.
+
+.. member:: LineSearchDirectionType GradientProblemSolver::Options::line_search_direction_type
+
+   Default: ``LBFGS``
+
+   Choices are ``STEEPEST_DESCENT``, ``NONLINEAR_CONJUGATE_GRADIENT``,
+   ``BFGS`` and ``LBFGS``.
+
+.. member:: LineSearchType GradientProblemSolver::Options::line_search_type
+
+   Default: ``WOLFE``
+
+   Choices are ``ARMIJO`` and ``WOLFE`` (strong Wolfe conditions).
+   Note that in order for the assumptions underlying the ``BFGS`` and
+   ``LBFGS`` line search direction algorithms to be guaranteed to be
+   satisifed, the ``WOLFE`` line search should be used.
+
+.. member:: NonlinearConjugateGradientType GradientProblemSolver::Options::nonlinear_conjugate_gradient_type
+
+   Default: ``FLETCHER_REEVES``
+
+   Choices are ``FLETCHER_REEVES``, ``POLAK_RIBIERE`` and
+   ``HESTENES_STIEFEL``.
+
+.. member:: int GradientProblemSolver::Options::max_lbfs_rank
+
+   Default: 20
+
+   The L-BFGS hessian approximation is a low rank approximation to the
+   inverse of the Hessian matrix. The rank of the approximation
+   determines (linearly) the space and time complexity of using the
+   approximation. Higher the rank, the better is the quality of the
+   approximation. The increase in quality is however is bounded for a
+   number of reasons.
+
+     1. The method only uses secant information and not actual
+        derivatives.
+
+     2. The Hessian approximation is constrained to be positive
+        definite.
+
+   So increasing this rank to a large number will cost time and space
+   complexity without the corresponding increase in solution
+   quality. There are no hard and fast rules for choosing the maximum
+   rank. The best choice usually requires some problem specific
+   experimentation.
+
+.. member:: bool GradientProblemSolver::Options::use_approximate_eigenvalue_bfgs_scaling
+
+   Default: ``false``
+
+   As part of the ``BFGS`` update step / ``LBFGS`` right-multiply
+   step, the initial inverse Hessian approximation is taken to be the
+   Identity.  However, [Oren]_ showed that using instead :math:`I *
+   \gamma`, where :math:`\gamma` is a scalar chosen to approximate an
+   eigenvalue of the true inverse Hessian can result in improved
+   convergence in a wide variety of cases.  Setting
+   ``use_approximate_eigenvalue_bfgs_scaling`` to true enables this
+   scaling in ``BFGS`` (before first iteration) and ``LBFGS`` (at each
+   iteration).
+
+   Precisely, approximate eigenvalue scaling equates to
+
+   .. math:: \gamma = \frac{y_k' s_k}{y_k' y_k}
+
+   With:
+
+  .. math:: y_k = \nabla f_{k+1} - \nabla f_k
+  .. math:: s_k = x_{k+1} - x_k
+
+  Where :math:`f()` is the line search objective and :math:`x` the
+  vector of parameter values [NocedalWright]_.
+
+  It is important to note that approximate eigenvalue scaling does
+  **not** *always* improve convergence, and that it can in fact
+  *significantly* degrade performance for certain classes of problem,
+  which is why it is disabled by default.  In particular it can
+  degrade performance when the sensitivity of the problem to different
+  parameters varies significantly, as in this case a single scalar
+  factor fails to capture this variation and detrimentally downscales
+  parts of the Jacobian approximation which correspond to
+  low-sensitivity parameters. It can also reduce the robustness of the
+  solution to errors in the Jacobians.
+
+.. member:: LineSearchIterpolationType GradientProblemSolver::Options::line_search_interpolation_type
+
+   Default: ``CUBIC``
+
+   Degree of the polynomial used to approximate the objective
+   function. Valid values are ``BISECTION``, ``QUADRATIC`` and
+   ``CUBIC``.
+
+.. member:: double GradientProblemSolver::Options::min_line_search_step_size
+
+   The line search terminates if:
+
+   .. math:: \|\Delta x_k\|_\infty < \text{min_line_search_step_size}
+
+   where :math:`\|\cdot\|_\infty` refers to the max norm, and
+   :math:`\Delta x_k` is the step change in the parameter values at
+   the :math:`k`-th iteration.
+
+.. member:: double GradientProblemSolver::Options::line_search_sufficient_function_decrease
+
+   Default: ``1e-4``
+
+   Solving the line search problem exactly is computationally
+   prohibitive. Fortunately, line search based optimization algorithms
+   can still guarantee convergence if instead of an exact solution,
+   the line search algorithm returns a solution which decreases the
+   value of the objective function sufficiently. More precisely, we
+   are looking for a step size s.t.
+
+   .. math:: f(\text{step_size}) \le f(0) + \text{sufficient_decrease} * [f'(0) * \text{step_size}]
+
+   This condition is known as the Armijo condition.
+
+.. member:: double GradientProblemSolver::Options::max_line_search_step_contraction
+
+   Default: ``1e-3``
+
+   In each iteration of the line search,
+
+   .. math:: \text{new_step_size} >= \text{max_line_search_step_contraction} * \text{step_size}
+
+   Note that by definition, for contraction:
+
+   .. math:: 0 < \text{max_step_contraction} < \text{min_step_contraction} < 1
+
+.. member:: double GradientProblemSolver::Options::min_line_search_step_contraction
+
+   Default: ``0.6``
+
+   In each iteration of the line search,
+
+   .. math:: \text{new_step_size} <= \text{min_line_search_step_contraction} * \text{step_size}
+
+   Note that by definition, for contraction:
+
+   .. math:: 0 < \text{max_step_contraction} < \text{min_step_contraction} < 1
+
+.. member:: int GradientProblemSolver::Options::max_num_line_search_step_size_iterations
+
+   Default: ``20``
+
+   Maximum number of trial step size iterations during each line
+   search, if a step size satisfying the search conditions cannot be
+   found within this number of trials, the line search will stop.
+
+   As this is an 'artificial' constraint (one imposed by the user, not
+   the underlying math), if ``WOLFE`` line search is being used, *and*
+   points satisfying the Armijo sufficient (function) decrease
+   condition have been found during the current search (in :math:`<=`
+   ``max_num_line_search_step_size_iterations``).  Then, the step size
+   with the lowest function value which satisfies the Armijo condition
+   will be returned as the new valid step, even though it does *not*
+   satisfy the strong Wolfe conditions.  This behaviour protects
+   against early termination of the optimizer at a sub-optimal point.
+
+.. member:: int GradientProblemSolver::Options::max_num_line_search_direction_restarts
+
+   Default: ``5``
+
+   Maximum number of restarts of the line search direction algorithm
+   before terminating the optimization. Restarts of the line search
+   direction algorithm occur when the current algorithm fails to
+   produce a new descent direction. This typically indicates a
+   numerical failure, or a breakdown in the validity of the
+   approximations used.
+
+.. member:: double GradientProblemSolver::Options::line_search_sufficient_curvature_decrease
+
+   Default: ``0.9``
+
+   The strong Wolfe conditions consist of the Armijo sufficient
+   decrease condition, and an additional requirement that the
+   step size be chosen s.t. the *magnitude* ('strong' Wolfe
+   conditions) of the gradient along the search direction
+   decreases sufficiently. Precisely, this second condition
+   is that we seek a step size s.t.
+
+   .. math:: \|f'(\text{step_size})\| <= \text{sufficient_curvature_decrease} * \|f'(0)\|
+
+   Where :math:`f()` is the line search objective and :math:`f'()` is the derivative
+   of :math:`f` with respect to the step size: :math:`\frac{d f}{d~\text{step size}}`.
+
+.. member:: double GradientProblemSolver::Options::max_line_search_step_expansion
+
+   Default: ``10.0``
+
+   During the bracketing phase of a Wolfe line search, the step size
+   is increased until either a point satisfying the Wolfe conditions
+   is found, or an upper bound for a bracket containing a point
+   satisfying the conditions is found.  Precisely, at each iteration
+   of the expansion:
+
+   .. math:: \text{new_step_size} <= \text{max_step_expansion} * \text{step_size}
+
+   By definition for expansion
+
+   .. math:: \text{max_step_expansion} > 1.0
+
+.. member:: int GradientProblemSolver::Options::max_num_iterations
+
+   Default: ``50``
+
+   Maximum number of iterations for which the solver should run.
+
+.. member:: double GradientProblemSolver::Options::max_solver_time_in_seconds
+
+   Default: ``1e6``
+   Maximum amount of time for which the solver should run.
+
+.. member:: double GradientProblemSolver::Options::function_tolerance
+
+   Default: ``1e-6``
+
+   Solver terminates if
+
+   .. math:: \frac{|\Delta \text{cost}|}{\text{cost}} < \text{function_tolerance}
+
+   where, :math:`\Delta \text{cost}` is the change in objective
+   function value (up or down) in the current iteration of
+   Levenberg-Marquardt.
+
+.. member:: double GradientProblemSolver::Options::gradient_tolerance
+
+   Default: ``1e-10``
+
+   Solver terminates if
+
+   .. math:: \|x - \Pi \boxplus(x, -g(x))\|_\infty < \text{gradient_tolerance}
+
+   where :math:`\|\cdot\|_\infty` refers to the max norm, :math:`\Pi`
+   is projection onto the bounds constraints and :math:`\boxplus` is
+   Plus operation for the overall local parameterization associated
+   with the parameter vector.
+
+.. member:: LoggingType GradientProblemSolver::Options::logging_type
+
+   Default: ``PER_MINIMIZER_ITERATION``
+
+.. member:: bool GradientProblemSolver::Options::minimizer_progress_to_stdout
+
+   Default: ``false``
+
+   By default the :class:`Minimizer` progress is logged to ``STDERR``
+   depending on the ``vlog`` level. If this flag is set to true, and
+   :member:`GradientProblemSolver::Options::logging_type` is not
+   ``SILENT``, the logging output is sent to ``STDOUT``.
+
+   The progress display looks like
+
+   .. code-block:: bash
+
+      0: f: 2.317806e+05 d: 0.00e+00 g: 3.19e-01 h: 0.00e+00 s: 0.00e+00 e:  0 it: 2.98e-02 tt: 8.50e-02
+      1: f: 2.312019e+05 d: 5.79e+02 g: 3.18e-01 h: 2.41e+01 s: 1.00e+00 e:  1 it: 4.54e-02 tt: 1.31e-01
+      2: f: 2.300462e+05 d: 1.16e+03 g: 3.17e-01 h: 4.90e+01 s: 2.54e-03 e:  1 it: 4.96e-02 tt: 1.81e-01
+
+   Here
+
+   #. ``f`` is the value of the objective function.
+   #. ``d`` is the change in the value of the objective function if
+      the step computed in this iteration is accepted.
+   #. ``g`` is the max norm of the gradient.
+   #. ``h`` is the change in the parameter vector.
+   #. ``s`` is the optimal step length computed by the line search.
+   #. ``it`` is the time take by the current iteration.
+   #. ``tt`` is the total time taken by the minimizer.
+
+.. member:: vector<IterationCallback> GradientProblemSolver::Options::callbacks
+
+   Callbacks that are executed at the end of each iteration of the
+   :class:`Minimizer`. They are executed in the order that they are
+   specified in this vector. See the documentation for
+   :class:`IterationCallback` for more details.
+
+   The solver does NOT take ownership of these pointers.
+
+
+:class:`GradientProblemSolver::Summary`
+---------------------------------------
+
+.. class:: GradientProblemSolver::Summary
+
+   Summary of the various stages of the solver after termination.
+
+.. function:: string GradientProblemSolver::Summary::BriefReport() const
+
+   A brief one line description of the state of the solver after
+   termination.
+
+.. function:: string GradientProblemSolver::Summary::FullReport() const
+
+   A full multiline description of the state of the solver after
+   termination.
+
+.. function:: bool GradientProblemSolver::Summary::IsSolutionUsable() const
+
+   Whether the solution returned by the optimization algorithm can be
+   relied on to be numerically sane. This will be the case if
+   `GradientProblemSolver::Summary:termination_type` is set to `CONVERGENCE`,
+   `USER_SUCCESS` or `NO_CONVERGENCE`, i.e., either the solver
+   converged by meeting one of the convergence tolerances or because
+   the user indicated that it had converged or it ran to the maximum
+   number of iterations or time.
+
+.. member:: TerminationType GradientProblemSolver::Summary::termination_type
+
+   The cause of the minimizer terminating.
+
+.. member:: string GradientProblemSolver::Summary::message
+
+   Reason why the solver terminated.
+
+.. member:: double GradientProblemSolver::Summary::initial_cost
+
+   Cost of the problem (value of the objective function) before the
+   optimization.
+
+.. member:: double GradientProblemSolver::Summary::final_cost
+
+   Cost of the problem (value of the objective function) after the
+   optimization.
+
+.. member:: vector<IterationSummary> GradientProblemSolver::Summary::iterations
+
+   :class:`IterationSummary` for each minimizer iteration in order.
+
+.. member:: double GradientProblemSolver::Summary::total_time_in_seconds
+
+   Time (in seconds) spent in the solver.
+
+.. member:: double GradientProblemSolver::Summary::cost_evaluation_time_in_seconds
+
+   Time (in seconds) spent evaluating the cost vector.
+
+.. member:: double GradientProblemSolver::Summary::gradient_evaluation_time_in_seconds
+
+   Time (in seconds) spent evaluating the gradient vector.
+
+.. member:: int GradientProblemSolver::Summary::num_parameters
+
+   Number of parameters in the problem.
+
+.. member:: int GradientProblemSolver::Summary::num_local_parameters
+
+   Dimension of the tangent space of the problem. This is different
+   from :member:`GradientProblemSolver::Summary::num_parameters` if a
+   :class:`LocalParameterization` object is used.
+
+.. member:: LineSearchDirectionType GradientProblemSolver::Summary::line_search_direction_type
+
+   Type of line search direction used.
+
+.. member:: LineSearchType GradientProblemSolver::Summary::line_search_type
+
+   Type of the line search algorithm used.
+
+.. member:: LineSearchInterpolationType GradientProblemSolver::Summary::line_search_interpolation_type
+
+   When performing line search, the degree of the polynomial used to
+   approximate the objective function.
+
+.. member:: NonlinearConjugateGradientType GradientProblemSolver::Summary::nonlinear_conjugate_gradient_type
+
+   If the line search direction is `NONLINEAR_CONJUGATE_GRADIENT`,
+   then this indicates the particular variant of non-linear conjugate
+   gradient used.
+
+.. member:: int GradientProblemSolver::Summary::max_lbfgs_rank
+
+   If the type of the line search direction is `LBFGS`, then this
+   indicates the rank of the Hessian approximation.
diff --git a/docs/source/gradient_tutorial.rst b/docs/source/gradient_tutorial.rst
new file mode 100644
index 0000000..318bcf2
--- /dev/null
+++ b/docs/source/gradient_tutorial.rst
@@ -0,0 +1,138 @@
+.. highlight:: c++
+
+.. default-domain:: cpp
+
+.. _chapter-gradient_tutorial:
+
+==================================
+General Unconstrained Minimization
+==================================
+
+While much of Ceres Solver is devoted to solving non-linear least
+squares problems, internally it contains a solver that can solve
+general unconstrained optimization problems using just their objective
+function value and gradients. The ``GradientProblem`` and
+``GradientProblemSolver`` objects give the user access to this solver.
+
+So without much further ado, let us look at how one goes about using
+them.
+
+Rosenbrock's Function
+=====================
+
+We consider the minimization of the famous `Rosenbrock's function
+<http://en.wikipedia.org/wiki/Rosenbrock_function>`_ [#f9]_.
+
+We begin by defining an instance of the ``FirstOrderFunction``
+interface. This is the object that is responsible for computing the
+objective function value and the gradient (if required). This is the
+analog of the :class:`CostFunction` when defining non-linear least
+squares problems in Ceres.
+
+.. code::
+
+  class Rosenbrock : public ceres::FirstOrderFunction {
+   public:
+    virtual bool Evaluate(const double* parameters,
+                          double* cost,
+                          double* gradient) const {
+      const double x = parameters[0];
+      const double y = parameters[1];
+
+      cost[0] = (1.0 - x) * (1.0 - x) + 100.0 * (y - x * x) * (y - x * x);
+      if (gradient != NULL) {
+        gradient[0] = -2.0 * (1.0 - x) - 200.0 * (y - x * x) * 2.0 * x;
+        gradient[1] = 200.0 * (y - x * x);
+      }
+      return true;
+    }
+
+    virtual int NumParameters() const { return 2; }
+  };
+
+
+Minimizing it then is a straightforward matter of constructing a
+:class:`GradientProblem` object and calling :func:`Solve` on it.
+
+.. code::
+
+    double parameters[2] = {-1.2, 1.0};
+
+    ceres::GradientProblem problem(new Rosenbrock());
+
+    ceres::GradientProblemSolver::Options options;
+    options.minimizer_progress_to_stdout = true;
+    ceres::GradientProblemSolver::Summary summary;
+    ceres::Solve(options, problem, parameters, &summary);
+
+    std::cout << summary.FullReport() << "\n";
+
+Executing this code results, solve the problem using limited memory
+`BFGS
+<http://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm>`_
+algorithm.
+
+.. code-block:: bash
+
+     0: f: 2.420000e+01 d: 0.00e+00 g: 2.16e+02 h: 0.00e+00 s: 0.00e+00 e:  0 it: 2.00e-05 tt: 2.00e-05
+     1: f: 4.280493e+00 d: 1.99e+01 g: 1.52e+01 h: 2.01e-01 s: 8.62e-04 e:  2 it: 7.32e-05 tt: 2.19e-04
+     2: f: 3.571154e+00 d: 7.09e-01 g: 1.35e+01 h: 3.78e-01 s: 1.34e-01 e:  3 it: 2.50e-05 tt: 2.68e-04
+     3: f: 3.440869e+00 d: 1.30e-01 g: 1.73e+01 h: 1.36e-01 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 2.92e-04
+     4: f: 3.213597e+00 d: 2.27e-01 g: 1.55e+01 h: 1.06e-01 s: 4.59e-01 e:  1 it: 2.86e-06 tt: 3.14e-04
+     5: f: 2.839723e+00 d: 3.74e-01 g: 1.05e+01 h: 1.34e-01 s: 5.24e-01 e:  1 it: 2.86e-06 tt: 3.36e-04
+     6: f: 2.448490e+00 d: 3.91e-01 g: 1.29e+01 h: 3.04e-01 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 3.58e-04
+     7: f: 1.943019e+00 d: 5.05e-01 g: 4.00e+00 h: 8.81e-02 s: 7.43e-01 e:  1 it: 4.05e-06 tt: 3.79e-04
+     8: f: 1.731469e+00 d: 2.12e-01 g: 7.36e+00 h: 1.71e-01 s: 4.60e-01 e:  2 it: 9.06e-06 tt: 4.06e-04
+     9: f: 1.503267e+00 d: 2.28e-01 g: 6.47e+00 h: 8.66e-02 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 4.33e-04
+    10: f: 1.228331e+00 d: 2.75e-01 g: 2.00e+00 h: 7.70e-02 s: 7.90e-01 e:  1 it: 3.81e-06 tt: 4.54e-04
+    11: f: 1.016523e+00 d: 2.12e-01 g: 5.15e+00 h: 1.39e-01 s: 3.76e-01 e:  2 it: 1.00e-05 tt: 4.82e-04
+    12: f: 9.145773e-01 d: 1.02e-01 g: 6.74e+00 h: 7.98e-02 s: 1.00e+00 e:  1 it: 3.10e-06 tt: 5.03e-04
+    13: f: 7.508302e-01 d: 1.64e-01 g: 3.88e+00 h: 5.76e-02 s: 4.93e-01 e:  1 it: 2.86e-06 tt: 5.25e-04
+    14: f: 5.832378e-01 d: 1.68e-01 g: 5.56e+00 h: 1.42e-01 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 5.47e-04
+    15: f: 3.969581e-01 d: 1.86e-01 g: 1.64e+00 h: 1.17e-01 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 5.68e-04
+    16: f: 3.171557e-01 d: 7.98e-02 g: 3.84e+00 h: 1.18e-01 s: 3.97e-01 e:  2 it: 9.06e-06 tt: 5.94e-04
+    17: f: 2.641257e-01 d: 5.30e-02 g: 3.27e+00 h: 6.14e-02 s: 1.00e+00 e:  1 it: 3.10e-06 tt: 6.16e-04
+    18: f: 1.909730e-01 d: 7.32e-02 g: 5.29e-01 h: 8.55e-02 s: 6.82e-01 e:  1 it: 4.05e-06 tt: 6.42e-04
+    19: f: 1.472012e-01 d: 4.38e-02 g: 3.11e+00 h: 1.20e-01 s: 3.47e-01 e:  2 it: 1.00e-05 tt: 6.69e-04
+    20: f: 1.093558e-01 d: 3.78e-02 g: 2.97e+00 h: 8.43e-02 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 6.91e-04
+    21: f: 6.710346e-02 d: 4.23e-02 g: 1.42e+00 h: 9.64e-02 s: 8.85e-01 e:  1 it: 3.81e-06 tt: 7.12e-04
+    22: f: 3.993377e-02 d: 2.72e-02 g: 2.30e+00 h: 1.29e-01 s: 4.63e-01 e:  2 it: 9.06e-06 tt: 7.39e-04
+    23: f: 2.911794e-02 d: 1.08e-02 g: 2.55e+00 h: 6.55e-02 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 7.62e-04
+    24: f: 1.457683e-02 d: 1.45e-02 g: 2.77e-01 h: 6.37e-02 s: 6.14e-01 e:  1 it: 3.81e-06 tt: 7.84e-04
+    25: f: 8.577515e-03 d: 6.00e-03 g: 2.86e+00 h: 1.40e-01 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 8.05e-04
+    26: f: 3.486574e-03 d: 5.09e-03 g: 1.76e-01 h: 1.23e-02 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 8.27e-04
+    27: f: 1.257570e-03 d: 2.23e-03 g: 1.39e-01 h: 5.08e-02 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 8.48e-04
+    28: f: 2.783568e-04 d: 9.79e-04 g: 6.20e-01 h: 6.47e-02 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 8.69e-04
+    29: f: 2.533399e-05 d: 2.53e-04 g: 1.68e-02 h: 1.98e-03 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 8.91e-04
+    30: f: 7.591572e-07 d: 2.46e-05 g: 5.40e-03 h: 9.27e-03 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 9.12e-04
+    31: f: 1.902460e-09 d: 7.57e-07 g: 1.62e-03 h: 1.89e-03 s: 1.00e+00 e:  1 it: 2.86e-06 tt: 9.33e-04
+    32: f: 1.003030e-12 d: 1.90e-09 g: 3.50e-05 h: 3.52e-05 s: 1.00e+00 e:  1 it: 3.10e-06 tt: 9.54e-04
+    33: f: 4.835994e-17 d: 1.00e-12 g: 1.05e-07 h: 1.13e-06 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 9.81e-04
+    34: f: 1.885250e-22 d: 4.84e-17 g: 2.69e-10 h: 1.45e-08 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 1.00e-03
+
+  Solver Summary (v 1.10.0-lapack-suitesparse-cxsparse-no_openmp)
+
+  Parameters                                  2
+  Line search direction              LBFGS (20)
+  Line search type                  CUBIC WOLFE
+
+
+  Cost:
+  Initial                          2.420000e+01
+  Final                            1.885250e-22
+  Change                           2.420000e+01
+
+  Minimizer iterations                       35
+
+  Time (in seconds):
+
+    Cost evaluation                       0.000
+    Gradient evaluation                   0.000
+  Total                                   0.003
+
+  Termination:                      CONVERGENCE (Gradient tolerance reached. Gradient max norm: 9.032775e-13 <= 1.000000e-10)
+
+.. rubric:: Footnotes
+
+.. [#f1] `examples/rosenbrock.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/rosenbrock.cc>`_
diff --git a/docs/source/history.rst b/docs/source/history.rst
deleted file mode 100644
index b159284..0000000
--- a/docs/source/history.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. _chapter-history:
-
-=======
-History
-=======
-
-Ceres Solver grew out of the need for general least squares solving at
-Google. In early 2010, Sameer Agarwal and Fredrik Schaffalitzky
-started the development of Ceres Solver. Fredrik left Google shortly
-thereafter and Keir Mierle stepped in to take his place. After two
-years of on-and-off development, Ceres Solver was released as open
-source in May of 2012.
-
-Origin of the name
-------------------
-
-While there is some debate as to who invented the method of Least
-Squares [Stigler]_, there is no debate that it was `Carl Friedrich
-Gauss
-<http://www-groups.dcs.st-and.ac.uk/~history/Biographies/Gauss.html>`_
-who brought it to the attention of the world. Using just 22
-observations of the newly discovered asteroid `Ceres
-<http://en.wikipedia.org/wiki/Ceres_(dwarf_planet)>`_, Gauss used the
-method of least squares to correctly predict when and where the
-asteroid will emerge from behind the Sun [TenenbaumDirector]_. We
-named our solver after Ceres to celebrate this seminal event in the
-history of astronomy, statistics and optimization.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 0349eb9..0515233 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -14,55 +14,36 @@
    features
    building
    tutorial
-   modeling
-   solving
+   api
    faqs
+   users
    contributing
    version_history
-   history
    bibliography
    license
 
-Ceres Solver is an open source C++ library for modeling and solving
-large complicated `nonlinear least squares`_ problems. It is a feature
+Ceres Solver [#f1]_ is an open source C++ library for modeling and
+solving large, complicated optimization problems. It is a feature
 rich, mature and performant library which has been used in production
-since 2010. At Google, Ceres Solver is used to:
+at Google since 2010. Ceres Solver can solve two kinds of problems.
 
-* Estimate the pose of `Street View`_ cars, aircrafts, and satellites.
-* Build 3D models for `PhotoTours`_.
-* Estimate satellite image sensor characteristics.
-* Stitch `panoramas`_ or apply `Lens Blur`_ on Android.
-* Solve `bundle adjustment`_ and SLAM problems in `Project Tango`_.
+1. `Non-linear Least Squares`_ problems with bounds constraints.
+2. General unconstrained optimization problems.
 
-Outside Google, Ceres is used for solving problems in computer vision,
-computer graphics, astronomy and physics. For example, `Willow Garage`_ uses
-it to solve SLAM problems and `Blender`_ uses it for for planar
-tracking and bundle adjustment.
-
-.. _nonlinear least squares: http://en.wikipedia.org/wiki/Non-linear_least_squares
-.. _fitting curves: http://en.wikipedia.org/wiki/Nonlinear_regression
-.. _bundle adjustment: http://en.wikipedia.org/wiki/Structure_from_motion
-.. _Street View: http://youtu.be/z00ORu4bU-A
-.. _PhotoTours: http://google-latlong.blogspot.com/2012/04/visit-global-landmarks-with-photo-tours.html
-.. _panoramas: http://www.google.com/maps/about/contribute/photosphere/
-.. _Project Tango: https://www.google.com/atap/projecttango/
-.. _Blender: http://mango.blender.org/development/planar-tracking-preview/
-.. _Willow Garage: https://www.willowgarage.com/blog/2013/08/09/enabling-robots-see-better-through-improved-camera-calibration
-.. _Lens Blur: http://googleresearch.blogspot.com/2014/04/lens-blur-in-new-google-camera-app.html
+.. _Non-linear Least Squares: http://en.wikipedia.org/wiki/Non-linear_least_squares
 
 Getting started
----------------
+===============
 
 * Download the `latest stable release
-  <http://ceres-solver.org/ceres-solver-1.9.0.tar.gz>`_ or clone the
+  <http://ceres-solver.org/ceres-solver-1.10.0.tar.gz>`_ or clone the
   Git repository for the latest development version.
 
   .. code-block:: bash
 
        git clone https://ceres-solver.googlesource.com/ceres-solver
 
-* Read the :ref:`chapter-tutorial`, browse the chapters on the
-  :ref:`chapter-modeling` API and the :ref:`chapter-solving` API.
+* Read the :ref:`chapter-tutorial` and browse the :ref:`chapter-api`.
 * Join the `mailing list
   <https://groups.google.com/forum/?fromgroups#!forum/ceres-solver>`_
   and ask questions.
@@ -71,7 +52,8 @@
 
 
 Cite Us
--------
+=======
+
 If you use Ceres Solver for a publication, please cite it as::
 
     @misc{ceres-solver,
@@ -79,3 +61,19 @@
       title = "Ceres Solver",
       howpublished = "\url{http://ceres-solver.org}",
     }
+
+
+.. rubric:: Footnotes
+
+.. [#f1] While there is some debate as to who invented the method of
+         Least Squares [Stigler]_, there is no questioning the fact
+         that it was `Carl Friedrich Gauss
+         <http://www-groups.dcs.st-and.ac.uk/~history/Biographies/Gauss.html>`_
+         who brought it to the attention of the world. Using just 22
+         observations of the newly discovered asteroid `Ceres
+         <http://en.wikipedia.org/wiki/Ceres_(dwarf_planet)>`_, Gauss
+         used the method of least squares to correctly predict when
+         and where the asteroid will emerge from behind the Sun
+         [TenenbaumDirector]_. We named our solver after Ceres to
+         celebrate this seminal event in the history of astronomy,
+         statistics and optimization.
diff --git a/docs/source/modeling.rst b/docs/source/nnls_modeling.rst
similarity index 97%
rename from docs/source/modeling.rst
rename to docs/source/nnls_modeling.rst
index b88d62e..276a046 100644
--- a/docs/source/modeling.rst
+++ b/docs/source/nnls_modeling.rst
@@ -2,17 +2,20 @@
 
 .. cpp:namespace:: ceres
 
-.. _`chapter-modeling`:
+.. _`chapter-nnls_modeling`:
 
-========
-Modeling
-========
+=================================
+Modeling Non-linear Least Squares
+=================================
+
+Introduction
+============
 
 Ceres solver consists of two distinct parts. A modeling API which
 provides a rich set of tools to construct an optimization problem one
 term at a time and a solver API that controls the minimization
 algorithm. This chapter is devoted to the task of modeling
-optimization problems using Ceres. :ref:`chapter-solving` discusses
+optimization problems using Ceres. :ref:`chapter-nnls_solving` discusses
 the various ways in which an optimization problem can be solved using
 Ceres.
 
@@ -55,7 +58,7 @@
    \frac{1}{2}\sum_{i} \left\|f_i\left(x_{i_1}, ... ,x_{i_k}\right)\right\|^2.
 
 :class:`CostFunction`
----------------------
+=====================
 
 For each term in the objective function, a :class:`CostFunction` is
 responsible for computing a vector of residuals and if asked a vector
@@ -130,7 +133,7 @@
    computations for instance.
 
 :class:`SizedCostFunction`
---------------------------
+==========================
 
 .. class:: SizedCostFunction
 
@@ -154,7 +157,7 @@
 
 
 :class:`AutoDiffCostFunction`
------------------------------
+=============================
 
 .. class:: AutoDiffCostFunction
 
@@ -184,7 +187,7 @@
        // Ignore the template parameter kNumResiduals and use
        // num_residuals instead.
        AutoDiffCostFunction(CostFunctor* functor, int num_residuals);
-     }
+     }g
 
    To get an auto differentiated cost function, you must define a
    class with a templated ``operator()`` (a functor) that computes the
@@ -294,7 +297,7 @@
 
 
 :class:`DynamicAutoDiffCostFunction`
-------------------------------------
+====================================
 
 .. class:: DynamicAutoDiffCostFunction
 
@@ -349,7 +352,7 @@
    you use :class:`DynamicAutoDiffCostFunction`.
 
 :class:`NumericDiffCostFunction`
---------------------------------
+================================
 
 .. class:: NumericDiffCostFunction
 
@@ -523,7 +526,7 @@
    example.
 
 :class:`DynamicNumericDiffCostFunction`
----------------------------------------
+=======================================
 
 .. class:: DynamicNumericDiffCostFunction
 
@@ -568,7 +571,7 @@
    you use :class:`DynamicNumericDiffCostFunction`.
 
 :class:`CostFunctionToFunctor`
-------------------------------
+==============================
 
 .. class:: CostFunctionToFunctor
 
@@ -696,7 +699,7 @@
 
 
 :class:`ConditionedCostFunction`
---------------------------------
+================================
 
 .. class:: ConditionedCostFunction
 
@@ -737,7 +740,7 @@
 
 
 :class:`NormalPrior`
---------------------
+====================
 
 .. class:: NormalPrior
 
@@ -776,7 +779,7 @@
 .. _`section-loss_function`:
 
 :class:`LossFunction`
----------------------
+=====================
 
 .. class:: LossFunction
 
@@ -850,7 +853,7 @@
    its square.
 
 Instances
-^^^^^^^^^
+---------
 
 Ceres includes a number of predefined loss functions. For simplicity
 we described their unscaled versions. The figure below illustrates
@@ -954,7 +957,7 @@
 
 
 Theory
-^^^^^^
+------
 
 Let us consider a problem with a single problem and a single parameter
 block.
@@ -1003,7 +1006,7 @@
 
 
 :class:`LocalParameterization`
-------------------------------
+==============================
 
 .. class:: LocalParameterization
 
@@ -1016,6 +1019,10 @@
                          const double* delta,
                          double* x_plus_delta) const = 0;
        virtual bool ComputeJacobian(const double* x, double* jacobian) const = 0;
+       virtual bool MultiplyByJacobian(const double* x,
+                                       const int num_rows,
+                                       const double* global_matrix,
+                                       double* local_matrix) const;
        virtual int GlobalSize() const = 0;
        virtual int LocalSize() const = 0;
      };
@@ -1080,8 +1087,19 @@
 
    in row major form.
 
+.. function:: bool MultiplyByJacobian(const double* x, const int num_rows, const double* global_matrix, double* local_matrix) const
+
+   local_matrix = global_matrix * jacobian
+
+   global_matrix is a num_rows x GlobalSize  row major matrix.
+   local_matrix is a num_rows x LocalSize row major matrix.
+   jacobian is the matrix returned by :func:`LocalParameterization::ComputeJacobian` at :math:`x`.
+
+   This is only used by GradientProblem. For most normal uses, it is
+   okay to use the default implementation.
+
 Instances
-^^^^^^^^^
+---------
 
 .. class:: IdentityParameterization
 
@@ -1124,7 +1142,7 @@
 
 
 :class:`AutoDiffLocalParameterization`
---------------------------------------
+======================================
 
 .. class:: AutoDiffLocalParameterization
 
@@ -1197,7 +1215,7 @@
 
 
 :class:`Problem`
-----------------
+================
 
 .. class:: Problem
 
@@ -1460,6 +1478,14 @@
    blocks for a parameter block will incur a scan of the entire
    :class:`Problem` object.
 
+.. function:: const CostFunction* GetCostFunctionForResidualBlock(const ResidualBlockId residual_block) const
+
+   Get the :class:`CostFunction` for the given residual block.
+
+.. function:: const LossFunction* GetLossFunctionForResidualBlock(const ResidualBlockId residual_block) const
+
+   Get the :class:`LossFunction` for the given residual block.
+
 .. function:: bool Problem::Evaluate(const Problem::EvaluateOptions& options, double* cost, vector<double>* residuals, vector<double>* gradient, CRSMatrix* jacobian)
 
    Evaluate a :class:`Problem`. Any of the output pointers can be
@@ -1519,7 +1545,7 @@
    vector containing all the parameter blocks.
 
 ``rotation.h``
---------------
+==============
 
 Many applications of Ceres Solver involve optimization problems where
 some of the variables correspond to rotations. To ease the pain of
diff --git a/docs/source/solving.rst b/docs/source/nnls_solving.rst
similarity index 95%
rename from docs/source/solving.rst
rename to docs/source/nnls_solving.rst
index c80e88d..e25ae32 100644
--- a/docs/source/solving.rst
+++ b/docs/source/nnls_solving.rst
@@ -3,11 +3,11 @@
 
 .. cpp:namespace:: ceres
 
-.. _chapter-solving:
+.. _chapter-nnls_solving:
 
-=======
-Solving
-=======
+================================
+Solving Non-linear Least Squares
+================================
 
 Introduction
 ============
@@ -618,44 +618,68 @@
 ``ITERATIVE_SCHUR``
 -------------------
 
-Another option for bundle adjustment problems is to apply PCG to the
-reduced camera matrix :math:`S` instead of :math:`H`. One reason to do
-this is that :math:`S` is a much smaller matrix than :math:`H`, but
-more importantly, it can be shown that :math:`\kappa(S)\leq
-\kappa(H)`.  Ceres implements PCG on :math:`S` as the
+Another option for bundle adjustment problems is to apply
+Preconditioned Conjugate Gradients to the reduced camera matrix
+:math:`S` instead of :math:`H`. One reason to do this is that
+:math:`S` is a much smaller matrix than :math:`H`, but more
+importantly, it can be shown that :math:`\kappa(S)\leq \kappa(H)`.
+Ceres implements Conjugate Gradients on :math:`S` as the
 ``ITERATIVE_SCHUR`` solver. When the user chooses ``ITERATIVE_SCHUR``
 as the linear solver, Ceres automatically switches from the exact step
 algorithm to an inexact step algorithm.
 
-The cost of forming and storing the Schur complement :math:`S` can be
-prohibitive for large problems. Indeed, for an inexact Newton solver
-that computes :math:`S` and runs PCG on it, almost all of its time is
-spent in constructing :math:`S`; the time spent inside the PCG
-algorithm is negligible in comparison. Because PCG only needs access
-to :math:`S` via its product with a vector, one way to evaluate
-:math:`Sx` is to observe that
+The key computational operation when using Conjuagate Gradients is the
+evaluation of the matrix vector product :math:`Sx` for an arbitrary
+vector :math:`x`. There are two ways in which this product can be
+evaluated, and this can be controlled using
+``Solver::Options::use_explicit_schur_complement`. Depending on the
+problem at hand, the performance difference between these two methods
+can be quite substantial.
 
-.. math::  x_1 &= E^\top x
-.. math::  x_2 &= C^{-1} x_1
-.. math::  x_3 &= Ex_2\\
-.. math::  x_4 &= Bx\\
-.. math::   Sx &= x_4 - x_3
-   :label: schurtrick1
+  1. **Implicit** This is default. Implicit evaluation is suitable for
+     large problems where the cost of computing and storing the Schur
+     Complement :math:`S` is prohibitive. Because PCG only needs
+     access to :math:`S` via its product with a vector, one way to
+     evaluate :math:`Sx` is to observe that
 
-Thus, we can run PCG on :math:`S` with the same computational effort
-per iteration as PCG on :math:`H`, while reaping the benefits of a
-more powerful preconditioner. In fact, we do not even need to compute
-:math:`H`, :eq:`schurtrick1` can be implemented using just the columns
-of :math:`J`.
+     .. math::  x_1 &= E^\top x
+     .. math::  x_2 &= C^{-1} x_1
+     .. math::  x_3 &= Ex_2\\
+     .. math::  x_4 &= Bx\\
+     .. math::   Sx &= x_4 - x_3
+        :label: schurtrick1
 
-Equation :eq:`schurtrick1` is closely related to *Domain
-Decomposition methods* for solving large linear systems that arise in
-structural engineering and partial differential equations. In the
-language of Domain Decomposition, each point in a bundle adjustment
-problem is a domain, and the cameras form the interface between these
-domains. The iterative solution of the Schur complement then falls
-within the sub-category of techniques known as Iterative
-Sub-structuring [Saad]_ [Mathew]_.
+     Thus, we can run PCG on :math:`S` with the same computational
+     effort per iteration as PCG on :math:`H`, while reaping the
+     benefits of a more powerful preconditioner. In fact, we do not
+     even need to compute :math:`H`, :eq:`schurtrick1` can be
+     implemented using just the columns of :math:`J`.
+
+     Equation :eq:`schurtrick1` is closely related to *Domain
+     Decomposition methods* for solving large linear systems that
+     arise in structural engineering and partial differential
+     equations. In the language of Domain Decomposition, each point in
+     a bundle adjustment problem is a domain, and the cameras form the
+     interface between these domains. The iterative solution of the
+     Schur complement then falls within the sub-category of techniques
+     known as Iterative Sub-structuring [Saad]_ [Mathew]_.
+
+  2. **Explicit** The complexity of implicit matrix-vector product
+     evaluation scales with the number of non-zeros in the
+     Jacobian. For small to medium sized problems, the cost of
+     constructing the Schur Complement is small enough that it is
+     better to construct it explicitly in memory and use it to
+     evaluate the product :math:`Sx`.
+
+
+  .. NOTE::
+
+     In exact arithmetic, the choice of implicit versus explicit Schur
+     complement would have no impact on solution quality. However, in
+     practice if the Jacobian is poorly conditioned, one may observe
+     (usually small) differences in solution quality. This is a
+     natural consequence of performing computations in finite arithmetic.
+
 
 .. _section-preconditioner:
 
@@ -789,7 +813,7 @@
 .. _section-solver-options:
 
 :class:`Solver::Options`
-------------------------
+========================
 
 .. class:: Solver::Options
 
@@ -1262,6 +1286,35 @@
 
    See :ref:`section-ordering` for more details.
 
+.. member:: bool Solver::Options::use_explicit_schur_complement
+
+   Default: ``false``
+
+   Use an explicitly computed Schur complement matrix with
+   ``ITERATIVE_SCHUR``.
+
+   By default this option is disabled and ``ITERATIVE_SCHUR``
+   evaluates evaluates matrix-vector products between the Schur
+   complement and a vector implicitly by exploiting the algebraic
+   expression for the Schur complement.
+
+   The cost of this evaluation scales with the number of non-zeros in
+   the Jacobian.
+
+   For small to medium sized problems there is a sweet spot where
+   computing the Schur complement is cheap enough that it is much more
+   efficient to explicitly compute it and use it for evaluating the
+   matrix-vector products.
+
+   Enabling this option tells ``ITERATIVE_SCHUR`` to use an explicitly
+   computed Schur complement. This can improve the performance of the
+   ``ITERATIVE_SCHUR`` solver significantly.
+
+   .. NOTE:
+
+     This option can only be used with the ``SCHUR_JACOBI``
+     preconditioner.
+
 .. member:: bool Solver::Options::use_post_ordering
 
    Default: ``false``
@@ -1549,7 +1602,7 @@
    application using Ceres and using an :class:`IterationCallback`.
 
 :class:`ParameterBlockOrdering`
--------------------------------
+===============================
 
 .. class:: ParameterBlockOrdering
 
@@ -1609,7 +1662,7 @@
 
 
 :class:`IterationCallback`
---------------------------
+==========================
 
 .. class:: IterationSummary
 
@@ -1802,7 +1855,7 @@
 
 
 :class:`CRSMatrix`
-------------------
+==================
 
 .. class:: CRSMatrix
 
@@ -1859,13 +1912,12 @@
 
 
 :class:`Solver::Summary`
-------------------------
+========================
 
 .. class:: Solver::Summary
 
    Summary of the various stages of the solver after termination.
 
-
 .. function:: string Solver::Summary::BriefReport() const
 
    A brief one line description of the state of the solver after
@@ -2095,10 +2147,17 @@
    blank and asked for an automatic ordering, or if the problem
    contains some constant or inactive parameter blocks.
 
-.. member:: PreconditionerType Solver::Summary::preconditioner_type
+.. member:: PreconditionerType Solver::Summary::preconditioner_type_given
 
-   Type of preconditioner used for solving the trust region step. Only
-   meaningful when an iterative linear solver is used.
+   Type of the preconditioner requested by the user.
+
+.. member:: PreconditionerType Solver::Summary::preconditioner_type_used
+
+   Type of the preconditioner actually used. This may be different
+   from :member:`Solver::Summary::linear_solver_type_given` if Ceres
+   determines that the problem structure is not compatible with the
+   linear solver requested or if the linear solver requested by the
+   user is not available.
 
 .. member:: VisibilityClusteringType Solver::Summary::visibility_clustering_type
 
diff --git a/docs/source/nnls_tutorial.rst b/docs/source/nnls_tutorial.rst
new file mode 100644
index 0000000..bede3c5
--- /dev/null
+++ b/docs/source/nnls_tutorial.rst
@@ -0,0 +1,823 @@
+.. highlight:: c++
+
+.. default-domain:: cpp
+
+.. _chapter-nnls_tutorial:
+
+========================
+Non-linear Least Squares
+========================
+
+Introduction
+============
+
+Ceres can solve bounds constrained robustified non-linear least
+squares problems of the form
+
+.. math:: :label: ceresproblem
+
+   \min_{\mathbf{x}} &\quad \frac{1}{2}\sum_{i} \rho_i\left(\left\|f_i\left(x_{i_1}, ... ,x_{i_k}\right)\right\|^2\right) \\
+   \text{s.t.} &\quad l_j \le x_j \le u_j
+
+Problems of this form comes up in a broad range of areas across
+science and engineering - from `fitting curves`_ in statistics, to
+constructing `3D models from photographs`_ in computer vision.
+
+.. _fitting curves: http://en.wikipedia.org/wiki/Nonlinear_regression
+.. _3D models from photographs: http://en.wikipedia.org/wiki/Bundle_adjustment
+
+In this chapter we will learn how to solve :eq:`ceresproblem` using
+Ceres Solver. Full working code for all the examples described in this
+chapter and more can be found in the `examples
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/>`_
+directory.
+
+The expression
+:math:`\rho_i\left(\left\|f_i\left(x_{i_1},...,x_{i_k}\right)\right\|^2\right)`
+is known as a ``ResidualBlock``, where :math:`f_i(\cdot)` is a
+:class:`CostFunction` that depends on the parameter blocks
+:math:`\left[x_{i_1},... , x_{i_k}\right]`. In most optimization
+problems small groups of scalars occur together. For example the three
+components of a translation vector and the four components of the
+quaternion that define the pose of a camera. We refer to such a group
+of small scalars as a ``ParameterBlock``. Of course a
+``ParameterBlock`` can just be a single parameter. :math:`l_j` and
+:math:`u_j` are bounds on the parameter block :math:`x_j`.
+
+:math:`\rho_i` is a :class:`LossFunction`. A :class:`LossFunction` is
+a scalar function that is used to reduce the influence of outliers on
+the solution of non-linear least squares problems.
+
+As a special case, when :math:`\rho_i(x) = x`, i.e., the identity
+function, and :math:`l_j = -\infty` and :math:`u_j = \infty` we get
+the more familiar `non-linear least squares problem
+<http://en.wikipedia.org/wiki/Non-linear_least_squares>`_.
+
+.. math:: \frac{1}{2}\sum_{i} \left\|f_i\left(x_{i_1}, ... ,x_{i_k}\right)\right\|^2.
+   :label: ceresproblem2
+
+.. _section-hello-world:
+
+Hello World!
+============
+
+To get started, consider the problem of finding the minimum of the
+function
+
+.. math:: \frac{1}{2}(10 -x)^2.
+
+This is a trivial problem, whose minimum is located at :math:`x = 10`,
+but it is a good place to start to illustrate the basics of solving a
+problem with Ceres [#f1]_.
+
+The first step is to write a functor that will evaluate this the
+function :math:`f(x) = 10 - x`:
+
+.. code-block:: c++
+
+   struct CostFunctor {
+      template <typename T>
+      bool operator()(const T* const x, T* residual) const {
+        residual[0] = T(10.0) - x[0];
+        return true;
+      }
+   };
+
+The important thing to note here is that ``operator()`` is a templated
+method, which assumes that all its inputs and outputs are of some type
+``T``. The use of templating here allows Ceres to call
+``CostFunctor::operator<T>()``, with ``T=double`` when just the value
+of the residual is needed, and with a special type ``T=Jet`` when the
+Jacobians are needed. In :ref:`section-derivatives` we will discuss the
+various ways of supplying derivatives to Ceres in more detail.
+
+Once we have a way of computing the residual function, it is now time
+to construct a non-linear least squares problem using it and have
+Ceres solve it.
+
+.. code-block:: c++
+
+   int main(int argc, char** argv) {
+     google::InitGoogleLogging(argv[0]);
+
+     // The variable to solve for with its initial value.
+     double initial_x = 5.0;
+     double x = initial_x;
+
+     // Build the problem.
+     Problem problem;
+
+     // Set up the only cost function (also known as residual). This uses
+     // auto-differentiation to obtain the derivative (jacobian).
+     CostFunction* cost_function =
+         new AutoDiffCostFunction<CostFunctor, 1, 1>(new CostFunctor);
+     problem.AddResidualBlock(cost_function, NULL, &x);
+
+     // Run the solver!
+     Solver::Options options;
+     options.linear_solver_type = ceres::DENSE_QR;
+     options.minimizer_progress_to_stdout = true;
+     Solver::Summary summary;
+     Solve(options, &problem, &summary);
+
+     std::cout << summary.BriefReport() << "\n";
+     std::cout << "x : " << initial_x
+               << " -> " << x << "\n";
+     return 0;
+   }
+
+:class:`AutoDiffCostFunction` takes a ``CostFunctor`` as input,
+automatically differentiates it and gives it a :class:`CostFunction`
+interface.
+
+Compiling and running `examples/helloworld.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
+gives us
+
+.. code-block:: bash
+
+   iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+      0  4.512500e+01    0.00e+00    9.50e+00   0.00e+00   0.00e+00  1.00e+04       0    5.33e-04    3.46e-03
+      1  4.511598e-07    4.51e+01    9.50e-04   9.50e+00   1.00e+00  3.00e+04       1    5.00e-04    4.05e-03
+      2  5.012552e-16    4.51e-07    3.17e-08   9.50e-04   1.00e+00  9.00e+04       1    1.60e-05    4.09e-03
+   Ceres Solver Report: Iterations: 2, Initial cost: 4.512500e+01, Final cost: 5.012552e-16, Termination: CONVERGENCE
+   x : 0.5 -> 10
+
+Starting from a :math:`x=5`, the solver in two iterations goes to 10
+[#f2]_. The careful reader will note that this is a linear problem and
+one linear solve should be enough to get the optimal value.  The
+default configuration of the solver is aimed at non-linear problems,
+and for reasons of simplicity we did not change it in this example. It
+is indeed possible to obtain the solution to this problem using Ceres
+in one iteration. Also note that the solver did get very close to the
+optimal function value of 0 in the very first iteration. We will
+discuss these issues in greater detail when we talk about convergence
+and parameter settings for Ceres.
+
+.. rubric:: Footnotes
+
+.. [#f1] `examples/helloworld.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
+.. [#f2] Actually the solver ran for three iterations, and it was
+   by looking at the value returned by the linear solver in the third
+   iteration, it observed that the update to the parameter block was too
+   small and declared convergence. Ceres only prints out the display at
+   the end of an iteration, and terminates as soon as it detects
+   convergence, which is why you only see two iterations here and not
+   three.
+
+.. _section-derivatives:
+
+
+Derivatives
+===========
+
+Ceres Solver like most optimization packages, depends on being able to
+evaluate the value and the derivatives of each term in the objective
+function at arbitrary parameter values. Doing so correctly and
+efficiently is essential to getting good results.  Ceres Solver
+provides a number of ways of doing so. You have already seen one of
+them in action --
+Automatic Differentiation in `examples/helloworld.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
+
+We now consider the other two possibilities. Analytic and numeric
+derivatives.
+
+
+Numeric Derivatives
+-------------------
+
+In some cases, its not possible to define a templated cost functor,
+for example when the evaluation of the residual involves a call to a
+library function that you do not have control over.  In such a
+situation, numerical differentiation can be used. The user defines a
+functor which computes the residual value and construct a
+:class:`NumericDiffCostFunction` using it. e.g., for :math:`f(x) = 10 - x`
+the corresponding functor would be
+
+.. code-block:: c++
+
+  struct NumericDiffCostFunctor {
+    bool operator()(const double* const x, double* residual) const {
+      residual[0] = 10.0 - x[0];
+      return true;
+    }
+  };
+
+Which is added to the :class:`Problem` as:
+
+.. code-block:: c++
+
+  CostFunction* cost_function =
+    new NumericDiffCostFunction<NumericDiffCostFunctor, ceres::CENTRAL, 1, 1, 1>(
+        new NumericDiffCostFunctor)
+  problem.AddResidualBlock(cost_function, NULL, &x);
+
+Notice the parallel from when we were using automatic differentiation
+
+.. code-block:: c++
+
+  CostFunction* cost_function =
+      new AutoDiffCostFunction<CostFunctor, 1, 1>(new CostFunctor);
+  problem.AddResidualBlock(cost_function, NULL, &x);
+
+The construction looks almost identical to the one used for automatic
+differentiation, except for an extra template parameter that indicates
+the kind of finite differencing scheme to be used for computing the
+numerical derivatives [#f3]_. For more details see the documentation
+for :class:`NumericDiffCostFunction`.
+
+**Generally speaking we recommend automatic differentiation instead of
+numeric differentiation. The use of C++ templates makes automatic
+differentiation efficient, whereas numeric differentiation is
+expensive, prone to numeric errors, and leads to slower convergence.**
+
+
+Analytic Derivatives
+--------------------
+
+In some cases, using automatic differentiation is not possible. For
+example, it may be the case that it is more efficient to compute the
+derivatives in closed form instead of relying on the chain rule used
+by the automatic differentiation code.
+
+In such cases, it is possible to supply your own residual and jacobian
+computation code. To do this, define a subclass of
+:class:`CostFunction` or :class:`SizedCostFunction` if you know the
+sizes of the parameters and residuals at compile time. Here for
+example is ``SimpleCostFunction`` that implements :math:`f(x) = 10 -
+x`.
+
+.. code-block:: c++
+
+  class QuadraticCostFunction : public ceres::SizedCostFunction<1, 1> {
+   public:
+    virtual ~QuadraticCostFunction() {}
+    virtual bool Evaluate(double const* const* parameters,
+                          double* residuals,
+                          double** jacobians) const {
+      const double x = parameters[0][0];
+      residuals[0] = 10 - x;
+
+      // Compute the Jacobian if asked for.
+      if (jacobians != NULL && jacobians[0] != NULL) {
+        jacobians[0][0] = -1;
+      }
+      return true;
+    }
+  };
+
+
+``SimpleCostFunction::Evaluate`` is provided with an input array of
+``parameters``, an output array ``residuals`` for residuals and an
+output array ``jacobians`` for Jacobians. The ``jacobians`` array is
+optional, ``Evaluate`` is expected to check when it is non-null, and
+if it is the case then fill it with the values of the derivative of
+the residual function. In this case since the residual function is
+linear, the Jacobian is constant [#f4]_ .
+
+As can be seen from the above code fragments, implementing
+:class:`CostFunction` objects is a bit tedious. We recommend that
+unless you have a good reason to manage the jacobian computation
+yourself, you use :class:`AutoDiffCostFunction` or
+:class:`NumericDiffCostFunction` to construct your residual blocks.
+
+More About Derivatives
+----------------------
+
+Computing derivatives is by far the most complicated part of using
+Ceres, and depending on the circumstance the user may need more
+sophisticated ways of computing derivatives. This section just
+scratches the surface of how derivatives can be supplied to
+Ceres. Once you are comfortable with using
+:class:`NumericDiffCostFunction` and :class:`AutoDiffCostFunction` we
+recommend taking a look at :class:`DynamicAutoDiffCostFunction`,
+:class:`CostFunctionToFunctor`, :class:`NumericDiffFunctor` and
+:class:`ConditionedCostFunction` for more advanced ways of
+constructing and computing cost functions.
+
+.. rubric:: Footnotes
+
+.. [#f3] `examples/helloworld_numeric_diff.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld_numeric_diff.cc>`_.
+.. [#f4] `examples/helloworld_analytic_diff.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld_analytic_diff.cc>`_.
+
+
+.. _section-powell:
+
+Powell's Function
+=================
+
+Consider now a slightly more complicated example -- the minimization
+of Powell's function. Let :math:`x = \left[x_1, x_2, x_3, x_4 \right]`
+and
+
+.. math::
+
+  \begin{align}
+     f_1(x) &= x_1 + 10x_2 \\
+     f_2(x) &= \sqrt{5}  (x_3 - x_4)\\
+     f_3(x) &= (x_2 - 2x_3)^2\\
+     f_4(x) &= \sqrt{10}  (x_1 - x_4)^2\\
+       F(x) &= \left[f_1(x),\ f_2(x),\ f_3(x),\ f_4(x) \right]
+  \end{align}
+
+
+:math:`F(x)` is a function of four parameters, has four residuals
+and we wish to find :math:`x` such that :math:`\frac{1}{2}\|F(x)\|^2`
+is minimized.
+
+Again, the first step is to define functors that evaluate of the terms
+in the objective functor. Here is the code for evaluating
+:math:`f_4(x_1, x_4)`:
+
+.. code-block:: c++
+
+ struct F4 {
+   template <typename T>
+   bool operator()(const T* const x1, const T* const x4, T* residual) const {
+     residual[0] = T(sqrt(10.0)) * (x1[0] - x4[0]) * (x1[0] - x4[0]);
+     return true;
+   }
+ };
+
+
+Similarly, we can define classes ``F1``, ``F2`` and ``F4`` to evaluate
+:math:`f_1(x_1, x_2)`, :math:`f_2(x_3, x_4)` and :math:`f_3(x_2, x_3)`
+respectively. Using these, the problem can be constructed as follows:
+
+
+.. code-block:: c++
+
+  double x1 =  3.0; double x2 = -1.0; double x3 =  0.0; double x4 = 1.0;
+
+  Problem problem;
+
+  // Add residual terms to the problem using the using the autodiff
+  // wrapper to get the derivatives automatically.
+  problem.AddResidualBlock(
+    new AutoDiffCostFunction<F1, 1, 1, 1>(new F1), NULL, &x1, &x2);
+  problem.AddResidualBlock(
+    new AutoDiffCostFunction<F2, 1, 1, 1>(new F2), NULL, &x3, &x4);
+  problem.AddResidualBlock(
+    new AutoDiffCostFunction<F3, 1, 1, 1>(new F3), NULL, &x2, &x3)
+  problem.AddResidualBlock(
+    new AutoDiffCostFunction<F4, 1, 1, 1>(new F4), NULL, &x1, &x4);
+
+
+Note that each ``ResidualBlock`` only depends on the two parameters
+that the corresponding residual object depends on and not on all four
+parameters. Compiling and running `examples/powell.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/powell.cc>`_
+gives us:
+
+.. code-block:: bash
+
+    Initial x1 = 3, x2 = -1, x3 = 0, x4 = 1
+    iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+       0  1.075000e+02    0.00e+00    1.55e+02   0.00e+00   0.00e+00  1.00e+04       0    4.95e-04    2.30e-03
+       1  5.036190e+00    1.02e+02    2.00e+01   2.16e+00   9.53e-01  3.00e+04       1    4.39e-05    2.40e-03
+       2  3.148168e-01    4.72e+00    2.50e+00   6.23e-01   9.37e-01  9.00e+04       1    9.06e-06    2.43e-03
+       3  1.967760e-02    2.95e-01    3.13e-01   3.08e-01   9.37e-01  2.70e+05       1    8.11e-06    2.45e-03
+       4  1.229900e-03    1.84e-02    3.91e-02   1.54e-01   9.37e-01  8.10e+05       1    6.91e-06    2.48e-03
+       5  7.687123e-05    1.15e-03    4.89e-03   7.69e-02   9.37e-01  2.43e+06       1    7.87e-06    2.50e-03
+       6  4.804625e-06    7.21e-05    6.11e-04   3.85e-02   9.37e-01  7.29e+06       1    5.96e-06    2.52e-03
+       7  3.003028e-07    4.50e-06    7.64e-05   1.92e-02   9.37e-01  2.19e+07       1    5.96e-06    2.55e-03
+       8  1.877006e-08    2.82e-07    9.54e-06   9.62e-03   9.37e-01  6.56e+07       1    5.96e-06    2.57e-03
+       9  1.173223e-09    1.76e-08    1.19e-06   4.81e-03   9.37e-01  1.97e+08       1    7.87e-06    2.60e-03
+      10  7.333425e-11    1.10e-09    1.49e-07   2.40e-03   9.37e-01  5.90e+08       1    6.20e-06    2.63e-03
+      11  4.584044e-12    6.88e-11    1.86e-08   1.20e-03   9.37e-01  1.77e+09       1    6.91e-06    2.65e-03
+      12  2.865573e-13    4.30e-12    2.33e-09   6.02e-04   9.37e-01  5.31e+09       1    5.96e-06    2.67e-03
+      13  1.791438e-14    2.69e-13    2.91e-10   3.01e-04   9.37e-01  1.59e+10       1    7.15e-06    2.69e-03
+
+    Ceres Solver v1.10.0 Solve Report
+    ----------------------------------
+                                         Original                  Reduced
+    Parameter blocks                            4                        4
+    Parameters                                  4                        4
+    Residual blocks                             4                        4
+    Residual                                    4                        4
+
+    Minimizer                        TRUST_REGION
+
+    Dense linear algebra library            EIGEN
+    Trust region strategy     LEVENBERG_MARQUARDT
+
+                                            Given                     Used
+    Linear solver                        DENSE_QR                 DENSE_QR
+    Threads                                     1                        1
+    Linear solver threads                       1                        1
+
+    Cost:
+    Initial                          1.075000e+02
+    Final                            1.791438e-14
+    Change                           1.075000e+02
+
+    Minimizer iterations                       14
+    Successful steps                           14
+    Unsuccessful steps                          0
+
+    Time (in seconds):
+    Preprocessor                            0.002
+
+      Residual evaluation                   0.000
+      Jacobian evaluation                   0.000
+      Linear solver                         0.000
+    Minimizer                               0.001
+
+    Postprocessor                           0.000
+    Total                                   0.005
+
+    Termination:                      CONVERGENCE (Gradient tolerance reached. Gradient max norm: 3.642190e-11 <= 1.000000e-10)
+
+    Final x1 = 0.000292189, x2 = -2.92189e-05, x3 = 4.79511e-05, x4 = 4.79511e-05
+
+It is easy to see that the optimal solution to this problem is at
+:math:`x_1=0, x_2=0, x_3=0, x_4=0` with an objective function value of
+:math:`0`. In 10 iterations, Ceres finds a solution with an objective
+function value of :math:`4\times 10^{-12}`.
+
+.. rubric:: Footnotes
+
+.. [#f5] `examples/powell.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/powell.cc>`_.
+
+
+.. _section-fitting:
+
+Curve Fitting
+=============
+
+The examples we have seen until now are simple optimization problems
+with no data. The original purpose of least squares and non-linear
+least squares analysis was fitting curves to data. It is only
+appropriate that we now consider an example of such a problem
+[#f6]_. It contains data generated by sampling the curve :math:`y =
+e^{0.3x + 0.1}` and adding Gaussian noise with standard deviation
+:math:`\sigma = 0.2`. Let us fit some data to the curve
+
+.. math::  y = e^{mx + c}.
+
+We begin by defining a templated object to evaluate the
+residual. There will be a residual for each observation.
+
+.. code-block:: c++
+
+ struct ExponentialResidual {
+   ExponentialResidual(double x, double y)
+       : x_(x), y_(y) {}
+
+   template <typename T>
+   bool operator()(const T* const m, const T* const c, T* residual) const {
+     residual[0] = T(y_) - exp(m[0] * T(x_) + c[0]);
+     return true;
+   }
+
+  private:
+   // Observations for a sample.
+   const double x_;
+   const double y_;
+ };
+
+Assuming the observations are in a :math:`2n` sized array called
+``data`` the problem construction is a simple matter of creating a
+:class:`CostFunction` for every observation.
+
+
+.. code-block:: c++
+
+ double m = 0.0;
+ double c = 0.0;
+
+ Problem problem;
+ for (int i = 0; i < kNumObservations; ++i) {
+   CostFunction* cost_function =
+        new AutoDiffCostFunction<ExponentialResidual, 1, 1, 1>(
+            new ExponentialResidual(data[2 * i], data[2 * i + 1]));
+   problem.AddResidualBlock(cost_function, NULL, &m, &c);
+ }
+
+Compiling and running `examples/curve_fitting.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/curve_fitting.cc>`_
+gives us:
+
+.. code-block:: bash
+
+    iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+       0  1.211734e+02    0.00e+00    3.61e+02   0.00e+00   0.00e+00  1.00e+04       0    5.34e-04    2.56e-03
+       1  1.211734e+02   -2.21e+03    0.00e+00   7.52e-01  -1.87e+01  5.00e+03       1    4.29e-05    3.25e-03
+       2  1.211734e+02   -2.21e+03    0.00e+00   7.51e-01  -1.86e+01  1.25e+03       1    1.10e-05    3.28e-03
+       3  1.211734e+02   -2.19e+03    0.00e+00   7.48e-01  -1.85e+01  1.56e+02       1    1.41e-05    3.31e-03
+       4  1.211734e+02   -2.02e+03    0.00e+00   7.22e-01  -1.70e+01  9.77e+00       1    1.00e-05    3.34e-03
+       5  1.211734e+02   -7.34e+02    0.00e+00   5.78e-01  -6.32e+00  3.05e-01       1    1.00e-05    3.36e-03
+       6  3.306595e+01    8.81e+01    4.10e+02   3.18e-01   1.37e+00  9.16e-01       1    2.79e-05    3.41e-03
+       7  6.426770e+00    2.66e+01    1.81e+02   1.29e-01   1.10e+00  2.75e+00       1    2.10e-05    3.45e-03
+       8  3.344546e+00    3.08e+00    5.51e+01   3.05e-02   1.03e+00  8.24e+00       1    2.10e-05    3.48e-03
+       9  1.987485e+00    1.36e+00    2.33e+01   8.87e-02   9.94e-01  2.47e+01       1    2.10e-05    3.52e-03
+      10  1.211585e+00    7.76e-01    8.22e+00   1.05e-01   9.89e-01  7.42e+01       1    2.10e-05    3.56e-03
+      11  1.063265e+00    1.48e-01    1.44e+00   6.06e-02   9.97e-01  2.22e+02       1    2.60e-05    3.61e-03
+      12  1.056795e+00    6.47e-03    1.18e-01   1.47e-02   1.00e+00  6.67e+02       1    2.10e-05    3.64e-03
+      13  1.056751e+00    4.39e-05    3.79e-03   1.28e-03   1.00e+00  2.00e+03       1    2.10e-05    3.68e-03
+    Ceres Solver Report: Iterations: 13, Initial cost: 1.211734e+02, Final cost: 1.056751e+00, Termination: CONVERGENCE
+    Initial m: 0 c: 0
+    Final   m: 0.291861 c: 0.131439
+
+Starting from parameter values :math:`m = 0, c=0` with an initial
+objective function value of :math:`121.173` Ceres finds a solution
+:math:`m= 0.291861, c = 0.131439` with an objective function value of
+:math:`1.05675`. These values are a bit different than the
+parameters of the original model :math:`m=0.3, c= 0.1`, but this is
+expected. When reconstructing a curve from noisy data, we expect to
+see such deviations. Indeed, if you were to evaluate the objective
+function for :math:`m=0.3, c=0.1`, the fit is worse with an objective
+function value of :math:`1.082425`.  The figure below illustrates the fit.
+
+.. figure:: least_squares_fit.png
+   :figwidth: 500px
+   :height: 400px
+   :align: center
+
+   Least squares curve fitting.
+
+
+.. rubric:: Footnotes
+
+.. [#f6] `examples/curve_fitting.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/curve_fitting.cc>`_
+
+
+Robust Curve Fitting
+=====================
+
+Now suppose the data we are given has some outliers, i.e., we have
+some points that do not obey the noise model. If we were to use the
+code above to fit such data, we would get a fit that looks as
+below. Notice how the fitted curve deviates from the ground truth.
+
+.. figure:: non_robust_least_squares_fit.png
+   :figwidth: 500px
+   :height: 400px
+   :align: center
+
+To deal with outliers, a standard technique is to use a
+:class:`LossFunction`. Loss functions reduce the influence of
+residual blocks with high residuals, usually the ones corresponding to
+outliers. To associate a loss function with a residual block, we change
+
+.. code-block:: c++
+
+   problem.AddResidualBlock(cost_function, NULL , &m, &c);
+
+to
+
+.. code-block:: c++
+
+   problem.AddResidualBlock(cost_function, new CauchyLoss(0.5) , &m, &c);
+
+:class:`CauchyLoss` is one of the loss functions that ships with Ceres
+Solver. The argument :math:`0.5` specifies the scale of the loss
+function. As a result, we get the fit below [#f7]_. Notice how the
+fitted curve moves back closer to the ground truth curve.
+
+.. figure:: robust_least_squares_fit.png
+   :figwidth: 500px
+   :height: 400px
+   :align: center
+
+   Using :class:`LossFunction` to reduce the effect of outliers on a
+   least squares fit.
+
+
+.. rubric:: Footnotes
+
+.. [#f7] `examples/robust_curve_fitting.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/robust_curve_fitting.cc>`_
+
+
+Bundle Adjustment
+=================
+
+One of the main reasons for writing Ceres was our need to solve large
+scale bundle adjustment problems [HartleyZisserman]_, [Triggs]_.
+
+Given a set of measured image feature locations and correspondences,
+the goal of bundle adjustment is to find 3D point positions and camera
+parameters that minimize the reprojection error. This optimization
+problem is usually formulated as a non-linear least squares problem,
+where the error is the squared :math:`L_2` norm of the difference between
+the observed feature location and the projection of the corresponding
+3D point on the image plane of the camera. Ceres has extensive support
+for solving bundle adjustment problems.
+
+Let us solve a problem from the `BAL
+<http://grail.cs.washington.edu/projects/bal/>`_ dataset [#f8]_.
+
+The first step as usual is to define a templated functor that computes
+the reprojection error/residual. The structure of the functor is
+similar to the ``ExponentialResidual``, in that there is an
+instance of this object responsible for each image observation.
+
+Each residual in a BAL problem depends on a three dimensional point
+and a nine parameter camera. The nine parameters defining the camera
+are: three for rotation as a Rodriques' axis-angle vector, three
+for translation, one for focal length and two for radial distortion.
+The details of this camera model can be found the `Bundler homepage
+<http://phototour.cs.washington.edu/bundler/>`_ and the `BAL homepage
+<http://grail.cs.washington.edu/projects/bal/>`_.
+
+.. code-block:: c++
+
+ struct SnavelyReprojectionError {
+   SnavelyReprojectionError(double observed_x, double observed_y)
+       : observed_x(observed_x), observed_y(observed_y) {}
+
+   template <typename T>
+   bool operator()(const T* const camera,
+                   const T* const point,
+                   T* residuals) const {
+     // camera[0,1,2] are the angle-axis rotation.
+     T p[3];
+     ceres::AngleAxisRotatePoint(camera, point, p);
+     // camera[3,4,5] are the translation.
+     p[0] += camera[3]; p[1] += camera[4]; p[2] += camera[5];
+
+     // Compute the center of distortion. The sign change comes from
+     // the camera model that Noah Snavely's Bundler assumes, whereby
+     // the camera coordinate system has a negative z axis.
+     T xp = - p[0] / p[2];
+     T yp = - p[1] / p[2];
+
+     // Apply second and fourth order radial distortion.
+     const T& l1 = camera[7];
+     const T& l2 = camera[8];
+     T r2 = xp*xp + yp*yp;
+     T distortion = T(1.0) + r2  * (l1 + l2  * r2);
+
+     // Compute final projected point position.
+     const T& focal = camera[6];
+     T predicted_x = focal * distortion * xp;
+     T predicted_y = focal * distortion * yp;
+
+     // The error is the difference between the predicted and observed position.
+     residuals[0] = predicted_x - T(observed_x);
+     residuals[1] = predicted_y - T(observed_y);
+     return true;
+   }
+
+    // Factory to hide the construction of the CostFunction object from
+    // the client code.
+    static ceres::CostFunction* Create(const double observed_x,
+                                       const double observed_y) {
+      return (new ceres::AutoDiffCostFunction<SnavelyReprojectionError, 2, 9, 3>(
+                  new SnavelyReprojectionError(observed_x, observed_y)));
+    }
+
+   double observed_x;
+   double observed_y;
+ };
+
+
+Note that unlike the examples before, this is a non-trivial function
+and computing its analytic Jacobian is a bit of a pain. Automatic
+differentiation makes life much simpler. The function
+:func:`AngleAxisRotatePoint` and other functions for manipulating
+rotations can be found in ``include/ceres/rotation.h``.
+
+Given this functor, the bundle adjustment problem can be constructed
+as follows:
+
+.. code-block:: c++
+
+ ceres::Problem problem;
+ for (int i = 0; i < bal_problem.num_observations(); ++i) {
+   ceres::CostFunction* cost_function =
+       SnavelyReprojectionError::Create(
+            bal_problem.observations()[2 * i + 0],
+            bal_problem.observations()[2 * i + 1]);
+   problem.AddResidualBlock(cost_function,
+                            NULL /* squared loss */,
+                            bal_problem.mutable_camera_for_observation(i),
+                            bal_problem.mutable_point_for_observation(i));
+ }
+
+
+Notice that the problem construction for bundle adjustment is very
+similar to the curve fitting example -- one term is added to the
+objective function per observation.
+
+Since this large sparse problem (well large for ``DENSE_QR`` anyways),
+one way to solve this problem is to set
+:member:`Solver::Options::linear_solver_type` to
+``SPARSE_NORMAL_CHOLESKY`` and call :member:`Solve`. And while this is
+a reasonable thing to do, bundle adjustment problems have a special
+sparsity structure that can be exploited to solve them much more
+efficiently. Ceres provides three specialized solvers (collectively
+known as Schur-based solvers) for this task. The example code uses the
+simplest of them ``DENSE_SCHUR``.
+
+.. code-block:: c++
+
+ ceres::Solver::Options options;
+ options.linear_solver_type = ceres::DENSE_SCHUR;
+ options.minimizer_progress_to_stdout = true;
+ ceres::Solver::Summary summary;
+ ceres::Solve(options, &problem, &summary);
+ std::cout << summary.FullReport() << "\n";
+
+For a more sophisticated bundle adjustment example which demonstrates
+the use of Ceres' more advanced features including its various linear
+solvers, robust loss functions and local parameterizations see
+`examples/bundle_adjuster.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/bundle_adjuster.cc>`_
+
+
+.. rubric:: Footnotes
+
+.. [#f8] `examples/simple_bundle_adjuster.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/simple_bundle_adjuster.cc>`_
+
+Other Examples
+==============
+
+Besides the examples in this chapter, the  `example
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/>`_
+directory contains a number of other examples:
+
+#. `bundle_adjuster.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/bundle_adjuster.cc>`_
+   shows how to use the various features of Ceres to solve bundle
+   adjustment problems.
+
+#. `circle_fit.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/circle_fit.cc>`_
+   shows how to fit data to a circle.
+
+#. `ellipse_approximation.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/ellipse_approximation.cc>`_
+   fits points randomly distributed on an ellipse with an approximate
+   line segment contour. This is done by jointly optimizing the
+   control points of the line segment contour along with the preimage
+   positions for the data points. The purpose of this example is to
+   show an example use case for ``Solver::Options::dynamic_sparsity``,
+   and how it can benefit problems which are numerically dense but
+   dynamically sparse.
+
+#. `denoising.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/denoising.cc>`_
+   implements image denoising using the `Fields of Experts
+   <http://www.gris.informatik.tu-darmstadt.de/~sroth/research/foe/index.html>`_
+   model.
+
+#. `nist.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/nist.cc>`_
+   implements and attempts to solves the `NIST
+   <http://www.itl.nist.gov/div898/strd/nls/nls_main.shtm>`_
+   non-linear regression problems.
+
+#. `nist.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/nist.cc>`_
+   implements and attempts to solves the `NIST
+   <http://www.itl.nist.gov/div898/strd/nls/nls_main.shtm>`_
+   non-linear regression problems.
+
+#. `more_garbow_hillstrom.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/more_garbow_hillstrom.cc>`_
+   A subset of the test problems from the paper
+
+   Testing Unconstrained Optimization Software
+   Jorge J. More, Burton S. Garbow and Kenneth E. Hillstrom
+   ACM Transactions on Mathematical Software, 7(1), pp. 17-41, 1981
+
+   which were augmented with bounds and used for testing bounds
+   constrained optimization algorithms by
+
+   A Trust Region Approach to Linearly Constrained Optimization
+   David M. Gay
+   Numerical Analysis (Griffiths, D.F., ed.), pp. 72-105
+   Lecture Notes in Mathematics 1066, Springer Verlag, 1984.
+
+
+#. `libmv_bundle_adjuster.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/libmv_bundle_adjuster.cc>`_
+   is the bundle adjustment algorithm used by `Blender <www.blender.org>`_/libmv.
+
+#. `libmv_homography.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/libmv_homography.cc>`_
+   This file demonstrates solving for a homography between two sets of
+   points and using a custom exit criterion by having a callback check
+   for image-space error.
+
+#. `robot_pose_mle.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/robot_pose_mle.cc>`_
+   This example demonstrates how to use the ``DynamicAutoDiffCostFunction``
+   variant of CostFunction. The ``DynamicAutoDiffCostFunction`` is meant to
+   be used in cases where the number of parameter blocks or the sizes are not
+   known at compile time.
+
+   This example simulates a robot traversing down a 1-dimension hallway with
+   noise odometry readings and noisy range readings of the end of the hallway.
+   By fusing the noisy odometry and sensor readings this example demonstrates
+   how to compute the maximum likelihood estimate (MLE) of the robot's pose at
+   each timestep.
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index 2d8da68..a3fd212 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -1,772 +1,11 @@
-.. highlight:: c++
-
-.. default-domain:: cpp
-
 .. _chapter-tutorial:
 
 ========
 Tutorial
 ========
 
-Ceres solves robustified non-linear bounds constrained least squares
-problems of the form
+.. toctree::
+   :maxdepth: 3
 
-.. math:: :label: ceresproblem
-
-   \min_{\mathbf{x}} &\quad \frac{1}{2}\sum_{i} \rho_i\left(\left\|f_i\left(x_{i_1}, ... ,x_{i_k}\right)\right\|^2\right) \\
-   \text{s.t.} &\quad l_j \le x_j \le u_j
-
-Problems of this form comes up in a broad range of areas across
-science and engineering - from `fitting curves`_ in statistics, to
-constructing `3D models from photographs`_ in computer vision.
-
-.. _fitting curves: http://en.wikipedia.org/wiki/Nonlinear_regression
-.. _3D models from photographs: http://en.wikipedia.org/wiki/Bundle_adjustment
-
-In this chapter we will learn how to solve :eq:`ceresproblem` using
-Ceres Solver. Full working code for all the examples described in this
-chapter and more can be found in the `examples
-<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/>`_
-directory.
-
-The expression
-:math:`\rho_i\left(\left\|f_i\left(x_{i_1},...,x_{i_k}\right)\right\|^2\right)`
-is known as a ``ResidualBlock``, where :math:`f_i(\cdot)` is a
-:class:`CostFunction` that depends on the parameter blocks
-:math:`\left[x_{i_1},... , x_{i_k}\right]`. In most optimization
-problems small groups of scalars occur together. For example the three
-components of a translation vector and the four components of the
-quaternion that define the pose of a camera. We refer to such a group
-of small scalars as a ``ParameterBlock``. Of course a
-``ParameterBlock`` can just be a single parameter. :math:`l_j` and
-:math:`u_j` are bounds on the parameter block :math:`x_j`.
-
-:math:`\rho_i` is a :class:`LossFunction`. A :class:`LossFunction` is
-a scalar function that is used to reduce the influence of outliers on
-the solution of non-linear least squares problems.
-
-As a special case, when :math:`\rho_i(x) = x`, i.e., the identity
-function, and :math:`l_j = -\infty` and :math:`u_j = \infty` we get
-the more familiar `non-linear least squares problem
-<http://en.wikipedia.org/wiki/Non-linear_least_squares>`_.
-
-.. math:: \frac{1}{2}\sum_{i} \left\|f_i\left(x_{i_1}, ... ,x_{i_k}\right)\right\|^2.
-   :label: ceresproblem2
-
-.. _section-hello-world:
-
-Hello World!
-============
-
-To get started, consider the problem of finding the minimum of the
-function
-
-.. math:: \frac{1}{2}(10 -x)^2.
-
-This is a trivial problem, whose minimum is located at :math:`x = 10`,
-but it is a good place to start to illustrate the basics of solving a
-problem with Ceres [#f1]_.
-
-The first step is to write a functor that will evaluate this the
-function :math:`f(x) = 10 - x`:
-
-.. code-block:: c++
-
-   struct CostFunctor {
-      template <typename T>
-      bool operator()(const T* const x, T* residual) const {
-        residual[0] = T(10.0) - x[0];
-        return true;
-      }
-   };
-
-The important thing to note here is that ``operator()`` is a templated
-method, which assumes that all its inputs and outputs are of some type
-``T``. The use of templating here allows Ceres to call
-``CostFunctor::operator<T>()``, with ``T=double`` when just the value
-of the residual is needed, and with a special type ``T=Jet`` when the
-Jacobians are needed. In :ref:`section-derivatives` we will discuss the
-various ways of supplying derivatives to Ceres in more detail.
-
-Once we have a way of computing the residual function, it is now time
-to construct a non-linear least squares problem using it and have
-Ceres solve it.
-
-.. code-block:: c++
-
-   int main(int argc, char** argv) {
-     google::InitGoogleLogging(argv[0]);
-
-     // The variable to solve for with its initial value.
-     double initial_x = 5.0;
-     double x = initial_x;
-
-     // Build the problem.
-     Problem problem;
-
-     // Set up the only cost function (also known as residual). This uses
-     // auto-differentiation to obtain the derivative (jacobian).
-     CostFunction* cost_function =
-         new AutoDiffCostFunction<CostFunctor, 1, 1>(new CostFunctor);
-     problem.AddResidualBlock(cost_function, NULL, &x);
-
-     // Run the solver!
-     Solver::Options options;
-     options.linear_solver_type = ceres::DENSE_QR;
-     options.minimizer_progress_to_stdout = true;
-     Solver::Summary summary;
-     Solve(options, &problem, &summary);
-
-     std::cout << summary.BriefReport() << "\n";
-     std::cout << "x : " << initial_x
-               << " -> " << x << "\n";
-     return 0;
-   }
-
-:class:`AutoDiffCostFunction` takes a ``CostFunctor`` as input,
-automatically differentiates it and gives it a :class:`CostFunction`
-interface.
-
-Compiling and running `examples/helloworld.cc
-<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
-gives us
-
-.. code-block:: bash
-
-   iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
-      0  4.512500e+01    0.00e+00    9.50e+00   0.00e+00   0.00e+00  1.00e+04       0    5.33e-04    3.46e-03
-      1  4.511598e-07    4.51e+01    9.50e-04   9.50e+00   1.00e+00  3.00e+04       1    5.00e-04    4.05e-03
-      2  5.012552e-16    4.51e-07    3.17e-08   9.50e-04   1.00e+00  9.00e+04       1    1.60e-05    4.09e-03
-   Ceres Solver Report: Iterations: 2, Initial cost: 4.512500e+01, Final cost: 5.012552e-16, Termination: CONVERGENCE
-   x : 0.5 -> 10
-
-Starting from a :math:`x=5`, the solver in two iterations goes to 10
-[#f2]_. The careful reader will note that this is a linear problem and
-one linear solve should be enough to get the optimal value.  The
-default configuration of the solver is aimed at non-linear problems,
-and for reasons of simplicity we did not change it in this example. It
-is indeed possible to obtain the solution to this problem using Ceres
-in one iteration. Also note that the solver did get very close to the
-optimal function value of 0 in the very first iteration. We will
-discuss these issues in greater detail when we talk about convergence
-and parameter settings for Ceres.
-
-.. rubric:: Footnotes
-
-.. [#f1] `examples/helloworld.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
-
-.. [#f2] Actually the solver ran for three iterations, and it was
-   by looking at the value returned by the linear solver in the third
-   iteration, it observed that the update to the parameter block was too
-   small and declared convergence. Ceres only prints out the display at
-   the end of an iteration, and terminates as soon as it detects
-   convergence, which is why you only see two iterations here and not
-   three.
-
-.. _section-derivatives:
-
-
-Derivatives
-===========
-
-Ceres Solver like most optimization packages, depends on being able to
-evaluate the value and the derivatives of each term in the objective
-function at arbitrary parameter values. Doing so correctly and
-efficiently is essential to getting good results.  Ceres Solver
-provides a number of ways of doing so. You have already seen one of
-them in action --
-Automatic Differentiation in `examples/helloworld.cc
-<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
-
-We now consider the other two possibilities. Analytic and numeric
-derivatives.
-
-
-Numeric Derivatives
--------------------
-
-In some cases, its not possible to define a templated cost functor,
-for example when the evaluation of the residual involves a call to a
-library function that you do not have control over.  In such a
-situation, numerical differentiation can be used. The user defines a
-functor which computes the residual value and construct a
-:class:`NumericDiffCostFunction` using it. e.g., for :math:`f(x) = 10 - x`
-the corresponding functor would be
-
-.. code-block:: c++
-
-  struct NumericDiffCostFunctor {
-    bool operator()(const double* const x, double* residual) const {
-      residual[0] = 10.0 - x[0];
-      return true;
-    }
-  };
-
-Which is added to the :class:`Problem` as:
-
-.. code-block:: c++
-
-  CostFunction* cost_function =
-    new NumericDiffCostFunction<NumericDiffCostFunctor, ceres::CENTRAL, 1, 1, 1>(
-        new NumericDiffCostFunctor)
-  problem.AddResidualBlock(cost_function, NULL, &x);
-
-Notice the parallel from when we were using automatic differentiation
-
-.. code-block:: c++
-
-  CostFunction* cost_function =
-      new AutoDiffCostFunction<CostFunctor, 1, 1>(new CostFunctor);
-  problem.AddResidualBlock(cost_function, NULL, &x);
-
-The construction looks almost identical to the one used for automatic
-differentiation, except for an extra template parameter that indicates
-the kind of finite differencing scheme to be used for computing the
-numerical derivatives [#f3]_. For more details see the documentation
-for :class:`NumericDiffCostFunction`.
-
-**Generally speaking we recommend automatic differentiation instead of
-numeric differentiation. The use of C++ templates makes automatic
-differentiation efficient, whereas numeric differentiation is
-expensive, prone to numeric errors, and leads to slower convergence.**
-
-
-Analytic Derivatives
---------------------
-
-In some cases, using automatic differentiation is not possible. For
-example, it may be the case that it is more efficient to compute the
-derivatives in closed form instead of relying on the chain rule used
-by the automatic differentiation code.
-
-In such cases, it is possible to supply your own residual and jacobian
-computation code. To do this, define a subclass of
-:class:`CostFunction` or :class:`SizedCostFunction` if you know the
-sizes of the parameters and residuals at compile time. Here for
-example is ``SimpleCostFunction`` that implements :math:`f(x) = 10 -
-x`.
-
-.. code-block:: c++
-
-  class QuadraticCostFunction : public ceres::SizedCostFunction<1, 1> {
-   public:
-    virtual ~QuadraticCostFunction() {}
-    virtual bool Evaluate(double const* const* parameters,
-                          double* residuals,
-                          double** jacobians) const {
-      const double x = parameters[0][0];
-      residuals[0] = 10 - x;
-
-      // Compute the Jacobian if asked for.
-      if (jacobians != NULL && jacobians[0] != NULL) {
-        jacobians[0][0] = -1;
-      }
-      return true;
-    }
-  };
-
-
-``SimpleCostFunction::Evaluate`` is provided with an input array of
-``parameters``, an output array ``residuals`` for residuals and an
-output array ``jacobians`` for Jacobians. The ``jacobians`` array is
-optional, ``Evaluate`` is expected to check when it is non-null, and
-if it is the case then fill it with the values of the derivative of
-the residual function. In this case since the residual function is
-linear, the Jacobian is constant [#f4]_ .
-
-As can be seen from the above code fragments, implementing
-:class:`CostFunction` objects is a bit tedious. We recommend that
-unless you have a good reason to manage the jacobian computation
-yourself, you use :class:`AutoDiffCostFunction` or
-:class:`NumericDiffCostFunction` to construct your residual blocks.
-
-More About Derivatives
-----------------------
-
-Computing derivatives is by far the most complicated part of using
-Ceres, and depending on the circumstance the user may need more
-sophisticated ways of computing derivatives. This section just
-scratches the surface of how derivatives can be supplied to
-Ceres. Once you are comfortable with using
-:class:`NumericDiffCostFunction` and :class:`AutoDiffCostFunction` we
-recommend taking a look at :class:`DynamicAutoDiffCostFunction`,
-:class:`CostFunctionToFunctor`, :class:`NumericDiffFunctor` and
-:class:`ConditionedCostFunction` for more advanced ways of
-constructing and computing cost functions.
-
-.. rubric:: Footnotes
-
-.. [#f3] `examples/helloworld_numeric_diff.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld_numeric_diff.cc>`_.
-
-.. [#f4] `examples/helloworld_analytic_diff.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld_analytic_diff.cc>`_.
-
-
-.. _section-powell:
-
-Powell's Function
-=================
-
-Consider now a slightly more complicated example -- the minimization
-of Powell's function. Let :math:`x = \left[x_1, x_2, x_3, x_4 \right]`
-and
-
-.. math::
-
-  \begin{align}
-     f_1(x) &= x_1 + 10x_2 \\
-     f_2(x) &= \sqrt{5}  (x_3 - x_4)\\
-     f_3(x) &= (x_2 - 2x_3)^2\\
-     f_4(x) &= \sqrt{10}  (x_1 - x_4)^2\\
-       F(x) &= \left[f_1(x),\ f_2(x),\ f_3(x),\ f_4(x) \right]
-  \end{align}
-
-
-:math:`F(x)` is a function of four parameters, has four residuals
-and we wish to find :math:`x` such that :math:`\frac{1}{2}\|F(x)\|^2`
-is minimized.
-
-Again, the first step is to define functors that evaluate of the terms
-in the objective functor. Here is the code for evaluating
-:math:`f_4(x_1, x_4)`:
-
-.. code-block:: c++
-
- struct F4 {
-   template <typename T>
-   bool operator()(const T* const x1, const T* const x4, T* residual) const {
-     residual[0] = T(sqrt(10.0)) * (x1[0] - x4[0]) * (x1[0] - x4[0]);
-     return true;
-   }
- };
-
-
-Similarly, we can define classes ``F1``, ``F2`` and ``F4`` to evaluate
-:math:`f_1(x_1, x_2)`, :math:`f_2(x_3, x_4)` and :math:`f_3(x_2, x_3)`
-respectively. Using these, the problem can be constructed as follows:
-
-
-.. code-block:: c++
-
-  double x1 =  3.0; double x2 = -1.0; double x3 =  0.0; double x4 = 1.0;
-
-  Problem problem;
-
-  // Add residual terms to the problem using the using the autodiff
-  // wrapper to get the derivatives automatically.
-  problem.AddResidualBlock(
-    new AutoDiffCostFunction<F1, 1, 1, 1>(new F1), NULL, &x1, &x2);
-  problem.AddResidualBlock(
-    new AutoDiffCostFunction<F2, 1, 1, 1>(new F2), NULL, &x3, &x4);
-  problem.AddResidualBlock(
-    new AutoDiffCostFunction<F3, 1, 1, 1>(new F3), NULL, &x2, &x3)
-  problem.AddResidualBlock(
-    new AutoDiffCostFunction<F4, 1, 1, 1>(new F4), NULL, &x1, &x4);
-
-
-Note that each ``ResidualBlock`` only depends on the two parameters
-that the corresponding residual object depends on and not on all four
-parameters. Compiling and running `examples/powell.cc
-<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/powell.cc>`_
-gives us:
-
-.. code-block:: bash
-
-    Initial x1 = 3, x2 = -1, x3 = 0, x4 = 1
-    iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
-       0  1.075000e+02    0.00e+00    1.55e+02   0.00e+00   0.00e+00  1.00e+04       0    4.95e-04    2.30e-03
-       1  5.036190e+00    1.02e+02    2.00e+01   2.16e+00   9.53e-01  3.00e+04       1    4.39e-05    2.40e-03
-       2  3.148168e-01    4.72e+00    2.50e+00   6.23e-01   9.37e-01  9.00e+04       1    9.06e-06    2.43e-03
-       3  1.967760e-02    2.95e-01    3.13e-01   3.08e-01   9.37e-01  2.70e+05       1    8.11e-06    2.45e-03
-       4  1.229900e-03    1.84e-02    3.91e-02   1.54e-01   9.37e-01  8.10e+05       1    6.91e-06    2.48e-03
-       5  7.687123e-05    1.15e-03    4.89e-03   7.69e-02   9.37e-01  2.43e+06       1    7.87e-06    2.50e-03
-       6  4.804625e-06    7.21e-05    6.11e-04   3.85e-02   9.37e-01  7.29e+06       1    5.96e-06    2.52e-03
-       7  3.003028e-07    4.50e-06    7.64e-05   1.92e-02   9.37e-01  2.19e+07       1    5.96e-06    2.55e-03
-       8  1.877006e-08    2.82e-07    9.54e-06   9.62e-03   9.37e-01  6.56e+07       1    5.96e-06    2.57e-03
-       9  1.173223e-09    1.76e-08    1.19e-06   4.81e-03   9.37e-01  1.97e+08       1    7.87e-06    2.60e-03
-      10  7.333425e-11    1.10e-09    1.49e-07   2.40e-03   9.37e-01  5.90e+08       1    6.20e-06    2.63e-03
-      11  4.584044e-12    6.88e-11    1.86e-08   1.20e-03   9.37e-01  1.77e+09       1    6.91e-06    2.65e-03
-      12  2.865573e-13    4.30e-12    2.33e-09   6.02e-04   9.37e-01  5.31e+09       1    5.96e-06    2.67e-03
-      13  1.791438e-14    2.69e-13    2.91e-10   3.01e-04   9.37e-01  1.59e+10       1    7.15e-06    2.69e-03
-
-    Ceres Solver v1.10.0 Solve Report
-    ----------------------------------
-                                         Original                  Reduced
-    Parameter blocks                            4                        4
-    Parameters                                  4                        4
-    Residual blocks                             4                        4
-    Residual                                    4                        4
-
-    Minimizer                        TRUST_REGION
-
-    Dense linear algebra library            EIGEN
-    Trust region strategy     LEVENBERG_MARQUARDT
-
-                                            Given                     Used
-    Linear solver                        DENSE_QR                 DENSE_QR
-    Threads                                     1                        1
-    Linear solver threads                       1                        1
-
-    Cost:
-    Initial                          1.075000e+02
-    Final                            1.791438e-14
-    Change                           1.075000e+02
-
-    Minimizer iterations                       14
-    Successful steps                           14
-    Unsuccessful steps                          0
-
-    Time (in seconds):
-    Preprocessor                            0.002
-
-      Residual evaluation                   0.000
-      Jacobian evaluation                   0.000
-      Linear solver                         0.000
-    Minimizer                               0.001
-
-    Postprocessor                           0.000
-    Total                                   0.005
-
-    Termination:                      CONVERGENCE (Gradient tolerance reached. Gradient max norm: 3.642190e-11 <= 1.000000e-10)
-
-    Final x1 = 0.000292189, x2 = -2.92189e-05, x3 = 4.79511e-05, x4 = 4.79511e-05
-
-It is easy to see that the optimal solution to this problem is at
-:math:`x_1=0, x_2=0, x_3=0, x_4=0` with an objective function value of
-:math:`0`. In 10 iterations, Ceres finds a solution with an objective
-function value of :math:`4\times 10^{-12}`.
-
-
-.. rubric:: Footnotes
-
-.. [#f5] `examples/powell.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/powell.cc>`_.
-
-
-.. _section-fitting:
-
-Curve Fitting
-=============
-
-The examples we have seen until now are simple optimization problems
-with no data. The original purpose of least squares and non-linear
-least squares analysis was fitting curves to data. It is only
-appropriate that we now consider an example of such a problem
-[#f6]_. It contains data generated by sampling the curve :math:`y =
-e^{0.3x + 0.1}` and adding Gaussian noise with standard deviation
-:math:`\sigma = 0.2`. Let us fit some data to the curve
-
-.. math::  y = e^{mx + c}.
-
-We begin by defining a templated object to evaluate the
-residual. There will be a residual for each observation.
-
-.. code-block:: c++
-
- struct ExponentialResidual {
-   ExponentialResidual(double x, double y)
-       : x_(x), y_(y) {}
-
-   template <typename T>
-   bool operator()(const T* const m, const T* const c, T* residual) const {
-     residual[0] = T(y_) - exp(m[0] * T(x_) + c[0]);
-     return true;
-   }
-
-  private:
-   // Observations for a sample.
-   const double x_;
-   const double y_;
- };
-
-Assuming the observations are in a :math:`2n` sized array called
-``data`` the problem construction is a simple matter of creating a
-:class:`CostFunction` for every observation.
-
-
-.. code-block:: c++
-
- double m = 0.0;
- double c = 0.0;
-
- Problem problem;
- for (int i = 0; i < kNumObservations; ++i) {
-   CostFunction* cost_function =
-        new AutoDiffCostFunction<ExponentialResidual, 1, 1, 1>(
-            new ExponentialResidual(data[2 * i], data[2 * i + 1]));
-   problem.AddResidualBlock(cost_function, NULL, &m, &c);
- }
-
-Compiling and running `examples/curve_fitting.cc
-<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/curve_fitting.cc>`_
-gives us:
-
-.. code-block:: bash
-
-    iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
-       0  1.211734e+02    0.00e+00    3.61e+02   0.00e+00   0.00e+00  1.00e+04       0    5.34e-04    2.56e-03
-       1  1.211734e+02   -2.21e+03    0.00e+00   7.52e-01  -1.87e+01  5.00e+03       1    4.29e-05    3.25e-03
-       2  1.211734e+02   -2.21e+03    0.00e+00   7.51e-01  -1.86e+01  1.25e+03       1    1.10e-05    3.28e-03
-       3  1.211734e+02   -2.19e+03    0.00e+00   7.48e-01  -1.85e+01  1.56e+02       1    1.41e-05    3.31e-03
-       4  1.211734e+02   -2.02e+03    0.00e+00   7.22e-01  -1.70e+01  9.77e+00       1    1.00e-05    3.34e-03
-       5  1.211734e+02   -7.34e+02    0.00e+00   5.78e-01  -6.32e+00  3.05e-01       1    1.00e-05    3.36e-03
-       6  3.306595e+01    8.81e+01    4.10e+02   3.18e-01   1.37e+00  9.16e-01       1    2.79e-05    3.41e-03
-       7  6.426770e+00    2.66e+01    1.81e+02   1.29e-01   1.10e+00  2.75e+00       1    2.10e-05    3.45e-03
-       8  3.344546e+00    3.08e+00    5.51e+01   3.05e-02   1.03e+00  8.24e+00       1    2.10e-05    3.48e-03
-       9  1.987485e+00    1.36e+00    2.33e+01   8.87e-02   9.94e-01  2.47e+01       1    2.10e-05    3.52e-03
-      10  1.211585e+00    7.76e-01    8.22e+00   1.05e-01   9.89e-01  7.42e+01       1    2.10e-05    3.56e-03
-      11  1.063265e+00    1.48e-01    1.44e+00   6.06e-02   9.97e-01  2.22e+02       1    2.60e-05    3.61e-03
-      12  1.056795e+00    6.47e-03    1.18e-01   1.47e-02   1.00e+00  6.67e+02       1    2.10e-05    3.64e-03
-      13  1.056751e+00    4.39e-05    3.79e-03   1.28e-03   1.00e+00  2.00e+03       1    2.10e-05    3.68e-03
-    Ceres Solver Report: Iterations: 13, Initial cost: 1.211734e+02, Final cost: 1.056751e+00, Termination: CONVERGENCE
-    Initial m: 0 c: 0
-    Final   m: 0.291861 c: 0.131439
-
-Starting from parameter values :math:`m = 0, c=0` with an initial
-objective function value of :math:`121.173` Ceres finds a solution
-:math:`m= 0.291861, c = 0.131439` with an objective function value of
-:math:`1.05675`. These values are a bit different than the
-parameters of the original model :math:`m=0.3, c= 0.1`, but this is
-expected. When reconstructing a curve from noisy data, we expect to
-see such deviations. Indeed, if you were to evaluate the objective
-function for :math:`m=0.3, c=0.1`, the fit is worse with an objective
-function value of :math:`1.082425`.  The figure below illustrates the fit.
-
-.. figure:: least_squares_fit.png
-   :figwidth: 500px
-   :height: 400px
-   :align: center
-
-   Least squares curve fitting.
-
-
-.. rubric:: Footnotes
-
-.. [#f6] `examples/curve_fitting.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/curve_fitting.cc>`_
-
-
-Robust Curve Fitting
-=====================
-
-Now suppose the data we are given has some outliers, i.e., we have
-some points that do not obey the noise model. If we were to use the
-code above to fit such data, we would get a fit that looks as
-below. Notice how the fitted curve deviates from the ground truth.
-
-.. figure:: non_robust_least_squares_fit.png
-   :figwidth: 500px
-   :height: 400px
-   :align: center
-
-To deal with outliers, a standard technique is to use a
-:class:`LossFunction`. Loss functions reduce the influence of
-residual blocks with high residuals, usually the ones corresponding to
-outliers. To associate a loss function with a residual block, we change
-
-.. code-block:: c++
-
-   problem.AddResidualBlock(cost_function, NULL , &m, &c);
-
-to
-
-.. code-block:: c++
-
-   problem.AddResidualBlock(cost_function, new CauchyLoss(0.5) , &m, &c);
-
-:class:`CauchyLoss` is one of the loss functions that ships with Ceres
-Solver. The argument :math:`0.5` specifies the scale of the loss
-function. As a result, we get the fit below [#f7]_. Notice how the
-fitted curve moves back closer to the ground truth curve.
-
-.. figure:: robust_least_squares_fit.png
-   :figwidth: 500px
-   :height: 400px
-   :align: center
-
-   Using :class:`LossFunction` to reduce the effect of outliers on a
-   least squares fit.
-
-
-.. rubric:: Footnotes
-
-.. [#f7] `examples/robust_curve_fitting.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/robust_curve_fitting.cc>`_
-
-
-Bundle Adjustment
-=================
-
-One of the main reasons for writing Ceres was our need to solve large
-scale bundle adjustment problems [HartleyZisserman]_, [Triggs]_.
-
-Given a set of measured image feature locations and correspondences,
-the goal of bundle adjustment is to find 3D point positions and camera
-parameters that minimize the reprojection error. This optimization
-problem is usually formulated as a non-linear least squares problem,
-where the error is the squared :math:`L_2` norm of the difference between
-the observed feature location and the projection of the corresponding
-3D point on the image plane of the camera. Ceres has extensive support
-for solving bundle adjustment problems.
-
-Let us solve a problem from the `BAL
-<http://grail.cs.washington.edu/projects/bal/>`_ dataset [#f8]_.
-
-The first step as usual is to define a templated functor that computes
-the reprojection error/residual. The structure of the functor is
-similar to the ``ExponentialResidual``, in that there is an
-instance of this object responsible for each image observation.
-
-Each residual in a BAL problem depends on a three dimensional point
-and a nine parameter camera. The nine parameters defining the camera
-are: three for rotation as a Rodriques' axis-angle vector, three
-for translation, one for focal length and two for radial distortion.
-The details of this camera model can be found the `Bundler homepage
-<http://phototour.cs.washington.edu/bundler/>`_ and the `BAL homepage
-<http://grail.cs.washington.edu/projects/bal/>`_.
-
-.. code-block:: c++
-
- struct SnavelyReprojectionError {
-   SnavelyReprojectionError(double observed_x, double observed_y)
-       : observed_x(observed_x), observed_y(observed_y) {}
-
-   template <typename T>
-   bool operator()(const T* const camera,
-                   const T* const point,
-                   T* residuals) const {
-     // camera[0,1,2] are the angle-axis rotation.
-     T p[3];
-     ceres::AngleAxisRotatePoint(camera, point, p);
-     // camera[3,4,5] are the translation.
-     p[0] += camera[3]; p[1] += camera[4]; p[2] += camera[5];
-
-     // Compute the center of distortion. The sign change comes from
-     // the camera model that Noah Snavely's Bundler assumes, whereby
-     // the camera coordinate system has a negative z axis.
-     T xp = - p[0] / p[2];
-     T yp = - p[1] / p[2];
-
-     // Apply second and fourth order radial distortion.
-     const T& l1 = camera[7];
-     const T& l2 = camera[8];
-     T r2 = xp*xp + yp*yp;
-     T distortion = T(1.0) + r2  * (l1 + l2  * r2);
-
-     // Compute final projected point position.
-     const T& focal = camera[6];
-     T predicted_x = focal * distortion * xp;
-     T predicted_y = focal * distortion * yp;
-
-     // The error is the difference between the predicted and observed position.
-     residuals[0] = predicted_x - T(observed_x);
-     residuals[1] = predicted_y - T(observed_y);
-     return true;
-   }
-
-    // Factory to hide the construction of the CostFunction object from
-    // the client code.
-    static ceres::CostFunction* Create(const double observed_x,
-                                       const double observed_y) {
-      return (new ceres::AutoDiffCostFunction<SnavelyReprojectionError, 2, 9, 3>(
-                  new SnavelyReprojectionError(observed_x, observed_y)));
-    }
-
-   double observed_x;
-   double observed_y;
- };
-
-
-Note that unlike the examples before, this is a non-trivial function
-and computing its analytic Jacobian is a bit of a pain. Automatic
-differentiation makes life much simpler. The function
-:func:`AngleAxisRotatePoint` and other functions for manipulating
-rotations can be found in ``include/ceres/rotation.h``.
-
-Given this functor, the bundle adjustment problem can be constructed
-as follows:
-
-.. code-block:: c++
-
- ceres::Problem problem;
- for (int i = 0; i < bal_problem.num_observations(); ++i) {
-   ceres::CostFunction* cost_function =
-       SnavelyReprojectionError::Create(
-            bal_problem.observations()[2 * i + 0],
-            bal_problem.observations()[2 * i + 1]);
-   problem.AddResidualBlock(cost_function,
-                            NULL /* squared loss */,
-                            bal_problem.mutable_camera_for_observation(i),
-                            bal_problem.mutable_point_for_observation(i));
- }
-
-
-Notice that the problem construction for bundle adjustment is very
-similar to the curve fitting example -- one term is added to the
-objective function per observation.
-
-Since this large sparse problem (well large for ``DENSE_QR`` anyways),
-one way to solve this problem is to set
-:member:`Solver::Options::linear_solver_type` to
-``SPARSE_NORMAL_CHOLESKY`` and call :member:`Solve`. And while this is
-a reasonable thing to do, bundle adjustment problems have a special
-sparsity structure that can be exploited to solve them much more
-efficiently. Ceres provides three specialized solvers (collectively
-known as Schur-based solvers) for this task. The example code uses the
-simplest of them ``DENSE_SCHUR``.
-
-.. code-block:: c++
-
- ceres::Solver::Options options;
- options.linear_solver_type = ceres::DENSE_SCHUR;
- options.minimizer_progress_to_stdout = true;
- ceres::Solver::Summary summary;
- ceres::Solve(options, &problem, &summary);
- std::cout << summary.FullReport() << "\n";
-
-For a more sophisticated bundle adjustment example which demonstrates
-the use of Ceres' more advanced features including its various linear
-solvers, robust loss functions and local parameterizations see
-`examples/bundle_adjuster.cc
-<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/bundle_adjuster.cc>`_
-
-
-.. rubric:: Footnotes
-
-.. [#f8] `examples/simple_bundle_adjuster.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/simple_bundle_adjuster.cc>`_
-
-
-Other Examples
-==============
-
-Besides the examples in this chapter, the  `example
-<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/>`_
-directory contains a number of other examples:
-
-#. `bundle_adjuster.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/bundle_adjuster.cc>`_
-   shows how to use the various features of Ceres to solve bundle
-   adjustment problems.
-
-#. `circle_fit.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/circle_fit.cc>`_
-   shows how to fit data to a circle.
-
-#. `denoising.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/denoising.cc>`_
-   implements image denoising using the `Fields of Experts
-   <http://www.gris.informatik.tu-darmstadt.de/~sroth/research/foe/index.html>`_
-   model.
-
-#. `nist.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/nist.cc>`_
-   implements and attempts to solves the `NIST
-   <http://www.itl.nist.gov/div898/strd/nls/nls_main.shtm>`_
-   non-linear regression problems.
-
-#. `libmv_bundle_adjuster.cc
-   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/libmv_bundle_adjuster.cc>`_
-   is the bundle adjustment algorithm used by `Blender <www.blender.org>`_/libmv.
+   nnls_tutorial
+   gradient_tutorial
diff --git a/docs/source/users.rst b/docs/source/users.rst
new file mode 100644
index 0000000..23a6414
--- /dev/null
+++ b/docs/source/users.rst
@@ -0,0 +1,66 @@
+.. _chapter-users:
+
+=====
+Users
+=====
+
+* At `Google <http://www.google.com>`_, Ceres is used to:
+
+  * Estimate the pose of `Street View`_ cars, aircrafts, and satellites.
+  * Build 3D models for `PhotoTours`_.
+  * Estimate satellite image sensor characteristics.
+  * Stitch `panoramas`_ on Android and iOS.
+  * Apply `Lens Blur`_ on Android.
+  * Solve `bundle adjustment`_ and `SLAM`_ problems in `Project
+    Tango`_.
+
+* `Willow Garage`_ uses Ceres to solve `SLAM`_ problems.
+* `Southwest Research Insitute <http://www.swri.org/>`_ uses Ceres for
+  `calibrating robot-camera systems`_.
+* `Blender <http://www.blender.org>`_ uses Ceres for `planar
+  tracking`_ and `bundle adjustment`_.
+* `OpenMVG <http://imagine.enpc.fr/~moulonp/openMVG/>`_ an open source
+  multi-view geometry library uses Ceres for `bundle adjustment`_.
+* `Microsoft Research <http://research.microsoft.com/en-us/>`_ uses
+  Ceres for nonlinear optimization of objectives involving subdivision
+  surfaces under `skinned control meshes`_
+* `Matterport <http://www.matterport.com>`_, uses Ceres for global
+  alignment of 3D point clouds and for pose graph optimization.
+
+.. _bundle adjustment: http://en.wikipedia.org/wiki/Structure_from_motion
+.. _Street View: http://youtu.be/z00ORu4bU-A
+.. _PhotoTours: http://google-latlong.blogspot.com/2012/04/visit-global-landmarks-with-photo-tours.html
+.. _panoramas: http://www.google.com/maps/about/contribute/photosphere/
+.. _Project Tango: https://www.google.com/atap/projecttango/
+.. _planar tracking: http://mango.blender.org/development/planar-tracking-preview/
+.. _Willow Garage: https://www.willowgarage.com/blog/2013/08/09/enabling-robots-see-better-through-improved-camera-calibration
+.. _Lens Blur: http://googleresearch.blogspot.com/2014/04/lens-blur-in-new-google-camera-app.html
+.. _SLAM: http://en.wikipedia.org/wiki/Simultaneous_localization_and_mapping
+.. _calibrating robot-camera systems:
+   http://rosindustrial.org/news/2014/9/24/industrial-calibration-library-update-and-presentation
+.. _skinned control meshes: http://research.microsoft.com/en-us/projects/handmodelingfrommonoculardepth/
+
+
+Publications
+============
+
+Ceres Solver is used (and cited) in the following publications:
+
+#. **User-Specific Hand Modeling from Monocular Depth
+   Sequences**, J. Taylor, R. Stebbing, V. Ramakrishna, C. Keskin, J. Shotton, S. Izadi, A. Hertzmann,
+   and A. Fitzgibbon, CVPR 2014.
+
+#. **Global Fusion of Relative Motions for Robust, Accurate and
+   Scalable Structure from Motion**, P. Moulon, P. Monasse
+   and R. Marlet, ICCV 2013.
+
+#. **Recurrent neural networks for voice activity
+   detection**, T. Hughes and K. Mierle, ICASSP 2013.
+
+#. **Street View Motion-from-Structure-from-Motion**, B. Klingner, D. Martin
+   and J. Roseborough, ICCV 2013.
+
+#. **Adaptive Structure from Motion with a contrario model
+   estimation**, P. Moulon, P. Monasse and R. Marlet, ACCV 2012.
+
+#. **Visibility based preconditioning for bundle adjustment**, A. Kushal and S. Agarwal, CVPR 2012.
diff --git a/docs/source/version_history.rst b/docs/source/version_history.rst
index cbbad65..33d32b2 100644
--- a/docs/source/version_history.rst
+++ b/docs/source/version_history.rst
@@ -1,40 +1,118 @@
 .. _chapter-version-history:
 
-========
-Releases
-========
+===============
+Version History
+===============
 
-HEAD
-====
+1.10.0
+======
 
-#. Added ``Solver::Options::IsValid`` which allows users to validate
-   their solver configuration before calling ``Solve``.
+New Features
+------------
+#. Ceres Solver can now be used to solve general unconstrained
+   optimization problems. See the documentation for
+   ``GradientProblem`` and ``GradientProblemSolver``.
+#. ``Eigen`` can now be as a sparse linear algebra backend. This can
+   be done by setting
+   ``Solver::Options::sparse_linear_algebra_library_type`` to
+   ``EIGEN_SPARSE``. Performance should be comparable to ``CX_SPARSE``.
+
+   .. NOTE::
+
+      Because ``Eigen`` is a header only library, and some of the code
+      related to sparse Cholesky factorization is LGPL, building Ceres
+      with support for Eigen's sparse linear algebra is disabled by
+      default and should be enabled explicitly.
+
+   .. NOTE::
+
+      For good performance, use Eigen version 3.2.2 or later.
 
 #. Added ``EIGEN_SPARSE_QR`` algorithm for covariance estimation using
    ``Eigen``'s sparse QR factorization. (Michael Vitus)
+#. Faster inner iterations when using multiple threads.
+#. Faster ``ITERATIVE_SCHUR`` + ``SCHUR_JACOBI`` for small to medium
+   sized problems (see documentation for
+   ``Solver::Options::use_explicit_schur_complement``).
+#. Faster automatic Schur ordering.
+#. Reduced memory usage when solving problems with dynamic sparsity.
+#. ``CostFunctionToFunctor`` now supports dynamic number of residuals.
+#. A complete re-write of the problem preprocessing phase.
+#. ``Solver::Summary::FullReport`` now reports the build configuration
+   for Ceres.
+#. When building on Android, the ``NDK`` version detection logic has
+   been improved.
+#. The ``CERES_VERSION`` macro has been improved and replaced with the
+   ``CERES_VERSION_STRING`` macro.
+#. Added ``Solver::Options::IsValid`` which allows users to validate
+   their solver configuration before calling ``Solve``.
+#. Added ``Problem::GetCostFunctionForResidualBlock`` and
+   ``Problem::GetLossFunctionForResidualBlock``.
 
 Backward Incompatible API Changes
 ---------------------------------
-
 #. ``NumericDiffFunctor`` has been removed. It's API was broken, and
    the implementation was an unnecessary layer of abstraction over
    ``CostFunctionToFunctor``.
-
+#. ``POLAK_RIBIRERE`` conjugate gradients direction type has been
+   renamed to ``POLAK_RIBIERE``.
 #. ``Solver::Options::solver_log`` has been removed. If needed this
-    iteration callback can easily be implemented in user code.
-
+   iteration callback can easily be implemented in user code.
 #. The ``SPARSE_CHOLESKY`` algorithm for covariance estimation has
-    been removed. It is not rank revealing and numerically poorly
-    behaved. Sparse QR factorization is a much better way to do this.
-
+   been removed. It is not rank revealing and numerically poorly
+   behaved. Sparse QR factorization is a much better way to do this.
 #. The ``SPARSE_QR`` algorithm for covariance estimation has been
-    renamed to ``SUITE_SPARSE_QR`` to be consistent with
-    ``EIGEN_SPARSE_QR``.
-
-# ``Solver::Summary::preconditioner_type`` has been replaced with
+   renamed to ``SUITE_SPARSE_QR`` to be consistent with
+   ``EIGEN_SPARSE_QR``.
+#. ``Solver::Summary::preconditioner_type`` has been replaced with
    ``Solver::Summary::preconditioner_type_given`` and
    ``Solver::Summary::preconditioner_type_used`` to be more consistent
    with how information about the linear solver is communicated.
+#. ``CERES_VERSION`` and ``CERES_ABI_VERSION`` macros were not
+   terribly useful. They have been replaced with
+   ``CERES_VERSION_MAJOR``, ``CERES_VERSION_MINOR`` ,
+   ``CERES_VERSION_REVISION`` and ``CERES_VERSION_ABI`` macros. In
+   particular the functionality of ``CERES_VERSION`` is provided by
+   ``CERES_VERSION_STRING`` macro.
+
+Bug Fixes
+---------
+#. Fix a formatting error TrustRegionMinimizer logging.
+#. Add an explicit include for local_parameterization.h (cooordz)
+#. Fix a number of typos in the documentation (Martin Baeuml)
+#. Made the logging in TrustRegionMinimizer consistent with
+   LineSearchMinimizer.
+#. Fix some obsolete documentation in CostFunction::Evaluate.
+#. Fix CG solver options for ITERATIVE_SCHUR, which did not copy
+   min_num_iterations (Johannes Schönberger)
+#. Remove obsolete include of numeric_diff_functor.h. (Martin Baeuml)
+#. Fix max. linear solver iterations in ConjugateGradientsSolver
+   (Johannes Schönberger)
+#. Expand check for lack of a sparse linear algebra library. (Michael
+   Samples and Domink Reitzle)
+#. Fix Eigen Row/ColMajor bug in NumericDiffCostFunction. (Dominik
+   Reitzle)
+#. Fix crash in Covariance if # threads > 1 requested without OpenMP.
+#. Fixed Malformed regex. (Björn Piltz)
+#. Fixed MSVC error C2124: divide or mod by zero. (Björn Piltz)
+#. Add missing #include of <limits> for loss functions.
+#. Make canned loss functions more robust.
+#. Fix type of suppressed compiler warning for Eigen 3.2.0.
+#. Suppress unused variable warning from Eigen 3.2.0.
+#. Add "make install" to the install instructions.
+#. Correct formula in documentation of
+   Solver::Options::function_tolerance. (Alessandro Gentilini)
+#. Add release flags to iOS toolchain.
+#. Fix a broken hyperlink in the documentation. (Henrique Mendonca)
+#. Add fixes for multiple definitions of ERROR on Windows to docs.
+#. Compile miniglog into Ceres if enabled on all platforms.
+#. Add two missing files to Android.mk (Greg Coombe)
+#. Fix Cmake error when using miniglog. (Greg Coombe)
+#. Don't build miniglog unconditionally as a static library (Björn Piltz)
+#. Added a missing include. (Björn Piltz)
+#. Conditionally disable SparseNormalCholesky.
+#. Fix a memory leak in program_test.cc.
+
 
 1.9.0
 =====
@@ -810,4 +888,16 @@
 1.0.0
 =====
 
-Initial Release. Nathan Wiegand contributed to the Mac OSX port.
+Initial open source release. Nathan Wiegand contributed to the Mac OSX
+port.
+
+
+Origins
+=======
+
+Ceres Solver grew out of the need for general least squares solving at
+Google. In early 2010, Sameer Agarwal and Fredrik Schaffalitzky
+started the development of Ceres Solver. Fredrik left Google shortly
+thereafter and Keir Mierle stepped in to take his place. After two
+years of on-and-off development, Ceres Solver was released as open
+source in May of 2012.
diff --git a/include/ceres/gradient_problem_solver.h b/include/ceres/gradient_problem_solver.h
index 484d88e..db706f7 100644
--- a/include/ceres/gradient_problem_solver.h
+++ b/include/ceres/gradient_problem_solver.h
@@ -72,7 +72,6 @@
       max_line_search_step_expansion = 10.0;
       max_num_iterations = 50;
       max_solver_time_in_seconds = 1e9;
-      num_threads = 1;
       function_tolerance = 1e-6;
       gradient_tolerance = 1e-10;
       logging_type = PER_MINIMIZER_ITERATION;
@@ -224,10 +223,6 @@
     // Maximum time for which the minimizer should run for.
     double max_solver_time_in_seconds;
 
-    // Number of threads used by Ceres for evaluating the cost and
-    // jacobians.
-    int num_threads;
-
     // Minimizer terminates when
     //
     //   (new_cost - old_cost) < function_tolerance * old_cost;
@@ -251,13 +246,6 @@
     // is sent to STDOUT.
     bool minimizer_progress_to_stdout;
 
-    // If true, the user's parameter blocks are updated at the end of
-    // every Minimizer iteration, otherwise they are updated when the
-    // Minimizer terminates. This is useful if, for example, the user
-    // wishes to visualize the state of the optimization every
-    // iteration.
-    bool update_state_every_iteration;
-
     // Callbacks that are executed at the end of each iteration of the
     // Minimizer. An iteration may terminate midway, either due to
     // numerical failures or because one of the convergence tests has
@@ -309,10 +297,10 @@
     // Sum total of all time spent inside Ceres when Solve is called.
     double total_time_in_seconds;
 
-    // Time (in seconds) spent evaluating the residual vector.
+    // Time (in seconds) spent evaluating the cost.
     double cost_evaluation_time_in_seconds;
 
-    // Time (in seconds) spent evaluating the jacobian matrix.
+    // Time (in seconds) spent evaluating the gradient.
     double gradient_evaluation_time_in_seconds;
 
     // Number of parameters in the probem.
diff --git a/internal/ceres/coordinate_descent_minimizer.cc b/internal/ceres/coordinate_descent_minimizer.cc
index 535d6e1..0d8adee 100644
--- a/internal/ceres/coordinate_descent_minimizer.cc
+++ b/internal/ceres/coordinate_descent_minimizer.cc
@@ -149,7 +149,8 @@
     }
 
 #ifdef CERES_USE_OPENMP
-    const int num_inner_iteration_threads = min(options.num_threads, num_problems);
+    const int num_inner_iteration_threads =
+        min(options.num_threads, num_problems);
     evaluator_options_.num_threads =
         max(1, options.num_threads / num_inner_iteration_threads);
 
diff --git a/scripts/ceres-solver.spec b/scripts/ceres-solver.spec
index 1e5c7c0..96ee2bf 100644
--- a/scripts/ceres-solver.spec
+++ b/scripts/ceres-solver.spec
@@ -1,9 +1,9 @@
 Name:           ceres-solver
-Version:        1.9.0
+Version:        1.10.0
 # Release candidate versions are messy. Give them a release of
 # e.g. "0.1.0%{?dist}" for RC1 (and remember to adjust the Source0
 # URL). Non-RC releases go back to incrementing integers starting at 1.
-Release:        0.2.0%{?dist}
+Release:        0.1.0%{?dist}
 Summary:        A non-linear least squares minimizer
 
 Group:          Development/Libraries
@@ -111,6 +111,9 @@
 
 
 %changelog
+* Mon Oct 6 2014 Sameer Agarwal <sameeragarwal@google.com> - 1.10.0-0.1.0
+- Bump version
+
 * Mon May 27 2014 Sameer Agarwal <sameeragarwal@google.com> - 1.9.0-0.2.0
 - Bump version
 
