blob: 11db1fbc4886c5146bf2c45dea71039a6c7556cd [file] [log] [blame]
Mike Vitusdc5ea0e2018-01-24 15:53:19 -08001// Ceres Solver - A fast non-linear least squares minimizer
Sameer Agarwal5a30cae2023-09-19 15:29:34 -07002// Copyright 2023 Google Inc. All rights reserved.
Mike Vitusdc5ea0e2018-01-24 15:53:19 -08003// http://ceres-solver.org/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright notice,
11// this list of conditions and the following disclaimer in the documentation
12// and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors may be
14// used to endorse or promote products derived from this software without
15// specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28//
Dmitriy Korchemkinc0c4f932022-08-18 22:10:17 +030029// Authors: vitus@google.com (Michael Vitus),
30// dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080031
Sameer Agarwal47051592022-03-12 15:22:19 -080032#ifndef CERES_INTERNAL_PARALLEL_FOR_H_
33#define CERES_INTERNAL_PARALLEL_FOR_H_
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080034
Sameer Agarwal9a289472022-09-20 09:50:10 -070035#include <mutex>
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +030036#include <vector>
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080037
Mike Vitusf408f892018-02-22 10:28:39 -080038#include "ceres/context_impl.h"
Dmitriy Korchemkinb1585152022-11-27 21:35:44 +030039#include "ceres/internal/eigen.h"
Sergiu Deitschf90833f2022-02-07 23:43:19 +010040#include "ceres/internal/export.h"
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +030041#include "ceres/parallel_invoke.h"
42#include "ceres/partition_range_for_parallel_for.h"
Dmitriy Korchemkinc0c4f932022-08-18 22:10:17 +030043#include "glog/logging.h"
Mike Vitusf408f892018-02-22 10:28:39 -080044
Sameer Agarwalcaf614a2022-04-21 17:41:10 -070045namespace ceres::internal {
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080046
// Returns a std::unique_lock over mutex m, except in the single-threaded
// case (num_threads == 1) where no synchronization is needed and a
// default-constructed (mutex-free, unlocked) lock object is returned
// instead.
inline decltype(auto) MakeConditionalLock(const int num_threads,
                                          std::mutex& m) {
  if (num_threads == 1) {
    // No concurrent access is possible; skip locking entirely.
    return std::unique_lock<std::mutex>{};
  }
  return std::unique_lock<std::mutex>{m};
}
53
Mike Vitusf0c3b232018-02-28 13:08:48 -080054// Execute the function for every element in the range [start, end) with at most
55// num_threads. It will execute all the work on the calling thread if
Dmitriy Korchemkinc0c4f932022-08-18 22:10:17 +030056// num_threads or (end - start) is equal to 1.
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +030057// Depending on function signature, it will be supplied with either loop index
58// or a range of loop indicies; function can also be supplied with thread_id.
59// The following function signatures are supported:
60// - Functions accepting a single loop index:
61// - [](int index) { ... }
62// - [](int thread_id, int index) { ... }
63// - Functions accepting a range of loop index:
64// - [](std::tuple<int, int> index) { ... }
65// - [](int thread_id, std::tuple<int, int> index) { ... }
Dmitriy Korchemkinc0c4f932022-08-18 22:10:17 +030066//
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +030067// When distributing workload between threads, it is assumed that each loop
68// iteration takes approximately equal time to complete.
Dmitriy Korchemkinc0c4f932022-08-18 22:10:17 +030069template <typename F>
Dmitriy Korchemkindc7a8592023-10-06 15:55:21 +000070void ParallelFor(ContextImpl* context,
71 int start,
72 int end,
73 int num_threads,
74 F&& function,
75 int min_block_size = 1) {
Dmitriy Korchemkinc0c4f932022-08-18 22:10:17 +030076 CHECK_GT(num_threads, 0);
77 if (start >= end) {
78 return;
79 }
80
Dmitriy Korchemkindc7a8592023-10-06 15:55:21 +000081 if (num_threads == 1 || end - start < min_block_size * 2) {
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +030082 InvokeOnSegment(0, std::make_tuple(start, end), std::forward<F>(function));
Dmitriy Korchemkinc0c4f932022-08-18 22:10:17 +030083 return;
84 }
85
86 CHECK(context != nullptr);
Dmitriy Korchemkindc7a8592023-10-06 15:55:21 +000087 ParallelInvoke(context,
88 start,
89 end,
90 num_threads,
91 std::forward<F>(function),
92 min_block_size);
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +030093}
94
95// Execute function for every element in the range [start, end) with at most
96// num_threads, using user-provided partitions array.
97// When distributing workload between threads, it is assumed that each segment
98// bounded by adjacent elements of partitions array takes approximately equal
99// time to process.
100template <typename F>
101void ParallelFor(ContextImpl* context,
102 int start,
103 int end,
104 int num_threads,
105 F&& function,
106 const std::vector<int>& partitions) {
107 CHECK_GT(num_threads, 0);
108 if (start >= end) {
109 return;
110 }
111 CHECK_EQ(partitions.front(), start);
112 CHECK_EQ(partitions.back(), end);
113 if (num_threads == 1 || end - start <= num_threads) {
114 ParallelFor(context, start, end, num_threads, std::forward<F>(function));
115 return;
116 }
117 CHECK_GT(partitions.size(), 1);
118 const int num_partitions = partitions.size() - 1;
119 ParallelFor(context,
120 0,
121 num_partitions,
122 num_threads,
123 [&function, &partitions](int thread_id,
124 std::tuple<int, int> partition_ids) {
125 // partition_ids is a range of partition indices
126 const auto [partition_start, partition_end] = partition_ids;
127 // Execution over several adjacent segments is equivalent
128 // to execution over union of those segments (which is also a
129 // contiguous segment)
130 const int range_start = partitions[partition_start];
131 const int range_end = partitions[partition_end];
132 // Range of original loop indices
133 const auto range = std::make_tuple(range_start, range_end);
134 InvokeOnSegment(thread_id, range, function);
135 });
Dmitriy Korchemkinc0c4f932022-08-18 22:10:17 +0300136}
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300137
138// Execute function for every element in the range [start, end) with at most
139// num_threads, taking into account user-provided integer cumulative costs of
140// iterations. Cumulative costs of iteration for indices in range [0, end) are
141// stored in objects from cumulative_cost_data. User-provided
142// cumulative_cost_fun returns non-decreasing integer values corresponding to
143// inclusive cumulative cost of loop iterations, provided with a reference to
144// user-defined object. Only indices from [start, end) will be referenced. This
145// routine assumes that cumulative_cost_fun is non-decreasing (in other words,
146// all costs are non-negative);
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +0300147// When distributing workload between threads, input range of loop indices will
148// be partitioned into disjoint contiguous intervals, with the maximal cost
149// being minimized.
150// For example, with iteration costs of [1, 1, 5, 3, 1, 4] cumulative_cost_fun
151// should return [1, 2, 7, 10, 11, 15], and with num_threads = 4 this range
152// will be split into segments [0, 2) [2, 3) [3, 5) [5, 6) with costs
153// [2, 5, 4, 4].
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300154template <typename F, typename CumulativeCostData, typename CumulativeCostFun>
155void ParallelFor(ContextImpl* context,
156 int start,
157 int end,
158 int num_threads,
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +0300159 F&& function,
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300160 const CumulativeCostData* cumulative_cost_data,
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +0300161 CumulativeCostFun&& cumulative_cost_fun) {
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300162 CHECK_GT(num_threads, 0);
163 if (start >= end) {
164 return;
165 }
166 if (num_threads == 1 || end - start <= num_threads) {
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +0300167 ParallelFor(context, start, end, num_threads, std::forward<F>(function));
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300168 return;
169 }
170 // Creating several partitions allows us to tolerate imperfections of
171 // partitioning and user-supplied iteration costs up to a certain extent
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +0300172 constexpr int kNumPartitionsPerThread = 4;
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300173 const int kMaxPartitions = num_threads * kNumPartitionsPerThread;
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +0300174 const auto& partitions = PartitionRangeForParallelFor(
175 start,
176 end,
177 kMaxPartitions,
178 cumulative_cost_data,
179 std::forward<CumulativeCostFun>(cumulative_cost_fun));
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300180 CHECK_GT(partitions.size(), 1);
Dmitriy Korchemkin54ad3dd2022-12-19 18:24:54 +0300181 ParallelFor(
182 context, start, end, num_threads, std::forward<F>(function), partitions);
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300183}
Sameer Agarwalcaf614a2022-04-21 17:41:10 -0700184} // namespace ceres::internal
Mike Vitusdc5ea0e2018-01-24 15:53:19 -0800185
Mike Vitusdc5ea0e2018-01-24 15:53:19 -0800186#endif // CERES_INTERNAL_PARALLEL_FOR_H_