// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)

#include "ceres/parallel_for.h"

#include <atomic>
#include <cmath>
#include <condition_variable>
#include <mutex>
#include <numeric>
#include <random>
#include <thread>
#include <tuple>
#include <vector>

#include "ceres/context_impl.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_vector_ops.h"
#include "glog/logging.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace ceres::internal {

using testing::ElementsAreArray;
using testing::UnorderedElementsAreArray;

// Tests that the parallel for loop computes the correct result for various
// numbers of threads.
TEST(ParallelFor, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(&context, 0, size, num_threads, [&values](int i) {
      values[i] = std::sqrt(i);
    });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}


// Tests that the parallel for loop over ranges computes the correct result
// for various numbers of threads.
TEST(ParallelForWithRange, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context, 0, size, num_threads, [&values](std::tuple<int, int> range) {
          auto [start, end] = range;
          for (int i = start; i < end; ++i) values[i] = std::sqrt(i);
        });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests that the parallel for loop over ranges respects the lower bound on
// the minimal block size.
TEST(ParallelForWithRange, MinimalSize) {
  ContextImpl context;
  constexpr int kNumThreads = 4;
  constexpr int kMinBlockSize = 5;
  context.EnsureMinimumThreads(kNumThreads);

  for (int size = kMinBlockSize; size <= 25; ++size) {
    std::atomic<bool> failed(false);
    ParallelFor(
        &context,
        0,
        size,
        kNumThreads,
        [&failed, kMinBlockSize](std::tuple<int, int> range) {
          auto [start, end] = range;
          if (end - start < kMinBlockSize) failed = true;
        },
        kMinBlockSize);
    EXPECT_EQ(failed, false);
  }
}

// Tests that the parallel for loop with the thread ID interface computes the
// correct result for various numbers of threads.
TEST(ParallelForWithThreadId, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context, 0, size, num_threads, [&values](int thread_id, int i) {
          values[i] = std::sqrt(i);
        });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests that nested parallel for loops do not result in a deadlock.
TEST(ParallelFor, NestedParallelForDeadlock) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  // Increment each element in the 2D matrix.
  std::vector<std::vector<int>> x(3, {1, 2, 3});
  ParallelFor(&context, 0, 3, 2, [&x, &context](int i) {
    std::vector<int>& y = x.at(i);
    ParallelFor(&context, 0, 3, 2, [&y](int j) { ++y.at(j); });
  });

  const std::vector<int> results = {2, 3, 4};
  for (const std::vector<int>& value : x) {
    EXPECT_THAT(value, ElementsAreArray(results));
  }
}
159
Mike Vitusf0c3b232018-02-28 13:08:48 -0800160// Tests nested for loops do not result in a deadlock for the parallel for with
161// thread ID interface.
162TEST(ParallelForWithThreadId, NestedParallelForDeadlock) {
163 ContextImpl context;
164 context.EnsureMinimumThreads(/*num_threads=*/2);
165
166 // Increment each element in the 2D matrix.
167 std::vector<std::vector<int>> x(3, {1, 2, 3});
168 ParallelFor(&context, 0, 3, 2, [&x, &context](int thread_id, int i) {
169 std::vector<int>& y = x.at(i);
170 ParallelFor(&context, 0, 3, 2, [&y](int thread_id, int j) { ++y.at(j); });
171 });
172
173 const std::vector<int> results = {2, 3, 4};
174 for (const std::vector<int>& value : x) {
175 EXPECT_THAT(value, ElementsAreArray(results));
176 }
177}

TEST(ParallelForWithThreadId, UniqueThreadIds) {
  // The test requires hardware support for more than one thread; skip it
  // otherwise, since two distinct thread IDs cannot be observed.
  const int num_hardware_threads = std::thread::hardware_concurrency();
  if (num_hardware_threads <= 1) {
    LOG(ERROR)
        << "Test not supported, the hardware does not support threading.";
    return;
  }

  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);
  // Record the ID of the thread that processes each index.
  std::vector<int> x(2, -1);
  std::mutex mutex;
  std::condition_variable condition;
  int count = 0;
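  // The callback below acts as a barrier: each invocation records its thread
  // ID and then waits until both indices have been processed. This forces the
  // two iterations to run concurrently on distinct threads, so the recorded
  // thread IDs must be distinct.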
  ParallelFor(&context,
              0,
              2,
              2,
              [&x, &mutex, &condition, &count](int thread_id, int i) {
                std::unique_lock<std::mutex> lock(mutex);
                x[i] = thread_id;
                ++count;
                condition.notify_all();
                condition.wait(lock, [&]() { return count == 2; });
              });

  EXPECT_THAT(x, UnorderedElementsAreArray({0, 1}));
}

// Helper function for the partition tests below; defined at the end of this
// file.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost);

// Basic test that MaxPartitionCostIsFeasible and BruteForcePartition agree on
// simple test cases.
TEST(GuidedParallelFor, MaxPartitionCostIsFeasible) {
  std::vector<int> costs, cumulative_costs, partition;
  costs = {1, 2, 3, 5, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0};
  cumulative_costs.resize(costs.size());
  std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
  const auto dummy_getter = [](const int v) { return v; };
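  // dummy_getter is an identity function: the cumulative costs are already
  // stored as plain integers, so no extraction from a user-defined type is
  // needed here.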

  // [1, 2, 3] [5], [0 ... 0, 7, 0, ... 0]
  EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
                                         costs.size(),
                                         3,
                                         7,
                                         0,
                                         cumulative_costs.data(),
                                         dummy_getter,
                                         &partition));
  EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 7));
  // [1, 2, 3, 5, 0 ... 0, 7, 0, ... 0]
  EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
                                         costs.size(),
                                         3,
                                         18,
                                         0,
                                         cumulative_costs.data(),
                                         dummy_getter,
                                         &partition));
  EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 18));
  // Impossible, since there is an item of cost 7.
  EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
                                          costs.size(),
                                          3,
                                          6,
                                          0,
                                          cumulative_costs.data(),
                                          dummy_getter,
                                          &partition));
  EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 6));
  // Impossible.
  EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
                                          costs.size(),
                                          2,
                                          10,
                                          0,
                                          cumulative_costs.data(),
                                          dummy_getter,
                                          &partition));
  EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 2, 10));
}

// Randomized tests for MaxPartitionCostIsFeasible.
TEST(GuidedParallelFor, MaxPartitionCostIsFeasibleRandomized) {
  std::vector<int> costs, cumulative_costs, partition;
  const auto dummy_getter = [](const int v) { return v; };

  // Random tests.
  const int kNumTests = 1000;
  const int kMaxElements = 32;
  const int kMaxPartitions = 16;
  const int kMaxElCost = 8;
  std::mt19937 rng;
  std::uniform_int_distribution<int> rng_N(1, kMaxElements);
  std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
  std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
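  // Each iteration draws a random cost vector, a random sub-range
  // [start, end) and a maximum number of partitions M, then checks that
  // MaxPartitionCostIsFeasible agrees with the brute-force reference for
  // every cost threshold and that any partition it reports is valid.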
  for (int t = 0; t < kNumTests; ++t) {
    const int N = rng_N(rng);
    const int M = rng_M(rng);
    int total = 0;
    costs.clear();
    for (int i = 0; i < N; ++i) {
      costs.push_back(rng_e(rng));
      total += costs.back();
    }

    cumulative_costs.resize(N);
    std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());

    std::uniform_int_distribution<int> rng_seg(0, N - 1);
    int start = rng_seg(rng);
    int end = rng_seg(rng);
    if (start > end) std::swap(start, end);
    ++end;

    int first_admissible = 0;
    for (int threshold = 1; threshold <= total; ++threshold) {
      const bool bruteforce =
          BruteForcePartition(costs.data(), start, end, M, threshold);
      if (bruteforce && !first_admissible) {
        first_admissible = threshold;
      }
      const bool binary_search =
          MaxPartitionCostIsFeasible(start,
                                     end,
                                     M,
                                     threshold,
                                     start ? cumulative_costs[start - 1] : 0,
                                     cumulative_costs.data(),
                                     dummy_getter,
                                     &partition);
      EXPECT_EQ(bruteforce, binary_search);
      EXPECT_LE(partition.size(), M + 1);
      // Check the partition itself.
      if (binary_search) {
        ASSERT_GT(partition.size(), 1);
        EXPECT_EQ(partition.front(), start);
        EXPECT_EQ(partition.back(), end);

        const int num_partitions = partition.size() - 1;
        EXPECT_LE(num_partitions, M);
        for (int j = 0; j < num_partitions; ++j) {
          int total = 0;
          for (int k = partition[j]; k < partition[j + 1]; ++k) {
            EXPECT_LT(k, end);
            EXPECT_GE(k, start);
            total += costs[k];
          }
          EXPECT_LE(total, threshold);
        }
      }
    }
  }
}

TEST(GuidedParallelFor, PartitionRangeForParallelFor) {
  std::vector<int> costs, cumulative_costs, partition;
  const auto dummy_getter = [](const int v) { return v; };

  // Random tests.
  const int kNumTests = 1000;
  const int kMaxElements = 32;
  const int kMaxPartitions = 16;
  const int kMaxElCost = 8;
  std::mt19937 rng;
  std::uniform_int_distribution<int> rng_N(1, kMaxElements);
  std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
  std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
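  // Each iteration finds the smallest feasible cost threshold by brute force
  // and then checks that PartitionRangeForParallelFor returns a valid
  // partition of [start, end) into at most M segments, none of which exceeds
  // that threshold.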
  for (int t = 0; t < kNumTests; ++t) {
    const int N = rng_N(rng);
    const int M = rng_M(rng);
    int total = 0;
    costs.clear();
    for (int i = 0; i < N; ++i) {
      costs.push_back(rng_e(rng));
      total += costs.back();
    }

    cumulative_costs.resize(N);
    std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());

    std::uniform_int_distribution<int> rng_seg(0, N - 1);
    int start = rng_seg(rng);
    int end = rng_seg(rng);
    if (start > end) std::swap(start, end);
    ++end;

    int first_admissible = 0;
    for (int threshold = 1; threshold <= total; ++threshold) {
      const bool bruteforce =
          BruteForcePartition(costs.data(), start, end, M, threshold);
      if (bruteforce) {
        first_admissible = threshold;
        break;
      }
    }
    EXPECT_TRUE(first_admissible != 0 || total == 0);
    partition = PartitionRangeForParallelFor(
        start, end, M, cumulative_costs.data(), dummy_getter);
    ASSERT_GT(partition.size(), 1);
    EXPECT_EQ(partition.front(), start);
    EXPECT_EQ(partition.back(), end);

    const int num_partitions = partition.size() - 1;
    EXPECT_LE(num_partitions, M);
    for (int j = 0; j < num_partitions; ++j) {
      int total = 0;
      for (int k = partition[j]; k < partition[j + 1]; ++k) {
        EXPECT_LT(k, end);
        EXPECT_GE(k, start);
        total += costs[k];
      }
      EXPECT_LE(total, first_admissible);
    }
  }
}

// Recursively checks whether the range [start, end) can be split into at most
// max_partitions contiguous segments, each of total cost at most max_cost.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost) {
  if (start == end) return true;
  if (start < end && max_partitions == 0) return false;
  int total_cost = 0;
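  // Try every possible end point for the first segment; if its cost is still
  // within max_cost, recurse on the remaining range with one partition fewer.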
  for (int last_curr = start + 1; last_curr <= end; ++last_curr) {
    total_cost += costs[last_curr - 1];
    if (total_cost > max_cost) break;
    if (BruteForcePartition(
            costs, last_curr, end, max_partitions - 1, max_cost))
      return true;
  }
  return false;
}

// Tests that the guided parallel for loop computes the correct result for
// various numbers of threads.
TEST(GuidedParallelFor, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

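  // Use quadratically growing per-element costs so that the cost-based
  // partitioning is exercised with a strongly non-uniform workload.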
  std::vector<int> costs, cumulative_costs;
  for (int i = 1; i <= size; ++i) {
    int cost = i * i;
    costs.push_back(cost);
    if (i == 1) {
      cumulative_costs.push_back(cost);
    } else {
      cumulative_costs.push_back(cost + cumulative_costs.back());
    }
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context,
        0,
        size,
        num_threads,
        [&values](int i) { values[i] = std::sqrt(i); },
        cumulative_costs.data(),
        [](const int v) { return v; });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

TEST(ParallelAssign, D2MulX) {
  const int kVectorSize = 1024 * 1024;
  const int kMaxNumThreads = 8;
  const double kEpsilon = 1e-16;

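  // D maps the second half of D_full, so the expression below is evaluated on
  // top of an Eigen::Map rather than an owning Vector; the expected result is
  // computed with a plain single-threaded Eigen expression.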
  const Vector D_full = Vector::Random(kVectorSize * 2);
  const ConstVectorRef D(D_full.data() + kVectorSize, kVectorSize);
  const Vector x = Vector::Random(kVectorSize);
  const Vector y_expected = D.array().square() * x.array();
  ContextImpl context;
  context.EnsureMinimumThreads(kMaxNumThreads);

  for (int num_threads = 1; num_threads <= kMaxNumThreads; ++num_threads) {
    Vector y_observed(kVectorSize);
    ParallelAssign(
        &context, num_threads, y_observed, D.array().square() * x.array());

    // We might get a non-bit-exact result due to different precision in the
    // scalar and vector code paths. For example, in x86 mode MinGW might emit
    // x87 instructions for the scalar code, making a bit-exact check fail.
    EXPECT_NEAR((y_expected - y_observed).squaredNorm(),
                0.,
                kEpsilon * y_expected.squaredNorm());
  }
}

TEST(ParallelAssign, SetZero) {
  const int kVectorSize = 1024 * 1024;
  const int kMaxNumThreads = 8;

  ContextImpl context;
  context.EnsureMinimumThreads(kMaxNumThreads);

  for (int num_threads = 1; num_threads <= kMaxNumThreads; ++num_threads) {
    Vector x = Vector::Random(kVectorSize);
    ParallelSetZero(&context, num_threads, x);

    CHECK_EQ(x.squaredNorm(), 0.);
  }
}

}  // namespace ceres::internal