blob: 54ca636832c19a23c83103ceddbc7e7017155b65 [file] [log] [blame]
Mike Vitusdc5ea0e2018-01-24 15:53:19 -08001// Ceres Solver - A fast non-linear least squares minimizer
2// Copyright 2018 Google Inc. All rights reserved.
3// http://ceres-solver.org/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright notice,
11// this list of conditions and the following disclaimer in the documentation
12// and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors may be
14// used to endorse or promote products derived from this software without
15// specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28//
29// Author: vitus@google.com (Michael Vitus)
30
31// This include must come before any #ifndef check on Ceres compile options.
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +020032// clang-format off
Sergiu Deitschf90833f2022-02-07 23:43:19 +010033#include "ceres/internal/config.h"
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +020034// clang-format on
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080035
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080036#include "ceres/parallel_for.h"
37
Sameer Agarwal8c811492018-02-28 19:41:47 -080038#include <cmath>
Mike Vitusf0c3b232018-02-28 13:08:48 -080039#include <condition_variable>
40#include <mutex>
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +030041#include <numeric>
42#include <random>
Mike Vitusf0c3b232018-02-28 13:08:48 -080043#include <thread>
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080044#include <vector>
45
Mike Vitusf408f892018-02-22 10:28:39 -080046#include "ceres/context_impl.h"
Mike Vitusf0c3b232018-02-28 13:08:48 -080047#include "glog/logging.h"
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080048#include "gmock/gmock.h"
49#include "gtest/gtest.h"
50
Sameer Agarwalcaf614a2022-04-21 17:41:10 -070051namespace ceres::internal {
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080052
53using testing::ElementsAreArray;
Mike Vitusf0c3b232018-02-28 13:08:48 -080054using testing::UnorderedElementsAreArray;
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080055
56// Tests the parallel for loop computes the correct result for various number of
57// threads.
58TEST(ParallelFor, NumThreads) {
Mike Vitusf408f892018-02-22 10:28:39 -080059 ContextImpl context;
60 context.EnsureMinimumThreads(/*num_threads=*/2);
61
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080062 const int size = 16;
63 std::vector<int> expected_results(size, 0);
64 for (int i = 0; i < size; ++i) {
65 expected_results[i] = std::sqrt(i);
66 }
67
68 for (int num_threads = 1; num_threads <= 8; ++num_threads) {
69 std::vector<int> values(size, 0);
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +020070 ParallelFor(&context, 0, size, num_threads, [&values](int i) {
71 values[i] = std::sqrt(i);
72 });
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080073 EXPECT_THAT(values, ElementsAreArray(expected_results));
74 }
75}
76
Mike Vitusf0c3b232018-02-28 13:08:48 -080077// Tests the parallel for loop with the thread ID interface computes the correct
78// result for various number of threads.
79TEST(ParallelForWithThreadId, NumThreads) {
80 ContextImpl context;
81 context.EnsureMinimumThreads(/*num_threads=*/2);
82
83 const int size = 16;
84 std::vector<int> expected_results(size, 0);
85 for (int i = 0; i < size; ++i) {
86 expected_results[i] = std::sqrt(i);
87 }
88
89 for (int num_threads = 1; num_threads <= 8; ++num_threads) {
90 std::vector<int> values(size, 0);
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +020091 ParallelFor(
92 &context, 0, size, num_threads, [&values](int thread_id, int i) {
93 values[i] = std::sqrt(i);
94 });
Mike Vitusf0c3b232018-02-28 13:08:48 -080095 EXPECT_THAT(values, ElementsAreArray(expected_results));
96 }
97}
98
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080099// Tests nested for loops do not result in a deadlock.
100TEST(ParallelFor, NestedParallelForDeadlock) {
Mike Vitusf408f892018-02-22 10:28:39 -0800101 ContextImpl context;
102 context.EnsureMinimumThreads(/*num_threads=*/2);
103
Mike Vitusdc5ea0e2018-01-24 15:53:19 -0800104 // Increment each element in the 2D matrix.
105 std::vector<std::vector<int>> x(3, {1, 2, 3});
Mike Vitusf408f892018-02-22 10:28:39 -0800106 ParallelFor(&context, 0, 3, 2, [&x, &context](int i) {
Mike Vitusdc5ea0e2018-01-24 15:53:19 -0800107 std::vector<int>& y = x.at(i);
Mike Vitusf408f892018-02-22 10:28:39 -0800108 ParallelFor(&context, 0, 3, 2, [&y](int j) { ++y.at(j); });
Mike Vitusdc5ea0e2018-01-24 15:53:19 -0800109 });
110
111 const std::vector<int> results = {2, 3, 4};
112 for (const std::vector<int>& value : x) {
113 EXPECT_THAT(value, ElementsAreArray(results));
114 }
115}
116
Mike Vitusf0c3b232018-02-28 13:08:48 -0800117// Tests nested for loops do not result in a deadlock for the parallel for with
118// thread ID interface.
119TEST(ParallelForWithThreadId, NestedParallelForDeadlock) {
120 ContextImpl context;
121 context.EnsureMinimumThreads(/*num_threads=*/2);
122
123 // Increment each element in the 2D matrix.
124 std::vector<std::vector<int>> x(3, {1, 2, 3});
125 ParallelFor(&context, 0, 3, 2, [&x, &context](int thread_id, int i) {
126 std::vector<int>& y = x.at(i);
127 ParallelFor(&context, 0, 3, 2, [&y](int thread_id, int j) { ++y.at(j); });
128 });
129
130 const std::vector<int> results = {2, 3, 4};
131 for (const std::vector<int>& value : x) {
132 EXPECT_THAT(value, ElementsAreArray(results));
133 }
134}
135
Mike Vitus5d8b4942018-04-09 10:10:03 -0700136// This test is only valid when multithreading support is enabled.
137#ifndef CERES_NO_THREADS
Mike Vitusf0c3b232018-02-28 13:08:48 -0800138TEST(ParallelForWithThreadId, UniqueThreadIds) {
139 // Ensure the hardware supports more than 1 thread to ensure the test will
140 // pass.
141 const int num_hardware_threads = std::thread::hardware_concurrency();
142 if (num_hardware_threads <= 1) {
143 LOG(ERROR)
144 << "Test not supported, the hardware does not support threading.";
145 return;
146 }
147
148 ContextImpl context;
149 context.EnsureMinimumThreads(/*num_threads=*/2);
150 // Increment each element in the 2D matrix.
151 std::vector<int> x(2, -1);
152 std::mutex mutex;
153 std::condition_variable condition;
154 int count = 0;
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +0200155 ParallelFor(&context,
156 0,
157 2,
158 2,
Mike Vitusf0c3b232018-02-28 13:08:48 -0800159 [&x, &mutex, &condition, &count](int thread_id, int i) {
160 std::unique_lock<std::mutex> lock(mutex);
161 x[i] = thread_id;
162 ++count;
163 condition.notify_all();
164 condition.wait(lock, [&]() { return count == 2; });
165 });
166
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +0200167 EXPECT_THAT(x, UnorderedElementsAreArray({0, 1}));
Mike Vitusf0c3b232018-02-28 13:08:48 -0800168}
Mike Vitus5d8b4942018-04-09 10:10:03 -0700169#endif // CERES_NO_THREADS
Mike Vitusf0c3b232018-02-28 13:08:48 -0800170
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300171// Helper function for partition tests
172bool BruteForcePartition(
Sameer Agarwaladdcd342022-11-14 12:00:18 -0800173 int* costs, int start, int end, int max_partitions, int max_cost);
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300174// Basic test if MaxPartitionCostIsFeasible and BruteForcePartition agree on
175// simple test-cases
176TEST(GuidedParallelFor, MaxPartitionCostIsFeasible) {
Sameer Agarwaladdcd342022-11-14 12:00:18 -0800177 using parallel_for_details::MaxPartitionCostIsFeasible;
178
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300179 std::vector<int> costs, cumulative_costs, partition;
180 costs = {1, 2, 3, 5, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0};
181 cumulative_costs.resize(costs.size());
182 std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
183 const auto dummy_getter = [](const int v) { return v; };
184
185 // [1, 2, 3] [5], [0 ... 0, 7, 0, ... 0]
186 EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
187 costs.size(),
188 3,
189 7,
190 0,
191 cumulative_costs.data(),
192 dummy_getter,
193 &partition));
194 EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 7));
195 // [1, 2, 3, 5, 0 ... 0, 7, 0, ... 0]
196 EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
197 costs.size(),
198 3,
199 18,
200 0,
201 cumulative_costs.data(),
202 dummy_getter,
203 &partition));
204 EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 18));
205 // Impossible since there is item of cost 7
206 EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
207 costs.size(),
208 3,
209 6,
210 0,
211 cumulative_costs.data(),
212 dummy_getter,
213 &partition));
214 EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 6));
215 // Impossible
216 EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
217 costs.size(),
218 2,
219 10,
220 0,
221 cumulative_costs.data(),
222 dummy_getter,
223 &partition));
224 EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 2, 10));
225}
226
227// Randomized tests for MaxPartitionCostIsFeasible
228TEST(GuidedParallelFor, MaxPartitionCostIsFeasibleRandomized) {
Sameer Agarwaladdcd342022-11-14 12:00:18 -0800229 using parallel_for_details::MaxPartitionCostIsFeasible;
230
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300231 std::vector<int> costs, cumulative_costs, partition;
232 const auto dummy_getter = [](const int v) { return v; };
233
234 // Random tests
235 const int kNumTests = 1000;
236 const int kMaxElements = 32;
237 const int kMaxPartitions = 16;
238 const int kMaxElCost = 8;
239 std::mt19937 rng;
240 std::uniform_int_distribution<int> rng_N(1, kMaxElements);
241 std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
242 std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
243 for (int t = 0; t < kNumTests; ++t) {
244 const int N = rng_N(rng);
245 const int M = rng_M(rng);
246 int total = 0;
247 costs.clear();
248 for (int i = 0; i < N; ++i) {
249 costs.push_back(rng_e(rng));
250 total += costs.back();
251 }
252
253 cumulative_costs.resize(N);
254 std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
255
256 std::uniform_int_distribution<int> rng_seg(0, N - 1);
257 int start = rng_seg(rng);
258 int end = rng_seg(rng);
259 if (start > end) std::swap(start, end);
260 ++end;
261
262 int first_admissible = 0;
263 for (int threshold = 1; threshold <= total; ++threshold) {
264 const bool bruteforce =
265 BruteForcePartition(costs.data(), start, end, M, threshold);
266 if (bruteforce && !first_admissible) {
267 first_admissible = threshold;
268 }
269 const bool binary_search =
270 MaxPartitionCostIsFeasible(start,
271 end,
272 M,
273 threshold,
274 start ? cumulative_costs[start - 1] : 0,
275 cumulative_costs.data(),
276 dummy_getter,
277 &partition);
278 EXPECT_EQ(bruteforce, binary_search);
279 EXPECT_LE(partition.size(), M + 1);
280 // check partition itself
281 if (binary_search) {
282 ASSERT_GT(partition.size(), 1);
283 EXPECT_EQ(partition.front(), start);
284 EXPECT_EQ(partition.back(), end);
285
286 const int num_partitions = partition.size() - 1;
287 EXPECT_LE(num_partitions, M);
288 for (int j = 0; j < num_partitions; ++j) {
289 int total = 0;
290 for (int k = partition[j]; k < partition[j + 1]; ++k) {
291 EXPECT_LT(k, end);
292 EXPECT_GE(k, start);
293 total += costs[k];
294 }
295 EXPECT_LE(total, threshold);
296 }
297 }
298 }
299 }
300}
301
Sameer Agarwaladdcd342022-11-14 12:00:18 -0800302TEST(GuidedParallelFor, ComputePartition) {
303 using parallel_for_details::ComputePartition;
304
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300305 std::vector<int> costs, cumulative_costs, partition;
306 const auto dummy_getter = [](const int v) { return v; };
307
308 // Random tests
309 const int kNumTests = 1000;
310 const int kMaxElements = 32;
311 const int kMaxPartitions = 16;
312 const int kMaxElCost = 8;
313 std::mt19937 rng;
314 std::uniform_int_distribution<int> rng_N(1, kMaxElements);
315 std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
316 std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
317 for (int t = 0; t < kNumTests; ++t) {
318 const int N = rng_N(rng);
319 const int M = rng_M(rng);
320 int total = 0;
321 costs.clear();
322 for (int i = 0; i < N; ++i) {
323 costs.push_back(rng_e(rng));
324 total += costs.back();
325 }
326
327 cumulative_costs.resize(N);
328 std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
329
330 std::uniform_int_distribution<int> rng_seg(0, N - 1);
331 int start = rng_seg(rng);
332 int end = rng_seg(rng);
333 if (start > end) std::swap(start, end);
334 ++end;
335
336 int first_admissible = 0;
337 for (int threshold = 1; threshold <= total; ++threshold) {
338 const bool bruteforce =
339 BruteForcePartition(costs.data(), start, end, M, threshold);
340 if (bruteforce) {
341 first_admissible = threshold;
342 break;
343 }
344 }
345 EXPECT_TRUE(first_admissible != 0 || total == 0);
346 partition =
347 ComputePartition(start, end, M, cumulative_costs.data(), dummy_getter);
348 ASSERT_GT(partition.size(), 1);
349 EXPECT_EQ(partition.front(), start);
350 EXPECT_EQ(partition.back(), end);
351
352 const int num_partitions = partition.size() - 1;
353 EXPECT_LE(num_partitions, M);
354 for (int j = 0; j < num_partitions; ++j) {
355 int total = 0;
356 for (int k = partition[j]; k < partition[j + 1]; ++k) {
357 EXPECT_LT(k, end);
358 EXPECT_GE(k, start);
359 total += costs[k];
360 }
361 EXPECT_LE(total, first_admissible);
362 }
363 }
364}
365
366// Recursively try to partition range into segements of total cost
367// less than max_cost
368bool BruteForcePartition(
369 int* costs, int start, int end, int max_partitions, int max_cost) {
370 if (start == end) return true;
371 if (start < end && max_partitions == 0) return false;
372 int total_cost = 0;
373 for (int last_curr = start + 1; last_curr <= end; ++last_curr) {
374 total_cost += costs[last_curr - 1];
375 if (total_cost > max_cost) break;
376 if (BruteForcePartition(
377 costs, last_curr, end, max_partitions - 1, max_cost))
378 return true;
379 }
380 return false;
381}
382
383// Tests if guided parallel for loop computes the correct result for various
384// number of threads.
385TEST(GuidedParallelFor, NumThreads) {
386 ContextImpl context;
387 context.EnsureMinimumThreads(/*num_threads=*/2);
388
389 const int size = 16;
390 std::vector<int> expected_results(size, 0);
391 for (int i = 0; i < size; ++i) {
392 expected_results[i] = std::sqrt(i);
393 }
394
395 std::vector<int> costs, cumulative_costs;
396 for (int i = 1; i <= size; ++i) {
397 int cost = i * i;
398 costs.push_back(cost);
399 if (i == 1) {
400 cumulative_costs.push_back(cost);
401 } else {
402 cumulative_costs.push_back(cost + cumulative_costs.back());
403 }
404 }
405
406 for (int num_threads = 1; num_threads <= 8; ++num_threads) {
407 std::vector<int> values(size, 0);
408 ParallelFor(
409 &context,
410 0,
411 size,
412 num_threads,
413 [&values](int i) { values[i] = std::sqrt(i); },
414 cumulative_costs.data(),
415 [](const int v) { return v; });
416 EXPECT_THAT(values, ElementsAreArray(expected_results));
417 }
418}
419
Sameer Agarwalcaf614a2022-04-21 17:41:10 -0700420} // namespace ceres::internal