// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)

#include "ceres/parallel_for.h"

#include <cmath>
#include <condition_variable>
#include <mutex>
#include <numeric>
#include <random>
#include <thread>
#include <utility>
#include <vector>

#include "ceres/context_impl.h"
#include "ceres/internal/config.h"
#include "glog/logging.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

Sameer Agarwalcaf614a2022-04-21 17:41:10 -070047namespace ceres::internal {
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080048
49using testing::ElementsAreArray;
Mike Vitusf0c3b232018-02-28 13:08:48 -080050using testing::UnorderedElementsAreArray;
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080051
52// Tests the parallel for loop computes the correct result for various number of
53// threads.
54TEST(ParallelFor, NumThreads) {
Mike Vitusf408f892018-02-22 10:28:39 -080055 ContextImpl context;
56 context.EnsureMinimumThreads(/*num_threads=*/2);
57
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080058 const int size = 16;
59 std::vector<int> expected_results(size, 0);
60 for (int i = 0; i < size; ++i) {
61 expected_results[i] = std::sqrt(i);
62 }
63
64 for (int num_threads = 1; num_threads <= 8; ++num_threads) {
65 std::vector<int> values(size, 0);
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +020066 ParallelFor(&context, 0, size, num_threads, [&values](int i) {
67 values[i] = std::sqrt(i);
68 });
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080069 EXPECT_THAT(values, ElementsAreArray(expected_results));
70 }
71}
72
Mike Vitusf0c3b232018-02-28 13:08:48 -080073// Tests the parallel for loop with the thread ID interface computes the correct
74// result for various number of threads.
75TEST(ParallelForWithThreadId, NumThreads) {
76 ContextImpl context;
77 context.EnsureMinimumThreads(/*num_threads=*/2);
78
79 const int size = 16;
80 std::vector<int> expected_results(size, 0);
81 for (int i = 0; i < size; ++i) {
82 expected_results[i] = std::sqrt(i);
83 }
84
85 for (int num_threads = 1; num_threads <= 8; ++num_threads) {
86 std::vector<int> values(size, 0);
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +020087 ParallelFor(
88 &context, 0, size, num_threads, [&values](int thread_id, int i) {
89 values[i] = std::sqrt(i);
90 });
Mike Vitusf0c3b232018-02-28 13:08:48 -080091 EXPECT_THAT(values, ElementsAreArray(expected_results));
92 }
93}
94
Mike Vitusdc5ea0e2018-01-24 15:53:19 -080095// Tests nested for loops do not result in a deadlock.
96TEST(ParallelFor, NestedParallelForDeadlock) {
Mike Vitusf408f892018-02-22 10:28:39 -080097 ContextImpl context;
98 context.EnsureMinimumThreads(/*num_threads=*/2);
99
Mike Vitusdc5ea0e2018-01-24 15:53:19 -0800100 // Increment each element in the 2D matrix.
101 std::vector<std::vector<int>> x(3, {1, 2, 3});
Mike Vitusf408f892018-02-22 10:28:39 -0800102 ParallelFor(&context, 0, 3, 2, [&x, &context](int i) {
Mike Vitusdc5ea0e2018-01-24 15:53:19 -0800103 std::vector<int>& y = x.at(i);
Mike Vitusf408f892018-02-22 10:28:39 -0800104 ParallelFor(&context, 0, 3, 2, [&y](int j) { ++y.at(j); });
Mike Vitusdc5ea0e2018-01-24 15:53:19 -0800105 });
106
107 const std::vector<int> results = {2, 3, 4};
108 for (const std::vector<int>& value : x) {
109 EXPECT_THAT(value, ElementsAreArray(results));
110 }
111}
112
Mike Vitusf0c3b232018-02-28 13:08:48 -0800113// Tests nested for loops do not result in a deadlock for the parallel for with
114// thread ID interface.
115TEST(ParallelForWithThreadId, NestedParallelForDeadlock) {
116 ContextImpl context;
117 context.EnsureMinimumThreads(/*num_threads=*/2);
118
119 // Increment each element in the 2D matrix.
120 std::vector<std::vector<int>> x(3, {1, 2, 3});
121 ParallelFor(&context, 0, 3, 2, [&x, &context](int thread_id, int i) {
122 std::vector<int>& y = x.at(i);
123 ParallelFor(&context, 0, 3, 2, [&y](int thread_id, int j) { ++y.at(j); });
124 });
125
126 const std::vector<int> results = {2, 3, 4};
127 for (const std::vector<int>& value : x) {
128 EXPECT_THAT(value, ElementsAreArray(results));
129 }
130}
131
132TEST(ParallelForWithThreadId, UniqueThreadIds) {
133 // Ensure the hardware supports more than 1 thread to ensure the test will
134 // pass.
135 const int num_hardware_threads = std::thread::hardware_concurrency();
136 if (num_hardware_threads <= 1) {
137 LOG(ERROR)
138 << "Test not supported, the hardware does not support threading.";
139 return;
140 }
141
142 ContextImpl context;
143 context.EnsureMinimumThreads(/*num_threads=*/2);
144 // Increment each element in the 2D matrix.
145 std::vector<int> x(2, -1);
146 std::mutex mutex;
147 std::condition_variable condition;
148 int count = 0;
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +0200149 ParallelFor(&context,
150 0,
151 2,
152 2,
Mike Vitusf0c3b232018-02-28 13:08:48 -0800153 [&x, &mutex, &condition, &count](int thread_id, int i) {
154 std::unique_lock<std::mutex> lock(mutex);
155 x[i] = thread_id;
156 ++count;
157 condition.notify_all();
158 condition.wait(lock, [&]() { return count == 2; });
159 });
160
Nikolaus Demmel7b8f6752020-09-20 21:45:24 +0200161 EXPECT_THAT(x, UnorderedElementsAreArray({0, 1}));
Mike Vitusf0c3b232018-02-28 13:08:48 -0800162}
163
// Helper function for partition tests (defined further down in this file).
// Exhaustively checks whether costs[start, end) can be split into at most
// max_partitions contiguous segments, each of total cost at most max_cost.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost);
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300167// Basic test if MaxPartitionCostIsFeasible and BruteForcePartition agree on
168// simple test-cases
169TEST(GuidedParallelFor, MaxPartitionCostIsFeasible) {
Sameer Agarwaladdcd342022-11-14 12:00:18 -0800170 using parallel_for_details::MaxPartitionCostIsFeasible;
171
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300172 std::vector<int> costs, cumulative_costs, partition;
173 costs = {1, 2, 3, 5, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0};
174 cumulative_costs.resize(costs.size());
175 std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
176 const auto dummy_getter = [](const int v) { return v; };
177
178 // [1, 2, 3] [5], [0 ... 0, 7, 0, ... 0]
179 EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
180 costs.size(),
181 3,
182 7,
183 0,
184 cumulative_costs.data(),
185 dummy_getter,
186 &partition));
187 EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 7));
188 // [1, 2, 3, 5, 0 ... 0, 7, 0, ... 0]
189 EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
190 costs.size(),
191 3,
192 18,
193 0,
194 cumulative_costs.data(),
195 dummy_getter,
196 &partition));
197 EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 18));
198 // Impossible since there is item of cost 7
199 EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
200 costs.size(),
201 3,
202 6,
203 0,
204 cumulative_costs.data(),
205 dummy_getter,
206 &partition));
207 EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 6));
208 // Impossible
209 EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
210 costs.size(),
211 2,
212 10,
213 0,
214 cumulative_costs.data(),
215 dummy_getter,
216 &partition));
217 EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 2, 10));
218}
219
220// Randomized tests for MaxPartitionCostIsFeasible
221TEST(GuidedParallelFor, MaxPartitionCostIsFeasibleRandomized) {
Sameer Agarwaladdcd342022-11-14 12:00:18 -0800222 using parallel_for_details::MaxPartitionCostIsFeasible;
223
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300224 std::vector<int> costs, cumulative_costs, partition;
225 const auto dummy_getter = [](const int v) { return v; };
226
227 // Random tests
228 const int kNumTests = 1000;
229 const int kMaxElements = 32;
230 const int kMaxPartitions = 16;
231 const int kMaxElCost = 8;
232 std::mt19937 rng;
233 std::uniform_int_distribution<int> rng_N(1, kMaxElements);
234 std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
235 std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
236 for (int t = 0; t < kNumTests; ++t) {
237 const int N = rng_N(rng);
238 const int M = rng_M(rng);
239 int total = 0;
240 costs.clear();
241 for (int i = 0; i < N; ++i) {
242 costs.push_back(rng_e(rng));
243 total += costs.back();
244 }
245
246 cumulative_costs.resize(N);
247 std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
248
249 std::uniform_int_distribution<int> rng_seg(0, N - 1);
250 int start = rng_seg(rng);
251 int end = rng_seg(rng);
252 if (start > end) std::swap(start, end);
253 ++end;
254
255 int first_admissible = 0;
256 for (int threshold = 1; threshold <= total; ++threshold) {
257 const bool bruteforce =
258 BruteForcePartition(costs.data(), start, end, M, threshold);
259 if (bruteforce && !first_admissible) {
260 first_admissible = threshold;
261 }
262 const bool binary_search =
263 MaxPartitionCostIsFeasible(start,
264 end,
265 M,
266 threshold,
267 start ? cumulative_costs[start - 1] : 0,
268 cumulative_costs.data(),
269 dummy_getter,
270 &partition);
271 EXPECT_EQ(bruteforce, binary_search);
272 EXPECT_LE(partition.size(), M + 1);
273 // check partition itself
274 if (binary_search) {
275 ASSERT_GT(partition.size(), 1);
276 EXPECT_EQ(partition.front(), start);
277 EXPECT_EQ(partition.back(), end);
278
279 const int num_partitions = partition.size() - 1;
280 EXPECT_LE(num_partitions, M);
281 for (int j = 0; j < num_partitions; ++j) {
282 int total = 0;
283 for (int k = partition[j]; k < partition[j + 1]; ++k) {
284 EXPECT_LT(k, end);
285 EXPECT_GE(k, start);
286 total += costs[k];
287 }
288 EXPECT_LE(total, threshold);
289 }
290 }
291 }
292 }
293}
294
Sameer Agarwaladdcd342022-11-14 12:00:18 -0800295TEST(GuidedParallelFor, ComputePartition) {
296 using parallel_for_details::ComputePartition;
297
Dmitriy Korchemkin5d53d1e2022-11-02 16:06:48 +0300298 std::vector<int> costs, cumulative_costs, partition;
299 const auto dummy_getter = [](const int v) { return v; };
300
301 // Random tests
302 const int kNumTests = 1000;
303 const int kMaxElements = 32;
304 const int kMaxPartitions = 16;
305 const int kMaxElCost = 8;
306 std::mt19937 rng;
307 std::uniform_int_distribution<int> rng_N(1, kMaxElements);
308 std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
309 std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
310 for (int t = 0; t < kNumTests; ++t) {
311 const int N = rng_N(rng);
312 const int M = rng_M(rng);
313 int total = 0;
314 costs.clear();
315 for (int i = 0; i < N; ++i) {
316 costs.push_back(rng_e(rng));
317 total += costs.back();
318 }
319
320 cumulative_costs.resize(N);
321 std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
322
323 std::uniform_int_distribution<int> rng_seg(0, N - 1);
324 int start = rng_seg(rng);
325 int end = rng_seg(rng);
326 if (start > end) std::swap(start, end);
327 ++end;
328
329 int first_admissible = 0;
330 for (int threshold = 1; threshold <= total; ++threshold) {
331 const bool bruteforce =
332 BruteForcePartition(costs.data(), start, end, M, threshold);
333 if (bruteforce) {
334 first_admissible = threshold;
335 break;
336 }
337 }
338 EXPECT_TRUE(first_admissible != 0 || total == 0);
339 partition =
340 ComputePartition(start, end, M, cumulative_costs.data(), dummy_getter);
341 ASSERT_GT(partition.size(), 1);
342 EXPECT_EQ(partition.front(), start);
343 EXPECT_EQ(partition.back(), end);
344
345 const int num_partitions = partition.size() - 1;
346 EXPECT_LE(num_partitions, M);
347 for (int j = 0; j < num_partitions; ++j) {
348 int total = 0;
349 for (int k = partition[j]; k < partition[j + 1]; ++k) {
350 EXPECT_LT(k, end);
351 EXPECT_GE(k, start);
352 total += costs[k];
353 }
354 EXPECT_LE(total, first_admissible);
355 }
356 }
357}
358
// Recursively try to partition the range [start, end) into at most
// max_partitions contiguous segments, each of total cost at most max_cost.
//
// costs          - per-element costs; only indices in [start, end) are read.
// start, end     - half-open range still to be covered.
// max_partitions - number of segments that may still be used.
// max_cost       - inclusive upper bound on the cost of a single segment.
//
// Returns true if such a partition exists. An empty range is always
// feasible. The search is exhaustive (every end position of the first
// segment is tried), so it is only suitable as a reference on small inputs.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost) {
  if (start == end) return true;
  // Elements remain but no segment is left to hold them.
  if (max_partitions == 0) return false;
  int total_cost = 0;
  // last_curr is the exclusive end of the first segment.
  for (int last_curr = start + 1; last_curr <= end; ++last_curr) {
    total_cost += costs[last_curr - 1];
    // Costs are accumulated left to right; once the bound is exceeded no
    // longer first segment is tried either.
    if (total_cost > max_cost) break;
    if (BruteForcePartition(
            costs, last_curr, end, max_partitions - 1, max_cost)) {
      return true;
    }
  }
  return false;
}

376// Tests if guided parallel for loop computes the correct result for various
377// number of threads.
378TEST(GuidedParallelFor, NumThreads) {
379 ContextImpl context;
380 context.EnsureMinimumThreads(/*num_threads=*/2);
381
382 const int size = 16;
383 std::vector<int> expected_results(size, 0);
384 for (int i = 0; i < size; ++i) {
385 expected_results[i] = std::sqrt(i);
386 }
387
388 std::vector<int> costs, cumulative_costs;
389 for (int i = 1; i <= size; ++i) {
390 int cost = i * i;
391 costs.push_back(cost);
392 if (i == 1) {
393 cumulative_costs.push_back(cost);
394 } else {
395 cumulative_costs.push_back(cost + cumulative_costs.back());
396 }
397 }
398
399 for (int num_threads = 1; num_threads <= 8; ++num_threads) {
400 std::vector<int> values(size, 0);
401 ParallelFor(
402 &context,
403 0,
404 size,
405 num_threads,
406 [&values](int i) { values[i] = std::sqrt(i); },
407 cumulative_costs.data(),
408 [](const int v) { return v; });
409 EXPECT_THAT(values, ElementsAreArray(expected_results));
410 }
411}
412
}  // namespace ceres::internal