// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)

#include "ceres/parallel_for.h"

#include <atomic>
#include <cmath>
#include <condition_variable>
#include <mutex>
#include <numeric>
#include <random>
#include <thread>
#include <tuple>
#include <vector>

#include "ceres/context_impl.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_vector_ops.h"
#include "glog/logging.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace ceres::internal {

using testing::ElementsAreArray;
using testing::UnorderedElementsAreArray;

// Tests that the parallel for loop computes the correct result for various
// numbers of threads.
TEST(ParallelFor, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(&context, 0, size, num_threads, [&values](int i) {
      values[i] = std::sqrt(i);
    });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}


// Tests that the parallel for loop over ranges computes the correct result
// for various numbers of threads.
TEST(ParallelForWithRange, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context, 0, size, num_threads, [&values](std::tuple<int, int> range) {
          auto [start, end] = range;
          for (int i = start; i < end; ++i) values[i] = std::sqrt(i);
        });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests that the parallel for loop over ranges respects the lower bound on
// the minimal block size.
TEST(ParallelForWithRange, MinimalSize) {
  ContextImpl context;
  constexpr int kNumThreads = 4;
  constexpr int kMinBlockSize = 5;
  context.EnsureMinimumThreads(kNumThreads);

  for (int size = kMinBlockSize; size <= 25; ++size) {
    std::atomic<bool> failed(false);
    ParallelFor(
        &context,
        0,
        size,
        kNumThreads,
        [&failed, kMinBlockSize](std::tuple<int, int> range) {
          auto [start, end] = range;
          if (end - start < kMinBlockSize) failed = true;
        },
        kMinBlockSize);
    EXPECT_EQ(failed, false);
  }
}

// Tests that the parallel for loop with the thread ID interface computes the
// correct result for various numbers of threads.
TEST(ParallelForWithThreadId, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context, 0, size, num_threads, [&values](int thread_id, int i) {
          values[i] = std::sqrt(i);
        });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests that nested parallel for loops do not result in a deadlock.
TEST(ParallelFor, NestedParallelForDeadlock) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  // Increment each element in the 2D matrix.
  std::vector<std::vector<int>> x(3, {1, 2, 3});
  ParallelFor(&context, 0, 3, 2, [&x, &context](int i) {
    std::vector<int>& y = x.at(i);
    ParallelFor(&context, 0, 3, 2, [&y](int j) { ++y.at(j); });
  });

  const std::vector<int> results = {2, 3, 4};
  for (const std::vector<int>& value : x) {
    EXPECT_THAT(value, ElementsAreArray(results));
  }
}
159
Mike Vitusf0c3b232018-02-28 13:08:48 -0800160// Tests nested for loops do not result in a deadlock for the parallel for with
161// thread ID interface.
162TEST(ParallelForWithThreadId, NestedParallelForDeadlock) {
163 ContextImpl context;
164 context.EnsureMinimumThreads(/*num_threads=*/2);
165
166 // Increment each element in the 2D matrix.
167 std::vector<std::vector<int>> x(3, {1, 2, 3});
168 ParallelFor(&context, 0, 3, 2, [&x, &context](int thread_id, int i) {
169 std::vector<int>& y = x.at(i);
170 ParallelFor(&context, 0, 3, 2, [&y](int thread_id, int j) { ++y.at(j); });
171 });
172
173 const std::vector<int> results = {2, 3, 4};
174 for (const std::vector<int>& value : x) {
175 EXPECT_THAT(value, ElementsAreArray(results));
176 }
177}

TEST(ParallelForWithThreadId, UniqueThreadIds) {
  // The test requires hardware support for more than one thread; skip it
  // otherwise, since two distinct thread IDs cannot be observed.
  const int num_hardware_threads = std::thread::hardware_concurrency();
  if (num_hardware_threads <= 1) {
    LOG(ERROR)
        << "Test not supported, the hardware does not support threading.";
    return;
  }

  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);
  // Record the ID of the thread that processes each index.
  std::vector<int> x(2, -1);
  std::mutex mutex;
  std::condition_variable condition;
  int count = 0;
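  // The callback below acts as a barrier: each invocation records its thread
  // ID and then waits until both indices have been processed. This forces the
  // two iterations to run concurrently on distinct threads, so the recorded
  // thread IDs must be distinct.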
  ParallelFor(&context,
              0,
              2,
              2,
              [&x, &mutex, &condition, &count](int thread_id, int i) {
                std::unique_lock<std::mutex> lock(mutex);
                x[i] = thread_id;
                ++count;
                condition.notify_all();
                condition.wait(lock, [&]() { return count == 2; });
              });

  EXPECT_THAT(x, UnorderedElementsAreArray({0, 1}));
}

// Helper function for the partition tests below; defined at the end of this
// file.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost);

// Basic test that MaxPartitionCostIsFeasible and BruteForcePartition agree on
// simple test cases.
TEST(GuidedParallelFor, MaxPartitionCostIsFeasible) {
  std::vector<int> costs, cumulative_costs, partition;
  costs = {1, 2, 3, 5, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0};
  cumulative_costs.resize(costs.size());
  std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
  const auto dummy_getter = [](const int v) { return v; };
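  // dummy_getter is an identity function: the cumulative costs are already
  // stored as plain integers, so no extraction from a user-defined type is
  // needed here.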

  // [1, 2, 3] [5], [0 ... 0, 7, 0, ... 0]
  EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
                                         costs.size(),
                                         3,
                                         7,
                                         0,
                                         cumulative_costs.data(),
                                         dummy_getter,
                                         &partition));
  EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 7));
  // [1, 2, 3, 5, 0 ... 0, 7, 0, ... 0]
  EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
                                         costs.size(),
                                         3,
                                         18,
                                         0,
                                         cumulative_costs.data(),
                                         dummy_getter,
                                         &partition));
  EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 18));
  // Impossible, since there is an item of cost 7.
  EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
                                          costs.size(),
                                          3,
                                          6,
                                          0,
                                          cumulative_costs.data(),
                                          dummy_getter,
                                          &partition));
  EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 6));
  // Impossible.
  EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
                                          costs.size(),
                                          2,
                                          10,
                                          0,
                                          cumulative_costs.data(),
                                          dummy_getter,
                                          &partition));
  EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 2, 10));
}

// Randomized tests for MaxPartitionCostIsFeasible.
TEST(GuidedParallelFor, MaxPartitionCostIsFeasibleRandomized) {
  std::vector<int> costs, cumulative_costs, partition;
  const auto dummy_getter = [](const int v) { return v; };

  // Random tests.
  const int kNumTests = 1000;
  const int kMaxElements = 32;
  const int kMaxPartitions = 16;
  const int kMaxElCost = 8;
  std::mt19937 rng;
  std::uniform_int_distribution<int> rng_N(1, kMaxElements);
  std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
  std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
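  // Each iteration draws a random cost vector, a random sub-range
  // [start, end) and a maximum number of partitions M, then checks that
  // MaxPartitionCostIsFeasible agrees with the brute-force reference for
  // every cost threshold and that any partition it reports is valid.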
  for (int t = 0; t < kNumTests; ++t) {
    const int N = rng_N(rng);
    const int M = rng_M(rng);
    int total = 0;
    costs.clear();
    for (int i = 0; i < N; ++i) {
      costs.push_back(rng_e(rng));
      total += costs.back();
    }

    cumulative_costs.resize(N);
    std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());

    std::uniform_int_distribution<int> rng_seg(0, N - 1);
    int start = rng_seg(rng);
    int end = rng_seg(rng);
    if (start > end) std::swap(start, end);
    ++end;

    int first_admissible = 0;
    for (int threshold = 1; threshold <= total; ++threshold) {
      const bool bruteforce =
          BruteForcePartition(costs.data(), start, end, M, threshold);
      if (bruteforce && !first_admissible) {
        first_admissible = threshold;
      }
      const bool binary_search =
          MaxPartitionCostIsFeasible(start,
                                     end,
                                     M,
                                     threshold,
                                     start ? cumulative_costs[start - 1] : 0,
                                     cumulative_costs.data(),
                                     dummy_getter,
                                     &partition);
      EXPECT_EQ(bruteforce, binary_search);
      EXPECT_LE(partition.size(), M + 1);
      // Check the partition itself.
      if (binary_search) {
        ASSERT_GT(partition.size(), 1);
        EXPECT_EQ(partition.front(), start);
        EXPECT_EQ(partition.back(), end);

        const int num_partitions = partition.size() - 1;
        EXPECT_LE(num_partitions, M);
        for (int j = 0; j < num_partitions; ++j) {
          int total = 0;
          for (int k = partition[j]; k < partition[j + 1]; ++k) {
            EXPECT_LT(k, end);
            EXPECT_GE(k, start);
            total += costs[k];
          }
          EXPECT_LE(total, threshold);
        }
      }
    }
  }
}

TEST(GuidedParallelFor, PartitionRangeForParallelFor) {
  std::vector<int> costs, cumulative_costs, partition;
  const auto dummy_getter = [](const int v) { return v; };

  // Random tests.
  const int kNumTests = 1000;
  const int kMaxElements = 32;
  const int kMaxPartitions = 16;
  const int kMaxElCost = 8;
  std::mt19937 rng;
  std::uniform_int_distribution<int> rng_N(1, kMaxElements);
  std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
  std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
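  // Each iteration finds the smallest feasible cost threshold by brute force
  // and then checks that PartitionRangeForParallelFor returns a valid
  // partition of [start, end) into at most M segments, none of which exceeds
  // that threshold.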
  for (int t = 0; t < kNumTests; ++t) {
    const int N = rng_N(rng);
    const int M = rng_M(rng);
    int total = 0;
    costs.clear();
    for (int i = 0; i < N; ++i) {
      costs.push_back(rng_e(rng));
      total += costs.back();
    }

    cumulative_costs.resize(N);
    std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());

    std::uniform_int_distribution<int> rng_seg(0, N - 1);
    int start = rng_seg(rng);
    int end = rng_seg(rng);
    if (start > end) std::swap(start, end);
    ++end;

    int first_admissible = 0;
    for (int threshold = 1; threshold <= total; ++threshold) {
      const bool bruteforce =
          BruteForcePartition(costs.data(), start, end, M, threshold);
      if (bruteforce) {
        first_admissible = threshold;
        break;
      }
    }
    EXPECT_TRUE(first_admissible != 0 || total == 0);
    partition = PartitionRangeForParallelFor(
        start, end, M, cumulative_costs.data(), dummy_getter);
    ASSERT_GT(partition.size(), 1);
    EXPECT_EQ(partition.front(), start);
    EXPECT_EQ(partition.back(), end);

    const int num_partitions = partition.size() - 1;
    EXPECT_LE(num_partitions, M);
    for (int j = 0; j < num_partitions; ++j) {
      int total = 0;
      for (int k = partition[j]; k < partition[j + 1]; ++k) {
        EXPECT_LT(k, end);
        EXPECT_GE(k, start);
        total += costs[k];
      }
      EXPECT_LE(total, first_admissible);
    }
  }
}

// Recursively checks whether the range [start, end) can be split into at most
// max_partitions contiguous segments, each of total cost at most max_cost.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost) {
  if (start == end) return true;
  if (start < end && max_partitions == 0) return false;
  int total_cost = 0;
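  // Try every possible end point for the first segment; if its cost is still
  // within max_cost, recurse on the remaining range with one partition fewer.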
  for (int last_curr = start + 1; last_curr <= end; ++last_curr) {
    total_cost += costs[last_curr - 1];
    if (total_cost > max_cost) break;
    if (BruteForcePartition(
            costs, last_curr, end, max_partitions - 1, max_cost))
      return true;
  }
  return false;
}

// Tests that the guided parallel for loop computes the correct result for
// various numbers of threads.
TEST(GuidedParallelFor, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

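  // Use quadratically growing per-element costs so that the cost-based
  // partitioning is exercised with a strongly non-uniform workload.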
  std::vector<int> costs, cumulative_costs;
  for (int i = 1; i <= size; ++i) {
    int cost = i * i;
    costs.push_back(cost);
    if (i == 1) {
      cumulative_costs.push_back(cost);
    } else {
      cumulative_costs.push_back(cost + cumulative_costs.back());
    }
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context,
        0,
        size,
        num_threads,
        [&values](int i) { values[i] = std::sqrt(i); },
        cumulative_costs.data(),
        [](const int v) { return v; });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

TEST(ParallelAssign, D2MulX) {
  const int kVectorSize = 1024 * 1024;
  const int kMaxNumThreads = 8;
  const double kEpsilon = 1e-16;

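  // D maps the second half of D_full, so the expression below is evaluated on
  // top of an Eigen::Map rather than an owning Vector; the expected result is
  // computed with a plain single-threaded Eigen expression.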
  const Vector D_full = Vector::Random(kVectorSize * 2);
  const ConstVectorRef D(D_full.data() + kVectorSize, kVectorSize);
  const Vector x = Vector::Random(kVectorSize);
  const Vector y_expected = D.array().square() * x.array();
  ContextImpl context;
  context.EnsureMinimumThreads(kMaxNumThreads);

  for (int num_threads = 1; num_threads <= kMaxNumThreads; ++num_threads) {
    Vector y_observed(kVectorSize);
    ParallelAssign(
        &context, num_threads, y_observed, D.array().square() * x.array());

    // We might get a non-bit-exact result due to different precision in the
    // scalar and vector code paths. For example, in x86 mode MinGW might emit
    // x87 instructions for the scalar code, making a bit-exact check fail.
    EXPECT_NEAR((y_expected - y_observed).squaredNorm(),
                0.,
                kEpsilon * y_expected.squaredNorm());
  }
}

TEST(ParallelAssign, SetZero) {
  const int kVectorSize = 1024 * 1024;
  const int kMaxNumThreads = 8;

  ContextImpl context;
  context.EnsureMinimumThreads(kMaxNumThreads);

  for (int num_threads = 1; num_threads <= kMaxNumThreads; ++num_threads) {
    Vector x = Vector::Random(kVectorSize);
    ParallelSetZero(&context, num_threads, x);

    CHECK_EQ(x.squaredNorm(), 0.);
  }
}

}  // namespace ceres::internal