benchmarks.cc (17317B)
1 // Copyright 2017 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Benchmarks for absl random distributions as well as a selection of the 16 // C++ standard library random distributions. 17 18 #include <algorithm> 19 #include <cstddef> 20 #include <cstdint> 21 #include <initializer_list> 22 #include <iterator> 23 #include <limits> 24 #include <random> 25 #include <type_traits> 26 #include <vector> 27 28 #include "absl/base/macros.h" 29 #include "absl/random/bernoulli_distribution.h" 30 #include "absl/random/beta_distribution.h" 31 #include "absl/random/exponential_distribution.h" 32 #include "absl/random/gaussian_distribution.h" 33 #include "absl/random/internal/fast_uniform_bits.h" 34 #include "absl/random/internal/randen_engine.h" 35 #include "absl/random/log_uniform_int_distribution.h" 36 #include "absl/random/poisson_distribution.h" 37 #include "absl/random/random.h" 38 #include "absl/random/uniform_int_distribution.h" 39 #include "absl/random/uniform_real_distribution.h" 40 #include "absl/random/zipf_distribution.h" 41 #include "benchmark/benchmark.h" 42 43 namespace { 44 45 // Seed data to avoid reading random_device() for benchmarks. 
// Fixed table of 54 32-bit seed words. Declared constexpr because nothing in
// this file ever writes to it.
constexpr uint32_t kSeedData[] = {
    0x1B510052, 0x9A532915, 0xD60F573F, 0xBC9BC6E4, 0x2B60A476, 0x81E67400,
    0x08BA6FB5, 0x571BE91F, 0xF296EC6B, 0x2A0DD915, 0xB6636521, 0xE7B9F9B6,
    0xFF34052E, 0xC5855664, 0x53B02D5D, 0xA99F8FA1, 0x08BA4799, 0x6E85076A,
    0x4B7A70E9, 0xB5B32944, 0xDB75092E, 0xC4192623, 0xAD6EA6B0, 0x49A7DF7D,
    0x9CEE60B8, 0x8FEDB266, 0xECAA8C71, 0x699A18FF, 0x5664526C, 0xC2B19EE1,
    0x193602A5, 0x75094C29, 0xA0591340, 0xE4183A3E, 0x3F54989A, 0x5B429D65,
    0x6B8FE4D6, 0x99F73FD6, 0xA1D29C07, 0xEFE830F5, 0x4D2D38E6, 0xF0255DC1,
    0x4CDD2086, 0x8470EB26, 0x6382E9C6, 0x021ECC5E, 0x09686B3F, 0x3EBAEFC9,
    0x3C971814, 0x6B6A70A1, 0x687F3584, 0x52A0E286, 0x13198A2E, 0x03707344,
};

// PrecompiledSeedSeq provides kSeedData to a conforming
// random engine to speed initialization in the benchmarks.
//
// Its member set is shaped like std::seed_seq (result_type, three
// constructors, generate(), size(), param()), but every constructor argument
// is ignored: the words handed out always come from kSeedData.
class PrecompiledSeedSeq {
 public:
  using result_type = uint32_t;

  PrecompiledSeedSeq() = default;

  // Accepts an iterator range so the type can be constructed like a seed
  // sequence; the range itself is not read.
  template <typename Iterator>
  PrecompiledSeedSeq(Iterator, Iterator) {}

  // Accepts an initializer list so the type can be constructed like a seed
  // sequence; the list itself is not read.
  template <typename T>
  PrecompiledSeedSeq(std::initializer_list<T>) {}

  // Writes consecutive words of kSeedData into [begin, end), wrapping back to
  // the first word after the last one has been handed out.
  //
  // The read position is a function-local static: it persists across calls
  // and is shared by every PrecompiledSeedSeq instance (one counter per
  // OutIterator instantiation). It is not synchronized.
  template <typename OutIterator>
  void generate(OutIterator begin, OutIterator end) {
    static size_t idx = 0;
    for (; begin != end; ++begin) {
      *begin = kSeedData[idx];
      if (++idx >= std::size(kSeedData)) {
        idx = 0;
      }
    }
  }

  // Number of words in the seed table.
  size_t size() const { return std::size(kSeedData); }

  // Copies the whole seed table, in order, to `out`.
  template <typename OutIterator>
  void param(OutIterator out) const {
    std::copy(std::begin(kSeedData), std::end(kSeedData), out);
  }
};

// Triggers default constructor initialization.
class DefaultConstructorSeedSeq {};

// make_engine<T, SSeq> returns a random_engine which is initialized,
// either via the default constructor, when SSeq is DefaultConstructorSeedSeq
// (the default), or via the indicated seed sequence, SSeq.
97 template <typename Engine, typename SSeq = DefaultConstructorSeedSeq> 98 Engine make_engine() { 99 constexpr bool use_default_initialization = 100 std::is_same_v<SSeq, DefaultConstructorSeedSeq>; 101 if constexpr (use_default_initialization) { 102 return Engine(); 103 } else { 104 // Otherwise, use the provided seed sequence. 105 SSeq seq(std::begin(kSeedData), std::end(kSeedData)); 106 return Engine(seq); 107 } 108 } 109 110 template <typename Engine, typename SSeq> 111 void BM_Construct(benchmark::State& state) { 112 for (auto _ : state) { 113 auto rng = make_engine<Engine, SSeq>(); 114 benchmark::DoNotOptimize(rng()); 115 } 116 } 117 118 template <typename Engine> 119 void BM_Direct(benchmark::State& state) { 120 using value_type = typename Engine::result_type; 121 // Direct use of the URBG. 122 auto rng = make_engine<Engine>(); 123 for (auto _ : state) { 124 benchmark::DoNotOptimize(rng()); 125 } 126 state.SetBytesProcessed(sizeof(value_type) * state.iterations()); 127 } 128 129 template <typename Engine> 130 void BM_Generate(benchmark::State& state) { 131 // std::generate makes a copy of the RNG; thus this tests the 132 // copy-constructor efficiency. 133 using value_type = typename Engine::result_type; 134 std::vector<value_type> v(64); 135 auto rng = make_engine<Engine>(); 136 while (state.KeepRunningBatch(64)) { 137 std::generate(std::begin(v), std::end(v), rng); 138 } 139 } 140 141 template <typename Engine, size_t elems> 142 void BM_Shuffle(benchmark::State& state) { 143 // Direct use of the Engine. 144 std::vector<uint32_t> v(elems); 145 while (state.KeepRunningBatch(elems)) { 146 auto rng = make_engine<Engine>(); 147 std::shuffle(std::begin(v), std::end(v), rng); 148 } 149 } 150 151 template <typename Engine, size_t elems> 152 void BM_ShuffleReuse(benchmark::State& state) { 153 // Direct use of the Engine. 
154 std::vector<uint32_t> v(elems); 155 auto rng = make_engine<Engine>(); 156 while (state.KeepRunningBatch(elems)) { 157 std::shuffle(std::begin(v), std::end(v), rng); 158 } 159 } 160 161 template <typename Engine, typename Dist, typename... Args> 162 void BM_Dist(benchmark::State& state, Args&&... args) { 163 using value_type = typename Dist::result_type; 164 auto rng = make_engine<Engine>(); 165 Dist dis{std::forward<Args>(args)...}; 166 // Compare the following loop performance: 167 for (auto _ : state) { 168 benchmark::DoNotOptimize(dis(rng)); 169 } 170 state.SetBytesProcessed(sizeof(value_type) * state.iterations()); 171 } 172 173 template <typename Engine, typename Dist> 174 void BM_Large(benchmark::State& state) { 175 using value_type = typename Dist::result_type; 176 volatile value_type kMin = 0; 177 volatile value_type kMax = std::numeric_limits<value_type>::max() / 2 + 1; 178 BM_Dist<Engine, Dist>(state, kMin, kMax); 179 } 180 181 template <typename Engine, typename Dist> 182 void BM_Small(benchmark::State& state) { 183 using value_type = typename Dist::result_type; 184 volatile value_type kMin = 0; 185 volatile value_type kMax = std::numeric_limits<value_type>::max() / 64 + 1; 186 BM_Dist<Engine, Dist>(state, kMin, kMax); 187 } 188 189 template <typename Engine, typename Dist, int A> 190 void BM_Bernoulli(benchmark::State& state) { 191 volatile double a = static_cast<double>(A) / 1000000; 192 BM_Dist<Engine, Dist>(state, a); 193 } 194 195 template <typename Engine, typename Dist, int A, int B> 196 void BM_Beta(benchmark::State& state) { 197 using value_type = typename Dist::result_type; 198 volatile value_type a = static_cast<value_type>(A) / 100; 199 volatile value_type b = static_cast<value_type>(B) / 100; 200 BM_Dist<Engine, Dist>(state, a, b); 201 } 202 203 template <typename Engine, typename Dist, int A> 204 void BM_Gamma(benchmark::State& state) { 205 using value_type = typename Dist::result_type; 206 volatile value_type a = 
static_cast<value_type>(A) / 100; 207 BM_Dist<Engine, Dist>(state, a); 208 } 209 210 template <typename Engine, typename Dist, int A = 100> 211 void BM_Poisson(benchmark::State& state) { 212 volatile double a = static_cast<double>(A) / 100; 213 BM_Dist<Engine, Dist>(state, a); 214 } 215 216 template <typename Engine, typename Dist, int Q = 2, int V = 1> 217 void BM_Zipf(benchmark::State& state) { 218 using value_type = typename Dist::result_type; 219 volatile double q = Q; 220 volatile double v = V; 221 BM_Dist<Engine, Dist>(state, std::numeric_limits<value_type>::max(), q, v); 222 } 223 224 template <typename Engine, typename Dist> 225 void BM_Thread(benchmark::State& state) { 226 using value_type = typename Dist::result_type; 227 auto rng = make_engine<Engine>(); 228 Dist dis{}; 229 for (auto _ : state) { 230 benchmark::DoNotOptimize(dis(rng)); 231 } 232 state.SetBytesProcessed(sizeof(value_type) * state.iterations()); 233 } 234 235 // NOTES: 236 // 237 // std::geometric_distribution is similar to the zipf distributions. 
// The algorithm for the geometric_distribution is, basically,
//   floor(log(1-X) / log(1-p))

// Normal benchmark suite.
//
// Registers the core benchmarks for one engine type. The order of the entries
// and the spelling of every template argument are kept exactly as they are,
// since both show up in the registered benchmarks.
#define BM_BASIC(Engine)                                                   \
  /* ---- Engine construction, one entry per seed source ---- */          \
  BENCHMARK_TEMPLATE(BM_Construct, Engine, DefaultConstructorSeedSeq);     \
  BENCHMARK_TEMPLATE(BM_Construct, Engine, PrecompiledSeedSeq);            \
  BENCHMARK_TEMPLATE(BM_Construct, Engine, std::seed_seq);                 \
  /* ---- Raw engine output ---- */                                        \
  BENCHMARK_TEMPLATE(BM_Direct, Engine);                                   \
  /* ---- Shuffle: fresh engine per batch, then reused engine ---- */      \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 10);                              \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 100);                             \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 1000);                            \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 100);                        \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 1000);                       \
  /* ---- Default-constructed bit and integer distributions ---- */       \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     absl::random_internal::FastUniformBits<uint32_t>);    \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     absl::random_internal::FastUniformBits<uint64_t>);    \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     std::uniform_int_distribution<int32_t>);              \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     std::uniform_int_distribution<int64_t>);              \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     absl::uniform_int_distribution<int32_t>);             \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     absl::uniform_int_distribution<int64_t>);             \
  /* ---- Integer distributions over (0, max / 2 + 1) ---- */              \
  BENCHMARK_TEMPLATE(BM_Large, Engine,                                     \
                     std::uniform_int_distribution<int32_t>);              \
  BENCHMARK_TEMPLATE(BM_Large, Engine,                                     \
                     std::uniform_int_distribution<int64_t>);              \
  BENCHMARK_TEMPLATE(BM_Large, Engine,                                     \
                     absl::uniform_int_distribution<int32_t>);             \
  BENCHMARK_TEMPLATE(BM_Large, Engine,                                     \
                     absl::uniform_int_distribution<int64_t>);             \
  /* ---- Default-constructed real distributions ---- */                   \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     std::uniform_real_distribution<float>);               \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     std::uniform_real_distribution<double>);              \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     absl::uniform_real_distribution<float>);              \
  BENCHMARK_TEMPLATE(BM_Dist, Engine,                                      \
                     absl::uniform_real_distribution<double>)

// Registers the std::generate benchmark for one engine type.
#define BM_COPY(Engine) BENCHMARK_TEMPLATE(BM_Generate, Engine)

#define BM_THREAD(Engine) \ 278 BENCHMARK_TEMPLATE(BM_Thread, Engine, \ 279 absl::uniform_int_distribution<int64_t>) \ 280 ->ThreadPerCpu(); \ 281 BENCHMARK_TEMPLATE(BM_Thread, Engine, \ 282 absl::uniform_real_distribution<double>) \ 283 ->ThreadPerCpu(); \ 284 BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 100)->ThreadPerCpu(); \ 285 BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 1000)->ThreadPerCpu(); \ 286 BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 100)->ThreadPerCpu(); \ 287 BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 1000)->ThreadPerCpu() 288 289 #define BM_EXTENDED(Engine) \ 290 /* -------------- Extended Uniform -----------------------*/ \ 291 BENCHMARK_TEMPLATE(BM_Small, Engine, \ 292 std::uniform_int_distribution<int32_t>); \ 293 BENCHMARK_TEMPLATE(BM_Small, Engine, \ 294 std::uniform_int_distribution<int64_t>); \ 295 BENCHMARK_TEMPLATE(BM_Small, Engine, \ 296 absl::uniform_int_distribution<int32_t>); \ 297 BENCHMARK_TEMPLATE(BM_Small, Engine, \ 298 absl::uniform_int_distribution<int64_t>); \ 299 BENCHMARK_TEMPLATE(BM_Small, Engine, std::uniform_real_distribution<float>); \ 300 BENCHMARK_TEMPLATE(BM_Small, Engine, \ 301 std::uniform_real_distribution<double>); \ 302 BENCHMARK_TEMPLATE(BM_Small, Engine, \ 303 absl::uniform_real_distribution<float>); \ 304 BENCHMARK_TEMPLATE(BM_Small, Engine, \ 305 absl::uniform_real_distribution<double>); \ 306 /* -------------- Other -----------------------*/ \ 307 BENCHMARK_TEMPLATE(BM_Dist, Engine, std::normal_distribution<double>); \ 308 BENCHMARK_TEMPLATE(BM_Dist, Engine, absl::gaussian_distribution<double>); \ 309 BENCHMARK_TEMPLATE(BM_Dist, Engine, std::exponential_distribution<double>); \ 310 BENCHMARK_TEMPLATE(BM_Dist, Engine, absl::exponential_distribution<double>); \ 311 BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>, \ 312 100); \ 313 BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>, \ 314 100); \ 315 BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>, 
\ 316 10 * 100); \ 317 BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>, \ 318 10 * 100); \ 319 BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>, \ 320 13 * 100); \ 321 BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>, \ 322 13 * 100); \ 323 BENCHMARK_TEMPLATE(BM_Dist, Engine, \ 324 absl::log_uniform_int_distribution<int32_t>); \ 325 BENCHMARK_TEMPLATE(BM_Dist, Engine, \ 326 absl::log_uniform_int_distribution<int64_t>); \ 327 BENCHMARK_TEMPLATE(BM_Dist, Engine, std::geometric_distribution<int64_t>); \ 328 BENCHMARK_TEMPLATE(BM_Zipf, Engine, absl::zipf_distribution<uint64_t>); \ 329 BENCHMARK_TEMPLATE(BM_Zipf, Engine, absl::zipf_distribution<uint64_t>, 2, \ 330 3); \ 331 BENCHMARK_TEMPLATE(BM_Bernoulli, Engine, std::bernoulli_distribution, \ 332 257305); \ 333 BENCHMARK_TEMPLATE(BM_Bernoulli, Engine, absl::bernoulli_distribution, \ 334 257305); \ 335 BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 65, \ 336 41); \ 337 BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 99, \ 338 330); \ 339 BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 150, \ 340 150); \ 341 BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 410, \ 342 580); \ 343 BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 65, 41); \ 344 BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 99, \ 345 330); \ 346 BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 150, \ 347 150); \ 348 BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 410, \ 349 580); \ 350 BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution<float>, 199); \ 351 BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution<double>, 199) 352 353 // ABSL Recommended interfaces. 354 BM_BASIC(absl::InsecureBitGen); // === pcg64_2018_engine 355 BM_BASIC(absl::BitGen); // === randen_engine<uint64_t>. 
356 BM_THREAD(absl::BitGen); 357 BM_EXTENDED(absl::BitGen); 358 359 // Instantiate benchmarks for multiple engines. 360 using randen_engine_64 = absl::random_internal::randen_engine<uint64_t>; 361 using randen_engine_32 = absl::random_internal::randen_engine<uint32_t>; 362 363 // Comparison interfaces. 364 BM_BASIC(std::mt19937_64); 365 BM_COPY(std::mt19937_64); 366 BM_EXTENDED(std::mt19937_64); 367 BM_BASIC(randen_engine_64); 368 BM_COPY(randen_engine_64); 369 BM_EXTENDED(randen_engine_64); 370 371 BM_BASIC(std::mt19937); 372 BM_COPY(std::mt19937); 373 BM_BASIC(randen_engine_32); 374 BM_COPY(randen_engine_32); 375 376 } // namespace