profiler_example.cc (3020B)
1 // Copyright 2017 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <cmath> 16 17 #include "hwy/base.h" // Abort 18 #include "hwy/cache_control.h" // Pause 19 #include "hwy/contrib/thread_pool/thread_pool.h" 20 #include "hwy/profiler.h" 21 #include "hwy/timer.h" 22 23 namespace hwy { 24 namespace { 25 26 HWY_INLINE void Spin(const double min_time) { 27 const double t0 = hwy::platform::Now(); 28 for (;;) { 29 hwy::Pause(); 30 const double elapsed = hwy::platform::Now() - t0; 31 if (elapsed > min_time) { 32 break; 33 } 34 } 35 } 36 37 HWY_NOINLINE void Spin10us() { 38 PROFILER_FUNC; 39 Spin(10E-6); 40 } 41 42 HWY_NOINLINE void Spin20us() { 43 PROFILER_FUNC; 44 Spin(20E-6); 45 } 46 47 HWY_NOINLINE void CallTwoSpin() { 48 PROFILER_ZONE("NearZeroBecauseOfChildZones"); 49 { 50 PROFILER_ZONE("spin30"); 51 Spin(30E-6); 52 } 53 { 54 PROFILER_ZONE("spin60"); 55 Spin(60E-6); 56 } 57 } 58 59 HWY_NOINLINE void Compute(Profiler& p, HWY_MAYBE_UNUSED size_t thread) { 60 static const auto zone = p.AddZone("Compute"); 61 PROFILER_ZONE3(p, static_cast<uint8_t>(thread), zone); 62 for (int rep = 0; rep < 100; ++rep) { 63 double total = 0.0; 64 for (int i = 0; i < 200 - rep; ++i) { 65 total += std::pow(0.9, i); 66 } 67 if (std::abs(total - 10.0) > 1E-2) { 68 HWY_ABORT("unexpected total %f", total); 69 } 70 } 71 } 72 73 HWY_NOINLINE void TestThreads(Profiler& p) { 74 PROFILER_ZONE("NearZeroBecauseOfThreadedChildZones"); 75 { 76 PROFILER_ZONE("Create pool1"); 77 ThreadPool pool(3); 78 pool.Run(0, 5, [&p](uint64_t /*task*/, HWY_MAYBE_UNUSED size_t thread) { 79 Compute(p, thread); 80 }); 81 } 82 83 { 84 PROFILER_ZONE("Create pool2"); 85 ThreadPool pool(8); 86 pool.Run(0, 8, [&p](uint64_t /*task*/, HWY_MAYBE_UNUSED size_t thread) { 87 Compute(p, thread); 88 }); 89 } 90 } 91 92 HWY_NOINLINE void CallTestThreadPlus20us(Profiler& p) { 93 PROFILER_FUNC; 94 TestThreads(p); 95 Spin(20E-6); 96 } 97 98 HWY_NOINLINE void CallCallTestThreadPlus10us(Profiler& p) { 99 PROFILER_FUNC; 100 CallTestThreadPlus20us(p); 101 Spin(10E-6); 102 } 103 104 void ProfilerExample(Profiler& p) { 105 PROFILER_ZONE("NearZeroDespiteInclusiveChildZone"); 106 { 107 static const auto zone = 108 p.AddZone("Inclusive 10+20", ProfilerFlags::kInclusive); 109 PROFILER_ZONE3(p, 0, zone); 110 Spin20us(); 111 Spin10us(); 112 } 113 CallTwoSpin(); 114 CallCallTestThreadPlus10us(p); 115 } 116 117 } // namespace 118 } // namespace hwy 119 120 int main(int /*argc*/, char* /*argv*/[]) { 121 hwy::ProfilerExample(hwy::Profiler::Get()); 122 PROFILER_PRINT_RESULTS(); // after all zones have exited 123 return 0; 124 }