topology_test.cc (4276B)
1 // Copyright 2024 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include "hwy/contrib/thread_pool/topology.h" 17 18 #include <stddef.h> 19 #include <stdio.h> 20 21 #include <vector> 22 23 #include "hwy/base.h" 24 #include "hwy/tests/hwy_gtest.h" 25 #include "hwy/tests/test_util-inl.h" 26 #include "hwy/timer.h" 27 28 namespace hwy { 29 namespace { 30 31 TEST(TopologyTest, TestNum) { 32 const size_t total = TotalLogicalProcessors(); 33 fprintf(stderr, "TotalLogical %zu\n", total); 34 35 LogicalProcessorSet lps; 36 if (GetThreadAffinity(lps)) { 37 fprintf(stderr, "Active %zu\n", lps.Count()); 38 HWY_ASSERT(lps.Count() <= total); 39 } 40 } 41 42 TEST(TopologyTest, TestTopology) { 43 char cpu100[100]; 44 if (hwy::platform::GetCpuString(cpu100)) { 45 fprintf(stderr, "%s\n", cpu100); 46 } 47 48 Topology topology; 49 if (topology.packages.empty()) return; 50 51 fprintf(stderr, "Topology: %zuP %zuX %zuC\n", topology.packages.size(), 52 topology.packages[0].clusters.size(), 53 topology.packages[0].clusters[0].lps.Count()); 54 55 HWY_ASSERT(!topology.lps.empty()); 56 LogicalProcessorSet nodes; 57 for (size_t lp = 0; lp < topology.lps.size(); ++lp) { 58 const size_t node = static_cast<size_t>(topology.lps[lp].node); 59 if (!nodes.Get(node)) { 60 fprintf(stderr, "Found NUMA node %zu, LP %zu\n", node, lp); 61 nodes.Set(node); 62 } 63 } 64 65 size_t lps_by_cluster = 0; 66 size_t lps_by_core = 0; 67 LogicalProcessorSet all_lps; 68 for (const Topology::Package& pkg : topology.packages) { 69 HWY_ASSERT(!pkg.clusters.empty()); 70 HWY_ASSERT(!pkg.cores.empty()); 71 HWY_ASSERT(pkg.clusters.size() <= pkg.cores.size()); 72 73 for (const Topology::Cluster& c : pkg.clusters) { 74 lps_by_cluster += c.lps.Count(); 75 c.lps.Foreach([&all_lps](size_t lp) { all_lps.Set(lp); }); 76 } 77 for (const Topology::Core& c : pkg.cores) { 78 lps_by_core += c.lps.Count(); 79 c.lps.Foreach([&all_lps](size_t lp) { all_lps.Set(lp); }); 80 } 81 } 82 // Ensure the per-cluster and per-core sets sum to the total. 83 HWY_ASSERT(lps_by_cluster == topology.lps.size()); 84 HWY_ASSERT(lps_by_core == topology.lps.size()); 85 // .. and are a partition of unity (all LPs are covered) 86 HWY_ASSERT(all_lps.Count() == topology.lps.size()); 87 } 88 89 void PrintCache(const Cache& c, size_t level) { 90 fprintf(stderr, 91 "L%zu: size %u KiB, line size %u, assoc %u, sets %u, cores %u\n", 92 level, c.size_kib, c.bytes_per_line, c.associativity, c.sets, 93 c.cores_sharing); 94 } 95 96 static void CheckCache(const Cache& c, size_t level) { 97 // L1-L2 must exist, L3 is not guaranteed. 98 if (level == 3 && c.size_kib == 0) { 99 HWY_ASSERT(c.associativity == 0 && c.bytes_per_line == 0 && c.sets == 0); 100 return; 101 } 102 103 // size and thus sets are not necessarily powers of two. 104 HWY_ASSERT(c.size_kib != 0); 105 HWY_ASSERT(c.sets != 0); 106 107 // Intel Skylake has non-pow2 L3 associativity, and Apple L2 also, so we can 108 // only check loose bounds. 109 HWY_ASSERT(c.associativity >= 2); 110 HWY_ASSERT(c.associativity <= Cache::kMaxAssociativity); 111 112 // line sizes are always powers of two because CPUs partition addresses into 113 // line offsets (the lower bits), set, and tag. 114 const auto is_pow2 = [](uint32_t x) { return x != 0 && (x & (x - 1)) == 0; }; 115 HWY_ASSERT(is_pow2(c.bytes_per_line)); 116 HWY_ASSERT(32 <= c.bytes_per_line && c.bytes_per_line <= 1024); 117 118 HWY_ASSERT(c.cores_sharing != 0); 119 // +1 observed on RISC-V. 120 HWY_ASSERT(c.cores_sharing <= TotalLogicalProcessors() + 1); 121 } 122 123 TEST(TopologyTest, TestCaches) { 124 const Cache* caches = DataCaches(); 125 if (!caches) return; 126 for (size_t level = 1; level <= 3; ++level) { 127 PrintCache(caches[level], level); 128 CheckCache(caches[level], level); 129 } 130 } 131 132 } // namespace 133 } // namespace hwy 134 135 HWY_TEST_MAIN();