result-inl.h (10781B)
1 // Copyright 2021 Google LLC 2 // SPDX-License-Identifier: Apache-2.0 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #include "hwy/contrib/sort/algo-inl.h" 17 18 // Normal include guard for non-SIMD parts 19 #ifndef HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_ 20 #define HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_ 21 22 #include <stdint.h> 23 #include <stdio.h> 24 #include <time.h> 25 26 #include <algorithm> // std::sort 27 #include <string> 28 #include <vector> 29 30 #include "hwy/aligned_allocator.h" 31 #include "hwy/base.h" 32 #include "hwy/per_target.h" // DispatchedTarget 33 #include "hwy/targets.h" // TargetName 34 35 namespace hwy { 36 37 // Returns trimmed mean (we don't want to run an out-of-L3-cache sort often 38 // enough for the mode to be reliable). 39 static inline double SummarizeMeasurements(std::vector<double>& seconds) { 40 std::sort(seconds.begin(), seconds.end()); 41 double sum = 0; 42 int count = 0; 43 const size_t num = seconds.size(); 44 for (size_t i = num / 4; i < num / 2; ++i) { 45 sum += seconds[i]; 46 count += 1; 47 } 48 return sum / count; 49 } 50 51 struct SortResult { 52 SortResult() {} 53 SortResult(Algo algo_in, Dist dist_in, size_t num_keys_in, 54 size_t num_threads_in, double sec_in, size_t sizeof_key_in, 55 const char* key_name_in) 56 : target(DispatchedTarget()), 57 algo(algo_in), 58 dist(dist_in), 59 num_keys(num_keys_in), 60 num_threads(num_threads_in), 61 sec(sec_in), 62 sizeof_key(sizeof_key_in), 63 key_name(key_name_in) {} 64 65 void Print() const { 66 const double bytes = static_cast<double>(num_keys) * 67 static_cast<double>(num_threads) * 68 static_cast<double>(sizeof_key); 69 printf("%10s: %12s: %7s: %9s: %05g %4.0f MB/s (%2zu threads)\n", 70 hwy::TargetName(target), AlgoName(algo), key_name.c_str(), 71 DistName(dist), static_cast<double>(num_keys), bytes * 1E-6 / sec, 72 num_threads); 73 } 74 75 int64_t target; 76 Algo algo; 77 Dist dist; 78 size_t num_keys = 0; 79 size_t num_threads = 0; 80 double sec = 0.0; 81 size_t sizeof_key = 0; 82 std::string key_name; 83 }; 84 85 } // namespace hwy 86 #endif // HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_ 87 88 // Per-target 89 #if defined(HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE) == \ 90 defined(HWY_TARGET_TOGGLE) 91 #ifdef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE 92 #undef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE 93 #else 94 #define HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE 95 #endif 96 97 HWY_BEFORE_NAMESPACE(); 98 namespace hwy { 99 namespace HWY_NAMESPACE { 100 101 // Copies the input, and compares results to that of a reference algorithm. 102 template <class Traits> 103 class ReferenceSortVerifier { 104 using LaneType = typename Traits::LaneType; 105 using KeyType = typename Traits::KeyType; 106 using Order = typename Traits::Order; 107 static constexpr bool kAscending = Order::IsAscending(); 108 static constexpr size_t kLPK = Traits().LanesPerKey(); 109 110 public: 111 ReferenceSortVerifier(const LaneType* in_lanes, size_t num_lanes) { 112 num_lanes_ = num_lanes; 113 num_keys_ = num_lanes / kLPK; 114 in_lanes_ = hwy::AllocateAligned<LaneType>(num_lanes); 115 HWY_ASSERT(in_lanes_); 116 CopyBytes(in_lanes, in_lanes_.get(), num_lanes * sizeof(LaneType)); 117 } 118 119 // For full sorts, k_keys == num_keys. 120 void operator()(Algo algo, const LaneType* out_lanes, size_t k_keys) { 121 SharedState shared; 122 const Traits st; 123 const CappedTag<LaneType, kLPK> d; 124 125 HWY_ASSERT(hwy::IsAligned(in_lanes_.get(), sizeof(KeyType))); 126 KeyType* in_keys = HWY_RCAST_ALIGNED(KeyType*, in_lanes_.get()); 127 128 char caption[10]; 129 const char* algo_type = IsPartialSort(algo) ? "PartialSort" : "Sort"; 130 131 HWY_ASSERT(k_keys <= num_keys_); 132 Run(ReferenceAlgoFor(algo), in_keys, num_keys_, shared, /*thread=*/0, 133 k_keys, Order()); 134 135 if (IsSelect(algo)) { 136 // Print lanes centered around k_keys. 137 if (VQSORT_PRINT >= 3) { 138 const size_t begin_lane = k_keys < 3 ? 0 : (k_keys - 3) * kLPK; 139 const size_t end_lane = HWY_MIN(num_lanes_, (k_keys + 3) * kLPK); 140 fprintf(stderr, "\nExpected:\n"); 141 for (size_t i = begin_lane; i < end_lane; i += kLPK) { 142 snprintf(caption, sizeof(caption), "%4zu ", i / kLPK); 143 Print(d, caption, st.SetKey(d, &in_lanes_[i])); 144 } 145 fprintf(stderr, "\n\nActual:\n"); 146 for (size_t i = begin_lane; i < end_lane; i += kLPK) { 147 snprintf(caption, sizeof(caption), "%4zu ", i / kLPK); 148 Print(d, caption, st.SetKey(d, &out_lanes[i])); 149 } 150 fprintf(stderr, "\n\n"); 151 } 152 153 // At k_keys: should be equivalent, i.e. neither a < b nor b < a. 154 // SortOrderVerifier will also check the ordering of the rest of the keys. 155 const size_t k = k_keys * kLPK; 156 if (st.Compare1(&in_lanes_[k], &out_lanes[k]) || 157 st.Compare1(&out_lanes[k], &in_lanes_[k])) { 158 Print(d, "Expected", st.SetKey(d, &in_lanes_[k])); 159 Print(d, " Actual", st.SetKey(d, &out_lanes[k])); 160 HWY_ABORT("Select %s asc=%d: mismatch at k_keys=%zu, num_keys=%zu\n", 161 st.KeyString(), kAscending, k_keys, num_keys_); 162 } 163 } else { 164 if (VQSORT_PRINT >= 3) { 165 const size_t lanes_to_print = HWY_MIN(40, k_keys * kLPK); 166 fprintf(stderr, "\nExpected:\n"); 167 for (size_t i = 0; i < lanes_to_print; i += kLPK) { 168 snprintf(caption, sizeof(caption), "%4zu ", i / kLPK); 169 Print(d, caption, st.SetKey(d, &in_lanes_[i])); 170 } 171 fprintf(stderr, "\n\nActual:\n"); 172 for (size_t i = 0; i < lanes_to_print; i += kLPK) { 173 snprintf(caption, sizeof(caption), "%4zu ", i / kLPK); 174 Print(d, caption, st.SetKey(d, &out_lanes[i])); 175 } 176 fprintf(stderr, "\n\n"); 177 } 178 179 // Full or partial sort: all elements up to k_keys are equivalent to the 180 // reference sort. SortOrderVerifier also checks the output's ordering. 181 for (size_t i = 0; i < k_keys * kLPK; i += kLPK) { 182 // All up to k_keys should be equivalent, i.e. neither a < b nor b < a. 183 if (st.Compare1(&in_lanes_[i], &out_lanes[i]) || 184 st.Compare1(&out_lanes[i], &in_lanes_[i])) { 185 Print(d, "Expected", st.SetKey(d, &in_lanes_[i])); 186 Print(d, " Actual", st.SetKey(d, &out_lanes[i])); 187 HWY_ABORT("%s %s asc=%d: mismatch at %zu, k_keys=%zu, num_keys=%zu\n", 188 algo_type, st.KeyString(), kAscending, i / kLPK, k_keys, 189 num_keys_); 190 } 191 } 192 } 193 } 194 195 private: 196 hwy::AlignedFreeUniquePtr<LaneType[]> in_lanes_; 197 size_t num_lanes_; 198 size_t num_keys_; 199 }; 200 201 // Faster than ReferenceSortVerifier, for use in bench_sort. Only verifies 202 // order, without running a slow reference sorter. This means it can't verify 203 // Select places the correct key at `k_keys`, nor that input and output keys are 204 // the same. 205 template <class Traits> 206 class SortOrderVerifier { 207 using LaneType = typename Traits::LaneType; 208 using Order = typename Traits::Order; 209 static constexpr bool kAscending = Order::IsAscending(); 210 static constexpr size_t kLPK = Traits().LanesPerKey(); 211 212 public: 213 void operator()(Algo algo, const InputStats<LaneType>& input_stats, 214 const LaneType* output, size_t num_keys, size_t k_keys) { 215 if (IsSelect(algo)) { 216 CheckSelectOrder(input_stats, output, num_keys, k_keys); 217 } else { 218 CheckSortedOrder(algo, input_stats, output, num_keys, k_keys); 219 } 220 } 221 222 private: 223 // For full or partial sorts: ensures keys are in sorted order. 224 void CheckSortedOrder(const Algo algo, 225 const InputStats<LaneType>& input_stats, 226 const LaneType* output, const size_t num_keys, 227 const size_t k_keys) { 228 const Traits st; 229 const CappedTag<LaneType, kLPK> d; 230 const size_t num_lanes = num_keys * kLPK; 231 const size_t k = k_keys * kLPK; 232 const char* algo_type = IsPartialSort(algo) ? "PartialSort" : "Sort"; 233 234 InputStats<LaneType> output_stats; 235 // Even for partial sorts, loop over all keys to verify none disappeared. 236 for (size_t i = 0; i < num_lanes - kLPK; i += kLPK) { 237 output_stats.Notify(output[i]); 238 if (kLPK == 2) output_stats.Notify(output[i + 1]); 239 240 // Only check the first k_keys (== num_keys for a full sort). 241 // Reverse order instead of checking !Compare1 so we accept equal keys. 242 if (i < k - kLPK && st.Compare1(output + i + kLPK, output + i)) { 243 Print(d, " cur", st.SetKey(d, &output[i])); 244 Print(d, "next", st.SetKey(d, &output[i + kLPK])); 245 HWY_ABORT( 246 "%s %s asc=%d: wrong order at %zu, k_keys=%zu, num_keys=%zu\n", 247 algo_type, st.KeyString(), kAscending, i / kLPK, k_keys, num_keys); 248 } 249 } 250 output_stats.Notify(output[num_lanes - kLPK]); 251 if (kLPK == 2) output_stats.Notify(output[num_lanes - kLPK + 1]); 252 253 HWY_ASSERT(input_stats == output_stats); 254 } 255 256 // Ensures keys below index k_keys are less, and all above are greater. 257 void CheckSelectOrder(const InputStats<LaneType>& input_stats, 258 const LaneType* output, const size_t num_keys, 259 const size_t k_keys) { 260 const Traits st; 261 const CappedTag<LaneType, kLPK> d; 262 const size_t num_lanes = num_keys * kLPK; 263 const size_t k = k_keys * kLPK; 264 265 InputStats<LaneType> output_stats; 266 for (size_t i = 0; i < num_lanes - kLPK; i += kLPK) { 267 output_stats.Notify(output[i]); 268 if (kLPK == 2) output_stats.Notify(output[i + 1]); 269 // Reverse order instead of checking !Compare1 so we accept equal keys. 270 if (i < k ? st.Compare1(output + k, output + i) 271 : st.Compare1(output + i, output + k)) { 272 Print(d, "cur", st.SetKey(d, &output[i])); 273 Print(d, "kth", st.SetKey(d, &output[k])); 274 HWY_ABORT( 275 "Select %s asc=%d: wrong order at %zu, k_keys=%zu, num_keys=%zu\n", 276 st.KeyString(), kAscending, i / kLPK, k_keys, num_keys); 277 } 278 } 279 output_stats.Notify(output[num_lanes - kLPK]); 280 if (kLPK == 2) output_stats.Notify(output[num_lanes - kLPK + 1]); 281 282 HWY_ASSERT(input_stats == output_stats); 283 } 284 }; 285 286 // NOLINTNEXTLINE(google-readability-namespace-comments) 287 } // namespace HWY_NAMESPACE 288 } // namespace hwy 289 HWY_AFTER_NAMESPACE(); 290 291 #endif // HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE