tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

result-inl.h (10781B)


      1 // Copyright 2021 Google LLC
      2 // SPDX-License-Identifier: Apache-2.0
      3 //
      4 // Licensed under the Apache License, Version 2.0 (the "License");
      5 // you may not use this file except in compliance with the License.
      6 // You may obtain a copy of the License at
      7 //
      8 //      http://www.apache.org/licenses/LICENSE-2.0
      9 //
     10 // Unless required by applicable law or agreed to in writing, software
     11 // distributed under the License is distributed on an "AS IS" BASIS,
     12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 // See the License for the specific language governing permissions and
     14 // limitations under the License.
     15 
     16 #include "hwy/contrib/sort/algo-inl.h"
     17 
     18 // Normal include guard for non-SIMD parts
     19 #ifndef HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
     20 #define HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
     21 
     22 #include <stdint.h>
     23 #include <stdio.h>
     24 #include <time.h>
     25 
     26 #include <algorithm>  // std::sort
     27 #include <string>
     28 #include <vector>
     29 
     30 #include "hwy/aligned_allocator.h"
     31 #include "hwy/base.h"
     32 #include "hwy/per_target.h"  // DispatchedTarget
     33 #include "hwy/targets.h"     // TargetName
     34 
     35 namespace hwy {
     36 
     37 // Returns trimmed mean (we don't want to run an out-of-L3-cache sort often
     38 // enough for the mode to be reliable).
     39 static inline double SummarizeMeasurements(std::vector<double>& seconds) {
     40  std::sort(seconds.begin(), seconds.end());
     41  double sum = 0;
     42  int count = 0;
     43  const size_t num = seconds.size();
     44  for (size_t i = num / 4; i < num / 2; ++i) {
     45    sum += seconds[i];
     46    count += 1;
     47  }
     48  return sum / count;
     49 }
     50 
     51 struct SortResult {
     52  SortResult() {}
     53  SortResult(Algo algo_in, Dist dist_in, size_t num_keys_in,
     54             size_t num_threads_in, double sec_in, size_t sizeof_key_in,
     55             const char* key_name_in)
     56      : target(DispatchedTarget()),
     57        algo(algo_in),
     58        dist(dist_in),
     59        num_keys(num_keys_in),
     60        num_threads(num_threads_in),
     61        sec(sec_in),
     62        sizeof_key(sizeof_key_in),
     63        key_name(key_name_in) {}
     64 
     65  void Print() const {
     66    const double bytes = static_cast<double>(num_keys) *
     67                         static_cast<double>(num_threads) *
     68                         static_cast<double>(sizeof_key);
     69    printf("%10s: %12s: %7s: %9s: %05g %4.0f MB/s (%2zu threads)\n",
     70           hwy::TargetName(target), AlgoName(algo), key_name.c_str(),
     71           DistName(dist), static_cast<double>(num_keys), bytes * 1E-6 / sec,
     72           num_threads);
     73  }
     74 
     75  int64_t target;
     76  Algo algo;
     77  Dist dist;
     78  size_t num_keys = 0;
     79  size_t num_threads = 0;
     80  double sec = 0.0;
     81  size_t sizeof_key = 0;
     82  std::string key_name;
     83 };
     84 
     85 }  // namespace hwy
     86 #endif  // HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
     87 
     88 // Per-target
     89 #if defined(HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE) == \
     90    defined(HWY_TARGET_TOGGLE)
     91 #ifdef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
     92 #undef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
     93 #else
     94 #define HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
     95 #endif
     96 
     97 HWY_BEFORE_NAMESPACE();
     98 namespace hwy {
     99 namespace HWY_NAMESPACE {
    100 
    101 // Copies the input, and compares results to that of a reference algorithm.
    102 template <class Traits>
    103 class ReferenceSortVerifier {
    104  using LaneType = typename Traits::LaneType;
    105  using KeyType = typename Traits::KeyType;
    106  using Order = typename Traits::Order;
    107  static constexpr bool kAscending = Order::IsAscending();
    108  static constexpr size_t kLPK = Traits().LanesPerKey();
    109 
    110 public:
    111  ReferenceSortVerifier(const LaneType* in_lanes, size_t num_lanes) {
    112    num_lanes_ = num_lanes;
    113    num_keys_ = num_lanes / kLPK;
    114    in_lanes_ = hwy::AllocateAligned<LaneType>(num_lanes);
    115    HWY_ASSERT(in_lanes_);
    116    CopyBytes(in_lanes, in_lanes_.get(), num_lanes * sizeof(LaneType));
    117  }
    118 
    119  // For full sorts, k_keys == num_keys.
    120  void operator()(Algo algo, const LaneType* out_lanes, size_t k_keys) {
    121    SharedState shared;
    122    const Traits st;
    123    const CappedTag<LaneType, kLPK> d;
    124 
    125    HWY_ASSERT(hwy::IsAligned(in_lanes_.get(), sizeof(KeyType)));
    126    KeyType* in_keys = HWY_RCAST_ALIGNED(KeyType*, in_lanes_.get());
    127 
    128    char caption[10];
    129    const char* algo_type = IsPartialSort(algo) ? "PartialSort" : "Sort";
    130 
    131    HWY_ASSERT(k_keys <= num_keys_);
    132    Run(ReferenceAlgoFor(algo), in_keys, num_keys_, shared, /*thread=*/0,
    133        k_keys, Order());
    134 
    135    if (IsSelect(algo)) {
    136      // Print lanes centered around k_keys.
    137      if (VQSORT_PRINT >= 3) {
    138        const size_t begin_lane = k_keys < 3 ? 0 : (k_keys - 3) * kLPK;
    139        const size_t end_lane = HWY_MIN(num_lanes_, (k_keys + 3) * kLPK);
    140        fprintf(stderr, "\nExpected:\n");
    141        for (size_t i = begin_lane; i < end_lane; i += kLPK) {
    142          snprintf(caption, sizeof(caption), "%4zu ", i / kLPK);
    143          Print(d, caption, st.SetKey(d, &in_lanes_[i]));
    144        }
    145        fprintf(stderr, "\n\nActual:\n");
    146        for (size_t i = begin_lane; i < end_lane; i += kLPK) {
    147          snprintf(caption, sizeof(caption), "%4zu ", i / kLPK);
    148          Print(d, caption, st.SetKey(d, &out_lanes[i]));
    149        }
    150        fprintf(stderr, "\n\n");
    151      }
    152 
    153      // At k_keys: should be equivalent, i.e. neither a < b nor b < a.
    154      // SortOrderVerifier will also check the ordering of the rest of the keys.
    155      const size_t k = k_keys * kLPK;
    156      if (st.Compare1(&in_lanes_[k], &out_lanes[k]) ||
    157          st.Compare1(&out_lanes[k], &in_lanes_[k])) {
    158        Print(d, "Expected", st.SetKey(d, &in_lanes_[k]));
    159        Print(d, "  Actual", st.SetKey(d, &out_lanes[k]));
    160        HWY_ABORT("Select %s asc=%d: mismatch at k_keys=%zu, num_keys=%zu\n",
    161                  st.KeyString(), kAscending, k_keys, num_keys_);
    162      }
    163    } else {
    164      if (VQSORT_PRINT >= 3) {
    165        const size_t lanes_to_print = HWY_MIN(40, k_keys * kLPK);
    166        fprintf(stderr, "\nExpected:\n");
    167        for (size_t i = 0; i < lanes_to_print; i += kLPK) {
    168          snprintf(caption, sizeof(caption), "%4zu ", i / kLPK);
    169          Print(d, caption, st.SetKey(d, &in_lanes_[i]));
    170        }
    171        fprintf(stderr, "\n\nActual:\n");
    172        for (size_t i = 0; i < lanes_to_print; i += kLPK) {
    173          snprintf(caption, sizeof(caption), "%4zu ", i / kLPK);
    174          Print(d, caption, st.SetKey(d, &out_lanes[i]));
    175        }
    176        fprintf(stderr, "\n\n");
    177      }
    178 
    179      // Full or partial sort: all elements up to k_keys are equivalent to the
    180      // reference sort. SortOrderVerifier also checks the output's ordering.
    181      for (size_t i = 0; i < k_keys * kLPK; i += kLPK) {
    182        // All up to k_keys should be equivalent, i.e. neither a < b nor b < a.
    183        if (st.Compare1(&in_lanes_[i], &out_lanes[i]) ||
    184            st.Compare1(&out_lanes[i], &in_lanes_[i])) {
    185          Print(d, "Expected", st.SetKey(d, &in_lanes_[i]));
    186          Print(d, "  Actual", st.SetKey(d, &out_lanes[i]));
    187          HWY_ABORT("%s %s asc=%d: mismatch at %zu, k_keys=%zu, num_keys=%zu\n",
    188                    algo_type, st.KeyString(), kAscending, i / kLPK, k_keys,
    189                    num_keys_);
    190        }
    191      }
    192    }
    193  }
    194 
    195 private:
    196  hwy::AlignedFreeUniquePtr<LaneType[]> in_lanes_;
    197  size_t num_lanes_;
    198  size_t num_keys_;
    199 };
    200 
    201 // Faster than ReferenceSortVerifier, for use in bench_sort. Only verifies
    202 // order, without running a slow reference sorter. This means it can't verify
    203 // Select places the correct key at `k_keys`, nor that input and output keys are
    204 // the same.
    205 template <class Traits>
    206 class SortOrderVerifier {
    207  using LaneType = typename Traits::LaneType;
    208  using Order = typename Traits::Order;
    209  static constexpr bool kAscending = Order::IsAscending();
    210  static constexpr size_t kLPK = Traits().LanesPerKey();
    211 
    212 public:
    213  void operator()(Algo algo, const InputStats<LaneType>& input_stats,
    214                  const LaneType* output, size_t num_keys, size_t k_keys) {
    215    if (IsSelect(algo)) {
    216      CheckSelectOrder(input_stats, output, num_keys, k_keys);
    217    } else {
    218      CheckSortedOrder(algo, input_stats, output, num_keys, k_keys);
    219    }
    220  }
    221 
    222 private:
    223  // For full or partial sorts: ensures keys are in sorted order.
    224  void CheckSortedOrder(const Algo algo,
    225                        const InputStats<LaneType>& input_stats,
    226                        const LaneType* output, const size_t num_keys,
    227                        const size_t k_keys) {
    228    const Traits st;
    229    const CappedTag<LaneType, kLPK> d;
    230    const size_t num_lanes = num_keys * kLPK;
    231    const size_t k = k_keys * kLPK;
    232    const char* algo_type = IsPartialSort(algo) ? "PartialSort" : "Sort";
    233 
    234    InputStats<LaneType> output_stats;
    235    // Even for partial sorts, loop over all keys to verify none disappeared.
    236    for (size_t i = 0; i < num_lanes - kLPK; i += kLPK) {
    237      output_stats.Notify(output[i]);
    238      if (kLPK == 2) output_stats.Notify(output[i + 1]);
    239 
    240      // Only check the first k_keys (== num_keys for a full sort).
    241      // Reverse order instead of checking !Compare1 so we accept equal keys.
    242      if (i < k - kLPK && st.Compare1(output + i + kLPK, output + i)) {
    243        Print(d, " cur", st.SetKey(d, &output[i]));
    244        Print(d, "next", st.SetKey(d, &output[i + kLPK]));
    245        HWY_ABORT(
    246            "%s %s asc=%d: wrong order at %zu, k_keys=%zu, num_keys=%zu\n",
    247            algo_type, st.KeyString(), kAscending, i / kLPK, k_keys, num_keys);
    248      }
    249    }
    250    output_stats.Notify(output[num_lanes - kLPK]);
    251    if (kLPK == 2) output_stats.Notify(output[num_lanes - kLPK + 1]);
    252 
    253    HWY_ASSERT(input_stats == output_stats);
    254  }
    255 
    256  // Ensures keys below index k_keys are less, and all above are greater.
    257  void CheckSelectOrder(const InputStats<LaneType>& input_stats,
    258                        const LaneType* output, const size_t num_keys,
    259                        const size_t k_keys) {
    260    const Traits st;
    261    const CappedTag<LaneType, kLPK> d;
    262    const size_t num_lanes = num_keys * kLPK;
    263    const size_t k = k_keys * kLPK;
    264 
    265    InputStats<LaneType> output_stats;
    266    for (size_t i = 0; i < num_lanes - kLPK; i += kLPK) {
    267      output_stats.Notify(output[i]);
    268      if (kLPK == 2) output_stats.Notify(output[i + 1]);
    269      // Reverse order instead of checking !Compare1 so we accept equal keys.
    270      if (i < k ? st.Compare1(output + k, output + i)
    271                : st.Compare1(output + i, output + k)) {
    272        Print(d, "cur", st.SetKey(d, &output[i]));
    273        Print(d, "kth", st.SetKey(d, &output[k]));
    274        HWY_ABORT(
    275            "Select %s asc=%d: wrong order at %zu, k_keys=%zu, num_keys=%zu\n",
    276            st.KeyString(), kAscending, i / kLPK, k_keys, num_keys);
    277      }
    278    }
    279    output_stats.Notify(output[num_lanes - kLPK]);
    280    if (kLPK == 2) output_stats.Notify(output[num_lanes - kLPK + 1]);
    281 
    282    HWY_ASSERT(input_stats == output_stats);
    283  }
    284 };
    285 
    286 // NOLINTNEXTLINE(google-readability-namespace-comments)
    287 }  // namespace HWY_NAMESPACE
    288 }  // namespace hwy
    289 HWY_AFTER_NAMESPACE();
    290 
    291 #endif  // HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE