tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

charset.h (5663B)


      1 // Copyright 2022 The Abseil Authors.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 //
     15 // -----------------------------------------------------------------------------
     16 // File: charset.h
     17 // -----------------------------------------------------------------------------
     18 //
     19 // This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
     20 // characters.
     21 //
     22 // Instances can be initialized as constexpr constants. For example:
     23 //
     24 //   constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
     25 //   constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
     26 //   constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
     27 //
     28 // Multiple instances can be combined into a single constexpr expression.
     29 // For example:
     30 //
     31 //   constexpr absl::CharSet kLettersAndNumbers =
     32 //       absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
     33 //
     34 // Several pre-defined character classes are available that mirror the methods
     35 // from <cctype>. For example:
     36 //
     37 //   constexpr absl::CharSet kLettersAndWhitespace =
     38 //       absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
     39 //
     40 // To check membership, use the .contains method, e.g.
     41 //
     42 //   absl::CharSet hex_letters("abcdef");
     43 //   hex_letters.contains('a');  // true
     44 //   hex_letters.contains('g');  // false
     45 
     46 #ifndef ABSL_STRINGS_CHARSET_H_
     47 #define ABSL_STRINGS_CHARSET_H_
     48 
     49 #include <cstdint>
     50 
     51 #include "absl/base/config.h"
     52 #include "absl/strings/string_view.h"
     53 
     54 namespace absl {
     55 ABSL_NAMESPACE_BEGIN
     56 
     57 class CharSet {
     58 public:
     59  constexpr CharSet() : m_() {}
     60 
     61  // Initializes with a given string_view.
     62  constexpr explicit CharSet(absl::string_view str) : m_() {
     63    for (char c : str) {
     64      SetChar(static_cast<unsigned char>(c));
     65    }
     66  }
     67 
     68  constexpr bool contains(char c) const {
     69    return ((m_[static_cast<unsigned char>(c) / 64] >>
     70             (static_cast<unsigned char>(c) % 64)) &
     71            0x1) == 0x1;
     72  }
     73 
     74  constexpr bool empty() const {
     75    for (uint64_t c : m_) {
     76      if (c != 0) return false;
     77    }
     78    return true;
     79  }
     80 
     81  // Containing only a single specified char.
     82  static constexpr CharSet Char(char x) {
     83    return CharSet(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
     84                   CharMaskForWord(x, 2), CharMaskForWord(x, 3));
     85  }
     86 
     87  // Containing all the chars in the closed interval [lo,hi].
     88  static constexpr CharSet Range(char lo, char hi) {
     89    return CharSet(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
     90                   RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
     91  }
     92 
     93  friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) {
     94    return CharSet(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
     95                   a.m_[3] & b.m_[3]);
     96  }
     97 
     98  friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) {
     99    return CharSet(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
    100                   a.m_[3] | b.m_[3]);
    101  }
    102 
    103  friend constexpr CharSet operator~(const CharSet& a) {
    104    return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
    105  }
    106 
    107  // Mirrors the char-classifying predicates in <cctype>.
    108  static constexpr CharSet AsciiUppercase() { return CharSet::Range('A', 'Z'); }
    109  static constexpr CharSet AsciiLowercase() { return CharSet::Range('a', 'z'); }
    110  static constexpr CharSet AsciiDigits() { return CharSet::Range('0', '9'); }
    111  static constexpr CharSet AsciiAlphabet() {
    112    return AsciiLowercase() | AsciiUppercase();
    113  }
    114  static constexpr CharSet AsciiAlphanumerics() {
    115    return AsciiDigits() | AsciiAlphabet();
    116  }
    117  static constexpr CharSet AsciiHexDigits() {
    118    return AsciiDigits() | CharSet::Range('A', 'F') | CharSet::Range('a', 'f');
    119  }
    120  static constexpr CharSet AsciiPrintable() {
    121    return CharSet::Range(0x20, 0x7e);
    122  }
    123  static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
    124  static constexpr CharSet AsciiPunctuation() {
    125    return AsciiPrintable() & ~AsciiWhitespace() & ~AsciiAlphanumerics();
    126  }
    127 
    128 private:
    129  constexpr CharSet(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
    130      : m_{b0, b1, b2, b3} {}
    131 
    132  static constexpr uint64_t RangeForWord(char lo, char hi, uint64_t word) {
    133    return OpenRangeFromZeroForWord(static_cast<unsigned char>(hi) + 1, word) &
    134           ~OpenRangeFromZeroForWord(static_cast<unsigned char>(lo), word);
    135  }
    136 
    137  // All the chars in the specified word of the range [0, upper).
    138  static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
    139                                                     uint64_t word) {
    140    return (upper <= 64 * word) ? 0
    141           : (upper >= 64 * (word + 1))
    142               ? ~static_cast<uint64_t>(0)
    143               : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
    144  }
    145 
    146  static constexpr uint64_t CharMaskForWord(char x, uint64_t word) {
    147    return (static_cast<unsigned char>(x) / 64 == word)
    148               ? (static_cast<uint64_t>(1)
    149                  << (static_cast<unsigned char>(x) % 64))
    150               : 0;
    151  }
    152 
    153  constexpr void SetChar(unsigned char c) {
    154    m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
    155  }
    156 
    157  uint64_t m_[4];
    158 };
    159 
    160 ABSL_NAMESPACE_END
    161 }  // namespace absl
    162 
    163 #endif  // ABSL_STRINGS_CHARSET_H_