tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

escaping.cc (39739B)


      1 // Copyright 2017 The Abseil Authors.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "absl/strings/escaping.h"
     16 
     17 #include <algorithm>
     18 #include <array>
     19 #include <cassert>
     20 #include <cstddef>
     21 #include <cstdint>
     22 #include <cstring>
     23 #include <limits>
     24 #include <string>
     25 #include <utility>
     26 
     27 #include "absl/base/config.h"
     28 #include "absl/base/internal/endian.h"
     29 #include "absl/base/internal/raw_logging.h"
     30 #include "absl/base/internal/unaligned_access.h"
     31 #include "absl/base/nullability.h"
     32 #include "absl/strings/ascii.h"
     33 #include "absl/strings/charset.h"
     34 #include "absl/strings/internal/escaping.h"
     35 #include "absl/strings/internal/resize_uninitialized.h"
     36 #include "absl/strings/internal/utf8.h"
     37 #include "absl/strings/numbers.h"
     38 #include "absl/strings/str_cat.h"
     39 #include "absl/strings/string_view.h"
     40 
     41 namespace absl {
     42 ABSL_NAMESPACE_BEGIN
     43 namespace {
     44 
     45 // Value for the leave_nulls_escaped argument to CUnescapeInternal().
        // Passing false makes escape sequences whose value is zero decode to a NUL
        // byte instead of being copied to the output in their escaped form.
     46 constexpr bool kUnescapeNulls = false;
     47 
     48 inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); }
     49 
     50 inline unsigned int hex_digit_to_int(char c) {
     51  static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
     52                "Character set must be ASCII.");
     53  assert(absl::ascii_isxdigit(static_cast<unsigned char>(c)));
     54  unsigned int x = static_cast<unsigned char>(c);
     55  if (x > '9') {
     56    x += 9;
     57  }
     58  return x & 0xf;
     59 }
     60 
     61 inline bool IsSurrogate(char32_t c, absl::string_view src,
     62                        absl::Nullable<std::string*> error) {
     63  if (c >= 0xD800 && c <= 0xDFFF) {
     64    if (error) {
     65      *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
     66                            src);
     67    }
     68    return true;
     69  }
     70  return false;
     71 }
     72 
     73 // ----------------------------------------------------------------------
     74 // CUnescapeInternal()
     75 //    Implements both CUnescape() and CUnescapeForNullTerminatedString().
     76 //
     77 //    Unescapes C escape sequences and is the reverse of CEscape().
     78 //
     79 //    If 'source' is valid, stores the unescaped string and its size in
     80 //    'dest' and 'dest_len' respectively, and returns true. Otherwise
     81 //    returns false and optionally stores the error description in
     82 //    'error'. Set 'error' to nullptr to disable error reporting.
     83 //
     84 //    'dest' should point to a buffer that is at least as big as 'source'.
     85 //    'source' and 'dest' may be the same.
     86 //
     87 //     NOTE: any changes to this function must also be reflected in the older
     88 //     UnescapeCEscapeSequences().
     89 // ----------------------------------------------------------------------
     90 bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
     91                       absl::Nonnull<char*> dest,
     92                       absl::Nonnull<ptrdiff_t*> dest_len,
     93                       absl::Nullable<std::string*> error) {
     94  char* d = dest;
     95  const char* p = source.data();
     96  const char* end = p + source.size();
     97  const char* last_byte = end - 1;
     98 
     99  // Small optimization for case where source = dest and there's no escaping
    100  while (p == d && p < end && *p != '\\') p++, d++;
    101 
    102  while (p < end) {
    103    if (*p != '\\') {
    104      *d++ = *p++;
    105    } else {
    106      if (++p > last_byte) {  // skip past the '\\'
    107        if (error) *error = "String cannot end with \\";
    108        return false;
    109      }
    110      switch (*p) {
    111        case 'a':  *d++ = '\a';  break;
    112        case 'b':  *d++ = '\b';  break;
    113        case 'f':  *d++ = '\f';  break;
    114        case 'n':  *d++ = '\n';  break;
    115        case 'r':  *d++ = '\r';  break;
    116        case 't':  *d++ = '\t';  break;
    117        case 'v':  *d++ = '\v';  break;
    118        case '\\': *d++ = '\\';  break;
    119        case '?':  *d++ = '\?';  break;    // \?  Who knew?
    120        case '\'': *d++ = '\'';  break;
    121        case '"':  *d++ = '\"';  break;
    122        case '0':
    123        case '1':
    124        case '2':
    125        case '3':
    126        case '4':
    127        case '5':
    128        case '6':
    129        case '7': {
    130          // octal digit: 1 to 3 digits
    131          const char* octal_start = p;
    132          unsigned int ch = static_cast<unsigned int>(*p - '0');  // digit 1
    133          if (p < last_byte && is_octal_digit(p[1]))
    134            ch = ch * 8 + static_cast<unsigned int>(*++p - '0');  // digit 2
    135          if (p < last_byte && is_octal_digit(p[1]))
    136            ch = ch * 8 + static_cast<unsigned int>(*++p - '0');  // digit 3
    137          if (ch > 0xff) {
    138            if (error) {
    139              *error = "Value of \\" +
    140                       std::string(octal_start,
    141                                   static_cast<size_t>(p + 1 - octal_start)) +
    142                       " exceeds 0xff";
    143            }
    144            return false;
    145          }
    146          if ((ch == 0) && leave_nulls_escaped) {
    147            // Copy the escape sequence for the null character
    148            const size_t octal_size = static_cast<size_t>(p + 1 - octal_start);
    149            *d++ = '\\';
    150            memmove(d, octal_start, octal_size);
    151            d += octal_size;
    152            break;
    153          }
    154          *d++ = static_cast<char>(ch);
    155          break;
    156        }
    157        case 'x':
    158        case 'X': {
    159          if (p >= last_byte) {
    160            if (error) *error = "String cannot end with \\x";
    161            return false;
    162          } else if (!absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) {
    163            if (error) *error = "\\x cannot be followed by a non-hex digit";
    164            return false;
    165          }
    166          unsigned int ch = 0;
    167          const char* hex_start = p;
    168          while (p < last_byte &&
    169                 absl::ascii_isxdigit(static_cast<unsigned char>(p[1])))
    170            // Arbitrarily many hex digits
    171            ch = (ch << 4) + hex_digit_to_int(*++p);
    172          if (ch > 0xFF) {
    173            if (error) {
    174              *error = "Value of \\" +
    175                       std::string(hex_start,
    176                                   static_cast<size_t>(p + 1 - hex_start)) +
    177                       " exceeds 0xff";
    178            }
    179            return false;
    180          }
    181          if ((ch == 0) && leave_nulls_escaped) {
    182            // Copy the escape sequence for the null character
    183            const size_t hex_size = static_cast<size_t>(p + 1 - hex_start);
    184            *d++ = '\\';
    185            memmove(d, hex_start, hex_size);
    186            d += hex_size;
    187            break;
    188          }
    189          *d++ = static_cast<char>(ch);
    190          break;
    191        }
    192        case 'u': {
    193          // \uhhhh => convert 4 hex digits to UTF-8
    194          char32_t rune = 0;
    195          const char* hex_start = p;
    196          if (p + 4 >= end) {
    197            if (error) {
    198              *error = "\\u must be followed by 4 hex digits: \\" +
    199                       std::string(hex_start,
    200                                   static_cast<size_t>(p + 1 - hex_start));
    201            }
    202            return false;
    203          }
    204          for (int i = 0; i < 4; ++i) {
    205            // Look one char ahead.
    206            if (absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) {
    207              rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
    208            } else {
    209              if (error) {
    210                *error = "\\u must be followed by 4 hex digits: \\" +
    211                         std::string(hex_start,
    212                                     static_cast<size_t>(p + 1 - hex_start));
    213              }
    214              return false;
    215            }
    216          }
    217          if ((rune == 0) && leave_nulls_escaped) {
    218            // Copy the escape sequence for the null character
    219            *d++ = '\\';
    220            memmove(d, hex_start, 5);  // u0000
    221            d += 5;
    222            break;
    223          }
    224          if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
    225            return false;
    226          }
    227          d += strings_internal::EncodeUTF8Char(d, rune);
    228          break;
    229        }
    230        case 'U': {
    231          // \Uhhhhhhhh => convert 8 hex digits to UTF-8
    232          char32_t rune = 0;
    233          const char* hex_start = p;
    234          if (p + 8 >= end) {
    235            if (error) {
    236              *error = "\\U must be followed by 8 hex digits: \\" +
    237                       std::string(hex_start,
    238                                   static_cast<size_t>(p + 1 - hex_start));
    239            }
    240            return false;
    241          }
    242          for (int i = 0; i < 8; ++i) {
    243            // Look one char ahead.
    244            if (absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) {
    245              // Don't change rune until we're sure this
    246              // is within the Unicode limit, but do advance p.
    247              uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p);
    248              if (newrune > 0x10FFFF) {
    249                if (error) {
    250                  *error = "Value of \\" +
    251                           std::string(hex_start,
    252                                       static_cast<size_t>(p + 1 - hex_start)) +
    253                           " exceeds Unicode limit (0x10FFFF)";
    254                }
    255                return false;
    256              } else {
    257                rune = newrune;
    258              }
    259            } else {
    260              if (error) {
    261                *error = "\\U must be followed by 8 hex digits: \\" +
    262                         std::string(hex_start,
    263                                     static_cast<size_t>(p + 1 - hex_start));
    264              }
    265              return false;
    266            }
    267          }
    268          if ((rune == 0) && leave_nulls_escaped) {
    269            // Copy the escape sequence for the null character
    270            *d++ = '\\';
    271            memmove(d, hex_start, 9);  // U00000000
    272            d += 9;
    273            break;
    274          }
    275          if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
    276            return false;
    277          }
    278          d += strings_internal::EncodeUTF8Char(d, rune);
    279          break;
    280        }
    281        default: {
    282          if (error) *error = std::string("Unknown escape sequence: \\") + *p;
    283          return false;
    284        }
    285      }
    286      p++;                                 // read past letter we escaped
    287    }
    288  }
    289  *dest_len = d - dest;
    290  return true;
    291 }
    292 
    293 // ----------------------------------------------------------------------
    294 // CUnescapeInternal()
    295 //
    296 //    Same as above but uses a std::string for output. 'source' and 'dest'
    297 //    may be the same.
    298 // ----------------------------------------------------------------------
    299 bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
    300                       absl::Nonnull<std::string*> dest,
    301                       absl::Nullable<std::string*> error) {
    302  strings_internal::STLStringResizeUninitialized(dest, source.size());
    303 
    304  ptrdiff_t dest_size;
    305  if (!CUnescapeInternal(source,
    306                         leave_nulls_escaped,
    307                         &(*dest)[0],
    308                         &dest_size,
    309                         error)) {
    310    return false;
    311  }
    312  dest->erase(static_cast<size_t>(dest_size));
    313  return true;
    314 }
    315 
    316 // ----------------------------------------------------------------------
    317 // CEscape()
    318 // CHexEscape()
    319 // Utf8SafeCEscape()
    320 // Utf8SafeCHexEscape()
    321 //    Escapes 'src' using C-style escape sequences.  This is useful for
    322 //    preparing query flags.  The 'Hex' version uses hexadecimal rather than
    323 //    octal sequences.  The 'Utf8Safe' version does not touch UTF-8 bytes.
    324 //
    325 //    Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
    326 // ----------------------------------------------------------------------
    327 std::string CEscapeInternal(absl::string_view src, bool use_hex,
    328                            bool utf8_safe) {
    329  std::string dest;
    330  bool last_hex_escape = false;  // true if last output char was \xNN.
    331 
    332  for (char c : src) {
    333    bool is_hex_escape = false;
    334    switch (c) {
    335      case '\n': dest.append("\\" "n"); break;
    336      case '\r': dest.append("\\" "r"); break;
    337      case '\t': dest.append("\\" "t"); break;
    338      case '\"': dest.append("\\" "\""); break;
    339      case '\'': dest.append("\\" "'"); break;
    340      case '\\': dest.append("\\" "\\"); break;
    341      default: {
    342        // Note that if we emit \xNN and the src character after that is a hex
    343        // digit then that digit must be escaped too to prevent it being
    344        // interpreted as part of the character code by C.
    345        const unsigned char uc = static_cast<unsigned char>(c);
    346        if ((!utf8_safe || uc < 0x80) &&
    347            (!absl::ascii_isprint(uc) ||
    348             (last_hex_escape && absl::ascii_isxdigit(uc)))) {
    349          if (use_hex) {
    350            dest.append("\\" "x");
    351            dest.push_back(numbers_internal::kHexChar[uc / 16]);
    352            dest.push_back(numbers_internal::kHexChar[uc % 16]);
    353            is_hex_escape = true;
    354          } else {
    355            dest.append("\\");
    356            dest.push_back(numbers_internal::kHexChar[uc / 64]);
    357            dest.push_back(numbers_internal::kHexChar[(uc % 64) / 8]);
    358            dest.push_back(numbers_internal::kHexChar[uc % 8]);
    359          }
    360        } else {
    361          dest.push_back(c);
    362          break;
    363        }
    364      }
    365    }
    366    last_hex_escape = is_hex_escape;
    367  }
    368 
    369  return dest;
    370 }
    371 
    372 /* clang-format off */
    373 constexpr std::array<unsigned char, 256> kCEscapedLen = {
    374    4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // \t, \n, \r
    375    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    376    1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // ", '
    377    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '0'..'9'
    378    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'A'..'O'
    379    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 'P'..'Z', '\'
    380    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'a'..'o'
    381    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 'p'..'z', DEL
    382    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    383    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    384    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    385    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    386    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    387    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    388    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    389    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    390 };
    391 /* clang-format on */
    392 
    393 constexpr uint32_t MakeCEscapedLittleEndianUint32(size_t c) {
    394  size_t char_len = kCEscapedLen[c];
    395  if (char_len == 1) {
    396    return static_cast<uint32_t>(c);
    397  }
    398  if (char_len == 2) {
    399    switch (c) {
    400      case '\n':
    401        return '\\' | (static_cast<uint32_t>('n') << 8);
    402      case '\r':
    403        return '\\' | (static_cast<uint32_t>('r') << 8);
    404      case '\t':
    405        return '\\' | (static_cast<uint32_t>('t') << 8);
    406      case '\"':
    407        return '\\' | (static_cast<uint32_t>('\"') << 8);
    408      case '\'':
    409        return '\\' | (static_cast<uint32_t>('\'') << 8);
    410      case '\\':
    411        return '\\' | (static_cast<uint32_t>('\\') << 8);
    412    }
    413  }
    414  return static_cast<uint32_t>('\\' | (('0' + (c / 64)) << 8) |
    415                               (('0' + ((c % 64) / 8)) << 16) |
    416                               (('0' + (c % 8)) << 24));
    417 }
    418 
    419 template <size_t... indexes>
    420 inline constexpr std::array<uint32_t, sizeof...(indexes)>
    421 MakeCEscapedLittleEndianUint32Array(std::index_sequence<indexes...>) {
    422  return {MakeCEscapedLittleEndianUint32(indexes)...};
    423 }
    424 constexpr std::array<uint32_t, 256> kCEscapedLittleEndianUint32Array =
    425    MakeCEscapedLittleEndianUint32Array(std::make_index_sequence<256>());
    426 
    427 // Calculates the length of the C-style escaped version of 'src'.
    428 // Assumes that non-printable characters are escaped using octal sequences, and
    429 // that UTF-8 bytes are not handled specially.
    430 inline size_t CEscapedLength(absl::string_view src) {
    431  size_t escaped_len = 0;
    432  // The maximum value of kCEscapedLen[x] is 4, so we can escape any string of
    433  // length size_t_max/4 without checking for overflow.
    434  size_t unchecked_limit =
    435      std::min<size_t>(src.size(), std::numeric_limits<size_t>::max() / 4);
    436  size_t i = 0;
    437  while (i < unchecked_limit) {
    438    // Common case: No need to check for overflow.
    439    escaped_len += kCEscapedLen[static_cast<unsigned char>(src[i++])];
    440  }
    441  while (i < src.size()) {
    442    // Beyond unchecked_limit we need to check for overflow before adding.
    443    size_t char_len = kCEscapedLen[static_cast<unsigned char>(src[i++])];
    444    ABSL_INTERNAL_CHECK(
    445        escaped_len <= std::numeric_limits<size_t>::max() - char_len,
    446        "escaped_len overflow");
    447    escaped_len += char_len;
    448  }
    449  return escaped_len;
    450 }
    451 
    452 void CEscapeAndAppendInternal(absl::string_view src,
    453                              absl::Nonnull<std::string*> dest) {
    454  size_t escaped_len = CEscapedLength(src);
    455  if (escaped_len == src.size()) {
    456    dest->append(src.data(), src.size());
    457    return;
    458  }
    459 
    460  // We keep 3 slop bytes so that we can call `little_endian::Store32`
    461  // invariably regardless of the length of the escaped character.
    462  constexpr size_t slop_bytes = 3;
    463  size_t cur_dest_len = dest->size();
    464  size_t new_dest_len = cur_dest_len + escaped_len + slop_bytes;
    465  ABSL_INTERNAL_CHECK(new_dest_len > cur_dest_len, "std::string size overflow");
    466  strings_internal::AppendUninitializedTraits<std::string>::Append(
    467      dest, escaped_len + slop_bytes);
    468  char* append_ptr = &(*dest)[cur_dest_len];
    469 
    470  for (char c : src) {
    471    unsigned char uc = static_cast<unsigned char>(c);
    472    size_t char_len = kCEscapedLen[uc];
    473    uint32_t little_endian_uint32 = kCEscapedLittleEndianUint32Array[uc];
    474    little_endian::Store32(append_ptr, little_endian_uint32);
    475    append_ptr += char_len;
    476  }
    477  dest->resize(new_dest_len - slop_bytes);
    478 }
    479 
    480 // Reverses the mapping in Base64EscapeInternal; see that method's
    481 // documentation for details of the mapping.
        //
        // Decodes the 'szsrc' bytes at 'src_param' using the lookup table 'unbase64',
        // in which negative entries mark bytes that are not data characters.  ASCII
        // whitespace in the input is skipped.  The data may be followed by '=' or
        // '.' pad characters: either none at all, or exactly the number implied by
        // the leftover data (2 after a trailing 2-character group, 1 after a
        // trailing 3-character group).
        //
        // If 'dest' is non-null the decoded bytes are written to it, and decoding
        // fails if they do not fit in 'szdest' bytes.  If 'dest' is null the input
        // is only checked and its decoded length computed.
        //
        // Returns true and stores the decoded length in '*len' on success.  Returns
        // false, leaving '*len' untouched, on malformed input or insufficient space.
    482 bool Base64UnescapeInternal(absl::Nullable<const char*> src_param, size_t szsrc,
    483                            absl::Nullable<char*> dest, size_t szdest,
    484                            const std::array<signed char, 256>& unbase64,
    485                            absl::Nonnull<size_t*> len) {
    486  static const char kPad64Equals = '=';
    487  static const char kPad64Dot = '.';
    488 
    489  size_t destidx = 0;  // Output bytes produced so far (counted even if !dest).
    490  int decode = 0;  // unbase64[] value of the last byte read; < 0 if not data.
    491  int state = 0;  // Number of data characters pending in 'temp' (0..3).
    492  unsigned char ch = 0;  // Last input byte read.
    493  unsigned int temp = 0;  // Accumulates 6 bits per data character.
    494 
    495  // If "char" is signed by default, using *src as an array index results in
    496  // accessing negative array elements. Treat the input as a pointer to
    497  // unsigned char to avoid this.
    498  const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
    499 
    500  // The GET_INPUT macro gets the next input character, skipping
    501  // over any whitespace, and stopping when we reach the end of the
    502  // string or when we read any non-data character.  The arguments are
    503  // an arbitrary identifier (used as a label for goto) and the number
    504  // of data bytes that must remain in the input to avoid aborting the
    505  // loop.
    506 #define GET_INPUT(label, remain)                                \
    507  label:                                                        \
    508  --szsrc;                                                      \
    509  ch = *src++;                                                  \
    510  decode = unbase64[ch];                                        \
    511  if (decode < 0) {                                             \
    512    if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
    513    state = 4 - remain;                                         \
    514    break;                                                      \
    515  }
    516 
    517  // if dest is null, we're just checking to see if it's legal input
    518  // rather than producing output.  (I suspect this could just be done
    519  // with a regexp...).  We duplicate the loop so this test can be
    520  // outside it instead of in every iteration.
    521 
    522  if (dest) {
    523    // This loop consumes 4 input bytes and produces 3 output bytes
    524    // per iteration.  We can't know at the start that there is enough
    525    // data left in the string for a full iteration, so the loop may
    526    // break out in the middle; if so 'state' will be set to the
    527    // number of input bytes read.
    528 
    529    while (szsrc >= 4) {
    530      // We'll start by optimistically assuming that the next four
    531      // bytes of the string (src[0..3]) are four good data bytes
    532      // (that is, no nulls, whitespace, padding chars, or illegal
    533      // chars).  We need to test src[0..2] for nulls individually
    534      // before constructing temp to preserve the property that we
    535      // never read past a null in the string (no matter how long
    536      // szsrc claims the string is).
    537 
    538      if (!src[0] || !src[1] || !src[2] ||
    539          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
    540                    (unsigned(unbase64[src[1]]) << 12) |
    541                    (unsigned(unbase64[src[2]]) << 6) |
    542                    (unsigned(unbase64[src[3]])))) &
    543           0x80000000)) {
    544        // Iff any of those four characters was bad (null, illegal,
    545        // whitespace, padding), then temp's high bit will be set
    546        // (because unbase64[] is -1 for all bad characters).
    547        //
    548        // We'll back up and resort to the slower decoder, which knows
    549        // how to handle those cases.
    550 
    551        GET_INPUT(first, 4);
    552        temp = static_cast<unsigned char>(decode);
    553        GET_INPUT(second, 3);
    554        temp = (temp << 6) | static_cast<unsigned char>(decode);
    555        GET_INPUT(third, 2);
    556        temp = (temp << 6) | static_cast<unsigned char>(decode);
    557        GET_INPUT(fourth, 1);
    558        temp = (temp << 6) | static_cast<unsigned char>(decode);
    559      } else {
    560        // We really did have four good data bytes, so advance four
    561        // characters in the string.
    562 
    563        szsrc -= 4;
    564        src += 4;
    565      }
    566 
    567      // temp has 24 bits of input, so write that out as three bytes.
    568 
    569      if (destidx + 3 > szdest) return false;
    570      dest[destidx + 2] = static_cast<char>(temp);
    571      temp >>= 8;
    572      dest[destidx + 1] = static_cast<char>(temp);
    573      temp >>= 8;
    574      dest[destidx] = static_cast<char>(temp);
    575      destidx += 3;
    576    }
    577  } else {
             // Same loop as above without the stores: only the input is consumed
             // and the output length counted.
    578    while (szsrc >= 4) {
    579      if (!src[0] || !src[1] || !src[2] ||
    580          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
    581                    (unsigned(unbase64[src[1]]) << 12) |
    582                    (unsigned(unbase64[src[2]]) << 6) |
    583                    (unsigned(unbase64[src[3]])))) &
    584           0x80000000)) {
    585        GET_INPUT(first_no_dest, 4);
    586        GET_INPUT(second_no_dest, 3);
    587        GET_INPUT(third_no_dest, 2);
    588        GET_INPUT(fourth_no_dest, 1);
    589      } else {
    590        szsrc -= 4;
    591        src += 4;
    592      }
    593      destidx += 3;  // Count the 3 bytes this group would decode to.
    594    }
    595  }
    596 
    597 #undef GET_INPUT
    598 
    599  // if the loop terminated because we read a bad character, return
    600  // now.
    601  if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
    602      !absl::ascii_isspace(ch))
    603    return false;
    604 
    605  if (ch == kPad64Equals || ch == kPad64Dot) {
    606    // if we stopped by hitting an '=' or '.', un-read that character -- we'll
    607    // look at it again when we count to check for the proper number of
    608    // equals signs at the end.
    609    ++szsrc;
    610    --src;
    611  } else {
    612    // This loop consumes 1 input byte per iteration.  It's used to
    613    // clean up the 0-3 input bytes remaining when the first, faster
    614    // loop finishes.  'temp' contains the data from 'state' input
    615    // characters read by the first loop.
    616    while (szsrc > 0) {
    617      --szsrc;
    618      ch = *src++;
    619      decode = unbase64[ch];
    620      if (decode < 0) {
    621        if (absl::ascii_isspace(ch)) {
    622          continue;
    623        } else if (ch == kPad64Equals || ch == kPad64Dot) {
    624          // back up one character; we'll read it again when we check
    625          // for the correct number of pad characters at the end.
    626          ++szsrc;
    627          --src;
    628          break;
    629        } else {
    630          return false;
    631        }
    632      }
    633 
    634      // Each input character gives us six bits of output.
    635      temp = (temp << 6) | static_cast<unsigned char>(decode);
    636      ++state;
    637      if (state == 4) {
    638        // If we've accumulated 24 bits of output, write that out as
    639        // three bytes.
    640        if (dest) {
    641          if (destidx + 3 > szdest) return false;
    642          dest[destidx + 2] = static_cast<char>(temp);
    643          temp >>= 8;
    644          dest[destidx + 1] = static_cast<char>(temp);
    645          temp >>= 8;
    646          dest[destidx] = static_cast<char>(temp);
    647        }
    648        destidx += 3;
    649        state = 0;
    650        temp = 0;
    651      }
    652    }
    653  }
    654 
    655  // Process the leftover data contained in 'temp' at the end of the input.
    656  int expected_equals = 0;
    657  switch (state) {
    658    case 0:
    659      // Nothing left over; output is a multiple of 3 bytes.
    660      break;
    661 
    662    case 1:
    663      // Bad input; we have 6 bits left over.
    664      return false;
    665 
    666    case 2:
    667      // Produce one more output byte from the 12 input bits we have left.
    668      if (dest) {
    669        if (destidx + 1 > szdest) return false;
    670        temp >>= 4;
    671        dest[destidx] = static_cast<char>(temp);
    672      }
    673      ++destidx;
    674      expected_equals = 2;
    675      break;
    676 
    677    case 3:
    678      // Produce two more output bytes from the 18 input bits we have left.
    679      if (dest) {
    680        if (destidx + 2 > szdest) return false;
    681        temp >>= 2;
    682        dest[destidx + 1] = static_cast<char>(temp);
    683        temp >>= 8;
    684        dest[destidx] = static_cast<char>(temp);
    685      }
    686      destidx += 2;
    687      expected_equals = 1;
    688      break;
    689 
    690    default:
    691      // state should have no other values at this point.
    692      ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
    693                   state);
    694  }
    695 
    696  // The remainder of the string should be all whitespace, mixed with
    697  // exactly 0 equals signs, or exactly 'expected_equals' equals
    698  // signs.  (Always accepting 0 equals signs is an Abseil extension
    699  // not covered in the RFC, as is accepting dot as the pad character.)
    700 
    701  int equals = 0;
    702  while (szsrc > 0) {
    703    if (*src == kPad64Equals || *src == kPad64Dot)
    704      ++equals;
    705    else if (!absl::ascii_isspace(*src))
    706      return false;
    707    --szsrc;
    708    ++src;
    709  }
    710 
    711  const bool ok = (equals == 0 || equals == expected_equals);
    712  if (ok) *len = destidx;  // '*len' is written only on success.
    713  return ok;
    714 }
    715 
    716 // The arrays below map base64-escaped characters back to their original values.
    717 // For the inverse case, see k(WebSafe)Base64Chars in the internal
    718 // escaping.cc.
    719 // These arrays were generated by the following inversion code:
    720 // #include <sys/time.h>
    721 // #include <stdlib.h>
    722 // #include <string.h>
    723 // main()
    724 // {
    725 //   static const char Base64[] =
    726 //     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    727 //   char* pos;
    728 //   int idx, i, j;
    729 //   printf("    ");
    730 //   for (i = 0; i < 255; i += 8) {
    731 //     for (j = i; j < i + 8; j++) {
    732 //       pos = strchr(Base64, j);
    733 //       if ((pos == nullptr) || (j == 0))
    734 //         idx = -1;
    735 //       else
    736 //         idx = pos - Base64;
    737 //       if (idx == -1)
    738 //         printf(" %2d,     ", idx);
    739 //       else
    740 //         printf(" %2d/*%c*/,", idx, j);
    741 //     }
    742 //     printf("\n    ");
    743 //   }
    744 // }
    745 //
    746 // where the value of "Base64[]" was replaced by one of k(WebSafe)Base64Chars
    747 // in the internal escaping.cc.
    748 /* clang-format off */
    // Reverse lookup for the standard base64 alphabet: indexed by an input byte
    // (0..255), each entry holds the 6-bit value that byte stands for, or -1 when
    // the byte is not part of the alphabet.  One row covers 16 consecutive byte
    // values.
    constexpr std::array<signed char, 256> kUnBase64 = {
        // 0x00 - 0x0F
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        // 0x10 - 0x1F
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        // 0x20 - 0x2F: '+' (0x2B) -> 62, '/' (0x2F) -> 63
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
        // 0x30 - 0x3F: '0'..'9' -> 52..61
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
        // 0x40 - 0x4F: 'A'..'O' -> 0..14
        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        // 0x50 - 0x5F: 'P'..'Z' -> 15..25
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
        // 0x60 - 0x6F: 'a'..'o' -> 26..40
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        // 0x70 - 0x7F: 'p'..'z' -> 41..51
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
        // 0x80 - 0xFF: nothing above 7-bit ASCII belongs to the alphabet.
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };
    783 
    // Reverse lookup for the web-safe base64 alphabet, which uses '-' and '_'
    // where the standard alphabet uses '+' and '/'.  Indexed by an input byte
    // (0..255); each entry is the 6-bit value for that byte, or -1 when the byte
    // is not part of the alphabet.  One row covers 16 consecutive byte values.
    constexpr std::array<signed char, 256> kUnWebSafeBase64 = {
        // 0x00 - 0x0F
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        // 0x10 - 0x1F
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        // 0x20 - 0x2F: '-' (0x2D) -> 62
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
        // 0x30 - 0x3F: '0'..'9' -> 52..61
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
        // 0x40 - 0x4F: 'A'..'O' -> 0..14
        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        // 0x50 - 0x5F: 'P'..'Z' -> 15..25, '_' (0x5F) -> 63
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
        // 0x60 - 0x6F: 'a'..'o' -> 26..40
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        // 0x70 - 0x7F: 'p'..'z' -> 41..51
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
        // 0x80 - 0xFF: nothing above 7-bit ASCII belongs to the alphabet.
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };
    818 /* clang-format on */
    819 
    820 template <typename String>
    821 bool Base64UnescapeInternal(absl::Nullable<const char*> src, size_t slen,
    822                            absl::Nonnull<String*> dest,
    823                            const std::array<signed char, 256>& unbase64) {
    824  // Determine the size of the output string.  Base64 encodes every 3 bytes into
    825  // 4 characters.  Any leftover chars are added directly for good measure.
    826  const size_t dest_len = 3 * (slen / 4) + (slen % 4);
    827 
    828  strings_internal::STLStringResizeUninitialized(dest, dest_len);
    829 
    830  // We are getting the destination buffer by getting the beginning of the
    831  // string and converting it into a char *.
    832  size_t len;
    833  const bool ok =
    834      Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
    835  if (!ok) {
    836    dest->clear();
    837    return false;
    838  }
    839 
    840  // could be shorter if there was padding
    841  assert(len <= dest_len);
    842  dest->erase(len);
    843 
    844  return true;
    845 }
    846 
    847 /* clang-format off */
    // Lenient hex-digit lookup, indexed by byte value: '0'..'9' map to 0..9 and
    // 'A'..'F' / 'a'..'f' map to 10..15.  Every other byte maps to 0, so invalid
    // characters are silently treated as a zero nibble.  Each row is labelled
    // with the byte value of its first entry and covers 16 bytes.
    constexpr std::array<char, 256> kHexValueLenient = {
        /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x30 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,  // '0'..'9'
        /* 0x40 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 'A'..'F'
        /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x60 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 'a'..'f'
        /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    };
    866 
    // Strict hex-digit lookup, indexed by byte value: '0'..'9' map to 0..9 and
    // 'A'..'F' / 'a'..'f' map to 10..15.  Every other byte maps to -1 so callers
    // can detect invalid input.  Each row is labelled with the byte value of its
    // first entry and covers 16 bytes.
    constexpr std::array<signed char, 256> kHexValueStrict = {
        /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x30 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,  // '0'..'9'
        /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 'A'..'F'
        /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 'a'..'f'
        /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    };
    885 /* clang-format on */
    886 
    887 // This is a templated function so that T can be either a char*
    888 // or a string.  This works because we use the [] operator to access
    889 // individual characters at a time.
    890 template <typename T>
    891 void HexStringToBytesInternal(absl::Nullable<const char*> from, T to,
    892                              size_t num) {
    893  for (size_t i = 0; i < num; i++) {
    894    to[i] = static_cast<char>(kHexValueLenient[from[i * 2] & 0xFF] << 4) +
    895            (kHexValueLenient[from[i * 2 + 1] & 0xFF]);
    896  }
    897 }
    898 
    899 // This is a templated function so that T can be either a char* or a
    900 // std::string.
    901 template <typename T>
    902 void BytesToHexStringInternal(absl::Nullable<const unsigned char*> src, T dest,
    903                              size_t num) {
    904  auto dest_ptr = &dest[0];
    905  for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
    906    const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
    907    std::copy(hex_p, hex_p + 2, dest_ptr);
    908  }
    909 }
    910 
    911 }  // namespace
    912 
    913 // ----------------------------------------------------------------------
    914 // CUnescape()
    915 //
    916 // See CUnescapeInternal() for implementation details.
    917 // ----------------------------------------------------------------------
    918 bool CUnescape(absl::string_view source, absl::Nonnull<std::string*> dest,
    919               absl::Nullable<std::string*> error) {
    920  return CUnescapeInternal(source, kUnescapeNulls, dest, error);
    921 }
    922 
    923 std::string CEscape(absl::string_view src) {
    924  std::string dest;
    925  CEscapeAndAppendInternal(src, &dest);
    926  return dest;
    927 }
    928 
    929 std::string CHexEscape(absl::string_view src) {
    930  return CEscapeInternal(src, true, false);
    931 }
    932 
    933 std::string Utf8SafeCEscape(absl::string_view src) {
    934  return CEscapeInternal(src, false, true);
    935 }
    936 
    937 std::string Utf8SafeCHexEscape(absl::string_view src) {
    938  return CEscapeInternal(src, true, true);
    939 }
    940 
    941 bool Base64Unescape(absl::string_view src, absl::Nonnull<std::string*> dest) {
    942  return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
    943 }
    944 
    945 bool WebSafeBase64Unescape(absl::string_view src,
    946                           absl::Nonnull<std::string*> dest) {
    947  return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
    948 }
    949 
    950 void Base64Escape(absl::string_view src, absl::Nonnull<std::string*> dest) {
    951  strings_internal::Base64EscapeInternal(
    952      reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
    953      true, strings_internal::kBase64Chars);
    954 }
    955 
    956 void WebSafeBase64Escape(absl::string_view src,
    957                         absl::Nonnull<std::string*> dest) {
    958  strings_internal::Base64EscapeInternal(
    959      reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
    960      false, strings_internal::kWebSafeBase64Chars);
    961 }
    962 
    963 std::string Base64Escape(absl::string_view src) {
    964  std::string dest;
    965  strings_internal::Base64EscapeInternal(
    966      reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
    967      true, strings_internal::kBase64Chars);
    968  return dest;
    969 }
    970 
    971 std::string WebSafeBase64Escape(absl::string_view src) {
    972  std::string dest;
    973  strings_internal::Base64EscapeInternal(
    974      reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
    975      false, strings_internal::kWebSafeBase64Chars);
    976  return dest;
    977 }
    978 
    979 bool HexStringToBytes(absl::string_view hex,
    980                      absl::Nonnull<std::string*> bytes) {
    981  std::string output;
    982 
    983  size_t num_bytes = hex.size() / 2;
    984  if (hex.size() != num_bytes * 2) {
    985    return false;
    986  }
    987 
    988  absl::strings_internal::STLStringResizeUninitialized(&output, num_bytes);
    989  auto hex_p = hex.cbegin();
    990  for (std::string::iterator bin_p = output.begin(); bin_p != output.end();
    991       ++bin_p) {
    992    int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
    993    int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
    994    if (h1 == -1 || h2 == -1) {
    995      output.resize(static_cast<size_t>(bin_p - output.begin()));
    996      return false;
    997    }
    998    *bin_p = static_cast<char>((h1 << 4) + h2);
    999  }
   1000 
   1001  *bytes = std::move(output);
   1002  return true;
   1003 }
   1004 
   1005 std::string HexStringToBytes(absl::string_view from) {
   1006  std::string result;
   1007  const auto num = from.size() / 2;
   1008  strings_internal::STLStringResizeUninitialized(&result, num);
   1009  absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
   1010  return result;
   1011 }
   1012 
   1013 std::string BytesToHexString(absl::string_view from) {
   1014  std::string result;
   1015  strings_internal::STLStringResizeUninitialized(&result, 2 * from.size());
   1016  absl::BytesToHexStringInternal<std::string&>(
   1017      reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
   1018  return result;
   1019 }
   1020 
   1021 ABSL_NAMESPACE_END
   1022 }  // namespace absl