tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

escaping.cc (7585B)


      1 // Copyright 2020 The Abseil Authors.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "absl/strings/internal/escaping.h"
     16 
     17 #include <limits>
     18 
     19 #include "absl/base/internal/endian.h"
     20 #include "absl/base/internal/raw_logging.h"
     21 
     22 namespace absl {
     23 ABSL_NAMESPACE_BEGIN
     24 namespace strings_internal {
     25 
     26 // Encoding alphabets: element i of each array is the character emitted for
     27 // the 6-bit value i (0-63). The two arrays differ only in entries 62 and 63.
     28 // For the inverse (decoding) case, see kUn(WebSafe)Base64 in the external
     29 // escaping.cc.
     30 ABSL_CONST_INIT const char kBase64Chars[] =  // Standard: 62 -> '+', 63 -> '/'.
     31    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
     32 
     33 ABSL_CONST_INIT const char kWebSafeBase64Chars[] =  // Web-safe: 62 -> '-', 63 -> '_'.
     34    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
     35 
     36 size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
     37  // Base64 encodes three bytes of input at a time. If the input is not
     38  // divisible by three, we pad as appropriate.
     39  //
     40  // Base64 encodes each three bytes of input into four bytes of output.
     41  constexpr size_t kMaxSize = (std::numeric_limits<size_t>::max() - 1) / 4 * 3;
     42  ABSL_INTERNAL_CHECK(input_len <= kMaxSize,
     43                      "CalculateBase64EscapedLenInternal() overflow");
     44  size_t len = (input_len / 3) * 4;
     45 
     46  // Since all base 64 input is an integral number of octets, only the following
     47  // cases can arise:
     48  if (input_len % 3 == 0) {
     49    // (from https://tools.ietf.org/html/rfc3548)
     50    // (1) the final quantum of encoding input is an integral multiple of 24
     51    // bits; here, the final unit of encoded output will be an integral
     52    // multiple of 4 characters with no "=" padding,
     53  } else if (input_len % 3 == 1) {
     54    // (from https://tools.ietf.org/html/rfc3548)
     55    // (2) the final quantum of encoding input is exactly 8 bits; here, the
     56    // final unit of encoded output will be two characters followed by two
     57    // "=" padding characters, or
     58    len += 2;
     59    if (do_padding) {
     60      len += 2;
     61    }
     62  } else {  // (input_len % 3 == 2)
     63    // (from https://tools.ietf.org/html/rfc3548)
     64    // (3) the final quantum of encoding input is exactly 16 bits; here, the
     65    // final unit of encoded output will be three characters followed by one
     66    // "=" padding character.
     67    len += 3;
     68    if (do_padding) {
     69      len += 1;
     70    }
     71  }
     72 
     73  return len;
     74 }
     75 
     76 // ----------------------------------------------------------------------
     77 //   Take the input in groups of 4 characters and turn each
     78 //   character into a code 0 to 63 thus:
     79 //           A-Z map to 0 to 25
     80 //           a-z map to 26 to 51
     81 //           0-9 map to 52 to 61
     82 //           +(- for WebSafe) maps to 62
     83 //           /(_ for WebSafe) maps to 63
     84 //   There will be four numbers, all less than 64 which can be represented
     85 //   by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
     86 //   Arrange the 6 digit binary numbers into three bytes as such:
     87 //   aaaaaabb bbbbcccc ccdddddd
     88 //   Equals signs (one or two) are used at the end of the encoded block to
     89 //   indicate that the text was not an integer multiple of three bytes long.
     90 // ----------------------------------------------------------------------
     91 size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
     92                            size_t szdest, const char* base64,
     93                            bool do_padding) {
     94  static const char kPad64 = '=';
     95 
     96  if (szsrc * 4 > szdest * 3) return 0;
     97 
     98  char* cur_dest = dest;
     99  const unsigned char* cur_src = src;
    100 
    101  char* const limit_dest = dest + szdest;
    102  const unsigned char* const limit_src = src + szsrc;
    103 
    104  // (from https://tools.ietf.org/html/rfc3548)
    105  // Special processing is performed if fewer than 24 bits are available
    106  // at the end of the data being encoded.  A full encoding quantum is
    107  // always completed at the end of a quantity.  When fewer than 24 input
    108  // bits are available in an input group, zero bits are added (on the
    109  // right) to form an integral number of 6-bit groups.
    110  //
    111  // If do_padding is true, padding at the end of the data is performed. This
    112  // output padding uses the '=' character.
    113 
    114  // Three bytes of data encodes to four characters of cyphertext.
    115  // So we can pump through three-byte chunks atomically.
    116  if (szsrc >= 3) {                    // "limit_src - 3" is UB if szsrc < 3.
    117    while (cur_src < limit_src - 3) {  // While we have >= 32 bits.
    118      uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
    119 
    120      cur_dest[0] = base64[in >> 18];
    121      in &= 0x3FFFF;
    122      cur_dest[1] = base64[in >> 12];
    123      in &= 0xFFF;
    124      cur_dest[2] = base64[in >> 6];
    125      in &= 0x3F;
    126      cur_dest[3] = base64[in];
    127 
    128      cur_dest += 4;
    129      cur_src += 3;
    130    }
    131  }
    132  // To save time, we didn't update szdest or szsrc in the loop.  So do it now.
    133  szdest = static_cast<size_t>(limit_dest - cur_dest);
    134  szsrc = static_cast<size_t>(limit_src - cur_src);
    135 
    136  /* now deal with the tail (<=3 bytes) */
    137  switch (szsrc) {
    138    case 0:
    139      // Nothing left; nothing more to do.
    140      break;
    141    case 1: {
    142      // One byte left: this encodes to two characters, and (optionally)
    143      // two pad characters to round out the four-character cypherblock.
    144      if (szdest < 2) return 0;
    145      uint32_t in = cur_src[0];
    146      cur_dest[0] = base64[in >> 2];
    147      in &= 0x3;
    148      cur_dest[1] = base64[in << 4];
    149      cur_dest += 2;
    150      szdest -= 2;
    151      if (do_padding) {
    152        if (szdest < 2) return 0;
    153        cur_dest[0] = kPad64;
    154        cur_dest[1] = kPad64;
    155        cur_dest += 2;
    156        szdest -= 2;
    157      }
    158      break;
    159    }
    160    case 2: {
    161      // Two bytes left: this encodes to three characters, and (optionally)
    162      // one pad character to round out the four-character cypherblock.
    163      if (szdest < 3) return 0;
    164      uint32_t in = absl::big_endian::Load16(cur_src);
    165      cur_dest[0] = base64[in >> 10];
    166      in &= 0x3FF;
    167      cur_dest[1] = base64[in >> 4];
    168      in &= 0x00F;
    169      cur_dest[2] = base64[in << 2];
    170      cur_dest += 3;
    171      szdest -= 3;
    172      if (do_padding) {
    173        if (szdest < 1) return 0;
    174        cur_dest[0] = kPad64;
    175        cur_dest += 1;
    176        szdest -= 1;
    177      }
    178      break;
    179    }
    180    case 3: {
    181      // Three bytes left: same as in the big loop above.  We can't do this in
    182      // the loop because the loop above always reads 4 bytes, and the fourth
    183      // byte is past the end of the input.
    184      if (szdest < 4) return 0;
    185      uint32_t in =
    186          (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1);
    187      cur_dest[0] = base64[in >> 18];
    188      in &= 0x3FFFF;
    189      cur_dest[1] = base64[in >> 12];
    190      in &= 0xFFF;
    191      cur_dest[2] = base64[in >> 6];
    192      in &= 0x3F;
    193      cur_dest[3] = base64[in];
    194      cur_dest += 4;
    195      szdest -= 4;
    196      break;
    197    }
    198    default:
    199      // Should not be reached: blocks of 4 bytes are handled
    200      // in the while loop before this switch statement.
    201      ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
    202      break;
    203  }
    204  return static_cast<size_t>(cur_dest - dest);
    205 }
    206 
    207 }  // namespace strings_internal
    208 ABSL_NAMESPACE_END
    209 }  // namespace absl