tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

binascii.c (16042B)


      1 /* Copyright (c) 2001, Matej Pfajfar.
      2 * Copyright (c) 2001-2004, Roger Dingledine.
      3 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
      4 * Copyright (c) 2007-2021, The Tor Project, Inc. */
      5 /* See LICENSE for licensing information */
      6 
      7 /**
      8 * \file binascii.c
      9 *
     10 * \brief Miscellaneous functions for encoding and decoding various things
     11 *   in base{16,32,64}.
     12 */
     13 
     14 #include "orconfig.h"
     15 
     16 #include "lib/encoding/binascii.h"
     17 #include "lib/log/log.h"
     18 #include "lib/log/util_bug.h"
     19 #include "lib/cc/torint.h"
     20 #include "lib/string/compat_ctype.h"
     21 #include "lib/intmath/muldiv.h"
     22 #include "lib/malloc/malloc.h"
     23 
     24 #include <stddef.h>
     25 #include <string.h>
     26 #include <stdlib.h>
     27 
     28 /** Return a pointer to a NUL-terminated hexadecimal string encoding
     29 * the first <b>fromlen</b> bytes of <b>from</b>. (fromlen must be \<= 32.) The
     30 * result does not need to be deallocated, but repeated calls to
     31 * hex_str will trash old results.
     32 */
     33 const char *
     34 hex_str(const char *from, size_t fromlen)
     35 {
     36  static char buf[65];
     37  if (fromlen>(sizeof(buf)-1)/2)
     38    fromlen = (sizeof(buf)-1)/2;
     39  base16_encode(buf,sizeof(buf),from,fromlen);
     40  return buf;
     41 }
     42 
     43 /* Return the base32 encoded size in bytes using the source length srclen.
     44 *
     45 * (WATCH OUT: This API counts the terminating NUL byte, but
     46 * base64_encode_size does not.)
     47 */
     48 size_t
     49 base32_encoded_size(size_t srclen)
     50 {
     51  size_t enclen;
     52  tor_assert(srclen < SIZE_T_CEILING / 8);
     53  enclen = BASE32_NOPAD_BUFSIZE(srclen);
     54  tor_assert(enclen < INT_MAX && enclen > srclen);
     55  return enclen;
     56 }
     57 
     58 /** Implements base32 encoding as in RFC 4648. */
     59 void
     60 base32_encode(char *dest, size_t destlen, const char *src, size_t srclen)
     61 {
     62  unsigned int i, v, u;
     63  size_t nbits = srclen * 8;
     64  size_t bit;
     65 
     66  /* We need enough space for the encoded data and the extra NUL byte. */
     67  tor_assert(base32_encoded_size(srclen) <= destlen);
     68  tor_assert(destlen < SIZE_T_CEILING);
     69 
     70  /* Make sure we leave no uninitialized data in the destination buffer. */
     71  memset(dest, 0, destlen);
     72 
     73  for (i=0,bit=0; bit < nbits; ++i, bit+=5) {
     74    /* set v to the 16-bit value starting at src[bits/8], 0-padded. */
     75    size_t idx = bit / 8;
     76    v = ((uint8_t)src[idx]) << 8;
     77    if (idx+1 < srclen)
     78      v += (uint8_t)src[idx+1];
     79    /* set u to the 5-bit value at the bit'th bit of buf. */
     80    u = (v >> (11-(bit%8))) & 0x1F;
     81    dest[i] = BASE32_CHARS[u];
     82  }
     83  dest[i] = '\0';
     84 }
     85 
     86 /** Implements base32 decoding as in RFC 4648.
     87 * Return the number of bytes decoded if successful; -1 otherwise.
     88 */
     89 int
     90 base32_decode(char *dest, size_t destlen, const char *src, size_t srclen)
     91 {
     92  /* XXXX we might want to rewrite this along the lines of base64_decode, if
     93   * it ever shows up in the profile. */
     94  unsigned int i;
     95  size_t nbits, j, bit;
     96  char *tmp;
     97  nbits = ((srclen * 5) / 8) * 8;
     98 
     99  tor_assert(srclen < SIZE_T_CEILING / 5);
    100  tor_assert((nbits/8) <= destlen); /* We need enough space. */
    101  tor_assert(destlen < SIZE_T_CEILING);
    102 
    103  /* Make sure we leave no uninitialized data in the destination buffer. */
    104  memset(dest, 0, destlen);
    105 
    106  /* Convert base32 encoded chars to the 5-bit values that they represent. */
    107  tmp = tor_malloc_zero(srclen);
    108  for (j = 0; j < srclen; ++j) {
    109    if (src[j] > 0x60 && src[j] < 0x7B) tmp[j] = src[j] - 0x61;
    110    else if (src[j] > 0x31 && src[j] < 0x38) tmp[j] = src[j] - 0x18;
    111    else if (src[j] > 0x40 && src[j] < 0x5B) tmp[j] = src[j] - 0x41;
    112    else {
    113      log_warn(LD_GENERAL, "illegal character in base32 encoded string");
    114      tor_free(tmp);
    115      return -1;
    116    }
    117  }
    118 
    119  /* Assemble result byte-wise by applying five possible cases. */
    120  for (i = 0, bit = 0; bit < nbits; ++i, bit += 8) {
    121    switch (bit % 40) {
    122    case 0:
    123      dest[i] = (((uint8_t)tmp[(bit/5)]) << 3) +
    124                (((uint8_t)tmp[(bit/5)+1]) >> 2);
    125      break;
    126    case 8:
    127      dest[i] = (((uint8_t)tmp[(bit/5)]) << 6) +
    128                (((uint8_t)tmp[(bit/5)+1]) << 1) +
    129                (((uint8_t)tmp[(bit/5)+2]) >> 4);
    130      break;
    131    case 16:
    132      dest[i] = (((uint8_t)tmp[(bit/5)]) << 4) +
    133                (((uint8_t)tmp[(bit/5)+1]) >> 1);
    134      break;
    135    case 24:
    136      dest[i] = (((uint8_t)tmp[(bit/5)]) << 7) +
    137                (((uint8_t)tmp[(bit/5)+1]) << 2) +
    138                (((uint8_t)tmp[(bit/5)+2]) >> 3);
    139      break;
    140    case 32:
    141      dest[i] = (((uint8_t)tmp[(bit/5)]) << 5) +
    142                ((uint8_t)tmp[(bit/5)+1]);
    143      break;
    144    }
    145  }
    146 
    147  memset(tmp, 0, srclen); /* on the heap, this should be safe */
    148  tor_free(tmp);
    149  tmp = NULL;
    150  return i;
    151 }
    152 
    153 #define BASE64_OPENSSL_LINELEN 64
    154 
    155 /** Return the Base64 encoded size of <b>srclen</b> bytes of data in
    156 * bytes.
    157 *
    158 * (WATCH OUT: This API <em>does not</em> count the terminating NUL byte,
    159 * but base32_encoded_size does.)
    160 *
    161 * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return the size
    162 * of the encoded output as multiline output (64 character, `\n' terminated
    163 * lines).
    164 */
    165 size_t
    166 base64_encode_size(size_t srclen, int flags)
    167 {
    168  size_t enclen;
    169 
    170  /* Use INT_MAX for overflow checking because base64_encode() returns int. */
    171  tor_assert(srclen < INT_MAX);
    172  tor_assert(CEIL_DIV(srclen, 3) < INT_MAX / 4);
    173 
    174  enclen = BASE64_LEN(srclen);
    175  if (flags & BASE64_ENCODE_MULTILINE)
    176    enclen += CEIL_DIV(enclen, BASE64_OPENSSL_LINELEN);
    177 
    178  tor_assert(enclen < INT_MAX && (enclen == 0 || enclen > srclen));
    179  return enclen;
    180 }
    181 
    182 /** Return an upper bound on the number of bytes that might be needed to hold
    183 * the data from decoding the base64 string <b>srclen</b>.  This is only an
    184 * upper bound, since some part of the base64 string might be padding or
    185 * space. */
    186 size_t
    187 base64_decode_maxsize(size_t srclen)
    188 {
    189  tor_assert(srclen < INT_MAX / 3);
    190 
    191  return CEIL_DIV(srclen * 3, 4);
    192 }
    193 
    194 /** Internal table mapping 6 bit values to the Base64 alphabet. */
    195 static const char base64_encode_table[64] = {
    196  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    197  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    198  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    199  'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    200  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    201  'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    202  'w', 'x', 'y', 'z', '0', '1', '2', '3',
    203  '4', '5', '6', '7', '8', '9', '+', '/'
    204 };
    205 
    206 /** Base64 encode <b>srclen</b> bytes of data from <b>src</b>.  Write
    207 * the result into <b>dest</b>, if it will fit within <b>destlen</b>
    208 * bytes. Return the number of bytes written on success; -1 if
    209 * destlen is too short, or other failure.
    210 *
    211 * If <b>flags</b>&amp;BASE64_ENCODE_MULTILINE is true, return encoded
    212 * output in multiline format (64 character, `\n' terminated lines).
    213 */
    214 int
    215 base64_encode(char *dest, size_t destlen, const char *src, size_t srclen,
    216              int flags)
    217 {
    218  const unsigned char *usrc = (unsigned char *)src;
    219  const unsigned char *eous = usrc + srclen;
    220  char *d = dest;
    221  uint32_t n = 0;
    222  size_t linelen = 0;
    223  size_t enclen;
    224  int n_idx = 0;
    225 
    226  if (!src || !dest)
    227    return -1;
    228 
    229  /* Ensure that there is sufficient space, including the NUL. */
    230  enclen = base64_encode_size(srclen, flags);
    231  if (destlen < enclen + 1)
    232    return -1;
    233  if (destlen > SIZE_T_CEILING)
    234    return -1;
    235  if (enclen > INT_MAX)
    236    return -1;
    237 
    238  /* Make sure we leave no uninitialized data in the destination buffer. */
    239  memset(dest, 0, destlen);
    240 
    241  /* XXX/Yawning: If this ends up being too slow, this can be sped up
    242   * by separating the multiline format case and the normal case, and
    243   * processing 48 bytes of input at a time when newlines are desired.
    244   */
    245 #define ENCODE_CHAR(ch) \
    246  STMT_BEGIN                                                    \
    247    *d++ = ch;                                                  \
    248    if (flags & BASE64_ENCODE_MULTILINE) {                      \
    249      if (++linelen % BASE64_OPENSSL_LINELEN == 0) {            \
    250        linelen = 0;                                            \
    251        *d++ = '\n';                                            \
    252      }                                                         \
    253    }                                                           \
    254  STMT_END
    255 
    256 #define ENCODE_N(idx) \
    257  ENCODE_CHAR(base64_encode_table[(n >> ((3 - idx) * 6)) & 0x3f])
    258 
    259 #define ENCODE_PAD() ENCODE_CHAR('=')
    260 
    261  /* Iterate over all the bytes in src.  Each one will add 8 bits to the
    262   * value we're encoding.  Accumulate bits in <b>n</b>, and whenever we
    263   * have 24 bits, batch them into 4 bytes and flush those bytes to dest.
    264   */
    265  for ( ; usrc < eous; ++usrc) {
    266    n = (n << 8) | *usrc;
    267    if ((++n_idx) == 3) {
    268      ENCODE_N(0);
    269      ENCODE_N(1);
    270      ENCODE_N(2);
    271      ENCODE_N(3);
    272      n_idx = 0;
    273      n = 0;
    274    }
    275  }
    276  switch (n_idx) {
    277  case 0:
    278    /* 0 leftover bits, no padding to add. */
    279    break;
    280  case 1:
    281    /* 8 leftover bits, pad to 12 bits, write the 2 6-bit values followed
    282     * by 2 padding characters.
    283     */
    284    n <<= 4;
    285    ENCODE_N(2);
    286    ENCODE_N(3);
    287    ENCODE_PAD();
    288    ENCODE_PAD();
    289    break;
    290  case 2:
    291    /* 16 leftover bits, pad to 18 bits, write the 3 6-bit values followed
    292     * by 1 padding character.
    293     */
    294    n <<= 2;
    295    ENCODE_N(1);
    296    ENCODE_N(2);
    297    ENCODE_N(3);
    298    ENCODE_PAD();
    299    break;
    300  // LCOV_EXCL_START -- we can't reach this point, because we enforce
    301  // 0 <= ncov_idx < 3 in the loop above.
    302  default:
    303    /* Something went catastrophically wrong. */
    304    tor_fragile_assert();
    305    return -1;
    306  // LCOV_EXCL_STOP
    307  }
    308 
    309 #undef ENCODE_N
    310 #undef ENCODE_PAD
    311 #undef ENCODE_CHAR
    312 
    313  /* Multiline output always includes at least one newline. */
    314  if (flags & BASE64_ENCODE_MULTILINE && linelen != 0)
    315    *d++ = '\n';
    316 
    317  tor_assert(d - dest == (ptrdiff_t)enclen);
    318 
    319  *d++ = '\0'; /* NUL terminate the output. */
    320 
    321  return (int) enclen;
    322 }
    323 
    324 /** As base64_encode, but do not add any internal spaces, and remove external
    325 * padding from the output stream.
    326 * dest must be at least base64_encode_size(srclen, 0), including space for
    327 * the removed external padding. */
    328 int
    329 base64_encode_nopad(char *dest, size_t destlen,
    330                    const uint8_t *src, size_t srclen)
    331 {
    332  int n = base64_encode(dest, destlen, (const char*) src, srclen, 0);
    333  if (n <= 0)
    334    return n;
    335  tor_assert((size_t)n < destlen && dest[n] == 0);
    336  char *in, *out;
    337  in = out = dest;
    338  while (*in) {
    339    if (*in == '=' || *in == '\n') {
    340      ++in;
    341    } else {
    342      *out++ = *in++;
    343    }
    344  }
    345  *out = 0;
    346 
    347  tor_assert(out - dest <= INT_MAX);
    348 
    349  return (int)(out - dest);
    350 }
    351 
    352 #undef BASE64_OPENSSL_LINELEN
    353 
    354 /** @{ */
    355 /** Special values used for the base64_decode_table */
    356 #define X 255
    357 #define SP 64
    358 #define PAD 65
    359 /** @} */
    360 /** Internal table mapping byte values to what they represent in base64.
    361 * Numbers 0..63 are 6-bit integers.  SPs are spaces, and should be
    362 * skipped.  Xs are invalid and must not appear in base64. PAD indicates
    363 * end-of-string. */
    364 static const uint8_t base64_decode_table[256] = {
    365  X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X, /* */
    366  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    367  SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
    368  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
    369  X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    370  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
    371  X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    372  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
    373  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    374  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    375  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    376  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    377  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    378  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    379  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    380  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
    381 };
    382 
    383 /** Base64 decode <b>srclen</b> bytes of data from <b>src</b>.  Write
    384 * the result into <b>dest</b>, if it will fit within <b>destlen</b>
    385 * bytes.  Return the number of bytes written on success; -1 if
    386 * destlen is too short, or other failure.
    387 *
    388 * NOTE 1: destlen is checked conservatively, as though srclen contained no
    389 * spaces or padding.
    390 *
    391 * NOTE 2: This implementation does not check for the correct number of
    392 * padding "=" characters at the end of the string, and does not check
    393 * for internal padding characters.
    394 */
    395 int
    396 base64_decode(char *dest, size_t destlen, const char *src, size_t srclen)
    397 {
    398  const char *eos = src+srclen;
    399  uint32_t n=0;
    400  int n_idx=0;
    401  size_t di = 0;
    402 
    403  if (destlen > INT_MAX)
    404    return -1;
    405 
    406  /* Make sure we leave no uninitialized data in the destination buffer. */
    407  memset(dest, 0, destlen);
    408 
    409  /* Iterate over all the bytes in src.  Each one will add 0 or 6 bits to the
    410   * value we're decoding.  Accumulate bits in <b>n</b>, and whenever we have
    411   * 24 bits, batch them into 3 bytes and flush those bytes to dest.
    412   */
    413  for ( ; src < eos; ++src) {
    414    unsigned char c = (unsigned char) *src;
    415    uint8_t v = base64_decode_table[c];
    416    switch (v) {
    417      case X:
    418        /* This character isn't allowed in base64. */
    419        return -1;
    420      case SP:
    421        /* This character is whitespace, and has no effect. */
    422        continue;
    423      case PAD:
    424        /* We've hit an = character: the data is over. */
    425        goto end_of_loop;
    426      default:
    427        /* We have an actual 6-bit value.  Append it to the bits in n. */
    428        n = (n<<6) | v;
    429        if ((++n_idx) == 4) {
    430          /* We've accumulated 24 bits in n. Flush them. */
    431          if (destlen < 3 || di > destlen - 3)
    432            return -1;
    433          dest[di++] = (n>>16);
    434          dest[di++] = (n>>8) & 0xff;
    435          dest[di++] = (n) & 0xff;
    436          n_idx = 0;
    437          n = 0;
    438        }
    439    }
    440  }
    441 end_of_loop:
    442  /* If we have leftover bits, we need to cope. */
    443  switch (n_idx) {
    444    case 0:
    445    default:
    446      /* No leftover bits.  We win. */
    447      break;
    448    case 1:
    449      /* 6 leftover bits. That's invalid; we can't form a byte out of that. */
    450      return -1;
    451    case 2:
    452      /* 12 leftover bits: The last 4 are padding and the first 8 are data. */
    453      if (destlen < 1 || di > destlen - 1)
    454        return -1;
    455      dest[di++] = n >> 4;
    456      break;
    457    case 3:
    458      /* 18 leftover bits: The last 2 are padding and the first 16 are data. */
    459      if (destlen < 2 || di > destlen - 2)
    460        return -1;
    461      dest[di++] = n >> 10;
    462      dest[di++] = n >> 2;
    463  }
    464 
    465  tor_assert(di <= destlen);
    466 
    467  return (int)di;
    468 }
    469 #undef X
    470 #undef SP
    471 #undef PAD
    472 
    473 /** Encode the <b>srclen</b> bytes at <b>src</b> in a NUL-terminated,
    474 * uppercase hexadecimal string; store it in the <b>destlen</b>-byte buffer
    475 * <b>dest</b>.
    476 */
    477 void
    478 base16_encode(char *dest, size_t destlen, const char *src, size_t srclen)
    479 {
    480  const char *end;
    481  char *cp;
    482 
    483  tor_assert(srclen < SIZE_T_CEILING / 2 - 1);
    484  tor_assert(destlen >= BASE16_BUFSIZE(srclen));
    485  tor_assert(destlen < SIZE_T_CEILING);
    486 
    487  /* Make sure we leave no uninitialized data in the destination buffer. */
    488  memset(dest, 0, destlen);
    489 
    490  cp = dest;
    491  end = src+srclen;
    492  while (src<end) {
    493    *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) >> 4 ];
    494    *cp++ = "0123456789ABCDEF"[ (*(const uint8_t*)src) & 0xf ];
    495    ++src;
    496  }
    497  *cp = '\0';
    498 }
    499 
    500 /** Given a hexadecimal string of <b>srclen</b> bytes in <b>src</b>, decode
    501 * it and store the result in the <b>destlen</b>-byte buffer at <b>dest</b>.
    502 * Return the number of bytes decoded on success, -1 on failure. If
    503 * <b>destlen</b> is greater than INT_MAX or less than half of
    504 * <b>srclen</b>, -1 is returned. */
    505 int
    506 base16_decode(char *dest, size_t destlen, const char *src, size_t srclen)
    507 {
    508  const char *end;
    509  char *dest_orig = dest;
    510  int v1,v2;
    511 
    512  if ((srclen % 2) != 0)
    513    return -1;
    514  if (destlen < srclen/2 || destlen > INT_MAX)
    515    return -1;
    516 
    517  /* Make sure we leave no uninitialized data in the destination buffer. */
    518  memset(dest, 0, destlen);
    519 
    520  end = src+srclen;
    521  while (src<end) {
    522    v1 = hex_decode_digit(*src);
    523    v2 = hex_decode_digit(*(src+1));
    524    if (v1<0||v2<0)
    525      return -1;
    526    *(uint8_t*)dest = (v1<<4)|v2;
    527    ++dest;
    528    src+=2;
    529  }
    530 
    531  tor_assert((dest-dest_orig) <= (ptrdiff_t) destlen);
    532 
    533  return (int) (dest-dest_orig);
    534 }