tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

bytesinkutil.cpp (5086B)


      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // bytesinkutil.cpp
      5 // created: 2017sep14 Markus W. Scherer
      6 
      7 #include "unicode/utypes.h"
      8 #include "unicode/bytestream.h"
      9 #include "unicode/edits.h"
     10 #include "unicode/stringoptions.h"
     11 #include "unicode/utf8.h"
     12 #include "unicode/utf16.h"
     13 #include "bytesinkutil.h"
     14 #include "charstr.h"
     15 #include "cmemory.h"
     16 #include "uassert.h"
     17 
     18 U_NAMESPACE_BEGIN
     19 
     // Converts the UTF-16 text s16[0, s16Length) to UTF-8 and appends it to sink,
     // one GetAppendBuffer() chunk at a time.
     //
     // length is not used for the conversion; it is only passed through to
     // edits->addReplace(length, <UTF-8 bytes written>) when edits is non-null.
     //
     // Returns false without doing anything if errorCode already indicates failure.
     // Returns false with errorCode = U_INDEX_OUTOFBOUNDS_ERROR if the running
     // UTF-8 byte count would exceed INT32_MAX; chunks appended before that point
     // stay in the sink and edits is not updated. Returns true otherwise.
     //
     // NOTE(review): the *_UNSAFE macros are used, so s16 is presumably expected to be
     // well-formed UTF-16 -- confirm against unicode/utf16.h and unicode/utf8.h.
     20 UBool
     21 ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
     22                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
     23    if (U_FAILURE(errorCode)) { return false; }
           // Fallback storage handed to the sink in case it has no buffer of its own.
     24    char scratch[200];
           // Total number of UTF-8 bytes appended so far, across all chunks.
     25    int32_t s8Length = 0;
           // i is advanced only by U16_NEXT_UNSAFE in the inner loop.
     26    for (int32_t i = 0; i < s16Length;) {
     27        int32_t capacity;
               // Capacity hint for the remaining input. The smaller multipliers and the
               // INT32_MAX clamp keep the multiplication from overflowing int32_t.
     28        int32_t desiredCapacity = s16Length - i;
     29        if (desiredCapacity < (INT32_MAX / 3)) {
     30            desiredCapacity *= 3;  // max 3 UTF-8 bytes per UTF-16 code unit
     31        } else if (desiredCapacity < (INT32_MAX / 2)) {
     32            desiredCapacity *= 2;
     33        } else {
     34            desiredCapacity = INT32_MAX;
     35        }
               // Ask for at least U8_MAX_LENGTH bytes; the sink reports the actual size
               // of the returned buffer through capacity.
     36        char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
     37                                            scratch, UPRV_LENGTHOF(scratch), &capacity);
               // Hold back U8_MAX_LENGTH - 1 bytes so that an append started while
               // j < capacity still fits in the real buffer (assuming one code point
               // needs at most U8_MAX_LENGTH bytes -- TODO confirm in unicode/utf8.h).
     38        capacity -= U8_MAX_LENGTH - 1;
               // j counts the bytes written into this chunk.
     39        int32_t j = 0;
     40        for (; i < s16Length && j < capacity;) {
     41            UChar32 c;
                   // Reads one code point from s16 and advances i.
     42            U16_NEXT_UNSAFE(s16, i, c);
                   // Writes c as UTF-8 into buffer and advances j.
     43            U8_APPEND_UNSAFE(buffer, j, c);
     44        }
               // Guard the s8Length += j below against int32_t overflow.
     45        if (j > (INT32_MAX - s8Length)) {
     46            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     47            return false;
     48        }
     49        sink.Append(buffer, j);
     50        s8Length += j;
     51    }
           // Record a single replacement: length source units became s8Length bytes.
     52    if (edits != nullptr) {
     53        edits->addReplace(length, s8Length);
     54    }
     55    return true;
     56 }
     57 
     // Overload that takes the replaced source text as the byte range [s, limit).
     // Only the distance limit - s is used here; the bytes themselves are not read.
     //
     // Returns false if errorCode already indicates failure, or sets
     // U_INDEX_OUTOFBOUNDS_ERROR and returns false if limit - s exceeds INT32_MAX.
     // Otherwise forwards to the int32_t-length overload and returns its result.
     58 UBool
     59 ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
     60                           const char16_t *s16, int32_t s16Length,
     61                           ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
     62    if (U_FAILURE(errorCode)) { return false; }
           // The callee takes the length as int32_t, so reject ranges that do not fit.
     63    if ((limit - s) > INT32_MAX) {
     64        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
     65        return false;
     66    }
     67    return appendChange(static_cast<int32_t>(limit - s), s16, s16Length, sink, edits, errorCode);
     68 }
     69 
     // Encodes the single code point c as UTF-8 and appends it to sink.
     // When edits is non-null, records addReplace(length, <bytes written>) first.
     // c is not validated in this function.
     70 void
     71 ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
           // Local buffer of U8_MAX_LENGTH bytes that receives the encoded code point.
     72    char s8[U8_MAX_LENGTH];
     73    int32_t s8Length = 0;
           // Writes the encoding into s8 and advances s8Length by the number of bytes.
     74    U8_APPEND_UNSAFE(s8, s8Length, c);
     75    if (edits != nullptr) {
     76        edits->addReplace(length, s8Length);
     77    }
     78    sink.Append(s8, s8Length);
     79 }
     80 
     81 namespace {
     82 
        // File-local helpers that compute the two bytes of a 2-byte UTF-8 sequence.
        // Neither helper range-checks c; the caller in this file (appendTwoBytes)
        // asserts 0x80 <= c <= 0x7ff before using them.
     83 // See unicode/utf8.h U8_APPEND_UNSAFE().
        // Lead byte: the bits of c above the low six, OR-ed with the 0xc0 marker.
     84 inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>((c >> 6) | 0xc0); }
        // Trail byte: the low six bits of c, OR-ed with the 0x80 marker.
     85 inline uint8_t getTwoByteTrail(UChar32 c) { return static_cast<uint8_t>((c & 0x3f) | 0x80); }
     86 
     87 }  // namespace
     88 
     // Appends c to sink as a two-byte UTF-8 sequence.
     // Precondition (asserted only): 0x80 <= c <= 0x7ff. No Edits are recorded here.
     89 void
     90 ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
     91    U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
           // Build the lead and trail bytes with the file-local helpers.
     92    char s8[2] = {static_cast<char>(getTwoByteLead(c)), static_cast<char>(getTwoByteTrail(c))};
     93    sink.Append(s8, 2);
     94 }
     95 
     // Handles a run of length unchanged bytes starting at s.
     // Precondition (asserted only): length > 0.
     // When edits is non-null the run is always recorded via addUnchanged(length);
     // the bytes are appended to sink only if U_OMIT_UNCHANGED_TEXT is not set in
     // options.
     96 void
     97 ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
     98                                      ByteSink &sink, uint32_t options, Edits *edits) {
     99    U_ASSERT(length > 0);
    100    if (edits != nullptr) {
    101        edits->addUnchanged(length);
    102    }
           // Skip the copy when the caller asked for unchanged text to be omitted.
    103    if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
    104        sink.Append(reinterpret_cast<const char *>(s), length);
    105    }
    106 }
    107 
    // Range-based wrapper around appendNonEmptyUnchanged() for the bytes [s, limit).
    //
    // Returns false if errorCode already indicates failure, or sets
    // U_INDEX_OUTOFBOUNDS_ERROR and returns false if limit - s exceeds INT32_MAX.
    // Otherwise returns true; a range whose length is not positive appends
    // nothing and records nothing.
    108 UBool
    109 ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
    110                              ByteSink &sink, uint32_t options, Edits *edits,
    111                              UErrorCode &errorCode) {
    112    if (U_FAILURE(errorCode)) { return false; }
           // The length is carried as int32_t below, so reject ranges that do not fit.
    113    if ((limit - s) > INT32_MAX) {
    114        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
    115        return false;
    116    }
    117    int32_t length = static_cast<int32_t>(limit - s);
           // appendNonEmptyUnchanged() asserts length > 0, so only call it for real runs.
    118    if (length > 0) {
    119        appendNonEmptyUnchanged(s, length, sink, options, edits);
    120    }
    121    return true;
    122 }
    123 
    // Initializes dest_ from *dest. dest is dereferenced unconditionally, so it must
    // not be null.
    // NOTE(review): dest_ is presumably a reference member, in which case *dest must
    // outlive this sink -- confirm against the declaration in bytesinkutil.h.
    124 CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
    125 }
    126 
    // Compiler-generated destructor, defined out of line in this file.
    127 CharStringByteSink::~CharStringByteSink() = default;
    128 
    // Appends n bytes starting at bytes to dest_.
    // The function returns void, so the status produced by dest_.append() has
    // nowhere to go and is dropped.
    129 void
    130 CharStringByteSink::Append(const char* bytes, int32_t n) {
           // Fresh local status for this call; it is never inspected afterwards.
    131    UErrorCode status = U_ZERO_ERROR;
    132    dest_.append(bytes, n, status);
    133    // Any errors are silently ignored.
    134 }
    135 
    // Returns a buffer the caller can write into before calling Append().
    //
    // - If min_capacity < 1 or scratch_capacity < min_capacity, sets
    //   *result_capacity to 0 and returns nullptr.
    // - Otherwise asks dest_ for an append buffer; if that succeeds, returns it with
    //   *result_capacity as filled in by dest_.getAppendBuffer().
    // - If dest_ reports an error, falls back to the caller's scratch buffer and
    //   sets *result_capacity to scratch_capacity.
    136 char*
    137 CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
    138                                    int32_t desired_capacity_hint,
    139                                    char* scratch,
    140                                    int32_t scratch_capacity,
    141                                    int32_t* result_capacity) {
           // Reject requests that the scratch fallback below could not satisfy.
    142    if (min_capacity < 1 || scratch_capacity < min_capacity) {
    143        *result_capacity = 0;
    144        return nullptr;
    145    }
    146 
           // Local status: a failure here is handled by the fallback, not reported.
    147    UErrorCode status = U_ZERO_ERROR;
    148    char* result = dest_.getAppendBuffer(
    149            min_capacity,
    150            desired_capacity_hint,
    151            *result_capacity,
    152            status);
    153    if (U_SUCCESS(status)) {
    154        return result;
    155    }
    156 
           // dest_ could not provide a buffer: overwrite whatever capacity it reported
           // and hand back the caller's scratch space instead.
    157    *result_capacity = scratch_capacity;
    158    return scratch;
    159 }
    160 
    161 U_NAMESPACE_END