tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

esctrn.cpp (6827B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (c) 2001-2011, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   11/19/2001  aliu        Creation.
     10 **********************************************************************
     11 */
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_TRANSLITERATION
     16 
     17 #include "unicode/utf16.h"
     18 #include "esctrn.h"
     19 #include "util.h"
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 static const char16_t UNIPRE[] = {85,43,0}; // "U+"
     24 static const char16_t BS_u[] = {92,117,0}; // "\\u"
     25 static const char16_t BS_U[] = {92,85,0}; // "\\U"
     26 static const char16_t XMLPRE[] = {38,35,120,0}; // "&#x"
     27 static const char16_t XML10PRE[] = {38,35,0}; // "&#"
     28 static const char16_t PERLPRE[] = {92,120,123,0}; // "\\x{"
     29 static const char16_t SEMI[] = {59,0}; // ";"
     30 static const char16_t RBRACE[] = {125,0}; // "}"
     31 
     32 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) // NOTE(review): presumably emits the class-ID (RTTI) method definitions for EscapeTransliterator — confirm against the macro's definition
     33 
     34 /**
     35 * Factory methods
     36 */
     37 static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
     38    // Unicode: "U+10FFFF" hex, min=4, max=6
     39    return new EscapeTransliterator(ID, UnicodeString(true, UNIPRE, 2), UnicodeString(), 16, 4, true, nullptr);
     40 }
     41 static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
     42    // Java: "\\uFFFF" hex, min=4, max=4
     43    return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, false, nullptr);
     44 }
     45 static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
     46    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
     47    return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, true,
     48             new EscapeTransliterator(UnicodeString(), UnicodeString(true, BS_U, 2), UnicodeString(), 16, 8, true, nullptr));
     49 }
     50 static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
     51    // XML: "" hex, min=1, max=6
     52    return new EscapeTransliterator(ID, UnicodeString(true, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, true, nullptr);
     53 }
     54 static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
     55    // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
     56    return new EscapeTransliterator(ID, UnicodeString(true, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, true, nullptr);
     57 }
     58 static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
     59    // Perl: "\\x{263A}" hex, min=1, max=6
     60    return new EscapeTransliterator(ID, UnicodeString(true, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, true, nullptr);
     61 }
     62 
     63 /**
     64 * Registers standard variants with the system.  Called by
     65 * Transliterator during initialization.
     66 */
     67 void EscapeTransliterator::registerIDs() {
     68    Token t = integerToken(0);
     69 
     70    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
     71 
     72    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
     73 
     74    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
     75 
     76    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
     77 
     78    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
     79 
     80    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
     81 
     82    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
     83 }
     84 
     85 /**
     86 * Constructs an escape transliterator with the given ID and
     87 * parameters.  See the class member documentation for details.
     88 */
     89 EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
     90                         const UnicodeString& _prefix, const UnicodeString& _suffix,
     91                         int32_t _radix, int32_t _minDigits,
     92                         UBool _grokSupplementals,
     93                         EscapeTransliterator* adoptedSupplementalHandler) :
     94    Transliterator(newID, nullptr)
     95 {
     96    this->prefix = _prefix;
     97    this->suffix = _suffix;
     98    this->radix = _radix;
     99    this->minDigits = _minDigits;
    100    this->grokSupplementals = _grokSupplementals;
    101    this->supplementalHandler = adoptedSupplementalHandler;
    102 }
    103 
    104 /**
    105 * Copy constructor.
    106 */
    107 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
    108    Transliterator(o),
    109    prefix(o.prefix),
    110    suffix(o.suffix),
    111    radix(o.radix),
    112    minDigits(o.minDigits),
    113    grokSupplementals(o.grokSupplementals) {
    114    supplementalHandler = o.supplementalHandler != nullptr ?
    115        new EscapeTransliterator(*o.supplementalHandler) : nullptr;
    116 }
    117 
    118 EscapeTransliterator::~EscapeTransliterator() {
    119    delete supplementalHandler;
    120 }
    121 
    122 /**
    123 * Transliterator API.
    124 */
    125 EscapeTransliterator* EscapeTransliterator::clone() const {
    126    return new EscapeTransliterator(*this);
    127 }
    128 
    129 /**
    130 * Implements {@link Transliterator#handleTransliterate}.
    131 */
    132 void EscapeTransliterator::handleTransliterate(Replaceable& text,
    133                                               UTransPosition& pos,
    134                                               UBool /*isIncremental*/) const
    135 {
    136    /* TODO: Verify that isIncremental can be ignored */
    137    int32_t start = pos.start;
    138    int32_t limit = pos.limit;
    139 
    140    UnicodeString buf(prefix);
    141    int32_t prefixLen = prefix.length();
    142    UBool redoPrefix = false;
    143 
    144    while (start < limit) {
    145        int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
    146        int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
    147 
    148        if ((c & 0xFFFF0000) != 0 && supplementalHandler != nullptr) {
    149            buf.truncate(0);
    150            buf.append(supplementalHandler->prefix);
    151            ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
    152                                  supplementalHandler->minDigits);
    153            buf.append(supplementalHandler->suffix);
    154            redoPrefix = true;
    155        } else {
    156            if (redoPrefix) {
    157                buf.truncate(0);
    158                buf.append(prefix);
    159                redoPrefix = false;
    160            } else {
    161                buf.truncate(prefixLen);
    162            }
    163            ICU_Utility::appendNumber(buf, c, radix, minDigits);
    164            buf.append(suffix);
    165        }
    166 
    167        text.handleReplaceBetween(start, start + charLen, buf);
    168        start += buf.length();
    169        limit += buf.length() - charLen;
    170    }
    171 
    172    pos.contextLimit += limit - pos.limit;
    173    pos.limit = limit;
    174    pos.start = start;
    175 }
    176 
    177 U_NAMESPACE_END
    178 
    179 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
    180 
    181 //eof