tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ruleiter.cpp (4502B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 * Copyright (c) 2003-2011, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 * Author: Alan Liu
      9 * Created: September 24 2003
     10 * Since: ICU 2.8
     11 **********************************************************************
     12 */
     13 #include "ruleiter.h"
     14 #include "unicode/parsepos.h"
     15 #include "unicode/symtable.h"
     16 #include "unicode/unistr.h"
     17 #include "unicode/utf16.h"
     18 #include "patternprops.h"
     19 
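/* Maximum number of UTF-16 code units that next() looks ahead when decoding
 * a backslash escape.  The longest notations to cover are \U87654321 and a
 * surrogate pair written as \ud800\udc00. */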
     21 #define MAX_U_NOTATION_LEN 12
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
     26                      ParsePosition& thePos) :
     27    text(theText),
     28    pos(thePos),
     29    sym(theSym),
     30    buf(nullptr),
     31    bufPos(0)
     32 {}
     33 
     34 UBool RuleCharacterIterator::atEnd() const {
     35    return buf == nullptr && pos.getIndex() == text.length();
     36 }
     37 
     38 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
     39    if (U_FAILURE(ec)) return DONE;
     40 
     41    UChar32 c = DONE;
     42    isEscaped = false;
     43 
     44    for (;;) {
     45        c = _current();
     46        _advance(U16_LENGTH(c));
     47 
     48        if (c == SymbolTable::SYMBOL_REF && buf == nullptr &&
     49            (options & PARSE_VARIABLES) != 0 && sym != nullptr) {
     50            UnicodeString name = sym->parseReference(text, pos, text.length());
     51            // If name is empty there was an isolated SYMBOL_REF;
     52            // return it.  Caller must be prepared for this.
     53            if (name.length() == 0) {
     54                break;
     55            }
     56            bufPos = 0;
     57            buf = sym->lookup(name);
     58            if (buf == nullptr) {
     59                ec = U_UNDEFINED_VARIABLE;
     60                return DONE;
     61            }
     62            // Handle empty variable value
     63            if (buf->length() == 0) {
     64                buf = nullptr;
     65            }
     66            continue;
     67        }
     68 
     69        if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
     70            continue;
     71        }
     72 
     73        if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
     74            UnicodeString tempEscape;
     75            int32_t offset = 0;
     76            c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
     77            jumpahead(offset);
     78            isEscaped = true;
     79            if (c < 0) {
     80                ec = U_MALFORMED_UNICODE_ESCAPE;
     81                return DONE;
     82            }
     83        }
     84 
     85        break;
     86    }
     87 
     88    return c;
     89 }
     90 
     91 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
     92    p.buf = buf;
     93    p.pos = pos.getIndex();
     94    p.bufPos = bufPos;
     95 }
     96 
     97 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
     98    buf = p.buf;
     99    pos.setIndex(p.pos);
    100    bufPos = p.bufPos;
    101 }
    102 
    103 void RuleCharacterIterator::skipIgnored(int32_t options) {
    104    if ((options & SKIP_WHITESPACE) != 0) {
    105        for (;;) {
    106            UChar32 a = _current();
    107            if (!PatternProps::isWhiteSpace(a)) break;
    108            _advance(U16_LENGTH(a));
    109        }
    110    }
    111 }
    112 
    113 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
    114    if (maxLookAhead < 0) {
    115        maxLookAhead = 0x7FFFFFFF;
    116    }
    117    if (buf != nullptr) {
    118        buf->extract(bufPos, maxLookAhead, result);
    119    } else {
    120        text.extract(pos.getIndex(), maxLookAhead, result);
    121    }
    122    return result;
    123 }
    124 
    125 void RuleCharacterIterator::jumpahead(int32_t count) {
    126    _advance(count);
    127 }
    128 
    129 /*
    130 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
    131    int32_t b = pos.getIndex();
    132    text.extract(0, b, result);
    133    return result.append((char16_t) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
    134 }
    135 */
    136 
    137 UChar32 RuleCharacterIterator::_current() const {
    138    if (buf != nullptr) {
    139        return buf->char32At(bufPos);
    140    } else {
    141        int i = pos.getIndex();
    142        return (i < text.length()) ? text.char32At(i) : static_cast<UChar32>(DONE);
    143    }
    144 }
    145 
    146 void RuleCharacterIterator::_advance(int32_t count) {
    147    if (buf != nullptr) {
    148        bufPos += count;
    149        if (bufPos == buf->length()) {
    150            buf = nullptr;
    151        }
    152    } else {
    153        pos.setIndex(pos.getIndex() + count);
    154        if (pos.getIndex() > text.length()) {
    155            pos.setIndex(text.length());
    156        }
    157    }
    158 }
    159 
    160 U_NAMESPACE_END
    161 
    162 //eof