tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

casetrn.cpp (5083B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2001-2011, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  casetrn.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2004sep03
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Implementation class for lower-/upper-/title-casing transliterators.
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 
     23 #if !UCONFIG_NO_TRANSLITERATION
     24 
     25 #include "unicode/uchar.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/utf.h"
     28 #include "unicode/utf16.h"
     29 #include "tolowtrn.h"
     30 #include "ucase.h"
     31 #include "cpputils.h"
     32 
     33 /* case context iterator using a Replaceable */
     34 U_CFUNC UChar32 U_CALLCONV
     35 utrans_rep_caseContextIterator(void *context, int8_t dir)
     36 {
     37    U_NAMESPACE_USE
     38 
     39    UCaseContext *csc=(UCaseContext *)context;
     40    Replaceable *rep=(Replaceable *)csc->p;
     41    UChar32 c;
     42 
     43    if(dir<0) {
     44        /* reset for backward iteration */
     45        csc->index=csc->cpStart;
     46        csc->dir=dir;
     47    } else if(dir>0) {
     48        /* reset for forward iteration */
     49        csc->index=csc->cpLimit;
     50        csc->dir=dir;
     51    } else {
     52        /* continue current iteration direction */
     53        dir=csc->dir;
     54    }
     55 
     56    // automatically adjust start and limit if the Replaceable disagrees
     57    // with the original values
     58    if(dir<0) {
     59        if(csc->start<csc->index) {
     60            c=rep->char32At(csc->index-1);
     61            if(c<0) {
     62                csc->start=csc->index;
     63            } else {
     64                csc->index-=U16_LENGTH(c);
     65                return c;
     66            }
     67        }
     68    } else {
     69        // detect, and store in csc->b1, if we hit the limit
     70        if(csc->index<csc->limit) {
     71            c=rep->char32At(csc->index);
     72            if(c<0) {
     73                csc->limit=csc->index;
     74                csc->b1=true;
     75            } else {
     76                csc->index+=U16_LENGTH(c);
     77                return c;
     78            }
     79        } else {
     80            csc->b1=true;
     81        }
     82    }
     83    return U_SENTINEL;
     84 }
     85 
     86 U_NAMESPACE_BEGIN
     87 
// Emits the RTTI boilerplate for CaseMapTransliterator via the ICU macro.
// NOTE(review): the ABSTRACT variant presumably means the class is only used
// as a base for concrete casing transliterators — confirm against uobject.h.
UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator)
     89 
     90 /**
     91 * Constructs a transliterator.
     92 */
     93 CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) : 
     94    Transliterator(id, nullptr),
     95    fMap(map)
     96 {
     97    // TODO test incremental mode with context-sensitive text (e.g. greek sigma)
     98    // TODO need to call setMaximumContextLength()?!
     99 }
    100 
    101 /**
    102 * Destructor.
    103 */
    104 CaseMapTransliterator::~CaseMapTransliterator() {
    105 }
    106 
    107 /**
    108 * Copy constructor.
    109 */
    110 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) :
    111    Transliterator(o),
    112    fMap(o.fMap)
    113 {
    114 }
    115 
    116 /**
    117 * Assignment operator.
    118 */
    119 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) {
    120    Transliterator::operator=(o);
    121    fMap = o.fMap;
    122    return *this;
    123 }*/
    124 
    125 /**
    126 * Transliterator API.
    127 */
    128 /*CaseMapTransliterator* CaseMapTransliterator::clone() const {
    129    return new CaseMapTransliterator(*this);
    130 }*/
    131 
    132 /**
    133 * Implements {@link Transliterator#handleTransliterate}.
    134 */
    135 void CaseMapTransliterator::handleTransliterate(Replaceable& text,
    136                                 UTransPosition& offsets, 
    137                                 UBool isIncremental) const
    138 {
    139    if (offsets.start >= offsets.limit) {
    140        return;
    141    }
    142 
    143    UCaseContext csc;
    144    uprv_memset(&csc, 0, sizeof(csc));
    145    csc.p = &text;
    146    csc.start = offsets.contextStart;
    147    csc.limit = offsets.contextLimit;
    148 
    149    UnicodeString tmp;
    150    const char16_t *s;
    151    UChar32 c;
    152    int32_t textPos, delta, result;
    153 
    154    for(textPos=offsets.start; textPos<offsets.limit;) {
    155        csc.cpStart=textPos;
    156        c=text.char32At(textPos);
    157        csc.cpLimit=textPos+=U16_LENGTH(c);
    158 
    159        result=fMap(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
    160 
    161        if(csc.b1 && isIncremental) {
    162            // fMap() tried to look beyond the context limit
    163            // wait for more input
    164            offsets.start=csc.cpStart;
    165            return;
    166        }
    167 
    168        if(result>=0) {
    169            // replace the current code point with its full case mapping result
    170            // see UCASE_MAX_STRING_LENGTH
    171            if(result<=UCASE_MAX_STRING_LENGTH) {
    172                // string s[result]
    173                tmp.setTo(false, s, result);
    174                delta=result-U16_LENGTH(c);
    175            } else {
    176                // single code point
    177                tmp.setTo(result);
    178                delta=tmp.length()-U16_LENGTH(c);
    179            }
    180            text.handleReplaceBetween(csc.cpStart, textPos, tmp);
    181            if(delta!=0) {
    182                textPos+=delta;
    183                csc.limit=offsets.contextLimit+=delta;
    184                offsets.limit+=delta;
    185            }
    186        }
    187    }
    188    offsets.start=textPos;
    189 }
    190 
    191 U_NAMESPACE_END
    192 
    193 #endif /* #if !UCONFIG_NO_TRANSLITERATION */