tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

titletrn.cpp (5458B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2001-2011, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   Date        Name        Description
      9 *   05/24/01    aliu        Creation.
     10 **********************************************************************
     11 */
     12 
     13 #include "unicode/utypes.h"
     14 
     15 #if !UCONFIG_NO_TRANSLITERATION
     16 
     17 #include "unicode/uchar.h"
     18 #include "unicode/uniset.h"
     19 #include "unicode/ustring.h"
     20 #include "unicode/utf16.h"
     21 #include "titletrn.h"
     22 #include "umutex.h"
     23 #include "ucase.h"
     24 #include "cpputils.h"
     25 
     26 U_NAMESPACE_BEGIN
     27 
// Emits the RTTI boilerplate for TitlecaseTransliterator.
// NOTE(review): presumably this defines the class-ID accessors declared in
// titletrn.h -- confirm against the macro definition in unicode/uobject.h.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
     29 
     30 TitlecaseTransliterator::TitlecaseTransliterator() :
     31    CaseMapTransliterator(UNICODE_STRING("Any-Title", 9), nullptr)
     32 {
     33    // Need to look back 2 characters in the case of "can't"
     34    setMaximumContextLength(2);
     35 }
     36 
     37 /**
     38 * Destructor.
     39 */
     40 TitlecaseTransliterator::~TitlecaseTransliterator() {
     41 }
     42 
     43 /**
     44 * Copy constructor.
     45 */
     46 TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
     47    CaseMapTransliterator(o)
     48 {
     49 }
     50 
     51 /**
     52 * Assignment operator.
     53 */
     54 /*TitlecaseTransliterator& TitlecaseTransliterator::operator=(
     55                             const TitlecaseTransliterator& o) {
     56    CaseMapTransliterator::operator=(o);
     57    return *this;
     58 }*/
     59 
     60 /**
     61 * Transliterator API.
     62 */
     63 TitlecaseTransliterator* TitlecaseTransliterator::clone() const {
     64    return new TitlecaseTransliterator(*this);
     65 }
     66 
     67 /**
     68 * Implements {@link Transliterator#handleTransliterate}.
     69 */
     70 void TitlecaseTransliterator::handleTransliterate(
     71                                  Replaceable& text, UTransPosition& offsets,
     72                                  UBool isIncremental) const
     73 {
     74    // TODO reimplement, see ustrcase.c
     75    // using a real word break iterator
     76    //   instead of just looking for a transition between cased and uncased characters
     77    // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
     78    // needs to take isIncremental into account because case mappings are context-sensitive
     79    //   also detect when lowercasing function did not finish because of context
     80 
     81    if (offsets.start >= offsets.limit) {
     82        return;
     83    }
     84 
     85    // case type: >0 cased (UCASE_LOWER etc.)  ==0 uncased  <0 case-ignorable
     86    int32_t type;
     87 
     88    // Our mode; we are either converting letter toTitle or
     89    // toLower.
     90    UBool doTitle = true;
     91    
     92    // Determine if there is a preceding context of cased case-ignorable*,
     93    // in which case we want to start in toLower mode.  If the
     94    // prior context is anything else (including empty) then start
     95    // in toTitle mode.
     96    UChar32 c;
     97    int32_t start;
     98    for (start = offsets.start - 1; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
     99        c = text.char32At(start);
    100        type=ucase_getTypeOrIgnorable(c);
    101        if(type>0) { // cased
    102            doTitle=false;
    103            break;
    104        } else if(type==0) { // uncased but not ignorable
    105            break;
    106        }
    107        // else (type<0) case-ignorable: continue
    108    }
    109    
    110    // Convert things after a cased character toLower; things
    111    // after an uncased, non-case-ignorable character toTitle.  Case-ignorable
    112    // characters are copied directly and do not change the mode.
    113    UCaseContext csc;
    114    uprv_memset(&csc, 0, sizeof(csc));
    115    csc.p = &text;
    116    csc.start = offsets.contextStart;
    117    csc.limit = offsets.contextLimit;
    118 
    119    UnicodeString tmp;
    120    const char16_t *s;
    121    int32_t textPos, delta, result;
    122 
    123    for(textPos=offsets.start; textPos<offsets.limit;) {
    124        csc.cpStart=textPos;
    125        c=text.char32At(textPos);
    126        csc.cpLimit=textPos+=U16_LENGTH(c);
    127 
    128        type=ucase_getTypeOrIgnorable(c);
    129        if(type>=0) { // not case-ignorable
    130            if(doTitle) {
    131                result=ucase_toFullTitle(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
    132            } else {
    133                result=ucase_toFullLower(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
    134            }
    135            doTitle = static_cast<UBool>(type == 0); // doTitle=isUncased
    136 
    137            if(csc.b1 && isIncremental) {
    138                // fMap() tried to look beyond the context limit
    139                // wait for more input
    140                offsets.start=csc.cpStart;
    141                return;
    142            }
    143 
    144            if(result>=0) {
    145                // replace the current code point with its full case mapping result
    146                // see UCASE_MAX_STRING_LENGTH
    147                if(result<=UCASE_MAX_STRING_LENGTH) {
    148                    // string s[result]
    149                    tmp.setTo(false, s, result);
    150                    delta=result-U16_LENGTH(c);
    151                } else {
    152                    // single code point
    153                    tmp.setTo(result);
    154                    delta=tmp.length()-U16_LENGTH(c);
    155                }
    156                text.handleReplaceBetween(csc.cpStart, textPos, tmp);
    157                if(delta!=0) {
    158                    textPos+=delta;
    159                    csc.limit=offsets.contextLimit+=delta;
    160                    offsets.limit+=delta;
    161                }
    162            }
    163        }
    164    }
    165    offsets.start=textPos;
    166 }
    167 
    168 U_NAMESPACE_END
    169 
    170 #endif /* #if !UCONFIG_NO_TRANSLITERATION */