tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ubrk.cpp (9047B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ********************************************************************************
      5 *   Copyright (C) 1996-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 ********************************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_BREAK_ITERATION
     13 
     14 #include "unicode/ubrk.h"
     15 
     16 #include "unicode/brkiter.h"
     17 #include "unicode/uloc.h"
     18 #include "unicode/ustring.h"
     19 #include "unicode/uchriter.h"
     20 #include "unicode/rbbi.h"
     21 #include "rbbirb.h"
     22 #include "uassert.h"
     23 #include "cmemory.h"
     24 
     25 U_NAMESPACE_USE
     26 
     27 //------------------------------------------------------------------------------
     28 //
     29 //    ubrk_open      Create a canned type of break iterator based on type (word, line, etc.)
     30 //                   and locale.
     31 //
     32 //------------------------------------------------------------------------------
     33 U_CAPI UBreakIterator* U_EXPORT2
     34 ubrk_open(UBreakIteratorType type,
     35      const char *locale,
     36      const char16_t *text,
     37      int32_t textLength,
     38      UErrorCode *status)
     39 {
     40 
     41  if (U_FAILURE(*status)) return nullptr;
     42 
     43  BreakIterator *result = nullptr;
     44 
     45  switch(type) {
     46 
     47  case UBRK_CHARACTER:
     48    result = BreakIterator::createCharacterInstance(Locale(locale), *status);
     49    break;
     50 
     51  case UBRK_WORD:
     52    result = BreakIterator::createWordInstance(Locale(locale), *status);
     53    break;
     54 
     55  case UBRK_LINE:
     56    result = BreakIterator::createLineInstance(Locale(locale), *status);
     57    break;
     58 
     59  case UBRK_SENTENCE:
     60    result = BreakIterator::createSentenceInstance(Locale(locale), *status);
     61    break;
     62 
     63  case UBRK_TITLE:
     64    result = BreakIterator::createTitleInstance(Locale(locale), *status);
     65    break;
     66 
     67  default:
     68    *status = U_ILLEGAL_ARGUMENT_ERROR;
     69  }
     70 
     71  // check for allocation error
     72  if (U_FAILURE(*status)) {
     73    return nullptr;
     74  }
     75  if (result == nullptr) {
     76    *status = U_MEMORY_ALLOCATION_ERROR;
     77    return nullptr;
     78  }
     79 
     80 
     81  UBreakIterator *uBI = (UBreakIterator *)result;
     82  if (text != nullptr) {
     83      ubrk_setText(uBI, text, textLength, status);
     84  }
     85  return uBI;
     86 }
     87 
     88 
     89 
     90 //------------------------------------------------------------------------------
     91 //
     92 //   ubrk_openRules      open a break iterator from a set of break rules.
     93 //                       Invokes the rule builder.
     94 //
     95 //------------------------------------------------------------------------------
     96 U_CAPI UBreakIterator* U_EXPORT2
     97 ubrk_openRules(  const char16_t     *rules,
     98                       int32_t       rulesLength,
     99                 const char16_t     *text,
    100                       int32_t       textLength,
    101                       UParseError  *parseErr,
    102                       UErrorCode   *status)  {
    103 
    104    if (status == nullptr || U_FAILURE(*status)){
    105        return nullptr;
    106    }
    107 
    108    BreakIterator *result = nullptr;
    109    UnicodeString ruleString(rules, rulesLength);
    110    result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
    111    if(U_FAILURE(*status)) {
    112        return nullptr;
    113    }
    114 
    115    UBreakIterator *uBI = (UBreakIterator *)result;
    116    if (text != nullptr) {
    117        ubrk_setText(uBI, text, textLength, status);
    118    }
    119    return uBI;
    120 }
    121 
    122 
    123 U_CAPI UBreakIterator* U_EXPORT2
    124 ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
    125                     const char16_t *  text, int32_t textLength,
    126                     UErrorCode *   status)
    127 {
    128    if (U_FAILURE(*status)) {
    129        return nullptr;
    130    }
    131    if (rulesLength < 0) {
    132        *status = U_ILLEGAL_ARGUMENT_ERROR;
    133        return nullptr;
    134    }
    135    LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
    136    if (U_FAILURE(*status)) {
    137        return nullptr;
    138    }
    139    UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan());
    140    if (text != nullptr) {
    141        ubrk_setText(uBI, text, textLength, status);
    142    }
    143    return uBI;
    144 }
    145 
    146 
    147 U_CAPI UBreakIterator * U_EXPORT2
    148 ubrk_safeClone(
    149          const UBreakIterator *bi,
    150          void * /*stackBuffer*/,
    151          int32_t *pBufferSize,
    152          UErrorCode *status)
    153 {
    154    if (status == nullptr || U_FAILURE(*status)){
    155        return nullptr;
    156    }
    157    if (bi == nullptr) {
    158       *status = U_ILLEGAL_ARGUMENT_ERROR;
    159        return nullptr;
    160    }
    161    if (pBufferSize != nullptr) {
    162        int32_t inputSize = *pBufferSize;
    163        *pBufferSize = 1;
    164        if (inputSize == 0) {
    165            return nullptr;  // preflighting for deprecated functionality
    166        }
    167    }
    168    BreakIterator *newBI = ((BreakIterator *)bi)->clone();
    169    if (newBI == nullptr) {
    170        *status = U_MEMORY_ALLOCATION_ERROR;
    171    } else if (pBufferSize != nullptr) {
    172        *status = U_SAFECLONE_ALLOCATED_WARNING;
    173    }
    174    return (UBreakIterator *)newBI;
    175 }
    176 
    177 U_CAPI UBreakIterator * U_EXPORT2
    178 ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
    179    return ubrk_safeClone(bi, nullptr, nullptr, status);
    180 }
    181 
    182 
    183 U_CAPI void U_EXPORT2
    184 ubrk_close(UBreakIterator *bi)
    185 {
    186    delete (BreakIterator *)bi;
    187 }
    188 
    189 U_CAPI void U_EXPORT2
    190 ubrk_setText(UBreakIterator* bi,
    191             const char16_t*    text,
    192             int32_t         textLength,
    193             UErrorCode*     status)
    194 {
    195    UText  ut = UTEXT_INITIALIZER;
    196    utext_openUChars(&ut, text, textLength, status);
    197    ((BreakIterator*)bi)->setText(&ut, *status);
    198    // A stack allocated UText wrapping a char16_t * string
    199    //   can be dumped without explicitly closing it.
    200 }
    201 
    202 
    203 
    204 U_CAPI void U_EXPORT2
    205 ubrk_setUText(UBreakIterator *bi,
    206             UText          *text,
    207             UErrorCode     *status)
    208 {
    209  ((BreakIterator*)bi)->setText(text, *status);
    210 }
    211 
    212 
    213 
    214 
    215 
    216 U_CAPI int32_t U_EXPORT2
    217 ubrk_current(const UBreakIterator *bi)
    218 {
    219 
    220  return ((BreakIterator*)bi)->current();
    221 }
    222 
    223 U_CAPI int32_t U_EXPORT2
    224 ubrk_next(UBreakIterator *bi)
    225 {
    226 
    227  return ((BreakIterator*)bi)->next();
    228 }
    229 
    230 U_CAPI int32_t U_EXPORT2
    231 ubrk_previous(UBreakIterator *bi)
    232 {
    233 
    234  return ((BreakIterator*)bi)->previous();
    235 }
    236 
    237 U_CAPI int32_t U_EXPORT2
    238 ubrk_first(UBreakIterator *bi)
    239 {
    240 
    241  return ((BreakIterator*)bi)->first();
    242 }
    243 
    244 U_CAPI int32_t U_EXPORT2
    245 ubrk_last(UBreakIterator *bi)
    246 {
    247 
    248  return ((BreakIterator*)bi)->last();
    249 }
    250 
    251 U_CAPI int32_t U_EXPORT2
    252 ubrk_preceding(UBreakIterator *bi,
    253           int32_t offset)
    254 {
    255 
    256  return ((BreakIterator*)bi)->preceding(offset);
    257 }
    258 
    259 U_CAPI int32_t U_EXPORT2
    260 ubrk_following(UBreakIterator *bi,
    261           int32_t offset)
    262 {
    263 
    264  return ((BreakIterator*)bi)->following(offset);
    265 }
    266 
    267 U_CAPI const char* U_EXPORT2
    268 ubrk_getAvailable(int32_t index)
    269 {
    270 
    271  return uloc_getAvailable(index);
    272 }
    273 
    274 U_CAPI int32_t U_EXPORT2
    275 ubrk_countAvailable()
    276 {
    277 
    278  return uloc_countAvailable();
    279 }
    280 
    281 
    282 U_CAPI  UBool U_EXPORT2
    283 ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
    284 {
    285    return ((BreakIterator*)bi)->isBoundary(offset);
    286 }
    287 
    288 
    289 U_CAPI  int32_t U_EXPORT2
    290 ubrk_getRuleStatus(UBreakIterator *bi)
    291 {
    292    return ((BreakIterator*)bi)->getRuleStatus();
    293 }
    294 
    295 U_CAPI  int32_t U_EXPORT2
    296 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
    297 {
    298    return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
    299 }
    300 
    301 
    302 U_CAPI const char* U_EXPORT2
    303 ubrk_getLocaleByType(const UBreakIterator *bi,
    304                     ULocDataLocaleType type,
    305                     UErrorCode* status)
    306 {
    307    if (bi == nullptr) {
    308        if (U_SUCCESS(*status)) {
    309            *status = U_ILLEGAL_ARGUMENT_ERROR;
    310        }
    311        return nullptr;
    312    }
    313    return ((BreakIterator*)bi)->getLocaleID(type, *status);
    314 }
    315 
    316 
    317 U_CAPI void U_EXPORT2
    318 ubrk_refreshUText(UBreakIterator *bi,
    319                       UText          *text,
    320                       UErrorCode     *status)
    321 {
    322    BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
    323    bii->refreshInputText(text, *status);
    324 }
    325 
    326 U_CAPI int32_t U_EXPORT2
    327 ubrk_getBinaryRules(UBreakIterator *bi,
    328                    uint8_t *       binaryRules, int32_t rulesCapacity,
    329                    UErrorCode *    status)
    330 {
    331    if (U_FAILURE(*status)) {
    332        return 0;
    333    }
    334    if ((binaryRules == nullptr && rulesCapacity > 0) || rulesCapacity < 0) {
    335        *status = U_ILLEGAL_ARGUMENT_ERROR;
    336        return 0;
    337    }
    338    RuleBasedBreakIterator* rbbi;
    339    if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == nullptr) {
    340        *status = U_ILLEGAL_ARGUMENT_ERROR;
    341        return 0;
    342    }
    343    uint32_t rulesLength;
    344    const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength);
    345    if (rulesLength > INT32_MAX) {
    346        *status = U_INDEX_OUTOFBOUNDS_ERROR;
    347        return 0;
    348    }
    349    if (binaryRules != nullptr) { // if not preflighting
    350        // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
    351        if ((int32_t)rulesLength > rulesCapacity) {
    352            *status = U_BUFFER_OVERFLOW_ERROR;
    353        } else {
    354            uprv_memcpy(binaryRules, returnedRules, rulesLength);
    355        }
    356    }
    357    return (int32_t)rulesLength;
    358 }
    359 
    360 
    361 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */