tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uregex.cpp (69297B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2004-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  uregex.cpp
      9 */
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
     14 
     15 #include "unicode/regex.h"
     16 #include "unicode/uregex.h"
     17 #include "unicode/unistr.h"
     18 #include "unicode/ustring.h"
     19 #include "unicode/uchar.h"
     20 #include "unicode/uobject.h"
     21 #include "unicode/utf16.h"
     22 #include "cmemory.h"
     23 #include "uassert.h"
     24 #include "uhash.h"
     25 #include "umutex.h"
     26 #include "uvectr32.h"
     27 
     28 #include "regextxt.h"
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
     33 
     34 struct RegularExpression: public UMemory {
     35 public:
     36    RegularExpression();
     37    ~RegularExpression();
     38    int32_t           fMagic;
     39    RegexPattern     *fPat;
     40    u_atomic_int32_t *fPatRefCount;
     41    char16_t         *fPatString;
     42    int32_t           fPatStringLen;
     43    RegexMatcher     *fMatcher;
     44    const char16_t   *fText;         // Text from setText()
     45    int32_t           fTextLength;   // Length provided by user with setText(), which
     46                                     //  may be -1.
     47    UBool             fOwnsText;
     48 };
     49 
     50 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
     51 
     52 RegularExpression::RegularExpression() {
     53    fMagic        = REXP_MAGIC;
     54    fPat          = nullptr;
     55    fPatRefCount  = nullptr;
     56    fPatString    = nullptr;
     57    fPatStringLen = 0;
     58    fMatcher      = nullptr;
     59    fText         = nullptr;
     60    fTextLength   = 0;
     61    fOwnsText     = false;
     62 }
     63 
     64 RegularExpression::~RegularExpression() {
     65    delete fMatcher;
     66    fMatcher = nullptr;
     67    if (fPatRefCount!=nullptr && umtx_atomic_dec(fPatRefCount)==0) {
     68        delete fPat;
     69        uprv_free(fPatString);
     70        uprv_free((void *)fPatRefCount);
     71    }
     72    if (fOwnsText && fText!=nullptr) {
     73        uprv_free((void *)fText);
     74    }
     75    fMagic = 0;
     76 }
     77 
     78 U_NAMESPACE_END
     79 
     80 U_NAMESPACE_USE
     81 
     82 //----------------------------------------------------------------------------------------
     83 //
     84 //   validateRE    Do boilerplate style checks on API function parameters.
     85 //                 Return true if they look OK.
     86 //----------------------------------------------------------------------------------------
     87 static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
     88    if (U_FAILURE(*status)) {
     89        return false;
     90    }
     91    if (re == nullptr || re->fMagic != REXP_MAGIC) {
     92        *status = U_ILLEGAL_ARGUMENT_ERROR;
     93        return false;
     94    }
     95    // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
     96    if (requiresText && re->fText == nullptr && !re->fOwnsText) {
     97        *status = U_REGEX_INVALID_STATE;
     98        return false;
     99    }
    100    return true;
    101 }
    102 
    103 //----------------------------------------------------------------------------------------
    104 //
    105 //    uregex_open
    106 //
    107 //----------------------------------------------------------------------------------------
    108 U_CAPI URegularExpression *  U_EXPORT2
    109 uregex_open( const  char16_t       *pattern,
    110                    int32_t         patternLength,
    111                    uint32_t        flags,
    112                    UParseError    *pe,
    113                    UErrorCode     *status) {
    114 
    115    if (U_FAILURE(*status)) {
    116        return nullptr;
    117    }
    118    if (pattern == nullptr || patternLength < -1 || patternLength == 0) {
    119        *status = U_ILLEGAL_ARGUMENT_ERROR;
    120        return nullptr;
    121    }
    122    int32_t actualPatLen = patternLength;
    123    if (actualPatLen == -1) {
    124        actualPatLen = u_strlen(pattern);
    125    }
    126 
    127    RegularExpression  *re     = new RegularExpression;
    128    u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    129    char16_t           *patBuf = (char16_t *)uprv_malloc(sizeof(char16_t)*(actualPatLen+1));
    130    if (re == nullptr || refC == nullptr || patBuf == nullptr) {
    131        *status = U_MEMORY_ALLOCATION_ERROR;
    132        delete re;
    133        uprv_free((void *)refC);
    134        uprv_free(patBuf);
    135        return nullptr;
    136    }
    137    re->fPatRefCount = refC;
    138    *re->fPatRefCount = 1;
    139 
    140    //
    141    // Make a copy of the pattern string, so we can return it later if asked.
    142    //    For compiling the pattern, we will use a UText wrapper around
    143    //    this local copy, to avoid making even more copies.
    144    //
    145    re->fPatString    = patBuf;
    146    re->fPatStringLen = patternLength;
    147    u_memcpy(patBuf, pattern, actualPatLen);
    148    patBuf[actualPatLen] = 0;
    149 
    150    UText patText = UTEXT_INITIALIZER;
    151    utext_openUChars(&patText, patBuf, patternLength, status);
    152 
    153    //
    154    // Compile the pattern
    155    //
    156    if (pe != nullptr) {
    157        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    158    } else {
    159        re->fPat = RegexPattern::compile(&patText, flags, *status);
    160    }
    161    utext_close(&patText);
    162 
    163    if (U_FAILURE(*status)) {
    164        goto ErrorExit;
    165    }
    166 
    167    //
    168    // Create the matcher object
    169    //
    170    re->fMatcher = re->fPat->matcher(*status);
    171    if (U_SUCCESS(*status)) {
    172        return (URegularExpression*)re;
    173    }
    174 
    175 ErrorExit:
    176    delete re;
    177    return nullptr;
    178 
    179 }
    180 
    181 //----------------------------------------------------------------------------------------
    182 //
    183 //    uregex_openUText
    184 //
    185 //----------------------------------------------------------------------------------------
    186 U_CAPI URegularExpression *  U_EXPORT2
    187 uregex_openUText(UText          *pattern,
    188                 uint32_t        flags,
    189                 UParseError    *pe,
    190                 UErrorCode     *status) {
    191 
    192    if (U_FAILURE(*status)) {
    193        return nullptr;
    194    }
    195    if (pattern == nullptr) {
    196        *status = U_ILLEGAL_ARGUMENT_ERROR;
    197        return nullptr;
    198    }
    199 
    200    int64_t patternNativeLength = utext_nativeLength(pattern);
    201 
    202    if (patternNativeLength == 0) {
    203        *status = U_ILLEGAL_ARGUMENT_ERROR;
    204        return nullptr;
    205    }
    206 
    207    RegularExpression *re     = new RegularExpression;
    208 
    209    UErrorCode lengthStatus = U_ZERO_ERROR;
    210    int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, nullptr, 0, &lengthStatus);
    211 
    212    u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
    213    char16_t           *patBuf = (char16_t *)uprv_malloc(sizeof(char16_t)*(pattern16Length+1));
    214    if (re == nullptr || refC == nullptr || patBuf == nullptr) {
    215        *status = U_MEMORY_ALLOCATION_ERROR;
    216        delete re;
    217        uprv_free((void *)refC);
    218        uprv_free(patBuf);
    219        return nullptr;
    220    }
    221    re->fPatRefCount = refC;
    222    *re->fPatRefCount = 1;
    223 
    224    //
    225    // Make a copy of the pattern string, so we can return it later if asked.
    226    //    For compiling the pattern, we will use a read-only UText wrapper
    227    //    around this local copy, to avoid making even more copies.
    228    //
    229    re->fPatString    = patBuf;
    230    re->fPatStringLen = pattern16Length;
    231    utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
    232 
    233    UText patText = UTEXT_INITIALIZER;
    234    utext_openUChars(&patText, patBuf, pattern16Length, status);
    235 
    236    //
    237    // Compile the pattern
    238    //
    239    if (pe != nullptr) {
    240        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    241    } else {
    242        re->fPat = RegexPattern::compile(&patText, flags, *status);
    243    }
    244    utext_close(&patText);
    245 
    246    if (U_FAILURE(*status)) {
    247        goto ErrorExit;
    248    }
    249 
    250    //
    251    // Create the matcher object
    252    //
    253    re->fMatcher = re->fPat->matcher(*status);
    254    if (U_SUCCESS(*status)) {
    255        return (URegularExpression*)re;
    256    }
    257 
    258 ErrorExit:
    259    delete re;
    260    return nullptr;
    261 
    262 }
    263 
    264 //----------------------------------------------------------------------------------------
    265 //
    266 //    uregex_close
    267 //
    268 //----------------------------------------------------------------------------------------
    269 U_CAPI void  U_EXPORT2
    270 uregex_close(URegularExpression  *re2) {
    271    RegularExpression *re = (RegularExpression*)re2;
    272    UErrorCode  status = U_ZERO_ERROR;
    273    if (validateRE(re, false, &status) == false) {
    274        return;
    275    }
    276    delete re;
    277 }
    278 
    279 
    280 //----------------------------------------------------------------------------------------
    281 //
    282 //    uregex_clone
    283 //
    284 //----------------------------------------------------------------------------------------
    285 U_CAPI URegularExpression * U_EXPORT2
    286 uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
    287    RegularExpression *source = (RegularExpression*)source2;
    288    if (validateRE(source, false, status) == false) {
    289        return nullptr;
    290    }
    291 
    292    RegularExpression *clone = new RegularExpression;
    293    if (clone == nullptr) {
    294        *status = U_MEMORY_ALLOCATION_ERROR;
    295        return nullptr;
    296    }
    297 
    298    clone->fMatcher = source->fPat->matcher(*status);
    299    if (U_FAILURE(*status)) {
    300        delete clone;
    301        return nullptr;
    302    }
    303 
    304    clone->fPat          = source->fPat;
    305    clone->fPatRefCount  = source->fPatRefCount;
    306    clone->fPatString    = source->fPatString;
    307    clone->fPatStringLen = source->fPatStringLen;
    308    umtx_atomic_inc(source->fPatRefCount);
    309    // Note:  fText is not cloned.
    310 
    311    return (URegularExpression*)clone;
    312 }
    313 
    314 
    315 
    316 
    317 //------------------------------------------------------------------------------
    318 //
    319 //    uregex_pattern
    320 //
    321 //------------------------------------------------------------------------------
    322 U_CAPI const char16_t * U_EXPORT2
    323 uregex_pattern(const  URegularExpression *regexp2,
    324                      int32_t            *patLength,
    325                      UErrorCode         *status)  {
    326    RegularExpression *regexp = (RegularExpression*)regexp2;
    327 
    328    if (validateRE(regexp, false, status) == false) {
    329        return nullptr;
    330    }
    331    if (patLength != nullptr) {
    332        *patLength = regexp->fPatStringLen;
    333    }
    334    return regexp->fPatString;
    335 }
    336 
    337 
    338 //------------------------------------------------------------------------------
    339 //
    340 //    uregex_patternUText
    341 //
    342 //------------------------------------------------------------------------------
    343 U_CAPI UText * U_EXPORT2
    344 uregex_patternUText(const URegularExpression *regexp2,
    345                          UErrorCode         *status)  {
    346    RegularExpression *regexp = (RegularExpression*)regexp2;
    347    return regexp->fPat->patternText(*status);
    348 }
    349 
    350 
    351 //------------------------------------------------------------------------------
    352 //
    353 //    uregex_flags
    354 //
    355 //------------------------------------------------------------------------------
    356 U_CAPI int32_t U_EXPORT2
    357 uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
    358    RegularExpression *regexp = (RegularExpression*)regexp2;
    359    if (validateRE(regexp, false, status) == false) {
    360        return 0;
    361    }
    362    int32_t flags = regexp->fPat->flags();
    363    return flags;
    364 }
    365 
    366 
    367 //------------------------------------------------------------------------------
    368 //
    369 //    uregex_setText
    370 //
    371 //------------------------------------------------------------------------------
    372 U_CAPI void U_EXPORT2
    373 uregex_setText(URegularExpression *regexp2,
    374               const char16_t     *text,
    375               int32_t             textLength,
    376               UErrorCode         *status)  {
    377    RegularExpression *regexp = (RegularExpression*)regexp2;
    378    if (validateRE(regexp, false, status) == false) {
    379        return;
    380    }
    381    if (text == nullptr || textLength < -1) {
    382        *status = U_ILLEGAL_ARGUMENT_ERROR;
    383        return;
    384    }
    385 
    386    if (regexp->fOwnsText && regexp->fText != nullptr) {
    387        uprv_free((void *)regexp->fText);
    388    }
    389 
    390    regexp->fText       = text;
    391    regexp->fTextLength = textLength;
    392    regexp->fOwnsText   = false;
    393 
    394    UText input = UTEXT_INITIALIZER;
    395    utext_openUChars(&input, text, textLength, status);
    396    regexp->fMatcher->reset(&input);
    397    utext_close(&input); // reset() made a shallow clone, so we don't need this copy
    398 }
    399 
    400 
    401 //------------------------------------------------------------------------------
    402 //
    403 //    uregex_setUText
    404 //
    405 //------------------------------------------------------------------------------
    406 U_CAPI void U_EXPORT2
    407 uregex_setUText(URegularExpression *regexp2,
    408                UText              *text,
    409                UErrorCode         *status) {
    410    RegularExpression *regexp = (RegularExpression*)regexp2;
    411    if (validateRE(regexp, false, status) == false) {
    412        return;
    413    }
    414    if (text == nullptr) {
    415        *status = U_ILLEGAL_ARGUMENT_ERROR;
    416        return;
    417    }
    418 
    419    if (regexp->fOwnsText && regexp->fText != nullptr) {
    420        uprv_free((void *)regexp->fText);
    421    }
    422 
    423    regexp->fText       = nullptr; // only fill it in on request
    424    regexp->fTextLength = -1;
    425    regexp->fOwnsText   = true;
    426    regexp->fMatcher->reset(text);
    427 }
    428 
    429 
    430 
    431 //------------------------------------------------------------------------------
    432 //
    433 //    uregex_getText
    434 //
    435 //------------------------------------------------------------------------------
    436 U_CAPI const char16_t * U_EXPORT2
    437 uregex_getText(URegularExpression *regexp2,
    438               int32_t            *textLength,
    439               UErrorCode         *status)  {
    440    RegularExpression *regexp = (RegularExpression*)regexp2;
    441    if (validateRE(regexp, false, status) == false) {
    442        return nullptr;
    443    }
    444 
    445    if (regexp->fText == nullptr) {
    446        // need to fill in the text
    447        UText *inputText = regexp->fMatcher->inputText();
    448        int64_t inputNativeLength = utext_nativeLength(inputText);
    449        if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
    450            regexp->fText = inputText->chunkContents;
    451            regexp->fTextLength = (int32_t)inputNativeLength;
    452            regexp->fOwnsText = false; // because the UText owns it
    453        } else {
    454            UErrorCode lengthStatus = U_ZERO_ERROR;
    455            regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, nullptr, 0, &lengthStatus); // buffer overflow error
    456            char16_t *inputChars = (char16_t *)uprv_malloc(sizeof(char16_t)*(regexp->fTextLength+1));
    457 
    458            utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
    459            regexp->fText = inputChars;
    460            regexp->fOwnsText = true; // should already be set but just in case
    461        }
    462    }
    463 
    464    if (textLength != nullptr) {
    465        *textLength = regexp->fTextLength;
    466    }
    467    return regexp->fText;
    468 }
    469 
    470 
    471 //------------------------------------------------------------------------------
    472 //
    473 //    uregex_getUText
    474 //
    475 //------------------------------------------------------------------------------
    476 U_CAPI UText * U_EXPORT2
    477 uregex_getUText(URegularExpression *regexp2,
    478                UText              *dest,
    479                UErrorCode         *status)  {
    480    RegularExpression *regexp = (RegularExpression*)regexp2;
    481    if (validateRE(regexp, false, status) == false) {
    482        return dest;
    483    }
    484    return regexp->fMatcher->getInput(dest, *status);
    485 }
    486 
    487 
    488 //------------------------------------------------------------------------------
    489 //
    490 //    uregex_refreshUText
    491 //
    492 //------------------------------------------------------------------------------
    493 U_CAPI void U_EXPORT2
    494 uregex_refreshUText(URegularExpression *regexp2,
    495                    UText              *text,
    496                    UErrorCode         *status) {
    497    RegularExpression *regexp = (RegularExpression*)regexp2;
    498    if (validateRE(regexp, false, status) == false) {
    499        return;
    500    }
    501    regexp->fMatcher->refreshInputText(text, *status);
    502 }
    503 
    504 
    505 //------------------------------------------------------------------------------
    506 //
    507 //    uregex_matches
    508 //
    509 //------------------------------------------------------------------------------
    510 U_CAPI UBool U_EXPORT2
    511 uregex_matches(URegularExpression *regexp2,
    512               int32_t            startIndex,
    513               UErrorCode        *status)  {
    514    return uregex_matches64( regexp2, (int64_t)startIndex, status);
    515 }
    516 
    517 U_CAPI UBool U_EXPORT2
    518 uregex_matches64(URegularExpression *regexp2,
    519                 int64_t            startIndex,
    520                 UErrorCode        *status)  {
    521    RegularExpression *regexp = (RegularExpression*)regexp2;
    522    UBool result = false;
    523    if (validateRE(regexp, true, status) == false) {
    524        return result;
    525    }
    526    if (startIndex == -1) {
    527        result = regexp->fMatcher->matches(*status);
    528    } else {
    529        result = regexp->fMatcher->matches(startIndex, *status);
    530    }
    531    return result;
    532 }
    533 
    534 
    535 //------------------------------------------------------------------------------
    536 //
    537 //    uregex_lookingAt
    538 //
    539 //------------------------------------------------------------------------------
    540 U_CAPI UBool U_EXPORT2
    541 uregex_lookingAt(URegularExpression *regexp2,
    542                 int32_t             startIndex,
    543                 UErrorCode         *status)  {
    544    return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
    545 }
    546 
    547 U_CAPI UBool U_EXPORT2
    548 uregex_lookingAt64(URegularExpression *regexp2,
    549                   int64_t             startIndex,
    550                   UErrorCode         *status)  {
    551    RegularExpression *regexp = (RegularExpression*)regexp2;
    552    UBool result = false;
    553    if (validateRE(regexp, true, status) == false) {
    554        return result;
    555    }
    556    if (startIndex == -1) {
    557        result = regexp->fMatcher->lookingAt(*status);
    558    } else {
    559        result = regexp->fMatcher->lookingAt(startIndex, *status);
    560    }
    561    return result;
    562 }
    563 
    564 
    565 
    566 //------------------------------------------------------------------------------
    567 //
    568 //    uregex_find
    569 //
    570 //------------------------------------------------------------------------------
    571 U_CAPI UBool U_EXPORT2
    572 uregex_find(URegularExpression *regexp2,
    573            int32_t             startIndex,
    574            UErrorCode         *status)  {
    575    return uregex_find64( regexp2, (int64_t)startIndex, status);
    576 }
    577 
    578 U_CAPI UBool U_EXPORT2
    579 uregex_find64(URegularExpression *regexp2,
    580              int64_t             startIndex,
    581              UErrorCode         *status)  {
    582    RegularExpression *regexp = (RegularExpression*)regexp2;
    583    UBool result = false;
    584    if (validateRE(regexp, true, status) == false) {
    585        return result;
    586    }
    587    if (startIndex == -1) {
    588        regexp->fMatcher->resetPreserveRegion();
    589        result = regexp->fMatcher->find(*status);
    590    } else {
    591        result = regexp->fMatcher->find(startIndex, *status);
    592    }
    593    return result;
    594 }
    595 
    596 
    597 //------------------------------------------------------------------------------
    598 //
    599 //    uregex_findNext
    600 //
    601 //------------------------------------------------------------------------------
    602 U_CAPI UBool U_EXPORT2
    603 uregex_findNext(URegularExpression *regexp2,
    604                UErrorCode         *status)  {
    605    RegularExpression *regexp = (RegularExpression*)regexp2;
    606    if (validateRE(regexp, true, status) == false) {
    607        return false;
    608    }
    609    UBool result = regexp->fMatcher->find(*status);
    610    return result;
    611 }
    612 
    613 //------------------------------------------------------------------------------
    614 //
    615 //    uregex_groupCount
    616 //
    617 //------------------------------------------------------------------------------
    618 U_CAPI int32_t U_EXPORT2
    619 uregex_groupCount(URegularExpression *regexp2,
    620                  UErrorCode         *status)  {
    621    RegularExpression *regexp = (RegularExpression*)regexp2;
    622    if (validateRE(regexp, false, status) == false) {
    623        return 0;
    624    }
    625    int32_t  result = regexp->fMatcher->groupCount();
    626    return result;
    627 }
    628 
    629 
    630 //------------------------------------------------------------------------------
    631 //
    632 //    uregex_groupNumberFromName
    633 //
    634 //------------------------------------------------------------------------------
    635 int32_t
    636 uregex_groupNumberFromName(URegularExpression *regexp2,
    637                           const char16_t     *groupName,
    638                           int32_t             nameLength,
    639                           UErrorCode          *status) {
    640    RegularExpression* regexp = reinterpret_cast<RegularExpression*>(regexp2);
    641    if (validateRE(regexp, false, status) == false) {
    642        return 0;
    643    }
    644    int32_t  result = regexp->fPat->groupNumberFromName(UnicodeString(groupName, nameLength), *status);
    645    return result;
    646 }
    647 
    648 int32_t
    649 uregex_groupNumberFromCName(URegularExpression *regexp2,
    650                            const char         *groupName,
    651                            int32_t             nameLength,
    652                            UErrorCode          *status) {
    653    RegularExpression* regexp = reinterpret_cast<RegularExpression*>(regexp2);
    654    if (validateRE(regexp, false, status) == false) {
    655        return 0;
    656    }
    657    return regexp->fPat->groupNumberFromName(groupName, nameLength, *status);
    658 }
    659 
    660 //------------------------------------------------------------------------------
    661 //
    662 //    uregex_group
    663 //
    664 //------------------------------------------------------------------------------
    665 U_CAPI int32_t U_EXPORT2
    666 uregex_group(URegularExpression *regexp2,
    667             int32_t             groupNum,
    668             char16_t           *dest,
    669             int32_t             destCapacity,
    670             UErrorCode          *status)  {
    671    RegularExpression *regexp = (RegularExpression*)regexp2;
    672    if (validateRE(regexp, true, status) == false) {
    673        return 0;
    674    }
    675    if (destCapacity < 0 || (destCapacity > 0 && dest == nullptr)) {
    676        *status = U_ILLEGAL_ARGUMENT_ERROR;
    677        return 0;
    678    }
    679 
    680    if (destCapacity == 0 || regexp->fText != nullptr) {
    681        // If preflighting or if we already have the text as UChars,
    682        // this is a little cheaper than extracting from the UText
    683 
    684        //
    685        // Pick up the range of characters from the matcher
    686        //
    687        int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
    688        int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
    689        if (U_FAILURE(*status)) {
    690            return 0;
    691        }
    692 
    693        //
    694        // Trim length based on buffer capacity
    695        //
    696        int32_t fullLength = endIx - startIx;
    697        int32_t copyLength = fullLength;
    698        if (copyLength < destCapacity) {
    699            dest[copyLength] = 0;
    700        } else if (copyLength == destCapacity) {
    701            *status = U_STRING_NOT_TERMINATED_WARNING;
    702        } else {
    703            copyLength = destCapacity;
    704            *status = U_BUFFER_OVERFLOW_ERROR;
    705        }
    706 
    707        //
    708        // Copy capture group to user's buffer
    709        //
    710        if (copyLength > 0) {
    711            u_memcpy(dest, &regexp->fText[startIx], copyLength);
    712        }
    713        return fullLength;
    714    } else {
    715        int64_t  start = regexp->fMatcher->start64(groupNum, *status);
    716        int64_t  limit = regexp->fMatcher->end64(groupNum, *status);
    717        if (U_FAILURE(*status)) {
    718            return 0;
    719        }
    720        // Note edge cases:
    721        //   Group didn't match: start == end == -1. UText trims to 0, UText gives zero length result.
    722        //   Zero Length Match: start == end.
    723        int32_t length = utext_extract(regexp->fMatcher->inputText(), start, limit, dest, destCapacity, status);
    724        return length;
    725    }
    726 
    727 }
    728 
    729 
    730 //------------------------------------------------------------------------------
    731 //
    732 //    uregex_groupUText
    733 //
    734 //------------------------------------------------------------------------------
    735 U_CAPI UText * U_EXPORT2
    736 uregex_groupUText(URegularExpression *regexp2,
    737                  int32_t             groupNum,
    738                  UText              *dest,
    739                  int64_t            *groupLength,
    740                  UErrorCode         *status)  {
    741    RegularExpression *regexp = (RegularExpression*)regexp2;
    742    if (validateRE(regexp, true, status) == false) {
    743        UErrorCode emptyTextStatus = U_ZERO_ERROR;
    744        return (dest ? dest : utext_openUChars(nullptr, nullptr, 0, &emptyTextStatus));
    745    }
    746 
    747    return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
    748 }
    749 
    750 //------------------------------------------------------------------------------
    751 //
    752 //    uregex_start
    753 //
    754 //------------------------------------------------------------------------------
    755 U_CAPI int32_t U_EXPORT2
    756 uregex_start(URegularExpression *regexp2,
    757             int32_t             groupNum,
    758             UErrorCode          *status)  {
    759    return (int32_t)uregex_start64( regexp2, groupNum, status);
    760 }
    761 
    762 U_CAPI int64_t U_EXPORT2
    763 uregex_start64(URegularExpression *regexp2,
    764               int32_t             groupNum,
    765               UErrorCode          *status)  {
    766    RegularExpression *regexp = (RegularExpression*)regexp2;
    767    if (validateRE(regexp, true, status) == false) {
    768        return 0;
    769    }
    770    int64_t result = regexp->fMatcher->start64(groupNum, *status);
    771    return result;
    772 }
    773 
    774 //------------------------------------------------------------------------------
    775 //
    776 //    uregex_end
    777 //
    778 //------------------------------------------------------------------------------
    779 U_CAPI int32_t U_EXPORT2
    780 uregex_end(URegularExpression   *regexp2,
    781           int32_t               groupNum,
    782           UErrorCode           *status)  {
    783    return (int32_t)uregex_end64( regexp2, groupNum, status);
    784 }
    785 
    786 U_CAPI int64_t U_EXPORT2
    787 uregex_end64(URegularExpression   *regexp2,
    788             int32_t               groupNum,
    789             UErrorCode           *status)  {
    790    RegularExpression *regexp = (RegularExpression*)regexp2;
    791    if (validateRE(regexp, true, status) == false) {
    792        return 0;
    793    }
    794    int64_t result = regexp->fMatcher->end64(groupNum, *status);
    795    return result;
    796 }
    797 
    798 //------------------------------------------------------------------------------
    799 //
    800 //    uregex_reset
    801 //
    802 //------------------------------------------------------------------------------
    803 U_CAPI void U_EXPORT2
    804 uregex_reset(URegularExpression    *regexp2,
    805             int32_t               index,
    806             UErrorCode            *status)  {
    807    uregex_reset64( regexp2, (int64_t)index, status);
    808 }
    809 
    810 U_CAPI void U_EXPORT2
    811 uregex_reset64(URegularExpression    *regexp2,
    812               int64_t               index,
    813               UErrorCode            *status)  {
    814    RegularExpression *regexp = (RegularExpression*)regexp2;
    815    if (validateRE(regexp, true, status) == false) {
    816        return;
    817    }
    818    regexp->fMatcher->reset(index, *status);
    819 }
    820 
    821 
    822 //------------------------------------------------------------------------------
    823 //
    824 //    uregex_setRegion
    825 //
    826 //------------------------------------------------------------------------------
    827 U_CAPI void U_EXPORT2
    828 uregex_setRegion(URegularExpression   *regexp2,
    829                 int32_t               regionStart,
    830                 int32_t               regionLimit,
    831                 UErrorCode           *status)  {
    832    uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
    833 }
    834 
    835 U_CAPI void U_EXPORT2
    836 uregex_setRegion64(URegularExpression   *regexp2,
    837                   int64_t               regionStart,
    838                   int64_t               regionLimit,
    839                   UErrorCode           *status)  {
    840    RegularExpression *regexp = (RegularExpression*)regexp2;
    841    if (validateRE(regexp, true, status) == false) {
    842        return;
    843    }
    844    regexp->fMatcher->region(regionStart, regionLimit, *status);
    845 }
    846 
    847 
    848 //------------------------------------------------------------------------------
    849 //
    850 //    uregex_setRegionAndStart
    851 //
    852 //------------------------------------------------------------------------------
    853 U_CAPI void U_EXPORT2
    854 uregex_setRegionAndStart(URegularExpression   *regexp2,
    855                 int64_t               regionStart,
    856                 int64_t               regionLimit,
    857                 int64_t               startIndex,
    858                 UErrorCode           *status)  {
    859    RegularExpression *regexp = (RegularExpression*)regexp2;
    860    if (validateRE(regexp, true, status) == false) {
    861        return;
    862    }
    863    regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
    864 }
    865 
    866 //------------------------------------------------------------------------------
    867 //
    868 //    uregex_regionStart
    869 //
    870 //------------------------------------------------------------------------------
    871 U_CAPI int32_t U_EXPORT2
    872 uregex_regionStart(const  URegularExpression   *regexp2,
    873                          UErrorCode           *status)  {
    874    return (int32_t)uregex_regionStart64(regexp2, status);
    875 }
    876 
    877 U_CAPI int64_t U_EXPORT2
    878 uregex_regionStart64(const  URegularExpression   *regexp2,
    879                            UErrorCode           *status)  {
    880    RegularExpression *regexp = (RegularExpression*)regexp2;
    881    if (validateRE(regexp, true, status) == false) {
    882        return 0;
    883    }
    884    return regexp->fMatcher->regionStart();
    885 }
    886 
    887 
    888 //------------------------------------------------------------------------------
    889 //
    890 //    uregex_regionEnd
    891 //
    892 //------------------------------------------------------------------------------
    893 U_CAPI int32_t U_EXPORT2
    894 uregex_regionEnd(const  URegularExpression   *regexp2,
    895                        UErrorCode           *status)  {
    896    return (int32_t)uregex_regionEnd64(regexp2, status);
    897 }
    898 
    899 U_CAPI int64_t U_EXPORT2
    900 uregex_regionEnd64(const  URegularExpression   *regexp2,
    901                          UErrorCode           *status)  {
    902    RegularExpression *regexp = (RegularExpression*)regexp2;
    903    if (validateRE(regexp, true, status) == false) {
    904        return 0;
    905    }
    906    return regexp->fMatcher->regionEnd();
    907 }
    908 
    909 
    910 //------------------------------------------------------------------------------
    911 //
    912 //    uregex_hasTransparentBounds
    913 //
    914 //------------------------------------------------------------------------------
    915 U_CAPI UBool U_EXPORT2
    916 uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
    917                                   UErrorCode           *status)  {
    918    RegularExpression *regexp = (RegularExpression*)regexp2;
    919    if (validateRE(regexp, false, status) == false) {
    920        return false;
    921    }
    922    return regexp->fMatcher->hasTransparentBounds();
    923 }
    924 
    925 
    926 //------------------------------------------------------------------------------
    927 //
    928 //    uregex_useTransparentBounds
    929 //
    930 //------------------------------------------------------------------------------
    931 U_CAPI void U_EXPORT2
    932 uregex_useTransparentBounds(URegularExpression    *regexp2,
    933                            UBool                  b,
    934                            UErrorCode            *status)  {
    935    RegularExpression *regexp = (RegularExpression*)regexp2;
    936    if (validateRE(regexp, false, status) == false) {
    937        return;
    938    }
    939    regexp->fMatcher->useTransparentBounds(b);
    940 }
    941 
    942 
    943 //------------------------------------------------------------------------------
    944 //
    945 //    uregex_hasAnchoringBounds
    946 //
    947 //------------------------------------------------------------------------------
    948 U_CAPI UBool U_EXPORT2
    949 uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
    950                                 UErrorCode           *status)  {
    951    RegularExpression *regexp = (RegularExpression*)regexp2;
    952    if (validateRE(regexp, false, status) == false) {
    953        return false;
    954    }
    955    return regexp->fMatcher->hasAnchoringBounds();
    956 }
    957 
    958 
    959 //------------------------------------------------------------------------------
    960 //
    961 //    uregex_useAnchoringBounds
    962 //
    963 //------------------------------------------------------------------------------
    964 U_CAPI void U_EXPORT2
    965 uregex_useAnchoringBounds(URegularExpression    *regexp2,
    966                          UBool                  b,
    967                          UErrorCode            *status)  {
    968    RegularExpression *regexp = (RegularExpression*)regexp2;
    969    if (validateRE(regexp, false, status) == false) {
    970        return;
    971    }
    972    regexp->fMatcher->useAnchoringBounds(b);
    973 }
    974 
    975 
    976 //------------------------------------------------------------------------------
    977 //
    978 //    uregex_hitEnd
    979 //
    980 //------------------------------------------------------------------------------
    981 U_CAPI UBool U_EXPORT2
    982 uregex_hitEnd(const  URegularExpression   *regexp2,
    983                     UErrorCode           *status)  {
    984    RegularExpression *regexp = (RegularExpression*)regexp2;
    985    if (validateRE(regexp, true, status) == false) {
    986        return false;
    987    }
    988    return regexp->fMatcher->hitEnd();
    989 }
    990 
    991 
    992 //------------------------------------------------------------------------------
    993 //
    994 //    uregex_requireEnd
    995 //
    996 //------------------------------------------------------------------------------
    997 U_CAPI UBool U_EXPORT2
    998 uregex_requireEnd(const  URegularExpression   *regexp2,
    999                         UErrorCode           *status)  {
   1000    RegularExpression *regexp = (RegularExpression*)regexp2;
   1001    if (validateRE(regexp, true, status) == false) {
   1002        return false;
   1003    }
   1004    return regexp->fMatcher->requireEnd();
   1005 }
   1006 
   1007 
   1008 //------------------------------------------------------------------------------
   1009 //
   1010 //    uregex_setTimeLimit
   1011 //
   1012 //------------------------------------------------------------------------------
   1013 U_CAPI void U_EXPORT2
   1014 uregex_setTimeLimit(URegularExpression   *regexp2,
   1015                    int32_t               limit,
   1016                    UErrorCode           *status) {
   1017    RegularExpression *regexp = (RegularExpression*)regexp2;
   1018    if (validateRE(regexp, false, status)) {
   1019        regexp->fMatcher->setTimeLimit(limit, *status);
   1020    }
   1021 }
   1022 
   1023 
   1024 
   1025 //------------------------------------------------------------------------------
   1026 //
   1027 //    uregex_getTimeLimit
   1028 //
   1029 //------------------------------------------------------------------------------
   1030 U_CAPI int32_t U_EXPORT2
   1031 uregex_getTimeLimit(const  URegularExpression   *regexp2,
   1032                           UErrorCode           *status) {
   1033    int32_t retVal = 0;
   1034    RegularExpression *regexp = (RegularExpression*)regexp2;
   1035    if (validateRE(regexp, false, status)) {
   1036        retVal = regexp->fMatcher->getTimeLimit();
   1037    }
   1038    return retVal;
   1039 }
   1040 
   1041 
   1042 
   1043 //------------------------------------------------------------------------------
   1044 //
   1045 //    uregex_setStackLimit
   1046 //
   1047 //------------------------------------------------------------------------------
   1048 U_CAPI void U_EXPORT2
   1049 uregex_setStackLimit(URegularExpression   *regexp2,
   1050                     int32_t               limit,
   1051                     UErrorCode           *status) {
   1052    RegularExpression *regexp = (RegularExpression*)regexp2;
   1053    if (validateRE(regexp, false, status)) {
   1054        regexp->fMatcher->setStackLimit(limit, *status);
   1055    }
   1056 }
   1057 
   1058 
   1059 
   1060 //------------------------------------------------------------------------------
   1061 //
   1062 //    uregex_getStackLimit
   1063 //
   1064 //------------------------------------------------------------------------------
   1065 U_CAPI int32_t U_EXPORT2
   1066 uregex_getStackLimit(const  URegularExpression   *regexp2,
   1067                            UErrorCode           *status) {
   1068    int32_t retVal = 0;
   1069    RegularExpression *regexp = (RegularExpression*)regexp2;
   1070    if (validateRE(regexp, false, status)) {
   1071        retVal = regexp->fMatcher->getStackLimit();
   1072    }
   1073    return retVal;
   1074 }
   1075 
   1076 
   1077 //------------------------------------------------------------------------------
   1078 //
   1079 //    uregex_setMatchCallback
   1080 //
   1081 //------------------------------------------------------------------------------
   1082 U_CAPI void U_EXPORT2
   1083 uregex_setMatchCallback(URegularExpression      *regexp2,
   1084                        URegexMatchCallback     *callback,
   1085                        const void              *context,
   1086                        UErrorCode              *status) {
   1087    RegularExpression *regexp = (RegularExpression*)regexp2;
   1088    if (validateRE(regexp, false, status)) {
   1089        regexp->fMatcher->setMatchCallback(callback, context, *status);
   1090    }
   1091 }
   1092 
   1093 
   1094 //------------------------------------------------------------------------------
   1095 //
   1096 //    uregex_getMatchCallback
   1097 //
   1098 //------------------------------------------------------------------------------
   1099 U_CAPI void U_EXPORT2
   1100 uregex_getMatchCallback(const URegularExpression    *regexp2,
   1101                        URegexMatchCallback        **callback,
   1102                        const void                 **context,
   1103                        UErrorCode                  *status) {
   1104    RegularExpression *regexp = (RegularExpression*)regexp2;
   1105     if (validateRE(regexp, false, status)) {
   1106         regexp->fMatcher->getMatchCallback(*callback, *context, *status);
   1107     }
   1108 }
   1109 
   1110 
   1111 //------------------------------------------------------------------------------
   1112 //
   1113 //    uregex_setFindProgressCallback
   1114 //
   1115 //------------------------------------------------------------------------------
   1116 U_CAPI void U_EXPORT2
   1117 uregex_setFindProgressCallback(URegularExpression              *regexp2,
   1118                                URegexFindProgressCallback      *callback,
   1119                                const void                      *context,
   1120                                UErrorCode                      *status) {
   1121    RegularExpression *regexp = (RegularExpression*)regexp2;
   1122    if (validateRE(regexp, false, status)) {
   1123        regexp->fMatcher->setFindProgressCallback(callback, context, *status);
   1124    }
   1125 }
   1126 
   1127 
   1128 //------------------------------------------------------------------------------
   1129 //
   1130 //    uregex_getFindProgressCallback
   1131 //
   1132 //------------------------------------------------------------------------------
   1133 U_CAPI void U_EXPORT2
   1134 uregex_getFindProgressCallback(const URegularExpression          *regexp2,
   1135                                URegexFindProgressCallback        **callback,
   1136                                const void                        **context,
   1137                                UErrorCode                        *status) {
   1138    RegularExpression *regexp = (RegularExpression*)regexp2;
   1139     if (validateRE(regexp, false, status)) {
   1140         regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
   1141     }
   1142 }
   1143 
   1144 
   1145 //------------------------------------------------------------------------------
   1146 //
   1147 //    uregex_replaceAll
   1148 //
   1149 //------------------------------------------------------------------------------
   1150 U_CAPI int32_t U_EXPORT2
   1151 uregex_replaceAll(URegularExpression    *regexp2,
   1152                  const char16_t        *replacementText,
   1153                  int32_t                replacementLength,
   1154                  char16_t              *destBuf,
   1155                  int32_t                destCapacity,
   1156                  UErrorCode            *status)  {
   1157    RegularExpression *regexp = (RegularExpression*)regexp2;
   1158    if (validateRE(regexp, true, status) == false) {
   1159        return 0;
   1160    }
   1161    if (replacementText == nullptr || replacementLength < -1 ||
   1162        (destBuf == nullptr && destCapacity > 0) ||
   1163        destCapacity < 0) {
   1164        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1165        return 0;
   1166    }
   1167 
   1168    int32_t   len = 0;
   1169 
   1170    uregex_reset(regexp2, 0, status);
   1171 
   1172    // Note: Separate error code variables for findNext() and appendReplacement()
   1173    //       are used so that destination buffer overflow errors
   1174    //       in appendReplacement won't stop findNext() from working.
   1175    //       appendReplacement() and appendTail() special case incoming buffer
   1176    //       overflow errors, continuing to return the correct length.
   1177    UErrorCode  findStatus = *status;
   1178    while (uregex_findNext(regexp2, &findStatus)) {
   1179        len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1180                                        &destBuf, &destCapacity, status);
   1181    }
   1182    len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1183 
   1184    if (U_FAILURE(findStatus)) {
   1185        // If anything went wrong with the findNext(), make that error trump
   1186        //   whatever may have happened with the append() operations.
   1187        //   Errors in findNext() are not expected.
   1188        *status = findStatus;
   1189    }
   1190 
   1191    return len;
   1192 }
   1193 
   1194 
   1195 //------------------------------------------------------------------------------
   1196 //
   1197 //    uregex_replaceAllUText
   1198 //
   1199 //------------------------------------------------------------------------------
   1200 U_CAPI UText * U_EXPORT2
   1201 uregex_replaceAllUText(URegularExpression    *regexp2,
   1202                       UText                 *replacementText,
   1203                       UText                 *dest,
   1204                       UErrorCode            *status)  {
   1205    RegularExpression *regexp = (RegularExpression*)regexp2;
   1206    if (validateRE(regexp, true, status) == false) {
   1207        return nullptr;
   1208    }
   1209    if (replacementText == nullptr) {
   1210        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1211        return nullptr;
   1212    }
   1213 
   1214    dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
   1215    return dest;
   1216 }
   1217 
   1218 
   1219 //------------------------------------------------------------------------------
   1220 //
   1221 //    uregex_replaceFirst
   1222 //
   1223 //------------------------------------------------------------------------------
   1224 U_CAPI int32_t U_EXPORT2
   1225 uregex_replaceFirst(URegularExpression  *regexp2,
   1226                    const char16_t      *replacementText,
   1227                    int32_t              replacementLength,
   1228                    char16_t            *destBuf,
   1229                    int32_t              destCapacity,
   1230                    UErrorCode          *status)  {
   1231    RegularExpression *regexp = (RegularExpression*)regexp2;
   1232    if (validateRE(regexp, true, status) == false) {
   1233        return 0;
   1234    }
   1235    if (replacementText == nullptr || replacementLength < -1 ||
   1236        (destBuf == nullptr && destCapacity > 0) ||
   1237        destCapacity < 0) {
   1238        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1239        return 0;
   1240    }
   1241 
   1242    int32_t   len = 0;
   1243    UBool     findSucceeded;
   1244    uregex_reset(regexp2, 0, status);
   1245    findSucceeded = uregex_find(regexp2, 0, status);
   1246    if (findSucceeded) {
   1247        len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
   1248                                       &destBuf, &destCapacity, status);
   1249    }
   1250    len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
   1251 
   1252    return len;
   1253 }
   1254 
   1255 
   1256 //------------------------------------------------------------------------------
   1257 //
   1258 //    uregex_replaceFirstUText
   1259 //
   1260 //------------------------------------------------------------------------------
   1261 U_CAPI UText * U_EXPORT2
   1262 uregex_replaceFirstUText(URegularExpression  *regexp2,
   1263                         UText                 *replacementText,
   1264                         UText                 *dest,
   1265                         UErrorCode            *status)  {
   1266    RegularExpression *regexp = (RegularExpression*)regexp2;
   1267    if (validateRE(regexp, true, status) == false) {
   1268        return nullptr;
   1269    }
   1270    if (replacementText == nullptr) {
   1271        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1272        return nullptr;
   1273    }
   1274 
   1275    dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
   1276    return dest;
   1277 }
   1278 
   1279 
   1280 //------------------------------------------------------------------------------
   1281 //
   1282 //    uregex_appendReplacement
   1283 //
   1284 //------------------------------------------------------------------------------
   1285 
   1286 U_NAMESPACE_BEGIN
   1287 //
   1288 //  Dummy class, because these functions need to be friends of class RegexMatcher,
   1289 //               and stand-alone C functions don't work as friends
   1290 //
   1291 class RegexCImpl {
   1292 public:
   1293   inline static  int32_t appendReplacement(RegularExpression    *regexp,
   1294                      const char16_t        *replacementText,
   1295                      int32_t                replacementLength,
   1296                      char16_t             **destBuf,
   1297                      int32_t               *destCapacity,
   1298                      UErrorCode            *status);
   1299 
   1300   inline static int32_t appendTail(RegularExpression    *regexp,
   1301        char16_t             **destBuf,
   1302        int32_t               *destCapacity,
   1303        UErrorCode            *status);
   1304 
   1305    inline static int32_t split(RegularExpression    *regexp,
   1306        char16_t              *destBuf,
   1307        int32_t                destCapacity,
   1308        int32_t               *requiredCapacity,
   1309        char16_t              *destFields[],
   1310        int32_t                destFieldsCapacity,
   1311        UErrorCode            *status);
   1312 };
   1313 
   1314 U_NAMESPACE_END
   1315 
   1316 
   1317 
   // UTF-16 code units that are special when scanning replacement text.
   // Note that LEFTBRACKET / RIGHTBRACKET are the curly braces '{' and '}'.
   static constexpr char16_t BACKSLASH    = u'\\';   // U+005C
   static constexpr char16_t DOLLARSIGN   = u'$';    // U+0024
   static constexpr char16_t LEFTBRACKET  = u'{';    // U+007B
   static constexpr char16_t RIGHTBRACKET = u'}';    // U+007D
   1322 
   //
   //  Store one code unit at buf[*idx] when that position lies inside the buffer,
   //  then advance *idx unconditionally.
   //      The index keeps growing past bufCapacity so that callers can use its
   //      final value as the required size (preflighting).
   //      buf is never touched when *idx >= bufCapacity, so a null buf with a
   //      capacity of 0 is fine.
   //
   static inline void appendToBuf(char16_t c, int32_t *idx, char16_t *buf, int32_t bufCapacity) {
       const int32_t pos = (*idx)++;
       if (pos < bufCapacity) {
           buf[pos] = c;
       }
   }
   1334 
   1335 
   1336 //
   1337 //  appendReplacement, the actual implementation.
   1338 //
   1339 int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
   1340                                      const char16_t        *replacementText,
   1341                                      int32_t                replacementLength,
   1342                                      char16_t             **destBuf,
   1343                                      int32_t               *destCapacity,
   1344                                      UErrorCode            *status)  {
   1345 
   1346    // If we come in with a buffer overflow error, don't suppress the operation.
   1347    //  A series of appendReplacements, appendTail need to correctly preflight
   1348    //  the buffer size when an overflow happens somewhere in the middle.
   1349    UBool pendingBufferOverflow = false;
   1350    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != nullptr && *destCapacity == 0) {
   1351        pendingBufferOverflow = true;
   1352        *status = U_ZERO_ERROR;
   1353    }
   1354 
   1355    //
   1356    // Validate all parameters
   1357    //
   1358    if (validateRE(regexp, true, status) == false) {
   1359        return 0;
   1360    }
   1361    if (replacementText == nullptr || replacementLength < -1 ||
   1362        destCapacity == nullptr || destBuf == nullptr ||
   1363        (*destBuf == nullptr && *destCapacity > 0) ||
   1364        *destCapacity < 0) {
   1365        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1366        return 0;
   1367    }
   1368 
   1369    RegexMatcher *m = regexp->fMatcher;
   1370    if (m->fMatch == false) {
   1371        *status = U_REGEX_INVALID_STATE;
   1372        return 0;
   1373    }
   1374 
   1375    char16_t *dest             = *destBuf;
   1376    int32_t   capacity         = *destCapacity;
   1377    int32_t   destIdx          =  0;
   1378    int32_t   i;
   1379 
   1380    // If it wasn't supplied by the caller,  get the length of the replacement text.
   1381    //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
   1382    //          the fly and avoid this step.
   1383    if (replacementLength == -1) {
   1384        replacementLength = u_strlen(replacementText);
   1385    }
   1386 
   1387    // Copy input string from the end of previous match to start of current match
   1388    if (regexp->fText != nullptr) {
   1389        int32_t matchStart;
   1390        int32_t lastMatchEnd;
   1391        if (UTEXT_USES_U16(m->fInputText)) {
   1392            lastMatchEnd = static_cast<int32_t>(m->fLastMatchEnd);
   1393            matchStart = static_cast<int32_t>(m->fMatchStart);
   1394        } else {
   1395            // !!!: Would like a better way to do this!
   1396            UErrorCode tempStatus = U_ZERO_ERROR;
   1397            lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, nullptr, 0, &tempStatus);
   1398            tempStatus = U_ZERO_ERROR;
   1399            matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, nullptr, 0, &tempStatus);
   1400        }
   1401        for (i=lastMatchEnd; i<matchStart; i++) {
   1402            appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
   1403        }
   1404    } else {
   1405        UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
   1406        destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
   1407                                 dest==nullptr?nullptr:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
   1408                                 &possibleOverflowError);
   1409    }
   1410    U_ASSERT(destIdx >= 0);
   1411 
   1412    // scan the replacement text, looking for substitutions ($n) and \escapes.
   1413    int32_t  replIdx = 0;
   1414    while (replIdx < replacementLength && U_SUCCESS(*status)) {
   1415        char16_t  c = replacementText[replIdx];
   1416        replIdx++;
   1417        if (c != DOLLARSIGN && c != BACKSLASH) {
   1418            // Common case, no substitution, no escaping,
   1419            //  just copy the char to the dest buf.
   1420            appendToBuf(c, &destIdx, dest, capacity);
   1421            continue;
   1422        }
   1423 
   1424        if (c == BACKSLASH) {
   1425            // Backslash Escape.  Copy the following char out without further checks.
   1426            //                    Note:  Surrogate pairs don't need any special handling
   1427            //                           The second half wont be a '$' or a '\', and
   1428            //                           will move to the dest normally on the next
   1429            //                           loop iteration.
   1430            if (replIdx >= replacementLength) {
   1431                break;
   1432            }
   1433            c = replacementText[replIdx];
   1434 
   1435            if (c==0x55/*U*/ || c==0x75/*u*/) {
   1436                // We have a \udddd or \Udddddddd escape sequence.
   1437                UChar32 escapedChar =
   1438                    u_unescapeAt(uregex_ucstr_unescape_charAt,
   1439                       &replIdx,                   // Index is updated by unescapeAt
   1440                       replacementLength,          // Length of replacement text
   1441                       (void *)replacementText);
   1442 
   1443                if (escapedChar != static_cast<UChar32>(0xFFFFFFFF)) {
   1444                    if (escapedChar <= 0xffff) {
   1445                        appendToBuf(static_cast<char16_t>(escapedChar), &destIdx, dest, capacity);
   1446                    } else {
   1447                        appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
   1448                        appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
   1449                    }
   1450                    continue;
   1451                }
   1452                // Note:  if the \u escape was invalid, just fall through and
   1453                //        treat it as a plain \<anything> escape.
   1454            }
   1455 
   1456            // Plain backslash escape.  Just put out the escaped character.
   1457            appendToBuf(c, &destIdx, dest, capacity);
   1458 
   1459            replIdx++;
   1460            continue;
   1461        }
   1462 
   1463        // We've got a $.  Pick up the following capture group name or number.
   1464        // For numbers, consume only digits that produce a valid capture group for the pattern.
   1465 
   1466        int32_t groupNum  = 0;
   1467        U_ASSERT(c == DOLLARSIGN);
   1468        UChar32 c32 = -1;
   1469        if (replIdx < replacementLength) {
   1470            U16_GET(replacementText, 0, replIdx, replacementLength, c32);
   1471        }
   1472        if (u_isdigit(c32)) {
   1473            int32_t numDigits = 0;
   1474            int32_t numCaptureGroups = m->fPattern->fGroupMap->size();
   1475            for (;;) {
   1476                if (replIdx >= replacementLength) {
   1477                    break;
   1478                }
   1479                U16_GET(replacementText, 0, replIdx, replacementLength, c32);
   1480                if (u_isdigit(c32) == false) {
   1481                    break;
   1482                }
   1483 
   1484                int32_t digitVal = u_charDigitValue(c32);
   1485                if (groupNum * 10 + digitVal <= numCaptureGroups) {
   1486                    groupNum = groupNum * 10 + digitVal;
   1487                    U16_FWD_1(replacementText, replIdx, replacementLength);
   1488                    numDigits++;
   1489                } else {
   1490                    if (numDigits == 0) {
   1491                        *status = U_INDEX_OUTOFBOUNDS_ERROR;
   1492                    }
   1493                    break;
   1494                }
   1495            }
   1496        } else if (c32 == LEFTBRACKET) {
   1497            // Scan for Named Capture Group, ${name}.
   1498            UnicodeString groupName;
   1499            U16_FWD_1(replacementText, replIdx, replacementLength);
   1500            while (U_SUCCESS(*status) && c32 != RIGHTBRACKET) { 
   1501                if (replIdx >= replacementLength) {
   1502                    *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1503                    break;
   1504                }
   1505                U16_NEXT(replacementText, replIdx, replacementLength, c32);
   1506                if ((c32 >= 0x41 && c32 <= 0x5a) ||           // A..Z
   1507                        (c32 >= 0x61 && c32 <= 0x7a) ||       // a..z
   1508                        (c32 >= 0x31 && c32 <= 0x39)) {       // 0..9
   1509                    groupName.append(c32);
   1510                } else if (c32 == RIGHTBRACKET) {
   1511                    groupNum = regexp->fPat->fNamedCaptureMap ?
   1512                            uhash_geti(regexp->fPat->fNamedCaptureMap, &groupName) : 0;
   1513                    if (groupNum == 0) {
   1514                        // Name not defined by pattern.
   1515                        *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1516                    }
   1517                } else {
   1518                    // Character was something other than a name char or a closing '}'
   1519                    *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1520                }
   1521            }
   1522        } else {
   1523            // $ not followed by {name} or digits.
   1524            *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
   1525        }
   1526 
   1527 
   1528        // Finally, append the capture group data to the destination.
   1529        if (U_SUCCESS(*status)) {
   1530            destIdx += uregex_group(reinterpret_cast<URegularExpression*>(regexp), groupNum,
   1531                                    dest==nullptr?nullptr:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
   1532            if (*status == U_BUFFER_OVERFLOW_ERROR) {
   1533                // Ignore buffer overflow when extracting the group.  We need to
   1534                //   continue on to get full size of the untruncated result.  We will
   1535                //   raise our own buffer overflow error at the end.
   1536                *status = U_ZERO_ERROR;
   1537            }
   1538        }
   1539 
   1540        if (U_FAILURE(*status)) {
   1541            // bad group number or name.
   1542            break;
   1543        }
   1544    }
   1545 
   1546    //
   1547    //  Nul Terminate the dest buffer if possible.
   1548    //  Set the appropriate buffer overflow or not terminated error, if needed.
   1549    //
   1550    if (destIdx < capacity) {
   1551        dest[destIdx] = 0;
   1552    } else if (U_SUCCESS(*status)) {
   1553        if (destIdx == *destCapacity) {
   1554            *status = U_STRING_NOT_TERMINATED_WARNING;
   1555        } else {
   1556            *status = U_BUFFER_OVERFLOW_ERROR;
   1557        }
   1558    }
   1559 
   1560    //
   1561    // Return an updated dest buffer and capacity to the caller.
   1562    //
   1563    if (destIdx > 0 &&  *destCapacity > 0) {
   1564        if (destIdx < capacity) {
   1565            *destBuf      += destIdx;
   1566            *destCapacity -= destIdx;
   1567        } else {
   1568            *destBuf      += capacity;
   1569            *destCapacity =  0;
   1570        }
   1571    }
   1572 
   1573    // If we came in with a buffer overflow, make sure we go out with one also.
   1574    //   (A zero length match right at the end of the previous match could
   1575    //    make this function succeed even though a previous call had overflowed the buf)
   1576    if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1577        *status = U_BUFFER_OVERFLOW_ERROR;
   1578    }
   1579 
   1580    return destIdx;
   1581 }
   1582 
   1583 //
   1584 //   appendReplacement   the actual API function,
   1585 //
   1586 U_CAPI int32_t U_EXPORT2
   1587 uregex_appendReplacement(URegularExpression    *regexp2,
   1588                         const char16_t        *replacementText,
   1589                         int32_t                replacementLength,
   1590                         char16_t             **destBuf,
   1591                         int32_t               *destCapacity,
   1592                         UErrorCode            *status) {
   1593 
   1594    RegularExpression *regexp = (RegularExpression*)regexp2;
   1595    return RegexCImpl::appendReplacement(
   1596        regexp, replacementText, replacementLength,destBuf, destCapacity, status);
   1597 }
   1598 
   1599 //
   1600 //   uregex_appendReplacementUText...can just use the normal C++ method
   1601 //
   1602 U_CAPI void U_EXPORT2
   1603 uregex_appendReplacementUText(URegularExpression    *regexp2,
   1604                              UText                 *replText,
   1605                              UText                 *dest,
   1606                              UErrorCode            *status)  {
   1607    RegularExpression *regexp = (RegularExpression*)regexp2;
   1608    regexp->fMatcher->appendReplacement(dest, replText, *status);
   1609 }
   1610 
   1611 
   1612 //------------------------------------------------------------------------------
   1613 //
   1614 //    uregex_appendTail
   1615 //
   1616 //------------------------------------------------------------------------------
   1617 int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
   1618                               char16_t             **destBuf,
   1619                               int32_t               *destCapacity,
   1620                               UErrorCode            *status)
   1621 {
   1622 
   1623    // If we come in with a buffer overflow error, don't suppress the operation.
   1624    //  A series of appendReplacements, appendTail need to correctly preflight
   1625    //  the buffer size when an overflow happens somewhere in the middle.
   1626    UBool pendingBufferOverflow = false;
   1627    if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != nullptr && *destCapacity == 0) {
   1628        pendingBufferOverflow = true;
   1629        *status = U_ZERO_ERROR;
   1630    }
   1631 
   1632    if (validateRE(regexp, true, status) == false) {
   1633        return 0;
   1634    }
   1635 
   1636    if (destCapacity == nullptr || destBuf == nullptr ||
   1637        (*destBuf == nullptr && *destCapacity > 0) ||
   1638        *destCapacity < 0)
   1639    {
   1640        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1641        return 0;
   1642    }
   1643 
   1644    RegexMatcher *m = regexp->fMatcher;
   1645 
   1646    int32_t  destIdx     = 0;
   1647    int32_t  destCap     = *destCapacity;
   1648    char16_t *dest       = *destBuf;
   1649 
   1650    if (regexp->fText != nullptr) {
   1651        int32_t srcIdx;
   1652        int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
   1653        if (nativeIdx == -1) {
   1654            srcIdx = 0;
   1655        } else if (UTEXT_USES_U16(m->fInputText)) {
   1656            srcIdx = static_cast<int32_t>(nativeIdx);
   1657        } else {
   1658            UErrorCode newStatus = U_ZERO_ERROR;
   1659            srcIdx = utext_extract(m->fInputText, 0, nativeIdx, nullptr, 0, &newStatus);
   1660        }
   1661 
   1662        for (;;) {
   1663            U_ASSERT(destIdx >= 0);
   1664 
   1665            if (srcIdx == regexp->fTextLength) {
   1666                break;
   1667            }
   1668            char16_t c = regexp->fText[srcIdx];
   1669            if (c == 0 && regexp->fTextLength == -1) {
   1670                regexp->fTextLength = srcIdx;
   1671                break;
   1672            }
   1673 
   1674            if (destIdx < destCap) {
   1675                dest[destIdx] = c;
   1676            } else {
   1677                // We've overflowed the dest buffer.
   1678                //  If the total input string length is known, we can
   1679                //    compute the total buffer size needed without scanning through the string.
   1680                if (regexp->fTextLength > 0) {
   1681                    destIdx += (regexp->fTextLength - srcIdx);
   1682                    break;
   1683                }
   1684            }
   1685            srcIdx++;
   1686            destIdx++;
   1687        }
   1688    } else {
   1689        int64_t  srcIdx;
   1690        if (m->fMatch) {
   1691            // The most recent call to find() succeeded.
   1692            srcIdx = m->fMatchEnd;
   1693        } else {
   1694            // The last call to find() on this matcher failed().
   1695            //   Look back to the end of the last find() that succeeded for src index.
   1696            srcIdx = m->fLastMatchEnd;
   1697            if (srcIdx == -1)  {
   1698                // There has been no successful match with this matcher.
   1699                //   We want to copy the whole string.
   1700                srcIdx = 0;
   1701            }
   1702        }
   1703 
   1704        destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
   1705    }
   1706 
   1707    //
   1708    //  NUL terminate the output string, if possible, otherwise issue the
   1709    //   appropriate error or warning.
   1710    //
   1711    if (destIdx < destCap) {
   1712        dest[destIdx] = 0;
   1713    } else  if (destIdx == destCap) {
   1714        *status = U_STRING_NOT_TERMINATED_WARNING;
   1715    } else {
   1716        *status = U_BUFFER_OVERFLOW_ERROR;
   1717    }
   1718 
   1719    //
   1720    // Update the user's buffer ptr and capacity vars to reflect the
   1721    //   amount used.
   1722    //
   1723    if (destIdx < destCap) {
   1724        *destBuf      += destIdx;
   1725        *destCapacity -= destIdx;
   1726    } else if (*destBuf != nullptr) {
   1727        *destBuf      += destCap;
   1728        *destCapacity  = 0;
   1729    }
   1730 
   1731    if (pendingBufferOverflow && U_SUCCESS(*status)) {
   1732        *status = U_BUFFER_OVERFLOW_ERROR;
   1733    }
   1734 
   1735    return destIdx;
   1736 }
   1737 
   1738 
   1739 //
   1740 //   appendTail   the actual API function
   1741 //
   1742 U_CAPI int32_t U_EXPORT2
   1743 uregex_appendTail(URegularExpression    *regexp2,
   1744                  char16_t             **destBuf,
   1745                  int32_t               *destCapacity,
   1746                  UErrorCode            *status)  {
   1747    RegularExpression *regexp = (RegularExpression*)regexp2;
   1748    return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
   1749 }
   1750 
   1751 
   1752 //
   1753 //   uregex_appendTailUText...can just use the normal C++ method
   1754 //
   1755 U_CAPI UText * U_EXPORT2
   1756 uregex_appendTailUText(URegularExpression    *regexp2,
   1757                       UText                 *dest,
   1758                       UErrorCode            *status)  {
   1759    RegularExpression *regexp = (RegularExpression*)regexp2;
   1760    return regexp->fMatcher->appendTail(dest, *status);
   1761 }
   1762 
   1763 
   1764 //------------------------------------------------------------------------------
   1765 //
   1766 //    copyString     Internal utility to copy a string to an output buffer,
   1767 //                   while managing buffer overflow and preflight size
   1768 //                   computation.  NUL termination is added to destination,
   1769 //                   and the NUL is counted in the output size.
   1770 //
   1771 //------------------------------------------------------------------------------
   1772 #if 0
   1773 static void copyString(char16_t     *destBuffer,    //  Destination buffer.
   1774                       int32_t       destCapacity,  //  Total capacity of dest buffer
   1775                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
   1776                                                    //    Update not clipped to destCapacity.
   1777                       const char16_t  *srcPtr,        //  Pointer to source string
   1778                       int32_t       srcLen)        //  Source string len.
   1779 {
   1780    int32_t  si;
   1781    int32_t  di = *destIndex;
   1782    char16_t c;
   1783 
   1784    for (si=0; si<srcLen;  si++) {
   1785        c = srcPtr[si];
   1786        if (di < destCapacity) {
   1787            destBuffer[di] = c;
   1788            di++;
   1789        } else {
   1790            di += srcLen - si;
   1791            break;
   1792        }
   1793    }
   1794    if (di<destCapacity) {
   1795        destBuffer[di] = 0;
   1796    }
   1797    di++;
   1798    *destIndex = di;
   1799 }
   1800 #endif
   1801 
   1802 //------------------------------------------------------------------------------
   1803 //
   1804 //    uregex_split
   1805 //
   1806 //------------------------------------------------------------------------------
   1807 int32_t RegexCImpl::split(RegularExpression     *regexp,
   1808                          char16_t              *destBuf,
   1809                          int32_t                destCapacity,
   1810                          int32_t               *requiredCapacity,
   1811                          char16_t              *destFields[],
   1812                          int32_t                destFieldsCapacity,
   1813                          UErrorCode            *status) {
   1814    //
   1815    // Reset for the input text
   1816    //
   1817    regexp->fMatcher->reset();
   1818    UText *inputText = regexp->fMatcher->fInputText;
   1819    int64_t   nextOutputStringStart = 0;
   1820    int64_t   inputLen = regexp->fMatcher->fInputLength;
   1821    if (inputLen == 0) {
   1822        return 0;
   1823    }
   1824 
   1825    //
   1826    // Loop through the input text, searching for the delimiter pattern
   1827    //
   1828    int32_t   i;             // Index of the field being processed.
   1829    int32_t   destIdx = 0;   // Next available position in destBuf;
   1830    int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
   1831    UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
   1832    for (i=0; ; i++) {
   1833        if (i>=destFieldsCapacity-1) {
   1834            // There are one or zero output strings left.
   1835            // Fill the last output string with whatever is left from the input, then exit the loop.
   1836            //  ( i will be == destFieldsCapacity if we filled the output array while processing
   1837            //    capture groups of the delimiter expression, in which case we will discard the
   1838            //    last capture group saved in favor of the unprocessed remainder of the
   1839            //    input string.)
   1840            if (inputLen > nextOutputStringStart) {
   1841                if (i != destFieldsCapacity-1) {
   1842                    // No fields are left.  Recycle the last one for holding the trailing part of
   1843                    //   the input string.
   1844                    i = destFieldsCapacity-1;
   1845                    destIdx = static_cast<int32_t>(destFields[i] - destFields[0]);
   1846                }
   1847 
   1848                destFields[i] = (destBuf == nullptr) ? nullptr :  &destBuf[destIdx];
   1849                destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1850                                             destFields[i], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1851            }
   1852            break;
   1853        }
   1854 
   1855        if (regexp->fMatcher->find()) {
   1856            // We found another delimiter.  Move everything from where we started looking
   1857            //  up until the start of the delimiter into the next output string.
   1858            destFields[i] = (destBuf == nullptr) ? nullptr :  &destBuf[destIdx];
   1859 
   1860            destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
   1861                                         destFields[i], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
   1862            if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1863                tStatus = U_ZERO_ERROR;
   1864            } else {
   1865                *status = tStatus;
   1866            }
   1867            nextOutputStringStart = regexp->fMatcher->fMatchEnd;
   1868 
   1869            // If the delimiter pattern has capturing parentheses, the captured
   1870            //  text goes out into the next n destination strings.
   1871            int32_t groupNum;
   1872            for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
   1873                // If we've run out of output string slots, bail out.
   1874                if (i==destFieldsCapacity-1) {
   1875                    break;
   1876                }
   1877                i++;
   1878 
   1879                // Set up to extract the capture group contents into the dest buffer.
   1880                destFields[i] = &destBuf[destIdx];
   1881                tStatus = U_ZERO_ERROR;
   1882                int32_t t = uregex_group(reinterpret_cast<URegularExpression*>(regexp),
   1883                                         groupNum,
   1884                                         destFields[i],
   1885                                         REMAINING_CAPACITY(destIdx, destCapacity),
   1886                                         &tStatus);
   1887                destIdx += t + 1;    // Record the space used in the output string buffer.
   1888                                     //  +1 for the NUL that terminates the string.
   1889                if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
   1890                    tStatus = U_ZERO_ERROR;
   1891                } else {
   1892                    *status = tStatus;
   1893                }
   1894            }
   1895 
   1896            if (nextOutputStringStart == inputLen) {
   1897                // The delimiter was at the end of the string.
   1898                // Output an empty string, and then we are done.
   1899                if (destIdx < destCapacity) {
   1900                    destBuf[destIdx] = 0;
   1901                }
   1902                if (i < destFieldsCapacity-1) {
   1903                   ++i;
   1904                }
   1905                if (destIdx < destCapacity) {
   1906                    destFields[i] = destBuf + destIdx;
   1907                }
   1908                ++destIdx;
   1909                break;
   1910            }
   1911 
   1912        }
   1913        else
   1914        {
   1915            // We ran off the end of the input while looking for the next delimiter.
   1916            // All the remaining text goes into the current output string.
   1917            destFields[i] = (destBuf == nullptr) ? nullptr : &destBuf[destIdx];
   1918            destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
   1919                                         destFields[i], REMAINING_CAPACITY(destIdx, destCapacity), status);
   1920            break;
   1921        }
   1922    }
   1923 
   1924    // Zero out any unused portion of the destFields array
   1925    int j;
   1926    for (j=i+1; j<destFieldsCapacity; j++) {
   1927        destFields[j] = nullptr;
   1928    }
   1929 
   1930    if (requiredCapacity != nullptr) {
   1931        *requiredCapacity = destIdx;
   1932    }
   1933    if (destIdx > destCapacity) {
   1934        *status = U_BUFFER_OVERFLOW_ERROR;
   1935    }
   1936    return i+1;
   1937 }
   1938 
   1939 //
   1940 //   uregex_split   The actual API function
   1941 //
   1942 U_CAPI int32_t U_EXPORT2
   1943 uregex_split(URegularExpression      *regexp2,
   1944             char16_t                *destBuf,
   1945             int32_t                  destCapacity,
   1946             int32_t                 *requiredCapacity,
   1947             char16_t                *destFields[],
   1948             int32_t                  destFieldsCapacity,
   1949             UErrorCode              *status) {
   1950    RegularExpression *regexp = (RegularExpression*)regexp2;
   1951    if (validateRE(regexp, true, status) == false) {
   1952        return 0;
   1953    }
   1954    if ((destBuf == nullptr && destCapacity > 0) ||
   1955        destCapacity < 0 ||
   1956        destFields == nullptr ||
   1957        destFieldsCapacity < 1 ) {
   1958        *status = U_ILLEGAL_ARGUMENT_ERROR;
   1959        return 0;
   1960    }
   1961 
   1962    return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
   1963 }
   1964 
   1965 
   1966 //
   1967 //   uregex_splitUText...can just use the normal C++ method
   1968 //
   1969 U_CAPI int32_t U_EXPORT2
   1970 uregex_splitUText(URegularExpression    *regexp2,
   1971                  UText                 *destFields[],
   1972                  int32_t                destFieldsCapacity,
   1973                  UErrorCode            *status) {
   1974    RegularExpression *regexp = (RegularExpression*)regexp2;
   1975    return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
   1976 }
   1977 
   1978 
   1979 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS