tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

edits.cpp (29014B)


      1 // © 2017 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // edits.cpp
      5 // created: 2017feb08 Markus W. Scherer
      6 
      7 #include "unicode/edits.h"
      8 #include "unicode/unistr.h"
      9 #include "unicode/utypes.h"
     10 #include "cmemory.h"
     11 #include "uassert.h"
     12 #include "util.h"
     13 
     14 U_NAMESPACE_BEGIN
     15 
     16 namespace {
     17 
     18 // 0000uuuuuuuuuuuu records u+1 unchanged text units.
     19 const int32_t MAX_UNCHANGED_LENGTH = 0x1000;
     20 const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
     21 
     22 // 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units.
     23 const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6;
     24 const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7;
     25 const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff;
     26 const int32_t MAX_SHORT_CHANGE = 0x6fff;
     27 
     28 // 0111mmmmmmnnnnnn records a replacement of m text units with n.
     29 // m or n = 61: actual length follows in the next edits array unit.
     30 // m or n = 62..63: actual length follows in the next two edits array units.
     31 // Bit 30 of the actual length is in the head unit.
     32 // Trailing units have bit 15 set.
     33 const int32_t LENGTH_IN_1TRAIL = 61;
     34 const int32_t LENGTH_IN_2TRAIL = 62;
     35 
     36 }  // namespace
     37 
     38 void Edits::releaseArray() noexcept {
     39    if (array != stackArray) {
     40        uprv_free(array);
     41    }
     42 }
     43 
     44 Edits &Edits::copyArray(const Edits &other) {
     45    if (U_FAILURE(errorCode_)) {
     46        length = delta = numChanges = 0;
     47        return *this;
     48    }
     49    if (length > capacity) {
     50        uint16_t* newArray = static_cast<uint16_t*>(uprv_malloc(static_cast<size_t>(length) * 2));
     51        if (newArray == nullptr) {
     52            length = delta = numChanges = 0;
     53            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
     54            return *this;
     55        }
     56        releaseArray();
     57        array = newArray;
     58        capacity = length;
     59    }
     60    if (length > 0) {
     61        uprv_memcpy(array, other.array, (size_t)length * 2);
     62    }
     63    return *this;
     64 }
     65 
     66 Edits &Edits::moveArray(Edits &src) noexcept {
     67    if (U_FAILURE(errorCode_)) {
     68        length = delta = numChanges = 0;
     69        return *this;
     70    }
     71    releaseArray();
     72    if (length > STACK_CAPACITY) {
     73        array = src.array;
     74        capacity = src.capacity;
     75        src.array = src.stackArray;
     76        src.capacity = STACK_CAPACITY;
     77        src.reset();
     78        return *this;
     79    }
     80    array = stackArray;
     81    capacity = STACK_CAPACITY;
     82    if (length > 0) {
     83        uprv_memcpy(array, src.array, (size_t)length * 2);
     84    }
     85    return *this;
     86 }
     87 
     88 Edits &Edits::operator=(const Edits &other) {
     89    if (this == &other) { return *this; }  // self-assignment: no-op
     90    length = other.length;
     91    delta = other.delta;
     92    numChanges = other.numChanges;
     93    errorCode_ = other.errorCode_;
     94    return copyArray(other);
     95 }
     96 
     97 Edits &Edits::operator=(Edits &&src) noexcept {
     98    length = src.length;
     99    delta = src.delta;
    100    numChanges = src.numChanges;
    101    errorCode_ = src.errorCode_;
    102    return moveArray(src);
    103 }
    104 
    105 Edits::~Edits() {
    106    releaseArray();
    107 }
    108 
    109 void Edits::reset() noexcept {
    110    length = delta = numChanges = 0;
    111    errorCode_ = U_ZERO_ERROR;
    112 }
    113 
    114 void Edits::addUnchanged(int32_t unchangedLength) {
    115    if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
    116    if(unchangedLength < 0) {
    117        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
    118        return;
    119    }
    120    // Merge into previous unchanged-text record, if any.
    121    int32_t last = lastUnit();
    122    if(last < MAX_UNCHANGED) {
    123        int32_t remaining = MAX_UNCHANGED - last;
    124        if (remaining >= unchangedLength) {
    125            setLastUnit(last + unchangedLength);
    126            return;
    127        }
    128        setLastUnit(MAX_UNCHANGED);
    129        unchangedLength -= remaining;
    130    }
    131    // Split large lengths into multiple units.
    132    while(unchangedLength >= MAX_UNCHANGED_LENGTH) {
    133        append(MAX_UNCHANGED);
    134        unchangedLength -= MAX_UNCHANGED_LENGTH;
    135    }
    136    // Write a small (remaining) length.
    137    if(unchangedLength > 0) {
    138        append(unchangedLength - 1);
    139    }
    140 }
    141 
    142 void Edits::addReplace(int32_t oldLength, int32_t newLength) {
    143    if(U_FAILURE(errorCode_)) { return; }
    144    if(oldLength < 0 || newLength < 0) {
    145        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
    146        return;
    147    }
    148    if (oldLength == 0 && newLength == 0) {
    149        return;
    150    }
    151    ++numChanges;
    152    int32_t newDelta = newLength - oldLength;
    153    if (newDelta != 0) {
    154        if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
    155                (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
    156            // Integer overflow or underflow.
    157            errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
    158            return;
    159        }
    160        delta += newDelta;
    161    }
    162 
    163    if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH &&
    164            newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) {
    165        // Merge into previous same-lengths short-replacement record, if any.
    166        int32_t u = (oldLength << 12) | (newLength << 9);
    167        int32_t last = lastUnit();
    168        if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
    169                (last & ~SHORT_CHANGE_NUM_MASK) == u &&
    170                (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) {
    171            setLastUnit(last + 1);
    172            return;
    173        }
    174        append(u);
    175        return;
    176    }
    177 
    178    int32_t head = 0x7000;
    179    if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
    180        head |= oldLength << 6;
    181        head |= newLength;
    182        append(head);
    183    } else if ((capacity - length) >= 5 || growArray()) {
    184        int32_t limit = length + 1;
    185        if(oldLength < LENGTH_IN_1TRAIL) {
    186            head |= oldLength << 6;
    187        } else if(oldLength <= 0x7fff) {
    188            head |= LENGTH_IN_1TRAIL << 6;
    189            array[limit++] = static_cast<uint16_t>(0x8000 | oldLength);
    190        } else {
    191            head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
    192            array[limit++] = static_cast<uint16_t>(0x8000 | (oldLength >> 15));
    193            array[limit++] = static_cast<uint16_t>(0x8000 | oldLength);
    194        }
    195        if(newLength < LENGTH_IN_1TRAIL) {
    196            head |= newLength;
    197        } else if(newLength <= 0x7fff) {
    198            head |= LENGTH_IN_1TRAIL;
    199            array[limit++] = static_cast<uint16_t>(0x8000 | newLength);
    200        } else {
    201            head |= LENGTH_IN_2TRAIL + (newLength >> 30);
    202            array[limit++] = static_cast<uint16_t>(0x8000 | (newLength >> 15));
    203            array[limit++] = static_cast<uint16_t>(0x8000 | newLength);
    204        }
    205        array[length] = static_cast<uint16_t>(head);
    206        length = limit;
    207    }
    208 }
    209 
    210 void Edits::append(int32_t r) {
    211    if(length < capacity || growArray()) {
    212        array[length++] = static_cast<uint16_t>(r);
    213    }
    214 }
    215 
    216 UBool Edits::growArray() {
    217    int32_t newCapacity;
    218    if (array == stackArray) {
    219        newCapacity = 2000;
    220    } else if (capacity == INT32_MAX) {
    221        // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
    222        // with a result-string-buffer overflow.
    223        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
    224        return false;
    225    } else if (capacity >= (INT32_MAX / 2)) {
    226        newCapacity = INT32_MAX;
    227    } else {
    228        newCapacity = 2 * capacity;
    229    }
    230    // Grow by at least 5 units so that a maximal change record will fit.
    231    if ((newCapacity - capacity) < 5) {
    232        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
    233        return false;
    234    }
    235    uint16_t* newArray = static_cast<uint16_t*>(uprv_malloc(static_cast<size_t>(newCapacity) * 2));
    236    if (newArray == nullptr) {
    237        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
    238        return false;
    239    }
    240    uprv_memcpy(newArray, array, (size_t)length * 2);
    241    releaseArray();
    242    array = newArray;
    243    capacity = newCapacity;
    244    return true;
    245 }
    246 
    247 UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const {
    248    if (U_FAILURE(outErrorCode)) { return true; }
    249    if (U_SUCCESS(errorCode_)) { return false; }
    250    outErrorCode = errorCode_;
    251    return true;
    252 }
    253 
    254 Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
    255    if (copyErrorTo(errorCode)) { return *this; }
    256    // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
    257    // Parallel iteration over both Edits.
    258    Iterator abIter = ab.getFineIterator();
    259    Iterator bcIter = bc.getFineIterator();
    260    UBool abHasNext = true, bcHasNext = true;
    261    // Copy iterator state into local variables, so that we can modify and subdivide spans.
    262    // ab old & new length, bc old & new length
    263    int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
    264    // When we have different-intermediate-length changes, we accumulate a larger change.
    265    int32_t pending_aLength = 0, pending_cLength = 0;
    266    for (;;) {
    267        // At this point, for each of the two iterators:
    268        // Either we are done with the locally cached current edit,
    269        // and its intermediate-string length has been reset,
    270        // or we will continue to work with a truncated remainder of this edit.
    271        //
    272        // If the current edit is done, and the iterator has not yet reached the end,
    273        // then we fetch the next edit. This is true for at least one of the iterators.
    274        //
    275        // Normally it does not matter whether we fetch from ab and then bc or vice versa.
    276        // However, the result is observably different when
    277        // ab deletions meet bc insertions at the same intermediate-string index.
    278        // Some users expect the bc insertions to come first, so we fetch from bc first.
    279        if (bc_bLength == 0) {
    280            if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) {
    281                bc_bLength = bcIter.oldLength();
    282                cLength = bcIter.newLength();
    283                if (bc_bLength == 0) {
    284                    // insertion
    285                    if (ab_bLength == 0 || !abIter.hasChange()) {
    286                        addReplace(pending_aLength, pending_cLength + cLength);
    287                        pending_aLength = pending_cLength = 0;
    288                    } else {
    289                        pending_cLength += cLength;
    290                    }
    291                    continue;
    292                }
    293            }
    294            // else see if the other iterator is done, too.
    295        }
    296        if (ab_bLength == 0) {
    297            if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) {
    298                aLength = abIter.oldLength();
    299                ab_bLength = abIter.newLength();
    300                if (ab_bLength == 0) {
    301                    // deletion
    302                    if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
    303                        addReplace(pending_aLength + aLength, pending_cLength);
    304                        pending_aLength = pending_cLength = 0;
    305                    } else {
    306                        pending_aLength += aLength;
    307                    }
    308                    continue;
    309                }
    310            } else if (bc_bLength == 0) {
    311                // Both iterators are done at the same time:
    312                // The intermediate-string lengths match.
    313                break;
    314            } else {
    315                // The ab output string is shorter than the bc input string.
    316                if (!copyErrorTo(errorCode)) {
    317                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    318                }
    319                return *this;
    320            }
    321        }
    322        if (bc_bLength == 0) {
    323            // The bc input string is shorter than the ab output string.
    324            if (!copyErrorTo(errorCode)) {
    325                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    326            }
    327            return *this;
    328        }
    329        //  Done fetching: ab_bLength > 0 && bc_bLength > 0
    330 
    331        // The current state has two parts:
    332        // - Past: We accumulate a longer ac edit in the "pending" variables.
    333        // - Current: We have copies of the current ab/bc edits in local variables.
    334        //   At least one side is newly fetched.
    335        //   One side might be a truncated remainder of an edit we fetched earlier.
    336 
    337        if (!abIter.hasChange() && !bcIter.hasChange()) {
    338            // An unchanged span all the way from string a to string c.
    339            if (pending_aLength != 0 || pending_cLength != 0) {
    340                addReplace(pending_aLength, pending_cLength);
    341                pending_aLength = pending_cLength = 0;
    342            }
    343            int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
    344            addUnchanged(unchangedLength);
    345            ab_bLength = aLength -= unchangedLength;
    346            bc_bLength = cLength -= unchangedLength;
    347            // At least one of the unchanged spans is now empty.
    348            continue;
    349        }
    350        if (!abIter.hasChange() && bcIter.hasChange()) {
    351            // Unchanged a->b but changed b->c.
    352            if (ab_bLength >= bc_bLength) {
    353                // Split the longer unchanged span into change + remainder.
    354                addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
    355                pending_aLength = pending_cLength = 0;
    356                aLength = ab_bLength -= bc_bLength;
    357                bc_bLength = 0;
    358                continue;
    359            }
    360            // Handle the shorter unchanged span below like a change.
    361        } else if (abIter.hasChange() && !bcIter.hasChange()) {
    362            // Changed a->b and then unchanged b->c.
    363            if (ab_bLength <= bc_bLength) {
    364                // Split the longer unchanged span into change + remainder.
    365                addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
    366                pending_aLength = pending_cLength = 0;
    367                cLength = bc_bLength -= ab_bLength;
    368                ab_bLength = 0;
    369                continue;
    370            }
    371            // Handle the shorter unchanged span below like a change.
    372        } else {  // both abIter.hasChange() && bcIter.hasChange()
    373            if (ab_bLength == bc_bLength) {
    374                // Changes on both sides up to the same position. Emit & reset.
    375                addReplace(pending_aLength + aLength, pending_cLength + cLength);
    376                pending_aLength = pending_cLength = 0;
    377                ab_bLength = bc_bLength = 0;
    378                continue;
    379            }
    380        }
    381        // Accumulate the a->c change, reset the shorter side,
    382        // keep a remainder of the longer one.
    383        pending_aLength += aLength;
    384        pending_cLength += cLength;
    385        if (ab_bLength < bc_bLength) {
    386            bc_bLength -= ab_bLength;
    387            cLength = ab_bLength = 0;
    388        } else {  // ab_bLength > bc_bLength
    389            ab_bLength -= bc_bLength;
    390            aLength = bc_bLength = 0;
    391        }
    392    }
    393    if (pending_aLength != 0 || pending_cLength != 0) {
    394        addReplace(pending_aLength, pending_cLength);
    395    }
    396    copyErrorTo(errorCode);
    397    return *this;
    398 }
    399 
    400 Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
    401        array(a), index(0), length(len), remaining(0),
    402        onlyChanges_(oc), coarse(crs),
    403        dir(0), changed(false), oldLength_(0), newLength_(0),
    404        srcIndex(0), replIndex(0), destIndex(0) {}
    405 
    406 int32_t Edits::Iterator::readLength(int32_t head) {
    407    if (head < LENGTH_IN_1TRAIL) {
    408        return head;
    409    } else if (head < LENGTH_IN_2TRAIL) {
    410        U_ASSERT(index < length);
    411        U_ASSERT(array[index] >= 0x8000);
    412        return array[index++] & 0x7fff;
    413    } else {
    414        U_ASSERT((index + 2) <= length);
    415        U_ASSERT(array[index] >= 0x8000);
    416        U_ASSERT(array[index + 1] >= 0x8000);
    417        int32_t len = ((head & 1) << 30) |
    418                (static_cast<int32_t>(array[index] & 0x7fff) << 15) |
    419                (array[index + 1] & 0x7fff);
    420        index += 2;
    421        return len;
    422    }
    423 }
    424 
    425 void Edits::Iterator::updateNextIndexes() {
    426    srcIndex += oldLength_;
    427    if (changed) {
    428        replIndex += newLength_;
    429    }
    430    destIndex += newLength_;
    431 }
    432 
    433 void Edits::Iterator::updatePreviousIndexes() {
    434    srcIndex -= oldLength_;
    435    if (changed) {
    436        replIndex -= newLength_;
    437    }
    438    destIndex -= newLength_;
    439 }
    440 
    441 UBool Edits::Iterator::noNext() {
    442    // No change before or beyond the string.
    443    dir = 0;
    444    changed = false;
    445    oldLength_ = newLength_ = 0;
    446    return false;
    447 }
    448 
    449 UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
    450    // Forward iteration: Update the string indexes to the limit of the current span,
    451    // and post-increment-read array units to assemble a new span.
    452    // Leaves the array index one after the last unit of that span.
    453    if (U_FAILURE(errorCode)) { return false; }
    454    // We have an errorCode in case we need to start guarding against integer overflows.
    455    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
    456    if (dir > 0) {
    457        updateNextIndexes();
    458    } else {
    459        if (dir < 0) {
    460            // Turn around from previous() to next().
    461            // Post-increment-read the same span again.
    462            if (remaining > 0) {
    463                // Fine-grained iterator:
    464                // Stay on the current one of a sequence of compressed changes.
    465                ++index;  // next() rests on the index after the sequence unit.
    466                dir = 1;
    467                return true;
    468            }
    469        }
    470        dir = 1;
    471    }
    472    if (remaining >= 1) {
    473        // Fine-grained iterator: Continue a sequence of compressed changes.
    474        if (remaining > 1) {
    475            --remaining;
    476            return true;
    477        }
    478        remaining = 0;
    479    }
    480    if (index >= length) {
    481        return noNext();
    482    }
    483    int32_t u = array[index++];
    484    if (u <= MAX_UNCHANGED) {
    485        // Combine adjacent unchanged ranges.
    486        changed = false;
    487        oldLength_ = u + 1;
    488        while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
    489            ++index;
    490            oldLength_ += u + 1;
    491        }
    492        newLength_ = oldLength_;
    493        if (onlyChanges) {
    494            updateNextIndexes();
    495            if (index >= length) {
    496                return noNext();
    497            }
    498            // already fetched u > MAX_UNCHANGED at index
    499            ++index;
    500        } else {
    501            return true;
    502        }
    503    }
    504    changed = true;
    505    if (u <= MAX_SHORT_CHANGE) {
    506        int32_t oldLen = u >> 12;
    507        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
    508        int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
    509        if (coarse) {
    510            oldLength_ = num * oldLen;
    511            newLength_ = num * newLen;
    512        } else {
    513            // Split a sequence of changes that was compressed into one unit.
    514            oldLength_ = oldLen;
    515            newLength_ = newLen;
    516            if (num > 1) {
    517                remaining = num;  // This is the first of two or more changes.
    518            }
    519            return true;
    520        }
    521    } else {
    522        U_ASSERT(u <= 0x7fff);
    523        oldLength_ = readLength((u >> 6) & 0x3f);
    524        newLength_ = readLength(u & 0x3f);
    525        if (!coarse) {
    526            return true;
    527        }
    528    }
    529    // Combine adjacent changes.
    530    while (index < length && (u = array[index]) > MAX_UNCHANGED) {
    531        ++index;
    532        if (u <= MAX_SHORT_CHANGE) {
    533            int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
    534            oldLength_ += (u >> 12) * num;
    535            newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
    536        } else {
    537            U_ASSERT(u <= 0x7fff);
    538            oldLength_ += readLength((u >> 6) & 0x3f);
    539            newLength_ += readLength(u & 0x3f);
    540        }
    541    }
    542    return true;
    543 }
    544 
    545 UBool Edits::Iterator::previous(UErrorCode &errorCode) {
    546    // Backward iteration: Pre-decrement-read array units to assemble a new span,
    547    // then update the string indexes to the start of that span.
    548    // Leaves the array index on the head unit of that span.
    549    if (U_FAILURE(errorCode)) { return false; }
    550    // We have an errorCode in case we need to start guarding against integer overflows.
    551    // It is also convenient for caller loops if we bail out when an error was set elsewhere.
    552    if (dir >= 0) {
    553        if (dir > 0) {
    554            // Turn around from next() to previous().
    555            // Set the string indexes to the span limit and
    556            // pre-decrement-read the same span again.
    557            if (remaining > 0) {
    558                // Fine-grained iterator:
    559                // Stay on the current one of a sequence of compressed changes.
    560                --index;  // previous() rests on the sequence unit.
    561                dir = -1;
    562                return true;
    563            }
    564            updateNextIndexes();
    565        }
    566        dir = -1;
    567    }
    568    if (remaining > 0) {
    569        // Fine-grained iterator: Continue a sequence of compressed changes.
    570        int32_t u = array[index];
    571        U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
    572        if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) {
    573            ++remaining;
    574            updatePreviousIndexes();
    575            return true;
    576        }
    577        remaining = 0;
    578    }
    579    if (index <= 0) {
    580        return noNext();
    581    }
    582    int32_t u = array[--index];
    583    if (u <= MAX_UNCHANGED) {
    584        // Combine adjacent unchanged ranges.
    585        changed = false;
    586        oldLength_ = u + 1;
    587        while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) {
    588            --index;
    589            oldLength_ += u + 1;
    590        }
    591        newLength_ = oldLength_;
    592        // No need to handle onlyChanges as long as previous() is called only from findIndex().
    593        updatePreviousIndexes();
    594        return true;
    595    }
    596    changed = true;
    597    if (u <= MAX_SHORT_CHANGE) {
    598        int32_t oldLen = u >> 12;
    599        int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH;
    600        int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
    601        if (coarse) {
    602            oldLength_ = num * oldLen;
    603            newLength_ = num * newLen;
    604        } else {
    605            // Split a sequence of changes that was compressed into one unit.
    606            oldLength_ = oldLen;
    607            newLength_ = newLen;
    608            if (num > 1) {
    609                remaining = 1;  // This is the last of two or more changes.
    610            }
    611            updatePreviousIndexes();
    612            return true;
    613        }
    614    } else {
    615        if (u <= 0x7fff) {
    616            // The change is encoded in u alone.
    617            oldLength_ = readLength((u >> 6) & 0x3f);
    618            newLength_ = readLength(u & 0x3f);
    619        } else {
    620            // Back up to the head of the change, read the lengths,
    621            // and reset the index to the head again.
    622            U_ASSERT(index > 0);
    623            while ((u = array[--index]) > 0x7fff) {}
    624            U_ASSERT(u > MAX_SHORT_CHANGE);
    625            int32_t headIndex = index++;
    626            oldLength_ = readLength((u >> 6) & 0x3f);
    627            newLength_ = readLength(u & 0x3f);
    628            index = headIndex;
    629        }
    630        if (!coarse) {
    631            updatePreviousIndexes();
    632            return true;
    633        }
    634    }
    635    // Combine adjacent changes.
    636    while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) {
    637        --index;
    638        if (u <= MAX_SHORT_CHANGE) {
    639            int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1;
    640            oldLength_ += (u >> 12) * num;
    641            newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num;
    642        } else if (u <= 0x7fff) {
    643            // Read the lengths, and reset the index to the head again.
    644            int32_t headIndex = index++;
    645            oldLength_ += readLength((u >> 6) & 0x3f);
    646            newLength_ += readLength(u & 0x3f);
    647            index = headIndex;
    648        }
    649    }
    650    updatePreviousIndexes();
    651    return true;
    652 }
    653 
    654 int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
    655    if (U_FAILURE(errorCode) || i < 0) { return -1; }
    656    int32_t spanStart, spanLength;
    657    if (findSource) {  // find source index
    658        spanStart = srcIndex;
    659        spanLength = oldLength_;
    660    } else {  // find destination index
    661        spanStart = destIndex;
    662        spanLength = newLength_;
    663    }
    664    if (i < spanStart) {
    665        if (i >= (spanStart / 2)) {
    666            // Search backwards.
    667            for (;;) {
    668                UBool hasPrevious = previous(errorCode);
    669                U_ASSERT(hasPrevious);  // because i>=0 and the first span starts at 0
    670                (void)hasPrevious;  // avoid unused-variable warning
    671                spanStart = findSource ? srcIndex : destIndex;
    672                if (i >= spanStart) {
    673                    // The index is in the current span.
    674                    return 0;
    675                }
    676                if (remaining > 0) {
    677                    // Is the index in one of the remaining compressed edits?
    678                    // spanStart is the start of the current span, first of the remaining ones.
    679                    spanLength = findSource ? oldLength_ : newLength_;
    680                    int32_t u = array[index];
    681                    U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE);
    682                    int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining;
    683                    int32_t len = num * spanLength;
    684                    if (i >= (spanStart - len)) {
    685                        int32_t n = ((spanStart - i - 1) / spanLength) + 1;
    686                        // 1 <= n <= num
    687                        srcIndex -= n * oldLength_;
    688                        replIndex -= n * newLength_;
    689                        destIndex -= n * newLength_;
    690                        remaining += n;
    691                        return 0;
    692                    }
    693                    // Skip all of these edits at once.
    694                    srcIndex -= num * oldLength_;
    695                    replIndex -= num * newLength_;
    696                    destIndex -= num * newLength_;
    697                    remaining = 0;
    698                }
    699            }
    700        }
    701        // Reset the iterator to the start.
    702        dir = 0;
    703        index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
    704    } else if (i < (spanStart + spanLength)) {
    705        // The index is in the current span.
    706        return 0;
    707    }
    708    while (next(false, errorCode)) {
    709        if (findSource) {
    710            spanStart = srcIndex;
    711            spanLength = oldLength_;
    712        } else {
    713            spanStart = destIndex;
    714            spanLength = newLength_;
    715        }
    716        if (i < (spanStart + spanLength)) {
    717            // The index is in the current span.
    718            return 0;
    719        }
    720        if (remaining > 1) {
    721            // Is the index in one of the remaining compressed edits?
    722            // spanStart is the start of the current span, first of the remaining ones.
    723            int32_t len = remaining * spanLength;
    724            if (i < (spanStart + len)) {
    725                int32_t n = (i - spanStart) / spanLength;  // 1 <= n <= remaining - 1
    726                srcIndex += n * oldLength_;
    727                replIndex += n * newLength_;
    728                destIndex += n * newLength_;
    729                remaining -= n;
    730                return 0;
    731            }
    732            // Make next() skip all of these edits at once.
    733            oldLength_ *= remaining;
    734            newLength_ *= remaining;
    735            remaining = 0;
    736        }
    737    }
    738    return 1;
    739 }
    740 
    741 int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
    742    int32_t where = findIndex(i, true, errorCode);
    743    if (where < 0) {
    744        // Error or before the string.
    745        return 0;
    746    }
    747    if (where > 0 || i == srcIndex) {
    748        // At or after string length, or at start of the found span.
    749        return destIndex;
    750    }
    751    if (changed) {
    752        // In a change span, map to its end.
    753        return destIndex + newLength_;
    754    } else {
    755        // In an unchanged span, offset 1:1 within it.
    756        return destIndex + (i - srcIndex);
    757    }
    758 }
    759 
    760 int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
    761    int32_t where = findIndex(i, false, errorCode);
    762    if (where < 0) {
    763        // Error or before the string.
    764        return 0;
    765    }
    766    if (where > 0 || i == destIndex) {
    767        // At or after string length, or at start of the found span.
    768        return srcIndex;
    769    }
    770    if (changed) {
    771        // In a change span, map to its end.
    772        return srcIndex + oldLength_;
    773    } else {
    774        // In an unchanged span, offset within it.
    775        return srcIndex + (i - destIndex);
    776    }
    777 }
    778 
    779 UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const {
    780    sb.append(u"{ src[", -1);
    781    ICU_Utility::appendNumber(sb, srcIndex);
    782    sb.append(u"..", -1);
    783    ICU_Utility::appendNumber(sb, srcIndex + oldLength_);
    784    if (changed) {
    785        sb.append(u"] ⇝ dest[", -1);
    786    } else {
    787        sb.append(u"] ≡ dest[", -1);
    788    }
    789    ICU_Utility::appendNumber(sb, destIndex);
    790    sb.append(u"..", -1);
    791    ICU_Utility::appendNumber(sb, destIndex + newLength_);
    792    if (changed) {
    793        sb.append(u"], repl[", -1);
    794        ICU_Utility::appendNumber(sb, replIndex);
    795        sb.append(u"..", -1);
    796        ICU_Utility::appendNumber(sb, replIndex + newLength_);
    797        sb.append(u"] }", -1);
    798    } else {
    799        sb.append(u"] (no-change) }", -1);
    800    }
    801    return sb;
    802 }
    803 
    804 U_NAMESPACE_END