tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ubidi.cpp (122949B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *   file name:  ubidi.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 1999jul27
     16 *   created by: Markus W. Scherer, updated by Matitiahu Allouche
     17 *
     18 */
     19 
     20 #include "cmemory.h"
     21 #include "unicode/utypes.h"
     22 #include "unicode/ustring.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/ubidi.h"
     25 #include "unicode/utf16.h"
     26 #include "ubidi_props.h"
     27 #include "ubidiimp.h"
     28 #include "uassert.h"
     29 
     30 /*
     31 * General implementation notes:
     32 *
     33 * Throughout the implementation, there are comments like (W2) that refer to
     34 * rules of the BiDi algorithm, in this example to the second rule of the
     35 * resolution of weak types.
     36 *
     37 * For handling surrogate pairs, where two char16_t's form one "abstract" (or UTF-32)
     38 * character according to UTF-16, the second char16_t gets the directional property of
     39 * the entire character assigned, while the first one gets a BN, a boundary
     40 * neutral, type, which is ignored by most of the algorithm according to
     41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
     42 *
     43 * Later, adjustWSLevels() will set the level for each BN to that of the
     44 * following character (char16_t), which results in surrogate pairs getting the
     45 * same level on each of their surrogates.
     46 *
     47 * In a UTF-8 implementation, the same thing could be done: the last byte of
     48 * a multi-byte sequence would get the "real" property, while all previous
     49 * bytes of that sequence would get BN.
     50 *
     51 * It is not possible to assign all those parts of a character the same real
     52 * property because this would fail in the resolution of weak types with rules
     53 * that look at immediately surrounding types.
     54 *
     55 * As a related topic, this implementation does not remove Boundary Neutral
     56 * types from the input, but ignores them wherever this is relevant.
     57 * For example, the loop for the resolution of the weak types reads
     58 * types until it finds a non-BN.
     59 * Also, explicit embedding codes are neither changed into BN nor removed.
     60 * They are only treated the same way real BNs are.
     61 * As stated before, adjustWSLevels() takes care of them at the end.
     62 * For the purpose of conformance, the levels of all these codes
     63 * do not matter.
     64 *
     65 * Note that this implementation modifies the dirProps
     66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
     67 * X6, N0 (replace paired brackets by L or R).
     68 *
     69 * In this implementation, the resolution of weak types (W1 to W6),
     70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
     71 * are all done in one single loop, in resolveImplicitLevels().
     72 * Changes of dirProp values are done on the fly, without writing
     73 * them back to the dirProps array.
     74 *
     75 *
     76 * This implementation contains code that allows bypassing steps of the
     77 * algorithm that are not needed on the specific paragraph
     78 * in order to speed up the most common cases considerably,
     79 * like text that is entirely LTR, or RTL text without numbers.
     80 *
     81 * Most of this is done by setting a bit for each directional property
     82 * in a flags variable and later checking for whether there are
     83 * any LTR characters or any RTL characters, or both, whether
     84 * there are any explicit embedding codes, etc.
     85 *
     86 * If the (Xn) steps are performed, then the flags are re-evaluated,
     87 * because they will then not contain the embedding codes any more
     88 * and will be adjusted for override codes, so that subsequently
     89 * more bypassing may be possible than what the initial flags suggested.
     90 *
     91 * If the text is not mixed-directional, then the
     92 * algorithm steps for the weak type resolution are not performed,
     93 * and all levels are set to the paragraph level.
     94 *
     95 * If there are no explicit embedding codes, then the (Xn) steps
     96 * are not performed.
     97 *
     98 * If embedding levels are supplied as a parameter, then all
     99 * explicit embedding codes are ignored, and the (Xn) steps
    100 * are not performed.
    101 *
    102 * White Space types could get the level of the run they belong to,
    103 * so a test of (flags&MASK_EMBEDDING) is used to decide whether
    104 * the paragraph direction should be taken into account in
    105 * the flags variable.
    106 *
    107 * If there are no White Space types in the paragraph, then
    108 * (L1) is not necessary in adjustWSLevels().
    109 */
    110 
    111 /* to avoid some conditional statements, use tiny constant arrays */
    112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
    113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
    114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
    115 
    116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
    117 #define DIRPROP_FLAG_E(level)  flagE[(level)&1]
    118 #define DIRPROP_FLAG_O(level)  flagO[(level)&1]
    119 
    120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
    121 
    122 #define NO_OVERRIDE(level)  ((level)&~UBIDI_LEVEL_OVERRIDE)
    123 
    124 /* UBiDi object management -------------------------------------------------- */
    125 
    126 U_CAPI UBiDi * U_EXPORT2
    127 ubidi_open()
    128 {
    129    UErrorCode errorCode=U_ZERO_ERROR;
    130    return ubidi_openSized(0, 0, &errorCode);
    131 }
    132 
    133 U_CAPI UBiDi * U_EXPORT2
    134 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
    135    UBiDi *pBiDi;
    136 
    137    /* check the argument values */
    138    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
    139        return nullptr;
    140    } else if(maxLength<0 || maxRunCount<0) {
    141        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    142        return nullptr;    /* invalid arguments */
    143    }
    144 
    145    /* allocate memory for the object */
    146    pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
    147    if(pBiDi==nullptr) {
    148        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    149        return nullptr;
    150    }
    151 
    152    /* reset the object, all pointers nullptr, all flags false, all sizes 0 */
    153    uprv_memset(pBiDi, 0, sizeof(UBiDi));
    154 
    155    /* allocate memory for arrays as requested */
    156    if(maxLength>0) {
    157        if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
    158            !getInitialLevelsMemory(pBiDi, maxLength)
    159        ) {
    160            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    161        }
    162    } else {
    163        pBiDi->mayAllocateText=true;
    164    }
    165 
    166    if(maxRunCount>0) {
    167        if(maxRunCount==1) {
    168            /* use simpleRuns[] */
    169            pBiDi->runsSize=sizeof(Run);
    170        } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
    171            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    172        }
    173    } else {
    174        pBiDi->mayAllocateRuns=true;
    175    }
    176 
    177    if(U_SUCCESS(*pErrorCode)) {
    178        return pBiDi;
    179    } else {
    180        ubidi_close(pBiDi);
    181        return nullptr;
    182    }
    183 }
    184 
    185 /*
    186 * We are allowed to allocate memory if memory==nullptr or
    187 * mayAllocate==true for each array that we need.
    188 * We also try to grow memory as needed if we
    189 * allocate it.
    190 *
    191 * Assume sizeNeeded>0.
    192 * If *pMemory!=nullptr, then assume *pSize>0.
    193 *
    194 * ### this realloc() may unnecessarily copy the old data,
    195 * which we know we don't need any more;
    196 * is this the best way to do this??
    197 */
    198 U_CFUNC UBool
    199 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
    200    void **pMemory = (void **)bidiMem;
    201    /* check for existing memory */
    202    if(*pMemory==nullptr) {
    203        /* we need to allocate memory */
    204        if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=nullptr) {
    205            *pSize=sizeNeeded;
    206            return true;
    207        } else {
    208            return false;
    209        }
    210    } else {
    211        if(sizeNeeded<=*pSize) {
    212            /* there is already enough memory */
    213            return true;
    214        }
    215        else if(!mayAllocate) {
    216            /* not enough memory, and we must not allocate */
    217            return false;
    218        } else {
    219            /* we try to grow */
    220            void *memory;
    221            /* in most cases, we do not need the copy-old-data part of
    222             * realloc, but it is needed when adding runs using getRunsMemory()
    223             * in setParaRunsOnly()
    224             */
    225            if((memory=uprv_realloc(*pMemory, sizeNeeded))!=nullptr) {
    226                *pMemory=memory;
    227                *pSize=sizeNeeded;
    228                return true;
    229            } else {
    230                /* we failed to grow */
    231                return false;
    232            }
    233        }
    234    }
    235 }
    236 
    237 U_CAPI void U_EXPORT2
    238 ubidi_close(UBiDi *pBiDi) {
    239    if(pBiDi!=nullptr) {
    240        pBiDi->pParaBiDi=nullptr;          /* in case one tries to reuse this block */
    241        if(pBiDi->dirPropsMemory!=nullptr) {
    242            uprv_free(pBiDi->dirPropsMemory);
    243        }
    244        if(pBiDi->levelsMemory!=nullptr) {
    245            uprv_free(pBiDi->levelsMemory);
    246        }
    247        if(pBiDi->openingsMemory!=nullptr) {
    248            uprv_free(pBiDi->openingsMemory);
    249        }
    250        if(pBiDi->parasMemory!=nullptr) {
    251            uprv_free(pBiDi->parasMemory);
    252        }
    253        if(pBiDi->runsMemory!=nullptr) {
    254            uprv_free(pBiDi->runsMemory);
    255        }
    256        if(pBiDi->isolatesMemory!=nullptr) {
    257            uprv_free(pBiDi->isolatesMemory);
    258        }
    259        if(pBiDi->insertPoints.points!=nullptr) {
    260            uprv_free(pBiDi->insertPoints.points);
    261        }
    262 
    263        uprv_free(pBiDi);
    264    }
    265 }
    266 
    267 /* set to approximate "inverse BiDi" ---------------------------------------- */
    268 
    269 U_CAPI void U_EXPORT2
    270 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
    271    if(pBiDi!=nullptr) {
    272        pBiDi->isInverse=isInverse;
    273        pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
    274                                          : UBIDI_REORDER_DEFAULT;
    275    }
    276 }
    277 
    278 U_CAPI UBool U_EXPORT2
    279 ubidi_isInverse(UBiDi *pBiDi) {
    280    if(pBiDi!=nullptr) {
    281        return pBiDi->isInverse;
    282    } else {
    283        return false;
    284    }
    285 }
    286 
    287 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
    288 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
    289 * concept of RUNS_ONLY which is a double operation.
    290 * It could be advantageous to divide this into 3 concepts:
    291 * a) Operation: direct / inverse / RUNS_ONLY
    292 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
    293 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
    294 * This would allow combinations not possible today like RUNS_ONLY with
    295 * NUMBERS_SPECIAL.
    296 * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY
    297 * and REMOVE_CONTROLS for the inverse step.
    298 * Not all combinations would be supported, and probably not all of them make sense.
    299 * This would need to document which ones are supported and what are the
    300 * fallbacks for unsupported combinations.
    301 */
    302 U_CAPI void U_EXPORT2
    303 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) UPRV_NO_SANITIZE_UNDEFINED {
    304    if ((pBiDi!=nullptr) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
    305                        && (reorderingMode < UBIDI_REORDER_COUNT)) {
    306        pBiDi->reorderingMode = reorderingMode;
    307        pBiDi->isInverse = reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L;
    308    }
    309 }
    310 
    311 U_CAPI UBiDiReorderingMode U_EXPORT2
    312 ubidi_getReorderingMode(UBiDi *pBiDi) {
    313    if (pBiDi!=nullptr) {
    314        return pBiDi->reorderingMode;
    315    } else {
    316        return UBIDI_REORDER_DEFAULT;
    317    }
    318 }
    319 
    320 U_CAPI void U_EXPORT2
    321 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
    322    if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
    323        reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
    324    }
    325    if (pBiDi!=nullptr) {
    326        pBiDi->reorderingOptions=reorderingOptions;
    327    }
    328 }
    329 
    330 U_CAPI uint32_t U_EXPORT2
    331 ubidi_getReorderingOptions(UBiDi *pBiDi) {
    332    if (pBiDi!=nullptr) {
    333        return pBiDi->reorderingOptions;
    334    } else {
    335        return 0;
    336    }
    337 }
    338 
    339 U_CAPI UBiDiDirection U_EXPORT2
    340 ubidi_getBaseDirection(const char16_t *text,
    341 int32_t length){
    342 
    343    int32_t i;
    344    UChar32 uchar;
    345    UCharDirection dir;
    346 
    347    if( text==nullptr || length<-1 ){
    348        return UBIDI_NEUTRAL;
    349    }
    350 
    351    if(length==-1) {
    352        length=u_strlen(text);
    353    }
    354 
    355    for( i = 0 ; i < length; ) {
    356        /* i is incremented by U16_NEXT */
    357        U16_NEXT(text, i, length, uchar);
    358        dir = u_charDirection(uchar);
    359        if( dir == U_LEFT_TO_RIGHT )
    360                return UBIDI_LTR;
    361        if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
    362                return UBIDI_RTL;
    363    }
    364    return UBIDI_NEUTRAL;
    365 }
    366 
    367 /* perform (P2)..(P3) ------------------------------------------------------- */
    368 
    369 /**
    370 * Returns the directionality of the first strong character
    371 * after the last B in prologue, if any.
    372 * Requires prologue!=null.
    373 */
    374 static DirProp
    375 firstL_R_AL(UBiDi *pBiDi) {
    376    const char16_t *text=pBiDi->prologue;
    377    int32_t length=pBiDi->proLength;
    378    int32_t i;
    379    UChar32 uchar;
    380    DirProp dirProp, result=ON;
    381    for(i=0; i<length; ) {
    382        /* i is incremented by U16_NEXT */
    383        U16_NEXT(text, i, length, uchar);
    384        dirProp = static_cast<DirProp>(ubidi_getCustomizedClass(pBiDi, uchar));
    385        if(result==ON) {
    386            if(dirProp==L || dirProp==R || dirProp==AL) {
    387                result=dirProp;
    388            }
    389        } else {
    390            if(dirProp==B) {
    391                result=ON;
    392            }
    393        }
    394    }
    395    return result;
    396 }
    397 
    398 /*
    399 * Check that there are enough entries in the array pointed to by pBiDi->paras
    400 */
    401 static UBool
    402 checkParaCount(UBiDi *pBiDi) {
    403    int32_t count=pBiDi->paraCount;
    404    if(pBiDi->paras==pBiDi->simpleParas) {
    405        if(count<=SIMPLE_PARAS_COUNT)
    406            return true;
    407        if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
    408            return false;
    409        pBiDi->paras=pBiDi->parasMemory;
    410        uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
    411        return true;
    412    }
    413    if(!getInitialParasMemory(pBiDi, count * 2))
    414        return false;
    415    pBiDi->paras=pBiDi->parasMemory;
    416    return true;
    417 }
    418 
    419 /*
    420 * Get the directional properties for the text, calculate the flags bit-set, and
    421 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
    422 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
    423 * When encountering an FSI, it is initially replaced with an LRI, which is the
    424 * default. Only if a strong R or AL is found within its scope will the LRI be
    425 * replaced by an RLI.
    426 */
    427 static UBool
    428 getDirProps(UBiDi *pBiDi) {
    429    const char16_t *text=pBiDi->text;
    430    DirProp *dirProps=pBiDi->dirPropsMemory;    /* pBiDi->dirProps is const */
    431 
    432    int32_t i=0, originalLength=pBiDi->originalLength;
    433    Flags flags=0;      /* collect all directionalities in the text */
    434    UChar32 uchar;
    435    DirProp dirProp=0, defaultParaLevel=0;  /* initialize to avoid compiler warnings */
    436    UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
    437    /* for inverse BiDi, the default para level is set to RTL if there is a
    438       strong R or AL character at either end of the text                            */
    439    UBool isDefaultLevelInverse = isDefaultLevel && static_cast<UBool>(
    440            pBiDi->reorderingMode == UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
    441            pBiDi->reorderingMode == UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
    442    int32_t lastArabicPos=-1;
    443    int32_t controlCount=0;
    444    UBool removeBiDiControls =
    445        static_cast<UBool>(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS);
    446 
    447    enum State {
    448         NOT_SEEKING_STRONG,            /* 0: not contextual paraLevel, not after FSI */
    449         SEEKING_STRONG_FOR_PARA,       /* 1: looking for first strong char in para */
    450         SEEKING_STRONG_FOR_FSI,        /* 2: looking for first strong after FSI */
    451         LOOKING_FOR_PDI                /* 3: found strong after FSI, looking for PDI */
    452    };
    453    State state;
    454    DirProp lastStrong=ON;              /* for default level & inverse BiDi */
    455    /* The following stacks are used to manage isolate sequences. Those
    456       sequences may be nested, but obviously never more deeply than the
    457       maximum explicit embedding level.
    458       lastStack is the index of the last used entry in the stack. A value of -1
    459       means that there is no open isolate sequence.
    460       lastStack is reset to -1 on paragraph boundaries. */
    461    /* The following stack contains the position of the initiator of
    462       each open isolate sequence */
    463    int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
    464    /* The following stack contains the last known state before
    465       encountering the initiator of an isolate sequence */
    466    State  previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
    467    int32_t stackLast=-1;
    468 
    469    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
    470        pBiDi->length=0;
    471    defaultParaLevel=pBiDi->paraLevel&1;
    472    if(isDefaultLevel) {
    473        pBiDi->paras[0].level=defaultParaLevel;
    474        lastStrong=defaultParaLevel;
    475        if(pBiDi->proLength>0 &&                    /* there is a prologue */
    476           (dirProp=firstL_R_AL(pBiDi))!=ON) {  /* with a strong character */
    477            if(dirProp==L)
    478                pBiDi->paras[0].level=0;    /* set the default para level */
    479            else
    480                pBiDi->paras[0].level=1;    /* set the default para level */
    481            state=NOT_SEEKING_STRONG;
    482        } else {
    483            state=SEEKING_STRONG_FOR_PARA;
    484        }
    485    } else {
    486        pBiDi->paras[0].level=pBiDi->paraLevel;
    487        state=NOT_SEEKING_STRONG;
    488    }
    489    /* count paragraphs and determine the paragraph level (P2..P3) */
    490    /*
    491     * see comment in ubidi.h:
    492     * the UBIDI_DEFAULT_XXX values are designed so that
    493     * their bit 0 alone yields the intended default
    494     */
    495    for( /* i=0 above */ ; i<originalLength; ) {
    496        /* i is incremented by U16_NEXT */
    497        U16_NEXT(text, i, originalLength, uchar);
    498        flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
    499        dirProps[i-1]=dirProp;
    500        if(uchar>0xffff) {  /* set the lead surrogate's property to BN */
    501            flags|=DIRPROP_FLAG(BN);
    502            dirProps[i-2]=BN;
    503        }
    504        if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
    505            controlCount++;
    506        if(dirProp==L) {
    507            if(state==SEEKING_STRONG_FOR_PARA) {
    508                pBiDi->paras[pBiDi->paraCount-1].level=0;
    509                state=NOT_SEEKING_STRONG;
    510            }
    511            else if(state==SEEKING_STRONG_FOR_FSI) {
    512                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
    513                    /* no need for next statement, already set by default */
    514                    /* dirProps[isolateStartStack[stackLast]]=LRI; */
    515                    flags|=DIRPROP_FLAG(LRI);
    516                }
    517                state=LOOKING_FOR_PDI;
    518            }
    519            lastStrong=L;
    520            continue;
    521        }
    522        if(dirProp==R || dirProp==AL) {
    523            if(state==SEEKING_STRONG_FOR_PARA) {
    524                pBiDi->paras[pBiDi->paraCount-1].level=1;
    525                state=NOT_SEEKING_STRONG;
    526            }
    527            else if(state==SEEKING_STRONG_FOR_FSI) {
    528                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
    529                    dirProps[isolateStartStack[stackLast]]=RLI;
    530                    flags|=DIRPROP_FLAG(RLI);
    531                }
    532                state=LOOKING_FOR_PDI;
    533            }
    534            lastStrong=R;
    535            if(dirProp==AL)
    536                lastArabicPos=i-1;
    537            continue;
    538        }
    539        if(dirProp>=FSI && dirProp<=RLI) {  /* FSI, LRI or RLI */
    540            stackLast++;
    541            if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
    542                isolateStartStack[stackLast]=i-1;
    543                previousStateStack[stackLast]=state;
    544            }
    545            if(dirProp==FSI) {
    546                dirProps[i-1]=LRI;      /* default if no strong char */
    547                state=SEEKING_STRONG_FOR_FSI;
    548            }
    549            else
    550                state=LOOKING_FOR_PDI;
    551            continue;
    552        }
    553        if(dirProp==PDI) {
    554            if(state==SEEKING_STRONG_FOR_FSI) {
    555                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
    556                    /* no need for next statement, already set by default */
    557                    /* dirProps[isolateStartStack[stackLast]]=LRI; */
    558                    flags|=DIRPROP_FLAG(LRI);
    559                }
    560            }
    561            if(stackLast>=0) {
    562                if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
    563                    state=previousStateStack[stackLast];
    564                stackLast--;
    565            }
    566            continue;
    567        }
    568        if(dirProp==B) {
    569            if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
    570                continue;
    571            pBiDi->paras[pBiDi->paraCount-1].limit=i;
    572            if(isDefaultLevelInverse && lastStrong==R)
    573                pBiDi->paras[pBiDi->paraCount-1].level=1;
    574            if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    575                /* When streaming, we only process whole paragraphs
    576                   thus some updates are only done on paragraph boundaries */
    577                pBiDi->length=i;        /* i is index to next character */
    578                pBiDi->controlCount=controlCount;
    579            }
    580            if(i<originalLength) {              /* B not last char in text */
    581                pBiDi->paraCount++;
    582                if(checkParaCount(pBiDi)==false)    /* not enough memory for a new para entry */
    583                    return false;
    584                if(isDefaultLevel) {
    585                    pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
    586                    state=SEEKING_STRONG_FOR_PARA;
    587                    lastStrong=defaultParaLevel;
    588                } else {
    589                    pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
    590                    state=NOT_SEEKING_STRONG;
    591                }
    592                stackLast=-1;
    593            }
    594            continue;
    595        }
    596    }
    597    /* Ignore still open isolate sequences with overflow */
    598    if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
    599        stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
    600        state=SEEKING_STRONG_FOR_FSI;   /* to be on the safe side */
    601    }
    602    /* Resolve direction of still unresolved open FSI sequences */
    603    while(stackLast>=0) {
    604        if(state==SEEKING_STRONG_FOR_FSI) {
    605            /* no need for next statement, already set by default */
    606            /* dirProps[isolateStartStack[stackLast]]=LRI; */
    607            flags|=DIRPROP_FLAG(LRI);
    608            break;
    609        }
    610        state=previousStateStack[stackLast];
    611        stackLast--;
    612    }
    613    /* When streaming, ignore text after the last paragraph separator */
    614    if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
    615        if(pBiDi->length<originalLength)
    616            pBiDi->paraCount--;
    617    } else {
    618        pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
    619        pBiDi->controlCount=controlCount;
    620    }
    621    /* For inverse bidi, default para direction is RTL if there is
    622       a strong R or AL at either end of the paragraph */
    623    if(isDefaultLevelInverse && lastStrong==R) {
    624        pBiDi->paras[pBiDi->paraCount-1].level=1;
    625    }
    626    if(isDefaultLevel) {
    627        pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
    628    }
    629    /* The following is needed to resolve the text direction for default level
    630       paragraphs containing no strong character */
    631    for(i=0; i<pBiDi->paraCount; i++)
    632        flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
    633 
    634    if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
    635        flags|=DIRPROP_FLAG(L);
    636    }
    637    pBiDi->flags=flags;
    638    pBiDi->lastArabicPos=lastArabicPos;
    639    return true;
    640 }
    641 
    642 /* determine the paragraph level at position index */
    643 U_CFUNC UBiDiLevel
    644 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
    645    int32_t i;
    646    for(i=0; i<pBiDi->paraCount; i++)
    647        if(pindex<pBiDi->paras[i].limit)
    648            break;
    649    if(i>=pBiDi->paraCount)
    650        i=pBiDi->paraCount-1;
    651    return (UBiDiLevel)(pBiDi->paras[i].level);
    652 }
    653 
    654 /* Functions for handling paired brackets ----------------------------------- */
    655 
    656 /* In the isoRuns array, the first entry is used for text outside of any
    657   isolate sequence.  Higher entries are used for each more deeply nested
    658   isolate sequence. isoRunLast is the index of the last used entry.  The
    659   openings array is used to note the data of opening brackets not yet
    660   matched by a closing bracket, or matched but still susceptible to change
    661   level.
    662   Each isoRun entry contains the index of the first and
    663   one-after-last openings entries for pending opening brackets it
    664   contains.  The next openings entry to use is the one-after-last of the
    665   most deeply nested isoRun entry.
    666   isoRun entries also contain their current embedding level and the last
    667   encountered strong character, since these will be needed to resolve
    668   the level of paired brackets.  */
    669 
    670 static void
    671 bracketInit(UBiDi *pBiDi, BracketData *bd) {
    672    bd->pBiDi=pBiDi;
    673    bd->isoRunLast=0;
    674    bd->isoRuns[0].start=0;
    675    bd->isoRuns[0].limit=0;
    676    bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
    677    UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
    678    bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
    679    bd->isoRuns[0].contextDir = static_cast<UBiDiDirection>(t);
    680    bd->isoRuns[0].contextPos=0;
    681    if(pBiDi->openingsMemory) {
    682        bd->openings=pBiDi->openingsMemory;
    683        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
    684    } else {
    685        bd->openings=bd->simpleOpenings;
    686        bd->openingsCount=SIMPLE_OPENINGS_COUNT;
    687    }
    688    bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
    689                         bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
    690 }
    691 
    692 /* paragraph boundary */
    693 static void
    694 bracketProcessB(BracketData *bd, UBiDiLevel level) {
    695    bd->isoRunLast=0;
    696    bd->isoRuns[0].limit=0;
    697    bd->isoRuns[0].level=level;
    698    bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
    699    bd->isoRuns[0].contextDir = static_cast<UBiDiDirection>(level & 1);
    700    bd->isoRuns[0].contextPos=0;
    701 }
    702 
    703 /* LRE, LRO, RLE, RLO, PDF */
    704 static void
    705 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos,
    706                       UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
    707    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    708    DirProp *dirProps=bd->pBiDi->dirProps;
    709    if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO)  /* after an isolate */
    710        return;
    711    if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel))   /* not a PDF */
    712        contextLevel=embeddingLevel;
    713    pLastIsoRun->limit=pLastIsoRun->start;
    714    pLastIsoRun->level=embeddingLevel;
    715    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
    716    pLastIsoRun->contextDir = static_cast<UBiDiDirection>(contextLevel & 1);
    717    pLastIsoRun->contextPos = static_cast<UBiDiDirection>(lastCcPos);
    718 }
    719 
    720 /* LRI or RLI */
    721 static void
    722 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
    723    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    724    int16_t lastLimit;
    725    pLastIsoRun->lastBase=ON;
    726    lastLimit=pLastIsoRun->limit;
    727    bd->isoRunLast++;
    728    pLastIsoRun++;
    729    pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
    730    pLastIsoRun->level=level;
    731    pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
    732    pLastIsoRun->contextDir = static_cast<UBiDiDirection>(level & 1);
    733    pLastIsoRun->contextPos=0;
    734 }
    735 
    736 /* PDI */
    737 static void
    738 bracketProcessPDI(BracketData *bd) {
    739    IsoRun *pLastIsoRun;
    740    bd->isoRunLast--;
    741    pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    742    pLastIsoRun->lastBase=ON;
    743 }
    744 
    745 /* newly found opening bracket: create an openings entry */
    746 static UBool                            /* return true if success */
    747 bracketAddOpening(BracketData *bd, char16_t match, int32_t position) {
    748    IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
    749    Opening *pOpening;
    750    if(pLastIsoRun->limit>=bd->openingsCount) {  /* no available new entry */
    751        UBiDi *pBiDi=bd->pBiDi;
    752        if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
    753            return false;
    754        if(bd->openings==bd->simpleOpenings)
    755            uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
    756                        SIMPLE_OPENINGS_COUNT * sizeof(Opening));
    757        bd->openings=pBiDi->openingsMemory;     /* may have changed */
    758        bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
    759    }
    760    pOpening=&bd->openings[pLastIsoRun->limit];
    761    pOpening->position=position;
    762    pOpening->match=match;
    763    pOpening->contextDir=pLastIsoRun->contextDir;
    764    pOpening->contextPos=pLastIsoRun->contextPos;
    765    pOpening->flags=0;
    766    pLastIsoRun->limit++;
    767    return true;
    768 }
    769 
/*
 * change N0c1 to N0c2 when a preceding bracket is assigned the embedding level
 *
 * After the character at newPropPosition has been resolved to newProp, walk
 * the opening entries located above openingIndex in the current isolating
 * run and revisit the bracket pairs that were resolved provisionally by N0c.
 * Such pairs are recognizable by a negative "match" field, which holds the
 * negated text position of their closing bracket (see the unstable case in
 * bracketProcessClosing()).
 *
 * For each affected pair, both brackets get newProp in dirProps[], the entry
 * is neutralized (match=0) and the function recurses for the positions of
 * the two brackets just changed, since they in turn may be the context of
 * further provisional pairs.
 *
 * bd               bracket-matching state
 * openingIndex     index in bd->openings[]; only entries after it are examined
 * newPropPosition  text position of the character whose property changed
 * newProp          the property assigned at newPropPosition
 */
static void
fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
   /* This function calls itself recursively */
   IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
   Opening *qOpening;
   DirProp *dirProps=bd->pBiDi->dirProps;
   int32_t k, openingPosition, closingPosition;
   for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
       if(qOpening->match>=0)      /* not an N0c match */
           continue;
       /* the changed character lies before this pair's context character:
          it cannot influence this pair, and the scan stops here */
       if(newPropPosition<qOpening->contextPos)
           break;
       /* the changed character is not located before this opening bracket,
          so it is not part of its preceding context */
       if(newPropPosition>=qOpening->position)
           continue;
       /* the context direction recorded for this pair already equals newProp */
       if(newProp==qOpening->contextDir)
           break;
       openingPosition=qOpening->position;
       dirProps[openingPosition]=newProp;
       closingPosition=-(qOpening->match);     /* match holds the negated closing position */
       dirProps[closingPosition]=newProp;
       qOpening->match=0;                      /* prevent further changes */
       fixN0c(bd, k, openingPosition, newProp);
       fixN0c(bd, k, closingPosition, newProp);
   }
}
    796 
/*
 * process closing bracket
 *
 * Resolves the bracket pair formed by the opening entry bd->openings[openIdx]
 * and the closing bracket at text index "position", following rule N0 of the
 * BiDi algorithm, and updates the openings stack of the current isolating run.
 *
 * The FOUND_L / FOUND_R bits in the opening's flags tell which strong
 * directions have been seen since the opening bracket; they are accumulated
 * by bracketProcessChar().
 *
 * bd        bracket-matching state
 * openIdx   index in bd->openings[] of the matching opening bracket
 * position  text index of the closing bracket
 *
 * Returns the property (L or R) written to dirProps[] for both brackets, or
 * ON when no strong character was found inside the pair (N0d); in that case
 * dirProps[] is left untouched.
 */
static DirProp              /* return L or R if N0b or N0c, ON if N0d */
bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
   IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
   Opening *pOpening, *qOpening;
   UBiDiDirection direction;
   UBool stable;
   DirProp newProp;
   pOpening=&bd->openings[openIdx];
   /* embedding direction of the current isolating run: 0 (L) or 1 (R) */
   direction = static_cast<UBiDiDirection>(pLastIsoRun->level & 1);
   stable=true;            /* assume stable until proved otherwise */

   /* The stable flag is set when brackets are paired and their
      level is resolved and cannot be changed by what will be
      found later in the source string.
      An unstable match can occur only when applying N0c, where
      the resolved level depends on the preceding context, and
      this context may be affected by text occurring later.
      Example: RTL paragraph containing:  abc[(latin) HEBREW]
      When the closing parenthesis is encountered, it appears
      that N0c1 must be applied since 'abc' sets an opposite
      direction context and both parentheses receive level 2.
      However, when the closing square bracket is processed,
      N0b applies because of 'HEBREW' being included within the
      brackets, thus the square brackets are treated like R and
      receive level 1. However, this changes the preceding
      context of the opening parenthesis, and it now appears
      that N0c2 must be applied to the parentheses rather than
      N0c1. */

   /* a strong character matching the embedding direction was found inside */
   if((direction==0 && pOpening->flags&FOUND_L) ||
      (direction==1 && pOpening->flags&FOUND_R)) {                         /* N0b */
       newProp=static_cast<DirProp>(direction);
   }
   /* only strong characters of the opposite direction were found inside */
   else if(pOpening->flags&(FOUND_L|FOUND_R)) {                            /* N0c */
       /* it is stable if there is no containing pair or in
          conditions too complicated and not worth checking */
       stable=(openIdx==pLastIsoRun->start);
       if(direction!=pOpening->contextDir)
           newProp= static_cast<DirProp>(pOpening->contextDir);           /* N0c1 */
       else
           newProp= static_cast<DirProp>(direction);                      /* N0c2 */
   } else {
       /* forget this and any brackets nested within this pair */
       pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
       return ON;                                                          /* N0d */
   }
   /* both brackets of the pair take the resolved direction */
   bd->pBiDi->dirProps[pOpening->position]=newProp;
   bd->pBiDi->dirProps[position]=newProp;
   /* Update nested N0c pairs that may be affected */
   fixN0c(bd, openIdx, pOpening->position, newProp);
   if(stable) {
       pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
       /* remove lower located synonyms if any */
       while(pLastIsoRun->limit>pLastIsoRun->start &&
             bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
           pLastIsoRun->limit--;
   } else {
       int32_t k;
       /* keep the entry, marked as a provisional N0c pair: a negative match
          holds the negated position of the closing bracket (used by fixN0c) */
       pOpening->match=-position;
       /* neutralize lower located synonyms if any */
       k=openIdx-1;
       while(k>=pLastIsoRun->start &&
             bd->openings[k].position==pOpening->position)
           bd->openings[k--].match=0;
       /* neutralize any unmatched opening between the current pair;
          this will also neutralize higher located synonyms if any */
       for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
           qOpening=&bd->openings[k];
           if(qOpening->position>=position)
               break;
           if(qOpening->match>0)
               qOpening->match=0;
       }
   }
   return newProp;
}
    874 
/*
 * handle strong characters, digits and candidates for closing brackets
 *
 * Processes the character at text index "position" for bracket matching
 * (rule N0) within the current isolating run:
 * - an ON character is first tried as the closing bracket of a pending
 *   opening; failing that, it is registered as a new opening if it is an
 *   opening paired bracket;
 * - for every character, the run's lastBase / lastStrong / contextDir /
 *   contextPos bookkeeping is updated, and dirProps[position] may be
 *   rewritten (override levels, EN -> ENL/ENR/AN, NSM after ON);
 * - if the character counts as strong L or R, the corresponding FOUND_x flag
 *   is set in every pending opening located before it.
 *
 * bd        bracket-matching state
 * position  text index of the character; levels[position] must already be set
 *
 * Returns false only when a new opening entry could not be added
 * (bracketAddOpening() failed), true otherwise.
 */
static UBool                            /* return true if success */
bracketProcessChar(BracketData *bd, int32_t position) {
   IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
   DirProp *dirProps, dirProp, newProp;
   UBiDiLevel level;
   dirProps=bd->pBiDi->dirProps;
   dirProp=dirProps[position];
   if(dirProp==ON) {
       char16_t c, match;
       int32_t idx;
       /* First see if it is a matching closing bracket. Hopefully, this is
          more efficient than checking if it is a closing bracket at all */
       c=bd->pBiDi->text[position];
       /* search the pending openings of this run, most recent first */
       for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
           if(bd->openings[idx].match!=c)
               continue;
           /* We have a match */
           newProp=bracketProcessClosing(bd, idx, position);
           if(newProp==ON) {           /* N0d */
               c=0;        /* prevent handling as an opening */
               break;
           }
           /* the resolved pair becomes the context for following brackets */
           pLastIsoRun->lastBase=ON;
           pLastIsoRun->contextDir = static_cast<UBiDiDirection>(newProp);
           pLastIsoRun->contextPos=position;
           level=bd->pBiDi->levels[position];
           if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
               uint16_t flag;
               int32_t i;
               newProp=level&1;
               pLastIsoRun->lastStrong=newProp;
               flag=DIRPROP_FLAG(newProp);
               /* the overridden direction counts as found in all enclosing openings */
               for(i=pLastIsoRun->start; i<idx; i++)
                   bd->openings[i].flags|=flag;
               /* matching brackets are not overridden by LRO/RLO */
               bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
           }
           /* matching brackets are not overridden by LRO/RLO */
           bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
           /* a resolved closing bracket needs no further processing */
           return true;
       }
       /* We get here only if the ON character is not a matching closing
          bracket or it is a case of N0d */
       /* Now see if it is an opening bracket */
       if(c)
           match= static_cast<char16_t>(u_getBidiPairedBracket(c));    /* get the matching char */
       else
           match=0;    /* N0d case: match==c below, so it is not treated as an opening */
       if(match!=c &&                  /* has a matching char */
          ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
           /* special case: process synonyms
              create an opening entry for each synonym */
           if(match==0x232A) {     /* RIGHT-POINTING ANGLE BRACKET */
               if(!bracketAddOpening(bd, 0x3009, position))
                   return false;
           }
           else if(match==0x3009) {         /* RIGHT ANGLE BRACKET */
               if(!bracketAddOpening(bd, 0x232A, position))
                   return false;
           }
           if(!bracketAddOpening(bd, match, position))
               return false;
       }
   }
   /* update the run state according to the (possibly overridden) type */
   level=bd->pBiDi->levels[position];
   if(level&UBIDI_LEVEL_OVERRIDE) {    /* X4, X5 */
       newProp=level&1;
       /* S, WS and ON keep their type in dirProps[] even under an override */
       if(dirProp!=S && dirProp!=WS && dirProp!=ON)
           dirProps[position]=newProp;
       pLastIsoRun->lastBase=newProp;
       pLastIsoRun->lastStrong=newProp;
       pLastIsoRun->contextDir = static_cast<UBiDiDirection>(newProp);
       pLastIsoRun->contextPos=position;
   }
   else if(dirProp<=R || dirProp==AL) {
       /* strong character: L, R or AL */
       newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
       pLastIsoRun->lastBase=dirProp;
       pLastIsoRun->lastStrong=dirProp;
       pLastIsoRun->contextDir = static_cast<UBiDiDirection>(newProp);
       pLastIsoRun->contextPos=position;
   }
   else if(dirProp==EN) {
       pLastIsoRun->lastBase=EN;
       if(pLastIsoRun->lastStrong==L) {
           newProp=L;                  /* W7 */
           if(!bd->isNumbersSpecial)
               dirProps[position]=ENL;
           pLastIsoRun->contextDir = static_cast<UBiDiDirection>(L);
           pLastIsoRun->contextPos=position;
       }
       else {
           newProp=R;                  /* N0 */
           if(pLastIsoRun->lastStrong==AL)
               dirProps[position]=AN;  /* W2 */
           else
               dirProps[position]=ENR;
           pLastIsoRun->contextDir = static_cast<UBiDiDirection>(R);
           pLastIsoRun->contextPos=position;
       }
   }
   else if(dirProp==AN) {
       newProp=R;                      /* N0 */
       pLastIsoRun->lastBase=AN;
       pLastIsoRun->contextDir = static_cast<UBiDiDirection>(R);
       pLastIsoRun->contextPos=position;
   }
   else if(dirProp==NSM) {
       /* if the last real char was ON, change NSM to ON so that it
          will stay ON even if the last real char is a bracket which
          may be changed to L or R */
       newProp=pLastIsoRun->lastBase;
       if(newProp==ON)
           dirProps[position]=newProp;
   }
   else {
       /* any other type: only remember it as the last base character */
       newProp=dirProp;
       pLastIsoRun->lastBase=dirProp;
   }
   /* a character resolved as strong is recorded (as FOUND_L / FOUND_R) in
      every pending opening of this run that is located before it */
   if(newProp<=R || newProp==AL) {
       int32_t i;
       uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
       for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
           if(position>bd->openings[i].position)
               bd->openings[i].flags|=flag;
   }
   return true;
}
   1003 
   1004 /* perform (X1)..(X9) ------------------------------------------------------- */
   1005 
   1006 /* determine if the text is mixed-directional or single-directional */
   1007 static UBiDiDirection
   1008 directionFromFlags(UBiDi *pBiDi) {
   1009    Flags flags=pBiDi->flags;
   1010    /* if the text contains AN and neutrals, then some neutrals may become RTL */
   1011    if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
   1012        return UBIDI_LTR;
   1013    } else if(!(flags&MASK_LTR)) {
   1014        return UBIDI_RTL;
   1015    } else {
   1016        return UBIDI_MIXED;
   1017    }
   1018 }
   1019 
   1020 /*
   1021 * Resolve the explicit levels as specified by explicit embedding codes.
   1022 * Recalculate the flags to have them reflect the real properties
   1023 * after taking the explicit embeddings into account.
   1024 *
   1025 * The BiDi algorithm is designed to result in the same behavior whether embedding
   1026 * levels are externally specified (from "styled text", supposedly the preferred
   1027 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
   1028 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
   1029 * However, in a real implementation, the removal of these codes and their index
   1030 * positions in the plain text is undesirable since it would result in
   1031 * reallocated, reindexed text.
   1032 * Instead, this implementation leaves the codes in there and just ignores them
   1033 * in the subsequent processing.
   1034 * In order to get the same reordering behavior, positions with a BN or a not-isolate
   1035 * explicit embedding code just get the same level assigned as the last "real"
   1036 * character.
   1037 *
   1038 * Some implementations, not this one, then overwrite some of these
   1039 * directionality properties at "real" same-level-run boundaries by
   1040 * L or R codes so that the resolution of weak types can be performed on the
   1041 * entire paragraph at once instead of having to parse it once more and
   1042 * perform that resolution on same-level-runs.
   1043 * This limits the scope of the implicit rules in effectively
   1044 * the same way as the run limits.
   1045 *
   1046 * Instead, this implementation does not modify these codes, except for
   1047 * paired brackets whose properties (ON) may be replaced by L or R.
   1048 * On one hand, the paragraph has to be scanned for same-level-runs, but
   1049 * on the other hand, this saves another loop to reset these codes,
   1050 * or saves making and modifying a copy of dirProps[].
   1051 *
   1052 *
   1053 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
   1054 *
   1055 *
   1056 * Handling the stack of explicit levels (Xn):
   1057 *
   1058 * With the BiDi stack of explicit levels, as pushed with each
   1059 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
   1060 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
   1061 *
   1062 * In order to have a correct push-pop semantics even in the case of overflows,
   1063 * overflow counters and a valid isolate counter are used as described in UAX#9
   1064 * section 3.3.2 "Explicit Levels and Directions".
   1065 *
   1066 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
   1067 *
   1068 * Returns normally the direction; -1 if there was a memory shortage
   1069 *
   1070 */
   1071 static UBiDiDirection
   1072 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
   1073    DirProp *dirProps=pBiDi->dirProps;
   1074    UBiDiLevel *levels=pBiDi->levels;
   1075    const char16_t *text=pBiDi->text;
   1076 
   1077    int32_t i=0, length=pBiDi->length;
   1078    Flags flags=pBiDi->flags;       /* collect all directionalities in the text */
   1079    DirProp dirProp;
   1080    UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
   1081    UBiDiDirection direction;
   1082    pBiDi->isolateCount=0;
   1083 
   1084    if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
   1085 
   1086    /* determine if the text is mixed-directional or single-directional */
   1087    direction=directionFromFlags(pBiDi);
   1088 
   1089    /* we may not need to resolve any explicit levels */
   1090    if((direction!=UBIDI_MIXED)) {
   1091        /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
   1092        return direction;
   1093    }
   1094    if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
   1095        /* inverse BiDi: mixed, but all characters are at the same embedding level */
   1096        /* set all levels to the paragraph level */
   1097        int32_t paraIndex, start, limit;
   1098        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
   1099            if(paraIndex==0)
   1100                start=0;
   1101            else
   1102                start=pBiDi->paras[paraIndex-1].limit;
   1103            limit=pBiDi->paras[paraIndex].limit;
   1104            level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
   1105            for(i=start; i<limit; i++)
   1106                levels[i]=level;
   1107        }
   1108        return direction;   /* no bracket matching for inverse BiDi */
   1109    }
   1110    if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
   1111        /* no embeddings, set all levels to the paragraph level */
   1112        /* we still have to perform bracket matching */
   1113        int32_t paraIndex, start, limit;
   1114        BracketData bracketData;
   1115        bracketInit(pBiDi, &bracketData);
   1116        for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
   1117            if(paraIndex==0)
   1118                start=0;
   1119            else
   1120                start=pBiDi->paras[paraIndex-1].limit;
   1121            limit=pBiDi->paras[paraIndex].limit;
   1122            level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
   1123            for(i=start; i<limit; i++) {
   1124                levels[i]=level;
   1125                dirProp=dirProps[i];
   1126                if(dirProp==BN)
   1127                    continue;
   1128                if(dirProp==B) {
   1129                    if((i+1)<length) {
   1130                        if(text[i]==CR && text[i+1]==LF)
   1131                            continue;   /* skip CR when followed by LF */
   1132                        bracketProcessB(&bracketData, level);
   1133                    }
   1134                    continue;
   1135                }
   1136                if(!bracketProcessChar(&bracketData, i)) {
   1137                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   1138                    return UBIDI_LTR;
   1139                }
   1140            }
   1141        }
   1142        return direction;
   1143    }
   1144    {
   1145        /* continue to perform (Xn) */
   1146 
   1147        /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
   1148        /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
   1149        UBiDiLevel embeddingLevel=level, newLevel;
   1150        UBiDiLevel previousLevel=level;     /* previous level for regular (not CC) characters */
   1151        int32_t lastCcPos=0;                /* index of last effective LRx,RLx, PDx */
   1152 
   1153        /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
   1154           stackLast points to its current entry. */
   1155        uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2];   /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
   1156                                                        but we need one more entry as base */
   1157        uint32_t stackLast=0;
   1158        int32_t overflowIsolateCount=0;
   1159        int32_t overflowEmbeddingCount=0;
   1160        int32_t validIsolateCount=0;
   1161        BracketData bracketData;
   1162        bracketInit(pBiDi, &bracketData);
   1163        stack[0]=level;     /* initialize base entry to para level, no override, no isolate */
   1164 
   1165        /* recalculate the flags */
   1166        flags=0;
   1167 
   1168        for(i=0; i<length; ++i) {
   1169            dirProp=dirProps[i];
   1170            switch(dirProp) {
   1171            case LRE:
   1172            case RLE:
   1173            case LRO:
   1174            case RLO:
   1175                /* (X2, X3, X4, X5) */
   1176                flags|=DIRPROP_FLAG(BN);
   1177                levels[i]=previousLevel;
   1178                if (dirProp==LRE || dirProp==LRO)
   1179                    /* least greater even level */
   1180                    newLevel = static_cast<UBiDiLevel>((embeddingLevel + 2) & ~(UBIDI_LEVEL_OVERRIDE | 1));
   1181                else
   1182                    /* least greater odd level */
   1183                    newLevel = static_cast<UBiDiLevel>((NO_OVERRIDE(embeddingLevel) + 1) | 1);
   1184                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
   1185                                                         overflowEmbeddingCount==0) {
   1186                    lastCcPos=i;
   1187                    embeddingLevel=newLevel;
   1188                    if(dirProp==LRO || dirProp==RLO)
   1189                        embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
   1190                    stackLast++;
   1191                    stack[stackLast]=embeddingLevel;
   1192                    /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
   1193                       since this has already been done for newLevel which is
   1194                       the source for embeddingLevel.
   1195                     */
   1196                } else {
   1197                    if(overflowIsolateCount==0)
   1198                        overflowEmbeddingCount++;
   1199                }
   1200                break;
   1201            case PDF:
   1202                /* (X7) */
   1203                flags|=DIRPROP_FLAG(BN);
   1204                levels[i]=previousLevel;
   1205                /* handle all the overflow cases first */
   1206                if(overflowIsolateCount) {
   1207                    break;
   1208                }
   1209                if(overflowEmbeddingCount) {
   1210                    overflowEmbeddingCount--;
   1211                    break;
   1212                }
   1213                if(stackLast>0 && stack[stackLast]<ISOLATE) {   /* not an isolate entry */
   1214                    lastCcPos=i;
   1215                    stackLast--;
   1216                    embeddingLevel = static_cast<UBiDiLevel>(stack[stackLast]);
   1217                }
   1218                break;
   1219            case LRI:
   1220            case RLI:
   1221                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
   1222                levels[i]=NO_OVERRIDE(embeddingLevel);
   1223                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
   1224                    bracketProcessBoundary(&bracketData, lastCcPos,
   1225                                           previousLevel, embeddingLevel);
   1226                    flags|=DIRPROP_FLAG_MULTI_RUNS;
   1227                }
   1228                previousLevel=embeddingLevel;
   1229                /* (X5a, X5b) */
   1230                if(dirProp==LRI)
   1231                    /* least greater even level */
   1232                    newLevel = static_cast<UBiDiLevel>((embeddingLevel + 2) & ~(UBIDI_LEVEL_OVERRIDE | 1));
   1233                else
   1234                    /* least greater odd level */
   1235                    newLevel = static_cast<UBiDiLevel>((NO_OVERRIDE(embeddingLevel) + 1) | 1);
   1236                if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
   1237                                                         overflowEmbeddingCount==0) {
   1238                    flags|=DIRPROP_FLAG(dirProp);
   1239                    lastCcPos=i;
   1240                    validIsolateCount++;
   1241                    if(validIsolateCount>pBiDi->isolateCount)
   1242                        pBiDi->isolateCount=validIsolateCount;
   1243                    embeddingLevel=newLevel;
   1244                    /* we can increment stackLast without checking because newLevel
   1245                       will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
   1246                    stackLast++;
   1247                    stack[stackLast]=embeddingLevel+ISOLATE;
   1248                    bracketProcessLRI_RLI(&bracketData, embeddingLevel);
   1249                } else {
   1250                    /* make it WS so that it is handled by adjustWSLevels() */
   1251                    dirProps[i]=WS;
   1252                    overflowIsolateCount++;
   1253                }
   1254                break;
   1255            case PDI:
   1256                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
   1257                    bracketProcessBoundary(&bracketData, lastCcPos,
   1258                                           previousLevel, embeddingLevel);
   1259                    flags|=DIRPROP_FLAG_MULTI_RUNS;
   1260                }
   1261                /* (X6a) */
   1262                if(overflowIsolateCount) {
   1263                    overflowIsolateCount--;
   1264                    /* make it WS so that it is handled by adjustWSLevels() */
   1265                    dirProps[i]=WS;
   1266                }
   1267                else if(validIsolateCount) {
   1268                    flags|=DIRPROP_FLAG(PDI);
   1269                    lastCcPos=i;
   1270                    overflowEmbeddingCount=0;
   1271                    while(stack[stackLast]<ISOLATE) /* pop embedding entries */
   1272                        stackLast--;                /* until the last isolate entry */
   1273                    stackLast--;                    /* pop also the last isolate entry */
   1274                    validIsolateCount--;
   1275                    bracketProcessPDI(&bracketData);
   1276                } else
   1277                    /* make it WS so that it is handled by adjustWSLevels() */
   1278                    dirProps[i]=WS;
   1279                embeddingLevel = static_cast<UBiDiLevel>(stack[stackLast]) & ~ISOLATE;
   1280                flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
   1281                previousLevel=embeddingLevel;
   1282                levels[i]=NO_OVERRIDE(embeddingLevel);
   1283                break;
   1284            case B:
   1285                flags|=DIRPROP_FLAG(B);
   1286                levels[i]=GET_PARALEVEL(pBiDi, i);
   1287                if((i+1)<length) {
   1288                    if(text[i]==CR && text[i+1]==LF)
   1289                        break;          /* skip CR when followed by LF */
   1290                    overflowEmbeddingCount=overflowIsolateCount=0;
   1291                    validIsolateCount=0;
   1292                    stackLast=0;
   1293                    previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
   1294                    stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
   1295                    bracketProcessB(&bracketData, embeddingLevel);
   1296                }
   1297                break;
   1298            case BN:
   1299                /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
   1300                /* they will get their levels set correctly in adjustWSLevels() */
   1301                levels[i]=previousLevel;
   1302                flags|=DIRPROP_FLAG(BN);
   1303                break;
   1304            default:
   1305                /* all other types are normal characters and get the "real" level */
   1306                if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
   1307                    bracketProcessBoundary(&bracketData, lastCcPos,
   1308                                           previousLevel, embeddingLevel);
   1309                    flags|=DIRPROP_FLAG_MULTI_RUNS;
   1310                    if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
   1311                        flags|=DIRPROP_FLAG_O(embeddingLevel);
   1312                    else
   1313                        flags|=DIRPROP_FLAG_E(embeddingLevel);
   1314                }
   1315                previousLevel=embeddingLevel;
   1316                levels[i]=embeddingLevel;
   1317                if(!bracketProcessChar(&bracketData, i))
   1318                    return static_cast<UBiDiDirection>(-1);
   1319                /* the dirProp may have been changed in bracketProcessChar() */
   1320                flags|=DIRPROP_FLAG(dirProps[i]);
   1321                break;
   1322            }
   1323        }
   1324        if(flags&MASK_EMBEDDING)
   1325            flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
   1326        if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
   1327            flags|=DIRPROP_FLAG(L);
   1328        /* again, determine if the text is mixed-directional or single-directional */
   1329        pBiDi->flags=flags;
   1330        direction=directionFromFlags(pBiDi);
   1331    }
   1332    return direction;
   1333 }
   1334 
   1335 /*
   1336 * Use a pre-specified embedding levels array:
   1337 *
   1338 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
   1339 * ignore all explicit codes (X9),
   1340 * and check all the preset levels.
   1341 *
   1342 * Recalculate the flags to have them reflect the real properties
   1343 * after taking the explicit embeddings into account.
   1344 */
   1345 static UBiDiDirection
   1346 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
   1347    DirProp *dirProps=pBiDi->dirProps;
   1348    UBiDiLevel *levels=pBiDi->levels;
   1349    int32_t isolateCount=0;
   1350 
   1351    int32_t length=pBiDi->length;
   1352    Flags flags=0;  /* collect all directionalities in the text */
   1353    pBiDi->isolateCount=0;
   1354 
   1355    int32_t currentParaIndex = 0;
   1356    int32_t currentParaLimit = pBiDi->paras[0].limit;
   1357    int32_t currentParaLevel = pBiDi->paraLevel;
   1358 
   1359    for(int32_t i=0; i<length; ++i) {
   1360        UBiDiLevel level=levels[i];
   1361        DirProp dirProp=dirProps[i];
   1362        if(dirProp==LRI || dirProp==RLI) {
   1363            isolateCount++;
   1364            if(isolateCount>pBiDi->isolateCount)
   1365                pBiDi->isolateCount=isolateCount;
   1366        }
   1367        else if(dirProp==PDI)
   1368            isolateCount--;
   1369        else if(dirProp==B)
   1370            isolateCount=0;
   1371 
   1372        // optimized version of  int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
   1373        if (pBiDi->defaultParaLevel != 0 &&
   1374                i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
   1375            currentParaLevel = pBiDi->paras[++currentParaIndex].level;
   1376            currentParaLimit = pBiDi->paras[currentParaIndex].limit;
   1377        }
   1378 
   1379        UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
   1380        level &= ~UBIDI_LEVEL_OVERRIDE;
   1381        if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
   1382            if (level == 0) {
   1383                if (dirProp == B) {
   1384                    // Paragraph separators are ok with explicit level 0.
   1385                    // Prevents reordering of paragraphs.
   1386                } else {
   1387                    // Treat explicit level 0 as a wildcard for the paragraph level.
   1388                    // Avoid making the caller guess what the paragraph level would be.
   1389                    level = static_cast<UBiDiLevel>(currentParaLevel);
   1390                    levels[i] = level | overrideFlag;
   1391                }
   1392            } else {
   1393                // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
   1394                /* level out of bounds */
   1395                *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   1396                return UBIDI_LTR;
   1397            }
   1398        }
   1399        if (overrideFlag != 0) {
   1400            /* keep the override flag in levels[i] but adjust the flags */
   1401            flags|=DIRPROP_FLAG_O(level);
   1402        } else {
   1403            /* set the flags */
   1404            flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
   1405        }
   1406    }
   1407    if(flags&MASK_EMBEDDING)
   1408        flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
   1409    /* determine if the text is mixed-directional or single-directional */
   1410    pBiDi->flags=flags;
   1411    return directionFromFlags(pBiDi);
   1412 }
   1413 
   1414 /******************************************************************
   1415 The Properties state machine table
   1416 *******************************************************************
   1417 
   1418 All table cells are 8 bits:
   1419      bits 0..4:  next state
   1420      bits 5..7:  action to perform (if > 0)
   1421 
   1422 Cells may be of format "n" where n represents the next state
   1423 (except for the rightmost column).
   1424 Cells may also be of format "s(x,y)" where x represents an action
   1425 to perform and y represents the next state.
   1426 
   1427 *******************************************************************
   1428 Definitions and type for properties state table
   1429 *******************************************************************
   1430 */
   1431 #define IMPTABPROPS_COLUMNS 16
   1432 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
   1433 #define GET_STATEPROPS(cell) ((cell)&0x1f)
   1434 #define GET_ACTIONPROPS(cell) ((cell)>>5)
   1435 #define s(action, newState) ((uint8_t)(newState+(action<<5)))
   1436 
   1437 static const uint8_t groupProp[] =          /* dirProp regrouped */
   1438 {
   1439 /*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
   1440    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
   1441 };
   1442 enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
   1443 
/******************************************************************

      PROPERTIES  STATE  TABLE

 In table impTabProps,
      - each row is a state; the column order matches the values
        stored in groupProp[] (L=0 ... ENR=14)
      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
      - the Res column is the reduced property assigned to a run
      - all other cells are either a plain next state or s(action, state)

 Action 1: process current run1, init new run1
        2: init new run2
        3: process run1, process run2, init new run1
        4: process run1, set run1=run2, init new run2

 Notes:
  1) This table is used in resolveImplicitLevels().
  2) This table triggers actions when there is a change in the Bidi
     property of incoming characters (action 1).
  3) Most such property sequences are processed immediately (in
     fact, passed to processPropertySeq()).
  4) However, numbers are assembled as one sequence. This means
     that undefined situations (like CS following digits, until
     it is known if the next char will be a digit) are held until
     following chars define them.
     Example: digits followed by CS, then comes another CS or ON;
              the digits will be processed, then the CS assigned
              as the start of an ON sequence (action 3).
  5) There are cases where more than one sequence must be
     processed, for instance digits followed by CS followed by L:
     the digits must be processed as one sequence, and the CS
     must be processed as an ON sequence, all this before starting
     assembling chars for the opening L sequence.


*/
static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
{
/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,   ENL ,   ENR , Res */
/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,    18 ,    21 , DirProp_ON },
/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),s(1,18),s(1,21),  DirProp_L },
/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),s(1,18),s(1,21),  DirProp_R },
/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,s(1,18),s(1,21),  DirProp_R },
/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),    18 ,    21 , DirProp_EN },
/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),    18 ,    21 , DirProp_AN },
/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),    18 ,    21 , DirProp_ON },
/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),    18 ,    21 , DirProp_EN },
/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),    18 ,    21 , DirProp_EN },
/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),    18 ,    21 , DirProp_AN },
/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_S },
/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),s(1,18),s(1,21),  DirProp_S },
/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),s(1,18),s(1,21),  DirProp_B },
/*18 ENL         */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19),    20 ,s(2,19),    18 ,    18 , s(1,3),    18 ,    21 ,  DirProp_L },
/*19 ENL+ES/CS   */ { s(3,1), s(3,2),    18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    19 , s(4,7), s(3,3),    18 ,    21 ,  DirProp_L },
/*20 ENL+ET      */ { s(1,1), s(1,2),    18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    20 , s(1,7),    20 ,    20 , s(1,3),    18 ,    21 ,  DirProp_L },
/*21 ENR         */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22),    23 ,s(2,22),    21 ,    21 , s(1,3),    18 ,    21 , DirProp_AN },
/*22 ENR+ES/CS   */ { s(3,1), s(3,2),    21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    22 , s(4,7), s(3,3),    18 ,    21 , DirProp_AN },
/*23 ENR+ET      */ { s(1,1), s(1,2),    21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    23 , s(1,7),    23 ,    23 , s(1,3),    18 ,    21 , DirProp_AN }
};
   1507 
/*  We must undef macro s because the levels tables have a different
 *  cell structure (4 bits for action and 4 bits for next state).
 */
#undef s
   1512 
   1513 /******************************************************************
   1514 The levels state machine tables
   1515 *******************************************************************
   1516 
   1517 All table cells are 8 bits:
   1518      bits 0..3:  next state
   1519      bits 4..7:  action to perform (if > 0)
   1520 
   1521 Cells may be of format "n" where n represents the next state
   1522 (except for the rightmost column).
   1523 Cells may also be of format "s(x,y)" where x represents an action
   1524 to perform and y represents the next state.
   1525 
   1526 This format limits each table to 16 states each and to 15 actions.
   1527 
   1528 *******************************************************************
   1529 Definitions and type for levels state tables
   1530 *******************************************************************
   1531 */
   1532 #define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
   1533 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
   1534 #define GET_STATE(cell) ((cell)&0x0f)
   1535 #define GET_ACTION(cell) ((cell)>>4)
   1536 #define s(action, newState) ((uint8_t)(newState+(action<<4)))
   1537 
/* A levels state table: one row per state, IMPTABLEVELS_COLUMNS cells per row. */
typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
/* An action map: indexed by the local action number taken from a table cell. */
typedef uint8_t ImpAct[];

/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
 * instead of having a pair of ImpTab and a pair of ImpAct.
 */
/*
 * Two levels tables with their two action maps.
 * The initializers in this file put the impTabL_* table and its action map
 * at index 0 and the impTabR_* table and its action map at index 1.
 */
typedef struct ImpTabPair {
    const void * pImpTab[2];    /* addresses of ImpTab arrays */
    const void * pImpAct[2];    /* addresses of ImpAct arrays */
} ImpTabPair;
   1548 
   1549 /******************************************************************
   1550 
   1551      LEVELS  STATE  TABLES
   1552 
   1553 In all levels state tables,
   1554      - state 0 is the initial state
   1555      - the Res column is the increment to add to the text level
   1556        for this property sequence.
   1557 
   1558 The impAct arrays for each table of a pair map the local action
   1559 numbers of the table to the total list of actions. For instance,
   1560 action 2 in a given table corresponds to the action number which
   1561 appears in entry [2] of the impAct array for that table.
   1562 The first entry of all impAct arrays must be 0.
   1563 
   1564 Action 1: init conditional sequence
   1565        2: prepend conditional sequence to current sequence
   1566        3: set ON sequence to new level - 1
   1567        4: init EN/AN/ON sequence
   1568        5: fix EN/AN/ON sequence followed by R
   1569        6: set previous level sequence to level 2
   1570 
   1571 Notes:
   1572  1) These tables are used in processPropertySeq(). The input
   1573     is property sequences as determined by resolveImplicitLevels.
   1574  2) Most such property sequences are processed immediately
   1575     (levels are assigned).
   1576  3) However, some sequences cannot be assigned a final level till
   1577     one or more following sequences are received. For instance,
   1578     ON following an R sequence within an even-level paragraph.
   1579     If the following sequence is R, the ON sequence will be
   1580     assigned basic run level+1, and so will the R sequence.
   1581  4) S is generally handled like ON, since its level will be fixed
   1582     to paragraph level in adjustWSLevels().
   1583 
   1584 */
   1585 
static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ {     0 , s(2,1), s(3,3), s(3,3),     4 ,     4 ,     0 ,  0 },
/* 5 : AN+ON      */ {     0 , s(2,1),     0 , s(3,2),     5 ,     5 ,     0 ,  0 }
};
static const ImpTab impTabR_DEFAULT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
};
/* Identity action map: local action n (0..4) is action n in processPropertySeq(). */
static const ImpAct impAct0 = {0,1,2,3,4};
/* Default pair: even-level table first, odd-level table second, both with impAct0. */
static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
                                           &impTabR_DEFAULT},
                                          {&impAct0, &impAct0}};
   1616 
static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     2 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 , s(4,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  1 },
/* 3 : R+ON       */ {     0 , s(2,2), s(3,4), s(3,4),     3 ,     3 ,     0 ,  0 },
/* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
};
/* Only the even-level table is specific; the odd-level table is impTabR_DEFAULT. */
static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
                                                   &impTabR_DEFAULT},
                                                  {&impAct0, &impAct0}};
   1632 
static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L or sor/eor on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
};
static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
};
/* Both tables only use local actions 1 and 2, mapped through impAct0. */
static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
                        {&impTabL_GROUP_NUMBERS_WITH_R,
                         &impTabR_GROUP_NUMBERS_WITH_R},
                        {&impAct0, &impAct0}};
   1662 
   1663 
static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default LTR table except that EN and AN are
    handled like L.
    NOTE(review): the R+ON and AN+ON rows also differ from impTabL_DEFAULT in
    their R and B columns and in Res - confirm whether the sentence above is
    meant to cover that.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default RTL table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
};
/* Both tables only use local actions 1 and 2, mapped through impAct0. */
static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
                        {&impTabL_INVERSE_NUMBERS_AS_L,
                         &impTabR_INVERSE_NUMBERS_AS_L},
                        {&impAct0, &impAct0}};
   1694 
static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
};
/* Action map for the table above: local actions 2 and 3 stand for actions 13 and 14. */
static const ImpAct impAct1 = {0,1,13,14};
/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
 */
/* The even-level side reuses impTabL_DEFAULT with impAct0. */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
                        {&impTabL_DEFAULT,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};
   1716 
static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
};
static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The cases handled in this table are (visually):  R EN L
                                                     R L AN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
};
/* Action map for the L table: local actions 3..6 stand for actions 5..8. */
static const ImpAct impAct2 = {0,1,2,5,6,7,8};
/* Action map for the R table: local actions 2..5 stand for actions 9..12. */
static const ImpAct impAct3 = {0,1,9,10,11,12};
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct2, &impAct3}};
   1750 
/*
 * No table of its own: combines impTabL_NUMBERS_SPECIAL (with impAct0)
 * and impTabR_INVERSE_LIKE_DIRECT (with impAct1).
 */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
                        {&impTabL_NUMBERS_SPECIAL,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};
   1755 
static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
};
/*
 * The odd-level side reuses impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS;
 * local actions are mapped through impAct2 (L table) and impAct3 (R table).
 */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct2, &impAct3}};
   1771 
/* the cell-packing macro s() is only needed by the levels tables above */
#undef s
   1773 
/*
 * State passed to processPropertySeq() for each property sequence:
 * the levels table and action map in use, the current table state,
 * and the positions that pending (not yet finalized) sequences refer to.
 */
typedef struct {
    const ImpTab * pImpTab;             /* level table pointer          */
    const ImpAct * pImpAct;             /* action map array             */
    int32_t startON;                    /* start of ON sequence         */
    int32_t startL2EN;                  /* start of level 2 sequence    */
    int32_t lastStrongRTL;              /* index of last found R or AL  */
    int32_t state;                      /* current state                */
    int32_t runStart;                   /* start position of the run    */
    UBiDiLevel runLevel;                /* run level before implicit solving */
} LevState;
   1784 
   1785 /*------------------------------------------------------------------------*/
   1786 
   1787 static void
   1788 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
   1789  /* param pos:     position where to insert
   1790     param flag:    one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
   1791  */
   1792 {
   1793 #define FIRSTALLOC  10
   1794    Point point;
   1795    InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
   1796 
   1797    if (pInsertPoints->capacity == 0)
   1798    {
   1799        pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
   1800        if (pInsertPoints->points == nullptr)
   1801        {
   1802            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
   1803            return;
   1804        }
   1805        pInsertPoints->capacity=FIRSTALLOC;
   1806    }
   1807    if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
   1808    {
   1809        Point * savePoints=pInsertPoints->points;
   1810        pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
   1811                                           pInsertPoints->capacity*2*sizeof(Point)));
   1812        if (pInsertPoints->points == nullptr)
   1813        {
   1814            pInsertPoints->points=savePoints;
   1815            pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
   1816            return;
   1817        }
   1818        else  pInsertPoints->capacity*=2;
   1819    }
   1820    point.pos=pos;
   1821    point.flag=flag;
   1822    pInsertPoints->points[pInsertPoints->size]=point;
   1823    pInsertPoints->size++;
   1824 #undef FIRSTALLOC
   1825 }
   1826 
   1827 static void
   1828 setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
   1829 {
   1830    DirProp *dirProps=pBiDi->dirProps, dirProp;
   1831    UBiDiLevel *levels=pBiDi->levels;
   1832    int32_t isolateCount=0, k;
   1833    for(k=start; k<limit; k++) {
   1834        dirProp=dirProps[k];
   1835        if(dirProp==PDI)
   1836            isolateCount--;
   1837        if(isolateCount==0)
   1838            levels[k]=level;
   1839        if(dirProp==LRI || dirProp==RLI)
   1840            isolateCount++;
   1841    }
   1842 }
   1843 
   1844 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
   1845 
   1846 /*
   1847 * This implementation of the (Wn) rules applies all rules in one pass.
   1848 * In order to do so, it needs a look-ahead of typically 1 character
   1849 * (except for W5: sequences of ET) and keeps track of changes
   1850 * in a rule Wp that affect a later Wq (p<q).
   1851 *
   1852 * The (Nn) and (In) rules are also performed in that same single loop,
   1853 * but effectively one iteration behind for white space.
   1854 *
   1855 * Since all implicit rules are performed in one step, it is not necessary
   1856 * to actually store the intermediate directional properties in dirProps[].
   1857 */
   1858 
   1859 static void
   1860 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
   1861                   int32_t start, int32_t limit) {
   1862    uint8_t cell, oldStateSeq, actionSeq;
   1863    const ImpTab * pImpTab=pLevState->pImpTab;
   1864    const ImpAct * pImpAct=pLevState->pImpAct;
   1865    UBiDiLevel * levels=pBiDi->levels;
   1866    UBiDiLevel level, addLevel;
   1867    InsertPoints * pInsertPoints;
   1868    int32_t start0, k;
   1869 
   1870    start0=start;                           /* save original start position */
   1871    oldStateSeq = static_cast<uint8_t>(pLevState->state);
   1872    cell=(*pImpTab)[oldStateSeq][_prop];
   1873    pLevState->state=GET_STATE(cell);       /* isolate the new state */
   1874    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
   1875    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
   1876 
   1877    if(actionSeq) {
   1878        switch(actionSeq) {
   1879        case 1:                         /* init ON seq */
   1880            pLevState->startON=start0;
   1881            break;
   1882 
   1883        case 2:                         /* prepend ON seq to current seq */
   1884            start=pLevState->startON;
   1885            break;
   1886 
   1887        case 3:                         /* EN/AN after R+ON */
   1888            level=pLevState->runLevel+1;
   1889            setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
   1890            break;
   1891 
   1892        case 4:                         /* EN/AN before R for NUMBERS_SPECIAL */
   1893            level=pLevState->runLevel+2;
   1894            setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
   1895            break;
   1896 
   1897        case 5:                         /* L or S after possible relevant EN/AN */
   1898            /* check if we had EN after R/AL */
   1899            if (pLevState->startL2EN >= 0) {
   1900                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
   1901            }
   1902            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
   1903            /* check if we had any relevant EN/AN after R/AL */
   1904            pInsertPoints=&(pBiDi->insertPoints);
   1905            if ((pInsertPoints->capacity == 0) ||
   1906                (pInsertPoints->size <= pInsertPoints->confirmed))
   1907            {
   1908                /* nothing, just clean up */
   1909                pLevState->lastStrongRTL=-1;
   1910                /* check if we have a pending conditional segment */
   1911                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
   1912                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
   1913                    start=pLevState->startON;   /* reset to basic run level */
   1914                }
   1915                if (_prop == DirProp_S)                /* add LRM before S */
   1916                {
   1917                    addPoint(pBiDi, start0, LRM_BEFORE);
   1918                    pInsertPoints->confirmed=pInsertPoints->size;
   1919                }
   1920                break;
   1921            }
   1922            /* reset previous RTL cont to level for LTR text */
   1923            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
   1924            {
   1925                /* reset odd level, leave runLevel+2 as is */
   1926                levels[k]=(levels[k] - 2) & ~1;
   1927            }
   1928            /* mark insert points as confirmed */
   1929            pInsertPoints->confirmed=pInsertPoints->size;
   1930            pLevState->lastStrongRTL=-1;
   1931            if (_prop == DirProp_S)            /* add LRM before S */
   1932            {
   1933                addPoint(pBiDi, start0, LRM_BEFORE);
   1934                pInsertPoints->confirmed=pInsertPoints->size;
   1935            }
   1936            break;
   1937 
   1938        case 6:                         /* R/AL after possible relevant EN/AN */
   1939            /* just clean up */
   1940            pInsertPoints=&(pBiDi->insertPoints);
   1941            if (pInsertPoints->capacity > 0)
   1942                /* remove all non confirmed insert points */
   1943                pInsertPoints->size=pInsertPoints->confirmed;
   1944            pLevState->startON=-1;
   1945            pLevState->startL2EN=-1;
   1946            pLevState->lastStrongRTL=limit - 1;
   1947            break;
   1948 
   1949        case 7:                         /* EN/AN after R/AL + possible cont */
   1950            /* check for real AN */
   1951            if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
   1952                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
   1953            {
   1954                /* real AN */
   1955                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
   1956                {
   1957                    /* just note the rightmost digit as a strong RTL */
   1958                    pLevState->lastStrongRTL=limit - 1;
   1959                    break;
   1960                }
   1961                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
   1962                {
   1963                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
   1964                    pLevState->startL2EN=-2;
   1965                }
   1966                /* note AN */
   1967                addPoint(pBiDi, start0, LRM_BEFORE);
   1968                break;
   1969            }
   1970            /* if first EN/AN after R/AL */
   1971            if (pLevState->startL2EN == -1) {
   1972                pLevState->startL2EN=start0;
   1973            }
   1974            break;
   1975 
   1976        case 8:                         /* note location of latest R/AL */
   1977            pLevState->lastStrongRTL=limit - 1;
   1978            pLevState->startON=-1;
   1979            break;
   1980 
   1981        case 9:                         /* L after R+ON/EN/AN */
   1982            /* include possible adjacent number on the left */
   1983            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
   1984            if(k>=0) {
   1985                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
   1986                pInsertPoints=&(pBiDi->insertPoints);
   1987                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
   1988            }
   1989            pLevState->startON=start0;
   1990            break;
   1991 
   1992        case 10:                        /* AN after L */
   1993            /* AN numbers between L text on both sides may be trouble. */
   1994            /* tentatively bracket with LRMs; will be confirmed if followed by L */
   1995            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
   1996            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
   1997            break;
   1998 
   1999        case 11:                        /* R after L+ON/EN/AN */
   2000            /* false alert, infirm LRMs around previous AN */
   2001            pInsertPoints=&(pBiDi->insertPoints);
   2002            pInsertPoints->size=pInsertPoints->confirmed;
   2003            if (_prop == DirProp_S)            /* add RLM before S */
   2004            {
   2005                addPoint(pBiDi, start0, RLM_BEFORE);
   2006                pInsertPoints->confirmed=pInsertPoints->size;
   2007            }
   2008            break;
   2009 
   2010        case 12:                        /* L after L+ON/AN */
   2011            level=pLevState->runLevel + addLevel;
   2012            for(k=pLevState->startON; k<start0; k++) {
   2013                if (levels[k]<level)
   2014                    levels[k]=level;
   2015            }
   2016            pInsertPoints=&(pBiDi->insertPoints);
   2017            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
   2018            pLevState->startON=start0;
   2019            break;
   2020 
   2021        case 13:                        /* L after L+ON+EN/AN/ON */
   2022            level=pLevState->runLevel;
   2023            for(k=start0-1; k>=pLevState->startON; k--) {
   2024                if(levels[k]==level+3) {
   2025                    while(levels[k]==level+3) {
   2026                        levels[k--]-=2;
   2027                    }
   2028                    while(levels[k]==level) {
   2029                        k--;
   2030                    }
   2031                }
   2032                if(levels[k]==level+2) {
   2033                    levels[k]=level;
   2034                    continue;
   2035                }
   2036                levels[k]=level+1;
   2037            }
   2038            break;
   2039 
   2040        case 14:                        /* R after L+ON+EN/AN/ON */
   2041            level=pLevState->runLevel+1;
   2042            for(k=start0-1; k>=pLevState->startON; k--) {
   2043                if(levels[k]>level) {
   2044                    levels[k]-=2;
   2045                }
   2046            }
   2047            break;
   2048 
   2049        default:                        /* we should never get here */
   2050            UPRV_UNREACHABLE_EXIT;
   2051        }
   2052    }
   2053    if((addLevel) || (start < start0)) {
   2054        level=pLevState->runLevel + addLevel;
   2055        if(start>=pLevState->runStart) {
   2056            for(k=start; k<limit; k++) {
   2057                levels[k]=level;
   2058            }
   2059        } else {
   2060            setLevelsOutsideIsolates(pBiDi, start, limit, level);
   2061        }
   2062    }
   2063 }
   2064 
   2065 /**
   2066 * Looks backwards through the prologue for its last strong character:
   2067 * L gives DirProp_L, R or AL give DirProp_R. Requires prologue!=null.
   2068 */
   2069 static DirProp
   2070 lastL_R_AL(UBiDi *pBiDi) {
   2071    const char16_t *prologue=pBiDi->prologue;
   2072    int32_t pos=pBiDi->proLength;
   2073    UChar32 c;
   2074    DirProp cls;
   2075    while(pos>0) {
   2076        /* U16_PREV moves pos back to the start of the code point it reads into c */
   2077        U16_PREV(prologue, 0, pos, c);
   2078        cls = static_cast<DirProp>(ubidi_getCustomizedClass(pBiDi, c));
   2079        if(cls==L) {
   2080            return DirProp_L;
   2081        } else if(cls==R || cls==AL) {
   2082            return DirProp_R;
   2083        } else if(cls==B) {
   2084            /* a paragraph separator cuts the search short: report no strong character */
   2085            break;
   2086        }
   2087        /* any other class: keep scanning towards the start of the prologue */
   2088    }
   2089    /* DirProp_ON stands for "no strong character found" */
   2090    return DirProp_ON;
   2091 }
   2092 
   2093 /**
   2094 * Looks forwards through the epilogue for its first strong character (L, R, AL)
   2095 * or digit (EN, AN). Requires epilogue!=null.
   2096 */
   2097 static DirProp
   2098 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
   2099    const char16_t *epilogue=pBiDi->epilogue;
   2100    const int32_t epiLimit=pBiDi->epiLength;
   2101    int32_t pos=0;
   2102    UChar32 c;
   2103    DirProp cls;
   2104    while(pos<epiLimit) {
   2105        /* U16_NEXT reads one code point into c and moves pos past it */
   2106        U16_NEXT(epilogue, pos, epiLimit, c);
   2107        cls = static_cast<DirProp>(ubidi_getCustomizedClass(pBiDi, c));
   2108        if(cls==L) {
   2109            return DirProp_L;
   2110        } else if(cls==R || cls==AL) {
   2111            /* AL is reported as R */
   2112            return DirProp_R;
   2113        } else if(cls==EN) {
   2114            return DirProp_EN;
   2115        } else if(cls==AN) {
   2116            return DirProp_AN;
   2117        }
   2118        /* any other class: keep scanning towards the end of the epilogue */
   2119    }
   2120    /* DirProp_ON stands for "nothing relevant found" */
   2121    return DirProp_ON;
   2122 }
   2123 
        /*
        * Resolves the implicit levels of the text in dirProps[start..limit-1].
        *
        * The text is fed through two table-driven state machines: impTabProps
        * groups the characters into property sequences, and processPropertySeq()
        * is called for each completed sequence with the levels state kept in
        * levState.
        *
        * sor is the property presented before the first character and eor the one
        * presented after the last character. When start==0 and a prologue is set,
        * sor is replaced by the last strong property of the prologue (if any).
        * When limit==pBiDi->length and an epilogue is set, the eor used for the
        * final flush is replaced by the first strong/digit property of the
        * epilogue (if any).
        *
        * If the text starts with PDI, the machine state is restored from the
        * current pBiDi->isolates[] entry; if it ends with LRI/RLI (and is not at
        * the end of the text) the state is saved to a new entry instead of being
        * flushed.
        */
   2124 static void
   2125 resolveImplicitLevels(UBiDi *pBiDi,
   2126                      int32_t start, int32_t limit,
   2127                      DirProp sor, DirProp eor) {
   2128    const DirProp *dirProps=pBiDi->dirProps;
   2129    DirProp dirProp;
   2130    LevState levState;
   2131    int32_t i, start1, start2;
   2132    uint16_t oldStateImp, stateImp, actionImp;
   2133    uint8_t gprop, resProp, cell;
   2134    UBool inverseRTL;
   2135    DirProp nextStrongProp=R;
   2136    int32_t nextStrongPos=-1;
   2137 
   2138    /* check for RTL inverse BiDi mode */
   2139    /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
   2140     * loop on the text characters from end to start.
   2141     * This would need a different properties state table (at least different
   2142     * actions) and different levels state tables (maybe very similar to the
   2143     * LTR corresponding ones).
   2144     */
   2145    inverseRTL =
   2146        static_cast<UBool>((start < pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
   2147                           (pBiDi->reorderingMode == UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
   2148                            pBiDi->reorderingMode == UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
   2149 
   2150    /* initialize for property and levels state tables */
   2151    levState.startL2EN=-1;              /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   2152    levState.lastStrongRTL=-1;          /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
   2153    levState.runStart=start;
   2154    levState.runLevel=pBiDi->levels[start];
           /* the tables are selected by the parity of the run level */
   2155    levState.pImpTab = static_cast<const ImpTab*>(((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel & 1]);
   2156    levState.pImpAct = static_cast<const ImpAct*>(((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel & 1]);
           /* at the very start of the text, a strong character in the prologue overrides sor */
   2157    if(start==0 && pBiDi->proLength>0) {
   2158        DirProp lastStrong=lastL_R_AL(pBiDi);
   2159        if(lastStrong!=DirProp_ON) {
   2160            sor=lastStrong;
   2161        }
   2162    }
   2163    /* The isolates[] entries contain enough information to
   2164       resume the bidi algorithm in the same state as it was
   2165       when it was interrupted by an isolate sequence. */
   2166    if(dirProps[start]==PDI  && pBiDi->isolateCount >= 0) {
               /* restore the saved state and pop the isolates[] entry */
   2167        levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
   2168        start1=pBiDi->isolates[pBiDi->isolateCount].start1;
   2169        stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
   2170        levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
   2171        pBiDi->isolateCount--;
   2172    } else {
               /* fresh start: prime the levels state machine with sor */
   2173        levState.startON=-1;
   2174        start1=start;
   2175        if(dirProps[start]==NSM)
   2176            stateImp = 1 + sor;
   2177        else
   2178            stateImp=0;
   2179        levState.state=0;
   2180        processPropertySeq(pBiDi, &levState, sor, start, start);
   2181    }
   2182    start2=start;                       /* to make Java compiler happy */
   2183 
           /* one extra iteration at i==limit presents eor to the properties state machine */
   2184    for(i=start; i<=limit; i++) {
   2185        if(i>=limit) {
   2186            int32_t k;
                   /* find the last character that is not matched by MASK_BN_EXPLICIT */
   2187            for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--);
   2188            dirProp=dirProps[k];
   2189            if(dirProp==LRI || dirProp==RLI)
   2190                break;      /* no forced closing for sequence ending with LRI/RLI */
   2191            gprop=eor;
   2192        } else {
   2193            DirProp prop, prop1;
   2194            prop=dirProps[i];
   2195            if(prop==B) {
   2196                pBiDi->isolateCount=-1; /* current isolates stack entry == none */
   2197            }
   2198            if(inverseRTL) {
   2199                if(prop==AL) {
   2200                    /* AL before EN does not make it AN */
   2201                    prop=R;
   2202                } else if(prop==EN) {
                           /* nextStrongPos caches the last lookup; search again only once i has reached it */
   2203                    if(nextStrongPos<=i) {
   2204                        /* look for next strong char (L/R/AL) */
   2205                        int32_t j;
   2206                        nextStrongProp=R;   /* set default */
   2207                        nextStrongPos=limit;
   2208                        for(j=i+1; j<limit; j++) {
   2209                            prop1=dirProps[j];
   2210                            if(prop1==L || prop1==R || prop1==AL) {
   2211                                nextStrongProp=prop1;
   2212                                nextStrongPos=j;
   2213                                break;
   2214                            }
   2215                        }
   2216                    }
                           /* EN followed by AL as the next strong character is treated as AN */
   2217                    if(nextStrongProp==AL) {
   2218                        prop=AN;
   2219                    }
   2220                }
   2221            }
   2222            gprop=groupProp[prop];
   2223        }
               /* step the properties state machine: each cell holds the new state and an action */
   2224        oldStateImp=stateImp;
   2225        cell=impTabProps[oldStateImp][gprop];
   2226        stateImp=GET_STATEPROPS(cell);      /* isolate the new state */
   2227        actionImp=GET_ACTIONPROPS(cell);    /* isolate the action */
   2228        if((i==limit) && (actionImp==0)) {
   2229            /* there is an unprocessed sequence if its property == eor   */
   2230            actionImp=1;                    /* process the last sequence */
   2231        }
   2232        if(actionImp) {
                   /* resProp is the resolved property of the state being left */
   2233            resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
   2234            switch(actionImp) {
   2235            case 1:             /* process current seq1, init new seq1 */
   2236                processPropertySeq(pBiDi, &levState, resProp, start1, i);
   2237                start1=i;
   2238                break;
   2239            case 2:             /* init new seq2 */
   2240                start2=i;
   2241                break;
   2242            case 3:             /* process seq1, process seq2, init new seq1 */
   2243                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
   2244                processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
   2245                start1=i;
   2246                break;
   2247            case 4:             /* process seq1, set seq1=seq2, init new seq2 */
   2248                processPropertySeq(pBiDi, &levState, resProp, start1, start2);
   2249                start1=start2;
   2250                start2=i;
   2251                break;
   2252            default:            /* we should never get here */
   2253                UPRV_UNREACHABLE_EXIT;
   2254            }
   2255        }
   2256    }
   2257 
   2258    /* flush possible pending sequence, e.g. ON */
           /* at the very end of the text, a strong character or digit in the epilogue overrides eor */
   2259    if(limit==pBiDi->length && pBiDi->epiLength>0) {
   2260        DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
   2261        if(firstStrong!=DirProp_ON) {
   2262            eor=firstStrong;
   2263        }
   2264    }
   2265 
   2266    /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
   2267    for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--);
   2268    dirProp=dirProps[i];
   2269    if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
               /* interrupted by an isolate: push the state so that the matching PDI can resume it */
   2270        pBiDi->isolateCount++;
   2271        pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
   2272        pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
   2273        pBiDi->isolates[pBiDi->isolateCount].start1=start1;
   2274        pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
   2275    }
   2276    else
   2277        processPropertySeq(pBiDi, &levState, eor, limit, limit);
   2278 }
   2279 
   2280 /* perform (L1) and (X9) ---------------------------------------------------- */
   2281 
   2282 /*
   2283 * Reset the embedding levels for some non-graphic characters (L1).
   2284 * This function also sets appropriate levels for BN, and
   2285 * explicit embedding types that are supposed to have been removed
   2286 * from the paragraph in (X9).
        *
        * Nothing is done unless pBiDi->flags has a MASK_WS bit set. Otherwise the
        * text is walked backwards from pBiDi->trailingWSStart, alternating between
        * two inner loops: the first resets a run of characters matched by MASK_WS,
        * the second walks over the characters in front of that run until it meets
        * a B or S. Characters matched by none of the tested masks keep their level.
   2287 */
   2288 static void
   2289 adjustWSLevels(UBiDi *pBiDi) {
   2290    const DirProp *dirProps=pBiDi->dirProps;
   2291    UBiDiLevel *levels=pBiDi->levels;
   2292    int32_t i;
   2293 
   2294    if(pBiDi->flags&MASK_WS) {
   2295        UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
   2296        Flags flag;
   2297 
   2298        i=pBiDi->trailingWSStart;
   2299        while(i>0) {
   2300            /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
                   /* i is pre-decremented and flag is assigned inside the condition; the loop
                      ends at the first character not matched by MASK_WS, or when i reaches 0 */
   2301            while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
   2302                if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
                           /* with orderParagraphsLTR set, a B gets level 0 instead of the paragraph level */
   2303                    levels[i]=0;
   2304                } else {
   2305                    levels[i]=GET_PARALEVEL(pBiDi, i);
   2306                }
   2307            }
   2308 
   2309            /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
   2310            /* here, i+1 is guaranteed to be <length */
   2311            while(i>0) {
   2312                flag=DIRPROP_FLAG(dirProps[--i]);
   2313                if(flag&MASK_BN_EXPLICIT) {
                           /* walking backwards, levels[i+1] has already been finalized */
   2314                    levels[i]=levels[i+1];
   2315                } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
   2316                    levels[i]=0;
   2317                    break;
   2318                } else if(flag&MASK_B_S) {
   2319                    levels[i]=GET_PARALEVEL(pBiDi, i);
   2320                    break;
   2321                }
   2322            }
   2323        }
   2324    }
   2325 }
   2326 
   2327 /*
   2328 * Stores the context strings (pointers, not copies) and their lengths in pBiDi.
   2329 */
   2330 U_CAPI void U_EXPORT2
   2331 ubidi_setContext(UBiDi *pBiDi,
   2332                 const char16_t *prologue, int32_t proLength,
   2333                 const char16_t *epilogue, int32_t epiLength,
   2334                 UErrorCode *pErrorCode) {
   2335    bool badPrologue, badEpilogue;
   2336    /* return at once if pErrorCode is null or already holds a failure */
   2337    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2338 
   2339    /* a length must be >=-1, and a null string is only valid with length 0 */
   2340    badPrologue= proLength<-1 || (prologue==nullptr && proLength!=0);
   2341    badEpilogue= epiLength<-1 || (epilogue==nullptr && epiLength!=0);
   2342    if(pBiDi==nullptr || badPrologue || badEpilogue) {
   2343        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2344        return;
   2345    }
   2346 
   2347    /* a length of -1 asks for the string to be measured with u_strlen() */
   2348    pBiDi->prologue=prologue;
   2349    pBiDi->proLength= proLength==-1 ? u_strlen(prologue) : proLength;
   2350    pBiDi->epilogue=epilogue;
   2351    pBiDi->epiLength= epiLength==-1 ? u_strlen(epilogue) : epiLength;
   2352 }
   2353 
   2354 /* Completes a successful setPara; the context set by ubidi_setContext() is dropped. */
   2355 static void setParaSuccess(UBiDi *pBiDi) {
   2356    pBiDi->pParaBiDi=pBiDi;             /* pointing at itself marks a successful setPara */
   2357    pBiDi->epiLength=0;                 /* forget the epilogue and ... */
   2358    pBiDi->proLength=0;                 /* ... the prologue of the last context */
   2359 }
   2360 
   2361 #define BIDI_MIN(x, y)   ((x)<(y) ? (x) : (y))
   2362 #define BIDI_ABS(x)      ((x)>=0  ? (x) : (-(x)))
        /* NOTE: both macros evaluate their arguments more than once;
           only pass expressions without side effects. */
   2363 
        /*
        * Implements ubidi_setPara() for UBIDI_REORDER_RUNS_ONLY.
        *
        * Steps:
        * (1) run ubidi_setPara() in UBIDI_REORDER_DEFAULT mode and save the levels;
        * (2) write the reordered (visual) text and get the visual map;
        * (3) run ubidi_setPara() on the visual text in
        *     UBIDI_REORDER_INVERSE_LIKE_DIRECT mode with the opposite paragraph level;
        * (4) split the resulting runs wherever neighbouring characters are not
        *     adjacent in the source text or had different saved levels, and express
        *     the run starts as positions in the source text;
        * (5) restore the text, length, direction, levels and trailingWSStart
        *     obtained in step (1).
        *
        * pBiDi       object to set up; its reorderingMode is always left at
        *             UBIDI_REORDER_RUNS_ONLY on return
        * text/length source text; length has already been resolved by the caller
        * paraLevel   only its lowest bit is used once the text is not empty
        * pErrorCode  receives the failure of any step, including allocation
        *             failures; reorderingOptions is restored on every failure path
        */
   2364 static void
   2365 setParaRunsOnly(UBiDi *pBiDi, const char16_t *text, int32_t length,
   2366                UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
   2367    int32_t *runsOnlyMemory = nullptr;
   2368    int32_t *visualMap;
   2369    char16_t *visualText;
   2370    int32_t saveLength, saveTrailingWSStart;
   2371    const UBiDiLevel *levels;
   2372    UBiDiLevel *saveLevels;
   2373    UBiDiDirection saveDirection;
   2374    UBool saveMayAllocateText;
   2375    Run *runs;
   2376    int32_t visualLength, i, j, visualStart, logicalStart,
   2377            runCount, runLength, addedRuns, insertRemove,
   2378            start, limit, step, indexOddBit, logicalPos,
   2379            index0, index1;
   2380    uint32_t saveOptions;
   2381 
           /* leave RUNS_ONLY mode so that the nested ubidi_setPara() calls do not recurse here */
   2382    pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
   2383    if(length==0) {
   2384        ubidi_setPara(pBiDi, text, length, paraLevel, nullptr, pErrorCode);
   2385        goto cleanup3;
   2386    }
   2387    /* obtain memory for mapping table and visual text */
           /* one allocation holds visualMap[length], visualText[length] and saveLevels[length] */
   2388    runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(char16_t)+sizeof(UBiDiLevel))));
   2389    if(runsOnlyMemory==nullptr) {
   2390        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2391        goto cleanup3;
   2392    }
   2393    visualMap=runsOnlyMemory;
   2394    visualText = reinterpret_cast<char16_t*>(&visualMap[length]);
   2395    saveLevels = reinterpret_cast<UBiDiLevel*>(&visualText[length]);
   2396    saveOptions=pBiDi->reorderingOptions;
           /* for the first pass, replace "insert marks" by "remove controls" */
   2397    if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
   2398        pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
   2399        pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
   2400    }
   2401    paraLevel&=1;                       /* accept only 0 or 1 */
   2402    ubidi_setPara(pBiDi, text, length, paraLevel, nullptr, pErrorCode);
   2403    if(U_FAILURE(*pErrorCode)) {
               pBiDi->reorderingOptions=saveOptions;   /* undo the temporary option change */
   2404        goto cleanup3;
   2405    }
   2406    /* we cannot access directly pBiDi->levels since it is not yet set if
   2407     * direction is not MIXED
   2408     */
   2409    levels=ubidi_getLevels(pBiDi, pErrorCode);
           /* ubidi_getLevels() returns null on failure; do not copy from it */
           if(U_FAILURE(*pErrorCode) || levels==nullptr) {
               if(U_SUCCESS(*pErrorCode)) {
                   *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
               }
               pBiDi->reorderingOptions=saveOptions;   /* undo the temporary option change */
               goto cleanup3;
           }
   2410    uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
   2411    saveTrailingWSStart=pBiDi->trailingWSStart;
   2412    saveLength=pBiDi->length;
   2413    saveDirection=pBiDi->direction;
   2414 
   2415    /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
   2416     * the visual map and the dirProps array to drive the second call
   2417     * to ubidi_setPara (but must make provision for possible removal of
   2418     * BiDi controls.  Alternatively, only use the dirProps array via
   2419     * customized classifier callback.
   2420     */
   2421    visualLength=ubidi_writeReordered(pBiDi, visualText, length,
   2422                                      UBIDI_DO_MIRRORING, pErrorCode);
   2423    ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
   2424    if(U_FAILURE(*pErrorCode)) {
               pBiDi->reorderingOptions=saveOptions;   /* undo the temporary option change */
   2425        goto cleanup2;
   2426    }
   2427    pBiDi->reorderingOptions=saveOptions;
   2428 
   2429    pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
   2430    paraLevel^=1;
   2431    /* Because what we did with reorderingOptions, visualText may be shorter
   2432     * than the original text. But we don't want the levels memory to be
   2433     * reallocated shorter than the original length, since we need to restore
   2434     * the levels as after the first call to ubidi_setpara() before returning.
   2435     * We will force mayAllocateText to false before the second call to
   2436     * ubidi_setpara(), and will restore it afterwards.
   2437     */
   2438    saveMayAllocateText=pBiDi->mayAllocateText;
   2439    pBiDi->mayAllocateText=false;
   2440    ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, nullptr, pErrorCode);
   2441    pBiDi->mayAllocateText=saveMayAllocateText;
   2442    ubidi_getRuns(pBiDi, pErrorCode);
   2443    if(U_FAILURE(*pErrorCode)) {
   2444        goto cleanup1;
   2445    }
   2446    /* check if some runs must be split, count how many splits */
   2447    addedRuns=0;
   2448    runCount=pBiDi->runCount;
   2449    runs=pBiDi->runs;
   2450    visualStart=0;
   2451    for(i=0; i<runCount; i++, visualStart+=runLength) {
   2452        runLength=runs[i].visualLimit-visualStart;
   2453        if(runLength<2) {
   2454            continue;
   2455        }
   2456        logicalStart=GET_INDEX(runs[i].logicalStart);
               /* a split is needed between two neighbours that are not adjacent in the
                  source text or that had different levels after the first pass */
   2457        for(j=logicalStart+1; j<logicalStart+runLength; j++) {
   2458            index0=visualMap[j];
   2459            index1=visualMap[j-1];
   2460            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
   2461                addedRuns++;
   2462            }
   2463        }
   2464    }
   2465    if(addedRuns) {
   2466        if(getRunsMemory(pBiDi, runCount+addedRuns)) {
   2467            if(runCount==1) {
   2468                /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
   2469                pBiDi->runsMemory[0]=runs[0];
   2470            }
   2471            runs=pBiDi->runs=pBiDi->runsMemory;
   2472            pBiDi->runCount+=addedRuns;
   2473        } else {
                   /* report the allocation failure instead of returning unsplit runs */
                   *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2474            goto cleanup1;
   2475        }
   2476    }
   2477    /* split runs which are not consecutive in source text */
           /* the runs are processed last to first and moved up by the number of
              splits still pending (addedRuns), so no unprocessed run is overwritten */
   2478    for(i=runCount-1; i>=0; i--) {
   2479        runLength= i==0 ? runs[0].visualLimit :
   2480                          runs[i].visualLimit-runs[i-1].visualLimit;
   2481        logicalStart=runs[i].logicalStart;
   2482        indexOddBit=GET_ODD_BIT(logicalStart);
   2483        logicalStart=GET_INDEX(logicalStart);
   2484        if(runLength<2) {
                   /* a run of one character cannot be split: move it and map its start */
   2485            if(addedRuns) {
   2486                runs[i+addedRuns]=runs[i];
   2487            }
   2488            logicalPos=visualMap[logicalStart];
   2489            runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   2490                                            saveLevels[logicalPos]^indexOddBit);
   2491            continue;
   2492        }
               /* choose the walking direction according to the odd bit of the run */
   2493        if(indexOddBit) {
   2494            start=logicalStart;
   2495            limit=logicalStart+runLength-1;
   2496            step=1;
   2497        } else {
   2498            start=logicalStart+runLength-1;
   2499            limit=logicalStart;
   2500            step=-1;
   2501        }
   2502        for(j=start; j!=limit; j+=step) {
   2503            index0=visualMap[j];
   2504            index1=visualMap[j+step];
   2505            if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
                       /* cut off the part walked so far as a run of its own */
   2506                logicalPos=BIDI_MIN(visualMap[start], index0);
   2507                runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   2508                                            saveLevels[logicalPos]^indexOddBit);
   2509                runs[i+addedRuns].visualLimit=runs[i].visualLimit;
   2510                runs[i].visualLimit-=BIDI_ABS(j-start)+1;
                       /* the "after" marks go with the cut-off part */
   2511                insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
   2512                runs[i+addedRuns].insertRemove=insertRemove;
   2513                runs[i].insertRemove&=~insertRemove;
   2514                start=j+step;
   2515                addedRuns--;
   2516            }
   2517        }
   2518        if(addedRuns) {
   2519            runs[i+addedRuns]=runs[i];
   2520        }
   2521        logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
   2522        runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
   2523                                            saveLevels[logicalPos]^indexOddBit);
   2524    }
   2525 
   2526  cleanup1:
   2527    /* restore initial paraLevel */
   2528    pBiDi->paraLevel^=1;
   2529  cleanup2:
   2530    /* restore real text */
   2531    pBiDi->text=text;
   2532    pBiDi->length=saveLength;
   2533    pBiDi->originalLength=length;
   2534    pBiDi->direction=saveDirection;
   2535    /* the saved levels should never exceed levelsSize, but we check anyway */
   2536    if(saveLength>pBiDi->levelsSize) {
   2537        saveLength=pBiDi->levelsSize;
   2538    }
           /* ubidi_setPara() resets levels to null before it can fail, so a failed
              second pass may have left no levels array to restore into */
           if(pBiDi->levels!=nullptr) {
   2539        uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
           }
   2540    pBiDi->trailingWSStart=saveTrailingWSStart;
   2541    if(pBiDi->runCount>1) {
   2542        pBiDi->direction=UBIDI_MIXED;
   2543    }
   2544  cleanup3:
   2545    /* free memory for mapping table and visual text */
   2546    uprv_free(runsOnlyMemory);
   2547 
   2548    pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
   2549 }
   2550 
   2551 /* ubidi_setPara ------------------------------------------------------------ */
   2552 
   2553 U_CAPI void U_EXPORT2
   2554 ubidi_setPara(UBiDi *pBiDi, const char16_t *text, int32_t length,
   2555              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
   2556              UErrorCode *pErrorCode) {
   2557    UBiDiDirection direction;
   2558    DirProp *dirProps;
   2559 
   2560    /* check the argument values */
   2561    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2562    if(pBiDi==nullptr || text==nullptr || length<-1 ||
   2563       (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
   2564        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2565        return;
   2566    }
   2567 
   2568    if(length==-1) {
   2569        length=u_strlen(text);
   2570    }
   2571 
   2572    /* special treatment for RUNS_ONLY mode */
   2573    if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
   2574        setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
   2575        return;
   2576    }
   2577 
   2578    /* initialize the UBiDi structure */
   2579    pBiDi->pParaBiDi=nullptr;          /* mark unfinished setPara */
   2580    pBiDi->text=text;
   2581    pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
   2582    pBiDi->paraLevel=paraLevel;
   2583    pBiDi->direction=(UBiDiDirection)(paraLevel&1);
   2584    pBiDi->paraCount=1;
   2585 
   2586    pBiDi->dirProps=nullptr;
   2587    pBiDi->levels=nullptr;
   2588    pBiDi->runs=nullptr;
   2589    pBiDi->insertPoints.size=0;         /* clean up from last call */
   2590    pBiDi->insertPoints.confirmed=0;    /* clean up from last call */
   2591 
   2592    /*
   2593     * Save the original paraLevel if contextual; otherwise, set to 0.
   2594     */
   2595    pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
   2596 
   2597    if(length==0) {
   2598        /*
   2599         * For an empty paragraph, create a UBiDi object with the paraLevel and
   2600         * the flags and the direction set but without allocating zero-length arrays.
   2601         * There is nothing more to do.
   2602         */
   2603        if(IS_DEFAULT_LEVEL(paraLevel)) {
   2604            pBiDi->paraLevel&=1;
   2605            pBiDi->defaultParaLevel=0;
   2606        }
   2607        pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
   2608        pBiDi->runCount=0;
   2609        pBiDi->paraCount=0;
   2610        setParaSuccess(pBiDi);          /* mark successful setPara */
   2611        return;
   2612    }
   2613 
   2614    pBiDi->runCount=-1;
   2615 
   2616    /* allocate paras memory */
   2617    if(pBiDi->parasMemory)
   2618        pBiDi->paras=pBiDi->parasMemory;
   2619    else
   2620        pBiDi->paras=pBiDi->simpleParas;
   2621 
   2622    /*
   2623     * Get the directional properties,
   2624     * the flags bit-set, and
   2625     * determine the paragraph level if necessary.
   2626     */
   2627    if(getDirPropsMemory(pBiDi, length)) {
   2628        pBiDi->dirProps=pBiDi->dirPropsMemory;
   2629        if(!getDirProps(pBiDi)) {
   2630            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2631            return;
   2632        }
   2633    } else {
   2634        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2635        return;
   2636    }
   2637    dirProps=pBiDi->dirProps;
   2638    /* the processed length may have changed if UBIDI_OPTION_STREAMING */
   2639    length= pBiDi->length;
   2640    pBiDi->trailingWSStart=length;  /* the levels[] will reflect the WS run */
   2641 
   2642    /* are explicit levels specified? */
   2643    if(embeddingLevels==nullptr) {
   2644        /* no: determine explicit levels according to the (Xn) rules */\
   2645        if(getLevelsMemory(pBiDi, length)) {
   2646            pBiDi->levels=pBiDi->levelsMemory;
   2647            direction=resolveExplicitLevels(pBiDi, pErrorCode);
   2648            if(U_FAILURE(*pErrorCode)) {
   2649                return;
   2650            }
   2651        } else {
   2652            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2653            return;
   2654        }
   2655    } else {
   2656        /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
   2657        pBiDi->levels=embeddingLevels;
   2658        direction=checkExplicitLevels(pBiDi, pErrorCode);
   2659        if(U_FAILURE(*pErrorCode)) {
   2660            return;
   2661        }
   2662    }
   2663 
   2664    /* allocate isolate memory */
   2665    if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
   2666        pBiDi->isolates=pBiDi->simpleIsolates;
   2667    else
   2668        if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
   2669            pBiDi->isolates=pBiDi->isolatesMemory;
   2670        else {
   2671            if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
   2672                pBiDi->isolates=pBiDi->isolatesMemory;
   2673            } else {
   2674                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
   2675                return;
   2676            }
   2677        }
   2678    pBiDi->isolateCount=-1;             /* current isolates stack entry == none */
   2679 
   2680    /*
   2681     * The steps after (X9) in the UBiDi algorithm are performed only if
   2682     * the paragraph text has mixed directionality!
   2683     */
   2684    pBiDi->direction=direction;
   2685    switch(direction) {
   2686    case UBIDI_LTR:
   2687        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
   2688        pBiDi->trailingWSStart=0;
   2689        break;
   2690    case UBIDI_RTL:
   2691        /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
   2692        pBiDi->trailingWSStart=0;
   2693        break;
   2694    default:
   2695        /*
   2696         *  Choose the right implicit state table
   2697         */
   2698        switch(pBiDi->reorderingMode) {
   2699        case UBIDI_REORDER_DEFAULT:
   2700            pBiDi->pImpTabPair=&impTab_DEFAULT;
   2701            break;
   2702        case UBIDI_REORDER_NUMBERS_SPECIAL:
   2703            pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
   2704            break;
   2705        case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
   2706            pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
   2707            break;
   2708        case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
   2709            pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
   2710            break;
   2711        case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
   2712            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   2713                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
   2714            } else {
   2715                pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
   2716            }
   2717            break;
   2718        case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
   2719            if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
   2720                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
   2721            } else {
   2722                pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
   2723            }
   2724            break;
   2725        default:
   2726            /* we should never get here */
   2727            UPRV_UNREACHABLE_EXIT;
   2728        }
   2729        /*
   2730         * If there are no external levels specified and there
   2731         * are no significant explicit level codes in the text,
   2732         * then we can treat the entire paragraph as one run.
   2733         * Otherwise, we need to perform the following rules on runs of
   2734         * the text with the same embedding levels. (X10)
   2735         * "Significant" explicit level codes are ones that actually
   2736         * affect non-BN characters.
   2737         * Examples for "insignificant" ones are empty embeddings
   2738         * LRE-PDF, LRE-RLE-PDF-PDF, etc.
   2739         */
   2740        if(embeddingLevels==nullptr && pBiDi->paraCount<=1 &&
   2741                                   !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
   2742            resolveImplicitLevels(pBiDi, 0, length,
   2743                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
   2744                                    GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
   2745        } else {
   2746            /* sor, eor: start and end types of same-level-run */
   2747            UBiDiLevel *levels=pBiDi->levels;
   2748            int32_t start, limit=0;
   2749            UBiDiLevel level, nextLevel;
   2750            DirProp sor, eor;
   2751 
   2752            /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
   2753            level=GET_PARALEVEL(pBiDi, 0);
   2754            nextLevel=levels[0];
   2755            if(level<nextLevel) {
   2756                eor=GET_LR_FROM_LEVEL(nextLevel);
   2757            } else {
   2758                eor=GET_LR_FROM_LEVEL(level);
   2759            }
   2760 
   2761            do {
   2762                /* determine start and limit of the run (end points just behind the run) */
   2763 
   2764                /* the values for this run's start are the same as for the previous run's end */
   2765                start=limit;
   2766                level=nextLevel;
   2767                if((start>0) && (dirProps[start-1]==B)) {
   2768                    /* except if this is a new paragraph, then set sor = para level */
   2769                    sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
   2770                } else {
   2771                    sor=eor;
   2772                }
   2773 
   2774                /* search for the limit of this run */
   2775                while((++limit<length) &&
   2776                      ((levels[limit]==level) ||
   2777                       (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
   2778 
   2779                /* get the correct level of the next run */
   2780                if(limit<length) {
   2781                    nextLevel=levels[limit];
   2782                } else {
   2783                    nextLevel=GET_PARALEVEL(pBiDi, length-1);
   2784                }
   2785 
   2786                /* determine eor from max(level, nextLevel); sor is last run's eor */
   2787                if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
   2788                    eor=GET_LR_FROM_LEVEL(nextLevel);
   2789                } else {
   2790                    eor=GET_LR_FROM_LEVEL(level);
   2791                }
   2792 
   2793                /* if the run consists of overridden directional types, then there
   2794                   are no implicit types to be resolved */
   2795                if(!(level&UBIDI_LEVEL_OVERRIDE)) {
   2796                    resolveImplicitLevels(pBiDi, start, limit, sor, eor);
   2797                } else {
   2798                    /* remove the UBIDI_LEVEL_OVERRIDE flags */
   2799                    do {
   2800                        levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
   2801                    } while(start<limit);
   2802                }
   2803            } while(limit<length);
   2804        }
   2805        /* check if we got any memory shortage while adding insert points */
   2806        if (U_FAILURE(pBiDi->insertPoints.errorCode))
   2807        {
   2808            *pErrorCode=pBiDi->insertPoints.errorCode;
   2809            return;
   2810        }
   2811        /* reset the embedding levels for some non-graphic characters (L1), (X9) */
   2812        adjustWSLevels(pBiDi);
   2813        break;
   2814    }
   2815    /* add RLM for inverse Bidi with contextual orientation resolving
   2816     * to RTL which would not round-trip otherwise
   2817     */
   2818    if((pBiDi->defaultParaLevel>0) &&
   2819       (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
   2820       ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
   2821        (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
   2822        int32_t i, j, start, last;
   2823        UBiDiLevel level;
   2824        DirProp dirProp;
   2825        for(i=0; i<pBiDi->paraCount; i++) {
   2826            last=(pBiDi->paras[i].limit)-1;
   2827            level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
   2828            if(level==0)
   2829                continue;           /* LTR paragraph */
   2830            start= i==0 ? 0 : pBiDi->paras[i-1].limit;
   2831            for(j=last; j>=start; j--) {
   2832                dirProp=dirProps[j];
   2833                if(dirProp==L) {
   2834                    if(j<last) {
   2835                        while(dirProps[last]==B) {
   2836                            last--;
   2837                        }
   2838                    }
   2839                    addPoint(pBiDi, last, RLM_BEFORE);
   2840                    break;
   2841                }
   2842                if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
   2843                    break;
   2844                }
   2845            }
   2846        }
   2847    }
   2848 
   2849    if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
   2850        pBiDi->resultLength -= pBiDi->controlCount;
   2851    } else {
   2852        pBiDi->resultLength += pBiDi->insertPoints.size;
   2853    }
   2854    setParaSuccess(pBiDi);              /* mark successful setPara */
   2855 }
   2856 
   2857 U_CAPI void U_EXPORT2
   2858 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
   2859    if(pBiDi!=nullptr) {
   2860        pBiDi->orderParagraphsLTR=orderParagraphsLTR;
   2861    }
   2862 }
   2863 
   2864 U_CAPI UBool U_EXPORT2
   2865 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
   2866    if(pBiDi!=nullptr) {
   2867        return pBiDi->orderParagraphsLTR;
   2868    } else {
   2869        return false;
   2870    }
   2871 }
   2872 
   2873 U_CAPI UBiDiDirection U_EXPORT2
   2874 ubidi_getDirection(const UBiDi *pBiDi) {
   2875    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2876        return pBiDi->direction;
   2877    } else {
   2878        return UBIDI_LTR;
   2879    }
   2880 }
   2881 
   2882 U_CAPI const char16_t * U_EXPORT2
   2883 ubidi_getText(const UBiDi *pBiDi) {
   2884    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2885        return pBiDi->text;
   2886    } else {
   2887        return nullptr;
   2888    }
   2889 }
   2890 
   2891 U_CAPI int32_t U_EXPORT2
   2892 ubidi_getLength(const UBiDi *pBiDi) {
   2893    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2894        return pBiDi->originalLength;
   2895    } else {
   2896        return 0;
   2897    }
   2898 }
   2899 
   2900 U_CAPI int32_t U_EXPORT2
   2901 ubidi_getProcessedLength(const UBiDi *pBiDi) {
   2902    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2903        return pBiDi->length;
   2904    } else {
   2905        return 0;
   2906    }
   2907 }
   2908 
   2909 U_CAPI int32_t U_EXPORT2
   2910 ubidi_getResultLength(const UBiDi *pBiDi) {
   2911    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2912        return pBiDi->resultLength;
   2913    } else {
   2914        return 0;
   2915    }
   2916 }
   2917 
   2918 /* paragraphs API functions ------------------------------------------------- */
   2919 
   2920 U_CAPI UBiDiLevel U_EXPORT2
   2921 ubidi_getParaLevel(const UBiDi *pBiDi) {
   2922    if(IS_VALID_PARA_OR_LINE(pBiDi)) {
   2923        return pBiDi->paraLevel;
   2924    } else {
   2925        return 0;
   2926    }
   2927 }
   2928 
   2929 U_CAPI int32_t U_EXPORT2
   2930 ubidi_countParagraphs(UBiDi *pBiDi) {
   2931    if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
   2932        return 0;
   2933    } else {
   2934        return pBiDi->paraCount;
   2935    }
   2936 }
   2937 
   2938 U_CAPI void U_EXPORT2
   2939 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
   2940                          int32_t *pParaStart, int32_t *pParaLimit,
   2941                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
   2942    int32_t paraStart;
   2943 
   2944    /* check the argument values */
   2945    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2946    RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
   2947    RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
   2948 
   2949    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
   2950    if(paraIndex) {
   2951        paraStart=pBiDi->paras[paraIndex-1].limit;
   2952    } else {
   2953        paraStart=0;
   2954    }
   2955    if(pParaStart!=nullptr) {
   2956        *pParaStart=paraStart;
   2957    }
   2958    if(pParaLimit!=nullptr) {
   2959        *pParaLimit=pBiDi->paras[paraIndex].limit;
   2960    }
   2961    if(pParaLevel!=nullptr) {
   2962        *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
   2963    }
   2964 }
   2965 
   2966 U_CAPI int32_t U_EXPORT2
   2967 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
   2968                          int32_t *pParaStart, int32_t *pParaLimit,
   2969                          UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
   2970    int32_t paraIndex;
   2971 
   2972    /* check the argument values */
   2973    /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
   2974    RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
   2975    RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
   2976    pBiDi=pBiDi->pParaBiDi;             /* get Para object if Line object */
   2977    RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
   2978 
   2979    for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
   2980    ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
   2981    return paraIndex;
   2982 }
   2983 
   2984 U_CAPI void U_EXPORT2
   2985 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
   2986                       const void *newContext, UBiDiClassCallback **oldFn,
   2987                       const void **oldContext, UErrorCode *pErrorCode)
   2988 {
   2989    RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
   2990    if(pBiDi==nullptr) {
   2991        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
   2992        return;
   2993    }
   2994    if( oldFn )
   2995    {
   2996        *oldFn = pBiDi->fnClassCallback;
   2997    }
   2998    if( oldContext )
   2999    {
   3000        *oldContext = pBiDi->coClassCallback;
   3001    }
   3002    pBiDi->fnClassCallback = newFn;
   3003    pBiDi->coClassCallback = newContext;
   3004 }
   3005 
   3006 U_CAPI void U_EXPORT2
   3007 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
   3008 {
   3009    if(pBiDi==nullptr) {
   3010        return;
   3011    }
   3012    if( fn )
   3013    {
   3014        *fn = pBiDi->fnClassCallback;
   3015    }
   3016    if( context )
   3017    {
   3018        *context = pBiDi->coClassCallback;
   3019    }
   3020 }
   3021 
   3022 U_CAPI UCharDirection U_EXPORT2
   3023 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
   3024 {
   3025    UCharDirection dir;
   3026 
   3027    if( pBiDi->fnClassCallback == nullptr ||
   3028        (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
   3029    {
   3030        dir = ubidi_getClass(c);
   3031    }
   3032    if(dir >= U_CHAR_DIRECTION_COUNT) {
   3033        dir = (UCharDirection)ON;
   3034    }
   3035    return dir;
   3036 }