tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucnvscsu.cpp (76217B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 2000-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *   file name:  ucnvscsu.c
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2000nov18
     16 *   created by: Markus W. Scherer
     17 *
     18 *   This is an implementation of the Standard Compression Scheme for Unicode
     19 *   as defined in https://www.unicode.org/reports/tr6/ .
     20 *   Reserved commands and window settings are treated as illegal sequences and
     21 *   will result in callback calls.
     22 */
     23 
     24 #include "unicode/utypes.h"
     25 
     26 #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
     27 
     28 #include "unicode/ucnv.h"
     29 #include "unicode/ucnv_cb.h"
     30 #include "unicode/utf16.h"
     31 #include "ucnv_bld.h"
     32 #include "ucnv_cnv.h"
     33 #include "cmemory.h"
     34 
     35 /* SCSU definitions --------------------------------------------------------- */
     36 
/*
 * SCSU command byte values.
 *
 * The S* values are the ones tested by the single-byte-mode branch of the
 * decoder, the U* values by its Unicode-mode branch.
 * For commands that carry a window number (0..7) only the first and the last
 * value of the range are named; the decoder uses range checks and subtracts
 * the first value to get the window index.
 */
enum {
    SQ0=0x01, /* Quote from window pair 0 */
    SQ7=0x08, /* Quote from window pair 7 */
    SDX=0x0B, /* Define a window as extended */
    Srs=0x0C, /* reserved */
    SQU=0x0E, /* Quote a single Unicode character */
    SCU=0x0F, /* Change to Unicode mode */
    SC0=0x10, /* Select window 0 */
    SC7=0x17, /* Select window 7 */
    SD0=0x18, /* Define and select window 0 */
    SD7=0x1F, /* Define and select window 7 */

    UC0=0xE0, /* Select window 0 (and return to single-byte mode) */
    UC7=0xE7, /* Select window 7 (and return to single-byte mode) */
    UD0=0xE8, /* Define and select window 0 */
    UD7=0xEF, /* Define and select window 7 */
    UQU=0xF0, /* Quote a single Unicode character */
    UDX=0xF1, /* Define a Window as extended */
    Urs=0xF2  /* reserved */
};
     58 
/*
 * Constants for interpreting the one-byte operand of a "define window"
 * command (see the defineOne state of the decoder):
 * - operand < gapThreshold:                 offset = operand<<7
 * - gapThreshold <= operand < reservedStart: offset = (operand<<7)+gapOffset
 * - reservedStart <= operand < fixedThreshold: reserved
 * - operand >= fixedThreshold:              offset = fixedOffsets[operand-fixedThreshold]
 */
enum {
    /*
     * Unicode code points from 3400 to E000 are not addressable by
     * dynamic window, since in these areas no short run alphabets are
     * found. Therefore add gapOffset to all values from gapThreshold.
     */
    gapThreshold=0x68,
    gapOffset=0xAC00,

    /* values between reservedStart and fixedThreshold are reserved */
    reservedStart=0xA8,

    /* use table of predefined fixed offsets for values from fixedThreshold */
    fixedThreshold=0xF9
};
     74 
/*
 * constant offsets for the 8 static windows
 *
 * Indexed by the quote window number; the decoder adds a quoted byte <0x80
 * to the selected entry. All entries are in the BMP.
 */
static const uint32_t staticOffsets[8]={
    0x0000, /* ASCII for quoted tags */
    0x0080, /* Latin - 1 Supplement (for access to punctuation) */
    0x0100, /* Latin Extended-A */
    0x0300, /* Combining Diacritical Marks */
    0x2000, /* General Punctuation */
    0x2080, /* Currency Symbols */
    0x2100, /* Letterlike Symbols and Number Forms */
    0x3000  /* CJK Symbols and punctuation */
};
     86 
/*
 * initial offsets for the 8 dynamic (sliding) windows
 *
 * _SCSUReset() copies this table into both the toUnicode and the
 * fromUnicode dynamic offset arrays of the converter state.
 */
static const uint32_t initialDynamicOffsets[8]={
    0x0080, /* Latin-1 */
    0x00C0, /* Latin Extended A */
    0x0400, /* Cyrillic */
    0x0600, /* Arabic */
    0x0900, /* Devanagari */
    0x3040, /* Hiragana */
    0x30A0, /* Katakana */
    0xFF00  /* Fullwidth ASCII */
};
     98 
/*
 * Table of fixed predefined Offsets
 *
 * Indexed by (operand - fixedThreshold) for define-window operands
 * fixedThreshold..0xFF; the leading comment on each row is that operand.
 */
static const uint32_t fixedOffsets[]={
    /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
    /* 0xFA */ 0x0250, /* IPA extensions */
    /* 0xFB */ 0x0370, /* Greek */
    /* 0xFC */ 0x0530, /* Armenian */
    /* 0xFD */ 0x3040, /* Hiragana */
    /* 0xFE */ 0x30A0, /* Katakana */
    /* 0xFF */ 0xFF60  /* Halfwidth Katakana */
};
    109 
/*
 * state values
 *
 * States of the toUnicode state machine; each names what the next input
 * byte is expected to be.
 */
enum {
    readCommand,    /* a command byte or a directly encoded character */
    quotePairOne,   /* first byte of a quoted 16-bit unit (after SQU/UQU) */
    quotePairTwo,   /* second byte of a 16-bit unit */
    quoteOne,       /* the single byte quoted from a window (after SQ0..SQ7) */
    definePairOne,  /* first operand byte of an extended window definition (after SDX/UDX) */
    definePairTwo,  /* second operand byte of an extended window definition */
    defineOne       /* the operand byte of a window definition (after SD0..SD7/UD0..UD7) */
};
    120 
/*
 * Per-converter SCSU state; allocated by _SCSUOpen() and stored in
 * UConverter.extraInfo. The toU* fields belong to the decoder, the
 * fromU* fields and the window-use bookkeeping to the encoder direction.
 */
typedef struct SCSUData {
    /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
    uint32_t toUDynamicOffsets[8];
    uint32_t fromUDynamicOffsets[8];

    /* state machine state - toUnicode */
    UBool toUIsSingleByteMode;  /* true: single-byte mode, false: Unicode mode */
    uint8_t toUState;           /* one of the state values (readCommand etc.) */
    int8_t toUQuoteWindow, toUDynamicWindow; /* window numbers 0..7 */
    uint8_t toUByteOne;         /* first byte/partial value of a sequence that is still incomplete */
    uint8_t toUPadding[3];      /* not referenced by the code visible here; presumably alignment padding */

    /* state machine state - fromUnicode */
    UBool fromUIsSingleByteMode;
    int8_t fromUDynamicWindow;

    /*
     * locale is lGeneric or l_ja (set in _SCSUOpen()) and selects the
     * initial windowUse[] order in _SCSUReset().
     *
     * windowUse[] keeps track of the use of the dynamic windows:
     * At nextWindowUseIndex there is the least recently used window,
     * and the following windows (in a wrapping manner) are more and more
     * recently used.
     * At nextWindowUseIndex-1 there is the most recently used window.
     */
    uint8_t locale;
    int8_t nextWindowUseIndex;
    int8_t windowUse[8];
} SCSUData;
    148 
/*
 * Initial contents of SCSUData.windowUse[] (dynamic window numbers in
 * least-to-most recently used order, starting at nextWindowUseIndex==0):
 * the generic order, and the order used when the converter locale is "ja".
 */
static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 };
static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 };
    151 
/* values for SCSUData.locale: generic, or Japanese ("ja" / "ja_...") */
enum {
    lGeneric, l_ja
};
    155 
    156 /* SCSU setup functions ----------------------------------------------------- */
    157 U_CDECL_BEGIN
    158 static void U_CALLCONV
    159 _SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
    160    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
    161 
    162    if(choice<=UCNV_RESET_TO_UNICODE) {
    163        /* reset toUnicode */
    164        uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
    165 
    166        scsu->toUIsSingleByteMode=true;
    167        scsu->toUState=readCommand;
    168        scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
    169        scsu->toUByteOne=0;
    170 
    171        cnv->toULength=0;
    172    }
    173    if(choice!=UCNV_RESET_TO_UNICODE) {
    174        /* reset fromUnicode */
    175        uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
    176 
    177        scsu->fromUIsSingleByteMode=true;
    178        scsu->fromUDynamicWindow=0;
    179 
    180        scsu->nextWindowUseIndex=0;
    181        switch(scsu->locale) {
    182        case l_ja:
    183            uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
    184            break;
    185        default:
    186            uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
    187            break;
    188        }
    189 
    190        cnv->fromUChar32=0;
    191    }
    192 }
    193 
    194 static void U_CALLCONV
    195 _SCSUOpen(UConverter *cnv,
    196          UConverterLoadArgs *pArgs,
    197          UErrorCode *pErrorCode) {
    198    const char *locale=pArgs->locale;
    199    if(pArgs->onlyTestIsLoadable) {
    200        return;
    201    }
    202    cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
    203    if(cnv->extraInfo!=nullptr) {
    204        if(locale!=nullptr && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
    205            ((SCSUData *)cnv->extraInfo)->locale=l_ja;
    206        } else {
    207            ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
    208        }
    209        _SCSUReset(cnv, UCNV_RESET_BOTH);
    210    } else {
    211        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
    212    }
    213 
    214    /* Set the substitution character U+fffd as a Unicode string. */
    215    cnv->subUChars[0]=0xfffd;
    216    cnv->subCharLen=-1;
    217 }
    218 
    219 static void U_CALLCONV
    220 _SCSUClose(UConverter *cnv) {
    221    if(cnv->extraInfo!=nullptr) {
    222        if(!cnv->isExtraLocal) {
    223            uprv_free(cnv->extraInfo);
    224        }
    225        cnv->extraInfo=nullptr;
    226    }
    227 }
    228 
    229 /* SCSU-to-Unicode conversion functions ------------------------------------- */
    230 
/*
 * Decode SCSU bytes from pArgs->source into UTF-16 at pArgs->target and,
 * when pArgs->offsets is not nullptr, write for each output unit the index
 * of the source byte at which its character started.
 *
 * The decoder state (mode, state machine state, current windows, pending
 * first byte) is loaded from the SCSUData in the converter at the start and
 * written back at the end, so a sequence may be split across calls.
 *
 * Errors are reported through *pErrorCode:
 * - U_BUFFER_OVERFLOW_ERROR when the target is full (a pending trail
 *   surrogate is parked in cnv->UCharErrorBuffer)
 * - U_ILLEGAL_CHAR_FOUND for the reserved command bytes Srs/Urs, with the
 *   offending bytes in cnv->toUBytes/toULength
 */
static void U_CALLCONV
_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                          UErrorCode *pErrorCode) {
    UConverter *cnv;
    SCSUData *scsu;
    const uint8_t *source, *sourceLimit;
    char16_t *target;
    const char16_t *targetLimit;
    int32_t *offsets;
    UBool isSingleByteMode;
    uint8_t state, byteOne;
    int8_t quoteWindow, dynamicWindow;

    int32_t sourceIndex, nextSourceIndex;

    uint8_t b;

    /* set up the local pointers */
    cnv=pArgs->converter;
    scsu=(SCSUData *)cnv->extraInfo;

    source=(const uint8_t *)pArgs->source;
    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    target=pArgs->target;
    targetLimit=pArgs->targetLimit;
    offsets=pArgs->offsets;

    /* get the state machine state */
    isSingleByteMode=scsu->toUIsSingleByteMode;
    state=scsu->toUState;
    quoteWindow=scsu->toUQuoteWindow;
    dynamicWindow=scsu->toUDynamicWindow;
    byteOne=scsu->toUByteOne;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex=state==readCommand ? 0 : -1;
    /* nextSourceIndex counts the bytes consumed from this buffer so far */
    nextSourceIndex=0;

    /*
     * conversion "loop"
     *
     * For performance, this is not a normal C loop.
     * Instead, there are two code blocks for the two SCSU modes.
     * The function branches to either one, and a change of the mode is done with a goto to
     * the other branch.
     *
     * Each branch has two conventional loops:
     * - a fast-path loop for the most common codes in the mode
     * - a loop for all other codes in the mode
     * When the fast-path runs into a code that it cannot handle, its loop ends and it
     * runs into the following loop to handle the other codes.
     * The end of the input or output buffer is also handled by the slower loop.
     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
     *
     * The callback handling is done by returning with an error code.
     * The conversion framework actually calls the callback function.
     */
    if(isSingleByteMode) {
        /* fast path for single-byte mode */
        if(state==readCommand) {
fastSingle:
            /* handles every byte >=0x20; bytes below 0x20 fall through to the state machine */
            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
                ++source;
                ++nextSourceIndex;
                if(b<=0x7f) {
                    /* write US-ASCII graphic character or DEL */
                    *target++=(char16_t)b;
                    if(offsets!=nullptr) {
                        *offsets++=sourceIndex;
                    }
                } else {
                    /* write from dynamic window */
                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
                    if(c<=0xffff) {
                        *target++=(char16_t)c;
                        if(offsets!=nullptr) {
                            *offsets++=sourceIndex;
                        }
                    } else {
                        /* output surrogate pair */
                        /* lead surrogate: 0xd7c0+(c>>10) == 0xd800+((c-0x10000)>>10) */
                        *target++=(char16_t)(0xd7c0+(c>>10));
                        if(target<targetLimit) {
                            *target++=(char16_t)(0xdc00|(c&0x3ff));
                            if(offsets!=nullptr) {
                                *offsets++=sourceIndex;
                                *offsets++=sourceIndex;
                            }
                        } else {
                            /* target overflow */
                            /* the lead surrogate was written; park the trail surrogate in the error buffer */
                            if(offsets!=nullptr) {
                                *offsets++=sourceIndex;
                            }
                            cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
                            cnv->UCharErrorBufferLength=1;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            goto endloop;
                        }
                    }
                }
                sourceIndex=nextSourceIndex;
            }
        }

        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
singleByteMode:
        while(source<sourceLimit) {
            if(target>=targetLimit) {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
            b=*source++;
            ++nextSourceIndex;
            switch(state) {
            case readCommand:
                /* redundant conditions are commented out */
                /* here: b<0x20 because otherwise we would be in fastSingle */
                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
                    /* CR/LF/TAB/NUL */
                    *target++=(char16_t)b;
                    if(offsets!=nullptr) {
                        *offsets++=sourceIndex;
                    }
                    sourceIndex=nextSourceIndex;
                    goto fastSingle;
                } else if(SC0<=b) {
                    if(b<=SC7) {
                        /* select a dynamic window; complete one-byte command */
                        dynamicWindow=(int8_t)(b-SC0);
                        sourceIndex=nextSourceIndex;
                        goto fastSingle;
                    } else /* if(SD0<=b && b<=SD7) */ {
                        /* define and select a window; the offset operand follows */
                        dynamicWindow=(int8_t)(b-SD0);
                        state=defineOne;
                    }
                } else if(/* SQ0<=b && */ b<=SQ7) {
                    quoteWindow=(int8_t)(b-SQ0);
                    state=quoteOne;
                } else if(b==SDX) {
                    state=definePairOne;
                } else if(b==SQU) {
                    state=quotePairOne;
                } else if(b==SCU) {
                    /* switch to Unicode mode */
                    sourceIndex=nextSourceIndex;
                    isSingleByteMode=false;
                    goto fastUnicode;
                } else /* Srs */ {
                    /* callback(illegal) */
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    cnv->toUBytes[0]=b;
                    cnv->toULength=1;
                    goto endloop;
                }

                /* store the first byte of a multibyte sequence in toUBytes[] */
                cnv->toUBytes[0]=b;
                cnv->toULength=1;
                break;
            case quotePairOne:
                byteOne=b;
                cnv->toUBytes[1]=b;
                cnv->toULength=2;
                state=quotePairTwo;
                break;
            case quotePairTwo:
                /* the two quoted bytes form one big-endian 16-bit unit */
                *target++=(char16_t)((byteOne<<8)|b);
                if(offsets!=nullptr) {
                    *offsets++=sourceIndex;
                }
                sourceIndex=nextSourceIndex;
                state=readCommand;
                goto fastSingle;
            case quoteOne:
                if(b<0x80) {
                    /* all static offsets are in the BMP */
                    *target++=(char16_t)(staticOffsets[quoteWindow]+b);
                    if(offsets!=nullptr) {
                        *offsets++=sourceIndex;
                    }
                } else {
                    /* write from dynamic window */
                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
                    if(c<=0xffff) {
                        *target++=(char16_t)c;
                        if(offsets!=nullptr) {
                            *offsets++=sourceIndex;
                        }
                    } else {
                        /* output surrogate pair */
                        *target++=(char16_t)(0xd7c0+(c>>10));
                        if(target<targetLimit) {
                            *target++=(char16_t)(0xdc00|(c&0x3ff));
                            if(offsets!=nullptr) {
                                *offsets++=sourceIndex;
                                *offsets++=sourceIndex;
                            }
                        } else {
                            /* target overflow */
                            if(offsets!=nullptr) {
                                *offsets++=sourceIndex;
                            }
                            cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
                            cnv->UCharErrorBufferLength=1;
                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                            goto endloop;
                        }
                    }
                }
                sourceIndex=nextSourceIndex;
                state=readCommand;
                goto fastSingle;
            case definePairOne:
                /* top 3 bits: window number; low 5 bits: high part of the offset value */
                dynamicWindow=(int8_t)((b>>5)&7);
                byteOne=(uint8_t)(b&0x1f);
                cnv->toUBytes[1]=b;
                cnv->toULength=2;
                state=definePairTwo;
                break;
            case definePairTwo:
                /* extended window: supplementary offset from the 13-bit value (byteOne:b), in units of 0x80 */
                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
                sourceIndex=nextSourceIndex;
                state=readCommand;
                goto fastSingle;
            case defineOne:
                if(b==0) {
                    /* callback(illegal): Reserved window offset value 0 */
                    /*
                     * NOTE(review): unlike the Srs branch above, *pErrorCode is not set
                     * here, and state stays defineOne when leaving through endloop.
                     * Confirm whether U_ILLEGAL_CHAR_FOUND was intended; any change must
                     * be mirrored in _SCSUToUnicode().
                     */
                    cnv->toUBytes[1]=b;
                    cnv->toULength=2;
                    goto endloop;
                } else if(b<gapThreshold) {
                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
                    /* gapThreshold<=b<reservedStart */
                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
                } else if(b>=fixedThreshold) {
                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
                } else {
                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
                    /* NOTE(review): *pErrorCode is not set here either; see the b==0 branch above */
                    cnv->toUBytes[1]=b;
                    cnv->toULength=2;
                    goto endloop;
                }
                sourceIndex=nextSourceIndex;
                state=readCommand;
                goto fastSingle;
            }
        }
    } else {
        /* fast path for Unicode mode */
        if(state==readCommand) {
fastUnicode:
            /*
             * Copy byte pairs as 16-bit units while at least two bytes remain and
             * the first byte is not a Unicode-mode command byte (outside UC0..Urs).
             */
            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
                *target++=(char16_t)((b<<8)|source[1]);
                if(offsets!=nullptr) {
                    *offsets++=sourceIndex;
                }
                sourceIndex=nextSourceIndex;
                nextSourceIndex+=2;
                source+=2;
            }
        }

        /* normal state machine for Unicode mode */
/* unicodeByteMode: */
        while(source<sourceLimit) {
            if(target>=targetLimit) {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
            b=*source++;
            ++nextSourceIndex;
            switch(state) {
            case readCommand:
                if((uint8_t)(b-UC0)>(Urs-UC0)) {
                    /* not a command: first byte of a directly encoded 16-bit unit */
                    byteOne=b;
                    cnv->toUBytes[0]=b;
                    cnv->toULength=1;
                    state=quotePairTwo;
                } else if(/* UC0<=b && */ b<=UC7) {
                    /* select a window and return to single-byte mode */
                    dynamicWindow=(int8_t)(b-UC0);
                    sourceIndex=nextSourceIndex;
                    isSingleByteMode=true;
                    goto fastSingle;
                } else if(/* UD0<=b && */ b<=UD7) {
                    /* define a window; the operand is read by the single-byte state machine */
                    dynamicWindow=(int8_t)(b-UD0);
                    isSingleByteMode=true;
                    cnv->toUBytes[0]=b;
                    cnv->toULength=1;
                    state=defineOne;
                    goto singleByteMode;
                } else if(b==UDX) {
                    isSingleByteMode=true;
                    cnv->toUBytes[0]=b;
                    cnv->toULength=1;
                    state=definePairOne;
                    goto singleByteMode;
                } else if(b==UQU) {
                    cnv->toUBytes[0]=b;
                    cnv->toULength=1;
                    state=quotePairOne;
                } else /* Urs */ {
                    /* callback(illegal) */
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    cnv->toUBytes[0]=b;
                    cnv->toULength=1;
                    goto endloop;
                }
                break;
            case quotePairOne:
                byteOne=b;
                cnv->toUBytes[1]=b;
                cnv->toULength=2;
                state=quotePairTwo;
                break;
            case quotePairTwo:
                *target++=(char16_t)((byteOne<<8)|b);
                if(offsets!=nullptr) {
                    *offsets++=sourceIndex;
                }
                sourceIndex=nextSourceIndex;
                state=readCommand;
                goto fastUnicode;
            }
        }
    }
endloop:

    /* set the converter state back into UConverter */
    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
        /* reset to deal with the next character */
        state=readCommand;
    } else if(state==readCommand) {
        /* not in a multi-byte sequence, reset toULength */
        cnv->toULength=0;
    }
    scsu->toUIsSingleByteMode=isSingleByteMode;
    scsu->toUState=state;
    scsu->toUQuoteWindow=quoteWindow;
    scsu->toUDynamicWindow=dynamicWindow;
    scsu->toUByteOne=byteOne;

    /* write back the updated pointers */
    pArgs->source=(const char *)source;
    pArgs->target=target;
    pArgs->offsets=offsets;
}
    576 
    577 /*
    578 * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
    579 * If a change is made in the original function, then either
    580 * change this function the same way or
    581 * re-copy the original function and remove the variables
    582 * offsets, sourceIndex, and nextSourceIndex.
    583 */
    584 static void U_CALLCONV
    585 _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
    586               UErrorCode *pErrorCode) {
    587    UConverter *cnv;
    588    SCSUData *scsu;
    589    const uint8_t *source, *sourceLimit;
    590    char16_t *target;
    591    const char16_t *targetLimit;
    592    UBool isSingleByteMode;
    593    uint8_t state, byteOne;
    594    int8_t quoteWindow, dynamicWindow;
    595 
    596    uint8_t b;
    597 
    598    /* set up the local pointers */
    599    cnv=pArgs->converter;
    600    scsu=(SCSUData *)cnv->extraInfo;
    601 
    602    source=(const uint8_t *)pArgs->source;
    603    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
    604    target=pArgs->target;
    605    targetLimit=pArgs->targetLimit;
    606 
    607    /* get the state machine state */
    608    isSingleByteMode=scsu->toUIsSingleByteMode;
    609    state=scsu->toUState;
    610    quoteWindow=scsu->toUQuoteWindow;
    611    dynamicWindow=scsu->toUDynamicWindow;
    612    byteOne=scsu->toUByteOne;
    613 
    614    /*
    615     * conversion "loop"
    616     *
    617     * For performance, this is not a normal C loop.
    618     * Instead, there are two code blocks for the two SCSU modes.
    619     * The function branches to either one, and a change of the mode is done with a goto to
    620     * the other branch.
    621     *
    622     * Each branch has two conventional loops:
    623     * - a fast-path loop for the most common codes in the mode
    624     * - a loop for all other codes in the mode
    625     * When the fast-path runs into a code that it cannot handle, its loop ends and it
    626     * runs into the following loop to handle the other codes.
    627     * The end of the input or output buffer is also handled by the slower loop.
    628     * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
    629     *
    630     * The callback handling is done by returning with an error code.
    631     * The conversion framework actually calls the callback function.
    632     */
    633    if(isSingleByteMode) {
    634        /* fast path for single-byte mode */
    635        if(state==readCommand) {
    636 fastSingle:
    637            while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
    638                ++source;
    639                if(b<=0x7f) {
    640                    /* write US-ASCII graphic character or DEL */
    641                    *target++=(char16_t)b;
    642                } else {
    643                    /* write from dynamic window */
    644                    uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
    645                    if(c<=0xffff) {
    646                        *target++=(char16_t)c;
    647                    } else {
    648                        /* output surrogate pair */
    649                        *target++=(char16_t)(0xd7c0+(c>>10));
    650                        if(target<targetLimit) {
    651                            *target++=(char16_t)(0xdc00|(c&0x3ff));
    652                        } else {
    653                            /* target overflow */
    654                            cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
    655                            cnv->UCharErrorBufferLength=1;
    656                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    657                            goto endloop;
    658                        }
    659                    }
    660                }
    661            }
    662        }
    663 
    664        /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
    665 singleByteMode:
    666        while(source<sourceLimit) {
    667            if(target>=targetLimit) {
    668                /* target is full */
    669                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    670                break;
    671            }
    672            b=*source++;
    673            switch(state) {
    674            case readCommand:
    675                /* redundant conditions are commented out */
    676                /* here: b<0x20 because otherwise we would be in fastSingle */
    677                if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
    678                    /* CR/LF/TAB/NUL */
    679                    *target++=(char16_t)b;
    680                    goto fastSingle;
    681                } else if(SC0<=b) {
    682                    if(b<=SC7) {
    683                        dynamicWindow=(int8_t)(b-SC0);
    684                        goto fastSingle;
    685                    } else /* if(SD0<=b && b<=SD7) */ {
    686                        dynamicWindow=(int8_t)(b-SD0);
    687                        state=defineOne;
    688                    }
    689                } else if(/* SQ0<=b && */ b<=SQ7) {
    690                    quoteWindow=(int8_t)(b-SQ0);
    691                    state=quoteOne;
    692                } else if(b==SDX) {
    693                    state=definePairOne;
    694                } else if(b==SQU) {
    695                    state=quotePairOne;
    696                } else if(b==SCU) {
    697                    isSingleByteMode=false;
    698                    goto fastUnicode;
    699                } else /* Srs */ {
    700                    /* callback(illegal) */
    701                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    702                    cnv->toUBytes[0]=b;
    703                    cnv->toULength=1;
    704                    goto endloop;
    705                }
    706 
    707                /* store the first byte of a multibyte sequence in toUBytes[] */
    708                cnv->toUBytes[0]=b;
    709                cnv->toULength=1;
    710                break;
    711            case quotePairOne:
    712                byteOne=b;
    713                cnv->toUBytes[1]=b;
    714                cnv->toULength=2;
    715                state=quotePairTwo;
    716                break;
    717            case quotePairTwo:
    718                *target++=(char16_t)((byteOne<<8)|b);
    719                state=readCommand;
    720                goto fastSingle;
    721            case quoteOne:
    722                if(b<0x80) {
    723                    /* all static offsets are in the BMP */
    724                    *target++=(char16_t)(staticOffsets[quoteWindow]+b);
    725                } else {
    726                    /* write from dynamic window */
    727                    uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
    728                    if(c<=0xffff) {
    729                        *target++=(char16_t)c;
    730                    } else {
    731                        /* output surrogate pair */
    732                        *target++=(char16_t)(0xd7c0+(c>>10));
    733                        if(target<targetLimit) {
    734                            *target++=(char16_t)(0xdc00|(c&0x3ff));
    735                        } else {
    736                            /* target overflow */
    737                            cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
    738                            cnv->UCharErrorBufferLength=1;
    739                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    740                            goto endloop;
    741                        }
    742                    }
    743                }
    744                state=readCommand;
    745                goto fastSingle;
    746            case definePairOne:
    747                dynamicWindow=(int8_t)((b>>5)&7);
    748                byteOne=(uint8_t)(b&0x1f);
    749                cnv->toUBytes[1]=b;
    750                cnv->toULength=2;
    751                state=definePairTwo;
    752                break;
    753            case definePairTwo:
    754                scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
    755                state=readCommand;
    756                goto fastSingle;
    757            case defineOne:
    758                if(b==0) {
    759                    /* callback(illegal): Reserved window offset value 0 */
    760                    cnv->toUBytes[1]=b;
    761                    cnv->toULength=2;
    762                    goto endloop;
    763                } else if(b<gapThreshold) {
    764                    scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
    765                } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
    766                    scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
    767                } else if(b>=fixedThreshold) {
    768                    scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
    769                } else {
    770                    /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
    771                    cnv->toUBytes[1]=b;
    772                    cnv->toULength=2;
    773                    goto endloop;
    774                }
    775                state=readCommand;
    776                goto fastSingle;
    777            }
    778        }
    779    } else {
    780        /* fast path for Unicode mode */
    781        if(state==readCommand) {
    782 fastUnicode:
    783            while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
    784                *target++=(char16_t)((b<<8)|source[1]);
    785                source+=2;
    786            }
    787        }
    788 
    789        /* normal state machine for Unicode mode */
    790 /* unicodeByteMode: */
    791        while(source<sourceLimit) {
    792            if(target>=targetLimit) {
    793                /* target is full */
    794                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    795                break;
    796            }
    797            b=*source++;
    798            switch(state) {
    799            case readCommand:
    800                if((uint8_t)(b-UC0)>(Urs-UC0)) {
    801                    byteOne=b;
    802                    cnv->toUBytes[0]=b;
    803                    cnv->toULength=1;
    804                    state=quotePairTwo;
    805                } else if(/* UC0<=b && */ b<=UC7) {
    806                    dynamicWindow=(int8_t)(b-UC0);
    807                    isSingleByteMode=true;
    808                    goto fastSingle;
    809                } else if(/* UD0<=b && */ b<=UD7) {
    810                    dynamicWindow=(int8_t)(b-UD0);
    811                    isSingleByteMode=true;
    812                    cnv->toUBytes[0]=b;
    813                    cnv->toULength=1;
    814                    state=defineOne;
    815                    goto singleByteMode;
    816                } else if(b==UDX) {
    817                    isSingleByteMode=true;
    818                    cnv->toUBytes[0]=b;
    819                    cnv->toULength=1;
    820                    state=definePairOne;
    821                    goto singleByteMode;
    822                } else if(b==UQU) {
    823                    cnv->toUBytes[0]=b;
    824                    cnv->toULength=1;
    825                    state=quotePairOne;
    826                } else /* Urs */ {
    827                    /* callback(illegal) */
    828                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
    829                    cnv->toUBytes[0]=b;
    830                    cnv->toULength=1;
    831                    goto endloop;
    832                }
    833                break;
    834            case quotePairOne:
    835                byteOne=b;
    836                cnv->toUBytes[1]=b;
    837                cnv->toULength=2;
    838                state=quotePairTwo;
    839                break;
    840            case quotePairTwo:
    841                *target++=(char16_t)((byteOne<<8)|b);
    842                state=readCommand;
    843                goto fastUnicode;
    844            }
    845        }
    846    }
    847 endloop:
    848 
    849    /* set the converter state back into UConverter */
    850    if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
    851        /* reset to deal with the next character */
    852        state=readCommand;
    853    } else if(state==readCommand) {
    854        /* not in a multi-byte sequence, reset toULength */
    855        cnv->toULength=0;
    856    }
    857    scsu->toUIsSingleByteMode=isSingleByteMode;
    858    scsu->toUState=state;
    859    scsu->toUQuoteWindow=quoteWindow;
    860    scsu->toUDynamicWindow=dynamicWindow;
    861    scsu->toUByteOne=byteOne;
    862 
    863    /* write back the updated pointers */
    864    pArgs->source=(const char *)source;
    865    pArgs->target=target;
    866 }
    867 U_CDECL_END
    868 /* SCSU-from-Unicode conversion functions ----------------------------------- */
    869 
    870 /*
    871 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
    872 * reasonable results. The lookahead is minimal.
    873 * Many cases are simple:
    874 * A character fits directly into the current mode, a dynamic or static window,
    875 * or is not compressible. These cases are tested first.
    876 * Real compression heuristics are applied to the rest, in code branches for
    877 * single/Unicode mode and BMP/supplementary code points.
    878 * The heuristics used here are extremely simple.
    879 */
    880 
    /*
     * Look up which of the eight windows covers a code point.
     * A window starting at offsets[k] covers the 128 code points
     * offsets[k]..offsets[k]+0x7f.
     * Returns the index (0..7) of the first matching window, or -1 if none matches.
     */
    static int8_t
    getWindow(const uint32_t offsets[8], uint32_t c) {
        int8_t found = -1;
        for (int8_t k = 0; found < 0 && k < 8; ++k) {
            /* unsigned difference: wraps to a huge value when c lies below the window start */
            uint32_t distance = c - offsets[k];
            if (distance <= 0x7f) {
                found = k;
            }
        }
        return found;
    }
    892 
    893 /* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
    894 static UBool
    895 isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
    896    return c<=offset+0x7f &&
    897          (c>=offset || (c<=0x7f &&
    898                        (c>=0x20 || (1UL<<c)&0x2601)));
    899                                /* binary 0010 0110 0000 0001,
    900                                   check for b==0xd || b==0xa || b==9 || b==0 */
    901 }
    902 
    903 /*
    904 * getNextDynamicWindow returns the next dynamic window to be redefined
    905 */
    906 static int8_t
    907 getNextDynamicWindow(SCSUData *scsu) {
    908    int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
    909    if(++scsu->nextWindowUseIndex==8) {
    910        scsu->nextWindowUseIndex=0;
    911    }
    912    return window;
    913 }
    914 
    915 /*
    916 * useDynamicWindow() adjusts
    917 * windowUse[] and nextWindowUseIndex for the algorithm to choose
    918 * the next dynamic window to be defined;
    919 * a subclass may override it and provide its own algorithm.
    920 */
    921 static void
    922 useDynamicWindow(SCSUData *scsu, int8_t window) {
    923    /*
    924     * move the existing window, which just became the most recently used one,
    925     * up in windowUse[] to nextWindowUseIndex-1
    926     */
    927 
    928    /* first, find the index of the window - backwards to favor the more recently used windows */
    929    int i, j;
    930 
    931    i=scsu->nextWindowUseIndex;
    932    do {
    933        if(--i<0) {
    934            i=7;
    935        }
    936    } while(scsu->windowUse[i]!=window);
    937 
    938    /* now copy each windowUse[i+1] to [i] */
    939    j=i+1;
    940    if(j==8) {
    941        j=0;
    942    }
    943    while(j!=scsu->nextWindowUseIndex) {
    944        scsu->windowUse[i]=scsu->windowUse[j];
    945        i=j;
    946        if(++j==8) { j=0; }
    947    }
    948 
    949    /* finally, set the window into the most recently used index */
    950    scsu->windowUse[i]=window;
    951 }
    952 
    953 /*
    954 * calculate the offset and the code for a dynamic window that contains the character
    955 * takes fixed offsets into account
    956 * the offset of the window is stored in the offset variable,
    957 * the code is returned
    958 *
    959 * return offset code: -1 none  <=0xff code for SDn/UDn  else code for SDX/UDX, subtract 0x200 to get the true code
    960 */
    961 static int
    962 getDynamicOffset(uint32_t c, uint32_t *pOffset) {
    963    int i;
    964 
    965    for(i=0; i<7; ++i) {
    966        if (c - fixedOffsets[i] <= 0x7f) {
    967            *pOffset=fixedOffsets[i];
    968            return 0xf9+i;
    969        }
    970    }
    971 
    972    if(c<0x80) {
    973        /* No dynamic window for US-ASCII. */
    974        return -1;
    975    } else if(c<0x3400 ||
    976              c - 0x10000 < 0x14000 - 0x10000 ||
    977              c - 0x1d000 <= 0x1ffff - 0x1d000
    978    ) {
    979        /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
    980        *pOffset=c&0x7fffff80;
    981        return static_cast<int>(c >> 7);
    982    } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
    983        /* For these characters we need to take the gapOffset into account. */
    984        *pOffset=c&0x7fffff80;
    985        return static_cast<int>((c - gapOffset) >> 7);
    986    } else {
    987        return -1;
    988    }
    989 }
    990 U_CDECL_BEGIN
    991 /*
    992 * Idea for compression:
    993 *  - save SCSUData and other state before really starting work
    994 *  - at endloop, see if compression could be better with just unicode mode
    995 *  - don't do this if a callback has been called
    996 *  - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
    997 *  - different buffer handling!
    998 *
    999 * Drawback or need for corrective handling:
   1000 * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
   1001 * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
   1002 * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
   1003 *
   1004 * How to achieve both?
   1005 *  - Only replace the result after an SDX or SCU?
   1006 */
   1007 
   1008 static void U_CALLCONV
   1009 _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
   1010                            UErrorCode *pErrorCode) {
   1011    UConverter *cnv;
   1012    SCSUData *scsu;
   1013    const char16_t *source, *sourceLimit;
   1014    uint8_t *target;
   1015    int32_t targetCapacity;
   1016    int32_t *offsets;
   1017 
   1018    UBool isSingleByteMode;
   1019    uint8_t dynamicWindow;
   1020    uint32_t currentOffset;
   1021 
   1022    uint32_t c, delta;
   1023 
   1024    int32_t sourceIndex, nextSourceIndex;
   1025 
   1026    int32_t length;
   1027 
   1028    /* variables for compression heuristics */
   1029    uint32_t offset;
   1030    char16_t lead, trail;
   1031    int code;
   1032    int8_t window;
   1033 
   1034    /* set up the local pointers */
   1035    cnv=pArgs->converter;
   1036    scsu=(SCSUData *)cnv->extraInfo;
   1037 
   1038    /* set up the local pointers */
   1039    source=pArgs->source;
   1040    sourceLimit=pArgs->sourceLimit;
   1041    target=(uint8_t *)pArgs->target;
   1042    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
   1043    offsets=pArgs->offsets;
   1044 
   1045    /* get the state machine state */
   1046    isSingleByteMode=scsu->fromUIsSingleByteMode;
   1047    dynamicWindow=scsu->fromUDynamicWindow;
   1048    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1049 
   1050    c=cnv->fromUChar32;
   1051 
   1052    /* sourceIndex=-1 if the current character began in the previous buffer */
   1053    sourceIndex= c==0 ? 0 : -1;
   1054    nextSourceIndex=0;
   1055 
   1056    /* similar conversion "loop" as in toUnicode */
   1057 loop:
   1058    if(isSingleByteMode) {
   1059        if(c!=0 && targetCapacity>0) {
   1060            goto getTrailSingle;
   1061        }
   1062 
   1063        /* state machine for single-byte mode */
   1064 /* singleByteMode: */
   1065        while(source<sourceLimit) {
   1066            if(targetCapacity<=0) {
   1067                /* target is full */
   1068                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1069                break;
   1070            }
   1071            c=*source++;
   1072            ++nextSourceIndex;
   1073 
   1074            if((c-0x20)<=0x5f) {
   1075                /* pass US-ASCII graphic character through */
   1076                *target++=(uint8_t)c;
   1077                if(offsets!=nullptr) {
   1078                    *offsets++=sourceIndex;
   1079                }
   1080                --targetCapacity;
   1081            } else if(c<0x20) {
   1082                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
   1083                    /* CR/LF/TAB/NUL */
   1084                    *target++=(uint8_t)c;
   1085                    if(offsets!=nullptr) {
   1086                        *offsets++=sourceIndex;
   1087                    }
   1088                    --targetCapacity;
   1089                } else {
   1090                    /* quote C0 control character */
   1091                    c|=SQ0<<8;
   1092                    length=2;
   1093                    goto outputBytes;
   1094                }
   1095            } else if((delta=c-currentOffset)<=0x7f) {
   1096                /* use the current dynamic window */
   1097                *target++=(uint8_t)(delta|0x80);
   1098                if(offsets!=nullptr) {
   1099                    *offsets++=sourceIndex;
   1100                }
   1101                --targetCapacity;
   1102            } else if(U16_IS_SURROGATE(c)) {
   1103                if(U16_IS_SURROGATE_LEAD(c)) {
   1104 getTrailSingle:
   1105                    lead=(char16_t)c;
   1106                    if(source<sourceLimit) {
   1107                        /* test the following code unit */
   1108                        trail=*source;
   1109                        if(U16_IS_TRAIL(trail)) {
   1110                            ++source;
   1111                            ++nextSourceIndex;
   1112                            c=U16_GET_SUPPLEMENTARY(c, trail);
   1113                            /* convert this surrogate code point */
   1114                            /* exit this condition tree */
   1115                        } else {
   1116                            /* this is an unmatched lead code unit (1st surrogate) */
   1117                            /* callback(illegal) */
   1118                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1119                            goto endloop;
   1120                        }
   1121                    } else {
   1122                        /* no more input */
   1123                        break;
   1124                    }
   1125                } else {
   1126                    /* this is an unmatched trail code unit (2nd surrogate) */
   1127                    /* callback(illegal) */
   1128                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1129                    goto endloop;
   1130                }
   1131 
   1132                /* compress supplementary character U+10000..U+10ffff */
   1133                if((delta=c-currentOffset)<=0x7f) {
   1134                    /* use the current dynamic window */
   1135                    *target++=(uint8_t)(delta|0x80);
   1136                    if(offsets!=nullptr) {
   1137                        *offsets++=sourceIndex;
   1138                    }
   1139                    --targetCapacity;
   1140                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1141                    /* there is a dynamic window that contains this character, change to it */
   1142                    dynamicWindow=window;
   1143                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1144                    useDynamicWindow(scsu, dynamicWindow);
   1145                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1146                    length=2;
   1147                    goto outputBytes;
   1148                } else if((code=getDynamicOffset(c, &offset))>=0) {
   1149                    /* might check if there are more characters in this window to come */
   1150                    /* define an extended window with this character */
   1151                    code-=0x200;
   1152                    dynamicWindow=getNextDynamicWindow(scsu);
   1153                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1154                    useDynamicWindow(scsu, dynamicWindow);
   1155                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1156                    length=4;
   1157                    goto outputBytes;
   1158                } else {
   1159                    /* change to Unicode mode and output this (lead, trail) pair */
   1160                    isSingleByteMode=false;
   1161                    *target++=(uint8_t)SCU;
   1162                    if(offsets!=nullptr) {
   1163                        *offsets++=sourceIndex;
   1164                    }
   1165                    --targetCapacity;
   1166                    c=((uint32_t)lead<<16)|trail;
   1167                    length=4;
   1168                    goto outputBytes;
   1169                }
   1170            } else if(c<0xa0) {
   1171                /* quote C1 control character */
   1172                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
   1173                length=2;
   1174                goto outputBytes;
   1175            } else if(c==0xfeff || c>=0xfff0) {
   1176                /* quote signature character=byte order mark and specials */
   1177                c|=SQU<<16;
   1178                length=3;
   1179                goto outputBytes;
   1180            } else {
   1181                /* compress all other BMP characters */
   1182                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1183                    /* there is a window defined that contains this character - switch to it or quote from it? */
   1184                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
   1185                        /* change to dynamic window */
   1186                        dynamicWindow=window;
   1187                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1188                        useDynamicWindow(scsu, dynamicWindow);
   1189                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1190                        length=2;
   1191                        goto outputBytes;
   1192                    } else {
   1193                        /* quote from dynamic window */
   1194                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
   1195                        length=2;
   1196                        goto outputBytes;
   1197                    }
   1198                } else if((window=getWindow(staticOffsets, c))>=0) {
   1199                    /* quote from static window */
   1200                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
   1201                    length=2;
   1202                    goto outputBytes;
   1203                } else if((code=getDynamicOffset(c, &offset))>=0) {
   1204                    /* define a dynamic window with this character */
   1205                    dynamicWindow=getNextDynamicWindow(scsu);
   1206                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1207                    useDynamicWindow(scsu, dynamicWindow);
   1208                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1209                    length=3;
   1210                    goto outputBytes;
   1211                } else if ((c - 0x3400) < (0xd800 - 0x3400) &&
   1212                           (source >= sourceLimit || (uint32_t)(*source - 0x3400) < (0xd800 - 0x3400))
   1213                ) {
   1214                    /*
   1215                     * this character is not compressible (a BMP ideograph or similar);
   1216                     * switch to Unicode mode if this is the last character in the block
   1217                     * or there is at least one more ideograph following immediately
   1218                     */
   1219                    isSingleByteMode=false;
   1220                    c|=SCU<<16;
   1221                    length=3;
   1222                    goto outputBytes;
   1223                } else {
   1224                    /* quote Unicode */
   1225                    c|=SQU<<16;
   1226                    length=3;
   1227                    goto outputBytes;
   1228                }
   1229            }
   1230 
   1231            /* normal end of conversion: prepare for a new character */
   1232            c=0;
   1233            sourceIndex=nextSourceIndex;
   1234        }
   1235    } else {
   1236        if(c!=0 && targetCapacity>0) {
   1237            goto getTrailUnicode;
   1238        }
   1239 
   1240        /* state machine for Unicode mode */
   1241 /* unicodeByteMode: */
   1242        while(source<sourceLimit) {
   1243            if(targetCapacity<=0) {
   1244                /* target is full */
   1245                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1246                break;
   1247            }
   1248            c=*source++;
   1249            ++nextSourceIndex;
   1250 
   1251            if ((c - 0x3400) < (0xd800 - 0x3400)) {
   1252                /* not compressible, write character directly */
   1253                if(targetCapacity>=2) {
   1254                    *target++=(uint8_t)(c>>8);
   1255                    *target++=(uint8_t)c;
   1256                    if(offsets!=nullptr) {
   1257                        *offsets++=sourceIndex;
   1258                        *offsets++=sourceIndex;
   1259                    }
   1260                    targetCapacity-=2;
   1261                } else {
   1262                    length=2;
   1263                    goto outputBytes;
   1264                }
   1265            } else if (c - 0x3400 >= 0xf300 - 0x3400 /* c<0x3400 || c>=0xf300 */) {
   1266                /* compress BMP character if the following one is not an uncompressible ideograph */
   1267                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
   1268                    if (c - 0x30 < 10 || c - 0x61 < 26 || c - 0x41 < 26) {
   1269                        /* ASCII digit or letter */
   1270                        isSingleByteMode=true;
   1271                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
   1272                        length=2;
   1273                        goto outputBytes;
   1274                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1275                        /* there is a dynamic window that contains this character, change to it */
   1276                        isSingleByteMode=true;
   1277                        dynamicWindow=window;
   1278                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1279                        useDynamicWindow(scsu, dynamicWindow);
   1280                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1281                        length=2;
   1282                        goto outputBytes;
   1283                    } else if((code=getDynamicOffset(c, &offset))>=0) {
   1284                        /* define a dynamic window with this character */
   1285                        isSingleByteMode=true;
   1286                        dynamicWindow=getNextDynamicWindow(scsu);
   1287                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1288                        useDynamicWindow(scsu, dynamicWindow);
   1289                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1290                        length=3;
   1291                        goto outputBytes;
   1292                    }
   1293                }
   1294 
   1295                /* don't know how to compress this character, just write it directly */
   1296                length=2;
   1297                goto outputBytes;
   1298            } else if(c<0xe000) {
   1299                /* c is a surrogate */
   1300                if(U16_IS_SURROGATE_LEAD(c)) {
   1301 getTrailUnicode:
   1302                    lead=(char16_t)c;
   1303                    if(source<sourceLimit) {
   1304                        /* test the following code unit */
   1305                        trail=*source;
   1306                        if(U16_IS_TRAIL(trail)) {
   1307                            ++source;
   1308                            ++nextSourceIndex;
   1309                            c=U16_GET_SUPPLEMENTARY(c, trail);
   1310                            /* convert this surrogate code point */
   1311                            /* exit this condition tree */
   1312                        } else {
   1313                            /* this is an unmatched lead code unit (1st surrogate) */
   1314                            /* callback(illegal) */
   1315                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1316                            goto endloop;
   1317                        }
   1318                    } else {
   1319                        /* no more input */
   1320                        break;
   1321                    }
   1322                } else {
   1323                    /* this is an unmatched trail code unit (2nd surrogate) */
   1324                    /* callback(illegal) */
   1325                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1326                    goto endloop;
   1327                }
   1328 
   1329                /* compress supplementary character */
   1330                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
   1331                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
   1332                ) {
   1333                    /*
   1334                     * there is a dynamic window that contains this character and
   1335                     * the following character is not uncompressible,
   1336                     * change to the window
   1337                     */
   1338                    isSingleByteMode=true;
   1339                    dynamicWindow=window;
   1340                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1341                    useDynamicWindow(scsu, dynamicWindow);
   1342                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1343                    length=2;
   1344                    goto outputBytes;
   1345                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
   1346                          (code=getDynamicOffset(c, &offset))>=0
   1347                ) {
   1348                    /* two supplementary characters in (probably) the same window - define an extended one */
   1349                    isSingleByteMode=true;
   1350                    code-=0x200;
   1351                    dynamicWindow=getNextDynamicWindow(scsu);
   1352                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1353                    useDynamicWindow(scsu, dynamicWindow);
   1354                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1355                    length=4;
   1356                    goto outputBytes;
   1357                } else {
   1358                    /* don't know how to compress this character, just write it directly */
   1359                    c=((uint32_t)lead<<16)|trail;
   1360                    length=4;
   1361                    goto outputBytes;
   1362                }
   1363            } else /* 0xe000<=c<0xf300 */ {
   1364                /* quote to avoid SCSU tags */
   1365                c|=UQU<<16;
   1366                length=3;
   1367                goto outputBytes;
   1368            }
   1369 
   1370            /* normal end of conversion: prepare for a new character */
   1371            c=0;
   1372            sourceIndex=nextSourceIndex;
   1373        }
   1374    }
   1375 endloop:
   1376 
   1377    /* set the converter state back into UConverter */
   1378    scsu->fromUIsSingleByteMode=isSingleByteMode;
   1379    scsu->fromUDynamicWindow=dynamicWindow;
   1380 
   1381    cnv->fromUChar32=c;
   1382 
   1383    /* write back the updated pointers */
   1384    pArgs->source=source;
   1385    pArgs->target=(char *)target;
   1386    pArgs->offsets=offsets;
   1387    return;
   1388 
   1389 outputBytes:
   1390    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
   1391    /* from the first if in the loop we know that targetCapacity>0 */
   1392    if(length<=targetCapacity) {
   1393        if(offsets==nullptr) {
   1394            switch(length) {
   1395                /* each branch falls through to the next one */
   1396            case 4:
   1397                *target++=(uint8_t)(c>>24);
   1398                U_FALLTHROUGH;
   1399            case 3:
   1400                *target++=(uint8_t)(c>>16);
   1401                U_FALLTHROUGH;
   1402            case 2:
   1403                *target++=(uint8_t)(c>>8);
   1404                U_FALLTHROUGH;
   1405            case 1:
   1406                *target++=(uint8_t)c;
   1407                U_FALLTHROUGH;
   1408            default:
   1409                /* will never occur */
   1410                break;
   1411            }
   1412        } else {
   1413            switch(length) {
   1414                /* each branch falls through to the next one */
   1415            case 4:
   1416                *target++=(uint8_t)(c>>24);
   1417                *offsets++=sourceIndex;
   1418                U_FALLTHROUGH;
   1419            case 3:
   1420                *target++=(uint8_t)(c>>16);
   1421                *offsets++=sourceIndex;
   1422                U_FALLTHROUGH;
   1423            case 2:
   1424                *target++=(uint8_t)(c>>8);
   1425                *offsets++=sourceIndex;
   1426                U_FALLTHROUGH;
   1427            case 1:
   1428                *target++=(uint8_t)c;
   1429                *offsets++=sourceIndex;
   1430                U_FALLTHROUGH;
   1431            default:
   1432                /* will never occur */
   1433                break;
   1434            }
   1435        }
   1436        targetCapacity-=length;
   1437 
   1438        /* normal end of conversion: prepare for a new character */
   1439        c=0;
   1440        sourceIndex=nextSourceIndex;
   1441        goto loop;
   1442    } else {
   1443        uint8_t *p;
   1444 
   1445        /*
   1446         * We actually do this backwards here:
   1447         * In order to save an intermediate variable, we output
   1448         * first to the overflow buffer what does not fit into the
   1449         * regular target.
   1450         */
   1451        /* we know that 0<=targetCapacity<length<=4 */
   1452        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
   1453        length-=targetCapacity;
   1454        p=(uint8_t *)cnv->charErrorBuffer;
   1455        switch(length) {
   1456            /* each branch falls through to the next one */
   1457        case 4:
   1458            *p++=(uint8_t)(c>>24);
   1459            U_FALLTHROUGH;
   1460        case 3:
   1461            *p++=(uint8_t)(c>>16);
   1462            U_FALLTHROUGH;
   1463        case 2:
   1464            *p++=(uint8_t)(c>>8);
   1465            U_FALLTHROUGH;
   1466        case 1:
   1467            *p=(uint8_t)c;
   1468            U_FALLTHROUGH;
   1469        default:
   1470            /* will never occur */
   1471            break;
   1472        }
   1473        cnv->charErrorBufferLength=(int8_t)length;
   1474 
   1475        /* now output what fits into the regular target */
   1476        c>>=8*length; /* length was reduced by targetCapacity */
   1477        switch(targetCapacity) {
   1478            /* each branch falls through to the next one */
   1479        case 3:
   1480            *target++=(uint8_t)(c>>16);
   1481            if(offsets!=nullptr) {
   1482                *offsets++=sourceIndex;
   1483            }
   1484            U_FALLTHROUGH;
   1485        case 2:
   1486            *target++=(uint8_t)(c>>8);
   1487            if(offsets!=nullptr) {
   1488                *offsets++=sourceIndex;
   1489            }
   1490            U_FALLTHROUGH;
   1491        case 1:
   1492            *target++=(uint8_t)c;
   1493            if(offsets!=nullptr) {
   1494                *offsets++=sourceIndex;
   1495            }
   1496            U_FALLTHROUGH;
   1497        default:
   1498            break;
   1499        }
   1500 
   1501        /* target overflow */
   1502        targetCapacity=0;
   1503        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1504        c=0;
   1505        goto endloop;
   1506    }
   1507 }
   1508 
   1509 /*
   1510 * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
   1511 * If a change is made in the original function, then either
   1512 * change this function the same way or
   1513 * re-copy the original function and remove the variables
   1514 * offsets, sourceIndex, and nextSourceIndex.
   1515 */
        /*
         * Encodes the UTF-16 text pArgs->source..pArgs->sourceLimit as SCSU bytes
         * written to pArgs->target..pArgs->targetLimit.
         *
         * State carried between calls:
         * - scsu->fromUIsSingleByteMode and scsu->fromUDynamicWindow: current mode
         *   and active dynamic window; read on entry, stored again at endloop.
         * - cnv->fromUChar32: the code unit still held in c when the function
         *   leaves (a lead surrogate whose trail has not been seen yet, or the
         *   unit for which an error was set); 0 when nothing is pending.
         *
         * Results:
         * - pArgs->source and pArgs->target are advanced past what was consumed
         *   and written.
         * - *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when the target fills
         *   up; bytes of the current sequence that did not fit are stored in
         *   cnv->charErrorBuffer with their count in cnv->charErrorBufferLength.
         * - *pErrorCode is set to U_ILLEGAL_CHAR_FOUND for an unpaired surrogate.
         *
         * NOTE(review): unlike the visible tail of the WithOffsets variant, the
         * overflow path below special-cases length==4 before shifting c.
         */
   1516 static void U_CALLCONV
   1517 _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
   1518                 UErrorCode *pErrorCode) {
   1519    UConverter *cnv;
   1520    SCSUData *scsu;
   1521    const char16_t *source, *sourceLimit;
   1522    uint8_t *target;
   1523    int32_t targetCapacity;
   1524 
   1525    UBool isSingleByteMode;
   1526    uint8_t dynamicWindow;
   1527    uint32_t currentOffset;
   1528 
   1529    uint32_t c, delta;
   1530 
   1531    int32_t length;
   1532 
   1533    /* variables for compression heuristics */
   1534    uint32_t offset;
   1535    char16_t lead, trail;
   1536    int code;
   1537    int8_t window;
   1538 
   1539    /* get the converter and its SCSU-specific state */
   1540    cnv=pArgs->converter;
   1541    scsu=(SCSUData *)cnv->extraInfo;
   1542 
   1543    /* set up the local source/target pointers and the remaining target capacity */
   1544    source=pArgs->source;
   1545    sourceLimit=pArgs->sourceLimit;
   1546    target=(uint8_t *)pArgs->target;
   1547    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
   1548 
   1549    /* get the state machine state */
   1550    isSingleByteMode=scsu->fromUIsSingleByteMode;
   1551    dynamicWindow=scsu->fromUDynamicWindow;
   1552    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1553 
           /* pick up the code unit stored by the previous call (0 if none) */
   1554    c=cnv->fromUChar32;
   1555 
   1556    /* similar conversion "loop" as in toUnicode */
           /*
            * outputBytes jumps back to this label after every multi-byte sequence,
            * so the mode is re-tested each time (several branches change it).
            */
   1557 loop:
   1558    if(isSingleByteMode) {
               /* a nonzero c here is treated as a pending lead surrogate; resume at its trail */
   1559        if(c!=0 && targetCapacity>0) {
   1560            goto getTrailSingle;
   1561        }
   1562 
   1563        /* state machine for single-byte mode */
   1564 /* singleByteMode: */
   1565        while(source<sourceLimit) {
   1566            if(targetCapacity<=0) {
   1567                /* target is full */
   1568                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1569                break;
   1570            }
   1571            c=*source++;
   1572 
                   /* c is unsigned, so this single compare selects 0x20..0x7f */
   1573            if((c-0x20)<=0x5f) {
   1574                /* pass US-ASCII graphic character through */
   1575                *target++=(uint8_t)c;
   1576                --targetCapacity;
   1577            } else if(c<0x20) {
   1578                if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
   1579                    /* CR/LF/TAB/NUL */
   1580                    *target++=(uint8_t)c;
   1581                    --targetCapacity;
   1582                } else {
   1583                    /* quote C0 control character */
   1584                    c|=SQ0<<8;
   1585                    length=2;
   1586                    goto outputBytes;
   1587                }
   1588            } else if((delta=c-currentOffset)<=0x7f) {
                       /* unsigned difference: true only for currentOffset<=c<=currentOffset+0x7f */
   1589                /* use the current dynamic window */
   1590                *target++=(uint8_t)(delta|0x80);
   1591                --targetCapacity;
   1592            } else if(U16_IS_SURROGATE(c)) {
   1593                if(U16_IS_SURROGATE_LEAD(c)) {
                           /* also the re-entry point for a lead surrogate pending from an earlier call */
   1594 getTrailSingle:
   1595                    lead=(char16_t)c;
   1596                    if(source<sourceLimit) {
   1597                        /* test the following code unit */
   1598                        trail=*source;
   1599                        if(U16_IS_TRAIL(trail)) {
   1600                            ++source;
   1601                            c=U16_GET_SUPPLEMENTARY(c, trail);
   1602                            /* convert this surrogate code point */
   1603                            /* exit this condition tree */
   1604                        } else {
   1605                            /* this is an unmatched lead code unit (1st surrogate) */
   1606                            /* callback(illegal) */
                                   /* c still holds the lead; endloop stores it in cnv->fromUChar32 */
   1607                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1608                            goto endloop;
   1609                        }
   1610                    } else {
   1611                        /* no more input */
                               /* leave the loop with c==lead so that it is kept for the next call */
   1612                        break;
   1613                    }
   1614                } else {
   1615                    /* this is an unmatched trail code unit (2nd surrogate) */
   1616                    /* callback(illegal) */
   1617                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1618                    goto endloop;
   1619                }
   1620 
   1621                /* compress supplementary character U+10000..U+10ffff */
   1622                if((delta=c-currentOffset)<=0x7f) {
   1623                    /* use the current dynamic window */
   1624                    *target++=(uint8_t)(delta|0x80);
   1625                    --targetCapacity;
   1626                } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1627                    /* there is a dynamic window that contains this character, change to it */
   1628                    dynamicWindow=window;
   1629                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1630                    useDynamicWindow(scsu, dynamicWindow);
   1631                    c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1632                    length=2;
   1633                    goto outputBytes;
   1634                } else if((code=getDynamicOffset(c, &offset))>=0) {
   1635                    /* might check if there are more characters in this window to come */
   1636                    /* define an extended window with this character */
   1637                    code-=0x200;
   1638                    dynamicWindow=getNextDynamicWindow(scsu);
   1639                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1640                    useDynamicWindow(scsu, dynamicWindow);
                           /* 4 bytes: SDX, then window number (top 3 bits) combined with code, then the data byte */
   1641                    c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1642                    length=4;
   1643                    goto outputBytes;
   1644                } else {
   1645                    /* change to Unicode mode and output this (lead, trail) pair */
   1646                    isSingleByteMode=false;
                           /*
                            * The SCU byte is written directly: targetCapacity>0 was checked
                            * before getting here. The pair goes through outputBytes, which
                            * copes with targetCapacity having dropped to 0.
                            */
   1647                    *target++=(uint8_t)SCU;
   1648                    --targetCapacity;
   1649                    c=((uint32_t)lead<<16)|trail;
   1650                    length=4;
   1651                    goto outputBytes;
   1652                }
   1653            } else if(c<0xa0) {
   1654                /* quote C1 control character */
                       /* << binds tighter than |, so this is (c&0x7f) | ((SQ0+1)<<8) */
   1655                c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
   1656                length=2;
   1657                goto outputBytes;
   1658            } else if(c==0xfeff || c>=0xfff0) {
   1659                /* quote signature character=byte order mark and specials */
   1660                c|=SQU<<16;
   1661                length=3;
   1662                goto outputBytes;
   1663            } else {
   1664                /* compress all other BMP characters */
   1665                if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1666                    /* there is a window defined that contains this character - switch to it or quote from it? */
                           /* switch if this is the last unit or the next unit also fits that window; otherwise quote */
   1667                    if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
   1668                        /* change to dynamic window */
   1669                        dynamicWindow=window;
   1670                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1671                        useDynamicWindow(scsu, dynamicWindow);
   1672                        c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1673                        length=2;
   1674                        goto outputBytes;
   1675                    } else {
   1676                        /* quote from dynamic window */
                               /* the active window (dynamicWindow/currentOffset) stays unchanged */
   1677                        c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
   1678                        length=2;
   1679                        goto outputBytes;
   1680                    }
   1681                } else if((window=getWindow(staticOffsets, c))>=0) {
   1682                    /* quote from static window */
   1683                    c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
   1684                    length=2;
   1685                    goto outputBytes;
   1686                } else if((code=getDynamicOffset(c, &offset))>=0) {
   1687                    /* define a dynamic window with this character */
   1688                    dynamicWindow=getNextDynamicWindow(scsu);
   1689                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1690                    useDynamicWindow(scsu, dynamicWindow);
   1691                    c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1692                    length=3;
   1693                    goto outputBytes;
   1694                } else if (c - 0x3400 < 0xd800 - 0x3400 &&
   1695                           (source >= sourceLimit || static_cast<uint32_t>(*source - 0x3400) < 0xd800 - 0x3400)
   1696                ) {
   1697                    /*
   1698                     * this character is not compressible (a BMP ideograph or similar);
   1699                     * switch to Unicode mode if this is the last character in the block
   1700                     * or there is at least one more ideograph following immediately
   1701                     */
                           /* both tests are unsigned range checks for 0x3400..0xd7ff */
   1702                    isSingleByteMode=false;
   1703                    c|=SCU<<16;
   1704                    length=3;
   1705                    goto outputBytes;
   1706                } else {
   1707                    /* quote Unicode */
   1708                    c|=SQU<<16;
   1709                    length=3;
   1710                    goto outputBytes;
   1711                }
   1712            }
   1713 
   1714            /* normal end of conversion: prepare for a new character */
                   /* only the branches that wrote a single byte directly fall through to here */
   1715            c=0;
   1716        }
   1717    } else {
               /* a nonzero c here is treated as a pending lead surrogate; resume at its trail */
   1718        if(c!=0 && targetCapacity>0) {
   1719            goto getTrailUnicode;
   1720        }
   1721 
   1722        /* state machine for Unicode mode */
   1723 /* unicodeByteMode: */
   1724        while(source<sourceLimit) {
   1725            if(targetCapacity<=0) {
   1726                /* target is full */
   1727                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1728                break;
   1729            }
   1730            c=*source++;
   1731 
                   /* unsigned range check for 0x3400..0xd7ff */
   1732            if (c - 0x3400 < 0xd800 - 0x3400) {
   1733                /* not compressible, write character directly */
                       /* fast path when both bytes fit; otherwise outputBytes splits them */
   1734                if(targetCapacity>=2) {
   1735                    *target++=(uint8_t)(c>>8);
   1736                    *target++=(uint8_t)c;
   1737                    targetCapacity-=2;
   1738                } else {
   1739                    length=2;
   1740                    goto outputBytes;
   1741                }
   1742            } else if (c - 0x3400 >= 0xf300 - 0x3400 /* c<0x3400 || c>=0xf300 */) {
   1743                /* compress BMP character if the following one is not an uncompressible ideograph */
   1744                if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
                           /* unsigned range checks: '0'..'9', 'a'..'z', 'A'..'Z' */
   1745                    if (c - 0x30 < 10 || c - 0x61 < 26 || c - 0x41 < 26) {
   1746                        /* ASCII digit or letter */
   1747                        isSingleByteMode=true;
                               /* NOTE(review): the trailing |c is redundant with the |= assignment */
   1748                        c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
   1749                        length=2;
   1750                        goto outputBytes;
   1751                    } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
   1752                        /* there is a dynamic window that contains this character, change to it */
   1753                        isSingleByteMode=true;
   1754                        dynamicWindow=window;
   1755                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1756                        useDynamicWindow(scsu, dynamicWindow);
   1757                        c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1758                        length=2;
   1759                        goto outputBytes;
   1760                    } else if((code=getDynamicOffset(c, &offset))>=0) {
   1761                        /* define a dynamic window with this character */
   1762                        isSingleByteMode=true;
   1763                        dynamicWindow=getNextDynamicWindow(scsu);
   1764                        currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1765                        useDynamicWindow(scsu, dynamicWindow);
   1766                        c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1767                        length=3;
   1768                        goto outputBytes;
   1769                    }
   1770                }
   1771 
                       /*
                        * reached when an uncompressible ideograph follows, or when none
                        * of the three compression branches above applied
                        */
   1772                /* don't know how to compress this character, just write it directly */
   1773                length=2;
   1774                goto outputBytes;
   1775            } else if(c<0xe000) {
   1776                /* c is a surrogate */
   1777                if(U16_IS_SURROGATE_LEAD(c)) {
                           /* also the re-entry point for a lead surrogate pending from an earlier call */
   1778 getTrailUnicode:
   1779                    lead=(char16_t)c;
   1780                    if(source<sourceLimit) {
   1781                        /* test the following code unit */
   1782                        trail=*source;
   1783                        if(U16_IS_TRAIL(trail)) {
   1784                            ++source;
   1785                            c=U16_GET_SUPPLEMENTARY(c, trail);
   1786                            /* convert this surrogate code point */
   1787                            /* exit this condition tree */
   1788                        } else {
   1789                            /* this is an unmatched lead code unit (1st surrogate) */
   1790                            /* callback(illegal) */
   1791                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1792                            goto endloop;
   1793                        }
   1794                    } else {
   1795                        /* no more input */
                               /* leave the loop with c==lead so that it is kept for the next call */
   1796                        break;
   1797                    }
   1798                } else {
   1799                    /* this is an unmatched trail code unit (2nd surrogate) */
   1800                    /* callback(illegal) */
   1801                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
   1802                    goto endloop;
   1803                }
   1804 
   1805                /* compress supplementary character */
   1806                if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
   1807                    !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
   1808                ) {
   1809                    /*
   1810                     * there is a dynamic window that contains this character and
   1811                     * the following character is not uncompressible,
   1812                     * change to the window
   1813                     */
   1814                    isSingleByteMode=true;
   1815                    dynamicWindow=window;
   1816                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
   1817                    useDynamicWindow(scsu, dynamicWindow);
   1818                    c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
   1819                    length=2;
   1820                    goto outputBytes;
   1821                } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
   1822                          (code=getDynamicOffset(c, &offset))>=0
   1823                ) {
   1824                    /* two supplementary characters in (probably) the same window - define an extended one */
   1825                    isSingleByteMode=true;
   1826                    code-=0x200;
   1827                    dynamicWindow=getNextDynamicWindow(scsu);
   1828                    currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
   1829                    useDynamicWindow(scsu, dynamicWindow);
   1830                    c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
   1831                    length=4;
   1832                    goto outputBytes;
   1833                } else {
   1834                    /* don't know how to compress this character, just write it directly */
                           /* emitted as the two original UTF-16 code units, high byte first */
   1835                    c=((uint32_t)lead<<16)|trail;
   1836                    length=4;
   1837                    goto outputBytes;
   1838                }
   1839            } else /* 0xe000<=c<0xf300 */ {
   1840                /* quote to avoid SCSU tags */
   1841                c|=UQU<<16;
   1842                length=3;
   1843                goto outputBytes;
   1844            }
   1845 
   1846            /* normal end of conversion: prepare for a new character */
                   /* only the direct two-byte write above falls through to here */
   1847            c=0;
   1848        }
   1849    }
   1850 endloop:
   1851 
   1852    /* set the converter state back into UConverter */
   1853    scsu->fromUIsSingleByteMode=isSingleByteMode;
   1854    scsu->fromUDynamicWindow=dynamicWindow;
   1855 
           /* 0, or the code unit that is still pending / caused the error */
   1856    cnv->fromUChar32=c;
   1857 
   1858    /* write back the updated pointers */
   1859    pArgs->source=source;
   1860    pArgs->target=(char *)target;
   1861    return;
   1862 
           /*
            * Shared tail: c holds `length` output bytes, right-aligned with the
            * first byte to emit in the most significant used position.
            */
   1863 outputBytes:
   1864    /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
   1865    /* from the first if in the loop we know that targetCapacity>0 */
   1866    if(length<=targetCapacity) {
   1867        switch(length) {
   1868            /* each branch falls through to the next one */
   1869        case 4:
   1870            *target++=(uint8_t)(c>>24);
   1871            U_FALLTHROUGH;
   1872        case 3:
   1873            *target++=(uint8_t)(c>>16);
   1874            U_FALLTHROUGH;
   1875        case 2:
   1876            *target++=(uint8_t)(c>>8);
   1877            U_FALLTHROUGH;
   1878        case 1:
   1879            *target++=(uint8_t)c;
   1880            U_FALLTHROUGH;
   1881        default:
   1882            /* will never occur */
   1883            break;
   1884        }
   1885        targetCapacity-=length;
   1886 
   1887        /* normal end of conversion: prepare for a new character */
   1888        c=0;
   1889        goto loop;
   1890    } else {
   1891        uint8_t *p;
   1892 
   1893        /*
   1894         * We actually do this backwards here:
   1895         * In order to save an intermediate variable, we output
   1896         * first to the overflow buffer what does not fit into the
   1897         * regular target.
   1898         */
   1899        /* we know that 0<=targetCapacity<length<=4 */
   1900        /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
               /* from here on, length counts only the bytes that go to the overflow buffer */
   1901        length-=targetCapacity;
   1902        p=(uint8_t *)cnv->charErrorBuffer;
   1903        switch(length) {
   1904            /* each branch falls through to the next one */
   1905        case 4:
   1906            *p++=(uint8_t)(c>>24);
   1907            U_FALLTHROUGH;
   1908        case 3:
   1909            *p++=(uint8_t)(c>>16);
   1910            U_FALLTHROUGH;
   1911        case 2:
   1912            *p++=(uint8_t)(c>>8);
   1913            U_FALLTHROUGH;
   1914        case 1:
   1915            *p=(uint8_t)c;
   1916            U_FALLTHROUGH;
   1917        default:
   1918            /* will never occur */
   1919            break;
   1920        }
   1921        cnv->charErrorBufferLength=(int8_t)length;
   1922 
   1923        /* now output what fits into the regular target */
               /*
                * Drop the bytes already stored in the overflow buffer. With length==4
                * nothing fits (targetCapacity==0) and the shift would be by 32 bits,
                * which is undefined for the 32-bit c, so that case is set to 0 directly.
                */
   1924        c = (length == 4) ? 0 : c >> 8*length; /* length was reduced by targetCapacity */
   1925        switch(targetCapacity) {
   1926            /* each branch falls through to the next one */
   1927        case 3:
   1928            *target++=(uint8_t)(c>>16);
   1929            U_FALLTHROUGH;
   1930        case 2:
   1931            *target++=(uint8_t)(c>>8);
   1932            U_FALLTHROUGH;
   1933        case 1:
   1934            *target++=(uint8_t)c;
   1935            U_FALLTHROUGH;
   1936        default:
   1937            break;
   1938        }
   1939 
   1940        /* target overflow */
               /* the whole sequence has been emitted (target + overflow buffer), so nothing is pending */
   1941        targetCapacity=0;
   1942        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
   1943        c=0;
   1944        goto endloop;
   1945    }
   1946 }
   1947 
   1948 /* miscellaneous ------------------------------------------------------------ */
   1949 
   1950 static const char *  U_CALLCONV
   1951 _SCSUGetName(const UConverter *cnv) {
   1952    SCSUData *scsu=(SCSUData *)cnv->extraInfo;
   1953 
   1954    switch(scsu->locale) {
   1955    case l_ja:
   1956        return "SCSU,locale=ja";
   1957    default:
   1958        return "SCSU";
   1959    }
   1960 }
   1961 
   1962 /* structure for SafeClone calculations */
        /*
         * Layout of a cloned SCSU converter inside the caller-provided buffer.
         * _SCSUSafeClone() reports sizeof(struct cloneSCSUStruct) as the required
         * buffer size, casts the buffer to this struct, and points cnv.extraInfo
         * at mydata.
         */
   1963 struct cloneSCSUStruct
   1964 {
   1965    UConverter cnv;      /* the converter object itself; must stay first, the clone is returned as &cnv */
   1966    SCSUData mydata;     /* private copy of the SCSU state for the clone */
   1967 };
   1968 
   1969 static UConverter *  U_CALLCONV
   1970 _SCSUSafeClone(const UConverter *cnv, 
   1971               void *stackBuffer, 
   1972               int32_t *pBufferSize, 
   1973               UErrorCode *status)
   1974 {
   1975    struct cloneSCSUStruct * localClone;
   1976    int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
   1977 
   1978    if (U_FAILURE(*status)){
   1979        return nullptr;
   1980    }
   1981 
   1982    if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
   1983        *pBufferSize = bufferSizeNeeded;
   1984        return nullptr;
   1985    }
   1986 
   1987    localClone = (struct cloneSCSUStruct *)stackBuffer;
   1988    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   1989 
   1990    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
   1991    localClone->cnv.extraInfo = &localClone->mydata;
   1992    localClone->cnv.isExtraLocal = true;
   1993 
   1994    return &localClone->cnv;
   1995 }
   1996 U_CDECL_END
   1997 
        /*
         * Function table for the SCSU converter.
         * The initializer is positional; the slot names come from the
         * UConverterImpl declaration, which is not in this file, so the group
         * labels below are a reviewer's reading - TODO confirm against that
         * declaration. nullptr marks a slot for which this converter supplies
         * no function.
         */
   1998 static const UConverterImpl _SCSUImpl={
   1999    UCNV_SCSU, /* converter type */
   2000 
   2001    nullptr, /* presumably load/unload hooks - not provided */
   2002    nullptr,
   2003 
   2004    _SCSUOpen,
   2005    _SCSUClose,
   2006    _SCSUReset,
   2007 
           /* conversion functions, each in a plain and a with-offsets variant */
   2008    _SCSUToUnicode,
   2009    _SCSUToUnicodeWithOffsets,
   2010    _SCSUFromUnicode,
   2011    _SCSUFromUnicodeWithOffsets,
   2012    nullptr,
   2013 
   2014    nullptr,
   2015    _SCSUGetName,
   2016    nullptr,
   2017    _SCSUSafeClone,
   2018    ucnv_getCompleteUnicodeSet,
   2019    nullptr,
   2020    nullptr
   2021 };
   2022 
        /*
         * Static descriptor of the SCSU converter (name, CCSID, type, byte counts,
         * substitution bytes). The initializer is positional; fields without a
         * comment below are not named here because the UConverterStaticData
         * declaration is not in this file.
         */
   2023 static const UConverterStaticData _SCSUStaticData={
   2024    sizeof(UConverterStaticData), /* size of this structure */
   2025    "SCSU", /* converter name */
   2026    1212, /* CCSID for SCSU */
   2027    UCNV_IBM, UCNV_SCSU,
   2028    1, 3, /* one char16_t generates at least 1 byte and at most 3 bytes */
   2029    /*
   2030     * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
   2031     * substitution string.
   2032     */
           /* the bytes are SQU (0x0e) followed by 0xff 0xfd, with a length of 3 */
   2033    { 0x0e, 0xff, 0xfd, 0 }, 3,
   2034    false, false,
   2035    0,
   2036    0,
   2037    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   2038 };
   2039 
        /*
         * Shared-data object for SCSU; it has external linkage (not static) and
         * ties the static descriptor to the function table defined above.
         */
   2040 const UConverterSharedData _SCSUData=
   2041        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl);
   2042 
   2043 #endif