[ tor-browser ].git.dasho

ucnv2022.cpp (159706B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2000-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   file name:  ucnv2022.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2000feb03
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Change history:
     17 *
     18 *   06/29/2000  helena  Major rewrite of the callback APIs.
     19 *   08/08/2000  Ram     Included support for ISO-2022-JP-2
     20 *                       Changed implementation of toUnicode
     21 *                       function
     22 *   08/21/2000  Ram     Added support for ISO-2022-KR
*   08/29/2000  Ram     Separated implementation of EBCDIC to
     24 *                       ucnvebdc.c
     25 *   09/20/2000  Ram     Added support for ISO-2022-CN
     26 *                       Added implementations for getNextUChar()
     27 *                       for specific 2022 country variants.
     28 *   10/31/2000  Ram     Implemented offsets logic functions
     29 */
     30 
     31 #include "unicode/utypes.h"
     32 
     33 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
     34 
     35 #include "unicode/ucnv.h"
     36 #include "unicode/uset.h"
     37 #include "unicode/ucnv_err.h"
     38 #include "unicode/ucnv_cb.h"
     39 #include "unicode/utf16.h"
     40 #include "ucnv_imp.h"
     41 #include "ucnv_bld.h"
     42 #include "ucnv_cnv.h"
     43 #include "ucnvmbcs.h"
     44 #include "cstring.h"
     45 #include "cmemory.h"
     46 #include "uassert.h"
     47 
     48 #ifdef U_ENABLE_GENERIC_ISO_2022
     49 /*
     50 * I am disabling the generic ISO-2022 converter after proposing to do so on
     51 * the icu mailing list two days ago.
     52 *
     53 * Reasons:
     54 * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of
     55 *    its designation sequences, single shifts with return to the previous state,
     56 *    switch-with-no-return to UTF-16BE or similar, etc.
     57 *    This is unlike the language-specific variants like ISO-2022-JP which
     58 *    require a much smaller repertoire of ISO-2022 features.
     59 *    These variants continue to be supported.
     60 * 2. I believe that no one is really using the generic ISO-2022 converter
     61 *    but rather always one of the language-specific variants.
     62 *    Note that ICU's generic ISO-2022 converter has always output one escape
     63 *    sequence followed by UTF-8 for the whole stream.
     64 * 3. Switching between subcharsets is extremely slow, because each time
     65 *    the previous converter is closed and a new one opened,
     66 *    without any kind of caching, least-recently-used list, etc.
     67 * 4. The code is currently buggy, and given the above it does not seem
     68 *    reasonable to spend the time on maintenance.
     69 * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.
     70 *    This means, for example, that when ISO-8859-7 is designated, the following
     71 *    ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.
     72 *    The ICU ISO-2022 converter does not handle this - and has no information
     73 *    about which subconverter would have to be shifted vs. which is designed
     74 *    for 7-bit ISO-2022.
     75 *
     76 * Markus Scherer 2003-dec-03
     77 */
     78 #endif
     79 
#if !UCONFIG_ONLY_HTML_CONVERSION
/* NUL-terminated one-byte string containing the SI (Shift In, 0x0F) control character. */
static const char SHIFT_IN_STR[]  = "\x0F";
// static const char SHIFT_OUT_STR[] = "\x0E";
#endif

/* ASCII code values of common control/whitespace characters. */
#define CR      0x0D    /* carriage return */
#define LF      0x0A    /* line feed */
#define H_TAB   0x09    /* horizontal tab */
#define V_TAB   0x0B    /* vertical tab */
#define SPACE   0x20    /* space */
     90 
     91 enum {
     92    HWKANA_START=0xff61,
     93    HWKANA_END=0xff9f
     94 };
     95 
     96 /*
     97 * 94-character sets with native byte values A1..FE are encoded in ISO 2022
     98 * as bytes 21..7E. (Subtract 0x80.)
     99 * 96-character sets with native byte values A0..FF are encoded in ISO 2022
    100 * as bytes 20..7F. (Subtract 0x80.)
    101 * Do not encode C1 control codes with native bytes 80..9F
    102 * as bytes 00..1F (C0 control codes).
    103 */
    104 enum {
    105    GR94_START=0xa1,
    106    GR94_END=0xfe,
    107    GR96_START=0xa0,
    108    GR96_END=0xff
    109 };
    110 
    111 /*
    112 * ISO 2022 control codes must not be converted from Unicode
    113 * because they would mess up the byte stream.
    114 * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
    115 * corresponding to SO, SI, and ESC.
    116 */
    117 #define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)
    118 
    119 /* for ISO-2022-JP and -CN implementations */
    120 typedef enum  {
    121        /* shared values */
    122        INVALID_STATE=-1,
    123        ASCII = 0,
    124 
    125        SS2_STATE=0x10,
    126        SS3_STATE,
    127 
    128        /* JP */
    129        ISO8859_1 = 1 ,
    130        ISO8859_7 = 2 ,
    131        JISX201  = 3,
    132        JISX208 = 4,
    133        JISX212 = 5,
    134        GB2312  =6,
    135        KSC5601 =7,
    136        HWKANA_7BIT=8,    /* Halfwidth Katakana 7 bit */
    137 
    138        /* CN */
    139        /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    140        GB2312_1=1,
    141        ISO_IR_165=2,
    142        CNS_11643=3,
    143 
    144        /*
    145         * these are used in StateEnum and ISO2022State variables,
    146         * but CNS_11643 must be used to index into myConverterArray[]
    147         */
    148        CNS_11643_0=0x20,
    149        CNS_11643_1,
    150        CNS_11643_2,
    151        CNS_11643_3,
    152        CNS_11643_4,
    153        CNS_11643_5,
    154        CNS_11643_6,
    155        CNS_11643_7
    156 } StateEnum;
    157 
    158 /* is the StateEnum charset value for a DBCS charset? */
    159 #if UCONFIG_ONLY_HTML_CONVERSION
    160 #define IS_JP_DBCS(cs) (JISX208==(cs))
    161 #else
    162 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
    163 #endif
    164 
    165 #define CSM(cs) ((uint16_t)1<<(cs))
    166 
    167 /*
    168 * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
    169 * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x
    170 *
    171 * Note: The converter uses some leniency:
    172 * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
    173 *   all versions, not just JIS7 and JIS8.
    174 * - ICU does not distinguish between different versions of JIS X 0208.
    175 */
    176 #if UCONFIG_ONLY_HTML_CONVERSION
    177 enum { MAX_JA_VERSION=0 };
    178 #else
    179 enum { MAX_JA_VERSION=4 };
    180 #endif
    181 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
    182    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
    183 #if !UCONFIG_ONLY_HTML_CONVERSION
    184    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
    185    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    186    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
    187    CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
    188 #endif
    189 };
    190 
    191 typedef enum {
    192        ASCII1=0,
    193        LATIN1,
    194        SBCS,
    195        DBCS,
    196        MBCS,
    197        HWKANA
    198 }Cnv2022Type;
    199 
/*
 * ISO 2022 designation/shift state: which charset number is assigned to each
 * of G0..G3 and which of them is currently selected.
 * UConverterDataISO2022 holds two instances, toU2022State and fromU2022State.
 */
typedef struct ISO2022State {
   int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
   int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
   int8_t prevG;       /* g before single shift (SS2 or SS3) */
} ISO2022State;
    205 
/* Mask applied to the converter load options to extract the variant version number. */
#define UCNV_OPTIONS_VERSION_MASK 0xf
/* Capacity of UConverterDataISO2022::myConverterArray. */
#define UCNV_2022_MAX_CONVERTERS 10

/*
 * Per-converter private data for the ISO-2022 variants; allocated and zeroed
 * in _ISO2022Open() and stored in UConverter::extraInfo.
 */
typedef struct{
   /* Shared data of the sub-converters, indexed by StateEnum charset value (e.g. JISX208, GB2312_1, CNS_11643). */
   UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
   /* Sub-converter instance; the ko variant opens one with ucnv_open(). */
   UConverter *currentConverter;
   /* Initialized to ASCII1 when the converter is opened. */
   Cnv2022Type currentType;
   /* Presumably the designation/shift state for each conversion direction -- TODO confirm against the conversion functions. */
   ISO2022State toU2022State, fromU2022State;
   /* NOTE(review): looks like the accumulated escape-sequence key used with escSeqStateTable_Key_2022 -- confirm. */
   uint32_t key;
   /* Variant version: the load options masked with UCNV_OPTIONS_VERSION_MASK. */
   uint32_t version;
#ifdef U_ENABLE_GENERIC_ISO_2022
   /* Set to true when the generic ISO-2022 converter is opened. */
   UBool isFirstBuffer;
#endif
   /* NOTE(review): purpose not visible in this part of the file -- see the toUnicode functions. */
   UBool isEmptySegment;
   /* Converter name, e.g. "ISO_2022,locale=ja,version=0" (28 characters plus NUL). */
   char name[30];
   /* Two-letter locale tag plus NUL: "ja", "ko" or "cn". */
   char locale[3];
}UConverterDataISO2022;
    223 
    224 /* Protos */
    225 /* ISO-2022 ----------------------------------------------------------------- */
    226 
    227 /*Forward declaration */
    228 U_CFUNC void U_CALLCONV
    229 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
    230                      UErrorCode * err);
    231 U_CFUNC void U_CALLCONV
    232 ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,
    233                                    UErrorCode * err);
    234 
    235 #define ESC_2022 0x1B /*ESC*/
    236 
    237 typedef enum
    238 {
    239        INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/
    240        VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/
    241        VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/
    242        VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/
    243 } UCNV_TableStates_2022;
    244 
    245 /*
    246 * The way these state transition arrays work is:
    247 * ex : ESC$B is the sequence for JISX208
    248 *      a) First Iteration: char is ESC
    249 *          i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index
    250 *             int x = normalize_esq_chars_2022[27] which is equal to 1
    251 *         ii) Search for this value in escSeqStateTable_Key_2022[]
    252 *             value of x is stored at escSeqStateTable_Key_2022[0]
    253 *        iii) Save this index as offset
    254 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    255 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    256 *     b) Switch on this state and continue to next char
    257 *          i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index
    258 *             which is normalize_esq_chars_2022[36] == 4
    259 *         ii) x is currently 1(from above)
    260 *               x<<=5 -- x is now 32
    261 *               x+=normalize_esq_chars_2022[36]
    262 *               now x is 36
    263 *        iii) Search for this value in escSeqStateTable_Key_2022[]
    264 *             value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2
    265 *         iv) Get state of this sequence from escSeqStateTable_Value_2022[]
    266 *             escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022
    267 *     c) Switch on this state and continue to next char
    268 *        i)  Get the value of B from normalize_esq_chars_2022[] with int value of B as index
    269 *        ii) x is currently 36 (from above)
    270 *            x<<=5 -- x is now 1152
    271 *            x+=normalize_esq_chars_2022[66]
    272 *            now x is 1161
*       iii) Search for this value in escSeqStateTable_Key_2022[]
*            value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24
*        iv) Get state of this sequence from escSeqStateTable_Value_2022[24]
*            escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022
*         v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208
    278 */
    279 
    280 
    281 /*Below are the 3 arrays depicting a state transition table*/
    282 static const int8_t normalize_esq_chars_2022[256] = {
    283 /*       0      1       2       3       4      5       6        7       8       9           */
    284 
    285         0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    286        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    287        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
    288        ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29      ,0
    289        ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
    290        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    291        ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
    292        ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
    293        ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
    294        ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    295        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    296        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    297        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    298        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    299        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    300        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    301        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    302        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    303        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    304        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    305        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    306        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    307        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    308        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    309        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
    310        ,0     ,0      ,0      ,0      ,0      ,0
    311 };
    312 
    313 #ifdef U_ENABLE_GENERIC_ISO_2022
    314 /*
    315 * When the generic ISO-2022 converter is completely removed, not just disabled
    316 * per #ifdef, then the following state table and the associated tables that are
    317 * dimensioned with MAX_STATES_2022 should be trimmed.
    318 *
    319 * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of
    320 * the associated escape sequences starting with ESC ( B should be removed.
    321 * This includes the ones with key values 1097 and all of the ones above 1000000.
    322 *
    323 * For the latter, the tables can simply be truncated.
    324 * For the former, since the tables must be kept parallel, it is probably best
    325 * to simply duplicate an adjacent table cell, parallel in all tables.
    326 *
    327 * It may make sense to restructure the tables, especially by using small search
    328 * tables for the variants instead of indexing them parallel to the table here.
    329 */
    330 #endif
    331 
    332 #define MAX_STATES_2022 74
    333 static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
    334 /*   0           1           2           3           4           5           6           7           8           9           */
    335 
    336     1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    337    ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    338    ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    339    ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    340    ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    341    ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    342    ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    343    ,35947631   ,35947635   ,35947636   ,35947638
    344 };
    345 
    346 #ifdef U_ENABLE_GENERIC_ISO_2022
    347 
/*
 * Converter names for the generic ISO-2022 converter, parallel to
 * escSeqStateTable_Key_2022[]: entry i names the converter selected by the
 * escape sequence whose key is at index i. Most non-terminal entries are
 * nullptr. Only compiled when U_ENABLE_GENERIC_ISO_2022 is defined.
 */
static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {
/*  0                      1                        2                      3                   4                   5                        6                      7                       8                       9    */

    nullptr                   ,nullptr                   ,nullptr                   ,nullptr               ,nullptr               ,nullptr                   ,nullptr                   ,nullptr                   ,"latin1"               ,"latin1"
   ,"latin1"               ,"ibm-865"              ,"ibm-865"              ,"ibm-865"          ,"ibm-865"          ,"ibm-865"              ,"ibm-865"              ,"JISX0201"             ,"JISX0201"             ,"latin1"
   ,"latin1"               ,nullptr                   ,"JISX-208"             ,"ibm-5478"         ,"JISX-208"         ,nullptr                   ,nullptr                   ,nullptr                   ,nullptr                   ,"UTF8"
   ,"ISO-8859-1"           ,"ISO-8859-7"           ,"JIS-X-208"            ,nullptr               ,"ibm-955"          ,"ibm-367"              ,"ibm-952"              ,"ibm-949"              ,"JISX-212"             ,"ibm-1383"
   ,"ibm-952"              ,"ibm-964"              ,"ibm-964"              ,"ibm-964"          ,"ibm-964"          ,"ibm-964"              ,"ibm-964"              ,"ibm-5478"         ,"ibm-949"              ,"ISO-IR-165"
   ,"CNS-11643-1992,1"     ,"CNS-11643-1992,2"     ,"CNS-11643-1992,3"     ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6"     ,"CNS-11643-1992,7"     ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"
   ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,nullptr               ,"latin1"           ,"ibm-912"              ,"ibm-913"              ,"ibm-914"              ,"ibm-813"              ,"ibm-1089"
   ,"ibm-920"              ,"ibm-915"              ,"ibm-915"              ,"latin1"
};
    360 
    361 #endif
    362 
/*
 * UCNV_TableStates_2022 values, parallel to escSeqStateTable_Key_2022[]:
 * entry i tells whether the escape-sequence prefix whose key is at index i
 * is a complete sequence (VALID_TERMINAL_2022), a proper prefix
 * (VALID_NON_TERMINAL_2022), or complete but possibly extendable
 * (VALID_MAYBE_TERMINAL_2022, only at index 10 / key 1097).
 * Rows hold 10 entries each, matching the column header below.
 */
static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
/*          0                           1                         2                             3                           4                           5                               6                        7                          8                           9       */
    VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022     ,VALID_NON_TERMINAL_2022   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
   ,VALID_MAYBE_TERMINAL_2022  ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
   ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022
   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_NON_TERMINAL_2022    ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
   ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022        ,VALID_TERMINAL_2022
};
    374 
/*
 * Type def for refactoring changeState_2022 code.
 * Identifies the ISO-2022 variant. The numeric values are fixed, and which
 * enumerators exist depends on the build configuration:
 * ISO_2022 only with U_ENABLE_GENERIC_ISO_2022, and ISO_2022_KR/ISO_2022_CN
 * only when UCONFIG_ONLY_HTML_CONVERSION is off.
 */
typedef enum{
#ifdef U_ENABLE_GENERIC_ISO_2022
   ISO_2022=0,
#endif
   ISO_2022_JP=1,
#if !UCONFIG_ONLY_HTML_CONVERSION
   ISO_2022_KR=2,
   ISO_2022_CN=3
#endif
} Variant2022;
    386 
    387 /*********** ISO 2022 Converter Protos ***********/
    388 static void U_CALLCONV
    389 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode);
    390 
    391 static void U_CALLCONV
    392 _ISO2022Close(UConverter *converter);
    393 
    394 static void U_CALLCONV
    395 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice);
    396 
    397 U_CDECL_BEGIN
    398 static const char * U_CALLCONV
    399 _ISO2022getName(const UConverter* cnv);
    400 U_CDECL_END
    401 
    402 static void  U_CALLCONV
    403 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);
    404 
    405 U_CDECL_BEGIN
    406 static UConverter * U_CALLCONV
    407 _ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);
    408 
    409 U_CDECL_END
    410 
    411 #ifdef U_ENABLE_GENERIC_ISO_2022
    412 static void U_CALLCONV
    413 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);
    414 #endif
    415 
    416 namespace {
    417 
    418 /*const UConverterSharedData _ISO2022Data;*/
    419 extern const UConverterSharedData _ISO2022JPData;
    420 
    421 #if !UCONFIG_ONLY_HTML_CONVERSION
    422 extern const UConverterSharedData _ISO2022KRData;
    423 extern const UConverterSharedData _ISO2022CNData;
    424 #endif
    425 
    426 }  // namespace
    427 
    428 /*************** Converter implementations ******************/
    429 
    430 /* The purpose of this function is to get around gcc compiler warnings. */
    431 static inline void
    432 fromUWriteUInt8(UConverter *cnv,
    433                 const char *bytes, int32_t length,
    434                 uint8_t **target, const char *targetLimit,
    435                 int32_t **offsets,
    436                 int32_t sourceIndex,
    437                 UErrorCode *pErrorCode)
    438 {
    439    char* targetChars = reinterpret_cast<char*>(*target);
    440    ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,
    441                         offsets, sourceIndex, pErrorCode);
    442    *target = reinterpret_cast<uint8_t*>(targetChars);
    443 
    444 }
    445 
    446 static inline void
    447 setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){
    448    if(myConverterData->version == 1) {
    449        UConverter *cnv = myConverterData->currentConverter;
    450 
    451        cnv->toUnicodeStatus=0;     /* offset */
    452        cnv->mode=0;                /* state */
    453        cnv->toULength=0;           /* byteIndex */
    454    }
    455 }
    456 
    457 static inline void
    458 setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){
    459   /* in ISO-2022-KR the designator sequence appears only once
    460    * in a file so we append it only once
    461    */
    462    if( converter->charErrorBufferLength==0){
    463 
    464        converter->charErrorBufferLength = 4;
    465        converter->charErrorBuffer[0] = 0x1b;
    466        converter->charErrorBuffer[1] = 0x24;
    467        converter->charErrorBuffer[2] = 0x29;
    468        converter->charErrorBuffer[3] = 0x43;
    469    }
    470    if(myConverterData->version == 1) {
    471        UConverter *cnv = myConverterData->currentConverter;
    472 
    473        cnv->fromUChar32=0;
    474        cnv->fromUnicodeStatus=1;   /* prevLength */
    475    }
    476 }
    477 
    478 static void U_CALLCONV
    479 _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
    480 
    481    char myLocale[7]={' ',' ',' ',' ',' ',' ', '\0'};
    482 
    483    cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
    484    if(cnv->extraInfo != nullptr) {
    485        UConverterNamePieces stackPieces;
    486        UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER;
    487        UConverterDataISO2022* myConverterData = static_cast<UConverterDataISO2022*>(cnv->extraInfo);
    488        uint32_t version;
    489 
    490        stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable;
    491 
    492        uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));
    493        myConverterData->currentType = ASCII1;
    494        cnv->fromUnicodeStatus =false;
    495        if(pArgs->locale){
    496            uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)-1);
    497        }
    498        version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    499        myConverterData->version = version;
    500        if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
    501            (myLocale[2]=='_' || myLocale[2]=='\0'))
    502        {
    503            /* open the required converters and cache them */
    504            if(version>MAX_JA_VERSION) {
    505                // ICU 55 fails to open a converter for an unsupported version.
    506                // Previously, it fell back to version 0, but that would yield
    507                // unexpected behavior.
    508                *errorCode = U_MISSING_RESOURCE_ERROR;
    509                return;
    510            }
    511            if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
    512                myConverterData->myConverterArray[ISO8859_7] =
    513                    ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
    514            }
    515            myConverterData->myConverterArray[JISX208] =
    516                ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
    517            if(jpCharsetMasks[version]&CSM(JISX212)) {
    518                myConverterData->myConverterArray[JISX212] =
    519                    ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
    520            }
    521            if(jpCharsetMasks[version]&CSM(GB2312)) {
    522                myConverterData->myConverterArray[GB2312] =
    523                    ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
    524            }
    525            if(jpCharsetMasks[version]&CSM(KSC5601)) {
    526                myConverterData->myConverterArray[KSC5601] =
    527                    ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
    528            }
    529 
    530            /* set the function pointers to appropriate functions */
    531            cnv->sharedData = const_cast<UConverterSharedData*>(&_ISO2022JPData);
    532            uprv_strcpy(myConverterData->locale,"ja");
    533 
    534            (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
    535            size_t len = uprv_strlen(myConverterData->name);
    536            myConverterData->name[len] = static_cast<char>(myConverterData->version + static_cast<int>('0'));
    537            myConverterData->name[len+1]='\0';
    538        }
    539 #if !UCONFIG_ONLY_HTML_CONVERSION
    540        else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
    541            (myLocale[2]=='_' || myLocale[2]=='\0'))
    542        {
    543            if(version>1) {
    544                // ICU 55 fails to open a converter for an unsupported version.
    545                // Previously, it fell back to version 0, but that would yield
    546                // unexpected behavior.
    547                *errorCode = U_MISSING_RESOURCE_ERROR;
    548                return;
    549            }
    550            const char *cnvName;
    551            if(version==1) {
    552                cnvName="icu-internal-25546";
    553            } else {
    554                cnvName="ibm-949";
    555                myConverterData->version=version=0;
    556            }
    557            if(pArgs->onlyTestIsLoadable) {
    558                ucnv_canCreateConverter(cnvName, errorCode);  /* errorCode carries result */
    559                uprv_free(cnv->extraInfo);
    560                cnv->extraInfo=nullptr;
    561                return;
    562            } else {
    563                myConverterData->currentConverter=ucnv_open(cnvName, errorCode);
    564                if (U_FAILURE(*errorCode)) {
    565                    _ISO2022Close(cnv);
    566                    return;
    567                }
    568 
    569                if(version==1) {
    570                    (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");
    571                    uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);
    572                    cnv->subCharLen = myConverterData->currentConverter->subCharLen;
    573                }else{
    574                    (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");
    575                }
    576 
    577                /* initialize the state variables */
    578                setInitialStateToUnicodeKR(cnv, myConverterData);
    579                setInitialStateFromUnicodeKR(cnv, myConverterData);
    580 
    581                /* set the function pointers to appropriate functions */
    582                cnv->sharedData = const_cast<UConverterSharedData*>(&_ISO2022KRData);
    583                uprv_strcpy(myConverterData->locale,"ko");
    584            }
    585        }
    586        else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
    587            (myLocale[2]=='_' || myLocale[2]=='\0'))
    588        {
    589            if(version>2) {
    590                // ICU 55 fails to open a converter for an unsupported version.
    591                // Previously, it fell back to version 0, but that would yield
    592                // unexpected behavior.
    593                *errorCode = U_MISSING_RESOURCE_ERROR;
    594                return;
    595            }
    596 
    597            /* open the required converters and cache them */
    598            myConverterData->myConverterArray[GB2312_1] =
    599                ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
    600            if(version>=1) {
    601                myConverterData->myConverterArray[ISO_IR_165] =
    602                    ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
    603            }
    604            myConverterData->myConverterArray[CNS_11643] =
    605                ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
    606 
    607 
    608            /* set the function pointers to appropriate functions */
    609            cnv->sharedData = const_cast<UConverterSharedData*>(&_ISO2022CNData);
    610            uprv_strcpy(myConverterData->locale,"cn");
    611 
    612            if (version==0){
    613                myConverterData->version = 0;
    614                (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");
    615            }else if (version==1){
    616                myConverterData->version = 1;
    617                (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");
    618            }else {
    619                myConverterData->version = 2;
    620                (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
    621            }
    622        }
    623 #endif  // !UCONFIG_ONLY_HTML_CONVERSION
    624        else{
    625 #ifdef U_ENABLE_GENERIC_ISO_2022
    626            myConverterData->isFirstBuffer = true;
    627 
    628            /* append the UTF-8 escape sequence */
    629            cnv->charErrorBufferLength = 3;
    630            cnv->charErrorBuffer[0] = 0x1b;
    631            cnv->charErrorBuffer[1] = 0x25;
    632            cnv->charErrorBuffer[2] = 0x42;
    633 
    634            cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;
    635            /* initialize the state variables */
    636            uprv_strcpy(myConverterData->name,"ISO_2022");
    637 #else
    638            *errorCode = U_MISSING_RESOURCE_ERROR;
    639            // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
    640            // data loading error code.
    641            return;
    642 #endif
    643        }
    644 
    645        cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;
    646 
    647        if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) {
    648            _ISO2022Close(cnv);
    649        }
    650    } else {
    651        *errorCode = U_MEMORY_ALLOCATION_ERROR;
    652    }
    653 }
    654 
    655 
    656 static void U_CALLCONV
    657 _ISO2022Close(UConverter *converter) {
    658    UConverterDataISO2022* myData = static_cast<UConverterDataISO2022*>(converter->extraInfo);
    659    UConverterSharedData **array = myData->myConverterArray;
    660    int32_t i;
    661 
    662    if (converter->extraInfo != nullptr) {
    663        /*close the array of converter pointers and free the memory*/
    664        for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
    665            if(array[i]!=nullptr) {
    666                ucnv_unloadSharedDataIfReady(array[i]);
    667            }
    668        }
    669 
    670        ucnv_close(myData->currentConverter);
    671 
    672        if(!converter->isExtraLocal){
    673            uprv_free (converter->extraInfo);
    674            converter->extraInfo = nullptr;
    675        }
    676    }
    677 }
    678 
    /*
     * Resets the ISO-2022 state stored in converter->extraInfo.
     *
     * - choice<=UCNV_RESET_TO_UNICODE: the to-Unicode state is cleared
     *   (toU2022State, the pending escape-sequence key, isEmptySegment).
     * - choice!=UCNV_RESET_TO_UNICODE: the from-Unicode state (fromU2022State)
     *   is cleared.
     * A choice value for which both tests hold resets both directions.
     *
     * When the stored locale starts with 'k', the KR-specific initial states
     * are re-established afterwards through setInitialStateToUnicodeKR() and
     * setInitialStateFromUnicodeKR().
     *
     * NOTE(review): converter->extraInfo is dereferenced without a null check;
     * presumably callers only reset fully opened converters - confirm.
     */
    679 static void U_CALLCONV
    680 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
    681    UConverterDataISO2022* myConverterData = static_cast<UConverterDataISO2022*>(converter->extraInfo);
           /* to-Unicode side: zero the state and drop any partially read escape sequence */
    682    if(choice<=UCNV_RESET_TO_UNICODE) {
    683        uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
    684        myConverterData->key = 0;
    685        myConverterData->isEmptySegment = false;
    686    }
           /* from-Unicode side */
    687    if(choice!=UCNV_RESET_TO_UNICODE) {
    688        uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
    689    }
    690 #ifdef U_ENABLE_GENERIC_ISO_2022
           /* an empty locale string selects the generic ISO-2022 handling below */
    691    if(myConverterData->locale[0] == 0){
    692        if(choice<=UCNV_RESET_TO_UNICODE) {
    693            myConverterData->isFirstBuffer = true;
    694            myConverterData->key = 0;
    695            if (converter->mode == UCNV_SO){
    696                ucnv_close (myConverterData->currentConverter);
    697                myConverterData->currentConverter=nullptr;
    698            }
    699            converter->mode = UCNV_SI;
    700        }
    701        if(choice!=UCNV_RESET_TO_UNICODE) {
    702            /* re-append UTF-8 escape sequence */
                   /*
                    * NOTE(review): the bytes stored here are 0x1b 0x28 0x42 (ESC ( B),
                    * while the open function in this file preloads 0x1b 0x25 0x42
                    * (ESC % B) under the same "UTF-8" comment - confirm which is intended.
                    */
    703            converter->charErrorBufferLength = 3;
    704            converter->charErrorBuffer[0] = 0x1b;
    705            converter->charErrorBuffer[1] = 0x28;
    706            converter->charErrorBuffer[2] = 0x42;
    707        }
    708    }
    709    else
    710 #endif
    711    {
    712        /* reset the state variables */
               /* only locales starting with 'k' get extra work here; other locales need none */
    713        if(myConverterData->locale[0] == 'k'){
    714            if(choice<=UCNV_RESET_TO_UNICODE) {
    715                setInitialStateToUnicodeKR(converter, myConverterData);
    716            }
    717            if(choice!=UCNV_RESET_TO_UNICODE) {
    718                setInitialStateFromUnicodeKR(converter, myConverterData);
    719            }
    720        }
    721    }
    722 }
    723 
    724 U_CDECL_BEGIN
    725 
    726 static const char * U_CALLCONV
    727 _ISO2022getName(const UConverter* cnv){
    728    if(cnv->extraInfo){
    729        UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;
    730        return myData->name;
    731    }
    732    return nullptr;
    733 }
    734 
    735 U_CDECL_END
    736 
    737 
    738 /*************** to unicode *******************/
    739 /****************************************************************************
    740 * Recognized escape sequences are
    741 * <ESC>(B  ASCII
    742 * <ESC>.A  ISO-8859-1
    743 * <ESC>.F  ISO-8859-7
    744 * <ESC>(J  JISX-201
    745 * <ESC>(I  JISX-201
    746 * <ESC>$B  JISX-208
    747 * <ESC>$@  JISX-208
    748 * <ESC>$(D JISX-212
    749 * <ESC>$A  GB2312
    750 * <ESC>$(C KSC5601
    751 */
    /*
     * ISO-2022-JP lookup table: indexed by the escape-sequence table position
     * ("offset") that getKey_2022() reports, it yields the state that
     * changeState_2022() applies for that sequence.
     * INVALID_STATE entries are reported there as U_UNSUPPORTED_ESCAPE_SEQUENCE.
     * The entries are positional (ten per row, see the column header); do not reorder.
     */
    752 static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
    753 /*      0                1               2               3               4               5               6               7               8               9    */
    754    INVALID_STATE   ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    755    ,ASCII          ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,JISX201        ,HWKANA_7BIT    ,JISX201        ,INVALID_STATE
    756    ,INVALID_STATE  ,INVALID_STATE  ,JISX208        ,GB2312         ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    757    ,ISO8859_1      ,ISO8859_7      ,JISX208        ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,KSC5601        ,JISX212        ,INVALID_STATE
    758    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    759    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    760    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    761    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    762 };
    763 
    764 #if !UCONFIG_ONLY_HTML_CONVERSION
    765 /*************** to unicode *******************/
        /*
         * ISO-2022-CN lookup table: indexed by the escape-sequence table position
         * ("offset") that getKey_2022() reports, it yields the state that
         * changeState_2022() applies for that sequence (SS2/SS3 shifts, GB2312_1,
         * ISO_IR_165 and the CNS_11643 planes).
         * INVALID_STATE entries are reported there as U_UNSUPPORTED_ESCAPE_SEQUENCE.
         * The entries are positional (ten per row, see the column header); do not reorder.
         */
    766 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
    767 /*      0                1               2               3               4               5               6               7               8               9    */
    768     INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,SS2_STATE      ,SS3_STATE      ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    769    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    770    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    771    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    772    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,GB2312_1       ,INVALID_STATE  ,ISO_IR_165
    773    ,CNS_11643_1    ,CNS_11643_2    ,CNS_11643_3    ,CNS_11643_4    ,CNS_11643_5    ,CNS_11643_6    ,CNS_11643_7    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    774    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    775    ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE  ,INVALID_STATE
    776 };
    777 #endif
    778 
    779 
    /*
     * Advances the escape-sequence lookup by one byte.
     *
     * The byte c is first mapped through normalize_esq_chars_2022; a result of 0
     * means the byte cannot occur in any escape sequence. Otherwise the candidate
     * key (*key << 5) + normalized byte is looked up in escSeqStateTable_Key_2022
     * with a binary search.
     *
     * @param c      the next byte of the (potential) escape sequence
     * @param key    in: key accumulated from the preceding bytes (0 at the start);
     *               out: the new key on a match, 0 otherwise
     * @param offset out: index of the matching table entry, 0 when there is no match
     * @return the escSeqStateTable_Value_2022 entry for the match, or INVALID_2022
     */
    780 static UCNV_TableStates_2022
    781 getKey_2022(char c,int32_t* key,int32_t* offset){
    782    int32_t togo;
    783    int32_t low = 0;
    784    int32_t hi = MAX_STATES_2022;
    785    int32_t oldmid=0;
    786 
    787    togo = normalize_esq_chars_2022[static_cast<uint8_t>(c)];
    788    if(togo == 0) {
    789        /* not a valid character anywhere in an escape sequence */
    790        *key = 0;
    791        *offset = 0;
    792        return INVALID_2022;
    793    }
           /* append the normalized byte to the key built so far */
    794    togo = (*key << 5) + togo;
    795 
           /*
            * low/hi are moved to mid itself (not mid+1/mid-1), so hi!=low alone would
            * not end the loop; the mid==oldmid test stops it once the midpoint no
            * longer changes.
            */
    796    while (hi != low)  /*binary search*/{
    797 
    798        int32_t mid = (hi+low) >> 1; /*Finds median*/
    799 
    800        if (mid == oldmid)
    801            break;
    802 
    803        if (escSeqStateTable_Key_2022[mid] > togo){
    804            hi = mid;
    805        }
    806        else if (escSeqStateTable_Key_2022[mid] < togo){
    807            low = mid;
    808        }
    809        else /*we found it*/{
    810            *key = togo;
    811            *offset = mid;
    812            return static_cast<UCNV_TableStates_2022>(escSeqStateTable_Value_2022[mid]);
    813        }
    814        oldmid = mid;
    815 
    816    }
    817 
           /* no table entry has this key: reset the outputs and report an invalid sequence */
    818    *key = 0;
    819    *offset = 0;
    820    return INVALID_2022;
    821 }
    822 
    823 /*runs through a state machine to determine the escape sequence - codepage correspondence
        *
        * Bytes are consumed from *source (up to sourceLimit), recorded in
        * _this->toUBytes and fed one at a time to getKey_2022() until the sequence
        * is complete, invalid, or the input is exhausted. The running key is
        * written back to the converter data so that an incomplete sequence
        * (key!=0, no error set) can be continued by a later call.
        *
        * For a complete sequence the variant-specific code below updates
        * toU2022State (cs[], g, prevG) or sets *err:
        * - U_ILLEGAL_ESCAPE_SEQUENCE: invalid bytes, SS2/SS3 without a prior
        *   designation, or an unhandled variant. All bytes except the first are
        *   backed out so that only the first byte remains in toUBytes.
        * - U_UNSUPPORTED_ESCAPE_SEQUENCE: a known sequence that this variant or
        *   version does not accept; toUCallbackReason becomes UCNV_UNASSIGNED.
        * On success toULength is reset to 0.
        *
        * @param _this       the ISO-2022 converter (extraInfo holds the 2022 data)
        * @param source      in/out: current read position
        * @param sourceLimit end of the input
        * @param var         which ISO-2022 variant's rules to apply
        * @param err         out: error code as described above
    824 */
    825 static void
    826 changeState_2022(UConverter* _this,
    827                const char** source,
    828                const char* sourceLimit,
    829                Variant2022 var,
    830                UErrorCode* err){
    831    UCNV_TableStates_2022 value;
    832    UConverterDataISO2022* myData2022 = static_cast<UConverterDataISO2022*>(_this->extraInfo);
    833    uint32_t key = myData2022->key;
    834    int32_t offset = 0;
           /* bytes already in toUBytes on entry, i.e. not taken from this call's source */
    835    int8_t initialToULength = _this->toULength;
    836    char c;
    837 
    838    value = VALID_NON_TERMINAL_2022;
    839    while (*source < sourceLimit) {
    840        c = *(*source)++;
    841        _this->toUBytes[_this->toULength++] = static_cast<uint8_t>(c);
    842        value = getKey_2022(c, reinterpret_cast<int32_t*>(&key), &offset);
    843 
    844        switch (value){
    845 
    846        case VALID_NON_TERMINAL_2022 :
    847            /* continue with the loop */
    848            break;
    849 
    850        case VALID_TERMINAL_2022:
    851            key = 0;
    852            goto DONE;
    853 
    854        case INVALID_2022:
    855            goto DONE;
    856 
    857        case VALID_MAYBE_TERMINAL_2022:
    858 #ifdef U_ENABLE_GENERIC_ISO_2022
    859            /* ESC ( B is ambiguous only for ISO_2022 itself */
    860            if(var == ISO_2022) {
    861                /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */
    862                _this->toULength = 0;
    863 
    864                /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */
    865 
    866                /* continue with the loop */
    867                value = VALID_NON_TERMINAL_2022;
    868                break;
    869            } else
    870 #endif
    871            {
    872                /* not ISO_2022 itself, finish here */
    873                value = VALID_TERMINAL_2022;
    874                key = 0;
    875                goto DONE;
    876            }
    877        }
    878    }
    879 
    880 DONE:
           /* persist the key: non-zero only while a sequence is still incomplete */
    881    myData2022->key = key;
    882 
    883    if (value == VALID_NON_TERMINAL_2022) {
    884        /* indicate that the escape sequence is incomplete: key!=0 */
    885        return;
    886    } else if (value == INVALID_2022 ) {
    887        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    888    } else /* value == VALID_TERMINAL_2022 */ {
    889        switch(var){
    890 #ifdef U_ENABLE_GENERIC_ISO_2022
    891        case ISO_2022:
    892        {
    893            const char *chosenConverterName = escSeqStateTable_Result_2022[offset];
    894            if(chosenConverterName == nullptr) {
    895                /* SS2 or SS3 */
    896                *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    897                _this->toUCallbackReason = UCNV_UNASSIGNED;
    898                return;
    899            }
    900 
    901            _this->mode = UCNV_SI;
    902            ucnv_close(myData2022->currentConverter);
                   /*
                    * NOTE(review): myUConverter is not declared in this function;
                    * presumably this U_ENABLE_GENERIC_ISO_2022-only branch relies on a
                    * declaration elsewhere or no longer compiles - confirm.
                    */
    903            myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);
    904            if(U_SUCCESS(*err)) {
    905                myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
    906                _this->mode = UCNV_SO;
    907            }
    908            break;
    909        }
    910 #endif
    911        case ISO_2022_JP:
    912            {
    913                StateEnum tempState = static_cast<StateEnum>(nextStateToUnicodeJP[offset]);
    914                switch(tempState) {
    915                case INVALID_STATE:
    916                    *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    917                    break;
    918                case SS2_STATE:
    919                    if(myData2022->toU2022State.cs[2]!=0) {
                               /* remember the current g in prevG unless g is already 2 or higher */
    920                        if(myData2022->toU2022State.g<2) {
    921                            myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    922                        }
    923                        myData2022->toU2022State.g=2;
    924                    } else {
    925                        /* illegal to have SS2 before a matching designator */
    926                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    927                    }
    928                    break;
    929                /* case SS3_STATE: not used in ISO-2022-JP-x */
    930                case ISO8859_1:
    931                case ISO8859_7:
                           /* accept the charset only if the mask for this converter version includes it */
    932                    if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    933                        *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    934                    } else {
    935                        /* G2 charset for SS2 */
    936                        myData2022->toU2022State.cs[2] = static_cast<int8_t>(tempState);
    937                    }
    938                    break;
    939                default:
    940                    if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
    941                        *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    942                    } else {
    943                        /* G0 charset */
    944                        myData2022->toU2022State.cs[0] = static_cast<int8_t>(tempState);
    945                    }
    946                    break;
    947                }
    948            }
    949            break;
    950 #if !UCONFIG_ONLY_HTML_CONVERSION
    951        case ISO_2022_CN:
    952            {
    953                StateEnum tempState = static_cast<StateEnum>(nextStateToUnicodeCN[offset]);
    954                switch(tempState) {
    955                case INVALID_STATE:
    956                    *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    957                    break;
    958                case SS2_STATE:
    959                    if(myData2022->toU2022State.cs[2]!=0) {
    960                        if(myData2022->toU2022State.g<2) {
    961                            myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    962                        }
    963                        myData2022->toU2022State.g=2;
    964                    } else {
    965                        /* illegal to have SS2 before a matching designator */
    966                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    967                    }
    968                    break;
    969                case SS3_STATE:
    970                    if(myData2022->toU2022State.cs[3]!=0) {
    971                        if(myData2022->toU2022State.g<2) {
    972                            myData2022->toU2022State.prevG=myData2022->toU2022State.g;
    973                        }
    974                        myData2022->toU2022State.g=3;
    975                    } else {
    976                        /* illegal to have SS3 before a matching designator */
    977                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
    978                    }
    979                    break;
    980                case ISO_IR_165:
                           /* ISO-IR-165 is rejected for version 0; other versions share the cs[1] assignment below */
    981                    if(myData2022->version==0) {
    982                        *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    983                        break;
    984                    }
    985                    U_FALLTHROUGH;
    986                case GB2312_1:
    987                    U_FALLTHROUGH;
    988                case CNS_11643_1:
    989                    myData2022->toU2022State.cs[1] = static_cast<int8_t>(tempState);
    990                    break;
    991                case CNS_11643_2:
    992                    myData2022->toU2022State.cs[2] = static_cast<int8_t>(tempState);
    993                    break;
    994                default:
    995                    /* other CNS 11643 planes */
    996                    if(myData2022->version==0) {
    997                        *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
    998                    } else {
    999                        myData2022->toU2022State.cs[3] = static_cast<int8_t>(tempState);
   1000                    }
   1001                    break;
   1002                }
   1003            }
   1004            break;
   1005        case ISO_2022_KR:
                   /*
                    * Only the sequence at table index 0x30 is accepted
                    * (presumably the ISO-2022-KR header designator - confirm against
                    * the escape sequence tables).
                    */
   1006            if(offset==0x30){
   1007                /* nothing to be done, just accept this one escape sequence */
   1008            } else {
   1009                *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
   1010            }
   1011            break;
   1012 #endif  // !UCONFIG_ONLY_HTML_CONVERSION
   1013 
   1014        default:
   1015            *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   1016            break;
   1017        }
   1018    }
   1019    if(U_SUCCESS(*err)) {
               /* the sequence was consumed completely: nothing is left pending in toUBytes */
   1020        _this->toULength = 0;
   1021    } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
   1022        if(_this->toULength>1) {
   1023            /*
   1024             * Ticket 5691: consistent illegal sequences:
   1025             * - We include at least the first byte (ESC) in the illegal sequence.
   1026             * - If any of the non-initial bytes could be the start of a character,
   1027             *   we stop the illegal sequence before the first one of those.
   1028             *   In escape sequences, all following bytes are "printable", that is,
   1029             *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
   1030             *   they are valid single/lead bytes.
   1031             *   For simplicity, we always only report the initial ESC byte as the
   1032             *   illegal sequence and back out all other bytes we looked at.
   1033             */
   1034            /* Back out some bytes. */
   1035            int8_t backOutDistance=_this->toULength-1;
   1036            int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
   1037            if(backOutDistance<=bytesFromThisBuffer) {
   1038                /* same as initialToULength<=1 */
   1039                *source-=backOutDistance;
   1040            } else {
   1041                /* Back out bytes from the previous buffer: Need to replay them. */
   1042                _this->preToULength = static_cast<int8_t>(bytesFromThisBuffer - backOutDistance);
   1043                /* same as -(initialToULength-1) */
   1044                /* preToULength is negative! */
   1045                uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
   1046                *source-=bytesFromThisBuffer;
   1047            }
   1048            _this->toULength=1;
   1049        }
   1050    } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
   1051        _this->toUCallbackReason = UCNV_UNASSIGNED;
   1052    }
   1053 }
   1054 
   1055 #if !UCONFIG_ONLY_HTML_CONVERSION
   1056 /*Checks the characters of the buffer against valid 2022 escape sequences
   1057 *if the match we return a pointer to the initial start of the sequence otherwise
   1058 *we return sourceLimit
   1059 */
   1060 /*for 2022 looks ahead in the stream
   1061 *to determine the longest possible convertible
   1062 *data stream
   1063 */
        /*
         * Summary of the code below:
         * - Without U_ENABLE_GENERIC_ISO_2022 the function scans forward from *source
         *   and returns the position of the first ESC_2022 byte, or sourceLimit if
         *   there is none. *source itself is only read, never modified.
         * - With U_ENABLE_GENERIC_ISO_2022 each ESC_2022 byte is additionally run
         *   through getKey_2022().
         *   NOTE(review): inside the "*mySource == ESC_2022" branch the test
         *   "value > 0 || *mySource==ESC_2022" is always true, so that branch always
         *   returns mySource and the later "flush" test is never reached. That test
         *   also uses "flush" although the parameter name is commented out in the
         *   signature - confirm whether this variant still builds.
         *
         * @param source      pointer to the current read position
         * @param sourceLimit end of the input
         * @return pointer to the next ESC byte, or sourceLimit
         */
   1064 static inline const char*
   1065 getEndOfBuffer_2022(const char** source,
   1066                   const char* sourceLimit,
   1067                   UBool /*flush*/){
   1068 
   1069    const char* mySource = *source;
   1070 
   1071 #ifdef U_ENABLE_GENERIC_ISO_2022
   1072    if (*source >= sourceLimit)
   1073        return sourceLimit;
   1074 
   1075    do{
   1076 
   1077        if (*mySource == ESC_2022){
   1078            int8_t i;
   1079            int32_t key = 0;
   1080            int32_t offset;
   1081            UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;
   1082 
   1083            /* Kludge: I could not
   1084            * figure out the reason for validating an escape sequence
   1085            * twice - once here and once in changeState_2022().
   1086            * is it possible to have an ESC character in a ISO2022
   1087            * byte stream which is valid in a code page? Is it legal?
   1088            */
   1089            for (i=0;
   1090            (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);
   1091            i++) {
   1092                value =  getKey_2022(*(mySource+i), &key, &offset);
   1093            }
   1094            if (value > 0 || *mySource==ESC_2022)
   1095                return mySource;
   1096 
   1097            if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )
   1098                return sourceLimit;
   1099        }
   1100    }while (++mySource < sourceLimit);
   1101 
   1102    return sourceLimit;
   1103 #else
           /* plain scan: stop at the first ESC byte or at the end of the input */
   1104    while(mySource < sourceLimit && *mySource != ESC_2022) {
   1105        ++mySource;
   1106    }
   1107    return mySource;
   1108 #endif
   1109 }
   1110 #endif
   1111 
   1112 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
   1113 * any future change in _MBCSFromUChar32() function should be reflected here.
        *
        * Looks up the code point c in the from-Unicode table of sharedData and, if
        * that yields neither a roundtrip nor a usable fallback result, in the
        * extension table (mbcs.extIndexes) when one is present.
        *
        * @param sharedData  shared converter data whose mbcs tables are consulted
        * @param c           the code point to convert
        * @param value       out: the result bytes packed into one integer; written
        *                    only when a mapping is found in the main table (the
        *                    extension lookup receives the same pointer)
        * @param useFallback passed to FROM_U_USE_FALLBACK() and the extension lookup
        * @param outputType  MBCS_OUTPUT_2 selects the 2-byte table layout; any other
        *                    value is treated as MBCS_OUTPUT_3
   1114 * @return number of bytes in *value; negative number if fallback; 0 if no mapping
   1115 */
   1116 static inline int32_t
   1117 MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
   1118                                         UChar32 c,
   1119                                         uint32_t* value,
   1120                                         UBool useFallback,
   1121                                         int outputType)
   1122 {
   1123    const int32_t *cx;
   1124    const uint16_t *table;
   1125    uint32_t stage2Entry;
   1126    uint32_t myValue;
   1127    int32_t length;
   1128    const uint8_t *p;
   1129    /*
   1130     * TODO(markus): Use and require new, faster MBCS conversion table structures.
   1131     * Use internal version of ucnv_open() that verifies that the new structures are available,
   1132     * else U_INTERNAL_PROGRAM_ERROR.
   1133     */
   1134    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1135    if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1136        table=sharedData->mbcs.fromUnicodeTable;
   1137        stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
   1138        /* get the bytes and the length for the output */
   1139        if(outputType==MBCS_OUTPUT_2){
   1140            myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
                   /* the byte length is derived from the magnitude of the stored value */
   1141            if(myValue<=0xff) {
   1142                length=1;
   1143            } else {
   1144                length=2;
   1145            }
   1146        } else /* outputType==MBCS_OUTPUT_3 */ {
   1147            p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
                   /* assemble the three stored bytes, first byte most significant */
   1148            myValue = (static_cast<uint32_t>(*p) << 16) | (static_cast<uint32_t>(p[1]) << 8) | p[2];
   1149            if(myValue<=0xff) {
   1150                length=1;
   1151            } else if(myValue<=0xffff) {
   1152                length=2;
   1153            } else {
   1154                length=3;
   1155            }
   1156        }
   1157        /* is this code point assigned, or do we use fallbacks? */
               /* bit 16+(c&0xf) of the stage 2 entry is this code point's "assigned" flag */
   1158        if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
   1159            /* assigned */
   1160            *value=myValue;
   1161            return length;
   1162        } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {
   1163            /*
   1164             * We allow a 0 byte output if the "assigned" bit is set for this entry.
   1165             * There is no way with this data structure for fallback output
   1166             * to be a zero byte.
   1167             */
   1168            *value=myValue;
   1169            return -length;
   1170        }
   1171    }
   1172 
           /* no result from the main table: try the extension table, if there is one */
   1173    cx=sharedData->mbcs.extIndexes;
   1174    if(cx!=nullptr) {
   1175        return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
   1176    }
   1177 
   1178    /* unassigned */
   1179    return 0;
   1180 }
   1181 
   1182 /* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c
   1183 * any future change in _MBCSSingleFromUChar32() function should be reflected here.
        *
        * Single-byte lookup of the code point c in the from-Unicode table of sharedData.
        * The table result is classified by magnitude: >=0xf00 is a roundtrip,
        * >=0xc00 is always accepted as a fallback, and >=0x800 is accepted as a
        * fallback only when useFallback is set.
        *
        * @param sharedData  shared converter data whose mbcs tables are consulted
        * @param c           the code point to convert
   1184 * @param retval pointer to output byte
        *               (set to the low 8 bits of the table result whenever the table is
        *               consulted, even if the return value is 0; left untouched for a
        *               supplementary code point in a BMP-only codepage)
        * @param useFallback whether results in the 0x800..0xbff range count as fallbacks
   1185 * @return 1 roundtrip byte  0 no mapping  -1 fallback byte
   1186 */
   1187 static inline int32_t
   1188 MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
   1189                                       UChar32 c,
   1190                                       uint32_t* retval,
   1191                                       UBool useFallback)
   1192 {
   1193    const uint16_t *table;
   1194    int32_t value;
   1195    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
   1196    if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
   1197        return 0;
   1198    }
   1199    /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
   1200    table=sharedData->mbcs.fromUnicodeTable;
   1201    /* get the byte for the output */
   1202    value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
   1203    /* is this code point assigned, or do we use fallbacks? */
   1204    *retval = static_cast<uint32_t>(value & 0xff);
   1205    if(value>=0xf00) {
   1206        return 1;  /* roundtrip */
   1207    } else if(useFallback ? value>=0x800 : value>=0xc00) {
   1208        return -1;  /* fallback taken */
   1209    } else {
   1210        return 0;  /* no mapping */
   1211    }
   1212 }
   1213 
   1214 /*
   1215 * Check that the result is a 2-byte value with each byte in the range A1..FE
   1216 * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
   1217 * to move it to the ISO 2022 range 21..7E.
   1218 * Return 0 if out of range.
   1219 */
   1220 static inline uint32_t
   1221 _2022FromGR94DBCS(uint32_t value) {
   1222    if (static_cast<uint16_t>(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1223        static_cast<uint8_t>(value - 0xa1) <= (0xfe - 0xa1)
   1224    ) {
   1225        return value - 0x8080;  /* shift down to 21..7e byte range */
   1226    } else {
   1227        return 0;  /* not valid for ISO 2022 */
   1228    }
   1229 }
   1230 
   1231 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
   1232 /*
   1233 * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
   1234 * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
   1235 * unchanged. 
   1236 */
   1237 static inline uint32_t
   1238 _2022ToGR94DBCS(uint32_t value) {
   1239    uint32_t returnValue = value + 0x8080;
   1240    if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
   1241        (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
   1242        return returnValue;
   1243    } else {
   1244        return value;
   1245    }
   1246 }
   1247 #endif
   1248 
   1249 #ifdef U_ENABLE_GENERIC_ISO_2022
   1250 
   1251 /**********************************************************************************
   1252 *  ISO-2022 Converter
   1253 *
        *  T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC: toUnicode implementation of the
        *  generic ISO-2022 converter (compiled only if U_ENABLE_GENERIC_ISO_2022 is defined).
        *
        *  While input remains, and as long as myData->key == 0, the bytes up to the limit
        *  returned by getEndOfBuffer_2022() are converted by the sub-converter stored in
        *  myData->currentConverter (an "ASCII" converter is opened on first use if none
        *  is set). Afterwards the remaining input is handed to changeState_2022().
        *
        *  Before an early return the sub-converter's pending state (overflow UChars,
        *  invalid bytes, or partial input bytes) is copied into the outer converter.
        *
        *  args: source/target pointers and limits, offsets, flush flag, outer converter.
        *  err:  in/out error code; set to U_TRUNCATED_CHAR_FOUND here when the
        *        sub-converter holds partial input although it stopped before mySourceLimit.
   1254 *
   1255 */
   1256 
   1257 static void U_CALLCONV
   1258 T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,
   1259                                                           UErrorCode* err){
   1260    const char* mySourceLimit, *realSourceLimit;
   1261    const char* sourceStart;
   1262    const char16_t* myTargetStart;
   1263    UConverter* saveThis;
   1264    UConverterDataISO2022* myData;
   1265    int8_t length;
   1266 
   1267    saveThis = args->converter;
   1268    myData=((UConverterDataISO2022*)(saveThis->extraInfo));
   1269 
   1270    realSourceLimit = args->sourceLimit;
   1271    while (args->source < realSourceLimit) {
   1272        if(myData->key == 0) { /* are we in the middle of an escape sequence? */
   1273            /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   1274            mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);
   1275 
   1276            if(args->source < mySourceLimit) {
   1277                if(myData->currentConverter==nullptr) {
                           /* no sub-converter selected yet: open an ASCII converter for this run */
   1278                    myData->currentConverter = ucnv_open("ASCII",err);
   1279                    if(U_FAILURE(*err)){
   1280                        return;
   1281                    }
   1282 
   1283                    myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
   1284                    saveThis->mode = UCNV_SO;
   1285                }
   1286 
   1287                /* convert to before the ESC or until the end of the buffer */
   1288                myData->isFirstBuffer=false;
   1289                sourceStart = args->source;
   1290                myTargetStart = args->target;
   1291                args->converter = myData->currentConverter; /* temporarily point args at the sub-converter */
   1292                ucnv_toUnicode(args->converter,
   1293                    &args->target,
   1294                    args->targetLimit,
   1295                    &args->source,
   1296                    mySourceLimit,
   1297                    args->offsets,
   1298                    (UBool)(args->flush && mySourceLimit == realSourceLimit), /* flush only if this run ends at the real input limit */
   1299                    err);
   1300                args->converter = saveThis; /* restore the outer converter */
   1301 
   1302                if (*err == U_BUFFER_OVERFLOW_ERROR) {
   1303                    /* move the overflow buffer */
   1304                    length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;
   1305                    myData->currentConverter->UCharErrorBufferLength = 0;
   1306                    if(length > 0) {
   1307                        uprv_memcpy(saveThis->UCharErrorBuffer,
   1308                                    myData->currentConverter->UCharErrorBuffer,
   1309                                    length*U_SIZEOF_UCHAR);
   1310                    }
   1311                    return;
   1312                }
   1313 
   1314                /*
   1315                 * At least one of:
   1316                 * -Error while converting
   1317                 * -Done with entire buffer
   1318                 * -Need to write offsets or update the current offset
   1319                 *  (leave that up to the code in ucnv.c)
   1320                 *
   1321                 * or else we just stopped at an ESC byte and continue with changeState_2022()
                        *
                        * Note on the last operand below: && binds tighter than ||, so it reads as
                        * (offsets requested && target or source advanced) ||
                        * (stopped before the real limit && sub-converter has buffered partial bytes).
   1322                 */
   1323                if (U_FAILURE(*err) ||
   1324                    (args->source == realSourceLimit) ||
   1325                    (args->offsets != nullptr && (args->target != myTargetStart || args->source != sourceStart) ||
   1326                    (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))
   1327                ) {
   1328                    /* copy partial or error input for truncated detection and error handling */
   1329                    if(U_FAILURE(*err)) {
   1330                        length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;
   1331                        if(length > 0) {
   1332                            uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);
   1333                        }
   1334                    } else {
   1335                        length = saveThis->toULength = myData->currentConverter->toULength;
   1336                        if(length > 0) {
   1337                            uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);
   1338                            if(args->source < mySourceLimit) {
   1339                                *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */
   1340                            }
   1341                        }
   1342                    }
   1343                    return;
   1344                }
   1345            }
   1346        }
   1347 
               /* remember the position so that consumption by changeState_2022() can be detected below */
   1348        sourceStart = args->source;
   1349        changeState_2022(args->converter,
   1350               &(args->source),
   1351               realSourceLimit,
   1352               ISO_2022,
   1353               err);
   1354        if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != nullptr)) {
   1355            /* let the ucnv.c code update its current offset */
   1356            return;
   1357        }
   1358    }
   1359 }
   1360 
   1361 #endif
   1362 
   1363 /*
   1364 * To Unicode Callback helper function
        *
        * Records the offending input in the converter and selects the error code.
        *   cnv:           converter whose toUBytes[]/toULength receive the input byte(s)
        *   sourceChar:    the input value; stored as two bytes (high byte first) if > 0xff,
        *                  otherwise as a single byte
        *   targetUniChar: lookup result; the value missingCharMarker-1 (0xfffe) selects
        *                  U_INVALID_CHAR_FOUND, any other value selects U_ILLEGAL_CHAR_FOUND
        *   err:           out; always overwritten with one of the two codes above
   1365 */
   1366 static void
   1367 toUnicodeCallback(UConverter *cnv,
   1368                  const uint32_t sourceChar, const uint32_t targetUniChar,
   1369                  UErrorCode* err){
   1370    if(sourceChar>0xff){
   1371        cnv->toUBytes[0] = static_cast<uint8_t>(sourceChar >> 8);
   1372        cnv->toUBytes[1] = static_cast<uint8_t>(sourceChar);
   1373        cnv->toULength = 2;
   1374    }
   1375    else{
   1376        cnv->toUBytes[0] = static_cast<char>(sourceChar);
   1377        cnv->toULength = 1;
   1378    }
   1379 
   1380    if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
   1381        *err = U_INVALID_CHAR_FOUND;
   1382    }
   1383    else{
   1384        *err = U_ILLEGAL_CHAR_FOUND;
   1385    }
   1386 }
   1387 
   1388 /**************************************ISO-2022-JP*************************************************/
   1389 
   1390 /************************************** IMPORTANT **************************************************
   1391 * The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and
   1392 * MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
   1393 * The converter iterates over each Unicode codepoint
   1394 * to obtain the equivalent codepoints from the codepages supported. Since the source buffer is
   1395 * processed one char at a time it would make sense to reduce the extra processing a canned converter
   1396 * would do as far as possible.
   1397 *
   1398 * If the implementation of these macros or structure of sharedData struct change in the future, make
   1399 * sure that ISO-2022 is also changed.
   1400 ***************************************************************************************************
   1401 */
   1402 
   1403 /***************************************************************************************************
   1404 * Rules for ISO-2022-jp encoding
   1405 * (i)   Escape sequences must be fully contained within a line they should not
   1406 *       span new lines or CRs
   1407 * (ii)  If the last character on a line is represented by two bytes then an ASCII or
   1408 *       JIS-Roman character escape sequence should follow before the line terminates
   1409 * (iii) If the first character on the line is represented by two bytes then a two
   1410 *       byte character escape sequence should precede it
   1411 * (iv)  If no escape sequence is encountered then the characters are ASCII
   1412 * (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
   1413 *       and invoked with SS2 (ESC N).
   1414 * (vi)  If there is any G0 designation in text, there must be a switch to
   1415 *       ASCII or to JIS X 0201-Roman before a space character (but not
   1416 *       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
   1417 *       characters such as tab or CRLF.
   1418 * (vii) Supported encodings:
   1419 *          ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
   1420 *
   1421 *  source : RFC-1554
   1422 *
   1423 *          JISX201, JISX208,JISX212 : new .cnv data files created
   1424 *          KSC5601 : alias to ibm-949 mapping table
   1425 *          GB2312 : alias to ibm-1386 mapping table
   1426 *          ISO-8859-1 : Algorithmic implemented as LATIN1 case
   1427 *          ISO-8859-7 : alias to ibm-9409 mapping table
   1428 */
   1429 
   1430 /* preference order of JP charsets */
        /*
         * UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC() walks this list in order when it
         * builds choices[]: after the current G0 and G2 charsets, each entry that is still set
         * in the version's charset mask (jpCharsetMasks[version]) is appended as a candidate.
         */
   1431 static const StateEnum jpCharsetPref[]={
   1432    ASCII,
   1433    JISX201,
   1434    ISO8859_1,
   1435    JISX208,
   1436    ISO8859_7,
   1437    JISX212,
   1438    GB2312,
   1439    KSC5601,
   1440    HWKANA_7BIT
   1441 };
   1442 
   1443 /*
   1444 * The escape sequences must be in order of the enum constants like JISX201  = 3,
   1445 * not in order of jpCharsetPref[]!
        *
        * The table is indexed by charset value: escSeqChars[cs] is the designation sequence
        * that the fromUnicode code copies into its output buffer, using the byte count from
        * escSeqCharsLen[cs]. Each row holds at most 6 chars including the terminating NUL.
   1446 */
   1447 static const char escSeqChars[][6] ={
   1448    "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
   1449    "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
   1450    "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
   1451    "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
   1452    "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
   1453    "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
   1454    "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
   1455    "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
   1456    "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */
   1457 
   1458 };
   1459 static  const int8_t escSeqCharsLen[] ={ /* parallel to escSeqChars[]: byte count of each sequence, same index order */
   1460    3, /* length of <ESC>(B  ASCII       */
   1461    3, /* length of <ESC>.A  ISO-8859-1  */
   1462    3, /* length of <ESC>.F  ISO-8859-7  */
   1463    3, /* length of <ESC>(J  JISX-201    */
   1464    3, /* length of <ESC>$B  JISX-208    */
   1465    4, /* length of <ESC>$(D JISX-212    */
   1466    3, /* length of <ESC>$A  GB2312      */
   1467    4, /* length of <ESC>$(C KSC5601     */
   1468    3  /* length of <ESC>(I  HWKANA_7BIT */
   1469 };
   1470 
   1471 /*
   1472 * The iteration over various code pages works this way:
   1473 * i)   Get the currentState from myConverterData->currentState
   1474 * ii)  Check if the character is mapped to a valid character in the currentState
   1475 *      Yes ->  a) set the initIterState to currentState
   1476 *       b) remain in this state until an invalid character is found
   1477 *      No  ->  a) go to the next code page and find the character
   1478 * iii) Before changing the state (incrementing the current state), check if the current state
   1479 *      is equal to the initIterState
   1480 *      Yes ->  A character that cannot be represented in any of the supported encodings:
   1481 *       break and return a U_INVALID_CHAR_FOUND error
   1482 *      No  ->  Continue and find the character in the next code page
   1483 *
   1484 *
   1485 * TODO: Implement a priority technique where the users are allowed to set the priority of code pages
   1486 */
   1487 
   1488 /* Map 00..7F to Unicode according to JIS X 0201. */
        /*
         * 0x5c maps to U+00A5 and 0x7e maps to U+203E; every other value is returned
         * unchanged. The function does not range-check its argument.
         */
   1489 static inline uint32_t
   1490 jisx201ToU(uint32_t value) {
   1491    if(value < 0x5c) {
   1492        return value;
   1493    } else if(value == 0x5c) {
   1494        return 0xa5;
   1495    } else if(value == 0x7e) {
   1496        return 0x203e;
   1497    } else /* value <= 0x7f */ {
   1498        return value;
   1499    }
   1500 }
   1501 
   1502 /* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */
        /*
         * U+00A5 maps to 0x5c and U+203E maps to 0x7e. Code points up to 0x7f map to
         * themselves, except 0x5c and 0x7e, which are reported as unmappable because
         * those byte values are taken by the two mappings above.
         */
   1503 static inline uint32_t
   1504 jisx201FromU(uint32_t value) {
   1505    if(value<=0x7f) {
   1506        if(value!=0x5c && value!=0x7e) {
   1507            return value;
   1508        }
               /* 0x5c and 0x7e fall through to the unmappable result */
   1509    } else if(value==0xa5) {
   1510        return 0x5c;
   1511    } else if(value==0x203e) {
   1512        return 0x7e;
   1513    }
   1514    return 0xfffe;
   1515 }
   1516 
   1517 /*
   1518 * Take a valid Shift-JIS byte pair, check that it is in the range corresponding
   1519 * to JIS X 0208, and convert it to a pair of 21..7E bytes.
   1520 * Return 0 if the byte pair is out of range.
        *
        * value holds the lead byte in bits 15..8 and the trail byte in bits 7..0; the
        * result uses the same layout. Only the upper bound (0xEFFC) is checked here;
        * the caller is expected to pass a valid Shift-JIS pair.
   1521 */
   1522 static inline uint32_t
   1523 _2022FromSJIS(uint32_t value) {
   1524    uint8_t trail;
   1525 
   1526    if(value > 0xEFFC) {
   1527        return 0;  /* beyond JIS X 0208 */
   1528    }
   1529 
   1530    trail = static_cast<uint8_t>(value);
   1531 
   1532    value &= 0xff00;  /* lead byte */
           /* rebase the lead byte; the two lead-byte ranges use different offsets */
   1533    if(value <= 0x9f00) {
   1534        value -= 0x7000;
   1535    } else /* 0xe000 <= value <= 0xef00 */ {
   1536        value -= 0xb000;
   1537    }
           /* each Shift-JIS lead byte covers two consecutive first-byte values of the result */
   1538    value <<= 1;
   1539 
   1540    if(trail <= 0x9e) {
               /* lower trail range: first of the two first-byte values */
   1541        value -= 0x100;
   1542        if(trail <= 0x7e) {
   1543            value |= trail - 0x1f;
   1544        } else {
   1545            value |= trail - 0x20;
   1546        }
   1547    } else /* trail <= 0xfc */ {
               /* upper trail range: second of the two first-byte values */
   1548        value |= trail - 0x7e;
   1549    }
   1550    return value;
   1551 }
   1552 
   1553 /*
   1554 * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS.
   1555 * If either byte is outside 21..7E make sure that the result is not valid
   1556 * for Shift-JIS so that the converter catches it.
   1557 * Some invalid byte values already turn into equally invalid Shift-JIS
   1558 * byte values and need not be tested explicitly.
        *
        * c1, c2: first and second input byte.
        * bytes:  output; bytes[0] receives the lead byte and bytes[1] the trail byte.
        *         A byte that is detected as invalid is written as 0.
   1559 */
   1560 static inline void
   1561 _2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
   1562    if(c1&1) {
               /* odd first byte: round up before halving below; trail goes to the lower range */
   1563        ++c1;
   1564        if(c2 <= 0x5f) {
   1565            c2 += 0x1f;
   1566        } else if(c2 <= 0x7e) {
   1567            c2 += 0x20;
   1568        } else {
   1569            c2 = 0;  /* invalid */
   1570        }
   1571    } else {
               /* even first byte: trail goes to the upper range; unsigned subtraction checks 21..7E in one compare */
   1572        if (static_cast<uint8_t>(c2 - 0x21) <= ((0x7e) - 0x21)) {
   1573            c2 += 0x7e;
   1574        } else {
   1575            c2 = 0;  /* invalid */
   1576        }
   1577    }
           /* two first-byte values share one Shift-JIS lead byte */
   1578    c1 >>= 1;
   1579    if(c1 <= 0x2f) {
   1580        c1 += 0x70;
   1581    } else if(c1 <= 0x3f) {
   1582        c1 += 0xb0;
   1583    } else {
   1584        c1 = 0;  /* invalid */
   1585    }
   1586    bytes[0] = static_cast<char>(c1);
   1587    bytes[1] = static_cast<char>(c2);
   1588 }
   1589 
   1590 /*
   1591 * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
   1592 * Katakana.
   1593 * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
   1594 * because Shift-JIS roundtrips half-width Katakana to single bytes.
   1595 * These were the only fallbacks in ICU's jisx-208.ucm file.
        *
        * The table is indexed by (code point - HWKANA_START) and has one entry per code
        * point in HWKANA_START..HWKANA_END. The JISX208 case of the fromUnicode code reads
        * it only when fallbacks are enabled and no mapping was found yet, and records the
        * result as a two-byte fallback (len = -2).
   1596 */
   1597 static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
   1598    0x2123,  /* U+FF61 */
   1599    0x2156,
   1600    0x2157,
   1601    0x2122,
   1602    0x2126,
   1603    0x2572,
   1604    0x2521,
   1605    0x2523,
   1606    0x2525,
   1607    0x2527,
   1608    0x2529,
   1609    0x2563,
   1610    0x2565,
   1611    0x2567,
   1612    0x2543,
   1613    0x213C,  /* U+FF70 */
   1614    0x2522,
   1615    0x2524,
   1616    0x2526,
   1617    0x2528,
   1618    0x252A,
   1619    0x252B,
   1620    0x252D,
   1621    0x252F,
   1622    0x2531,
   1623    0x2533,
   1624    0x2535,
   1625    0x2537,
   1626    0x2539,
   1627    0x253B,
   1628    0x253D,
   1629    0x253F,  /* U+FF80 */
   1630    0x2541,
   1631    0x2544,
   1632    0x2546,
   1633    0x2548,
   1634    0x254A,
   1635    0x254B,
   1636    0x254C,
   1637    0x254D,
   1638    0x254E,
   1639    0x254F,
   1640    0x2552,
   1641    0x2555,
   1642    0x2558,
   1643    0x255B,
   1644    0x255E,
   1645    0x255F,  /* U+FF90 */
   1646    0x2560,
   1647    0x2561,
   1648    0x2562,
   1649    0x2564,
   1650    0x2566,
   1651    0x2568,
   1652    0x2569,
   1653    0x256A,
   1654    0x256B,
   1655    0x256C,
   1656    0x256D,
   1657    0x256F,
   1658    0x2573,
   1659    0x212B,
   1660    0x212C   /* U+FF9F */
   1661 };
   1662 
   1663 static void U_CALLCONV
   1664 UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
   1665    UConverter *cnv = args->converter;
   1666    UConverterDataISO2022 *converterData;
   1667    ISO2022State *pFromU2022State;
   1668    uint8_t* target = reinterpret_cast<uint8_t*>(args->target);
   1669    const uint8_t* targetLimit = reinterpret_cast<const uint8_t*>(args->targetLimit);
   1670    const char16_t* source = args->source;
   1671    const char16_t* sourceLimit = args->sourceLimit;
   1672    int32_t* offsets = args->offsets;
   1673    UChar32 sourceChar;
   1674    char buffer[8];
   1675    int32_t len, outLen;
   1676    int8_t choices[10];
   1677    int32_t choiceCount;
   1678    uint32_t targetValue = 0;
   1679    UBool useFallback;
   1680 
   1681    int32_t i;
   1682    int8_t cs, g;
   1683 
   1684    /* set up the state */
   1685    converterData = static_cast<UConverterDataISO2022*>(cnv->extraInfo);
   1686    pFromU2022State   = &converterData->fromU2022State;
   1687 
   1688    choiceCount = 0;
   1689 
   1690    /* check if the last codepoint of previous buffer was a lead surrogate*/
   1691    if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   1692        goto getTrail;
   1693    }
   1694 
   1695    while(source < sourceLimit) {
   1696        if(target < targetLimit) {
   1697 
   1698            sourceChar  = *(source++);
   1699            /*check if the char is a First surrogate*/
   1700            if(U16_IS_SURROGATE(sourceChar)) {
   1701                if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   1702 getTrail:
   1703                    /*look ahead to find the trail surrogate*/
   1704                    if(source < sourceLimit) {
   1705                        /* test the following code unit */
   1706                        char16_t trail = *source;
   1707                        if(U16_IS_TRAIL(trail)) {
   1708                            source++;
   1709                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   1710                            cnv->fromUChar32=0x00;
   1711                            /* convert this supplementary code point */
   1712                            /* exit this condition tree */
   1713                        } else {
   1714                            /* this is an unmatched lead code unit (1st surrogate) */
   1715                            /* callback(illegal) */
   1716                            *err=U_ILLEGAL_CHAR_FOUND;
   1717                            cnv->fromUChar32=sourceChar;
   1718                            break;
   1719                        }
   1720                    } else {
   1721                        /* no more input */
   1722                        cnv->fromUChar32=sourceChar;
   1723                        break;
   1724                    }
   1725                } else {
   1726                    /* this is an unmatched trail code unit (2nd surrogate) */
   1727                    /* callback(illegal) */
   1728                    *err=U_ILLEGAL_CHAR_FOUND;
   1729                    cnv->fromUChar32=sourceChar;
   1730                    break;
   1731                }
   1732            }
   1733 
   1734            /* do not convert SO/SI/ESC */
   1735            if(IS_2022_CONTROL(sourceChar)) {
   1736                /* callback(illegal) */
   1737                *err=U_ILLEGAL_CHAR_FOUND;
   1738                cnv->fromUChar32=sourceChar;
   1739                break;
   1740            }
   1741 
   1742            /* do the conversion */
   1743 
   1744            if(choiceCount == 0) {
   1745                uint16_t csm;
   1746 
   1747                /*
   1748                 * The csm variable keeps track of which charsets are allowed
   1749                 * and not used yet while building the choices[].
   1750                 */
   1751                csm = jpCharsetMasks[converterData->version];
   1752                choiceCount = 0;
   1753 
   1754                /* JIS7/8: try single-byte half-width Katakana before JISX208 */
   1755                if(converterData->version == 3 || converterData->version == 4) {
   1756                    choices[choiceCount++] = static_cast<int8_t>(HWKANA_7BIT);
   1757                }
   1758                /* Do not try single-byte half-width Katakana for other versions. */
   1759                csm &= ~CSM(HWKANA_7BIT);
   1760 
   1761                /* try the current G0 charset */
   1762                choices[choiceCount++] = cs = pFromU2022State->cs[0];
   1763                csm &= ~CSM(cs);
   1764 
   1765                /* try the current G2 charset */
   1766                if((cs = pFromU2022State->cs[2]) != 0) {
   1767                    choices[choiceCount++] = cs;
   1768                    csm &= ~CSM(cs);
   1769                }
   1770 
   1771                /* try all the other possible charsets */
   1772                for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) {
   1773                    cs = static_cast<int8_t>(jpCharsetPref[i]);
   1774                    if(CSM(cs) & csm) {
   1775                        choices[choiceCount++] = cs;
   1776                        csm &= ~CSM(cs);
   1777                    }
   1778                }
   1779            }
   1780 
   1781            cs = g = 0;
   1782            /*
   1783             * len==0: no mapping found yet
   1784             * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   1785             * len>0: found a roundtrip result, done
   1786             */
   1787            len = 0;
   1788            /*
   1789             * We will turn off useFallback after finding a fallback,
   1790             * but we still get fallbacks from PUA code points as usual.
   1791             * Therefore, we will also need to check that we don't overwrite
   1792             * an early fallback with a later one.
   1793             */
   1794            useFallback = cnv->useFallback;
   1795 
   1796            for(i = 0; i < choiceCount && len <= 0; ++i) {
   1797                uint32_t value;
   1798                int32_t len2;
   1799                int8_t cs0 = choices[i];
   1800                switch(cs0) {
   1801                case ASCII:
   1802                    if(sourceChar <= 0x7f) {
   1803                        targetValue = static_cast<uint32_t>(sourceChar);
   1804                        len = 1;
   1805                        cs = cs0;
   1806                        g = 0;
   1807                    }
   1808                    break;
   1809                case ISO8859_1:
   1810                    if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
   1811                        targetValue = static_cast<uint32_t>(sourceChar) - 0x80;
   1812                        len = 1;
   1813                        cs = cs0;
   1814                        g = 2;
   1815                    }
   1816                    break;
   1817                case HWKANA_7BIT:
   1818                    if (static_cast<uint32_t>(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1819                        if(converterData->version==3) {
   1820                            /* JIS7: use G1 (SO) */
   1821                            /* Shift U+FF61..U+FF9F to bytes 21..5F. */
   1822                            targetValue = static_cast<uint32_t>(sourceChar - (HWKANA_START - 0x21));
   1823                            len = 1;
   1824                            pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */
   1825                            g = 1;
   1826                        } else if(converterData->version==4) {
   1827                            /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */
   1828                            /* Shift U+FF61..U+FF9F to bytes A1..DF. */
   1829                            targetValue = static_cast<uint32_t>(sourceChar - (HWKANA_START - 0xa1));
   1830                            len = 1;
   1831 
   1832                            cs = pFromU2022State->cs[0];
   1833                            if(IS_JP_DBCS(cs)) {
   1834                                /* switch from a DBCS charset to JISX201 */
   1835                                cs = static_cast<int8_t>(JISX201);
   1836                            }
   1837                            /* else stay in the current G0 charset */
   1838                            g = 0;
   1839                        }
   1840                        /* else do not use HWKANA_7BIT with other versions */
   1841                    }
   1842                    break;
   1843                case JISX201:
   1844                    /* G0 SBCS */
   1845                    value = jisx201FromU(sourceChar);
   1846                    if(value <= 0x7f) {
   1847                        targetValue = value;
   1848                        len = 1;
   1849                        cs = cs0;
   1850                        g = 0;
   1851                        useFallback = false;
   1852                    }
   1853                    break;
   1854                case JISX208:
   1855                    /* G0 DBCS from Shift-JIS table */
   1856                    len2 = MBCS_FROM_UCHAR32_ISO2022(
   1857                                converterData->myConverterArray[cs0],
   1858                                sourceChar, &value,
   1859                                useFallback, MBCS_OUTPUT_2);
   1860                    if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1861                        value = _2022FromSJIS(value);
   1862                        if(value != 0) {
   1863                            targetValue = value;
   1864                            len = len2;
   1865                            cs = cs0;
   1866                            g = 0;
   1867                            useFallback = false;
   1868                        }
   1869                    } else if(len == 0 && useFallback &&
   1870                              static_cast<uint32_t>(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
   1871                        targetValue = hwkana_fb[sourceChar - HWKANA_START];
   1872                        len = -2;
   1873                        cs = cs0;
   1874                        g = 0;
   1875                        useFallback = false;
   1876                    }
   1877                    break;
   1878                case ISO8859_7:
   1879                    /* G0 SBCS forced to 7-bit output */
   1880                    len2 = MBCS_SINGLE_FROM_UCHAR32(
   1881                                converterData->myConverterArray[cs0],
   1882                                sourceChar, &value,
   1883                                useFallback);
   1884                    if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {
   1885                        targetValue = value - 0x80;
   1886                        len = len2;
   1887                        cs = cs0;
   1888                        g = 2;
   1889                        useFallback = false;
   1890                    }
   1891                    break;
   1892                default:
   1893                    /* G0 DBCS */
   1894                    len2 = MBCS_FROM_UCHAR32_ISO2022(
   1895                                converterData->myConverterArray[cs0],
   1896                                sourceChar, &value,
   1897                                useFallback, MBCS_OUTPUT_2);
   1898                    if(len2 == 2 || (len2 == -2 && len == 0)) {  /* only accept DBCS: abs(len)==2 */
   1899                        if(cs0 == KSC5601) {
   1900                            /*
   1901                             * Check for valid bytes for the encoding scheme.
   1902                             * This is necessary because the sub-converter (windows-949)
   1903                             * has a broader encoding scheme than is valid for 2022.
   1904                             */
   1905                            value = _2022FromGR94DBCS(value);
   1906                            if(value == 0) {
   1907                                break;
   1908                            }
   1909                        }
   1910                        targetValue = value;
   1911                        len = len2;
   1912                        cs = cs0;
   1913                        g = 0;
   1914                        useFallback = false;
   1915                    }
   1916                    break;
   1917                }
   1918            }
   1919 
   1920            if(len != 0) {
   1921                if(len < 0) {
   1922                    len = -len;  /* fallback */
   1923                }
   1924                outLen = 0; /* count output bytes */
   1925 
   1926                /* write SI if necessary (only for JIS7) */
   1927                if(pFromU2022State->g == 1 && g == 0) {
   1928                    buffer[outLen++] = UCNV_SI;
   1929                    pFromU2022State->g = 0;
   1930                }
   1931 
   1932                /* write the designation sequence if necessary */
   1933                if(cs != pFromU2022State->cs[g]) {
   1934                    int32_t escLen = escSeqCharsLen[cs];
   1935                    uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);
   1936                    outLen += escLen;
   1937                    pFromU2022State->cs[g] = cs;
   1938 
   1939                    /* invalidate the choices[] */
   1940                    choiceCount = 0;
   1941                }
   1942 
   1943                /* write the shift sequence if necessary */
   1944                if(g != pFromU2022State->g) {
   1945                    switch(g) {
   1946                    /* case 0 handled before writing escapes */
   1947                    case 1:
   1948                        buffer[outLen++] = UCNV_SO;
   1949                        pFromU2022State->g = 1;
   1950                        break;
   1951                    default: /* case 2 */
   1952                        buffer[outLen++] = 0x1b;
   1953                        buffer[outLen++] = 0x4e;
   1954                        break;
   1955                    /* no case 3: no SS3 in ISO-2022-JP-x */
   1956                    }
   1957                }
   1958 
   1959                /* write the output bytes */
   1960                if(len == 1) {
   1961                    buffer[outLen++] = static_cast<char>(targetValue);
   1962                } else /* len == 2 */ {
   1963                    buffer[outLen++] = static_cast<char>(targetValue >> 8);
   1964                    buffer[outLen++] = static_cast<char>(targetValue);
   1965                }
   1966            } else {
   1967                /*
   1968                 * if we cannot find the character after checking all codepages
   1969                 * then this is an error
   1970                 */
   1971                *err = U_INVALID_CHAR_FOUND;
   1972                cnv->fromUChar32=sourceChar;
   1973                break;
   1974            }
   1975 
   1976            if(sourceChar == CR || sourceChar == LF) {
   1977                /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */
   1978                pFromU2022State->cs[2] = 0;
   1979                choiceCount = 0;
   1980            }
   1981 
   1982            /* output outLen>0 bytes in buffer[] */
   1983            if(outLen == 1) {
   1984                *target++ = buffer[0];
   1985                if(offsets) {
   1986                    *offsets++ = static_cast<int32_t>(source - args->source - 1); /* -1: known to be ASCII */
   1987                }
   1988            } else if(outLen == 2 && (target + 2) <= targetLimit) {
   1989                *target++ = buffer[0];
   1990                *target++ = buffer[1];
   1991                if(offsets) {
   1992                    int32_t sourceIndex = static_cast<int32_t>(source - args->source - U16_LENGTH(sourceChar));
   1993                    *offsets++ = sourceIndex;
   1994                    *offsets++ = sourceIndex;
   1995                }
   1996            } else {
   1997                fromUWriteUInt8(
   1998                    cnv,
   1999                    buffer, outLen,
   2000                    &target, reinterpret_cast<const char*>(targetLimit),
   2001                    &offsets, static_cast<int32_t>(source - args->source - U16_LENGTH(sourceChar)),
   2002                    err);
   2003                if(U_FAILURE(*err)) {
   2004                    break;
   2005                }
   2006            }
   2007        } /* end if(myTargetIndex<myTargetLength) */
   2008        else{
   2009            *err =U_BUFFER_OVERFLOW_ERROR;
   2010            break;
   2011        }
   2012 
   2013    }/* end while(mySourceIndex<mySourceLength) */
   2014 
   2015    /*
   2016     * the end of the input stream and detection of truncated input
   2017     * are handled by the framework, but for ISO-2022-JP conversion
   2018     * we need to be in ASCII mode at the very end
   2019     *
   2020     * conditions:
   2021     *   successful
   2022     *   in SO mode or not in ASCII mode
   2023     *   end of input and no truncated input
   2024     */
   2025    if( U_SUCCESS(*err) &&
   2026        (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
   2027        args->flush && source>=sourceLimit && cnv->fromUChar32==0
   2028    ) {
   2029        int32_t sourceIndex;
   2030 
   2031        outLen = 0;
   2032 
   2033        if(pFromU2022State->g != 0) {
   2034            buffer[outLen++] = UCNV_SI;
   2035            pFromU2022State->g = 0;
   2036        }
   2037 
   2038        if(pFromU2022State->cs[0] != ASCII) {
   2039            int32_t escLen = escSeqCharsLen[ASCII];
   2040            uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
   2041            outLen += escLen;
   2042            pFromU2022State->cs[0] = static_cast<int8_t>(ASCII);
   2043        }
   2044 
   2045        /* get the source index of the last input character */
   2046        /*
   2047         * TODO this would be simpler and more reliable if we used a pair
   2048         * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2049         * so that we could simply use the prevSourceIndex here;
   2050         * this code gives an incorrect result for the rare case of an unmatched
   2051         * trail surrogate that is alone in the last buffer of the text stream
   2052         */
   2053        sourceIndex = static_cast<int32_t>(source - args->source);
   2054        if(sourceIndex>0) {
   2055            --sourceIndex;
   2056            if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2057                (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2058            ) {
   2059                --sourceIndex;
   2060            }
   2061        } else {
   2062            sourceIndex=-1;
   2063        }
   2064 
   2065        fromUWriteUInt8(
   2066            cnv,
   2067            buffer, outLen,
   2068            &target, reinterpret_cast<const char*>(targetLimit),
   2069            &offsets, sourceIndex,
   2070            err);
   2071    }
   2072 
   2073    /*save the state and return */
   2074    args->source = source;
   2075    args->target = reinterpret_cast<char*>(target);
   2076 }
   2077 
   2078 /*************** to unicode *******************/
   2079 
   2080 static void U_CALLCONV
   2081 UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2082                                               UErrorCode* err){
           /*
            * ISO-2022-JP -> UTF-16 with offsets.
            *
            * Reads bytes from args->source, writes char16_t units to args->target and,
            * when args->offsets is non-null, the source offset of each unit written.
            * Two kinds of unfinished input are resumed before the main loop:
            *   - myData->key != 0: jump to "escape" to continue a partial escape sequence
            *   - converter->toULength == 1: the saved DBCS lead byte in toUBytes[0] is
            *     completed at "getTrailByte"
            * On exit args->source and args->target are advanced to where conversion
            * stopped. *err is set to U_BUFFER_OVERFLOW_ERROR when the target is full
            * and to U_ILLEGAL_ESCAPE_SEQUENCE for an empty segment in version 0;
            * changeState_2022() and toUnicodeCallback() also receive err.
            */
   2083    char tempBuf[2];
   2084    const char* mySource = const_cast<char*>(args->source);
   2085    char16_t *myTarget = args->target;
   2086    const char *mySourceLimit = args->sourceLimit;
   2087    uint32_t targetUniChar = 0x0000;
   2088    uint32_t mySourceChar = 0x0000;
   2089    uint32_t tmpSourceChar = 0x0000;
   2090    UConverterDataISO2022* myData;
   2091    ISO2022State *pToU2022State;
   2092    StateEnum cs;
   2093 
   2094    myData = static_cast<UConverterDataISO2022*>(args->converter->extraInfo);
   2095    pToU2022State = &myData->toU2022State;
   2096 
   2097    if(myData->key != 0) {
   2098        /* continue with a partial escape sequence */
   2099        goto escape;
   2100    } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2101        /* continue with a partial double-byte character */
               /*
                * this jump bypasses the top of the loop and the default case, so
                * mySourceChar, cs and targetUniChar are set up here the same way
                */
   2102        mySourceChar = args->converter->toUBytes[0];
   2103        args->converter->toULength = 0;
   2104        cs = static_cast<StateEnum>(pToU2022State->cs[pToU2022State->g]);
   2105        targetUniChar = missingCharMarker;
   2106        goto getTrailByte;
   2107    }
   2108 
   2109    while(mySource < mySourceLimit){
   2110 
   2111        targetUniChar =missingCharMarker;
   2112 
   2113        if(myTarget < args->targetLimit){
   2114 
   2115            mySourceChar = static_cast<unsigned char>(*mySource++);
   2116 
   2117            switch(mySourceChar) {
   2118            case UCNV_SI:
   2119                if(myData->version==3) {
   2120                    pToU2022State->g=0;
   2121                    continue;
   2122                } else {
   2123                    /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2124                    myData->isEmptySegment = false;	/* reset this, we have a different error */
   2125                    break;
   2126                }
   2127 
   2128            case UCNV_SO:
   2129                if(myData->version==3) {
   2130                    /* JIS7: switch to G1 half-width Katakana */
   2131                    pToU2022State->cs[1] = static_cast<int8_t>(HWKANA_7BIT);
   2132                    pToU2022State->g=1;
   2133                    continue;
   2134                } else {
   2135                    /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
   2136                    myData->isEmptySegment = false;	/* reset this, we have a different error */
   2137                    break;
   2138                }
   2139 
   2140            case ESC_2022:
                       /* step back onto the ESC byte so that changeState_2022() starts there */
   2141                mySource--;
   2142 escape:
   2143                {
                           /*
                            * snapshot the position and the pending byte count: the distance
                            * advanced is added to the old count for the empty-segment error below
                            */
   2144                    const char * mySourceBefore = mySource;
   2145                    int8_t toULengthBefore = args->converter->toULength;
   2146 
   2147                    changeState_2022(args->converter,&(mySource),
   2148                        mySourceLimit, ISO_2022_JP,err);
   2149 
   2150                    /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
   2151                    if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   2152                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2153                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2154                        args->converter->toULength = static_cast<int8_t>(toULengthBefore + (mySource - mySourceBefore));
   2155                    }
   2156                }
   2157 
   2158                /* invalid or illegal escape sequence */
   2159                if(U_FAILURE(*err)){
   2160                    args->target = myTarget;
   2161                    args->source = mySource;
   2162                    myData->isEmptySegment = false;	/* Reset to avoid future spurious errors */
   2163                    return;
   2164                }
   2165                /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
   2166                if(myData->key==0) {
   2167                    myData->isEmptySegment = true;
   2168                }
   2169                continue;
   2170 
   2171            /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
   2172 
   2173            case CR:
   2174            case LF:
   2175                /* automatically reset to single-byte mode */
   2176                if (static_cast<StateEnum>(pToU2022State->cs[0]) != ASCII &&
   2177                    static_cast<StateEnum>(pToU2022State->cs[0]) != JISX201) {
   2178                    pToU2022State->cs[0] = static_cast<int8_t>(ASCII);
   2179                }
   2180                pToU2022State->cs[2] = 0;
   2181                pToU2022State->g = 0;
   2182                U_FALLTHROUGH;
   2183            default:
   2184                /* convert one or two bytes */
   2185                myData->isEmptySegment = false;
   2186                cs = static_cast<StateEnum>(pToU2022State->cs[pToU2022State->g]);
                       /* the uint8_t subtraction wraps, so one compare tests 0xa1 <= mySourceChar <= 0xdf */
   2187                if (static_cast<uint8_t>(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version == 4 &&
   2188                    !IS_JP_DBCS(cs)
   2189                ) {
   2190                    /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
   2191                    targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
   2192 
   2193                    /* return from a single-shift state to the previous one */
   2194                    if(pToU2022State->g >= 2) {
   2195                        pToU2022State->g=pToU2022State->prevG;
   2196                    }
   2197                } else switch(cs) {
   2198                case ASCII:
   2199                    if(mySourceChar <= 0x7f) {
   2200                        targetUniChar = mySourceChar;
   2201                    }
   2202                    break;
   2203                case ISO8859_1:
   2204                    if(mySourceChar <= 0x7f) {
   2205                        targetUniChar = mySourceChar + 0x80;
   2206                    }
   2207                    /* return from a single-shift state to the previous one */
   2208                    pToU2022State->g=pToU2022State->prevG;
   2209                    break;
   2210                case ISO8859_7:
   2211                    if(mySourceChar <= 0x7f) {
   2212                        /* convert mySourceChar+0x80 to use a normal 8-bit table */
   2213                        targetUniChar =
   2214                            _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
   2215                                myData->myConverterArray[cs],
   2216                                mySourceChar + 0x80);
   2217                    }
   2218                    /* return from a single-shift state to the previous one */
   2219                    pToU2022State->g=pToU2022State->prevG;
   2220                    break;
   2221                case JISX201:
   2222                    if(mySourceChar <= 0x7f) {
   2223                        targetUniChar = jisx201ToU(mySourceChar);
   2224                    }
   2225                    break;
   2226                case HWKANA_7BIT:
   2227                    if (static_cast<uint8_t>(mySourceChar - 0x21) <= (0x5f - 0x21)) {
   2228                        /* 7-bit halfwidth Katakana */
   2229                        targetUniChar = mySourceChar + (HWKANA_START - 0x21);
   2230                    }
   2231                    break;
   2232                default:
   2233                    /* G0 DBCS */
   2234                    if(mySource < mySourceLimit) {
   2235                        int leadIsOk, trailIsOk;
   2236                        uint8_t trailByte;
   2237 getTrailByte:
   2238                        trailByte = static_cast<uint8_t>(*mySource);
   2239                        /*
   2240                         * Ticket 5691: consistent illegal sequences:
   2241                         * - We include at least the first byte in the illegal sequence.
   2242                         * - If any of the non-initial bytes could be the start of a character,
   2243                         *   we stop the illegal sequence before the first one of those.
   2244                         *
   2245                         * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2246                         * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2247                         * Otherwise we convert or report the pair of bytes.
   2248                         */
                               /* wrapping uint8_t subtraction again: each flag is a 0x21..0x7e range test */
   2249                        leadIsOk = static_cast<uint8_t>(mySourceChar - 0x21) <= (0x7e - 0x21);
   2250                        trailIsOk = static_cast<uint8_t>(trailByte - 0x21) <= (0x7e - 0x21);
   2251                        if (leadIsOk && trailIsOk) {
   2252                            ++mySource;
   2253                            tmpSourceChar = (mySourceChar << 8) | trailByte;
   2254                            if(cs == JISX208) {
   2255                                _2022ToSJIS(static_cast<uint8_t>(mySourceChar), trailByte, tempBuf);
   2256                                mySourceChar = tmpSourceChar;
   2257                            } else {
   2258                                /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
   2259                                mySourceChar = tmpSourceChar;
   2260                                if (cs == KSC5601) {
   2261                                    tmpSourceChar += 0x8080;  /* = _2022ToGR94DBCS(tmpSourceChar) */
   2262                                }
   2263                                tempBuf[0] = static_cast<char>(tmpSourceChar >> 8);
   2264                                tempBuf[1] = static_cast<char>(tmpSourceChar);
   2265                            }
   2266                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, false);
   2267                        } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2268                            /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2269                            ++mySource;
   2270                            /* add another bit so that the code below writes 2 bytes in case of error */
   2271                            mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   2272                        }
   2273                    } else {
                               /*
                                * the lead byte is the last byte of this buffer: park it in toUBytes
                                * so that the next call resumes at getTrailByte
                                */
   2274                        args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
   2275                        args->converter->toULength = 1;
   2276                        goto endloop;
   2277                    }
   2278                }  /* End of inner switch */
   2279                break;
   2280            }  /* End of outer switch */
                   /*
                    * three outcomes: a value below 0xfffe is written as one unit, a value
                    * above missingCharMarker is written as a surrogate pair, and the rest
                    * goes to the callback; each offset backs up by the 1 or 2 bytes consumed
                    */
   2281            if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   2282                if(args->offsets){
   2283                    args->offsets[myTarget - args->target] = static_cast<int32_t>(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2284                }
   2285                *(myTarget++) = static_cast<char16_t>(targetUniChar);
   2286            }
   2287            else if(targetUniChar > missingCharMarker){
   2288                /* disassemble the surrogate pair and write to output*/
   2289                targetUniChar-=0x0010000;
   2290                *myTarget = static_cast<char16_t>(0xd800 + static_cast<char16_t>(targetUniChar >> 10));
   2291                if(args->offsets){
   2292                    args->offsets[myTarget - args->target] = static_cast<int32_t>(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2293                }
   2294                ++myTarget;
   2295                if(myTarget< args->targetLimit){
   2296                    *myTarget = static_cast<char16_t>(0xdc00 + static_cast<char16_t>(targetUniChar & 0x3ff));
   2297                    if(args->offsets){
   2298                        args->offsets[myTarget - args->target] = static_cast<int32_t>(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2299                    }
   2300                    ++myTarget;
   2301                }else{
                           /*
                            * no room for the trail surrogate: queue it in UCharErrorBuffer.
                            * NOTE(review): *err is not set here; the overflow is only reported
                            * by the else branch of the loop if more input follows - confirm
                            * that the caller copes when the input ends exactly here
                            */
   2302                    args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   2303                                    static_cast<char16_t>(0xdc00 + static_cast<char16_t>(targetUniChar & 0x3ff));
   2304                }
   2305 
   2306            }
   2307            else{
   2308                /* Call the callback function*/
   2309                toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2310                break;
   2311            }
   2312        }
   2313        else{    /* goes with "if(myTarget < args->targetLimit)"  way up near top of function */
   2314            *err =U_BUFFER_OVERFLOW_ERROR;
   2315            break;
   2316        }
   2317    }
   2318 endloop:
   2319    args->target = myTarget;
   2320    args->source = mySource;
   2321 }
   2322 
   2323 
   2324 #if !UCONFIG_ONLY_HTML_CONVERSION
   2325 /***************************************************************
   2326 *   Rules for ISO-2022-KR encoding
   2327 *   i) The KSC5601 designator sequence should appear only once in a file,
   2328 *      at the beginning of a line before any KSC5601 characters. This usually
   2329 *      means that it appears by itself on the first line of the file
   2330 *  ii) There are only 2 shifting sequences SO to shift into double byte mode
   2331 *      and SI to shift into single byte mode
   2332 */
   2333 static void U_CALLCONV
   2334 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){
           /*
            * UTF-16 -> ISO-2022-KR for version 1: the whole conversion is done by
            * ucnv_MBCSFromUnicodeWithOffsets() on myConverterData->currentConverter.
            * args->converter is swapped to that sub-converter for the call and
            * restored afterwards; the pending code point (fromUChar32) and any
            * overflow bytes (charErrorBuffer) are carried across by hand.
            */
   2335 
   2336    UConverter* saveConv = args->converter;
   2337    UConverterDataISO2022* myConverterData = static_cast<UConverterDataISO2022*>(saveConv->extraInfo);
   2338    args->converter=myConverterData->currentConverter;
   2339 
           /* hand the pending code point to the sub-converter, and take it back after the call */
   2340    myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;
   2341    ucnv_MBCSFromUnicodeWithOffsets(args,err);
   2342    saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   2343 
   2344    if(*err == U_BUFFER_OVERFLOW_ERROR) {
               /* move the overflow bytes to the public converter and empty the sub-converter's buffer */
   2345        if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   2346            uprv_memcpy(
   2347                saveConv->charErrorBuffer,
   2348                myConverterData->currentConverter->charErrorBuffer,
   2349                myConverterData->currentConverter->charErrorBufferLength);
   2350        }
   2351        saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   2352        myConverterData->currentConverter->charErrorBufferLength = 0;
   2353    }
           /* undo the swap so the caller sees its own converter again */
   2354    args->converter=saveConv;
   2355 }
   2356 
   2357 static void U_CALLCONV
   2358 UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
           /*
            * UTF-16 -> ISO-2022-KR with offsets.
            *
            * version==1 is handed to the _IBM variant. Otherwise each code unit is
            * looked up with MBCS_FROM_UCHAR32_ISO2022 and accepted only as one byte
            * <= 0x7f or as two bytes that are both in 0xa1..0xfe; SO/SI is emitted
            * whenever the output switches between double-byte and single-byte, and
            * double-byte results are written with 0x80 subtracted from each byte.
            * Bytes that do not fit go to converter->charErrorBuffer with
            * U_BUFFER_OVERFLOW_ERROR. SO/SI/ESC in the input and code points without
            * an accepted mapping stop the loop with the code point stored in
            * converter->fromUChar32; surrogate pairs are combined there but reported
            * as U_INVALID_CHAR_FOUND rather than converted. The current mode (DBCS or
            * not) is kept in converter->fromUnicodeStatus between calls.
            */
   2359 
   2360    const char16_t *source = args->source;
   2361    const char16_t *sourceLimit = args->sourceLimit;
   2362    unsigned char *target = reinterpret_cast<unsigned char*>(args->target);
   2363    unsigned char *targetLimit = reinterpret_cast<unsigned char*>(const_cast<char*>(args->targetLimit));
   2364    int32_t* offsets = args->offsets;
   2365    uint32_t targetByteUnit = 0x0000;
   2366    UChar32 sourceChar = 0x0000;
   2367    UBool isTargetByteDBCS;
   2368    UBool oldIsTargetByteDBCS;
   2369    UConverterDataISO2022 *converterData;
   2370    UConverterSharedData* sharedData;
   2371    UBool useFallback;
   2372    int32_t length =0;
   2373 
   2374    converterData = static_cast<UConverterDataISO2022*>(args->converter->extraInfo);
   2375    /* if the version is 1 then the user is requesting
   2376     * conversion with ibm-25546 pass the arguments to
   2377     * MBCS converter and return
   2378     */
   2379    if(converterData->version==1){
   2380        UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2381        return;
   2382    }
   2383 
   2384    /* initialize data */
   2385    sharedData = converterData->currentConverter->sharedData;
   2386    useFallback = args->converter->useFallback;
   2387    isTargetByteDBCS = static_cast<UBool>(args->converter->fromUnicodeStatus);
   2388    oldIsTargetByteDBCS = isTargetByteDBCS;
   2389 
           /* NOTE(review): repeats the assignment made three lines above; it looks redundant */
   2390    isTargetByteDBCS = static_cast<UBool>(args->converter->fromUnicodeStatus);
           /* a code point left in fromUChar32 by an earlier call is resumed at getTrail if there is room */
   2391    if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {
   2392        goto getTrail;
   2393    }
   2394    while(source < sourceLimit){
   2395 
   2396        targetByteUnit = missingCharMarker;
   2397 
   2398        if(target < (unsigned char*) args->targetLimit){
   2399            sourceChar = *source++;
   2400 
   2401            /* do not convert SO/SI/ESC */
   2402            if(IS_2022_CONTROL(sourceChar)) {
   2403                /* callback(illegal) */
   2404                *err=U_ILLEGAL_CHAR_FOUND;
   2405                args->converter->fromUChar32=sourceChar;
   2406                break;
   2407            }
   2408 
   2409            length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);
   2410            if(length < 0) {
   2411                length = -length;  /* fallback */
   2412            }
   2413            /* only DBCS or SBCS characters are expected*/
   2414            /* DB characters with high bit set to 1 are expected */
                   /*
                    * the wrapping casts are range tests: the 16-bit value must lie in
                    * 0xa1a1..0xfefe and its low byte in 0xa1..0xfe, anything else is
                    * treated as unmappable
                    */
   2415            if( length > 2 || length==0 ||
   2416                (length == 1 && targetByteUnit > 0x7f) ||
   2417                (length == 2 &&
   2418                    (static_cast<uint16_t>(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
   2419                    static_cast<uint8_t>(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
   2420            ) {
   2421                targetByteUnit=missingCharMarker;
   2422            }
   2423            if (targetByteUnit != missingCharMarker){
   2424 
   2425                oldIsTargetByteDBCS = isTargetByteDBCS;
   2426                isTargetByteDBCS = static_cast<UBool>(targetByteUnit > 0x00FF);
   2427                  /* append the shift sequence */
   2428                if (oldIsTargetByteDBCS != isTargetByteDBCS ){
   2429 
                           /* written without a bounds check: the test at the top of the loop left room for one byte */
   2430                    if (isTargetByteDBCS)
   2431                        *target++ = UCNV_SO;
   2432                    else
   2433                        *target++ = UCNV_SI;
   2434                    if(offsets)
   2435                        *(offsets++) = static_cast<int32_t>(source - args->source - 1);
   2436                }
   2437                /* write targetByteUnit to target */
   2438                if(targetByteUnit <= 0x00FF){
   2439                    if( target < targetLimit){
   2440                        *(target++) = static_cast<unsigned char>(targetByteUnit);
   2441                        if(offsets){
   2442                            *(offsets++) = static_cast<int32_t>(source - args->source - 1);
   2443                        }
   2444 
   2445                    }else{
   2446                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = static_cast<unsigned char>(targetByteUnit);
   2447                        *err = U_BUFFER_OVERFLOW_ERROR;
   2448                    }
   2449                }else{
   2450                    if(target < targetLimit){
                               /* both bytes passed the 0xa1..0xfe test above, so subtracting 0x80 gives 0x21..0x7e */
   2451                        *(target++) = static_cast<unsigned char>((targetByteUnit >> 8) - 0x80);
   2452                        if(offsets){
   2453                            *(offsets++) = static_cast<int32_t>(source - args->source - 1);
   2454                        }
   2455                        if(target < targetLimit){
   2456                            *(target++) = static_cast<unsigned char>(targetByteUnit - 0x80);
   2457                            if(offsets){
   2458                                *(offsets++) = static_cast<int32_t>(source - args->source - 1);
   2459                            }
   2460                        }else{
   2461                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = static_cast<unsigned char>(targetByteUnit - 0x80);
   2462                            *err = U_BUFFER_OVERFLOW_ERROR;
   2463                        }
   2464                    }else{
   2465                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = static_cast<unsigned char>((targetByteUnit >> 8) - 0x80);
   2466                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = static_cast<unsigned char>(targetByteUnit - 0x80);
   2467                        *err = U_BUFFER_OVERFLOW_ERROR;
   2468                    }
   2469                }
   2470 
   2471            }
   2472            else{
   2473                /* oops.. the code point is unassigned
   2474                 * set the error and reason
   2475                 */
   2476 
   2477                /*check if the char is a First surrogate*/
   2478                if(U16_IS_SURROGATE(sourceChar)) {
   2479                    if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2480 getTrail:
   2481                        /*look ahead to find the trail surrogate*/
   2482                        if(source <  sourceLimit) {
   2483                            /* test the following code unit */
   2484                            char16_t trail = *source;
   2485                            if(U16_IS_TRAIL(trail)) {
   2486                                source++;
   2487                                sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   2488                                *err = U_INVALID_CHAR_FOUND;
   2489                                /* supplementary code points are not looked up here: */
   2490                                /* report the combined code point as unassigned */
   2491                            } else {
   2492                                /* this is an unmatched lead code unit (1st surrogate) */
   2493                                /* callback(illegal) */
   2494                                *err=U_ILLEGAL_CHAR_FOUND;
   2495                            }
   2496                        } else {
   2497                            /* no more input: not an error, the lead stays in fromUChar32 (set below) */
   2498                            *err = U_ZERO_ERROR;
   2499                        }
   2500                    } else {
   2501                        /* this is an unmatched trail code unit (2nd surrogate) */
   2502                        /* callback(illegal) */
   2503                        *err=U_ILLEGAL_CHAR_FOUND;
   2504                    }
   2505                } else {
   2506                    /* callback(unassigned) for a BMP code point */
   2507                    *err = U_INVALID_CHAR_FOUND;
   2508                }
   2509 
   2510                args->converter->fromUChar32=sourceChar;
   2511                break;
   2512            }
   2513        } /* end if(target < targetLimit) */
   2514        else{
   2515            *err =U_BUFFER_OVERFLOW_ERROR;
   2516            break;
   2517        }
   2518 
   2519    }/* end while(source < sourceLimit) */
   2520 
   2521    /*
   2522     * the end of the input stream and detection of truncated input
   2523     * are handled by the framework, but for ISO-2022-KR conversion
   2524     * we need to be in ASCII mode at the very end
   2525     *
   2526     * conditions:
   2527     *   successful
   2528     *   not in ASCII mode
   2529     *   end of input and no truncated input
   2530     */
   2531    if( U_SUCCESS(*err) &&
   2532        isTargetByteDBCS &&
   2533        args->flush && source>=sourceLimit && args->converter->fromUChar32==0
   2534    ) {
   2535        int32_t sourceIndex;
   2536 
   2537        /* we are switching to ASCII */
   2538        isTargetByteDBCS=false;
   2539 
   2540        /* get the source index of the last input character */
   2541        /*
   2542         * TODO this would be simpler and more reliable if we used a pair
   2543         * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   2544         * so that we could simply use the prevSourceIndex here;
   2545         * this code gives an incorrect result for the rare case of an unmatched
   2546         * trail surrogate that is alone in the last buffer of the text stream
   2547         */
   2548        sourceIndex = static_cast<int32_t>(source - args->source);
   2549        if(sourceIndex>0) {
   2550            --sourceIndex;
   2551            if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   2552                (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   2553            ) {
   2554                --sourceIndex;
   2555            }
   2556        } else {
   2557            sourceIndex=-1;
   2558        }
   2559 
   2560        fromUWriteUInt8(
   2561            args->converter,
   2562            SHIFT_IN_STR, 1,
   2563            &target, reinterpret_cast<const char*>(targetLimit),
   2564            &offsets, sourceIndex,
   2565            err);
   2566    }
   2567 
   2568    /*save the state and return */
   2569    args->source = source;
   2570    args->target = reinterpret_cast<char*>(target);
           /* the DBCS/single-byte mode is read back from fromUnicodeStatus at the start of the next call */
   2571    args->converter->fromUnicodeStatus = static_cast<uint32_t>(isTargetByteDBCS);
   2572 }
   2573 
   2574 /************************ To Unicode ***************************************/
   2575 
   2576 static void U_CALLCONV
   2577 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,
   2578                                                            UErrorCode* err){
           /*
            * ISO-2022-KR -> UTF-16 for version 1, delegating to the MBCS converter in
            * myData->currentConverter.
            *
            * The input is cut into runs at the position returned by
            * getEndOfBuffer_2022(); each run is converted by
            * ucnv_MBCSToUnicodeWithOffsets() through subArgs, a copy of args that
            * points at the sub-converter. Between runs changeState_2022() processes
            * the escape sequence. Pending bytes (toUBytes/toULength) and overflow
            * units (UCharErrorBuffer) are moved between the public converter and the
            * sub-converter around each run.
            */
   2579    char const* sourceStart;
   2580    UConverterDataISO2022* myData = static_cast<UConverterDataISO2022*>(args->converter->extraInfo);
   2581 
   2582    UConverterToUnicodeArgs subArgs;
   2583    int32_t minArgsSize;
   2584 
   2585    /* set up the subconverter arguments */
           /* copy the smaller of the caller's declared struct size and this build's struct size */
   2586    if(args->size<sizeof(UConverterToUnicodeArgs)) {
   2587        minArgsSize = args->size;
   2588    } else {
   2589        minArgsSize = static_cast<int32_t>(sizeof(UConverterToUnicodeArgs));
   2590    }
   2591 
   2592    uprv_memcpy(&subArgs, args, minArgsSize);
   2593    subArgs.size = static_cast<uint16_t>(minArgsSize);
   2594    subArgs.converter = myData->currentConverter;
   2595 
   2596    /* remember the original start of the input for offsets */
   2597    sourceStart = args->source;
   2598 
   2599    if(myData->key != 0) {
   2600        /* continue with a partial escape sequence */
   2601        goto escape;
   2602    }
   2603 
   2604    while(U_SUCCESS(*err) && args->source < args->sourceLimit) {
   2605        /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/
   2606        subArgs.source = args->source;
   2607        subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);
   2608        if(subArgs.source != subArgs.sourceLimit) {
   2609            /*
   2610             * get the current partial byte sequence
   2611             *
   2612             * it needs to be moved between the public and the subconverter
   2613             * so that the conversion framework, which only sees the public
   2614             * converter, can handle truncated and illegal input etc.
   2615             */
   2616            if(args->converter->toULength > 0) {
   2617                uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);
   2618            }
   2619            subArgs.converter->toULength = args->converter->toULength;
   2620 
   2621            /*
   2622             * Convert up to the end of the input, or to before the next escape character.
   2623             * Does not handle conversion extensions because the preToU[] state etc.
   2624             * is not copied.
   2625             */
   2626            ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);
   2627 
   2628            if(args->offsets != nullptr && sourceStart != args->source) {
   2629                /* update offsets to base them on the actual start of the input */
   2630                int32_t *offsets = args->offsets;
   2631                char16_t *target = args->target;
   2632                int32_t delta = static_cast<int32_t>(args->source - sourceStart);
   2633                while(target < subArgs.target) {
                           /* negative offsets are left as they are */
   2634                    if(*offsets >= 0) {
   2635                        *offsets += delta;
   2636                    }
   2637                    ++offsets;
   2638                    ++target;
   2639                }
   2640            }
               /* publish the sub-converter's progress to the caller's args */
   2641            args->source = subArgs.source;
   2642            args->target = subArgs.target;
   2643            args->offsets = subArgs.offsets;
   2644 
   2645            /* copy input/error/overflow buffers */
   2646            if(subArgs.converter->toULength > 0) {
   2647                uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);
   2648            }
   2649            args->converter->toULength = subArgs.converter->toULength;
   2650 
   2651            if(*err == U_BUFFER_OVERFLOW_ERROR) {
   2652                if(subArgs.converter->UCharErrorBufferLength > 0) {
   2653                    uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,
   2654                                subArgs.converter->UCharErrorBufferLength);
   2655                }
   2656                args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;
   2657                subArgs.converter->UCharErrorBufferLength = 0;
   2658            }
   2659        }
   2660 
               /* stop on any error, or once the whole input has been consumed */
   2661        if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {
   2662            return;
   2663        }
   2664 
   2665 escape:
               /* also entered directly from the top of the function when myData->key != 0 */
   2666        changeState_2022(args->converter,
   2667               &(args->source),
   2668               args->sourceLimit,
   2669               ISO_2022_KR,
   2670               err);
   2671    }
   2672 }
   2673 
   2674 static void U_CALLCONV
   2675 UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   2676                                                            UErrorCode* err){
           /*
            * Converts ISO-2022-KR bytes in [args->source, args->sourceLimit) to UTF-16
            * at args->target, writing source offsets when args->offsets is non-null.
            *
            * - Converter version 1 is delegated entirely to the _IBM variant.
            * - SI selects g=0 (single-byte), SO selects g=1 (double-byte); an SI that
            *   directly follows an SO (empty segment) is reported as
            *   U_ILLEGAL_ESCAPE_SEQUENCE with reason UCNV_IRREGULAR.
            * - ESC sequences are handed to changeState_2022().
            * - In g=1, two bytes in 0x21..0x7e are looked up with 0x80 added to each.
            * - A lead byte at the very end of the input is parked in toUBytes[0]
            *   (toULength=1) and picked up again on the next call.
            * - A full target sets U_BUFFER_OVERFLOW_ERROR.
            *
            * On every exit path args->source and args->target are updated to the
            * positions reached.
            *
            * mySourceChar is a 32-bit value (not char16_t) so that the 0x10000 marker
            * set for an illegal byte pair is not truncated before it reaches
            * toUnicodeCallback(); with a 16-bit variable a pair whose lead byte is
            * 0x00 would collapse to a value <= 0xff.
            */
   2677    char tempBuf[2];
   2678    const char* mySource = const_cast<char*>(args->source);
   2679    char16_t *myTarget = args->target;
   2680    const char *mySourceLimit = args->sourceLimit;
   2681    UChar32 targetUniChar = 0x0000;
   2682    uint32_t mySourceChar = 0x0000;
   2683    UConverterDataISO2022* myData;
   2684    UConverterSharedData* sharedData ;
   2685    UBool useFallback;
   2686 
   2687    myData = static_cast<UConverterDataISO2022*>(args->converter->extraInfo);
   2688    if(myData->version==1){
   2689        UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);
   2690        return;
   2691    }
   2692 
   2693    /* initialize state */
   2694    sharedData = myData->currentConverter->sharedData;
   2695    useFallback = args->converter->useFallback;
   2696 
           /* resume mid-sequence state left over from the previous call; both gotos jump into the loop body */
   2697    if(myData->key != 0) {
   2698        /* continue with a partial escape sequence */
   2699        goto escape;
   2700    } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   2701        /* continue with a partial double-byte character */
   2702        mySourceChar = args->converter->toUBytes[0];
   2703        args->converter->toULength = 0;
   2704        goto getTrailByte;
   2705    }
   2706 
   2707    while(mySource< mySourceLimit){
   2708 
   2709        if(myTarget < args->targetLimit){
   2710 
   2711            mySourceChar = static_cast<unsigned char>(*mySource++);
   2712 
   2713            if(mySourceChar==UCNV_SI){
   2714                myData->toU2022State.g = 0;
   2715                if (myData->isEmptySegment) {
   2716                    myData->isEmptySegment = false;	/* we are handling it, reset to avoid future spurious errors */
   2717                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   2718                    args->converter->toUCallbackReason = UCNV_IRREGULAR;
   2719                    args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
   2720                    args->converter->toULength = 1;
   2721                    args->target = myTarget;
   2722                    args->source = mySource;
   2723                    return;
   2724                }
   2725                /*consume the source */
   2726                continue;
   2727            }else if(mySourceChar==UCNV_SO){
   2728                myData->toU2022State.g = 1;
   2729                myData->isEmptySegment = true;	/* Begin a new segment, empty so far */
   2730                /*consume the source */
   2731                continue;
   2732            }else if(mySourceChar==ESC_2022){
                       /* back up so that changeState_2022() sees the ESC byte itself */
   2733                mySource--;
   2734 escape:
   2735                myData->isEmptySegment = false;	/* Any invalid ESC sequences will be detected separately, so just reset this */
   2736                changeState_2022(args->converter,&(mySource),
   2737                                mySourceLimit, ISO_2022_KR, err);
   2738                if(U_FAILURE(*err)){
   2739                    args->target = myTarget;
   2740                    args->source = mySource;
   2741                    return;
   2742                }
   2743                continue;
   2744            }
   2745 
   2746            myData->isEmptySegment = false;	/* Any invalid char errors will be detected separately, so just reset this */
   2747            if(myData->toU2022State.g == 1) {
   2748                if(mySource < mySourceLimit) {
   2749                    int leadIsOk, trailIsOk;
   2750                    uint8_t trailByte;
   2751 getTrailByte:
   2752                    targetUniChar = missingCharMarker;
   2753                    trailByte = static_cast<uint8_t>(*mySource);
   2754                    /*
   2755                     * Ticket 5691: consistent illegal sequences:
   2756                     * - We include at least the first byte in the illegal sequence.
   2757                     * - If any of the non-initial bytes could be the start of a character,
   2758                     *   we stop the illegal sequence before the first one of those.
   2759                     *
   2760                     * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   2761                     * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   2762                     * Otherwise we convert or report the pair of bytes.
   2763                     */
   2764                    leadIsOk = static_cast<uint8_t>(mySourceChar - 0x21) <= (0x7e - 0x21);
   2765                    trailIsOk = static_cast<uint8_t>(trailByte - 0x21) <= (0x7e - 0x21);
   2766                    if (leadIsOk && trailIsOk) {
   2767                        ++mySource;
   2768                        tempBuf[0] = static_cast<char>(mySourceChar + 0x80);
   2769                        tempBuf[1] = static_cast<char>(trailByte + 0x80);
   2770                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
   2771                        mySourceChar = (mySourceChar << 8) | trailByte;
   2772                    } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   2773                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   2774                        ++mySource;
   2775                        /* add another bit so that the code below writes 2 bytes in case of error */
                               /* kept in 32 bits: a 16-bit cast here would drop the 0x10000 marker again */
   2776                        mySourceChar = static_cast<uint32_t>(0x10000 | (mySourceChar << 8) | trailByte);
   2777                    }
   2778                } else {
                           /* input ends after the lead byte: park it for the next call */
   2779                    args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
   2780                    args->converter->toULength = 1;
   2781                    break;
   2782                }
   2783            }
   2784            else if(mySourceChar <= 0x7f) {
   2785                targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
   2786            } else {
   2787                targetUniChar = 0xffff;
   2788            }
   2789            if(targetUniChar < 0xfffe){
   2790                if(args->offsets) {
                           /* offset of the first source byte of this character: 1 byte back for single-byte, 2 for a pair */
   2791                    args->offsets[myTarget - args->target] = static_cast<int32_t>(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   2792                }
   2793                *(myTarget++) = static_cast<char16_t>(targetUniChar);
   2794            }
   2795            else {
   2796                /* Call the callback function*/
   2797                toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   2798                break;
   2799            }
   2800        }
   2801        else{
   2802            *err =U_BUFFER_OVERFLOW_ERROR;
   2803            break;
   2804        }
   2805    }
   2806    args->target = myTarget;
   2807    args->source = mySource;
   2808 }
   2809 
   2810 /*************************** END ISO2022-KR *********************************/
   2811 
   2812 /*************************** ISO-2022-CN *********************************
   2813 *
   2814 * Rules for ISO-2022-CN Encoding:
   2815 * i)   The designator sequence must appear once on a line before any instance
   2816 *      of character set it designates.
   2817 * ii)  If two lines contain characters from the same character set, both lines
   2818 *      must include the designator sequence.
   2819 * iii) Once the designator sequence is known, a shifting sequence has to be found
   2820 *      to invoke the designated character set
   2821 * iv)  All lines start in ASCII and end in ASCII.
   2822 * v)   Four shifting sequences are employed for this purpose:
   2823 *
   2824 *      Sequence    ASCII Eq    Charsets
   2825 *      ----------  -------    ---------
   2826 *      SI           <SI>        US-ASCII
   2827 *      SO           <SO>        CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
   2828 *      SS2          <ESC>N      CNS-11643-1992 Plane 2
   2829 *      SS3          <ESC>O      CNS-11643-1992 Planes 3-7
   2830 *
   2831 * vi)
   2832 *      SOdesignator  : ESC "$" ")" finalchar_for_SO
   2833 *      SS2designator : ESC "$" "*" finalchar_for_SS2
   2834 *      SS3designator : ESC "$" "+" finalchar_for_SS3
   2835 *
   2836 *      ESC $ ) A       Indicates the bytes following SO are Chinese
   2837 *       characters as defined in GB 2312-80, until
   2838 *       another SOdesignation appears
   2839 *
   2840 *
   2841 *      ESC $ ) E       Indicates the bytes following SO are as defined
   2842 *       in ISO-IR-165 (for details, see section 2.1),
   2843 *       until another SOdesignation appears
   2844 *
   2845 *      ESC $ ) G       Indicates the bytes following SO are as defined
   2846 *       in CNS 11643-plane-1, until another
   2847 *       SOdesignation appears
   2848 *
   2849 *      ESC $ * H       Indicates the two bytes immediately following
   2850 *       SS2 is a Chinese character as defined in CNS
   2851 *       11643-plane-2, until another SS2designation
   2852 *       appears
   2853 *       (Meaning <ESC>N must precede every 2 byte
   2854 *        sequence.)
   2855 *
   2856 *      ESC $ + I       Indicates the immediate two bytes following SS3
   2857 *       is a Chinese character as defined in CNS
   2858 *       11643-plane-3, until another SS3designation
   2859 *       appears
   2860 *       (Meaning <ESC>O must precede every 2 byte
   2861 *        sequence.)
   2862 *
   2863 *      ESC $ + J       Indicates the immediate two bytes following SS3
   2864 *       is a Chinese character as defined in CNS
   2865 *       11643-plane-4, until another SS3designation
   2866 *       appears
   2867 *       (In English: <ESC>O must precede every 2 byte
   2868 *        sequence.)
   2869 *
   2870 *      ESC $ + K       Indicates the immediate two bytes following SS3
   2871 *       is a Chinese character as defined in CNS
   2872 *       11643-plane-5, until another SS3designation
   2873 *       appears
   2874 *
   2875 *      ESC $ + L       Indicates the immediate two bytes following SS3
   2876 *       is a Chinese character as defined in CNS
   2877 *       11643-plane-6, until another SS3designation
   2878 *       appears
   2879 *
   2880 *      ESC $ + M       Indicates the immediate two bytes following SS3
   2881 *       is a Chinese character as defined in CNS
   2882 *       11643-plane-7, until another SS3designation
   2883 *       appears
   2884 *
   2885 *       As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
   2886 *       has its own designation information before any Chinese characters
   2887 *       appear
   2888 *
   2889 */
   2890 
   2891 /* The following are defined this way to make the strings truly readonly.
         * Each is a 4-byte designator sequence: ESC (0x1B), '$' (0x24), an intermediate
         * byte ( ')' for the SO set, '*' for the SS2 set, '+' for the SS3 set ), and a
         * final byte that names the character set. */
   2892 static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; /* ESC $ ) A */
   2893 static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; /* ESC $ ) E */
   2894 static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; /* ESC $ ) G */
   2895 static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; /* ESC $ * H */
   2896 static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; /* ESC $ + I */
   2897 static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; /* ESC $ + J */
   2898 static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; /* ESC $ + K */
   2899 static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; /* ESC $ + L */
   2900 static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; /* ESC $ + M */
   2901 
   2902 /********************** ISO2022-CN Data **************************/
   2903 static const char* const escSeqCharsCN[10] ={
               /*
                * Table of the sequences written by the ISO-2022-CN encoder. Entry 0 is the
                * shift-in string; entries 1..9 are 4-byte designators. The encoder indexes
                * it directly with the charset id for ids below CNS_11643, and with
                * CNS_11643 + (cs - CNS_11643_1) for the CNS 11643 planes.
                */
   2904        SHIFT_IN_STR,                   /* 0 ASCII */
   2905        GB_2312_80_STR,                 /* 1 GB2312_1 */
   2906        ISO_IR_165_STR,                 /* 2 ISO_IR_165 */
   2907        CNS_11643_1992_Plane_1_STR,     /* 3 CNS 11643 plane 1 */
   2908        CNS_11643_1992_Plane_2_STR,     /* 4 CNS 11643 plane 2 */
   2909        CNS_11643_1992_Plane_3_STR,     /* 5 CNS 11643 plane 3 */
   2910        CNS_11643_1992_Plane_4_STR,     /* 6 CNS 11643 plane 4 */
   2911        CNS_11643_1992_Plane_5_STR,     /* 7 CNS 11643 plane 5 */
   2912        CNS_11643_1992_Plane_6_STR,     /* 8 CNS 11643 plane 6 */
   2913        CNS_11643_1992_Plane_7_STR      /* 9 CNS 11643 plane 7 */
   2914 };
   2915 
   2916 static void U_CALLCONV
   2917 UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){
           /*
            * Converts UTF-16 in [args->source, args->sourceLimit) to ISO-2022-CN bytes
            * at args->target, writing one source offset per output byte when
            * args->offsets is non-null.
            *
            * - A lead surrogate left in cnv->fromUChar32 by the previous call is
            *   completed first; unmatched surrogates set U_ILLEGAL_CHAR_FOUND.
            * - U+0000..U+007F is written as-is (preceded by SI when not in ASCII
            *   mode); SO/SI/ESC are rejected; CR and LF reset the whole output state.
            * - Other code points are looked up in a short list of candidate charsets
            *   (choices[]); a roundtrip mapping ends the search, and only the first
            *   fallback mapping found is kept.
            * - The designator and shift sequences are emitted only when the chosen
            *   charset or shift state differs from the recorded state.
            * - Code points with no mapping set U_INVALID_CHAR_FOUND.
            * - At the end of a flushed, successful conversion SI is appended if the
            *   output is not already in ASCII mode.
            *
            * In every error case the offending code point is stored in
            * cnv->fromUChar32. args->source and args->target are updated on return.
            */
   2918    UConverter *cnv = args->converter;
   2919    UConverterDataISO2022 *converterData;
   2920    ISO2022State *pFromU2022State;
   2921    uint8_t* target = reinterpret_cast<uint8_t*>(args->target);
   2922    const uint8_t* targetLimit = reinterpret_cast<const uint8_t*>(args->targetLimit);
   2923    const char16_t* source = args->source;
   2924    const char16_t* sourceLimit = args->sourceLimit;
   2925    int32_t* offsets = args->offsets;
   2926    UChar32 sourceChar;
           /* large enough for the longest output: 4-byte designator + 2-byte shift + 2 data bytes */
   2927    char buffer[8];
   2928    int32_t len;
   2929    int8_t choices[3];
   2930    int32_t choiceCount;
   2931    uint32_t targetValue = 0;
   2932    UBool useFallback;
   2933 
   2934    /* set up the state */
   2935    converterData = static_cast<UConverterDataISO2022*>(cnv->extraInfo);
   2936    pFromU2022State   = &converterData->fromU2022State;
   2937 
           /* choiceCount == 0 means choices[] must be (re)built before the next lookup */
   2938    choiceCount = 0;
   2939 
   2940    /* check if the last codepoint of previous buffer was a lead surrogate*/
   2941    if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {
   2942        goto getTrail;
   2943    }
   2944 
   2945    while( source < sourceLimit){
   2946        if(target < targetLimit){
   2947 
   2948            sourceChar  = *(source++);
   2949            /*check if the char is a First surrogate*/
   2950             if(U16_IS_SURROGATE(sourceChar)) {
   2951                if(U16_IS_SURROGATE_LEAD(sourceChar)) {
   2952 getTrail:
   2953                    /*look ahead to find the trail surrogate*/
   2954                    if(source < sourceLimit) {
   2955                        /* test the following code unit */
   2956                        char16_t trail = *source;
   2957                        if(U16_IS_TRAIL(trail)) {
   2958                            source++;
   2959                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   2960                            cnv->fromUChar32=0x00;
   2961                            /* convert this supplementary code point */
   2962                            /* exit this condition tree */
   2963                        } else {
   2964                            /* this is an unmatched lead code unit (1st surrogate) */
   2965                            /* callback(illegal) */
   2966                            *err=U_ILLEGAL_CHAR_FOUND;
   2967                            cnv->fromUChar32=sourceChar;
   2968                            break;
   2969                        }
   2970                    } else {
   2971                        /* no more input */
   2972                        cnv->fromUChar32=sourceChar;
   2973                        break;
   2974                    }
   2975                } else {
   2976                    /* this is an unmatched trail code unit (2nd surrogate) */
   2977                    /* callback(illegal) */
   2978                    *err=U_ILLEGAL_CHAR_FOUND;
   2979                    cnv->fromUChar32=sourceChar;
   2980                    break;
   2981                }
   2982            }
   2983 
   2984            /* do the conversion */
   2985            if(sourceChar <= 0x007f ){
   2986                /* do not convert SO/SI/ESC */
   2987                if(IS_2022_CONTROL(sourceChar)) {
   2988                    /* callback(illegal) */
   2989                    *err=U_ILLEGAL_CHAR_FOUND;
   2990                    cnv->fromUChar32=sourceChar;
   2991                    break;
   2992                }
   2993 
   2994                /* US-ASCII */
   2995                if(pFromU2022State->g == 0) {
   2996                    buffer[0] = static_cast<char>(sourceChar);
   2997                    len = 1;
   2998                } else {
                           /* leave the SO state first, then write the ASCII byte */
   2999                    buffer[0] = UCNV_SI;
   3000                    buffer[1] = static_cast<char>(sourceChar);
   3001                    len = 2;
   3002                    pFromU2022State->g = 0;
   3003                    choiceCount = 0;
   3004                }
   3005                if(sourceChar == CR || sourceChar == LF) {
   3006                    /* reset the state at the end of a line */
   3007                    uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));
   3008                    choiceCount = 0;
   3009                }
   3010            }
   3011            else{
   3012                /* convert U+0080..U+10ffff */
   3013                int32_t i;
   3014                int8_t cs, g;
   3015 
   3016                if(choiceCount == 0) {
   3017                    /* try the current SO/G1 converter first */
   3018                    choices[0] = pFromU2022State->cs[1];
   3019 
   3020                    /* default to GB2312_1 if none is designated yet */
   3021                    if(choices[0] == 0) {
   3022                        choices[0] = GB2312_1;
   3023                    }
   3024 
   3025                    if(converterData->version == 0) {
   3026                        /* ISO-2022-CN */
   3027 
   3028                        /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */
   3029                        if(choices[0] == GB2312_1) {
   3030                            choices[1] = static_cast<int8_t>(CNS_11643_1);
   3031                        } else {
   3032                            choices[1] = static_cast<int8_t>(GB2312_1);
   3033                        }
   3034 
   3035                        choiceCount = 2;
   3036                    } else if (converterData->version == 1) {
   3037                        /* ISO-2022-CN-EXT */
   3038 
   3039                        /* try one of the other converters */
   3040                        switch(choices[0]) {
   3041                        case GB2312_1:
   3042                            choices[1] = static_cast<int8_t>(CNS_11643_1);
   3043                            choices[2] = static_cast<int8_t>(ISO_IR_165);
   3044                            break;
   3045                        case ISO_IR_165:
   3046                            choices[1] = static_cast<int8_t>(GB2312_1);
   3047                            choices[2] = static_cast<int8_t>(CNS_11643_1);
   3048                            break;
   3049                        default: /* CNS_11643_x */
   3050                            choices[1] = static_cast<int8_t>(GB2312_1);
   3051                            choices[2] = static_cast<int8_t>(ISO_IR_165);
   3052                            break;
   3053                        }
   3054 
   3055                        choiceCount = 3;
   3056                    } else {
                               /* any other version: always try CNS 11643 first, then GB 2312 */
   3057                        choices[0] = static_cast<int8_t>(CNS_11643_1);
   3058                        choices[1] = static_cast<int8_t>(GB2312_1);

                               /*
                                * Without a count the two entries above are never consulted:
                                * the lookup loop below would not run and every non-ASCII
                                * code point would end in U_INVALID_CHAR_FOUND.
                                */
                               choiceCount = 2;
   3059                    }
   3060                }
   3061 
   3062                cs = g = 0;
   3063                /*
   3064                 * len==0: no mapping found yet
   3065                 * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
   3066                 * len>0: found a roundtrip result, done
   3067                 */
   3068                len = 0;
   3069                /*
   3070                 * We will turn off useFallback after finding a fallback,
   3071                 * but we still get fallbacks from PUA code points as usual.
   3072                 * Therefore, we will also need to check that we don't overwrite
   3073                 * an early fallback with a later one.
   3074                 */
   3075                useFallback = cnv->useFallback;
   3076 
   3077                for(i = 0; i < choiceCount && len <= 0; ++i) {
   3078                    int8_t cs0 = choices[i];
   3079                    if(cs0 > 0) {
   3080                        uint32_t value;
   3081                        int32_t len2;
   3082                        if(cs0 >= CNS_11643_0) {
                                   /* one 3-byte lookup covers all CNS planes; the top byte of value selects the plane */
   3083                            len2 = MBCS_FROM_UCHAR32_ISO2022(
   3084                                        converterData->myConverterArray[CNS_11643],
   3085                                        sourceChar,
   3086                                        &value,
   3087                                        useFallback,
   3088                                        MBCS_OUTPUT_3);
   3089                            if(len2 == 3 || (len2 == -3 && len == 0)) {
   3090                                targetValue = value;
   3091                                cs = static_cast<int8_t>(CNS_11643_0 + (value >> 16) - 0x80);
   3092                                if(len2 >= 0) {
   3093                                    len = 2;
   3094                                } else {
   3095                                    len = -2;
   3096                                    useFallback = false;
   3097                                }
   3098                                if(cs == CNS_11643_1) {
   3099                                    g = 1;
   3100                                } else if(cs == CNS_11643_2) {
   3101                                    g = 2;
   3102                                } else /* plane 3..7 */ if(converterData->version == 1) {
   3103                                    g = 3;
   3104                                } else {
   3105                                    /* ISO-2022-CN (without -EXT) does not support plane 3..7 */
                                           /*
                                            * NOTE(review): targetValue and cs were already overwritten
                                            * above, so a fallback found by an earlier choice appears to
                                            * be discarded here as well - confirm whether that is intended.
                                            */
   3106                                    len = 0;
   3107                                }
   3108                            }
   3109                        } else {
   3110                            /* GB2312_1 or ISO-IR-165 */
   3111                            U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS);
   3112                            len2 = MBCS_FROM_UCHAR32_ISO2022(
   3113                                        converterData->myConverterArray[cs0],
   3114                                        sourceChar,
   3115                                        &value,
   3116                                        useFallback,
   3117                                        MBCS_OUTPUT_2);
   3118                            if(len2 == 2 || (len2 == -2 && len == 0)) {
   3119                                targetValue = value;
   3120                                len = len2;
   3121                                cs = cs0;
   3122                                g = 1;
   3123                                useFallback = false;
   3124                            }
   3125                        }
   3126                    }
   3127                }
   3128 
   3129                if(len != 0) {
   3130                    len = 0; /* count output bytes; it must have been abs(len) == 2 */
   3131 
   3132                    /* write the designation sequence if necessary */
   3133                    if(cs != pFromU2022State->cs[g]) {
   3134                        if(cs < CNS_11643) {
   3135                            uprv_memcpy(buffer, escSeqCharsCN[cs], 4);
   3136                        } else {
   3137                            U_ASSERT(cs >= CNS_11643_1);
   3138                            uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);
   3139                        }
   3140                        len = 4;
   3141                        pFromU2022State->cs[g] = cs;
   3142                        if(g == 1) {
   3143                            /* changing the SO/G1 charset invalidates the choices[] */
   3144                            choiceCount = 0;
   3145                        }
   3146                    }
   3147 
   3148                    /* write the shift sequence if necessary */
   3149                    if(g != pFromU2022State->g) {
   3150                        switch(g) {
   3151                        case 1:
   3152                            buffer[len++] = UCNV_SO;
   3153 
   3154                            /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
   3155                            pFromU2022State->g = 1;
   3156                            break;
   3157                        case 2:
                                   /* SS2: ESC N */
   3158                            buffer[len++] = 0x1b;
   3159                            buffer[len++] = 0x4e;
   3160                            break;
   3161                        default: /* case 3 */
                                   /* SS3: ESC O */
   3162                            buffer[len++] = 0x1b;
   3163                            buffer[len++] = 0x4f;
   3164                            break;
   3165                        }
   3166                    }
   3167 
   3168                    /* write the two output bytes */
   3169                    buffer[len++] = static_cast<char>(targetValue >> 8);
   3170                    buffer[len++] = static_cast<char>(targetValue);
   3171                } else {
   3172                    /* if we cannot find the character after checking all codepages
   3173                     * then this is an error
   3174                     */
   3175                    *err = U_INVALID_CHAR_FOUND;
   3176                    cnv->fromUChar32=sourceChar;
   3177                    break;
   3178                }
   3179            }
   3180 
   3181            /* output len>0 bytes in buffer[] */
   3182            if(len == 1) {
                       /* room for one byte is guaranteed by the target < targetLimit check above */
   3183                *target++ = buffer[0];
   3184                if(offsets) {
   3185                    *offsets++ = static_cast<int32_t>(source - args->source - 1); /* -1: known to be ASCII */
   3186                }
   3187            } else if(len == 2 && (target + 2) <= targetLimit) {
   3188                *target++ = buffer[0];
   3189                *target++ = buffer[1];
   3190                if(offsets) {
   3191                    int32_t sourceIndex = static_cast<int32_t>(source - args->source - U16_LENGTH(sourceChar));
   3192                    *offsets++ = sourceIndex;
   3193                    *offsets++ = sourceIndex;
   3194                }
   3195            } else {
                       /* longer output, or output that may not fit: let the shared writer handle it */
   3196                fromUWriteUInt8(
   3197                    cnv,
   3198                    buffer, len,
   3199                    &target, reinterpret_cast<const char*>(targetLimit),
   3200                    &offsets, static_cast<int32_t>(source - args->source - U16_LENGTH(sourceChar)),
   3201                    err);
   3202                if(U_FAILURE(*err)) {
   3203                    break;
   3204                }
   3205            }
   3206        } /* end if(target < targetLimit) */
   3207        else{
   3208            *err =U_BUFFER_OVERFLOW_ERROR;
   3209            break;
   3210        }
   3211 
   3212    }/* end while(source < sourceLimit) */
   3213 
   3214    /*
   3215     * the end of the input stream and detection of truncated input
   3216     * are handled by the framework, but for ISO-2022-CN conversion
   3217     * we need to be in ASCII mode at the very end
   3218     *
   3219     * conditions:
   3220     *   successful
   3221     *   not in ASCII mode
   3222     *   end of input and no truncated input
   3223     */
   3224    if( U_SUCCESS(*err) &&
   3225        pFromU2022State->g!=0 &&
   3226        args->flush && source>=sourceLimit && cnv->fromUChar32==0
   3227    ) {
   3228        int32_t sourceIndex;
   3229 
   3230        /* we are switching to ASCII */
   3231        pFromU2022State->g=0;
   3232 
   3233        /* get the source index of the last input character */
   3234        /*
   3235         * TODO this would be simpler and more reliable if we used a pair
   3236         * of sourceIndex/prevSourceIndex like in ucnvmbcs.c
   3237         * so that we could simply use the prevSourceIndex here;
   3238         * this code gives an incorrect result for the rare case of an unmatched
   3239         * trail surrogate that is alone in the last buffer of the text stream
   3240         */
   3241        sourceIndex = static_cast<int32_t>(source - args->source);
   3242        if(sourceIndex>0) {
   3243            --sourceIndex;
   3244            if( U16_IS_TRAIL(args->source[sourceIndex]) &&
   3245                (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
   3246            ) {
   3247                --sourceIndex;
   3248            }
   3249        } else {
   3250            sourceIndex=-1;
   3251        }
   3252 
   3253        fromUWriteUInt8(
   3254            cnv,
   3255            SHIFT_IN_STR, 1,
   3256            &target, reinterpret_cast<const char*>(targetLimit),
   3257            &offsets, sourceIndex,
   3258            err);
   3259    }
   3260 
   3261    /*save the state and return */
   3262    args->source = source;
   3263    args->target = reinterpret_cast<char*>(target);
   3264 }
   3265 
   3266 
   /*
    * ISO-2022-CN to UTF-16 conversion loop.
    *
    * Reads bytes from args->source up to args->sourceLimit and appends UTF-16
    * code units at args->target up to args->targetLimit. When args->offsets is
    * non-null, one source offset is stored per code unit written.
    *
    * Decoder state is kept in the UConverterDataISO2022 reached through
    * args->converter->extraInfo; its toU2022State holds the active G set (g),
    * the previous one (prevG) and the designated charsets (cs[]).
    *
    * Resuming a previous call:
    *  - myData->key != 0       : jump back into escape-sequence parsing.
    *  - converter->toULength==1: a lead byte was stored in toUBytes[0]; jump
    *                             straight to reading its trail byte (only if
    *                             there is input and output room).
    *
    * Per-byte handling:
    *  - SI  : g = 0; reports U_ILLEGAL_ESCAPE_SEQUENCE (reason UCNV_IRREGULAR)
    *          if the segment opened by SO was empty.
    *  - SO  : g = 1 when cs[1] is designated, otherwise the byte goes to the
    *          callback path.
    *  - ESC : handed to changeState_2022() with variant ISO_2022_CN.
    *  - CR/LF: zero the whole toU2022State, then convert like any other byte.
    *  - else: g != 0 converts a byte pair through the designated sub-converter,
    *          g == 0 passes bytes <= 0x7f through unchanged.
    *
    * Exit: args->source and args->target are updated on every return path.
    * *err is set to U_BUFFER_OVERFLOW_ERROR when input remains but the target
    * is full, to U_ILLEGAL_ESCAPE_SEQUENCE for the empty-segment cases, or to
    * whatever changeState_2022()/toUnicodeCallback() report.
    */
   3267 static void U_CALLCONV
   3268 UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
   3269                                               UErrorCode* err){
   3270    char tempBuf[3];
   3271    const char* mySource = const_cast<char*>(args->source);
   3272    char16_t *myTarget = args->target;
   3273    const char *mySourceLimit = args->sourceLimit;
   3274    uint32_t targetUniChar = 0x0000;
   3275    uint32_t mySourceChar = 0x0000;
   3276    UConverterDataISO2022* myData;
   3277    ISO2022State *pToU2022State;
   3278 
   3279    myData = static_cast<UConverterDataISO2022*>(args->converter->extraInfo);
   3280    pToU2022State = &myData->toU2022State;
   3281 
   3282    if(myData->key != 0) {
   3283        /* continue with a partial escape sequence */
   3284        goto escape;
   3285    } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {
   3286        /* continue with a partial double-byte character */
   3287        mySourceChar = args->converter->toUBytes[0];
   3288        args->converter->toULength = 0;
   3289        targetUniChar = missingCharMarker;
   3290        goto getTrailByte;
   3291    }
   3292 
   3293    while(mySource < mySourceLimit){
   3294 
   3295        targetUniChar =missingCharMarker;
   3296 
   3297        if(myTarget < args->targetLimit){
   3298 
   3299            mySourceChar = static_cast<unsigned char>(*mySource++);
   3300 
   3301            switch(mySourceChar){
   3302            case UCNV_SI:
   3303                pToU2022State->g=0;
   3304                if (myData->isEmptySegment) {
   3305                    myData->isEmptySegment = false;	/* we are handling it, reset to avoid future spurious errors */
   3306                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3307                    args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3308                    args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
   3309                    args->converter->toULength = 1;
   3310                    args->target = myTarget;
   3311                    args->source = mySource;
   3312                    return;
   3313                }
   3314                continue;
   3315 
   3316            case UCNV_SO:
   3317                if(pToU2022State->cs[1] != 0) {
   3318                    pToU2022State->g=1;
   3319                    myData->isEmptySegment = true;	/* Begin a new segment, empty so far */
   3320                    continue;
   3321                } else {
   3322                    /* illegal to have SO before a matching designator */
   3323                    myData->isEmptySegment = false;	/* Handling a different error, reset this to avoid future spurious errs */
                           /* targetUniChar is still missingCharMarker here, so leaving the
                            * switch lands in the toUnicodeCallback() branch below */
   3324                    break;
   3325                }
   3326 
   3327            case ESC_2022:
   3328                mySource--;
   3329 escape:
   3330                {
   3331                    const char * mySourceBefore = mySource;
   3332                    int8_t toULengthBefore = args->converter->toULength;
   3333 
   3334                    changeState_2022(args->converter,&(mySource),
   3335                        mySourceLimit, ISO_2022_CN,err);
   3336 
   3337                    /* After SO there must be at least one character before a designator (designator error handled separately) */
   3338                    if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
   3339                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
   3340                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
   3341                        args->converter->toULength = static_cast<int8_t>(toULengthBefore + (mySource - mySourceBefore));
   3342                    }
   3343                }
   3344 
   3345                /* invalid or illegal escape sequence */
   3346                if(U_FAILURE(*err)){
   3347                    args->target = myTarget;
   3348                    args->source = mySource;
   3349                    myData->isEmptySegment = false;	/* Reset to avoid future spurious errors */
   3350                    return;
   3351                }
   3352                continue;
   3353 
   3354            /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
   3355 
   3356            case CR:
   3357            case LF:
   3358                uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
   3359                U_FALLTHROUGH;
   3360            default:
   3361                /* convert one or two bytes */
   3362                myData->isEmptySegment = false;
   3363                if(pToU2022State->g != 0) {
   3364                    if(mySource < mySourceLimit) {
   3365                        UConverterSharedData *cnv;
   3366                        StateEnum tempState;
   3367                        int32_t tempBufLen;
   3368                        int leadIsOk, trailIsOk;
   3369                        uint8_t trailByte;
   3370 getTrailByte:
   3371                        trailByte = static_cast<uint8_t>(*mySource);
   3372                        /*
   3373                         * Ticket 5691: consistent illegal sequences:
   3374                         * - We include at least the first byte in the illegal sequence.
   3375                         * - If any of the non-initial bytes could be the start of a character,
   3376                         *   we stop the illegal sequence before the first one of those.
   3377                         *
   3378                         * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
   3379                         * an ESC/SO/SI, we report only the first byte as the illegal sequence.
   3380                         * Otherwise we convert or report the pair of bytes.
   3381                         */
   3382                        leadIsOk = static_cast<uint8_t>(mySourceChar - 0x21) <= (0x7e - 0x21);
   3383                        trailIsOk = static_cast<uint8_t>(trailByte - 0x21) <= (0x7e - 0x21);
   3384                        if (leadIsOk && trailIsOk) {
   3385                            ++mySource;
   3386                            tempState = static_cast<StateEnum>(pToU2022State->cs[pToU2022State->g]);
   3387                            if(tempState >= CNS_11643_0) {
                                   /* CNS states share one table: a prefix byte of 0x80 plus the
                                    * state's distance from CNS_11643_0 precedes the byte pair */
   3388                                cnv = myData->myConverterArray[CNS_11643];
   3389                                tempBuf[0] = static_cast<char>(0x80 + (tempState - CNS_11643_0));
   3390                                tempBuf[1] = static_cast<char>(mySourceChar);
   3391                                tempBuf[2] = static_cast<char>(trailByte);
   3392                                tempBufLen = 3;
   3393 
   3394                            }else{
   3395                                U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS);
   3396                                cnv = myData->myConverterArray[tempState];
   3397                                tempBuf[0] = static_cast<char>(mySourceChar);
   3398                                tempBuf[1] = static_cast<char>(trailByte);
   3399                                tempBufLen = 2;
   3400                            }
   3401                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, false);
   3402                            mySourceChar = (mySourceChar << 8) | trailByte;
   3403                        } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
   3404                            /* report a pair of illegal bytes if the second byte is not a DBCS starter */
   3405                            ++mySource;
   3406                            /* add another bit so that the code below writes 2 bytes in case of error */
   3407                            mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
   3408                        }
   3409                        if(pToU2022State->g>=2) {
   3410                            /* return from a single-shift state to the previous one */
   3411                            pToU2022State->g=pToU2022State->prevG;
   3412                        }
   3413                    } else {
                           /* input ended after the lead byte: stash it so that the next
                            * call can resume at getTrailByte */
   3414                        args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar);
   3415                        args->converter->toULength = 1;
   3416                        goto endloop;
   3417                    }
   3418                }
   3419                else{
   3420                    if(mySourceChar <= 0x7f) {
   3421                        targetUniChar = static_cast<char16_t>(mySourceChar);
   3422                    }
   3423                }
   3424                break;
   3425            }
   3426            if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){
   3427                if(args->offsets){
                       /* offset of the character's first source byte: step back 1 byte
                        * for single-byte input, 2 when mySourceChar holds a byte pair */
   3428                    args->offsets[myTarget - args->target] = static_cast<int32_t>(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3429                }
   3430                *(myTarget++) = static_cast<char16_t>(targetUniChar);
   3431            }
   3432            else if(targetUniChar > missingCharMarker){
   3433                /* disassemble the surrogate pair and write to output*/
   3434                targetUniChar-=0x0010000;
   3435                *myTarget = static_cast<char16_t>(0xd800 + static_cast<char16_t>(targetUniChar >> 10));
   3436                if(args->offsets){
   3437                    args->offsets[myTarget - args->target] = static_cast<int32_t>(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3438                }
   3439                ++myTarget;
   3440                if(myTarget< args->targetLimit){
   3441                    *myTarget = static_cast<char16_t>(0xdc00 + static_cast<char16_t>(targetUniChar & 0x3ff));
   3442                    if(args->offsets){
   3443                        args->offsets[myTarget - args->target] = static_cast<int32_t>(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));
   3444                    }
   3445                    ++myTarget;
   3446                }else{
                       /* no room left for the trail surrogate: append it to the
                        * converter's UCharErrorBuffer instead of the target */
   3447                    args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
   3448                                    static_cast<char16_t>(0xdc00 + static_cast<char16_t>(targetUniChar & 0x3ff));
   3449                }
   3450 
   3451            }
   3452            else{
   3453                /* Call the callback function*/
   3454                toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);
   3455                break;
   3456            }
   3457        }
   3458        else{
   3459            *err =U_BUFFER_OVERFLOW_ERROR;
   3460            break;
   3461        }
   3462    }
   3463 endloop:
   3464    args->target = myTarget;
   3465    args->source = mySource;
   3466 }
   3467 #endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
   3468 
   /*
    * Writes the converter's substitution character for the ISO-2022 variant
    * selected by myConverterData->locale[0].
    *
    * For 'j', 'c' and 'k' (version 0) the bytes are assembled in a local
    * buffer: first whatever shift/escape bytes are needed to reach a state in
    * which the sub char can be written (the fromUnicode state is updated to
    * match), then the sub char itself. The buffer is emitted with
    * ucnv_cbFromUWriteBytes() using offsetIndex.
    *
    * For 'k' with a non-zero version the work is delegated to
    * myConverterData->currentConverter through ucnv_cbFromUWriteSub(); this
    * converter's sub string is lent to the sub-converter for the duration of
    * the call, and any overflow bytes are moved back afterwards.
    *
    * Any other locale letter writes zero bytes.
    *
    * args        - fromUnicode arguments; args->converter is this converter
    * offsetIndex - passed through to ucnv_cbFromUWriteBytes()
    * err         - in/out error code
    */
   3469 static void U_CALLCONV
   3470 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
   3471    UConverter *cnv = args->converter;
   3472    UConverterDataISO2022* myConverterData = static_cast<UConverterDataISO2022*>(cnv->extraInfo);
   3473    ISO2022State *pFromU2022State=&myConverterData->fromU2022State;
   3474    char *p, *subchar;
   3475    char buffer[8];
   3476    int32_t length;
   3477 
   3478    subchar = reinterpret_cast<char*>(cnv->subChars);
   3479    length=cnv->subCharLen; /* assume length==1 for most variants */
   3480 
   3481    p = buffer;
   3482    switch(myConverterData->locale[0]){
   3483    case 'j':
   3484        {
   3485            int8_t cs;
   3486 
   3487            if(pFromU2022State->g == 1) {
   3488                /* JIS7: switch from G1 to G0 */
   3489                pFromU2022State->g = 0;
   3490                *p++ = UCNV_SI;
   3491            }
   3492 
   3493            cs = pFromU2022State->cs[0];
   3494            if(cs != ASCII && cs != JISX201) {
   3495                /* not in ASCII or JIS X 0201: switch to ASCII */
   3496                pFromU2022State->cs[0] = static_cast<int8_t>(ASCII);
                   /* the three bytes below are ESC ( B */
   3497                *p++ = '\x1b';
   3498                *p++ = '\x28';
   3499                *p++ = '\x42';
   3500            }
   3501 
   3502            *p++ = subchar[0];
   3503            break;
   3504        }
   3505    case 'c':
   3506        if(pFromU2022State->g != 0) {
   3507            /* not in ASCII mode: switch to ASCII */
   3508            pFromU2022State->g = 0;
   3509            *p++ = UCNV_SI;
   3510        }
   3511        *p++ = subchar[0];
   3512        break;
   3513    case 'k':
   3514        if(myConverterData->version == 0) {
   3515            if(length == 1) {
   3516                if(args->converter->fromUnicodeStatus) {
   3517                    /* in DBCS mode: switch to SBCS */
   3518                    args->converter->fromUnicodeStatus = 0;
   3519                    *p++ = UCNV_SI;
   3520                }
   3521                *p++ = subchar[0];
   3522            } else /* length == 2*/ {
   3523                if(!args->converter->fromUnicodeStatus) {
   3524                    /* in SBCS mode: switch to DBCS */
   3525                    args->converter->fromUnicodeStatus = 1;
   3526                    *p++ = UCNV_SO;
   3527                }
   3528                *p++ = subchar[0];
   3529                *p++ = subchar[1];
   3530            }
   3531            break;
   3532        } else {
   3533            /* save the subconverter's substitution string */
   3534            uint8_t *currentSubChars = myConverterData->currentConverter->subChars;
   3535            int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;
   3536 
   3537            /* set our substitution string into the subconverter */
   3538            myConverterData->currentConverter->subChars = reinterpret_cast<uint8_t*>(subchar);
   3539            myConverterData->currentConverter->subCharLen = static_cast<int8_t>(length);
   3540 
   3541            /* let the subconverter write the subchar, set/retrieve fromUChar32 state */
   3542            args->converter = myConverterData->currentConverter;
   3543            myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;
               /* NOTE(review): offsetIndex is not forwarded on this path; a literal 0
                * is passed instead - confirm this is intended */
   3544            ucnv_cbFromUWriteSub(args, 0, err);
   3545            cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;
   3546            args->converter = cnv;
   3547 
   3548            /* restore the subconverter's substitution string */
   3549            myConverterData->currentConverter->subChars = currentSubChars;
   3550            myConverterData->currentConverter->subCharLen = currentSubCharLen;
   3551 
               /* on overflow, move the sub-converter's pending bytes into this
                * converter's charErrorBuffer and clear them on the sub-converter */
   3552            if(*err == U_BUFFER_OVERFLOW_ERROR) {
   3553                if(myConverterData->currentConverter->charErrorBufferLength > 0) {
   3554                    uprv_memcpy(
   3555                        cnv->charErrorBuffer,
   3556                        myConverterData->currentConverter->charErrorBuffer,
   3557                        myConverterData->currentConverter->charErrorBufferLength);
   3558                }
   3559                cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;
   3560                myConverterData->currentConverter->charErrorBufferLength = 0;
   3561            }
   3562            return;
   3563        }
   3564    default:
   3565        /* not expected */
   3566        break;
   3567    }
   3568    ucnv_cbFromUWriteBytes(args,
   3569                           buffer, static_cast<int32_t>(p - buffer),
   3570                           offsetIndex, err);
   3571 }
   3572 
   3573 /*
   3574 * Structure for cloning an ISO 2022 converter into a single memory block.
        * _ISO_2022_SafeClone() reports sizeof(struct cloneStruct) as the required
        * buffer size and lays this structure over the caller's buffer.
   3575 */
   3576 struct cloneStruct
   3577 {
           /* the cloned converter itself; its address is what _ISO_2022_SafeClone() returns */
   3578    UConverter cnv;
           /* storage for the clone of the source's currentConverter, when it has one */
   3579    UConverter currentConverter;
           /* copy of the source's UConverterDataISO2022; cnv.extraInfo points here */
   3580    UConverterDataISO2022 mydata;
   3581 };
   3582 
   3583 
   3584 U_CDECL_BEGIN
   3585 
   /*
    * Completes the clone of an ISO 2022 converter inside a caller-supplied block.
    *
    * cnv         - converter being cloned
    * stackBuffer - destination block, used as a struct cloneStruct
    * pBufferSize - in/out; when *pBufferSize == 0 this is a preflight request:
    *               the required size is stored and nullptr is returned
    * status      - in/out error code; nullptr is returned if already failed
    *
    * Steps: copy the UConverterDataISO2022 into the block and point the clone's
    * extraInfo at that copy (marking it isExtraLocal), clone currentConverter
    * into the block's own UConverter slot when the source has one, and
    * increment the reference count of every non-null myConverterArray entry so
    * that the clone shares those sub-converters.
    *
    * Returns the address of the cloned UConverter, or nullptr on preflight or
    * failure.
    *
    * NOTE(review): apart from the preflight case, *pBufferSize is not compared
    * with sizeof(struct cloneStruct) here - presumably the caller guarantees
    * the block is large enough; confirm against ucnv_safeClone().
    */
   3586 static UConverter * U_CALLCONV
   3587 _ISO_2022_SafeClone(
   3588            const UConverter *cnv,
   3589            void *stackBuffer,
   3590            int32_t *pBufferSize,
   3591            UErrorCode *status)
   3592 {
   3593    struct cloneStruct * localClone;
   3594    UConverterDataISO2022 *cnvData;
   3595    int32_t i, size;
   3596 
   3597    if (U_FAILURE(*status)){
   3598        return nullptr;
   3599    }
   3600 
   3601    if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   3602        *pBufferSize = (int32_t)sizeof(struct cloneStruct);
   3603        return nullptr;
   3604    }
   3605 
   3606    cnvData = (UConverterDataISO2022 *)cnv->extraInfo;
   3607    localClone = (struct cloneStruct *)stackBuffer;
   3608 
   3609    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   3610 
   3611    uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));
   3612    localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */
   3613    localClone->cnv.isExtraLocal = true;
   3614 
   3615    /* share the subconverters */
   3616 
   3617    if(cnvData->currentConverter != nullptr) {
               /* clone into the slot reserved inside the block, so only
                * sizeof(UConverter) bytes are offered to the nested clone */
   3618        size = (int32_t)sizeof(UConverter);
   3619        localClone->mydata.currentConverter =
   3620            ucnv_safeClone(cnvData->currentConverter,
   3621                            &localClone->currentConverter,
   3622                            &size, status);
   3623        if(U_FAILURE(*status)) {
   3624            return nullptr;
   3625        }
   3626    }
   3627 
           /* the pointer values were already duplicated by the memcpy above;
            * here only the reference counts are adjusted */
   3628    for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {
   3629        if(cnvData->myConverterArray[i] != nullptr) {
   3630            ucnv_incrementRefCount(cnvData->myConverterArray[i]);
   3631        }
   3632    }
   3633 
   3634    return &localClone->cnv;
   3635 }
   3636 
   3637 U_CDECL_END
   3638 
   /*
    * Adds to the set behind `sa` the code points this ISO 2022 converter handles.
    *
    * cnv        - converter whose extraInfo (UConverterDataISO2022) is inspected
    * sa         - set-adder callbacks (add, addRange, remove, removeRange) and
    *              the target set
    * which      - which set is wanted; forwarded to the sub-converters and also
    *              checked against UCNV_ROUNDTRIP_AND_FALLBACK_SET for JP
    * pErrorCode - in/out error code; nothing is done if it already failed
    *
    * Order of work:
    *  1. With U_ENABLE_GENERIC_ISO_2022, the generic converter adds every code
    *     point except U+D800..U+DFFF and returns.
    *  2. Code points that are handled without a sub-converter table are added
    *     per locale letter ('j', 'c'/'z', 'k').
    *  3. Each non-null entry of myConverterArray contributes its set, filtered
    *     according to locale, version and array index.
    *  4. SO, SI, ESC and the range 0x80..0x9f are removed again.
    */
   3639 static void U_CALLCONV
   3640 _ISO_2022_GetUnicodeSet(const UConverter *cnv,
   3641                    const USetAdder *sa,
   3642                    UConverterUnicodeSet which,
   3643                    UErrorCode *pErrorCode)
   3644 {
   3645    int32_t i;
   3646    UConverterDataISO2022* cnvData;
   3647 
   3648    if (U_FAILURE(*pErrorCode)) {
   3649        return;
   3650    }
   3651 #ifdef U_ENABLE_GENERIC_ISO_2022
   3652    if (cnv->sharedData == &_ISO2022Data) {
   3653        /* We use UTF-8 in this case */
   3654        sa->addRange(sa->set, 0, 0xd7FF);
   3655        sa->addRange(sa->set, 0xE000, 0x10FFFF);
   3656        return;
   3657    }
   3658 #endif
   3659 
   3660    cnvData = static_cast<UConverterDataISO2022*>(cnv->extraInfo);
   3661 
   3662    /* open a set and initialize it with code points that are algorithmically round-tripped */
   3663    switch(cnvData->locale[0]){
   3664    case 'j':
   3665        /* include JIS X 0201 which is hardcoded */
   3666        sa->add(sa->set, 0xa5);
   3667        sa->add(sa->set, 0x203e);
   3668        if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
   3669            /* include Latin-1 for some variants of JP */
   3670            sa->addRange(sa->set, 0, 0xff);
   3671        } else {
   3672            /* include ASCII for JP */
   3673            sa->addRange(sa->set, 0, 0x7f);
   3674        }
   3675        if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
   3676            /*
   3677             * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
   3678             * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
   3679             * use half-width Katakana.
   3680             * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
   3681             * half-width Katakana via the ESC ( I sequence.
   3682             * However, we only emit (fromUnicode) half-width Katakana according to the
   3683             * definition of each variant.
   3684             *
   3685             * When including fallbacks,
   3686             * we need to include half-width Katakana Unicode code points for all JP variants because
   3687             * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
   3688             */
   3689            /* include half-width Katakana for JP */
   3690            sa->addRange(sa->set, HWKANA_START, HWKANA_END);
   3691        }
   3692        break;
   3693 #if !UCONFIG_ONLY_HTML_CONVERSION
   3694    case 'c':
   3695    case 'z':
   3696        /* include ASCII for CN */
   3697        sa->addRange(sa->set, 0, 0x7f);
   3698        break;
   3699    case 'k':
   3700        /* there is only one converter for KR, and it is not in the myConverterArray[] */
   3701        cnvData->currentConverter->sharedData->impl->getUnicodeSet(
   3702                cnvData->currentConverter, sa, which, pErrorCode);
   3703        /* the loop over myConverterArray[] will simply not find another converter */
   3704        break;
   3705 #endif
   3706    default:
   3707        break;
   3708    }
   3709 
   3710 #if 0  /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
   3711            if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3712                cnvData->version==0 && i==CNS_11643
   3713            ) {
   3714                /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */
   3715                ucnv_MBCSGetUnicodeSetForBytes(
   3716                        cnvData->myConverterArray[i],
   3717                        sa, UCNV_ROUNDTRIP_SET,
   3718                        0, 0x81, 0x82,
   3719                        pErrorCode);
   3720            }
   3721 #endif
   3722 
           /* pick one filter per loaded sub-converter, then let the MBCS code add
            * that sub-converter's code points through the filter */
   3723    for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
   3724        UConverterSetFilter filter;
   3725        if(cnvData->myConverterArray[i]!=nullptr) {
   3726            if(cnvData->locale[0]=='j' && i==JISX208) {
   3727                /*
   3728                 * Only add code points that map to Shift-JIS codes
   3729                 * corresponding to JIS X 0208.
   3730                 */
   3731                filter=UCNV_SET_FILTER_SJIS;
   3732 #if !UCONFIG_ONLY_HTML_CONVERSION
   3733            } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
   3734                       cnvData->version==0 && i==CNS_11643) {
   3735                /*
   3736                 * Version-specific for CN:
   3737                 * CN version 0 does not map CNS planes 3..7 although
   3738                 * they are all available in the CNS conversion table;
   3739                 * CN version 1 (-EXT) does map them all.
   3740                 * The two versions create different Unicode sets.
   3741                 */
   3742                filter=UCNV_SET_FILTER_2022_CN;
   3743            } else if(i==KSC5601) {
   3744                /*
   3745                 * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
   3746                 * are broader than GR94.
   3747                 */
   3748                filter=UCNV_SET_FILTER_GR94DBCS;
   3749 #endif
   3750            } else {
   3751                filter=UCNV_SET_FILTER_NONE;
   3752            }
   3753            ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
   3754        }
   3755    }
   3756 
   3757    /*
   3758     * ISO 2022 converters must not convert SO/SI/ESC despite what
   3759     * sub-converters do by themselves.
   3760     * Remove these characters from the set.
   3761     */
   3762    sa->remove(sa->set, 0x0e);
   3763    sa->remove(sa->set, 0x0f);
   3764    sa->remove(sa->set, 0x1b);
   3765 
   3766    /* ISO 2022 converters do not convert C1 controls either */
   3767    sa->removeRange(sa->set, 0x80, 0x9f);
   3768 }
   3769 
   /*
    * Implementation table for the generic ISO_2022 converter (paired with
    * _ISO2022StaticData in _ISO2022Data).
    *
    * The four entries inside the #ifdef are only populated when
    * U_ENABLE_GENERIC_ISO_2022 is defined; otherwise they are nullptr.
    * Judging by the entry names they are presumably the toUnicode,
    * toUnicode-with-offsets, fromUnicode and fromUnicode-with-offsets slots -
    * confirm against the UConverterImpl declaration, which is not visible here.
    *
    * Open/close/reset, getName, WriteSub, SafeClone and GetUnicodeSet are the
    * same functions used by the JP, KR and CN tables in this file.
    */
   3770 static const UConverterImpl _ISO2022Impl={
   3771    UCNV_ISO_2022,
   3772 
   3773    nullptr,
   3774    nullptr,
   3775 
   3776    _ISO2022Open,
   3777    _ISO2022Close,
   3778    _ISO2022Reset,
   3779 
   3780 #ifdef U_ENABLE_GENERIC_ISO_2022
   3781    T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3782    T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
   3783    ucnv_fromUnicode_UTF8,
   3784    ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,
   3785 #else
   3786    nullptr,
   3787    nullptr,
   3788    nullptr,
   3789    nullptr,
   3790 #endif
   3791    nullptr,
   3792 
   3793    nullptr,
   3794    _ISO2022getName,
   3795    _ISO_2022_WriteSub,
   3796    _ISO_2022_SafeClone,
   3797    _ISO_2022_GetUnicodeSet,
   3798 
   3799    nullptr,
   3800    nullptr
   3801 };
   /*
    * Static descriptor for the generic converter, named "ISO_2022".
    * Field order is that of UConverterStaticData, declared outside this chunk;
    * only the size, the name and the commented maximum byte count can be read
    * off directly here. NOTE(review): { 0x1a, 0, 0, 0 } followed by 1 is
    * presumably the substitution byte sequence and its length - confirm
    * against the struct declaration.
    */
   3802 static const UConverterStaticData _ISO2022StaticData={
   3803    sizeof(UConverterStaticData),
   3804    "ISO_2022",
   3805    2022,
   3806    UCNV_IBM,
   3807    UCNV_ISO_2022,
   3808    1,
   3809    3, /* max 3 bytes per char16_t from UTF-8 (4 bytes from surrogate _pair_) */
   3810    { 0x1a, 0, 0, 0 },
   3811    1,
   3812    false,
   3813    false,
   3814    0,
   3815    0,
   3816    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3817 };
   /*
    * Shared-data record for the generic ISO_2022 converter, combining its static
    * descriptor and implementation table. Unlike the JP/KR/CN records below it
    * is not placed in an anonymous namespace; _ISO_2022_GetUnicodeSet() compares
    * cnv->sharedData against its address when U_ENABLE_GENERIC_ISO_2022 is set.
    */
   3818 const UConverterSharedData _ISO2022Data=
   3819        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl);
   3820 
   3821 /*************JP****************/
   /*
    * Implementation table for ISO-2022-JP. The same JP toUnicode function fills
    * two consecutive entries and the same JP fromUnicode function fills the
    * next two (presumably the plain and with-offsets slots - confirm against
    * UConverterImpl). All other entries match the generic _ISO2022Impl table.
    */
   3822 static const UConverterImpl _ISO2022JPImpl={
   3823    UCNV_ISO_2022,
   3824 
   3825    nullptr,
   3826    nullptr,
   3827 
   3828    _ISO2022Open,
   3829    _ISO2022Close,
   3830    _ISO2022Reset,
   3831 
   3832    UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3833    UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3834    UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3835    UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,
   3836    nullptr,
   3837 
   3838    nullptr,
   3839    _ISO2022getName,
   3840    _ISO_2022_WriteSub,
   3841    _ISO_2022_SafeClone,
   3842    _ISO_2022_GetUnicodeSet,
   3843 
   3844    nullptr,
   3845    nullptr
   3846 };
   /*
    * Static descriptor for the converter named "ISO_2022_JP".
    * Differs from the generic descriptor in the name, in the third field
    * (0 instead of 2022) and in the maximum byte count (6).
    * Field meanings come from UConverterStaticData, declared outside this chunk.
    */
   3847 static const UConverterStaticData _ISO2022JPStaticData={
   3848    sizeof(UConverterStaticData),
   3849    "ISO_2022_JP",
   3850    0,
   3851    UCNV_IBM,
   3852    UCNV_ISO_2022,
   3853    1,
   3854    6, /* max 6 bytes per char16_t: 4-byte escape sequence + DBCS */
   3855    { 0x1a, 0, 0, 0 },
   3856    1,
   3857    false,
   3858    false,
   3859    0,
   3860    0,
   3861    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3862 };
   3863 
   3864 namespace {
   3865 
        /* Shared-data record for ISO-2022-JP: static descriptor plus implementation
         * table. The anonymous namespace keeps it local to this translation unit. */
   3866 const UConverterSharedData _ISO2022JPData=
   3867        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl);
   3868 
   3869 }  // namespace
   3870 
   3871 #if !UCONFIG_ONLY_HTML_CONVERSION
   3872 /************* KR ***************/
   /*
    * Implementation table for ISO-2022-KR. The same KR toUnicode function fills
    * two consecutive entries and the same KR fromUnicode function fills the
    * next two (presumably the plain and with-offsets slots - confirm against
    * UConverterImpl). All other entries match the generic _ISO2022Impl table.
    */
   3873 static const UConverterImpl _ISO2022KRImpl={
   3874    UCNV_ISO_2022,
   3875 
   3876    nullptr,
   3877    nullptr,
   3878 
   3879    _ISO2022Open,
   3880    _ISO2022Close,
   3881    _ISO2022Reset,
   3882 
   3883    UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3884    UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3885    UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3886    UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,
   3887    nullptr,
   3888 
   3889    nullptr,
   3890    _ISO2022getName,
   3891    _ISO_2022_WriteSub,
   3892    _ISO_2022_SafeClone,
   3893    _ISO_2022_GetUnicodeSet,
   3894 
   3895    nullptr,
   3896    nullptr
   3897 };
   /*
    * Static descriptor for the converter named "ISO_2022_KR".
    * Same layout as the JP descriptor except for the name and the maximum
    * byte count (8).
    * Field meanings come from UConverterStaticData, declared outside this chunk.
    */
   3898 static const UConverterStaticData _ISO2022KRStaticData={
   3899    sizeof(UConverterStaticData),
   3900    "ISO_2022_KR",
   3901    0,
   3902    UCNV_IBM,
   3903    UCNV_ISO_2022,
   3904    1,
   3905    8, /* max 8 bytes per char16_t */
   3906    { 0x1a, 0, 0, 0 },
   3907    1,
   3908    false,
   3909    false,
   3910    0,
   3911    0,
   3912    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3913 };
   3914 
   3915 namespace {
   3916 
        /* Shared-data record for ISO-2022-KR: static descriptor plus implementation
         * table. The anonymous namespace keeps it local to this translation unit. */
   3917 const UConverterSharedData _ISO2022KRData=
   3918        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl);
   3919 
   3920 }  // namespace
   3921 
   3922 /*************** CN ***************/
   /*
    * Implementation table for ISO-2022-CN. The same CN toUnicode function fills
    * two consecutive entries and the same CN fromUnicode function fills the
    * next two (presumably the plain and with-offsets slots - confirm against
    * UConverterImpl). All other entries match the generic _ISO2022Impl table.
    */
   3923 static const UConverterImpl _ISO2022CNImpl={
   3924 
   3925    UCNV_ISO_2022,
   3926 
   3927    nullptr,
   3928    nullptr,
   3929 
   3930    _ISO2022Open,
   3931    _ISO2022Close,
   3932    _ISO2022Reset,
   3933 
   3934    UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3935    UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3936    UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3937    UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,
   3938    nullptr,
   3939 
   3940    nullptr,
   3941    _ISO2022getName,
   3942    _ISO_2022_WriteSub,
   3943    _ISO_2022_SafeClone,
   3944    _ISO_2022_GetUnicodeSet,
   3945 
   3946    nullptr,
   3947    nullptr
   3948 };
   /*
    * Static descriptor for the converter named "ISO_2022_CN".
    * Same layout as the JP descriptor except for the name and the maximum
    * byte count (8, broken down in the inline comment below).
    * Field meanings come from UConverterStaticData, declared outside this chunk.
    */
   3949 static const UConverterStaticData _ISO2022CNStaticData={
   3950    sizeof(UConverterStaticData),
   3951    "ISO_2022_CN",
   3952    0,
   3953    UCNV_IBM,
   3954    UCNV_ISO_2022,
   3955    1,
   3956    8, /* max 8 bytes per char16_t: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */
   3957    { 0x1a, 0, 0, 0 },
   3958    1,
   3959    false,
   3960    false,
   3961    0,
   3962    0,
   3963    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
   3964 };
   3965 
   3966 namespace {
   3967 
        /* Shared-data record for ISO-2022-CN: static descriptor plus implementation
         * table. The anonymous namespace keeps it local to this translation unit. */
   3968 const UConverterSharedData _ISO2022CNData=
   3969        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
   3970 
   3971 }  // namespace
   3972 #endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
   3973 
   3974 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE