tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucnvisci.cpp (72789B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2000-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *   file name:  ucnvisci.c
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2001JUN26
     14 *   created by: Ram Viswanadha
     15 *
     16 *   Date        Name        Description
     17 *   24/7/2001   Ram         Added support for EXT character handling
     18 */
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
     23 
     24 #include "unicode/ucnv.h"
     25 #include "unicode/ucnv_cb.h"
     26 #include "unicode/utf16.h"
     27 #include "cmemory.h"
     28 #include "ucnv_bld.h"
     29 #include "ucnv_cnv.h"
     30 #include "cstring.h"
     31 #include "uassert.h"
     32 
     33 #define UCNV_OPTIONS_VERSION_MASK 0xf /* low 4 bits of the converter options select the ISCII version (see _ISCIIOpen) */
     34 #define NUKTA               0x093c /* Unicode counterpart of ISCII_NUKTA (see fromUnicodeTable) */
     35 #define HALANT              0x094d /* Unicode counterpart of ISCII_HALANT (see fromUnicodeTable) */
     36 #define ZWNJ                0x200c /* Zero Width Non Joiner */
     37 #define ZWJ                 0x200d /* Zero Width Joiner */
     38 #define INVALID_CHAR        0xffff
     39 #define ATR                 0xEF   /* Attribute code */
     40 #define EXT                 0xF0   /* Extension code */
     41 #define DANDA               0x0964 /* Unicode counterpart of ISCII_DANDA (see fromUnicodeTable) */
     42 #define DOUBLE_DANDA        0x0965
     43 #define ISCII_NUKTA         0xE9
     44 #define ISCII_HALANT        0xE8
     45 #define ISCII_DANDA         0xEA
     46 #define ISCII_INV           0xD9
     47 #define ISCII_VOWEL_SIGN_E  0xE0
     48 #define INDIC_BLOCK_BEGIN   0x0900
     49 #define INDIC_BLOCK_END     0x0D7F
     50 #define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
     51 #define VOCALLIC_RR         0x0931
     52 #define LF                  0x0A
     53 #define ASCII_END           0xA0
     54 #define NO_CHAR_MARKER      0xFFFE
     55 #define TELUGU_DELTA        (DELTA * TELUGU) /* parenthesized so the product stays intact inside larger expressions */
     56 #define DEV_ABBR_SIGN       0x0970
     57 #define DEV_ANUDATTA        0x0952
     58 #define EXT_RANGE_BEGIN     0xA1
     59 #define EXT_RANGE_END       0xEE
     60 /* Values used for the special handling of Gurmukhi (PNJ) */
     61 #define PNJ_DELTA           0x0100
     62 #define PNJ_BINDI           0x0A02
     63 #define PNJ_TIPPI           0x0A70
     64 #define PNJ_SIGN_VIRAMA     0x0A4D
     65 #define PNJ_ADHAK           0x0A71
     66 #define PNJ_HA              0x0A39
     67 #define PNJ_RRA             0x0A5C
     68 
     69 typedef enum { /* Script index; _ISCIIOpen multiplies it by DELTA to form the per-script delta */
     70    DEVANAGARI =0,
     71    BENGALI,
     72    GURMUKHI,
     73    GUJARATI,
     74    ORIYA,
     75    TAMIL,
     76    TELUGU,
     77    KANNADA,
     78    MALAYALAM, /* last script index; the order above matches the rows of lookupInitialData */
     79    DELTA=0x80 /* not a script: the multiplier applied to a script index */
     80 }UniLang;
     81 
     82 /**
     83 * Script selector values: the converter switches code pages when <ATR> (0xEF)
     84 * followed by one of the values below is encountered.
     85 */
     86 typedef enum {
     87    DEF = 0x40, /* switch back to the default state (see defDeltaToUnicode / defMaskToUnicode) */
     88    RMN = 0x41, /* presumably Roman; not referenced in the visible code - confirm */
     89    DEV = 0x42, /* Devanagari (paired with DEVANAGARI in lookupInitialData) */
     90    BNG = 0x43, /* Bengali */
     91    TML = 0x44, /* Tamil */
     92    TLG = 0x45, /* Telugu */
     93    ASM = 0x46, /* Assamese; shares a validity bit with Bengali (see validityTable comment) */
     94    ORI = 0x47, /* Oriya */
     95    KND = 0x48, /* Kannada */
     96    MLM = 0x49, /* Malayalam */
     97    GJR = 0x4A, /* Gujarati */
     98    PNJ = 0x4B, /* Gurmukhi (paired with GURMUKHI in lookupInitialData) */
     99    ARB = 0x71, /* 0x71..0x76: no entry in lookupInitialData; meaning not shown in the visible code - confirm */
    100    PES = 0x72,
    101    URD = 0x73,
    102    SND = 0x74,
    103    KSM = 0x75,
    104    PST = 0x76
    105 }ISCIILang;
    106 
    107 typedef enum { /* One validity bit per script (or script pair); combined in validityTable entries */
    108    DEV_MASK =0x80, /* Devanagari */
    109    PNJ_MASK =0x40, /* Gurmukhi */
    110    GJR_MASK =0x20, /* Gujarati */
    111    ORI_MASK =0x10, /* Oriya */
    112    BNG_MASK =0x08, /* Bengali (and Assamese) */
    113    KND_MASK =0x04, /* Kannada and Telugu share this bit (see lookupInitialData) */
    114    MLM_MASK =0x02, /* Malayalam */
    115    TML_MASK =0x01, /* Tamil */
    116    ZERO =0x00      /* placeholder for "not valid in this script" in validityTable rows */
    117 }MaskEnum;
    118 
    119 #define ISCII_CNV_PREFIX "ISCII,version=" /* converter name prefix; _ISCIIOpen appends the version digit */
    120 
    121 typedef struct { /* Per-converter ISCII state, allocated by _ISCIIOpen and stored in cnv->extraInfo */
    122    char16_t contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis (toUnicode direction) */
    123    char16_t contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis (fromUnicode direction) */
    124    uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
    125    uint16_t currentDeltaFromUnicode;   /* current delta in Indic block (fromUnicode direction) */
    126    uint16_t currentDeltaToUnicode;     /* current delta in Indic block (toUnicode direction) */
    127    MaskEnum currentMaskFromUnicode;    /* mask for current state in fromUnicode */
    128    MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
    129    MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
    130    UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
    131    UBool resetToDefaultToUnicode;      /* boolean for resetting to default delta and mask when a newline is encountered*/
    132    char name[sizeof(ISCII_CNV_PREFIX) + 1]; /* prefix + one version digit + NUL (sizeof already counts the prefix's NUL) */
    133    UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
    134 } UConverterDataISCII;
    135 
    136 typedef struct LookupDataStruct { /* One row of lookupInitialData: the initial settings for one script */
    137    UniLang uniLang;       /* script index; multiplied by DELTA in _ISCIIOpen */
    138    MaskEnum maskEnum;     /* validity bit used for this script */
    139    ISCIILang isciiLang;   /* ISCII script selector value for this script */
    140 } LookupDataStruct;
    141 
    142 static const LookupDataStruct lookupInitialData[]={ /* indexed by the version number (0..8) from the converter options, see _ISCIIOpen */
    143    { DEVANAGARI, DEV_MASK,  DEV },
    144    { BENGALI,    BNG_MASK,  BNG },
    145    { GURMUKHI,   PNJ_MASK,  PNJ },
    146    { GUJARATI,   GJR_MASK,  GJR },
    147    { ORIYA,      ORI_MASK,  ORI },
    148    { TAMIL,      TML_MASK,  TML },
    149    { TELUGU,     KND_MASK,  TLG }, /* Telugu shares the Kannada validity bit; there is no separate Telugu mask */
    150    { KANNADA,    KND_MASK,  KND },
    151    { MALAYALAM,  MLM_MASK,  MLM }
    152 };
    153 
    154 /*
    155 * Flags for special handling of certain Gurmukhi characters: one entry per code point 0x0A00..0x0A4F (index = c - 0x0A00).
    156 * Bit 0 (value 1): PNJ consonant, tested by isPNJConsonant()
    157 * Bit 1 (value 2): PNJ Bindi Tippi, tested by isPNJBindiTippi(); a value of 3 has both bits set
    158 */
    159 static const uint8_t pnjMap[80] = {
    160    /* 0A00..0A0F */
    161    0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
    162    /* 0A10..0A1F */
    163    0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
    164    /* 0A20..0A2F */
    165    3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
    166    /* 0A30..0A3F */
    167    3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
    168    /* 0A40..0A4F */
    169    0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
    170 };
    171 
    172 static UBool
    173 isPNJConsonant(UChar32 c) {
    174    /* pnjMap covers only 0x0A00..0x0A4F; bit 0 of an entry is the consonant flag. */
    175    if (0xa00 <= c && c < 0xa50) {
    176        return pnjMap[c - 0xa00] & 1;
    177    }
    178    return false;
    179 }
    180 
    181 static UBool
    182 isPNJBindiTippi(UChar32 c) {
    183    /* Bit 1 of a pnjMap entry is the Bindi Tippi flag; code points outside 0x0A00..0x0A4F never have it. */
    184    if (!(0xa00 <= c && c < 0xa50)) {
    185        return false;
    186    }
    187    return pnjMap[c - 0xa00] >> 1;
    188 }
    189 U_CDECL_BEGIN
    190 static void  U_CALLCONV
    191 _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
    192    /* Allocate the per-converter ISCII state and seed it from the version held in the low
    193     * bits of pArgs->options; versions above 8 are rejected with U_ILLEGAL_ARGUMENT_ERROR. */
    194    if(pArgs->onlyTestIsLoadable) {
    195        return;
    196    }
    197 
    198    cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
    199    if (cnv->extraInfo == nullptr) {
    200        *errorCode = U_MEMORY_ALLOCATION_ERROR;
    201        return;
    202    }
    203 
    204    UConverterDataISCII *state = static_cast<UConverterDataISCII *>(cnv->extraInfo);
    205    const auto version = pArgs->options & UCNV_OPTIONS_VERSION_MASK;
    206 
    207    /* toUnicodeStatus is primed before the version check, so it is set even when the version is rejected */
    208    cnv->toUnicodeStatus = missingCharMarker;
    209    if (version >= 9) {
    210        uprv_free(cnv->extraInfo);
    211        cnv->extraInfo = nullptr;
    212        *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    213        return;
    214    }
    215 
    216    /* context characters and flags start from their neutral values */
    217    state->contextCharToUnicode = NO_CHAR_MARKER;
    218    state->contextCharFromUnicode = 0x0000;
    219    state->resetToDefaultToUnicode = false;
    220    state->isFirstBuffer = true;
    221    state->prevToUnicodeStatus = 0x0000;
    222    /* every delta and mask starts at the value of the requested script */
    223    const uint16_t scriptDelta = (uint16_t)(lookupInitialData[version].uniLang * DELTA);
    224    const MaskEnum scriptMask = lookupInitialData[version].maskEnum;
    225    state->defDeltaToUnicode = state->currentDeltaToUnicode = state->currentDeltaFromUnicode = scriptDelta;
    226    state->defMaskToUnicode = state->currentMaskToUnicode = state->currentMaskFromUnicode = scriptMask;
    227 
    228    /* the converter name is the prefix followed by the single version digit */
    229    (void)uprv_strcpy(state->name, ISCII_CNV_PREFIX);
    230    const int32_t len = (int32_t)uprv_strlen(state->name);
    231    state->name[len] = (char)(version + '0');
    232    state->name[len+1] = 0;
    233 }
    234 
    235 static void U_CALLCONV
    236 _ISCIIClose(UConverter *cnv) {
    237    /* Detach the ISCII state; it is freed here unless the converter flags it as extra-local. */
    238    if (cnv->extraInfo == nullptr) { return; }
    239    if (!cnv->isExtraLocal) {
    240        uprv_free(cnv->extraInfo);
    241    }
    242    cnv->extraInfo = nullptr;
    243 }
    244 
    245 static const char*  U_CALLCONV
    246 _ISCIIgetName(const UConverter* cnv) {
    247    /* The name is kept in the per-converter ISCII state; without that state there is no name. */
    248    if (cnv->extraInfo == nullptr) {
    249        return nullptr;
    250    }
    251    return static_cast<const UConverterDataISCII *>(cnv->extraInfo)->name;
    252 }
    253 
    254 static void U_CALLCONV
    255 _ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
    256    UConverterDataISCII *state = static_cast<UConverterDataISCII *>(cnv->extraInfo);
    257    if (choice != UCNV_RESET_TO_UNICODE) { /* fromUnicode state; this branch also clears resetToDefaultToUnicode */
    258        state->resetToDefaultToUnicode = false;
    259        state->isFirstBuffer = true;
    260        state->currentDeltaFromUnicode = state->defDeltaToUnicode;
    261        state->currentMaskFromUnicode = state->defMaskToUnicode;
    262        state->contextCharFromUnicode = 0x00;
    263        cnv->fromUChar32 = 0x0000;
    264    }
    265    if (choice <= UCNV_RESET_TO_UNICODE) { /* toUnicode state goes back to the defaults chosen at open time */
    266        state->prevToUnicodeStatus = 0x0000;
    267        state->contextCharToUnicode = NO_CHAR_MARKER;
    268        state->currentMaskToUnicode = state->defMaskToUnicode;
    269        state->currentDeltaToUnicode = state->defDeltaToUnicode;
    270        cnv->mode = 0;
    271        cnv->toUnicodeStatus = missingCharMarker;
    272    }
    273 }
    274 
    275 /**
    276 * The values in the validity table are indexed by the lower 7 bits of a code point
    277 * in the Unicode range 0x0900 - 0x097f. The values have a structure like:
    278 *       ---------------------------------------------------------------
    279 *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
    280 *      |       |       |       |       | ASM   | KND   |       |       |
    281 *       ---------------------------------------------------------------
    282 * If a code point is valid in a particular script
    283 * then that bit is turned on
    284 *
    285 * Unicode does not distinguish between Bengali and Assamese, so we use 1 bit
    286 * to represent both languages.
    287 *
    288 * Telugu and Kannada have the same code points except for Vocallic_RR, which we special-case,
    289 * so we combine them and use 1 bit to represent both languages.
    290 *
    291 * TODO: It is probably easier to understand and maintain to change this
    292 * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
    293 */
    294 
    295 static const uint8_t validityTable[128] = {
    296 /* This state table is tool generated; please do not edit unless you know exactly what you are doing. */
    297 /* Note: This table was edited to mirror the Windows XP implementation, so the mask in a row comment can differ from the row's initializer (e.g. 0x901). */
    298 /* Row comment columns: ISCII byte : validity mask : Unicode code point */
    299 /*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    300 /*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
    301 /*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    302 /*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    303 /*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    304 /*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    305 /*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    306 /*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    307 /*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    308 /*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    309 /*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    310 /*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    311 /*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    312 /*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    313 /*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
    314 /*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    315 /*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    316 /*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    317 /*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
    318 /*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    319 /*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    320 /*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    321 /*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    322 /*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    323 /*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    324 /*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    325 /*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    326 /*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    327 /*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    328 /*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    329 /*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    330 /*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    331 /*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    332 /*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    333 /*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    334 /*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    335 /*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    336 /*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    337 /*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    338 /*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    339 /*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    340 /*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
    341 /*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    342 /*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    343 /*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    344 /*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    345 /*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    346 /*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    347 /*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    348 /*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
    349 /*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    350 /*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
    351 /*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
    352 /*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
    353 /*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    354 /*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    355 /*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    356 /*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    357 /*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    358 /*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    359 /*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
    360 /*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    361 /*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    362 /*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    363 /*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    364 /*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    365 /*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    366 /*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    367 /*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
    368 /*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    369 /*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
    370 /*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    371 /*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    372 /*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    373 /*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
    374 /*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    375 /*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    376 /*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    377 /*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    378 /*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    379 /*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    380 /*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    381 /*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    382 /*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    383 /*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    384 /*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
    385 /*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
    386 /*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
    387 /*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    388 /*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    389 /*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    390 /*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    391 /*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
    392 /*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
    393 /*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    394 /*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
    395 /*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    396 /*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
    397 /*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
    398 /*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
    399 /*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    400 /*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    401 /*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    402 /*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    403 /*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    404 /*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    405 /*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    406 /*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    407 /*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    408 /*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    409 /*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    410 /*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
    411 /*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
    412 /*
    413 * The length of the array is 128 to provide values for 0x900..0x97f.
    414 * The last 15 entries for 0x971..0x97f of the validity table are all zero
    415 * because no Indic script uses such Unicode code points; only one of them is written out below, the rest are implicitly zero-initialized.
    416 */
    417 /*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
    418 };
    419 
    420 static const uint16_t fromUnicodeTable[128]={ /* one slot per code point 0x0900..0x097f (named in each row comment); values are ISCII byte(s) */
    421    0x00a0 ,/* 0x0900 */
    422    0x00a1 ,/* 0x0901 */
    423    0x00a2 ,/* 0x0902 */
    424    0x00a3 ,/* 0x0903 */
    425    0xa4e0 ,/* 0x0904 */ /* non-zero high byte: presumably two ISCII bytes packed into one value - confirm against the fromUnicode code */
    426    0x00a4 ,/* 0x0905 */
    427    0x00a5 ,/* 0x0906 */
    428    0x00a6 ,/* 0x0907 */
    429    0x00a7 ,/* 0x0908 */
    430    0x00a8 ,/* 0x0909 */
    431    0x00a9 ,/* 0x090a */
    432    0x00aa ,/* 0x090b */
    433    0xA6E9 ,/* 0x090c */
    434    0x00ae ,/* 0x090d */
    435    0x00ab ,/* 0x090e */
    436    0x00ac ,/* 0x090f */
    437    0x00ad ,/* 0x0910 */
    438    0x00b2 ,/* 0x0911 */
    439    0x00af ,/* 0x0912 */
    440    0x00b0 ,/* 0x0913 */
    441    0x00b1 ,/* 0x0914 */
    442    0x00b3 ,/* 0x0915 */
    443    0x00b4 ,/* 0x0916 */
    444    0x00b5 ,/* 0x0917 */
    445    0x00b6 ,/* 0x0918 */
    446    0x00b7 ,/* 0x0919 */
    447    0x00b8 ,/* 0x091a */
    448    0x00b9 ,/* 0x091b */
    449    0x00ba ,/* 0x091c */
    450    0x00bb ,/* 0x091d */
    451    0x00bc ,/* 0x091e */
    452    0x00bd ,/* 0x091f */
    453    0x00be ,/* 0x0920 */
    454    0x00bf ,/* 0x0921 */
    455    0x00c0 ,/* 0x0922 */
    456    0x00c1 ,/* 0x0923 */
    457    0x00c2 ,/* 0x0924 */
    458    0x00c3 ,/* 0x0925 */
    459    0x00c4 ,/* 0x0926 */
    460    0x00c5 ,/* 0x0927 */
    461    0x00c6 ,/* 0x0928 */
    462    0x00c7 ,/* 0x0929 */
    463    0x00c8 ,/* 0x092a */
    464    0x00c9 ,/* 0x092b */
    465    0x00ca ,/* 0x092c */
    466    0x00cb ,/* 0x092d */
    467    0x00cc ,/* 0x092e */
    468    0x00cd ,/* 0x092f */
    469    0x00cf ,/* 0x0930 */
    470    0x00d0 ,/* 0x0931 */
    471    0x00d1 ,/* 0x0932 */
    472    0x00d2 ,/* 0x0933 */
    473    0x00d3 ,/* 0x0934 */
    474    0x00d4 ,/* 0x0935 */
    475    0x00d5 ,/* 0x0936 */
    476    0x00d6 ,/* 0x0937 */
    477    0x00d7 ,/* 0x0938 */
    478    0x00d8 ,/* 0x0939 */
    479    0xFFFF ,/* 0x093A */ /* 0xFFFF equals INVALID_CHAR; presumably marks a code point with no ISCII mapping - confirm */
    480    0xFFFF ,/* 0x093B */
    481    0x00e9 ,/* 0x093c */
    482    0xEAE9 ,/* 0x093d */
    483    0x00da ,/* 0x093e */
    484    0x00db ,/* 0x093f */
    485    0x00dc ,/* 0x0940 */
    486    0x00dd ,/* 0x0941 */
    487    0x00de ,/* 0x0942 */
    488    0x00df ,/* 0x0943 */
    489    0xDFE9 ,/* 0x0944 */
    490    0x00e3 ,/* 0x0945 */
    491    0x00e0 ,/* 0x0946 */
    492    0x00e1 ,/* 0x0947 */
    493    0x00e2 ,/* 0x0948 */
    494    0x00e7 ,/* 0x0949 */
    495    0x00e4 ,/* 0x094a */
    496    0x00e5 ,/* 0x094b */
    497    0x00e6 ,/* 0x094c */
    498    0x00e8 ,/* 0x094d */
    499    0x00ec ,/* 0x094e */
    500    0x00ed ,/* 0x094f */
    501    0xA1E9 ,/* 0x0950 */ /* OM Symbol */
    502    0xFFFF ,/* 0x0951 */
    503    0xF0B8 ,/* 0x0952 */
    504    0xFFFF ,/* 0x0953 */
    505    0xFFFF ,/* 0x0954 */
    506    0xFFFF ,/* 0x0955 */
    507    0xFFFF ,/* 0x0956 */
    508    0xFFFF ,/* 0x0957 */
    509    0xb3e9 ,/* 0x0958 */
    510    0xb4e9 ,/* 0x0959 */
    511    0xb5e9 ,/* 0x095a */
    512    0xbae9 ,/* 0x095b */
    513    0xbfe9 ,/* 0x095c */
    514    0xC0E9 ,/* 0x095d */
    515    0xc9e9 ,/* 0x095e */
    516    0x00ce ,/* 0x095f */
    517    0xAAe9 ,/* 0x0960 */
    518    0xA7E9 ,/* 0x0961 */
    519    0xDBE9 ,/* 0x0962 */
    520    0xDCE9 ,/* 0x0963 */
    521    0x00ea ,/* 0x0964 */
    522    0xeaea ,/* 0x0965 */
    523    0x00f1 ,/* 0x0966 */
    524    0x00f2 ,/* 0x0967 */
    525    0x00f3 ,/* 0x0968 */
    526    0x00f4 ,/* 0x0969 */
    527    0x00f5 ,/* 0x096a */
    528    0x00f6 ,/* 0x096b */
    529    0x00f7 ,/* 0x096c */
    530    0x00f8 ,/* 0x096d */
    531    0x00f9 ,/* 0x096e */
    532    0x00fa ,/* 0x096f */
    533    0xF0BF ,/* 0x0970 */
    534    0xFFFF ,/* 0x0971 */
    535    0xFFFF ,/* 0x0972 */
    536    0xFFFF ,/* 0x0973 */
    537    0xFFFF ,/* 0x0974 */
    538    0xFFFF ,/* 0x0975 */
    539    0xFFFF ,/* 0x0976 */
    540    0xFFFF ,/* 0x0977 */
    541    0xFFFF ,/* 0x0978 */
    542    0xFFFF ,/* 0x0979 */
    543    0xFFFF ,/* 0x097a */
    544    0xFFFF ,/* 0x097b */
    545    0xFFFF ,/* 0x097c */
    546    0xFFFF ,/* 0x097d */
    547    0xFFFF ,/* 0x097e */
    548    0xFFFF ,/* 0x097f */
    549 };
    /*
     * ISCII-to-Unicode lookup table with one 16-bit entry per byte value
     * (the trailing comment on each row is the index).
     *  - 0x00-0xa0 map to the code point with the same numeric value.
     *  - 0xa1-0xea and 0xf1-0xfa map into the U+09xx range, except 0xd9
     *    (ISCII_INV), which maps to 0x200D (ZWJ).
     *  - 0xFFFF (INVALID_CHAR) marks bytes with no entry here: 0xeb-0xf0,
     *    which include ATR (0xEF) and EXT (0xF0), and 0xfb-0xff.
     * GET_MAPPING reads this table before applying the per-script validity
     * check.
     */
    550 static const uint16_t toUnicodeTable[256]={
    551    0x0000,/* 0x00 */
    552    0x0001,/* 0x01 */
    553    0x0002,/* 0x02 */
    554    0x0003,/* 0x03 */
    555    0x0004,/* 0x04 */
    556    0x0005,/* 0x05 */
    557    0x0006,/* 0x06 */
    558    0x0007,/* 0x07 */
    559    0x0008,/* 0x08 */
    560    0x0009,/* 0x09 */
    561    0x000a,/* 0x0a */
    562    0x000b,/* 0x0b */
    563    0x000c,/* 0x0c */
    564    0x000d,/* 0x0d */
    565    0x000e,/* 0x0e */
    566    0x000f,/* 0x0f */
    567    0x0010,/* 0x10 */
    568    0x0011,/* 0x11 */
    569    0x0012,/* 0x12 */
    570    0x0013,/* 0x13 */
    571    0x0014,/* 0x14 */
    572    0x0015,/* 0x15 */
    573    0x0016,/* 0x16 */
    574    0x0017,/* 0x17 */
    575    0x0018,/* 0x18 */
    576    0x0019,/* 0x19 */
    577    0x001a,/* 0x1a */
    578    0x001b,/* 0x1b */
    579    0x001c,/* 0x1c */
    580    0x001d,/* 0x1d */
    581    0x001e,/* 0x1e */
    582    0x001f,/* 0x1f */
    583    0x0020,/* 0x20 */
    584    0x0021,/* 0x21 */
    585    0x0022,/* 0x22 */
    586    0x0023,/* 0x23 */
    587    0x0024,/* 0x24 */
    588    0x0025,/* 0x25 */
    589    0x0026,/* 0x26 */
    590    0x0027,/* 0x27 */
    591    0x0028,/* 0x28 */
    592    0x0029,/* 0x29 */
    593    0x002a,/* 0x2a */
    594    0x002b,/* 0x2b */
    595    0x002c,/* 0x2c */
    596    0x002d,/* 0x2d */
    597    0x002e,/* 0x2e */
    598    0x002f,/* 0x2f */
    599    0x0030,/* 0x30 */
    600    0x0031,/* 0x31 */
    601    0x0032,/* 0x32 */
    602    0x0033,/* 0x33 */
    603    0x0034,/* 0x34 */
    604    0x0035,/* 0x35 */
    605    0x0036,/* 0x36 */
    606    0x0037,/* 0x37 */
    607    0x0038,/* 0x38 */
    608    0x0039,/* 0x39 */
    609    0x003A,/* 0x3A */
    610    0x003B,/* 0x3B */
    611    0x003c,/* 0x3c */
    612    0x003d,/* 0x3d */
    613    0x003e,/* 0x3e */
    614    0x003f,/* 0x3f */
    615    0x0040,/* 0x40 */
    616    0x0041,/* 0x41 */
    617    0x0042,/* 0x42 */
    618    0x0043,/* 0x43 */
    619    0x0044,/* 0x44 */
    620    0x0045,/* 0x45 */
    621    0x0046,/* 0x46 */
    622    0x0047,/* 0x47 */
    623    0x0048,/* 0x48 */
    624    0x0049,/* 0x49 */
    625    0x004a,/* 0x4a */
    626    0x004b,/* 0x4b */
    627    0x004c,/* 0x4c */
    628    0x004d,/* 0x4d */
    629    0x004e,/* 0x4e */
    630    0x004f,/* 0x4f */
    631    0x0050,/* 0x50 */
    632    0x0051,/* 0x51 */
    633    0x0052,/* 0x52 */
    634    0x0053,/* 0x53 */
    635    0x0054,/* 0x54 */
    636    0x0055,/* 0x55 */
    637    0x0056,/* 0x56 */
    638    0x0057,/* 0x57 */
    639    0x0058,/* 0x58 */
    640    0x0059,/* 0x59 */
    641    0x005a,/* 0x5a */
    642    0x005b,/* 0x5b */
    643    0x005c,/* 0x5c */
    644    0x005d,/* 0x5d */
    645    0x005e,/* 0x5e */
    646    0x005f,/* 0x5f */
    647    0x0060,/* 0x60 */
    648    0x0061,/* 0x61 */
    649    0x0062,/* 0x62 */
    650    0x0063,/* 0x63 */
    651    0x0064,/* 0x64 */
    652    0x0065,/* 0x65 */
    653    0x0066,/* 0x66 */
    654    0x0067,/* 0x67 */
    655    0x0068,/* 0x68 */
    656    0x0069,/* 0x69 */
    657    0x006a,/* 0x6a */
    658    0x006b,/* 0x6b */
    659    0x006c,/* 0x6c */
    660    0x006d,/* 0x6d */
    661    0x006e,/* 0x6e */
    662    0x006f,/* 0x6f */
    663    0x0070,/* 0x70 */
    664    0x0071,/* 0x71 */
    665    0x0072,/* 0x72 */
    666    0x0073,/* 0x73 */
    667    0x0074,/* 0x74 */
    668    0x0075,/* 0x75 */
    669    0x0076,/* 0x76 */
    670    0x0077,/* 0x77 */
    671    0x0078,/* 0x78 */
    672    0x0079,/* 0x79 */
    673    0x007a,/* 0x7a */
    674    0x007b,/* 0x7b */
    675    0x007c,/* 0x7c */
    676    0x007d,/* 0x7d */
    677    0x007e,/* 0x7e */
    678    0x007f,/* 0x7f */
    679    0x0080,/* 0x80 */
    680    0x0081,/* 0x81 */
    681    0x0082,/* 0x82 */
    682    0x0083,/* 0x83 */
    683    0x0084,/* 0x84 */
    684    0x0085,/* 0x85 */
    685    0x0086,/* 0x86 */
    686    0x0087,/* 0x87 */
    687    0x0088,/* 0x88 */
    688    0x0089,/* 0x89 */
    689    0x008a,/* 0x8a */
    690    0x008b,/* 0x8b */
    691    0x008c,/* 0x8c */
    692    0x008d,/* 0x8d */
    693    0x008e,/* 0x8e */
    694    0x008f,/* 0x8f */
    695    0x0090,/* 0x90 */
    696    0x0091,/* 0x91 */
    697    0x0092,/* 0x92 */
    698    0x0093,/* 0x93 */
    699    0x0094,/* 0x94 */
    700    0x0095,/* 0x95 */
    701    0x0096,/* 0x96 */
    702    0x0097,/* 0x97 */
    703    0x0098,/* 0x98 */
    704    0x0099,/* 0x99 */
    705    0x009a,/* 0x9a */
    706    0x009b,/* 0x9b */
    707    0x009c,/* 0x9c */
    708    0x009d,/* 0x9d */
    709    0x009e,/* 0x9e */
    710    0x009f,/* 0x9f */
    711    0x00A0,/* 0xa0 */
    712    0x0901,/* 0xa1 */
    713    0x0902,/* 0xa2 */
    714    0x0903,/* 0xa3 */
    715    0x0905,/* 0xa4 */
    716    0x0906,/* 0xa5 */
    717    0x0907,/* 0xa6 */
    718    0x0908,/* 0xa7 */
    719    0x0909,/* 0xa8 */
    720    0x090a,/* 0xa9 */
    721    0x090b,/* 0xaa */
    722    0x090e,/* 0xab */
    723    0x090f,/* 0xac */
    724    0x0910,/* 0xad */
    725    0x090d,/* 0xae */
    726    0x0912,/* 0xaf */
    727    0x0913,/* 0xb0 */
    728    0x0914,/* 0xb1 */
    729    0x0911,/* 0xb2 */
    730    0x0915,/* 0xb3 */
    731    0x0916,/* 0xb4 */
    732    0x0917,/* 0xb5 */
    733    0x0918,/* 0xb6 */
    734    0x0919,/* 0xb7 */
    735    0x091a,/* 0xb8 */
    736    0x091b,/* 0xb9 */
    737    0x091c,/* 0xba */
    738    0x091d,/* 0xbb */
    739    0x091e,/* 0xbc */
    740    0x091f,/* 0xbd */
    741    0x0920,/* 0xbe */
    742    0x0921,/* 0xbf */
    743    0x0922,/* 0xc0 */
    744    0x0923,/* 0xc1 */
    745    0x0924,/* 0xc2 */
    746    0x0925,/* 0xc3 */
    747    0x0926,/* 0xc4 */
    748    0x0927,/* 0xc5 */
    749    0x0928,/* 0xc6 */
    750    0x0929,/* 0xc7 */
    751    0x092a,/* 0xc8 */
    752    0x092b,/* 0xc9 */
    753    0x092c,/* 0xca */
    754    0x092d,/* 0xcb */
    755    0x092e,/* 0xcc */
    756    0x092f,/* 0xcd */
    757    0x095f,/* 0xce */
    758    0x0930,/* 0xcf */
    759    0x0931,/* 0xd0 */
    760    0x0932,/* 0xd1 */
    761    0x0933,/* 0xd2 */
    762    0x0934,/* 0xd3 */
    763    0x0935,/* 0xd4 */
    764    0x0936,/* 0xd5 */
    765    0x0937,/* 0xd6 */
    766    0x0938,/* 0xd7 */
    767    0x0939,/* 0xd8 */
    768    0x200D,/* 0xd9 */
    769    0x093e,/* 0xda */
    770    0x093f,/* 0xdb */
    771    0x0940,/* 0xdc */
    772    0x0941,/* 0xdd */
    773    0x0942,/* 0xde */
    774    0x0943,/* 0xdf */
    775    0x0946,/* 0xe0 */
    776    0x0947,/* 0xe1 */
    777    0x0948,/* 0xe2 */
    778    0x0945,/* 0xe3 */
    779    0x094a,/* 0xe4 */
    780    0x094b,/* 0xe5 */
    781    0x094c,/* 0xe6 */
    782    0x0949,/* 0xe7 */
    783    0x094d,/* 0xe8 */
    784    0x093c,/* 0xe9 */
    785    0x0964,/* 0xea */
    786    0xFFFF,/* 0xeb */
    787    0xFFFF,/* 0xec */
    788    0xFFFF,/* 0xed */
    789    0xFFFF,/* 0xee */
    790    0xFFFF,/* 0xef */
    791    0xFFFF,/* 0xf0 */
    792    0x0966,/* 0xf1 */
    793    0x0967,/* 0xf2 */
    794    0x0968,/* 0xf3 */
    795    0x0969,/* 0xf4 */
    796    0x096a,/* 0xf5 */
    797    0x096b,/* 0xf6 */
    798    0x096c,/* 0xf7 */
    799    0x096d,/* 0xf8 */
    800    0x096e,/* 0xf9 */
    801    0x096f,/* 0xfa */
    802    0xFFFF,/* 0xfb */
    803    0xFFFF,/* 0xfc */
    804    0xFFFF,/* 0xfd */
    805    0xFFFF,/* 0xfe */
    806    0xFFFF /* 0xff */
    807 };
    808 
    /*
     * Row 0 holds the number of rows (including itself) in its first column;
     * the remaining row pairs the byte value 0xA4 with U+0904.
     * NOTE(review): the code that searches this table is outside this part of
     * the file; presumably column 0 is the preceding ISCII byte and column 1
     * the Unicode replacement - confirm against the consumer.
     */
    809 static const uint16_t vowelSignESpecialCases[][2]={
    810 { 2 /*length of array*/    , 0      },
    811 { 0xA4 , 0x0904 },
    812 };
    813 
    /*
     * Row 0 holds the number of rows (16, including itself) in its first
     * column; each following row pairs a byte value with a U+09xx code point.
     * NOTE(review): the code that searches this table is outside this part of
     * the file; presumably column 0 is the ISCII byte that precedes a nukta
     * (ISCII_NUKTA) and column 1 the precomposed Unicode character that
     * replaces the pair - confirm against the consumer.
     */
    814 static const uint16_t nuktaSpecialCases[][2]={
    815    { 16 /*length of array*/   , 0      },
    816    { 0xA6 , 0x090c },
    817    { 0xEA , 0x093D },
    818    { 0xDF , 0x0944 },
    819    { 0xA1 , 0x0950 },
    820    { 0xb3 , 0x0958 },
    821    { 0xb4 , 0x0959 },
    822    { 0xb5 , 0x095a },
    823    { 0xba , 0x095b },
    824    { 0xbf , 0x095c },
    825    { 0xC0 , 0x095d },
    826    { 0xc9 , 0x095e },
    827    { 0xAA , 0x0960 },
    828    { 0xA7 , 0x0961 },
    829    { 0xDB , 0x0962 },
    830    { 0xDC , 0x0963 },
    831 };
    832 
    833 
    /*
     * Emits the ISCII bytes packed in targetByteUnit (one, two or three bytes,
     * most significant first) to `target`, recording one entry in `offsets`
     * per byte written when `offsets` is non-null.
     *
     * - offset is computed as (source - args->source - 1); for a three-byte
     *   value it is decremented once before being recorded.
     * - Bytes that do not fit before targetLimit are appended to
     *   args->converter->charErrorBuffer and *err is set to
     *   U_BUFFER_OVERFLOW_ERROR.
     * - When nothing fits at all, the upper bytes are selected by testing for
     *   non-zero bits (0xFF0000 / 0xFF00), so a zero upper byte is skipped.
     *
     * The macro advances `target` and `offsets` in place and expands its
     * arguments several times, so pass side-effect-free lvalues.
     */
    834 #define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \
    835    int32_t offset = (int32_t)(source - args->source-1);                                        \
    836      /* write the targetUniChar  to target */                                                  \
    837    if(target < targetLimit){                                                                   \
    838        if(targetByteUnit <= 0xFF){                                                             \
    839            *(target)++ = (uint8_t)(targetByteUnit);                                            \
    840            if(offsets){                                                                        \
    841                *(offsets++) = offset;                                                          \
    842            }                                                                                   \
    843        }else{                                                                                  \
    844            if (targetByteUnit > 0xFFFF) {                                                      \
    845                *(target)++ = (uint8_t)(targetByteUnit>>16);                                    \
    846                if (offsets) {                                                                  \
    847                    --offset;                                                                   \
    848                    *(offsets++) = offset;                                                      \
    849                }                                                                               \
    850            }                                                                                   \
    851            if (!(target < targetLimit)) {                                                      \
    852                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
    853                                (uint8_t)(targetByteUnit >> 8);                                 \
    854                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
    855                                (uint8_t)targetByteUnit;                                        \
    856                *err = U_BUFFER_OVERFLOW_ERROR;                                                 \
    857            } else {                                                                            \
    858                *(target)++ = (uint8_t)(targetByteUnit>>8);                                     \
    859                if(offsets){                                                                    \
    860                    *(offsets++) = offset;                                                      \
    861                }                                                                               \
    862                if(target < targetLimit){                                                       \
    863                    *(target)++ = (uint8_t)  targetByteUnit;                                    \
    864                    if(offsets){                                                                \
    865                        *(offsets++) = offset                            ;                      \
    866                    }                                                                           \
    867                }else{                                                                          \
    868                    args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\
    869                                (uint8_t) (targetByteUnit);                                     \
    870                    *err = U_BUFFER_OVERFLOW_ERROR;                                             \
    871                }                                                                               \
    872            }                                                                                   \
    873        }                                                                                       \
    874    }else{                                                                                      \
    875        if (targetByteUnit & 0xFF0000) {                                                        \
    876            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
    877                        (uint8_t) (targetByteUnit >>16);                                        \
    878        }                                                                                       \
    879        if(targetByteUnit & 0xFF00){                                                            \
    880            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
    881                        (uint8_t) (targetByteUnit >>8);                                         \
    882        }                                                                                       \
    883        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =            \
    884                        (uint8_t) (targetByteUnit);                                             \
    885        *err = U_BUFFER_OVERFLOW_ERROR;                                                         \
    886    }                                                                                           \
    887 } UPRV_BLOCK_MACRO_END
    888 
    889 /* Rules:
    890 *    Explicit Halant :
    891 *                      <HALANT> + <ZWNJ>
    892 *    Soft Halant :
    893 *                      <HALANT> + <ZWJ>
    894 */
    /*
     * Converts the UTF-16 text in [args->source, args->sourceLimit) to ISCII
     * bytes written at args->target.
     *
     * @param args  Conversion arguments. args->source and args->target are
     *              updated on normal return. State that spans calls is kept in
     *              args->converter (fromUChar32, fromUnicodeStatus,
     *              charErrorBuffer) and in the UConverterDataISCII reached
     *              through args->converter->extraInfo.
     * @param err   Set to U_ILLEGAL_ARGUMENT_ERROR for a null converter or
     *              inverted buffer limits (nothing else is touched in that
     *              case), U_BUFFER_OVERFLOW_ERROR when output spilled into
     *              charErrorBuffer, U_INVALID_CHAR_FOUND for a code point with
     *              no mapping and U_ILLEGAL_CHAR_FOUND for an unpaired
     *              surrogate.
     *
     * Per code unit the loop:
     *  - emits ATR + script code first if the previous character was LF;
     *  - copies values <= ASCII_END through unchanged;
     *  - turns ZWNJ/ZWJ into a second ISCII byte after a halant (or drops ZWNJ /
     *    emits ISCII_INV for ZWJ when no halant is pending);
     *  - for the Indic range, subtracts the script delta so the value indexes
     *    fromUnicodeTable, emitting ATR + script code when the script changes;
     *  - reports anything left unmapped through *err and fromUChar32.
     */
    895 static void U_CALLCONV
    896 UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
    897        UConverterFromUnicodeArgs * args, UErrorCode * err) {
    898    const char16_t *source = args->source;
    899    const char16_t *sourceLimit = args->sourceLimit;
    900    unsigned char *target = (unsigned char *) args->target;
    901    unsigned char *targetLimit = (unsigned char *) args->targetLimit;
    902    int32_t* offsets = args->offsets;
    903    uint32_t targetByteUnit = 0x0000;
    904    UChar32 sourceChar = 0x0000;
    905    UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
    906    UConverterDataISCII *converterData;
    907    uint16_t newDelta=0;
    908    uint16_t range = 0;
    909    UBool deltaChanged = false;
    910 
    911    if ((args->converter == nullptr) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
    912        *err = U_ILLEGAL_ARGUMENT_ERROR;
    913        return;
    914    }
    915    /* initialize data */
    916    converterData=(UConverterDataISCII*)args->converter->extraInfo;
    917    newDelta=converterData->currentDeltaFromUnicode;
    918    range = (uint16_t)(newDelta/DELTA);
    919 
           /* A non-zero fromUChar32 is a code unit saved by an earlier call; resume
            * at the trail-surrogate check instead of reading a new code unit. */
    920    if ((sourceChar = args->converter->fromUChar32)!=0) {
    921        goto getTrail;
    922    }
    923 
    924    /*writing the char to the output stream */
    925    while (source < sourceLimit) {
    926        /* Write the language code following LF only if LF is not the last character. */
    927        if (args->converter->fromUnicodeStatus == LF) {
    928            targetByteUnit = ATR<<8;
    929            targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
    930            args->converter->fromUnicodeStatus = 0x0000;
    931            /* now append ATR and language code */
    932            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
    933            if (U_FAILURE(*err)) {
    934                break;
    935            }
    936        }
    937        
    938        sourceChar = *source++;
               /* Snapshot the context before the switch below clears or replaces it;
                * the Adhak test after the switch needs the previous value. */
    939        tempContextFromUnicode = converterData->contextCharFromUnicode;
    940        
    941        targetByteUnit = missingCharMarker;
    942        
    943        /*check if input is in ASCII and C0 control codes range*/
    944        if (sourceChar <= ASCII_END) {
                   /* Remember the character so that an LF can be detected on the next iteration. */
    945            args->converter->fromUnicodeStatus = sourceChar;
    946            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
    947            if (U_FAILURE(*err)) {
    948                break;
    949            }
    950            continue;
    951        }
    952        switch (sourceChar) {
    953        case ZWNJ:
    954            /* contextChar has HALANT */
    955            if (converterData->contextCharFromUnicode) {
    956                converterData->contextCharFromUnicode = 0x00;
    957                targetByteUnit = ISCII_HALANT;
    958            } else {
    959                /* consume ZWNJ and continue */
    960                converterData->contextCharFromUnicode = 0x00;
    961                continue;
    962            }
    963            break;
    964        case ZWJ:
    965            /* contextChar has HALANT */
    966            if (converterData->contextCharFromUnicode) {
    967                targetByteUnit = ISCII_NUKTA;
    968            } else {
    969                targetByteUnit =ISCII_INV;
    970            }
    971            converterData->contextCharFromUnicode = 0x00;
    972            break;
    973        default:
    974            /* is the sourceChar in the INDIC_RANGE? */
    975            if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
    976                /* Danda and Double Danda are valid in Northern scripts.. since Unicode
    977                 * does not include these codepoints in all Northern scripts we need to
    978                 * filter them out
    979                 */
    980                if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
    981                    /* find out to which block the sourceChar belongs*/
    982                    range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
    983                    newDelta =(uint16_t)(range*DELTA);
    984 
    985                    /* Now are we in the same block as the previous? */
    986                    if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
    987                        converterData->currentDeltaFromUnicode = newDelta;
    988                        converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
    989                        deltaChanged =true;
    990                        converterData->isFirstBuffer=false;
    991                    }
    992                    
    993                    if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { 
    994                        if (sourceChar == PNJ_TIPPI) {
    995                            /* Make sure Tippi is converted to Bindi. */
    996                            sourceChar = PNJ_BINDI;
    997                        } else if (sourceChar == PNJ_ADHAK) {
    998                            /* This is for consonant cluster handling. */
    999                            converterData->contextCharFromUnicode = PNJ_ADHAK;
   1000                        }
   1001                        
   1002                    }
   1003                    /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
   1004                    /* now subtract the new delta from sourceChar*/
   1005                    sourceChar -= converterData->currentDeltaFromUnicode;
   1006                }
   1007 
   1008                /* get the target byte unit */
   1009                targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];
   1010 
   1011                /* is the code point valid in current script? */
   1012                if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
   1013                    /* Vocallic RR is assigned in ISCII Telugu and Unicode */
   1014                    if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
   1015                        targetByteUnit=missingCharMarker;
   1016                    }
   1017                }
   1018 
   1019                if (deltaChanged) {
   1020                    /* we are in a script block which is different than
   1021                     * previous sourceChar's script block write ATR and language codes
   1022                     */
   1023                    uint32_t temp=0;
   1024                    temp =(uint16_t)(ATR<<8);
   1025                    temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
   1026                    /* reset */
   1027                    deltaChanged=false;
   1028                    /* now append ATR and language code */
   1029                    WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
   1030                    if (U_FAILURE(*err)) {
                               /* NOTE: this break leaves the switch, not the while loop;
                                * the code after the switch still runs for this character. */
   1031                        break;
   1032                    }
   1033                }
   1034                
                       /* Adhak itself produces no output here; the continue skips the context
                        * reset below so the PNJ_ADHAK context set above is kept. */
   1035                if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
   1036                    continue;
   1037                }
   1038            }
   1039            /* reset context char */
   1040            converterData->contextCharFromUnicode = 0x00;
   1041            break;
   1042        }
   1043        if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
   1044            /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
   1045            /* reset context char */
   1046            converterData->contextCharFromUnicode = 0x0000;
   1047            targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
   1048            /* write targetByteUnit to target */
   1049            WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
   1050            if (U_FAILURE(*err)) {
   1051                break;
   1052            }
   1053        } else if (targetByteUnit != missingCharMarker) {
                   /* Remember a halant so a following ZWNJ/ZWJ can be turned into a second byte. */
   1054            if (targetByteUnit==ISCII_HALANT) {
   1055                converterData->contextCharFromUnicode = (char16_t)targetByteUnit;
   1056            }
   1057            /* write targetByteUnit to target*/
   1058            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
   1059            if (U_FAILURE(*err)) {
   1060                break;
   1061            }
   1062        } else {
   1063            /* oops.. the code point is unassigned */
   1064            /*check if the char is a First surrogate*/
   1065            if (U16_IS_SURROGATE(sourceChar)) {
   1066                if (U16_IS_SURROGATE_LEAD(sourceChar)) {
   1067 getTrail:
   1068                    /*look ahead to find the trail surrogate*/
   1069                    if (source < sourceLimit) {
   1070                        /* test the following code unit */
   1071                        char16_t trail= (*source);
   1072                        if (U16_IS_TRAIL(trail)) {
   1073                            source++;
   1074                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
   1075                            *err =U_INVALID_CHAR_FOUND;
   1076                            /* convert this surrogate code point */
   1077                            /* exit this condition tree */
   1078                        } else {
   1079                            /* this is an unmatched lead code unit (1st surrogate) */
   1080                            /* callback(illegal) */
   1081                            *err=U_ILLEGAL_CHAR_FOUND;
   1082                        }
   1083                    } else {
   1084                        /* no more input */
   1085                        *err = U_ZERO_ERROR;
   1086                    }
   1087                } else {
   1088                    /* this is an unmatched trail code unit (2nd surrogate) */
   1089                    /* callback(illegal) */
   1090                    *err=U_ILLEGAL_CHAR_FOUND;
   1091                }
   1092            } else {
   1093                /* callback(unassigned) for a BMP code point */
   1094                *err = U_INVALID_CHAR_FOUND;
   1095            }
   1096 
                   /* Store the offending (or still incomplete) code point in the converter;
                    * the check at the top of this function picks it up on the next call. */
   1097            args->converter->fromUChar32=sourceChar;
   1098            break;
   1099        }
   1100    }/* end while (source < sourceLimit) */
   1101 
   1102    /*save the state and return */
   1103    args->source = source;
   1104    args->target = (char*)target;
   1105 }
   1106 
   /*
    * Script table used when an ATR byte is followed by a script code: the
    * to-Unicode converter indexes it with (sourceChar & 0x0F), multiplies
    * column 0 by DELTA to get currentDeltaToUnicode and casts column 1 to
    * MaskEnum for currentMaskToUnicode.
    * NOTE(review): row 5 pairs TELUGU with KND_MASK and row 6 repeats the
    * BENGALI row; both look deliberate (shared validity mask / shared block)
    * but the reason is not visible in this part of the file - confirm.
    */
   1107 static const uint16_t lookupTable[][2]={
   1108    { ZERO,       ZERO     },     /*DEFAULT*/
   1109    { ZERO,       ZERO     },     /*ROMAN*/
   1110    { DEVANAGARI, DEV_MASK },
   1111    { BENGALI,    BNG_MASK },
   1112    { TAMIL,      TML_MASK },
   1113    { TELUGU,     KND_MASK },
   1114    { BENGALI,    BNG_MASK },
   1115    { ORIYA,      ORI_MASK },
   1116    { KANNADA,    KND_MASK },
   1117    { MALAYALAM,  MLM_MASK },
   1118    { GUJARATI,   GJR_MASK },
   1119    { GURMUKHI,   PNJ_MASK }
   1120 };
   1121 
   /*
    * Writes one UTF-16 code unit to `target`, or stashes it in
    * args->converter->UCharErrorBuffer and sets *err to
    * U_BUFFER_OVERFLOW_ERROR when target has reached args->targetLimit.
    *
    * Before writing, `delta` is added to targetUniChar unless the value is
    * <= ASCII_END or is ZWJ, ZWNJ, DANDA or DOUBLE_DANDA. The addition is
    * done in place, so targetUniChar must be a modifiable lvalue. When
    * `offsets` is non-null, `offset` is recorded for a code unit written to
    * target. The `source` parameter is not referenced by the expansion.
    */
   1122 #define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \
   1123    /* add offset to current Indic Block */                                              \
   1124    if(targetUniChar>ASCII_END &&                                                        \
   1125           targetUniChar != ZWJ &&                                                       \
   1126           targetUniChar != ZWNJ &&                                                      \
   1127           targetUniChar != DANDA &&                                                     \
   1128           targetUniChar != DOUBLE_DANDA){                                               \
   1129                                                                                         \
   1130           targetUniChar+=(uint16_t)(delta);                                             \
   1131    }                                                                                    \
   1132    /* now write the targetUniChar */                                                    \
   1133    if(target<args->targetLimit){                                                        \
   1134        *(target)++ = (char16_t)targetUniChar;                                              \
   1135        if(offsets){                                                                     \
   1136            *(offsets)++ = (int32_t)(offset);                                            \
   1137        }                                                                                \
   1138    }else{                                                                               \
   1139        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] =   \
   1140            (char16_t)targetUniChar;                                                        \
   1141        *err = U_BUFFER_OVERFLOW_ERROR;                                                  \
   1142    }                                                                                    \
   1143 } UPRV_BLOCK_MACRO_END
   1144 
   /*
    * Looks up sourceChar in toUnicodeTable and stores the result in
    * targetUniChar. For sourceChar > ASCII_END the result is then checked
    * against validityTable (indexed by the low 7 bits of the mapped value)
    * using data->currentMaskToUnicode; if the mask test fails the result is
    * replaced by missingCharMarker, except that VOCALLIC_RR is kept when
    * data->currentDeltaToUnicode is TELUGU_DELTA.
    */
   1145 #define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN {              \
   1146    targetUniChar = toUnicodeTable[(sourceChar)] ;                                       \
   1147    /* is the code point valid in current script? */                                     \
   1148    if(sourceChar> ASCII_END &&                                                          \
   1149            (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){    \
   1150        /* Vocallic RR is assigned in ISCII Telugu and Unicode */                         \
   1151        if(data->currentDeltaToUnicode!=(TELUGU_DELTA) ||                                \
   1152                    targetUniChar!=VOCALLIC_RR){                                         \
   1153            targetUniChar=missingCharMarker;                                             \
   1154        }                                                                                \
   1155    }                                                                                    \
   1156 } UPRV_BLOCK_MACRO_END
   1157 
   1158 /***********
   1159 *  Rules for ISCII to Unicode converter
   1160 *  ISCII is stateful encoding. To convert ISCII bytes to Unicode,
   1161 *  which has both precomposed and decomposed forms characters
   1162 *  pre-context and post-context need to be considered.
   1163 *
   1164 *  Post context
   1165 *  i)  ATR : Attribute code is used to declare the font and script switching.
   1166 *      Currently we only switch scripts; font codes are consumed without generating an error
   1167 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
   1168 *      obsolete characters
   1169 *  Pre context
   1170 *  i)  Halant: if preceded by a halant then it is an explicit halant
   1171 *  ii) Nukta :
   1172 *       a) if preceded by a halant then it is a soft halant
   1173 *       b) if preceded by specific consonants and the ligatures have pre-composed
   1174 *          characters in Unicode then convert to pre-composed characters
   1175 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
   1176 *
   1177 */
   1178 
   1179 static void U_CALLCONV
   1180 UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
   1181    const char *source = ( char *) args->source;
   1182    char16_t *target = args->target;
   1183    const char *sourceLimit = args->sourceLimit;
   1184    const char16_t* targetLimit = args->targetLimit;
   1185    uint32_t targetUniChar = 0x0000;
   1186    uint8_t sourceChar = 0x0000;
   1187    UConverterDataISCII* data;
   1188    UChar32* toUnicodeStatus=nullptr;
   1189    UChar32 tempTargetUniChar = 0x0000;
   1190    char16_t* contextCharToUnicode= nullptr;
   1191    UBool found;
   1192    int i; 
   1193    int offset = 0;
   1194 
   1195    if ((args->converter == nullptr) || (target < args->target) || (source < args->source)) {
   1196        *err = U_ILLEGAL_ARGUMENT_ERROR;
   1197        return;
   1198    }
   1199 
   1200    data = (UConverterDataISCII*)(args->converter->extraInfo);
   1201    contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
   1202    toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
   1203 
   1204    while (U_SUCCESS(*err) && source<sourceLimit) {
   1205 
   1206        targetUniChar = missingCharMarker;
   1207 
   1208        if (target < targetLimit) {
   1209            sourceChar = (unsigned char)*(source)++;
   1210 
   1211            /* look at the post-context perform special processing */
   1212            if (*contextCharToUnicode==ATR) {
   1213 
   1214                /* If we have ATR in *contextCharToUnicode then we need to change our
   1215                 * state to the Indic Script specified by sourceChar
   1216                 */
   1217 
   1218                /* check if the sourceChar is supported script range*/
   1219                if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
   1220                    data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
   1221                    data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
   1222                } else if (sourceChar==DEF) {
   1223                    /* switch back to default */
   1224                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
   1225                    data->currentMaskToUnicode = data->defMaskToUnicode;
   1226                } else {
   1227                    if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
   1228                        /* these are display codes consume and continue */
   1229                    } else {
   1230                        *err =U_ILLEGAL_CHAR_FOUND;
   1231                        /* reset */
   1232                        *contextCharToUnicode=NO_CHAR_MARKER;
   1233                        goto CALLBACK;
   1234                    }
   1235                }
   1236 
   1237                /* reset */
   1238                *contextCharToUnicode=NO_CHAR_MARKER;
   1239 
   1240                continue;
   1241 
   1242            } else if (*contextCharToUnicode==EXT) {
   1243                /* check if sourceChar is in 0xA1-0xEE range */
   1244                if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
   1245                    /* We currently support only Anudatta and Devanagari abbreviation sign */
   1246                    if (sourceChar==0xBF || sourceChar == 0xB8) {
   1247                        targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
   1248                        
   1249                        /* find out if the mapping is valid in this state */
   1250                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
   1251                            *contextCharToUnicode= NO_CHAR_MARKER;
   1252 
   1253                            /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1254                            if (data->prevToUnicodeStatus) {
   1255                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1256                                data->prevToUnicodeStatus = 0x0000;
   1257                            }
   1258                            /* write to target */
   1259                            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
   1260 
   1261                            continue;
   1262                        }
   1263                    }
   1264                    /* byte unit is unassigned */
   1265                    targetUniChar = missingCharMarker;
   1266                    *err= U_INVALID_CHAR_FOUND;
   1267                } else {
   1268                    /* only 0xA1 - 0xEE are legal after EXT char */
   1269                    *contextCharToUnicode= NO_CHAR_MARKER;
   1270                    *err = U_ILLEGAL_CHAR_FOUND;
   1271                }
   1272                goto CALLBACK;
   1273            } else if (*contextCharToUnicode==ISCII_INV) {
   1274                if (sourceChar==ISCII_HALANT) {
   1275                    targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
   1276                } else {
   1277                    targetUniChar = ZWJ;
   1278                }
   1279 
   1280                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1281                if (data->prevToUnicodeStatus) {
   1282                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1283                    data->prevToUnicodeStatus = 0x0000;
   1284                }
   1285                /* write to target */
   1286                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
   1287                /* reset */
   1288                *contextCharToUnicode=NO_CHAR_MARKER;
   1289            }
   1290 
   1291            /* look at the pre-context and perform special processing */
   1292            switch (sourceChar) {
   1293            case ISCII_INV:
   1294            case EXT:
   1295            case ATR:
   1296                *contextCharToUnicode = (char16_t)sourceChar;
   1297 
   1298                if (*toUnicodeStatus != missingCharMarker) {
   1299                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1300                    if (data->prevToUnicodeStatus) {
   1301                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1302                        data->prevToUnicodeStatus = 0x0000;
   1303                    }
   1304                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
   1305                    *toUnicodeStatus = missingCharMarker;
   1306                }
   1307                continue;
   1308            case ISCII_DANDA:
   1309                /* handle double danda*/
   1310                if (*contextCharToUnicode== ISCII_DANDA) {
   1311                    targetUniChar = DOUBLE_DANDA;
   1312                    /* clear the context */
   1313                    *contextCharToUnicode = NO_CHAR_MARKER;
   1314                    *toUnicodeStatus = missingCharMarker;
   1315                } else {
   1316                    GET_MAPPING(sourceChar,targetUniChar,data);
   1317                    *contextCharToUnicode = sourceChar;
   1318                }
   1319                break;
   1320            case ISCII_HALANT:
   1321                /* handle explicit halant */
   1322                if (*contextCharToUnicode == ISCII_HALANT) {
   1323                    targetUniChar = ZWNJ;
   1324                    /* clear the context */
   1325                    *contextCharToUnicode = NO_CHAR_MARKER;
   1326                } else {
   1327                    GET_MAPPING(sourceChar,targetUniChar,data);
   1328                    *contextCharToUnicode = sourceChar;
   1329                }
   1330                break;
   1331            case 0x0A:
   1332            case 0x0D:
   1333                data->resetToDefaultToUnicode = true;
   1334                GET_MAPPING(sourceChar,targetUniChar,data)
   1335                ;
   1336                *contextCharToUnicode = sourceChar;
   1337                break;
   1338 
   1339            case ISCII_VOWEL_SIGN_E:
   1340                i=1;
   1341                found=false;
   1342                for (; i<vowelSignESpecialCases[0][0]; i++) {
   1343                    U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
   1344                    if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
   1345                        targetUniChar=vowelSignESpecialCases[i][1];
   1346                        found=true;
   1347                        break;
   1348                    }
   1349                }
   1350                if (found) {
   1351                    /* find out if the mapping is valid in this state */
   1352                    if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
   1353                        /*targetUniChar += data->currentDeltaToUnicode ;*/
   1354                        *contextCharToUnicode= NO_CHAR_MARKER;
   1355                        *toUnicodeStatus = missingCharMarker;
   1356                        break;
   1357                    }
   1358                }
   1359                GET_MAPPING(sourceChar,targetUniChar,data);
   1360                *contextCharToUnicode = sourceChar;
   1361                break;
   1362 
   1363            case ISCII_NUKTA:
   1364                /* handle soft halant */
   1365                if (*contextCharToUnicode == ISCII_HALANT) {
   1366                    targetUniChar = ZWJ;
   1367                    /* clear the context */
   1368                    *contextCharToUnicode = NO_CHAR_MARKER;
   1369                    break;
   1370                } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
   1371                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1372                    if (data->prevToUnicodeStatus) {
   1373                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1374                        data->prevToUnicodeStatus = 0x0000;
   1375                    }
   1376                    /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
   1377                     * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
   1378                     */
   1379                    targetUniChar = PNJ_RRA;
   1380                    WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
   1381                    if (U_SUCCESS(*err)) {
   1382                        targetUniChar = PNJ_SIGN_VIRAMA;
   1383                        WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
   1384                        if (U_SUCCESS(*err)) {
   1385                            targetUniChar = PNJ_HA;
   1386                            WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
   1387                        } else {
   1388                            args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
   1389                        }
   1390                    } else {
   1391                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
   1392                        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
   1393                    }
   1394                    *toUnicodeStatus = missingCharMarker;
   1395                    data->contextCharToUnicode = NO_CHAR_MARKER;
   1396                    continue;
   1397                } else {
   1398                    /* try to handle <CHAR> + ISCII_NUKTA special mappings */
   1399                    i=1;
   1400                    found =false;
   1401                    for (; i<nuktaSpecialCases[0][0]; i++) {
   1402                        if (nuktaSpecialCases[i][0]==(uint8_t)
   1403                                *contextCharToUnicode) {
   1404                            targetUniChar=nuktaSpecialCases[i][1];
   1405                            found =true;
   1406                            break;
   1407                        }
   1408                    }
   1409                    if (found) {
   1410                        /* find out if the mapping is valid in this state */
   1411                        if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
   1412                            /*targetUniChar += data->currentDeltaToUnicode ;*/
   1413                            *contextCharToUnicode= NO_CHAR_MARKER;
   1414                            *toUnicodeStatus = missingCharMarker;
   1415                            if (data->currentDeltaToUnicode == PNJ_DELTA) {
   1416                                /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1417                                if (data->prevToUnicodeStatus) {
   1418                                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1419                                    data->prevToUnicodeStatus = 0x0000;
   1420                                }
   1421                                WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
   1422                                continue;
   1423                            }
   1424                            break;
   1425                        }
   1426                        /* else fall through to default */
   1427                    }
   1428                    /* else fall through to default */
   1429                    U_FALLTHROUGH;
   1430                }
   1431            default:GET_MAPPING(sourceChar,targetUniChar,data)
   1432                ;
   1433                *contextCharToUnicode = sourceChar;
   1434                break;
   1435            }
   1436 
   1437            if (*toUnicodeStatus != missingCharMarker) {
   1438                /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
   1439                if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
   1440                        (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) {
   1441                    /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
   1442                    offset = (int)(source-args->source - 3);
   1443                    tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
   1444                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
   1445                    WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
   1446                    data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
   1447                    *toUnicodeStatus = missingCharMarker;
   1448                    continue;
   1449                } else {
   1450                    /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
   1451                    if (data->prevToUnicodeStatus) {
   1452                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
   1453                        data->prevToUnicodeStatus = 0x0000;
   1454                    }
   1455                    /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. 
   1456                     * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
   1457                     */
   1458                    if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
   1459                        targetUniChar = PNJ_TIPPI - PNJ_DELTA;
   1460                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
   1461                    } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
   1462                        /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
   1463                        data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
   1464                    } else {
   1465                        /* write the previously mapped codepoint */
   1466                        WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
   1467                    }
   1468                }
   1469                *toUnicodeStatus = missingCharMarker;
   1470            }
   1471 
   1472            if (targetUniChar != missingCharMarker) {
   1473                /* now save the targetUniChar for delayed write */
   1474                *toUnicodeStatus = (char16_t) targetUniChar;
   1475                if (data->resetToDefaultToUnicode) {
   1476                    data->currentDeltaToUnicode = data->defDeltaToUnicode;
   1477                    data->currentMaskToUnicode = data->defMaskToUnicode;
   1478                    data->resetToDefaultToUnicode=false;
   1479                }
   1480            } else {
   1481 
   1482                /* we reach here only if targetUniChar == missingCharMarker
   1483                 * so assign codes to reason and err
   1484                 */
   1485                *err = U_INVALID_CHAR_FOUND;
   1486 CALLBACK:
   1487                args->converter->toUBytes[0] = sourceChar;
   1488                args->converter->toULength = 1;
   1489                break;
   1490            }
   1491 
   1492        } else {
   1493            *err =U_BUFFER_OVERFLOW_ERROR;
   1494            break;
   1495        }
   1496    }
   1497 
   1498    if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
   1499        /* end of the input stream */
   1500        UConverter *cnv = args->converter;
   1501 
   1502        if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
   1503            /* set toUBytes[] */
   1504            cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
   1505            cnv->toULength = 1;
   1506 
   1507            /* avoid looping on truncated sequences */
   1508            *contextCharToUnicode = NO_CHAR_MARKER;
   1509        } else {
   1510            cnv->toULength = 0;
   1511        }
   1512 
   1513        if (*toUnicodeStatus != missingCharMarker) {
   1514            /* output a remaining target character */
   1515            WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
   1516            *toUnicodeStatus = missingCharMarker;
   1517        }
   1518    }
   1519 
   1520    args->target = target;
   1521    args->source = source;
   1522 }
   1523 
   1524 /* Buffer layout used by _ISCII_SafeClone: a converter followed by its ISCII state; sizeof() of this struct is the size reported for a clone. */
   1525 struct cloneISCIIStruct {
   1526    UConverter cnv;              /* the converter object handed back to the caller of the clone hook */
   1527    UConverterDataISCII mydata;  /* receives a copy of the source converter's extraInfo; cnv.extraInfo is pointed at it */
   1528 };
   1529 /* Clone hook for ISCII: copies the source converter's ISCII state into stackBuffer and returns the converter embedded there; returns nullptr on an incoming failure or on a size preflight. */
   1530 static UConverter * U_CALLCONV
   1531 _ISCII_SafeClone(const UConverter *cnv,       /* source converter; only its extraInfo is read here */
   1532              void *stackBuffer,               /* storage that is reinterpreted as a cloneISCIIStruct */
   1533              int32_t *pBufferSize,            /* in: 0 requests a preflight; out (preflight only): required size */
   1534              UErrorCode *status)              /* in: a failure code makes the call return nullptr immediately */
   1535 {
   1536    struct cloneISCIIStruct * localClone;
   1537    int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); /* converter plus its ISCII state */
   1538    /* do nothing if the caller already has an error */
   1539    if (U_FAILURE(*status)) {
   1540        return nullptr;
   1541    }
   1542    /* NOTE(review): a non-zero but too-small *pBufferSize is not rejected here - presumably the caller checks it; confirm */
   1543    if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
   1544        *pBufferSize = bufferSizeNeeded;
   1545        return nullptr;
   1546    }
   1547    /* NOTE(review): stackBuffer is used without a null check - presumably guaranteed by the caller; confirm */
   1548    localClone = (struct cloneISCIIStruct *)stackBuffer;
   1549    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
   1550    /* so only the ISCII-specific state is copied, and the clone is re-pointed at its own copy */
   1551    uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
   1552    localClone->cnv.extraInfo = &localClone->mydata;
   1553    localClone->cnv.isExtraLocal = true; /* presumably marks extraInfo as embedded in the clone's own storage - confirm how the close path uses it */
   1554    /* hand back the converter embedded in the clone buffer */
   1555    return &localClone->cnv;
   1556 }
   1557 /* Unicode-set hook for ISCII: reports through sa the code points this converter covers - 0..ASCII_END, the valid code points of every script block from DEVANAGARI to MALAYALAM, plus DANDA, DOUBLE_DANDA, ZWNJ and ZWJ. */
   1558 static void U_CALLCONV
   1559 _ISCIIGetUnicodeSet(const UConverter *cnv,        /* not used by this implementation */
   1560                    const USetAdder *sa,           /* sink: sa->add / sa->addRange are called on sa->set */
   1561                    UConverterUnicodeSet which,    /* not used by this implementation */
   1562                    UErrorCode *pErrorCode)        /* not used: neither read nor written here */
   1563 {
   1564    (void)cnv;
   1565    (void)which;
   1566    (void)pErrorCode;
   1567    int32_t idx, script;
   1568    uint8_t mask;
   1569 
   1570    /* Since all ISCII versions allow switching to other ISCII
   1571    scripts, we add all roundtrippable characters to this set. */
   1572    sa->addRange(sa->set, 0, ASCII_END);
   1573    for (script = DEVANAGARI; script <= MALAYALAM; script++) {
   1574        mask = (uint8_t)(lookupInitialData[script].maskEnum); /* validity bit(s) for this script */
   1575        for (idx = 0; idx < DELTA; idx++) { /* idx is the offset inside one DELTA-sized script block */
   1576            /* added check for TELUGU character */
   1577            if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { /* Telugu offset 0x31 is added regardless of its validityTable entry */
   1578                sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); /* script blocks are laid out consecutively from INDIC_BLOCK_BEGIN */
   1579            }
   1580        }
   1581    }
   1582    sa->add(sa->set, DANDA); /* the remaining code points are added once, outside the per-script loop */
   1583    sa->add(sa->set, DOUBLE_DANDA);
   1584    sa->add(sa->set, ZWNJ);
   1585    sa->add(sa->set, ZWJ);
   1586 }
   1587 U_CDECL_END
   1588 static const UConverterImpl _ISCIIImpl={
   1589    /* Positional initializer: entries must stay in the order of the UConverterImpl members (declared outside this file). Slot names below are assumptions - TODO confirm against that declaration. */
   1590    UCNV_ISCII, /* converter type tag */
   1591 
   1592    nullptr, /* presumably the load hook - unused */
   1593    nullptr, /* presumably the unload hook - unused */
   1594 
   1595    _ISCIIOpen,
   1596    _ISCIIClose,
   1597    _ISCIIReset,
   1598    /* the same routine fills two consecutive slots in each direction - presumably the plain and the with-offsets variants */
   1599    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
   1600    UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
   1601    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
   1602    UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
   1603    nullptr, /* presumably getNextUChar - not provided */
   1604 
   1605    nullptr, /* presumably getStarters - not provided */
   1606    _ISCIIgetName,
   1607    nullptr, /* presumably writeSub - not provided */
   1608    _ISCII_SafeClone,
   1609    _ISCIIGetUnicodeSet,
   1610    nullptr, /* last two slots left empty */
   1611    nullptr
   1612 };
   1613 /* Static description of the ISCII converter. Positional initializer: order must match the UConverterStaticData members (declared outside this file); field meanings noted below are assumptions - TODO confirm. */
   1614 static const UConverterStaticData _ISCIIStaticData={
   1615    sizeof(UConverterStaticData), /* size of this structure */
   1616        "ISCII", /* converter name */
   1617         0, /* presumably the codepage number */
   1618         UCNV_IBM, /* platform tag */
   1619         UCNV_ISCII, /* converter type, same tag as the first entry of _ISCIIImpl */
   1620         1, /* presumably minimum bytes per character */
   1621         4, /* presumably maximum bytes per character */
   1622        { 0x1a, 0, 0, 0 }, /* presumably the substitution byte sequence */
   1623        0x1, /* presumably the number of substitution bytes in use */
   1624        false,
   1625        false,
   1626        0x0,
   1627        0x0,
   1628        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
   1629 
   1630 };
   1631 /* Shared-data record for ISCII, built from the static description and the implementation table defined above; unlike those two it is not declared static. */
   1632 const UConverterSharedData _ISCIIData=
   1633        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl);
   1634 
   1635 #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */