[ tor-browser ].git.dasho

read.c (13517B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1998-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
     11 * File read.c
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   05/26/99    stephen     Creation.
     17 *   5/10/01     Ram         removed ustdio dependency
     18 *******************************************************************************
     19 */
     20 
     21 #include <stdbool.h>
     22 
     23 #include "read.h"
     24 #include "errmsg.h"
     25 #include "toolutil.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/utf16.h"
     28 
/* UTF-16 code units that have special meaning to the tokenizer. */
#define OPENBRACE    0x007B  /* left curly bracket: returned as TOK_OPEN_BRACE */
#define CLOSEBRACE   0x007D  /* right curly bracket: returned as TOK_CLOSE_BRACE */
#define COMMA        0x002C  /* comma: returned as TOK_COMMA */
#define QUOTE        0x0022  /* double quote: delimits quoted strings */
#define ESCAPE       0x005C  /* backslash: introduces an escape sequence */
#define SLASH        0x002F  /* solidus: first character of a comment opener */
#define ASTERISK     0x002A  /* asterisk: second character of a block comment opener */
#define SPACE        0x0020  /* space: inserted between merged string pieces */
#define COLON        0x003A  /* colon: returned as TOK_COLON */
#define BADBOM       0xFFFE  /* getNextToken() returns TOK_ERROR for this value;
                                presumably a byte-swapped U+FEFF - confirm */
#define CR           0x000D  /* carriage return */
#define LF           0x000A  /* line feed */

/* Current line number in the input.  Set to 1 by resetLineNumber() and
   incremented as a side effect of isWhitespace() and isNewline() (on 0x000A
   and 0x2029), and by getStringToken() for raw newlines inside quoted
   strings. */
static int32_t lineCount;

/* Prototypes for the file-local helpers defined below. */
static enum ETokenType getStringToken(UCHARBUF *buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status);

static UChar32 getNextChar           (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
static void    seekUntilNewline      (UCHARBUF *buf, struct UString *token, UErrorCode *status);
static void    seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
static UBool   isWhitespace          (UChar32 c);
static UBool   isNewline             (UChar32 c);
     55 
     56 U_CFUNC void resetLineNumber(void) {
     57    lineCount = 1;
     58 }
     59 
     60 /* Read and return the next token from the stream.  If the token is of
     61   type eString, fill in the token parameter with the token.  If the
     62   token is eError, then the status parameter will contain the
     63   specific error.  This will be eItemNotFound at the end of file,
     64   indicating that all tokens have been returned.  This method will
     65   never return eString twice in a row; instead, multiple adjacent
     66   string tokens will be merged into one, with no intervening
     67   space. */
     68 U_CFUNC enum ETokenType
     69 getNextToken(UCHARBUF* buf,
     70             struct UString *token,
     71             uint32_t *linenumber, /* out: linenumber of token */
     72             struct UString *comment,
     73             UErrorCode *status) {
     74    enum ETokenType result;
     75    UChar32         c;
     76 
     77    if (U_FAILURE(*status)) {
     78        return TOK_ERROR;
     79    }
     80 
     81    /* Skip whitespace */
     82    c = getNextChar(buf, true, comment, status);
     83 
     84    if (U_FAILURE(*status)) {
     85        return TOK_ERROR;
     86    }
     87 
     88    *linenumber = lineCount;
     89 
     90    switch(c) {
     91    case BADBOM:
     92        return TOK_ERROR;
     93    case OPENBRACE:
     94        return TOK_OPEN_BRACE;
     95    case CLOSEBRACE:
     96        return TOK_CLOSE_BRACE;
     97    case COMMA:
     98        return TOK_COMMA;
     99    case U_EOF:
    100        return TOK_EOF;
    101    case COLON:
    102        return TOK_COLON;
    103 
    104    default:
    105        result = getStringToken(buf, c, token, status);
    106    }
    107 
    108    *linenumber = lineCount;
    109    return result;
    110 }
    111 
/* Copy a string token into `token`.
 *
 * On entry the first character of the string (initialChar) has already
 * been read; it is not whitespace, but may be a QUOTE or an ESCAPE.  The
 * function reads every following character that belongs to the string
 * and appends it to `token`, which is first truncated to length 0.
 *
 * It also merges adjacent strings: after one piece ends it skips
 * whitespace and comments, and if the next character is not OPENBRACE,
 * CLOSEBRACE, COMMA or COLON it treats that character as the start of
 * another piece.  A quoted piece that directly follows another quoted
 * piece is appended with no separator; in every other combination a
 * single SPACE is inserted between the pieces.
 *
 * Returns TOK_STRING on success.  Returns TOK_ERROR on most failures, and
 * TOK_EOF when the input ends inside a quoted piece.  Note that a few
 * paths return TOK_STRING even though *status is a failure, so callers
 * need to inspect *status as well.
 */
static enum ETokenType getStringToken(UCHARBUF* buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status) {
    UBool    lastStringWasQuoted;          /* was the previous piece quoted? */
    UChar32  c;                            /* character currently examined */
    UChar    target[3] = { '\0' };         /* scratch buffer for one code point */
    UChar    *pTarget   = target;          /* write cursor into target */
    int      len=0;                        /* code units placed in target */
    UBool    isFollowingCharEscaped=false; /* previous char was an unconsumed ESCAPE */
    UBool    isNLUnescaped = false;        /* current CR/LF was produced by unescape() */
    UChar32  prevC=0;                      /* previous character of the quoted piece */

    /* We are guaranteed on entry that initialChar is not a whitespace
       character. If we are at the EOF, or have some other problem, it
       doesn't matter; we still want to validly return the initialChar
       (if nothing else) as a string token. */

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* setup: start from an empty token */
    lastStringWasQuoted = false;
    c = initialChar;
    ustr_setlen(token, 0, status);

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* Each iteration consumes one string piece, quoted or unquoted. */
    for (;;) {
        if (c == QUOTE) {
            /* Quoted piece.  Separate it from a preceding unquoted piece
               with a space; quoted-after-quoted is joined directly. */
            if (!lastStringWasQuoted && token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }

            lastStringWasQuoted = true;

            /* Copy characters up to the closing, unescaped QUOTE. */
            for (;;) {
                c = ucbuf_getc(buf,status);

                /* EOF reached before the closing quote */
                if (c == U_EOF) {
                    return TOK_EOF;
                }

                /* Reading failed in the middle of the quoted string */
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }

                /* An unescaped QUOTE ends this piece. */
                if (c == QUOTE && !isFollowingCharEscaped) {
                    break;
                }

                /* A fresh ESCAPE: let unescape() decode the sequence.  It
                   may hand back ESCAPE itself, which is handled below. */
                if (c == ESCAPE  && !isFollowingCharEscaped) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                    /* A newline that came out of an escape sequence must
                       not be counted as a source line break. */
                    if(c == CR || c == LF){
                        isNLUnescaped = true;
                    }
                }

                if(c==ESCAPE && !isFollowingCharEscaped){
                    /* Still an ESCAPE after unescape(): drop it and copy
                       the next character literally. */
                    isFollowingCharEscaped = true;
                }else{
                    /* Append c (one or two code units) to the token. */
                    U_APPEND_CHAR32(c, pTarget,len);
                    pTarget = target;
                    ustr_uscat(token, pTarget,len, status);
                    isFollowingCharEscaped = false;
                    len=0;
                    /* Count raw line breaks; the LF of a CR LF pair is
                       skipped because the CR was already counted. */
                    if(c == CR || c == LF){
                        if(isNLUnescaped == false && prevC!=CR){
                            lineCount++;
                        }
                        isNLUnescaped = false;
                    }
                }

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
                prevC = c;
            }
        } else {
            /* Unquoted piece: always separated from what precedes it. */
            if (token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }

            /* If the previous piece was quoted we are mixing quoted and
             * unquoted strings: warn when warnings are enabled, and fail
             * in strict mode.
             */
            if(lastStringWasQuoted){
                if(getShowWarning()){
                    warning(lineCount, "Mixing quoted and unquoted strings");
                }
                if(isStrict()){
                    return TOK_ERROR;
                }

            }

            lastStringWasQuoted = false;

            /* The first character of the piece may itself be an escape. */
            if (c == ESCAPE) {
                pTarget = target;
                c       = unescape(buf, status);

                /* EOF reached.
                   NOTE(review): the other unescape() call sites compare
                   against U_ERR rather than U_EOF - confirm which is
                   intended. */
                if (c == U_EOF) {
                    return TOK_ERROR;
                }
            }

            /* Append the first character of the piece. */
            U_APPEND_CHAR32(c, pTarget,len);
            pTarget = target;
            ustr_uscat(token, pTarget,len, status);
            len=0;

            if (U_FAILURE(*status)) {
                return TOK_ERROR;
            }

            /* Copy the rest of the piece, up to whitespace or punctuation. */
            for (;;) {
                /* DON'T skip whitespace (comments are still skipped) */
                c = getNextChar(buf, false, NULL, status);

                /* EOF reached: the string ends here */
                if (c == U_EOF) {
                    ucbuf_ungetc(c, buf);
                    return TOK_STRING;
                }

                /* TOK_STRING is returned even though *status has failed */
                if (U_FAILURE(*status)) {
                    return TOK_STRING;
                }

                /* Punctuation or a quote ends the piece; push it back so
                   it is read again by the code below or by the caller. */
                if (c == QUOTE
                        || c == OPENBRACE
                        || c == CLOSEBRACE
                        || c == COMMA
                        || c == COLON) {
                    ucbuf_ungetc(c, buf);
                    break;
                }

                /* Whitespace ends the piece too (isWhitespace() also
                   advances lineCount on a line break). */
                if (isWhitespace(c)) {
                    break;
                }

                if (c == ESCAPE) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                }

                U_APPEND_CHAR32(c, pTarget,len);
                pTarget = target;
                ustr_uscat(token, pTarget,len, status);
                len=0;
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }
        }

        /* DO skip whitespace: look for whatever follows this piece */
        c = getNextChar(buf, true, NULL, status);

        /* TOK_STRING is returned even though *status has failed */
        if (U_FAILURE(*status)) {
            return TOK_STRING;
        }

        /* Punctuation ends the whole token; leave it for the next call. */
        if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
            ucbuf_ungetc(c, buf);
            return TOK_STRING;
        }

        /* Otherwise c starts another piece to be merged into this token.
           NOTE(review): c is not compared with U_EOF here, so an EOF from
           getNextChar() goes around the loop as if it were the start of
           an unquoted piece - confirm this is intended. */
    }
}
    321 
    322 /* Retrieve the next character.  If skipwhite is
    323   true, whitespace is skipped as well. */
    324 static UChar32 getNextChar(UCHARBUF* buf,
    325                           UBool skipwhite,
    326                           struct UString *token,
    327                           UErrorCode *status) {
    328    UChar32 c, c2;
    329 
    330    if (U_FAILURE(*status)) {
    331        return U_EOF;
    332    }
    333 
    334    for (;;) {
    335        c = ucbuf_getc(buf,status);
    336 
    337        if (c == U_EOF) {
    338            return U_EOF;
    339        }
    340 
    341        if (skipwhite && isWhitespace(c)) {
    342            continue;
    343        }
    344 
    345        /* This also handles the get() failing case */
    346        if (c != SLASH) {
    347            return c;
    348        }
    349 
    350        c = ucbuf_getc(buf,status); /* "/c" */
    351 
    352        if (c == U_EOF) {
    353            return U_EOF;
    354        }
    355 
    356        switch (c) {
    357        case SLASH:  /* "//" */
    358            seekUntilNewline(buf, NULL, status);
    359            break;
    360 
    361        case ASTERISK:  /* " / * " */
    362            c2 = ucbuf_getc(buf, status); /* "/ * c" */
    363            if(c2 == ASTERISK){  /* "/ * *" */
    364                /* parse multi-line comment and store it in token*/
    365                seekUntilEndOfComment(buf, token, status);
    366            } else {
    367                ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ *".  Include c2  back in buffer.  */
    368                seekUntilEndOfComment(buf, NULL, status);
    369            }
    370            break;
    371 
    372        default:
    373            ucbuf_ungetc(c, buf); /* "/c" - put back the c */
    374            /* If get() failed this is a NOP */
    375            return SLASH;
    376        }
    377 
    378    }
    379 }
    380 
    381 static void seekUntilNewline(UCHARBUF* buf,
    382                             struct UString *token,
    383                             UErrorCode *status) {
    384    UChar32 c;
    385 
    386    if (U_FAILURE(*status)) {
    387        return;
    388    }
    389 
    390    do {
    391        c = ucbuf_getc(buf,status);
    392        /* add the char to token */
    393        if(token!=NULL){
    394            ustr_u32cat(token, c, status);
    395        }
    396    } while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);
    397 }
    398 
    399 static void seekUntilEndOfComment(UCHARBUF *buf,
    400                                  struct UString *token,
    401                                  UErrorCode *status) {
    402    UChar32  c, d;
    403    uint32_t line;
    404 
    405    if (U_FAILURE(*status)) {
    406        return;
    407    }
    408 
    409    line = lineCount;
    410 
    411    do {
    412        c = ucbuf_getc(buf, status);
    413 
    414        if (c == ASTERISK) {
    415            d = ucbuf_getc(buf, status);
    416 
    417            if (d != SLASH) {
    418                ucbuf_ungetc(d, buf);
    419            } else {
    420                break;
    421            }
    422        }
    423        /* add the char to token */
    424        if(token!=NULL){
    425            ustr_u32cat(token, c, status);
    426        }
    427        /* increment the lineCount */
    428        isNewline(c);
    429 
    430    } while (c != U_EOF && *status == U_ZERO_ERROR);
    431 
    432    if (c == U_EOF) {
    433        *status = U_INVALID_FORMAT_ERROR;
    434        error(line, "unterminated comment detected");
    435    }
    436 }
    437 
    438 U_CFUNC UChar32 unescape(UCHARBUF *buf, UErrorCode *status) {
    439    if (U_FAILURE(*status)) {
    440        return U_EOF;
    441    }
    442 
    443    /* We expect to be called after the ESCAPE has been seen, but
    444     * u_fgetcx needs an ESCAPE to do its magic. */
    445    ucbuf_ungetc(ESCAPE, buf);
    446 
    447    return ucbuf_getcx32(buf, status);
    448 }
    449 
    450 static UBool isWhitespace(UChar32 c) {
    451    switch (c) {
    452        /* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
    453    case 0x000A:
    454    case 0x2029:
    455        lineCount++;
    456        U_FALLTHROUGH;
    457    case 0x000D:
    458    case 0x0020:
    459    case 0x0009:
    460    case 0xFEFF:
    461        return true;
    462 
    463    default:
    464        return false;
    465    }
    466 }
    467 
    468 static UBool isNewline(UChar32 c) {
    469    switch (c) {
    470        /* '\n', '\r', 0x2029 */
    471    case 0x000A:
    472    case 0x2029:
    473        lineCount++;
    474        U_FALLTHROUGH;
    475    case 0x000D:
    476        return true;
    477 
    478    default:
    479        return false;
    480    }
    481 }
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE