tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ustream.cpp (5782B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2001-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *  FILE NAME : ustream.cpp
      9 *
     10 *   Modification History:
     11 *
     12 *   Date        Name        Description
     13 *   06/25/2001  grhoten     Move iostream from unistr.h to here
     14 ******************************************************************************
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_CONVERSION
     20 
     21 #include "unicode/uobject.h"
     22 #include "unicode/ustream.h"
     23 #include "unicode/ucnv.h"
     24 #include "unicode/uchar.h"
     25 #include "unicode/utf16.h"
     26 #include "ustr_cnv.h"
     27 #include "cmemory.h"
     28 #include <string.h>
     29 
     30 // console IO
     31 
     32 #define STD_NAMESPACE std::
     33 
     34 #define STD_OSTREAM STD_NAMESPACE ostream
     35 #define STD_ISTREAM STD_NAMESPACE istream
     36 
     37 U_NAMESPACE_BEGIN
     38 
     39 U_IO_API STD_OSTREAM & U_EXPORT2
     40 operator<<(STD_OSTREAM& stream, const UnicodeString& str)
     41 {
     42    if(str.length() > 0) {
     43        char buffer[200];
     44        UConverter *converter;
     45        UErrorCode errorCode = U_ZERO_ERROR;
     46 
     47        // use the default converter to convert chunks of text
     48        converter = u_getDefaultConverter(&errorCode);
     49        if(U_SUCCESS(errorCode)) {
     50            const char16_t *us = str.getBuffer();
     51            const char16_t *uLimit = us + str.length();
     52            char *s, *sLimit = buffer + (sizeof(buffer) - 1);
     53            do {
     54                errorCode = U_ZERO_ERROR;
     55                s = buffer;
     56                ucnv_fromUnicode(converter, &s, sLimit, &us, uLimit, nullptr, false, &errorCode);
     57                *s = 0;
     58 
     59                // write this chunk
     60                if(s > buffer) {
     61                    stream << buffer;
     62                }
     63            } while(errorCode == U_BUFFER_OVERFLOW_ERROR);
     64            u_releaseDefaultConverter(converter);
     65        }
     66    }
     67 
     68 /*    stream.flush();*/
     69    return stream;
     70 }
     71 
     72 U_IO_API STD_ISTREAM & U_EXPORT2
     73 operator>>(STD_ISTREAM& stream, UnicodeString& str)
     74 {
     75    // This is like ICU status checking.
     76    if (stream.fail()) {
     77        return stream;
     78    }
     79 
     80    /* ipfx should eat whitespace when ios::skipws is set */
     81    char16_t uBuffer[16];
     82    char buffer[16];
     83    int32_t idx = 0;
     84    UConverter *converter;
     85    UErrorCode errorCode = U_ZERO_ERROR;
     86 
     87    // use the default converter to convert chunks of text
     88    converter = u_getDefaultConverter(&errorCode);
     89    if(U_SUCCESS(errorCode)) {
     90        char16_t *us = uBuffer;
     91        const char16_t *uLimit = uBuffer + UPRV_LENGTHOF(uBuffer);
     92        const char *s, *sLimit;
     93        char ch;
     94        char16_t ch32;
     95        UBool initialWhitespace = true;
     96        UBool continueReading = true;
     97 
     98        /* We need to consume one byte at a time to see what is considered whitespace. */
     99        while (continueReading) {
    100            ch = stream.get();
    101            if (stream.eof()) {
    102                // The EOF is only set after the get() of an unavailable byte.
    103                if (!initialWhitespace) {
    104                    stream.clear(stream.eofbit);
    105                }
    106                continueReading = false;
    107            }
    108            sLimit = &ch + static_cast<int>(continueReading);
    109            us = uBuffer;
    110            s = &ch;
    111            errorCode = U_ZERO_ERROR;
    112            /*
    113            Since we aren't guaranteed to see the state before this call,
    114            this code won't work on stateful encodings like ISO-2022 or an EBCDIC stateful encoding.
    115            We flush on the last byte to ensure that we output truncated multibyte characters.
    116            */
    117            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, nullptr, !continueReading, &errorCode);
    118            if(U_FAILURE(errorCode)) {
    119                /* Something really bad happened. setstate() isn't always an available API */
    120                stream.clear(stream.failbit);
    121                goto STOP_READING;
    122            }
    123            /* Was the character consumed? */
    124            if (us != uBuffer) {
    125                /* Reminder: ibm-1390 & JISX0213 can output 2 Unicode code points */
    126                int32_t uBuffSize = static_cast<int32_t>(us-uBuffer);
    127                int32_t uBuffIdx = 0;
    128                while (uBuffIdx < uBuffSize) {
    129                    U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32);
    130                    if (u_isWhitespace(ch32)) {
    131                        if (!initialWhitespace) {
    132                            buffer[idx++] = ch;
    133                            while (idx > 0) {
    134                                stream.putback(buffer[--idx]);
    135                            }
    136                            goto STOP_READING;
    137                        }
    138                        /* else skip intialWhitespace */
    139                    }
    140                    else {
    141                        if (initialWhitespace) {
    142                            /*
    143                            When initialWhitespace is true, we haven't appended any
    144                            character yet.  This is where we truncate the string,
    145                            to avoid modifying the string before we know if we can
    146                            actually read from the stream.
    147                            */
    148                            str.truncate(0);
    149                            initialWhitespace = false;
    150                        }
    151                        str.append(ch32);
    152                    }
    153                }
    154                idx = 0;
    155            }
    156            else {
    157                buffer[idx++] = ch;
    158            }
    159        }
    160 STOP_READING:
    161        u_releaseDefaultConverter(converter);
    162    }
    163 
    164 /*    stream.flush();*/
    165    return stream;
    166 }
    167 
    168 U_NAMESPACE_END
    169 
    170 #endif