tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucbuf.h (8413B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1998-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
     11 * File ucbuf.h
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   05/10/01    Ram         Creation.
     17 *
     18 * This API reads in files and returns UChars
     19 *******************************************************************************
     20 */
     21 
     22 #include "unicode/localpointer.h"
     23 #include "unicode/ucnv.h"
     24 #include "filestrm.h"
     25 
     26 #if !UCONFIG_NO_CONVERSION
     27 
     28 #ifndef UCBUF_H
     29 #define UCBUF_H 1
     30 
     31 typedef struct UCHARBUF UCHARBUF; /* opaque handle: only forward-declared in this header */
     32 /**
     33 * End of file value: (int32_t)0xFFFFFFFF.
        * NOTE(review): presumably what ucbuf_getc(), ucbuf_getc32() and ucbuf_getcx32()
        * return once the input is exhausted -- confirm against the implementation.
     34 */
     35 #define U_EOF ((int32_t)0xFFFFFFFF)
     36 /**
     37 * Error value if a sequence cannot be unescaped: (int32_t)0xFFFFFFFE.
        * NOTE(review): presumably reported by ucbuf_getcx32(), the only reader declared
        * in this header that unescapes -- confirm against the implementation.
     38 */
     39 #define U_ERR ((int32_t)0xFFFFFFFE)
     40 
     41 typedef struct ULine ULine;
     42 
        /**
        * A UChar pointer paired with an int32_t length.
        * NOTE(review): presumably `len` counts the UChars found at `name`. No function
        * declared in this header takes or returns a ULine -- confirm where it is used.
        */
     43 struct  ULine {
     44    UChar     *name;
     45    int32_t   len;
     46 };
     47 
     48 /**
     49 * Opens a UCHARBUF on the named file, using the given code page for conversion
     50 * @param fileName  Name of the file to open.
     51 * @param codepage  The encoding of the file stream to convert to Unicode.
     52 *                  If *codepage is NULL on input the API will try to autodetect
     53 *                  popular Unicode encodings
        *                  NOTE(review): presumably *codepage then receives the name of
        *                  the detected encoding -- confirm against the implementation.
     54 * @param showWarning Flag to print out warnings to STDOUT
     55 * @param buffered  If true performs a buffered read of the input file. If false reads
     56 *                  the whole file into memory and converts it.
     57 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
     58 *        indicates a failure on entry, the function will immediately return.
     59 *        On exit the value will indicate the success of the operation.
     60 * @return pointer to the newly opened UCHARBUF; release it with ucbuf_close()
     61 */
     62 U_CAPI UCHARBUF* U_EXPORT2
     63 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
     64 
     65 /**
     66 * Gets a UTF-16 code unit at the current position from the converted buffer
     67 * and increments the current position
     68 * @param buf Pointer to UCHARBUF structure
     69 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
     70 *        indicates a failure on entry, the function will immediately return.
     71 *        On exit the value will indicate the success of the operation.
        * @return The code unit, carried in an int32_t.
        *         NOTE(review): presumably U_EOF once the input is exhausted -- confirm.
     72 */
     73 U_CAPI int32_t U_EXPORT2
     74 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
     75 
     76 /**
     77 * Gets a UTF-32 code point at the current position from the converted buffer
     78 * and increments the current position
     79 * @param buf Pointer to UCHARBUF structure
     80 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
     81 *        indicates a failure on entry, the function will immediately return.
     82 *        On exit the value will indicate the success of the operation.
        * @return The code point, carried in an int32_t.
        *         NOTE(review): presumably U_EOF once the input is exhausted -- confirm.
     83 */
     84 U_CAPI int32_t U_EXPORT2
     85 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
     86 
     87 /**
     88 * Gets a UTF-16 code unit at the current position from the converted buffer after
     89 * unescaping and increments the current position. If the escape sequence is for UTF-32
     90 * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned
     91 * @param buf Pointer to UCHARBUF structure
     92 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
     93 *        indicates a failure on entry, the function will immediately return.
     94 *        On exit the value will indicate the success of the operation.
        * @return The unescaped code unit or code point, carried in an int32_t.
        *         NOTE(review): presumably U_ERR when the escape sequence cannot be
        *         unescaped and U_EOF once the input is exhausted -- confirm.
     95 */
     96 U_CAPI int32_t U_EXPORT2
     97 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
     98 
     99 /**
    100 * Gets a pointer to the current position in the internal buffer and length of the line.
    101 * It is imperative to make a copy of the returned buffer before performing operations on it.
    102 * @param buf Pointer to UCHARBUF structure
    103 * @param len Output param to receive the length of the buffer returned till end of the line
    104 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    105 *        indicates a failure on entry, the function will immediately return.
    106 *        On exit the value will indicate the success of the operation.
    107 *        Error: U_TRUNCATED_CHAR_FOUND
    108 * @return Pointer to the internal buffer, NULL if EOF
    109 */
    110 U_CAPI const UChar* U_EXPORT2
    111 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
    112 
    113 
    114 /**
    115 * Resets the buffers and the underlying file stream.
        * Nothing is returned; failures are reported only through err.
    116 * @param buf Pointer to UCHARBUF structure
    117 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    118 *        indicates a failure on entry, the function will immediately return.
    119 *        On exit the value will indicate the success of the operation.
    120 */
    121 U_CAPI void U_EXPORT2
    122 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
    123 
    124 /**
    125 * Returns a pointer to the internal converted buffer
    126 * @param buf Pointer to UCHARBUF structure
    127 * @param len Pointer to int32_t to receive the length of buffer
    128 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    129 *        indicates a failure on entry, the function will immediately return.
    130 *        On exit the value will indicate the success of the operation.
    131 * @return Pointer to internal UChar buffer (declared const: not to be written through)
    132 */
    133 U_CAPI const UChar* U_EXPORT2
    134 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
    135 
    136 /**
    137 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
        * Takes no UErrorCode and returns nothing, so no failure is reported to the caller.
    138 * @param buf Pointer to UCHARBUF structure
    139 */
    140 U_CAPI void U_EXPORT2
    141 ucbuf_close(UCHARBUF* buf);
    142 
    143 #if U_SHOW_CPLUSPLUS_API
    144 
    145 U_NAMESPACE_BEGIN
    146 
    147 /**
    148 * \class LocalUCHARBUFPointer
    149 * "Smart pointer" class, closes a UCHARBUF via ucbuf_close().
    150 * For most methods see the LocalPointerBase base class.
        * Only declared when U_SHOW_CPLUSPLUS_API is nonzero.
    151 *
    152 * @see LocalPointerBase
    153 * @see LocalPointer
    154 */
    155 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
    156 
    157 U_NAMESPACE_END
    158 
    159 #endif /* U_SHOW_CPLUSPLUS_API */
    160 
    161 /**
    162 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
        * Takes no UErrorCode and returns nothing, so no failure is reported to the caller.
        * @param ungetChar NOTE(review): undocumented upstream; presumably the code point
        *                  most recently read from buf that is being pushed back -- confirm.
        * @param buf Pointer to UCHARBUF structure
    163 */
    164 U_CAPI void U_EXPORT2
    165 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
    166 
    167 
    168 /**
    169 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
    170 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
    171 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
    172 * is necessary.
    173 * If the charset was autodetected, the caller must close both the input FileStream
    174 * and the converter.
    175 *
    176 * @param fileName The file name to be opened and encoding autodetected
    177 * @param cp Output param to receive the detected encoding
    178 * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
        * @param signatureLength NOTE(review): undocumented upstream; presumably an output param
        *        receiving the byte length of the detected Unicode signature -- confirm.
    179 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
    180 *        indicates a failure on entry, the function will immediately return.
    181 *        On exit the value will indicate the success of the operation.
    182 * @return The input FileStream if its charset was autodetected; NULL otherwise.
    183 */
    184 U_CAPI FileStream * U_EXPORT2
    185 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
    186 int32_t* signatureLength, UErrorCode* status);
    187 
    188 /**
    189 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
    190 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
    191 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
    192 * is necessary.
    193 * If the charset was autodetected, the caller must close the converter.
    194 *
    195 * @param in The file stream whose encoding is to be detected
    196 * @param cp Output param to receive the detected encoding
    197 * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
        * @param signatureLength NOTE(review): undocumented upstream; presumably an output param
        *        receiving the byte length of the detected Unicode signature -- confirm.
    198 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
    199 *        indicates a failure on entry, the function will immediately return.
    200 *        On exit the value will indicate the success of the operation.
    201 * @return Boolean whether the Unicode charset was autodetected.
    202 */
    203 
    204 U_CAPI UBool U_EXPORT2
    205 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
    206 
    207 /**
    208 * Returns the approximate size in UChars required for converting the file to UChars
        * @param buf Pointer to UCHARBUF structure
        * @return The approximate size, counted in UChars
    209 */
    210 U_CAPI int32_t U_EXPORT2
    211 ucbuf_size(UCHARBUF* buf);
    212 
        /**
        * NOTE(review): this declaration is undocumented upstream. Apart from the parameter
        * and return types, everything below is an assumption to confirm against the
        * implementation.
        * Presumably builds the path of fileName relative to inputDir into target.
        * @param inputDir Directory name (read-only string)
        * @param fileName File name (read-only string)
        * @param target   Caller-supplied writable char buffer
        * @param len      Pointer to an int32_t; presumably the capacity of target on input
        *                 and the resulting length on output -- confirm
        * @param status   Pointer to a UErrorCode
        * @return A read-only string; presumably target itself -- confirm
        */
    213 U_CAPI const char* U_EXPORT2
    214 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
    215 
    216 #endif
    217 #endif