tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uparse.h (5023B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2000-2010, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  uparse.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2000apr18
     16 *   created by: Markus W. Scherer
     17 *
     18 *   This file provides a parser for files that are delimited by one single
     19 *   character like ';' or TAB. Example: the Unicode Character Properties files
     20 *   like UnicodeData.txt are semicolon-delimited.
     21 */
     22 
     23 #ifndef __UPARSE_H__
     24 #define __UPARSE_H__
     25 
     26 #include "unicode/utypes.h"
     27 
     28 /**
     29 * Is c an invariant-character whitespace, i.e. one of space, TAB, CR or LF?
     30 * @param c invariant character; evaluated up to four times, so it must have no side effects
     31 */
     32 #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
     33 
     34 U_CDECL_BEGIN
     35 
     36 /**
     37 * Skip space ' ' and TAB '\t' characters at the start of s.
     38 *
     39 * @param s Pointer to characters (presumably NUL-terminated, as no length is passed - confirm).
     40 * @return Pointer to the first character at or after s that is not a space or TAB.
     41 */
     42 U_CAPI const char * U_EXPORT2
     43 u_skipWhitespace(const char *s);
     44 
     45 /**
     46 * Trim whitespace (including line endings) from the end of the string.
     47 *
     48 * @param s Pointer to the string (non-const; presumably modified in place - confirm).
     49 * @return Pointer to the new end of the string.
     50 */
     51 U_CAPI char * U_EXPORT2
     52 u_rtrim(char *s);
     53 
     54 /** Line callback type for u_parseDelimitedFile(); called for each line after it has been split into fields. */
     55 typedef void U_CALLCONV
     56 UParseLineFn(void *context,            /* same context that was passed to the parse function */
     57              char *fields[][2],        /* fields[i][0]: start of field i; fields[i][1]: its limit */
     58              int32_t fieldCount,       /* NOTE(review): presumably the number of entries in fields - confirm */
     59              UErrorCode *pErrorCode);  /* if set to an error here, the parser returns with that error */
     60 
     61 /**
     62 * Parser for files that are similar to UnicodeData.txt:
     63 * This function opens the file and reads it line by line. It skips empty lines
     64 * and comment lines that start with a '#'.
     65 * All other lines are separated into fields with one delimiter character
     66 * (semicolon for Unicode Properties files) between two fields. The last field in
     67 * a line does not need to be terminated with a delimiter.
     68 *
     69 * For each line, after segmenting it, a line function is called.
     70 * It gets passed the array of field start and limit pointers that is
     71 * passed into this parser and filled by it for each line.
     72 * For each field i of the line, the start pointer in fields[i][0]
     73 * points to the beginning of the field, while the limit pointer in fields[i][1]
     74 * points behind the field, i.e., to the delimiter or the line end.
     75 *
     76 * The context parameter of the line function is
     77 * the same as the one for the parse function.
     78 *
     79 * The line function may modify the contents of the fields including the
     80 * limit characters.
     81 *
     82 * If the file cannot be opened, or there is a parsing error, or the line function
     83 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
     84 */
     85 U_CAPI void U_EXPORT2
     86 u_parseDelimitedFile(const char *filename, char delimiter,
     87                     char *fields[][2], int32_t fieldCount,
     88                     UParseLineFn *lineFn, void *context,
     89                     UErrorCode *pErrorCode);
     90 
     91 /**
     92 * Parse a string of code points like 0061 0308 0300.
     93 * s must end with either ';' or NUL.
     94 * NOTE(review): the parsed values presumably go to dest, at most destCapacity of them - confirm.
     95 * @return Number of code points.
     96 */
     97 U_CAPI int32_t U_EXPORT2
     98 u_parseCodePoints(const char *s,
     99                  uint32_t *dest, int32_t destCapacity,
    100                  UErrorCode *pErrorCode);
    101 
    102 /**
    103 * Parse a list of code points like 0061 0308 0300
    104 * into a UChar * string.
    105 * s must end with either ';' or NUL.
    106 *
    107 * If pFirst is not NULL, the first code point is stored in *pFirst.
    108 *
    109 * @param s Input char * string.
    110 * @param dest Output string buffer.
    111 * @param destCapacity Capacity of dest in numbers of UChars.
    112 * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
    113 *               code point in the string.
    114 * @param pErrorCode ICU error code.
    115 * @return The length of the string in numbers of UChars.
    116 */
    117 U_CAPI int32_t U_EXPORT2
    118 u_parseString(const char *s,
    119              UChar *dest, int32_t destCapacity,
    120              uint32_t *pFirst,
    121              UErrorCode *pErrorCode);
    122 
    123 /**
    124 * Parse a code point range like
    125 * 0085 (a single code point) or
    126 * 4E00..9FA5 (start..end).
    127 *
    128 * s must contain such a range and end with either ';' or NUL.
    129 *
    130 * @return Length of the code point range, end-start+1.
    131 */
    132 U_CAPI int32_t U_EXPORT2
    133 u_parseCodePointRange(const char *s,
    134                      uint32_t *pStart, uint32_t *pEnd,
    135                      UErrorCode *pErrorCode);
    136 
    137 /**
    138 * Same as u_parseCodePointRange(), except that the range may be terminated by
    139 * any character, not only ';' or NUL. The position of the terminating character
    140 * is returned via the *terminator output parameter.
    141 */
    142 U_CAPI int32_t U_EXPORT2
    143 u_parseCodePointRangeAnyTerminator(const char *s,
    144                                   uint32_t *pStart, uint32_t *pEnd,
    145                                   const char **terminator,
    146                                   UErrorCode *pErrorCode);
    147 
    148 U_CAPI int32_t U_EXPORT2 /* NOTE(review): undocumented; confirm the units of sLen/destCapacity and the meaning of the return value. */
    149 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
    150 
    151 U_CDECL_END
    152 
    153 #endif