tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

avstring.h (14940B)


      1 /*
      2 * Copyright (c) 2007 Mans Rullgard
      3 *
      4 * This file is part of FFmpeg.
      5 *
      6 * FFmpeg is free software; you can redistribute it and/or
      7 * modify it under the terms of the GNU Lesser General Public
      8 * License as published by the Free Software Foundation; either
      9 * version 2.1 of the License, or (at your option) any later version.
     10 *
     11 * FFmpeg is distributed in the hope that it will be useful,
     12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 * Lesser General Public License for more details.
     15 *
     16 * You should have received a copy of the GNU Lesser General Public
     17 * License along with FFmpeg; if not, write to the Free Software
     18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
     19 */
     20 
     21 #ifndef AVUTIL_AVSTRING_H
     22 #define AVUTIL_AVSTRING_H
     23 
     24 #include <stddef.h>
     25 #include <stdint.h>
     26 #include "attributes.h"
     27 
     28 /**
     29 * @addtogroup lavu_string
     30 * @{
     31 */
     32 
     33 /**
     34 * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to
     35 * the address of the first character in str after the prefix.
     36 *
     37 * @param str input string
     38 * @param pfx prefix to test
     39 * @param ptr updated if the prefix is matched inside str
     40 * @return non-zero if the prefix matches, zero otherwise
     41 */
     42 int av_strstart(const char *str, const char *pfx, const char **ptr);
     43 
     44 /**
     45 * Return non-zero if pfx is a prefix of str independent of case. If
     46 * it is, *ptr is set to the address of the first character in str
     47 * after the prefix.
     48 *
     49 * @param str input string
     50 * @param pfx prefix to test
     51 * @param ptr updated if the prefix is matched inside str
     52 * @return non-zero if the prefix matches, zero otherwise
     53 */
     54 int av_stristart(const char *str, const char *pfx, const char **ptr);
     55 
     56 /**
     57 * Locate the first case-independent occurrence in the string haystack
     58 * of the string needle.  A zero-length string needle is considered to
     59 * match at the start of haystack.
     60 *
     61 * This function is a case-insensitive version of the standard strstr().
     62 *
     63 * @param haystack string to search in
     64 * @param needle   string to search for
     65 * @return         pointer to the located match within haystack
     66 *                 or a null pointer if no match
     67 */
     68 char *av_stristr(const char *haystack, const char *needle);
     69 
     70 /**
     71 * Locate the first occurrence of the string needle in the string haystack
     72 * where not more than hay_length characters are searched. A zero-length
     73 * string needle is considered to match at the start of haystack.
     74 *
     75 * This function is a length-limited version of the standard strstr().
     76 *
     77 * @param haystack   string to search in
     78 * @param needle     string to search for
     79 * @param hay_length length of string to search in
     80 * @return           pointer to the located match within haystack
     81 *                   or a null pointer if no match
     82 */
     83 char *av_strnstr(const char *haystack, const char *needle, size_t hay_length);
     84 
     85 /**
     86 * Copy the string src to dst, but no more than size - 1 bytes, and
     87 * null-terminate dst.
     88 *
     89 * This function is the same as BSD strlcpy().
     90 *
     91 * @param dst destination buffer
     92 * @param src source string
     93 * @param size size of destination buffer
     94 * @return the length of src
     95 *
     96 * @warning since the return value is the length of src, src absolutely
     97 * _must_ be a properly 0-terminated string, otherwise this will read beyond
     98 * the end of the buffer and possibly crash.
     99 */
    100 size_t av_strlcpy(char *dst, const char *src, size_t size);
    101 
    102 /**
    103 * Append the string src to the string dst, but to a total length of
    104 * no more than size - 1 bytes, and null-terminate dst.
    105 *
    106 * This function is similar to BSD strlcat(), but differs when
    107 * size <= strlen(dst).
    108 *
    109 * @param dst destination buffer
    110 * @param src source string
    111 * @param size size of destination buffer
    112 * @return the total length of src and dst
    113 *
    114 * @warning since the return value uses the lengths of src and dst, both
    115 * absolutely _must_ be properly 0-terminated strings, otherwise this
    116 * will read beyond the end of the buffer and possibly crash.
    117 */
    118 size_t av_strlcat(char *dst, const char *src, size_t size);
    119 
    120 /**
    121 * Append output to a string, according to a format. Never write out of
    122 * the destination buffer, and always put a terminating 0 within
    123 * the buffer.
    124 * @param dst destination buffer (string to which the output is
    125 *  appended)
    126 * @param size total size of the destination buffer
    127 * @param fmt printf-compatible format string, specifying how the
    128 *  following parameters are used
    129 * @return the length of the string that would have been generated
    130 *  if enough space had been available
    131 */
    132 size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4);
    133 
    134 /**
    135 * Get the count of continuous non zero chars starting from the beginning.
    136 *
    137 * @param s   the string whose length to count
    138 * @param len maximum number of characters to check in the string, that
    139 *            is the maximum value which is returned by the function
    140 */
    141 static inline size_t av_strnlen(const char *s, size_t len)
    142 {
    143    size_t i;
    144    for (i = 0; i < len && s[i]; i++)
    145        ;
    146    return i;
    147 }
    148 
    149 /**
    150 * Print arguments following specified format into a large enough auto
    151 * allocated buffer. It is similar to GNU asprintf().
    152 * @param fmt printf-compatible format string, specifying how the
    153 *            following parameters are used.
    154 * @return the allocated string
    155 * @note You have to free the string yourself with av_free().
    156 */
    157 char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2);
    158 
    159 /**
    160 * Unescape the given string until a non escaped terminating char,
    161 * and return the token corresponding to the unescaped string.
    162 *
    163 * The normal \ and ' escaping is supported. Leading and trailing
    164 * whitespaces are removed, unless they are escaped with '\' or are
    165 * enclosed between ''.
    166 *
    167 * @param buf the buffer to parse, buf will be updated to point to the
    168 * terminating char
    169 * @param term a 0-terminated list of terminating chars
    170 * @return the malloced unescaped string, which must be av_freed by
    171 * the user, NULL in case of allocation failure
    172 */
    173 char *av_get_token(const char **buf, const char *term);
    174 
    175 /**
    176 * Split the string into several tokens which can be accessed by
    177 * successive calls to av_strtok().
    178 *
    179 * A token is defined as a sequence of characters not belonging to the
    180 * set specified in delim.
    181 *
    182 * On the first call to av_strtok(), s should point to the string to
    183 * parse, and the value of saveptr is ignored. In subsequent calls, s
    184 * should be NULL, and saveptr should be unchanged since the previous
    185 * call.
    186 *
    187 * This function is similar to strtok_r() defined in POSIX.1.
    188 *
    189 * @param s the string to parse, may be NULL
    190 * @param delim 0-terminated list of token delimiters, must be non-NULL
    191 * @param saveptr user-provided pointer which points to stored
    192 * information necessary for av_strtok() to continue scanning the same
    193 * string. saveptr is updated to point to the next character after the
    194 * first delimiter found, or to NULL if the string was terminated
    195 * @return the found token, or NULL when no token is found
    196 */
    197 char *av_strtok(char *s, const char *delim, char **saveptr);
    198 
    199 /**
    200 * Locale-independent conversion of ASCII isdigit.
    201 */
    202 static inline av_const int av_isdigit(int c)
    203 {
    204    return c >= '0' && c <= '9';
    205 }
    206 
    207 /**
    208 * Locale-independent conversion of ASCII isgraph.
    209 */
    210 static inline av_const int av_isgraph(int c)
    211 {
    212    return c > 32 && c < 127;
    213 }
    214 
    215 /**
    216 * Locale-independent conversion of ASCII isspace.
    217 */
    218 static inline av_const int av_isspace(int c)
    219 {
    220    return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' ||
    221           c == '\v';
    222 }
    223 
    224 /**
    225 * Locale-independent conversion of ASCII characters to uppercase.
    226 */
    227 static inline av_const int av_toupper(int c)
    228 {
    229    if (c >= 'a' && c <= 'z')
    230        c ^= 0x20;
    231    return c;
    232 }
    233 
    234 /**
    235 * Locale-independent conversion of ASCII characters to lowercase.
    236 */
    237 static inline av_const int av_tolower(int c)
    238 {
    239    if (c >= 'A' && c <= 'Z')
    240        c ^= 0x20;
    241    return c;
    242 }
    243 
    244 /**
    245 * Locale-independent conversion of ASCII isxdigit.
    246 */
    247 static inline av_const int av_isxdigit(int c)
    248 {
    249    c = av_tolower(c);
    250    return av_isdigit(c) || (c >= 'a' && c <= 'f');
    251 }
    252 
    253 /**
    254 * Locale-independent case-insensitive compare.
    255 * @note This means only ASCII-range characters are case-insensitive
    256 */
    257 int av_strcasecmp(const char *a, const char *b);
    258 
    259 /**
    260 * Locale-independent case-insensitive compare.
    261 * @note This means only ASCII-range characters are case-insensitive
    262 */
    263 int av_strncasecmp(const char *a, const char *b, size_t n);
    264 
    265 /**
    266 * Locale-independent string replacement.
    267 * @note This means only ASCII-range characters are replaced.
    268 */
    269 char *av_strireplace(const char *str, const char *from, const char *to);
    270 
    271 /**
    272 * Thread safe basename.
    273 * @param path the string to parse, on DOS both \ and / are considered separators.
    274 * @return pointer to the basename substring.
    275 * If path does not contain a slash, the function returns a copy of path.
    276 * If path is a NULL pointer or points to an empty string, a pointer
    277 * to a string "." is returned.
    278 */
    279 const char *av_basename(const char *path);
    280 
    281 /**
    282 * Thread safe dirname.
    283 * @param path the string to parse, on DOS both \ and / are considered separators.
    284 * @return A pointer to a string that's the parent directory of path.
    285 * If path is a NULL pointer or points to an empty string, a pointer
    286 * to a string "." is returned.
    287 * @note the function may modify the contents of the path, so copies should be passed.
    288 */
    289 const char *av_dirname(char *path);
    290 
    291 /**
    292 * Match instances of a name in a comma-separated list of names.
    293 * List entries are checked from the start to the end of the names list,
    294 * the first match ends further processing. If an entry prefixed with '-'
    295 * matches, then 0 is returned. The "ALL" list entry is considered to
    296 * match all names.
    297 *
    298 * @param name  Name to look for.
    299 * @param names List of names.
    300 * @return 1 on match, 0 otherwise.
    301 */
    302 int av_match_name(const char *name, const char *names);
    303 
    304 /**
    305 * Append path component to the existing path.
    306 * Path separator '/' is placed between when needed.
    307 * The resulting string has to be freed with av_free().
    308 * @param path      base path
    309 * @param component component to be appended
    310 * @return new path or NULL on error.
    311 */
    312 char *av_append_path_component(const char *path, const char *component);
    313 
    314 enum AVEscapeMode {
    315    AV_ESCAPE_MODE_AUTO,      ///< Use auto-selected escaping mode.
    316    AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping.
    317    AV_ESCAPE_MODE_QUOTE,     ///< Use single-quote escaping.
    318    AV_ESCAPE_MODE_XML,       ///< Use XML non-markup character data escaping.
    319 };
    320 
    321 /**
    322 * Consider spaces special and escape them even in the middle of the
    323 * string.
    324 *
    325 * This is equivalent to adding the whitespace characters to the special
    326 * characters lists, except it is guaranteed to use the exact same list
    327 * of whitespace characters as the rest of libavutil.
    328 */
    329 #define AV_ESCAPE_FLAG_WHITESPACE (1 << 0)
    330 
    331 /**
    332 * Escape only specified special characters.
    333 * Without this flag, escape also any characters that may be considered
    334 * special by av_get_token(), such as the single quote.
    335 */
    336 #define AV_ESCAPE_FLAG_STRICT (1 << 1)
    337 
    338 /**
    339 * Within AV_ESCAPE_MODE_XML, additionally escape single quotes for single
    340 * quoted attributes.
    341 */
    342 #define AV_ESCAPE_FLAG_XML_SINGLE_QUOTES (1 << 2)
    343 
    344 /**
    345 * Within AV_ESCAPE_MODE_XML, additionally escape double quotes for double
    346 * quoted attributes.
    347 */
    348 #define AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES (1 << 3)
    349 
    350 
    351 /**
    352 * Escape string in src, and put the escaped string in an allocated
    353 * string in *dst, which must be freed with av_free().
    354 *
    355 * @param dst           pointer where an allocated string is put
    356 * @param src           string to escape, must be non-NULL
    357 * @param special_chars string containing the special characters which
    358 *                      need to be escaped, can be NULL
    359 * @param mode          escape mode to employ, see enum AVEscapeMode.
    360 *                      Any unknown value for mode will be considered equivalent to
    361 *                      AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without
    362 *                      notice.
    363 * @param flags         flags which control how to escape, see AV_ESCAPE_FLAG_* macros
    364 * @return the length of the allocated string, or a negative error code in case of error
    365 * @see av_bprint_escape()
    366 */
    367 av_warn_unused_result
    368 int av_escape(char **dst, const char *src, const char *special_chars,
    369              enum AVEscapeMode mode, int flags);
    370 
    371 #define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES          1 ///< accept codepoints over 0x10FFFF
    372 #define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS             2 ///< accept non-characters - 0xFFFE and 0xFFFF
    373 #define AV_UTF8_FLAG_ACCEPT_SURROGATES                 4 ///< accept UTF-16 surrogates codes
    374 #define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML
    375 
    376 #define AV_UTF8_FLAG_ACCEPT_ALL \
    377    AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES
    378 
    379 /**
    380 * Read and decode a single UTF-8 code point (character) from the
    381 * buffer in *bufp, and update *bufp to point to the next byte to
    382 * decode.
    383 *
    384 * In case of an invalid byte sequence, the pointer will be updated to
    385 * the next byte after the invalid sequence and the function will
    386 * return an error code.
    387 *
    388 * Depending on the specified flags, the function will also fail in
    389 * case the decoded code point does not belong to a valid range.
    390 *
    391 * @note For speed-relevant code a carefully implemented use of
    392 * GET_UTF8() may be preferred.
    393 *
    394 * @param codep   pointer used to return the parsed code in case of success.
    395 *                The value in *codep is set even in case the range check fails.
    396 * @param bufp    pointer to the address of the first byte of the sequence
    397 *                to decode, updated by the function to point to the
    398 *                byte next after the decoded sequence
    399 * @param buf_end pointer to the end of the buffer, points to the next
    400 *                byte past the last in the buffer. This is used to
    401 *                avoid buffer overreads (in case of an unfinished
    402 *                UTF-8 sequence towards the end of the buffer).
    403 * @param flags   a collection of AV_UTF8_FLAG_* flags
    404 * @return >= 0 in case a sequence was successfully read, a negative
    405 * value in case of invalid sequence
    406 */
    407 av_warn_unused_result
    408 int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end,
    409                   unsigned int flags);
    410 
    411 /**
    412 * Check if a name is in a list.
    413 * @returns 0 if not found, or the 1 based index where it has been found in the
    414 *            list.
    415 */
    416 int av_match_list(const char *name, const char *list, char separator);
    417 
    418 /**
    419 * See libc sscanf manual for more information.
    420 * Locale-independent sscanf implementation.
    421 */
    422 int av_sscanf(const char *string, const char *format, ...);
    423 
    424 /**
    425 * @}
    426 */
    427 
    428 #endif /* AVUTIL_AVSTRING_H */