tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

utfstring.h (5011B)


      1 // © 2025 and later: Unicode, Inc. and others.
      2 // License & terms of use: https://www.unicode.org/copyright.html
      3 
      4 // utfstring.h
      5 // created: 2025jul18 Markus W. Scherer
      6 
      7 #ifndef __UTFSTRING_H__
      8 #define __UTFSTRING_H__
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
     13 
     14 #include "unicode/utf16.h"
     15 
     16 /**
     17 * \file
     18 * \brief C++ header-only API: C++ string helper functions.
     19 */
     20 
     21 #ifndef U_HIDE_DRAFT_API
     22 
     23 namespace U_HEADER_ONLY_NAMESPACE {
     24 namespace utfstring {
     25 
     26 // Write code points to strings -------------------------------------------- ***
     27 
     28 #ifndef U_IN_DOXYGEN
     29 namespace prv {
     30 
     31 // This function, and the public wrappers,
     32 // want to be U_FORCE_INLINE but the gcc-debug-build-and-test CI check failed with
     33 // error: ‘always_inline’ function might not be inlinable [-Werror=attributes]
     34 template<typename StringClass, bool validate>
     35 inline StringClass &appendCodePoint(StringClass &s, uint32_t c) {
     36    using Unit = typename StringClass::value_type;
     37    if constexpr (sizeof(Unit) == 1) {
     38        // UTF-8: Similar to U8_APPEND().
     39        if (c <= 0x7f) {
     40            s.push_back(static_cast<Unit>(c));
     41        } else {
     42            Unit buf[4];
     43            uint8_t len;
     44            if (c <= 0x7ff) {
     45                len = 2;
     46                buf[2] = (c >> 6) | 0xc0;
     47            } else {
     48                if (validate ?
     49                        c < 0xd800 ||
     50                            (c < 0xe000 || c > 0x10ffff ? (c = 0xfffd, true) : c <= 0xffff) :
     51                        c <= 0xffff) {
     52                    len = 3;
     53                    buf[1] = (c >> 12) | 0xe0;
     54                } else {
     55                    len = 4;
     56                    buf[0] = (c >> 18) | 0xf0;
     57                    buf[1] = ((c >> 12) & 0x3f) | 0x80;
     58                }
     59                buf[2] = ((c >> 6) & 0x3f) | 0x80;
     60            }
     61            buf[3] = (c & 0x3f) | 0x80;
     62            s.append(buf + 4 - len, len);
     63        }
     64    } else if constexpr (sizeof(Unit) == 2) {
     65        // UTF-16: Similar to U16_APPEND().
     66        if (validate ?
     67                c < 0xd800 || (c < 0xe000 || c > 0x10ffff ? (c = 0xfffd, true) : c <= 0xffff) :
     68                c <= 0xffff) {
     69            s.push_back(static_cast<Unit>(c));
     70        } else {
     71            Unit buf[2] = { U16_LEAD(c), U16_TRAIL(c) };
     72            s.append(buf, 2);
     73        }
     74    } else {
     75        // UTF-32
     76        s.push_back(!validate || U_IS_SCALAR_VALUE(c) ? c : 0xfffd);
     77    }
     78    return s;
     79 }
     80 
     81 }  // namespace prv
     82 #endif  // U_IN_DOXYGEN
     83 
     84 #ifndef U_HIDE_DRAFT_API
     85 /**
     86 * Appends the code point to the string.
     87 * Appends the U+FFFD replacement character instead if c is not a scalar value.
     88 * See https://www.unicode.org/glossary/#unicode_scalar_value
     89 *
     90 * @tparam StringClass A version of std::basic_string (or a compatible type)
     91 * @param s The string to append to
     92 * @param c The code point to append
     93 * @return s
     94 * @draft ICU 78
     95 * @see U_IS_SCALAR_VALUE
     96 */
     97 template<typename StringClass>
     98 inline StringClass &appendOrFFFD(StringClass &s, UChar32 c) {
     99    return prv::appendCodePoint<StringClass, true>(s, c);
    100 }
    101 
    102 /**
    103 * Appends the code point to the string.
    104 * The code point must be a scalar value; otherwise the behavior is undefined.
    105 * See https://www.unicode.org/glossary/#unicode_scalar_value
    106 *
    107 * @tparam StringClass A version of std::basic_string (or a compatible type)
    108 * @param s The string to append to
    109 * @param c The code point to append (must be a scalar value)
    110 * @return s
    111 * @draft ICU 78
    112 * @see U_IS_SCALAR_VALUE
    113 */
    114 template<typename StringClass>
    115 inline StringClass &appendUnsafe(StringClass &s, UChar32 c) {
    116    return prv::appendCodePoint<StringClass, false>(s, c);
    117 }
    118 
    119 /**
    120 * Returns the code point as a string of code units.
    121 * Returns the U+FFFD replacement character instead if c is not a scalar value.
    122 * See https://www.unicode.org/glossary/#unicode_scalar_value
    123 *
    124 * @tparam StringClass A version of std::basic_string (or a compatible type)
    125 * @param c The code point
    126 * @return the string of c's code units
    127 * @draft ICU 78
    128 * @see U_IS_SCALAR_VALUE
    129 */
    130 template<typename StringClass>
    131 inline StringClass encodeOrFFFD(UChar32 c) {
    132    StringClass s;
    133    prv::appendCodePoint<StringClass, true>(s, c);
    134    return s;
    135 }
    136 
    137 /**
    138 * Returns the code point as a string of code units.
    139 * The code point must be a scalar value; otherwise the behavior is undefined.
    140 * See https://www.unicode.org/glossary/#unicode_scalar_value
    141 *
    142 * @tparam StringClass A version of std::basic_string (or a compatible type)
    143 * @param c The code point
    144 * @return the string of c's code units
    145 * @draft ICU 78
    146 * @see U_IS_SCALAR_VALUE
    147 */
    148 template<typename StringClass>
    149 inline StringClass encodeUnsafe(UChar32 c) {
    150    StringClass s;
    151    prv::appendCodePoint<StringClass, false>(s, c);
    152    return s;
    153 }
    154 #endif  // U_HIDE_DRAFT_API
    155 
    156 }  // namespace utfstring
    157 }  // namespace U_HEADER_ONLY_NAMESPACE
    158 
    159 #endif  // U_HIDE_DRAFT_API
    160 #endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
    161 #endif  // __UTFSTRING_H__