tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsUnicharUtils.h (6785B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef nsUnicharUtils_h__
      7 #define nsUnicharUtils_h__
      8 
      9 #include "nsString.h"
     10 
     11 /* (0x3131u <= (u) && (u) <= 0x318eu) => Hangul Compatibility Jamo */
     12 /* (0xac00u <= (u) && (u) <= 0xd7a3u) => Hangul Syllables          */
     13 #define IS_CJ_CHAR(u)                                                          \
     14  ((0x2e80u <= (u) && (u) <= 0x312fu) || (0x3190u <= (u) && (u) <= 0xabffu) || \
     15   (0xf900u <= (u) && (u) <= 0xfaffu) || (0xff00u <= (u) && (u) <= 0xffefu))
     16 
     17 #define IS_ZERO_WIDTH_SPACE(u) ((u) == 0x200B)
     18 
     19 #define IS_ASCII(u) ((u) < 0x80)
     20 #define IS_ASCII_UPPER(u) (('A' <= (u)) && ((u) <= 'Z'))
     21 #define IS_ASCII_LOWER(u) (('a' <= (u)) && ((u) <= 'z'))
     22 #define IS_ASCII_ALPHA(u) (IS_ASCII_UPPER(u) || IS_ASCII_LOWER(u))
     23 #define IS_ASCII_SPACE(u) (' ' == (u))
     24 
/**
 * Case conversion on a string passed by mutable reference.
 *
 * Each overload takes a non-const nsAString& and returns nothing, so the
 * converted text is presumably written back into aString -- confirm against
 * the implementation.
 *
 * NOTE(review): judging by its name, ToLowerCaseASCII only lowers ASCII
 * letters; verify in the implementation.
 */
void ToLowerCase(nsAString& aString);
void ToLowerCaseASCII(nsAString& aString);
void ToUpperCase(nsAString& aString);
     28 
/**
 * Case conversion from one string into another.
 *
 * aSource is const and is therefore only read; aDest is the non-const
 * destination.  Presumably aDest receives the converted copy of aSource --
 * confirm against the implementation.
 */
void ToLowerCase(const nsAString& aSource, nsAString& aDest);
void ToLowerCaseASCII(const nsAString& aSource, nsAString& aDest);
void ToUpperCase(const nsAString& aSource, nsAString& aDest);
     32 
/**
 * Case mapping of a single value: takes one uint32_t and returns one
 * uint32_t.
 *
 * NOTE(review): aChar is presumably a Unicode code point (the 32-bit width
 * allows values above 0xFFFF) -- confirm against the implementation.
 */
uint32_t ToLowerCase(uint32_t aChar);
uint32_t ToUpperCase(uint32_t aChar);
uint32_t ToTitleCase(uint32_t aChar);
     36 
/**
 * Case conversion between raw char16_t buffers.
 *
 * aIn is the read-only input, aOut the writable output.
 *
 * NOTE(review): aLen is presumably the number of char16_t units in both
 * buffers; whether aIn and aOut may alias is not visible from this header --
 * confirm both against the implementation.
 */
void ToLowerCase(const char16_t* aIn, char16_t* aOut, size_t aLen);
void ToLowerCaseASCII(const char16_t* aIn, char16_t* aOut, size_t aLen);
void ToUpperCase(const char16_t* aIn, char16_t* aOut, size_t aLen);
     40 
/**
 * Single-character ToLowerCaseASCII, overloaded for char, char16_t and
 * char32_t.  Each overload returns the same type it accepts.
 *
 * NOTE(review): presumably only 'A'..'Z' are changed and every other value is
 * returned as-is -- confirm against the implementation.
 */
char ToLowerCaseASCII(const char aChar);
char16_t ToLowerCaseASCII(const char16_t aChar);
char32_t ToLowerCaseASCII(const char32_t aChar);
     44 
/**
 * Single-character ToUpperCaseASCII, overloaded for char, char16_t and
 * char32_t.  Each overload returns the same type it accepts.
 *
 * NOTE(review): presumably only 'a'..'z' are changed and every other value is
 * returned as-is -- confirm against the implementation.
 */
char ToUpperCaseASCII(const char aChar);
char16_t ToUpperCaseASCII(const char16_t aChar);
char32_t ToUpperCaseASCII(const char32_t aChar);
     48 
     49 inline bool IsUpperCase(uint32_t c) { return ToLowerCase(c) != c; }
     50 
     51 inline bool IsLowerCase(uint32_t c) { return ToUpperCase(c) != c; }
     52 
     53 #ifdef MOZILLA_INTERNAL_API
     54 
/**
 * ToFoldedCase: available in three forms -- a single uint32_t value, a string
 * passed by mutable reference, and a char16_t input/output buffer pair.
 *
 * NOTE(review): presumably applies Unicode case folding, and aLen is
 * presumably the number of char16_t units in both buffers -- confirm against
 * the implementation.
 */
uint32_t ToFoldedCase(uint32_t aChar);
void ToFoldedCase(nsAString& aString);
void ToFoldedCase(const char16_t* aIn, char16_t* aOut, size_t aLen);

/**
 * ToNaked: available for a single uint32_t value and for a string passed by
 * mutable reference.
 *
 * NOTE(review): presumably strips diacritics; the documentation of
 * CaseInsensitiveUTF8CharsEqual in this header speaks of "(naked)"
 * codepoints in the context of diacritic-insensitive matching.  Confirm
 * against the implementation.
 */
uint32_t ToNaked(uint32_t aChar);
void ToNaked(nsAString& aString);
     61 
/**
 * Comparator function over two char16_t buffers.  This header uses it as the
 * comparator argument of FindInReadable() and of Equals().
 *
 * NOTE(review): the parameters are unnamed here; presumably they are
 * (left, right, leftLength, rightLength) and the result is negative, zero or
 * positive in the usual ordering sense -- confirm against the implementation.
 */
int32_t nsCaseInsensitiveStringComparator(const char16_t*, const char16_t*,
                                          size_t, size_t);

/**
 * Comparator function with the same parameter layout, but over two char
 * buffers.
 *
 * NOTE(review): judging by the name, the buffers hold UTF-8 text -- confirm
 * against the implementation.
 */
int32_t nsCaseInsensitiveUTF8StringComparator(const char*, const char*, size_t,
                                              size_t);
     67 
     68 class nsCaseInsensitiveStringArrayComparator {
     69 public:
     70  template <class A, class B>
     71  bool Equals(const A& a, const B& b) const {
     72    return a.Equals(b, nsCaseInsensitiveStringComparator);
     73  }
     74 };
     75 
/**
 * Comparator function over two char16_t buffers, with the same parameter
 * layout as nsCaseInsensitiveStringComparator.
 *
 * NOTE(review): judging by the name, only ASCII letters are compared
 * case-insensitively -- confirm against the implementation.
 */
int32_t nsASCIICaseInsensitiveStringComparator(const char16_t*, const char16_t*,
                                               size_t, size_t);
     78 
     79 inline bool CaseInsensitiveFindInReadable(
     80    const nsAString& aPattern, nsAString::const_iterator& aSearchStart,
     81    nsAString::const_iterator& aSearchEnd) {
     82  return FindInReadable(aPattern, aSearchStart, aSearchEnd,
     83                        nsCaseInsensitiveStringComparator);
     84 }
     85 
     86 inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern,
     87                                          const nsAString& aHay) {
     88  nsAString::const_iterator searchBegin, searchEnd;
     89  return FindInReadable(aPattern, aHay.BeginReading(searchBegin),
     90                        aHay.EndReading(searchEnd),
     91                        nsCaseInsensitiveStringComparator);
     92 }
     93 
     94 #endif  // MOZILLA_INTERNAL_API
     95 
/**
 * Compares two char16_t buffers; a single length, len, is supplied for both.
 *
 * NOTE(review): presumably len counts char16_t units and the result is
 * negative, zero or positive in the usual ordering sense -- confirm against
 * the implementation.
 */
int32_t CaseInsensitiveCompare(const char16_t* a, const char16_t* b,
                               size_t len);

/**
 * Compares two char buffers, each with its own length given in bytes
 * (aLeftBytes, aRightBytes).
 *
 * NOTE(review): presumably the buffers hold UTF-8 text -- confirm against the
 * implementation.
 */
int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,
                               size_t aLeftBytes, size_t aRightBytes);
    101 
/**
 * Calculates the lower-case of the codepoint of the UTF8 sequence starting at
 * aStr.  Sets aNext to the byte following the end of the sequence.
 *
 * If the sequence is invalid, or if computing the codepoint would take us off
 * the end of the string (as marked by aEnd), returns -1 and does not set
 * aNext.  Because the return type is uint32_t, that -1 reaches the caller as
 * uint32_t(-1), i.e. 0xFFFFFFFF; compare against that value rather than
 * testing for a negative result.
 *
 * Note that this function doesn't check that aStr < aEnd -- it assumes
 * you've done that already.
 */
uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd,
                               const char** aNext);
    113 
/**
 * This function determines whether the UTF-8 sequence pointed to by aLeft is
 * case insensitively equal to the UTF-8 sequence pointed to by aRight (or
 * optionally, case and diacritic insensitively equal), as defined by having
 * matching (naked) lower-cased codepoints.
 *
 * aLeftEnd marks the first memory location past aLeft that is not part of
 * aLeft; aRightEnd similarly marks the end of aRight.
 *
 * The function assumes that aLeft < aLeftEnd and aRight < aRightEnd.
 *
 * The function stores the addresses of the next characters in the sequence
 * into aLeftNext and aRightNext.  It's up to the caller to make sure that the
 * returned pointers are valid -- i.e. the function may return aLeftNext >=
 * aLeftEnd or aRightNext >= aRightEnd.
 *
 * If the function encounters invalid text, it sets aErr to true and returns
 * false, possibly leaving aLeftNext and aRightNext uninitialized.  If the
 * function returns true, aErr is guaranteed to be false and both aLeftNext and
 * aRightNext are guaranteed to be initialized.
 *
 * aMatchDiacritics defaults to true, in which case the comparison is
 * case-insensitive only.  If aMatchDiacritics is false, the comparison is
 * neither case-sensitive nor diacritic-sensitive.
 */
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
                                   const char* aLeftEnd, const char* aRightEnd,
                                   const char** aLeftNext,
                                   const char** aRightNext, bool* aErr,
                                   bool aMatchDiacritics = true);
    143 
namespace mozilla {

/**
 * Hash a UTF8 string as though it were a UTF16 string.
 *
 * The value returned is the same as if we converted the string to UTF16 and
 * then ran HashString() on the result.
 *
 * The given |aLength| is in bytes.
 *
 * NOTE(review): |aErr| is an out-parameter that is not described here;
 * presumably it is set to true when |aUTF8| is not valid UTF-8 -- confirm
 * against the implementation.
 */
uint32_t HashUTF8AsUTF16(const char* aUTF8, size_t aLength, bool* aErr);

/**
 * Tests used in CSS Segment Break Transformation to determine whether a
 * newline is discardable.  Both take a single uint32_t value.
 */
bool IsSegmentBreakSkipChar(uint32_t u);
bool IsEastAsianPunctuation(uint32_t u);

/**
 * Return true for all Punctuation categories (Unicode general category P?),
 * and also for Symbol categories (S?) except for Modifier Symbol, which is
 * kept together with any adjacent letter/number. (Bug 1066756)
 *
 * The argument is a single char16_t, so only one UTF-16 code unit can be
 * tested per call.
 */
bool IsPunctuationForWordSelect(char16_t aCh);

}  // namespace mozilla
    171 
    172 #endif /* nsUnicharUtils_h__ */