nsUnicharUtils.h (6785B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef nsUnicharUtils_h__ 7 #define nsUnicharUtils_h__ 8 9 #include "nsString.h" 10 11 /* (0x3131u <= (u) && (u) <= 0x318eu) => Hangul Compatibility Jamo */ 12 /* (0xac00u <= (u) && (u) <= 0xd7a3u) => Hangul Syllables */ 13 #define IS_CJ_CHAR(u) \ 14 ((0x2e80u <= (u) && (u) <= 0x312fu) || (0x3190u <= (u) && (u) <= 0xabffu) || \ 15 (0xf900u <= (u) && (u) <= 0xfaffu) || (0xff00u <= (u) && (u) <= 0xffefu)) 16 17 #define IS_ZERO_WIDTH_SPACE(u) ((u) == 0x200B) 18 19 #define IS_ASCII(u) ((u) < 0x80) 20 #define IS_ASCII_UPPER(u) (('A' <= (u)) && ((u) <= 'Z')) 21 #define IS_ASCII_LOWER(u) (('a' <= (u)) && ((u) <= 'z')) 22 #define IS_ASCII_ALPHA(u) (IS_ASCII_UPPER(u) || IS_ASCII_LOWER(u)) 23 #define IS_ASCII_SPACE(u) (' ' == (u)) 24 25 void ToLowerCase(nsAString& aString); 26 void ToLowerCaseASCII(nsAString& aString); 27 void ToUpperCase(nsAString& aString); 28 29 void ToLowerCase(const nsAString& aSource, nsAString& aDest); 30 void ToLowerCaseASCII(const nsAString& aSource, nsAString& aDest); 31 void ToUpperCase(const nsAString& aSource, nsAString& aDest); 32 33 uint32_t ToLowerCase(uint32_t aChar); 34 uint32_t ToUpperCase(uint32_t aChar); 35 uint32_t ToTitleCase(uint32_t aChar); 36 37 void ToLowerCase(const char16_t* aIn, char16_t* aOut, size_t aLen); 38 void ToLowerCaseASCII(const char16_t* aIn, char16_t* aOut, size_t aLen); 39 void ToUpperCase(const char16_t* aIn, char16_t* aOut, size_t aLen); 40 41 char ToLowerCaseASCII(const char aChar); 42 char16_t ToLowerCaseASCII(const char16_t aChar); 43 char32_t ToLowerCaseASCII(const char32_t aChar); 44 45 char ToUpperCaseASCII(const char aChar); 46 char16_t ToUpperCaseASCII(const char16_t aChar); 47 char32_t ToUpperCaseASCII(const char32_t aChar); 48 49 inline bool IsUpperCase(uint32_t c) { return ToLowerCase(c) != c; } 50 51 inline bool IsLowerCase(uint32_t c) { return ToUpperCase(c) != c; } 52 53 #ifdef MOZILLA_INTERNAL_API 54 55 uint32_t ToFoldedCase(uint32_t aChar); 56 void ToFoldedCase(nsAString& aString); 57 void ToFoldedCase(const char16_t* aIn, char16_t* aOut, size_t aLen); 58 59 uint32_t ToNaked(uint32_t aChar); 60 void ToNaked(nsAString& aString); 61 62 int32_t nsCaseInsensitiveStringComparator(const char16_t*, const char16_t*, 63 size_t, size_t); 64 65 int32_t nsCaseInsensitiveUTF8StringComparator(const char*, const char*, size_t, 66 size_t); 67 68 class nsCaseInsensitiveStringArrayComparator { 69 public: 70 template <class A, class B> 71 bool Equals(const A& a, const B& b) const { 72 return a.Equals(b, nsCaseInsensitiveStringComparator); 73 } 74 }; 75 76 int32_t nsASCIICaseInsensitiveStringComparator(const char16_t*, const char16_t*, 77 size_t, size_t); 78 79 inline bool CaseInsensitiveFindInReadable( 80 const nsAString& aPattern, nsAString::const_iterator& aSearchStart, 81 nsAString::const_iterator& aSearchEnd) { 82 return FindInReadable(aPattern, aSearchStart, aSearchEnd, 83 nsCaseInsensitiveStringComparator); 84 } 85 86 inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern, 87 const nsAString& aHay) { 88 nsAString::const_iterator searchBegin, searchEnd; 89 return FindInReadable(aPattern, aHay.BeginReading(searchBegin), 90 aHay.EndReading(searchEnd), 91 nsCaseInsensitiveStringComparator); 92 } 93 94 #endif // MOZILLA_INTERNAL_API 95 96 int32_t CaseInsensitiveCompare(const char16_t* a, const char16_t* b, 97 size_t len); 98 99 int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight, 100 size_t aLeftBytes, size_t aRightBytes); 101 102 /** 103 * Calculates the lower-case of the codepoint of the UTF8 sequence starting at 104 * aStr. Sets aNext to the byte following the end of the sequence. 105 * 106 * If the sequence is invalid, or if computing the codepoint would take us off 107 * the end of the string (as marked by aEnd), returns -1 and does not set 108 * aNext. Note that this function doesn't check that aStr < aEnd -- it assumes 109 * you've done that already. 110 */ 111 uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd, 112 const char** aNext); 113 114 /** 115 * This function determines whether the UTF-8 sequence pointed to by aLeft is 116 * case insensitively equal to the UTF-8 sequence pointed to by aRight (or 117 * optionally, case and diacritic insensitively equal), as defined by having 118 * matching (naked) lower-cased codepoints. 119 * 120 * aLeftEnd marks the first memory location past aLeft that is not part of 121 * aLeft; aRightEnd similarly marks the end of aRight. 122 * 123 * The function assumes that aLeft < aLeftEnd and aRight < aRightEnd. 124 * 125 * The function stores the addresses of the next characters in the sequence 126 * into aLeftNext and aRightNext. It's up to the caller to make sure that the 127 * returned pointers are valid -- i.e. the function may return aLeftNext >= 128 * aLeftEnd or aRightNext >= aRightEnd. 129 * 130 * If the function encounters invalid text, it sets aErr to true and returns 131 * false, possibly leaving aLeftNext and aRightNext uninitialized. If the 132 * function returns true, aErr is guaranteed to be false and both aLeftNext and 133 * aRightNext are guaranteed to be initialized. 134 * 135 * If aMatchDiacritics is false, the comparison is neither case-sensitive nor 136 * diacritic-sensitive. 137 */ 138 bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight, 139 const char* aLeftEnd, const char* aRightEnd, 140 const char** aLeftNext, 141 const char** aRightNext, bool* aErr, 142 bool aMatchDiacritics = true); 143 144 namespace mozilla { 145 146 /** 147 * Hash a UTF8 string as though it were a UTF16 string. 148 * 149 * The value returned is the same as if we converted the string to UTF16 and 150 * then ran HashString() on the result. 151 * 152 * The given |length| is in bytes. 153 */ 154 uint32_t HashUTF8AsUTF16(const char* aUTF8, size_t aLength, bool* aErr); 155 156 /** 157 * Tests used in CSS Segment Break Transformation to determine whether a 158 * newline is discardable. 159 */ 160 bool IsSegmentBreakSkipChar(uint32_t u); 161 bool IsEastAsianPunctuation(uint32_t u); 162 163 /** 164 * Return true for all Punctuation categories (Unicode general category P?), 165 * and also for Symbol categories (S?) except for Modifier Symbol, which is 166 * kept together with any adjacent letter/number. (Bug 1066756) 167 */ 168 bool IsPunctuationForWordSelect(char16_t aCh); 169 170 } // namespace mozilla 171 172 #endif /* nsUnicharUtils_h__ */