usetiter.h (9856B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2002-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 #ifndef USETITER_H 10 #define USETITER_H 11 12 #include "unicode/utypes.h" 13 14 #if U_SHOW_CPLUSPLUS_API 15 16 #include "unicode/uobject.h" 17 #include "unicode/unistr.h" 18 19 /** 20 * \file 21 * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. 22 */ 23 24 U_NAMESPACE_BEGIN 25 26 class UnicodeSet; 27 class UnicodeString; 28 29 /** 30 * 31 * UnicodeSetIterator iterates over the contents of a UnicodeSet. It 32 * iterates over either code points or code point ranges. After all 33 * code points or ranges have been returned, it returns the 34 * multicharacter strings of the UnicodeSet, if any. 35 * 36 * This class is not intended for public subclassing. 37 * 38 * <p>To iterate over code points and strings, use a loop like this: 39 * <pre> 40 * UnicodeSetIterator it(set); 41 * while (it.next()) { 42 * processItem(it.getString()); 43 * } 44 * </pre> 45 * <p>Each item in the set is accessed as a string. Set elements 46 * consisting of single code points are returned as strings containing 47 * just the one code point. 48 * 49 * <p>To iterate over code point ranges, instead of individual code points, 50 * use a loop like this: 51 * <pre> 52 * UnicodeSetIterator it(set); 53 * while (it.nextRange()) { 54 * if (it.isString()) { 55 * processString(it.getString()); 56 * } else { 57 * processCodepointRange(it.getCodepoint(), it.getCodepointEnd()); 58 * } 59 * } 60 * </pre> 61 * 62 * To iterate over only the strings, start with <code>skipToStrings()</code>. 63 * 64 * @author M. Davis 65 * @stable ICU 2.4 66 */ 67 class U_COMMON_API UnicodeSetIterator final : public UObject { 68 /** 69 * Value of <tt>codepoint</tt> if the iterator points to a string. 70 * If <tt>codepoint == IS_STRING</tt>, then examine 71 * <tt>string</tt> for the current iteration result. 72 */ 73 enum { IS_STRING = -1 }; 74 75 /** 76 * Current code point, or the special value <tt>IS_STRING</tt>, if 77 * the iterator points to a string. 78 */ 79 UChar32 codepoint; 80 81 /** 82 * When iterating over ranges using <tt>nextRange()</tt>, 83 * <tt>codepointEnd</tt> contains the inclusive end of the 84 * iteration range, if <tt>codepoint != IS_STRING</tt>. If 85 * iterating over code points using <tt>next()</tt>, or if 86 * <tt>codepoint == IS_STRING</tt>, then the value of 87 * <tt>codepointEnd</tt> is undefined. 88 */ 89 UChar32 codepointEnd; 90 91 /** 92 * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points 93 * to the current string. If <tt>codepoint != IS_STRING</tt>, the 94 * value of <tt>string</tt> is undefined. 95 */ 96 const UnicodeString* string; 97 98 public: 99 100 /** 101 * Create an iterator over the given set. The iterator is valid 102 * only so long as <tt>set</tt> is valid. 103 * @param set set to iterate over 104 * @stable ICU 2.4 105 */ 106 UnicodeSetIterator(const UnicodeSet& set); 107 108 /** 109 * Create an iterator over nothing. <tt>next()</tt> and 110 * <tt>nextRange()</tt> return false. This is a convenience 111 * constructor allowing the target to be set later. 112 * @stable ICU 2.4 113 */ 114 UnicodeSetIterator(); 115 116 /** 117 * Destructor. 118 * @stable ICU 2.4 119 */ 120 virtual ~UnicodeSetIterator(); 121 122 /** 123 * Returns true if the current element is a string. If so, the 124 * caller can retrieve it with <tt>getString()</tt>. If this 125 * method returns false, the current element is a code point or 126 * code point range, depending on whether <tt>next()</tt> or 127 * <tt>nextRange()</tt> was called. 128 * Elements of types string and codepoint can both be retrieved 129 * with the function <tt>getString()</tt>. 130 * Elements of type codepoint can also be retrieved with 131 * <tt>getCodepoint()</tt>. 132 * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint 133 * of the range, and <tt>getCodepointEnd()</tt> returns the end 134 * of the range. 135 * @stable ICU 2.4 136 */ 137 inline UBool isString() const; 138 139 /** 140 * Returns the current code point, if <tt>isString()</tt> returned 141 * false. Otherwise returns an undefined result. 142 * @stable ICU 2.4 143 */ 144 inline UChar32 getCodepoint() const; 145 146 /** 147 * Returns the end of the current code point range, if 148 * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was 149 * called. Otherwise returns an undefined result. 150 * @stable ICU 2.4 151 */ 152 inline UChar32 getCodepointEnd() const; 153 154 /** 155 * Returns the current string, if <tt>isString()</tt> returned 156 * true. If the current iteration item is a code point, a UnicodeString 157 * containing that single code point is returned. 158 * 159 * Ownership of the returned string remains with the iterator. 160 * The string is guaranteed to remain valid only until the iterator is 161 * advanced to the next item, or until the iterator is deleted. 162 * 163 * @stable ICU 2.4 164 */ 165 const UnicodeString& getString(); 166 167 /** 168 * Skips over the remaining code points/ranges, if any. 169 * A following call to next() or nextRange() will yield a string, if there is one. 170 * No-op if next() would return false, or if it would yield a string anyway. 171 * 172 * @return *this 173 * @stable ICU 70 174 * @see UnicodeSet#strings() 175 */ 176 inline UnicodeSetIterator &skipToStrings() { 177 // Finish code point/range iteration. 178 range = endRange; 179 endElement = -1; 180 nextElement = 0; 181 return *this; 182 } 183 184 /** 185 * Advances the iteration position to the next element in the set, 186 * which can be either a single code point or a string. 187 * If there are no more elements in the set, return false. 188 * 189 * <p> 190 * If <tt>isString() == true</tt>, the value is a 191 * string, otherwise the value is a 192 * single code point. Elements of either type can be retrieved 193 * with the function <tt>getString()</tt>, while elements of 194 * consisting of a single code point can be retrieved with 195 * <tt>getCodepoint()</tt> 196 * 197 * <p>The order of iteration is all code points in sorted order, 198 * followed by all strings sorted order. Do not mix 199 * calls to <tt>next()</tt> and <tt>nextRange()</tt> without 200 * calling <tt>reset()</tt> between them. The results of doing so 201 * are undefined. 202 * 203 * @return true if there was another element in the set. 204 * @stable ICU 2.4 205 */ 206 UBool next(); 207 208 /** 209 * Returns the next element in the set, either a code point range 210 * or a string. If there are no more elements in the set, return 211 * false. If <tt>isString() == true</tt>, the value is a 212 * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a 213 * range of one or more code points from <tt>getCodepoint()</tt> to 214 * <tt>getCodepointeEnd()</tt> inclusive. 215 * 216 * <p>The order of iteration is all code points ranges in sorted 217 * order, followed by all strings sorted order. Ranges are 218 * disjoint and non-contiguous. The value returned from <tt>getString()</tt> 219 * is undefined unless <tt>isString() == true</tt>. Do not mix calls to 220 * <tt>next()</tt> and <tt>nextRange()</tt> without calling 221 * <tt>reset()</tt> between them. The results of doing so are 222 * undefined. 223 * 224 * @return true if there was another element in the set. 225 * @stable ICU 2.4 226 */ 227 UBool nextRange(); 228 229 /** 230 * Sets this iterator to visit the elements of the given set and 231 * resets it to the start of that set. The iterator is valid only 232 * so long as <tt>set</tt> is valid. 233 * @param set the set to iterate over. 234 * @stable ICU 2.4 235 */ 236 void reset(const UnicodeSet& set); 237 238 /** 239 * Resets this iterator to the start of the set. 240 * @stable ICU 2.4 241 */ 242 void reset(); 243 244 /** 245 * ICU "poor man's RTTI", returns a UClassID for this class. 246 * 247 * @stable ICU 2.4 248 */ 249 static UClassID U_EXPORT2 getStaticClassID(); 250 251 /** 252 * ICU "poor man's RTTI", returns a UClassID for the actual class. 253 * 254 * @stable ICU 2.4 255 */ 256 virtual UClassID getDynamicClassID() const override; 257 258 // ======================= PRIVATES =========================== 259 260 private: 261 262 // endElement and nextElements are really UChar32's, but we keep 263 // them as signed int32_t's so we can do comparisons with 264 // endElement set to -1. Leave them as int32_t's. 265 /** The set 266 */ 267 const UnicodeSet* set; 268 /** End range 269 */ 270 int32_t endRange; 271 /** Range 272 */ 273 int32_t range; 274 /** End element 275 */ 276 int32_t endElement; 277 /** Next element 278 */ 279 int32_t nextElement; 280 /** Next string 281 */ 282 int32_t nextString; 283 /** String count 284 */ 285 int32_t stringCount; 286 287 /** 288 * Points to the string to use when the caller asks for a 289 * string and the current iteration item is a code point, not a string. 290 */ 291 UnicodeString *cpString; 292 293 /** Copy constructor. Disallowed. 294 */ 295 UnicodeSetIterator(const UnicodeSetIterator&) = delete; 296 297 /** Assignment operator. Disallowed. 298 */ 299 UnicodeSetIterator& operator=(const UnicodeSetIterator&) = delete; 300 301 /** Load range 302 */ 303 void loadRange(int32_t range); 304 }; 305 306 inline UBool UnicodeSetIterator::isString() const { 307 return codepoint < 0; 308 } 309 310 inline UChar32 UnicodeSetIterator::getCodepoint() const { 311 return codepoint; 312 } 313 314 inline UChar32 UnicodeSetIterator::getCodepointEnd() const { 315 return codepointEnd; 316 } 317 318 319 U_NAMESPACE_END 320 321 #endif /* U_SHOW_CPLUSPLUS_API */ 322 323 #endif