stsearch.cpp (17351B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2001-2014 IBM and others. All rights reserved. 6 ********************************************************************** 7 * Date Name Description 8 * 03/22/2000 helena Creation. 9 ********************************************************************** 10 */ 11 12 #include "unicode/utypes.h" 13 14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION 15 16 #include "unicode/stsearch.h" 17 #include "usrchimp.h" 18 #include "cmemory.h" 19 20 U_NAMESPACE_BEGIN 21 22 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) 23 24 // public constructors and destructors ----------------------------------- 25 26 StringSearch::StringSearch(const UnicodeString &pattern, 27 const UnicodeString &text, 28 const Locale &locale, 29 BreakIterator *breakiter, 30 UErrorCode &status) : 31 SearchIterator(text, breakiter), 32 m_pattern_(pattern) 33 { 34 if (U_FAILURE(status)) { 35 m_strsrch_ = nullptr; 36 return; 37 } 38 39 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 40 m_text_.getBuffer(), m_text_.length(), 41 locale.getName(), reinterpret_cast<UBreakIterator*>(breakiter), 42 &status); 43 uprv_free(m_search_); 44 m_search_ = nullptr; 45 46 if (U_SUCCESS(status)) { 47 // m_search_ has been created by the base SearchIterator class 48 m_search_ = m_strsrch_->search; 49 } 50 } 51 52 StringSearch::StringSearch(const UnicodeString &pattern, 53 const UnicodeString &text, 54 RuleBasedCollator *coll, 55 BreakIterator *breakiter, 56 UErrorCode &status) : 57 SearchIterator(text, breakiter), 58 m_pattern_(pattern) 59 { 60 if (U_FAILURE(status)) { 61 m_strsrch_ = nullptr; 62 return; 63 } 64 if (coll == nullptr) { 65 status = U_ILLEGAL_ARGUMENT_ERROR; 66 m_strsrch_ = nullptr; 67 return; 68 } 69 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 70 m_pattern_.length(), 71 m_text_.getBuffer(), 72 m_text_.length(), coll->toUCollator(), 73 reinterpret_cast<UBreakIterator*>(breakiter), 74 &status); 75 uprv_free(m_search_); 76 m_search_ = nullptr; 77 78 if (U_SUCCESS(status)) { 79 // m_search_ has been created by the base SearchIterator class 80 m_search_ = m_strsrch_->search; 81 } 82 } 83 84 StringSearch::StringSearch(const UnicodeString &pattern, 85 CharacterIterator &text, 86 const Locale &locale, 87 BreakIterator *breakiter, 88 UErrorCode &status) : 89 SearchIterator(text, breakiter), 90 m_pattern_(pattern) 91 { 92 if (U_FAILURE(status)) { 93 m_strsrch_ = nullptr; 94 return; 95 } 96 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 97 m_text_.getBuffer(), m_text_.length(), 98 locale.getName(), reinterpret_cast<UBreakIterator*>(breakiter), 99 &status); 100 uprv_free(m_search_); 101 m_search_ = nullptr; 102 103 if (U_SUCCESS(status)) { 104 // m_search_ has been created by the base SearchIterator class 105 m_search_ = m_strsrch_->search; 106 } 107 } 108 109 StringSearch::StringSearch(const UnicodeString &pattern, 110 CharacterIterator &text, 111 RuleBasedCollator *coll, 112 BreakIterator *breakiter, 113 UErrorCode &status) : 114 SearchIterator(text, breakiter), 115 m_pattern_(pattern) 116 { 117 if (U_FAILURE(status)) { 118 m_strsrch_ = nullptr; 119 return; 120 } 121 if (coll == nullptr) { 122 status = U_ILLEGAL_ARGUMENT_ERROR; 123 m_strsrch_ = nullptr; 124 return; 125 } 126 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 127 m_pattern_.length(), 128 m_text_.getBuffer(), 129 m_text_.length(), coll->toUCollator(), 130 reinterpret_cast<UBreakIterator*>(breakiter), 131 &status); 132 uprv_free(m_search_); 133 m_search_ = nullptr; 134 135 if (U_SUCCESS(status)) { 136 // m_search_ has been created by the base SearchIterator class 137 m_search_ = m_strsrch_->search; 138 } 139 } 140 141 StringSearch::StringSearch(const StringSearch &that) : 142 SearchIterator(that.m_text_, that.m_breakiterator_), 143 m_pattern_(that.m_pattern_) 144 { 145 UErrorCode status = U_ZERO_ERROR; 146 147 // Free m_search_ from the superclass 148 uprv_free(m_search_); 149 m_search_ = nullptr; 150 151 if (that.m_strsrch_ == nullptr) { 152 // This was not a good copy 153 m_strsrch_ = nullptr; 154 } 155 else { 156 // Make a deep copy 157 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 158 m_pattern_.length(), 159 m_text_.getBuffer(), 160 m_text_.length(), 161 that.m_strsrch_->collator, 162 reinterpret_cast<UBreakIterator*>(that.m_breakiterator_), 163 &status); 164 if (U_SUCCESS(status)) { 165 // m_search_ has been created by the base SearchIterator class 166 m_search_ = m_strsrch_->search; 167 } 168 } 169 } 170 171 StringSearch::~StringSearch() 172 { 173 if (m_strsrch_ != nullptr) { 174 usearch_close(m_strsrch_); 175 m_search_ = nullptr; 176 } 177 } 178 179 StringSearch * 180 StringSearch::clone() const { 181 return new StringSearch(*this); 182 } 183 184 // operator overloading --------------------------------------------- 185 StringSearch & StringSearch::operator=(const StringSearch &that) 186 { 187 if (this != &that) { 188 UErrorCode status = U_ZERO_ERROR; 189 m_text_ = that.m_text_; 190 m_breakiterator_ = that.m_breakiterator_; 191 m_pattern_ = that.m_pattern_; 192 // all m_search_ in the parent class is linked up with m_strsrch_ 193 usearch_close(m_strsrch_); 194 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), 195 m_pattern_.length(), 196 m_text_.getBuffer(), 197 m_text_.length(), 198 that.m_strsrch_->collator, 199 nullptr, &status); 200 // Check null pointer 201 if (m_strsrch_ != nullptr) { 202 m_search_ = m_strsrch_->search; 203 } 204 } 205 return *this; 206 } 207 208 bool StringSearch::operator==(const SearchIterator &that) const 209 { 210 if (this == &that) { 211 return true; 212 } 213 if (SearchIterator::operator ==(that)) { 214 const StringSearch *thatsrch = dynamic_cast<const StringSearch *>(&that); 215 if (thatsrch == nullptr) return false; 216 return (this->m_pattern_ == thatsrch->m_pattern_ && 217 this->m_strsrch_->collator == thatsrch->m_strsrch_->collator); 218 } 219 return false; 220 } 221 222 // public get and set methods ---------------------------------------- 223 224 void StringSearch::setOffset(int32_t position, UErrorCode &status) 225 { 226 // status checked in usearch_setOffset 227 usearch_setOffset(m_strsrch_, position, &status); 228 } 229 230 int32_t StringSearch::getOffset() const 231 { 232 return usearch_getOffset(m_strsrch_); 233 } 234 235 void StringSearch::setText(const UnicodeString &text, UErrorCode &status) 236 { 237 if (U_SUCCESS(status)) { 238 m_text_ = text; 239 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); 240 } 241 } 242 243 void StringSearch::setText(CharacterIterator &text, UErrorCode &status) 244 { 245 if (U_SUCCESS(status)) { 246 text.getText(m_text_); 247 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); 248 } 249 } 250 251 RuleBasedCollator * StringSearch::getCollator() const 252 { 253 // Note the const_cast. It would be cleaner if this const method returned a const collator. 254 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator)); 255 } 256 257 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) 258 { 259 if (U_SUCCESS(status)) { 260 usearch_setCollator(m_strsrch_, coll->toUCollator(), &status); 261 } 262 } 263 264 void StringSearch::setPattern(const UnicodeString &pattern, 265 UErrorCode &status) 266 { 267 if (U_SUCCESS(status)) { 268 m_pattern_ = pattern; 269 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), 270 &status); 271 } 272 } 273 274 const UnicodeString & StringSearch::getPattern() const 275 { 276 return m_pattern_; 277 } 278 279 // public methods ---------------------------------------------------- 280 281 void StringSearch::reset() 282 { 283 usearch_reset(m_strsrch_); 284 } 285 286 StringSearch * StringSearch::safeClone() const 287 { 288 UErrorCode status = U_ZERO_ERROR; 289 StringSearch *result = new StringSearch(m_pattern_, m_text_, 290 getCollator(), 291 m_breakiterator_, 292 status); 293 /* test for nullptr */ 294 if (result == nullptr) { 295 status = U_MEMORY_ALLOCATION_ERROR; 296 return nullptr; 297 } 298 result->setOffset(getOffset(), status); 299 result->setMatchStart(m_strsrch_->search->matchedIndex); 300 result->setMatchLength(m_strsrch_->search->matchedLength); 301 if (U_FAILURE(status)) { 302 return nullptr; 303 } 304 return result; 305 } 306 307 // protected method ------------------------------------------------- 308 309 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) 310 { 311 // values passed here are already in the pre-shift position 312 if (U_SUCCESS(status)) { 313 if (m_strsrch_->pattern.cesLength == 0) { 314 m_search_->matchedIndex = 315 m_search_->matchedIndex == USEARCH_DONE ? 316 getOffset() : m_search_->matchedIndex + 1; 317 m_search_->matchedLength = 0; 318 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 319 &status); 320 if (m_search_->matchedIndex == m_search_->textLength) { 321 m_search_->matchedIndex = USEARCH_DONE; 322 } 323 } 324 else { 325 // looking at usearch.cpp, this part is shifted out to 326 // StringSearch instead of SearchIterator because m_strsrch_ is 327 // not accessible in SearchIterator 328 #if 0 329 if (position + m_strsrch_->pattern.defaultShiftSize 330 > m_search_->textLength) { 331 setMatchNotFound(); 332 return USEARCH_DONE; 333 } 334 #endif 335 if (m_search_->matchedLength <= 0) { 336 // the flipping direction issue has already been handled 337 // in next() 338 // for boundary check purposes. this will ensure that the 339 // next match will not precede the current offset 340 // note search->matchedIndex will always be set to something 341 // in the code 342 m_search_->matchedIndex = position - 1; 343 } 344 345 ucol_setOffset(m_strsrch_->textIter, position, &status); 346 347 #if 0 348 for (;;) { 349 if (m_search_->isCanonicalMatch) { 350 // can't use exact here since extra accents are allowed. 351 usearch_handleNextCanonical(m_strsrch_, &status); 352 } 353 else { 354 usearch_handleNextExact(m_strsrch_, &status); 355 } 356 if (U_FAILURE(status)) { 357 return USEARCH_DONE; 358 } 359 if (m_breakiterator_ == nullptr 360 #if !UCONFIG_NO_BREAK_ITERATION 361 || 362 m_search_->matchedIndex == USEARCH_DONE || 363 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 364 m_breakiterator_->isBoundary(m_search_->matchedIndex + 365 m_search_->matchedLength)) 366 #endif 367 ) { 368 if (m_search_->matchedIndex == USEARCH_DONE) { 369 ucol_setOffset(m_strsrch_->textIter, 370 m_search_->textLength, &status); 371 } 372 else { 373 ucol_setOffset(m_strsrch_->textIter, 374 m_search_->matchedIndex, &status); 375 } 376 return m_search_->matchedIndex; 377 } 378 } 379 #else 380 // if m_strsrch_->breakIter is always the same as m_breakiterator_ 381 // then we don't need to check the match boundaries here because 382 // usearch_handleNextXXX will already have done it. 383 if (m_search_->isCanonicalMatch) { 384 // *could* actually use exact here 'cause no extra accents allowed... 385 usearch_handleNextCanonical(m_strsrch_, &status); 386 } else { 387 usearch_handleNextExact(m_strsrch_, &status); 388 } 389 390 if (U_FAILURE(status)) { 391 return USEARCH_DONE; 392 } 393 394 if (m_search_->matchedIndex == USEARCH_DONE) { 395 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); 396 } else { 397 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); 398 } 399 400 return m_search_->matchedIndex; 401 #endif 402 } 403 } 404 return USEARCH_DONE; 405 } 406 407 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) 408 { 409 // values passed here are already in the pre-shift position 410 if (U_SUCCESS(status)) { 411 if (m_strsrch_->pattern.cesLength == 0) { 412 m_search_->matchedIndex = 413 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : 414 m_search_->matchedIndex); 415 if (m_search_->matchedIndex == 0) { 416 setMatchNotFound(); 417 } 418 else { 419 m_search_->matchedIndex --; 420 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 421 &status); 422 m_search_->matchedLength = 0; 423 } 424 } 425 else { 426 // looking at usearch.cpp, this part is shifted out to 427 // StringSearch instead of SearchIterator because m_strsrch_ is 428 // not accessible in SearchIterator 429 #if 0 430 if (!m_search_->isOverlap && 431 position - m_strsrch_->pattern.defaultShiftSize < 0) { 432 setMatchNotFound(); 433 return USEARCH_DONE; 434 } 435 436 for (;;) { 437 if (m_search_->isCanonicalMatch) { 438 // can't use exact here since extra accents are allowed. 439 usearch_handlePreviousCanonical(m_strsrch_, &status); 440 } 441 else { 442 usearch_handlePreviousExact(m_strsrch_, &status); 443 } 444 if (U_FAILURE(status)) { 445 return USEARCH_DONE; 446 } 447 if (m_breakiterator_ == nullptr 448 #if !UCONFIG_NO_BREAK_ITERATION 449 || 450 m_search_->matchedIndex == USEARCH_DONE || 451 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && 452 m_breakiterator_->isBoundary(m_search_->matchedIndex + 453 m_search_->matchedLength)) 454 #endif 455 ) { 456 return m_search_->matchedIndex; 457 } 458 } 459 #else 460 ucol_setOffset(m_strsrch_->textIter, position, &status); 461 462 if (m_search_->isCanonicalMatch) { 463 // *could* use exact match here since extra accents *not* allowed! 464 usearch_handlePreviousCanonical(m_strsrch_, &status); 465 } else { 466 usearch_handlePreviousExact(m_strsrch_, &status); 467 } 468 469 if (U_FAILURE(status)) { 470 return USEARCH_DONE; 471 } 472 473 return m_search_->matchedIndex; 474 #endif 475 } 476 477 return m_search_->matchedIndex; 478 } 479 return USEARCH_DONE; 480 } 481 482 U_NAMESPACE_END 483 484 #endif /* #if !UCONFIG_NO_COLLATION */