tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

filteredbrk.cpp (26203B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2014-2015, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
     12 
     13 #include "cmemory.h"
     14 
     15 #include "unicode/filteredbrk.h"
     16 #include "unicode/ucharstriebuilder.h"
     17 #include "unicode/ures.h"
     18 
     19 #include "uresimp.h" // ures_getByKeyWithFallback
     20 #include "ubrkimpl.h" // U_ICUDATA_BRKITR
     21 #include "uvector.h"
     22 #include "cmemory.h"
     23 #include "umutex.h"
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 #ifndef FB_DEBUG
     28 #define FB_DEBUG 0
     29 #endif
     30 
     31 #if FB_DEBUG
     32 #include <stdio.h>
     33 static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
     34  char buf[2048];
     35  if(s) {
     36    s->extract(0,s->length(),buf,2048);
     37  } else {
     38    strcpy(buf,"nullptr");
     39  }
     40  fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
     41          f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
     42 }
     43 
     44 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
     45 #else
     46 #define FB_TRACE(m,s,b,d)
     47 #endif
     48 
     49 /**
     50 * Used with sortedInsert()
     51 */
     52 static int32_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
     53    const UnicodeString& a = *static_cast<const UnicodeString*>(t1.pointer);
     54    const UnicodeString& b = *static_cast<const UnicodeString*>(t2.pointer);
     55    return a.compare(b);
     56 }
     57 
     58 /**
     59 * A UVector which implements a set of strings.
     60 */
     61 class UStringSet : public UVector {
     62 public:
     63  UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
     64                                           uhash_compareUnicodeString,
     65                                           1,
     66                                           status) {}
     67  virtual ~UStringSet();
     68  /**
     69   * Is this UnicodeSet contained?
     70   */
     71  inline UBool contains(const UnicodeString& s) {
     72    return contains((void*) &s);
     73  }
     74  using UVector::contains;
     75  /**
     76   * Return the ith UnicodeString alias
     77   */
     78  inline const UnicodeString* getStringAt(int32_t i) const {
     79    return static_cast<const UnicodeString*>(elementAt(i));
     80  }
     81  /**
     82   * Adopt the UnicodeString if not already contained.
     83   * Caller no longer owns the pointer in any case.
     84   * @return true if adopted successfully, false otherwise (error, or else duplicate)
     85   */
     86  inline UBool adopt(UnicodeString *str, UErrorCode &status) {
     87    if(U_FAILURE(status) || contains(*str)) {
     88      delete str;
     89      return false;
     90    } else {
     91      sortedInsert(str, compareUnicodeString, status);
     92      if(U_FAILURE(status)) {
     93        return false;
     94      }
     95      return true;
     96    }
     97  }
     98  /**
     99   * Add by value.
    100   * @return true if successfully adopted.
    101   */
    102  inline UBool add(const UnicodeString& str, UErrorCode &status) {
    103    if(U_FAILURE(status)) return false;
    104    UnicodeString *t = new UnicodeString(str);
    105    if(t==nullptr) {
    106      status = U_MEMORY_ALLOCATION_ERROR; return false;
    107    }
    108    return adopt(t, status);
    109  }
    110  /**
    111   * Remove this string.
    112   * @return true if successfully removed, false otherwise (error, or else it wasn't there)
    113   */
    114  inline UBool remove(const UnicodeString &s, UErrorCode &status) {
    115    if(U_FAILURE(status)) return false;
    116    return removeElement((void*) &s);
    117  }
    118 };
    119 
    120 /**
    121 * Virtual, won't be inlined
    122 */
    123 UStringSet::~UStringSet() {}
    124 
    125 /* ----------------------------------------------------------- */
    126 
    127 
    128 /* Filtered Break constants */
    129 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
    130 static const int32_t kMATCH   = (1<<1); //< exact match - skip this one.
    131 static const int32_t kSuppressInReverse = (1<<0);
    132 static const int32_t kAddToForward = (1<<1);
    133 static const char16_t kFULLSTOP = 0x002E; // '.'
    134 
    135 /**
    136 * Shared data for SimpleFilteredSentenceBreakIterator
    137 */
    138 class SimpleFilteredSentenceBreakData : public UMemory {
    139 public:
    140  SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards ) 
    141      : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
    142    SimpleFilteredSentenceBreakData *incr() {
    143        umtx_atomic_inc(&refcount);
    144        return this;
    145    }
    146    SimpleFilteredSentenceBreakData *decr() {
    147        if(umtx_atomic_dec(&refcount) <= 0) {
    148            delete this;
    149        }
    150        return nullptr;
    151    }
    152    virtual ~SimpleFilteredSentenceBreakData();
    153 
    154    bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
    155    bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
    156 
    157    const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
    158    const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
    159 
    160 private:
    161    // These tries own their data arrays.
    162    // They are shared and must therefore not be modified.
    163    LocalPointer<UCharsTrie>    fForwardsPartialTrie; //  Has ".a" for "a.M."
    164    LocalPointer<UCharsTrie>    fBackwardsTrie; //  i.e. ".srM" for Mrs.
    165    u_atomic_int32_t            refcount;
    166 };
    167 
    168 SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
    169 
    170 /**
    171 * Concrete implementation
    172 */
    173 class SimpleFilteredSentenceBreakIterator : public BreakIterator {
    174 public:
    175  SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
    176  SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
    177  virtual ~SimpleFilteredSentenceBreakIterator();
    178 private:
    179  SimpleFilteredSentenceBreakData *fData;
    180  LocalPointer<BreakIterator> fDelegate;
    181  LocalUTextPointer           fText;
    182 
    183  /* -- subclass interface -- */
    184 public:
    185  /* -- cloning and other subclass stuff -- */
    186  virtual BreakIterator *  createBufferClone(void * /*stackBuffer*/,
    187                                             int32_t &/*BufferSize*/,
    188                                             UErrorCode &status) override {
    189    // for now - always deep clone
    190    status = U_SAFECLONE_ALLOCATED_WARNING;
    191    return clone();
    192  }
    193  virtual SimpleFilteredSentenceBreakIterator* clone() const override { return new SimpleFilteredSentenceBreakIterator(*this); }
    194  virtual UClassID getDynamicClassID() const override { return nullptr; }
    195  virtual bool operator==(const BreakIterator& o) const override { if(this==&o) return true; return false; }
    196 
    197  /* -- text modifying -- */
    198  virtual void setText(UText *text, UErrorCode &status) override { fDelegate->setText(text,status); }
    199  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) override { fDelegate->refreshInputText(input,status); return *this; }
    200  virtual void adoptText(CharacterIterator* it) override { fDelegate->adoptText(it); }
    201  virtual void setText(const UnicodeString &text) override { fDelegate->setText(text); }
    202 
    203  /* -- other functions that are just delegated -- */
    204  virtual UText *getUText(UText *fillIn, UErrorCode &status) const override { return fDelegate->getUText(fillIn,status); }
    205  virtual CharacterIterator& getText() const override { return fDelegate->getText(); }
    206 
    207  /* -- ITERATION -- */
    208  virtual int32_t first() override;
    209  virtual int32_t preceding(int32_t offset) override;
    210  virtual int32_t previous() override;
    211  virtual UBool isBoundary(int32_t offset) override;
    212  virtual int32_t current() const override { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
    213 
    214  virtual int32_t next() override;
    215 
    216  virtual int32_t next(int32_t n) override;
    217  virtual int32_t following(int32_t offset) override;
    218  virtual int32_t last() override;
    219 
    220 private:
    221    /**
    222     * Given that the fDelegate has already given its "initial" answer,
    223     * find the NEXT actual (non-excepted) break.
    224     * @param n initial position from delegate
    225     * @return new break position or UBRK_DONE
    226     */
    227    int32_t internalNext(int32_t n);
    228    /**
    229     * Given that the fDelegate has already given its "initial" answer,
    230     * find the PREV actual (non-excepted) break.
    231     * @param n initial position from delegate
    232     * @return new break position or UBRK_DONE
    233     */
    234    int32_t internalPrev(int32_t n);
    235    /**
    236     * set up the UText with the value of the fDelegate.
    237     * Call this before calling breakExceptionAt. 
    238     * May be able to avoid excess calls
    239     */
    240    void resetState(UErrorCode &status);
    241    /**
    242     * Is there a match  (exception) at this spot?
    243     */
    244    enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
    245    /**
    246     * Determine if there is an exception at this spot
    247     * @param n spot to check
    248     * @return kNoExceptionHere or kExceptionHere
    249     **/
    250    enum EFBMatchResult breakExceptionAt(int32_t n);
    251 };
    252 
    253 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
    254  : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone())
    255 {
    256 }
    257 
    258 
    259 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
    260  BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
    261  fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
    262  fDelegate(adopt)
    263 {
    264    if (fData == nullptr) {
    265        delete forwards;
    266        delete backwards;
    267        if (U_SUCCESS(status)) {
    268            status = U_MEMORY_ALLOCATION_ERROR;
    269        }
    270    }
    271 }
    272 
    273 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
    274    fData = fData->decr();
    275 }
    276 
    277 void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
    278  fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
    279 }
    280 
    281 SimpleFilteredSentenceBreakIterator::EFBMatchResult
    282 SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
    283    int64_t bestPosn = -1;
    284    int32_t bestValue = -1;
    285    // loops while 'n' points to an exception.
    286    utext_setNativeIndex(fText.getAlias(), n); // from n..
    287 
    288    //if(debug2) u_printf(" n@ %d\n", n);
    289    // Assume a space is following the '.'  (so we handle the case:  "Mr. /Brown")
    290    if(utext_previous32(fText.getAlias())==u' ') {  // TODO: skip a class of chars here??
    291      // TODO only do this the 1st time?
    292      //if(debug2) u_printf("skipping prev: |%C| \n", (char16_t)uch);
    293    } else {
    294      //if(debug2) u_printf("not skipping prev: |%C| \n", (char16_t)uch);
    295      utext_next32(fText.getAlias());
    296      //if(debug2) u_printf(" -> : |%C| \n", (char16_t)uch);
    297    }
    298 
    299    {
    300        // Do not modify the shared trie!
    301        UCharsTrie iter(fData->getBackwardsTrie());
    302        UChar32 uch;
    303        while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) {  // more to consume backwards
    304            UStringTrieResult r = iter.nextForCodePoint(uch);
    305            if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
    306                bestPosn = utext_getNativeIndex(fText.getAlias());
    307                bestValue = iter.getValue();
    308            }
    309            if(!USTRINGTRIE_HAS_NEXT(r)) {
    310                break;
    311            }
    312            //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (char16_t)uch, r, utext_getNativeIndex(fText.getAlias()));
    313        }
    314    }
    315 
    316    //if(bestValue >= 0) {
    317        //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (char16_t)uch, r, bestPosn, bestValue);
    318    //}
    319 
    320    if(bestPosn>=0) {
    321      //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (char16_t)uch, r, bestPosn, bestValue);
    322 
    323      //if(USTRINGTRIE_MATCHES(r)) {  // matched - so, now what?
    324      //int32_t bestValue = iter.getValue();
    325      ////if(debug2) u_printf("rev< /%C/ matched, skip..%d  bestValue=%d\n", (char16_t)uch, r, bestValue);
    326 
    327      if(bestValue == kMATCH) { // exact match!
    328        //if(debug2) u_printf(" exact backward match\n");
    329        return kExceptionHere; // See if the next is another exception.
    330      } else if(bestValue == kPARTIAL
    331                && fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
    332        //if(debug2) u_printf(" partial backward match\n");
    333        // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
    334        // to see if it matches something going forward.
    335        UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
    336        utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
    337        //if(debug2) u_printf("Retrying at %d\n", bestPosn);
    338        // Do not modify the shared trie!
    339        UCharsTrie iter(fData->getForwardsPartialTrie());
    340        UChar32 uch;
    341        while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
    342              USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
    343          //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (char16_t)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
    344        }
    345        if(USTRINGTRIE_MATCHES(rfwd)) {
    346          //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (char16_t)uch);
    347          // only full matches here, nothing to check
    348          // skip the next:
    349            return kExceptionHere;
    350        } else {
    351          //if(debug2) u_printf("fwd> /%C/ no match.\n", (char16_t)uch);
    352          // no match (no exception) -return the 'underlying' break
    353          return kNoExceptionHere;
    354        }
    355      } else {
    356        return kNoExceptionHere; // internal error and/or no forwards trie
    357      }
    358    } else {
    359      //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (char16_t)uch, r);  // no best match
    360      return kNoExceptionHere; // No match - so exit. Not an exception.
    361    }
    362 }
    363 
    364 // the workhorse single next.
    365 int32_t
    366 SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
    367  if(n == UBRK_DONE || // at end  or
    368    !fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
    369      return n;
    370  }
    371  // OK, do we need to break here?
    372  UErrorCode status = U_ZERO_ERROR;
    373  // refresh text
    374  resetState(status);
    375  if(U_FAILURE(status)) return UBRK_DONE; // bail out
    376  int64_t utextLen = utext_nativeLength(fText.getAlias());
    377 
    378  //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
    379  while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate).
    380    SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
    381 
    382    switch(m) {
    383    case kExceptionHere:
    384      n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
    385      continue;
    386 
    387    default:
    388    case kNoExceptionHere:
    389      return n;
    390    }    
    391  }
    392  return n;
    393 }
    394 
    395 int32_t
    396 SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
    397  if(n == 0 || n == UBRK_DONE || // at end  or
    398    !fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
    399      return n;
    400  }
    401  // OK, do we need to break here?
    402  UErrorCode status = U_ZERO_ERROR;
    403  // refresh text
    404  resetState(status);
    405  if(U_FAILURE(status)) return UBRK_DONE; // bail out
    406 
    407  //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
    408  while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate).
    409    SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
    410 
    411    switch(m) {
    412    case kExceptionHere:
    413      n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
    414      continue;
    415 
    416    default:
    417    case kNoExceptionHere:
    418      return n;
    419    }    
    420  }
    421  return n;
    422 }
    423 
    424 
    425 int32_t
    426 SimpleFilteredSentenceBreakIterator::next() {
    427  return internalNext(fDelegate->next());
    428 }
    429 
    430 int32_t
    431 SimpleFilteredSentenceBreakIterator::first() {
    432  // Don't suppress a break opportunity at the beginning of text.
    433  return fDelegate->first();
    434 }
    435 
    436 int32_t
    437 SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
    438  return internalPrev(fDelegate->preceding(offset));
    439 }
    440 
    441 int32_t
    442 SimpleFilteredSentenceBreakIterator::previous() {
    443  return internalPrev(fDelegate->previous());
    444 }
    445 
    446 UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
    447  if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
    448 
    449  if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions
    450 
    451  UErrorCode status = U_ZERO_ERROR;
    452  resetState(status);
    453 
    454  SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);
    455 
    456  switch(m) {
    457  case kExceptionHere:
    458    return false;
    459  default:
    460  case kNoExceptionHere:
    461    return true;
    462  }    
    463 }
    464 
    465 int32_t
    466 SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
    467  return internalNext(fDelegate->next(offset));
    468 }
    469 
    470 int32_t
    471 SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
    472  return internalNext(fDelegate->following(offset));
    473 }
    474 
    475 int32_t
    476 SimpleFilteredSentenceBreakIterator::last() {
    477  // Don't suppress a break opportunity at the end of text.
    478  return fDelegate->last();
    479 }
    480 
    481 
    482 /**
    483 * Concrete implementation of builder class.
    484 */
    485 class SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
    486 public:
    487  virtual ~SimpleFilteredBreakIteratorBuilder();
    488  SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
    489  SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
    490  virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
    491  virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
    492  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) override;
    493 private:
    494  UStringSet fSet;
    495 };
    496 
    497 SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
    498 {
    499 }
    500 
    501 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status) 
    502  : fSet(status)
    503 {
    504 }
    505 
    506 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
    507  : fSet(status)
    508 {
    509  if(U_SUCCESS(status)) {
    510    UErrorCode subStatus = U_ZERO_ERROR;
    511    LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus));
    512    if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {    
    513      status = subStatus; // copy the failing status 
    514 #if FB_DEBUG
    515      fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
    516 #endif
    517      return;  // leaves the builder empty, if you try to use it.
    518    }
    519    LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", nullptr, &subStatus));
    520    if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {    
    521      status = subStatus; // copy the failing status 
    522 #if FB_DEBUG
    523      fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
    524 #endif
    525      return;  // leaves the builder empty, if you try to use it.
    526    }
    527    LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", nullptr, &subStatus));
    528 
    529 #if FB_DEBUG
    530    {
    531      UErrorCode subsub = subStatus;
    532      fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus));
    533    }
    534 #endif
    535    
    536    if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) {    
    537      status = subStatus; // copy the failing status 
    538 #if FB_DEBUG
    539      fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status));
    540 #endif
    541      return;  // leaves the builder empty, if you try to use it.
    542    }
    543 
    544    LocalUResourceBundlePointer strs;
    545    subStatus = status; // Pick up inherited warning status now 
    546    do {
    547      strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
    548      if(strs.isValid() && U_SUCCESS(subStatus)) {
    549        UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
    550        suppressBreakAfter(str, status); // load the string
    551      }
    552    } while (strs.isValid() && U_SUCCESS(subStatus));
    553    if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
    554      status = subStatus;
    555    }
    556  }
    557 }
    558 
    559 UBool
    560 SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
    561 {
    562  UBool r = fSet.add(exception, status);
    563  FB_TRACE("suppressBreakAfter",&exception,r,0);
    564  return r;
    565 }
    566 
    567 UBool
    568 SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
    569 {
    570  UBool r = fSet.remove(exception, status);
    571  FB_TRACE("unsuppressBreakAfter",&exception,r,0);
    572  return r;
    573 }
    574 
    575 /**
    576 * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
    577 * Work around this.
    578 *
    579 * Note: "new UnicodeString[subCount]" ends up calling global operator new
    580 * on MSVC2012 for some reason.
    581 */
    582 static inline UnicodeString* newUnicodeStringArray(size_t count) {
    583    return new UnicodeString[count ? count : 1];
    584 }
    585 
    586 BreakIterator *
    587 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
    588  LocalPointer<BreakIterator> adopt(adoptBreakIterator);
    589 
    590  LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
    591  LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
    592  if(U_FAILURE(status)) {
    593    return nullptr;
    594  }
    595 
    596  int32_t revCount = 0;
    597  int32_t fwdCount = 0;
    598 
    599  int32_t subCount = fSet.size();
    600 
    601  UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
    602  
    603  LocalArray<UnicodeString> ustrs(ustrs_ptr);
    604 
    605  LocalMemory<int> partials;
    606  partials.allocateInsteadAndReset(subCount);
    607 
    608  LocalPointer<UCharsTrie>    backwardsTrie; //  i.e. ".srM" for Mrs.
    609  LocalPointer<UCharsTrie>    forwardsPartialTrie; //  Has ".a" for "a.M."
    610 
    611  int n=0;
    612  for ( int32_t i = 0;
    613        i<fSet.size();
    614        i++) {
    615    const UnicodeString *abbr = fSet.getStringAt(i);
    616    if(abbr) {
    617      FB_TRACE("build",abbr,true,i);
    618      ustrs[n] = *abbr; // copy by value
    619      FB_TRACE("ustrs[n]",&ustrs[n],true,i);
    620    } else {
    621      FB_TRACE("build",abbr,false,i);
    622      status = U_MEMORY_ALLOCATION_ERROR;
    623      return nullptr;
    624    }
    625    partials[n] = 0; // default: not partial
    626    n++;
    627  }
    628  // first pass - find partials.
    629  for(int i=0;i<subCount;i++) {
    630    int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
    631    if(nn>-1 && (nn+1)!=ustrs[i].length()) {
    632      FB_TRACE("partial",&ustrs[i],false,i);
    633      // is partial.
    634      // is it unique?
    635      int sameAs = -1;
    636      for(int j=0;j<subCount;j++) {
    637        if(j==i) continue;
    638        if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
    639          FB_TRACE("prefix",&ustrs[j],false,nn+1);
    640          //UBool otherIsPartial = ((nn+1)!=ustrs[j].length());  // true if ustrs[j] doesn't end at nn
    641          if(partials[j]==0) { // hasn't been processed yet
    642            partials[j] = kSuppressInReverse | kAddToForward;
    643            FB_TRACE("suppressing",&ustrs[j],false,j);
    644          } else if(partials[j] & kSuppressInReverse) {
    645            sameAs = j; // the other entry is already in the reverse table.
    646          }
    647        }
    648      }
    649      FB_TRACE("for partial same-",&ustrs[i],false,sameAs);
    650      FB_TRACE(" == partial #",&ustrs[i],false,partials[i]);
    651      UnicodeString prefix(ustrs[i], 0, nn+1);
    652      if(sameAs == -1 && partials[i] == 0) {
    653        // first one - add the prefix to the reverse table.
    654        prefix.reverse();
    655        builder->add(prefix, kPARTIAL, status);
    656        revCount++;
    657        FB_TRACE("Added partial",&prefix,false, i);
    658        FB_TRACE(u_errorName(status),&ustrs[i],false,i);
    659        partials[i] = kSuppressInReverse | kAddToForward;
    660      } else {
    661        FB_TRACE("NOT adding partial",&prefix,false, i);
    662        FB_TRACE(u_errorName(status),&ustrs[i],false,i);
    663      }
    664    }
    665  }
    666  for(int i=0;i<subCount;i++) {
    667    if(partials[i]==0) {
    668      ustrs[i].reverse();
    669      builder->add(ustrs[i], kMATCH, status);
    670      revCount++;
    671      FB_TRACE(u_errorName(status), &ustrs[i], false, i);
    672    } else {
    673      FB_TRACE("Adding fwd",&ustrs[i], false, i);
    674 
    675      // an optimization would be to only add the portion after the '.'
    676      // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
    677      // instead of "Ph.D." since we already know the "Ph." part is a match.
    678      // would need the trie to be able to hold 0-length strings, though.
    679      builder2->add(ustrs[i], kMATCH, status); // forward
    680      fwdCount++;
    681      //ustrs[i].reverse();
    682      ////if(debug2) u_printf("SUPPRESS- not Added(%d):  /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
    683    }
    684  }
    685  FB_TRACE("AbbrCount",nullptr,false, subCount);
    686 
    687  if(revCount>0) {
    688    backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
    689    if(U_FAILURE(status)) {
    690      FB_TRACE(u_errorName(status),nullptr,false, -1);
    691      return nullptr;
    692    }
    693  }
    694 
    695  if(fwdCount>0) {
    696    forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
    697    if(U_FAILURE(status)) {
    698      FB_TRACE(u_errorName(status),nullptr,false, -1);
    699      return nullptr;
    700    }
    701  }
    702 
    703  return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
    704 }
    705 
    706 
    707 // ----------- Base class implementation
    708 
    709 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
    710 }
    711 
    712 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
    713 }
    714 
    715 FilteredBreakIteratorBuilder *
    716 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
    717  if(U_FAILURE(status)) return nullptr;
    718  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
    719  return (U_SUCCESS(status))? ret.orphan(): nullptr;
    720 }
    721 
    722 FilteredBreakIteratorBuilder *
    723 FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) {
    724  return createEmptyInstance(status);
    725 }
    726 
    727 FilteredBreakIteratorBuilder *
    728 FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) {
    729  if(U_FAILURE(status)) return nullptr;
    730  LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
    731  return (U_SUCCESS(status))? ret.orphan(): nullptr;
    732 }
    733 
    734 U_NAMESPACE_END
    735 
    736 #endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION