tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

locdistance.h (5307B)


      1 // © 2019 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // locdistance.h
      5 // created: 2019may08 Markus W. Scherer
      6 
      7 #ifndef __LOCDISTANCE_H__
      8 #define __LOCDISTANCE_H__
      9 
     10 #include "unicode/utypes.h"
     11 #include "unicode/bytestrie.h"
     12 #include "unicode/localematcher.h"
     13 #include "unicode/locid.h"
     14 #include "unicode/uobject.h"
     15 #include "lsr.h"
     16 
     17 U_NAMESPACE_BEGIN
     18 
     19 struct LocaleDistanceData;
     20 
     21 /**
     22 * Offline-built data for LocaleMatcher.
     23 * Mostly but not only the data for mapping locales to their maximized forms.
     24 */
     25 class LocaleDistance final : public UMemory {
     26 public:
     27    static const LocaleDistance *getSingleton(UErrorCode &errorCode);
     28 
     29    static int32_t shiftDistance(int32_t distance) {
     30        return distance << DISTANCE_SHIFT;
     31    }
     32 
     33    static int32_t getShiftedDistance(int32_t indexAndDistance) {
     34        return indexAndDistance & DISTANCE_MASK;
     35    }
     36 
     37    static double getDistanceDouble(int32_t indexAndDistance) {
     38        double shiftedDistance = getShiftedDistance(indexAndDistance);
     39        return shiftedDistance / (1 << DISTANCE_SHIFT);
     40    }
     41 
     42    static int32_t getDistanceFloor(int32_t indexAndDistance) {
     43        return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
     44    }
     45 
     46    static int32_t getIndex(int32_t indexAndDistance) {
     47        // assert indexAndDistance >= 0;
     48        return indexAndDistance >> INDEX_SHIFT;
     49    }
     50 
     51    /**
     52     * Finds the supported LSR with the smallest distance from the desired one.
     53     * Equivalent LSR subtags must be normalized into a canonical form.
     54     *
     55     * <p>Returns the index of the lowest-distance supported LSR in the high bits
     56     * (negative if none has a distance below the threshold),
     57     * and its distance (0..ABOVE_THRESHOLD) in the low bits.
     58     */
     59    int32_t getBestIndexAndDistance(const LSR &desired,
     60                                    const LSR **supportedLSRs, int32_t supportedLSRsLength,
     61                                    int32_t shiftedThreshold,
     62                                    ULocMatchFavorSubtag favorSubtag,
     63                                    ULocMatchDirection direction) const;
     64 
     65    bool isParadigmLSR(const LSR &lsr) const;
     66 
     67    int32_t getDefaultScriptDistance() const {
     68        return defaultScriptDistance;
     69    }
     70 
     71    int32_t getDefaultDemotionPerDesiredLocale() const {
     72        return defaultDemotionPerDesiredLocale;
     73    }
     74 
     75 private:
     76    // The distance is shifted left to gain some fraction bits.
     77    static constexpr int32_t DISTANCE_SHIFT = 3;
     78    static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
     79    // 7 bits for 0..100
     80    static constexpr int32_t DISTANCE_INT_SHIFT = 7;
     81    static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
     82    static constexpr int32_t DISTANCE_MASK = 0x3ff;
     83    // tic constexpr int32_t MAX_INDEX = 0x1fffff;  // avoids sign bit
     84    static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
     85 
     86    LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely);
     87    LocaleDistance(const LocaleDistance &other) = delete;
     88    LocaleDistance &operator=(const LocaleDistance &other) = delete;
     89 
     90    static void initLocaleDistance(UErrorCode &errorCode);
     91 
     92    bool isMatch(const LSR &desired, const LSR &supported,
     93                 int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
     94        const LSR *pSupp = &supported;
     95        return getBestIndexAndDistance(
     96            desired, &pSupp, 1,
     97            shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0;
     98    }
     99 
    100    static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
    101                                            const char *desired, const char *supported);
    102 
    103    static int32_t getRegionPartitionsDistance(
    104        BytesTrie &iter, uint64_t startState,
    105        const char *desiredPartitions, const char *supportedPartitions,
    106        int32_t threshold);
    107 
    108    static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);
    109 
    110    static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);
    111 
    112    const char *partitionsForRegion(const LSR &lsr) const {
    113        // ill-formed region -> one non-matching string
    114        int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
    115        return partitionArrays[pIndex];
    116    }
    117 
    118    int32_t getDefaultRegionDistance() const {
    119        return defaultRegionDistance;
    120    }
    121 
    122    const LikelySubtags &likelySubtags;
    123 
    124    // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
    125    // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
    126    // There is also a trie value for each subsequence of whole subtags.
    127    // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
    128    BytesTrie trie;
    129 
    130    /**
    131     * Maps each region to zero or more single-character partitions.
    132     */
    133    const uint8_t *regionToPartitionsIndex;
    134    const char **partitionArrays;
    135 
    136    /**
    137     * Used to get the paradigm region for a cluster, if there is one.
    138     */
    139    const LSR *paradigmLSRs;
    140    int32_t paradigmLSRsLength;
    141 
    142    int32_t defaultLanguageDistance;
    143    int32_t defaultScriptDistance;
    144    int32_t defaultRegionDistance;
    145    int32_t minRegionDistance;
    146    int32_t defaultDemotionPerDesiredLocale;
    147 };
    148 
    149 U_NAMESPACE_END
    150 
    151 #endif  // __LOCDISTANCE_H__