tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

measunit_impl.h (12791B)


      1 // © 2020 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #ifndef __MEASUNIT_IMPL_H__
      5 #define __MEASUNIT_IMPL_H__
      6 
      7 #include "unicode/utypes.h"
      8 
      9 #if !UCONFIG_NO_FORMATTING
     10 
     11 #include "unicode/measunit.h"
     12 #include "cmemory.h"
     13 #include "charstr.h"
     14 #include "fixedstring.h"
     15 
     16 U_NAMESPACE_BEGIN
     17 
     18 namespace number::impl {
     19 class LongNameHandler;
     20 }
     21 
     22 static const char16_t kDefaultCurrency[] = u"XXX";
     23 static const char kDefaultCurrency8[] = "XXX";
     24 
     25 /**
     26 * Looks up the "unitQuantity" (aka "type" or "category") of a base unit
     27 * identifier. The category is returned via `result`, which must initially be
     28 * empty.
     29 *
     30 * This only supports base units: other units must be resolved to base units
     31 * before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be
     32 * returned.
     33 *
     34 * Categories are found in `unitQuantities` in the `units` resource (see
     35 * `units.txt`).
     36 */
     37 // TODO: make this function accepts any `MeasureUnit` as Java and move it to the `UnitsData` class.
     38 CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status);
     39 
     40 /**
     41 * A struct representing a single unit (optional SI or binary prefix, and dimensionality).
     42 */
     43 struct U_I18N_API_CLASS SingleUnitImpl : public UMemory {
     44    /**
     45     * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error
     46     * code and returns the base dimensionless unit. Parses if necessary.
     47     */
     48    U_I18N_API static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status);
     49 
     50    /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */
     51    MeasureUnit build(UErrorCode& status) const;
     52 
     53    /**
     54     * Returns the "simple unit ID", without SI or dimensionality prefix: this
     55     * instance may represent a square-kilometer, but only "meter" will be
     56     * returned.
     57     *
     58     * The returned pointer points at memory that exists for the duration of the
     59     * program's running.
     60     */
     61    U_I18N_API const char* getSimpleUnitID() const;
     62 
     63    /**
     64     * Generates and append a neutral identifier string for a single unit which means we do not include
     65     * the dimension signal.
     66     */
     67    void appendNeutralIdentifier(CharString &result, UErrorCode &status) const;
     68 
     69    /**
     70     * Returns the index of this unit's "quantity" in unitQuantities (in
     71     * measunit_extra.cpp). The value of this index determines sort order for
     72     * normalization of unit identifiers.
     73     */
     74    int32_t getUnitCategoryIndex() const;
     75 
     76    /**
     77     * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
     78     * sorting and coalescing.
     79     *
     80     * Sort order of units is specified by UTS #35
     81     * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization).
     82     *
     83     * Takes the sign of dimensionality into account, but not the absolute
     84     * value: per-meter is not considered the same as meter, but meter is
     85     * considered the same as square-meter.
     86     *
     87     * The dimensionless unit generally does not get compared, but if it did, it
     88     * would sort before other units by virtue of index being < 0 and
     89     * dimensionality not being negative.
     90     */
     91    int32_t compareTo(const SingleUnitImpl& other) const {
     92        if (dimensionality < 0 && other.dimensionality > 0) {
     93            // Positive dimensions first
     94            return 1;
     95        }
     96        if (dimensionality > 0 && other.dimensionality < 0) {
     97            return -1;
     98        }
     99 
    100        // Sort by official quantity order
    101        int32_t thisQuantity = this->getUnitCategoryIndex();
    102        int32_t otherQuantity = other.getUnitCategoryIndex();
    103        if (thisQuantity < otherQuantity) {
    104            return -1;
    105        }
    106        if (thisQuantity > otherQuantity) {
    107            return 1;
    108        }
    109 
    110        // If quantity order didn't help, then we go by index.
    111        if (index < other.index) {
    112            return -1;
    113        }
    114        if (index > other.index) {
    115            return 1;
    116        }
    117 
    118        // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can
    119        // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can
    120        // compare the bases.
    121        // NOTE: this methodology will fail if the binary prefix more than or equal 98.
    122        int32_t unitBase = umeas_getPrefixBase(unitPrefix);
    123        int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix);
    124 
    125        // Values for comparison purposes only.
    126        int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3
    127                                                                 : umeas_getPrefixPower(unitPrefix);
    128        int32_t otherUnitPower =
    129            otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3
    130                                                      : umeas_getPrefixPower(other.unitPrefix);
    131 
    132        // NOTE: if the unitPower is less than the other,
    133        // we return 1 not -1. Thus because we want th sorting order
    134        // for the bigger prefix to be before the smaller.
    135        // Example: megabyte should come before kilobyte.
    136        if (unitPower < otherUnitPower) {
    137            return 1;
    138        }
    139        if (unitPower > otherUnitPower) {
    140            return -1;
    141        }
    142 
    143        if (unitBase < otherUnitBase) {
    144            return 1;
    145        }
    146        if (unitBase > otherUnitBase) {
    147            return -1;
    148        }
    149 
    150        return 0;
    151    }
    152 
    153    /**
    154     * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing.
    155     *
    156     * Units with the same base unit and SI or binary prefix should match, except that they must also
    157     * have the same dimensionality sign, such that we don't merge numerator and denominator.
    158     */
    159    bool isCompatibleWith(const SingleUnitImpl& other) const {
    160        return (compareTo(other) == 0);
    161    }
    162 
    163    /**
    164     * Returns true if this unit is the "dimensionless base unit", as produced
    165     * by the MeasureUnit() default constructor. (This does not include the
    166     * likes of concentrations or angles.)
    167     */
    168    bool isDimensionless() const {
    169        return index == -1;
    170    }
    171 
    172    /**
    173     * Simple unit index, unique for every simple unit, -1 for the dimensionless
    174     * unit. This is an index into a string list in measunit_extra.cpp, as
    175     * loaded by SimpleUnitIdentifiersSink.
    176     *
    177     * The default value is -1, meaning the dimensionless unit:
    178     * isDimensionless() will return true, until index is changed.
    179     */
    180    int32_t index = -1;
    181 
    182    /**
    183     * SI or binary prefix.
    184     *
    185     * This is ignored for the dimensionless unit.
    186     */
    187    UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE;
    188 
    189    /**
    190     * Dimensionality.
    191     *
    192     * This is meaningless for the dimensionless unit.
    193     */
    194    int32_t dimensionality = 1;
    195 };
    196 
    197 // Forward declaration
    198 struct MeasureUnitImplWithIndex;
    199 
    200 /**
    201 * Internal representation of measurement units. Capable of representing all complexities of units,
    202 * including mixed and compound units.
    203 */
    204 class U_I18N_API_CLASS MeasureUnitImpl : public UMemory {
    205  public:
    206    MeasureUnitImpl() = default;
    207    MeasureUnitImpl(MeasureUnitImpl &&other) = default;
    208    // No copy constructor, use MeasureUnitImpl::copy() to make it explicit.
    209    MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete;
    210    MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status);
    211 
    212    MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default;
    213 
    214    /** Extract the MeasureUnitImpl from a MeasureUnit. */
    215    static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) {
    216        return measureUnit.fImpl;
    217    }
    218 
    219    /**
    220     * Parse a unit identifier into a MeasureUnitImpl.
    221     *
    222     * @param identifier The unit identifier string.
    223     * @param status Set if the identifier string is not valid.
    224     * @return A newly parsed value object. Behaviour of this unit is
    225     * unspecified if an error is returned via status.
    226     */
    227    U_I18N_API static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status);
    228 
    229    /**
    230     * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present.
    231     * 
    232     * @param measureUnit The source MeasureUnit.
    233     * @param memory A place to write the new MeasureUnitImpl if parsing is required.
    234     * @param status Set if an error occurs.
    235     * @return A reference to either measureUnit.fImpl or memory.
    236     */
    237    U_I18N_API static const MeasureUnitImpl& forMeasureUnit(
    238        const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status);
    239 
    240    /**
    241     * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present.
    242     *
    243     * @param measureUnit The source MeasureUnit.
    244     * @param status Set if an error occurs.
    245     * @return A value object, either newly parsed or copied from measureUnit.
    246     */
    247    static MeasureUnitImpl forMeasureUnitMaybeCopy(
    248        const MeasureUnit& measureUnit, UErrorCode& status);
    249 
    250    /**
    251     * Used for currency units.
    252     */
    253    static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode, UErrorCode& status) {
    254        MeasureUnitImpl result;
    255        if (U_SUCCESS(status)) {
    256            result.identifier = currencyCode;
    257            if (result.identifier.isEmpty() != currencyCode.empty()) {
    258                status = U_MEMORY_ALLOCATION_ERROR;
    259            }
    260        }
    261        return result;
    262    }
    263 
    264    /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */
    265    U_I18N_API MeasureUnit build(UErrorCode& status) &&;
    266 
    267    /**
    268     * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit.
    269     */
    270    MeasureUnitImpl copy(UErrorCode& status) const;
    271 
    272    /**
    273     * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices.
    274     *      For example:    
    275     *          -   if the `MeasureUnitImpl` is `foot-per-hour`
    276     *                  it will return a list of 1 {(0, `foot-per-hour`)} 
    277     *          -   if the `MeasureUnitImpl` is `foot-and-inch` 
    278     *                  it will return a list of 2 {(0, `foot`), (1, `inch`)}
    279     */
    280    MaybeStackVector<MeasureUnitImplWithIndex>
    281    extractIndividualUnitsWithIndices(UErrorCode &status) const;
    282 
    283    /** Mutates this MeasureUnitImpl to take the reciprocal. */
    284    void takeReciprocal(UErrorCode& status);
    285 
    286    /**
    287     * Returns a simplified version of the unit.
    288     * NOTE: the simplification happen when there are two units equals in their base unit and their
    289     * prefixes.
    290     *
    291     * Example 1: "square-meter-per-meter" --> "meter"
    292     * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter"
    293     */
    294    MeasureUnitImpl copyAndSimplify(UErrorCode &status) const;
    295 
    296    /**
    297     * Mutates this MeasureUnitImpl to append a single unit.
    298     *
    299     * @return true if a new item was added. If unit is the dimensionless unit,
    300     * it is never added: the return value will always be false.
    301     */
    302    U_I18N_API bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status);
    303 
    304    /**
    305     * Normalizes a MeasureUnitImpl and generate the identifier string in place.
    306     */
    307    void serialize(UErrorCode &status);
    308 
    309    /** The complexity, either SINGLE, COMPOUND, or MIXED. */
    310    UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE;
    311 
    312    /**
    313     * The list of single units. These may be summed or multiplied, based on the
    314     * value of the complexity field.
    315     *
    316     * The "dimensionless" unit (SingleUnitImpl default constructor) must not be
    317     * added to this list.
    318     */
    319    MaybeStackVector<SingleUnitImpl> singleUnits;
    320 
    321    /**
    322     * The full unit identifier.  Owned by the MeasureUnitImpl.  Empty if not computed.
    323     */
    324    FixedString identifier;
    325 
    326    /**
    327     * Represents the unit constant denominator.
    328     *
    329     * NOTE:
    330     *   if set to 0, it means that the constant is not set.
    331     */
    332    uint64_t constantDenominator = 0;
    333 
    334    // For calling serialize
    335    // TODO(icu-units#147): revisit serialization
    336    friend class number::impl::LongNameHandler;
    337 };
    338 
    339 struct MeasureUnitImplWithIndex : public UMemory {
    340    const int32_t index;
    341    MeasureUnitImpl unitImpl;
    342    // Makes a copy of unitImpl.
    343    MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status)
    344        : index(index), unitImpl(unitImpl.copy(status)) {
    345    }
    346    MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status)
    347        : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) {
    348    }
    349 };
    350 
    351 U_NAMESPACE_END
    352 
    353 #endif /* #if !UCONFIG_NO_FORMATTING */
    354 #endif //__MEASUNIT_IMPL_H__