measunit_impl.h (12791B)
1 // © 2020 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #ifndef __MEASUNIT_IMPL_H__ 5 #define __MEASUNIT_IMPL_H__ 6 7 #include "unicode/utypes.h" 8 9 #if !UCONFIG_NO_FORMATTING 10 11 #include "unicode/measunit.h" 12 #include "cmemory.h" 13 #include "charstr.h" 14 #include "fixedstring.h" 15 16 U_NAMESPACE_BEGIN 17 18 namespace number::impl { 19 class LongNameHandler; 20 } 21 22 static const char16_t kDefaultCurrency[] = u"XXX"; 23 static const char kDefaultCurrency8[] = "XXX"; 24 25 /** 26 * Looks up the "unitQuantity" (aka "type" or "category") of a base unit 27 * identifier. The category is returned via `result`, which must initially be 28 * empty. 29 * 30 * This only supports base units: other units must be resolved to base units 31 * before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be 32 * returned. 33 * 34 * Categories are found in `unitQuantities` in the `units` resource (see 35 * `units.txt`). 36 */ 37 // TODO: make this function accepts any `MeasureUnit` as Java and move it to the `UnitsData` class. 38 CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status); 39 40 /** 41 * A struct representing a single unit (optional SI or binary prefix, and dimensionality). 42 */ 43 struct U_I18N_API_CLASS SingleUnitImpl : public UMemory { 44 /** 45 * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error 46 * code and returns the base dimensionless unit. Parses if necessary. 47 */ 48 U_I18N_API static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status); 49 50 /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */ 51 MeasureUnit build(UErrorCode& status) const; 52 53 /** 54 * Returns the "simple unit ID", without SI or dimensionality prefix: this 55 * instance may represent a square-kilometer, but only "meter" will be 56 * returned. 57 * 58 * The returned pointer points at memory that exists for the duration of the 59 * program's running. 60 */ 61 U_I18N_API const char* getSimpleUnitID() const; 62 63 /** 64 * Generates and append a neutral identifier string for a single unit which means we do not include 65 * the dimension signal. 66 */ 67 void appendNeutralIdentifier(CharString &result, UErrorCode &status) const; 68 69 /** 70 * Returns the index of this unit's "quantity" in unitQuantities (in 71 * measunit_extra.cpp). The value of this index determines sort order for 72 * normalization of unit identifiers. 73 */ 74 int32_t getUnitCategoryIndex() const; 75 76 /** 77 * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of 78 * sorting and coalescing. 79 * 80 * Sort order of units is specified by UTS #35 81 * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization). 82 * 83 * Takes the sign of dimensionality into account, but not the absolute 84 * value: per-meter is not considered the same as meter, but meter is 85 * considered the same as square-meter. 86 * 87 * The dimensionless unit generally does not get compared, but if it did, it 88 * would sort before other units by virtue of index being < 0 and 89 * dimensionality not being negative. 90 */ 91 int32_t compareTo(const SingleUnitImpl& other) const { 92 if (dimensionality < 0 && other.dimensionality > 0) { 93 // Positive dimensions first 94 return 1; 95 } 96 if (dimensionality > 0 && other.dimensionality < 0) { 97 return -1; 98 } 99 100 // Sort by official quantity order 101 int32_t thisQuantity = this->getUnitCategoryIndex(); 102 int32_t otherQuantity = other.getUnitCategoryIndex(); 103 if (thisQuantity < otherQuantity) { 104 return -1; 105 } 106 if (thisQuantity > otherQuantity) { 107 return 1; 108 } 109 110 // If quantity order didn't help, then we go by index. 111 if (index < other.index) { 112 return -1; 113 } 114 if (index > other.index) { 115 return 1; 116 } 117 118 // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can 119 // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can 120 // compare the bases. 121 // NOTE: this methodology will fail if the binary prefix more than or equal 98. 122 int32_t unitBase = umeas_getPrefixBase(unitPrefix); 123 int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix); 124 125 // Values for comparison purposes only. 126 int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3 127 : umeas_getPrefixPower(unitPrefix); 128 int32_t otherUnitPower = 129 otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3 130 : umeas_getPrefixPower(other.unitPrefix); 131 132 // NOTE: if the unitPower is less than the other, 133 // we return 1 not -1. Thus because we want th sorting order 134 // for the bigger prefix to be before the smaller. 135 // Example: megabyte should come before kilobyte. 136 if (unitPower < otherUnitPower) { 137 return 1; 138 } 139 if (unitPower > otherUnitPower) { 140 return -1; 141 } 142 143 if (unitBase < otherUnitBase) { 144 return 1; 145 } 146 if (unitBase > otherUnitBase) { 147 return -1; 148 } 149 150 return 0; 151 } 152 153 /** 154 * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing. 155 * 156 * Units with the same base unit and SI or binary prefix should match, except that they must also 157 * have the same dimensionality sign, such that we don't merge numerator and denominator. 158 */ 159 bool isCompatibleWith(const SingleUnitImpl& other) const { 160 return (compareTo(other) == 0); 161 } 162 163 /** 164 * Returns true if this unit is the "dimensionless base unit", as produced 165 * by the MeasureUnit() default constructor. (This does not include the 166 * likes of concentrations or angles.) 167 */ 168 bool isDimensionless() const { 169 return index == -1; 170 } 171 172 /** 173 * Simple unit index, unique for every simple unit, -1 for the dimensionless 174 * unit. This is an index into a string list in measunit_extra.cpp, as 175 * loaded by SimpleUnitIdentifiersSink. 176 * 177 * The default value is -1, meaning the dimensionless unit: 178 * isDimensionless() will return true, until index is changed. 179 */ 180 int32_t index = -1; 181 182 /** 183 * SI or binary prefix. 184 * 185 * This is ignored for the dimensionless unit. 186 */ 187 UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE; 188 189 /** 190 * Dimensionality. 191 * 192 * This is meaningless for the dimensionless unit. 193 */ 194 int32_t dimensionality = 1; 195 }; 196 197 // Forward declaration 198 struct MeasureUnitImplWithIndex; 199 200 /** 201 * Internal representation of measurement units. Capable of representing all complexities of units, 202 * including mixed and compound units. 203 */ 204 class U_I18N_API_CLASS MeasureUnitImpl : public UMemory { 205 public: 206 MeasureUnitImpl() = default; 207 MeasureUnitImpl(MeasureUnitImpl &&other) = default; 208 // No copy constructor, use MeasureUnitImpl::copy() to make it explicit. 209 MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete; 210 MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status); 211 212 MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default; 213 214 /** Extract the MeasureUnitImpl from a MeasureUnit. */ 215 static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) { 216 return measureUnit.fImpl; 217 } 218 219 /** 220 * Parse a unit identifier into a MeasureUnitImpl. 221 * 222 * @param identifier The unit identifier string. 223 * @param status Set if the identifier string is not valid. 224 * @return A newly parsed value object. Behaviour of this unit is 225 * unspecified if an error is returned via status. 226 */ 227 U_I18N_API static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status); 228 229 /** 230 * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. 231 * 232 * @param measureUnit The source MeasureUnit. 233 * @param memory A place to write the new MeasureUnitImpl if parsing is required. 234 * @param status Set if an error occurs. 235 * @return A reference to either measureUnit.fImpl or memory. 236 */ 237 U_I18N_API static const MeasureUnitImpl& forMeasureUnit( 238 const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status); 239 240 /** 241 * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. 242 * 243 * @param measureUnit The source MeasureUnit. 244 * @param status Set if an error occurs. 245 * @return A value object, either newly parsed or copied from measureUnit. 246 */ 247 static MeasureUnitImpl forMeasureUnitMaybeCopy( 248 const MeasureUnit& measureUnit, UErrorCode& status); 249 250 /** 251 * Used for currency units. 252 */ 253 static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode, UErrorCode& status) { 254 MeasureUnitImpl result; 255 if (U_SUCCESS(status)) { 256 result.identifier = currencyCode; 257 if (result.identifier.isEmpty() != currencyCode.empty()) { 258 status = U_MEMORY_ALLOCATION_ERROR; 259 } 260 } 261 return result; 262 } 263 264 /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */ 265 U_I18N_API MeasureUnit build(UErrorCode& status) &&; 266 267 /** 268 * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit. 269 */ 270 MeasureUnitImpl copy(UErrorCode& status) const; 271 272 /** 273 * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices. 274 * For example: 275 * - if the `MeasureUnitImpl` is `foot-per-hour` 276 * it will return a list of 1 {(0, `foot-per-hour`)} 277 * - if the `MeasureUnitImpl` is `foot-and-inch` 278 * it will return a list of 2 {(0, `foot`), (1, `inch`)} 279 */ 280 MaybeStackVector<MeasureUnitImplWithIndex> 281 extractIndividualUnitsWithIndices(UErrorCode &status) const; 282 283 /** Mutates this MeasureUnitImpl to take the reciprocal. */ 284 void takeReciprocal(UErrorCode& status); 285 286 /** 287 * Returns a simplified version of the unit. 288 * NOTE: the simplification happen when there are two units equals in their base unit and their 289 * prefixes. 290 * 291 * Example 1: "square-meter-per-meter" --> "meter" 292 * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter" 293 */ 294 MeasureUnitImpl copyAndSimplify(UErrorCode &status) const; 295 296 /** 297 * Mutates this MeasureUnitImpl to append a single unit. 298 * 299 * @return true if a new item was added. If unit is the dimensionless unit, 300 * it is never added: the return value will always be false. 301 */ 302 U_I18N_API bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status); 303 304 /** 305 * Normalizes a MeasureUnitImpl and generate the identifier string in place. 306 */ 307 void serialize(UErrorCode &status); 308 309 /** The complexity, either SINGLE, COMPOUND, or MIXED. */ 310 UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; 311 312 /** 313 * The list of single units. These may be summed or multiplied, based on the 314 * value of the complexity field. 315 * 316 * The "dimensionless" unit (SingleUnitImpl default constructor) must not be 317 * added to this list. 318 */ 319 MaybeStackVector<SingleUnitImpl> singleUnits; 320 321 /** 322 * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. 323 */ 324 FixedString identifier; 325 326 /** 327 * Represents the unit constant denominator. 328 * 329 * NOTE: 330 * if set to 0, it means that the constant is not set. 331 */ 332 uint64_t constantDenominator = 0; 333 334 // For calling serialize 335 // TODO(icu-units#147): revisit serialization 336 friend class number::impl::LongNameHandler; 337 }; 338 339 struct MeasureUnitImplWithIndex : public UMemory { 340 const int32_t index; 341 MeasureUnitImpl unitImpl; 342 // Makes a copy of unitImpl. 343 MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status) 344 : index(index), unitImpl(unitImpl.copy(status)) { 345 } 346 MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status) 347 : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) { 348 } 349 }; 350 351 U_NAMESPACE_END 352 353 #endif /* #if !UCONFIG_NO_FORMATTING */ 354 #endif //__MEASUNIT_IMPL_H__