SourceText.h (13555B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 /* 7 * SourceText encapsulates a count of char16_t (UTF-16) or Utf8Unit (UTF-8) 8 * code units (note: code *units*, not bytes or code points) and those code 9 * units ("source units"). (Latin-1 is not supported: all places where Latin-1 10 * must be compiled first convert to a supported encoding.) 11 * 12 * A SourceText either observes without owning, or takes ownership of, source 13 * units passed to |SourceText::init|. Thus SourceText can be used to 14 * efficiently avoid copying. 15 * 16 * Rules for use: 17 * 18 * 1) The passed-in source units must be allocated with js_malloc(), 19 * js_calloc(), or js_realloc() if |SourceText::init| is instructed to take 20 * ownership of the source units. 21 * 2) If |SourceText::init| merely borrows the source units, the user must 22 * keep them alive until associated JS compilation is complete. 23 * 3) Code that calls |SourceText::take{Chars,Units}()| must keep the source 24 * units alive until JS compilation completes. Normally only the JS engine 25 * should call |SourceText::take{Chars,Units}()|. 26 * 4) Use the appropriate SourceText parameterization depending on the source 27 * units encoding. 28 * 29 * Example use: 30 * 31 * size_t length = 512; 32 * char16_t* chars = js_pod_malloc<char16_t>(length); 33 * if (!chars) { 34 * JS_ReportOutOfMemory(cx); 35 * return false; 36 * } 37 * JS::SourceText<char16_t> srcBuf; 38 * if (!srcBuf.init(cx, chars, length, JS::SourceOwnership::TakeOwnership)) { 39 * return false; 40 * } 41 * JS::Rooted<JSScript*> script(cx); 42 * if (!JS::Compile(cx, options, srcBuf, &script)) { 43 * return false; 44 * } 45 */ 46 47 #ifndef js_SourceText_h 48 #define js_SourceText_h 49 50 #include "mozilla/Assertions.h" // MOZ_ASSERT 51 #include "mozilla/Attributes.h" // MOZ_COLD, MOZ_IS_CLASS_INIT 52 #include "mozilla/Likely.h" // MOZ_UNLIKELY 53 54 #include <stddef.h> // size_t 55 #include <stdint.h> // UINT32_MAX 56 #include <type_traits> // std::conditional_t, std::is_same_v 57 58 #include "js/UniquePtr.h" // js::UniquePtr 59 #include "js/Utility.h" // JS::FreePolicy 60 61 namespace mozilla { 62 union Utf8Unit; 63 } 64 65 namespace js { 66 class FrontendContext; 67 } // namespace js 68 69 namespace JS { 70 71 class JS_PUBLIC_API AutoStableStringChars; 72 using FrontendContext = js::FrontendContext; 73 74 namespace detail { 75 76 MOZ_COLD extern JS_PUBLIC_API void ReportSourceTooLong(JSContext* cx); 77 MOZ_COLD extern JS_PUBLIC_API void ReportSourceTooLong(JS::FrontendContext* fc); 78 79 } // namespace detail 80 81 enum class SourceOwnership { 82 Borrowed, 83 TakeOwnership, 84 }; 85 86 template <typename Unit> 87 class SourceText final { 88 private: 89 static_assert(std::is_same_v<Unit, mozilla::Utf8Unit> || 90 std::is_same_v<Unit, char16_t>, 91 "Unit must be either char16_t or Utf8Unit for " 92 "SourceText<Unit>"); 93 94 /** |char16_t| or |Utf8Unit| source units of uncertain validity. */ 95 const Unit* units_ = nullptr; 96 97 /** The length in code units of |units_|. */ 98 uint32_t length_ = 0; 99 100 /** 101 * Whether this owns |units_| or merely observes source units owned by some 102 * other object. 103 */ 104 bool ownsUnits_ = false; 105 106 public: 107 // A C++ character type that can represent the source units -- suitable for 108 // passing to C++ string functions. 109 using CharT = 110 std::conditional_t<std::is_same_v<Unit, char16_t>, char16_t, char>; 111 112 public: 113 /** 114 * Construct a SourceText. It must be initialized using |init()| before it 115 * can be used as compilation source text. 116 */ 117 SourceText() = default; 118 119 /** 120 * Construct a SourceText from contents extracted from |other|. This 121 * SourceText will then act exactly as |other| would have acted, had it 122 * not been passed to this function. |other| will return to its default- 123 * constructed state and must have |init()| called on it to use it. 124 */ 125 SourceText(SourceText&& other) 126 : units_(other.units_), 127 length_(other.length_), 128 ownsUnits_(other.ownsUnits_) { 129 other.units_ = nullptr; 130 other.length_ = 0; 131 other.ownsUnits_ = false; 132 } 133 134 ~SourceText() { 135 if (ownsUnits_) { 136 js_free(const_cast<Unit*>(units_)); 137 } 138 } 139 140 private: 141 template <typename ContextT> 142 [[nodiscard]] MOZ_IS_CLASS_INIT bool initImpl(ContextT* context, 143 const Unit* units, 144 size_t unitsLength, 145 SourceOwnership ownership) { 146 MOZ_ASSERT_IF(units == nullptr, unitsLength == 0); 147 148 // Ideally we'd use |Unit| and not cast below, but the risk of a static 149 // initializer is too great. 150 static const CharT emptyString[] = {'\0'}; 151 152 // Initialize all fields *before* checking length. This ensures that 153 // if |ownership == SourceOwnership::TakeOwnership|, |units| will be 154 // freed when |this|'s destructor is called. 155 if (units) { 156 units_ = units; 157 length_ = static_cast<uint32_t>(unitsLength); 158 ownsUnits_ = ownership == SourceOwnership::TakeOwnership; 159 } else { 160 units_ = reinterpret_cast<const Unit*>(emptyString); 161 length_ = 0; 162 ownsUnits_ = false; 163 } 164 165 // IMPLEMENTATION DETAIL, DO NOT RELY ON: This limit is used so we can 166 // store offsets in |JSScript|s as |uint32_t|. It could be lifted 167 // fairly easily if desired, as the compiler uses |size_t| internally. 168 if (MOZ_UNLIKELY(unitsLength > UINT32_MAX)) { 169 detail::ReportSourceTooLong(context); 170 return false; 171 } 172 173 return true; 174 } 175 176 public: 177 /** 178 * Initialize this with source unit data: |char16_t| for UTF-16 source 179 * units, or |Utf8Unit| for UTF-8 source units. 180 * 181 * If |ownership == TakeOwnership|, *this function* takes ownership of 182 * |units|, *even if* this function fails, and you MUST NOT free |units| 183 * yourself. This single-owner-friendly approach reduces risk of leaks on 184 * failure. 185 * 186 * |units| may be null if |unitsLength == 0|; if so, this will silently be 187 * initialized using non-null, unowned units. 188 */ 189 [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JSContext* cx, const Unit* units, 190 size_t unitsLength, 191 SourceOwnership ownership) { 192 return initImpl(cx, units, unitsLength, ownership); 193 } 194 [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JS::FrontendContext* fc, 195 const Unit* units, 196 size_t unitsLength, 197 SourceOwnership ownership) { 198 return initImpl(fc, units, unitsLength, ownership); 199 } 200 201 /** 202 * Exactly identical to the |init()| overload above that accepts 203 * |const Unit*|, but instead takes character data: |const CharT*|. 204 * 205 * (We can't just write this to accept |const CharT*|, because then in the 206 * UTF-16 case this overload and the one above would be identical. So we 207 * use SFINAE to expose the |CharT| overload only if it's different.) 208 */ 209 template <typename Char, 210 typename = std::enable_if_t<std::is_same_v<Char, CharT> && 211 !std::is_same_v<Char, Unit>>> 212 [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JSContext* cx, const Char* chars, 213 size_t charsLength, 214 SourceOwnership ownership) { 215 return initImpl(cx, reinterpret_cast<const Unit*>(chars), charsLength, 216 ownership); 217 } 218 template <typename Char, 219 typename = std::enable_if_t<std::is_same_v<Char, CharT> && 220 !std::is_same_v<Char, Unit>>> 221 [[nodiscard]] MOZ_IS_CLASS_INIT bool init(JS::FrontendContext* fc, 222 const Char* chars, 223 size_t charsLength, 224 SourceOwnership ownership) { 225 return initImpl(fc, reinterpret_cast<const Unit*>(chars), charsLength, 226 ownership); 227 } 228 229 /** 230 * Initialize this using source units transferred out of |data|. 231 */ 232 [[nodiscard]] bool init(JSContext* cx, 233 js::UniquePtr<Unit[], JS::FreePolicy> data, 234 size_t dataLength) { 235 return initImpl(cx, data.release(), dataLength, 236 SourceOwnership::TakeOwnership); 237 } 238 [[nodiscard]] bool init(JS::FrontendContext* fc, 239 js::UniquePtr<Unit[], JS::FreePolicy> data, 240 size_t dataLength) { 241 return initImpl(fc, data.release(), dataLength, 242 SourceOwnership::TakeOwnership); 243 } 244 245 /** 246 * Exactly identical to the |init()| overload above that accepts 247 * |UniquePtr<Unit[], JS::FreePolicy>|, but instead takes character data: 248 * |UniquePtr<CharT[], JS::FreePolicy>|. 249 * 250 * (We can't just duplicate the signature above with s/Unit/CharT/, because 251 * then in the UTF-16 case this overload and the one above would be identical. 252 * So we use SFINAE to expose the |CharT| overload only if it's different.) 253 */ 254 template <typename Char, 255 typename = std::enable_if_t<std::is_same_v<Char, CharT> && 256 !std::is_same_v<Char, Unit>>> 257 [[nodiscard]] bool init(JSContext* cx, 258 js::UniquePtr<Char[], JS::FreePolicy> data, 259 size_t dataLength) { 260 return init(cx, data.release(), dataLength, SourceOwnership::TakeOwnership); 261 } 262 template <typename Char, 263 typename = std::enable_if_t<std::is_same_v<Char, CharT> && 264 !std::is_same_v<Char, Unit>>> 265 [[nodiscard]] bool init(JS::FrontendContext* fc, 266 js::UniquePtr<Char[], JS::FreePolicy> data, 267 size_t dataLength) { 268 return init(fc, data.release(), dataLength, SourceOwnership::TakeOwnership); 269 } 270 271 /** 272 * Initialize this using an AutoStableStringChars. Transfers the code units if 273 * they are owned by the AutoStableStringChars, otherwise borrow directly from 274 * the underlying JSString. The AutoStableStringChars must outlive this 275 * SourceText and must be explicitly configured to the same unit type as this 276 * SourceText. 277 */ 278 [[nodiscard]] bool initMaybeBorrowed(JSContext* cx, 279 AutoStableStringChars& linearChars); 280 [[nodiscard]] bool initMaybeBorrowed(JS::FrontendContext* fc, 281 AutoStableStringChars& linearChars); 282 283 /** 284 * Access the encapsulated data using a code unit type. 285 * 286 * This function is useful for code that wants to interact with source text 287 * as *code units*, not as string data. This doesn't matter for UTF-16, 288 * but it's a crucial distinction for UTF-8. When UTF-8 source text is 289 * encapsulated, |Unit| being |mozilla::Utf8Unit| unambiguously indicates 290 * that the code units are UTF-8. In contrast |const char*| returned by 291 * |get()| below could hold UTF-8 (or its ASCII subset) or Latin-1 or (in 292 * particularly cursed embeddings) EBCDIC or some other legacy character 293 * set. Prefer this function to |get()| wherever possible. 294 */ 295 const Unit* units() const { return units_; } 296 297 /** 298 * Access the encapsulated data using a character type. 299 * 300 * This function is useful for interactions with character-centric actions 301 * like interacting with UniqueChars/UniqueTwoByteChars or printing out 302 * text in a debugger, that only work with |CharT|. But as |CharT| loses 303 * encoding specificity when UTF-8 source text is encapsulated, prefer 304 * |units()| to this function. 305 */ 306 const CharT* get() const { return reinterpret_cast<const CharT*>(units_); } 307 308 /** 309 * Returns true if this owns the source units and will free them on 310 * destruction. If true, it is legal to call |take{Chars,Units}()|. 311 */ 312 bool ownsUnits() const { return ownsUnits_; } 313 314 /** 315 * Count of the underlying source units -- code units, not bytes or code 316 * points -- in this. 317 */ 318 uint32_t length() const { return length_; } 319 320 /** 321 * Retrieve and take ownership of the underlying source units. The caller 322 * is now responsible for calling js_free() on the returned value, *but 323 * only after JS script compilation has completed*. 324 * 325 * After underlying source units have been taken, this will continue to 326 * refer to the same data -- it just won't own the data. get() and 327 * length() will return the same values, but ownsUnits() will be false. 328 * The taken source units must be kept alive until after JS script 329 * compilation completes, as noted above, for this to be safe. 330 * 331 * The caller must check ownsUnits() before calling takeUnits(). Taking 332 * and then free'ing an unowned buffer will have dire consequences. 333 */ 334 Unit* takeUnits() { 335 MOZ_ASSERT(ownsUnits_); 336 ownsUnits_ = false; 337 return const_cast<Unit*>(units_); 338 } 339 340 /** 341 * Akin to |takeUnits()| in all respects, but returns characters rather 342 * than units. 343 */ 344 CharT* takeChars() { return reinterpret_cast<CharT*>(takeUnits()); } 345 346 private: 347 SourceText(const SourceText&) = delete; 348 void operator=(const SourceText&) = delete; 349 }; 350 351 } // namespace JS 352 353 #endif /* js_SourceText_h */