WasmBCFrame.h (52366B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * 4 * Copyright 2016 Mozilla Foundation 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 // This is an INTERNAL header for Wasm baseline compiler: CPU stack frame, 20 // stack maps, and associated logic. 21 22 #ifndef wasm_wasm_baseline_frame_h 23 #define wasm_wasm_baseline_frame_h 24 25 #include "wasm/WasmBaselineCompile.h" // For BaseLocalIter 26 #include "wasm/WasmBCDefs.h" 27 #include "wasm/WasmBCRegDefs.h" 28 #include "wasm/WasmBCStk.h" 29 #include "wasm/WasmConstants.h" // For MaxFrameSize 30 31 // [SMDOC] Wasm baseline compiler's stack frame. 32 // 33 // For background, see "Wasm's ABIs" in WasmFrame.h, the following should never 34 // be in conflict with that. 35 // 36 // The stack frame has four parts ("below" means at lower addresses): 37 // 38 // - the Frame element; 39 // - the Local area, including the DebugFrame element and possibly a spilled 40 // pointer to stack results, if any; allocated below the header with various 41 // forms of alignment; 42 // - the Dynamic area, comprising the temporary storage the compiler uses for 43 // register spilling, allocated below the Local area; 44 // - the Arguments area, comprising memory allocated for outgoing calls, 45 // allocated below the Dynamic area. 46 // 47 // +==============================+ 48 // | Incoming stack arg | 49 // | ... 
| 50 // ------------- +==============================+ 51 // | Frame (fixed size) | 52 // ------------- +==============================+ <-------------------- FP 53 // ^ | DebugFrame (optional) | ^ ^ ^^ 54 // localSize | Register arg local | | | || 55 // | | ... | | | framePushed 56 // | | Register stack result ptr?| | | || 57 // | | Non-arg local | | | || 58 // | | ... | | | || 59 // | | (padding) | | | || 60 // | | Instance pointer | | | || 61 // | +------------------------------+ | | || 62 // v | (padding) | | v || 63 // ------------- +==============================+ currentStackHeight || 64 // ^ | Dynamic (variable size) | | || 65 // dynamicSize | ... | | || 66 // v | ... | v || 67 // ------------- | (free space, sometimes) | --------- v| 68 // +==============================+ <----- SP not-during calls 69 // | Arguments (sometimes) | | 70 // | ... | v 71 // +==============================+ <----- SP during calls 72 // 73 // The Frame is addressed off the stack pointer. masm.framePushed() is always 74 // correct, and masm.getStackPointer() + masm.framePushed() always addresses the 75 // Frame, with the DebugFrame optionally below it. 76 // 77 // The Local area (including the DebugFrame and, if needed, the spilled value of 78 // the stack results area pointer) is laid out by BaseLocalIter and is allocated 79 // and deallocated by standard prologue and epilogue functions that manipulate 80 // the stack pointer, but it is accessed via BaseStackFrame. 81 // 82 // The Dynamic area is maintained by and accessed via BaseStackFrame. On some 83 // systems (such as ARM64), the Dynamic memory may be allocated in chunks 84 // because the SP needs a specific alignment, and in this case there will 85 // normally be some free space directly above the SP. The stack height does not 86 // include the free space, it reflects the logically used space only. 
//
// The Dynamic area is where space for stack results is allocated when calling
// functions that return results on the stack. If a function has stack results,
// a pointer to the low address of the stack result area is passed as an
// additional argument, according to the usual ABI. See
// ABIResultIter::HasStackResults.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.

namespace js {
namespace wasm {

using namespace js::jit;

// Abstraction of the height of the stack frame, to avoid type confusion.
//
// A StackHeight is either Invalid() (the UINT32_MAX sentinel) or a byte count;
// comparison operators assert that neither operand is the sentinel.

class StackHeight {
  friend class BaseStackFrameAllocator;

  uint32_t height;

 public:
  explicit StackHeight(uint32_t h) : height(h) {}
  static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
  bool isValid() const { return height != UINT32_MAX; }
  bool operator==(StackHeight rhs) const {
    MOZ_ASSERT(isValid() && rhs.isValid());
    return height == rhs.height;
  }
  bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};

// Abstraction for where multi-value results go on the machine stack.
//
// The default-constructed value means "no stack results": bytes() == 0 and
// height() must not be called (height_ is Nothing).

class StackResultsLoc {
  uint32_t bytes_;
  size_t count_;
  mozilla::Maybe<uint32_t> height_;

 public:
  StackResultsLoc() : bytes_(0), count_(0) {};
  StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
      : bytes_(bytes), count_(count), height_(mozilla::Some(height)) {
    MOZ_ASSERT(bytes != 0);
    MOZ_ASSERT(count != 0);
    MOZ_ASSERT(height != 0);
  }

  uint32_t bytes() const { return bytes_; }
  uint32_t count() const { return count_; }
  uint32_t height() const { return height_.value(); }

  bool hasStackResults() const { return bytes() != 0; }
  StackResults stackResults() const {
    return hasStackResults() ? StackResults::HasStackResults
                             : StackResults::NoStackResults;
  }
};

// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts). See comments above for more. Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is performed;
// BaseStackFrame then provides a pleasant interface for stack frame management.

class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
  // but items are 8 bytes), allocate stack memory in chunks, and use a
  // separate stack height variable to track the effective stack pointer
  // within the allocated area. Effectively, there's a variable amount of
  // free space directly above the stack pointer. See diagram above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  // - The Local area is always aligned according to WasmStackAlignment, ie,
  //   masm.framePushed() % WasmStackAlignment is zero after allocating
  //   locals.
  //
  // - ChunkSize is always a multiple of WasmStackAlignment.
  //
  // - Pushing and popping are always in units of ChunkSize (hence preserving
  //   alignment).
  //
  // - The free space on the stack (masm.framePushed() - currentStackHeight_)
  //   is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack, the size of
  // this allocation is InitialChunk and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;

  // The current logical height of the frame is
  //   currentStackHeight_ = localSize_ + dynamicSize
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is usually
  // larger than currentStackHeight_, notably at the beginning of execution when
  // we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif

  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment. The Local area is then allocated in beginFunction(),
  // following the allocation of the Header. See onFixedStackAllocated()
  // below.
  //
  // UINT32_MAX is the "not yet set" sentinel; several methods assert against
  // it to catch use before setLocalSize() has run.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.  May be called only
  // once (asserted via the UINT32_MAX sentinel), and the size must already be
  // pointer-aligned.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Record the current stack height, after it has become stable in
  // beginFunction(). See also BaseStackFrame::onFixedStackAllocated().

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = localSize_;
#endif
  }

 public:
  // The fixed amount of memory, in bytes, allocated on the stack below the
  // Header for purposes such as locals and other fixed values. Includes all
  // necessary alignment, and on ARM64 also the initial chunk for the working
  // stack memory.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    return localSize_ + InitialChunk;
#else
    return localSize_;
#endif
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    // Round the excess over the fixed allocation up to a whole chunk.
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
#endif

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and saving
  // intermediate values.

  // Offset off of sp_ for the slot at stack area location `offset`.

  int32_t stackOffset(int32_t offset) {
    MOZ_ASSERT(offset > 0);
    return masm.framePushed() - offset;
  }

  // The stack height of the end (highest address + 1) of a stack-result area
  // of `stackResultBytes` bytes sitting directly above `stackBase`.

  uint32_t computeHeightWithStackResults(StackHeight stackBase,
                                         uint32_t stackResultBytes) {
    MOZ_ASSERT(stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    return stackBase.height + stackResultBytes;
  }

#ifdef RABALDR_CHUNKY_STACK
  // Grow the logical stack by `bytes`, reserving whole chunks from the
  // machine stack only when the existing free space is insufficient.

  void pushChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace < bytes) {
      uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize);
      MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
      masm.reserveStack(bytesToReserve);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  // Shrink the logical stack by `bytes`, releasing now-unused whole chunks
  // back to the machine stack.

  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
    // values consumed by a call, and we may need to drop several chunks. But
    // never drop the initial chunk. Crucially, the amount we drop is always an
    // integral number of chunks.
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace >= ChunkSize) {
      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif

  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    return currentStackHeight_;
#else
    // Without chunky allocation the machine frame size is the logical height.
    return masm.framePushed();
#endif
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  // Assert the chunky-stack invariants listed in the comment block at the top
  // of this class.
  void checkChunkyInvariants() {
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // For a given stack height, return the appropriate size of the allocated
  // frame.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
    // A more complicated adjustment is needed.
    return framePushedForHeight(stackHeight.height);
#else
    // The allocated frame size equals the stack height.
    return stackHeight.height;
#endif
  }

 public:
  // The current height of the stack area, not necessarily zero-based, in a
  // type-safe way.

  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }

  // Set the frame height to a previously recorded value.
  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    // Keep masm.framePushed() consistent with the chunk-rounded allocation
    // for the new logical height.
    currentStackHeight_ = amount.height;
    masm.setFramePushed(framePushedForHeight(amount));
    checkChunkyInvariants();
#else
    masm.setFramePushed(amount.height);
#endif
  }

  // The current height of the dynamic part of the stack area (ie, the backing
  // store for the evaluation stack), zero-based.

  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }

  // Before branching to an outer control label, pop the execution stack to
  // the level expected by that region, but do not update masm.framePushed()
  // as that will happen as compilation leaves the block.
  //
  // Note these operate directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight,
                            uint32_t stackResultBytes) {
    uint32_t framePushedHere = masm.framePushed();
    StackHeight heightThere =
        StackHeight(destStackHeight.height + stackResultBytes);
    uint32_t framePushedThere = framePushedForHeight(heightThere);
    if (framePushedHere > framePushedThere) {
      // The stack grows down, so adding to SP releases stack memory.
      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    }
  }

  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    popStackBeforeBranch(destStackHeight,
                         ABIResultIter::MeasureStackBytes(type));
  }

  // Given that there are |stackParamSize| bytes on the dynamic stack
  // corresponding to the stack results, return the stack height once these
  // parameters are popped.

  StackHeight stackResultsBase(uint32_t stackParamSize) {
    return StackHeight(currentStackHeight() - stackParamSize);
  }

  // For most of WebAssembly, adjacent instructions have fallthrough control
  // flow between them, which allows us to simply thread the current stack
  // height through the compiler. There are two exceptions to this rule: when
  // leaving a block via dead code, and when entering the "else" arm of an "if".
  // In these cases, the stack height is the block entry height, plus any stack
  // values (results in the block exit case, parameters in the else entry case).

  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    uint32_t height = destStackHeight.height;
    height += ABIResultIter::MeasureStackBytes(type);
    setStackHeight(StackHeight(height));
  }

  // Return offset of stack result.

  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
                             uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    return end - result.stackOffset();
  }

 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the freeing
  // of the argument area and dropping values off the stack after a call. But
  // they always amount to manipulating the real stack pointer by some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments. But we do update framePushed(), so we
  // can still index into the frame below the outgoing arguments area.

  // This is always equivalent to a masm.reserveStack() call.

  void allocArgArea(size_t argSize) {
    if (argSize) {
      masm.reserveStack(argSize);
    }
  }

  // This frees the argument area allocated by allocArgArea(), and `argSize`
  // must be equal to the `argSize` argument to allocArgArea(). In addition
  // we drop some values from the frame, corresponding to the values that were
  // consumed by the call.
  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
    // The method is called to re-initialize SP after the call. Note that
    // this operation shall not be optimized for argSize + dropSize == 0.
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, which is why the operation is split.
    masm.freeStackTo(masm.framePushed() - argSize);
    popChunkyBytes(dropSize);
#else
    masm.freeStackTo(masm.framePushed() - (argSize + dropSize));
#endif
  }
};

class BaseStackFrame final : public BaseStackFrameAllocator {
  MacroAssembler& masm;

  // The largest observed value of masm.framePushed(), ie, the size of the
  // stack frame. Read this for its true value only when code generation is
  // finished.
  uint32_t maxFramePushed_;

  // Patch point where we check for stack overflow.
  CodeOffset stackAddOffset_;

  // Low byte offset of pointer to stack results, if any.
  mozilla::Maybe<int32_t> stackResultsPtrOffset_;

  // The offset of instance pointer.
  uint32_t instancePointerOffset_;

  // Low byte offset of local area for true locals (not parameters).
  uint32_t varLow_;

  // High byte offset + 1 of local area for true locals.
  uint32_t varHigh_;

  // The stack pointer, cached for brevity.
  RegisterOrSP sp_;

 public:
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        instancePointerOffset_(UINT32_MAX),
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}

  ///////////////////////////////////////////////////////////////////////////
  //
  // Stack management and overflow checking

  // This must be called once beginFunction has allocated space for the Header
  // (the Frame and DebugFrame) and the Local area, and will record the current
  // frame size for internal use by the stack abstractions.

  void onFixedStackAllocated() {
    maxFramePushed_ = masm.framePushed();
    BaseStackFrameAllocator::onFixedStackAllocated();
  }

  // We won't know until after we've generated code how big the frame will be
  // (we may need arbitrary spill slots and outgoing param slots) so emit a
  // patchable add that is patched in endFunction().
  //
  // Note the platform scratch register may be used by branchPtr(), so
  // generally tmp must be something else.

  void checkStack(Register tmp1, Register tmp2, Label* stackOverflowTrap) {
    masm.loadPtr(Address(InstanceReg, wasm::Instance::offsetOfCx()), tmp2);
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp1);
    masm.branchPtr(Assembler::AboveOrEqual,
                   Address(tmp2, JSContext::offsetOfWasm() +
                                     wasm::Context::offsetOfStackLimit()),
                   tmp1, stackOverflowTrap);
  }

  // Patch the stack check emitted by checkStack() with the final frame size.

  void patchCheckStack() {
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }

  // Very large frames are implausible, probably an attack.

  bool checkStackHeight() { return maxFramePushed_ <= MaxFrameSize; }

  ///////////////////////////////////////////////////////////////////////////
  //
  // Local area

  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // It assumes the stack is growing down and that locals are on the stack
    // at lower addresses than Frame, and is the offset from Frame of the
    // lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}

    bool isStackArgument() const { return offs < 0; }
  };

  // Profiling shows that the number of parameters and locals frequently
  // touches or exceeds 8. So 16 seems like a reasonable starting point.
  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;

  // Initialize `localInfo` based on the types of `locals` and `args`.
  // Returns false on OOM.  Also records varLow_/varHigh_, the instance
  // pointer slot, and (if present) the spilled stack-results pointer slot.
  [[nodiscard]] bool setupLocals(const ValTypeVector& locals,
                                 const ArgTypeVector& args, bool debugEnabled,
                                 LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    mozilla::DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args, debugEnabled);
    // First the incoming arguments...
    for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
    }

    // ...then the true (non-argument) locals.
    varLow_ = i.frameSize();
    for (; !i.done(); i++) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
    }
    varHigh_ = i.frameSize();

    // Reserve an additional stack slot for the instance pointer.
    const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
    const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
    instancePointerOffset_ = localSize;

    setLocalSize(AlignBytes(localSize, WasmStackAlignment));

    if (args.hasSyntheticStackResultPointerArg()) {
      stackResultsPtrOffset_ = mozilla::Some(i.stackResultPointerOffset());
    }

    return true;
  }

  void zeroLocals(BaseRegAlloc* ra);

  // Address of `local`: FP-relative for incoming stack arguments, SP-relative
  // for locals allocated in this frame.
  Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) {
    if (local.isStackArgument()) {
      return Address(FramePointer,
                     stackArgumentOffsetFromFp(local) + additionalOffset);
    }
    return Address(sp_, localOffsetFromSp(local) + additionalOffset);
  }

  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src), dest);
  }

#ifndef JS_PUNBOX64
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(addressOfLocal(src), dest);
  }

  void loadLocalRef(const Local& src, RegRef dest) {
    masm.loadPtr(addressOfLocal(src), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(addressOfLocal(src), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(addressOfLocal(src), dest);
  }

#ifdef ENABLE_WASM_SIMD
  void loadLocalV128(const Local& src, RegV128 dest) {
    masm.loadUnalignedSimd128(addressOfLocal(src), dest);
  }
#endif

  void storeLocalI32(RegI32 src, const Local& dest) {
    masm.store32(src, addressOfLocal(dest));
  }

  void storeLocalI64(RegI64 src, const Local& dest) {
    masm.store64(src, addressOfLocal(dest));
  }

  void storeLocalRef(RegRef src, const Local& dest) {
    masm.storePtr(src, addressOfLocal(dest));
  }

  void storeLocalF64(RegF64 src, const Local& dest) {
    masm.storeDouble(src, addressOfLocal(dest));
  }

  void storeLocalF32(RegF32 src, const Local& dest) {
    masm.storeFloat32(src, addressOfLocal(dest));
  }

#ifdef ENABLE_WASM_SIMD
  void storeLocalV128(RegV128 src, const Local& dest) {
    masm.storeUnalignedSimd128(src, addressOfLocal(dest));
  }
#endif

  // Offset off of sp_ for `local`.
  int32_t localOffsetFromSp(const Local& local) {
    MOZ_ASSERT(!local.isStackArgument());
    return localOffset(local.offs);
  }

  // Offset off of frame pointer for `stack argument`.
  int32_t stackArgumentOffsetFromFp(const Local& local) {
    MOZ_ASSERT(local.isStackArgument());
    return -local.offs;
  }

  // The incoming stack result area pointer is for stack results of the function
  // being compiled.
  void loadIncomingStackResultAreaPtr(RegPtr reg) {
    const int32_t offset = stackResultsPtrOffset_.value();
    // A negative offset means the pointer arrived as a stack argument (read it
    // via FP); a positive offset means it was spilled into the local area.
    Address src = offset < 0 ? Address(FramePointer, -offset)
                             : Address(sp_, stackOffset(offset));
    masm.loadPtr(src, reg);
  }

  void storeIncomingStackResultAreaPtr(RegPtr reg) {
    // If we get here, that means the pointer to the stack results area was
    // passed in as a register, and therefore it will be spilled below the
    // frame, so the offset is a positive height.
    MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
    masm.storePtr(reg,
                  Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
  }

  void loadInstancePtr(Register dst) {
    // Sometimes loadInstancePtr is used in contexts where SP is not in sync
    // with FP, e.g. just after a tail call returns, so address the slot via
    // FP rather than SP.
    masm.loadPtr(Address(FramePointer, -instancePointerOffset_), dst);
  }

  void storeInstancePtr(Register instance) {
    masm.storePtr(instance, Address(sp_, stackOffset(instancePointerOffset_)));
  }

  int32_t getInstancePtrOffset() { return stackOffset(instancePointerOffset_); }

  // An outgoing stack result area pointer is for stack results of callees of
  // the function being compiled.
  void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
                                         RegPtr dest) {
    MOZ_ASSERT(results.height() <= masm.framePushed());
    uint32_t offsetFromSP = masm.framePushed() - results.height();
    masm.moveStackPtrTo(dest);
    if (offsetFromSP) {
      masm.addPtr(Imm32(offsetFromSP), dest);
    }
  }

 private:
  // Offset off of sp_ for a local with offset `offset` from Frame.
  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }

 public:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Dynamic area

  static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
  static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
  static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
  static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
#ifdef ENABLE_WASM_SIMD
  static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128;
#endif

  // Pushes the register `r` to the stack. This pushes the full 64-bit width on
  // 64-bit systems, and 32-bits otherwise.  Returns the new stack height.
  uint32_t pushGPR(Register r) {
    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfPtr);
    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushFloat32(FloatRegister r) {
    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

#ifdef ENABLE_WASM_SIMD
  uint32_t pushV128(RegV128 r) {
    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
#  ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfV128);
#  else
    masm.adjustStack(-(int)StackSizeOfV128);
#  endif
    masm.storeUnalignedSimd128(r,
                               Address(sp_, stackOffset(currentStackHeight())));
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight());
    return currentStackHeight();
  }
#endif

  uint32_t pushDouble(FloatRegister r) {
    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }

  // Pops the stack into the register `r`. This pops the full 64-bit width on
  // 64-bit systems, and 32-bits otherwise.
  void popGPR(Register r) {
    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  void popFloat32(FloatRegister r) {
    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  void popDouble(FloatRegister r) {
    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }

#ifdef ENABLE_WASM_SIMD
  void popV128(RegV128 r) {
    mozilla::DebugOnly<uint32_t> stackBefore = currentStackHeight();
    masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())),
                              r);
#  ifdef RABALDR_CHUNKY_STACK
    popChunkyBytes(StackSizeOfV128);
#  else
    masm.adjustStack((int)StackSizeOfV128);
#  endif
    MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight());
  }
#endif

  // Drop `bytes` from the dynamic area (no registers are loaded).
  void popBytes(size_t bytes) {
    if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
      popChunkyBytes(bytes);
#else
      masm.freeStack(bytes);
#endif
    }
  }

  // Load a value at stack area location `offset` without popping it.

  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  void loadStackRef(int32_t offset, RegRef dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }

#ifdef ENABLE_WASM_SIMD
  void loadStackV128(int32_t offset, RegV128 dest) {
    masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest);
  }
#endif

  // Ensure the stack is grown (if needed) so that the stack-result area of
  // `stackResultBytes` above `stackBase` is allocated; return its end height.

  uint32_t prepareStackResultArea(StackHeight stackBase,
                                  uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    if (currentStackHeight() < end) {
      uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
      pushChunkyBytes(bytes);
#else
      masm.reserveStack(bytes);
#endif
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
    return end;
  }

  // Pop anything above the end of the stack-result area.

  void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= end);
    popBytes(currentStackHeight() - end);
  }

  // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
  void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight < srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    // The shuffleStackResultsTowardFP is used when SP/framePushed is not
    // tracked by the compiler, e.g. after possible return call -- use
    // FramePointer instead of sp_.
    int32_t destOffset = int32_t(-destHeight + bytes);
    int32_t srcOffset = int32_t(-srcHeight + bytes);
    // Copy pointer-sized words from the top (highest address) down so that
    // overlapping areas are handled correctly when moving toward FP.
    while (bytes >= sizeof(intptr_t)) {
      destOffset -= sizeof(intptr_t);
      srcOffset -= sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
      masm.loadPtr(Address(FramePointer, srcOffset), temp);
      masm.storePtr(temp, Address(FramePointer, destOffset));
    }
    if (bytes) {
      // On 64-bit targets a 32-bit tail may remain.
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      destOffset -= sizeof(uint32_t);
      srcOffset -= sizeof(uint32_t);
      masm.load32(Address(FramePointer, srcOffset), temp);
      masm.store32(temp, Address(FramePointer, destOffset));
    }
  }

  // Unlike the overload that operates on raw heights, |srcHeight| and
  // |destHeight| are stack heights *not including* |bytes|.
  void shuffleStackResultsTowardFP(StackHeight srcHeight,
                                   StackHeight destHeight, uint32_t bytes,
                                   Register temp) {
    MOZ_ASSERT(srcHeight.isValid());
    MOZ_ASSERT(destHeight.isValid());
    uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
    uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
    MOZ_ASSERT(src <= currentStackHeight());
    MOZ_ASSERT(dest <= currentStackHeight());
    shuffleStackResultsTowardFP(src, dest, bytes, temp);
  }

  // |srcHeight| and |destHeight| are stack heights *including* |bytes|.
  void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight > srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    // A greater height means a lower address here: the destination area is
    // below the source area.  Copying upward in memory (offsets increasing)
    // is therefore safe even if the two areas overlap.
    uint32_t destOffset = stackOffset(destHeight);
    uint32_t srcOffset = stackOffset(srcHeight);
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(sp_, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    // At most one 32-bit word can remain (bytes is a multiple of 4).
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(sp_, destOffset));
    }
  }

  // Copy results from the top of the current stack frame to an area of memory,
  // and pop the stack accordingly.  `dest` is the address of the low byte of
  // that memory.
  void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
    MOZ_ASSERT(bytes <= currentStackHeight());
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t bytesToPop = bytes;
    uint32_t srcOffset = stackOffset(currentStackHeight());
    uint32_t destOffset = 0;
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(dest, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(dest, destOffset));
    }
    popBytes(bytesToPop);
  }

  // Reserve the outgoing-arguments area for a call, updating the frame's
  // high-water mark.  No-op for a zero-sized area.
  void allocArgArea(size_t argSize) {
    if (argSize) {
      BaseStackFrameAllocator::allocArgArea(argSize);
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
  }

 private:
  // Store the 32-bit immediate `imm` at stack height `destHeight`, using
  // `temp` as a scratch register.
  void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) {
    masm.move32(Imm32(imm), temp);
    masm.store32(temp, Address(sp_, stackOffset(destHeight)));
  }

  // Store the 64-bit immediate `imm` at stack height `destHeight`.  On
  // 32-bit targets the two halves are stored separately; the second store
  // goes to the word above (destHeight - 4).
  void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) {
#ifdef JS_PUNBOX64
    masm.move64(Imm64(imm), Register64(temp));
    masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
#else
    // Type-pun via a union to split the i64 into two i32 halves.
    union {
      int64_t i64;
      int32_t i32[2];
    } bits = {.i64 = imm};
    static_assert(sizeof(bits) == 8);
    store32BitsToStack(bits.i32[0], destHeight, temp);
    store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp);
#endif
  }

 public:
  // Store a pointer-sized immediate at `destHeight`, choosing the 32- or
  // 64-bit helper to match the target's stack word size.
  void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight,
                                Register temp) {
#ifdef JS_PUNBOX64
    static_assert(StackSizeOfPtr == 8);
    store64BitsToStack(imm, destHeight, temp);
#else
    static_assert(StackSizeOfPtr == 4);
    store32BitsToStack(int32_t(imm), destHeight, temp);
#endif
  }

  void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight,
                                Register temp) {
    store64BitsToStack(imm, destHeight, temp);
  }

  // Store a float32 immediate at `destHeight` by punning its bits to i32.
  void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) {
    union {
      int32_t i32;
      float f32;
    } bits = {.f32 = imm};
    static_assert(sizeof(bits) == 4);
    // Do not store 4 bytes if StackSizeOfFloat == 8.  It's probably OK to do
    // so, but it costs little to store something predictable.
    if (StackSizeOfFloat == 4) {
      store32BitsToStack(bits.i32, destHeight, temp);
    } else {
      store64BitsToStack(uint32_t(bits.i32), destHeight, temp);
    }
  }

  // Store a double immediate at `destHeight` by punning its bits to i64.
  void storeImmediateF64ToStack(double imm, uint32_t destHeight,
                                Register temp) {
    union {
      int64_t i64;
      double f64;
    } bits = {.f64 = imm};
    static_assert(sizeof(bits) == 8);
    store64BitsToStack(bits.i64, destHeight, temp);
  }

#ifdef ENABLE_WASM_SIMD
  // Store a v128 immediate at `destHeight` as four 32-bit stores, the i-th
  // word going at destHeight - 4*i.
  void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) {
    union {
      int32_t i32[4];
      uint8_t bytes[16];
    } bits{};
    static_assert(sizeof(bits) == 16);
    memcpy(bits.bytes, imm.bytes, 16);
    for (unsigned i = 0; i < 4; i++) {
      store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp);
    }
  }
#endif
};

//////////////////////////////////////////////////////////////////////////////
//
// MachineStackTracker, used for stack-slot pointerness tracking.

// An expensive operation in stack-map creation is copying of the
// MachineStackTracker (MST) into the final StackMap.  This is done in
// StackMapGenerator::createStackMap.  Given that this is basically a
// bit-array copy, it is reasonable to ask whether the two classes could have
// a more similar representation, so that the copy could then be done with
// `memcpy`.
//
// Although in principle feasible, the following complications exist, and so
// for the moment, this has not been done.
//
// * StackMap is optimised for compact size (storage) since there will be
//   many, so it uses a true bitmap.  MST is intended to be fast and simple,
//   and only one exists at once (per compilation thread).  Doing this would
//   require MST to use a true bitmap, and hence ..
//
// * .. the copying can't be a straight memcpy, since StackMap has entries for
//   words not covered by MST.  Hence the copy would need to shift bits in
//   each byte left or right (statistically speaking, in 7 cases out of 8) in
//   order to ensure no "holes" in the resulting bitmap.
//
// * Furthermore the copying would need to logically invert the direction of
//   the stacks.  For MST, index zero in the vector corresponds to the highest
//   address in the stack.  For StackMap, bit index zero corresponds to the
//   lowest address in the stack.
//
// * Finally, StackMap is a variable-length structure whose size must be known
//   at creation time.  The size of an MST by contrast isn't known at creation
//   time -- it grows as the baseline compiler pushes stuff on its value
//   stack.  That's why it has to have vector entry 0 being the highest
//   address.
//
// * Although not directly relevant, StackMaps are also created by the via-Ion
//   compilation routes, by translation from the pre-existing "JS-era"
//   LSafePoints (CreateStackMapFromLSafepoint).  So if we want to mash
//   StackMap around to suit baseline better, we also need to ensure it
//   doesn't break Ion somehow.

class MachineStackTracker {
  // Simulates the machine's stack, with one bool per word.  The booleans are
  // represented as `uint8_t`s so as to guarantee the element size is one
  // byte.  Index zero in this vector corresponds to the highest address in
  // the machine's stack.  The last entry corresponds to what SP currently
  // points at.  This all assumes a grow-down stack.
  //
  // numPtrs_ contains the number of "true" values in vec_, and is therefore
  // redundant.  But it serves as a constant-time way to detect the common
  // case where vec_ holds no "true" values.
  size_t numPtrs_;
  Vector<uint8_t, 64, SystemAllocPolicy> vec_;

 public:
  MachineStackTracker() : numPtrs_(0) {}

  ~MachineStackTracker() {
#ifdef DEBUG
    // Check the numPtrs_ cache against a recount of the "true" entries.
    size_t n = 0;
    for (uint8_t b : vec_) {
      n += (b ? 1 : 0);
    }
    MOZ_ASSERT(n == numPtrs_);
#endif
  }

  // Clone this MachineStackTracker, writing the result at |dst|.
  [[nodiscard]] bool cloneTo(MachineStackTracker* dst);

  // Notionally push |n| non-pointers on the stack.
  [[nodiscard]] bool pushNonGCPointers(size_t n) {
    return vec_.appendN(uint8_t(false), n);
  }

  // Mark the stack slot |offsetFromSP| up from the bottom as holding a
  // pointer.
  void setGCPointer(size_t offsetFromSP) {
    // offsetFromSP == 0 denotes the most recently pushed item, == 1 the
    // second most recently pushed item, etc.
    MOZ_ASSERT(offsetFromSP < vec_.length());

    // Translate the SP-relative index into a vector index (vector index 0
    // is the highest address), and keep the numPtrs_ cache exact even if
    // the slot was already marked.
    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0);
    vec_[offsetFromTop] = uint8_t(true);
  }

  // Query the pointerness of the slot |offsetFromSP| up from the bottom.
  bool isGCPointer(size_t offsetFromSP) const {
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    return bool(vec_[offsetFromTop]);
  }

  // Return the number of words tracked by this MachineStackTracker.
  size_t length() const { return vec_.length(); }

  // Return the number of pointer-typed words tracked by this
  // MachineStackTracker.
  size_t numPtrs() const {
    MOZ_ASSERT(numPtrs_ <= length());
    return numPtrs_;
  }

  // Discard all contents, but (per mozilla::Vector::clear semantics) don't
  // free or reallocate any dynamic storage associated with |vec_|.
  void clear() {
    vec_.clear();
    numPtrs_ = 0;
  }

  // An iterator that produces indices of reftyped slots, starting at the
  // logical bottom of the (grow-down) stack.  Indices have the same meaning
  // as the arguments to `isGCPointer`.  That is, if this iterator produces a
  // value `i`, then it means that `isGCPointer(i) == true`; if the value `i`
  // is never produced then `isGCPointer(i) == false`.  The values are
  // produced in ascending order.
  //
  // Because most slots are non-reftyped, some effort has been put into
  // skipping over large groups of non-reftyped slots quickly.
  class Iter {
    // Both `bufU8_` and `bufU32_` are made to point to `vec_`s array of
    // `uint8_t`s, so we can scan (backwards) through it either in bytes or
    // 32-bit words.  Recall that the last element in `vec_` pertains to the
    // lowest-addressed word in the machine's grow-down stack, and we want to
    // iterate logically "up" this stack, so we need to iterate backwards
    // through `vec_`.
    //
    // This dual-pointer scheme assumes that the `vec_`s content array is at
    // least 32-bit aligned.
    const uint8_t* bufU8_;
    const uint32_t* bufU32_;
    // The number of elements in `bufU8_`.
    const size_t nElems_;
    // The index in `bufU8_` where the next search should start.
    size_t next_;

   public:
    explicit Iter(const MachineStackTracker& mst)
        : bufU8_((uint8_t*)mst.vec_.begin()),
          bufU32_((uint32_t*)mst.vec_.begin()),
          nElems_(mst.vec_.length()),
          next_(mst.vec_.length() - 1) {
      MOZ_ASSERT(uintptr_t(bufU8_) == uintptr_t(bufU32_));
      // Check minimum alignment constraint on the array.
      MOZ_ASSERT(0 == (uintptr_t(bufU8_) & 3));
    }

    ~Iter() { MOZ_ASSERT(uintptr_t(bufU8_) == uintptr_t(bufU32_)); }

    // It is important, for termination of the search loop in `next()`, that
    // this has the value obtained by subtracting 1 from size_t(0).
    static constexpr size_t FINISHED = ~size_t(0);
    static_assert(FINISHED == size_t(0) - 1);

    // Returns the next index `i` for which `isGCPointer(i) == true`, or
    // FINISHED when there are no more.  Note: an empty `vec_` initialises
    // `next_` to size_t(0) - 1 == FINISHED, so the loop is never entered.
    size_t get() {
      while (next_ != FINISHED) {
        if (bufU8_[next_]) {
          next_--;
          // Convert the vector index back into an `isGCPointer`-style
          // offset-from-SP index.
          return nElems_ - 1 - (next_ + 1);
        }
        // Invariant: next_ != FINISHED (so it's still a valid index)
        // and:       bufU8_[next_] == 0
        // (so we need to move backwards by at least 1)
        //
        // BEGIN optimization -- this could be removed without affecting
        // correctness.
        if ((next_ & 7) == 0) {
          // We're at the "bottom" of the current dual-4-element word.  Check
          // if we can jump backwards by 8.  This saves a conditional branch
          // and a few cycles by ORing two adjacent 32-bit words together,
          // whilst not requiring 64-bit alignment of `bufU32_`.
          while (next_ >= 8 &&
                 (bufU32_[(next_ - 4) >> 2] | bufU32_[(next_ - 8) >> 2]) == 0) {
            next_ -= 8;
          }
        }
        // END optimization
        next_--;
      }
      return FINISHED;
    }
  };
};

//////////////////////////////////////////////////////////////////////////////
//
// StackMapGenerator, which carries all state needed to create stackmaps.

enum class HasDebugFrameWithLiveRefs { No, Maybe };

struct StackMapGenerator {
 private:
  // --- These are constant for the life of the function's compilation ---

  // For generating stackmaps, we'll need to know the offsets of registers
  // as saved by the trap exit stub.
  const RegisterOffsets& trapExitLayout_;
  const size_t trapExitLayoutNumWords_;

  // Completed stackmaps are added here.
  StackMaps* stackMaps_;

  // So as to be able to get current offset when creating stackmaps.
  const MacroAssembler& masm_;

 public:
  // --- These are constant once we've completed beginFunction() ---

  // The number of bytes of arguments passed to this function in memory.
  size_t numStackArgBytes;

  MachineStackTracker machineStackTracker;  // tracks machine stack pointerness

  // This holds masm.framePushed at entry to the function's body.  It is a
  // Maybe because createStackMap needs to know whether or not we're still
  // in the prologue.  It makes a Nothing-to-Some transition just once per
  // function.
  mozilla::Maybe<uint32_t> framePushedAtEntryToBody;

  // --- These can change at any point ---

  // This holds masm.framePushed as it would be for a function call
  // instruction, but excluding the stack area used to pass arguments in
  // memory.  That is, for an upcoming function call, this will hold
  //
  //   masm.framePushed() at the call instruction -
  //      StackArgAreaSizeAligned(argumentTypes)
  //
  // This value denotes the lowest-addressed stack word covered by the current
  // function's stackmap.  Words below this point form the highest-addressed
  // area of the callee's stackmap.  Note that all alignment padding above the
  // arguments-in-memory themselves belongs to the callee's stackmap, as return
  // calls will replace the function arguments with a new set of arguments
  // which may have different alignment.
  //
  // When not inside a function call setup/teardown sequence, it is Nothing.
  // It can make Nothing-to/from-Some transitions arbitrarily as we progress
  // through the function body.
  mozilla::Maybe<uint32_t> framePushedExcludingOutboundCallArgs;

  // The number of memory-resident, ref-typed entries on the containing
  // BaseCompiler::stk_.
  size_t memRefsOnStk;

  // This is a copy of machineStackTracker that is used only within individual
  // calls to createStackMap.  It is here only to avoid possible heap
  // allocation costs resulting from making it local to createStackMap().
  MachineStackTracker augmentedMst;

  StackMapGenerator(StackMaps* stackMaps, const RegisterOffsets& trapExitLayout,
                    const size_t trapExitLayoutNumWords,
                    const MacroAssembler& masm)
      : trapExitLayout_(trapExitLayout),
        trapExitLayoutNumWords_(trapExitLayoutNumWords),
        stackMaps_(stackMaps),
        masm_(masm),
        numStackArgBytes(0),
        memRefsOnStk(0) {}

  // At the beginning of a function, we may have live roots in registers (as
  // arguments) at the point where we perform a stack overflow check.  This
  // method generates the "extra" stackmap entries to describe that, in the
  // case that the check fails and we wind up calling into the wasm exit
  // stub, as generated by GenerateTrapExit().
  //
  // The resulting map must correspond precisely with the stack layout
  // created for the integer registers as saved by (code generated by)
  // GenerateTrapExit().  To do that we use trapExitLayout_ and
  // trapExitLayoutNumWords_, which together comprise a description of the
  // layout and are created by GenerateTrapExitRegisterOffsets().
  [[nodiscard]] bool generateStackmapEntriesForTrapExit(
      const ArgTypeVector& args, ExitStubMapVector* extras);

  // Creates a stackmap incorporating pointers from the current operand
  // stack |stk|, incorporating possible extra pointers in |extra| at the
  // lower addressed end, and possibly with the associated frame having a
  // DebugFrame that must be traced, as indicated by |debugFrameWithLiveRefs|.
  [[nodiscard]] bool createStackMap(
      const char* who, const ExitStubMapVector& extras,
      HasDebugFrameWithLiveRefs debugFrameWithLiveRefs, const StkVector& stk,
      wasm::StackMap** result);
};

}  // namespace wasm
}  // namespace js

#endif  // wasm_wasm_baseline_frame_h