Assembler-arm64.h (32990B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef A64_ASSEMBLER_A64_H_ 8 #define A64_ASSEMBLER_A64_H_ 9 10 #include "jit/arm64/vixl/Assembler-vixl.h" 11 12 #include "jit/CompactBuffer.h" 13 #include "jit/shared/Disassembler-shared.h" 14 #include "wasm/WasmTypeDecls.h" 15 16 namespace js { 17 namespace jit { 18 19 // VIXL imports. 20 using ARMRegister = vixl::Register; 21 using ARMFPRegister = vixl::FPRegister; 22 using vixl::ARMBuffer; 23 using vixl::Instruction; 24 25 using LabelDoc = DisassemblerSpew::LabelDoc; 26 using LiteralDoc = DisassemblerSpew::LiteralDoc; 27 28 static const uint32_t AlignmentAtPrologue = 0; 29 static const uint32_t AlignmentMidPrologue = 8; 30 static const Scale ScalePointer = TimesEight; 31 32 // The MacroAssembler uses scratch registers extensively and unexpectedly. 33 // For safety, scratch registers should always be acquired using 34 // vixl::UseScratchRegisterScope. 
35 static constexpr Register ScratchReg{Registers::ip0}; 36 static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64}; 37 38 static constexpr Register ScratchReg2{Registers::ip1}; 39 static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64}; 40 41 static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0, 42 FloatRegisters::Double}; 43 static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31, 44 FloatRegisters::Double}; 45 struct ScratchDoubleScope : public AutoFloatRegisterScope { 46 explicit ScratchDoubleScope(MacroAssembler& masm) 47 : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {} 48 }; 49 50 static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0, 51 FloatRegisters::Single}; 52 static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31, 53 FloatRegisters::Single}; 54 struct ScratchFloat32Scope : public AutoFloatRegisterScope { 55 explicit ScratchFloat32Scope(MacroAssembler& masm) 56 : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {} 57 }; 58 59 #ifdef ENABLE_WASM_SIMD 60 static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0, 61 FloatRegisters::Simd128}; 62 static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31, 63 FloatRegisters::Simd128}; 64 struct ScratchSimd128Scope : public AutoFloatRegisterScope { 65 explicit ScratchSimd128Scope(MacroAssembler& masm) 66 : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {} 67 }; 68 #else 69 struct ScratchSimd128Scope : public AutoFloatRegisterScope { 70 explicit ScratchSimd128Scope(MacroAssembler& masm) 71 : AutoFloatRegisterScope(masm, ScratchDoubleReg_) { 72 MOZ_CRASH("SIMD not enabled"); 73 } 74 }; 75 #endif 76 77 static constexpr Register InvalidReg{Registers::Invalid}; 78 static constexpr FloatRegister InvalidFloatReg = {}; 79 80 static constexpr Register OsrFrameReg{Registers::x3}; 81 static constexpr Register CallTempReg0{Registers::x9}; 82 static constexpr Register CallTempReg1{Registers::x10}; 83 static constexpr 
Register CallTempReg2{Registers::x11}; 84 static constexpr Register CallTempReg3{Registers::x12}; 85 static constexpr Register CallTempReg4{Registers::x13}; 86 static constexpr Register CallTempReg5{Registers::x14}; 87 88 static constexpr Register PreBarrierReg{Registers::x1}; 89 90 static constexpr Register InterpreterPCReg{Registers::x9}; 91 92 static constexpr Register ReturnReg{Registers::x0}; 93 static constexpr Register64 ReturnReg64(ReturnReg); 94 static constexpr Register JSReturnReg{Registers::x2}; 95 static constexpr Register FramePointer{Registers::fp}; 96 static constexpr ARMRegister FramePointer64{FramePointer, 64}; 97 static constexpr Register ZeroRegister{Registers::sp}; 98 static constexpr ARMRegister ZeroRegister64{Registers::sp, 64}; 99 static constexpr ARMRegister ZeroRegister32{Registers::sp, 32}; 100 101 // [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions 102 // 103 // ================ 104 // 105 // Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer: 106 // 107 // The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it 108 // is used as the base pointer for a memory access. (SP+offset need not be 109 // 16-byte aligned, but the SP value itself must be.) The SP register may 110 // take on unaligned values but may not be used for a memory access while it 111 // is unaligned. 112 // 113 // Stack-alignment checking can be enabled or disabled by a control register; 114 // however that register cannot be modified by user space. We have to assume 115 // stack alignment checking is enabled, and that does usually appear to be the 116 // case. See the ARM Architecture Reference Manual, "D1.8.2 SP alignment 117 // checking", for further details. 118 // 119 // A second constraint is forced upon us by the ARM64 ABI. This requires that 120 // all accesses to the stack must be at or above SP. 
Accesses below SP are 121 // strictly forbidden, presumably because the kernel might use that area of 122 // memory for its own purposes -- in particular, signal delivery -- and hence 123 // it may get trashed at any time. 124 // 125 // Note this doesn't mean that accesses to the stack must be based off 126 // register SP. Only that the effective addresses must be >= SP, regardless 127 // of how the address is formed. 128 // 129 // In order to allow word-wise pushes and pops, some of our ARM64 jits 130 // (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to 131 // be used as a PseudoStackPointer (PSP). 132 // 133 // Initially the PSP will have the same value as the SP. Code can, if it 134 // wants, push a single word by subtracting 8 from the PSP, doing SP := PSP, 135 // then storing the value at PSP+0. Given other constraints on the alignment 136 // of the SP at function call boundaries, this works out OK, at the cost of 137 // the two extra instructions per push / pop. 138 // 139 // This is all a bit messy, and is probably not robustly adhered to. However, 140 // the following appear to be the intended, and mostly implemented, current 141 // invariants: 142 // 143 // (1) PSP is "primary", SP is "secondary". Most stack refs are 144 // PSP-relative. SP-relative is rare and (obviously) only done when we 145 // know that SP is aligned. 146 // 147 // (2) At all times, the relationship SP <= PSP is maintained. The fact that 148 // SP may validly be less than PSP means that pushes on the stack force 149 // the two values to become equal, by copying PSP into SP. However, pops 150 // behave differently: PSP moves back up and SP stays the same, since that 151 // doesn't break the SP <= PSP invariant. 152 // 153 // (3) However, immediately before a call instruction, SP and PSP must be the 154 // same. To enforce this, PSP is copied into SP by the arm64-specific 155 // MacroAssembler::call routines. 
156 // 157 // (4) Also, after a function has returned, it is expected that SP holds the 158 // "primary" value. How exactly this is implemented remains not entirely 159 // clear and merits further investigation. The following points are 160 // believed to be relevant: 161 // 162 // - For calls to functions observing the system AArch64 ABI, PSP (x28) is 163 // callee-saved. That, combined with (3) above, implies SP == PSP 164 // immediately after the call returns. 165 // 166 // - JIT-generated routines return using MacroAssemblerCompat::retn, and 167 // that copies PSP into SP (bizarrely; this would make more sense if it 168 // copied SP into PSP); but in any case, the point is that they are the 169 // same at the point that the return instruction executes. 170 // 171 // - MacroAssembler::callWithABIPost copies PSP into SP after the return 172 // of a call requiring dynamic alignment. 173 // 174 // Given the above, it is unclear exactly where in the return sequence it 175 // is expected that SP == PSP, and also whether it is the callee or caller 176 // that is expected to enforce it. 177 // 178 // In general it would be nice to be able to move (at some time in the future, 179 // not now) to a world where *every* assignment to PSP or SP is followed 180 // immediately by a copy into the other register. That would make all 181 // required correctness proofs trivial in the sense that it would require only 182 // local inspection of code immediately following (dominated by) any such 183 // assignment. For the moment, however, this is a guideline, not a hard 184 // requirement. 185 // 186 // ================ 187 // 188 // Mechanics of keeping the stack pointers in sync: 189 // 190 // The following two methods require that the masm's SP has been set to the PSP 191 // with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they will be 192 // no-ops. The setup is performed manually by the jits after creating the masm. 
193 // 194 // * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP has 195 // been updated, so SP needs to move too. This is used pretty liberally 196 // throughout the code base. 197 // 198 // * MacroAssembler::initPseudoStackPtr() performs PSP := SP. This can be used 199 // after calls to non-ABI compliant code; it's not used much. 200 // 201 // In the ARM64 assembler there is a function Instruction::IsStackPtrSync() that 202 // recognizes the instruction emitted by syncStackPtr(), and this is used to 203 // skip that instruction a few places, should it be present, in the JS JIT where 204 // code is generated to deal with toggled calls. 205 // 206 // In various places there are calls to MacroAssembler::syncStackPtr() which 207 // appear to be redundant. Investigation shows that they often are redundant, 208 // but not always. Finding and removing such redundancies would be quite some 209 // work, so we live for now with the occasional redundant update. Perusal of 210 // the Cortex-A55 and -A72 optimization guides shows no evidence that such 211 // assignments are any more expensive than assignments between vanilla integer 212 // registers, so the costs of such redundant updates are assumed to be small. 213 // 214 // Invariants on the PSP at function call boundaries: 215 // 216 // It *appears* that the following invariants exist: 217 // 218 // * On entry to JIT code, PSP == SP, ie the stack pointer is transmitted via 219 // both registers. 220 // 221 // * On entry to C++ code, PSP == SP. Certainly it appears that all calls 222 // created by the MacroAssembler::call(..) routines perform 'syncStackPtr' 223 // immediately before the call, and all ABI calls are routed through the 224 // MacroAssembler::call layer. 225 // 226 // * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the 227 // active stack pointer and that PSP is dead. 228 // 229 // * The PSP is non-volatile (callee-saved). 
Along a normal return path from
// JIT code, simply having PSP == SP on exit is correct, since the exit SP is
// the same as the entry SP by the JIT ABI.
//
// * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be
//   used), and will not need to restore the PSP on return because x28 is
//   non-volatile in the ARM64 ABI.
//
// ================
//
// Future cleanups to the SP-vs-PSP machinery:
//
// Currently we have somewhat unclear invariants, which are not obviously
// always enforced, and which may require complex non-local reasoning.
// Auditing the code to ensure that the invariants always hold, whilst not
// generating duplicate syncs, is close to impossible. A future rework to
// tidy this might be as follows. (This suggestion pertains to the entire
// JIT complex: all of the JS compilers, wasm compilers, stub generators,
// regexp compilers, etc.)
//
// Currently we have that, in JIT-generated code, PSP is "primary" and SP is
// "secondary", meaning that PSP has the "real" stack pointer value and SP is
// updated whenever PSP acquires a lower value, so as to ensure that SP <= PSP.
// An exception to this scheme is the stubs code generated by WasmStubs.cpp,
// which assumes that SP is "primary" and PSP is dead.
//
// It might give us an easier incremental path to eventually removing PSP
// entirely if we switched to having SP always be the primary. That is:
//
//   (1) SP is primary, PSP is secondary
//   (2) After any assignment to SP, it is copied into PSP
//   (3) All (non-frame-pointer-based) stack accesses are PSP-relative
//       (as at present)
//
// This would have the effect that:
//
// * It would reinstate the invariant that on all targets, the "real" SP value
//   is in the ABI-and-or-hardware-mandated stack pointer register.
267 // 268 // * It would give us a simple story about calls and returns: 269 // - for calls to non-JIT generated code (viz, C++ etc), we need no extra 270 // copies, because PSP (x28) is callee-saved 271 // - for calls to JIT-generated code, we need no extra copies, because of (2) 272 // above 273 // 274 // * We could incrementally migrate those parts of the code generator where we 275 // know that SP is 16-aligned, to use SP- rather than PSP-relative accesses 276 // 277 // * The consistent use of (2) would remove the requirement to have to perform 278 // path-dependent reasoning (for paths in the generated code, not in the 279 // compiler) when reading/understanding the code. 280 // 281 // * x28 would become free for use by stubs and the baseline compiler without 282 // having to worry about interoperating with code that expects x28 to hold a 283 // valid PSP. 284 // 285 // One might ask what mechanical checks we can add to ensure correctness, rather 286 // than having to verify these invariants by hand indefinitely. Maybe some 287 // combination of: 288 // 289 // * In debug builds, compiling-in assert(SP == PSP) at critical places. This 290 // can be done using the existing `assertStackPtrsSynced` function. 291 // 292 // * In debug builds, scanning sections of generated code to ensure no 293 // SP-relative stack accesses have been created -- for some sections, at 294 // least every assignment to SP is immediately followed by a copy to x28. 295 // This would also facilitate detection of duplicate syncs. 296 // 297 // ================ 298 // 299 // Other investigative notes, for the code base at present: 300 // 301 // * Some disassembly dumps suggest that we sync the stack pointer too often. 302 // This could be the result of various pieces of code working at cross 303 // purposes when syncing the stack pointer, or of not paying attention to the 304 // precise invariants. 
305 // 306 // * As documented in RegExpNativeMacroAssembler.cpp, function 307 // SMRegExpMacroAssembler::createStackFrame: 308 // 309 // // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for 310 // // addressing. The register we use for PSP may however also be used by 311 // // calling code, and it is nonvolatile, so save it. Do this as a special 312 // // case first because the generic save/restore code needs the PSP to be 313 // // initialized already. 314 // 315 // and also in function SMRegExpMacroAssembler::exitHandler: 316 // 317 // // Restore the saved value of the PSP register, this value is whatever the 318 // // caller had saved in it, not any actual SP value, and it must not be 319 // // overwritten subsequently. 320 // 321 // The original source for these comments was a patch for bug 1445907. 322 // 323 // * MacroAssembler-arm64.h has an interesting comment in the retn() 324 // function: 325 // 326 // syncStackPtr(); // SP is always used to transmit the stack between calls. 327 // 328 // Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp, 329 // at callWithABIPre and callWithABIPost. 330 // 331 // * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find 332 // 333 // // SP is used to transfer stack across call boundaries. 334 // masm.initPseudoStackPtr(); 335 // 336 // after the return point of a callWithVMWrapper. The only reasonable 337 // conclusion from all those (assuming they are right) is that SP == PSP. 338 // 339 // * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires SP==PSP 340 // and tiered code can have Baseline->Ion calls, Baseline will set PSP=SP 341 // before a call to wasm code. 342 // 343 // ================ 344 345 // StackPointer is intentionally undefined on ARM64 to prevent misuse: using 346 // sp as a base register is only valid if sp % 16 == 0. 
347 static constexpr Register RealStackPointer{Registers::sp}; 348 349 static constexpr Register PseudoStackPointer{Registers::x28}; 350 static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64}; 351 static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32}; 352 353 static constexpr Register IntArgReg0{Registers::x0}; 354 static constexpr Register IntArgReg1{Registers::x1}; 355 static constexpr Register IntArgReg2{Registers::x2}; 356 static constexpr Register IntArgReg3{Registers::x3}; 357 static constexpr Register IntArgReg4{Registers::x4}; 358 static constexpr Register IntArgReg5{Registers::x5}; 359 static constexpr Register IntArgReg6{Registers::x6}; 360 static constexpr Register IntArgReg7{Registers::x7}; 361 362 // Define unsized Registers. 363 #define DEFINE_UNSIZED_REGISTERS(N) \ 364 static constexpr Register r##N{Registers::x##N}; 365 REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS) 366 #undef DEFINE_UNSIZED_REGISTERS 367 static constexpr Register ip0{Registers::x16}; 368 static constexpr Register ip1{Registers::x17}; 369 static constexpr Register fp{Registers::x29}; 370 static constexpr Register lr{Registers::x30}; 371 static constexpr Register rzr{Registers::xzr}; 372 373 // Import VIXL registers into the js::jit namespace. 374 #define IMPORT_VIXL_REGISTERS(N) \ 375 static constexpr ARMRegister w##N = vixl::w##N; \ 376 static constexpr ARMRegister x##N = vixl::x##N; 377 REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS) 378 #undef IMPORT_VIXL_REGISTERS 379 static constexpr ARMRegister wzr = vixl::wzr; 380 static constexpr ARMRegister xzr = vixl::xzr; 381 static constexpr ARMRegister wsp = vixl::wsp; 382 static constexpr ARMRegister sp = vixl::sp; 383 384 // Import VIXL VRegisters into the js::jit namespace. 
385 #define IMPORT_VIXL_VREGISTERS(N) \ 386 static constexpr ARMFPRegister s##N = vixl::s##N; \ 387 static constexpr ARMFPRegister d##N = vixl::d##N; 388 REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS) 389 #undef IMPORT_VIXL_VREGISTERS 390 391 static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg); 392 393 // Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use 394 // JSReturnOperand). 395 static constexpr Register RegExpMatcherRegExpReg = CallTempReg0; 396 static constexpr Register RegExpMatcherStringReg = CallTempReg1; 397 static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2; 398 399 // Registers used by RegExpExecTest stub (do not use ReturnReg). 400 static constexpr Register RegExpExecTestRegExpReg = CallTempReg0; 401 static constexpr Register RegExpExecTestStringReg = CallTempReg1; 402 403 // Registers used by RegExpSearcher stub (do not use ReturnReg). 404 static constexpr Register RegExpSearcherRegExpReg = CallTempReg0; 405 static constexpr Register RegExpSearcherStringReg = CallTempReg1; 406 static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2; 407 408 static constexpr Register JSReturnReg_Type = r3; 409 static constexpr Register JSReturnReg_Data = r2; 410 411 static constexpr FloatRegister NANReg = {FloatRegisters::d14, 412 FloatRegisters::Single}; 413 // N.B. r8 isn't listed as an aapcs temp register, but we can use it as such 414 // because we never use return-structs. 
415 static constexpr Register CallTempNonArgRegs[] = {r8, r9, r10, r11, 416 r12, r13, r14, r15}; 417 static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs); 418 419 static constexpr uint32_t JitStackAlignment = 16; 420 421 static constexpr uint32_t JitStackValueAlignment = 422 JitStackAlignment / sizeof(Value); 423 static_assert(JitStackAlignment % sizeof(Value) == 0 && 424 JitStackValueAlignment >= 1, 425 "Stack alignment should be a non-zero multiple of sizeof(Value)"); 426 427 static constexpr uint32_t SimdMemoryAlignment = 16; 428 429 static_assert(CodeAlignment % SimdMemoryAlignment == 0, 430 "Code alignment should be larger than any of the alignments " 431 "which are used for " 432 "the constant sections of the code buffer. Thus it should be " 433 "larger than the " 434 "alignment for SIMD constants."); 435 436 static const uint32_t WasmStackAlignment = SimdMemoryAlignment; 437 static const uint32_t WasmTrapInstructionLength = 4; 438 439 // See comments in wasm::GenerateFunctionPrologue. The difference between these 440 // is the size of the largest callable prologue on the platform. 441 static constexpr uint32_t WasmCheckedCallEntryOffset = 0u; 442 443 class Assembler : public vixl::Assembler { 444 public: 445 Assembler() : vixl::Assembler() {} 446 447 using Condition = vixl::Condition; 448 449 void finish(); 450 bool appendRawCode(const uint8_t* code, size_t numBytes); 451 bool reserve(size_t size); 452 bool swapBuffer(wasm::Bytes& bytes); 453 454 // Emit the jump table, returning the BufferOffset to the first entry in the 455 // table. 
456 BufferOffset emitExtendedJumpTable(); 457 BufferOffset ExtendedJumpTable_; 458 void executableCopy(uint8_t* buffer); 459 460 BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op, 461 const LiteralDoc& doc, 462 ARMBuffer::PoolEntry* pe = nullptr); 463 BufferOffset immPool64(ARMRegister dest, uint64_t value, 464 ARMBuffer::PoolEntry* pe = nullptr); 465 BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value, 466 vixl::LoadLiteralOp op, const LiteralDoc& doc); 467 BufferOffset fImmPool64(ARMFPRegister dest, double value); 468 BufferOffset fImmPool32(ARMFPRegister dest, float value); 469 470 uint32_t currentOffset() const { return nextOffset().getOffset(); } 471 472 void bind(Label* label) { bind(label, nextOffset()); } 473 void bind(Label* label, BufferOffset boff); 474 void bind(CodeLabel* label) { label->target()->bind(currentOffset()); } 475 476 void setUnlimitedBuffer() { armbuffer_.setUnlimited(); } 477 bool oom() const { 478 return AssemblerShared::oom() || armbuffer_.oom() || 479 jumpRelocations_.oom() || dataRelocations_.oom(); 480 } 481 482 void copyJumpRelocationTable(uint8_t* dest) const { 483 if (jumpRelocations_.length()) { 484 memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length()); 485 } 486 } 487 void copyDataRelocationTable(uint8_t* dest) const { 488 if (dataRelocations_.length()) { 489 memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length()); 490 } 491 } 492 493 size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); } 494 size_t dataRelocationTableBytes() const { return dataRelocations_.length(); } 495 size_t bytesNeeded() const { 496 return SizeOfCodeGenerated() + jumpRelocationTableBytes() + 497 dataRelocationTableBytes(); 498 } 499 500 void processCodeLabels(uint8_t* rawCode) { 501 for (const CodeLabel& label : codeLabels_) { 502 Bind(rawCode, label); 503 } 504 } 505 506 static void UpdateLoad64Value(Instruction* inst0, uint64_t value); 507 508 static void Bind(uint8_t* 
rawCode, const CodeLabel& label) { 509 auto mode = label.linkMode(); 510 size_t patchAtOffset = label.patchAt().offset(); 511 size_t targetOffset = label.target().offset(); 512 513 if (mode == CodeLabel::MoveImmediate) { 514 Instruction* inst = (Instruction*)(rawCode + patchAtOffset); 515 Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset)); 516 } else { 517 *reinterpret_cast<const void**>(rawCode + patchAtOffset) = 518 rawCode + targetOffset; 519 } 520 } 521 522 void retarget(Label* cur, Label* next); 523 524 // The buffer is about to be linked. Ensure any constant pools or 525 // excess bookkeeping has been flushed to the instruction stream. 526 void flush() { armbuffer_.flushPool(); } 527 528 void comment(const char* msg) { 529 #ifdef JS_DISASM_ARM64 530 spew_.spew("; %s", msg); 531 #endif 532 } 533 534 void setPrinter(Sprinter* sp) { 535 #ifdef JS_DISASM_ARM64 536 spew_.setPrinter(sp); 537 #endif 538 } 539 540 static bool SupportsFloatingPoint() { return true; } 541 static bool SupportsUnalignedAccesses() { return true; } 542 static bool SupportsFastUnalignedFPAccesses() { return true; } 543 static bool SupportsWasmSimd() { return true; } 544 static bool SupportsFloat64To16() { return true; } 545 static bool SupportsFloat32To16() { return true; } 546 547 static bool HasRoundInstruction(RoundingMode mode) { 548 switch (mode) { 549 case RoundingMode::Up: 550 case RoundingMode::Down: 551 case RoundingMode::NearestTiesToEven: 552 case RoundingMode::TowardsZero: 553 return true; 554 } 555 MOZ_CRASH("unexpected mode"); 556 } 557 558 protected: 559 // Add a jump whose target is unknown until finalization. 560 // The jump may not be patched at runtime. 
561 void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind); 562 563 public: 564 static uint32_t PatchWrite_NearCallSize() { return 4; } 565 566 static uint32_t NopSize() { return 4; } 567 568 static void PatchWrite_NearCall(CodeLocationLabel start, 569 CodeLocationLabel toCall); 570 static void PatchDataWithValueCheck(CodeLocationLabel label, 571 PatchedImmPtr newValue, 572 PatchedImmPtr expected); 573 574 static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue, 575 ImmPtr expected); 576 577 static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) { 578 // Raw is going to be the return address. 579 uint32_t* raw = (uint32_t*)label.raw(); 580 // Overwrite the 4 bytes before the return address, which will end up being 581 // the call instruction. 582 *(raw - 1) = imm.value; 583 } 584 static uint32_t AlignDoubleArg(uint32_t offset) { 585 MOZ_CRASH("AlignDoubleArg()"); 586 } 587 static uintptr_t GetPointer(uint8_t* ptr) { 588 Instruction* i = reinterpret_cast<Instruction*>(ptr); 589 uint64_t ret = i->Literal64(); 590 return ret; 591 } 592 593 // Toggle a jmp or cmp emitted by toggledJump(). 
594 static void ToggleToJmp(CodeLocationLabel inst_); 595 static void ToggleToCmp(CodeLocationLabel inst_); 596 static void ToggleCall(CodeLocationLabel inst_, bool enabled); 597 598 static void TraceJumpRelocations(JSTracer* trc, JitCode* code, 599 CompactBufferReader& reader); 600 static void TraceDataRelocations(JSTracer* trc, JitCode* code, 601 CompactBufferReader& reader); 602 603 void assertNoGCThings() const { 604 #ifdef DEBUG 605 MOZ_ASSERT(dataRelocations_.length() == 0); 606 for (auto& j : pendingJumps_) { 607 MOZ_ASSERT(j.kind == RelocationKind::HARDCODED); 608 } 609 #endif 610 } 611 612 public: 613 // A Jump table entry is 2 instructions, with 8 bytes of raw data 614 static const size_t SizeOfJumpTableEntry = 16; 615 616 struct JumpTableEntry { 617 uint32_t ldr; 618 uint32_t br; 619 void* data; 620 621 Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); } 622 }; 623 624 // Offset of the patchable target for the given entry. 625 static const size_t OffsetOfJumpTableEntryPointer = 8; 626 627 public: 628 void writeCodePointer(CodeLabel* label) { 629 armbuffer_.assertNoPoolAndNoNops(); 630 uintptr_t x = uintptr_t(-1); 631 BufferOffset off = EmitData(&x, sizeof(uintptr_t)); 632 label->patchAt()->bind(off.getOffset()); 633 } 634 635 void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end, 636 const Disassembler::HeapAccess& heapAccess) { 637 MOZ_CRASH("verifyHeapAccessDisassembly"); 638 } 639 640 protected: 641 // Structure for fixing up pc-relative loads/jumps when the machine 642 // code gets moved (executable copy, gc, etc.). 643 struct RelativePatch { 644 BufferOffset offset; 645 void* target; 646 RelocationKind kind; 647 648 RelativePatch(BufferOffset offset, void* target, RelocationKind kind) 649 : offset(offset), target(target), kind(kind) {} 650 }; 651 652 // List of jumps for which the target is either unknown until finalization, 653 // or cannot be known due to GC. 
Each entry here requires a unique entry 654 // in the extended jump table, and is patched at finalization. 655 js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_; 656 657 // Final output formatters. 658 CompactBufferWriter jumpRelocations_; 659 CompactBufferWriter dataRelocations_; 660 }; 661 662 static const uint32_t NumIntArgRegs = 8; 663 static const uint32_t NumFloatArgRegs = 8; 664 665 class ABIArgGenerator : public ABIArgGeneratorShared { 666 public: 667 explicit ABIArgGenerator(ABIKind kind) 668 : ABIArgGeneratorShared(kind), 669 intRegIndex_(0), 670 floatRegIndex_(0), 671 current_() {} 672 673 ABIArg next(MIRType argType); 674 ABIArg& current() { return current_; } 675 676 protected: 677 unsigned intRegIndex_; 678 unsigned floatRegIndex_; 679 ABIArg current_; 680 }; 681 682 // See "ABI special registers" in Assembler-shared.h for more information. 683 static constexpr Register ABINonArgReg0 = r8; 684 static constexpr Register ABINonArgReg1 = r9; 685 static constexpr Register ABINonArgReg2 = r10; 686 static constexpr Register ABINonArgReg3 = r11; 687 688 // See "ABI special registers" in Assembler-shared.h for more information. 689 // Avoid d31 which is the ScratchDoubleReg_. 690 static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16, 691 FloatRegisters::Single}; 692 693 // See "ABI special registers" in Assembler-shared.h for more information. 694 static constexpr Register ABINonArgReturnReg0 = r8; 695 static constexpr Register ABINonArgReturnReg1 = r9; 696 static constexpr Register ABINonVolatileReg{Registers::x19}; 697 698 // See "ABI special registers" in Assembler-shared.h for more information. 699 static constexpr Register ABINonArgReturnVolatileReg = r8; 700 701 // See "ABI special registers" in Assembler-shared.h, and "The WASM ABIs" in 702 // WasmFrame.h for more information. 
703 static constexpr Register InstanceReg{Registers::x23}; 704 static constexpr Register HeapReg{Registers::x21}; 705 706 // Registers used for wasm table calls. These registers must be disjoint 707 // from the ABI argument registers, InstanceReg and each other. 708 static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0; 709 static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1; 710 static constexpr Register WasmTableCallSigReg = ABINonArgReg2; 711 static constexpr Register WasmTableCallIndexReg = ABINonArgReg3; 712 713 // Registers used for ref calls. 714 static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0; 715 static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1; 716 static constexpr Register WasmCallRefCallScratchReg2 = ABINonArgReg2; 717 static constexpr Register WasmCallRefReg = ABINonArgReg3; 718 719 // Registers used for wasm tail calls operations. 720 static constexpr Register WasmTailCallInstanceScratchReg = ABINonArgReg1; 721 static constexpr Register WasmTailCallRAScratchReg = lr; 722 static constexpr Register WasmTailCallFPScratchReg = ABINonArgReg3; 723 724 // Register used as a scratch along the return path in the fast js -> wasm stub 725 // code. This must not overlap ReturnReg, JSReturnOperand, or InstanceReg. 726 // It must be a volatile register. 
727 static constexpr Register WasmJitEntryReturnScratch = r9; 728 729 static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, 730 Register* out) { 731 if (usedIntArgs >= NumIntArgRegs) { 732 return false; 733 } 734 *out = Register::FromCode(usedIntArgs); 735 return true; 736 } 737 738 static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, 739 FloatRegister* out) { 740 if (usedFloatArgs >= NumFloatArgRegs) { 741 return false; 742 } 743 *out = FloatRegister::FromCode(usedFloatArgs); 744 return true; 745 } 746 747 // Get a register in which we plan to put a quantity that will be used as an 748 // integer argument. This differs from GetIntArgReg in that if we have no more 749 // actual argument registers to use we will fall back on using whatever 750 // CallTempReg* don't overlap the argument registers, and only fail once those 751 // run out too. 752 static inline bool GetTempRegForIntArg(uint32_t usedIntArgs, 753 uint32_t usedFloatArgs, Register* out) { 754 if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) { 755 return true; 756 } 757 // Unfortunately, we have to assume things about the point at which 758 // GetIntArgReg returns false, because we need to know how many registers it 759 // can allocate. 760 usedIntArgs -= NumIntArgRegs; 761 if (usedIntArgs >= NumCallTempNonArgRegs) { 762 return false; 763 } 764 *out = CallTempNonArgRegs[usedIntArgs]; 765 return true; 766 } 767 768 // Forbids nop filling for testing purposes. Nestable, but nested calls have 769 // no effect on the no-nops status; it is only the top level one that counts. 770 class AutoForbidNops { 771 protected: 772 Assembler* asm_; 773 774 public: 775 explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) { asm_->enterNoNops(); } 776 ~AutoForbidNops() { asm_->leaveNoNops(); } 777 }; 778 779 // Forbids pool generation during a specified interval. 
Nestable, but nested 780 // calls must imply a no-pool area of the assembler buffer that is completely 781 // contained within the area implied by the outermost level call. 782 class AutoForbidPoolsAndNops : public AutoForbidNops { 783 public: 784 AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst) 785 : AutoForbidNops(asm_) { 786 asm_->enterNoPool(maxInst); 787 } 788 ~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); } 789 }; 790 791 } // namespace jit 792 } // namespace js 793 794 #endif // A64_ASSEMBLER_A64_H_