tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Assembler-arm64.h (32990B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef A64_ASSEMBLER_A64_H_
      8 #define A64_ASSEMBLER_A64_H_
      9 
     10 #include "jit/arm64/vixl/Assembler-vixl.h"
     11 
     12 #include "jit/CompactBuffer.h"
     13 #include "jit/shared/Disassembler-shared.h"
     14 #include "wasm/WasmTypeDecls.h"
     15 
     16 namespace js {
     17 namespace jit {
     18 
     19 // VIXL imports.
     20 using ARMRegister = vixl::Register;
     21 using ARMFPRegister = vixl::FPRegister;
     22 using vixl::ARMBuffer;
     23 using vixl::Instruction;
     24 
        // Disassembler-spew annotations for labels and constant-pool literals.
     25 using LabelDoc = DisassemblerSpew::LabelDoc;
     26 using LiteralDoc = DisassemblerSpew::LiteralDoc;
     27 
        // Code-offset alignment expected at the start / middle of the prologue.
     28 static const uint32_t AlignmentAtPrologue = 0;
     29 static const uint32_t AlignmentMidPrologue = 8;
        // Pointers are 8 bytes wide on AArch64.
     30 static const Scale ScalePointer = TimesEight;
     31 
     32 // The MacroAssembler uses scratch registers extensively and unexpectedly.
     33 // For safety, scratch registers should always be acquired using
     34 // vixl::UseScratchRegisterScope.
        // ip0/ip1 (x16/x17) are the AArch64 intra-procedure-call temporaries.
     35 static constexpr Register ScratchReg{Registers::ip0};
     36 static constexpr ARMRegister ScratchReg64 = {ScratchReg, 64};
     37 
     38 static constexpr Register ScratchReg2{Registers::ip1};
     39 static constexpr ARMRegister ScratchReg2_64 = {ScratchReg2, 64};
     40 
        // d0 carries double return values; d31 is reserved as the double scratch
        // register and should be acquired via ScratchDoubleScope below.
     41 static constexpr FloatRegister ReturnDoubleReg = {FloatRegisters::d0,
     42                                                  FloatRegisters::Double};
     43 static constexpr FloatRegister ScratchDoubleReg_ = {FloatRegisters::d31,
     44                                                    FloatRegisters::Double};
        // RAII helper that claims the double scratch register for its lifetime.
     45 struct ScratchDoubleScope : public AutoFloatRegisterScope {
     46  explicit ScratchDoubleScope(MacroAssembler& masm)
     47      : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {}
     48 };
     49 
        // s0 carries float32 return values; s31 (aliasing d31) is the float32
        // scratch register.
     50 static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0,
     51                                                   FloatRegisters::Single};
     52 static constexpr FloatRegister ScratchFloat32Reg_ = {FloatRegisters::s31,
     53                                                     FloatRegisters::Single};
        // RAII helper that claims the float32 scratch register for its lifetime.
     54 struct ScratchFloat32Scope : public AutoFloatRegisterScope {
     55  explicit ScratchFloat32Scope(MacroAssembler& masm)
     56      : AutoFloatRegisterScope(masm, ScratchFloat32Reg_) {}
     57 };
     58 
     59 #ifdef ENABLE_WASM_SIMD
        // v0 carries SIMD128 return values; v31 (aliasing d31/s31) is the SIMD
        // scratch register.
     60 static constexpr FloatRegister ReturnSimd128Reg = {FloatRegisters::v0,
     61                                                   FloatRegisters::Simd128};
     62 static constexpr FloatRegister ScratchSimd128Reg = {FloatRegisters::v31,
     63                                                    FloatRegisters::Simd128};
     64 struct ScratchSimd128Scope : public AutoFloatRegisterScope {
     65  explicit ScratchSimd128Scope(MacroAssembler& masm)
     66      : AutoFloatRegisterScope(masm, ScratchSimd128Reg) {}
     67 };
     68 #else
        // Without SIMD the scope type must still exist so callers can name it,
        // but actually constructing one is a programming error.
     69 struct ScratchSimd128Scope : public AutoFloatRegisterScope {
     70  explicit ScratchSimd128Scope(MacroAssembler& masm)
     71      : AutoFloatRegisterScope(masm, ScratchDoubleReg_) {
     72    MOZ_CRASH("SIMD not enabled");
     73  }
     74 };
     75 #endif
     76 
        // Sentinel "no register" values.
     77 static constexpr Register InvalidReg{Registers::Invalid};
     78 static constexpr FloatRegister InvalidFloatReg = {};
     79 
     80 static constexpr Register OsrFrameReg{Registers::x3};
     81 static constexpr Register CallTempReg0{Registers::x9};
     82 static constexpr Register CallTempReg1{Registers::x10};
     83 static constexpr Register CallTempReg2{Registers::x11};
     84 static constexpr Register CallTempReg3{Registers::x12};
     85 static constexpr Register CallTempReg4{Registers::x13};
     86 static constexpr Register CallTempReg5{Registers::x14};
     87 
     88 static constexpr Register PreBarrierReg{Registers::x1};
     89 
        // NOTE(review): aliases CallTempReg0 (x9) — presumably the two uses are
        // never live at the same time; confirm against the baseline interpreter.
     90 static constexpr Register InterpreterPCReg{Registers::x9};
     91 
     92 static constexpr Register ReturnReg{Registers::x0};
     93 static constexpr Register64 ReturnReg64(ReturnReg);
     94 static constexpr Register JSReturnReg{Registers::x2};
     95 static constexpr Register FramePointer{Registers::fp};
     96 static constexpr ARMRegister FramePointer64{FramePointer, 64};
        // AArch64 encodes SP and XZR with the same register number (31), which is
        // why the zero register is spelled Registers::sp here; the instruction
        // context selects which one is meant.
     97 static constexpr Register ZeroRegister{Registers::sp};
     98 static constexpr ARMRegister ZeroRegister64{Registers::sp, 64};
     99 static constexpr ARMRegister ZeroRegister32{Registers::sp, 32};
    100 
    101 // [SMDOC] AArch64 Stack Pointer and Pseudo Stack Pointer conventions
    102 //
    103 //                               ================
    104 //
    105 // Stack pointer (SP), PseudoStackPointer (PSP), and RealStackPointer:
    106 //
    107 // The ARM64 real SP has a constraint: it must be 16-byte aligned whenever it
    108 // is used as the base pointer for a memory access.  (SP+offset need not be
    109 // 16-byte aligned, but the SP value itself must be.)  The SP register may
    110 // take on unaligned values but may not be used for a memory access while it
    111 // is unaligned.
    112 //
    113 // Stack-alignment checking can be enabled or disabled by a control register;
    114 // however that register cannot be modified by user space.  We have to assume
    115 // stack alignment checking is enabled, and that does usually appear to be the
    116 // case.  See the ARM Architecture Reference Manual, "D1.8.2 SP alignment
    117 // checking", for further details.
    118 //
    119 // A second constraint is forced upon us by the ARM64 ABI.  This requires that
    120 // all accesses to the stack must be at or above SP.  Accesses below SP are
    121 // strictly forbidden, presumably because the kernel might use that area of
    122 // memory for its own purposes -- in particular, signal delivery -- and hence
    123 // it may get trashed at any time.
    124 //
    125 // Note this doesn't mean that accesses to the stack must be based off
    126 // register SP.  Only that the effective addresses must be >= SP, regardless
    127 // of how the address is formed.
    128 //
    129 // In order to allow word-wise pushes and pops, some of our ARM64 jits
    130 // (JS-Baseline, JS-Ion, and Wasm-Ion, but not Wasm-Baseline) dedicate x28 to
    131 // be used as a PseudoStackPointer (PSP).
    132 //
    133 // Initially the PSP will have the same value as the SP.  Code can, if it
    134 // wants, push a single word by subtracting 8 from the PSP, doing SP := PSP,
    135 // then storing the value at PSP+0.  Given other constraints on the alignment
    136 // of the SP at function call boundaries, this works out OK, at the cost of
    137 // the two extra instructions per push / pop.
    138 //
    139 // This is all a bit messy, and is probably not robustly adhered to.  However,
    140 // the following appear to be the intended, and mostly implemented, current
    141 // invariants:
    142 //
    143 // (1) PSP is "primary", SP is "secondary".  Most stack refs are
    144 //     PSP-relative. SP-relative is rare and (obviously) only done when we
    145 //     know that SP is aligned.
    146 //
    147 // (2) At all times, the relationship SP <= PSP is maintained.  The fact that
    148 //     SP may validly be less than PSP means that pushes on the stack force
    149 //     the two values to become equal, by copying PSP into SP.  However, pops
    150 //     behave differently: PSP moves back up and SP stays the same, since that
    151 //     doesn't break the SP <= PSP invariant.
    152 //
    153 // (3) However, immediately before a call instruction, SP and PSP must be the
    154 //     same.  To enforce this, PSP is copied into SP by the arm64-specific
    155 //     MacroAssembler::call routines.
    156 //
    157 // (4) Also, after a function has returned, it is expected that SP holds the
    158 //     "primary" value.  How exactly this is implemented remains not entirely
    159 //     clear and merits further investigation.  The following points are
    160 //     believed to be relevant:
    161 //
    162 //     - For calls to functions observing the system AArch64 ABI, PSP (x28) is
    163 //       callee-saved.  That, combined with (3) above, implies SP == PSP
    164 //       immediately after the call returns.
    165 //
    166 //     - JIT-generated routines return using MacroAssemblerCompat::retn, and
    167 //       that copies PSP into SP (bizarrely; this would make more sense if it
    168 //       copied SP into PSP); but in any case, the point is that they are the
    169 //       same at the point that the return instruction executes.
    170 //
    171 //     - MacroAssembler::callWithABIPost copies PSP into SP after the return
    172 //       of a call requiring dynamic alignment.
    173 //
    174 //     Given the above, it is unclear exactly where in the return sequence it
    175 //     is expected that SP == PSP, and also whether it is the callee or caller
    176 //     that is expected to enforce it.
    177 //
    178 // In general it would be nice to be able to move (at some time in the future,
    179 // not now) to a world where *every* assignment to PSP or SP is followed
    180 // immediately by a copy into the other register.  That would make all
    181 // required correctness proofs trivial in the sense that it would require only
    182 // local inspection of code immediately following (dominated by) any such
    183 // assignment.  For the moment, however, this is a guideline, not a hard
    184 // requirement.
    185 //
    186 //                               ================
    187 //
    188 // Mechanics of keeping the stack pointers in sync:
    189 //
    190 // The following two methods require that the masm's SP has been set to the PSP
    191 // with MacroAssembler::SetStackPointer64(PseudoStackPointer64), or they will be
    192 // no-ops.  The setup is performed manually by the jits after creating the masm.
    193 //
    194 // * MacroAssembler::syncStackPtr() performs SP := PSP, presumably after PSP has
    195 //   been updated, so SP needs to move too.  This is used pretty liberally
    196 //   throughout the code base.
    197 //
    198 // * MacroAssembler::initPseudoStackPtr() performs PSP := SP.  This can be used
    199 //   after calls to non-ABI compliant code; it's not used much.
    200 //
    201 // In the ARM64 assembler there is a function Instruction::IsStackPtrSync() that
    202 // recognizes the instruction emitted by syncStackPtr(), and this is used to
    203 // skip that instruction a few places, should it be present, in the JS JIT where
    204 // code is generated to deal with toggled calls.
    205 //
    206 // In various places there are calls to MacroAssembler::syncStackPtr() which
    207 // appear to be redundant.  Investigation shows that they often are redundant,
    208 // but not always.  Finding and removing such redundancies would be quite some
    209 // work, so we live for now with the occasional redundant update.  Perusal of
    210 // the Cortex-A55 and -A72 optimization guides shows no evidence that such
    211 // assignments are any more expensive than assignments between vanilla integer
    212 // registers, so the costs of such redundant updates are assumed to be small.
    213 //
    214 // Invariants on the PSP at function call boundaries:
    215 //
    216 // It *appears* that the following invariants exist:
    217 //
    218 // * On entry to JIT code, PSP == SP, ie the stack pointer is transmitted via
    219 //   both registers.
    220 //
    221 // * On entry to C++ code, PSP == SP.  Certainly it appears that all calls
    222 //   created by the MacroAssembler::call(..) routines perform 'syncStackPtr'
    223 //   immediately before the call, and all ABI calls are routed through the
    224 //   MacroAssembler::call layer.
    225 //
    226 // * The stubs generated by WasmStubs.cpp assume that, on entry, SP is the
    227 //   active stack pointer and that PSP is dead.
    228 //
    229 // * The PSP is non-volatile (callee-saved).  Along a normal return path from
    230 //   JIT code, simply having PSP == SP on exit is correct, since the exit SP is
    231 //   the same as the entry SP by the JIT ABI.
    232 //
    233 // * Call-outs to non-JIT C++ code do not need to set up the PSP (it won't be
    234 //   used), and will not need to restore the PSP on return because x28 is
    235 //   non-volatile in the ARM64 ABI.
    236 //
    237 //                               ================
    238 //
    239 // Future cleanups to the SP-vs-PSP machinery:
    240 //
    241 // Currently we have somewhat unclear invariants, which are not obviously
    242 // always enforced, and which may require complex non-local reasoning.
    243 // Auditing the code to ensure that the invariants always hold, whilst not
    244 // generating duplicate syncs, is close to impossible.  A future rework to
    245 // tidy this might be as follows.  (This suggestion pertains to the entire
    246 // JIT complex: all of the JS compilers, wasm compilers, stub generators,
    247 // regexp compilers, etc).
    248 //
    249 // Currently we have that, in JIT-generated code, PSP is "primary" and SP is
    250 // "secondary", meaning that PSP has the "real" stack pointer value and SP is
    251 // updated whenever PSP acquires a lower value, so as to ensure that SP <= PSP.
    252 // An exception to this scheme is the stubs code generated by WasmStubs.cpp,
    253 // which assumes that SP is "primary" and PSP is dead.
    254 //
    255 // It might give us an easier incremental path to eventually removing PSP
    256 // entirely if we switched to having SP always be the primary.  That is:
    257 //
    258 // (1) SP is primary, PSP is secondary
    259 // (2) After any assignment to SP, it is copied into PSP
    260 // (3) All (non-frame-pointer-based) stack accesses are PSP-relative
    261 //     (as at present)
    262 //
    263 // This would have the effect that:
    264 //
    265 // * It would reinstate the invariant that on all targets, the "real" SP value
    266 //   is in the ABI-and-or-hardware-mandated stack pointer register.
    267 //
    268 // * It would give us a simple story about calls and returns:
    269 //   - for calls to non-JIT generated code (viz, C++ etc), we need no extra
    270 //     copies, because PSP (x28) is callee-saved
    271 //   - for calls to JIT-generated code, we need no extra copies, because of (2)
    272 //     above
    273 //
    274 // * We could incrementally migrate those parts of the code generator where we
    275 //   know that SP is 16-aligned, to use SP- rather than PSP-relative accesses
    276 //
    277 // * The consistent use of (2) would remove the requirement to have to perform
    278 //   path-dependent reasoning (for paths in the generated code, not in the
    279 //   compiler) when reading/understanding the code.
    280 //
    281 // * x28 would become free for use by stubs and the baseline compiler without
    282 //   having to worry about interoperating with code that expects x28 to hold a
    283 //   valid PSP.
    284 //
    285 // One might ask what mechanical checks we can add to ensure correctness, rather
    286 // than having to verify these invariants by hand indefinitely.  Maybe some
    287 // combination of:
    288 //
    289 // * In debug builds, compiling-in assert(SP == PSP) at critical places.  This
    290 //   can be done using the existing `assertStackPtrsSynced` function.
    291 //
    292 // * In debug builds, scanning sections of generated code to ensure no
    293 //   SP-relative stack accesses have been created -- for some sections, at
    294 //   least every assignment to SP is immediately followed by a copy to x28.
    295 //   This would also facilitate detection of duplicate syncs.
    296 //
    297 //                               ================
    298 //
    299 // Other investigative notes, for the code base at present:
    300 //
    301 // * Some disassembly dumps suggest that we sync the stack pointer too often.
    302 //   This could be the result of various pieces of code working at cross
    303 //   purposes when syncing the stack pointer, or of not paying attention to the
    304 //   precise invariants.
    305 //
    306 // * As documented in RegExpNativeMacroAssembler.cpp, function
    307 //   SMRegExpMacroAssembler::createStackFrame:
    308 //
    309 //   // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
    310 //   // addressing.  The register we use for PSP may however also be used by
    311 //   // calling code, and it is nonvolatile, so save it.  Do this as a special
    312 //   // case first because the generic save/restore code needs the PSP to be
    313 //   // initialized already.
    314 //
    315 //   and also in function SMRegExpMacroAssembler::exitHandler:
    316 //
    317 //   // Restore the saved value of the PSP register, this value is whatever the
    318 //   // caller had saved in it, not any actual SP value, and it must not be
    319 //   // overwritten subsequently.
    320 //
    321 //   The original source for these comments was a patch for bug 1445907.
    322 //
    323 // * MacroAssembler-arm64.h has an interesting comment in the retn()
    324 //   function:
    325 //
    326 //   syncStackPtr();  // SP is always used to transmit the stack between calls.
    327 //
    328 //   Same comment at abiret() in that file, and in MacroAssembler-arm64.cpp,
    329 //   at callWithABIPre and callWithABIPost.
    330 //
    331 // * In Trampoline-arm64.cpp function JitRuntime::generateVMWrapper we find
    332 //
    333 //   // SP is used to transfer stack across call boundaries.
    334 //   masm.initPseudoStackPtr();
    335 //
    336 //   after the return point of a callWithVMWrapper.  The only reasonable
    337 //   conclusion from all those (assuming they are right) is that SP == PSP.
    338 //
    339 // * Wasm-Baseline does not use the PSP, but as Wasm-Ion code requires SP==PSP
    340 //   and tiered code can have Baseline->Ion calls, Baseline will set PSP=SP
    341 //   before a call to wasm code.
    342 //
    343 //                               ================
    344 
    345 // StackPointer is intentionally undefined on ARM64 to prevent misuse: using
    346 // sp as a base register is only valid if sp % 16 == 0.
    347 static constexpr Register RealStackPointer{Registers::sp};
    348 
        // x28 is dedicated as the pseudo stack pointer (PSP); see the SMDOC
        // comment above for the SP/PSP conventions.
    349 static constexpr Register PseudoStackPointer{Registers::x28};
    350 static constexpr ARMRegister PseudoStackPointer64 = {Registers::x28, 64};
    351 static constexpr ARMRegister PseudoStackPointer32 = {Registers::x28, 32};
    352 
        // AAPCS64 integer argument registers, x0..x7.
    353 static constexpr Register IntArgReg0{Registers::x0};
    354 static constexpr Register IntArgReg1{Registers::x1};
    355 static constexpr Register IntArgReg2{Registers::x2};
    356 static constexpr Register IntArgReg3{Registers::x3};
    357 static constexpr Register IntArgReg4{Registers::x4};
    358 static constexpr Register IntArgReg5{Registers::x5};
    359 static constexpr Register IntArgReg6{Registers::x6};
    360 static constexpr Register IntArgReg7{Registers::x7};
    361 
    362 // Define unsized Registers.
    363 #define DEFINE_UNSIZED_REGISTERS(N) \
    364  static constexpr Register r##N{Registers::x##N};
    365 REGISTER_CODE_LIST(DEFINE_UNSIZED_REGISTERS)
    366 #undef DEFINE_UNSIZED_REGISTERS
        // Named aliases for special-purpose registers.
    367 static constexpr Register ip0{Registers::x16};
    368 static constexpr Register ip1{Registers::x17};
    369 static constexpr Register fp{Registers::x29};
    370 static constexpr Register lr{Registers::x30};
    371 static constexpr Register rzr{Registers::xzr};
    372 
    373 // Import VIXL registers into the js::jit namespace.
    374 #define IMPORT_VIXL_REGISTERS(N)                  \
    375  static constexpr ARMRegister w##N = vixl::w##N; \
    376  static constexpr ARMRegister x##N = vixl::x##N;
    377 REGISTER_CODE_LIST(IMPORT_VIXL_REGISTERS)
    378 #undef IMPORT_VIXL_REGISTERS
    379 static constexpr ARMRegister wzr = vixl::wzr;
    380 static constexpr ARMRegister xzr = vixl::xzr;
    381 static constexpr ARMRegister wsp = vixl::wsp;
    382 static constexpr ARMRegister sp = vixl::sp;
    383 
    384 // Import VIXL VRegisters into the js::jit namespace.
    385 #define IMPORT_VIXL_VREGISTERS(N)                   \
    386  static constexpr ARMFPRegister s##N = vixl::s##N; \
    387  static constexpr ARMFPRegister d##N = vixl::d##N;
    388 REGISTER_CODE_LIST(IMPORT_VIXL_VREGISTERS)
    389 #undef IMPORT_VIXL_VREGISTERS
    390 
    391 static constexpr ValueOperand JSReturnOperand = ValueOperand(JSReturnReg);
    392 
    393 // Registers used by RegExpMatcher and RegExpExecMatch stubs (do not use
    394 // JSReturnOperand).
    395 static constexpr Register RegExpMatcherRegExpReg = CallTempReg0;
    396 static constexpr Register RegExpMatcherStringReg = CallTempReg1;
    397 static constexpr Register RegExpMatcherLastIndexReg = CallTempReg2;
    398 
    399 // Registers used by RegExpExecTest stub (do not use ReturnReg).
    400 static constexpr Register RegExpExecTestRegExpReg = CallTempReg0;
    401 static constexpr Register RegExpExecTestStringReg = CallTempReg1;
    402 
    403 // Registers used by RegExpSearcher stub (do not use ReturnReg).
    404 static constexpr Register RegExpSearcherRegExpReg = CallTempReg0;
    405 static constexpr Register RegExpSearcherStringReg = CallTempReg1;
    406 static constexpr Register RegExpSearcherLastIndexReg = CallTempReg2;
    407 
    408 static constexpr Register JSReturnReg_Type = r3;
    409 static constexpr Register JSReturnReg_Data = r2;
    410 
        // NOTE(review): pairs the d14 encoding with the Single width — looks
        // intentional and long-standing, but confirm before relying on it.
    411 static constexpr FloatRegister NANReg = {FloatRegisters::d14,
    412                                         FloatRegisters::Single};
    413 // N.B. r8 isn't listed as an aapcs temp register, but we can use it as such
    414 // because we never use return-structs.
    415 static constexpr Register CallTempNonArgRegs[] = {r8,  r9,  r10, r11,
    416                                                  r12, r13, r14, r15};
    417 static const uint32_t NumCallTempNonArgRegs = std::size(CallTempNonArgRegs);
    418 
        // JIT frames are kept 16-byte aligned, i.e. two Values.
    419 static constexpr uint32_t JitStackAlignment = 16;
    420 
    421 static constexpr uint32_t JitStackValueAlignment =
    422    JitStackAlignment / sizeof(Value);
    423 static_assert(JitStackAlignment % sizeof(Value) == 0 &&
    424                  JitStackValueAlignment >= 1,
    425              "Stack alignment should be a non-zero multiple of sizeof(Value)");
    426 
    427 static constexpr uint32_t SimdMemoryAlignment = 16;
    428 
    429 static_assert(CodeAlignment % SimdMemoryAlignment == 0,
    430              "Code alignment should be larger than any of the alignments "
    431              "which are used for "
    432              "the constant sections of the code buffer.  Thus it should be "
    433              "larger than the "
    434              "alignment for SIMD constants.");
    435 
    436 static const uint32_t WasmStackAlignment = SimdMemoryAlignment;
        // One fixed-width A64 instruction (4 bytes).
    437 static const uint32_t WasmTrapInstructionLength = 4;
    438 
    439 // See comments in wasm::GenerateFunctionPrologue.  The difference between these
    440 // is the size of the largest callable prologue on the platform.
    441 static constexpr uint32_t WasmCheckedCallEntryOffset = 0u;
    442 
    443 class Assembler : public vixl::Assembler {
    444 public:
    445  Assembler() : vixl::Assembler() {}
    446 
    447  using Condition = vixl::Condition;
    448 
    449  void finish();
    450  bool appendRawCode(const uint8_t* code, size_t numBytes);
    451  bool reserve(size_t size);
    452  bool swapBuffer(wasm::Bytes& bytes);
    453 
    454  // Emit the jump table, returning the BufferOffset to the first entry in the
    455  // table.
    456  BufferOffset emitExtendedJumpTable();
    457  BufferOffset ExtendedJumpTable_;
    458  void executableCopy(uint8_t* buffer);
    459 
    460  BufferOffset immPool(ARMRegister dest, uint8_t* value, vixl::LoadLiteralOp op,
    461                       const LiteralDoc& doc,
    462                       ARMBuffer::PoolEntry* pe = nullptr);
    463  BufferOffset immPool64(ARMRegister dest, uint64_t value,
    464                         ARMBuffer::PoolEntry* pe = nullptr);
    465  BufferOffset fImmPool(ARMFPRegister dest, uint8_t* value,
    466                        vixl::LoadLiteralOp op, const LiteralDoc& doc);
    467  BufferOffset fImmPool64(ARMFPRegister dest, double value);
    468  BufferOffset fImmPool32(ARMFPRegister dest, float value);
    469 
    470  uint32_t currentOffset() const { return nextOffset().getOffset(); }
    471 
    472  void bind(Label* label) { bind(label, nextOffset()); }
    473  void bind(Label* label, BufferOffset boff);
    474  void bind(CodeLabel* label) { label->target()->bind(currentOffset()); }
    475 
    476  void setUnlimitedBuffer() { armbuffer_.setUnlimited(); }
    477  bool oom() const {
    478    return AssemblerShared::oom() || armbuffer_.oom() ||
    479           jumpRelocations_.oom() || dataRelocations_.oom();
    480  }
    481 
    482  void copyJumpRelocationTable(uint8_t* dest) const {
    483    if (jumpRelocations_.length()) {
    484      memcpy(dest, jumpRelocations_.buffer(), jumpRelocations_.length());
    485    }
    486  }
    487  void copyDataRelocationTable(uint8_t* dest) const {
    488    if (dataRelocations_.length()) {
    489      memcpy(dest, dataRelocations_.buffer(), dataRelocations_.length());
    490    }
    491  }
    492 
    493  size_t jumpRelocationTableBytes() const { return jumpRelocations_.length(); }
    494  size_t dataRelocationTableBytes() const { return dataRelocations_.length(); }
    495  size_t bytesNeeded() const {
    496    return SizeOfCodeGenerated() + jumpRelocationTableBytes() +
    497           dataRelocationTableBytes();
    498  }
    499 
    500  void processCodeLabels(uint8_t* rawCode) {
    501    for (const CodeLabel& label : codeLabels_) {
    502      Bind(rawCode, label);
    503    }
    504  }
    505 
    506  static void UpdateLoad64Value(Instruction* inst0, uint64_t value);
    507 
    508  static void Bind(uint8_t* rawCode, const CodeLabel& label) {
    509    auto mode = label.linkMode();
    510    size_t patchAtOffset = label.patchAt().offset();
    511    size_t targetOffset = label.target().offset();
    512 
    513    if (mode == CodeLabel::MoveImmediate) {
    514      Instruction* inst = (Instruction*)(rawCode + patchAtOffset);
    515      Assembler::UpdateLoad64Value(inst, (uint64_t)(rawCode + targetOffset));
    516    } else {
    517      *reinterpret_cast<const void**>(rawCode + patchAtOffset) =
    518          rawCode + targetOffset;
    519    }
    520  }
    521 
    522  void retarget(Label* cur, Label* next);
    523 
    524  // The buffer is about to be linked. Ensure any constant pools or
    525  // excess bookkeeping has been flushed to the instruction stream.
    526  void flush() { armbuffer_.flushPool(); }
    527 
    528  void comment(const char* msg) {
    529 #ifdef JS_DISASM_ARM64
    530    spew_.spew("; %s", msg);
    531 #endif
    532  }
    533 
    534  void setPrinter(Sprinter* sp) {
    535 #ifdef JS_DISASM_ARM64
    536    spew_.setPrinter(sp);
    537 #endif
    538  }
    539 
    540  static bool SupportsFloatingPoint() { return true; }
    541  static bool SupportsUnalignedAccesses() { return true; }
    542  static bool SupportsFastUnalignedFPAccesses() { return true; }
    543  static bool SupportsWasmSimd() { return true; }
    544  static bool SupportsFloat64To16() { return true; }
    545  static bool SupportsFloat32To16() { return true; }
    546 
    547  static bool HasRoundInstruction(RoundingMode mode) {
    548    switch (mode) {
    549      case RoundingMode::Up:
    550      case RoundingMode::Down:
    551      case RoundingMode::NearestTiesToEven:
    552      case RoundingMode::TowardsZero:
    553        return true;
    554    }
    555    MOZ_CRASH("unexpected mode");
    556  }
    557 
    558 protected:
    559  // Add a jump whose target is unknown until finalization.
    560  // The jump may not be patched at runtime.
    561  void addPendingJump(BufferOffset src, ImmPtr target, RelocationKind kind);
    562 
    563 public:
    564  static uint32_t PatchWrite_NearCallSize() { return 4; }
    565 
    566  static uint32_t NopSize() { return 4; }
    567 
    568  static void PatchWrite_NearCall(CodeLocationLabel start,
    569                                  CodeLocationLabel toCall);
    570  static void PatchDataWithValueCheck(CodeLocationLabel label,
    571                                      PatchedImmPtr newValue,
    572                                      PatchedImmPtr expected);
    573 
    574  static void PatchDataWithValueCheck(CodeLocationLabel label, ImmPtr newValue,
    575                                      ImmPtr expected);
    576 
    577  static void PatchWrite_Imm32(CodeLocationLabel label, Imm32 imm) {
    578    // Raw is going to be the return address.
    579    uint32_t* raw = (uint32_t*)label.raw();
    580    // Overwrite the 4 bytes before the return address, which will end up being
    581    // the call instruction.
    582    *(raw - 1) = imm.value;
    583  }
    584  static uint32_t AlignDoubleArg(uint32_t offset) {
    585    MOZ_CRASH("AlignDoubleArg()");
    586  }
    587  static uintptr_t GetPointer(uint8_t* ptr) {
    588    Instruction* i = reinterpret_cast<Instruction*>(ptr);
    589    uint64_t ret = i->Literal64();
    590    return ret;
    591  }
    592 
    593  // Toggle a jmp or cmp emitted by toggledJump().
    594  static void ToggleToJmp(CodeLocationLabel inst_);
    595  static void ToggleToCmp(CodeLocationLabel inst_);
    596  static void ToggleCall(CodeLocationLabel inst_, bool enabled);
    597 
    598  static void TraceJumpRelocations(JSTracer* trc, JitCode* code,
    599                                   CompactBufferReader& reader);
    600  static void TraceDataRelocations(JSTracer* trc, JitCode* code,
    601                                   CompactBufferReader& reader);
    602 
    603  void assertNoGCThings() const {
    604 #ifdef DEBUG
    605    MOZ_ASSERT(dataRelocations_.length() == 0);
    606    for (auto& j : pendingJumps_) {
    607      MOZ_ASSERT(j.kind == RelocationKind::HARDCODED);
    608    }
    609 #endif
    610  }
    611 
    612 public:
    613  // A Jump table entry is 2 instructions, with 8 bytes of raw data
    614  static const size_t SizeOfJumpTableEntry = 16;
    615 
    616  struct JumpTableEntry {
    617    uint32_t ldr;
    618    uint32_t br;
    619    void* data;
    620 
    621    Instruction* getLdr() { return reinterpret_cast<Instruction*>(&ldr); }
    622  };
    623 
    624  // Offset of the patchable target for the given entry.
    625  static const size_t OffsetOfJumpTableEntryPointer = 8;
    626 
    627 public:
    628  void writeCodePointer(CodeLabel* label) {
    629    armbuffer_.assertNoPoolAndNoNops();
    630    uintptr_t x = uintptr_t(-1);
    631    BufferOffset off = EmitData(&x, sizeof(uintptr_t));
    632    label->patchAt()->bind(off.getOffset());
    633  }
    634 
  // Disassembly-based verification of wasm heap accesses is not
  // implemented on ARM64; reaching this is a bug.
  void verifyHeapAccessDisassembly(uint32_t begin, uint32_t end,
                                   const Disassembler::HeapAccess& heapAccess) {
    MOZ_CRASH("verifyHeapAccessDisassembly");
  }
    639 
    640 protected:
  // Structure for fixing up pc-relative loads/jumps when the machine
  // code gets moved (executable copy, gc, etc.).
  struct RelativePatch {
    BufferOffset offset;  // Buffer location of the patchable instruction.
    void* target;         // Address the instruction must end up referring to.
    RelocationKind kind;  // How this patch is relocated/traced.

    RelativePatch(BufferOffset offset, void* target, RelocationKind kind)
        : offset(offset), target(target), kind(kind) {}
  };

  // List of jumps for which the target is either unknown until finalization,
  // or cannot be known due to GC. Each entry here requires a unique entry
  // in the extended jump table, and is patched at finalization.
  js::Vector<RelativePatch, 8, SystemAllocPolicy> pendingJumps_;

  // Final output formatters: compact encodings of the jump and data
  // relocation tables consumed by TraceJumpRelocations /
  // TraceDataRelocations.
  CompactBufferWriter jumpRelocations_;
  CompactBufferWriter dataRelocations_;
    660 };
    661 
// Number of integer and floating-point registers used for argument
// passing (codes 0..7; see GetIntArgReg / GetFloatArgReg below).
static const uint32_t NumIntArgRegs = 8;
static const uint32_t NumFloatArgRegs = 8;
    664 
// Assigns ABI locations (register or stack) to successive call arguments.
class ABIArgGenerator : public ABIArgGeneratorShared {
 public:
  explicit ABIArgGenerator(ABIKind kind)
      : ABIArgGeneratorShared(kind),
        intRegIndex_(0),
        floatRegIndex_(0),
        current_() {}

  // Compute the location of the next argument, of type |argType|, and
  // remember it as current().
  ABIArg next(MIRType argType);
  // The location most recently computed by next().
  ABIArg& current() { return current_; }

 protected:
  unsigned intRegIndex_;    // Integer argument registers consumed so far.
  unsigned floatRegIndex_;  // FP argument registers consumed so far.
  ABIArg current_;          // Result of the most recent next() call.
};
    681 
    682 // See "ABI special registers" in Assembler-shared.h for more information.
// See "ABI special registers" in Assembler-shared.h for more information.
// Registers that are never used for argument passing (per their names);
// usable as scratch around ABI calls.
static constexpr Register ABINonArgReg0 = r8;
static constexpr Register ABINonArgReg1 = r9;
static constexpr Register ABINonArgReg2 = r10;
static constexpr Register ABINonArgReg3 = r11;

// See "ABI special registers" in Assembler-shared.h for more information.
// Avoid d31 which is the ScratchDoubleReg_.
static constexpr FloatRegister ABINonArgDoubleReg = {FloatRegisters::s16,
                                                     FloatRegisters::Single};

// See "ABI special registers" in Assembler-shared.h for more information.
static constexpr Register ABINonArgReturnReg0 = r8;
static constexpr Register ABINonArgReturnReg1 = r9;
static constexpr Register ABINonVolatileReg{Registers::x19};

// See "ABI special registers" in Assembler-shared.h for more information.
static constexpr Register ABINonArgReturnVolatileReg = r8;

// See "ABI special registers" in Assembler-shared.h, and "The WASM ABIs" in
// WasmFrame.h for more information.
static constexpr Register InstanceReg{Registers::x23};
static constexpr Register HeapReg{Registers::x21};

// Registers used for wasm table calls. These registers must be disjoint
// from the ABI argument registers, InstanceReg and each other.
static constexpr Register WasmTableCallScratchReg0 = ABINonArgReg0;
static constexpr Register WasmTableCallScratchReg1 = ABINonArgReg1;
static constexpr Register WasmTableCallSigReg = ABINonArgReg2;
static constexpr Register WasmTableCallIndexReg = ABINonArgReg3;

// Registers used for ref calls.
static constexpr Register WasmCallRefCallScratchReg0 = ABINonArgReg0;
static constexpr Register WasmCallRefCallScratchReg1 = ABINonArgReg1;
static constexpr Register WasmCallRefCallScratchReg2 = ABINonArgReg2;
static constexpr Register WasmCallRefReg = ABINonArgReg3;

// Registers used for wasm tail calls operations.
static constexpr Register WasmTailCallInstanceScratchReg = ABINonArgReg1;
static constexpr Register WasmTailCallRAScratchReg = lr;
static constexpr Register WasmTailCallFPScratchReg = ABINonArgReg3;

// Register used as a scratch along the return path in the fast js -> wasm stub
// code.  This must not overlap ReturnReg, JSReturnOperand, or InstanceReg.
// It must be a volatile register.
static constexpr Register WasmJitEntryReturnScratch = r9;
    728 
    729 static inline bool GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
    730                                Register* out) {
    731  if (usedIntArgs >= NumIntArgRegs) {
    732    return false;
    733  }
    734  *out = Register::FromCode(usedIntArgs);
    735  return true;
    736 }
    737 
    738 static inline bool GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs,
    739                                  FloatRegister* out) {
    740  if (usedFloatArgs >= NumFloatArgRegs) {
    741    return false;
    742  }
    743  *out = FloatRegister::FromCode(usedFloatArgs);
    744  return true;
    745 }
    746 
    747 // Get a register in which we plan to put a quantity that will be used as an
    748 // integer argument.  This differs from GetIntArgReg in that if we have no more
    749 // actual argument registers to use we will fall back on using whatever
    750 // CallTempReg* don't overlap the argument registers, and only fail once those
    751 // run out too.
    752 static inline bool GetTempRegForIntArg(uint32_t usedIntArgs,
    753                                       uint32_t usedFloatArgs, Register* out) {
    754  if (GetIntArgReg(usedIntArgs, usedFloatArgs, out)) {
    755    return true;
    756  }
    757  // Unfortunately, we have to assume things about the point at which
    758  // GetIntArgReg returns false, because we need to know how many registers it
    759  // can allocate.
    760  usedIntArgs -= NumIntArgRegs;
    761  if (usedIntArgs >= NumCallTempNonArgRegs) {
    762    return false;
    763  }
    764  *out = CallTempNonArgRegs[usedIntArgs];
    765  return true;
    766 }
    767 
    768 // Forbids nop filling for testing purposes.  Nestable, but nested calls have
    769 // no effect on the no-nops status; it is only the top level one that counts.
// Forbids nop filling for testing purposes.  Nestable, but nested calls have
// no effect on the no-nops status; it is only the top level one that counts.
class AutoForbidNops {
 protected:
  Assembler* asm_;  // The assembler whose nop filling is suppressed.

 public:
  // RAII: entering the scope calls enterNoNops(), leaving it calls
  // leaveNoNops().
  explicit AutoForbidNops(Assembler* asm_) : asm_(asm_) { asm_->enterNoNops(); }
  ~AutoForbidNops() { asm_->leaveNoNops(); }
};
    778 
// Forbids pool generation during a specified interval.  Nestable, but nested
// calls must imply a no-pool area of the assembler buffer that is completely
// contained within the area implied by the outermost level call.
class AutoForbidPoolsAndNops : public AutoForbidNops {
 public:
  // |maxInst| bounds the number of instructions emitted while pools are
  // forbidden; it is passed through to Assembler::enterNoPool().
  AutoForbidPoolsAndNops(Assembler* asm_, size_t maxInst)
      : AutoForbidNops(asm_) {
    asm_->enterNoPool(maxInst);
  }
  ~AutoForbidPoolsAndNops() { asm_->leaveNoPool(); }
};
    790 
    791 }  // namespace jit
    792 }  // namespace js
    793 
    794 #endif  // A64_ASSEMBLER_A64_H_