// Assembler-arm64.cpp
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * vim: set ts=8 sts=2 et sw=2 tw=80: 3 * This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "jit/arm64/Assembler-arm64.h" 8 9 #include "mozilla/DebugOnly.h" 10 #include "mozilla/MathAlgorithms.h" 11 #include "mozilla/Maybe.h" 12 13 #include "gc/Marking.h" 14 #include "jit/arm64/Architecture-arm64.h" 15 #include "jit/arm64/MacroAssembler-arm64.h" 16 #include "jit/arm64/vixl/Disasm-vixl.h" 17 #include "jit/AutoWritableJitCode.h" 18 #include "jit/ExecutableAllocator.h" 19 #include "vm/Realm.h" 20 #include "wasm/WasmFrame.h" 21 22 #include "gc/StoreBuffer-inl.h" 23 24 using namespace js; 25 using namespace js::jit; 26 27 using mozilla::CountLeadingZeroes32; 28 using mozilla::DebugOnly; 29 30 ABIArg ABIArgGenerator::next(MIRType type) { 31 switch (type) { 32 case MIRType::Int32: 33 case MIRType::Int64: 34 case MIRType::Pointer: 35 case MIRType::WasmAnyRef: 36 case MIRType::WasmArrayData: 37 case MIRType::StackResults: 38 if (intRegIndex_ == NumIntArgRegs) { 39 current_ = ABIArg(stackOffset_); 40 stackOffset_ += sizeof(uintptr_t); 41 break; 42 } 43 current_ = ABIArg(Register::FromCode(intRegIndex_)); 44 intRegIndex_++; 45 break; 46 47 case MIRType::Float32: 48 case MIRType::Double: 49 if (floatRegIndex_ == NumFloatArgRegs) { 50 current_ = ABIArg(stackOffset_); 51 stackOffset_ += sizeof(double); 52 break; 53 } 54 current_ = ABIArg(FloatRegister(FloatRegisters::Encoding(floatRegIndex_), 55 type == MIRType::Double 56 ? 
FloatRegisters::Double 57 : FloatRegisters::Single)); 58 floatRegIndex_++; 59 break; 60 61 #ifdef ENABLE_WASM_SIMD 62 case MIRType::Simd128: 63 if (floatRegIndex_ == NumFloatArgRegs) { 64 stackOffset_ = AlignBytes(stackOffset_, SimdMemoryAlignment); 65 current_ = ABIArg(stackOffset_); 66 stackOffset_ += FloatRegister::SizeOfSimd128; 67 break; 68 } 69 current_ = ABIArg(FloatRegister(FloatRegisters::Encoding(floatRegIndex_), 70 FloatRegisters::Simd128)); 71 floatRegIndex_++; 72 break; 73 #endif 74 75 default: 76 // Note that in Assembler-x64.cpp there's a special case for Win64 which 77 // does not allow passing SIMD by value. Since there's Win64 on ARM64 we 78 // may need to duplicate that logic here. 79 MOZ_CRASH("Unexpected argument type"); 80 } 81 return current_; 82 } 83 84 namespace js { 85 namespace jit { 86 87 void Assembler::finish() { 88 armbuffer_.flushPool(); 89 90 // The extended jump table is part of the code buffer. 91 ExtendedJumpTable_ = emitExtendedJumpTable(); 92 Assembler::FinalizeCode(); 93 } 94 95 bool Assembler::appendRawCode(const uint8_t* code, size_t numBytes) { 96 flush(); 97 return armbuffer_.appendRawCode(code, numBytes); 98 } 99 100 bool Assembler::reserve(size_t size) { 101 // This buffer uses fixed-size chunks so there's no point in reserving 102 // now vs. on-demand. 103 return !oom(); 104 } 105 106 bool Assembler::swapBuffer(wasm::Bytes& bytes) { 107 // For now, specialize to the one use case. As long as wasm::Bytes is a 108 // Vector, not a linked-list of chunks, there's not much we can do other 109 // than copy. 
110 MOZ_ASSERT(bytes.empty()); 111 if (!bytes.resize(bytesNeeded())) { 112 return false; 113 } 114 armbuffer_.executableCopy(bytes.begin()); 115 return true; 116 } 117 118 BufferOffset Assembler::emitExtendedJumpTable() { 119 if (!pendingJumps_.length() || oom()) { 120 return BufferOffset(); 121 } 122 123 armbuffer_.flushPool(); 124 armbuffer_.align(SizeOfJumpTableEntry); 125 126 BufferOffset tableOffset = armbuffer_.nextOffset(); 127 128 for (size_t i = 0; i < pendingJumps_.length(); i++) { 129 // Each JumpTableEntry is of the form: 130 // LDR ip0 [PC, 8] 131 // BR ip0 132 // [Patchable 8-byte constant low bits] 133 // [Patchable 8-byte constant high bits] 134 DebugOnly<size_t> preOffset = size_t(armbuffer_.nextOffset().getOffset()); 135 136 // The unguarded use of ScratchReg64 here is OK: 137 // 138 // - The present function is called from code that does not claim any 139 // scratch registers, we're done compiling user code and are emitting jump 140 // tables. Hence the scratch registers are available when we enter. 141 // 142 // - The pendingJumps_ represent jumps to other code sections that are not 143 // known to this MacroAssembler instance, and we're generating code to 144 // jump there. It is safe to assume that any code using such a generated 145 // branch to an unknown location did not store any valuable value in any 146 // scratch register. Hence the scratch registers can definitely be 147 // clobbered here. 148 // 149 // - Scratch register usage is restricted to sequential control flow within 150 // MacroAssembler functions. Hence the scratch registers will not be 151 // clobbered by ldr and br as they are Assembler primitives, not 152 // MacroAssembler functions. 
153 154 ldr(ScratchReg64, ptrdiff_t(8 / vixl::kInstructionSize)); 155 br(ScratchReg64); 156 157 DebugOnly<size_t> prePointer = size_t(armbuffer_.nextOffset().getOffset()); 158 MOZ_ASSERT_IF(!oom(), 159 prePointer - preOffset == OffsetOfJumpTableEntryPointer); 160 161 brk(0x0); 162 brk(0x0); 163 164 DebugOnly<size_t> postOffset = size_t(armbuffer_.nextOffset().getOffset()); 165 166 MOZ_ASSERT_IF(!oom(), postOffset - preOffset == SizeOfJumpTableEntry); 167 } 168 169 if (oom()) { 170 return BufferOffset(); 171 } 172 173 return tableOffset; 174 } 175 176 void Assembler::executableCopy(uint8_t* buffer) { 177 // Copy the code and all constant pools into the output buffer. 178 armbuffer_.executableCopy(buffer); 179 180 // Patch any relative jumps that target code outside the buffer. 181 // The extended jump table may be used for distant jumps. 182 for (size_t i = 0; i < pendingJumps_.length(); i++) { 183 RelativePatch& rp = pendingJumps_[i]; 184 MOZ_ASSERT(rp.target); 185 186 Instruction* target = (Instruction*)rp.target; 187 Instruction* branch = (Instruction*)(buffer + rp.offset.getOffset()); 188 JumpTableEntry* extendedJumpTable = reinterpret_cast<JumpTableEntry*>( 189 buffer + ExtendedJumpTable_.getOffset()); 190 if (branch->BranchType() != vixl::UnknownBranchType) { 191 if (branch->IsTargetReachable(target)) { 192 branch->SetImmPCOffsetTarget(target); 193 } else { 194 JumpTableEntry* entry = &extendedJumpTable[i]; 195 branch->SetImmPCOffsetTarget(entry->getLdr()); 196 entry->data = target; 197 } 198 } else { 199 // Currently a two-instruction call, it should be possible to optimize 200 // this into a single instruction call + nop in some instances, but this 201 // will work. 
202 } 203 } 204 } 205 206 BufferOffset Assembler::immPool(ARMRegister dest, uint8_t* value, 207 vixl::LoadLiteralOp op, const LiteralDoc& doc, 208 ARMBuffer::PoolEntry* pe) { 209 uint32_t inst = op | Rt(dest); 210 const size_t numInst = 1; 211 const unsigned sizeOfPoolEntryInBytes = 4; 212 const unsigned numPoolEntries = sizeof(value) / sizeOfPoolEntryInBytes; 213 return allocLiteralLoadEntry(numInst, numPoolEntries, (uint8_t*)&inst, value, 214 doc, pe); 215 } 216 217 BufferOffset Assembler::immPool64(ARMRegister dest, uint64_t value, 218 ARMBuffer::PoolEntry* pe) { 219 return immPool(dest, (uint8_t*)&value, vixl::LDR_x_lit, LiteralDoc(value), 220 pe); 221 } 222 223 BufferOffset Assembler::fImmPool(ARMFPRegister dest, uint8_t* value, 224 vixl::LoadLiteralOp op, 225 const LiteralDoc& doc) { 226 uint32_t inst = op | Rt(dest); 227 const size_t numInst = 1; 228 const unsigned sizeOfPoolEntryInBits = 32; 229 const unsigned numPoolEntries = dest.size() / sizeOfPoolEntryInBits; 230 return allocLiteralLoadEntry(numInst, numPoolEntries, (uint8_t*)&inst, value, 231 doc); 232 } 233 234 BufferOffset Assembler::fImmPool64(ARMFPRegister dest, double value) { 235 return fImmPool(dest, (uint8_t*)&value, vixl::LDR_d_lit, LiteralDoc(value)); 236 } 237 238 BufferOffset Assembler::fImmPool32(ARMFPRegister dest, float value) { 239 return fImmPool(dest, (uint8_t*)&value, vixl::LDR_s_lit, LiteralDoc(value)); 240 } 241 242 void Assembler::bind(Label* label, BufferOffset targetOffset) { 243 #ifdef JS_DISASM_ARM64 244 spew_.spewBind(label); 245 #endif 246 // Nothing has seen the label yet: just mark the location. 247 // If we've run out of memory, don't attempt to modify the buffer which may 248 // not be there. Just mark the label as bound to the (possibly bogus) 249 // targetOffset. 250 if (!label->used() || oom()) { 251 label->bind(targetOffset.getOffset()); 252 return; 253 } 254 255 // Get the most recent instruction that used the label, as stored in the 256 // label. 
This instruction is the head of an implicit linked list of label 257 // uses. 258 BufferOffset branchOffset(label); 259 260 while (branchOffset.assigned()) { 261 // Before overwriting the offset in this instruction, get the offset of 262 // the next link in the implicit branch list. 263 BufferOffset nextOffset = NextLink(branchOffset); 264 265 // Linking against the actual (Instruction*) would be invalid, 266 // since that Instruction could be anywhere in memory. 267 // Instead, just link against the correct relative offset, assuming 268 // no constant pools, which will be taken into consideration 269 // during finalization. 270 ptrdiff_t relativeByteOffset = 271 targetOffset.getOffset() - branchOffset.getOffset(); 272 Instruction* link = getInstructionAt(branchOffset); 273 274 // This branch may still be registered for callbacks. Stop tracking it. 275 vixl::ImmBranchType branchType = link->BranchType(); 276 vixl::ImmBranchRangeType branchRange = 277 Instruction::ImmBranchTypeToRange(branchType); 278 if (branchRange < vixl::NumShortBranchRangeTypes) { 279 BufferOffset deadline( 280 branchOffset.getOffset() + 281 Instruction::ImmBranchMaxForwardOffset(branchRange)); 282 armbuffer_.unregisterBranchDeadline(branchRange, deadline); 283 } 284 285 // Is link able to reach the label? 286 if (link->IsPCRelAddressing() || 287 link->IsTargetReachable(link + relativeByteOffset)) { 288 // Write a new relative offset into the instruction. 289 link->SetImmPCOffsetTarget(link + relativeByteOffset); 290 } else { 291 // This is a short-range branch, and it can't reach the label directly. 292 // Verify that it branches to a veneer: an unconditional branch. 293 MOZ_ASSERT(getInstructionAt(nextOffset)->BranchType() == 294 vixl::UncondBranchType); 295 } 296 297 branchOffset = nextOffset; 298 } 299 300 // Bind the label, so that future uses may encode the offset immediately. 
301 label->bind(targetOffset.getOffset()); 302 } 303 304 void Assembler::addPendingJump(BufferOffset src, ImmPtr target, 305 RelocationKind reloc) { 306 MOZ_ASSERT(target.value != nullptr); 307 308 if (reloc == RelocationKind::JITCODE) { 309 jumpRelocations_.writeUnsigned(src.getOffset()); 310 } 311 312 // This jump is not patchable at runtime. Extended jump table entry 313 // requirements cannot be known until finalization, so to be safe, give each 314 // jump and entry. This also causes GC tracing of the target. 315 enoughMemory_ &= 316 pendingJumps_.append(RelativePatch(src, target.value, reloc)); 317 } 318 319 void Assembler::PatchWrite_NearCall(CodeLocationLabel start, 320 CodeLocationLabel toCall) { 321 Instruction* dest = (Instruction*)start.raw(); 322 ptrdiff_t relTarget = (Instruction*)toCall.raw() - dest; 323 ptrdiff_t relTarget00 = relTarget >> 2; 324 MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0); 325 MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00)); 326 327 bl(dest, relTarget00); 328 } 329 330 void Assembler::PatchDataWithValueCheck(CodeLocationLabel label, 331 PatchedImmPtr newValue, 332 PatchedImmPtr expected) { 333 Instruction* i = (Instruction*)label.raw(); 334 void** pValue = i->LiteralAddress<void**>(); 335 MOZ_ASSERT(*pValue == expected.value); 336 *pValue = newValue.value; 337 } 338 339 void Assembler::PatchDataWithValueCheck(CodeLocationLabel label, 340 ImmPtr newValue, ImmPtr expected) { 341 PatchDataWithValueCheck(label, PatchedImmPtr(newValue.value), 342 PatchedImmPtr(expected.value)); 343 } 344 345 void Assembler::ToggleToJmp(CodeLocationLabel inst_) { 346 Instruction* i = (Instruction*)inst_.raw(); 347 MOZ_ASSERT(i->IsAddSubImmediate()); 348 349 // Refer to instruction layout in ToggleToCmp(). 
350 int imm19 = (int)i->Bits(23, 5); 351 MOZ_ASSERT(vixl::IsInt19(imm19)); 352 353 b(i, imm19, Always); 354 } 355 356 void Assembler::ToggleToCmp(CodeLocationLabel inst_) { 357 Instruction* i = (Instruction*)inst_.raw(); 358 MOZ_ASSERT(i->IsCondB()); 359 360 int imm19 = i->ImmCondBranch(); 361 // bit 23 is reserved, and the simulator throws an assertion when this happens 362 // It'll be messy to decode, but we can steal bit 30 or bit 31. 363 MOZ_ASSERT(vixl::IsInt18(imm19)); 364 365 // 31 - 64-bit if set, 32-bit if unset. (OK!) 366 // 30 - sub if set, add if unset. (OK!) 367 // 29 - SetFlagsBit. Must be set. 368 // 22:23 - ShiftAddSub. (OK!) 369 // 10:21 - ImmAddSub. (OK!) 370 // 5:9 - First source register (Rn). (OK!) 371 // 0:4 - Destination Register. Must be xzr. 372 373 // From the above, there is a safe 19-bit contiguous region from 5:23. 374 Emit(i, vixl::ThirtyTwoBits | vixl::AddSubImmediateFixed | vixl::SUB | 375 Flags(vixl::SetFlags) | Rd(vixl::xzr) | 376 (imm19 << vixl::Rn_offset)); 377 } 378 379 void Assembler::ToggleCall(CodeLocationLabel inst_, bool enabled) { 380 const Instruction* first = reinterpret_cast<Instruction*>(inst_.raw()); 381 Instruction* load; 382 Instruction* call; 383 384 // There might be a constant pool at the very first instruction. 385 first = first->skipPool(); 386 387 // Skip the stack pointer restore instruction. 388 if (first->IsStackPtrSync()) { 389 first = first->InstructionAtOffset(vixl::kInstructionSize)->skipPool(); 390 } 391 392 load = const_cast<Instruction*>(first); 393 394 // The call instruction follows the load, but there may be an injected 395 // constant pool. 
396 call = const_cast<Instruction*>( 397 load->InstructionAtOffset(vixl::kInstructionSize)->skipPool()); 398 399 if (call->IsBLR() == enabled) { 400 return; 401 } 402 403 if (call->IsBLR()) { 404 // If the second instruction is blr(), then we have: 405 // ldr x17, [pc, offset] 406 // blr x17 407 MOZ_ASSERT(load->IsLDR()); 408 // We want to transform this to: 409 // adr xzr, [pc, offset] 410 // nop 411 int32_t offset = load->ImmLLiteral(); 412 adr(load, xzr, int32_t(offset)); 413 nop(call); 414 } else { 415 // We have: 416 // adr xzr, [pc, offset] (or ldr x17, [pc, offset]) 417 // nop 418 MOZ_ASSERT(load->IsADR() || load->IsLDR()); 419 MOZ_ASSERT(call->IsNOP()); 420 // Transform this to: 421 // ldr x17, [pc, offset] 422 // blr x17 423 int32_t offset = (int)load->ImmPCRawOffset(); 424 MOZ_ASSERT(vixl::IsInt19(offset)); 425 ldr(load, ScratchReg2_64, int32_t(offset)); 426 blr(call, ScratchReg2_64); 427 } 428 } 429 430 // Patches loads generated by MacroAssemblerCompat::mov(CodeLabel*, Register). 431 // The loading code is implemented in movePatchablePtr(). 432 void Assembler::UpdateLoad64Value(Instruction* inst0, uint64_t value) { 433 MOZ_ASSERT(inst0->IsLDR()); 434 uint64_t* literal = inst0->LiteralAddress<uint64_t*>(); 435 *literal = value; 436 } 437 438 class RelocationIterator { 439 CompactBufferReader reader_; 440 uint32_t offset_ = 0; 441 442 public: 443 explicit RelocationIterator(CompactBufferReader& reader) : reader_(reader) {} 444 445 bool read() { 446 if (!reader_.more()) { 447 return false; 448 } 449 offset_ = reader_.readUnsigned(); 450 return true; 451 } 452 453 uint32_t offset() const { return offset_; } 454 }; 455 456 static JitCode* CodeFromJump(JitCode* code, uint8_t* jump) { 457 const Instruction* inst = (const Instruction*)jump; 458 uint8_t* target; 459 460 // We're expecting a call created by MacroAssembler::call(JitCode*). 
461 // It looks like: 462 // 463 // ldr scratch, [pc, offset] 464 // blr scratch 465 // 466 // If the call has been toggled by ToggleCall(), it looks like: 467 // 468 // adr xzr, [pc, offset] 469 // nop 470 // 471 // There might be a constant pool at the very first instruction. 472 // See also ToggleCall(). 473 inst = inst->skipPool(); 474 475 // Skip the stack pointer restore instruction. 476 if (inst->IsStackPtrSync()) { 477 inst = inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool(); 478 } 479 480 if (inst->BranchType() != vixl::UnknownBranchType) { 481 // This is an immediate branch. 482 target = (uint8_t*)inst->ImmPCOffsetTarget(); 483 } else if (inst->IsLDR()) { 484 // This is an ldr+blr call that is enabled. See ToggleCall(). 485 mozilla::DebugOnly<const Instruction*> nextInst = 486 inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool(); 487 MOZ_ASSERT(nextInst->IsNOP() || nextInst->IsBLR()); 488 target = (uint8_t*)inst->Literal64(); 489 } else if (inst->IsADR()) { 490 // This is a disabled call: adr+nop. See ToggleCall(). 491 mozilla::DebugOnly<const Instruction*> nextInst = 492 inst->InstructionAtOffset(vixl::kInstructionSize)->skipPool(); 493 MOZ_ASSERT(nextInst->IsNOP()); 494 ptrdiff_t offset = inst->ImmPCRawOffset() << vixl::kLiteralEntrySizeLog2; 495 // This is what Literal64 would do with the corresponding ldr. 496 memcpy(&target, inst + offset, sizeof(target)); 497 } else { 498 MOZ_CRASH("Unrecognized jump instruction."); 499 } 500 501 // If the jump is within the code buffer, it uses the extended jump table. 
502 if (target >= code->raw() && 503 target < code->raw() + code->instructionsSize()) { 504 MOZ_ASSERT(target + Assembler::SizeOfJumpTableEntry <= 505 code->raw() + code->instructionsSize()); 506 507 uint8_t** patchablePtr = 508 (uint8_t**)(target + Assembler::OffsetOfJumpTableEntryPointer); 509 target = *patchablePtr; 510 } 511 512 return JitCode::FromExecutable(target); 513 } 514 515 void Assembler::TraceJumpRelocations(JSTracer* trc, JitCode* code, 516 CompactBufferReader& reader) { 517 RelocationIterator iter(reader); 518 while (iter.read()) { 519 JitCode* child = CodeFromJump(code, code->raw() + iter.offset()); 520 TraceManuallyBarrieredEdge(trc, &child, "rel32"); 521 MOZ_ASSERT(child == CodeFromJump(code, code->raw() + iter.offset())); 522 } 523 } 524 525 /* static */ 526 void Assembler::TraceDataRelocations(JSTracer* trc, JitCode* code, 527 CompactBufferReader& reader) { 528 mozilla::Maybe<AutoWritableJitCode> awjc; 529 530 uint8_t* buffer = code->raw(); 531 532 while (reader.more()) { 533 size_t offset = reader.readUnsigned(); 534 Instruction* load = (Instruction*)&buffer[offset]; 535 536 // The only valid traceable operation is a 64-bit load to an ARMRegister. 537 // Refer to movePatchablePtr() for generation. 538 MOZ_ASSERT(load->Mask(vixl::LoadLiteralMask) == vixl::LDR_x_lit); 539 540 uintptr_t* literalAddr = load->LiteralAddress<uintptr_t*>(); 541 uintptr_t literal = *literalAddr; 542 543 // Data relocations can be for Values or for raw pointers. If a Value is 544 // zero-tagged, we can trace it as if it were a raw pointer. If a Value 545 // is not zero-tagged, we have to interpret it as a Value to ensure that the 546 // tag bits are masked off to recover the actual pointer. 547 548 if (literal >> JSVAL_TAG_SHIFT) { 549 // This relocation is a Value with a non-zero tag. 
550 Value v = Value::fromRawBits(literal); 551 TraceManuallyBarrieredEdge(trc, &v, "jit-masm-value"); 552 if (*literalAddr != v.asRawBits()) { 553 if (awjc.isNothing()) { 554 awjc.emplace(code); 555 } 556 *literalAddr = v.asRawBits(); 557 } 558 continue; 559 } 560 561 // This relocation is a raw pointer or a Value with a zero tag. 562 // No barriers needed since the pointers are constants. 563 gc::Cell* cell = reinterpret_cast<gc::Cell*>(literal); 564 MOZ_ASSERT(gc::IsCellPointerValid(cell)); 565 TraceManuallyBarrieredGenericPointerEdge(trc, &cell, "jit-masm-ptr"); 566 if (uintptr_t(cell) != literal) { 567 if (awjc.isNothing()) { 568 awjc.emplace(code); 569 } 570 *literalAddr = uintptr_t(cell); 571 } 572 } 573 } 574 575 void Assembler::retarget(Label* label, Label* target) { 576 #ifdef JS_DISASM_ARM64 577 spew_.spewRetarget(label, target); 578 #endif 579 if (label->used()) { 580 if (target->bound()) { 581 bind(label, BufferOffset(target)); 582 } else if (target->used()) { 583 // The target is not bound but used. Prepend label's branch list 584 // onto target's. 585 BufferOffset labelBranchOffset(label); 586 587 // Find the head of the use chain for label. 588 BufferOffset next = NextLink(labelBranchOffset); 589 while (next.assigned()) { 590 labelBranchOffset = next; 591 next = NextLink(next); 592 } 593 594 // Then patch the head of label's use chain to the tail of target's 595 // use chain, prepending the entire use chain of target. 596 SetNextLink(labelBranchOffset, BufferOffset(target)); 597 target->use(label->offset()); 598 } else { 599 // The target is unbound and unused. We can just take the head of 600 // the list hanging off of label, and dump that into target. 601 target->use(label->offset()); 602 } 603 } 604 label->reset(); 605 } 606 607 } // namespace jit 608 } // namespace js