MacroAssembler-arm64.cpp (136379B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/arm64/MacroAssembler-arm64.h"

#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"

#include "jsmath.h"

#include "jit/arm64/MoveEmitter-arm64.h"
#include "jit/arm64/SharedICRegisters-arm64.h"
#include "jit/Bailouts.h"
#include "jit/BaselineFrame.h"
#include "jit/JitRuntime.h"
#include "jit/MacroAssembler.h"
#include "jit/ProcessExecutableMemory.h"
#include "util/Memory.h"
#include "vm/BigIntType.h"
#include "vm/JitActivation.h"  // js::jit::JitActivation
#include "vm/JSContext.h"
#include "vm/StringType.h"
#include "wasm/WasmStubs.h"

#include "jit/MacroAssembler-inl.h"

namespace js {
namespace jit {

// Operand width selector for the R() helper below.
enum class Width { _32 = 32, _64 = 64 };

// View a jit::Register as a 64-bit (X) vixl register.
static inline ARMRegister X(Register r) { return ARMRegister(r, 64); }

// As above, but for RegisterOrSP, which may name the stack pointer and so
// must go through the MacroAssembler to be resolved.
static inline ARMRegister X(MacroAssembler& masm, RegisterOrSP r) {
  return masm.toARMRegister(r, 64);
}

// View a jit::Register as a 32-bit (W) vixl register.
static inline ARMRegister W(Register r) { return ARMRegister(r, 32); }

// View a jit::Register with the width selected by |w|.
static inline ARMRegister R(Register r, Width w) {
  return ARMRegister(r, unsigned(w));
}

#ifdef DEBUG
// Number of significant low bits a boxed payload of |type| may occupy; all
// bits above this must be zero before tagging. Doubles and "unknown" have no
// fixed payload width and must never be passed here.
static constexpr int32_t PayloadSize(JSValueType type) {
  switch (type) {
    case JSVAL_TYPE_UNDEFINED:
    case JSVAL_TYPE_NULL:
      return 0;
    case JSVAL_TYPE_BOOLEAN:
      return 1;
    case JSVAL_TYPE_INT32:
    case JSVAL_TYPE_MAGIC:
      return 32;
    case JSVAL_TYPE_STRING:
    case JSVAL_TYPE_SYMBOL:
    case JSVAL_TYPE_PRIVATE_GCTHING:
    case JSVAL_TYPE_BIGINT:
    case JSVAL_TYPE_OBJECT:
      // Pointer-sized payloads use every bit below the tag.
      return JSVAL_TAG_SHIFT;
    case JSVAL_TYPE_DOUBLE:
    case JSVAL_TYPE_UNKNOWN:
      break;
  }
  MOZ_CRASH("bad value type");
}
70 #endif 71 72 static void AssertValidPayload(MacroAssemblerCompat& masm, JSValueType type, 73 Register payload, Register scratch) { 74 #ifdef DEBUG 75 // All bits above the payload must be zeroed. 76 Label upperBitsZeroed; 77 masm.Lsr(ARMRegister(scratch, 64), ARMRegister(payload, 64), 78 PayloadSize(type)); 79 masm.Cbz(ARMRegister(scratch, 64), &upperBitsZeroed); 80 masm.breakpoint(); 81 masm.bind(&upperBitsZeroed); 82 #endif 83 } 84 85 void MacroAssemblerCompat::tagValue(JSValueType type, Register payload, 86 ValueOperand dest) { 87 MOZ_ASSERT(type != JSVAL_TYPE_UNDEFINED && type != JSVAL_TYPE_NULL); 88 89 #ifdef DEBUG 90 { 91 vixl::UseScratchRegisterScope temps(this); 92 Register scratch = temps.AcquireX().asUnsized(); 93 94 AssertValidPayload(*this, type, payload, scratch); 95 } 96 #endif 97 98 Orr(ARMRegister(dest.valueReg(), 64), ARMRegister(payload, 64), 99 Operand(ImmShiftedTag(type).value)); 100 } 101 102 void MacroAssemblerCompat::boxValue(JSValueType type, Register src, 103 Register dest) { 104 MOZ_ASSERT(type != JSVAL_TYPE_UNDEFINED && type != JSVAL_TYPE_NULL); 105 MOZ_ASSERT(src != dest); 106 107 AssertValidPayload(*this, type, src, dest); 108 109 Orr(ARMRegister(dest, 64), ARMRegister(src, 64), 110 Operand(ImmShiftedTag(type).value)); 111 } 112 113 void MacroAssemblerCompat::boxValue(Register type, Register src, 114 Register dest) { 115 MOZ_ASSERT(src != dest); 116 117 #ifdef DEBUG 118 { 119 vixl::UseScratchRegisterScope temps(this); 120 Register scratch = temps.AcquireX().asUnsized(); 121 122 Label check, isNullOrUndefined, isBoolean, isInt32OrMagic, isPointerSized; 123 124 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_NULL), 125 &isNullOrUndefined); 126 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_UNDEFINED), 127 &isNullOrUndefined); 128 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_BOOLEAN), 129 &isBoolean); 130 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_INT32), 131 &isInt32OrMagic); 132 
asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_MAGIC), 133 &isInt32OrMagic); 134 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_STRING), 135 &isPointerSized); 136 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_SYMBOL), 137 &isPointerSized); 138 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_PRIVATE_GCTHING), 139 &isPointerSized); 140 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_BIGINT), 141 &isPointerSized); 142 asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_OBJECT), 143 &isPointerSized); 144 breakpoint(); 145 { 146 bind(&isNullOrUndefined); 147 move32(Imm32(PayloadSize(JSVAL_TYPE_NULL)), scratch); 148 jump(&check); 149 } 150 { 151 bind(&isBoolean); 152 move32(Imm32(PayloadSize(JSVAL_TYPE_BOOLEAN)), scratch); 153 jump(&check); 154 } 155 { 156 bind(&isInt32OrMagic); 157 move32(Imm32(PayloadSize(JSVAL_TYPE_INT32)), scratch); 158 jump(&check); 159 } 160 { 161 bind(&isPointerSized); 162 move32(Imm32(PayloadSize(JSVAL_TYPE_STRING)), scratch); 163 // fall-through 164 } 165 bind(&check); 166 167 // All bits above the payload must be zeroed. 
168 Label upperBitsZeroed; 169 Lsr(ARMRegister(scratch, 64), ARMRegister(src, 64), 170 ARMRegister(scratch, 64)); 171 Cbz(ARMRegister(scratch, 64), &upperBitsZeroed); 172 breakpoint(); 173 bind(&upperBitsZeroed); 174 } 175 #endif 176 177 Orr(ARMRegister(dest, 64), ARMRegister(type, 64), 178 Operand(JSVAL_TAG_MAX_DOUBLE)); 179 Orr(ARMRegister(dest, 64), ARMRegister(src, 64), 180 Operand(ARMRegister(dest, 64), vixl::LSL, JSVAL_TAG_SHIFT)); 181 } 182 183 #ifdef ENABLE_WASM_SIMD 184 bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) { 185 switch (op) { 186 case wasm::SimdOp::I8x16Shl: 187 case wasm::SimdOp::I8x16ShrU: 188 case wasm::SimdOp::I8x16ShrS: 189 *mask = 7; 190 break; 191 case wasm::SimdOp::I16x8Shl: 192 case wasm::SimdOp::I16x8ShrU: 193 case wasm::SimdOp::I16x8ShrS: 194 *mask = 15; 195 break; 196 case wasm::SimdOp::I32x4Shl: 197 case wasm::SimdOp::I32x4ShrU: 198 case wasm::SimdOp::I32x4ShrS: 199 *mask = 31; 200 break; 201 case wasm::SimdOp::I64x2Shl: 202 case wasm::SimdOp::I64x2ShrU: 203 case wasm::SimdOp::I64x2ShrS: 204 *mask = 63; 205 break; 206 default: 207 MOZ_CRASH("Unexpected shift operation"); 208 } 209 return true; 210 } 211 #endif 212 213 void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) { 214 ARMRegister dest(output, 32); 215 Fcvtns(dest, ARMFPRegister(input, 64)); 216 217 { 218 vixl::UseScratchRegisterScope temps(this); 219 const ARMRegister scratch32 = temps.AcquireW(); 220 221 Mov(scratch32, Operand(0xff)); 222 Cmp(dest, scratch32); 223 Csel(dest, dest, scratch32, LessThan); 224 } 225 226 Cmp(dest, Operand(0)); 227 Csel(dest, dest, wzr, GreaterThan); 228 } 229 230 js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() { 231 return *static_cast<js::jit::MacroAssembler*>(this); 232 } 233 234 const js::jit::MacroAssembler& MacroAssemblerCompat::asMasm() const { 235 return *static_cast<const js::jit::MacroAssembler*>(this); 236 } 237 238 vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() { 
239 return *static_cast<vixl::MacroAssembler*>(this); 240 } 241 242 const vixl::MacroAssembler& MacroAssemblerCompat::asVIXL() const { 243 return *static_cast<const vixl::MacroAssembler*>(this); 244 } 245 246 void MacroAssemblerCompat::mov(CodeLabel* label, Register dest) { 247 BufferOffset bo = movePatchablePtr(ImmWord(/* placeholder */ 0), dest); 248 label->patchAt()->bind(bo.getOffset()); 249 label->setLinkMode(CodeLabel::MoveImmediate); 250 } 251 252 BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmPtr ptr, Register dest) { 253 const size_t numInst = 1; // Inserting one load instruction. 254 const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes. 255 uint8_t* literalAddr = (uint8_t*)(&ptr.value); // TODO: Should be const. 256 257 // Scratch space for generating the load instruction. 258 // 259 // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary 260 // index to the corresponding PoolEntry in the instruction itself. 261 // 262 // That index will be fixed up later when finishPool() 263 // walks over all marked loads and calls PatchConstantPoolLoad(). 264 uint32_t instructionScratch = 0; 265 266 // Emit the instruction mask in the scratch space. 267 // The offset doesn't matter: it will be fixed up later. 268 vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64), 269 0); 270 271 // Add the entry to the pool, fix up the LDR imm19 offset, 272 // and add the completed instruction to the buffer. 273 return allocLiteralLoadEntry(numInst, numPoolEntries, 274 (uint8_t*)&instructionScratch, literalAddr); 275 } 276 277 BufferOffset MacroAssemblerCompat::movePatchablePtr(ImmWord ptr, 278 Register dest) { 279 const size_t numInst = 1; // Inserting one load instruction. 280 const unsigned numPoolEntries = 2; // Every pool entry is 4 bytes. 281 uint8_t* literalAddr = (uint8_t*)(&ptr.value); 282 283 // Scratch space for generating the load instruction. 
284 // 285 // allocLiteralLoadEntry() will use InsertIndexIntoTag() to store a temporary 286 // index to the corresponding PoolEntry in the instruction itself. 287 // 288 // That index will be fixed up later when finishPool() 289 // walks over all marked loads and calls PatchConstantPoolLoad(). 290 uint32_t instructionScratch = 0; 291 292 // Emit the instruction mask in the scratch space. 293 // The offset doesn't matter: it will be fixed up later. 294 vixl::Assembler::ldr((Instruction*)&instructionScratch, ARMRegister(dest, 64), 295 0); 296 297 // Add the entry to the pool, fix up the LDR imm19 offset, 298 // and add the completed instruction to the buffer. 299 return allocLiteralLoadEntry(numInst, numPoolEntries, 300 (uint8_t*)&instructionScratch, literalAddr); 301 } 302 303 void MacroAssemblerCompat::loadPrivate(const Address& src, Register dest) { 304 loadPtr(src, dest); 305 } 306 307 void MacroAssemblerCompat::handleFailureWithHandlerTail( 308 Label* profilerExitTail, Label* bailoutTail, 309 uint32_t* returnValueCheckOffset) { 310 // Fail rather than silently create wrong code. 311 MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); 312 313 // Reserve space for exception information. 314 int64_t size = (sizeof(ResumeFromException) + 7) & ~7; 315 Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(size)); 316 syncStackPtr(); 317 318 MOZ_ASSERT(!x0.Is(PseudoStackPointer64)); 319 Mov(x0, PseudoStackPointer64); 320 321 // Call the handler. 
322 using Fn = void (*)(ResumeFromException* rfe); 323 asMasm().setupUnalignedABICall(r1); 324 asMasm().passABIArg(r0); 325 asMasm().callWithABI<Fn, HandleException>( 326 ABIType::General, CheckUnsafeCallWithABI::DontCheckHasExitFrame); 327 328 *returnValueCheckOffset = asMasm().currentOffset(); 329 330 Label entryFrame; 331 Label catch_; 332 Label finally; 333 Label returnBaseline; 334 Label returnIon; 335 Label bailout; 336 Label wasmInterpEntry; 337 Label wasmCatch; 338 339 // Check the `asMasm` calls above didn't mess with the StackPointer identity. 340 MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); 341 342 loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfKind()), r0); 343 asMasm().branch32(Assembler::Equal, r0, 344 Imm32(ExceptionResumeKind::EntryFrame), &entryFrame); 345 asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Catch), 346 &catch_); 347 asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Finally), 348 &finally); 349 asMasm().branch32(Assembler::Equal, r0, 350 Imm32(ExceptionResumeKind::ForcedReturnBaseline), 351 &returnBaseline); 352 asMasm().branch32(Assembler::Equal, r0, 353 Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon); 354 asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::Bailout), 355 &bailout); 356 asMasm().branch32(Assembler::Equal, r0, 357 Imm32(ExceptionResumeKind::WasmInterpEntry), 358 &wasmInterpEntry); 359 asMasm().branch32(Assembler::Equal, r0, Imm32(ExceptionResumeKind::WasmCatch), 360 &wasmCatch); 361 362 breakpoint(); // Invalid kind. 363 364 // No exception handler. Load the error value, restore state and return from 365 // the entry frame. 
366 bind(&entryFrame); 367 moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand); 368 loadPtr( 369 Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), 370 FramePointer); 371 loadPtr( 372 Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), 373 PseudoStackPointer); 374 375 // `retn` does indeed sync the stack pointer, but before doing that it reads 376 // from the stack. Consequently, if we remove this call to syncStackPointer 377 // then we take on the requirement to prove that the immediately preceding 378 // loadPtr produces a value for PSP which maintains the SP <= PSP invariant. 379 // That's a proof burden we don't want to take on. In general it would be 380 // good to move (at some time in the future, not now) to a world where 381 // *every* assignment to PSP or SP is followed immediately by a copy into 382 // the other register. That would make all required correctness proofs 383 // trivial in the sense that it requires only local inspection of code 384 // immediately following (dominated by) any such assignment. 385 syncStackPtr(); 386 retn(Imm32(1 * sizeof(void*))); // Pop from stack and return. 387 388 // If we found a catch handler, this must be a baseline frame. Restore state 389 // and jump to the catch block. 390 bind(&catch_); 391 loadPtr(Address(PseudoStackPointer, ResumeFromException::offsetOfTarget()), 392 r0); 393 loadPtr( 394 Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), 395 FramePointer); 396 loadPtr( 397 Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), 398 PseudoStackPointer); 399 syncStackPtr(); 400 Br(x0); 401 402 // If we found a finally block, this must be a baseline frame. Push three 403 // values expected by the finally block: the exception, the exception stack, 404 // and BooleanValue(true). 
405 bind(&finally); 406 ARMRegister exception = x1; 407 Ldr(exception, MemOperand(PseudoStackPointer64, 408 ResumeFromException::offsetOfException())); 409 410 ARMRegister exceptionStack = x2; 411 Ldr(exceptionStack, 412 MemOperand(PseudoStackPointer64, 413 ResumeFromException::offsetOfExceptionStack())); 414 415 Ldr(x0, 416 MemOperand(PseudoStackPointer64, ResumeFromException::offsetOfTarget())); 417 Ldr(ARMRegister(FramePointer, 64), 418 MemOperand(PseudoStackPointer64, 419 ResumeFromException::offsetOfFramePointer())); 420 Ldr(PseudoStackPointer64, 421 MemOperand(PseudoStackPointer64, 422 ResumeFromException::offsetOfStackPointer())); 423 syncStackPtr(); 424 push(exception); 425 push(exceptionStack); 426 pushValue(BooleanValue(true)); 427 Br(x0); 428 429 // Return BaselineFrame->returnValue() to the caller. 430 // Used in debug mode and for GeneratorReturn. 431 Label profilingInstrumentation; 432 bind(&returnBaseline); 433 loadPtr( 434 Address(PseudoStackPointer, ResumeFromException::offsetOfFramePointer()), 435 FramePointer); 436 loadPtr( 437 Address(PseudoStackPointer, ResumeFromException::offsetOfStackPointer()), 438 PseudoStackPointer); 439 // See comment further up beginning "`retn` does indeed sync the stack 440 // pointer". That comment applies here too. 441 syncStackPtr(); 442 loadValue(Address(FramePointer, BaselineFrame::reverseOffsetOfReturnValue()), 443 JSReturnOperand); 444 jump(&profilingInstrumentation); 445 446 // Return the given value to the caller. 447 bind(&returnIon); 448 loadValue( 449 Address(PseudoStackPointer, ResumeFromException::offsetOfException()), 450 JSReturnOperand); 451 loadPtr( 452 Address(PseudoStackPointer, offsetof(ResumeFromException, framePointer)), 453 FramePointer); 454 loadPtr( 455 Address(PseudoStackPointer, offsetof(ResumeFromException, stackPointer)), 456 PseudoStackPointer); 457 syncStackPtr(); 458 459 // If profiling is enabled, then update the lastProfilingFrame to refer to 460 // caller frame before returning. 
This code is shared by ForcedReturnIon 461 // and ForcedReturnBaseline. 462 bind(&profilingInstrumentation); 463 { 464 Label skipProfilingInstrumentation; 465 AbsoluteAddress addressOfEnabled( 466 asMasm().runtime()->geckoProfiler().addressOfEnabled()); 467 asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0), 468 &skipProfilingInstrumentation); 469 jump(profilerExitTail); 470 bind(&skipProfilingInstrumentation); 471 } 472 473 movePtr(FramePointer, PseudoStackPointer); 474 syncStackPtr(); 475 vixl::MacroAssembler::Pop(ARMRegister(FramePointer, 64)); 476 477 vixl::MacroAssembler::Pop(vixl::lr); 478 syncStackPtr(); 479 vixl::MacroAssembler::Ret(vixl::lr); 480 481 // If we are bailing out to baseline to handle an exception, jump to the 482 // bailout tail stub. Load 1 (true) in x0 (ReturnReg) to indicate success. 483 bind(&bailout); 484 Ldr(x2, MemOperand(PseudoStackPointer64, 485 ResumeFromException::offsetOfBailoutInfo())); 486 Ldr(PseudoStackPointer64, 487 MemOperand(PseudoStackPointer64, 488 ResumeFromException::offsetOfStackPointer())); 489 syncStackPtr(); 490 Mov(x0, 1); 491 jump(bailoutTail); 492 493 // Reset SP and FP; SP is pointing to the unwound return address to the wasm 494 // interpreter entry, so we can just ret(). 495 bind(&wasmInterpEntry); 496 Ldr(x29, MemOperand(PseudoStackPointer64, 497 ResumeFromException::offsetOfFramePointer())); 498 Ldr(PseudoStackPointer64, 499 MemOperand(PseudoStackPointer64, 500 ResumeFromException::offsetOfStackPointer())); 501 syncStackPtr(); 502 Mov(x23, int64_t(wasm::InterpFailInstanceReg)); 503 ret(); 504 505 // Found a wasm catch handler, restore state and jump to it. 
506 bind(&wasmCatch); 507 wasm::GenerateJumpToCatchHandler(asMasm(), PseudoStackPointer, r0, r1); 508 509 MOZ_ASSERT(GetStackPointer64().Is(PseudoStackPointer64)); 510 } 511 512 void MacroAssemblerCompat::profilerEnterFrame(Register framePtr, 513 Register scratch) { 514 asMasm().loadJSContext(scratch); 515 loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch); 516 storePtr(framePtr, 517 Address(scratch, JitActivation::offsetOfLastProfilingFrame())); 518 storePtr(ImmPtr(nullptr), 519 Address(scratch, JitActivation::offsetOfLastProfilingCallSite())); 520 } 521 522 void MacroAssemblerCompat::profilerExitFrame() { 523 jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail()); 524 } 525 526 Assembler::Condition MacroAssemblerCompat::testStringTruthy( 527 bool truthy, const ValueOperand& value) { 528 vixl::UseScratchRegisterScope temps(this); 529 const Register scratch = temps.AcquireX().asUnsized(); 530 const ARMRegister scratch32(scratch, 32); 531 const ARMRegister scratch64(scratch, 64); 532 533 MOZ_ASSERT(value.valueReg() != scratch); 534 535 unboxString(value, scratch); 536 Ldr(scratch32, MemOperand(scratch64, JSString::offsetOfLength())); 537 Cmp(scratch32, Operand(0)); 538 return truthy ? Condition::NonZero : Condition::Zero; 539 } 540 541 Assembler::Condition MacroAssemblerCompat::testBigIntTruthy( 542 bool truthy, const ValueOperand& value) { 543 vixl::UseScratchRegisterScope temps(this); 544 const Register scratch = temps.AcquireX().asUnsized(); 545 546 MOZ_ASSERT(value.valueReg() != scratch); 547 548 unboxBigInt(value, scratch); 549 load32(Address(scratch, BigInt::offsetOfDigitLength()), scratch); 550 cmp32(scratch, Imm32(0)); 551 return truthy ? Condition::NonZero : Condition::Zero; 552 } 553 554 void MacroAssemblerCompat::breakpoint() { 555 // Note, other payloads are possible, but GDB is known to misinterpret them 556 // sometimes and iloop on the breakpoint instead of stopping properly. 
  Brk(0xf000);
}

// dest = isMax ? max(lhs, rhs) : min(lhs, rhs), signed 32-bit.
void MacroAssemblerCompat::minMax32(Register lhs, Register rhs, Register dest,
                                    bool isMax) {
  auto lhs32 = ARMRegister(lhs, 32);
  auto rhs32 = vixl::Operand(ARMRegister(rhs, 32));
  auto dest32 = ARMRegister(dest, 32);

  // With the CSSC extension a single SMAX/SMIN instruction suffices.
  if (CPUHas(vixl::CPUFeatures::kCSSC)) {
    if (isMax) {
      Smax(dest32, lhs32, rhs32);
    } else {
      Smin(dest32, lhs32, rhs32);
    }
    return;
  }

  // Otherwise compare and conditionally select.
  auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThan;
  Cmp(lhs32, rhs32);
  Csel(dest32, lhs32, rhs32, cond);
}

// dest = isMax ? max(lhs, rhs) : min(lhs, rhs), signed 32-bit, with an
// immediate rhs. Special-cases rhs in {0, 1, -1} to emit shorter sequences.
void MacroAssemblerCompat::minMax32(Register lhs, Imm32 rhs, Register dest,
                                    bool isMax) {
  auto lhs32 = ARMRegister(lhs, 32);
  auto rhs32 = vixl::Operand(vixl::IntegerOperand(rhs.value));
  auto dest32 = ARMRegister(dest, 32);

  if (CPUHas(vixl::CPUFeatures::kCSSC)) {
    if (isMax) {
      Smax(dest32, lhs32, rhs32);
    } else {
      Smin(dest32, lhs32, rhs32);
    }
    return;
  }

  // max(lhs, 0): dest = lhs & ~(lhs >> 31)
  // min(lhs, 0): dest = lhs & (lhs >> 31)
  if (rhs32.GetImmediate() == 0) {
    if (isMax) {
      Bic(dest32, lhs32, vixl::Operand(lhs32, vixl::ASR, 31));
    } else {
      And(dest32, lhs32, vixl::Operand(lhs32, vixl::ASR, 31));
    }
    return;
  }

  // max(lhs, 1): lhs > 0 ? lhs : 1
  // min(lhs, 1): lhs <= 0 ? lhs : 1
  //
  // Note: Csel emits a single `csinc` instruction when the operand is 1.
  if (rhs32.GetImmediate() == 1) {
    auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThanOrEqual;
    Cmp(lhs32, vixl::Operand(0));
    Csel(dest32, lhs32, rhs32, cond);
    return;
  }

  // max(lhs, -1): lhs >= 0 ? lhs : -1
  // min(lhs, -1): lhs < 0 ? lhs : -1
  //
  // Note: Csel emits a single `csinv` instruction when the operand is -1.
  if (rhs32.GetImmediate() == -1) {
    auto cond = isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThan;
    Cmp(lhs32, vixl::Operand(0));
    Csel(dest32, lhs32, rhs32, cond);
    return;
  }

  auto cond =
      isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThanOrEqual;

  // Use scratch register when immediate can't be encoded in `cmp` instruction.
  // This avoids materializing the immediate twice.
  if (!IsImmAddSub(mozilla::Abs(rhs32.GetImmediate()))) {
    vixl::UseScratchRegisterScope temps(this);
    vixl::Register scratch32 = temps.AcquireW();

    Mov(scratch32, rhs32.GetImmediate());
    Cmp(lhs32, scratch32);
    Csel(dest32, lhs32, vixl::Operand(scratch32), cond);
    return;
  }

  // General case: copy lhs, then overwrite with the immediate if the
  // comparison does not already favor lhs.
  if (lhs != dest) {
    Mov(dest32, lhs32);
  }
  Label done;
  Cmp(lhs32, rhs32);
  B(&done, cond);
  Mov(dest32, rhs32);
  bind(&done);
}

// dest = isMax ? max(lhs, rhs) : min(lhs, rhs), signed 64-bit.
void MacroAssemblerCompat::minMaxPtr(Register lhs, Register rhs, Register dest,
                                     bool isMax) {
  auto lhs64 = ARMRegister(lhs, 64);
  auto rhs64 = vixl::Operand(ARMRegister(rhs, 64));
  auto dest64 = ARMRegister(dest, 64);

  if (CPUHas(vixl::CPUFeatures::kCSSC)) {
    if (isMax) {
      Smax(dest64, lhs64, rhs64);
    } else {
      Smin(dest64, lhs64, rhs64);
    }
    return;
  }

  auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThan;
  Cmp(lhs64, rhs64);
  Csel(dest64, lhs64, rhs64, cond);
}

// 64-bit counterpart of minMax32(Register, Imm32, ...); same special cases,
// with shifts widened from 31 to 63.
void MacroAssemblerCompat::minMaxPtr(Register lhs, ImmWord rhs, Register dest,
                                     bool isMax) {
  auto lhs64 = ARMRegister(lhs, 64);
  auto rhs64 = vixl::Operand(vixl::IntegerOperand(rhs.value));
  auto dest64 = ARMRegister(dest, 64);

  if (CPUHas(vixl::CPUFeatures::kCSSC)) {
    if (isMax) {
      Smax(dest64, lhs64, rhs64);
    } else {
      Smin(dest64, lhs64, rhs64);
    }
    return;
  }

  // max(lhs, 0): dest = lhs & ~(lhs >> 63)
  // min(lhs, 0): dest = lhs & (lhs >> 63)
  if (rhs64.GetImmediate() == 0) {
    if (isMax) {
      Bic(dest64, lhs64, vixl::Operand(lhs64, vixl::ASR, 63));
    } else {
      And(dest64, lhs64, vixl::Operand(lhs64, vixl::ASR, 63));
    }
    return;
  }

  // max(lhs, 1): lhs > 0 ? lhs : 1
  // min(lhs, 1): lhs <= 0 ? lhs : 1
  //
  // Note: Csel emits a single `csinc` instruction when the operand is 1.
  if (rhs64.GetImmediate() == 1) {
    auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThanOrEqual;
    Cmp(lhs64, vixl::Operand(0));
    Csel(dest64, lhs64, rhs64, cond);
    return;
  }

  // max(lhs, -1): lhs >= 0 ? lhs : -1
  // min(lhs, -1): lhs < 0 ? lhs : -1
  //
  // Note: Csel emits a single `csinv` instruction when the operand is -1.
  if (rhs64.GetImmediate() == -1) {
    auto cond = isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThan;
    Cmp(lhs64, vixl::Operand(0));
    Csel(dest64, lhs64, rhs64, cond);
    return;
  }

  auto cond =
      isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThanOrEqual;

  // Use scratch register when immediate can't be encoded in `cmp` instruction.
  // This avoids materializing the immediate twice.
  if (!IsImmAddSub(mozilla::Abs(rhs64.GetImmediate()))) {
    vixl::UseScratchRegisterScope temps(this);
    vixl::Register scratch64 = temps.AcquireX();

    Mov(scratch64, rhs64.GetImmediate());
    Cmp(lhs64, scratch64);
    Csel(dest64, lhs64, vixl::Operand(scratch64), cond);
    return;
  }

  if (lhs != dest) {
    Mov(dest64, lhs64);
  }
  Label done;
  Cmp(lhs64, rhs64);
  B(&done, cond);
  Mov(dest64, rhs64);
  bind(&done);
}

// Either `any` is valid or `sixtyfour` is valid. Return a 32-bit ARMRegister
// in the first case and an ARMRegister of the desired size in the latter case.

static inline ARMRegister SelectGPReg(AnyRegister any, Register64 sixtyfour,
                                      unsigned size = 64) {
  MOZ_ASSERT(any.isValid() != (sixtyfour != Register64::Invalid()));

  if (sixtyfour == Register64::Invalid()) {
    return ARMRegister(any.gpr(), 32);
  }

  return ARMRegister(sixtyfour.reg, size);
}

// Assert that `sixtyfour` is invalid and then return an FP register from `any`
// of the desired size.
762 763 static inline ARMFPRegister SelectFPReg(AnyRegister any, Register64 sixtyfour, 764 unsigned size) { 765 MOZ_ASSERT(sixtyfour == Register64::Invalid()); 766 return ARMFPRegister(any.fpu(), size); 767 } 768 769 void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access, 770 Register memoryBase_, Register ptr_, 771 AnyRegister outany, Register64 out64) { 772 access.assertOffsetInGuardPages(); 773 uint32_t offset = access.offset32(); 774 775 MOZ_ASSERT(memoryBase_ != ptr_); 776 777 ARMRegister memoryBase(memoryBase_, 64); 778 ARMRegister ptr(ptr_, 64); 779 if (offset) { 780 vixl::UseScratchRegisterScope temps(this); 781 ARMRegister scratch = temps.AcquireX(); 782 Add(scratch, ptr, Operand(offset)); 783 MemOperand srcAddr(memoryBase, scratch); 784 wasmLoadImpl(access, srcAddr, outany, out64); 785 } else { 786 MemOperand srcAddr(memoryBase, ptr); 787 wasmLoadImpl(access, srcAddr, outany, out64); 788 } 789 } 790 791 void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access, 792 MemOperand srcAddr, AnyRegister outany, 793 Register64 out64) { 794 MOZ_ASSERT_IF(access.isSplatSimd128Load() || access.isWidenSimd128Load(), 795 access.type() == Scalar::Float64); 796 797 // NOTE: the generated code must match the assembly code in gen_load in 798 // GenerateAtomicOperations.py 799 asMasm().memoryBarrierBefore(access.sync()); 800 801 FaultingCodeOffset fco; 802 switch (access.type()) { 803 case Scalar::Int8: 804 fco = Ldrsb(SelectGPReg(outany, out64), srcAddr); 805 break; 806 case Scalar::Uint8: 807 fco = Ldrb(SelectGPReg(outany, out64), srcAddr); 808 break; 809 case Scalar::Int16: 810 fco = Ldrsh(SelectGPReg(outany, out64), srcAddr); 811 break; 812 case Scalar::Uint16: 813 fco = Ldrh(SelectGPReg(outany, out64), srcAddr); 814 break; 815 case Scalar::Int32: 816 if (out64 != Register64::Invalid()) { 817 fco = Ldrsw(SelectGPReg(outany, out64), srcAddr); 818 } else { 819 fco = Ldr(SelectGPReg(outany, out64, 32), srcAddr); 820 } 821 break; 
822 case Scalar::Uint32: 823 fco = Ldr(SelectGPReg(outany, out64, 32), srcAddr); 824 break; 825 case Scalar::Int64: 826 fco = Ldr(SelectGPReg(outany, out64), srcAddr); 827 break; 828 case Scalar::Float32: 829 // LDR does the right thing also for access.isZeroExtendSimd128Load() 830 fco = Ldr(SelectFPReg(outany, out64, 32), srcAddr); 831 break; 832 case Scalar::Float64: 833 if (access.isSplatSimd128Load() || access.isWidenSimd128Load()) { 834 ScratchSimd128Scope scratch_(asMasm()); 835 ARMFPRegister scratch = Simd1D(scratch_); 836 fco = Ldr(scratch, srcAddr); 837 if (access.isSplatSimd128Load()) { 838 Dup(SelectFPReg(outany, out64, 128).V2D(), scratch, 0); 839 } else { 840 MOZ_ASSERT(access.isWidenSimd128Load()); 841 switch (access.widenSimdOp()) { 842 case wasm::SimdOp::V128Load8x8S: 843 Sshll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0); 844 break; 845 case wasm::SimdOp::V128Load8x8U: 846 Ushll(SelectFPReg(outany, out64, 128).V8H(), scratch.V8B(), 0); 847 break; 848 case wasm::SimdOp::V128Load16x4S: 849 Sshll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0); 850 break; 851 case wasm::SimdOp::V128Load16x4U: 852 Ushll(SelectFPReg(outany, out64, 128).V4S(), scratch.V4H(), 0); 853 break; 854 case wasm::SimdOp::V128Load32x2S: 855 Sshll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0); 856 break; 857 case wasm::SimdOp::V128Load32x2U: 858 Ushll(SelectFPReg(outany, out64, 128).V2D(), scratch.V2S(), 0); 859 break; 860 default: 861 MOZ_CRASH("Unexpected widening op for wasmLoad"); 862 } 863 } 864 } else { 865 // LDR does the right thing also for access.isZeroExtendSimd128Load() 866 fco = Ldr(SelectFPReg(outany, out64, 64), srcAddr); 867 } 868 break; 869 case Scalar::Simd128: 870 fco = Ldr(SelectFPReg(outany, out64, 128), srcAddr); 871 break; 872 case Scalar::Uint8Clamped: 873 case Scalar::BigInt64: 874 case Scalar::BigUint64: 875 case Scalar::Float16: 876 case Scalar::MaxTypedArrayViewType: 877 MOZ_CRASH("unexpected array type"); 878 } 879 880 
append(access, wasm::TrapMachineInsnForLoad(byteSize(access.type())), fco); 881 882 asMasm().memoryBarrierAfter(access.sync()); 883 } 884 885 // Return true if `address` can be represented as an immediate (possibly scaled 886 // by the access size) in an LDR/STR type instruction. 887 // 888 // For more about the logic here, see vixl::MacroAssembler::LoadStoreMacro(). 889 static bool IsLSImmediateOffset(uint64_t address, size_t accessByteSize) { 890 // The predicates below operate on signed values only. 891 if (address > INT64_MAX) { 892 return false; 893 } 894 895 // The access size is always a power of 2, so computing the log amounts to 896 // counting trailing zeroes. 897 unsigned logAccessSize = mozilla::CountTrailingZeroes32(accessByteSize); 898 return (MacroAssemblerCompat::IsImmLSUnscaled(int64_t(address)) || 899 MacroAssemblerCompat::IsImmLSScaled(int64_t(address), logAccessSize)); 900 } 901 902 void MacroAssemblerCompat::wasmLoadAbsolute( 903 const wasm::MemoryAccessDesc& access, Register memoryBase, uint64_t address, 904 AnyRegister output, Register64 out64) { 905 if (!IsLSImmediateOffset(address, access.byteSize())) { 906 // The access will require the constant to be loaded into a temp register. 907 // Do so here, to keep the logic in wasmLoadImpl() tractable wrt emitting 908 // trap information. 909 // 910 // Almost all constant addresses will in practice be handled by a single MOV 911 // so do not worry about additional optimizations here. 
912 vixl::UseScratchRegisterScope temps(this); 913 ARMRegister scratch = temps.AcquireX(); 914 Mov(scratch, address); 915 MemOperand srcAddr(X(memoryBase), scratch); 916 wasmLoadImpl(access, srcAddr, output, out64); 917 } else { 918 MemOperand srcAddr(X(memoryBase), address); 919 wasmLoadImpl(access, srcAddr, output, out64); 920 } 921 } 922 923 void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access, 924 AnyRegister valany, Register64 val64, 925 Register memoryBase_, Register ptr_) { 926 access.assertOffsetInGuardPages(); 927 uint32_t offset = access.offset32(); 928 929 ARMRegister memoryBase(memoryBase_, 64); 930 ARMRegister ptr(ptr_, 64); 931 if (offset) { 932 vixl::UseScratchRegisterScope temps(this); 933 ARMRegister scratch = temps.AcquireX(); 934 Add(scratch, ptr, Operand(offset)); 935 MemOperand destAddr(memoryBase, scratch); 936 wasmStoreImpl(access, destAddr, valany, val64); 937 } else { 938 MemOperand destAddr(memoryBase, ptr); 939 wasmStoreImpl(access, destAddr, valany, val64); 940 } 941 } 942 943 void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access, 944 MemOperand dstAddr, AnyRegister valany, 945 Register64 val64) { 946 // NOTE: the generated code must match the assembly code in gen_store in 947 // GenerateAtomicOperations.py 948 asMasm().memoryBarrierBefore(access.sync()); 949 950 FaultingCodeOffset fco; 951 switch (access.type()) { 952 case Scalar::Int8: 953 case Scalar::Uint8: 954 fco = Strb(SelectGPReg(valany, val64), dstAddr); 955 break; 956 case Scalar::Int16: 957 case Scalar::Uint16: 958 fco = Strh(SelectGPReg(valany, val64), dstAddr); 959 break; 960 case Scalar::Int32: 961 case Scalar::Uint32: 962 fco = Str(SelectGPReg(valany, val64), dstAddr); 963 break; 964 case Scalar::Int64: 965 fco = Str(SelectGPReg(valany, val64), dstAddr); 966 break; 967 case Scalar::Float32: 968 fco = Str(SelectFPReg(valany, val64, 32), dstAddr); 969 break; 970 case Scalar::Float64: 971 fco = Str(SelectFPReg(valany, val64, 
64), dstAddr); 972 break; 973 case Scalar::Simd128: 974 fco = Str(SelectFPReg(valany, val64, 128), dstAddr); 975 break; 976 case Scalar::Uint8Clamped: 977 case Scalar::BigInt64: 978 case Scalar::BigUint64: 979 case Scalar::Float16: 980 case Scalar::MaxTypedArrayViewType: 981 MOZ_CRASH("unexpected array type"); 982 } 983 984 append(access, wasm::TrapMachineInsnForStore(byteSize(access.type())), fco); 985 986 asMasm().memoryBarrierAfter(access.sync()); 987 } 988 989 void MacroAssemblerCompat::wasmStoreAbsolute( 990 const wasm::MemoryAccessDesc& access, AnyRegister value, Register64 value64, 991 Register memoryBase, uint64_t address) { 992 // See comments in wasmLoadAbsolute. 993 unsigned logAccessSize = mozilla::CountTrailingZeroes32(access.byteSize()); 994 if (address > INT64_MAX || !(IsImmLSScaled(int64_t(address), logAccessSize) || 995 IsImmLSUnscaled(int64_t(address)))) { 996 vixl::UseScratchRegisterScope temps(this); 997 ARMRegister scratch = temps.AcquireX(); 998 Mov(scratch, address); 999 MemOperand destAddr(X(memoryBase), scratch); 1000 wasmStoreImpl(access, destAddr, value, value64); 1001 } else { 1002 MemOperand destAddr(X(memoryBase), address); 1003 wasmStoreImpl(access, destAddr, value, value64); 1004 } 1005 } 1006 1007 void MacroAssemblerCompat::compareSimd128Int(Assembler::Condition cond, 1008 ARMFPRegister dest, 1009 ARMFPRegister lhs, 1010 ARMFPRegister rhs) { 1011 switch (cond) { 1012 case Assembler::Equal: 1013 Cmeq(dest, lhs, rhs); 1014 break; 1015 case Assembler::NotEqual: 1016 Cmeq(dest, lhs, rhs); 1017 Mvn(dest, dest); 1018 break; 1019 case Assembler::GreaterThan: 1020 Cmgt(dest, lhs, rhs); 1021 break; 1022 case Assembler::GreaterThanOrEqual: 1023 Cmge(dest, lhs, rhs); 1024 break; 1025 case Assembler::LessThan: 1026 Cmgt(dest, rhs, lhs); 1027 break; 1028 case Assembler::LessThanOrEqual: 1029 Cmge(dest, rhs, lhs); 1030 break; 1031 case Assembler::Above: 1032 Cmhi(dest, lhs, rhs); 1033 break; 1034 case Assembler::AboveOrEqual: 1035 Cmhs(dest, lhs, 
rhs); 1036 break; 1037 case Assembler::Below: 1038 Cmhi(dest, rhs, lhs); 1039 break; 1040 case Assembler::BelowOrEqual: 1041 Cmhs(dest, rhs, lhs); 1042 break; 1043 default: 1044 MOZ_CRASH("Unexpected SIMD integer condition"); 1045 } 1046 } 1047 1048 void MacroAssemblerCompat::compareSimd128Float(Assembler::Condition cond, 1049 ARMFPRegister dest, 1050 ARMFPRegister lhs, 1051 ARMFPRegister rhs) { 1052 switch (cond) { 1053 case Assembler::Equal: 1054 Fcmeq(dest, lhs, rhs); 1055 break; 1056 case Assembler::NotEqual: 1057 Fcmeq(dest, lhs, rhs); 1058 Mvn(dest, dest); 1059 break; 1060 case Assembler::GreaterThan: 1061 Fcmgt(dest, lhs, rhs); 1062 break; 1063 case Assembler::GreaterThanOrEqual: 1064 Fcmge(dest, lhs, rhs); 1065 break; 1066 case Assembler::LessThan: 1067 Fcmgt(dest, rhs, lhs); 1068 break; 1069 case Assembler::LessThanOrEqual: 1070 Fcmge(dest, rhs, lhs); 1071 break; 1072 default: 1073 MOZ_CRASH("Unexpected SIMD integer condition"); 1074 } 1075 } 1076 1077 void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs, 1078 FloatRegister dest, 1079 bool isUnsigned) { 1080 ScratchSimd128Scope scratch_(asMasm()); 1081 ARMFPRegister shift = Simd16B(scratch_); 1082 1083 Dup(shift, ARMRegister(rhs, 32)); 1084 Neg(shift, shift); 1085 1086 if (isUnsigned) { 1087 Ushl(Simd16B(dest), Simd16B(lhs), shift); 1088 } else { 1089 Sshl(Simd16B(dest), Simd16B(lhs), shift); 1090 } 1091 } 1092 1093 void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs, 1094 FloatRegister dest, 1095 bool isUnsigned) { 1096 ScratchSimd128Scope scratch_(asMasm()); 1097 ARMFPRegister shift = Simd8H(scratch_); 1098 1099 Dup(shift, ARMRegister(rhs, 32)); 1100 Neg(shift, shift); 1101 1102 if (isUnsigned) { 1103 Ushl(Simd8H(dest), Simd8H(lhs), shift); 1104 } else { 1105 Sshl(Simd8H(dest), Simd8H(lhs), shift); 1106 } 1107 } 1108 1109 void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs, 1110 FloatRegister dest, 1111 bool isUnsigned) { 1112 
  ScratchSimd128Scope scratch_(asMasm());
  ARMFPRegister shift = Simd4S(scratch_);

  Dup(shift, ARMRegister(rhs, 32));
  Neg(shift, shift);

  if (isUnsigned) {
    Ushl(Simd4S(dest), Simd4S(lhs), shift);
  } else {
    Sshl(Simd4S(dest), Simd4S(lhs), shift);
  }
}

// Variable right-shift of 2 lanes of 64 bits.  As in the narrower variants,
// the shift count is broadcast and negated so that USHL/SSHL perform a right
// shift.  Note the 64-bit source register view here (the count comes from an
// X register).
void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs,
                                             FloatRegister dest,
                                             bool isUnsigned) {
  ScratchSimd128Scope scratch_(asMasm());
  ARMFPRegister shift = Simd2D(scratch_);

  Dup(shift, ARMRegister(rhs, 64));
  Neg(shift, shift);

  if (isUnsigned) {
    Ushl(Simd2D(dest), Simd2D(lhs), shift);
  } else {
    Sshl(Simd2D(dest), Simd2D(lhs), shift);
  }
}

// Grow the frame by `amount` bytes: bump the (pseudo) stack pointer and the
// frame-depth bookkeeping together.
void MacroAssembler::reserveStack(uint32_t amount) {
  // TODO: This bumps |sp| every time we reserve using a second register.
  // It would save some instructions if we had a fixed frame size.
  vixl::MacroAssembler::Claim(Operand(amount));
  adjustFrame(amount);
}

// Push either the real SP or an ordinary register, tracking the frame depth.
void MacroAssembler::Push(RegisterOrSP reg) {
  if (IsHiddenSP(reg)) {
    push(sp);
  } else {
    push(AsRegister(reg));
  }
  adjustFrame(sizeof(intptr_t));
}

//{{{ check_macroassembler_style
// ===============================================================
// MacroAssembler high-level usage.

void MacroAssembler::flush() { Assembler::flush(); }

// ===============================================================
// Stack manipulation functions.

// Routines for saving/restoring registers on the stack.
// The format is:
//
// (highest address)
//
// integer (X) regs in any order           size: 8 * # int regs
//
// if # int regs is odd,
// then an 8 byte alignment hole           size: 0 or 8
//
// double (D) regs in any order            size: 8 * # double regs
//
// if # double regs is odd,
// then an 8 byte alignment hole           size: 0 or 8
//
// vector (Q) regs in any order            size: 16 * # vector regs
//
// (lowest address)
//
// Hence the size of the save area is 0 % 16. And, provided that the base
// (highest) address is 16-aligned, then the vector reg save/restore accesses
// will also be 16-aligned, as will pairwise operations for the double regs.
//
// Implied by this is that the format of the double and vector dump area
// corresponds with what FloatRegister::GetPushSizeInBytes computes.
// See block comment in MacroAssembler.h for more details.

// Size of the layout described above: GPRs rounded up to an even count of
// 8-byte slots, plus whatever the FP/SIMD set requires.
size_t MacroAssembler::PushRegsInMaskSizeInBytes(LiveRegisterSet set) {
  size_t numIntRegs = set.gprs().size();
  return ((numIntRegs + 1) & ~1) * sizeof(intptr_t) +
         FloatRegister::GetPushSizeInBytes(set.fpus());
}

// Generate code to dump the values in `set`, either on the stack if `dest` is
// `Nothing` or working backwards from the address denoted by `dest` if it is
// `Some`. These two cases are combined so as to minimise the chance of
// mistakenly generating different formats for the same `set`, given that the
// `Some` `dest` case is used extremely rarely.
static void PushOrStoreRegsInMask(MacroAssembler* masm, LiveRegisterSet set,
                                  mozilla::Maybe<Address> dest) {
  static_assert(sizeof(FloatRegisters::RegisterContent) == 16);

  // If we're saving to arbitrary memory, check the destination is big enough.
  if (dest) {
    mozilla::DebugOnly<size_t> bytesRequired =
        MacroAssembler::PushRegsInMaskSizeInBytes(set);
    MOZ_ASSERT(dest->offset >= 0);
    MOZ_ASSERT(((size_t)dest->offset) >= bytesRequired);
  }

  // Note the high limit point; we'll check it again later.
  mozilla::DebugOnly<size_t> maxExtentInitial =
      dest ? dest->offset : masm->framePushed();

  // Gather up the integer registers in groups of four, and either push each
  // group as a single transfer so as to minimise the number of stack pointer
  // changes, or write them individually to memory. Take care to ensure the
  // space used remains 16-aligned.
  for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more();) {
    vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg, vixl::NoCPUReg,
                                vixl::NoCPUReg};
    size_t i;
    for (i = 0; i < 4 && iter.more(); i++) {
      src[i] = ARMRegister(*iter, 64);
      ++iter;
    }
    MOZ_ASSERT(i > 0);

    if (i == 1 || i == 3) {
      // Ensure the stack remains 16-aligned: pad an odd-sized final group
      // with xzr.
      MOZ_ASSERT(!iter.more());
      src[i] = vixl::xzr;
      i++;
    }
    MOZ_ASSERT(i == 2 || i == 4);

    if (dest) {
      for (size_t j = 0; j < i; j++) {
        Register ireg = Register::FromCode(src[j].IsZero() ? Registers::xzr
                                                           : src[j].code());
        dest->offset -= sizeof(intptr_t);
        masm->storePtr(ireg, *dest);
      }
    } else {
      masm->adjustFrame(i * 8);
      masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
    }
  }

  // Now the same for the FP double registers. Note that because of how
  // ReduceSetForPush works, an underlying AArch64 SIMD/FP register can either
  // be present as a double register, or as a V128 register, but not both.
  // Firstly, round up the registers to be pushed.

  FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
  vixl::CPURegister allSrcs[FloatRegisters::TotalPhys];
  size_t numAllSrcs = 0;

  for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    if (reg.isDouble()) {
      MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
      allSrcs[numAllSrcs] = ARMFPRegister(reg, 64);
      numAllSrcs++;
    } else {
      MOZ_ASSERT(reg.isSimd128());
    }
  }
  MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);

  if ((numAllSrcs & 1) == 1) {
    // We've got an odd number of doubles. In order to maintain 16-alignment,
    // push the last register twice. We'll skip over the duplicate in
    // PopRegsInMaskIgnore.
    allSrcs[numAllSrcs] = allSrcs[numAllSrcs - 1];
    numAllSrcs++;
  }
  MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);
  MOZ_RELEASE_ASSERT((numAllSrcs & 1) == 0);

  // And now generate the transfers.
  size_t i;
  if (dest) {
    for (i = 0; i < numAllSrcs; i++) {
      FloatRegister freg =
          FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
                        FloatRegisters::Kind::Double);
      dest->offset -= sizeof(double);
      masm->storeDouble(freg, *dest);
    }
  } else {
    i = 0;
    while (i < numAllSrcs) {
      vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
                                  vixl::NoCPUReg, vixl::NoCPUReg};
      size_t j;
      for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
        src[j] = allSrcs[j + i];
      }
      masm->adjustFrame(8 * j);
      masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
      i += j;
    }
  }
  MOZ_ASSERT(i == numAllSrcs);

  // Finally, deal with the SIMD (V128) registers. This is a bit simpler
  // as there's no need for special-casing to maintain 16-alignment.

  numAllSrcs = 0;
  for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    if (reg.isSimd128()) {
      MOZ_RELEASE_ASSERT(numAllSrcs < FloatRegisters::TotalPhys);
      allSrcs[numAllSrcs] = ARMFPRegister(reg, 128);
      numAllSrcs++;
    }
  }
  MOZ_RELEASE_ASSERT(numAllSrcs <= FloatRegisters::TotalPhys);

  // Generate the transfers.
  if (dest) {
    for (i = 0; i < numAllSrcs; i++) {
      FloatRegister freg =
          FloatRegister(FloatRegisters::FPRegisterID(allSrcs[i].code()),
                        FloatRegisters::Kind::Simd128);
      dest->offset -= FloatRegister::SizeOfSimd128;
      masm->storeUnalignedSimd128(freg, *dest);
    }
  } else {
    i = 0;
    while (i < numAllSrcs) {
      vixl::CPURegister src[4] = {vixl::NoCPUReg, vixl::NoCPUReg,
                                  vixl::NoCPUReg, vixl::NoCPUReg};
      size_t j;
      for (j = 0; j < 4 && j + i < numAllSrcs; j++) {
        src[j] = allSrcs[j + i];
      }
      masm->adjustFrame(16 * j);
      masm->vixl::MacroAssembler::Push(src[0], src[1], src[2], src[3]);
      i += j;
    }
  }
  MOZ_ASSERT(i == numAllSrcs);

  // Final overrun check.
  if (dest) {
    MOZ_ASSERT(maxExtentInitial - dest->offset ==
               MacroAssembler::PushRegsInMaskSizeInBytes(set));
  } else {
    MOZ_ASSERT(masm->framePushed() - maxExtentInitial ==
               MacroAssembler::PushRegsInMaskSizeInBytes(set));
  }
}

void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
  PushOrStoreRegsInMask(this, set, mozilla::Nothing());
}

// Note: `scratch` is unused on arm64; the shared helper uses vixl scratch
// registers internally.
void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
                                     Register scratch) {
  PushOrStoreRegsInMask(this, set, mozilla::Some(dest));
}

// This is a helper function for PopRegsInMaskIgnore below. It emits the
// loads described by dests[0] and [1] and offsets[0] and [1], generating a
// load-pair if it can.
static void GeneratePendingLoadsThenFlush(MacroAssembler* masm,
                                          vixl::CPURegister* dests,
                                          uint32_t* offsets,
                                          uint32_t transactionSize) {
  // Generate the loads ..
  if (!dests[0].IsNone()) {
    if (!dests[1].IsNone()) {
      // [0] and [1] both present.
      if (offsets[0] + transactionSize == offsets[1]) {
        // Adjacent slots: a single LDP suffices.
        masm->Ldp(dests[0], dests[1],
                  MemOperand(masm->GetStackPointer64(), offsets[0]));
      } else {
        // Theoretically we could check for a load-pair with the destinations
        // switched, but our callers will never generate that. Hence there's
        // no loss in giving up at this point and generating two loads.
        masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
        masm->Ldr(dests[1], MemOperand(masm->GetStackPointer64(), offsets[1]));
      }
    } else {
      // [0] only.
      masm->Ldr(dests[0], MemOperand(masm->GetStackPointer64(), offsets[0]));
    }
  } else {
    if (!dests[1].IsNone()) {
      // [1] only. Can't happen because callers always fill [0] before [1].
      MOZ_CRASH("GenerateLoadsThenFlush");
    } else {
      // Neither entry valid. This can happen.
    }
  }

  // .. and flush.
  dests[0] = dests[1] = vixl::NoCPUReg;
  offsets[0] = offsets[1] = 0;
}

// Restore the registers saved by PushRegsInMask, except those in `ignore`
// (whose stack slots are skipped but still accounted for).  Walks the save
// area from the lowest address (SIMD, then doubles, then GPRs -- the reverse
// of the push order) so `offset` tracks the layout documented above
// PushRegsInMaskSizeInBytes.
void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
                                         LiveRegisterSet ignore) {
  mozilla::DebugOnly<size_t> framePushedInitial = framePushed();

  // The offset of the data from the stack pointer.
  uint32_t offset = 0;

  // The set of FP/SIMD registers we need to restore.
  FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());

  // The set of registers to ignore. BroadcastToAllSizes() is used to avoid
  // any ambiguities arising from (eg) `fpuSet` containing q17 but `ignore`
  // containing d17.
  FloatRegisterSet ignoreFpusBroadcasted(
      FloatRegister::BroadcastToAllSizes(ignore.fpus()));

  // First recover the SIMD (V128) registers. This is straightforward in that
  // we don't need to think about alignment holes.

  // These three form a two-entry queue that holds loads that we know we
  // need, but which we haven't yet emitted.
  vixl::CPURegister pendingDests[2] = {vixl::NoCPUReg, vixl::NoCPUReg};
  uint32_t pendingOffsets[2] = {0, 0};
  size_t nPending = 0;

  for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    if (reg.isDouble()) {
      continue;
    }
    MOZ_RELEASE_ASSERT(reg.isSimd128());

    uint32_t offsetForReg = offset;
    offset += FloatRegister::SizeOfSimd128;

    if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
      continue;
    }

    MOZ_ASSERT(nPending <= 2);
    if (nPending == 2) {
      GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
      nPending = 0;
    }
    pendingDests[nPending] = ARMFPRegister(reg, 128);
    pendingOffsets[nPending] = offsetForReg;
    nPending++;
  }
  GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 16);
  nPending = 0;

  MOZ_ASSERT((offset % 16) == 0);

  // Now recover the FP double registers. This is more tricky in that we need
  // to skip over the lowest-addressed of them if the number of them was odd.

  if ((((fpuSet.bits() & FloatRegisters::AllDoubleMask).size()) & 1) == 1) {
    // Skip the duplicate slot pushed for 16-alignment.
    offset += sizeof(double);
  }

  for (FloatRegisterIterator iter(fpuSet); iter.more(); ++iter) {
    FloatRegister reg = *iter;
    if (reg.isSimd128()) {
      continue;
    }
    /* true but redundant, per loop above: MOZ_RELEASE_ASSERT(reg.isDouble()) */

    uint32_t offsetForReg = offset;
    offset += sizeof(double);

    if (ignoreFpusBroadcasted.hasRegisterIndex(reg)) {
      continue;
    }

    MOZ_ASSERT(nPending <= 2);
    if (nPending == 2) {
      GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
      nPending = 0;
    }
    pendingDests[nPending] = ARMFPRegister(reg, 64);
    pendingOffsets[nPending] = offsetForReg;
    nPending++;
  }
  GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
  nPending = 0;

  MOZ_ASSERT((offset % 16) == 0);
  MOZ_ASSERT(offset == set.fpus().getPushSizeInBytes());

  // And finally recover the integer registers, again skipping an alignment
  // hole if it exists.

  if ((set.gprs().size() & 1) == 1) {
    offset += sizeof(uint64_t);
  }

  for (GeneralRegisterIterator iter(set.gprs()); iter.more(); ++iter) {
    Register reg = *iter;

    uint32_t offsetForReg = offset;
    offset += sizeof(uint64_t);

    if (ignore.has(reg)) {
      continue;
    }

    MOZ_ASSERT(nPending <= 2);
    if (nPending == 2) {
      GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);
      nPending = 0;
    }
    pendingDests[nPending] = ARMRegister(reg, 64);
    pendingOffsets[nPending] = offsetForReg;
    nPending++;
  }
  GeneratePendingLoadsThenFlush(this, pendingDests, pendingOffsets, 8);

  MOZ_ASSERT((offset % 16) == 0);

  size_t bytesPushed = PushRegsInMaskSizeInBytes(set);
  MOZ_ASSERT(offset == bytesPushed);
  freeStack(bytesPushed);
}

void MacroAssembler::Push(Register reg) {
  push(reg);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(Register reg1, Register reg2, Register reg3,
                          Register reg4) {
  push(reg1, reg2, reg3, reg4);
  adjustFrame(4 * sizeof(intptr_t));
}

void MacroAssembler::Push(const Imm32 imm) {
  push(imm);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(const ImmWord imm) {
  push(imm);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(const ImmPtr imm) {
  push(imm);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(const ImmGCPtr ptr) {
  push(ptr);
  adjustFrame(sizeof(intptr_t));
}

void MacroAssembler::Push(FloatRegister f) {
  push(f);
  // See MacroAssemblerCompat::push(FloatRegister) for why we use
  // sizeof(double).
  adjustFrame(sizeof(double));
}

// Box a double into a Value-sized stack slot and account for it in the frame.
void MacroAssembler::PushBoxed(FloatRegister reg) {
  subFromStackPtr(Imm32(sizeof(double)));
  boxDouble(reg, Address(getStackPointer(), 0));
  adjustFrame(sizeof(double));
}

void MacroAssembler::Pop(Register reg) {
  pop(reg);
  adjustFrame(-1 * int64_t(sizeof(int64_t)));
}

void MacroAssembler::Pop(FloatRegister f) {
  loadDouble(Address(getStackPointer(), 0), f);
  // See MacroAssemblerCompat::pop(FloatRegister) for why we use
  // sizeof(double).
  freeStack(sizeof(double));
}

void MacroAssembler::Pop(const ValueOperand& val) {
  pop(val);
  adjustFrame(-1 * int64_t(sizeof(int64_t)));
}

// Reset the stack pointer to frame-pointer-relative depth `framePushed`,
// discarding everything pushed beyond it.
void MacroAssembler::freeStackTo(uint32_t framePushed) {
  MOZ_ASSERT(framePushed <= framePushed_);
  Sub(GetStackPointer64(), X(FramePointer), Operand(int32_t(framePushed)));
  syncStackPtr();
  framePushed_ = framePushed;
}

// ===============================================================
// Simple call functions.
//
// All of these sync the real SP with the pseudo stack pointer (PSP) before
// the branch: the callee observes stack state through SP.

CodeOffset MacroAssembler::call(Register reg) {
  // This sync has been observed (and is expected) to be necessary.
  // eg testcase: tests/debug/bug1107525.js
  syncStackPtr();
  Blr(ARMRegister(reg, 64));
  return CodeOffset(currentOffset());
}

CodeOffset MacroAssembler::call(Label* label) {
  // This sync has been observed (and is expected) to be necessary.
  // eg testcase: tests/basic/testBug504520Harder.js
  syncStackPtr();
  Bl(label);
  return CodeOffset(currentOffset());
}

void MacroAssembler::call(ImmPtr imm) {
  // This sync has been observed (and is expected) to be necessary.
  // eg testcase: asm.js/testTimeout5.js
  syncStackPtr();
  vixl::UseScratchRegisterScope temps(this);
  const Register scratch = temps.AcquireX().asUnsized();
  movePtr(imm, scratch);
  Blr(ARMRegister(scratch, 64));
}

void MacroAssembler::call(ImmWord imm) { call(ImmPtr((void*)imm.value)); }

CodeOffset MacroAssembler::call(wasm::SymbolicAddress imm) {
  vixl::UseScratchRegisterScope temps(this);
  const Register scratch = temps.AcquireX().asUnsized();
  // This sync is believed to be necessary, although no case in jit-test/tests
  // has been observed to cause SP != PSP here.
  syncStackPtr();
  movePtr(imm, scratch);
  Blr(ARMRegister(scratch, 64));
  return CodeOffset(currentOffset());
}

CodeOffset MacroAssembler::call(const Address& addr) {
  vixl::UseScratchRegisterScope temps(this);
  const Register scratch = temps.AcquireX().asUnsized();
  // This sync has been observed (and is expected) to be necessary.
  // eg testcase: tests/backup-point-bug1315634.js
  syncStackPtr();
  loadPtr(addr, scratch);
  Blr(ARMRegister(scratch, 64));
  return CodeOffset(currentOffset());
}

void MacroAssembler::call(JitCode* c) {
  vixl::UseScratchRegisterScope temps(this);
  const ARMRegister scratch64 = temps.AcquireX();
  // This sync has been observed (and is expected) to be necessary.
  // eg testcase: arrays/new-array-undefined-undefined-more-args-2.js
  syncStackPtr();
  // The target address goes in the constant pool so it can be patched later
  // (JITCODE relocation).
  BufferOffset off = immPool64(scratch64, uint64_t(c->raw()));
  addPendingJump(off, ImmPtr(c->raw()), RelocationKind::JITCODE);
  blr(scratch64);
}

CodeOffset MacroAssembler::callWithPatch() {
  // This needs to sync. Wasm goes through this one for intramodule calls.
  //
  // In other cases, wasm goes through masm.wasmCallImport(),
  // masm.wasmCallBuiltinInstanceMethod, masm.wasmCallIndirect, all of which
  // sync.
  //
  // This sync is believed to be necessary, although no case in jit-test/tests
  // has been observed to cause SP != PSP here.
  syncStackPtr();
  bl(0, LabelDoc());
  return CodeOffset(currentOffset());
}

// Repoint the BL emitted by callWithPatch (callerOffset is just past it) at
// calleeOffset.  BL takes a 26-bit signed word offset.
void MacroAssembler::patchCall(uint32_t callerOffset, uint32_t calleeOffset) {
  Instruction* inst = getInstructionAt(BufferOffset(callerOffset - 4));
  MOZ_ASSERT(inst->IsBL());
  ptrdiff_t relTarget = (int)calleeOffset - ((int)callerOffset - 4);
  ptrdiff_t relTarget00 = relTarget >> 2;
  MOZ_RELEASE_ASSERT((relTarget & 0x3) == 0);
  MOZ_RELEASE_ASSERT(vixl::IsInt26(relTarget00));
  bl(inst, relTarget00);
}

// Emit an indirect jump whose 64-bit displacement lives in two patchable
// words after the BR; both words start as UINT32_MAX until patchFarJump
// fills them in.
CodeOffset MacroAssembler::farJumpWithPatch() {
  vixl::UseScratchRegisterScope temps(this);
  const ARMRegister scratch = temps.AcquireX();
  const ARMRegister scratch2 = temps.AcquireX();

  AutoForbidPoolsAndNops afp(this,
                             /* max number of instructions in scope = */ 7);

  mozilla::DebugOnly<uint32_t> before = currentOffset();

  align(8);  // At most one nop

  Label branch;
  adr(scratch2, &branch);
  ldr(scratch, vixl::MemOperand(scratch2, 4));
  add(scratch2, scratch2, scratch);
  CodeOffset offs(currentOffset());
  bind(&branch);
  br(scratch2);
  Emit(UINT32_MAX);
  Emit(UINT32_MAX);

  mozilla::DebugOnly<uint32_t> after = currentOffset();

  MOZ_ASSERT_IF(!oom(), after - before == 24 || after - before == 28);

  return offs;
}

// Fill in the two placeholder words after a farJumpWithPatch site with the
// (low, high) halves of the signed distance to targetOffset.
void MacroAssembler::patchFarJump(CodeOffset farJump, uint32_t targetOffset) {
  Instruction* inst1 = getInstructionAt(BufferOffset(farJump.offset() + 4));
  Instruction* inst2 = getInstructionAt(BufferOffset(farJump.offset() + 8));

  int64_t distance = (int64_t)targetOffset - (int64_t)farJump.offset();

  MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
  MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);

  inst1->SetInstructionBits((uint32_t)distance);
  inst2->SetInstructionBits((uint32_t)(distance >> 32));
}

// As above, but patching in a finalized (already-placed) code buffer.
void MacroAssembler::patchFarJump(uint8_t* farJump, uint8_t* target) {
  Instruction* inst1 = (Instruction*)(farJump + 4);
  Instruction* inst2 = (Instruction*)(farJump + 8);

  int64_t distance = (int64_t)target - (int64_t)farJump;
  MOZ_RELEASE_ASSERT(mozilla::Abs(distance) <=
                     (intptr_t)jit::MaxCodeBytesPerProcess);

  MOZ_ASSERT(inst1->InstructionBits() == UINT32_MAX);
  MOZ_ASSERT(inst2->InstructionBits() == UINT32_MAX);

  inst1->SetInstructionBits((uint32_t)distance);
  inst2->SetInstructionBits((uint32_t)(distance >> 32));
}

// Emit a single NOP that patchNopToCall can later turn into a BL.
CodeOffset MacroAssembler::nopPatchableToCall() {
  AutoForbidPoolsAndNops afp(this,
                             /* max number of instructions in scope = */ 1);
  Nop();
  return CodeOffset(currentOffset());
}

void MacroAssembler::patchNopToCall(uint8_t* call, uint8_t* target) {
  // `call` points just past the patchable instruction.
  uint8_t* inst = call - 4;
  Instruction* instr = reinterpret_cast<Instruction*>(inst);
  MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
  bl(instr, (target - inst) >> 2);
}

void MacroAssembler::patchCallToNop(uint8_t* call) {
  uint8_t* inst = call - 4;
  Instruction* instr = reinterpret_cast<Instruction*>(inst);
  MOZ_ASSERT(instr->IsBL() || instr->IsNOP());
  nop(instr);
}

// Emit a MOVZ/MOVK pair with zero immediates; patchMove32 later ORs in the
// low and high 16-bit halves of the real value.
CodeOffset MacroAssembler::move32WithPatch(Register dest) {
  AutoForbidPoolsAndNops afp(this,
                             /* max number of instructions in scope = */ 3);
  CodeOffset offs = CodeOffset(currentOffset());
  movz(ARMRegister(dest, 64), 0, 0);
  movk(ARMRegister(dest, 64), 0, 16);
  return offs;
}

void MacroAssembler::patchMove32(CodeOffset offset, Imm32 n) {
  Instruction* i1 = getInstructionAt(BufferOffset(offset.offset()));
  MOZ_ASSERT(i1->IsMovz());
  i1->SetInstructionBits(i1->InstructionBits() | ImmMoveWide(n.value & 0xFFFF));

  Instruction* i2 = getInstructionAt(BufferOffset(offset.offset() + 4));
  MOZ_ASSERT(i2->IsMovk());
  i2->SetInstructionBits(i2->InstructionBits() |
                         ImmMoveWide((n.value >> 16) & 0xFFFF));
}

void MacroAssembler::pushReturnAddress() {
  // Pushing via the real SP would defeat the PSP scheme; forbid it.
  MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
  push(lr);
}

void MacroAssembler::popReturnAddress() {
  MOZ_RELEASE_ASSERT(!sp.Is(GetStackPointer64()), "Not valid");
  pop(lr);
}

// ===============================================================
// ABI function calls.

void MacroAssembler::setupUnalignedABICall(Register scratch) {
  // Because wasm operates without the need for dynamic alignment of SP, it is
  // implied that this routine should never be called when generating wasm.
  MOZ_ASSERT(!IsCompilingWasm());

  // The following won't work for SP -- needs slightly different logic.
  MOZ_RELEASE_ASSERT(GetStackPointer64().Is(PseudoStackPointer64));

  setupNativeABICall();
  dynamicAlignment_ = true;

  int64_t alignment = ~(int64_t(ABIStackAlignment) - 1);
  ARMRegister scratch64(scratch, 64);
  MOZ_ASSERT(!scratch64.Is(PseudoStackPointer64));

  // Always save LR -- Baseline ICs assume that LR isn't modified.
  push(lr);

  // Remember the stack address on entry. This is reloaded in callWithABIPost
  // below.
  Mov(scratch64, PseudoStackPointer64);

  // Make alignment, including the effective push of the previous sp.
  Sub(PseudoStackPointer64, PseudoStackPointer64, Operand(8));
  And(PseudoStackPointer64, PseudoStackPointer64, Operand(alignment));
  syncStackPtr();

  // Store previous sp to the top of the stack, aligned. This is also
  // reloaded in callWithABIPost.
  Str(scratch64, MemOperand(PseudoStackPointer64, 0));
}

void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
  // wasm operates without the need for dynamic alignment of SP.
  MOZ_ASSERT(!(dynamicAlignment_ && callFromWasm));

  MOZ_ASSERT(inCall_);
  uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();

  // ARM64 *really* wants SP to always be 16-aligned, so ensure this now.
  if (dynamicAlignment_) {
    stackForCall += ComputeByteAlignment(stackForCall, StackAlignment);
  } else {
    // This can happen when we attach out-of-line stubs for rare cases. For
    // example CodeGenerator::visitWasmTruncateToInt32 adds an out-of-line
    // chunk.
    uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
    stackForCall += ComputeByteAlignment(
        stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
  }

  *stackAdjust = stackForCall;
  reserveStack(*stackAdjust);
  {
    // Move the ABI arguments into their final registers/stack slots.
    enoughMemory_ &= moveResolver_.resolve();
    if (!enoughMemory_) {
      return;
    }
    MoveEmitter emitter(*this);
    emitter.emit(moveResolver_);
    emitter.finish();
  }

  assertStackAlignment(ABIStackAlignment);
}

void MacroAssembler::callWithABIPost(uint32_t stackAdjust, ABIType result) {
  // Call boundaries communicate stack via SP, so we must resync PSP now.
  initPseudoStackPtr();

  freeStack(stackAdjust);

  if (dynamicAlignment_) {
    // This then-clause makes more sense if you first read
    // setupUnalignedABICall above.
    //
    // Restore the stack pointer from entry. The stack pointer will have been
    // saved by setupUnalignedABICall. This is fragile in that it assumes
    // that uses of this routine (callWithABIPost) with `dynamicAlignment_ ==
    // true` are preceded by matching calls to setupUnalignedABICall. But
    // there's nothing that enforce that mechanically. If we really want to
    // enforce this, we could add a debug-only CallWithABIState enum to the
    // MacroAssembler and assert that setupUnalignedABICall updates it before
    // we get here, then reset it to its initial state.
    Ldr(GetStackPointer64(), MemOperand(GetStackPointer64(), 0));
    syncStackPtr();

    // Restore LR. This restores LR to the value stored by
    // setupUnalignedABICall, which should have been called just before
    // callWithABIPre. This is, per the above comment, also fragile.
    pop(lr);

    // SP may be < PSP now. That is expected from the behaviour of `pop`. It
    // is not clear why the following `syncStackPtr` is necessary, but it is:
    // without it, the following test segfaults:
    // tests/backup-point-bug1315634.js
    syncStackPtr();
  }

  // If the ABI's return regs are where ION is expecting them, then
  // no other work needs to be done.

#ifdef DEBUG
  MOZ_ASSERT(inCall_);
  inCall_ = false;
#endif
}

void MacroAssembler::callWithABINoProfiler(Register fun, ABIType result) {
  vixl::UseScratchRegisterScope temps(this);
  const Register scratch = temps.AcquireX().asUnsized();
  // Copy the target out of `fun` first: callWithABIPre emits argument moves
  // that could clobber it.
  movePtr(fun, scratch);

  uint32_t stackAdjust;
  callWithABIPre(&stackAdjust);
  call(scratch);
  callWithABIPost(stackAdjust, result);
}

void MacroAssembler::callWithABINoProfiler(const Address& fun, ABIType result) {
  vixl::UseScratchRegisterScope temps(this);
  const Register scratch = temps.AcquireX().asUnsized();
  loadPtr(fun, scratch);

  uint32_t stackAdjust;
  callWithABIPre(&stackAdjust);
  call(scratch);
  callWithABIPost(stackAdjust, result);
}

// ===============================================================
// Jit Frames.
// Push an address pointing just past the pushed word itself, so the stack
// looks as if a call had just been made. Returns the code offset of the
// fake call site. The no-pool region keeps the adr/push/bind sequence
// contiguous so the computed address is exact.
uint32_t MacroAssembler::pushFakeReturnAddress(Register scratch) {
  enterNoPool(3);
  Label fakeCallsite;

  Adr(ARMRegister(scratch, 64), &fakeCallsite);
  Push(scratch);
  bind(&fakeCallsite);
  uint32_t pseudoReturnOffset = currentOffset();

  leaveNoPool();
  return pseudoReturnOffset;
}

// Push a minimal IonJS exit-frame layout (frame descriptor, return address,
// frame pointer) for use by out-of-line code.
bool MacroAssemblerCompat::buildOOLFakeExitFrame(void* fakeReturnAddr) {
  asMasm().Push(FrameDescriptor(FrameType::IonJS));
  asMasm().Push(ImmPtr(fakeReturnAddr));
  asMasm().Push(FramePointer);
  return true;
}

// ===============================================================
// Move instructions

void MacroAssembler::moveValue(const ValueOperand& src,
                               const ValueOperand& dest) {
  if (src == dest) {
    return;
  }
  movePtr(src.valueReg(), dest.valueReg());
}

void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
  if (!src.isGCThing()) {
    movePtr(ImmWord(src.asRawBits()), dest.valueReg());
    return;
  }

  // GC things must be loaded with a patchable move so the GC can update the
  // embedded pointer; record a data relocation for the load.
  BufferOffset load =
      movePatchablePtr(ImmPtr(src.bitsAsPunboxPointer()), dest.valueReg());
  writeDataRelocation(src, load);
}

// ===============================================================
// Branch functions

// Load into |buffer| the store-buffer pointer of the chunk containing |ptr|.
void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
  And(ARMRegister(buffer, 64), ARMRegister(ptr, 64),
      Operand(int32_t(~gc::ChunkMask)));
  loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
}

// Branch on whether the chunk containing |ptr| has a store buffer, i.e.
// whether |ptr| is in a nursery chunk.
void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
                                             Register temp, Label* label) {
  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
  MOZ_ASSERT(ptr != temp);
  MOZ_ASSERT(ptr != ScratchReg &&
             ptr != ScratchReg2);  // Both may be used internally.
  MOZ_ASSERT(temp != ScratchReg && temp != ScratchReg2);

  And(ARMRegister(temp, 64), ARMRegister(ptr, 64),
      Operand(int32_t(~gc::ChunkMask)));
  branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
            ImmWord(0), label);
}

void MacroAssembler::branchValueIsNurseryCell(Condition cond,
                                              const Address& address,
                                              Register temp, Label* label) {
  branchValueIsNurseryCellImpl(cond, address, temp, label);
}

void MacroAssembler::branchValueIsNurseryCell(Condition cond,
                                              ValueOperand value, Register temp,
                                              Label* label) {
  branchValueIsNurseryCellImpl(cond, value, temp, label);
}

// Shared implementation: non-GC-thing values can never be nursery cells, so
// test the tag first, then check the chunk's store buffer.
template <typename T>
void MacroAssembler::branchValueIsNurseryCellImpl(Condition cond,
                                                  const T& value, Register temp,
                                                  Label* label) {
  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
  MOZ_ASSERT(temp != ScratchReg &&
             temp != ScratchReg2);  // Both may be used internally.

  Label done;
  branchTestGCThing(Assembler::NotEqual, value,
                    cond == Assembler::Equal ? &done : label);

  getGCThingValueChunk(value, temp);
  branchPtr(InvertCondition(cond), Address(temp, gc::ChunkStoreBufferOffset),
            ImmWord(0), label);

  bind(&done);
}

// Branch on whole-Value equality with the constant |rhs|, comparing the raw
// 64-bit punbox bits.
void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
                                     const Value& rhs, Label* label) {
  MOZ_ASSERT(cond == Equal || cond == NotEqual);
  MOZ_ASSERT(!rhs.isNaN());

  if (!rhs.isGCThing()) {
    Cmp(ARMRegister(lhs.valueReg(), 64), Operand(rhs.asRawBits()));
  } else {
    vixl::UseScratchRegisterScope temps(this);
    const ARMRegister scratch64 = temps.AcquireX();
    MOZ_ASSERT(scratch64.asUnsized() != lhs.valueReg());
    moveValue(rhs, ValueOperand(scratch64.asUnsized()));
    Cmp(ARMRegister(lhs.valueReg(), 64), scratch64);
  }
  B(label, cond);
}

// Branch on whether |val| is (any sign of) the canonical NaN double value.
void MacroAssembler::branchTestNaNValue(Condition cond, const ValueOperand& val,
                                        Register temp, Label* label) {
  MOZ_ASSERT(cond == Equal || cond == NotEqual);
  vixl::UseScratchRegisterScope temps(this);
  const ARMRegister scratch64 = temps.AcquireX();
  MOZ_ASSERT(scratch64.asUnsized() != val.valueReg());

  // When testing for NaN, we want to ignore the sign bit.
  And(ARMRegister(temp, 64), ARMRegister(val.valueReg(), 64),
      Operand(~mozilla::FloatingPoint<double>::kSignBit));

  // Compare against a NaN with sign bit 0.
  static_assert(JS::detail::CanonicalizedNaNSignBit == 0);
  moveValue(DoubleValue(JS::GenericNaN()), ValueOperand(scratch64.asUnsized()));
  Cmp(ARMRegister(temp, 64), scratch64);
  B(label, cond);
}

// ========================================================================
// Memory access primitives.
2063 template <typename T> 2064 void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, 2065 MIRType valueType, const T& dest) { 2066 MOZ_ASSERT(valueType < MIRType::Value); 2067 2068 if (valueType == MIRType::Double) { 2069 boxDouble(value.reg().typedReg().fpu(), dest); 2070 return; 2071 } 2072 2073 if (value.constant()) { 2074 storeValue(value.value(), dest); 2075 } else { 2076 storeValue(ValueTypeFromMIRType(valueType), value.reg().typedReg().gpr(), 2077 dest); 2078 } 2079 } 2080 2081 template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, 2082 MIRType valueType, 2083 const Address& dest); 2084 template void MacroAssembler::storeUnboxedValue( 2085 const ConstantOrRegister& value, MIRType valueType, 2086 const BaseObjectElementIndex& dest); 2087 2088 void MacroAssembler::comment(const char* msg) { Assembler::comment(msg); } 2089 2090 // ======================================================================== 2091 // wasm support 2092 2093 FaultingCodeOffset MacroAssembler::wasmTrapInstruction() { 2094 AutoForbidPoolsAndNops afp(this, 2095 /* max number of instructions in scope = */ 1); 2096 FaultingCodeOffset fco = FaultingCodeOffset(currentOffset()); 2097 Unreachable(); 2098 return fco; 2099 } 2100 2101 void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, 2102 Register boundsCheckLimit, 2103 Label* label) { 2104 branch32(cond, index, boundsCheckLimit, label); 2105 if (JitOptions.spectreIndexMasking) { 2106 csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond); 2107 } 2108 } 2109 2110 void MacroAssembler::wasmBoundsCheck32(Condition cond, Register index, 2111 Address boundsCheckLimit, Label* label) { 2112 branch32(cond, index, boundsCheckLimit, label); 2113 if (JitOptions.spectreIndexMasking) { 2114 csel(ARMRegister(index, 32), vixl::wzr, ARMRegister(index, 32), cond); 2115 } 2116 } 2117 2118 void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, 2119 Register64 
boundsCheckLimit, 2120 Label* label) { 2121 branchPtr(cond, index.reg, boundsCheckLimit.reg, label); 2122 if (JitOptions.spectreIndexMasking) { 2123 csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64), 2124 cond); 2125 } 2126 } 2127 2128 void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index, 2129 Address boundsCheckLimit, Label* label) { 2130 branchPtr(SwapCmpOperandsCondition(cond), boundsCheckLimit, index.reg, label); 2131 if (JitOptions.spectreIndexMasking) { 2132 csel(ARMRegister(index.reg, 64), vixl::xzr, ARMRegister(index.reg, 64), 2133 cond); 2134 } 2135 } 2136 2137 // FCVTZU behaves as follows: 2138 // 2139 // on NaN it produces zero 2140 // on too large it produces UINT_MAX (for appropriate type) 2141 // on too small it produces zero 2142 // 2143 // FCVTZS behaves as follows: 2144 // 2145 // on NaN it produces zero 2146 // on too large it produces INT_MAX (for appropriate type) 2147 // on too small it produces INT_MIN (ditto) 2148 2149 void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input_, 2150 Register output_, 2151 bool isSaturating, 2152 Label* oolEntry) { 2153 ARMRegister output(output_, 32); 2154 ARMFPRegister input(input_, 64); 2155 Fcvtzu(output, input); 2156 if (!isSaturating) { 2157 Cmp(output, 0); 2158 Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); 2159 B(oolEntry, Assembler::Equal); 2160 } 2161 } 2162 2163 void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input_, 2164 Register output_, 2165 bool isSaturating, 2166 Label* oolEntry) { 2167 ARMRegister output(output_, 32); 2168 ARMFPRegister input(input_, 32); 2169 Fcvtzu(output, input); 2170 if (!isSaturating) { 2171 Cmp(output, 0); 2172 Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); 2173 B(oolEntry, Assembler::Equal); 2174 } 2175 } 2176 2177 void MacroAssembler::wasmTruncateDoubleToInt32(FloatRegister input_, 2178 Register output_, 2179 bool isSaturating, 2180 Label* oolEntry) { 2181 ARMRegister output(output_, 32); 2182 
ARMFPRegister input(input_, 64); 2183 Fcvtzs(output, input); 2184 if (!isSaturating) { 2185 Cmp(output, 0); 2186 Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual); 2187 Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual); 2188 B(oolEntry, Assembler::Equal); 2189 } 2190 } 2191 2192 void MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input_, 2193 Register output_, 2194 bool isSaturating, 2195 Label* oolEntry) { 2196 ARMRegister output(output_, 32); 2197 ARMFPRegister input(input_, 32); 2198 Fcvtzs(output, input); 2199 if (!isSaturating) { 2200 Cmp(output, 0); 2201 Ccmp(output, INT32_MAX, vixl::ZFlag, Assembler::NotEqual); 2202 Ccmp(output, INT32_MIN, vixl::ZFlag, Assembler::NotEqual); 2203 B(oolEntry, Assembler::Equal); 2204 } 2205 } 2206 2207 void MacroAssembler::wasmTruncateDoubleToUInt64( 2208 FloatRegister input_, Register64 output_, bool isSaturating, 2209 Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { 2210 MOZ_ASSERT(tempDouble.isInvalid()); 2211 2212 ARMRegister output(output_.reg, 64); 2213 ARMFPRegister input(input_, 64); 2214 Fcvtzu(output, input); 2215 if (!isSaturating) { 2216 Cmp(output, 0); 2217 Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); 2218 B(oolEntry, Assembler::Equal); 2219 bind(oolRejoin); 2220 } 2221 } 2222 2223 void MacroAssembler::wasmTruncateFloat32ToUInt64( 2224 FloatRegister input_, Register64 output_, bool isSaturating, 2225 Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { 2226 MOZ_ASSERT(tempDouble.isInvalid()); 2227 2228 ARMRegister output(output_.reg, 64); 2229 ARMFPRegister input(input_, 32); 2230 Fcvtzu(output, input); 2231 if (!isSaturating) { 2232 Cmp(output, 0); 2233 Ccmp(output, -1, vixl::ZFlag, Assembler::NotEqual); 2234 B(oolEntry, Assembler::Equal); 2235 bind(oolRejoin); 2236 } 2237 } 2238 2239 void MacroAssembler::wasmTruncateDoubleToInt64( 2240 FloatRegister input_, Register64 output_, bool isSaturating, 2241 Label* oolEntry, Label* oolRejoin, FloatRegister 
tempDouble) { 2242 MOZ_ASSERT(tempDouble.isInvalid()); 2243 2244 ARMRegister output(output_.reg, 64); 2245 ARMFPRegister input(input_, 64); 2246 Fcvtzs(output, input); 2247 if (!isSaturating) { 2248 Cmp(output, 0); 2249 Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual); 2250 Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual); 2251 B(oolEntry, Assembler::Equal); 2252 bind(oolRejoin); 2253 } 2254 } 2255 2256 void MacroAssembler::wasmTruncateFloat32ToInt64( 2257 FloatRegister input_, Register64 output_, bool isSaturating, 2258 Label* oolEntry, Label* oolRejoin, FloatRegister tempDouble) { 2259 ARMRegister output(output_.reg, 64); 2260 ARMFPRegister input(input_, 32); 2261 Fcvtzs(output, input); 2262 if (!isSaturating) { 2263 Cmp(output, 0); 2264 Ccmp(output, INT64_MAX, vixl::ZFlag, Assembler::NotEqual); 2265 Ccmp(output, INT64_MIN, vixl::ZFlag, Assembler::NotEqual); 2266 B(oolEntry, Assembler::Equal); 2267 bind(oolRejoin); 2268 } 2269 } 2270 2271 void MacroAssembler::oolWasmTruncateCheckF32ToI32( 2272 FloatRegister input, Register output, TruncFlags flags, 2273 const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) { 2274 Label notNaN; 2275 branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); 2276 wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc); 2277 bind(¬NaN); 2278 2279 Label isOverflow; 2280 const float two_31 = -float(INT32_MIN); 2281 ScratchFloat32Scope fpscratch(*this); 2282 if (flags & TRUNC_UNSIGNED) { 2283 loadConstantFloat32(two_31 * 2, fpscratch); 2284 branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 2285 &isOverflow); 2286 loadConstantFloat32(-1.0f, fpscratch); 2287 branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); 2288 } else { 2289 loadConstantFloat32(two_31, fpscratch); 2290 branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 2291 &isOverflow); 2292 loadConstantFloat32(-two_31, fpscratch); 2293 branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 
rejoin); 2294 } 2295 bind(&isOverflow); 2296 wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc); 2297 } 2298 2299 void MacroAssembler::oolWasmTruncateCheckF64ToI32( 2300 FloatRegister input, Register output, TruncFlags flags, 2301 const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) { 2302 Label notNaN; 2303 branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); 2304 wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc); 2305 bind(¬NaN); 2306 2307 Label isOverflow; 2308 const double two_31 = -double(INT32_MIN); 2309 ScratchDoubleScope fpscratch(*this); 2310 if (flags & TRUNC_UNSIGNED) { 2311 loadConstantDouble(two_31 * 2, fpscratch); 2312 branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 2313 &isOverflow); 2314 loadConstantDouble(-1.0, fpscratch); 2315 branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); 2316 } else { 2317 loadConstantDouble(two_31, fpscratch); 2318 branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 2319 &isOverflow); 2320 loadConstantDouble(-two_31 - 1, fpscratch); 2321 branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); 2322 } 2323 bind(&isOverflow); 2324 wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc); 2325 } 2326 2327 void MacroAssembler::oolWasmTruncateCheckF32ToI64( 2328 FloatRegister input, Register64 output, TruncFlags flags, 2329 const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) { 2330 Label notNaN; 2331 branchFloat(Assembler::DoubleOrdered, input, input, ¬NaN); 2332 wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc); 2333 bind(¬NaN); 2334 2335 Label isOverflow; 2336 const float two_63 = -float(INT64_MIN); 2337 ScratchFloat32Scope fpscratch(*this); 2338 if (flags & TRUNC_UNSIGNED) { 2339 loadConstantFloat32(two_63 * 2, fpscratch); 2340 branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 2341 &isOverflow); 2342 loadConstantFloat32(-1.0f, fpscratch); 2343 branchFloat(Assembler::DoubleGreaterThan, input, fpscratch, 
rejoin); 2344 } else { 2345 loadConstantFloat32(two_63, fpscratch); 2346 branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 2347 &isOverflow); 2348 loadConstantFloat32(-two_63, fpscratch); 2349 branchFloat(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin); 2350 } 2351 bind(&isOverflow); 2352 wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc); 2353 } 2354 2355 void MacroAssembler::oolWasmTruncateCheckF64ToI64( 2356 FloatRegister input, Register64 output, TruncFlags flags, 2357 const wasm::TrapSiteDesc& trapSiteDesc, Label* rejoin) { 2358 Label notNaN; 2359 branchDouble(Assembler::DoubleOrdered, input, input, ¬NaN); 2360 wasmTrap(wasm::Trap::InvalidConversionToInteger, trapSiteDesc); 2361 bind(¬NaN); 2362 2363 Label isOverflow; 2364 const double two_63 = -double(INT64_MIN); 2365 ScratchDoubleScope fpscratch(*this); 2366 if (flags & TRUNC_UNSIGNED) { 2367 loadConstantDouble(two_63 * 2, fpscratch); 2368 branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 2369 &isOverflow); 2370 loadConstantDouble(-1.0, fpscratch); 2371 branchDouble(Assembler::DoubleGreaterThan, input, fpscratch, rejoin); 2372 } else { 2373 loadConstantDouble(two_63, fpscratch); 2374 branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, 2375 &isOverflow); 2376 loadConstantDouble(-two_63, fpscratch); 2377 branchDouble(Assembler::DoubleGreaterThanOrEqual, input, fpscratch, rejoin); 2378 } 2379 bind(&isOverflow); 2380 wasmTrap(wasm::Trap::IntegerOverflow, trapSiteDesc); 2381 } 2382 2383 void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access, 2384 Register memoryBase, Register ptr, 2385 AnyRegister output) { 2386 wasmLoadImpl(access, memoryBase, ptr, output, Register64::Invalid()); 2387 } 2388 2389 void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access, 2390 Register memoryBase, Register ptr, 2391 Register64 output) { 2392 wasmLoadImpl(access, memoryBase, ptr, AnyRegister(), output); 2393 } 2394 2395 void 
MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access, 2396 AnyRegister value, Register memoryBase, 2397 Register ptr) { 2398 wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr); 2399 } 2400 2401 void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access, 2402 Register64 value, Register memoryBase, 2403 Register ptr) { 2404 wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr); 2405 } 2406 2407 void MacroAssembler::enterFakeExitFrameForWasm(Register cxreg, Register scratch, 2408 ExitFrameType type) { 2409 // Wasm stubs use the native SP, not the PSP. 2410 2411 linkExitFrame(cxreg, scratch); 2412 2413 MOZ_RELEASE_ASSERT(sp.Is(GetStackPointer64())); 2414 2415 // SP has to be 16-byte aligned when we do a load/store, so push |type| twice 2416 // and then add 8 bytes to SP. This leaves SP unaligned. 2417 move32(Imm32(int32_t(type)), scratch); 2418 push(scratch, scratch); 2419 Add(sp, sp, 8); 2420 2421 // Despite the above assertion, it is possible for control to flow from here 2422 // to the code generated by 2423 // MacroAssemblerCompat::handleFailureWithHandlerTail without any 2424 // intervening assignment to PSP. But handleFailureWithHandlerTail assumes 2425 // that PSP is the active stack pointer. Hence the following is necessary 2426 // for safety. Note we can't use initPseudoStackPtr here as that would 2427 // generate no instructions. 
2428 Mov(PseudoStackPointer64, sp); 2429 } 2430 2431 void MacroAssembler::widenInt32(Register r) { 2432 move32To64ZeroExtend(r, Register64(r)); 2433 } 2434 2435 CodeOffset MacroAssembler::sub32FromMemAndBranchIfNegativeWithPatch( 2436 Address address, Label* label) { 2437 vixl::UseScratchRegisterScope temps(this); 2438 const ARMRegister value32 = temps.AcquireW(); 2439 MOZ_ASSERT(value32.asUnsized() != address.base); 2440 Ldr(value32, toMemOperand(address)); 2441 // -128 is arbitrary, but makes `*address` count upwards, which may help 2442 // to identify cases where the subsequent ::patch..() call was forgotten. 2443 Subs(value32, value32, Operand(-128)); 2444 // Points immediately after the insn to patch 2445 CodeOffset patchPoint = CodeOffset(currentOffset()); 2446 // This assumes that Str does not change the condition codes. 2447 Str(value32, toMemOperand(address)); 2448 B(label, Assembler::Signed); 2449 return patchPoint; 2450 } 2451 2452 void MacroAssembler::patchSub32FromMemAndBranchIfNegative(CodeOffset offset, 2453 Imm32 imm) { 2454 int32_t val = imm.value; 2455 // Patching it to zero would make the insn pointless 2456 MOZ_RELEASE_ASSERT(val >= 1 && val <= 127); 2457 Instruction* instrPtr = getInstructionAt(BufferOffset(offset.offset() - 4)); 2458 // 31 27 23 21 9 4 2459 // | | | | | | 2460 // 0011 0001 00 imm12 Rn Rd = ADDS Wd, Wn|WSP, #imm12 // (expected) 2461 // 0111 0001 00 imm12 Rn Rd = SUBS Wd, Wn|WSP, #imm12 // (replacement) 2462 vixl::Instr oldInstr = instrPtr->InstructionBits(); 2463 // Check opcode bits and imm field are as expected 2464 MOZ_ASSERT((oldInstr & 0b1111'1111'11'000000000000'00000'00000U) == 2465 0b0011'0001'00'000000000000'00000'00000U); 2466 MOZ_RELEASE_ASSERT((oldInstr & 0b0000'0000'00'111111111111'00000'00000U) == 2467 (128 << 10)); // 128 as created above 2468 vixl::Instr newInstr = 2469 0b0111'0001'00'000000000000'00000'00000U | // opcode bits 2470 (oldInstr & 0b11111'11111) | // existing register fields 2471 ((val & 
0b111111111111) << 10); // #val 2472 instrPtr->SetInstructionBits(newInstr); 2473 } 2474 2475 // ======================================================================== 2476 // Convert floating point. 2477 2478 bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; } 2479 2480 void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest, 2481 Register temp) { 2482 MOZ_ASSERT(temp == Register::Invalid()); 2483 Ucvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); 2484 } 2485 2486 void MacroAssembler::convertInt64ToDouble(Register64 src, FloatRegister dest) { 2487 Scvtf(ARMFPRegister(dest, 64), ARMRegister(src.reg, 64)); 2488 } 2489 2490 void MacroAssembler::convertUInt64ToFloat32(Register64 src, FloatRegister dest, 2491 Register temp) { 2492 MOZ_ASSERT(temp == Register::Invalid()); 2493 Ucvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64)); 2494 } 2495 2496 void MacroAssembler::convertInt64ToFloat32(Register64 src, FloatRegister dest) { 2497 Scvtf(ARMFPRegister(dest, 32), ARMRegister(src.reg, 64)); 2498 } 2499 2500 void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) { 2501 convertInt64ToDouble(Register64(src), dest); 2502 } 2503 2504 // ======================================================================== 2505 // Primitive atomic operations. 2506 2507 // The computed MemOperand must be Reg+0 because the load/store exclusive 2508 // instructions only take a single pointer register. 
// Materialize |address| as a register-plus-zero MemOperand, as required by
// the exclusive and LSE atomic instructions. |scratch| is used only when an
// offset must be folded in.
static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
                                          const Address& address,
                                          Register scratch) {
  if (address.offset == 0) {
    return MemOperand(X(masm, address.base), 0);
  }

  masm.Add(X(scratch), X(masm, address.base), address.offset);
  return MemOperand(X(scratch), 0);
}

// BaseIndex variant: fold base + (index << scale) + offset into |scratch|.
static MemOperand ComputePointerForAtomic(MacroAssembler& masm,
                                          const BaseIndex& address,
                                          Register scratch) {
  masm.Add(X(scratch), X(masm, address.base),
           Operand(X(address.index), vixl::LSL, address.scale));
  if (address.offset) {
    masm.Add(X(scratch), X(scratch), address.offset);
  }
  return MemOperand(X(scratch), 0);
}

// This sign extends to targetWidth and leaves any higher bits zero.

static void SignOrZeroExtend(MacroAssembler& masm, Scalar::Type srcType,
                             Width targetWidth, Register src, Register dest) {
  bool signExtend = Scalar::isSignedIntType(srcType);

  switch (Scalar::byteSize(srcType)) {
    case 1:
      if (signExtend) {
        masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
      } else {
        masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 7);
      }
      break;
    case 2:
      if (signExtend) {
        masm.Sbfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
      } else {
        masm.Ubfm(R(dest, targetWidth), R(src, targetWidth), 0, 15);
      }
      break;
    case 4:
      if (targetWidth == Width::_64) {
        if (signExtend) {
          masm.Sbfm(X(dest), X(src), 0, 31);
        } else {
          masm.Ubfm(X(dest), X(src), 0, 31);
        }
      } else if (src != dest) {
        // Same-width move; 32-bit Mov clears the high bits.
        masm.Mov(R(dest, targetWidth), R(src, targetWidth));
      }
      break;
    case 8:
      if (src != dest) {
        masm.Mov(R(dest, targetWidth), R(src, targetWidth));
      }
      break;
    default:
      MOZ_CRASH();
  }
}

// Exclusive-loads zero-extend their values to the full width of the X register.
//
// Note, we've promised to leave the high bits of the 64-bit register clear if
// the targetWidth is 32.

static void LoadExclusive(MacroAssembler& masm,
                          const wasm::MemoryAccessDesc* access,
                          Scalar::Type srcType, Width targetWidth,
                          MemOperand ptr, Register dest) {
  bool signExtend = Scalar::isSignedIntType(srcType);

  // With this address form, a single native ldxr* will be emitted, and the
  // AutoForbidPoolsAndNops ensures that the metadata is emitted at the
  // address of the ldxr*. Note that the use of AutoForbidPoolsAndNops is now
  // a "second class" solution; the right way to do this would be to have the
  // masm.<LoadInsn> calls produce an FaultingCodeOffset, and hand that value to
  // `masm.append`.
  MOZ_ASSERT(ptr.IsImmediateOffset() && ptr.offset() == 0);

  switch (Scalar::byteSize(srcType)) {
    case 1: {
      {
        AutoForbidPoolsAndNops afp(
            &masm,
            /* max number of instructions in scope = */ 1);
        if (access) {
          masm.append(*access, wasm::TrapMachineInsn::Load8,
                      FaultingCodeOffset(masm.currentOffset()));
        }
        masm.Ldxrb(W(dest), ptr);
      }
      if (signExtend) {
        masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 7);
      }
      break;
    }
    case 2: {
      {
        AutoForbidPoolsAndNops afp(
            &masm,
            /* max number of instructions in scope = */ 1);
        if (access) {
          masm.append(*access, wasm::TrapMachineInsn::Load16,
                      FaultingCodeOffset(masm.currentOffset()));
        }
        masm.Ldxrh(W(dest), ptr);
      }
      if (signExtend) {
        masm.Sbfm(R(dest, targetWidth), R(dest, targetWidth), 0, 15);
      }
      break;
    }
    case 4: {
      {
        AutoForbidPoolsAndNops afp(
            &masm,
            /* max number of instructions in scope = */ 1);
        if (access) {
          masm.append(*access, wasm::TrapMachineInsn::Load32,
                      FaultingCodeOffset(masm.currentOffset()));
        }
        masm.Ldxr(W(dest), ptr);
      }
      if (targetWidth == Width::_64 && signExtend) {
        masm.Sbfm(X(dest), X(dest), 0, 31);
      }
      break;
    }
    case 8: {
      {
        AutoForbidPoolsAndNops afp(
            &masm,
            /* max number of instructions in scope = */ 1);
        if (access) {
          masm.append(*access, wasm::TrapMachineInsn::Load64,
                      FaultingCodeOffset(masm.currentOffset()));
        }
        masm.Ldxr(X(dest), ptr);
      }
      break;
    }
    default: {
      MOZ_CRASH();
    }
  }
}

// Store-exclusive of |src| at |ptr|; |status| receives 0 on success, nonzero
// if the exclusive monitor was lost and the store must be retried.
static void StoreExclusive(MacroAssembler& masm, Scalar::Type type,
                           Register status, Register src, MemOperand ptr) {
  // Note, these are not decorated with a TrapSite only because they are
  // assumed to be preceded by a LoadExclusive to the same address, of the
  // same width, so that will always take the page fault if the address is bad.
  switch (Scalar::byteSize(type)) {
    case 1:
      masm.Stxrb(W(status), W(src), ptr);
      break;
    case 2:
      masm.Stxrh(W(status), W(src), ptr);
      break;
    case 4:
      masm.Stxr(W(status), W(src), ptr);
      break;
    case 8:
      masm.Stxr(W(status), X(src), ptr);
      break;
  }
}

// True if the CPU has single-instruction atomics (CAS/SWP/LD<op>), used
// below in preference to LDXR/STXR loops.
static bool HasAtomicInstructions(MacroAssembler& masm) {
  return masm.asVIXL().GetCPUFeatures()->Has(vixl::CPUFeatures::kAtomics);
}

// The single-instruction atomics are only used for operand sizes that fit
// the target register width exactly (sub-word ops use the 32-bit forms).
static inline bool SupportedAtomicInstructionOperands(Scalar::Type type,
                                                      Width targetWidth) {
  if (targetWidth == Width::_32) {
    return byteSize(type) <= 4;
  }
  if (targetWidth == Width::_64) {
    return byteSize(type) == 8;
  }
  return false;
}

// Emit a compare-and-swap of |oldval| -> |newval| at |mem|; |output| receives
// the value previously in memory, sign/zero-extended per |type|. Uses CASAL*
// when available, otherwise an LDXR/STXR loop (AArch64 v8.0).
template <typename T>
static void CompareExchange(MacroAssembler& masm,
                            const wasm::MemoryAccessDesc* access,
                            Scalar::Type type, Width targetWidth,
                            Synchronization sync, const T& mem, Register oldval,
                            Register newval, Register output) {
  MOZ_ASSERT(oldval != output && newval != output);

  vixl::UseScratchRegisterScope temps(&masm);

  Register ptrScratch = temps.AcquireX().asUnsized();
  MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);

  MOZ_ASSERT(ptr.base().asUnsized() != output);

  if (HasAtomicInstructions(masm) &&
      SupportedAtomicInstructionOperands(type, targetWidth)) {
    // CASAL compares against (and writes the loaded value into) its first
    // operand, so seed |output| with |oldval|.
    masm.Mov(X(output), X(oldval));
    // Casal is using same atomic mechanism as Ldxr/Stxr, and
    // consider it is the same for "Inner Shareable" domain.
    // Not updated gen_cmpxchg in GenerateAtomicOperations.py.
    masm.memoryBarrierBefore(sync);
    {
      AutoForbidPoolsAndNops afp(&masm, /* number of insns = */ 1);
      if (access) {
        masm.append(*access, wasm::TrapMachineInsn::Atomic,
                    FaultingCodeOffset(masm.currentOffset()));
      }
      switch (byteSize(type)) {
        case 1:
          masm.Casalb(R(output, targetWidth), R(newval, targetWidth), ptr);
          break;
        case 2:
          masm.Casalh(R(output, targetWidth), R(newval, targetWidth), ptr);
          break;
        case 4:
        case 8:
          masm.Casal(R(output, targetWidth), R(newval, targetWidth), ptr);
          break;
        default:
          MOZ_CRASH("CompareExchange unsupported type");
      }
    }
    masm.memoryBarrierAfter(sync);
    SignOrZeroExtend(masm, type, targetWidth, output, output);
    return;
  }

  // The target doesn't support atomics, so generate a LL-SC loop. This requires
  // only AArch64 v8.0.
  Label again;
  Label done;

  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
  // GenerateAtomicOperations.py
  masm.memoryBarrierBefore(sync);

  Register scratch = temps.AcquireX().asUnsized();

  masm.bind(&again);
  // Extend |oldval| the same way the exclusive load extends the loaded value
  // so the comparison is meaningful.
  SignOrZeroExtend(masm, type, targetWidth, oldval, scratch);
  LoadExclusive(masm, access, type, targetWidth, ptr, output);
  masm.Cmp(R(output, targetWidth), R(scratch, targetWidth));
  masm.B(&done, MacroAssembler::NotEqual);
  StoreExclusive(masm, type, scratch, newval, ptr);
  masm.Cbnz(W(scratch), &again);
  masm.bind(&done);

  masm.memoryBarrierAfter(sync);
}

// Emit an atomic exchange of |value| at |mem|; |output| receives the previous
// memory value, sign/zero-extended per |type|. Uses SWPAL* when available,
// otherwise an LDXR/STXR loop.
template <typename T>
static void AtomicExchange(MacroAssembler& masm,
                           const wasm::MemoryAccessDesc* access,
                           Scalar::Type type, Width targetWidth,
                           Synchronization sync, const T& mem, Register value,
                           Register output) {
  MOZ_ASSERT(value != output);

  vixl::UseScratchRegisterScope temps(&masm);

  Register ptrScratch = temps.AcquireX().asUnsized();
  MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);

  if (HasAtomicInstructions(masm) &&
      SupportedAtomicInstructionOperands(type, targetWidth)) {
    // Swpal is using same atomic mechanism as Ldxr/Stxr, and
    // consider it is the same for "Inner Shareable" domain.
    // Not updated gen_exchange in GenerateAtomicOperations.py.
    masm.memoryBarrierBefore(sync);
    {
      AutoForbidPoolsAndNops afp(&masm, /* number of insns = */ 1);
      if (access) {
        masm.append(*access, wasm::TrapMachineInsn::Atomic,
                    FaultingCodeOffset(masm.currentOffset()));
      }
      switch (byteSize(type)) {
        case 1:
          masm.Swpalb(R(value, targetWidth), R(output, targetWidth), ptr);
          break;
        case 2:
          masm.Swpalh(R(value, targetWidth), R(output, targetWidth), ptr);
          break;
        case 4:
        case 8:
          masm.Swpal(R(value, targetWidth), R(output, targetWidth), ptr);
          break;
        default:
          MOZ_CRASH("AtomicExchange unsupported type");
      }
    }
    masm.memoryBarrierAfter(sync);
    SignOrZeroExtend(masm, type, targetWidth, output, output);
    return;
  }

  // The target doesn't support atomics, so generate a LL-SC loop. This requires
  // only AArch64 v8.0.
  Label again;

  // NOTE: the generated code must match the assembly code in gen_exchange in
  // GenerateAtomicOperations.py
  masm.memoryBarrierBefore(sync);

  Register scratch = temps.AcquireX().asUnsized();

  masm.bind(&again);
  LoadExclusive(masm, access, type, targetWidth, ptr, output);
  StoreExclusive(masm, type, scratch, value, ptr);
  masm.Cbnz(W(scratch), &again);

  masm.memoryBarrierAfter(sync);
}

// Emit an atomic read-modify-write of |op| with |value| at |mem|. When
// |wantResult|, |output| receives the previous memory value (extended per
// |type|). Uses the LSE LD<op>/ST<op> forms when available (Sub and And are
// expressed via negated/complemented operands of LDADD/LDCLR), otherwise an
// LDXR/compute/STXR loop using |temp|.
template <bool wantResult, typename T>
static void AtomicFetchOp(MacroAssembler& masm,
                          const wasm::MemoryAccessDesc* access,
                          Scalar::Type type, Width targetWidth,
                          Synchronization sync, AtomicOp op, const T& mem,
                          Register value, Register temp, Register output) {
  MOZ_ASSERT(value != output);
  MOZ_ASSERT(value != temp);
  MOZ_ASSERT_IF(wantResult, output != temp);

  vixl::UseScratchRegisterScope temps(&masm);

  Register ptrScratch = temps.AcquireX().asUnsized();
  MemOperand ptr = ComputePointerForAtomic(masm, mem, ptrScratch);

  if (HasAtomicInstructions(masm) &&
      SupportedAtomicInstructionOperands(type, targetWidth) &&
      !isFloatingType(type)) {
    // LdXXXal/StXXXl is using same atomic mechanism as Ldxr/Stxr, and
    // consider it is the same for "Inner Shareable" domain.
    // Not updated gen_fetchop in GenerateAtomicOperations.py.
    masm.memoryBarrierBefore(sync);

#define FETCH_OP_CASE(op, arg)                                                 \
  {                                                                            \
    AutoForbidPoolsAndNops afp(&masm, /* num insns = */ 1);                    \
    if (access) {                                                              \
      masm.append(*access, wasm::TrapMachineInsn::Atomic,                      \
                  FaultingCodeOffset(masm.currentOffset()));                   \
    }                                                                          \
    switch (byteSize(type)) {                                                  \
      case 1:                                                                  \
        if (wantResult) {                                                      \
          masm.Ld##op##alb(R(arg, targetWidth), R(output, targetWidth), ptr);  \
        } else {                                                               \
          masm.St##op##lb(R(arg, targetWidth), ptr);                           \
        }                                                                      \
        break;                                                                 \
      case 2:                                                                  \
        if (wantResult) {                                                      \
          masm.Ld##op##alh(R(arg, targetWidth), R(output, targetWidth), ptr);  \
        } else {                                                               \
          masm.St##op##lh(R(arg, targetWidth), ptr);                           \
        }                                                                      \
        break;                                                                 \
      case 4:                                                                  \
      case 8:                                                                  \
        if (wantResult) {                                                      \
          masm.Ld##op##al(R(arg, targetWidth), R(output, targetWidth), ptr);   \
        } else {                                                               \
          masm.St##op##l(R(arg, targetWidth), ptr);                            \
        }                                                                      \
        break;                                                                 \
      default:                                                                 \
        MOZ_CRASH("AtomicFetchOp unsupported type");                           \
    }                                                                          \
  }

    switch (op) {
      case AtomicOp::Add:
        FETCH_OP_CASE(add, value);
        break;
      case AtomicOp::Sub: {
        // x - v == x + (-v), and LSE has only LDADD.
        Register scratch = temps.AcquireX().asUnsized();
        masm.Neg(X(scratch), X(value));
        FETCH_OP_CASE(add, scratch);
        break;
      }
      case AtomicOp::And: {
        // x & v == x clear ~v, and LSE has only LDCLR.
        Register scratch = temps.AcquireX().asUnsized();
        masm.Eor(X(scratch), X(value), Operand(~0));
        FETCH_OP_CASE(clr, scratch);
        break;
      }
      case AtomicOp::Or:
        FETCH_OP_CASE(set, value);
        break;
      case AtomicOp::Xor:
        FETCH_OP_CASE(eor, value);
        break;
    }
    masm.memoryBarrierAfter(sync);
    if (wantResult) {
      SignOrZeroExtend(masm, type, targetWidth, output, output);
    }
    return;
  }

#undef FETCH_OP_CASE

  // The target doesn't support atomics, so generate a LL-SC loop. This requires
  // only AArch64 v8.0.
  Label again;

  // NOTE: the generated code must match the assembly code in gen_fetchop in
  // GenerateAtomicOperations.py
  masm.memoryBarrierBefore(sync);

  Register scratch = temps.AcquireX().asUnsized();

  masm.bind(&again);
  LoadExclusive(masm, access, type, targetWidth, ptr, output);
  switch (op) {
    case AtomicOp::Add:
      masm.Add(X(temp), X(output), X(value));
      break;
    case AtomicOp::Sub:
      masm.Sub(X(temp), X(output), X(value));
      break;
    case AtomicOp::And:
      masm.And(X(temp), X(output), X(value));
      break;
    case AtomicOp::Or:
      masm.Orr(X(temp), X(output), X(value));
      break;
    case AtomicOp::Xor:
      masm.Eor(X(temp), X(output), X(value));
      break;
  }
  StoreExclusive(masm, type, scratch, temp, ptr);
  masm.Cbnz(W(scratch), &again);
  if (wantResult) {
    SignOrZeroExtend(masm, type, targetWidth, output, output);
  }

  masm.memoryBarrierAfter(sync);
}

void MacroAssembler::compareExchange(Scalar::Type type, Synchronization sync,
                                     const Address& mem, Register oldval,
                                     Register newval, Register output) {
  CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
                  output);
}

void MacroAssembler::compareExchange(Scalar::Type type, Synchronization sync,
                                     const BaseIndex& mem, Register oldval,
                                     Register newval, Register output) {
  CompareExchange(*this, nullptr, type, Width::_32, sync, mem, oldval, newval,
                  output);
}

void MacroAssembler::compareExchange64(Synchronization sync, const Address& mem,
                                       Register64 expect, Register64 replace,
                                       Register64 output) {
  CompareExchange(*this, nullptr, Scalar::Int64,
Width::_64, sync, mem, 2977 expect.reg, replace.reg, output.reg); 2978 } 2979 2980 void MacroAssembler::compareExchange64(Synchronization sync, 2981 const BaseIndex& mem, Register64 expect, 2982 Register64 replace, Register64 output) { 2983 CompareExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, 2984 expect.reg, replace.reg, output.reg); 2985 } 2986 2987 void MacroAssembler::atomicExchange64(Synchronization sync, const Address& mem, 2988 Register64 value, Register64 output) { 2989 AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, 2990 value.reg, output.reg); 2991 } 2992 2993 void MacroAssembler::atomicExchange64(Synchronization sync, 2994 const BaseIndex& mem, Register64 value, 2995 Register64 output) { 2996 AtomicExchange(*this, nullptr, Scalar::Int64, Width::_64, sync, mem, 2997 value.reg, output.reg); 2998 } 2999 3000 void MacroAssembler::atomicFetchOp64(Synchronization sync, AtomicOp op, 3001 Register64 value, const Address& mem, 3002 Register64 temp, Register64 output) { 3003 AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, 3004 value.reg, temp.reg, output.reg); 3005 } 3006 3007 void MacroAssembler::atomicFetchOp64(Synchronization sync, AtomicOp op, 3008 Register64 value, const BaseIndex& mem, 3009 Register64 temp, Register64 output) { 3010 AtomicFetchOp<true>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, 3011 value.reg, temp.reg, output.reg); 3012 } 3013 3014 void MacroAssembler::atomicEffectOp64(Synchronization sync, AtomicOp op, 3015 Register64 value, const Address& mem, 3016 Register64 temp) { 3017 AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, 3018 value.reg, temp.reg, temp.reg); 3019 } 3020 3021 void MacroAssembler::atomicEffectOp64(Synchronization sync, AtomicOp op, 3022 Register64 value, const BaseIndex& mem, 3023 Register64 temp) { 3024 AtomicFetchOp<false>(*this, nullptr, Scalar::Int64, Width::_64, sync, op, mem, 3025 value.reg, temp.reg, 
temp.reg); 3026 } 3027 3028 void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, 3029 const Address& mem, Register oldval, 3030 Register newval, Register output) { 3031 CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, 3032 oldval, newval, output); 3033 } 3034 3035 void MacroAssembler::wasmCompareExchange(const wasm::MemoryAccessDesc& access, 3036 const BaseIndex& mem, Register oldval, 3037 Register newval, Register output) { 3038 CompareExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, 3039 oldval, newval, output); 3040 } 3041 3042 void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization sync, 3043 const Address& mem, Register value, 3044 Register output) { 3045 AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output); 3046 } 3047 3048 void MacroAssembler::atomicExchange(Scalar::Type type, Synchronization sync, 3049 const BaseIndex& mem, Register value, 3050 Register output) { 3051 AtomicExchange(*this, nullptr, type, Width::_32, sync, mem, value, output); 3052 } 3053 3054 void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, 3055 const Address& mem, Register value, 3056 Register output) { 3057 AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, 3058 value, output); 3059 } 3060 3061 void MacroAssembler::wasmAtomicExchange(const wasm::MemoryAccessDesc& access, 3062 const BaseIndex& mem, Register value, 3063 Register output) { 3064 AtomicExchange(*this, &access, access.type(), Width::_32, access.sync(), mem, 3065 value, output); 3066 } 3067 3068 void MacroAssembler::atomicFetchOp(Scalar::Type type, Synchronization sync, 3069 AtomicOp op, Register value, 3070 const Address& mem, Register temp, 3071 Register output) { 3072 AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value, 3073 temp, output); 3074 } 3075 3076 void MacroAssembler::atomicFetchOp(Scalar::Type type, Synchronization sync, 
3077 AtomicOp op, Register value, 3078 const BaseIndex& mem, Register temp, 3079 Register output) { 3080 AtomicFetchOp<true>(*this, nullptr, type, Width::_32, sync, op, mem, value, 3081 temp, output); 3082 } 3083 3084 void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, 3085 AtomicOp op, Register value, 3086 const Address& mem, Register temp, 3087 Register output) { 3088 AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(), 3089 op, mem, value, temp, output); 3090 } 3091 3092 void MacroAssembler::wasmAtomicFetchOp(const wasm::MemoryAccessDesc& access, 3093 AtomicOp op, Register value, 3094 const BaseIndex& mem, Register temp, 3095 Register output) { 3096 AtomicFetchOp<true>(*this, &access, access.type(), Width::_32, access.sync(), 3097 op, mem, value, temp, output); 3098 } 3099 3100 void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, 3101 AtomicOp op, Register value, 3102 const Address& mem, Register temp) { 3103 AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(), 3104 op, mem, value, temp, temp); 3105 } 3106 3107 void MacroAssembler::wasmAtomicEffectOp(const wasm::MemoryAccessDesc& access, 3108 AtomicOp op, Register value, 3109 const BaseIndex& mem, Register temp) { 3110 AtomicFetchOp<false>(*this, &access, access.type(), Width::_32, access.sync(), 3111 op, mem, value, temp, temp); 3112 } 3113 3114 void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, 3115 const Address& mem, 3116 Register64 expect, 3117 Register64 replace, 3118 Register64 output) { 3119 CompareExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, 3120 expect.reg, replace.reg, output.reg); 3121 } 3122 3123 void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access, 3124 const BaseIndex& mem, 3125 Register64 expect, 3126 Register64 replace, 3127 Register64 output) { 3128 CompareExchange(*this, &access, Scalar::Int64, Width::_64, 
access.sync(), mem, 3129 expect.reg, replace.reg, output.reg); 3130 } 3131 3132 void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, 3133 const Address& mem, Register64 value, 3134 Register64 output) { 3135 AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, 3136 value.reg, output.reg); 3137 } 3138 3139 void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access, 3140 const BaseIndex& mem, 3141 Register64 value, Register64 output) { 3142 AtomicExchange(*this, &access, Scalar::Int64, Width::_64, access.sync(), mem, 3143 value.reg, output.reg); 3144 } 3145 3146 void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, 3147 AtomicOp op, Register64 value, 3148 const Address& mem, Register64 temp, 3149 Register64 output) { 3150 AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(), 3151 op, mem, value.reg, temp.reg, output.reg); 3152 } 3153 3154 void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access, 3155 AtomicOp op, Register64 value, 3156 const BaseIndex& mem, Register64 temp, 3157 Register64 output) { 3158 AtomicFetchOp<true>(*this, &access, Scalar::Int64, Width::_64, access.sync(), 3159 op, mem, value.reg, temp.reg, output.reg); 3160 } 3161 3162 void MacroAssembler::wasmAtomicEffectOp64(const wasm::MemoryAccessDesc& access, 3163 AtomicOp op, Register64 value, 3164 const BaseIndex& mem, 3165 Register64 temp) { 3166 AtomicFetchOp<false>(*this, &access, Scalar::Int64, Width::_64, access.sync(), 3167 op, mem, value.reg, temp.reg, temp.reg); 3168 } 3169 3170 // ======================================================================== 3171 // JS atomic operations. 

// Compare-exchange for JS typed arrays.  A Uint32 result may not fit in an
// int32, so for that element type the raw result goes to |temp| and is then
// converted to a double in |output.fpu()|; every other element type places
// the result directly in |output.gpr()|.
template <typename T>
static void CompareExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
                              Synchronization sync, const T& mem,
                              Register oldval, Register newval, Register temp,
                              AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.compareExchange(arrayType, sync, mem, oldval, newval, temp);
    masm.convertUInt32ToDouble(temp, output.fpu());
  } else {
    masm.compareExchange(arrayType, sync, mem, oldval, newval, output.gpr());
  }
}

void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
                                       Synchronization sync, const Address& mem,
                                       Register oldval, Register newval,
                                       Register temp, AnyRegister output) {
  CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
}

void MacroAssembler::compareExchangeJS(Scalar::Type arrayType,
                                       Synchronization sync,
                                       const BaseIndex& mem, Register oldval,
                                       Register newval, Register temp,
                                       AnyRegister output) {
  CompareExchangeJS(*this, arrayType, sync, mem, oldval, newval, temp, output);
}

// Atomic exchange for JS typed arrays; Uint32 results are routed through
// |temp| and converted to a double, as in CompareExchangeJS above.
template <typename T>
static void AtomicExchangeJS(MacroAssembler& masm, Scalar::Type arrayType,
                             Synchronization sync, const T& mem, Register value,
                             Register temp, AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.atomicExchange(arrayType, sync, mem, value, temp);
    masm.convertUInt32ToDouble(temp, output.fpu());
  } else {
    masm.atomicExchange(arrayType, sync, mem, value, output.gpr());
  }
}

void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
                                      Synchronization sync, const Address& mem,
                                      Register value, Register temp,
                                      AnyRegister output) {
  AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
}

void MacroAssembler::atomicExchangeJS(Scalar::Type arrayType,
                                      Synchronization sync,
                                      const BaseIndex& mem, Register value,
                                      Register temp, AnyRegister output) {
  AtomicExchangeJS(*this, arrayType, sync, mem, value, temp, output);
}

// Atomic fetch-op for JS typed arrays.  The Uint32 path needs both temps:
// |temp2| as the helper's scratch and |temp1| for the raw unsigned result,
// which is then converted to a double in |output.fpu()|.
template <typename T>
static void AtomicFetchOpJS(MacroAssembler& masm, Scalar::Type arrayType,
                            Synchronization sync, AtomicOp op, Register value,
                            const T& mem, Register temp1, Register temp2,
                            AnyRegister output) {
  if (arrayType == Scalar::Uint32) {
    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp2, temp1);
    masm.convertUInt32ToDouble(temp1, output.fpu());
  } else {
    masm.atomicFetchOp(arrayType, sync, op, value, mem, temp1, output.gpr());
  }
}

void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
                                     Synchronization sync, AtomicOp op,
                                     Register value, const Address& mem,
                                     Register temp1, Register temp2,
                                     AnyRegister output) {
  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}

void MacroAssembler::atomicFetchOpJS(Scalar::Type arrayType,
                                     Synchronization sync, AtomicOp op,
                                     Register value, const BaseIndex& mem,
                                     Register temp1, Register temp2,
                                     AnyRegister output) {
  AtomicFetchOpJS(*this, arrayType, sync, op, value, mem, temp1, temp2, output);
}

// Effect-only JS atomics: the old value is discarded, so |temp| serves as
// both scratch and (ignored) output.
void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
                                      Synchronization sync, AtomicOp op,
                                      Register value, const BaseIndex& mem,
                                      Register temp) {
  AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
                       value, temp, temp);
}

void MacroAssembler::atomicEffectOpJS(Scalar::Type arrayType,
                                      Synchronization sync, AtomicOp op,
                                      Register value, const Address& mem,
                                      Register temp) {
  AtomicFetchOp<false>(*this, nullptr, arrayType, Width::_32, sync, op, mem,
                       value, temp, temp);
}

// Spin-wait hint: emits an instruction synchronization barrier.
void MacroAssembler::atomicPause() { Isb(); }

// The "flexible" division entry points take a set of live registers on
// platforms whose division helpers clobber registers; ARM64 has native
// Udiv/Sdiv, so the live-register set is ignored here.
void MacroAssembler::flexibleQuotient32(Register lhs, Register rhs,
                                        Register dest, bool isUnsigned,
                                        const LiveRegisterSet&) {
  quotient32(lhs, rhs, dest, isUnsigned);
}

void MacroAssembler::flexibleQuotientPtr(
    Register lhs, Register rhs, Register dest, bool isUnsigned,
    const LiveRegisterSet& volatileLiveRegs) {
  quotient64(lhs, rhs, dest, isUnsigned);
}

void MacroAssembler::flexibleRemainder32(Register lhs, Register rhs,
                                         Register dest, bool isUnsigned,
                                         const LiveRegisterSet&) {
  remainder32(lhs, rhs, dest, isUnsigned);
}

void MacroAssembler::flexibleRemainderPtr(
    Register lhs, Register rhs, Register dest, bool isUnsigned,
    const LiveRegisterSet& volatileLiveRegs) {
  remainder64(lhs, rhs, dest, isUnsigned);
}

// Compute both the 32-bit quotient and remainder of lhs/rhs in one go.
// Inputs are preserved; the remainder is derived from the quotient with a
// single Msub, so no extra temporaries are needed.
void MacroAssembler::flexibleDivMod32(Register lhs, Register rhs,
                                      Register divOutput, Register remOutput,
                                      bool isUnsigned, const LiveRegisterSet&) {
  MOZ_ASSERT(lhs != divOutput && lhs != remOutput, "lhs is preserved");
  MOZ_ASSERT(rhs != divOutput && rhs != remOutput, "rhs is preserved");

  if (isUnsigned) {
    Udiv(ARMRegister(divOutput, 32), ARMRegister(lhs, 32),
         ARMRegister(rhs, 32));
  } else {
    Sdiv(ARMRegister(divOutput, 32), ARMRegister(lhs, 32),
         ARMRegister(rhs, 32));
  }

  // Compute the remainder: remOutput = lhs - (divOutput * rhs).
  Msub(/* result= */ ARMRegister(remOutput, 32), ARMRegister(divOutput, 32),
       ARMRegister(rhs, 32), ARMRegister(lhs, 32));
}

// Emit an ADR whose target is patched later by patchNearAddressMove.  Pools
// and nops are forbidden so the returned offset points exactly at the adr.
CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
  AutoForbidPoolsAndNops afp(this,
                             /* max number of instructions in scope = */ 1);
  CodeOffset offset(currentOffset());
  adr(ARMRegister(dest, 64), 0, LabelDoc());
  return offset;
}

// Patch an adr emitted by moveNearAddressWithPatch to point at |target|.
// The displacement must fit in adr's signed 21-bit immediate (+/-1MiB).
void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
                                          CodeLocationLabel target) {
  ptrdiff_t off = target - loc;
  MOZ_RELEASE_ASSERT(vixl::IsInt21(off));

  Instruction* cur = reinterpret_cast<Instruction*>(loc.raw());
  MOZ_ASSERT(cur->IsADR());

  vixl::Register rd = vixl::XRegister(cur->Rd());
  adr(cur, rd, off);
}

// ========================================================================
// Spectre Mitigations.

void MacroAssembler::speculationBarrier() {
  // Conditional speculation barrier.
  csdb();
}

// floor(src) as an int32 in |dest|; branches to |fail| on NaN, on -0.0, and
// when the result does not fit in an int32.
void MacroAssembler::floorFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  ARMFPRegister iFlt(src, 32);
  ARMRegister o64(dest, 64);
  ARMRegister o32(dest, 32);

  Label handleZero;
  Label fin;

  // Handle ±0 and NaN first.
  Fcmp(iFlt, 0.0);
  B(Assembler::Equal, &handleZero);
  // NaN is always a bail condition, just bail directly.
  B(Assembler::Overflow, fail);

  // Round towards negative infinity.
  Fcvtms(o64, iFlt);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(o64, Operand(o64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(o64, o64);
  B(&fin);

  bind(&handleZero);
  // Move the float into the output reg, if it is non-zero, then the original
  // value was -0.0.
  Fmov(o32, iFlt);
  Cbnz(o32, fail);
  bind(&fin);
}

// Double-precision variant of floorFloat32ToInt32; same failure conditions.
void MacroAssembler::floorDoubleToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  ARMFPRegister iDbl(src, 64);
  ARMRegister o64(dest, 64);

  Label handleZero;
  Label fin;

  // Handle ±0 and NaN first.
  Fcmp(iDbl, 0.0);
  B(Assembler::Equal, &handleZero);
  // NaN is always a bail condition, just bail directly.
  B(Assembler::Overflow, fail);

  // Round towards negative infinity.
  Fcvtms(o64, iDbl);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(o64, Operand(o64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(o64, o64);
  B(&fin);

  bind(&handleZero);
  // Move the double into the output reg, if it is non-zero, then the original
  // value was -0.0.
  Fmov(o64, iDbl);
  Cbnz(o64, fail);
  bind(&fin);
}

// ceil(src) as an int32 in |dest|; branches to |fail| on NaN, on inputs in
// (-1, -0] (whose ceiling would be -0), and on out-of-range results.
void MacroAssembler::ceilFloat32ToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  ARMFPRegister iFlt(src, 32);
  ARMRegister o64(dest, 64);
  ARMRegister o32(dest, 32);

  Label handleZero;
  Label fin;

  // Round towards positive infinity.
  Fcvtps(o64, iFlt);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(o64, Operand(o64, vixl::SXTW));
  B(NotEqual, fail);

  // We have to check for (-1, -0] and NaN when the result is zero.
  Cbz(o64, &handleZero);

  // Clear upper 32 bits.
  Uxtw(o64, o64);
  B(&fin);

  // Bail if the input is in (-1, -0] or NaN.
  bind(&handleZero);
  // Move the float into the output reg, if it is non-zero, then the original
  // value wasn't +0.0.
  Fmov(o32, iFlt);
  Cbnz(o32, fail);
  bind(&fin);
}

// Double-precision variant of ceilFloat32ToInt32; same failure conditions.
void MacroAssembler::ceilDoubleToInt32(FloatRegister src, Register dest,
                                       Label* fail) {
  ARMFPRegister iDbl(src, 64);
  ARMRegister o64(dest, 64);

  Label handleZero;
  Label fin;

  // Round towards positive infinity.
  Fcvtps(o64, iDbl);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(o64, Operand(o64, vixl::SXTW));
  B(NotEqual, fail);

  // We have to check for (-1, -0] and NaN when the result is zero.
  Cbz(o64, &handleZero);

  // Clear upper 32 bits.
  Uxtw(o64, o64);
  B(&fin);

  // Bail if the input is in (-1, -0] or NaN.
  bind(&handleZero);
  // Move the double into the output reg, if it is non-zero, then the original
  // value wasn't +0.0.
  Fmov(o64, iDbl);
  Cbnz(o64, fail);
  bind(&fin);
}

// trunc(src) as an int32 in |dest|; branches to |fail| on NaN, on inputs in
// (-1, -0] (which truncate to -0), and on out-of-range results.
void MacroAssembler::truncFloat32ToInt32(FloatRegister src, Register dest,
                                         Label* fail) {
  ARMFPRegister src32(src, 32);
  ARMRegister dest32(dest, 32);
  ARMRegister dest64(dest, 64);

  Label done, zeroCase;

  // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
  // In the case of overflow, the output is saturated.
  // In the case of NaN and -0, the output is zero.
  Fcvtzs(dest64, src32);

  // If the output was zero, worry about special cases.
  Cbz(dest64, &zeroCase);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(dest64, Operand(dest64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(dest64, dest64);

  // If the output was non-zero and wasn't saturated, just return it.
  B(&done);

  // Handle the case of a zero output:
  // 1. The input may have been NaN, requiring a failure.
  // 2. The input may have been in (-1,-0], requiring a failure.
  {
    bind(&zeroCase);

    // Combine test for negative and NaN values using a single bitwise
    // operation.
    //
    // | Decimal number | Bitwise representation |
    // |----------------|------------------------|
    // | -0             | 8000'0000              |
    // | +0             | 0000'0000              |
    // | +1             | 3f80'0000              |
    // | NaN (or +Inf)  | 7fyx'xxxx, y >= 8      |
    // | -NaN (or -Inf) | ffyx'xxxx, y >= 8      |
    //
    // If any of two most significant bits is set, the number isn't in [0, 1).
    // (Recall that floating point numbers, except for NaN, are strictly ordered
    // when comparing their bitwise representation as signed integers.)

    Fmov(dest32, src32);
    Lsr(dest32, dest32, 30);
    Cbnz(dest32, fail);
  }

  bind(&done);
}

// Double-precision variant of truncFloat32ToInt32; same failure conditions.
void MacroAssembler::truncDoubleToInt32(FloatRegister src, Register dest,
                                        Label* fail) {
  ARMFPRegister src64(src, 64);
  ARMRegister dest64(dest, 64);
  ARMRegister dest32(dest, 32);

  Label done, zeroCase;

  // Convert scalar to signed 64-bit fixed-point, rounding toward zero.
  // In the case of overflow, the output is saturated.
  // In the case of NaN and -0, the output is zero.
  Fcvtzs(dest64, src64);

  // If the output was zero, worry about special cases.
  Cbz(dest64, &zeroCase);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(dest64, Operand(dest64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(dest64, dest64);

  // If the output was non-zero and wasn't saturated, just return it.
  B(&done);

  // Handle the case of a zero output:
  // 1. The input may have been NaN, requiring a failure.
  // 2. The input may have been in (-1,-0], requiring a failure.
  {
    bind(&zeroCase);

    // Combine test for negative and NaN values using a single bitwise
    // operation.
    //
    // | Decimal number | Bitwise representation  |
    // |----------------|-------------------------|
    // | -0             | 8000'0000'0000'0000     |
    // | +0             | 0000'0000'0000'0000     |
    // | +1             | 3ff0'0000'0000'0000     |
    // | NaN (or +Inf)  | 7ffx'xxxx'xxxx'xxxx     |
    // | -NaN (or -Inf) | fffx'xxxx'xxxx'xxxx     |
    //
    // If any of two most significant bits is set, the number isn't in [0, 1).
    // (Recall that floating point numbers, except for NaN, are strictly ordered
    // when comparing their bitwise representation as signed integers.)

    Fmov(dest64, src64);
    Lsr(dest64, dest64, 62);
    Cbnz(dest64, fail);
  }

  bind(&done);
}

// JS round(src) (round-half-up) as an int32 in |dest|; branches to |fail| on
// NaN, on results of -0, and on out-of-range results.  |temp| is clobbered
// on the negative-input slow path.
void MacroAssembler::roundFloat32ToInt32(FloatRegister src, Register dest,
                                         FloatRegister temp, Label* fail) {
  ARMFPRegister src32(src, 32);
  ARMRegister dest32(dest, 32);
  ARMRegister dest64(dest, 64);

  Label negative, saturated, done;

  // Branch to a slow path if input < 0.0 due to complicated rounding rules.
  // Note that Fcmp with NaN unsets the negative flag.
  Fcmp(src32, 0.0);
  B(&negative, Assembler::Condition::lo);

  // Handle the simple case of a positive input, and also -0 and NaN.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, up).
  // 2. If < 0.5, round to integer with lower absolute value (so, down).
  // 3. If = 0.5, round to +Infinity (so, up).
  {
    // Convert to signed 64-bit integer, rounding halfway cases away from zero.
    // In the case of overflow, the output is saturated.
    // In the case of NaN and -0, the output is zero.
    Fcvtas(dest64, src32);

    // In the case of zero, the input may have been NaN or -0, which must bail.
    Cbnz(dest64, &saturated);

    // Combine test for -0 and NaN values using a single bitwise operation.
    // See truncFloat32ToInt32 for an explanation.
    Fmov(dest32, src32);
    Lsr(dest32, dest32, 30);
    Cbnz(dest32, fail);

    B(&done);
  }

  // Handle the complicated case of a negative input.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, down).
  // 2. If < 0.5, round to integer with lower absolute value (so, up).
  // 3. If = 0.5, round to +Infinity (so, up).
  bind(&negative);
  {
    // Inputs in [-0.5, 0) are rounded to -0. Fail.
    loadConstantFloat32(-0.5f, temp);
    branchFloat(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);

    // Other negative inputs need the biggest double less than 0.5 added.
    loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
    addFloat32(src, temp);

    // Round all values toward -Infinity.
    // In the case of overflow, the output is saturated.
    // NaN and -0 are already handled by the "positive number" path above.
    Fcvtms(dest64, temp);
  }

  bind(&saturated);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(dest64, Operand(dest64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(dest64, dest64);

  bind(&done);
}

// Double-precision variant of roundFloat32ToInt32; same failure conditions.
void MacroAssembler::roundDoubleToInt32(FloatRegister src, Register dest,
                                        FloatRegister temp, Label* fail) {
  ARMFPRegister src64(src, 64);
  ARMRegister dest64(dest, 64);
  ARMRegister dest32(dest, 32);

  Label negative, saturated, done;

  // Branch to a slow path if input < 0.0 due to complicated rounding rules.
  // Note that Fcmp with NaN unsets the negative flag.
  Fcmp(src64, 0.0);
  B(&negative, Assembler::Condition::lo);

  // Handle the simple case of a positive input, and also -0 and NaN.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, up).
  // 2. If < 0.5, round to integer with lower absolute value (so, down).
  // 3. If = 0.5, round to +Infinity (so, up).
  {
    // Convert to signed 64-bit integer, rounding halfway cases away from zero.
    // In the case of overflow, the output is saturated.
    // In the case of NaN and -0, the output is zero.
    Fcvtas(dest64, src64);

    // In the case of zero, the input may have been NaN or -0, which must bail.
    Cbnz(dest64, &saturated);

    // Combine test for -0 and NaN values using a single bitwise operation.
    // See truncDoubleToInt32 for an explanation.
    Fmov(dest64, src64);
    Lsr(dest64, dest64, 62);
    Cbnz(dest64, fail);

    B(&done);
  }

  // Handle the complicated case of a negative input.
  // Rounding proceeds with consideration of the fractional part of the input:
  // 1. If > 0.5, round to integer with higher absolute value (so, down).
  // 2. If < 0.5, round to integer with lower absolute value (so, up).
  // 3. If = 0.5, round to +Infinity (so, up).
  bind(&negative);
  {
    // Inputs in [-0.5, 0) are rounded to -0. Fail.
    loadConstantDouble(-0.5, temp);
    branchDouble(Assembler::DoubleGreaterThanOrEqual, src, temp, fail);

    // Other negative inputs need the biggest double less than 0.5 added.
    loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
    addDouble(src, temp);

    // Round all values toward -Infinity.
    // In the case of overflow, the output is saturated.
    // NaN and -0 are already handled by the "positive number" path above.
    Fcvtms(dest64, temp);
  }

  bind(&saturated);

  // Sign extend lower 32 bits to test if the result isn't an Int32.
  Cmp(dest64, Operand(dest64, vixl::SXTW));
  B(NotEqual, fail);

  // Clear upper 32 bits.
  Uxtw(dest64, dest64);

  bind(&done);
}

// Round |src| to an integral double value per |mode| using the matching
// frint* instruction (the result stays in floating-point format).
void MacroAssembler::nearbyIntDouble(RoundingMode mode, FloatRegister src,
                                     FloatRegister dest) {
  switch (mode) {
    case RoundingMode::Up:
      frintp(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
      return;
    case RoundingMode::Down:
      frintm(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
      return;
    case RoundingMode::NearestTiesToEven:
      frintn(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
      return;
    case RoundingMode::TowardsZero:
      frintz(ARMFPRegister(dest, 64), ARMFPRegister(src, 64));
      return;
  }
  MOZ_CRASH("unexpected mode");
}

// Single-precision variant of nearbyIntDouble.
void MacroAssembler::nearbyIntFloat32(RoundingMode mode, FloatRegister src,
                                      FloatRegister dest) {
  switch (mode) {
    case RoundingMode::Up:
      frintp(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
      return;
    case RoundingMode::Down:
      frintm(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
      return;
    case RoundingMode::NearestTiesToEven:
      frintn(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
      return;
    case RoundingMode::TowardsZero:
      frintz(ARMFPRegister(dest, 32), ARMFPRegister(src, 32));
      return;
  }
  MOZ_CRASH("unexpected mode");
}

// output = copysign(lhs, rhs): |lhs|'s magnitude with |rhs|'s sign bit,
// implemented with a vector BIT (bitwise-insert-if-true) selecting only the
// sign bit from |rhs|.
void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
                                    FloatRegister output) {
  ScratchDoubleScope scratch(*this);

  // Double with only the sign bit set
  loadConstantDouble(-0.0, scratch);

  if (lhs != output) {
    moveDouble(lhs, output);
  }

  bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
}

// Single-precision variant of copySignDouble.
void MacroAssembler::copySignFloat32(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister output) {
  ScratchFloat32Scope scratch(*this);

  // Float with only the sign bit set
  loadConstantFloat32(-0.0f, scratch);

  if (lhs != output) {
    moveFloat32(lhs, output);
  }

  bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
}

// pointer += (indexTemp32 << shift).
// NOTE(review): this uses the full 64-bit view of |indexTemp32|, so it
// assumes the upper 32 bits of that register are already clear — confirm
// against callers.
void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
                                        Register pointer) {
  Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64),
      Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift));
}

// Emit the marker instruction that identifies the preceding call as a "slow"
// wasm call site (checked by wasmCheckSlowCallsite below).
void MacroAssembler::wasmMarkCallAsSlow() {
  // Use mov() instead of Mov() to ensure this no-op move isn't elided.
  vixl::MacroAssembler::mov(x28, x28);
}

// Fixed encoding of the marker instruction emitted by wasmMarkCallAsSlow
// (presumably the encoding of `mov x28, x28` — verify against the ARM
// encoding tables if this is changed).
const int32_t SlowCallMarker = 0xaa1c03fc;

// Branch to |notSlow| unless the instruction at return address |ra| is the
// slow-call marker.  |temp1| is unused on arm64; |temp2| is clobbered.
void MacroAssembler::wasmCheckSlowCallsite(Register ra, Label* notSlow,
                                           Register temp1, Register temp2) {
  MOZ_ASSERT(ra != temp2);
  Ldr(W(temp2), MemOperand(X(ra), 0));
  Cmp(W(temp2), Operand(SlowCallMarker));
  B(Assembler::NotEqual, notSlow);
}

// Emit a call followed immediately by the slow-call marker; pools/nops are
// forbidden so the marker is adjacent to the call (the pseudo-SP sync adds
// one extra instruction when the stack pointer isn't the real sp).
CodeOffset MacroAssembler::wasmMarkedSlowCall(const wasm::CallSiteDesc& desc,
                                              const Register reg) {
  AutoForbidPoolsAndNops afp(this, !GetStackPointer64().Is(vixl::sp) ? 3 : 2);
  CodeOffset offset = call(desc, reg);
  wasmMarkCallAsSlow();
  return offset;
}

//}}} check_macroassembler_style

}  // namespace jit
}  // namespace js