tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git

MacroAssembler-x86.cpp (72440B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "jit/x86/MacroAssembler-x86.h"
      8 
      9 #include "mozilla/Casting.h"
     10 
     11 #include "jit/AtomicOp.h"
     12 #include "jit/Bailouts.h"
     13 #include "jit/BaselineFrame.h"
     14 #include "jit/JitFrames.h"
     15 #include "jit/JitRuntime.h"
     16 #include "jit/MacroAssembler.h"
     17 #include "jit/MoveEmitter.h"
     18 #include "util/Memory.h"
     19 #include "vm/BigIntType.h"
     20 #include "vm/JitActivation.h"  // js::jit::JitActivation
     21 #include "vm/JSContext.h"
     22 #include "vm/StringType.h"
     23 #include "wasm/WasmStubs.h"
     24 
     25 #include "jit/MacroAssembler-inl.h"
     26 #include "vm/JSScript-inl.h"
     27 
     28 using namespace js;
     29 using namespace js::jit;
     30 
     31 void MacroAssemblerX86::loadConstantDouble(double d, FloatRegister dest) {
     32  if (maybeInlineDouble(d, dest)) {
     33    return;
     34  }
     35  Double* dbl = getDouble(d);
     36  if (!dbl) {
     37    return;
     38  }
     39  masm.vmovsd_mr(nullptr, dest.encoding());
     40  propagateOOM(dbl->uses.append(CodeOffset(masm.size())));
     41 }
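
// The nullptr passed to vmovsd_mr is a placeholder: on x86-32 the constant is
// loaded through a 32-bit absolute address that is only known once finish()
// emits the constant pool. The CodeOffset appended to dbl->uses records where
// that address field sits so it can be patched later. A minimal sketch of the
// scheme (illustrative, not part of the build):
//
//   masm.vmovsd_mr(nullptr, dest.encoding());  // load from address 0 for now
//   uses.append(CodeOffset(masm.size()));      // remember the patch site
//   // ... later, finish() binds each use to the pool entry:
//   addCodeLabel(CodeLabel(use, poolEntryOffset));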
     42 
     43 void MacroAssemblerX86::loadConstantFloat32(float f, FloatRegister dest) {
     44  if (maybeInlineFloat(f, dest)) {
     45    return;
     46  }
     47  Float* flt = getFloat(f);
     48  if (!flt) {
     49    return;
     50  }
     51  masm.vmovss_mr(nullptr, dest.encoding());
     52  propagateOOM(flt->uses.append(CodeOffset(masm.size())));
     53 }
     54 
     55 void MacroAssemblerX86::loadConstantSimd128Int(const SimdConstant& v,
     56                                               FloatRegister dest) {
     57  if (maybeInlineSimd128Int(v, dest)) {
     58    return;
     59  }
     60  SimdData* i4 = getSimdData(v);
     61  if (!i4) {
     62    return;
     63  }
     64  masm.vmovdqa_mr(nullptr, dest.encoding());
     65  propagateOOM(i4->uses.append(CodeOffset(masm.size())));
     66 }
     67 
     68 void MacroAssemblerX86::loadConstantSimd128Float(const SimdConstant& v,
     69                                                 FloatRegister dest) {
     70  if (maybeInlineSimd128Float(v, dest)) {
     71    return;
     72  }
     73  SimdData* f4 = getSimdData(v);
     74  if (!f4) {
     75    return;
     76  }
     77  masm.vmovaps_mr(nullptr, dest.encoding());
     78  propagateOOM(f4->uses.append(CodeOffset(masm.size())));
     79 }
     80 
     81 void MacroAssemblerX86::vpPatchOpSimd128(
     82    const SimdConstant& v, FloatRegister src, FloatRegister dest,
     83    void (X86Encoding::BaseAssemblerX86::*op)(
     84        const void* address, X86Encoding::XMMRegisterID srcId,
     85        X86Encoding::XMMRegisterID destId)) {
     86  SimdData* val = getSimdData(v);
     87  if (!val) {
     88    return;
     89  }
     90  (masm.*op)(nullptr, src.encoding(), dest.encoding());
     91  propagateOOM(val->uses.append(CodeOffset(masm.size())));
     92 }
     93 
     94 void MacroAssemblerX86::vpPatchOpSimd128(
     95    const SimdConstant& v, FloatRegister src, FloatRegister dest,
     96    size_t (X86Encoding::BaseAssemblerX86::*op)(
     97        const void* address, X86Encoding::XMMRegisterID srcId,
     98        X86Encoding::XMMRegisterID destId)) {
     99  SimdData* val = getSimdData(v);
    100  if (!val) {
    101    return;
    102  }
    103  size_t patchOffsetFromEnd =
    104      (masm.*op)(nullptr, src.encoding(), dest.encoding());
    105  propagateOOM(val->uses.append(CodeOffset(masm.size() - patchOffsetFromEnd)));
    106 }
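
// The two overloads differ only in how they locate the patchable address
// field. For most of these encodings the 4-byte displacement is the last
// thing emitted, so masm.size() points just past it. Encodings that append a
// trailing immediate (presumably the vcmp* forms below, which carry a
// predicate byte) instead return how far the field sits from the end of the
// instruction, hence the masm.size() - patchOffsetFromEnd in this overload.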
    107 
    108 void MacroAssemblerX86::vpaddbSimd128(const SimdConstant& v, FloatRegister lhs,
    109                                      FloatRegister dest) {
    110  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddb_mr);
    111 }
    112 
    113 void MacroAssemblerX86::vpaddwSimd128(const SimdConstant& v, FloatRegister lhs,
    114                                      FloatRegister dest) {
    115  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddw_mr);
    116 }
    117 
    118 void MacroAssemblerX86::vpadddSimd128(const SimdConstant& v, FloatRegister lhs,
    119                                      FloatRegister dest) {
    120  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddd_mr);
    121 }
    122 
    123 void MacroAssemblerX86::vpaddqSimd128(const SimdConstant& v, FloatRegister lhs,
    124                                      FloatRegister dest) {
    125  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddq_mr);
    126 }
    127 
    128 void MacroAssemblerX86::vpsubbSimd128(const SimdConstant& v, FloatRegister lhs,
    129                                      FloatRegister dest) {
    130  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubb_mr);
    131 }
    132 
    133 void MacroAssemblerX86::vpsubwSimd128(const SimdConstant& v, FloatRegister lhs,
    134                                      FloatRegister dest) {
    135  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubw_mr);
    136 }
    137 
    138 void MacroAssemblerX86::vpsubdSimd128(const SimdConstant& v, FloatRegister lhs,
    139                                      FloatRegister dest) {
    140  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubd_mr);
    141 }
    142 
    143 void MacroAssemblerX86::vpsubqSimd128(const SimdConstant& v, FloatRegister lhs,
    144                                      FloatRegister dest) {
    145  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubq_mr);
    146 }
    147 
    148 void MacroAssemblerX86::vpmullwSimd128(const SimdConstant& v, FloatRegister lhs,
    149                                       FloatRegister dest) {
    150  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmullw_mr);
    151 }
    152 
    153 void MacroAssemblerX86::vpmulldSimd128(const SimdConstant& v, FloatRegister lhs,
    154                                       FloatRegister dest) {
    155  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmulld_mr);
    156 }
    157 
    158 void MacroAssemblerX86::vpaddsbSimd128(const SimdConstant& v, FloatRegister lhs,
    159                                       FloatRegister dest) {
    160  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddsb_mr);
    161 }
    162 
    163 void MacroAssemblerX86::vpaddusbSimd128(const SimdConstant& v,
    164                                        FloatRegister lhs, FloatRegister dest) {
    165  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddusb_mr);
    166 }
    167 
    168 void MacroAssemblerX86::vpaddswSimd128(const SimdConstant& v, FloatRegister lhs,
    169                                       FloatRegister dest) {
    170  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddsw_mr);
    171 }
    172 
    173 void MacroAssemblerX86::vpadduswSimd128(const SimdConstant& v,
    174                                        FloatRegister lhs, FloatRegister dest) {
    175  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpaddusw_mr);
    176 }
    177 
    178 void MacroAssemblerX86::vpsubsbSimd128(const SimdConstant& v, FloatRegister lhs,
    179                                       FloatRegister dest) {
    180  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubsb_mr);
    181 }
    182 
    183 void MacroAssemblerX86::vpsubusbSimd128(const SimdConstant& v,
    184                                        FloatRegister lhs, FloatRegister dest) {
    185  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubusb_mr);
    186 }
    187 
    188 void MacroAssemblerX86::vpsubswSimd128(const SimdConstant& v, FloatRegister lhs,
    189                                       FloatRegister dest) {
    190  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubsw_mr);
    191 }
    192 
    193 void MacroAssemblerX86::vpsubuswSimd128(const SimdConstant& v,
    194                                        FloatRegister lhs, FloatRegister dest) {
    195  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpsubusw_mr);
    196 }
    197 
    198 void MacroAssemblerX86::vpminsbSimd128(const SimdConstant& v, FloatRegister lhs,
    199                                       FloatRegister dest) {
    200  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminsb_mr);
    201 }
    202 
    203 void MacroAssemblerX86::vpminubSimd128(const SimdConstant& v, FloatRegister lhs,
    204                                       FloatRegister dest) {
    205  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminub_mr);
    206 }
    207 
    208 void MacroAssemblerX86::vpminswSimd128(const SimdConstant& v, FloatRegister lhs,
    209                                       FloatRegister dest) {
    210  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminsw_mr);
    211 }
    212 
    213 void MacroAssemblerX86::vpminuwSimd128(const SimdConstant& v, FloatRegister lhs,
    214                                       FloatRegister dest) {
    215  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminuw_mr);
    216 }
    217 
    218 void MacroAssemblerX86::vpminsdSimd128(const SimdConstant& v, FloatRegister lhs,
    219                                       FloatRegister dest) {
    220  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminsd_mr);
    221 }
    222 
    223 void MacroAssemblerX86::vpminudSimd128(const SimdConstant& v, FloatRegister lhs,
    224                                       FloatRegister dest) {
    225  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpminud_mr);
    226 }
    227 
    228 void MacroAssemblerX86::vpmaxsbSimd128(const SimdConstant& v, FloatRegister lhs,
    229                                       FloatRegister dest) {
    230  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxsb_mr);
    231 }
    232 
    233 void MacroAssemblerX86::vpmaxubSimd128(const SimdConstant& v, FloatRegister lhs,
    234                                       FloatRegister dest) {
    235  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxub_mr);
    236 }
    237 
    238 void MacroAssemblerX86::vpmaxswSimd128(const SimdConstant& v, FloatRegister lhs,
    239                                       FloatRegister dest) {
    240  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxsw_mr);
    241 }
    242 
    243 void MacroAssemblerX86::vpmaxuwSimd128(const SimdConstant& v, FloatRegister lhs,
    244                                       FloatRegister dest) {
    245  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxuw_mr);
    246 }
    247 
    248 void MacroAssemblerX86::vpmaxsdSimd128(const SimdConstant& v, FloatRegister lhs,
    249                                       FloatRegister dest) {
    250  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxsd_mr);
    251 }
    252 
    253 void MacroAssemblerX86::vpmaxudSimd128(const SimdConstant& v, FloatRegister lhs,
    254                                       FloatRegister dest) {
    255  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaxud_mr);
    256 }
    257 
    258 void MacroAssemblerX86::vpandSimd128(const SimdConstant& v, FloatRegister lhs,
    259                                     FloatRegister dest) {
    260  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpand_mr);
    261 }
    262 
    263 void MacroAssemblerX86::vpxorSimd128(const SimdConstant& v, FloatRegister lhs,
    264                                     FloatRegister dest) {
    265  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpxor_mr);
    266 }
    267 
    268 void MacroAssemblerX86::vporSimd128(const SimdConstant& v, FloatRegister lhs,
    269                                    FloatRegister dest) {
    270  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpor_mr);
    271 }
    272 
    273 void MacroAssemblerX86::vaddpsSimd128(const SimdConstant& v, FloatRegister lhs,
    274                                      FloatRegister dest) {
    275  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vaddps_mr);
    276 }
    277 
    278 void MacroAssemblerX86::vaddpdSimd128(const SimdConstant& v, FloatRegister lhs,
    279                                      FloatRegister dest) {
    280  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vaddpd_mr);
    281 }
    282 
    283 void MacroAssemblerX86::vsubpsSimd128(const SimdConstant& v, FloatRegister lhs,
    284                                      FloatRegister dest) {
    285  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vsubps_mr);
    286 }
    287 
    288 void MacroAssemblerX86::vsubpdSimd128(const SimdConstant& v, FloatRegister lhs,
    289                                      FloatRegister dest) {
    290  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vsubpd_mr);
    291 }
    292 
    293 void MacroAssemblerX86::vdivpsSimd128(const SimdConstant& v, FloatRegister lhs,
    294                                      FloatRegister dest) {
    295  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vdivps_mr);
    296 }
    297 
    298 void MacroAssemblerX86::vdivpdSimd128(const SimdConstant& v, FloatRegister lhs,
    299                                      FloatRegister dest) {
    300  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vdivpd_mr);
    301 }
    302 
    303 void MacroAssemblerX86::vmulpsSimd128(const SimdConstant& v, FloatRegister lhs,
    304                                      FloatRegister dest) {
    305  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vmulps_mr);
    306 }
    307 
    308 void MacroAssemblerX86::vmulpdSimd128(const SimdConstant& v, FloatRegister lhs,
    309                                      FloatRegister dest) {
    310  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vmulpd_mr);
    311 }
    312 
    313 void MacroAssemblerX86::vandpsSimd128(const SimdConstant& v, FloatRegister lhs,
    314                                      FloatRegister dest) {
    315  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vandps_mr);
    316 }
    317 
    318 void MacroAssemblerX86::vandpdSimd128(const SimdConstant& v, FloatRegister lhs,
    319                                      FloatRegister dest) {
    320  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vandpd_mr);
    321 }
    322 
    323 void MacroAssemblerX86::vxorpsSimd128(const SimdConstant& v, FloatRegister lhs,
    324                                      FloatRegister dest) {
    325  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vxorps_mr);
    326 }
    327 
    328 void MacroAssemblerX86::vxorpdSimd128(const SimdConstant& v, FloatRegister lhs,
    329                                      FloatRegister dest) {
    330  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vxorpd_mr);
    331 }
    332 
    333 void MacroAssemblerX86::vminpdSimd128(const SimdConstant& v, FloatRegister lhs,
    334                                      FloatRegister dest) {
    335  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vminpd_mr);
    336 }
    337 
    338 void MacroAssemblerX86::vpacksswbSimd128(const SimdConstant& v,
    339                                         FloatRegister lhs,
    340                                         FloatRegister dest) {
    341  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpacksswb_mr);
    342 }
    343 
    344 void MacroAssemblerX86::vpackuswbSimd128(const SimdConstant& v,
    345                                         FloatRegister lhs,
    346                                         FloatRegister dest) {
    347  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpackuswb_mr);
    348 }
    349 
    350 void MacroAssemblerX86::vpackssdwSimd128(const SimdConstant& v,
    351                                         FloatRegister lhs,
    352                                         FloatRegister dest) {
    353  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpackssdw_mr);
    354 }
    355 
    356 void MacroAssemblerX86::vpackusdwSimd128(const SimdConstant& v,
    357                                         FloatRegister lhs,
    358                                         FloatRegister dest) {
    359  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpackusdw_mr);
    360 }
    361 
    362 void MacroAssemblerX86::vpunpckldqSimd128(const SimdConstant& v,
    363                                          FloatRegister lhs,
    364                                          FloatRegister dest) {
    365  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpunpckldq_mr);
    366 }
    367 
    368 void MacroAssemblerX86::vunpcklpsSimd128(const SimdConstant& v,
    369                                         FloatRegister lhs,
    370                                         FloatRegister dest) {
    371  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vunpcklps_mr);
    372 }
    373 
    374 void MacroAssemblerX86::vpshufbSimd128(const SimdConstant& v, FloatRegister lhs,
    375                                       FloatRegister dest) {
    376  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpshufb_mr);
    377 }
    378 
    379 void MacroAssemblerX86::vptestSimd128(const SimdConstant& v,
    380                                      FloatRegister lhs) {
    381  vpPatchOpSimd128(v, lhs, &X86Encoding::BaseAssemblerX86::vptest_mr);
    382 }
    383 
    384 void MacroAssemblerX86::vpmaddwdSimd128(const SimdConstant& v,
    385                                        FloatRegister lhs, FloatRegister dest) {
    386  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaddwd_mr);
    387 }
    388 
    389 void MacroAssemblerX86::vpcmpeqbSimd128(const SimdConstant& v,
    390                                        FloatRegister lhs, FloatRegister dest) {
    391  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpeqb_mr);
    392 }
    393 
    394 void MacroAssemblerX86::vpcmpgtbSimd128(const SimdConstant& v,
    395                                        FloatRegister lhs, FloatRegister dest) {
    396  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpgtb_mr);
    397 }
    398 
    399 void MacroAssemblerX86::vpcmpeqwSimd128(const SimdConstant& v,
    400                                        FloatRegister lhs, FloatRegister dest) {
    401  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpeqw_mr);
    402 }
    403 
    404 void MacroAssemblerX86::vpcmpgtwSimd128(const SimdConstant& v,
    405                                        FloatRegister lhs, FloatRegister dest) {
    406  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpgtw_mr);
    407 }
    408 
    409 void MacroAssemblerX86::vpcmpeqdSimd128(const SimdConstant& v,
    410                                        FloatRegister lhs, FloatRegister dest) {
    411  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpeqd_mr);
    412 }
    413 
    414 void MacroAssemblerX86::vpcmpgtdSimd128(const SimdConstant& v,
    415                                        FloatRegister lhs, FloatRegister dest) {
    416  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpcmpgtd_mr);
    417 }
    418 
    419 void MacroAssemblerX86::vcmpeqpsSimd128(const SimdConstant& v,
    420                                        FloatRegister lhs, FloatRegister dest) {
    421  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpeqps_mr);
    422 }
    423 
    424 void MacroAssemblerX86::vcmpneqpsSimd128(const SimdConstant& v,
    425                                         FloatRegister lhs,
    426                                         FloatRegister dest) {
    427  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpneqps_mr);
    428 }
    429 
    430 void MacroAssemblerX86::vcmpltpsSimd128(const SimdConstant& v,
    431                                        FloatRegister lhs, FloatRegister dest) {
    432  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpltps_mr);
    433 }
    434 
    435 void MacroAssemblerX86::vcmplepsSimd128(const SimdConstant& v,
    436                                        FloatRegister lhs, FloatRegister dest) {
    437  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpleps_mr);
    438 }
    439 
    440 void MacroAssemblerX86::vcmpgepsSimd128(const SimdConstant& v,
    441                                        FloatRegister lhs, FloatRegister dest) {
    442  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpgeps_mr);
    443 }
    444 
    445 void MacroAssemblerX86::vcmpeqpdSimd128(const SimdConstant& v,
    446                                        FloatRegister lhs, FloatRegister dest) {
    447  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpeqpd_mr);
    448 }
    449 
    450 void MacroAssemblerX86::vcmpneqpdSimd128(const SimdConstant& v,
    451                                         FloatRegister lhs,
    452                                         FloatRegister dest) {
    453  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpneqpd_mr);
    454 }
    455 
    456 void MacroAssemblerX86::vcmpltpdSimd128(const SimdConstant& v,
    457                                        FloatRegister lhs, FloatRegister dest) {
    458  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmpltpd_mr);
    459 }
    460 
    461 void MacroAssemblerX86::vcmplepdSimd128(const SimdConstant& v,
    462                                        FloatRegister lhs, FloatRegister dest) {
    463  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmplepd_mr);
    464 }
    465 
    466 void MacroAssemblerX86::vpmaddubswSimd128(const SimdConstant& v,
    467                                          FloatRegister lhs,
    468                                          FloatRegister dest) {
    469  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaddubsw_mr);
    470 }
    471 
    472 void MacroAssemblerX86::vpmuludqSimd128(const SimdConstant& v,
    473                                        FloatRegister lhs, FloatRegister dest) {
    474  vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmuludq_mr);
    475 }
    476 
    477 void MacroAssemblerX86::finish() {
    478  // Last instruction may be an indirect jump so eagerly insert an undefined
    479  // instruction byte to prevent processors from decoding data values into
    480  // their pipelines. See Intel performance guides.
    481  masm.ud2();
    482 
    483  if (!doubles_.empty()) {
    484    masm.haltingAlign(sizeof(double));
    485  }
    486  for (const Double& d : doubles_) {
    487    CodeOffset cst(masm.currentOffset());
    488    for (CodeOffset use : d.uses) {
    489      addCodeLabel(CodeLabel(use, cst));
    490    }
    491    masm.doubleConstant(d.value);
    492    if (!enoughMemory_) {
    493      return;
    494    }
    495  }
    496 
    497  if (!floats_.empty()) {
    498    masm.haltingAlign(sizeof(float));
    499  }
    500  for (const Float& f : floats_) {
    501    CodeOffset cst(masm.currentOffset());
    502    for (CodeOffset use : f.uses) {
    503      addCodeLabel(CodeLabel(use, cst));
    504    }
    505    masm.floatConstant(f.value);
    506    if (!enoughMemory_) {
    507      return;
    508    }
    509  }
    510 
    511  // SIMD memory values must be suitably aligned.
    512  if (!simds_.empty()) {
    513    masm.haltingAlign(SimdMemoryAlignment);
    514  }
    515  for (const SimdData& v : simds_) {
    516    CodeOffset cst(masm.currentOffset());
    517    for (CodeOffset use : v.uses) {
    518      addCodeLabel(CodeLabel(use, cst));
    519    }
    520    masm.simd128Constant(v.value.bytes());
    521    if (!enoughMemory_) {
    522      return;
    523    }
    524  }
    525 }
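
// finish() appends the constant pools after the code and resolves the
// placeholder loads emitted above: each recorded use is tied to its pool
// entry with a CodeLabel, and the 4-byte absolute address at every use site
// is rewritten once the buffer's final placement is known. This is why the
// loads could be emitted against nullptr in the first place: x86-32 has no
// RIP-relative addressing, so an absolute pool address must be patched in.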
    526 
    527 void MacroAssemblerX86::boxNonDouble(JSValueType type, Register src,
    528                                     const ValueOperand& dest) {
    529  MOZ_ASSERT(type != JSVAL_TYPE_UNDEFINED && type != JSVAL_TYPE_NULL);
    530  MOZ_ASSERT(dest.typeReg() != dest.payloadReg());
    531 
    532 #ifdef DEBUG
    533  if (type == JSVAL_TYPE_BOOLEAN) {
    534    Label upperBitsZeroed;
    535    cmp32(src, Imm32(1));
    536    j(Assembler::BelowOrEqual, &upperBitsZeroed);
    537    breakpoint();
    538    bind(&upperBitsZeroed);
    539  }
    540 #endif
    541 
    542  if (src != dest.payloadReg()) {
    543    movl(src, dest.payloadReg());
    544  }
    545  movl(ImmType(type), dest.typeReg());
    546 }
    547 
    548 void MacroAssemblerX86::boxNonDouble(Register type, Register src,
    549                                     const ValueOperand& dest) {
    550  MOZ_ASSERT(type != dest.payloadReg() && src != dest.typeReg());
    551 
    552 #ifdef DEBUG
    553  Label ok, isNullOrUndefined, isBoolean;
    554 
    555  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_NULL),
    556                    &isNullOrUndefined);
    557  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_UNDEFINED),
    558                    &isNullOrUndefined);
    559  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_BOOLEAN),
    560                    &isBoolean);
    561  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_INT32), &ok);
    562  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_MAGIC), &ok);
    563  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_STRING), &ok);
    564  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_SYMBOL), &ok);
    565  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_PRIVATE_GCTHING),
    566                    &ok);
    567  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_BIGINT), &ok);
    568  asMasm().branch32(Assembler::Equal, type, Imm32(JSVAL_TYPE_OBJECT), &ok);
    569  breakpoint();
    570  {
    571    bind(&isNullOrUndefined);
    572    cmp32(src, src);
    573    j(Assembler::Zero, &ok);
    574    breakpoint();
    575  }
    576  {
    577    bind(&isBoolean);
    578    cmp32(src, Imm32(1));
    579    j(Assembler::BelowOrEqual, &ok);
    580    breakpoint();
    581  }
    582  bind(&ok);
    583 #endif
    584 
    585  if (src != dest.payloadReg()) {
    586    movl(src, dest.payloadReg());
    587  }
    588  if (type != dest.typeReg()) {
    589    movl(Imm32(JSVAL_TAG_CLEAR), dest.typeReg());
    590    orl(type, dest.typeReg());
    591  } else {
    592    orl(Imm32(JSVAL_TAG_CLEAR), dest.typeReg());
    593  }
    594 }
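
// Background for both boxNonDouble overloads: x86-32 uses nunbox32 boxing,
// where a Value is a 32-bit tag plus a 32-bit payload held in a register
// pair. Tags are formed as JSVAL_TAG_CLEAR | JSVAL_TYPE_*, with
// JSVAL_TAG_CLEAR == 0xFFFFFF80 (see Value.h), which is why a runtime type
// register can simply be OR'ed into the preloaded JSVAL_TAG_CLEAR above:
//
//   typeReg = JSVAL_TAG_CLEAR | type;  // e.g. 0xFFFFFF80 | JSVAL_TYPE_INT32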
    595 
    596 void MacroAssemblerX86::handleFailureWithHandlerTail(
    597    Label* profilerExitTail, Label* bailoutTail,
    598    uint32_t* returnValueCheckOffset) {
    599  // Reserve space for exception information.
    600  subl(Imm32(sizeof(ResumeFromException)), esp);
    601  movl(esp, eax);
    602 
    603  // Call the handler.
    604  using Fn = void (*)(ResumeFromException* rfe);
    605  asMasm().setupUnalignedABICall(ecx);
    606  asMasm().passABIArg(eax);
    607  asMasm().callWithABI<Fn, HandleException>(
    608      ABIType::General, CheckUnsafeCallWithABI::DontCheckHasExitFrame);
    609 
    610  *returnValueCheckOffset = asMasm().currentOffset();
    611 
    612  Label entryFrame;
    613  Label catch_;
    614  Label finally;
    615  Label returnBaseline;
    616  Label returnIon;
    617  Label bailout;
    618  Label wasmInterpEntry;
    619  Label wasmCatch;
    620 
    621  loadPtr(Address(esp, ResumeFromException::offsetOfKind()), eax);
    622  asMasm().branch32(Assembler::Equal, eax,
    623                    Imm32(ExceptionResumeKind::EntryFrame), &entryFrame);
    624  asMasm().branch32(Assembler::Equal, eax, Imm32(ExceptionResumeKind::Catch),
    625                    &catch_);
    626  asMasm().branch32(Assembler::Equal, eax, Imm32(ExceptionResumeKind::Finally),
    627                    &finally);
    628  asMasm().branch32(Assembler::Equal, eax,
    629                    Imm32(ExceptionResumeKind::ForcedReturnBaseline),
    630                    &returnBaseline);
    631  asMasm().branch32(Assembler::Equal, eax,
    632                    Imm32(ExceptionResumeKind::ForcedReturnIon), &returnIon);
    633  asMasm().branch32(Assembler::Equal, eax, Imm32(ExceptionResumeKind::Bailout),
    634                    &bailout);
    635  asMasm().branch32(Assembler::Equal, eax,
    636                    Imm32(ExceptionResumeKind::WasmInterpEntry),
    637                    &wasmInterpEntry);
    638  asMasm().branch32(Assembler::Equal, eax,
    639                    Imm32(ExceptionResumeKind::WasmCatch), &wasmCatch);
    640 
    641  breakpoint();  // Invalid kind.
    642 
    643  // No exception handler. Load the error value, restore state and return from
    644  // the entry frame.
    645  bind(&entryFrame);
    646  asMasm().moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
    647  loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp);
    648  loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp);
    649  ret();
    650 
    651  // If we found a catch handler, this must be a baseline frame. Restore state
    652  // and jump to the catch block.
    653  bind(&catch_);
    654  loadPtr(Address(esp, ResumeFromException::offsetOfTarget()), eax);
    655  loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp);
    656  loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp);
    657  jmp(Operand(eax));
    658 
    659  // If we found a finally block, this must be a baseline frame. Push three
    660  // values expected by the finally block: the exception, the exception stack,
    661  // and BooleanValue(true).
    662  bind(&finally);
    663  ValueOperand exception = ValueOperand(ecx, edx);
    664  loadValue(Address(esp, ResumeFromException::offsetOfException()), exception);
    665 
    666  ValueOperand exceptionStack = ValueOperand(esi, edi);
    667  loadValue(Address(esp, ResumeFromException::offsetOfExceptionStack()),
    668            exceptionStack);
    669 
    670  loadPtr(Address(esp, ResumeFromException::offsetOfTarget()), eax);
    671  loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp);
    672  loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp);
    673 
    674  pushValue(exception);
    675  pushValue(exceptionStack);
    676  pushValue(BooleanValue(true));
    677  jmp(Operand(eax));
    678 
    679  // Return BaselineFrame->returnValue() to the caller.
    680  // Used in debug mode and for GeneratorReturn.
    681  Label profilingInstrumentation;
    682  bind(&returnBaseline);
    683  loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp);
    684  loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp);
    685  loadValue(Address(ebp, BaselineFrame::reverseOffsetOfReturnValue()),
    686            JSReturnOperand);
    687  jump(&profilingInstrumentation);
    688 
    689  // Return the given value to the caller.
    690  bind(&returnIon);
    691  loadValue(Address(esp, ResumeFromException::offsetOfException()),
    692            JSReturnOperand);
    693  loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp);
    694  loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp);
    695 
    696  // If profiling is enabled, then update the lastProfilingFrame to refer to
    697  // caller frame before returning. This code is shared by ForcedReturnIon
    698  // and ForcedReturnBaseline.
    699  bind(&profilingInstrumentation);
    700  {
    701    Label skipProfilingInstrumentation;
    702    // Test if profiler enabled.
    703    AbsoluteAddress addressOfEnabled(
    704        asMasm().runtime()->geckoProfiler().addressOfEnabled());
    705    asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
    706                      &skipProfilingInstrumentation);
    707    jump(profilerExitTail);
    708    bind(&skipProfilingInstrumentation);
    709  }
    710 
    711  movl(ebp, esp);
    712  pop(ebp);
    713  ret();
    714 
    715  // If we are bailing out to baseline to handle an exception, jump to the
    716  // bailout tail stub. Load 1 (true) in ReturnReg to indicate success.
    717  bind(&bailout);
    718  loadPtr(Address(esp, ResumeFromException::offsetOfBailoutInfo()), ecx);
    719  loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp);
    720  move32(Imm32(1), ReturnReg);
    721  jump(bailoutTail);
    722 
    723  // Reset SP and FP; SP is pointing to the unwound return address to the wasm
    724  // interpreter entry, so we can just ret().
    725  bind(&wasmInterpEntry);
    726  loadPtr(Address(esp, ResumeFromException::offsetOfFramePointer()), ebp);
    727  loadPtr(Address(esp, ResumeFromException::offsetOfStackPointer()), esp);
    728  movePtr(ImmPtr((const void*)wasm::InterpFailInstanceReg), InstanceReg);
    729  masm.ret();
    730 
    731  // Found a wasm catch handler, restore state and jump to it.
    732  bind(&wasmCatch);
    733  wasm::GenerateJumpToCatchHandler(asMasm(), esp, eax, ebx);
    734 }
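
// This tail is a dispatcher: the C++ HandleException fills in the
// ResumeFromException record reserved on the stack above, and its kind field
// selects how execution resumes (unwind to the entry frame, jump into a catch
// or finally block, forced return, bailout, or a wasm handler). Rough shape
// of the record indexed by the offsetOf* accessors (a sketch only;
// jit/JitFrames.h is authoritative):
//
//   struct ResumeFromException {
//     uintptr_t kind;          // ExceptionResumeKind
//     uint8_t* framePointer;   // restored into ebp
//     uint8_t* stackPointer;   // restored into esp
//     uint8_t* target;         // code address to jump to
//     // ... exception/exceptionStack Values, bailout info, etc.
//   };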
    735 
    736 void MacroAssemblerX86::profilerEnterFrame(Register framePtr,
    737                                           Register scratch) {
    738  asMasm().loadJSContext(scratch);
    739  loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
    740  storePtr(framePtr,
    741           Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
    742  storePtr(ImmPtr(nullptr),
    743           Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
    744 }
    745 
    746 void MacroAssemblerX86::profilerExitFrame() {
    747  jump(asMasm().runtime()->jitRuntime()->getProfilerExitFrameTail());
    748 }
    749 
    750 Assembler::Condition MacroAssemblerX86::testStringTruthy(
    751    bool truthy, const ValueOperand& value) {
    752  Register string = value.payloadReg();
    753  cmp32(Operand(string, JSString::offsetOfLength()), Imm32(0));
    754  return truthy ? Assembler::NotEqual : Assembler::Equal;
    755 }
    756 
    757 Assembler::Condition MacroAssemblerX86::testBigIntTruthy(
    758    bool truthy, const ValueOperand& value) {
    759  Register bi = value.payloadReg();
    760  cmp32(Operand(bi, JS::BigInt::offsetOfDigitLength()), Imm32(0));
    761  return truthy ? Assembler::NotEqual : Assembler::Equal;
    762 }
    763 
    764 MacroAssembler& MacroAssemblerX86::asMasm() {
    765  return *static_cast<MacroAssembler*>(this);
    766 }
    767 
    768 const MacroAssembler& MacroAssemblerX86::asMasm() const {
    769  return *static_cast<const MacroAssembler*>(this);
    770 }
    771 
    772 void MacroAssemblerX86::minMax32(Register lhs, Register rhs, Register dest,
    773                                 bool isMax) {
    774  if (rhs == dest) {
    775    std::swap(lhs, rhs);
    776  }
    777 
    778  auto cond = isMax ? Assembler::GreaterThan : Assembler::LessThan;
    779  if (lhs != dest) {
    780    movl(lhs, dest);
    781  }
    782  cmpl(lhs, rhs);
    783  cmovCCl(cond, rhs, dest);
    784 }
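
// The register form is branchless: dest is seeded with lhs and a conditional
// move pulls in rhs only when it wins the comparison; the initial swap keeps
// rhs distinct from dest so the seeding movl cannot clobber an input. For
// isMax with lhs=eax, rhs=ebx, dest=ecx this emits (AT&T syntax):
//
//   movl  %eax, %ecx   // dest = lhs
//   cmpl  %eax, %ebx   // compare rhs against lhs
//   cmovg %ebx, %ecx   // dest = rhs if rhs > lhs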
    785 
    786 void MacroAssemblerX86::minMax32(Register lhs, Imm32 rhs, Register dest,
    787                                 bool isMax) {
    788  auto cond =
    789      isMax ? Assembler::GreaterThanOrEqual : Assembler::LessThanOrEqual;
    790  if (lhs != dest) {
    791    movl(lhs, dest);
    792  }
    793  Label done;
    794  cmpl(rhs, lhs);
    795  j(cond, &done);
    796  move32(rhs, dest);
    797  bind(&done);
    798 }
    799 
    800 void MacroAssembler::subFromStackPtr(Imm32 imm32) {
    801  if (imm32.value) {
    802    // On Windows, we cannot skip very far down the stack without touching the
    803    // memory pages in-between.  This is corner-case code for situations where
    804    // the Ion frame data for a piece of code is very large.  To handle this
    805    // special case, for frames over 4k in size we allocate memory on the stack
    806    // incrementally, touching it as we go.
    807    //
    808    // When the amount is quite large, which it can be, we emit an actual loop,
    809    // in order to keep the function prologue compact.  Compactness is a
    810    // requirement for eg Wasm's CodeRange data structure, which can encode only
    811    // 8-bit offsets.
    812    uint32_t amountLeft = imm32.value;
    813    uint32_t fullPages = amountLeft / 4096;
    814    if (fullPages <= 8) {
    815      while (amountLeft > 4096) {
    816        subl(Imm32(4096), StackPointer);
    817        store32(Imm32(0), Address(StackPointer, 0));
    818        amountLeft -= 4096;
    819      }
    820      subl(Imm32(amountLeft), StackPointer);
    821    } else {
    822      // Save scratch register.
    823      push(eax);
    824      amountLeft -= 4;
    825      fullPages = amountLeft / 4096;
    826 
    827      Label top;
    828      move32(Imm32(fullPages), eax);
    829      bind(&top);
    830      subl(Imm32(4096), StackPointer);
    831      store32(Imm32(0), Address(StackPointer, 0));
    832      subl(Imm32(1), eax);
    833      j(Assembler::NonZero, &top);
    834      amountLeft -= fullPages * 4096;
    835      if (amountLeft) {
    836        subl(Imm32(amountLeft), StackPointer);
    837      }
    838 
    839      // Restore scratch register.
    840      movl(Operand(StackPointer, uint32_t(imm32.value) - 4), eax);
    841    }
    842  }
    843 }
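
// The store into each freshly exposed page is stack probing: Windows commits
// stack guard pages one at a time, so dropping esp by more than a page
// without touching the memory in between can jump past the guard page and
// fault. Pseudo-C of the large-frame loop (illustrative only):
//
//   for (uint32_t n = fullPages; n != 0; n--) {
//     esp -= 4096;
//     *(volatile uint32_t*)esp = 0;  // touch the new page
//   }
//   esp -= remainder;                // the sub-page tail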
    844 
    845 //{{{ check_macroassembler_style
    846 // ===============================================================
    847 // ABI function calls.
    848 
    849 void MacroAssembler::setupUnalignedABICall(Register scratch) {
    850  setupNativeABICall();
    851  dynamicAlignment_ = true;
    852 
    853  movl(esp, scratch);
    854  andl(Imm32(~(ABIStackAlignment - 1)), esp);
    855  push(scratch);
    856 }
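
// "Unaligned" means the current alignment of esp is unknown (e.g. when called
// from arbitrary JIT code), so the prologue saves esp in the scratch
// register, rounds esp down to ABIStackAlignment, and pushes the saved value
// where callWithABIPost's pop(esp) can find it. Assuming a 16-byte
// ABIStackAlignment this is:
//
//   movl  %esp, %ecx
//   andl  $-16, %esp     // ~(ABIStackAlignment - 1)
//   pushl %ecx           // old esp, restored by pop(esp) in callWithABIPost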
    857 
    858 void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
    859  MOZ_ASSERT(inCall_);
    860  uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
    861 
    862  if (dynamicAlignment_) {
    863    // sizeof(intptr_t) accounts for the saved stack pointer pushed by
    864    // setupUnalignedABICall.
    865    stackForCall += ComputeByteAlignment(stackForCall + sizeof(intptr_t),
    866                                         ABIStackAlignment);
    867  } else {
    868    uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
    869    stackForCall += ComputeByteAlignment(
    870        stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
    871  }
    872 
    873  *stackAdjust = stackForCall;
    874  reserveStack(stackForCall);
    875 
    876  // Position all arguments.
    877  {
    878    enoughMemory_ &= moveResolver_.resolve();
    879    if (!enoughMemory_) {
    880      return;
    881    }
    882 
    883    MoveEmitter emitter(*this);
    884    emitter.emit(moveResolver_);
    885    emitter.finish();
    886  }
    887 
    888  assertStackAlignment(ABIStackAlignment);
    889 }
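
// Worked example of the dynamic-alignment math, assuming ABIStackAlignment
// == 16 and 20 bytes of outgoing arguments: the push in setupUnalignedABICall
// left esp 4 bytes below a 16-byte boundary, so
// ComputeByteAlignment(20 + sizeof(intptr_t), 16) == 8 bytes of padding are
// folded in and reserveStack(28) leaves esp 16-byte aligned at the call
// instruction, which assertStackAlignment then checks.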
    890 
    891 void MacroAssembler::callWithABIPost(uint32_t stackAdjust, ABIType result) {
    892  freeStack(stackAdjust);
    893 
    894  // If this was a call to a system ABI function, we need to adapt the FP
    895  // results to the expected return registers for JIT code.
    896  if (abiArgs_.abi() == ABIKind::System) {
    897    if (result == ABIType::Float64) {
    898      reserveStack(sizeof(double));
    899      fstp(Operand(esp, 0));
    900      loadDouble(Operand(esp, 0), ReturnDoubleReg);
    901      freeStack(sizeof(double));
    902    } else if (result == ABIType::Float32) {
    903      reserveStack(sizeof(float));
    904      fstp32(Operand(esp, 0));
    905      loadFloat32(Operand(esp, 0), ReturnFloat32Reg);
    906      freeStack(sizeof(float));
    907    }
    908  }
    909 
    910  if (dynamicAlignment_) {
    911    pop(esp);
    912  }
    913 
    914 #ifdef DEBUG
    915  MOZ_ASSERT(inCall_);
    916  inCall_ = false;
    917 #endif
    918 }
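
// The fstp/load pair exists because the x86-32 system ABI returns
// floating-point results in the x87 register st(0), while JIT code expects
// them in an SSE register (ReturnDoubleReg / ReturnFloat32Reg). There is no
// direct x87-to-XMM move, so the value is bounced through the stack:
//
//   fstp  (%esp)           // pop st(0) into the reserved slot
//   movsd (%esp), %xmm0    // reload into the SSE return register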
    919 
    920 void MacroAssembler::callWithABINoProfiler(Register fun, ABIType result) {
    921  uint32_t stackAdjust;
    922  callWithABIPre(&stackAdjust);
    923  call(fun);
    924  callWithABIPost(stackAdjust, result);
    925 }
    926 
    927 void MacroAssembler::callWithABINoProfiler(const Address& fun, ABIType result) {
    928  uint32_t stackAdjust;
    929  callWithABIPre(&stackAdjust);
    930  call(fun);
    931  callWithABIPost(stackAdjust, result);
    932 }
    933 
    934 // ===============================================================
    935 // Move instructions
    936 
    937 void MacroAssembler::moveValue(const ValueOperand& src,
    938                               const ValueOperand& dest) {
    939  Register s0 = src.typeReg();
    940  Register s1 = src.payloadReg();
    941  Register d0 = dest.typeReg();
    942  Register d1 = dest.payloadReg();
    943 
    944  // Either one or both of the source registers could be the same as a
    945  // destination register.
    946  if (s1 == d0) {
    947    if (s0 == d1) {
    948      // If both are, this is just a swap of two registers.
    949      xchgl(d0, d1);
    950      return;
    951    }
    952    // If only one is, copy that source first.
    953    std::swap(s0, s1);
    954    std::swap(d0, d1);
    955  }
    956 
    957  if (s0 != d0) {
    958    movl(s0, d0);
    959  }
    960  if (s1 != d1) {
    961    movl(s1, d1);
    962  }
    963 }
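
// A nunbox32 Value move involves two GPRs, so the pair copy must tolerate
// overlap between source and destination. Crossed full overlap degenerates to
// a single xchgl; a single crossed overlap is fixed by copying the endangered
// half first (that is what the std::swap achieves); everything else is at
// most two plain movl's. For example:
//
//   (s0,s1)=(edx,eax), (d0,d1)=(eax,edx)  =>  xchgl %eax, %edx
//   (s0,s1)=(edx,eax), (d0,d1)=(eax,ecx)  =>  movl %eax,%ecx; movl %edx,%eax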
    964 
    965 void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
    966  movl(Imm32(src.toNunboxTag()), dest.typeReg());
    967  if (src.isGCThing()) {
    968    movl(ImmGCPtr(src.toGCThing()), dest.payloadReg());
    969  } else {
    970    movl(Imm32(src.toNunboxPayload()), dest.payloadReg());
    971  }
    972 }
    973 
    974 // ===============================================================
    975 // Arithmetic functions
    976 
    977 void MacroAssembler::flexibleQuotientPtr(
    978    Register lhs, Register rhs, Register dest, bool isUnsigned,
    979    const LiveRegisterSet& volatileLiveRegs) {
    980  flexibleQuotient32(lhs, rhs, dest, isUnsigned, volatileLiveRegs);
    981 }
    982 
    983 void MacroAssembler::flexibleRemainderPtr(
    984    Register lhs, Register rhs, Register dest, bool isUnsigned,
    985    const LiveRegisterSet& volatileLiveRegs) {
    986  flexibleRemainder32(lhs, rhs, dest, isUnsigned, volatileLiveRegs);
    987 }
    988 
    989 // ===============================================================
    990 // Branch functions
    991 
    992 void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
    993  if (ptr != buffer) {
    994    movePtr(ptr, buffer);
    995  }
    996  andPtr(Imm32(~gc::ChunkMask), buffer);
    997  loadPtr(Address(buffer, gc::ChunkStoreBufferOffset), buffer);
    998 }
    999 
   1000 void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
   1001                                             Register temp, Label* label) {
   1002  MOZ_ASSERT(temp != InvalidReg);  // A temp register is required for x86.
   1003  MOZ_ASSERT(ptr != temp);
   1004  movePtr(ptr, temp);
   1005  branchPtrInNurseryChunkImpl(cond, temp, label);
   1006 }
   1007 
   1008 void MacroAssembler::branchPtrInNurseryChunk(Condition cond,
   1009                                             const Address& address,
   1010                                             Register temp, Label* label) {
   1011  MOZ_ASSERT(temp != InvalidReg);  // A temp register is required for x86.
   1012  loadPtr(address, temp);
   1013  branchPtrInNurseryChunkImpl(cond, temp, label);
   1014 }
   1015 
   1016 void MacroAssembler::branchPtrInNurseryChunkImpl(Condition cond, Register ptr,
   1017                                                 Label* label) {
   1018  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
   1019 
   1020  andPtr(Imm32(~gc::ChunkMask), ptr);
   1021  branchPtr(InvertCondition(cond), Address(ptr, gc::ChunkStoreBufferOffset),
   1022            ImmWord(0), label);
   1023 }
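
// The mask trick relies on GC chunks being ChunkSize-aligned: clearing the
// low bits of any cell pointer yields its chunk header, and the word at
// ChunkStoreBufferOffset is non-null only for nursery chunks. Pseudo-C of the
// test (illustrative only):
//
//   uintptr_t chunk = uintptr_t(cell) & ~gc::ChunkMask;
//   bool inNursery =
//       *(void**)(chunk + gc::ChunkStoreBufferOffset) != nullptr;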
   1024 
   1025 void MacroAssembler::branchValueIsNurseryCell(Condition cond,
   1026                                              const Address& address,
   1027                                              Register temp, Label* label) {
   1028  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
   1029 
   1030  Label done;
   1031 
   1032  branchTestGCThing(Assembler::NotEqual, address,
   1033                    cond == Assembler::Equal ? &done : label);
   1034  branchPtrInNurseryChunk(cond, ToPayload(address), temp, label);
   1035 
   1036  bind(&done);
   1037 }
   1038 
   1039 void MacroAssembler::branchValueIsNurseryCell(Condition cond,
   1040                                              ValueOperand value, Register temp,
   1041                                              Label* label) {
   1042  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
   1043 
   1044  Label done;
   1045 
   1046  branchTestGCThing(Assembler::NotEqual, value,
   1047                    cond == Assembler::Equal ? &done : label);
   1048  branchPtrInNurseryChunk(cond, value.payloadReg(), temp, label);
   1049 
   1050  bind(&done);
   1051 }
   1052 
   1053 void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
   1054                                     const Value& rhs, Label* label) {
   1055  MOZ_ASSERT(cond == Equal || cond == NotEqual);
   1056  MOZ_ASSERT(!rhs.isNaN());
   1057  if (rhs.isGCThing()) {
   1058    cmpPtr(lhs.payloadReg(), ImmGCPtr(rhs.toGCThing()));
   1059  } else {
   1060    cmpPtr(lhs.payloadReg(), ImmWord(rhs.toNunboxPayload()));
   1061  }
   1062 
   1063  if (cond == Equal) {
   1064    Label done;
   1065    j(NotEqual, &done);
   1066    {
   1067      cmp32(lhs.typeReg(), Imm32(rhs.toNunboxTag()));
   1068      j(Equal, label);
   1069    }
   1070    bind(&done);
   1071  } else {
   1072    j(NotEqual, label);
   1073 
   1074    cmp32(lhs.typeReg(), Imm32(rhs.toNunboxTag()));
   1075    j(NotEqual, label);
   1076  }
   1077 }
   1078 
   1079 void MacroAssembler::branchTestNaNValue(Condition cond, const ValueOperand& val,
   1080                                        Register temp, Label* label) {
   1081  MOZ_ASSERT(cond == Equal || cond == NotEqual);
   1082 
   1083  // When testing for NaN, we want to ignore the sign bit.
   1084  const uint32_t SignBit = mozilla::FloatingPoint<double>::kSignBit >> 32;
   1085  movl(val.typeReg(), temp);
   1086  andl(Imm32(~SignBit), temp);
   1087 
   1088  // Compare against a NaN with sign bit 0.
   1089  static_assert(JS::detail::CanonicalizedNaNSignBit == 0);
   1090  Value expected = DoubleValue(JS::GenericNaN());
   1091  cmpPtr(val.payloadReg(), ImmWord(expected.toNunboxPayload()));
   1092 
   1093  if (cond == Equal) {
   1094    Label done;
   1095    j(NotEqual, &done);
   1096    {
   1097      cmp32(temp, Imm32(expected.toNunboxTag()));
   1098      j(Equal, label);
   1099    }
   1100    bind(&done);
   1101  } else {
   1102    j(NotEqual, label);
   1103 
   1104    cmp32(temp, Imm32(expected.toNunboxTag()));
   1105    j(NotEqual, label);
   1106  }
   1107 }
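
// In nunbox32 a double Value is its raw IEEE-754 bits: payloadReg holds the
// low word and typeReg the high word, where the sign bit lives in bit 31 of
// the high word. Masking that bit off before the compare lets one pair of
// comparisons against the canonical NaN (0x7FF8000000000000) accept both its
// positive and negative encodings.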
   1108 
   1109 // ========================================================================
   1110 // Memory access primitives.
   1111 template <typename T>
   1112 void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
   1113                                       MIRType valueType, const T& dest) {
   1114  MOZ_ASSERT(valueType < MIRType::Value);
   1115 
   1116  if (valueType == MIRType::Double) {
   1117    storeDouble(value.reg().typedReg().fpu(), dest);
   1118    return;
   1119  }
   1120 
   1121  // Store the type tag.
   1122  storeTypeTag(ImmType(ValueTypeFromMIRType(valueType)), Operand(dest));
   1123 
   1124  // Store the payload.
   1125  if (value.constant()) {
   1126    storePayload(value.value(), Operand(dest));
   1127  } else {
   1128    storePayload(value.reg().typedReg().gpr(), Operand(dest));
   1129  }
   1130 }
   1131 
   1132 template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
   1133                                                MIRType valueType,
   1134                                                const Address& dest);
   1135 template void MacroAssembler::storeUnboxedValue(
   1136    const ConstantOrRegister& value, MIRType valueType,
   1137    const BaseObjectElementIndex& dest);
   1138 
   1139 // wasm specific methods, used in both the wasm baseline compiler and ion.
   1140 
   1141 void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
   1142                              Operand srcAddr, AnyRegister out) {
   1143  MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP ||
   1144             srcAddr.kind() == Operand::MEM_SCALE);
   1145 
   1146  MOZ_ASSERT_IF(
   1147      access.isZeroExtendSimd128Load(),
   1148      access.type() == Scalar::Float32 || access.type() == Scalar::Float64);
   1149  MOZ_ASSERT_IF(
   1150      access.isSplatSimd128Load(),
   1151      access.type() == Scalar::Uint8 || access.type() == Scalar::Uint16 ||
   1152          access.type() == Scalar::Float32 || access.type() == Scalar::Float64);
   1153  MOZ_ASSERT_IF(access.isWidenSimd128Load(), access.type() == Scalar::Float64);
   1154 
   1155  // NOTE: the generated code must match the assembly code in gen_load in
   1156  // GenerateAtomicOperations.py
   1157  memoryBarrierBefore(access.sync());
   1158 
   1159  switch (access.type()) {
   1160    case Scalar::Int8:
   1161      append(access, wasm::TrapMachineInsn::Load8,
   1162             FaultingCodeOffset(currentOffset()));
   1163      movsbl(srcAddr, out.gpr());
   1164      break;
   1165    case Scalar::Uint8:
   1166      append(access, wasm::TrapMachineInsn::Load8,
   1167             FaultingCodeOffset(currentOffset()));
   1168      if (access.isSplatSimd128Load()) {
   1169        vbroadcastb(srcAddr, out.fpu());
   1170      } else {
   1171        movzbl(srcAddr, out.gpr());
   1172      }
   1173      break;
   1174    case Scalar::Int16:
   1175      append(access, wasm::TrapMachineInsn::Load16,
   1176             FaultingCodeOffset(currentOffset()));
   1177      movswl(srcAddr, out.gpr());
   1178      break;
   1179    case Scalar::Uint16:
   1180      append(access, wasm::TrapMachineInsn::Load16,
   1181             FaultingCodeOffset(currentOffset()));
   1182      if (access.isSplatSimd128Load()) {
   1183        vbroadcastw(srcAddr, out.fpu());
   1184      } else {
   1185        movzwl(srcAddr, out.gpr());
   1186      }
   1187      break;
   1188    case Scalar::Int32:
   1189    case Scalar::Uint32:
   1190      append(access, wasm::TrapMachineInsn::Load32,
   1191             FaultingCodeOffset(currentOffset()));
   1192      movl(srcAddr, out.gpr());
   1193      break;
   1194    case Scalar::Float32:
   1195      append(access, wasm::TrapMachineInsn::Load32,
   1196             FaultingCodeOffset(currentOffset()));
   1197      if (access.isSplatSimd128Load()) {
   1198        vbroadcastss(srcAddr, out.fpu());
   1199      } else {
   1200        // vmovss does the right thing also for access.isZeroExtendSimd128Load()
   1201        vmovss(srcAddr, out.fpu());
   1202      }
   1203      break;
   1204    case Scalar::Float64:
   1205      append(access, wasm::TrapMachineInsn::Load64,
   1206             FaultingCodeOffset(currentOffset()));
   1207      if (access.isSplatSimd128Load()) {
   1208        vmovddup(srcAddr, out.fpu());
   1209      } else if (access.isWidenSimd128Load()) {
   1210        switch (access.widenSimdOp()) {
   1211          case wasm::SimdOp::V128Load8x8S:
   1212            vpmovsxbw(srcAddr, out.fpu());
   1213            break;
   1214          case wasm::SimdOp::V128Load8x8U:
   1215            vpmovzxbw(srcAddr, out.fpu());
   1216            break;
   1217          case wasm::SimdOp::V128Load16x4S:
   1218            vpmovsxwd(srcAddr, out.fpu());
   1219            break;
   1220          case wasm::SimdOp::V128Load16x4U:
   1221            vpmovzxwd(srcAddr, out.fpu());
   1222            break;
   1223          case wasm::SimdOp::V128Load32x2S:
   1224            vpmovsxdq(srcAddr, out.fpu());
   1225            break;
   1226          case wasm::SimdOp::V128Load32x2U:
   1227            vpmovzxdq(srcAddr, out.fpu());
   1228            break;
   1229          default:
   1230            MOZ_CRASH("Unexpected widening op for wasmLoad");
   1231        }
   1232      } else {
   1233        // vmovsd does the right thing also for access.isZeroExtendSimd128Load()
   1234        vmovsd(srcAddr, out.fpu());
   1235      }
   1236      break;
   1237    case Scalar::Simd128:
   1238      append(access, wasm::TrapMachineInsn::Load128,
   1239             FaultingCodeOffset(currentOffset()));
   1240      vmovups(srcAddr, out.fpu());
   1241      break;
   1242    case Scalar::Int64:
   1243    case Scalar::Uint8Clamped:
   1244    case Scalar::BigInt64:
   1245    case Scalar::BigUint64:
   1246    case Scalar::Float16:
   1247    case Scalar::MaxTypedArrayViewType:
   1248      MOZ_CRASH("unexpected type");
   1249  }
   1250 
   1251  memoryBarrierAfter(access.sync());
   1252 }
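
// Each append(access, TrapMachineInsn::..., FaultingCodeOffset(...)) call
// records trap metadata immediately before the memory instruction is emitted:
// if the access faults at run time, the wasm signal handler uses the recorded
// code offset to attribute the fault to this wasm memory access and unwind to
// the trap handler instead of crashing.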
   1253 
   1254 void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
   1255                                 Operand srcAddr, Register64 out) {
   1256  // Atomic i64 load must use lock_cmpxchg8b.
   1257  MOZ_ASSERT_IF(access.isAtomic(), access.byteSize() <= 4);
   1258  MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP ||
   1259             srcAddr.kind() == Operand::MEM_SCALE);
   1260  MOZ_ASSERT(!access.isZeroExtendSimd128Load());  // Use wasmLoad()
   1261  MOZ_ASSERT(!access.isSplatSimd128Load());       // Use wasmLoad()
   1262  MOZ_ASSERT(!access.isWidenSimd128Load());       // Use wasmLoad()
   1263 
   1264  memoryBarrierBefore(access.sync());
   1265 
   1266  switch (access.type()) {
   1267    case Scalar::Int8:
   1268      MOZ_ASSERT(out == Register64(edx, eax));
   1269      append(access, wasm::TrapMachineInsn::Load8,
   1270             FaultingCodeOffset(currentOffset()));
   1271      movsbl(srcAddr, out.low);
   1272 
   1273      cdq();
   1274      break;
   1275    case Scalar::Uint8:
   1276      append(access, wasm::TrapMachineInsn::Load8,
   1277             FaultingCodeOffset(currentOffset()));
   1278      movzbl(srcAddr, out.low);
   1279 
   1280      xorl(out.high, out.high);
   1281      break;
   1282    case Scalar::Int16:
   1283      MOZ_ASSERT(out == Register64(edx, eax));
   1284      append(access, wasm::TrapMachineInsn::Load16,
   1285             FaultingCodeOffset(currentOffset()));
   1286      movswl(srcAddr, out.low);
   1287 
   1288      cdq();
   1289      break;
   1290    case Scalar::Uint16:
   1291      append(access, wasm::TrapMachineInsn::Load16,
   1292             FaultingCodeOffset(currentOffset()));
   1293      movzwl(srcAddr, out.low);
   1294 
   1295      xorl(out.high, out.high);
   1296      break;
   1297    case Scalar::Int32:
   1298      MOZ_ASSERT(out == Register64(edx, eax));
   1299      append(access, wasm::TrapMachineInsn::Load32,
   1300             FaultingCodeOffset(currentOffset()));
   1301      movl(srcAddr, out.low);
   1302 
   1303      cdq();
   1304      break;
   1305    case Scalar::Uint32:
   1306      append(access, wasm::TrapMachineInsn::Load32,
   1307             FaultingCodeOffset(currentOffset()));
   1308      movl(srcAddr, out.low);
   1309 
   1310      xorl(out.high, out.high);
   1311      break;
   1312    case Scalar::Int64: {
   1313      if (srcAddr.kind() == Operand::MEM_SCALE) {
   1314        MOZ_RELEASE_ASSERT(srcAddr.toBaseIndex().base != out.low &&
   1315                           srcAddr.toBaseIndex().index != out.low);
   1316      }
   1317      if (srcAddr.kind() == Operand::MEM_REG_DISP) {
   1318        MOZ_RELEASE_ASSERT(srcAddr.toAddress().base != out.low);
   1319      }
   1320 
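              // out.low must not alias the address registers: the first load
              // below clobbers it while the address is still needed for the
              // second load.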
   1321      append(access, wasm::TrapMachineInsn::Load32,
   1322             FaultingCodeOffset(currentOffset()));
   1323      movl(LowWord(srcAddr), out.low);
   1324 
   1325      append(access, wasm::TrapMachineInsn::Load32,
   1326             FaultingCodeOffset(currentOffset()));
   1327      movl(HighWord(srcAddr), out.high);
   1328 
   1329      break;
   1330    }
   1331    case Scalar::Float16:
   1332    case Scalar::Float32:
   1333    case Scalar::Float64:
   1334      MOZ_CRASH("non-int64 loads should use load()");
   1335    case Scalar::Simd128:
   1336    case Scalar::Uint8Clamped:
   1337    case Scalar::BigInt64:
   1338    case Scalar::BigUint64:
   1339    case Scalar::MaxTypedArrayViewType:
   1340      MOZ_CRASH("unexpected array type");
   1341  }
   1342 
   1343  memoryBarrierAfter(access.sync());
   1344 }
   1345 
   1346 void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
   1347                               AnyRegister value, Operand dstAddr) {
   1348  MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP ||
   1349             dstAddr.kind() == Operand::MEM_SCALE);
   1350 
   1351  // NOTE: the generated code must match the assembly code in gen_store in
   1352  // GenerateAtomicOperations.py
   1353  memoryBarrierBefore(access.sync());
   1354 
   1355  switch (access.type()) {
   1356    case Scalar::Int8:
   1357    case Scalar::Uint8Clamped:
   1358    case Scalar::Uint8:
   1359      append(access, wasm::TrapMachineInsn::Store8,
   1360             FaultingCodeOffset(currentOffset()));
    1361      // movb needs a byte-addressable source register (al/bl/cl/dl).
   1362      movb(value.gpr(), dstAddr);
   1363      break;
   1364    case Scalar::Int16:
   1365    case Scalar::Uint16:
   1366      append(access, wasm::TrapMachineInsn::Store16,
   1367             FaultingCodeOffset(currentOffset()));
   1368      movw(value.gpr(), dstAddr);
   1369      break;
   1370    case Scalar::Int32:
   1371    case Scalar::Uint32:
   1372      append(access, wasm::TrapMachineInsn::Store32,
   1373             FaultingCodeOffset(currentOffset()));
   1374      movl(value.gpr(), dstAddr);
   1375      break;
   1376    case Scalar::Float32:
   1377      append(access, wasm::TrapMachineInsn::Store32,
   1378             FaultingCodeOffset(currentOffset()));
   1379      vmovss(value.fpu(), dstAddr);
   1380      break;
   1381    case Scalar::Float64:
   1382      append(access, wasm::TrapMachineInsn::Store64,
   1383             FaultingCodeOffset(currentOffset()));
   1384      vmovsd(value.fpu(), dstAddr);
   1385      break;
   1386    case Scalar::Simd128:
   1387      append(access, wasm::TrapMachineInsn::Store128,
   1388             FaultingCodeOffset(currentOffset()));
   1389      vmovups(value.fpu(), dstAddr);
   1390      break;
   1391    case Scalar::Int64:
   1392      MOZ_CRASH("Should be handled in storeI64.");
   1393    case Scalar::Float16:
   1394    case Scalar::MaxTypedArrayViewType:
   1395    case Scalar::BigInt64:
   1396    case Scalar::BigUint64:
   1397      MOZ_CRASH("unexpected type");
   1398  }
   1399 
   1400  memoryBarrierAfter(access.sync());
   1401 }
   1402 
   1403 void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
   1404                                  Register64 value, Operand dstAddr) {
   1405  // Atomic i64 store must use lock_cmpxchg8b.
   1406  MOZ_ASSERT(!access.isAtomic());
   1407  MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP ||
   1408             dstAddr.kind() == Operand::MEM_SCALE);
   1409 
   1410  // Store the high word first so as to hit guard-page-based OOB checks without
   1411  // writing partial data.
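          // (The high word sits at the higher address, so an out-of-bounds
          // access faults on it before any byte of the value is written.)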
   1412  append(access, wasm::TrapMachineInsn::Store32,
   1413         FaultingCodeOffset(currentOffset()));
   1414  movl(value.high, HighWord(dstAddr));
   1415 
   1416  append(access, wasm::TrapMachineInsn::Store32,
   1417         FaultingCodeOffset(currentOffset()));
   1418  movl(value.low, LowWord(dstAddr));
   1419 }
   1420 
   1421 template <typename T>
   1422 static void AtomicLoad64(MacroAssembler& masm,
   1423                         const wasm::MemoryAccessDesc* access, const T& address,
   1424                         Register64 temp, Register64 output) {
   1425  MOZ_ASSERT(temp.low == ebx);
   1426  MOZ_ASSERT(temp.high == ecx);
   1427  MOZ_ASSERT(output.high == edx);
   1428  MOZ_ASSERT(output.low == eax);
   1429 
   1430  // In the event edx:eax matches what's in memory, ecx:ebx will be
   1431  // stored.  The two pairs must therefore have the same values.
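          // Net effect of the lock cmpxchg8b below (a sketch):
          //   if [mem] == edx:eax: [mem] <- ecx:ebx, which equals edx:eax, so
          //                        memory is unchanged;
          //   else:                edx:eax <- [mem] and nothing is stored.
          // Either way edx:eax ends up holding the 64-bit memory value,
          // loaded atomically.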
   1432  masm.movl(edx, ecx);
   1433  masm.movl(eax, ebx);
   1434 
   1435  if (access) {
   1436    masm.append(*access, wasm::TrapMachineInsn::Atomic,
   1437                FaultingCodeOffset(masm.currentOffset()));
   1438  }
   1439  masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(address));
   1440 }
   1441 
   1442 void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access,
   1443                                      const Address& mem, Register64 temp,
   1444                                      Register64 output) {
   1445  AtomicLoad64(*this, &access, mem, temp, output);
   1446 }
   1447 
   1448 void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access,
   1449                                      const BaseIndex& mem, Register64 temp,
   1450                                      Register64 output) {
   1451  AtomicLoad64(*this, &access, mem, temp, output);
   1452 }
   1453 
   1454 template <typename T>
   1455 static void CompareExchange64(MacroAssembler& masm,
   1456                              const wasm::MemoryAccessDesc* access,
   1457                              const T& mem, Register64 expected,
   1458                              Register64 replacement, Register64 output) {
   1459  MOZ_ASSERT(expected == output);
   1460  MOZ_ASSERT(expected.high == edx);
   1461  MOZ_ASSERT(expected.low == eax);
   1462  MOZ_ASSERT(replacement.high == ecx);
   1463  MOZ_ASSERT(replacement.low == ebx);
   1464 
   1465  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
   1466  // GenerateAtomicOperations.py
   1467  if (access) {
   1468    masm.append(*access, wasm::TrapMachineInsn::Atomic,
   1469                FaultingCodeOffset(masm.currentOffset()));
   1470  }
   1471  masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem));
   1472 }
   1473 
   1474 void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
   1475                                           const Address& mem,
   1476                                           Register64 expected,
   1477                                           Register64 replacement,
   1478                                           Register64 output) {
   1479  CompareExchange64(*this, &access, mem, expected, replacement, output);
   1480 }
   1481 
   1482 void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
   1483                                           const BaseIndex& mem,
   1484                                           Register64 expected,
   1485                                           Register64 replacement,
   1486                                           Register64 output) {
   1487  CompareExchange64(*this, &access, mem, expected, replacement, output);
   1488 }
   1489 
   1490 template <typename T>
   1491 static void AtomicExchange64(MacroAssembler& masm,
   1492                             const wasm::MemoryAccessDesc* access, const T& mem,
   1493                             Register64 value, Register64 output) {
   1494  MOZ_ASSERT(value.low == ebx);
   1495  MOZ_ASSERT(value.high == ecx);
   1496  MOZ_ASSERT(output.high == edx);
   1497  MOZ_ASSERT(output.low == eax);
   1498 
   1499  // edx:eax has garbage initially, and that is the best we can do unless
   1500  // we can guess with high probability what's in memory.
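          // The loop below retries lock cmpxchg8b until the compare succeeds;
          // at that point ecx:ebx (the new value) has been stored and edx:eax
          // holds the previous memory value, i.e. an atomic exchange.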
   1501 
   1502  MOZ_ASSERT(mem.base != edx && mem.base != eax);
   1503  if constexpr (std::is_same_v<T, BaseIndex>) {
   1504    MOZ_ASSERT(mem.index != edx && mem.index != eax);
   1505  } else {
   1506    static_assert(std::is_same_v<T, Address>);
   1507  }
   1508 
   1509  Label again;
   1510  masm.bind(&again);
   1511  if (access) {
   1512    masm.append(*access, wasm::TrapMachineInsn::Atomic,
   1513                FaultingCodeOffset(masm.currentOffset()));
   1514  }
   1515  masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem));
   1516  masm.j(MacroAssembler::NonZero, &again);
   1517 }
   1518 
   1519 void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
   1520                                          const Address& mem, Register64 value,
   1521                                          Register64 output) {
   1522  AtomicExchange64(*this, &access, mem, value, output);
   1523 }
   1524 
   1525 void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
   1526                                          const BaseIndex& mem,
   1527                                          Register64 value, Register64 output) {
   1528  AtomicExchange64(*this, &access, mem, value, output);
   1529 }
   1530 
   1531 template <typename T>
   1532 static void AtomicFetchOp64(MacroAssembler& masm,
   1533                            const wasm::MemoryAccessDesc* access, AtomicOp op,
   1534                            const Address& value, const T& mem, Register64 temp,
   1535                            Register64 output) {
   1536  // We don't have enough registers for all the operands on x86, so the rhs
   1537  // operand is in memory.
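          // Loop shape (a sketch): load [mem] into edx:eax once, then repeat
          // { ecx:ebx = copy of edx:eax; OPERATE the rhs into ecx:ebx;
          //   lock cmpxchg8b [mem] } until the compare succeeds. On failure
          // cmpxchg8b reloads edx:eax with the current memory value, so the
          // loop retries with fresh data.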
   1538 
   1539 #define ATOMIC_OP_BODY(OPERATE)                                         \
   1540  do {                                                                  \
   1541    MOZ_ASSERT(output.low == eax);                                      \
   1542    MOZ_ASSERT(output.high == edx);                                     \
   1543    MOZ_ASSERT(temp.low == ebx);                                        \
   1544    MOZ_ASSERT(temp.high == ecx);                                       \
   1545    FaultingCodeOffsetPair fcop = masm.load64(mem, output);             \
   1546    if (access) {                                                       \
   1547      masm.append(*access, wasm::TrapMachineInsn::Load32, fcop.first);  \
   1548      masm.append(*access, wasm::TrapMachineInsn::Load32, fcop.second); \
   1549    }                                                                   \
   1550    Label again;                                                        \
   1551    masm.bind(&again);                                                  \
   1552    masm.move64(output, temp);                                          \
   1553    masm.OPERATE(Operand(value), temp);                                 \
   1554    masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem));              \
   1555    masm.j(MacroAssembler::NonZero, &again);                            \
   1556  } while (0)
   1557 
   1558  switch (op) {
   1559    case AtomicOp::Add:
   1560      ATOMIC_OP_BODY(add64FromMemory);
   1561      break;
   1562    case AtomicOp::Sub:
   1563      ATOMIC_OP_BODY(sub64FromMemory);
   1564      break;
   1565    case AtomicOp::And:
   1566      ATOMIC_OP_BODY(and64FromMemory);
   1567      break;
   1568    case AtomicOp::Or:
   1569      ATOMIC_OP_BODY(or64FromMemory);
   1570      break;
   1571    case AtomicOp::Xor:
   1572      ATOMIC_OP_BODY(xor64FromMemory);
   1573      break;
   1574    default:
   1575      MOZ_CRASH();
   1576  }
   1577 
   1578 #undef ATOMIC_OP_BODY
   1579 }
   1580 
   1581 void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
   1582                                         AtomicOp op, const Address& value,
   1583                                         const Address& mem, Register64 temp,
   1584                                         Register64 output) {
   1585  AtomicFetchOp64(*this, &access, op, value, mem, temp, output);
   1586 }
   1587 
   1588 void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
   1589                                         AtomicOp op, const Address& value,
   1590                                         const BaseIndex& mem, Register64 temp,
   1591                                         Register64 output) {
   1592  AtomicFetchOp64(*this, &access, op, value, mem, temp, output);
   1593 }
   1594 
   1595 void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input,
   1596                                                Register output,
   1597                                                bool isSaturating,
   1598                                                Label* oolEntry) {
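          // Strategy (illustrative): vcvttsd2si handles inputs below 2**31
          // directly. For inputs in [2**31, 2**32) we add -2**31 as a double,
          // convert, and restore bit 31 with the final or32. For example,
          // 3e9 + (-2**31) = 852516352; OR 0x80000000 gives 3000000000.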
   1599  Label done;
   1600  vcvttsd2si(input, output);
   1601  branch32(Assembler::Condition::NotSigned, output, Imm32(0), &done);
   1602 
   1603  ScratchDoubleScope fpscratch(*this);
   1604  loadConstantDouble(double(int32_t(0x80000000)), fpscratch);
   1605  addDouble(input, fpscratch);
   1606  vcvttsd2si(fpscratch, output);
   1607 
   1608  branch32(Assembler::Condition::Signed, output, Imm32(0), oolEntry);
   1609  or32(Imm32(0x80000000), output);
   1610 
   1611  bind(&done);
   1612 }
   1613 
   1614 void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input,
   1615                                                 Register output,
   1616                                                 bool isSaturating,
   1617                                                 Label* oolEntry) {
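          // Same bias-and-restore strategy as wasmTruncateDoubleToUInt32
          // above, done in float32.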
   1618  Label done;
   1619  vcvttss2si(input, output);
   1620  branch32(Assembler::Condition::NotSigned, output, Imm32(0), &done);
   1621 
   1622  ScratchFloat32Scope fpscratch(*this);
   1623  loadConstantFloat32(float(int32_t(0x80000000)), fpscratch);
   1624  addFloat32(input, fpscratch);
   1625  vcvttss2si(fpscratch, output);
   1626 
   1627  branch32(Assembler::Condition::Signed, output, Imm32(0), oolEntry);
   1628  or32(Imm32(0x80000000), output);
   1629 
   1630  bind(&done);
   1631 }
   1632 
   1633 void MacroAssembler::wasmTruncateDoubleToInt64(
   1634    FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
   1635    Label* oolRejoin, FloatRegister tempReg) {
   1636  Label ok;
   1637  Register temp = output.high;
   1638 
   1639  reserveStack(2 * sizeof(int32_t));
   1640  storeDouble(input, Operand(esp, 0));
   1641 
   1642  truncateDoubleToInt64(Address(esp, 0), Address(esp, 0), temp);
   1643  load64(Address(esp, 0), output);
   1644 
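          // On overflow or NaN the truncation stores the x87 "integer
          // indefinite" pattern 0x8000000000000000. A nonzero low word rules
          // that out; otherwise comparing the high word with 1 sets OF exactly
          // when high == 0x80000000 (INT32_MIN minus 1 overflows), routing the
          // invalid case to oolEntry.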
   1645  cmpl(Imm32(0), Operand(esp, 0));
   1646  j(Assembler::NotEqual, &ok);
   1647 
   1648  cmpl(Imm32(1), Operand(esp, 4));
   1649  j(Assembler::Overflow, oolEntry);
   1650 
   1651  bind(&ok);
   1652  bind(oolRejoin);
   1653 
   1654  freeStack(2 * sizeof(int32_t));
   1655 }
   1656 
   1657 void MacroAssembler::wasmTruncateFloat32ToInt64(
   1658    FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
   1659    Label* oolRejoin, FloatRegister tempReg) {
   1660  Label ok;
   1661  Register temp = output.high;
   1662 
   1663  reserveStack(2 * sizeof(int32_t));
   1664  storeFloat32(input, Operand(esp, 0));
   1665 
   1666  truncateFloat32ToInt64(Address(esp, 0), Address(esp, 0), temp);
   1667  load64(Address(esp, 0), output);
   1668 
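          // Same integer-indefinite check as in wasmTruncateDoubleToInt64
          // above.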
   1669  cmpl(Imm32(0), Operand(esp, 0));
   1670  j(Assembler::NotEqual, &ok);
   1671 
   1672  cmpl(Imm32(1), Operand(esp, 4));
   1673  j(Assembler::Overflow, oolEntry);
   1674 
   1675  bind(&ok);
   1676  bind(oolRejoin);
   1677 
   1678  freeStack(2 * sizeof(int32_t));
   1679 }
   1680 
   1681 void MacroAssembler::wasmTruncateDoubleToUInt64(
   1682    FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
   1683    Label* oolRejoin, FloatRegister tempReg) {
   1684  Label fail, convert;
   1685  Register temp = output.high;
   1686 
   1687  // Make sure input fits in uint64.
   1688  reserveStack(2 * sizeof(int32_t));
   1689  storeDouble(input, Operand(esp, 0));
   1690  branchDoubleNotInUInt64Range(Address(esp, 0), temp, &fail);
   1691  size_t stackBeforeBranch = framePushed();
   1692  jump(&convert);
   1693 
   1694  bind(&fail);
   1695  freeStack(2 * sizeof(int32_t));
   1696  jump(oolEntry);
   1697  if (isSaturating) {
   1698    // The OOL path computes the right values.
   1699    setFramePushed(stackBeforeBranch);
   1700  } else {
   1701    // The OOL path just checks the input values.
   1702    bind(oolRejoin);
   1703    reserveStack(2 * sizeof(int32_t));
   1704    storeDouble(input, Operand(esp, 0));
   1705  }
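          // Either branch above leaves framePushed at the value recorded
          // before the range check, matching the real stack state on entry to
          // &convert (which is reached with the two slots still reserved).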
   1706 
    1707  // Convert the double to uint64.
   1708  bind(&convert);
   1709  truncateDoubleToUInt64(Address(esp, 0), Address(esp, 0), temp, tempReg);
   1710 
   1711  // Load value into int64 register.
   1712  load64(Address(esp, 0), output);
   1713  freeStack(2 * sizeof(int32_t));
   1714 
   1715  if (isSaturating) {
   1716    bind(oolRejoin);
   1717  }
   1718 }
   1719 
   1720 void MacroAssembler::wasmTruncateFloat32ToUInt64(
   1721    FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
   1722    Label* oolRejoin, FloatRegister tempReg) {
   1723  Label fail, convert;
   1724  Register temp = output.high;
   1725 
   1726  // Make sure input fits in uint64.
   1727  reserveStack(2 * sizeof(int32_t));
   1728  storeFloat32(input, Operand(esp, 0));
   1729  branchFloat32NotInUInt64Range(Address(esp, 0), temp, &fail);
   1730  size_t stackBeforeBranch = framePushed();
   1731  jump(&convert);
   1732 
   1733  bind(&fail);
   1734  freeStack(2 * sizeof(int32_t));
   1735  jump(oolEntry);
   1736  if (isSaturating) {
   1737    // The OOL path computes the right values.
   1738    setFramePushed(stackBeforeBranch);
   1739  } else {
   1740    // The OOL path just checks the input values.
   1741    bind(oolRejoin);
   1742    reserveStack(2 * sizeof(int32_t));
   1743    storeFloat32(input, Operand(esp, 0));
   1744  }
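          // Frame bookkeeping mirrors wasmTruncateDoubleToUInt64 above.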
   1745 
   1746  // Convert the float to uint64.
   1747  bind(&convert);
   1748  truncateFloat32ToUInt64(Address(esp, 0), Address(esp, 0), temp, tempReg);
   1749 
   1750  // Load value into int64 register.
   1751  load64(Address(esp, 0), output);
   1752  freeStack(2 * sizeof(int32_t));
   1753 
   1754  if (isSaturating) {
   1755    bind(oolRejoin);
   1756  }
   1757 }
   1758 
   1759 // ========================================================================
   1760 // Primitive atomic operations.
   1761 
   1762 void MacroAssembler::atomicLoad64(Synchronization, const Address& mem,
   1763                                  Register64 temp, Register64 output) {
   1764  AtomicLoad64(*this, nullptr, mem, temp, output);
   1765 }
   1766 
   1767 void MacroAssembler::atomicLoad64(Synchronization, const BaseIndex& mem,
   1768                                  Register64 temp, Register64 output) {
   1769  AtomicLoad64(*this, nullptr, mem, temp, output);
   1770 }
   1771 
   1772 void MacroAssembler::atomicStore64(Synchronization, const Address& mem,
   1773                                   Register64 value, Register64 temp) {
   1774  AtomicExchange64(*this, nullptr, mem, value, temp);
   1775 }
   1776 
   1777 void MacroAssembler::atomicStore64(Synchronization, const BaseIndex& mem,
   1778                                   Register64 value, Register64 temp) {
   1779  AtomicExchange64(*this, nullptr, mem, value, temp);
   1780 }
   1781 
   1782 void MacroAssembler::compareExchange64(Synchronization, const Address& mem,
   1783                                       Register64 expected,
   1784                                       Register64 replacement,
   1785                                       Register64 output) {
   1786  CompareExchange64(*this, nullptr, mem, expected, replacement, output);
   1787 }
   1788 
   1789 void MacroAssembler::compareExchange64(Synchronization, const BaseIndex& mem,
   1790                                       Register64 expected,
   1791                                       Register64 replacement,
   1792                                       Register64 output) {
   1793  CompareExchange64(*this, nullptr, mem, expected, replacement, output);
   1794 }
   1795 
   1796 void MacroAssembler::atomicExchange64(Synchronization, const Address& mem,
   1797                                      Register64 value, Register64 output) {
   1798  AtomicExchange64(*this, nullptr, mem, value, output);
   1799 }
   1800 
   1801 void MacroAssembler::atomicExchange64(Synchronization, const BaseIndex& mem,
   1802                                      Register64 value, Register64 output) {
   1803  AtomicExchange64(*this, nullptr, mem, value, output);
   1804 }
   1805 
   1806 void MacroAssembler::atomicFetchOp64(Synchronization, AtomicOp op,
   1807                                     const Address& value, const Address& mem,
   1808                                     Register64 temp, Register64 output) {
   1809  AtomicFetchOp64(*this, nullptr, op, value, mem, temp, output);
   1810 }
   1811 
   1812 void MacroAssembler::atomicFetchOp64(Synchronization, AtomicOp op,
   1813                                     const Address& value, const BaseIndex& mem,
   1814                                     Register64 temp, Register64 output) {
   1815  AtomicFetchOp64(*this, nullptr, op, value, mem, temp, output);
   1816 }
   1817 
   1818 // ========================================================================
   1819 // Convert floating point.
   1820 
   1821 bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return false; }
   1822 
   1823 void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
   1824                                           Register temp) {
   1825  MOZ_ASSERT(temp == Register::Invalid());
   1826 
   1827  // SUBPD needs SSE2, HADDPD needs SSE3.
   1828  if (!HasSSE3()) {
   1829    // Zero the dest register to break dependencies, see convertInt32ToDouble.
   1830    zeroDouble(dest);
   1831 
   1832    Push(src.high);
   1833    Push(src.low);
   1834    fild(Operand(esp, 0));
   1835 
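            // fild above read the value as a *signed* int64: if src's top bit
            // is set, the x87 register now holds src - 2**64, so fall through
            // the branch and add 2**64 to compensate.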
   1836    Label notNegative;
   1837    branch32(Assembler::NotSigned, src.high, Imm32(0), &notNegative);
   1838    double add_constant = 18446744073709551616.0;  // 2^64
   1839    store64(Imm64(mozilla::BitwiseCast<uint64_t>(add_constant)),
   1840            Address(esp, 0));
   1841    fld(Operand(esp, 0));
   1842    faddp();
   1843    bind(&notNegative);
   1844 
   1845    fstp(Operand(esp, 0));
   1846    vmovsd(Address(esp, 0), dest);
   1847    freeStack(2 * sizeof(intptr_t));
   1848    return;
   1849  }
   1850 
    1851  // The following operation uses the entire 128 bits of the dest XMM
    1852  // register; the upper 64 bits are free when we only use the lower 64.
   1853  MOZ_ASSERT(dest.size() == 8);
   1854  FloatRegister dest128 =
   1855      FloatRegister(dest.encoding(), FloatRegisters::Simd128);
   1856 
    1857  // Assume that src is represented as follows:
    1858  //   src      = 0x HHHHHHHH LLLLLLLL
   1859 
   1860  {
   1861    // Move src to dest (=dest128) and ScratchInt32x4Reg (=scratch):
   1862    //   dest     = 0x 00000000 00000000  00000000 LLLLLLLL
   1863    //   scratch  = 0x 00000000 00000000  00000000 HHHHHHHH
   1864    ScratchSimd128Scope scratch(*this);
   1865    vmovd(src.low, dest128);
   1866    vmovd(src.high, scratch);
   1867 
   1868    // Unpack and interleave dest and scratch to dest:
   1869    //   dest     = 0x 00000000 00000000  HHHHHHHH LLLLLLLL
   1870    vpunpckldq(scratch, dest128, dest128);
   1871  }
   1872 
   1873  // Unpack and interleave dest and a constant C1 to dest:
   1874  //   C1       = 0x 00000000 00000000  45300000 43300000
   1875  //   dest     = 0x 45300000 HHHHHHHH  43300000 LLLLLLLL
    1876  // here, each 64-bit half of dest represents the following double:
    1877  //   HI(dest) = 0x 1.00000HHHHHHHH * 2**84 == 2**84 + 0x HHHHHHHH 00000000
    1878  //   LO(dest) = 0x 1.00000LLLLLLLL * 2**52 == 2**52 + 0x 00000000 LLLLLLLL
    1879  // i.e. the 2**84 and 2**52 biases place the raw bits exactly in each mantissa.
   1880  static const int32_t CST1[4] = {
   1881      0x43300000,
   1882      0x45300000,
   1883      0x0,
   1884      0x0,
   1885  };
   1886 
   1887  vpunpckldqSimd128(SimdConstant::CreateX4(CST1), dest128, dest128);
   1888 
   1889  // Subtract a constant C2 from dest, for each 64-bit part:
   1890  //   C2       = 0x 45300000 00000000  43300000 00000000
    1891  // here, each 64-bit half of C2 represents the following double:
   1892  //   HI(C2)   = 0x 1.0000000000000 * 2**84 == 2**84
   1893  //   LO(C2)   = 0x 1.0000000000000 * 2**52 == 2**52
    1894  // after the operation each 64-bit half of dest represents the following:
   1895  //   HI(dest) = double(0x HHHHHHHH 00000000)
   1896  //   LO(dest) = double(0x 00000000 LLLLLLLL)
   1897  static const int32_t CST2[4] = {
   1898      0x0,
   1899      0x43300000,
   1900      0x0,
   1901      0x45300000,
   1902  };
   1903 
   1904  vsubpdSimd128(SimdConstant::CreateX4(CST2), dest128, dest128);
   1905 
   1906  // Add HI(dest) and LO(dest) in double and store it into LO(dest),
   1907  //   LO(dest) = double(0x HHHHHHHH 00000000) + double(0x 00000000 LLLLLLLL)
   1908  //            = double(0x HHHHHHHH LLLLLLLL)
   1909  //            = double(src)
   1910  vhaddpd(dest128, dest128);
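          // Worked example (illustrative): src = 2**32 = 0x 00000001 00000000.
          // Before the subtraction HI(dest) = 2**84 + 2**32 and LO(dest) =
          // 2**52; afterwards HI = 4294967296.0 and LO = 0.0, and vhaddpd
          // leaves 4294967296.0 == double(src) in LO(dest).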
   1911 }
   1912 
   1913 void MacroAssembler::convertInt64ToDouble(Register64 input,
   1914                                          FloatRegister output) {
   1915  // Zero the output register to break dependencies, see convertInt32ToDouble.
   1916  zeroDouble(output);
   1917 
   1918  Push(input.high);
   1919  Push(input.low);
   1920  fild(Operand(esp, 0));
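          // fild consumes a signed 64-bit integer natively, so unlike
          // convertUInt64ToDouble above no compensation step is needed.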
   1921 
   1922  fstp(Operand(esp, 0));
   1923  vmovsd(Address(esp, 0), output);
   1924  freeStack(2 * sizeof(intptr_t));
   1925 }
   1926 
   1927 void MacroAssembler::convertUInt64ToFloat32(Register64 input,
   1928                                            FloatRegister output,
   1929                                            Register temp) {
   1930  // Zero the dest register to break dependencies, see convertInt32ToDouble.
   1931  zeroDouble(output);
   1932 
   1933  // Set the FPU precision to 80 bits.
   1934  reserveStack(2 * sizeof(intptr_t));
   1935  fnstcw(Operand(esp, 0));
   1936  load32(Operand(esp, 0), temp);
   1937  orl(Imm32(0x300), temp);
   1938  store32(temp, Operand(esp, sizeof(intptr_t)));
   1939  fldcw(Operand(esp, sizeof(intptr_t)));
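          // Extended precision gives a 64-bit mantissa, keeping the int64
          // exact in the x87 register; rounding then happens only once, at
          // the fstp32 below.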
   1940 
   1941  Push(input.high);
   1942  Push(input.low);
   1943  fild(Operand(esp, 0));
   1944 
   1945  Label notNegative;
   1946  branch32(Assembler::NotSigned, input.high, Imm32(0), &notNegative);
   1947  double add_constant = 18446744073709551616.0;  // 2^64
   1948  uint64_t add_constant_u64 = mozilla::BitwiseCast<uint64_t>(add_constant);
   1949  store64(Imm64(add_constant_u64), Address(esp, 0));
   1950 
   1951  fld(Operand(esp, 0));
   1952  faddp();
   1953  bind(&notNegative);
   1954 
   1955  fstp32(Operand(esp, 0));
   1956  vmovss(Address(esp, 0), output);
   1957  freeStack(2 * sizeof(intptr_t));
   1958 
   1959  // Restore FPU precision to the initial value.
   1960  fldcw(Operand(esp, 0));
   1961  freeStack(2 * sizeof(intptr_t));
   1962 }
   1963 
   1964 void MacroAssembler::convertInt64ToFloat32(Register64 input,
   1965                                           FloatRegister output) {
   1966  // Zero the output register to break dependencies, see convertInt32ToDouble.
   1967  zeroDouble(output);
   1968 
   1969  Push(input.high);
   1970  Push(input.low);
   1971  fild(Operand(esp, 0));
   1972 
   1973  fstp32(Operand(esp, 0));
   1974  vmovss(Address(esp, 0), output);
   1975  freeStack(2 * sizeof(intptr_t));
   1976 }
   1977 
   1978 void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
   1979  convertInt32ToDouble(src, dest);
   1980 }
   1981 
   1982 void MacroAssembler::PushBoxed(FloatRegister reg) { Push(reg); }
   1983 
   1984 CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
   1985  return movWithPatch(ImmPtr(nullptr), dest);
   1986 }
   1987 
   1988 void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
   1989                                          CodeLocationLabel target) {
   1990  PatchDataWithValueCheck(loc, ImmPtr(target.raw()), ImmPtr(nullptr));
   1991 }
   1992 
   1993 void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
   1994                                       Register64 boundsCheckLimit,
   1995                                       Label* label) {
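          // On a 32-bit target an index with a nonzero high word can never be
          // below a 32-bit limit, so the branch is skipped (this assumes
          // callers pass a below-style condition); otherwise the check
          // reduces to a 32-bit compare on the low words.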
   1996  Label ifFalse;
   1997  cmp32(index.high, Imm32(0));
   1998  j(Assembler::NonZero, &ifFalse);
   1999  wasmBoundsCheck32(cond, index.low, boundsCheckLimit.low, label);
   2000  bind(&ifFalse);
   2001 }
   2002 
   2003 void MacroAssembler::wasmBoundsCheck64(Condition cond, Register64 index,
   2004                                       Address boundsCheckLimit, Label* label) {
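          // Same high-word shortcut as the register-limit overload above.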
   2005  Label ifFalse;
   2006  cmp32(index.high, Imm32(0));
   2007  j(Assembler::NonZero, &ifFalse);
   2008  wasmBoundsCheck32(cond, index.low, boundsCheckLimit, label);
   2009  bind(&ifFalse);
   2010 }
   2011 
   2012 void MacroAssembler::wasmMarkCallAsSlow() {
   2013  static_assert(esi == InstanceReg);
   2014  or32(esi, esi);
   2015 }
   2016 
   2017 const int32_t SlowCallMarker = 0xf60b;  // OR esi, esi
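         // Encoding note: "or esi, esi" assembles to bytes 0x0B 0xF6 (opcode
         // 0B /r, modrm 0xF6); a little-endian 16-bit load reads this back as
         // 0xf60b. The instruction is a no-op apart from flags and merely
         // marks the return address of a slow call.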
   2018 
   2019 void MacroAssembler::wasmCheckSlowCallsite(Register ra, Label* notSlow,
   2020                                           Register temp1, Register temp2) {
    2021  // Check whether the code at RA starts with the slow-call marker.
   2022  cmp16(Address(ra, 0), Imm32(SlowCallMarker));
   2023  j(Assembler::NotEqual, notSlow);
   2024 }
   2025 
   2026 //}}} check_macroassembler_style