tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

BaseAssembler-x86-shared.h (250576B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: set ts=8 sts=2 et sw=2 tw=80:
      3 *
      4 * ***** BEGIN LICENSE BLOCK *****
      5 * Copyright (C) 2008 Apple Inc. All rights reserved.
      6 *
      7 * Redistribution and use in source and binary forms, with or without
      8 * modification, are permitted provided that the following conditions
      9 * are met:
     10 * 1. Redistributions of source code must retain the above copyright
     11 *    notice, this list of conditions and the following disclaimer.
     12 * 2. Redistributions in binary form must reproduce the above copyright
     13 *    notice, this list of conditions and the following disclaimer in the
     14 *    documentation and/or other materials provided with the distribution.
     15 *
     16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 *
     28 * ***** END LICENSE BLOCK ***** */
     29 
     30 #ifndef jit_x86_shared_BaseAssembler_x86_shared_h
     31 #define jit_x86_shared_BaseAssembler_x86_shared_h
     32 
     33 #include "mozilla/IntegerPrintfMacros.h"
     34 
     35 #include "jit/x86-shared/AssemblerBuffer-x86-shared.h"
     36 #include "jit/x86-shared/Encoding-x86-shared.h"
     37 #include "jit/x86-shared/Patching-x86-shared.h"
     38 #include "wasm/WasmTypeDecls.h"
     39 
     40 namespace js {
     41 namespace jit {
     42 
     43 namespace X86Encoding {
     44 
     45 class BaseAssembler;
     46 
class BaseAssembler : public GenericAssembler {
 public:
  // Starts out preferring VEX (AVX) encodings; disableVEX() switches to the
  // legacy SSE encodings instead.
  BaseAssembler() : useVEX_(true) {}

  void disableVEX() { useVEX_ = false; }

  // Thin forwarding accessors over the underlying code buffer (m_formatter).
  size_t size() const { return m_formatter.size(); }
  const unsigned char* buffer() const { return m_formatter.buffer(); }
  unsigned char* data() { return m_formatter.data(); }
  bool oom() const { return m_formatter.oom(); }
  bool reserve(size_t size) { return m_formatter.reserve(size); }
  bool swapBuffer(wasm::Bytes& other) { return m_formatter.swapBuffer(other); }

  // Emit a single one-byte nop instruction.
  void nop() {
    spew("nop");
    m_formatter.oneByteOp(OP_NOP);
  }

  // Emit a comment into the spew (debug disassembly) log only; no code bytes
  // are emitted.
  void comment(const char* msg) { spew("; %s", msg); }
     66 
     67  static void patchFiveByteNopToCall(uint8_t* callsite, uint8_t* target) {
     68    // Note: the offset is relative to the address of the instruction after
     69    // the call which is five bytes.
     70    uint8_t* inst = callsite - sizeof(int32_t) - 1;
     71    // The nop can be already patched as call, overriding the call.
     72    // See also nop_five.
     73    MOZ_ASSERT(inst[0] == OP_NOP_0F || inst[0] == OP_CALL_rel32);
     74    MOZ_ASSERT_IF(inst[0] == OP_NOP_0F,
     75                  inst[1] == OP_NOP_1F || inst[2] == OP_NOP_44 ||
     76                      inst[3] == OP_NOP_00 || inst[4] == OP_NOP_00);
     77    inst[0] = OP_CALL_rel32;
     78    SetRel32(callsite, target);
     79  }
     80 
     81  static void patchCallToFiveByteNop(uint8_t* callsite) {
     82    // See also patchFiveByteNopToCall and nop_five.
     83    uint8_t* inst = callsite - sizeof(int32_t) - 1;
     84    // The call can be already patched as nop.
     85    if (inst[0] == OP_NOP_0F) {
     86      MOZ_ASSERT(inst[1] == OP_NOP_1F || inst[2] == OP_NOP_44 ||
     87                 inst[3] == OP_NOP_00 || inst[4] == OP_NOP_00);
     88      return;
     89    }
     90    MOZ_ASSERT(inst[0] == OP_CALL_rel32);
     91    inst[0] = OP_NOP_0F;
     92    inst[1] = OP_NOP_1F;
     93    inst[2] = OP_NOP_44;
     94    inst[3] = OP_NOP_00;
     95    inst[4] = OP_NOP_00;
     96  }
     97 
  /*
   * The nop multibytes sequences are directly taken from the Intel's
   * architecture software developer manual.
   * They are defined for sequences of sizes from 1 to 9 included.
   */
  // NOTE(review): byte values below are inferred from the OP_NOP_* constant
  // names (e.g. 66 90, 0F 1F 00, 0F 1F 40 00, ...), matching Intel's
  // recommended multi-byte NOPs — confirm against Encoding-x86-shared.h.
  void nop_one() { m_formatter.oneByteOp(OP_NOP); }

  void nop_two() {
    m_formatter.oneByteOp(OP_NOP_66);
    m_formatter.oneByteOp(OP_NOP);
  }

  void nop_three() {
    m_formatter.oneByteOp(OP_NOP_0F);
    m_formatter.oneByteOp(OP_NOP_1F);
    m_formatter.oneByteOp(OP_NOP_00);
  }

  void nop_four() {
    m_formatter.oneByteOp(OP_NOP_0F);
    m_formatter.oneByteOp(OP_NOP_1F);
    m_formatter.oneByteOp(OP_NOP_40);
    m_formatter.oneByteOp(OP_NOP_00);
  }

  // This five-byte sequence is also what patchFiveByteNopToCall /
  // patchCallToFiveByteNop expect at a patchable call site.
  void nop_five() {
    m_formatter.oneByteOp(OP_NOP_0F);
    m_formatter.oneByteOp(OP_NOP_1F);
    m_formatter.oneByteOp(OP_NOP_44);
    m_formatter.oneByteOp(OP_NOP_00);
    m_formatter.oneByteOp(OP_NOP_00);
  }

  // Six bytes: the five-byte nop behind an operand-size prefix.
  void nop_six() {
    m_formatter.oneByteOp(OP_NOP_66);
    nop_five();
  }

  void nop_seven() {
    m_formatter.oneByteOp(OP_NOP_0F);
    m_formatter.oneByteOp(OP_NOP_1F);
    m_formatter.oneByteOp(OP_NOP_80);
    for (int i = 0; i < 4; ++i) {
      m_formatter.oneByteOp(OP_NOP_00);
    }
  }

  void nop_eight() {
    m_formatter.oneByteOp(OP_NOP_0F);
    m_formatter.oneByteOp(OP_NOP_1F);
    m_formatter.oneByteOp(OP_NOP_84);
    for (int i = 0; i < 5; ++i) {
      m_formatter.oneByteOp(OP_NOP_00);
    }
  }

  // Nine bytes: the eight-byte nop behind an operand-size prefix.
  void nop_nine() {
    m_formatter.oneByteOp(OP_NOP_66);
    nop_eight();
  }
    158 
    159  void insert_nop(int size) {
    160    switch (size) {
    161      case 1:
    162        nop_one();
    163        break;
    164      case 2:
    165        nop_two();
    166        break;
    167      case 3:
    168        nop_three();
    169        break;
    170      case 4:
    171        nop_four();
    172        break;
    173      case 5:
    174        nop_five();
    175        break;
    176      case 6:
    177        nop_six();
    178        break;
    179      case 7:
    180        nop_seven();
    181        break;
    182      case 8:
    183        nop_eight();
    184        break;
    185      case 9:
    186        nop_nine();
    187        break;
    188      case 10:
    189        nop_three();
    190        nop_seven();
    191        break;
    192      case 11:
    193        nop_four();
    194        nop_seven();
    195        break;
    196      case 12:
    197        nop_six();
    198        nop_six();
    199        break;
    200      case 13:
    201        nop_six();
    202        nop_seven();
    203        break;
    204      case 14:
    205        nop_seven();
    206        nop_seven();
    207        break;
    208      case 15:
    209        nop_one();
    210        nop_seven();
    211        nop_seven();
    212        break;
    213      default:
    214        MOZ_CRASH("Unhandled alignment");
    215    }
    216  }
    217 
  // Stack operations:

  void push_r(RegisterID reg) {
    spew("push       %s", GPRegName(reg));
    m_formatter.oneByteOp(OP_PUSH_EAX, reg);
  }

  void pop_r(RegisterID reg) {
    spew("pop        %s", GPRegName(reg));
    m_formatter.oneByteOp(OP_POP_EAX, reg);
  }

  // Push an immediate, preferring the short sign-extended-imm8 encoding when
  // the value fits in a byte.
  void push_i(int32_t imm) {
    spew("push       $%s0x%x", PRETTYHEX(imm));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_PUSH_Ib);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_PUSH_Iz);
      m_formatter.immediate32(imm);
    }
  }

  // Always uses the 4-byte immediate form (cf. addl_i32r), presumably so the
  // value can be patched later.
  void push_i32(int32_t imm) {
    spew("push       $%s0x%04x", PRETTYHEX(imm));
    m_formatter.oneByteOp(OP_PUSH_Iz);
    m_formatter.immediate32(imm);
  }

  void push_m(int32_t offset, RegisterID base) {
    spew("push       " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_PUSH);
  }
  void push_m(int32_t offset, RegisterID base, RegisterID index, int scale) {
    spew("push       " MEM_obs, ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, index, scale,
                          GROUP5_OP_PUSH);
  }

  void pop_m(int32_t offset, RegisterID base) {
    spew("pop        " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP1A_Ev, offset, base, GROUP1A_OP_POP);
  }

  void push_flags() {
    spew("pushf");
    m_formatter.oneByteOp(OP_PUSHFLAGS);
  }

  void pop_flags() {
    spew("popf");
    m_formatter.oneByteOp(OP_POPFLAGS);
  }
    271 
  // Arithmetic operations:

  void addl_rr(RegisterID src, RegisterID dst) {
    spew("addl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_ADD_GvEv, src, dst);
  }

  // 16-bit forms reuse the 32-bit opcode behind a 0x66 operand-size prefix.
  void addw_rr(RegisterID src, RegisterID dst) {
    spew("addw       %s, %s", GPReg16Name(src), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_ADD_GvEv, src, dst);
  }

  void addl_mr(int32_t offset, RegisterID base, RegisterID dst) {
    spew("addl       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_ADD_GvEv, offset, base, dst);
  }

  void addl_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("addl       %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, src);
  }

  void addl_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("addl       %s, " MEM_obs, GPReg32Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, index, scale, src);
  }

  // add immediate → register.  Prefers sign-extended imm8; for a full imm32
  // into the accumulator it uses the short EAX-specific encoding.
  void addl_ir(int32_t imm, RegisterID dst) {
    spew("addl       $%d, %s", imm, GPReg32Name(dst));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_ADD);
      m_formatter.immediate8s(imm);
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_ADD_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD);
      }
      m_formatter.immediate32(imm);
    }
  }

  void addw_ir(int32_t imm, RegisterID dst) {
    spew("addw       $%d, %s", int16_t(imm), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD);
    m_formatter.immediate16(imm);
  }

  void addl_i32r(int32_t imm, RegisterID dst) {
    // 32-bit immediate always, for patching.
    spew("addl       $0x%04x, %s", uint32_t(imm), GPReg32Name(dst));
    if (dst == rax) {
      m_formatter.oneByteOp(OP_ADD_EAXIv);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_ADD);
    }
    m_formatter.immediate32(imm);
  }
    334 
  // add immediate → memory.  The 32-bit forms pick the sign-extended imm8
  // encoding when the immediate fits in a byte.
  void addl_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("addl       $%d, " MEM_ob, imm, ADDR_ob(offset, base));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_ADD);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_ADD);
      m_formatter.immediate32(imm);
    }
  }

  void addl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("addl       $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_ADD);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_ADD);
      m_formatter.immediate32(imm);
    }
  }

  void addl_im(int32_t imm, const void* addr) {
    spew("addl       $%d, %p", imm, addr);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_ADD);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_ADD);
      m_formatter.immediate32(imm);
    }
  }
  // 16-bit variant: imm8 form when possible, else a 16-bit immediate.
  void addw_im(int32_t imm, const void* addr) {
    spew("addw       $%d, %p", int16_t(imm), addr);
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_ADD);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_ADD);
      m_formatter.immediate16(imm);
    }
  }

  void addw_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("addw       $%d, " MEM_ob, int16_t(imm), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_ADD);
    m_formatter.immediate16(imm);
  }

  void addw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("addw       $%d, " MEM_obs, int16_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                          GROUP1_OP_ADD);
    m_formatter.immediate16(imm);
  }

  void addw_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("addw       %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, src);
  }

  void addw_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("addw       %s, " MEM_obs, GPReg16Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_ADD_EvGv, offset, base, index, scale, src);
  }
    412 
  // Byte-sized (8-bit) ALU operations on memory operands.
  // NOTE(review): oneByteOp8 presumably handles byte-register encoding
  // constraints — see AssemblerBuffer-x86-shared.h.
  void addb_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("addb       $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_ADD);
    m_formatter.immediate8(imm);
  }

  void addb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("addb       $%d, " MEM_obs, int8_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale,
                          GROUP1_OP_ADD);
    m_formatter.immediate8(imm);
  }

  void addb_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("addb       %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp8(OP_ADD_EbGb, offset, base, src);
  }

  void addb_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("addb       %s, " MEM_obs, GPReg8Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp8(OP_ADD_EbGb, offset, base, index, scale, src);
  }

  void subb_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("subb       $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_SUB);
    m_formatter.immediate8(imm);
  }

  void subb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("subb       $%d, " MEM_obs, int8_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale,
                          GROUP1_OP_SUB);
    m_formatter.immediate8(imm);
  }

  void subb_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("subb       %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp8(OP_SUB_EbGb, offset, base, src);
  }

  void subb_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("subb       %s, " MEM_obs, GPReg8Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp8(OP_SUB_EbGb, offset, base, index, scale, src);
  }

  void andb_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("andb       $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_AND);
    m_formatter.immediate8(imm);
  }

  void andb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("andb       $%d, " MEM_obs, int8_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale,
                          GROUP1_OP_AND);
    m_formatter.immediate8(imm);
  }

  void andb_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("andb       %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp8(OP_AND_EbGb, offset, base, src);
  }

  void andb_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("andb       %s, " MEM_obs, GPReg8Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp8(OP_AND_EbGb, offset, base, index, scale, src);
  }
    493 
    494  void orb_im(int32_t imm, int32_t offset, RegisterID base) {
    495    spew("orb       $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base));
    496    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_OR);
    497    m_formatter.immediate8(imm);
    498  }
    499 
  // or immediate → byte memory operand (base + scaled index addressing).
  void orb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
              int scale) {
    spew("orb        $%d, " MEM_obs, int8_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale,
                          GROUP1_OP_OR);
    m_formatter.immediate8(imm);
  }
    508 
    509  void orb_rm(RegisterID src, int32_t offset, RegisterID base) {
    510    spew("orb       %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base));
    511    m_formatter.oneByteOp8(OP_OR_EbGb, offset, base, src);
    512  }
    513 
  // or byte register → byte memory operand (base + scaled index addressing).
  void orb_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index,
              int scale) {
    spew("orb        %s, " MEM_obs, GPReg8Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp8(OP_OR_EbGb, offset, base, index, scale, src);
  }
    520 
  // xor, 8-bit immediate and register forms on memory operands.
  void xorb_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("xorb       $%d, " MEM_ob, int8_t(imm), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_XOR);
    m_formatter.immediate8(imm);
  }

  void xorb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("xorb       $%d, " MEM_obs, int8_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale,
                          GROUP1_OP_XOR);
    m_formatter.immediate8(imm);
  }

  void xorb_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("xorb       %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp8(OP_XOR_EbGb, offset, base, src);
  }

  void xorb_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("xorb       %s, " MEM_obs, GPReg8Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp8(OP_XOR_EbGb, offset, base, index, scale, src);
  }
    547 
  // Atomic fetch-and-add: LOCK-prefixed XADD.  Per the ISA, srcdest supplies
  // the addend and receives the memory operand's previous value.
  void lock_xaddb_rm(RegisterID srcdest, int32_t offset, RegisterID base) {
    spew("lock xaddb %s, " MEM_ob, GPReg8Name(srcdest), ADDR_ob(offset, base));
    m_formatter.oneByteOp(PRE_LOCK);
    m_formatter.twoByteOp8(OP2_XADD_EbGb, offset, base, srcdest);
  }

  void lock_xaddb_rm(RegisterID srcdest, int32_t offset, RegisterID base,
                     RegisterID index, int scale) {
    spew("lock xaddb %s, " MEM_obs, GPReg8Name(srcdest),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(PRE_LOCK);
    m_formatter.twoByteOp8(OP2_XADD_EbGb, offset, base, index, scale, srcdest);
  }

  void lock_xaddl_rm(RegisterID srcdest, int32_t offset, RegisterID base) {
    spew("lock xaddl %s, " MEM_ob, GPReg32Name(srcdest), ADDR_ob(offset, base));
    m_formatter.oneByteOp(PRE_LOCK);
    m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, srcdest);
  }

  void lock_xaddl_rm(RegisterID srcdest, int32_t offset, RegisterID base,
                     RegisterID index, int scale) {
    spew("lock xaddl %s, " MEM_obs, GPReg32Name(srcdest),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(PRE_LOCK);
    m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, index, scale, srcdest);
  }
    575 
  // SIMD packed-integer arithmetic.  Each operation typically has three
  // forms: reg/reg, reg/[base+offset] memory, and reg/absolute-address.
  // twoByteOpSimd / threeByteOpSimd (defined later in this file) emit the
  // encoding; NOTE(review): presumably VEX vs. legacy SSE is selected by
  // useVEX_ — confirm in the helper definitions.
  void vpmaddubsw_rr(XMMRegisterID src1, XMMRegisterID src0,
                     XMMRegisterID dst) {
    threeByteOpSimd("vpmaddubsw", VEX_PD, OP3_PMADDUBSW_VdqWdq, ESCAPE_38, src1,
                    src0, dst);
  }
  void vpmaddubsw_mr(const void* address, XMMRegisterID src0,
                     XMMRegisterID dst) {
    threeByteOpSimd("vpmaddubsw", VEX_PD, OP3_PMADDUBSW_VdqWdq, ESCAPE_38,
                    address, src0, dst);
  }

  // Packed adds: plain, signed-saturating (s), unsigned-saturating (us),
  // for byte (b), word (w), dword (d) and qword (q) lanes.
  void vpaddb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, src1, src0, dst);
  }
  void vpaddb_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, offset, base, src0, dst);
  }
  void vpaddb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, address, src0, dst);
  }

  void vpaddsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, src1, src0, dst);
  }
  void vpaddsb_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
    twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpaddsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddsb", VEX_PD, OP2_PADDSB_VdqWdq, address, src0, dst);
  }

  void vpaddusb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, src1, src0, dst);
  }
  void vpaddusb_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpaddusb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddusb", VEX_PD, OP2_PADDUSB_VdqWdq, address, src0, dst);
  }

  void vpaddw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, src1, src0, dst);
  }
  void vpaddw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, offset, base, src0, dst);
  }
  void vpaddw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddw", VEX_PD, OP2_PADDW_VdqWdq, address, src0, dst);
  }

  void vpaddsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, src1, src0, dst);
  }
  void vpaddsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
    twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpaddsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddsw", VEX_PD, OP2_PADDSW_VdqWdq, address, src0, dst);
  }

  void vpaddusw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, src1, src0, dst);
  }
  void vpaddusw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpaddusw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddusw", VEX_PD, OP2_PADDUSW_VdqWdq, address, src0, dst);
  }

  void vpaddd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, src1, src0, dst);
  }
  void vpaddd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, offset, base, src0, dst);
  }
  void vpaddd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, address, src0, dst);
  }

  void vpaddq_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddq", VEX_PD, OP2_PADDQ_VdqWdq, address, src0, dst);
  }
    671 
  // Packed subtracts, mirroring the vpadd* family above: plain,
  // signed-saturating (s), unsigned-saturating (us), for byte/word/dword/qword
  // lanes.
  void vpsubb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, src1, src0, dst);
  }
  void vpsubb_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, offset, base, src0, dst);
  }
  void vpsubb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubb", VEX_PD, OP2_PSUBB_VdqWdq, address, src0, dst);
  }

  void vpsubsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, src1, src0, dst);
  }
  void vpsubsb_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
    twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpsubsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubsb", VEX_PD, OP2_PSUBSB_VdqWdq, address, src0, dst);
  }

  void vpsubusb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, src1, src0, dst);
  }
  void vpsubusb_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpsubusb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubusb", VEX_PD, OP2_PSUBUSB_VdqWdq, address, src0, dst);
  }

  void vpsubw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, src1, src0, dst);
  }
  void vpsubw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, offset, base, src0, dst);
  }
  void vpsubw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubw", VEX_PD, OP2_PSUBW_VdqWdq, address, src0, dst);
  }

  void vpsubsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, src1, src0, dst);
  }
  void vpsubsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
    twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpsubsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubsw", VEX_PD, OP2_PSUBSW_VdqWdq, address, src0, dst);
  }

  void vpsubusw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, src1, src0, dst);
  }
  void vpsubusw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpsubusw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubusw", VEX_PD, OP2_PSUBUSW_VdqWdq, address, src0, dst);
  }

  void vpsubd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, src1, src0, dst);
  }
  void vpsubd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, offset, base, src0, dst);
  }
  void vpsubd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, address, src0, dst);
  }

  void vpsubq_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubq", VEX_PD, OP2_PSUBQ_VdqWdq, address, src0, dst);
  }
    756 
  // --- Packed integer multiply family ---
  // twoByteOpSimd emits 0F-map opcodes; threeByteOpSimd with ESCAPE_38
  // emits 0F38-map opcodes (SSE4.1/SSSE3-era encodings such as PMULDQ,
  // PMULLD and PMULHRSW).

  // Signed 32x32 -> 64-bit multiply of the even lanes (0F38-map).
  void vpmuldq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vpmuldq", VEX_PD, OP3_PMULDQ_VdqWdq, ESCAPE_38, src1, src0,
                    dst);
  }

  // Unsigned 32x32 -> 64-bit multiply of the even lanes.
  void vpmuludq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, src1, src0, dst);
  }
  void vpmuludq_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpmuludq_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, address, src0, dst);
  }

  // Multiply 16-bit lanes and horizontally add adjacent 32-bit products.
  void vpmaddwd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpmaddwd", VEX_PD, OP2_PMADDWD_VdqWdq, src1, src0, dst);
  }
  void vpmaddwd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpmaddwd", VEX_PD, OP2_PMADDWD_VdqWdq, address, src0, dst);
  }

  // 16-bit lane multiplies: low half (PMULLW), signed high half (PMULHW),
  // unsigned high half (PMULHUW).
  void vpmullw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpmullw", VEX_PD, OP2_PMULLW_VdqWdq, src1, src0, dst);
  }
  void vpmulhw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpmulhw", VEX_PD, OP2_PMULHW_VdqWdq, src1, src0, dst);
  }
  void vpmulhuw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpmulhuw", VEX_PD, OP2_PMULHUW_VdqWdq, src1, src0, dst);
  }
  void vpmullw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
    twoByteOpSimd("vpmullw", VEX_PD, OP2_PMULLW_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpmulhw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
    twoByteOpSimd("vpmulhw", VEX_PD, OP2_PMULHW_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpmulhuw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpSimd("vpmulhuw", VEX_PD, OP2_PMULHUW_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpmullw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpmullw", VEX_PD, OP2_PMULLW_VdqWdq, address, src0, dst);
  }

  // 32-bit lane low-half multiply (0F38-map).
  void vpmulld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, src1, src0,
                    dst);
  }
  void vpmulld_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
    threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, offset,
                    base, src0, dst);
  }
  void vpmulld_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address,
                    src0, dst);
  }
  // 16-bit multiply with round-and-scale (0F38-map).
  void vpmulhrsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, src1,
                    src0, dst);
  }
  void vpmulhrsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                    XMMRegisterID dst) {
    threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, offset,
                    base, src0, dst);
  }
    831 
  // --- Packed floating-point arithmetic ---
  // "ps" = packed single (VEX_PS prefix), "pd" = packed double (VEX_PD).
  // As above: _rr takes register operands, _mr(offset, base) reads the
  // right-hand operand from [base + offset], _mr(address) from an
  // absolute address. The "pd" forms provide fewer addressing modes.
  void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst);
  }
  void vaddps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, offset, base, src0, dst);
  }
  void vaddps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, address, src0, dst);
  }

  void vsubps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, src1, src0, dst);
  }
  void vsubps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, offset, base, src0, dst);
  }
  void vsubps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, address, src0, dst);
  }

  void vmulps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, src1, src0, dst);
  }
  void vmulps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, offset, base, src0, dst);
  }
  void vmulps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, address, src0, dst);
  }

  void vdivps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, src1, src0, dst);
  }
  void vdivps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, offset, base, src0, dst);
  }
  void vdivps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, address, src0, dst);
  }

  void vmaxps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, src1, src0, dst);
  }
  void vmaxps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, offset, base, src0, dst);
  }
  void vmaxps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, address, src0, dst);
  }

  void vmaxpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmaxpd", VEX_PD, OP2_MAXPD_VpdWpd, src1, src0, dst);
  }

  void vminps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, src1, src0, dst);
  }
  void vminps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, offset, base, src0, dst);
  }
  void vminps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, address, src0, dst);
  }

  void vminpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vminpd", VEX_PD, OP2_MINPD_VpdWpd, src1, src0, dst);
  }
  void vminpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vminpd", VEX_PD, OP2_MINPD_VpdWpd, address, src0, dst);
  }

  void vaddpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddpd", VEX_PD, OP2_ADDPD_VpdWpd, src1, src0, dst);
  }
  void vaddpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddpd", VEX_PD, OP2_ADDPD_VpdWpd, address, src0, dst);
  }

  void vsubpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vsubpd", VEX_PD, OP2_SUBPD_VpdWpd, src1, src0, dst);
  }
  void vsubpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vsubpd", VEX_PD, OP2_SUBPD_VpdWpd, address, src0, dst);
  }

  void vmulpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmulpd", VEX_PD, OP2_MULPD_VpdWpd, src1, src0, dst);
  }
  void vmulpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmulpd", VEX_PD, OP2_MULPD_VpdWpd, address, src0, dst);
  }

  void vdivpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vdivpd", VEX_PD, OP2_DIVPD_VpdWpd, src1, src0, dst);
  }
  void vdivpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vdivpd", VEX_PD, OP2_DIVPD_VpdWpd, address, src0, dst);
  }
    936 
  // --- Integer AND ---
  // Naming: l = 32-bit, w = 16-bit (emitted by prefixing the 32-bit opcode
  // with PRE_OPERAND_SIZE). _rr reg->reg, _mr mem->reg, _rm reg->mem,
  // _ir imm->reg, _im imm->mem; scaled overloads take (offset, base,
  // index, scale) addressing.
  void andl_rr(RegisterID src, RegisterID dst) {
    spew("andl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_AND_GvEv, src, dst);
  }

  void andw_rr(RegisterID src, RegisterID dst) {
    spew("andw       %s, %s", GPReg16Name(src), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_AND_GvEv, src, dst);
  }

  void andl_mr(int32_t offset, RegisterID base, RegisterID dst) {
    spew("andl       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_AND_GvEv, offset, base, dst);
  }

  void andl_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
               RegisterID dst) {
    spew("andl       " MEM_obs ", %s", ADDR_obs(offset, base, index, scale),
         GPReg32Name(dst));
    m_formatter.oneByteOp(OP_AND_GvEv, offset, base, index, scale, dst);
  }

  void andl_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("andl       %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_AND_EvGv, offset, base, src);
  }

  void andw_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("andw       %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_AND_EvGv, offset, base, src);
  }

  void andl_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("andl       %s, " MEM_obs, GPReg32Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_AND_EvGv, offset, base, index, scale, src);
  }

  void andw_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("andw       %s, " MEM_obs, GPReg16Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_AND_EvGv, offset, base, index, scale, src);
  }

  // Immediate forms pick the shortest encoding: a sign-extendable imm8
  // (GROUP1 /Ib), otherwise the full-width immediate, with the dedicated
  // one-byte AND-to-EAX opcode used when the destination is rax/eax.
  void andl_ir(int32_t imm, RegisterID dst) {
    spew("andl       $0x%x, %s", uint32_t(imm), GPReg32Name(dst));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_AND);
      m_formatter.immediate8s(imm);
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_AND_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_AND);
      }
      m_formatter.immediate32(imm);
    }
  }

  void andw_ir(int32_t imm, RegisterID dst) {
    spew("andw       $0x%x, %s", uint16_t(imm), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_AND);
      m_formatter.immediate8s(imm);
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_AND_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_AND);
      }
      // 16-bit operand size: the full-width immediate is two bytes.
      m_formatter.immediate16(imm);
    }
  }

  void andl_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("andl       $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_AND);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_AND);
      m_formatter.immediate32(imm);
    }
  }

  void andw_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("andw       $0x%x, " MEM_ob, uint16_t(imm), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_AND);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_AND);
      m_formatter.immediate16(imm);
    }
  }

  void andl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("andl       $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_AND);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_AND);
      m_formatter.immediate32(imm);
    }
  }

  void andw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("andw       $%d, " MEM_obs, int16_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_AND);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_AND);
      m_formatter.immediate16(imm);
    }
  }
   1069 
  // --- x87 FPU helpers ---
  // Memory forms select the FPU opcode group (OP_FPU6 / OP_FPU6_F32 /
  // OP_FILD) and an extension slot (FPU6_OP_*) carried in the ModRM reg
  // field. Note several distinct instructions intentionally reuse the same
  // extension enumerator because they share the same reg-field value in
  // different opcode groups (see fnstcw_m / fnstsw_m below).
  void fld_m(int32_t offset, RegisterID base) {
    spew("fld        " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FLD);
  }
  // Single-precision (32-bit) load variant.
  void fld32_m(int32_t offset, RegisterID base) {
    spew("fld        " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FLD);
  }
  // faddp st(1): emitted as two raw opcode bytes (no ModRM operand).
  void faddp() {
    spew("addp       ");
    m_formatter.oneByteOp(OP_FPU6_ADDP);
    m_formatter.oneByteOp(OP_ADDP_ST0_ST1);
  }
  void fisttp_m(int32_t offset, RegisterID base) {
    spew("fisttp     " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FISTTP);
  }
  void fistp_m(int32_t offset, RegisterID base) {
    spew("fistp      " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FILD, offset, base, FPU6_OP_FISTP);
  }
  void fstp_m(int32_t offset, RegisterID base) {
    spew("fstp       " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FSTP);
  }
  void fstp32_m(int32_t offset, RegisterID base) {
    spew("fstp32     " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FSTP);
  }
  // Store control word; reuses FPU6_OP_FISTP since fnstcw shares that
  // reg-field encoding within the OP_FPU6_F32 group.
  void fnstcw_m(int32_t offset, RegisterID base) {
    spew("fnstcw     " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FISTP);
  }
  void fldcw_m(int32_t offset, RegisterID base) {
    spew("fldcw      " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FLDCW);
  }
  // Store status word; same enumerator-reuse pattern as fnstcw_m, in the
  // OP_FPU6 group.
  void fnstsw_m(int32_t offset, RegisterID base) {
    spew("fnstsw     " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FISTP);
  }
   1111 
  // 32-bit negate (two's complement) and bitwise-not, on a register or a
  // memory operand; both are GROUP3 opcodes distinguished by the reg-field
  // extension.
  void negl_r(RegisterID dst) {
    spew("negl       %s", GPReg32Name(dst));
    m_formatter.oneByteOp(OP_GROUP3_Ev, dst, GROUP3_OP_NEG);
  }

  void negl_m(int32_t offset, RegisterID base) {
    spew("negl       " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP3_Ev, offset, base, GROUP3_OP_NEG);
  }

  void notl_r(RegisterID dst) {
    spew("notl       %s", GPReg32Name(dst));
    m_formatter.oneByteOp(OP_GROUP3_Ev, dst, GROUP3_OP_NOT);
  }

  void notl_m(int32_t offset, RegisterID base) {
    spew("notl       " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP3_Ev, offset, base, GROUP3_OP_NOT);
  }
   1131 
  // --- Integer OR ---
  // Same overload scheme as the AND family above: l/w operand sizes,
  // rr/mr/rm/ir/im forms, shortest-immediate selection, and the one-byte
  // OR-to-EAX opcode when the destination is rax/eax.
  void orl_rr(RegisterID src, RegisterID dst) {
    spew("orl        %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_OR_GvEv, src, dst);
  }

  void orw_rr(RegisterID src, RegisterID dst) {
    spew("orw        %s, %s", GPReg16Name(src), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_OR_GvEv, src, dst);
  }

  void orl_mr(int32_t offset, RegisterID base, RegisterID dst) {
    spew("orl        " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_OR_GvEv, offset, base, dst);
  }

  void orl_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("orl        %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_OR_EvGv, offset, base, src);
  }

  void orw_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("orw        %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_OR_EvGv, offset, base, src);
  }

  void orl_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index,
              int scale) {
    spew("orl        %s, " MEM_obs, GPReg32Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_OR_EvGv, offset, base, index, scale, src);
  }

  void orw_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index,
              int scale) {
    spew("orw        %s, " MEM_obs, GPReg16Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_OR_EvGv, offset, base, index, scale, src);
  }

  void orl_ir(int32_t imm, RegisterID dst) {
    spew("orl        $0x%x, %s", uint32_t(imm), GPReg32Name(dst));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_OR);
      m_formatter.immediate8s(imm);
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_OR_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_OR);
      }
      m_formatter.immediate32(imm);
    }
  }

  void orw_ir(int32_t imm, RegisterID dst) {
    spew("orw        $0x%x, %s", uint16_t(imm), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_OR);
      m_formatter.immediate8s(imm);
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_OR_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_OR);
      }
      m_formatter.immediate16(imm);
    }
  }

  void orl_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("orl        $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_OR);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_OR);
      m_formatter.immediate32(imm);
    }
  }

  void orw_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("orw        $0x%x, " MEM_ob, uint16_t(imm), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_OR);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_OR);
      m_formatter.immediate16(imm);
    }
  }

  void orl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
              int scale) {
    spew("orl        $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_OR);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_OR);
      m_formatter.immediate32(imm);
    }
  }

  void orw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
              int scale) {
    spew("orw        $%d, " MEM_obs, int16_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_OR);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_OR);
      m_formatter.immediate16(imm);
    }
  }
   1257 
  // 32-bit subtract-with-borrow, register-to-register.
  void sbbl_rr(RegisterID src, RegisterID dst) {
    spew("sbbl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_SBB_GvEv, src, dst);
  }
   1262 
  // --- Integer SUB ---
  // Same overload scheme as the AND/OR families. The size_t-returning
  // immediate forms report the width in bytes of the encoded immediate
  // (1 for a sign-extended imm8, 4 for a full imm32), so callers can
  // locate/patch the immediate afterwards.
  void subl_rr(RegisterID src, RegisterID dst) {
    spew("subl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_SUB_GvEv, src, dst);
  }

  void subw_rr(RegisterID src, RegisterID dst) {
    spew("subw       %s, %s", GPReg16Name(src), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_SUB_GvEv, src, dst);
  }

  void subl_mr(int32_t offset, RegisterID base, RegisterID dst) {
    spew("subl       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
    m_formatter.oneByteOp(OP_SUB_GvEv, offset, base, dst);
  }

  void subl_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("subl       %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, src);
  }

  void subw_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("subw       %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, src);
  }

  void subl_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("subl       %s, " MEM_obs, GPReg32Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, index, scale, src);
  }

  void subw_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("subw       %s, " MEM_obs, GPReg16Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_SUB_EvGv, offset, base, index, scale, src);
  }

  // Returns the encoded immediate width (1 or 4 bytes).
  size_t subl_ir(int32_t imm, RegisterID dst) {
    spew("subl       $%d, %s", imm, GPReg32Name(dst));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SUB);
      m_formatter.immediate8s(imm);
      return 1;
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_SUB_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SUB);
      }
      m_formatter.immediate32(imm);
      return 4;
    }
  }

  void subw_ir(int32_t imm, RegisterID dst) {
    spew("subw       $%d, %s", int16_t(imm), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SUB);
      m_formatter.immediate8s(imm);
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_SUB_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SUB);
      }
      m_formatter.immediate16(imm);
    }
  }

  // Returns the encoded immediate width (1 or 4 bytes).
  size_t subl_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("subl       $%d, " MEM_ob, imm, ADDR_ob(offset, base));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_SUB);
      m_formatter.immediate8s(imm);
      return 1;
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_SUB);
      m_formatter.immediate32(imm);
      return 4;
    }
  }

  void subw_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("subw       $%d, " MEM_ob, int16_t(imm), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_SUB);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_SUB);
      m_formatter.immediate16(imm);
    }
  }
   1362 
   1363  size_t subl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
   1364                 int scale) {
   1365    spew("subl       $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale));
   1366    if (CAN_SIGN_EXTEND_8_32(imm)) {
   1367      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
   1368                            GROUP1_OP_SUB);
   1369      m_formatter.immediate8s(imm);
   1370      return 1;
   1371    } else {
   1372      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
   1373                            GROUP1_OP_SUB);
   1374      m_formatter.immediate32(imm);
   1375      return 1;
   1376    }
   1377  }
   1378 
  // Emit `subw $imm, disp(base,index,scale)` (16-bit via operand-size
  // prefix); imm8 short form when the immediate sign-extends, else imm16.
  void subw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("subw       $%d, " MEM_obs, int16_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_SUB);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_SUB);
      m_formatter.immediate16(imm);
    }
  }
   1394 
   1395  void xorl_rr(RegisterID src, RegisterID dst) {
   1396    spew("xorl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
   1397    m_formatter.oneByteOp(OP_XOR_GvEv, src, dst);
   1398  }
   1399 
   1400  void xorw_rr(RegisterID src, RegisterID dst) {
   1401    spew("xorw       %s, %s", GPReg16Name(src), GPReg16Name(dst));
   1402    m_formatter.prefix(PRE_OPERAND_SIZE);
   1403    m_formatter.oneByteOp(OP_XOR_GvEv, src, dst);
   1404  }
   1405 
   1406  void xorl_mr(int32_t offset, RegisterID base, RegisterID dst) {
   1407    spew("xorl       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
   1408    m_formatter.oneByteOp(OP_XOR_GvEv, offset, base, dst);
   1409  }
   1410 
   1411  void xorl_rm(RegisterID src, int32_t offset, RegisterID base) {
   1412    spew("xorl       %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base));
   1413    m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, src);
   1414  }
   1415 
  // xorw %src, disp(base): 16-bit XOR of a register into memory.
  void xorw_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("xorw       %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, src);
  }
   1421 
  // xorl %src, disp(base,index,scale): 32-bit XOR into a SIB-addressed
  // memory operand.
  void xorl_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("xorl       %s, " MEM_obs, GPReg32Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, index, scale, src);
  }
   1428 
  // xorw %src, disp(base,index,scale): 16-bit XOR into a SIB-addressed
  // memory operand (operand-size prefix).
  void xorw_rm(RegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("xorw       %s, " MEM_obs, GPReg16Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_XOR_EvGv, offset, base, index, scale, src);
  }
   1436 
  // xorl $imm, disp(base): 32-bit XOR of an immediate into memory.
  // Prefers the sign-extended 8-bit immediate encoding when imm fits.
  void xorl_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("xorl       $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_XOR);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_XOR);
      m_formatter.immediate32(imm);
    }
  }
   1447 
  // xorw $imm, disp(base): 16-bit XOR of an immediate into memory.
  // Prefers the sign-extended 8-bit immediate, else a 16-bit immediate.
  void xorw_im(int32_t imm, int32_t offset, RegisterID base) {
    spew("xorw       $0x%x, " MEM_ob, uint16_t(imm), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_XOR);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_XOR);
      m_formatter.immediate16(imm);
    }
  }
   1459 
  // xorl $imm, disp(base,index,scale): 32-bit XOR of an immediate into a
  // SIB-addressed memory operand; uses the short immediate form when it fits.
  void xorl_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("xorl       $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_XOR);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_XOR);
      m_formatter.immediate32(imm);
    }
  }
   1473 
  // xorw $imm, disp(base,index,scale): 16-bit XOR of an immediate into a
  // SIB-addressed memory operand; short immediate form when it fits.
  void xorw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("xorw       $%d, " MEM_obs, int16_t(imm),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_XOR);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_XOR);
      m_formatter.immediate16(imm);
    }
  }
   1489 
  // xorl $imm, %dst: 32-bit XOR of an immediate into a register. Uses the
  // short sign-extended form when imm fits; otherwise uses the dedicated
  // EAX-immediate opcode when dst is %eax (one byte shorter).
  void xorl_ir(int32_t imm, RegisterID dst) {
    spew("xorl       $%d, %s", imm, GPReg32Name(dst));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_XOR);
      m_formatter.immediate8s(imm);
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_XOR_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_XOR);
      }
      m_formatter.immediate32(imm);
    }
  }
   1504 
  // xorw $imm, %dst: 16-bit XOR of an immediate into a register. Same
  // encoding choices as xorl_ir but with the operand-size prefix and a
  // 16-bit long-form immediate.
  void xorw_ir(int32_t imm, RegisterID dst) {
    spew("xorw       $%d, %s", int16_t(imm), GPReg16Name(dst));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_XOR);
      m_formatter.immediate8s(imm);
    } else {
      if (dst == rax) {
        m_formatter.oneByteOp(OP_XOR_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_XOR);
      }
      m_formatter.immediate16(imm);
    }
  }
   1520 
  // bswap %dst: byte-swap a 32-bit register in place.
  void bswapl_r(RegisterID dst) {
    spew("bswap      %s", GPReg32Name(dst));
    m_formatter.twoByteOp(OP2_BSWAP, dst);
  }
   1525 
  // sarl $imm, %dst: 32-bit arithmetic right shift by an immediate.
  // A count of 1 uses the dedicated shift-by-one opcode (no immediate byte).
  void sarl_ir(int32_t imm, RegisterID dst) {
    MOZ_ASSERT(imm < 32);
    spew("sarl       $%d, %s", imm, GPReg32Name(dst));
    if (imm == 1) {
      m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SAR);
    } else {
      m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SAR);
      m_formatter.immediate8u(imm);
    }
  }
   1536 
  // sarl %cl, %dst: 32-bit arithmetic right shift by the count in %cl.
  void sarl_CLr(RegisterID dst) {
    spew("sarl       %%cl, %s", GPReg32Name(dst));
    m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SAR);
  }
   1541 
  // shrl $imm, %dst: 32-bit logical right shift by an immediate.
  // A count of 1 uses the dedicated shift-by-one opcode.
  void shrl_ir(int32_t imm, RegisterID dst) {
    MOZ_ASSERT(imm < 32);
    spew("shrl       $%d, %s", imm, GPReg32Name(dst));
    if (imm == 1) {
      m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SHR);
    } else {
      m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SHR);
      m_formatter.immediate8u(imm);
    }
  }
   1552 
  // shrl %cl, %dst: 32-bit logical right shift by the count in %cl.
  void shrl_CLr(RegisterID dst) {
    spew("shrl       %%cl, %s", GPReg32Name(dst));
    m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SHR);
  }
   1557 
  // shrdl %cl, %src, %dst: 32-bit double-precision right shift, shifting
  // bits from src into dst; count in %cl.
  void shrdl_CLr(RegisterID src, RegisterID dst) {
    spew("shrdl      %%cl, %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.twoByteOp(OP2_SHRD_GvEv, dst, src);
  }
   1562 
  // shldl %cl, %src, %dst: 32-bit double-precision left shift, shifting
  // bits from src into dst; count in %cl.
  void shldl_CLr(RegisterID src, RegisterID dst) {
    spew("shldl      %%cl, %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.twoByteOp(OP2_SHLD_GvEv, dst, src);
  }
   1567 
  // shll $imm, %dst: 32-bit left shift by an immediate.
  // A count of 1 uses the dedicated shift-by-one opcode.
  void shll_ir(int32_t imm, RegisterID dst) {
    MOZ_ASSERT(imm < 32);
    spew("shll       $%d, %s", imm, GPReg32Name(dst));
    if (imm == 1) {
      m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SHL);
    } else {
      m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SHL);
      m_formatter.immediate8u(imm);
    }
  }
   1578 
  // shll %cl, %dst: 32-bit left shift by the count in %cl.
  void shll_CLr(RegisterID dst) {
    spew("shll       %%cl, %s", GPReg32Name(dst));
    m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SHL);
  }
   1583 
  // roll $imm, %dst: 32-bit rotate left by an immediate.
  // A count of 1 uses the dedicated rotate-by-one opcode.
  void roll_ir(int32_t imm, RegisterID dst) {
    MOZ_ASSERT(imm < 32);
    spew("roll       $%d, %s", imm, GPReg32Name(dst));
    if (imm == 1) {
      m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_ROL);
    } else {
      m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_ROL);
      m_formatter.immediate8u(imm);
    }
  }
   1594  void rolw_ir(int32_t imm, RegisterID dst) {
   1595    MOZ_ASSERT(imm < 32);
   1596    spew("roll       $%d, %s", imm, GPReg16Name(dst));
   1597    m_formatter.prefix(PRE_OPERAND_SIZE);
   1598    if (imm == 1) {
   1599      m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_ROL);
   1600    } else {
   1601      m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_ROL);
   1602      m_formatter.immediate8u(imm);
   1603    }
   1604  }
  // roll %cl, %dst: 32-bit rotate left by the count in %cl.
  void roll_CLr(RegisterID dst) {
    spew("roll       %%cl, %s", GPReg32Name(dst));
    m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_ROL);
  }
   1609 
  // rorl $imm, %dst: 32-bit rotate right by an immediate.
  // A count of 1 uses the dedicated rotate-by-one opcode.
  void rorl_ir(int32_t imm, RegisterID dst) {
    MOZ_ASSERT(imm < 32);
    spew("rorl       $%d, %s", imm, GPReg32Name(dst));
    if (imm == 1) {
      m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_ROR);
    } else {
      m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_ROR);
      m_formatter.immediate8u(imm);
    }
  }
  // rorl %cl, %dst: 32-bit rotate right by the count in %cl.
  void rorl_CLr(RegisterID dst) {
    spew("rorl       %%cl, %s", GPReg32Name(dst));
    m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_ROR);
  }
   1624 
  // bsrl %src, %dst: bit-scan-reverse of a 32-bit register.
  void bsrl_rr(RegisterID src, RegisterID dst) {
    spew("bsrl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.twoByteOp(OP2_BSR_GvEv, src, dst);
  }
   1629 
  // bsfl %src, %dst: bit-scan-forward of a 32-bit register.
  void bsfl_rr(RegisterID src, RegisterID dst) {
    spew("bsfl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.twoByteOp(OP2_BSF_GvEv, src, dst);
  }
   1634 
  // lzcntl %src, %dst: 32-bit leading-zero count. Encoded as an F3-prefixed
  // two-byte opcode (legacy SSE prefix VEX_SS supplies the F3).
  void lzcntl_rr(RegisterID src, RegisterID dst) {
    spew("lzcntl     %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.legacySSEPrefix(VEX_SS);
    m_formatter.twoByteOp(OP2_LZCNT_GvEv, src, dst);
  }
   1640 
  // tzcntl %src, %dst: 32-bit trailing-zero count (F3-prefixed encoding).
  void tzcntl_rr(RegisterID src, RegisterID dst) {
    spew("tzcntl     %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.legacySSEPrefix(VEX_SS);
    m_formatter.twoByteOp(OP2_TZCNT_GvEv, src, dst);
  }
   1646 
  // popcntl %src, %dst: 32-bit population count (F3-prefixed encoding).
  void popcntl_rr(RegisterID src, RegisterID dst) {
    spew("popcntl    %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.legacySSEPrefix(VEX_SS);
    m_formatter.twoByteOp(OP2_POPCNT_GvEv, src, dst);
  }
   1652 
  // imull %src, %dst: two-operand 32-bit signed multiply into dst.
  void imull_rr(RegisterID src, RegisterID dst) {
    spew("imull      %s, %s", GPReg32Name(src), GPReg32Name(dst));
    m_formatter.twoByteOp(OP2_IMUL_GvEv, src, dst);
  }
   1657 
  // imull %multiplier: one-operand 32-bit signed multiply (GROUP3 form,
  // implicit %eax operand per the ISA encoding).
  void imull_r(RegisterID multiplier) {
    spew("imull      %s", GPReg32Name(multiplier));
    m_formatter.oneByteOp(OP_GROUP3_Ev, multiplier, GROUP3_OP_IMUL);
  }
   1662 
  // imull disp(base), %dst: 32-bit signed multiply of a memory operand
  // into a register.
  void imull_mr(int32_t offset, RegisterID base, RegisterID dst) {
    spew("imull      " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
    m_formatter.twoByteOp(OP2_IMUL_GvEv, offset, base, dst);
  }
   1667 
  // imull $value, %src, %dst: three-operand signed multiply (dst = src *
  // value). Uses the sign-extended 8-bit immediate form when value fits.
  void imull_ir(int32_t value, RegisterID src, RegisterID dst) {
    spew("imull      $%d, %s, %s", value, GPReg32Name(src), GPReg32Name(dst));
    if (CAN_SIGN_EXTEND_8_32(value)) {
      m_formatter.oneByteOp(OP_IMUL_GvEvIb, src, dst);
      m_formatter.immediate8s(value);
    } else {
      m_formatter.oneByteOp(OP_IMUL_GvEvIz, src, dst);
      m_formatter.immediate32(value);
    }
  }
   1678 
  // mull %multiplier: one-operand 32-bit unsigned multiply (GROUP3 form).
  void mull_r(RegisterID multiplier) {
    spew("mull       %s", GPReg32Name(multiplier));
    m_formatter.oneByteOp(OP_GROUP3_Ev, multiplier, GROUP3_OP_MUL);
  }
   1683 
  // idivl %divisor: 32-bit signed divide (GROUP3 form; implicit
  // %edx:%eax dividend per the ISA).
  void idivl_r(RegisterID divisor) {
    spew("idivl      %s", GPReg32Name(divisor));
    m_formatter.oneByteOp(OP_GROUP3_Ev, divisor, GROUP3_OP_IDIV);
  }
   1688 
   1689  void divl_r(RegisterID divisor) {
   1690    spew("div        %s", GPReg32Name(divisor));
   1691    m_formatter.oneByteOp(OP_GROUP3_Ev, divisor, GROUP3_OP_DIV);
   1692  }
   1693 
  // Emit a bare LOCK prefix byte; the next emitted instruction becomes
  // atomic.
  void prefix_lock() {
    spew("lock");
    m_formatter.oneByteOp(PRE_LOCK);
  }
   1698 
  // Emit a bare operand-size (66h) prefix so the next instruction uses
  // 16-bit operands.
  void prefix_16_for_32() {
    spew("[16-bit operands next]");
    m_formatter.prefix(PRE_OPERAND_SIZE);
  }
   1703 
  // incl disp(base): increment a 32-bit memory operand (GROUP5 /INC).
  void incl_m32(int32_t offset, RegisterID base) {
    spew("incl       " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_INC);
  }
   1708 
  // decl disp(base): decrement a 32-bit memory operand (GROUP5 /DEC).
  void decl_m32(int32_t offset, RegisterID base) {
    spew("decl       " MEM_ob, ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_DEC);
  }
   1713 
   1714  // Note that CMPXCHG performs comparison against REG = %al/%ax/%eax/%rax.
   1715  // If %REG == [%base+offset], then %src -> [%base+offset].
   1716  // Otherwise, [%base+offset] -> %REG.
   1717  // For the 8-bit operations src must also be an 8-bit register.
   1718 
  // cmpxchgb %src, disp(base): 8-bit compare-and-exchange against %al
  // (see the comment block above for the CMPXCHG contract).
  void cmpxchgb(RegisterID src, int32_t offset, RegisterID base) {
    spew("cmpxchgb   %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base));
    m_formatter.twoByteOp8(OP2_CMPXCHG_GvEb, offset, base, src);
  }
  // cmpxchgb %src, disp(base,index,scale): 8-bit compare-and-exchange,
  // SIB-addressed form.
  void cmpxchgb(RegisterID src, int32_t offset, RegisterID base,
                RegisterID index, int scale) {
    spew("cmpxchgb   %s, " MEM_obs, GPReg8Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.twoByteOp8(OP2_CMPXCHG_GvEb, offset, base, index, scale, src);
  }
  // cmpxchgw %src, disp(base): 16-bit compare-and-exchange against %ax;
  // the operand-size prefix narrows the v-sized opcode to 16 bits.
  void cmpxchgw(RegisterID src, int32_t offset, RegisterID base) {
    spew("cmpxchgw   %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, src);
  }
  // cmpxchgw %src, disp(base,index,scale): 16-bit compare-and-exchange,
  // SIB-addressed form.
  void cmpxchgw(RegisterID src, int32_t offset, RegisterID base,
                RegisterID index, int scale) {
    spew("cmpxchgw   %s, " MEM_obs, GPReg16Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, index, scale, src);
  }
  // cmpxchgl %src, disp(base): 32-bit compare-and-exchange against %eax.
  // Note: OP2_CMPXCHG_GvEw is the v-sized opcode; without the operand-size
  // prefix (cf. cmpxchgw above) it operates on 32 bits.
  void cmpxchgl(RegisterID src, int32_t offset, RegisterID base) {
    spew("cmpxchgl   %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base));
    m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, src);
  }
  // cmpxchgl %src, disp(base,index,scale): 32-bit compare-and-exchange,
  // SIB-addressed form (v-sized opcode, no operand-size prefix).
  void cmpxchgl(RegisterID src, int32_t offset, RegisterID base,
                RegisterID index, int scale) {
    spew("cmpxchgl   %s, " MEM_obs, GPReg32Name(src),
         ADDR_obs(offset, base, index, scale));
    m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, index, scale, src);
  }
   1751 
  // cmpxchg8b disp(base): 8-byte compare-and-exchange. The register
  // parameters exist only to let callers assert the fixed register
  // assignment the instruction requires (%edx:%eax expected value,
  // %ecx:%ebx replacement); the trailing "1" is the group opcode digit.
  void cmpxchg8b(RegisterID srcHi, RegisterID srcLo, RegisterID newHi,
                 RegisterID newLo, int32_t offset, RegisterID base) {
    MOZ_ASSERT(srcHi == edx.code() && srcLo == eax.code());
    MOZ_ASSERT(newHi == ecx.code() && newLo == ebx.code());
    spew("cmpxchg8b  %s, " MEM_ob, "edx:eax", ADDR_ob(offset, base));
    m_formatter.twoByteOp(OP2_CMPXCHGNB, offset, base, 1);
  }
  // cmpxchg8b disp(base,index,scale): SIB-addressed form; same fixed
  // register contract as the overload above.
  void cmpxchg8b(RegisterID srcHi, RegisterID srcLo, RegisterID newHi,
                 RegisterID newLo, int32_t offset, RegisterID base,
                 RegisterID index, int scale) {
    MOZ_ASSERT(srcHi == edx.code() && srcLo == eax.code());
    MOZ_ASSERT(newHi == ecx.code() && newLo == ebx.code());
    spew("cmpxchg8b  %s, " MEM_obs, "edx:eax",
         ADDR_obs(offset, base, index, scale));
    m_formatter.twoByteOp(OP2_CMPXCHGNB, offset, base, index, scale, 1);
  }
   1768 
   1769  // Comparisons:
   1770 
  // cmpl %rhs, %lhs: 32-bit register-register compare (flags only).
  void cmpl_rr(RegisterID rhs, RegisterID lhs) {
    spew("cmpl       %s, %s", GPReg32Name(rhs), GPReg32Name(lhs));
    m_formatter.oneByteOp(OP_CMP_GvEv, rhs, lhs);
  }
   1775 
  // cmpl %rhs, disp(base): compare a 32-bit register against memory.
  void cmpl_rm(RegisterID rhs, int32_t offset, RegisterID base) {
    spew("cmpl       %s, " MEM_ob, GPReg32Name(rhs), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_CMP_EvGv, offset, base, rhs);
  }
   1780 
  // cmpl %rhs, disp(base,index,scale): SIB-addressed register/memory
  // compare.
  void cmpl_rm(RegisterID rhs, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("cmpl       %s, " MEM_obs, GPReg32Name(rhs),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_CMP_EvGv, offset, base, index, scale, rhs);
  }
   1787 
  // cmpl disp(base), %lhs: compare memory against a 32-bit register.
  void cmpl_mr(int32_t offset, RegisterID base, RegisterID lhs) {
    spew("cmpl       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(lhs));
    m_formatter.oneByteOp(OP_CMP_GvEv, offset, base, lhs);
  }
   1792 
  // cmpl addr, %lhs: compare an absolute-addressed memory operand against
  // a 32-bit register.
  void cmpl_mr(const void* address, RegisterID lhs) {
    spew("cmpl       %p, %s", address, GPReg32Name(lhs));
    m_formatter.oneByteOp(OP_CMP_GvEv, address, lhs);
  }
   1797 
  // cmpl $rhs, %lhs: compare a 32-bit register against an immediate.
  // Peephole: a compare against 0 is emitted as testl lhs,lhs (shorter,
  // same Z/S flag results for equality/sign tests). Otherwise prefers the
  // sign-extended 8-bit immediate, and the short EAX-immediate opcode when
  // lhs is %eax.
  void cmpl_ir(int32_t rhs, RegisterID lhs) {
    if (rhs == 0) {
      testl_rr(lhs, lhs);
      return;
    }

    spew("cmpl       $0x%x, %s", uint32_t(rhs), GPReg32Name(lhs));
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, lhs, GROUP1_OP_CMP);
      m_formatter.immediate8s(rhs);
    } else {
      if (lhs == rax) {
        m_formatter.oneByteOp(OP_CMP_EAXIv);
      } else {
        m_formatter.oneByteOp(OP_GROUP1_EvIz, lhs, GROUP1_OP_CMP);
      }
      m_formatter.immediate32(rhs);
    }
  }
   1817 
  // cmpl $rhs, %lhs, always with a full 32-bit immediate — no short-form
  // shrinking, so the encoded length is predictable (e.g. for patching).
  void cmpl_i32r(int32_t rhs, RegisterID lhs) {
    spew("cmpl       $0x%04x, %s", uint32_t(rhs), GPReg32Name(lhs));
    if (lhs == rax) {
      m_formatter.oneByteOp(OP_CMP_EAXIv);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, lhs, GROUP1_OP_CMP);
    }
    m_formatter.immediate32(rhs);
  }
   1827 
  // cmpl $rhs, disp(base): compare memory against an immediate; uses the
  // sign-extended 8-bit immediate form when rhs fits.
  void cmpl_im(int32_t rhs, int32_t offset, RegisterID base) {
    spew("cmpl       $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base));
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_CMP);
      m_formatter.immediate8s(rhs);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP);
      m_formatter.immediate32(rhs);
    }
  }
   1838 
  // cmpb %rhs, %lhs: 8-bit register-register compare.
  void cmpb_rr(RegisterID rhs, RegisterID lhs) {
    spew("cmpb       %s, %s", GPReg8Name(rhs), GPReg8Name(lhs));
    m_formatter.oneByteOp8(OP_CMP_GbEb, rhs, lhs);
  }
   1843 
  // cmpb %rhs, disp(base): compare an 8-bit register against memory.
  void cmpb_rm(RegisterID rhs, int32_t offset, RegisterID base) {
    spew("cmpb       %s, " MEM_ob, GPReg8Name(rhs), ADDR_ob(offset, base));
    m_formatter.oneByteOp8(OP_CMP_EbGb, offset, base, rhs);
  }
   1848 
  // cmpb %rhs, disp(base,index,scale): 8-bit compare, SIB-addressed form.
  void cmpb_rm(RegisterID rhs, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("cmpb       %s, " MEM_obs, GPReg8Name(rhs),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp8(OP_CMP_EbGb, offset, base, index, scale, rhs);
  }
   1855 
  // cmpb %rhs, addr: 8-bit compare against an absolute address.
  void cmpb_rm(RegisterID rhs, const void* addr) {
    spew("cmpb       %s, %p", GPReg8Name(rhs), addr);
    m_formatter.oneByteOp8(OP_CMP_EbGb, addr, rhs);
  }
   1860 
  // cmpb $rhs, %lhs: compare an 8-bit register against an immediate.
  // Peephole: compare against 0 becomes testb lhs,lhs. Uses the short
  // AL-immediate opcode when lhs is %al.
  void cmpb_ir(int32_t rhs, RegisterID lhs) {
    if (rhs == 0) {
      testb_rr(lhs, lhs);
      return;
    }

    spew("cmpb       $0x%x, %s", uint32_t(rhs), GPReg8Name(lhs));
    if (lhs == rax) {
      m_formatter.oneByteOp8(OP_CMP_EAXIb);
    } else {
      m_formatter.oneByteOp8(OP_GROUP1_EbIb, lhs, GROUP1_OP_CMP);
    }
    m_formatter.immediate8(rhs);
  }
   1875 
  // cmpb $rhs, disp(base): compare an 8-bit memory operand against an
  // immediate.
  void cmpb_im(int32_t rhs, int32_t offset, RegisterID base) {
    spew("cmpb       $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, GROUP1_OP_CMP);
    m_formatter.immediate8(rhs);
  }
   1881 
  // cmpb $rhs, disp(base,index,scale): 8-bit immediate/memory compare,
  // SIB-addressed form.
  void cmpb_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("cmpb       $0x%x, " MEM_obs, uint32_t(rhs),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_GROUP1_EbIb, offset, base, index, scale,
                          GROUP1_OP_CMP);
    m_formatter.immediate8(rhs);
  }
   1890 
  // cmpb $rhs, addr: 8-bit immediate compare against an absolute address.
  void cmpb_im(int32_t rhs, const void* addr) {
    spew("cmpb       $0x%x, %p", uint32_t(rhs), addr);
    m_formatter.oneByteOp(OP_GROUP1_EbIb, addr, GROUP1_OP_CMP);
    m_formatter.immediate8(rhs);
  }
   1896 
  // cmpl $rhs, disp(base,index,scale): 32-bit immediate/memory compare,
  // SIB-addressed; short sign-extended immediate form when rhs fits.
  void cmpl_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("cmpl       $0x%x, " MEM_obs, uint32_t(rhs),
         ADDR_obs(offset, base, index, scale));
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_CMP);
      m_formatter.immediate8s(rhs);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_CMP);
      m_formatter.immediate32(rhs);
    }
  }
   1911 
  // cmpl $rhs, disp32(base): like cmpl_im but forces a 32-bit displacement
  // encoding. Returns a JmpSrc recording the buffer offset of the
  // immediate (captured via m_formatter.size() just before emitting it),
  // so callers can patch the immediate later.
  [[nodiscard]] JmpSrc cmpl_im_disp32(int32_t rhs, int32_t offset,
                                      RegisterID base) {
    spew("cmpl       $0x%x, " MEM_o32b, uint32_t(rhs), ADDR_o32b(offset, base));
    JmpSrc r;
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.oneByteOp_disp32(OP_GROUP1_EvIb, offset, base, GROUP1_OP_CMP);
      r = JmpSrc(m_formatter.size());
      m_formatter.immediate8s(rhs);
    } else {
      m_formatter.oneByteOp_disp32(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP);
      r = JmpSrc(m_formatter.size());
      m_formatter.immediate32(rhs);
    }
    return r;
  }
   1927 
  // cmpl $rhs, addr with a forced 32-bit displacement. Returns a JmpSrc
  // recording the buffer offset of the immediate for later patching.
  [[nodiscard]] JmpSrc cmpl_im_disp32(int32_t rhs, const void* addr) {
    spew("cmpl       $0x%x, %p", uint32_t(rhs), addr);
    JmpSrc r;
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.oneByteOp_disp32(OP_GROUP1_EvIb, addr, GROUP1_OP_CMP);
      r = JmpSrc(m_formatter.size());
      m_formatter.immediate8s(rhs);
    } else {
      m_formatter.oneByteOp_disp32(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP);
      r = JmpSrc(m_formatter.size());
      m_formatter.immediate32(rhs);
    }
    return r;
  }
   1942 
  // cmpl $rhs, disp(base), always with a full 32-bit immediate (no
  // short-form shrinking; fixed-length encoding).
  void cmpl_i32m(int32_t rhs, int32_t offset, RegisterID base) {
    spew("cmpl       $0x%04x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP);
    m_formatter.immediate32(rhs);
  }
   1948 
  // cmpl $rhs, addr, always with a full 32-bit immediate.
  void cmpl_i32m(int32_t rhs, const void* addr) {
    spew("cmpl       $0x%04x, %p", uint32_t(rhs), addr);
    m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP);
    m_formatter.immediate32(rhs);
  }
   1954 
  // cmpl %rhs, addr: compare a 32-bit register against an absolute
  // address.
  void cmpl_rm(RegisterID rhs, const void* addr) {
    spew("cmpl       %s, %p", GPReg32Name(rhs), addr);
    m_formatter.oneByteOp(OP_CMP_EvGv, addr, rhs);
  }
   1959 
  // cmpl %rhs, addr with a forced 32-bit displacement encoding.
  void cmpl_rm_disp32(RegisterID rhs, const void* addr) {
    spew("cmpl       %s, %p", GPReg32Name(rhs), addr);
    m_formatter.oneByteOp_disp32(OP_CMP_EvGv, addr, rhs);
  }
   1964 
  // cmpl $rhs, addr: immediate compare against an absolute address; short
  // sign-extended immediate form when rhs fits.
  void cmpl_im(int32_t rhs, const void* addr) {
    spew("cmpl       $0x%x, %p", uint32_t(rhs), addr);
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_CMP);
      m_formatter.immediate8s(rhs);
    } else {
      m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP);
      m_formatter.immediate32(rhs);
    }
  }
   1975 
  // cmpw %rhs, %lhs: 16-bit register-register compare (operand-size
  // prefix).
  void cmpw_rr(RegisterID rhs, RegisterID lhs) {
    spew("cmpw       %s, %s", GPReg16Name(rhs), GPReg16Name(lhs));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_CMP_GvEv, rhs, lhs);
  }
   1981 
  // cmpw %rhs, disp(base,index,scale): 16-bit register/memory compare,
  // SIB-addressed form.
  void cmpw_rm(RegisterID rhs, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
    spew("cmpw       %s, " MEM_obs, GPReg16Name(rhs),
         ADDR_obs(offset, base, index, scale));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_CMP_EvGv, offset, base, index, scale, rhs);
  }
   1989 
  // cmpw $rhs, %lhs: compare a 16-bit register against an immediate.
  // Peephole: compare against 0 becomes testw lhs,lhs. Otherwise emits the
  // operand-size prefix plus a sign-extended 8-bit or 16-bit immediate.
  void cmpw_ir(int32_t rhs, RegisterID lhs) {
    if (rhs == 0) {
      testw_rr(lhs, lhs);
      return;
    }

    spew("cmpw       $0x%x, %s", uint32_t(rhs), GPReg16Name(lhs));
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.prefix(PRE_OPERAND_SIZE);
      m_formatter.oneByteOp(OP_GROUP1_EvIb, lhs, GROUP1_OP_CMP);
      m_formatter.immediate8s(rhs);
    } else {
      m_formatter.prefix(PRE_OPERAND_SIZE);
      m_formatter.oneByteOp(OP_GROUP1_EvIz, lhs, GROUP1_OP_CMP);
      m_formatter.immediate16(rhs);
    }
  }
   2007 
  // cmpw $rhs, disp(base): compare a 16-bit memory operand against an
  // immediate; short sign-extended form when rhs fits.
  void cmpw_im(int32_t rhs, int32_t offset, RegisterID base) {
    spew("cmpw       $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base));
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.prefix(PRE_OPERAND_SIZE);
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, GROUP1_OP_CMP);
      m_formatter.immediate8s(rhs);
    } else {
      m_formatter.prefix(PRE_OPERAND_SIZE);
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, GROUP1_OP_CMP);
      m_formatter.immediate16(rhs);
    }
  }
   2020 
  // cmpw $imm, disp(base,index,scale): 16-bit immediate/memory compare,
  // SIB-addressed; short sign-extended form when imm fits.
  void cmpw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
               int scale) {
    spew("cmpw       $%d, " MEM_obs, imm, ADDR_obs(offset, base, index, scale));
    if (CAN_SIGN_EXTEND_8_32(imm)) {
      m_formatter.prefix(PRE_OPERAND_SIZE);
      m_formatter.oneByteOp(OP_GROUP1_EvIb, offset, base, index, scale,
                            GROUP1_OP_CMP);
      m_formatter.immediate8s(imm);
    } else {
      m_formatter.prefix(PRE_OPERAND_SIZE);
      m_formatter.oneByteOp(OP_GROUP1_EvIz, offset, base, index, scale,
                            GROUP1_OP_CMP);
      m_formatter.immediate16(imm);
    }
  }
   2036 
  // cmpw $rhs, addr: 16-bit immediate compare against an absolute address.
  void cmpw_im(int32_t rhs, const void* addr) {
    spew("cmpw       $0x%x, %p", uint32_t(rhs), addr);
    if (CAN_SIGN_EXTEND_8_32(rhs)) {
      m_formatter.prefix(PRE_OPERAND_SIZE);
      m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_CMP);
      m_formatter.immediate8s(rhs);
    } else {
      m_formatter.prefix(PRE_OPERAND_SIZE);
      m_formatter.oneByteOp(OP_GROUP1_EvIz, addr, GROUP1_OP_CMP);
      m_formatter.immediate16(rhs);
    }
  }
   2049 
  // testl %rhs, %lhs: 32-bit AND-without-writeback, setting flags only.
  void testl_rr(RegisterID rhs, RegisterID lhs) {
    spew("testl      %s, %s", GPReg32Name(rhs), GPReg32Name(lhs));
    m_formatter.oneByteOp(OP_TEST_EvGv, lhs, rhs);
  }
   2054 
  // testb %rhs, %lhs: 8-bit AND-without-writeback, setting flags only.
  void testb_rr(RegisterID rhs, RegisterID lhs) {
    spew("testb      %s, %s", GPReg8Name(rhs), GPReg8Name(lhs));
    m_formatter.oneByteOp8(OP_TEST_EbGb, lhs, rhs);
  }
   2059 
  // testl $rhs, %lhs: test a 32-bit register against an immediate mask.
  // Size peepholes: an 8-bit mask uses testb on the low subregister when
  // one exists; a mask within 0xff00 uses testb on the high (ah..bh)
  // subregister when available. Otherwise uses the short EAX-immediate
  // opcode for %eax, or the GROUP3 form.
  void testl_ir(int32_t rhs, RegisterID lhs) {
    // If the mask fits in an 8-bit immediate, we can use testb with an
    // 8-bit subreg.
    if (CAN_ZERO_EXTEND_8_32(rhs) && HasSubregL(lhs)) {
      testb_ir(rhs, lhs);
      return;
    }
    // If the mask is a subset of 0xff00, we can use testb with an h reg, if
    // one happens to be available.
    if (CAN_ZERO_EXTEND_8H_32(rhs) && HasSubregH(lhs)) {
      testb_ir_norex(rhs >> 8, GetSubregH(lhs));
      return;
    }
    spew("testl      $0x%x, %s", uint32_t(rhs), GPReg32Name(lhs));
    if (lhs == rax) {
      m_formatter.oneByteOp(OP_TEST_EAXIv);
    } else {
      m_formatter.oneByteOp(OP_GROUP3_EvIz, lhs, GROUP3_OP_TEST);
    }
    m_formatter.immediate32(rhs);
  }
   2081 
  // testl $rhs, disp(base): test memory against a full 32-bit immediate.
  void testl_i32m(int32_t rhs, int32_t offset, RegisterID base) {
    spew("testl      $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP3_EvIz, offset, base, GROUP3_OP_TEST);
    m_formatter.immediate32(rhs);
  }
   2087 
  // testl $rhs, addr: test an absolute-addressed memory operand against a
  // full 32-bit immediate.
  void testl_i32m(int32_t rhs, const void* addr) {
    spew("testl      $0x%x, %p", uint32_t(rhs), addr);
    m_formatter.oneByteOp(OP_GROUP3_EvIz, addr, GROUP3_OP_TEST);
    m_formatter.immediate32(rhs);
  }
   2093 
  // testb $rhs, disp(base): test an 8-bit memory operand against an
  // immediate.
  void testb_im(int32_t rhs, int32_t offset, RegisterID base) {
    spew("testb      $0x%x, " MEM_ob, uint32_t(rhs), ADDR_ob(offset, base));
    m_formatter.oneByteOp(OP_GROUP3_EbIb, offset, base, GROUP3_OP_TEST);
    m_formatter.immediate8(rhs);
  }
   2099 
  // testb $rhs, disp(base,index,scale): 8-bit immediate/memory test,
  // SIB-addressed form.
  void testb_im(int32_t rhs, int32_t offset, RegisterID base, RegisterID index,
                int scale) {
    spew("testb      $0x%x, " MEM_obs, uint32_t(rhs),
         ADDR_obs(offset, base, index, scale));
    m_formatter.oneByteOp(OP_GROUP3_EbIb, offset, base, index, scale,
                          GROUP3_OP_TEST);
    m_formatter.immediate8(rhs);
  }
   2108 
   2109  void testl_i32m(int32_t rhs, int32_t offset, RegisterID base,
   2110                  RegisterID index, int scale) {
   2111    spew("testl      $0x%4x, " MEM_obs, uint32_t(rhs),
   2112         ADDR_obs(offset, base, index, scale));
   2113    m_formatter.oneByteOp(OP_GROUP3_EvIz, offset, base, index, scale,
   2114                          GROUP3_OP_TEST);
   2115    m_formatter.immediate32(rhs);
   2116  }
   2117 
  // testw %rhs, %lhs: 16-bit AND-without-writeback (operand-size prefix).
  void testw_rr(RegisterID rhs, RegisterID lhs) {
    spew("testw      %s, %s", GPReg16Name(rhs), GPReg16Name(lhs));
    m_formatter.prefix(PRE_OPERAND_SIZE);
    m_formatter.oneByteOp(OP_TEST_EvGv, lhs, rhs);
  }
   2123 
  // testb $rhs, %lhs: test an 8-bit register against an immediate; uses
  // the short AL-immediate opcode when lhs is %al.
  void testb_ir(int32_t rhs, RegisterID lhs) {
    spew("testb      $0x%x, %s", uint32_t(rhs), GPReg8Name(lhs));
    if (lhs == rax) {
      m_formatter.oneByteOp8(OP_TEST_EAXIb);
    } else {
      m_formatter.oneByteOp8(OP_GROUP3_EbIb, lhs, GROUP3_OP_TEST);
    }
    m_formatter.immediate8(rhs);
  }
   2133 
  // Like testb_ir, but never emits a REX prefix. This may be used to
  // reference ah..bh.
  void testb_ir_norex(int32_t rhs, HRegisterID lhs) {
    spew("testb      $0x%x, %s", uint32_t(rhs), HRegName8(lhs));
    m_formatter.oneByteOp8_norex(OP_GROUP3_EbIb, lhs, GROUP3_OP_TEST);
    m_formatter.immediate8(rhs);
  }
   2141 
  // setcc %lhs8: set an 8-bit register to 0/1 from the given condition.
  // The group opcode field is unused by setcc, hence the 0.
  void setCC_r(Condition cond, RegisterID lhs) {
    spew("set%s      %s", CCName(cond), GPReg8Name(lhs));
    m_formatter.twoByteOp8(setccOpcode(cond), lhs, (GroupOpcodeID)0);
  }
   2146 
  // sete: set dst to 1 if the E (equal/zero) condition holds.
  void sete_r(RegisterID dst) { setCC_r(ConditionE, dst); }
   2148 
  // setz: alias of sete (Z and E are the same condition code).
  void setz_r(RegisterID dst) { sete_r(dst); }
   2150 
  // setne: set dst to 1 if the NE (not-equal/non-zero) condition holds.
  void setne_r(RegisterID dst) { setCC_r(ConditionNE, dst); }
   2152 
  // setnz: alias of setne (NZ and NE are the same condition code).
  void setnz_r(RegisterID dst) { setne_r(dst); }
   2154 
   2155  // Various move ops:
   2156 
  // cdq: sign-extend %eax into %edx:%eax (per the ISA; no operands).
  void cdq() {
    spew("cdq        ");
    m_formatter.oneByteOp(OP_CDQ);
  }
   2161 
  // xchgb %src, disp(base): exchange an 8-bit register with memory.
  void xchgb_rm(RegisterID src, int32_t offset, RegisterID base) {
    spew("xchgb      %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base));
    m_formatter.oneByteOp8(OP_XCHG_GbEb, offset, base, src);
  }
   2166  void xchgb_rm(RegisterID src, int32_t offset, RegisterID base,
   2167                RegisterID index, int scale) {
   2168    spew("xchgb      %s, " MEM_obs, GPReg8Name(src),
   2169         ADDR_obs(offset, base, index, scale));
   2170    m_formatter.oneByteOp8(OP_XCHG_GbEb, offset, base, index, scale, src);
   2171  }
   2172 
   2173  void xchgw_rm(RegisterID src, int32_t offset, RegisterID base) {
   2174    spew("xchgw      %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base));
   2175    m_formatter.prefix(PRE_OPERAND_SIZE);
   2176    m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, src);
   2177  }
   2178  void xchgw_rm(RegisterID src, int32_t offset, RegisterID base,
   2179                RegisterID index, int scale) {
   2180    spew("xchgw      %s, " MEM_obs, GPReg16Name(src),
   2181         ADDR_obs(offset, base, index, scale));
   2182    m_formatter.prefix(PRE_OPERAND_SIZE);
   2183    m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, index, scale, src);
   2184  }
   2185 
   2186  void xchgl_rr(RegisterID src, RegisterID dst) {
   2187    spew("xchgl      %s, %s", GPReg32Name(src), GPReg32Name(dst));
   2188    m_formatter.oneByteOp(OP_XCHG_GvEv, src, dst);
   2189  }
   2190  void xchgl_rm(RegisterID src, int32_t offset, RegisterID base) {
   2191    spew("xchgl      %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base));
   2192    m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, src);
   2193  }
   2194  void xchgl_rm(RegisterID src, int32_t offset, RegisterID base,
   2195                RegisterID index, int scale) {
   2196    spew("xchgl      %s, " MEM_obs, GPReg32Name(src),
   2197         ADDR_obs(offset, base, index, scale));
   2198    m_formatter.oneByteOp(OP_XCHG_GvEv, offset, base, index, scale, src);
   2199  }
   2200 
   2201  void cmovCCl_rr(Condition cond, RegisterID src, RegisterID dst) {
   2202    spew("cmov%s     %s, %s", CCName(cond), GPReg32Name(src), GPReg32Name(dst));
   2203    m_formatter.twoByteOp(cmovccOpcode(cond), src, dst);
   2204  }
   2205  void cmovCCl_mr(Condition cond, int32_t offset, RegisterID base,
   2206                  RegisterID dst) {
   2207    spew("cmov%s     " MEM_ob ", %s", CCName(cond), ADDR_ob(offset, base),
   2208         GPReg32Name(dst));
   2209    m_formatter.twoByteOp(cmovccOpcode(cond), offset, base, dst);
   2210  }
   2211  void cmovCCl_mr(Condition cond, int32_t offset, RegisterID base,
   2212                  RegisterID index, int scale, RegisterID dst) {
   2213    spew("cmov%s     " MEM_obs ", %s", CCName(cond),
   2214         ADDR_obs(offset, base, index, scale), GPReg32Name(dst));
   2215    m_formatter.twoByteOp(cmovccOpcode(cond), offset, base, index, scale, dst);
   2216  }
   2217 
   2218  void movl_rr(RegisterID src, RegisterID dst) {
   2219    spew("movl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
   2220    m_formatter.oneByteOp(OP_MOV_GvEv, src, dst);
   2221  }
   2222 
 // movw: 16-bit stores of a register to memory, in the usual addressing
 // variants (base+offset, forced 32-bit displacement, base+index*scale,
 // absolute address).
 void movw_rm(RegisterID src, int32_t offset, RegisterID base) {
   spew("movw       %s, " MEM_ob, GPReg16Name(src), ADDR_ob(offset, base));
   m_formatter.prefix(PRE_OPERAND_SIZE);
   m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, src);
 }

 // As movw_rm, but always encodes a 4-byte displacement so the offset
 // can be patched later.
 void movw_rm_disp32(RegisterID src, int32_t offset, RegisterID base) {
   spew("movw       %s, " MEM_o32b, GPReg16Name(src), ADDR_o32b(offset, base));
   m_formatter.prefix(PRE_OPERAND_SIZE);
   m_formatter.oneByteOp_disp32(OP_MOV_EvGv, offset, base, src);
 }

 void movw_rm(RegisterID src, int32_t offset, RegisterID base,
              RegisterID index, int scale) {
   spew("movw       %s, " MEM_obs, GPReg16Name(src),
        ADDR_obs(offset, base, index, scale));
   m_formatter.prefix(PRE_OPERAND_SIZE);
   m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, index, scale, src);
 }

 void movw_rm(RegisterID src, const void* addr) {
   spew("movw       %s, %p", GPReg16Name(src), addr);
   m_formatter.prefix(PRE_OPERAND_SIZE);
   m_formatter.oneByteOp_disp32(OP_MOV_EvGv, addr, src);
 }

 // movl: 32-bit stores of a register to memory.
 void movl_rm(RegisterID src, int32_t offset, RegisterID base) {
   spew("movl       %s, " MEM_ob, GPReg32Name(src), ADDR_ob(offset, base));
   m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, src);
 }

 void movl_rm_disp32(RegisterID src, int32_t offset, RegisterID base) {
   spew("movl       %s, " MEM_o32b, GPReg32Name(src), ADDR_o32b(offset, base));
   m_formatter.oneByteOp_disp32(OP_MOV_EvGv, offset, base, src);
 }

 void movl_rm(RegisterID src, int32_t offset, RegisterID base,
              RegisterID index, int scale) {
   spew("movl       %s, " MEM_obs, GPReg32Name(src),
        ADDR_obs(offset, base, index, scale));
   m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, index, scale, src);
 }

 // Load from an absolute address into %eax using the dedicated
 // accumulator form. On x64 the moffs operand is a full 64-bit address
 // (hence immediate64); if the address fits in an immediate we instead
 // fall back to the ordinary ModRM encoding via movl_mr.
 void movl_mEAX(const void* addr) {
#ifdef JS_CODEGEN_X64
   if (IsAddressImmediate(addr)) {
     movl_mr(addr, rax);
     return;
   }
#endif

#ifdef JS_CODEGEN_X64
   spew("movabs     %p, %%eax", addr);
#else
   spew("movl       %p, %%eax", addr);
#endif
   m_formatter.oneByteOp(OP_MOV_EAXOv);
#ifdef JS_CODEGEN_X64
   m_formatter.immediate64(reinterpret_cast<int64_t>(addr));
#else
   m_formatter.immediate32(reinterpret_cast<int32_t>(addr));
#endif
 }
   2286 
 // movl: 32-bit loads from memory into a register, in the usual
 // addressing variants.
 void movl_mr(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movl       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
   m_formatter.oneByteOp(OP_MOV_GvEv, offset, base, dst);
 }

 // As movl_mr, but always encodes a 4-byte displacement (patchable).
 void movl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movl       " MEM_o32b ", %s", ADDR_o32b(offset, base),
        GPReg32Name(dst));
   m_formatter.oneByteOp_disp32(OP_MOV_GvEv, offset, base, dst);
 }

 // Load from an absolute base address plus scaled index; the base must
 // be expressible as a 32-bit displacement (checked by AddressImmediate).
 void movl_mr(const void* base, RegisterID index, int scale, RegisterID dst) {
   int32_t disp = AddressImmediate(base);

   spew("movl       " MEM_os ", %s", ADDR_os(disp, index, scale),
        GPReg32Name(dst));
   m_formatter.oneByteOp_disp32(OP_MOV_GvEv, disp, index, scale, dst);
 }

 void movl_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
              RegisterID dst) {
   spew("movl       " MEM_obs ", %s", ADDR_obs(offset, base, index, scale),
        GPReg32Name(dst));
   m_formatter.oneByteOp(OP_MOV_GvEv, offset, base, index, scale, dst);
 }

 // Absolute-address load. When the destination is %eax (and, on x64,
 // the address doesn't fit in an immediate), use the shorter
 // accumulator form emitted by movl_mEAX.
 void movl_mr(const void* addr, RegisterID dst) {
   if (dst == rax
#ifdef JS_CODEGEN_X64
       && !IsAddressImmediate(addr)
#endif
   ) {
     movl_mEAX(addr);
     return;
   }

   spew("movl       %p, %s", addr, GPReg32Name(dst));
   m_formatter.oneByteOp(OP_MOV_GvEv, addr, dst);
 }

 // Load a 32-bit immediate into a register (B8+r form).
 void movl_i32r(int32_t imm, RegisterID dst) {
   spew("movl       $0x%x, %s", uint32_t(imm), GPReg32Name(dst));
   m_formatter.oneByteOp(OP_MOV_EAXIv, dst);
   m_formatter.immediate32(imm);
 }

 // Load an 8-bit immediate into an 8-bit register.
 void movb_ir(int32_t imm, RegisterID reg) {
   spew("movb       $0x%x, %s", uint32_t(imm), GPReg8Name(reg));
   m_formatter.oneByteOp8(OP_MOV_EbIb, reg);
   m_formatter.immediate8(imm);
 }
   2338 
 // movb: store an 8-bit immediate to memory.
 void movb_im(int32_t imm, int32_t offset, RegisterID base) {
   spew("movb       $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base));
   m_formatter.oneByteOp(OP_GROUP11_EvIb, offset, base, GROUP11_MOV);
   m_formatter.immediate8(imm);
 }

 void movb_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
              int scale) {
   spew("movb       $0x%x, " MEM_obs, uint32_t(imm),
        ADDR_obs(offset, base, index, scale));
   m_formatter.oneByteOp(OP_GROUP11_EvIb, offset, base, index, scale,
                         GROUP11_MOV);
   m_formatter.immediate8(imm);
 }

 void movb_im(int32_t imm, const void* addr) {
   spew("movb       $%d, %p", imm, addr);
   m_formatter.oneByteOp_disp32(OP_GROUP11_EvIb, addr, GROUP11_MOV);
   m_formatter.immediate8(imm);
 }

 // movw: store a 16-bit immediate to memory (operand-size prefix).
 void movw_im(int32_t imm, int32_t offset, RegisterID base) {
   spew("movw       $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base));
   m_formatter.prefix(PRE_OPERAND_SIZE);
   m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, GROUP11_MOV);
   m_formatter.immediate16(imm);
 }

 void movw_im(int32_t imm, const void* addr) {
   spew("movw       $%d, %p", imm, addr);
   m_formatter.prefix(PRE_OPERAND_SIZE);
   m_formatter.oneByteOp_disp32(OP_GROUP11_EvIz, addr, GROUP11_MOV);
   m_formatter.immediate16(imm);
 }

 // movl: store a 32-bit immediate to memory.
 void movl_i32m(int32_t imm, int32_t offset, RegisterID base) {
   spew("movl       $0x%x, " MEM_ob, uint32_t(imm), ADDR_ob(offset, base));
   m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, GROUP11_MOV);
   m_formatter.immediate32(imm);
 }

 void movw_im(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
              int scale) {
   spew("movw       $0x%x, " MEM_obs, uint32_t(imm),
        ADDR_obs(offset, base, index, scale));
   m_formatter.prefix(PRE_OPERAND_SIZE);
   m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, index, scale,
                         GROUP11_MOV);
   m_formatter.immediate16(imm);
 }

 void movl_i32m(int32_t imm, int32_t offset, RegisterID base, RegisterID index,
                int scale) {
   spew("movl       $0x%x, " MEM_obs, uint32_t(imm),
        ADDR_obs(offset, base, index, scale));
   m_formatter.oneByteOp(OP_GROUP11_EvIz, offset, base, index, scale,
                         GROUP11_MOV);
   m_formatter.immediate32(imm);
 }

 // Store %eax to an absolute address using the accumulator form; the
 // mirror image of movl_mEAX (see its comment for the x64 moffs rules).
 void movl_EAXm(const void* addr) {
#ifdef JS_CODEGEN_X64
   if (IsAddressImmediate(addr)) {
     movl_rm(rax, addr);
     return;
   }
#endif

   spew("movl       %%eax, %p", addr);
   m_formatter.oneByteOp(OP_MOV_OvEAX);
#ifdef JS_CODEGEN_X64
   m_formatter.immediate64(reinterpret_cast<int64_t>(addr));
#else
   m_formatter.immediate32(reinterpret_cast<int32_t>(addr));
#endif
 }
   2415 
 // vmovq: move the low 64 bits of an XMM register to/from memory.
 void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
   // vmovq_rm can be encoded either as a true vmovq or as a vmovd with a
   // REX prefix modifying it to be 64-bit. We choose the vmovq encoding
   // because it's smaller (when it doesn't need a REX prefix for other
   // reasons) and because it works on 32-bit x86 too.
   twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, invalid_xmm,
                 src);
 }

 // Store form with a forced 4-byte displacement (patchable).
 void vmovq_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) {
   twoByteOpSimd_disp32("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base,
                        invalid_xmm, src);
 }

 void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base,
               RegisterID index, int scale) {
   twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, index, scale,
                 invalid_xmm, src);
 }

 void vmovq_rm(XMMRegisterID src, const void* addr) {
   twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, addr, invalid_xmm, src);
 }

 void vmovq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   // vmovq_mr can be encoded either as a true vmovq or as a vmovd with a
   // REX prefix modifying it to be 64-bit. We choose the vmovq encoding
   // because it's smaller (when it doesn't need a REX prefix for other
   // reasons) and because it works on 32-bit x86 too.
   twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, invalid_xmm,
                 dst);
 }

 // Load form with a forced 4-byte displacement (patchable).
 void vmovq_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) {
   twoByteOpSimd_disp32("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base,
                        invalid_xmm, dst);
 }

 void vmovq_mr(int32_t offset, RegisterID base, RegisterID index,
               int32_t scale, XMMRegisterID dst) {
   twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, index, scale,
                 invalid_xmm, dst);
 }

 void vmovq_mr(const void* addr, XMMRegisterID dst) {
   twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, addr, invalid_xmm, dst);
 }
   2463 
 // Store a 32-bit register to an absolute address. When the source is
 // %eax (and, on x64, the address doesn't fit in an immediate), use the
 // shorter accumulator form emitted by movl_EAXm.
 void movl_rm(RegisterID src, const void* addr) {
   if (src == rax
#ifdef JS_CODEGEN_X64
       && !IsAddressImmediate(addr)
#endif
   ) {
     movl_EAXm(addr);
     return;
   }

   spew("movl       %s, %p", GPReg32Name(src), addr);
   m_formatter.oneByteOp(OP_MOV_EvGv, addr, src);
 }

 // Store a 32-bit immediate to an absolute address.
 void movl_i32m(int32_t imm, const void* addr) {
   spew("movl       $%d, %p", imm, addr);
   m_formatter.oneByteOp(OP_GROUP11_EvIz, addr, GROUP11_MOV);
   m_formatter.immediate32(imm);
 }

 // movb: 8-bit stores of a register to memory.
 void movb_rm(RegisterID src, int32_t offset, RegisterID base) {
   spew("movb       %s, " MEM_ob, GPReg8Name(src), ADDR_ob(offset, base));
   m_formatter.oneByteOp8(OP_MOV_EbGv, offset, base, src);
 }

 // As movb_rm, but always encodes a 4-byte displacement (patchable).
 void movb_rm_disp32(RegisterID src, int32_t offset, RegisterID base) {
   spew("movb       %s, " MEM_o32b, GPReg8Name(src), ADDR_o32b(offset, base));
   m_formatter.oneByteOp8_disp32(OP_MOV_EbGv, offset, base, src);
 }

 void movb_rm(RegisterID src, int32_t offset, RegisterID base,
              RegisterID index, int scale) {
   spew("movb       %s, " MEM_obs, GPReg8Name(src),
        ADDR_obs(offset, base, index, scale));
   m_formatter.oneByteOp8(OP_MOV_EbGv, offset, base, index, scale, src);
 }

 void movb_rm(RegisterID src, const void* addr) {
   spew("movb       %s, %p", GPReg8Name(src), addr);
   m_formatter.oneByteOp8(OP_MOV_EbGv, addr, src);
 }

 // movb: 8-bit loads from memory into a register.
 void movb_mr(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movb       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg8Name(dst));
   m_formatter.oneByteOp(OP_MOV_GvEb, offset, base, dst);
 }

 void movb_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
              RegisterID dst) {
   spew("movb       " MEM_obs ", %s", ADDR_obs(offset, base, index, scale),
        GPReg8Name(dst));
   m_formatter.oneByteOp(OP_MOV_GvEb, offset, base, index, scale, dst);
 }
   2517 
 // movzbl: load an 8-bit value from memory, zero-extended to 32 bits.
 void movzbl_mr(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movzbl     " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVZX_GvEb, offset, base, dst);
 }

 // As movzbl_mr, but always encodes a 4-byte displacement (patchable).
 void movzbl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movzbl     " MEM_o32b ", %s", ADDR_o32b(offset, base),
        GPReg32Name(dst));
   m_formatter.twoByteOp_disp32(OP2_MOVZX_GvEb, offset, base, dst);
 }

 void movzbl_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                RegisterID dst) {
   spew("movzbl     " MEM_obs ", %s", ADDR_obs(offset, base, index, scale),
        GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVZX_GvEb, offset, base, index, scale, dst);
 }

 void movzbl_mr(const void* addr, RegisterID dst) {
   spew("movzbl     %p, %s", addr, GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVZX_GvEb, addr, dst);
 }

 // movsbl: 8-bit source, sign-extended to 32 bits. The register form
 // uses the _movx variant so any 8-bit register can be encoded.
 void movsbl_rr(RegisterID src, RegisterID dst) {
   spew("movsbl     %s, %s", GPReg8Name(src), GPReg32Name(dst));
   m_formatter.twoByteOp8_movx(OP2_MOVSX_GvEb, src, dst);
 }

 void movsbl_mr(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movsbl     " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVSX_GvEb, offset, base, dst);
 }

 void movsbl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movsbl     " MEM_o32b ", %s", ADDR_o32b(offset, base),
        GPReg32Name(dst));
   m_formatter.twoByteOp_disp32(OP2_MOVSX_GvEb, offset, base, dst);
 }

 void movsbl_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                RegisterID dst) {
   spew("movsbl     " MEM_obs ", %s", ADDR_obs(offset, base, index, scale),
        GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVSX_GvEb, offset, base, index, scale, dst);
 }

 void movsbl_mr(const void* addr, RegisterID dst) {
   spew("movsbl     %p, %s", addr, GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVSX_GvEb, addr, dst);
 }
   2568 
 // movzwl: 16-bit source, zero-extended to 32 bits.
 void movzwl_rr(RegisterID src, RegisterID dst) {
   spew("movzwl     %s, %s", GPReg16Name(src), GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVZX_GvEw, src, dst);
 }

 void movzwl_mr(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movzwl     " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVZX_GvEw, offset, base, dst);
 }

 // As movzwl_mr, but always encodes a 4-byte displacement (patchable).
 void movzwl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movzwl     " MEM_o32b ", %s", ADDR_o32b(offset, base),
        GPReg32Name(dst));
   m_formatter.twoByteOp_disp32(OP2_MOVZX_GvEw, offset, base, dst);
 }

 void movzwl_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                RegisterID dst) {
   spew("movzwl     " MEM_obs ", %s", ADDR_obs(offset, base, index, scale),
        GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVZX_GvEw, offset, base, index, scale, dst);
 }

 void movzwl_mr(const void* addr, RegisterID dst) {
   spew("movzwl     %p, %s", addr, GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVZX_GvEw, addr, dst);
 }

 // movswl: 16-bit source, sign-extended to 32 bits.
 void movswl_rr(RegisterID src, RegisterID dst) {
   spew("movswl     %s, %s", GPReg16Name(src), GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVSX_GvEw, src, dst);
 }

 void movswl_mr(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movswl     " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVSX_GvEw, offset, base, dst);
 }

 void movswl_mr_disp32(int32_t offset, RegisterID base, RegisterID dst) {
   spew("movswl     " MEM_o32b ", %s", ADDR_o32b(offset, base),
        GPReg32Name(dst));
   m_formatter.twoByteOp_disp32(OP2_MOVSX_GvEw, offset, base, dst);
 }

 void movswl_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                RegisterID dst) {
   spew("movswl     " MEM_obs ", %s", ADDR_obs(offset, base, index, scale),
        GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVSX_GvEw, offset, base, index, scale, dst);
 }

 void movswl_mr(const void* addr, RegisterID dst) {
   spew("movswl     %p, %s", addr, GPReg32Name(dst));
   m_formatter.twoByteOp(OP2_MOVSX_GvEw, addr, dst);
 }

 // movzbl register form: 8-bit register zero-extended to 32 bits; the
 // _movx variant allows any 8-bit register to be encoded.
 void movzbl_rr(RegisterID src, RegisterID dst) {
   spew("movzbl     %s, %s", GPReg8Name(src), GPReg32Name(dst));
   m_formatter.twoByteOp8_movx(OP2_MOVZX_GvEb, src, dst);
 }
   2629 
 // leal: compute an effective address into a 32-bit register without
 // touching memory or flags.
 void leal_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
              RegisterID dst) {
   spew("leal       " MEM_obs ", %s", ADDR_obs(offset, base, index, scale),
        GPReg32Name(dst));
   m_formatter.oneByteOp(OP_LEA, offset, base, index, scale, dst);
 }

 void leal_mr(int32_t offset, RegisterID base, RegisterID dst) {
   spew("leal       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
   m_formatter.oneByteOp(OP_LEA, offset, base, dst);
 }

 // Index-only form (no base register); uses a 32-bit displacement.
 void leal_mr(int32_t offset, RegisterID index, int scale, RegisterID dst) {
   spew("leal       " MEM_o32s ", %s", ADDR_o32s(offset, index, scale),
        GPReg32Name(dst));
   m_formatter.oneByteOp_disp32(OP_LEA, offset, index, scale, dst);
 }
   2647 
   2648  // Flow control:
   2649 
 // Emit a rel32 call with an unbound target; the returned JmpSrc marks
 // the immediate so the caller can link it later.
 [[nodiscard]] JmpSrc call() {
   m_formatter.oneByteOp(OP_CALL_rel32);
   JmpSrc r = m_formatter.immediateRel32();
   spew("call       .Lfrom%d", r.offset());
   return r;
 }

 // Indirect call through a register.
 void call_r(RegisterID dst) {
   m_formatter.oneByteOp(OP_GROUP5_Ev, dst, GROUP5_OP_CALLN);
   spew("call       *%s", GPRegName(dst));
 }

 // Indirect call through a memory operand.
 void call_m(int32_t offset, RegisterID base) {
   spew("call       *" MEM_ob, ADDR_ob(offset, base));
   m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_CALLN);
 }

 // Comparison of EAX against a 32-bit immediate. The immediate is patched
 // in as if it were a jump target. The intention is to toggle the first
 // byte of the instruction between a CMP and a JMP to produce a pseudo-NOP.
 [[nodiscard]] JmpSrc cmp_eax() {
   m_formatter.oneByteOp(OP_CMP_EAXIv);
   JmpSrc r = m_formatter.immediateRel32();
   spew("cmpl       %%eax, .Lfrom%d", r.offset());
   return r;
 }
   2676 
 // Unconditional jump to an already-known label, using the short rel8
 // form when the displacement fits.
 void jmp_i(JmpDst dst) {
   // `diff` is measured from the start of the jump instruction; the
   // -2/-5 below rebase it to the end of the chosen encoding.
   int32_t diff = dst.offset() - m_formatter.size();
   spew("jmp        .Llabel%d", dst.offset());

   // The jump immediate is an offset from the end of the jump instruction.
   // A jump instruction is either 1 byte opcode and 1 byte offset, or 1
   // byte opcode and 4 bytes offset.
   if (CAN_SIGN_EXTEND_8_32(diff - 2)) {
     m_formatter.oneByteOp(OP_JMP_rel8);
     m_formatter.immediate8s(diff - 2);
   } else {
     m_formatter.oneByteOp(OP_JMP_rel32);
     m_formatter.immediate32(diff - 5);
   }
 }
 // Unconditional jump with an unbound target; the returned JmpSrc marks
 // the rel32 immediate for later linking.
 [[nodiscard]] JmpSrc jmp() {
   m_formatter.oneByteOp(OP_JMP_rel32);
   JmpSrc r = m_formatter.immediateRel32();
   spew("jmp        .Lfrom%d", r.offset());
   return r;
 }

 // Indirect jump through a register.
 void jmp_r(RegisterID dst) {
   spew("jmp        *%s", GPRegName(dst));
   m_formatter.oneByteOp(OP_GROUP5_Ev, dst, GROUP5_OP_JMPN);
 }

 // Indirect jumps through memory operands.
 void jmp_m(int32_t offset, RegisterID base) {
   spew("jmp        *" MEM_ob, ADDR_ob(offset, base));
   m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_JMPN);
 }

 void jmp_m(int32_t offset, RegisterID base, RegisterID index, int scale) {
   spew("jmp        *" MEM_obs, ADDR_obs(offset, base, index, scale));
   m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, index, scale,
                         GROUP5_OP_JMPN);
 }

 // Conditional jump to an already-known label, using the short rel8
 // form when the displacement fits.
 void jCC_i(Condition cond, JmpDst dst) {
   int32_t diff = dst.offset() - m_formatter.size();
   spew("j%s        .Llabel%d", CCName(cond), dst.offset());

   // The jump immediate is an offset from the end of the jump instruction.
   // A conditional jump instruction is either 1 byte opcode and 1 byte
   // offset, or 2 bytes opcode and 4 bytes offset.
   if (CAN_SIGN_EXTEND_8_32(diff - 2)) {
     m_formatter.oneByteOp(jccRel8(cond));
     m_formatter.immediate8s(diff - 2);
   } else {
     m_formatter.twoByteOp(jccRel32(cond));
     m_formatter.immediate32(diff - 6);
   }
 }

 // Conditional jump with an unbound target; returns the JmpSrc marking
 // the rel32 immediate for later linking.
 [[nodiscard]] JmpSrc jCC(Condition cond) {
   m_formatter.twoByteOp(jccRel32(cond));
   JmpSrc r = m_formatter.immediateRel32();
   spew("j%s        .Lfrom%d", CCName(cond), r.offset());
   return r;
 }
   2737 
   2738  // SSE operations:
   2739 
 // Packed integer comparisons. Each lane of dst becomes all-ones if the
 // comparison holds for that lane, else all-zeros. _rr compares two
 // registers, _mr compares a memory operand against src0.
 void vpcmpeqb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, src1, src0, dst);
 }
 void vpcmpeqb_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, offset, base, src0,
                 dst);
 }
 void vpcmpeqb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, address, src0, dst);
 }

 void vpcmpgtb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, src1, src0, dst);
 }
 void vpcmpgtb_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, offset, base, src0,
                 dst);
 }
 void vpcmpgtb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, address, src0, dst);
 }

 void vpcmpeqw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, src1, src0, dst);
 }
 void vpcmpeqw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, offset, base, src0,
                 dst);
 }
 void vpcmpeqw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, address, src0, dst);
 }

 void vpcmpgtw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, src1, src0, dst);
 }
 void vpcmpgtw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, offset, base, src0,
                 dst);
 }
 void vpcmpgtw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, address, src0, dst);
 }

 void vpcmpeqd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, src1, src0, dst);
 }
 void vpcmpeqd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, offset, base, src0,
                 dst);
 }
 void vpcmpeqd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, address, src0, dst);
 }

 void vpcmpgtd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, src1, src0, dst);
 }
 void vpcmpgtd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, offset, base, src0,
                 dst);
 }
 void vpcmpgtd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, address, src0, dst);
 }

 // The 64-bit-lane compares use three-byte (0F 38) opcodes.
 void vpcmpgtq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpcmpgtq", VEX_PD, OP3_PCMPGTQ_VdqWdq, ESCAPE_38, src1,
                   src0, dst);
 }

 void vpcmpeqq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpcmpeqq", VEX_PD, OP3_PCMPEQQ_VdqWdq, ESCAPE_38, src1,
                   src0, dst);
 }
 void vpcmpeqq_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                  XMMRegisterID dst) {
   threeByteOpSimd("vpcmpeqq", VEX_PD, OP3_PCMPEQQ_VdqWdq, ESCAPE_38, offset,
                   base, src0, dst);
 }
 void vpcmpeqq_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpcmpeqq", VEX_PD, OP3_PCMPEQQ_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }
   2830 
 // vcmpps: packed single-precision compare with predicate byte `order`.
 // Without a VEX encoding, only the legacy SSE predicate values are
 // valid, hence the assertion against ConditionCmp_AVX_Enabled.
 void vcmpps_rr(uint8_t order, XMMRegisterID src1, XMMRegisterID src0,
                XMMRegisterID dst) {
   MOZ_ASSERT_IF(!useVEX_,
                 order < uint8_t(X86Encoding::ConditionCmp_AVX_Enabled));
   twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, src1, src0,
                    dst);
 }
 void vcmpps_mr(uint8_t order, int32_t offset, RegisterID base,
                XMMRegisterID src0, XMMRegisterID dst) {
   MOZ_ASSERT_IF(!useVEX_,
                 order < uint8_t(X86Encoding::ConditionCmp_AVX_Enabled));
   twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, offset, base,
                    src0, dst);
 }
 void vcmpps_mr(uint8_t order, const void* address, XMMRegisterID src0,
                XMMRegisterID dst) {
   MOZ_ASSERT_IF(!useVEX_,
                 order < uint8_t(X86Encoding::ConditionCmp_AVX_Enabled));
   twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, address, src0,
                    dst);
 }

 // Patch offset reported by the vcmpXXps_mr wrappers below; presumably
 // the byte position within the instruction that callers patch —
 // confirm against the call sites before relying on it.
 static constexpr size_t CMPPS_MR_PATCH_OFFSET = 1;

 // Predicate-specific wrappers around vcmpps_mr; each returns the patch
 // offset for the emitted instruction.
 size_t vcmpeqps_mr(const void* address, XMMRegisterID src0,
                    XMMRegisterID dst) {
   vcmpps_mr(X86Encoding::ConditionCmp_EQ, address, src0, dst);
   return CMPPS_MR_PATCH_OFFSET;
 }
 size_t vcmpneqps_mr(const void* address, XMMRegisterID src0,
                     XMMRegisterID dst) {
   vcmpps_mr(X86Encoding::ConditionCmp_NEQ, address, src0, dst);
   return CMPPS_MR_PATCH_OFFSET;
 }
 size_t vcmpltps_mr(const void* address, XMMRegisterID src0,
                    XMMRegisterID dst) {
   vcmpps_mr(X86Encoding::ConditionCmp_LT, address, src0, dst);
   return CMPPS_MR_PATCH_OFFSET;
 }
 size_t vcmpleps_mr(const void* address, XMMRegisterID src0,
                    XMMRegisterID dst) {
   vcmpps_mr(X86Encoding::ConditionCmp_LE, address, src0, dst);
   return CMPPS_MR_PATCH_OFFSET;
 }
 size_t vcmpgeps_mr(const void* address, XMMRegisterID src0,
                    XMMRegisterID dst) {
   vcmpps_mr(X86Encoding::ConditionCmp_GE, address, src0, dst);
   return CMPPS_MR_PATCH_OFFSET;
 }
   2880 
  // vcmppd: double-precision analogue of vcmpps above. NOTE(review): unlike
  // vcmpps, there is no !useVEX_ predicate-range assertion here -- presumably
  // callers only pass SSE2-legal predicates; confirm at the call sites.
  void vcmppd_rr(uint8_t order, XMMRegisterID src1, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpImmSimd("vcmppd", VEX_PD, OP2_CMPPD_VpdWpd, order, src1, src0,
                     dst);
  }
  // Absolute-address form.
  void vcmppd_mr(uint8_t order, const void* address, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpImmSimd("vcmppd", VEX_PD, OP2_CMPPD_VpdWpd, order, address, src0,
                     dst);
  }
   2891 
  // Patch offset for the fixed-predicate vcmppd helpers below; same role as
  // CMPPS_MR_PATCH_OFFSET above.
  static constexpr size_t CMPPD_MR_PATCH_OFFSET = 1;

  // Fixed-predicate vcmppd wrappers (EQ/NEQ/LT/LE), absolute-address form.
  // Each returns CMPPD_MR_PATCH_OFFSET for later patching.
  size_t vcmpeqpd_mr(const void* address, XMMRegisterID src0,
                     XMMRegisterID dst) {
    vcmppd_mr(X86Encoding::ConditionCmp_EQ, address, src0, dst);
    return CMPPD_MR_PATCH_OFFSET;
  }
  size_t vcmpneqpd_mr(const void* address, XMMRegisterID src0,
                      XMMRegisterID dst) {
    vcmppd_mr(X86Encoding::ConditionCmp_NEQ, address, src0, dst);
    return CMPPD_MR_PATCH_OFFSET;
  }
  size_t vcmpltpd_mr(const void* address, XMMRegisterID src0,
                     XMMRegisterID dst) {
    vcmppd_mr(X86Encoding::ConditionCmp_LT, address, src0, dst);
    return CMPPD_MR_PATCH_OFFSET;
  }
  size_t vcmplepd_mr(const void* address, XMMRegisterID src0,
                     XMMRegisterID dst) {
    vcmppd_mr(X86Encoding::ConditionCmp_LE, address, src0, dst);
    return CMPPD_MR_PATCH_OFFSET;
  }
   2914 
  // vrcpps: approximate reciprocal of packed singles. _rr = reg source,
  // _mr = memory source ([base+offset] or absolute address).
  void vrcpps_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, src, invalid_xmm, dst);
  }
  void vrcpps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, offset, base, invalid_xmm,
                  dst);
  }
  void vrcpps_mr(const void* address, XMMRegisterID dst) {
    twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, address, invalid_xmm,
                  dst);
  }
   2926 
  // vrsqrtps: approximate reciprocal square root of packed singles.
  void vrsqrtps_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, src, invalid_xmm,
                  dst);
  }
  void vrsqrtps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, offset, base,
                  invalid_xmm, dst);
  }
  void vrsqrtps_mr(const void* address, XMMRegisterID dst) {
    twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, address, invalid_xmm,
                  dst);
  }
   2939 
  // vsqrtps / vsqrtpd: full-precision square root of packed singles/doubles.
  void vsqrtps_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, src, invalid_xmm, dst);
  }
  void vsqrtps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, offset, base,
                  invalid_xmm, dst);
  }
  void vsqrtps_mr(const void* address, XMMRegisterID dst) {
    twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, address, invalid_xmm,
                  dst);
  }
  void vsqrtpd_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vsqrtpd", VEX_PD, OP2_SQRTPD_VpdWpd, src, invalid_xmm, dst);
  }
   2954 
  // Scalar float adds. vaddss reuses the OP2_ADDSD_VsdWsd opcode constant:
  // the VEX_SS vs. VEX_SD prefix is the only difference between the
  // single- and double-precision encodings.
  void vaddsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, src1, src0, dst);
  }

  void vaddss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, src1, src0, dst);
  }

  void vaddsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, offset, base, src0, dst);
  }

  void vaddss_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, offset, base, src0, dst);
  }

  void vaddsd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, address, src0, dst);
  }
  void vaddss_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, address, src0, dst);
  }
   2979 
  // Register-to-register float/int conversions. As with the adds above,
  // vcvtsi2ss shares the OP2_CVTSI2SD_VsdEd opcode with vcvtsi2sd; the
  // VEX_SS/VEX_SD prefix selects the precision.
  void vcvtss2sd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vcvtss2sd", VEX_SS, OP2_CVTSS2SD_VsdEd, src1, src0, dst);
  }

  void vcvtsd2ss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vcvtsd2ss", VEX_SD, OP2_CVTSD2SS_VsdEd, src1, src0, dst);
  }

  void vcvtsi2ss_rr(RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpInt32Simd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, src1, src0,
                       dst);
  }

  void vcvtsi2sd_rr(RegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpInt32Simd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, src1, src0,
                       dst);
  }

  // Packed conversions (truncating float->int, int->float, and
  // precision-changing packed forms).
  void vcvttps2dq_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vcvttps2dq", VEX_SS, OP2_CVTTPS2DQ_VdqWps, src, invalid_xmm,
                  dst);
  }

  void vcvttpd2dq_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vcvttpd2dq", VEX_PD, OP2_CVTTPD2DQ_VdqWpd, src, invalid_xmm,
                  dst);
  }

  void vcvtdq2ps_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vcvtdq2ps", VEX_PS, OP2_CVTDQ2PS_VpsWdq, src, invalid_xmm,
                  dst);
  }

  void vcvtdq2pd_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vcvtdq2pd", VEX_SS, OP2_CVTDQ2PD_VpdWdq, src, invalid_xmm,
                  dst);
  }

  void vcvtpd2ps_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vcvtpd2ps", VEX_PD, OP2_CVTPD2PS_VpsWpd, src, invalid_xmm,
                  dst);
  }

  void vcvtps2pd_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vcvtps2pd", VEX_PS, OP2_CVTPS2PD_VpdWps, src, invalid_xmm,
                  dst);
  }
   3027 
  // Memory-source int32 -> scalar float conversions, plain and scaled-index
  // addressing. Same shared-opcode/prefix scheme as the _rr forms above.
  void vcvtsi2sd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                    XMMRegisterID dst) {
    twoByteOpSimd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, offset, base, src0,
                  dst);
  }

  void vcvtsi2sd_mr(int32_t offset, RegisterID base, RegisterID index,
                    int scale, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, offset, base, index,
                  scale, src0, dst);
  }

  void vcvtsi2ss_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                    XMMRegisterID dst) {
    twoByteOpSimd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, offset, base, src0,
                  dst);
  }

  void vcvtsi2ss_mr(int32_t offset, RegisterID base, RegisterID index,
                    int scale, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vcvtsi2ss", VEX_SS, OP2_CVTSI2SD_VsdEd, offset, base, index,
                  scale, src0, dst);
  }
   3051 
  // Truncating scalar float -> int32 conversions (shared opcode, prefix
  // selects the source precision).
  void vcvttsd2si_rr(XMMRegisterID src, RegisterID dst) {
    twoByteOpSimdInt32("vcvttsd2si", VEX_SD, OP2_CVTTSD2SI_GdWsd, src, dst);
  }

  void vcvttss2si_rr(XMMRegisterID src, RegisterID dst) {
    twoByteOpSimdInt32("vcvttss2si", VEX_SS, OP2_CVTTSD2SI_GdWsd, src, dst);
  }
   3059 
  // Interleave low/high packed singles from the two sources.
  void vunpcklps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, src1, src0, dst);
  }
  void vunpcklps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                    XMMRegisterID dst) {
    twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, offset, base, src0,
                  dst);
  }
  void vunpcklps_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vunpcklps", VEX_PS, OP2_UNPCKLPS_VsdWsd, addr, src0, dst);
  }

  void vunpckhps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, src1, src0, dst);
  }
  void vunpckhps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                    XMMRegisterID dst) {
    twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, offset, base, src0,
                  dst);
  }
  void vunpckhps_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, addr, src0, dst);
  }
   3083 
  // 128-bit bitwise logic: AND, OR, XOR, and AND-NOT (vpandn computes
  // ~src0 & src1 per the x86 PANDN definition).
  void vpand_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, src1, src0, dst);
  }
  void vpand_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
    twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, offset, base, src0, dst);
  }
  void vpand_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, address, src0, dst);
  }
  void vpor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, src1, src0, dst);
  }
  void vpor_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
               XMMRegisterID dst) {
    twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, offset, base, src0, dst);
  }
  void vpor_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, address, src0, dst);
  }
  void vpxor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, src1, src0, dst);
  }
  void vpxor_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
    twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, offset, base, src0, dst);
  }
  void vpxor_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, address, src0, dst);
  }
  void vpandn_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, src1, src0, dst);
  }
  void vpandn_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
    twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, offset, base, src0,
                  dst);
  }
  void vpandn_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, address, src0, dst);
  }
  // vptest with a memory right-hand operand (SSE4.1, sets EFLAGS only).
  void vptest_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vptest", VEX_PD, OP3_PTEST_VdVd, ESCAPE_38, address, src0,
                    dst);
  }
   3129 
  // Immediate-controlled shuffles: vpshufd (dwords), vpshuflw (low words),
  // vpshufhw (high words). `mask` is the 8-bit shuffle-control immediate.
  void vpshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, src,
                     invalid_xmm, dst);
  }
  void vpshufd_imr(uint32_t mask, int32_t offset, RegisterID base,
                   XMMRegisterID dst) {
    twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, offset, base,
                     invalid_xmm, dst);
  }
  void vpshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst) {
    twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, address,
                     invalid_xmm, dst);
  }

  void vpshuflw_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpImmSimd("vpshuflw", VEX_SD, OP2_PSHUFLW_VdqWdqIb, mask, src,
                     invalid_xmm, dst);
  }

  void vpshufhw_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpImmSimd("vpshufhw", VEX_SS, OP2_PSHUFHW_VdqWdqIb, mask, src,
                     invalid_xmm, dst);
  }
   3153 
  // vpshufb: byte shuffle controlled by src1 (SSSE3, 0F 38 escape).
  void vpshufb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vpshufb", VEX_PD, OP3_PSHUFB_VdqWdq, ESCAPE_38, src1, src0,
                    dst);
  }
  void vpshufb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vpshufb", VEX_PD, OP3_PSHUFB_VdqWdq, ESCAPE_38, address,
                    src0, dst);
  }
   3162 
  // vshufps / vshufpd: immediate-controlled float shuffles drawing lanes
  // from both sources.
  void vshufps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, src1, src0,
                     dst);
  }
  void vshufps_imr(uint32_t mask, int32_t offset, RegisterID base,
                   XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, offset, base,
                     src0, dst);
  }
  void vshufps_imr(uint32_t mask, const void* address, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, address,
                     src0, dst);
  }
  void vshufpd_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0,
                   XMMRegisterID dst) {
    twoByteOpImmSimd("vshufpd", VEX_PD, OP2_SHUFPD_VpdWpdIb, mask, src1, src0,
                     dst);
  }
   3183 
  // vmovddup: duplicate the low double into both lanes (SSE3).
  void vmovddup_rr(XMMRegisterID src, XMMRegisterID dst) {
    twoByteOpSimd("vmovddup", VEX_SD, OP2_MOVDDUP_VqWq, src, invalid_xmm, dst);
  }
  void vmovddup_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vmovddup", VEX_SD, OP2_MOVDDUP_VqWq, offset, base,
                  invalid_xmm, dst);
  }
  void vmovddup_mr(int32_t offset, RegisterID base, RegisterID index,
                   int32_t scale, XMMRegisterID dst) {
    twoByteOpSimd("vmovddup", VEX_SD, OP2_MOVDDUP_VqWq, offset, base, index,
                  scale, invalid_xmm, dst);
  }
   3196 
  // vmovhlps / vmovlhps: move high<->low 64-bit halves between registers.
  void vmovhlps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmovhlps", VEX_PS, OP2_MOVHLPS_VqUq, src1, src0, dst);
  }

  void vmovlhps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmovlhps", VEX_PS, OP2_MOVLHPS_VqUq, src1, src0, dst);
  }
   3204 
  // SIMD shifts. _ir = immediate count, _rr = count in an XMM register.
  // Several immediate forms pass the same group opcode (e.g. OP2_PSRLDQ_Vd)
  // with different ShiftID values -- NOTE(review): the ShiftID presumably
  // selects the specific shift via the ModRM reg field inside
  // shiftOpImmSimd; confirm there. The asserts bound the count to the
  // element width (or 16 for the whole-register byte shifts).
  void vpsrldq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 16);
    shiftOpImmSimd("vpsrldq", OP2_PSRLDQ_Vd, ShiftID::vpsrldq, count, src, dst);
  }

  void vpslldq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 16);
    shiftOpImmSimd("vpslldq", OP2_PSRLDQ_Vd, ShiftID::vpslldq, count, src, dst);
  }

  void vpsllq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 64);
    shiftOpImmSimd("vpsllq", OP2_PSRLDQ_Vd, ShiftID::vpsllx, count, src, dst);
  }

  void vpsllq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsllq", VEX_PD, OP2_PSLLQ_VdqWdq, src1, src0, dst);
  }

  void vpsrlq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 64);
    shiftOpImmSimd("vpsrlq", OP2_PSRLDQ_Vd, ShiftID::vpsrlx, count, src, dst);
  }

  void vpsrlq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsrlq", VEX_PD, OP2_PSRLQ_VdqWdq, src1, src0, dst);
  }

  void vpslld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpslld", VEX_PD, OP2_PSLLD_VdqWdq, src1, src0, dst);
  }

  void vpslld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 32);
    shiftOpImmSimd("vpslld", OP2_PSLLD_UdqIb, ShiftID::vpsllx, count, src, dst);
  }

  void vpsrad_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsrad", VEX_PD, OP2_PSRAD_VdqWdq, src1, src0, dst);
  }

  void vpsrad_ir(int32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 32);
    shiftOpImmSimd("vpsrad", OP2_PSRAD_UdqIb, ShiftID::vpsrad, count, src, dst);
  }

  void vpsrld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsrld", VEX_PD, OP2_PSRLD_VdqWdq, src1, src0, dst);
  }

  void vpsrld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 32);
    shiftOpImmSimd("vpsrld", OP2_PSRLD_UdqIb, ShiftID::vpsrlx, count, src, dst);
  }

  void vpsllw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsllw", VEX_PD, OP2_PSLLW_VdqWdq, src1, src0, dst);
  }

  void vpsllw_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 16);
    shiftOpImmSimd("vpsllw", OP2_PSLLW_UdqIb, ShiftID::vpsllx, count, src, dst);
  }

  void vpsraw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsraw", VEX_PD, OP2_PSRAW_VdqWdq, src1, src0, dst);
  }

  void vpsraw_ir(int32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 16);
    shiftOpImmSimd("vpsraw", OP2_PSRAW_UdqIb, ShiftID::vpsrad, count, src, dst);
  }

  void vpsrlw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsrlw", VEX_PD, OP2_PSRLW_VdqWdq, src1, src0, dst);
  }

  void vpsrlw_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst) {
    MOZ_ASSERT(count < 16);
    shiftOpImmSimd("vpsrlw", OP2_PSRLW_UdqIb, ShiftID::vpsrlx, count, src, dst);
  }
   3286 
  // Extract sign-bit masks into a GPR. vmovmskps reuses the
  // OP2_MOVMSKPD_EdVd constant; the VEX_PS prefix selects the ps form.
  void vmovmskpd_rr(XMMRegisterID src, RegisterID dst) {
    twoByteOpSimdInt32("vmovmskpd", VEX_PD, OP2_MOVMSKPD_EdVd, src, dst);
  }

  void vmovmskps_rr(XMMRegisterID src, RegisterID dst) {
    twoByteOpSimdInt32("vmovmskps", VEX_PS, OP2_MOVMSKPD_EdVd, src, dst);
  }

  void vpmovmskb_rr(XMMRegisterID src, RegisterID dst) {
    twoByteOpSimdInt32("vpmovmskb", VEX_PD, OP2_PMOVMSKB_EdVd, src, dst);
  }
   3298 
  // vptest lhs, rhs: sets EFLAGS from AND/ANDN of the operands; no XMM
  // destination is written, hence the lhs/rhs parameter naming.
  void vptest_rr(XMMRegisterID rhs, XMMRegisterID lhs) {
    threeByteOpSimd("vptest", VEX_PD, OP3_PTEST_VdVd, ESCAPE_38, rhs,
                    invalid_xmm, lhs);
  }
   3303 
  // 32-bit moves between XMM and GPR / memory. For the xmm->gpr form the
  // arguments are deliberately cast-swapped because the encoder helper's
  // parameter order is (XMM, GPR) while OP2_MOVD_EdVd encodes the store
  // direction.
  void vmovd_rr(XMMRegisterID src, RegisterID dst) {
    twoByteOpSimdInt32("vmovd", VEX_PD, OP2_MOVD_EdVd, (XMMRegisterID)dst,
                       (RegisterID)src);
  }

  void vmovd_rr(RegisterID src, XMMRegisterID dst) {
    twoByteOpInt32Simd("vmovd", VEX_PD, OP2_MOVD_VdEd, src, invalid_xmm, dst);
  }

  // Loads: [base+offset], [base+index*scale+offset], disp32-forced, and
  // absolute-address forms.
  void vmovd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, invalid_xmm,
                  dst);
  }

  void vmovd_mr(int32_t offset, RegisterID base, RegisterID index,
                int32_t scale, XMMRegisterID dst) {
    twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, index, scale,
                  invalid_xmm, dst);
  }

  void vmovd_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd_disp32("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base,
                         invalid_xmm, dst);
  }

  void vmovd_mr(const void* address, XMMRegisterID dst) {
    twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, address, invalid_xmm, dst);
  }

  // Stores: same addressing-mode variants in the register-to-memory
  // direction.
  void vmovd_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, invalid_xmm,
                  src);
  }

  void vmovd_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                RegisterID index, int scale) {
    twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, index, scale,
                  invalid_xmm, src);
  }

  void vmovd_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd_disp32("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base,
                         invalid_xmm, src);
  }

  void vmovd_rm(XMMRegisterID src, const void* address) {
    twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, address, invalid_xmm, src);
  }
   3352 
  // Scalar double/single moves. vmovss shares the OP2_MOVSD_* opcode
  // constants with vmovsd; the VEX_SS prefix selects the single-precision
  // form. _rm = store to memory, _mr = load from memory, _disp32 forces a
  // 32-bit displacement encoding.
  void vmovsd_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm,
                  src);
  }

  void vmovsd_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd_disp32("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base,
                         invalid_xmm, src);
  }

  void vmovss_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm,
                  src);
  }

  void vmovss_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd_disp32("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base,
                         invalid_xmm, src);
  }

  void vmovss_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, invalid_xmm,
                  dst);
  }

  void vmovss_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd_disp32("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base,
                         invalid_xmm, dst);
  }

  // Scaled-index addressing variants.
  void vmovsd_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                 RegisterID index, int scale) {
    twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, index,
                  scale, invalid_xmm, src);
  }

  void vmovss_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                 RegisterID index, int scale) {
    twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, offset, base, index,
                  scale, invalid_xmm, src);
  }

  void vmovss_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                 XMMRegisterID dst) {
    twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, index,
                  scale, invalid_xmm, dst);
  }

  void vmovsd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, invalid_xmm,
                  dst);
  }

  void vmovsd_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd_disp32("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base,
                         invalid_xmm, dst);
  }

  void vmovsd_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                 XMMRegisterID dst) {
    twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, index,
                  scale, invalid_xmm, dst);
  }

  // Note that the register-to-register form of vmovsd does not write to the
  // entire output register. For general-purpose register-to-register moves,
  // use vmovapd instead.
  void vmovsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, src1, src0, dst);
  }

  // The register-to-register form of vmovss has the same problem as vmovsd
  // above. Prefer vmovaps for register-to-register moves.
  void vmovss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, src1, src0, dst);
  }

  // Absolute-address loads.
  void vmovsd_mr(const void* address, XMMRegisterID dst) {
    twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, address, invalid_xmm,
                  dst);
  }

  void vmovss_mr(const void* address, XMMRegisterID dst) {
    twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, address, invalid_xmm,
                  dst);
  }
   3439 
  // Absolute-address 128-bit vector loads and stores: unaligned
  // (vmovups/vmovdqu), aligned (vmovaps/vmovdqa), and scalar
  // (vmovsd/vmovss) forms.
  void vmovups_mr(const void* address, XMMRegisterID dst) {
    twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, address, invalid_xmm,
                  dst);
  }

  void vmovdqu_mr(const void* address, XMMRegisterID dst) {
    twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, address, invalid_xmm,
                  dst);
  }

  void vmovsd_rm(XMMRegisterID src, const void* address) {
    twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, address, invalid_xmm,
                  src);
  }

  void vmovss_rm(XMMRegisterID src, const void* address) {
    twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, address, invalid_xmm,
                  src);
  }

  void vmovdqa_rm(XMMRegisterID src, const void* address) {
    twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, address, invalid_xmm,
                  src);
  }

  void vmovaps_rm(XMMRegisterID src, const void* address) {
    twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, address, invalid_xmm,
                  src);
  }

  void vmovdqu_rm(XMMRegisterID src, const void* address) {
    twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, address, invalid_xmm,
                  src);
  }

  void vmovups_rm(XMMRegisterID src, const void* address) {
    twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, address, invalid_xmm,
                  src);
  }
   3479 
  // vmovaps: aligned packed-single moves.
  void vmovaps_rr(XMMRegisterID src, XMMRegisterID dst) {
#ifdef JS_CODEGEN_X64
    // There are two opcodes that can encode this instruction. If we have
    // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the
    // opcode which swaps the operands, as that way we can get a two-byte
    // VEX in that case.
    if (src >= xmm8 && dst < xmm8) {
      twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, dst, invalid_xmm,
                    src);
      return;
    }
#endif
    twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, src, invalid_xmm, dst);
  }
  // Memory store forms.
  void vmovaps_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, offset, base,
                  invalid_xmm, src);
  }
  void vmovaps_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                  RegisterID index, int scale) {
    twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, offset, base, index,
                  scale, invalid_xmm, src);
  }
  // Memory load forms.
  void vmovaps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, offset, base,
                  invalid_xmm, dst);
  }
  void vmovaps_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                  XMMRegisterID dst) {
    twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, offset, base, index,
                  scale, invalid_xmm, dst);
  }
   3512 
  // vmovups: unaligned packed-single loads/stores, base+offset and
  // scaled-index addressing, plus disp32-forced variants.
  void vmovups_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, offset, base,
                  invalid_xmm, src);
  }
  void vmovups_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) {
    twoByteOpSimd_disp32("vmovups", VEX_PS, OP2_MOVPS_WpsVps, offset, base,
                         invalid_xmm, src);
  }
  void vmovups_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                  RegisterID index, int scale) {
    twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, offset, base, index,
                  scale, invalid_xmm, src);
  }
  void vmovups_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base,
                  invalid_xmm, dst);
  }
  void vmovups_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) {
    twoByteOpSimd_disp32("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base,
                         invalid_xmm, dst);
  }
  void vmovups_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                  XMMRegisterID dst) {
    twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, index,
                  scale, invalid_xmm, dst);
  }
   3539 
  // vmovapd: aligned packed-double register move; same two-opcode
  // operand-swap trick as vmovaps_rr above.
  void vmovapd_rr(XMMRegisterID src, XMMRegisterID dst) {
#ifdef JS_CODEGEN_X64
    // There are two opcodes that can encode this instruction. If we have
    // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the
    // opcode which swaps the operands, as that way we can get a two-byte
    // VEX in that case.
    if (src >= xmm8 && dst < xmm8) {
      twoByteOpSimd("vmovapd", VEX_PD, OP2_MOVAPS_WsdVsd, dst, invalid_xmm,
                    src);
      return;
    }
#endif
    twoByteOpSimd("vmovapd", VEX_PD, OP2_MOVAPD_VsdWsd, src, invalid_xmm, dst);
  }
   3554 
 // vmovdqu: unaligned 128-bit integer move between an XMM register and
 // memory. _rm forms store (OP2_MOVDQ_WdqVdq), _mr forms load
 // (OP2_MOVDQ_VdqWdq); _disp32 forms force a 32-bit displacement.

 // Store src to [base + offset].
 void vmovdqu_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
   twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base,
                 invalid_xmm, src);
 }

 // Store with a forced 32-bit displacement (patchable offsets).
 void vmovdqu_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base) {
   twoByteOpSimd_disp32("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base,
                        invalid_xmm, src);
 }

 // Store to [base + index*scale + offset].
 void vmovdqu_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                 RegisterID index, int scale) {
   twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, offset, base, index,
                 scale, invalid_xmm, src);
 }

 // Load from [base + offset] into dst.
 void vmovdqu_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base,
                 invalid_xmm, dst);
 }

 // Load with a forced 32-bit displacement.
 void vmovdqu_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst) {
   twoByteOpSimd_disp32("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base,
                        invalid_xmm, dst);
 }

 // Load from [base + index*scale + offset] into dst.
 void vmovdqu_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                 XMMRegisterID dst) {
   twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, index,
                 scale, invalid_xmm, dst);
 }
   3586 
 // vmovdqa: register-to-register aligned 128-bit integer move.
 void vmovdqa_rr(XMMRegisterID src, XMMRegisterID dst) {
#ifdef JS_CODEGEN_X64
   // There are two opcodes that can encode this instruction. If we have
   // one register in [xmm8,xmm15] and one in [xmm0,xmm7], use the
   // opcode which swaps the operands, as that way we can get a two-byte
   // VEX in that case.
   if (src >= xmm8 && dst < xmm8) {
     twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, dst, invalid_xmm, src);
     return;
   }
#endif
   twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, src, invalid_xmm, dst);
 }
   3600 
 // vmovdqa memory forms: aligned 128-bit integer store (_rm) and
 // load (_mr), with base and base+index*scale addressing.

 // Store src to [base + offset].
 void vmovdqa_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
   twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, offset, base,
                 invalid_xmm, src);
 }

 // Store to [base + index*scale + offset].
 void vmovdqa_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                 RegisterID index, int scale) {
   twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, offset, base, index,
                 scale, invalid_xmm, src);
 }

 // Load from [base + offset] into dst.
 void vmovdqa_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, offset, base,
                 invalid_xmm, dst);
 }

 // Load from [base + index*scale + offset] into dst.
 void vmovdqa_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                 XMMRegisterID dst) {
   twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, offset, base, index,
                 scale, invalid_xmm, dst);
 }
   3622 
 // Scalar multiply: vmulsd (double, VEX_SD) and vmulss (single, VEX_SS).
 // Both share the OP2_MULSD_VsdWsd opcode; the prefix selects the width.
 // dst = src0 * src1 (or src0 * [base + offset] for the _mr forms).

 void vmulsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vmulsd", VEX_SD, OP2_MULSD_VsdWsd, src1, src0, dst);
 }

 void vmulss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vmulss", VEX_SS, OP2_MULSD_VsdWsd, src1, src0, dst);
 }

 void vmulsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vmulsd", VEX_SD, OP2_MULSD_VsdWsd, offset, base, src0, dst);
 }

 void vmulss_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vmulss", VEX_SS, OP2_MULSD_VsdWsd, offset, base, src0, dst);
 }
   3640 
 // vpinsrw: insert the low 16 bits of GPR src1 into word lane `whichWord`
 // of src0, writing the result to dst. PINSRW has 8 word lanes, hence
 // the < 8 assertion.
 void vpinsrw_irr(uint32_t whichWord, RegisterID src1, XMMRegisterID src0,
                  XMMRegisterID dst) {
   MOZ_ASSERT(whichWord < 8);
   twoByteOpImmInt32Simd("vpinsrw", VEX_PD, OP2_PINSRW, whichWord, src1, src0,
                         dst);
 }
   3647  void vpinsrw_imr(unsigned lane, int32_t offset, RegisterID base,
   3648                   XMMRegisterID src0, XMMRegisterID dst) {
   3649    MOZ_ASSERT(lane < 16);
   3650    twoByteOpImmInt32Simd("vpinsrw", VEX_PD, OP2_PINSRW, lane, offset, base,
   3651                          src0, dst);
   3652  }
   3653  void vpinsrw_imr(unsigned lane, int32_t offset, RegisterID base,
   3654                   RegisterID index, int32_t scale, XMMRegisterID src0,
   3655                   XMMRegisterID dst) {
   3656    MOZ_ASSERT(lane < 16);
   3657    twoByteOpImmInt32Simd("vpinsrw", VEX_PD, OP2_PINSRW, lane, offset, base,
   3658                          index, scale, src0, dst);
   3659  }
   3660 
 // vpextrw: extract word lane `whichWord` of src, zero-extended, into a
 // GPR (register form) or store it to memory (irm forms). The memory
 // forms use the three-byte 0F 3A encoding (OP3_PEXTRW_EwVdqIb); all
 // assert the 8-word-lane bound.
 void vpextrw_irr(uint32_t whichWord, XMMRegisterID src, RegisterID dst) {
   MOZ_ASSERT(whichWord < 8);
   twoByteOpImmSimdInt32("vpextrw", VEX_PD, OP2_PEXTRW_GdUdIb, whichWord, src,
                         dst);
 }

 // Store lane to [base + offset].
 void vpextrw_irm(unsigned lane, XMMRegisterID src, int32_t offset,
                  RegisterID base) {
   MOZ_ASSERT(lane < 8);
   threeByteOpImmSimdInt32("vpextrw", VEX_PD, OP3_PEXTRW_EwVdqIb, ESCAPE_3A,
                           lane, offset, base, (RegisterID)src);
 }

 // Store lane to [base + index*scale + offset].
 void vpextrw_irm(unsigned lane, XMMRegisterID src, int32_t offset,
                  RegisterID base, RegisterID index, int scale) {
   MOZ_ASSERT(lane < 8);
   threeByteOpImmSimdInt32("vpextrw", VEX_PD, OP3_PEXTRW_EwVdqIb, ESCAPE_3A,
                           lane, offset, base, index, scale, (RegisterID)src);
 }
   3680 
 // Scalar subtract: vsubsd (double) / vsubss (single), sharing the
 // OP2_SUBSD_VsdWsd opcode. dst = src0 - src1 (or src0 - [base + offset]).

 void vsubsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vsubsd", VEX_SD, OP2_SUBSD_VsdWsd, src1, src0, dst);
 }

 void vsubss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vsubss", VEX_SS, OP2_SUBSD_VsdWsd, src1, src0, dst);
 }

 void vsubsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vsubsd", VEX_SD, OP2_SUBSD_VsdWsd, offset, base, src0, dst);
 }

 void vsubss_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vsubss", VEX_SS, OP2_SUBSD_VsdWsd, offset, base, src0, dst);
 }
   3698 
 // Unordered scalar compare setting EFLAGS: vucomiss (single) and
 // vucomisd (double). Emitted via twoByteOpSimdFlags since the result is
 // in flags, not a register.

 void vucomiss_rr(XMMRegisterID rhs, XMMRegisterID lhs) {
   twoByteOpSimdFlags("vucomiss", VEX_PS, OP2_UCOMISD_VsdWsd, rhs, lhs);
 }

 void vucomisd_rr(XMMRegisterID rhs, XMMRegisterID lhs) {
   twoByteOpSimdFlags("vucomisd", VEX_PD, OP2_UCOMISD_VsdWsd, rhs, lhs);
 }

 // Compare lhs against a double loaded from [base + offset].
 void vucomisd_mr(int32_t offset, RegisterID base, XMMRegisterID lhs) {
   twoByteOpSimdFlags("vucomisd", VEX_PD, OP2_UCOMISD_VsdWsd, offset, base,
                      lhs);
 }
   3711 
 // Scalar divide: vdivsd (double) / vdivss (single), sharing the
 // OP2_DIVSD_VsdWsd opcode. dst = src0 / src1 (or src0 / [base + offset]).

 void vdivsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vdivsd", VEX_SD, OP2_DIVSD_VsdWsd, src1, src0, dst);
 }

 void vdivss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vdivss", VEX_SS, OP2_DIVSD_VsdWsd, src1, src0, dst);
 }

 void vdivsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vdivsd", VEX_SD, OP2_DIVSD_VsdWsd, offset, base, src0, dst);
 }

 void vdivss_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vdivss", VEX_SS, OP2_DIVSD_VsdWsd, offset, base, src0, dst);
 }
   3729 
 // Packed-double bitwise ops: vxorpd, vorpd, vandpd.
 // dst = src0 OP src1, or src0 OP 128 bits at an absolute address for
 // the _mr(const void*) forms.

 void vxorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vxorpd", VEX_PD, OP2_XORPD_VpdWpd, src1, src0, dst);
 }

 void vxorpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vxorpd", VEX_PD, OP2_XORPD_VpdWpd, address, src0, dst);
 }

 void vorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vorpd", VEX_PD, OP2_ORPD_VpdWpd, src1, src0, dst);
 }

 void vandpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vandpd", VEX_PD, OP2_ANDPD_VpdWpd, src1, src0, dst);
 }
 void vandpd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vandpd", VEX_PD, OP2_ANDPD_VpdWpd, address, src0, dst);
 }
   3748 
 // Packed-single bitwise AND / AND-NOT: vandps and vandnps.
 // (vandnps computes ~src0 & src1, per the ANDNPS opcode.)

 void vandps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, src1, src0, dst);
 }

 // src1 taken from [base + offset].
 void vandps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, offset, base, src0, dst);
 }

 // src1 taken from an absolute address.
 void vandps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, address, src0, dst);
 }

 void vandnps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, src1, src0, dst);
 }

 void vandnps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
   twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, offset, base, src0,
                 dst);
 }

 void vandnps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, address, src0, dst);
 }
   3775 
 // Packed-single bitwise OR / XOR: vorps and vxorps, each with register,
 // [base + offset], and absolute-address source forms.

 void vorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, src1, src0, dst);
 }

 void vorps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
               XMMRegisterID dst) {
   twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, offset, base, src0, dst);
 }

 void vorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, address, src0, dst);
 }

 void vxorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, src1, src0, dst);
 }

 void vxorps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, offset, base, src0, dst);
 }

 void vxorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, address, src0, dst);
 }
   3801 
 // Scalar square root: vsqrtsd (double) and vsqrtss (single).

 void vsqrtsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vsqrtsd", VEX_SD, OP2_SQRTSD_VsdWsd, src1, src0, dst);
 }

 void vsqrtss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vsqrtss", VEX_SS, OP2_SQRTSS_VssWss, src1, src0, dst);
 }
   3809 
 // SSE4.1 rounding with an immediate mode: scalar vroundsd/vroundss take a
 // RoundingMode, packed vroundps/vroundpd take an SSERoundingMode. All are
 // three-byte 0F 3A encodings with the PD prefix.

 void vroundsd_irr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpImmSimd("vroundsd", VEX_PD, OP3_ROUNDSD_VsdWsd, ESCAPE_3A, mode,
                      src, invalid_xmm, dst);
 }

 void vroundss_irr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpImmSimd("vroundss", VEX_PD, OP3_ROUNDSS_VsdWsd, ESCAPE_3A, mode,
                      src, invalid_xmm, dst);
 }
 void vroundps_irr(SSERoundingMode mode, XMMRegisterID src,
                   XMMRegisterID dst) {
   threeByteOpImmSimd("vroundps", VEX_PD, OP3_ROUNDPS_VpsWps, ESCAPE_3A,
                      int(mode), src, invalid_xmm, dst);
 }
 void vroundpd_irr(SSERoundingMode mode, XMMRegisterID src,
                   XMMRegisterID dst) {
   threeByteOpImmSimd("vroundpd", VEX_PD, OP3_ROUNDPD_VpdWpd, ESCAPE_3A,
                      int(mode), src, invalid_xmm, dst);
 }
   3829 
 // vinsertps (SSE4.1): insert a single float lane selected/controlled by
 // the immediate `mask` into src0, writing to dst. Source lane may come
 // from a register or memory.

 void vinsertps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0,
                    XMMRegisterID dst) {
   threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_3A,
                      mask, src1, src0, dst);
 }
 // Source float loaded from [base + offset].
 void vinsertps_imr(uint32_t mask, int32_t offset, RegisterID base,
                    XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_3A,
                      mask, offset, base, src0, dst);
 }
 // Source float loaded from [base + index*scale + offset].
 void vinsertps_imr(uint32_t mask, int32_t offset, RegisterID base,
                    RegisterID index, int scale, XMMRegisterID src0,
                    XMMRegisterID dst) {
   threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_3A,
                      mask, offset, base, index, scale, src0, dst);
 }
   3846 
 // vmovlps / vmovhps: 64-bit moves between memory and the low (lps) or
 // high (hps) quadword of an XMM register. _mr forms load (merging with
 // src0 into dst), _rm forms store.

 void vmovlps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
   twoByteOpSimd("vmovlps", VEX_PS, OP2_MOVLPS_VqEq, offset, base, src0, dst);
 }
 void vmovlps_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                 XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vmovlps", VEX_PS, OP2_MOVLPS_VqEq, offset, base, index,
                 scale, src0, dst);
 }
 void vmovlps_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
   twoByteOpSimd("vmovlps", VEX_PS, OP2_MOVLPS_EqVq, offset, base, invalid_xmm,
                 src);
 }
 void vmovlps_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                 RegisterID index, int scale) {
   twoByteOpSimd("vmovlps", VEX_PS, OP2_MOVLPS_EqVq, offset, base, index,
                 scale, invalid_xmm, src);
 }

 void vmovhps_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                 XMMRegisterID dst) {
   twoByteOpSimd("vmovhps", VEX_PS, OP2_MOVHPS_VqEq, offset, base, src0, dst);
 }
 void vmovhps_mr(int32_t offset, RegisterID base, RegisterID index, int scale,
                 XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vmovhps", VEX_PS, OP2_MOVHPS_VqEq, offset, base, index,
                 scale, src0, dst);
 }

 void vmovhps_rm(XMMRegisterID src, int32_t offset, RegisterID base) {
   twoByteOpSimd("vmovhps", VEX_PS, OP2_MOVHPS_EqVq, offset, base, invalid_xmm,
                 src);
 }
 void vmovhps_rm(XMMRegisterID src, int32_t offset, RegisterID base,
                 RegisterID index, int scale) {
   twoByteOpSimd("vmovhps", VEX_PS, OP2_MOVHPS_EqVq, offset, base, index,
                 scale, invalid_xmm, src);
 }
   3885 
 // vextractps (SSE4.1): store float lane `lane` of src directly to memory.

 void vextractps_rm(unsigned lane, XMMRegisterID src, int32_t offset,
                    RegisterID base) {
   threeByteOpImmSimd("vextractps", VEX_PD, OP3_EXTRACTPS_EdVdqIb, ESCAPE_3A,
                      lane, offset, base, invalid_xmm, src);
 }
 // Store to [base + index*scale + offset].
 void vextractps_rm(unsigned lane, XMMRegisterID src, int32_t offset,
                    RegisterID base, RegisterID index, int scale) {
   threeByteOpImmSimd("vextractps", VEX_PD, OP3_EXTRACTPS_EdVdqIb, ESCAPE_3A,
                      lane, offset, base, index, scale, invalid_xmm, src);
 }
   3896 
 // vpblendw: per-word blend of src0/src1 controlled by an 8-bit immediate
 // mask (one bit per word lane, hence mask < 256).
 void vpblendw_irr(unsigned mask, XMMRegisterID src1, XMMRegisterID src0,
                   XMMRegisterID dst) {
   MOZ_ASSERT(mask < 256);
   threeByteOpImmSimd("vpblendw", VEX_PD, OP3_PBLENDW_VdqWdqIb, ESCAPE_3A,
                      mask, src1, src0, dst);
 }

 // vpblendvb: per-byte variable blend selected by the sign bits of `mask`.
 // vblendvOpSimd picks between the legacy-SSE and VEX encodings.
 void vpblendvb_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0,
                   XMMRegisterID dst) {
   vblendvOpSimd("vpblendvb", OP3_PBLENDVB_VdqWdq, OP3_VPBLENDVB_VdqWdq, mask,
                 src1, src0, dst);
 }
   3909 
 // vpinsrb (SSE4.1): insert a byte into one of the 16 byte lanes of src0,
 // writing to dst. The byte comes from a GPR or from memory.

 void vpinsrb_irr(unsigned lane, RegisterID src1, XMMRegisterID src0,
                  XMMRegisterID dst) {
   MOZ_ASSERT(lane < 16);
   threeByteOpImmInt32Simd("vpinsrb", VEX_PD, OP3_PINSRB_VdqEvIb, ESCAPE_3A,
                           lane, src1, src0, dst);
 }
 // Byte loaded from [base + offset].
 void vpinsrb_imr(unsigned lane, int32_t offset, RegisterID base,
                  XMMRegisterID src0, XMMRegisterID dst) {
   MOZ_ASSERT(lane < 16);
   threeByteOpImmInt32Simd("vpinsrb", VEX_PD, OP3_PINSRB_VdqEvIb, ESCAPE_3A,
                           lane, offset, base, src0, dst);
 }
 // Byte loaded from [base + index*scale + offset].
 void vpinsrb_imr(unsigned lane, int32_t offset, RegisterID base,
                  RegisterID index, int32_t scale, XMMRegisterID src0,
                  XMMRegisterID dst) {
   MOZ_ASSERT(lane < 16);
   threeByteOpImmInt32Simd("vpinsrb", VEX_PD, OP3_PINSRB_VdqEvIb, ESCAPE_3A,
                           lane, offset, base, index, scale, src0, dst);
 }

 // vpinsrd (SSE4.1): insert a 32-bit GPR value into one of 4 dword lanes.
 void vpinsrd_irr(unsigned lane, RegisterID src1, XMMRegisterID src0,
                  XMMRegisterID dst) {
   MOZ_ASSERT(lane < 4);
   threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEvIb, ESCAPE_3A,
                           lane, src1, src0, dst);
 }
   3936 
 // vpextrb / vpextrd (SSE4.1): extract a byte (16 lanes) or dword
 // (4 lanes) from src into a GPR or memory. Note the register forms cast
 // operands because PEXTR encodes the XMM source in the reg field and the
 // GPR destination in the r/m field.

 void vpextrb_irr(unsigned lane, XMMRegisterID src, RegisterID dst) {
   MOZ_ASSERT(lane < 16);
   threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EvVdqIb, ESCAPE_3A,
                           lane, (XMMRegisterID)dst, (RegisterID)src);
 }

 // Store byte lane to [base + offset].
 void vpextrb_irm(unsigned lane, XMMRegisterID src, int32_t offset,
                  RegisterID base) {
   MOZ_ASSERT(lane < 16);
   threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EvVdqIb, ESCAPE_3A,
                           lane, offset, base, (RegisterID)src);
 }

 // Store byte lane to [base + index*scale + offset].
 void vpextrb_irm(unsigned lane, XMMRegisterID src, int32_t offset,
                  RegisterID base, RegisterID index, int scale) {
   MOZ_ASSERT(lane < 16);
   threeByteOpImmSimdInt32("vpextrb", VEX_PD, OP3_PEXTRB_EvVdqIb, ESCAPE_3A,
                           lane, offset, base, index, scale, (RegisterID)src);
 }

 void vpextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst) {
   MOZ_ASSERT(lane < 4);
   threeByteOpImmSimdInt32("vpextrd", VEX_PD, OP3_PEXTRD_EvVdqIb, ESCAPE_3A,
                           lane, (XMMRegisterID)dst, (RegisterID)src);
 }
   3962 
 // vblendps: per-float blend controlled by a 4-bit immediate (imm < 16).
 void vblendps_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0,
                   XMMRegisterID dst) {
   MOZ_ASSERT(imm < 16);
   // Despite being a "ps" instruction, vblendps is encoded with the "pd"
   // prefix.
   threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm,
                      src1, src0, dst);
 }

 // As above, with src1 loaded from [base + offset].
 void vblendps_imr(unsigned imm, int32_t offset, RegisterID base,
                   XMMRegisterID src0, XMMRegisterID dst) {
   MOZ_ASSERT(imm < 16);
   // Despite being a "ps" instruction, vblendps is encoded with the "pd"
   // prefix.
   threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm,
                      offset, base, src0, dst);
 }

 // vblendvps / vblendvpd: variable blend selected by the sign bits of the
 // `mask` register; vblendvOpSimd picks the legacy-SSE or VEX encoding.
 void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0,
                   XMMRegisterID dst) {
   vblendvOpSimd("vblendvps", OP3_BLENDVPS_VdqWdq, OP3_VBLENDVPS_VdqWdq, mask,
                 src1, src0, dst);
 }
 void vblendvps_mr(XMMRegisterID mask, int32_t offset, RegisterID base,
                   XMMRegisterID src0, XMMRegisterID dst) {
   vblendvOpSimd("vblendvps", OP3_BLENDVPS_VdqWdq, OP3_VBLENDVPS_VdqWdq, mask,
                 offset, base, src0, dst);
 }
 void vblendvpd_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0,
                   XMMRegisterID dst) {
   vblendvOpSimd("vblendvpd", OP3_BLENDVPD_VdqWdq, OP3_VBLENDVPD_VdqWdq, mask,
                 src1, src0, dst);
 }
   3996 
 // vmovsldup / vmovshdup (SSE3): duplicate the even (sl) or odd (sh)
 // float lanes of the source into dst.

 void vmovsldup_rr(XMMRegisterID src, XMMRegisterID dst) {
   twoByteOpSimd("vmovsldup", VEX_SS, OP2_MOVSLDUP_VpsWps, src, invalid_xmm,
                 dst);
 }
 void vmovsldup_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   twoByteOpSimd("vmovsldup", VEX_SS, OP2_MOVSLDUP_VpsWps, offset, base,
                 invalid_xmm, dst);
 }

 void vmovshdup_rr(XMMRegisterID src, XMMRegisterID dst) {
   twoByteOpSimd("vmovshdup", VEX_SS, OP2_MOVSHDUP_VpsWps, src, invalid_xmm,
                 dst);
 }
 void vmovshdup_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   twoByteOpSimd("vmovshdup", VEX_SS, OP2_MOVSHDUP_VpsWps, offset, base,
                 invalid_xmm, dst);
 }
   4014 
 // Scalar min/max: vminsd/vmaxsd (double) and vminss/vmaxss (single).

 void vminsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vminsd", VEX_SD, OP2_MINSD_VsdWsd, src1, src0, dst);
 }
 void vminsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vminsd", VEX_SD, OP2_MINSD_VsdWsd, offset, base, src0, dst);
 }

 void vminss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vminss", VEX_SS, OP2_MINSS_VssWss, src1, src0, dst);
 }

 void vmaxsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vmaxsd", VEX_SD, OP2_MAXSD_VsdWsd, src1, src0, dst);
 }
 void vmaxsd_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                XMMRegisterID dst) {
   twoByteOpSimd("vmaxsd", VEX_SD, OP2_MAXSD_VsdWsd, offset, base, src0, dst);
 }

 void vmaxss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vmaxss", VEX_SS, OP2_MAXSS_VssWss, src1, src0, dst);
 }
   4038 
 // Packed unsigned rounding average: vpavgb (bytes) and vpavgw (words).

 void vpavgb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpavgb", VEX_PD, OP2_PAVGB_VdqWdq, src1, src0, dst);
 }

 void vpavgw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpavgw", VEX_PD, OP2_PAVGW_VdqWdq, src1, src0, dst);
 }
   4046 
 // Packed integer min/max for every width/signedness combination.
 // SSE2-era forms (vpminub/vpmaxub, vpminsw/vpmaxsw) use two-byte 0F
 // opcodes; the remaining combinations are SSE4.1 three-byte 0F 38
 // opcodes. Each has a register-source _rr form and an absolute-address
 // _mr form.

 // Signed bytes.
 void vpminsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpminsb", VEX_PD, OP3_PMINSB_VdqWdq, ESCAPE_38, src1, src0,
                   dst);
 }
 void vpminsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpminsb", VEX_PD, OP3_PMINSB_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }

 void vpmaxsb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpmaxsb", VEX_PD, OP3_PMAXSB_VdqWdq, ESCAPE_38, src1, src0,
                   dst);
 }
 void vpmaxsb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpmaxsb", VEX_PD, OP3_PMAXSB_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }

 // Unsigned bytes (SSE2 two-byte encoding).
 void vpminub_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpminub", VEX_PD, OP2_PMINUB_VdqWdq, src1, src0, dst);
 }
 void vpminub_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpminub", VEX_PD, OP2_PMINUB_VdqWdq, address, src0, dst);
 }

 void vpmaxub_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpmaxub", VEX_PD, OP2_PMAXUB_VdqWdq, src1, src0, dst);
 }
 void vpmaxub_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpmaxub", VEX_PD, OP2_PMAXUB_VdqWdq, address, src0, dst);
 }

 // Signed words (SSE2 two-byte encoding).
 void vpminsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpminsw", VEX_PD, OP2_PMINSW_VdqWdq, src1, src0, dst);
 }
 void vpminsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpminsw", VEX_PD, OP2_PMINSW_VdqWdq, address, src0, dst);
 }

 void vpmaxsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpmaxsw", VEX_PD, OP2_PMAXSW_VdqWdq, src1, src0, dst);
 }
 void vpmaxsw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpmaxsw", VEX_PD, OP2_PMAXSW_VdqWdq, address, src0, dst);
 }

 // Unsigned words.
 void vpminuw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpminuw", VEX_PD, OP3_PMINUW_VdqWdq, ESCAPE_38, src1, src0,
                   dst);
 }
 void vpminuw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpminuw", VEX_PD, OP3_PMINUW_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }

 void vpmaxuw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpmaxuw", VEX_PD, OP3_PMAXUW_VdqWdq, ESCAPE_38, src1, src0,
                   dst);
 }
 void vpmaxuw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpmaxuw", VEX_PD, OP3_PMAXUW_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }

 // Signed dwords.
 void vpminsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpminsd", VEX_PD, OP3_PMINSD_VdqWdq, ESCAPE_38, src1, src0,
                   dst);
 }
 void vpminsd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpminsd", VEX_PD, OP3_PMINSD_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }

 void vpmaxsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpmaxsd", VEX_PD, OP3_PMAXSD_VdqWdq, ESCAPE_38, src1, src0,
                   dst);
 }
 void vpmaxsd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpmaxsd", VEX_PD, OP3_PMAXSD_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }

 // Unsigned dwords.
 void vpminud_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpminud", VEX_PD, OP3_PMINUD_VdqWdq, ESCAPE_38, src1, src0,
                   dst);
 }
 void vpminud_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpminud", VEX_PD, OP3_PMINUD_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }

 void vpmaxud_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpmaxud", VEX_PD, OP3_PMAXUD_VdqWdq, ESCAPE_38, src1, src0,
                   dst);
 }
 void vpmaxud_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpmaxud", VEX_PD, OP3_PMAXUD_VdqWdq, ESCAPE_38, address,
                   src0, dst);
 }
   4146 
 // Packed narrowing with saturation: words->bytes (vpacksswb signed,
 // vpackuswb unsigned) and dwords->words (vpackssdw signed, vpackusdw
 // unsigned; the latter is SSE4.1, three-byte 0F 38 encoding).

 void vpacksswb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpacksswb", VEX_PD, OP2_PACKSSWB_VdqWdq, src1, src0, dst);
 }
 void vpacksswb_mr(const void* address, XMMRegisterID src0,
                   XMMRegisterID dst) {
   twoByteOpSimd("vpacksswb", VEX_PD, OP2_PACKSSWB_VdqWdq, address, src0, dst);
 }

 void vpackuswb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpackuswb", VEX_PD, OP2_PACKUSWB_VdqWdq, src1, src0, dst);
 }
 void vpackuswb_mr(const void* address, XMMRegisterID src0,
                   XMMRegisterID dst) {
   twoByteOpSimd("vpackuswb", VEX_PD, OP2_PACKUSWB_VdqWdq, address, src0, dst);
 }

 void vpackssdw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   twoByteOpSimd("vpackssdw", VEX_PD, OP2_PACKSSDW_VdqWdq, src1, src0, dst);
 }
 void vpackssdw_mr(const void* address, XMMRegisterID src0,
                   XMMRegisterID dst) {
   twoByteOpSimd("vpackssdw", VEX_PD, OP2_PACKSSDW_VdqWdq, address, src0, dst);
 }

 void vpackusdw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
   threeByteOpSimd("vpackusdw", VEX_PD, OP3_PACKUSDW_VdqWdq, ESCAPE_38, src1,
                   src0, dst);
 }
 void vpackusdw_mr(const void* address, XMMRegisterID src0,
                   XMMRegisterID dst) {
   threeByteOpSimd("vpackusdw", VEX_PD, OP3_PACKUSDW_VdqWdq, ESCAPE_38,
                   address, src0, dst);
 }
   4180 
 // Packed absolute value (SSSE3): vpabsb (bytes), vpabsw (words),
 // vpabsd (dwords). Single-source; invalid_xmm fills the unused operand.

 void vpabsb_rr(XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpSimd("vpabsb", VEX_PD, OP3_PABSB_VdqWdq, ESCAPE_38, src,
                   invalid_xmm, dst);
 }

 void vpabsw_rr(XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpSimd("vpabsw", VEX_PD, OP3_PABSW_VdqWdq, ESCAPE_38, src,
                   invalid_xmm, dst);
 }

 void vpabsd_rr(XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpSimd("vpabsd", VEX_PD, OP3_PABSD_VdqWdq, ESCAPE_38, src,
                   invalid_xmm, dst);
 }
   4195 
 // Packed integer widening (SSE4.1): vpmovsx* sign-extends, vpmovzx*
 // zero-extends the low lanes of the source into wider lanes of dst
 // (bw: bytes->words, bd: bytes->dwords, bq: bytes->qwords).

 void vpmovsxbw_rr(XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpSimd("vpmovsxbw", VEX_PD, OP3_PMOVSXBW_VdqWdq, ESCAPE_38, src,
                   invalid_xmm, dst);
 }
 // Source loaded from [base + offset].
 void vpmovsxbw_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   threeByteOpSimd("vpmovsxbw", VEX_PD, OP3_PMOVSXBW_VdqWdq, ESCAPE_38, offset,
                   base, invalid_xmm, dst);
 }
 // Source loaded from [base + index*scale + offset].
 void vpmovsxbw_mr(int32_t offset, RegisterID base, RegisterID index,
                   int32_t scale, XMMRegisterID dst) {
   threeByteOpSimd("vpmovsxbw", VEX_PD, OP3_PMOVSXBW_VdqWdq, ESCAPE_38, offset,
                   base, index, scale, invalid_xmm, dst);
 }

 void vpmovzxbw_rr(XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpSimd("vpmovzxbw", VEX_PD, OP3_PMOVZXBW_VdqWdq, ESCAPE_38, src,
                   invalid_xmm, dst);
 }
 void vpmovzxbw_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   threeByteOpSimd("vpmovzxbw", VEX_PD, OP3_PMOVZXBW_VdqWdq, ESCAPE_38, offset,
                   base, invalid_xmm, dst);
 }
 void vpmovzxbw_mr(int32_t offset, RegisterID base, RegisterID index,
                   int32_t scale, XMMRegisterID dst) {
   threeByteOpSimd("vpmovzxbw", VEX_PD, OP3_PMOVZXBW_VdqWdq, ESCAPE_38, offset,
                   base, index, scale, invalid_xmm, dst);
 }

 void vpmovzxbd_rr(XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpSimd("vpmovzxbd", VEX_PD, OP3_PMOVZXBD_VdqWdq, ESCAPE_38, src,
                   invalid_xmm, dst);
 }

 void vpmovzxbq_rr(XMMRegisterID src, XMMRegisterID dst) {
   threeByteOpSimd("vpmovzxbq", VEX_PD, OP3_PMOVZXBQ_VdqWdq, ESCAPE_38, src,
                   invalid_xmm, dst);
 }
   4233 
   4234  void vpmovsxwd_rr(XMMRegisterID src, XMMRegisterID dst) {
   4235    threeByteOpSimd("vpmovsxwd", VEX_PD, OP3_PMOVSXWD_VdqWdq, ESCAPE_38, src,
   4236                    invalid_xmm, dst);
   4237  }
   4238  void vpmovsxwd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   4239    threeByteOpSimd("vpmovsxwd", VEX_PD, OP3_PMOVSXWD_VdqWdq, ESCAPE_38, offset,
   4240                    base, invalid_xmm, dst);
   4241  }
   4242  void vpmovsxwd_mr(int32_t offset, RegisterID base, RegisterID index,
   4243                    int32_t scale, XMMRegisterID dst) {
   4244    threeByteOpSimd("vpmovsxwd", VEX_PD, OP3_PMOVSXWD_VdqWdq, ESCAPE_38, offset,
   4245                    base, index, scale, invalid_xmm, dst);
   4246  }
   4247 
   4248  void vpmovzxwd_rr(XMMRegisterID src, XMMRegisterID dst) {
   4249    threeByteOpSimd("vpmovzxwd", VEX_PD, OP3_PMOVZXWD_VdqWdq, ESCAPE_38, src,
   4250                    invalid_xmm, dst);
   4251  }
   4252  void vpmovzxwd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
   4253    threeByteOpSimd("vpmovzxwd", VEX_PD, OP3_PMOVZXWD_VdqWdq, ESCAPE_38, offset,
   4254                    base, invalid_xmm, dst);
   4255  }
   4256  void vpmovzxwd_mr(int32_t offset, RegisterID base, RegisterID index,
   4257                    int32_t scale, XMMRegisterID dst) {
   4258    threeByteOpSimd("vpmovzxwd", VEX_PD, OP3_PMOVZXWD_VdqWdq, ESCAPE_38, offset,
   4259                    base, index, scale, invalid_xmm, dst);
   4260  }
   4261 
   4262  void vpmovzxwq_rr(XMMRegisterID src, XMMRegisterID dst) {
   4263    threeByteOpSimd("vpmovzxwq", VEX_PD, OP3_PMOVZXWQ_VdqWdq, ESCAPE_38, src,
   4264                    invalid_xmm, dst);
   4265  }
   4266 
   4267  void vpmovsxdq_rr(XMMRegisterID src, XMMRegisterID dst) {
   4268    threeByteOpSimd("vpmovsxwd", VEX_PD, OP3_PMOVSXDQ_VdqWdq, ESCAPE_38, src,
   4269                    invalid_xmm, dst);
   4270  }
  // PMOVSXDQ memory-source forms (base+offset and base+index*scale).
  void vpmovsxdq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    threeByteOpSimd("vpmovsxdq", VEX_PD, OP3_PMOVSXDQ_VdqWdq, ESCAPE_38, offset,
                    base, invalid_xmm, dst);
  }
  void vpmovsxdq_mr(int32_t offset, RegisterID base, RegisterID index,
                    int32_t scale, XMMRegisterID dst) {
    threeByteOpSimd("vpmovsxdq", VEX_PD, OP3_PMOVSXDQ_VdqWdq, ESCAPE_38, offset,
                    base, index, scale, invalid_xmm, dst);
  }
   4280 
   4281  void vpmovzxdq_rr(XMMRegisterID src, XMMRegisterID dst) {
   4282    threeByteOpSimd("vpmovzxwd", VEX_PD, OP3_PMOVZXDQ_VdqWdq, ESCAPE_38, src,
   4283                    invalid_xmm, dst);
   4284  }
  // PMOVZXDQ memory-source forms (base+offset and base+index*scale).
  void vpmovzxdq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    threeByteOpSimd("vpmovzxdq", VEX_PD, OP3_PMOVZXDQ_VdqWdq, ESCAPE_38, offset,
                    base, invalid_xmm, dst);
  }
  void vpmovzxdq_mr(int32_t offset, RegisterID base, RegisterID index,
                    int32_t scale, XMMRegisterID dst) {
    threeByteOpSimd("vpmovzxdq", VEX_PD, OP3_PMOVZXDQ_VdqWdq, ESCAPE_38, offset,
                    base, index, scale, invalid_xmm, dst);
  }
   4294 
  // PHADDD (SSSE3): horizontal pairwise add of dwords from src0 and src1.
  void vphaddd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vphaddd", VEX_PD, OP3_PHADDD_VdqWdq, ESCAPE_38, src1, src0,
                    dst);
  }

  // PALIGNR (SSSE3): right-shift the 32-byte concatenation src0:src1 by
  // `imm` bytes and keep the low 16 bytes. imm must fit the byte immediate
  // encoding (asserted < 32 here).
  void vpalignr_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0,
                    XMMRegisterID dst) {
    MOZ_ASSERT(imm < 32);
    threeByteOpImmSimd("vpalignr", VEX_PD, OP3_PALIGNR_VdqWdqIb, ESCAPE_3A, imm,
                       src1, src0, dst);
  }
   4306 
  // PUNPCK{L,H}{BW,WD,DQ,QDQ}: interleave the low (L) or high (H) lanes of
  // src0 and src1 into dst at the given element width.
  void vpunpcklbw_rr(XMMRegisterID src1, XMMRegisterID src0,
                     XMMRegisterID dst) {
    twoByteOpSimd("vpunpcklbw", VEX_PD, OP2_PUNPCKLBW_VdqWdq, src1, src0, dst);
  }
  void vpunpckhbw_rr(XMMRegisterID src1, XMMRegisterID src0,
                     XMMRegisterID dst) {
    twoByteOpSimd("vpunpckhbw", VEX_PD, OP2_PUNPCKHBW_VdqWdq, src1, src0, dst);
  }

  void vpunpckldq_rr(XMMRegisterID src1, XMMRegisterID src0,
                     XMMRegisterID dst) {
    twoByteOpSimd("vpunpckldq", VEX_PD, OP2_PUNPCKLDQ_VdqWdq, src1, src0, dst);
  }
  void vpunpckldq_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                     XMMRegisterID dst) {
    twoByteOpSimd("vpunpckldq", VEX_PD, OP2_PUNPCKLDQ_VdqWdq, offset, base,
                  src0, dst);
  }
  void vpunpckldq_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpunpckldq", VEX_PD, OP2_PUNPCKLDQ_VdqWdq, addr, src0, dst);
  }
  void vpunpcklqdq_rr(XMMRegisterID src1, XMMRegisterID src0,
                      XMMRegisterID dst) {
    twoByteOpSimd("vpunpcklqdq", VEX_PD, OP2_PUNPCKLQDQ_VdqWdq, src1, src0,
                  dst);
  }
  void vpunpcklqdq_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
                      XMMRegisterID dst) {
    twoByteOpSimd("vpunpcklqdq", VEX_PD, OP2_PUNPCKLQDQ_VdqWdq, offset, base,
                  src0, dst);
  }
  void vpunpcklqdq_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpunpcklqdq", VEX_PD, OP2_PUNPCKLQDQ_VdqWdq, addr, src0,
                  dst);
  }
  void vpunpckhdq_rr(XMMRegisterID src1, XMMRegisterID src0,
                     XMMRegisterID dst) {
    twoByteOpSimd("vpunpckhdq", VEX_PD, OP2_PUNPCKHDQ_VdqWdq, src1, src0, dst);
  }
  void vpunpckhqdq_rr(XMMRegisterID src1, XMMRegisterID src0,
                      XMMRegisterID dst) {
    twoByteOpSimd("vpunpckhqdq", VEX_PD, OP2_PUNPCKHQDQ_VdqWdq, src1, src0,
                  dst);
  }
  void vpunpcklwd_rr(XMMRegisterID src1, XMMRegisterID src0,
                     XMMRegisterID dst) {
    twoByteOpSimd("vpunpcklwd", VEX_PD, OP2_PUNPCKLWD_VdqWdq, src1, src0, dst);
  }
  void vpunpckhwd_rr(XMMRegisterID src1, XMMRegisterID src0,
                     XMMRegisterID dst) {
    twoByteOpSimd("vpunpckhwd", VEX_PD, OP2_PUNPCKHWD_VdqWdq, src1, src0, dst);
  }
   4359 
  // PADDQ/PSUBQ: packed 64-bit integer add/subtract.
  void vpaddq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpaddq", VEX_PD, OP2_PADDQ_VdqWdq, src1, src0, dst);
  }
  void vpsubq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vpsubq", VEX_PD, OP2_PSUBQ_VdqWdq, src1, src0, dst);
  }
   4366 
  // VPBROADCAST{B,W,D,Q}/VBROADCASTSS: replicate a single element across all
  // lanes of dst. Unary (src0 = invalid_xmm); memory overloads accept
  // base+offset or base+index*scale addressing.
  void vbroadcastb_rr(XMMRegisterID src, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastb", VEX_PD, OP3_VBROADCASTB_VxWx, ESCAPE_38, src,
                    invalid_xmm, dst);
  }
  void vbroadcastb_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastb", VEX_PD, OP3_VBROADCASTB_VxWx, ESCAPE_38,
                    offset, base, invalid_xmm, dst);
  }
  void vbroadcastb_mr(int32_t offset, RegisterID base, RegisterID index,
                      int32_t scale, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastb", VEX_PD, OP3_VBROADCASTB_VxWx, ESCAPE_38,
                    offset, base, index, scale, invalid_xmm, dst);
  }
  void vbroadcastw_rr(XMMRegisterID src, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastw", VEX_PD, OP3_VBROADCASTW_VxWx, ESCAPE_38, src,
                    invalid_xmm, dst);
  }
  void vbroadcastw_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastw", VEX_PD, OP3_VBROADCASTW_VxWx, ESCAPE_38,
                    offset, base, invalid_xmm, dst);
  }
  void vbroadcastw_mr(int32_t offset, RegisterID base, RegisterID index,
                      int32_t scale, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastw", VEX_PD, OP3_VBROADCASTW_VxWx, ESCAPE_38,
                    offset, base, index, scale, invalid_xmm, dst);
  }
  void vbroadcastd_rr(XMMRegisterID src, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastd", VEX_PD, OP3_VBROADCASTD_VxWx, ESCAPE_38, src,
                    invalid_xmm, dst);
  }
  void vbroadcastd_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastd", VEX_PD, OP3_VBROADCASTD_VxWx, ESCAPE_38,
                    offset, base, invalid_xmm, dst);
  }
  void vbroadcastd_mr(int32_t offset, RegisterID base, RegisterID index,
                      int32_t scale, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastd", VEX_PD, OP3_VBROADCASTD_VxWx, ESCAPE_38,
                    offset, base, index, scale, invalid_xmm, dst);
  }
  void vbroadcastq_rr(XMMRegisterID src, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastq", VEX_PD, OP3_VBROADCASTQ_VxWx, ESCAPE_38, src,
                    invalid_xmm, dst);
  }
  void vbroadcastq_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastq", VEX_PD, OP3_VBROADCASTQ_VxWx, ESCAPE_38,
                    offset, base, invalid_xmm, dst);
  }
  void vbroadcastq_mr(int32_t offset, RegisterID base, RegisterID index,
                      int32_t scale, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastq", VEX_PD, OP3_VBROADCASTQ_VxWx, ESCAPE_38,
                    offset, base, index, scale, invalid_xmm, dst);
  }
  void vbroadcastss_rr(XMMRegisterID src, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastss", VEX_PD, OP3_VBROADCASTSS_VxWd, ESCAPE_38,
                    src, invalid_xmm, dst);
  }
  void vbroadcastss_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastss", VEX_PD, OP3_VBROADCASTSS_VxWd, ESCAPE_38,
                    offset, base, invalid_xmm, dst);
  }
  void vbroadcastss_mr(int32_t offset, RegisterID base, RegisterID index,
                       int32_t scale, XMMRegisterID dst) {
    threeByteOpSimd("vbroadcastss", VEX_PD, OP3_VBROADCASTSS_VxWd, ESCAPE_38,
                    offset, base, index, scale, invalid_xmm, dst);
  }
   4432 
  // PSIGND (SSSE3): negate, zero, or keep each dword of src0 according to
  // the sign of the corresponding dword in src1.
  void vpsignd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    threeByteOpSimd("vpsignd", VEX_PD, OP3_PSIGND_PdqQdq, ESCAPE_38, src1, src0,
                    dst);
  }
   4437 
   4438  // F16C instructions:
   4439 
  // VCVTPH2PS: convert packed half-precision floats to single precision.
  // VEX-only (F16C), hence the direct threeByteOpVex call with no legacy SSE
  // fallback.
  void vcvtph2ps_rr(XMMRegisterID src, XMMRegisterID dst) {
    spew("vcvtph2ps  %s, %s", XMMRegName(src), XMMRegName(dst));
    m_formatter.threeByteOpVex(VEX_PD, OP3_VCVTPH2PS_VxWxIb, ESCAPE_38,
                               RegisterID(src), invalid_xmm, dst);
  }

  // VCVTPS2PH: convert packed single-precision floats to half precision.
  // Note the operand order: dst is the r/m operand for this instruction
  // (WxVxIb), so it is passed where the r/m register normally goes.
  void vcvtps2ph_rr(XMMRegisterID src, XMMRegisterID dst) {
    // Use MXCSR.RC for rounding.
    //
    // An explicit rounding mode can be given to this instruction, but using
    // MXCSR.RC is the default option.
    constexpr int8_t roundingMode = 4;

    spew("vcvtps2ph  $0x%x, %s, %s", roundingMode, XMMRegName(src),
         XMMRegName(dst));
    m_formatter.threeByteOpVex(VEX_PD, OP3_VCVTPS2PH_WxVxIb, ESCAPE_3A,
                               RegisterID(dst), invalid_xmm, src);
    m_formatter.immediate8(roundingMode);
  }

  // Overload taking an explicit rounding mode instead of MXCSR.RC.
  void vcvtps2ph_rr(XMMRegisterID src, XMMRegisterID dst,
                    RoundingMode roundingMode) {
    spew("vcvtps2ph  $0x%x, %s, %s", roundingMode, XMMRegName(src),
         XMMRegName(dst));
    m_formatter.threeByteOpVex(VEX_PD, OP3_VCVTPS2PH_WxVxIb, ESCAPE_3A,
                               RegisterID(dst), invalid_xmm, src);
    m_formatter.immediate8(roundingMode);
  }
   4468 
   4469  // BMI instructions:
   4470 
  // BMI2 SARX/SHLX/SHRX: flagless variable shifts. These are GPR
  // instructions that reuse the VEX encoder, so the shift-count register is
  // cast to XMMRegisterID purely to occupy the VEX.vvvv slot; the prefix
  // byte (F3/66/F2) selects which of the three shifts is encoded.
  void sarxl_rrr(RegisterID src, RegisterID shift, RegisterID dst) {
    spew("sarxl      %s, %s, %s", GPReg32Name(src), GPReg32Name(shift),
         GPReg32Name(dst));

    RegisterID rm = src;
    XMMRegisterID src0 = static_cast<XMMRegisterID>(shift);
    int reg = dst;
    m_formatter.threeByteOpVex(VEX_SS /* = F3 */, OP3_SARX_GyEyBy, ESCAPE_38,
                               rm, src0, reg);
  }

  void shlxl_rrr(RegisterID src, RegisterID shift, RegisterID dst) {
    spew("shlxl      %s, %s, %s", GPReg32Name(src), GPReg32Name(shift),
         GPReg32Name(dst));

    RegisterID rm = src;
    XMMRegisterID src0 = static_cast<XMMRegisterID>(shift);
    int reg = dst;
    m_formatter.threeByteOpVex(VEX_PD /* = 66 */, OP3_SHLX_GyEyBy, ESCAPE_38,
                               rm, src0, reg);
  }

  void shrxl_rrr(RegisterID src, RegisterID shift, RegisterID dst) {
    spew("shrxl      %s, %s, %s", GPReg32Name(src), GPReg32Name(shift),
         GPReg32Name(dst));

    RegisterID rm = src;
    XMMRegisterID src0 = static_cast<XMMRegisterID>(shift);
    int reg = dst;
    m_formatter.threeByteOpVex(VEX_SD /* = F2 */, OP3_SHRX_GyEyBy, ESCAPE_38,
                               rm, src0, reg);
  }

  // BMI1 ANDN: dst = ~src1 & src2. src1 goes in VEX.vvvv (same cast trick
  // as the shifts above).
  void andnl_rrr(RegisterID src1, RegisterID src2, RegisterID dst) {
    spew("andnl      %s, %s, %s", GPReg32Name(src1), GPReg32Name(src2),
         GPReg32Name(dst));

    RegisterID rm = src2;
    XMMRegisterID src0 = static_cast<XMMRegisterID>(src1);
    int reg = dst;
    m_formatter.threeByteOpVex(VEX_PS, OP3_ANDN_GyByEy, ESCAPE_38, rm, src0,
                               reg);
  }
   4514 
   4515  // FMA instructions:
   4516 
   4517  void vfmadd231ps_rrr(XMMRegisterID src1, XMMRegisterID src0,
   4518                       XMMRegisterID dst) {
   4519    spew("vfmadd213ps %s, %s, %s", XMMRegName(src1), XMMRegName(src0),
   4520         XMMRegName(dst));
   4521 
   4522    m_formatter.threeByteOpVex(VEX_PD, OP3_VFMADD231PS_VxHxWx, ESCAPE_38,
   4523                               (RegisterID)src1, src0, (RegisterID)dst);
   4524  }
   4525 
   4526  void vfnmadd231ps_rrr(XMMRegisterID src1, XMMRegisterID src0,
   4527                        XMMRegisterID dst) {
   4528    spew("vfnmadd213ps %s, %s, %s", XMMRegName(src1), XMMRegName(src0),
   4529         XMMRegName(dst));
   4530 
   4531    m_formatter.threeByteOpVex(VEX_PD, OP3_VFNMADD231PS_VxHxWx, ESCAPE_38,
   4532                               (RegisterID)src1, src0, (RegisterID)dst);
   4533  }
   4534 
   4535  void vfmadd231pd_rrr(XMMRegisterID src1, XMMRegisterID src0,
   4536                       XMMRegisterID dst) {
   4537    spew("vfmadd213pd %s, %s, %s", XMMRegName(src1), XMMRegName(src0),
   4538         XMMRegName(dst));
   4539 
   4540    m_formatter.threeByteOpVex64(VEX_PD, OP3_VFMADD231PD_VxHxWx, ESCAPE_38,
   4541                                 (RegisterID)src1, src0, (RegisterID)dst);
   4542  }
   4543 
   4544  void vfnmadd231pd_rrr(XMMRegisterID src1, XMMRegisterID src0,
   4545                        XMMRegisterID dst) {
   4546    spew("vfnmadd213pd %s, %s, %s", XMMRegName(src1), XMMRegName(src0),
   4547         XMMRegName(dst));
   4548 
   4549    m_formatter.threeByteOpVex64(VEX_PD, OP3_VFNMADD231PD_VxHxWx, ESCAPE_38,
   4550                                 (RegisterID)src1, src0, (RegisterID)dst);
   4551  }
   4552 
   4553  // Misc instructions:
   4554 
  // INT3 breakpoint trap.
  void int3() {
    spew("int3");
    m_formatter.oneByteOp(OP_INT3);
  }

  // UD2: guaranteed-undefined instruction; raises #UD.
  void ud2() {
    spew("ud2");
    m_formatter.twoByteOp(OP2_UD2);
  }

  // Near return.
  void ret() {
    spew("ret");
    m_formatter.oneByteOp(OP_RET);
  }

  // Near return popping `imm` extra bytes off the stack (RET imm16).
  void ret_i(int32_t imm) {
    spew("ret        $%d", imm);
    m_formatter.oneByteOp(OP_RET_Iz);
    m_formatter.immediate16u(imm);
  }

  // LFENCE/MFENCE share opcode 0F AE and are distinguished by the ModRM
  // reg field (/5 and /6 respectively).
  void lfence() {
    spew("lfence");
    m_formatter.twoByteOp(OP_FENCE, (RegisterID)0, 0b101);
  }
  void mfence() {
    spew("mfence");
    m_formatter.twoByteOp(OP_FENCE, (RegisterID)0, 0b110);
  }

  // PAUSE is encoded as F3 90 (a REP-prefixed NOP).
  void pause() {
    spew("pause");
    m_formatter.oneByteOp(PRE_REP);
    m_formatter.oneByteOp(OP_NOP);
  }
   4590 
   4591  // Assembler admin methods:
   4592 
  // Bind a label at the current end of the output buffer and return it.
  JmpDst label() {
    JmpDst r = JmpDst(m_formatter.size());
    spew(".set .Llabel%d, .", r.offset());
    return r;
  }

  // Current byte offset into the output buffer.
  size_t currentOffset() const { return m_formatter.size(); }

  // Reinterpret a jump source (plus an optional byte offset) as a label.
  static JmpDst labelFor(JmpSrc jump, intptr_t offset = 0) {
    return JmpDst(jump.offset() + offset);
  }
   4604 
  // Pad with HLT bytes until the buffer is aligned; used where the padding
  // must never be executed.
  void haltingAlign(int alignment) {
    spew(".balign %d, 0x%x   # hlt", alignment, unsigned(OP_HLT));
    while (!m_formatter.isAligned(alignment)) {
      m_formatter.oneByteOp(OP_HLT);
    }
  }

  // Pad with NOPs until the buffer is aligned; safe to execute through.
  void nopAlign(int alignment) {
    spew(".balign %d", alignment);

    int remainder = m_formatter.size() % alignment;
    if (remainder > 0) {
      insert_nop(alignment - remainder);
    }
  }
   4620 
  // Raw data emitters: append literal constants to the code stream.

  // Emit a pointer-sized jump-table entry (8 bytes on x64, 4 on x86).
  void jumpTablePointer(uintptr_t ptr) {
#ifdef JS_CODEGEN_X64
    spew(".quad 0x%" PRIxPTR, ptr);
#else
    spew(".int 0x%" PRIxPTR, ptr);
#endif
    m_formatter.jumpTablePointer(ptr);
  }

  void doubleConstant(double d) {
    spew(".double %.16g", d);
    m_formatter.doubleConstant(d);
  }
  void floatConstant(float f) {
    spew(".float %.16g", f);
    m_formatter.floatConstant(f);
  }

  // Emit a 16-byte SIMD constant; the buffer must already be 16-aligned.
  void simd128Constant(const void* data) {
    const uint32_t* dw = reinterpret_cast<const uint32_t*>(data);
    spew(".int 0x%08x,0x%08x,0x%08x,0x%08x", dw[0], dw[1], dw[2], dw[3]);
    MOZ_ASSERT(m_formatter.isAligned(16));
    m_formatter.simd128Constant(data);
  }

  void int32Constant(int32_t i) {
    spew(".int %d", i);
    m_formatter.int32Constant(i);
  }
  void int64Constant(int64_t i) {
    spew(".quad %lld", (long long)i);
    m_formatter.int64Constant(i);
  }
   4654 
   4655  // Linking & patching:
   4656 
  // Sanity-check that a JmpSrc points inside the buffer, far enough in that
  // its 32-bit relocation field (stored at offset - 4) is fully contained.
  void assertValidJmpSrc(JmpSrc src) {
    // The target offset is stored at offset - 4.
    MOZ_RELEASE_ASSERT(src.offset() > int32_t(sizeof(int32_t)));
    MOZ_RELEASE_ASSERT(size_t(src.offset()) <= size());
  }
   4662 
  // Unbound jumps are threaded into a linked list through their own 32-bit
  // patch fields: the int32 stored at `from.offset() - 4` holds the offset
  // of the next jump in the chain, with -1 terminating the list.

  // Read the next link after `from`. Returns false at the end of the chain
  // (or if the assembler has OOM'd and the buffer is untrustworthy).
  bool nextJump(const JmpSrc& from, JmpSrc* next) {
    // Sanity check - if the assembler has OOM'd, it will start overwriting
    // its internal buffer and thus our links could be garbage.
    if (oom()) {
      return false;
    }

    assertValidJmpSrc(from);
    MOZ_ASSERT(from.trailing() == 0);

    const unsigned char* code = m_formatter.data();
    int32_t offset = GetInt32(code + from.offset());
    if (offset == -1) {
      return false;
    }

    MOZ_RELEASE_ASSERT(size_t(offset) < size(), "nextJump bogus offset");

    *next = JmpSrc(offset);
    return true;
  }
  // Write `to` as the next link after `from` in the chain.
  void setNextJump(const JmpSrc& from, const JmpSrc& to) {
    // Sanity check - if the assembler has OOM'd, it will start overwriting
    // its internal buffer and thus our links could be garbage.
    if (oom()) {
      return;
    }

    assertValidJmpSrc(from);
    MOZ_ASSERT(from.trailing() == 0);
    MOZ_RELEASE_ASSERT(to.offset() == -1 || size_t(to.offset()) <= size());

    unsigned char* code = m_formatter.data();
    SetInt32(code + from.offset(), to.offset());
  }
   4698 
  // Resolve a jump: patch the rel32 field of `from` so it targets the bound
  // label `to`. No-op if the assembler has OOM'd (buffer may be garbage).
  void linkJump(JmpSrc from, JmpDst to) {
    MOZ_ASSERT(from.offset() != -1);
    MOZ_ASSERT(to.offset() != -1);

    // Sanity check - if the assembler has OOM'd, it will start overwriting
    // its internal buffer and thus our links could be garbage.
    if (oom()) {
      return;
    }

    assertValidJmpSrc(from);
    MOZ_RELEASE_ASSERT(size_t(to.offset()) <= size());

    spew(".set .Lfrom%d, .Llabel%d", from.offset(), to.offset());
    unsigned char* code = m_formatter.data();
    SetRel32(code + from.offset(), code + to.offset(), from.trailing());
  }
   4716 
  // Copy the finished code out of the assembler's buffer into `dst`.
  void executableCopy(void* dst) {
    const unsigned char* src = m_formatter.buffer();
    memcpy(dst, src, size());
  }
  // Append pre-encoded bytes verbatim; returns false on allocation failure.
  [[nodiscard]] bool appendRawCode(const uint8_t* code, size_t numBytes) {
    return m_formatter.append(code, numBytes);
  }

  // `offset` is the instruction offset at the end of the instruction.
  // Adjust an already-emitted 4-byte PC-relative field in place by `bias`.
  void addToPCRel4(uint32_t offset, int32_t bias) {
    unsigned char* code = m_formatter.data();
    SetInt32(code + offset, GetInt32(code + offset) + bias);
  }
   4730 
   4731 protected:
   4732  static bool CAN_SIGN_EXTEND_8_32(int32_t value) {
   4733    return value == (int32_t)(int8_t)value;
   4734  }
   4735  static bool CAN_SIGN_EXTEND_16_32(int32_t value) {
   4736    return value == (int32_t)(int16_t)value;
   4737  }
   4738  static bool CAN_ZERO_EXTEND_8_32(int32_t value) {
   4739    return value == (int32_t)(uint8_t)value;
   4740  }
   4741  static bool CAN_ZERO_EXTEND_8H_32(int32_t value) {
   4742    return value == (value & 0xff00);
   4743  }
   4744  static bool CAN_ZERO_EXTEND_16_32(int32_t value) {
   4745    return value == (int32_t)(uint16_t)value;
   4746  }
   4747  static bool CAN_ZERO_EXTEND_32_64(int32_t value) { return value >= 0; }
   4748 
   4749  // Methods for encoding SIMD instructions via either legacy SSE encoding or
   4750  // VEX encoding.
   4751 
  // Decide whether a SIMD op should be emitted with the legacy SSE encoding
  // instead of VEX. Without AVX (useVEX_ false) legacy is mandatory, and the
  // destructive two-operand form requires src0 == dst (or a unary op with
  // src0 == invalid_xmm).
  bool useLegacySSEEncoding(XMMRegisterID src0, XMMRegisterID dst) {
    // If we don't have AVX or it's disabled, use the legacy SSE encoding.
    if (!useVEX_) {
      MOZ_ASSERT(
          src0 == invalid_xmm || src0 == dst,
          "Legacy SSE (pre-AVX) encoding requires the output register to be "
          "the same as the src0 input register");
      return true;
    }

    // If src0 is the same as the output register, we might as well use
    // the legacy SSE encoding, since it is smaller. However, this is only
    // beneficial as long as we're not using ymm registers anywhere.
    return src0 == dst;
  }
   4767 
  // Variant of useLegacySSEEncoding for BLENDV-family ops: the legacy SSE
  // form additionally hard-codes the mask register as xmm0.
  bool useLegacySSEEncodingForVblendv(XMMRegisterID mask, XMMRegisterID src0,
                                      XMMRegisterID dst) {
    // Similar to useLegacySSEEncoding, but for vblendv the Legacy SSE
    // encoding also requires the mask to be in xmm0.

    if (!useVEX_) {
      MOZ_ASSERT(
          src0 == dst,
          "Legacy SSE (pre-AVX) encoding requires the output register to be "
          "the same as the src0 input register");
      MOZ_ASSERT(
          mask == xmm0,
          "Legacy SSE (pre-AVX) encoding for blendv requires the mask to be "
          "in xmm0");
      return true;
    }

    return src0 == dst && mask == xmm0;
  }
   4787 
  // True when VEX is unavailable/disabled, regardless of operands.
  bool useLegacySSEEncodingAlways() { return !useVEX_; }

  // Strip the leading 'v' from a VEX mnemonic to get its SSE spelling
  // for spew output (e.g. "vpaddq" -> "paddq").
  const char* legacySSEOpName(const char* name) {
    MOZ_ASSERT(name[0] == 'v');
    return name + 1;
  }
   4794 
  // Emit a two-byte-opcode SIMD instruction with a register r/m operand,
  // choosing legacy SSE vs VEX encoding via useLegacySSEEncoding. A unary op
  // passes src0 == invalid_xmm, which selects the two-operand spew form;
  // IsXMMReversedOperands flips the printed operand order for opcodes whose
  // dst is the r/m operand.
  void twoByteOpSimd(const char* name, VexOperandType ty,
                     TwoByteOpcodeID opcode, XMMRegisterID rm,
                     XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(dst),
             XMMRegName(rm));
      } else {
        spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm),
             XMMRegName(dst));
      }
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, (RegisterID)rm, dst);
      return;
    }

    if (src0 == invalid_xmm) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, %s", name, XMMRegName(dst), XMMRegName(rm));
      } else {
        spew("%-11s%s, %s", name, XMMRegName(rm), XMMRegName(dst));
      }
    } else {
      spew("%-11s%s, %s, %s", name, XMMRegName(rm), XMMRegName(src0),
           XMMRegName(dst));
    }
    m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst);
  }
   4823 
  // As twoByteOpSimd (register form), but with a trailing 8-bit immediate
  // appended after the instruction.
  void twoByteOpImmSimd(const char* name, VexOperandType ty,
                        TwoByteOpcodeID opcode, uint32_t imm, XMMRegisterID rm,
                        XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm),
           XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, (RegisterID)rm, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    if (src0 == invalid_xmm) {
      spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(rm), XMMRegName(dst));
    } else {
      spew("%-11s$0x%x, %s, %s, %s", name, imm, XMMRegName(rm),
           XMMRegName(src0), XMMRegName(dst));
    }
    m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst);
    m_formatter.immediate8u(imm);
  }
   4845 
  // As the register form above, but the r/m operand is memory addressed as
  // base + offset (8- or 32-bit displacement chosen by the formatter).
  void twoByteOpSimd(const char* name, VexOperandType ty,
                     TwoByteOpcodeID opcode, int32_t offset, RegisterID base,
                     XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, " MEM_ob, legacySSEOpName(name), XMMRegName(dst),
             ADDR_ob(offset, base));
      } else {
        spew("%-11s" MEM_ob ", %s", legacySSEOpName(name),
             ADDR_ob(offset, base), XMMRegName(dst));
      }
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, offset, base, dst);
      return;
    }

    if (src0 == invalid_xmm) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, " MEM_ob, name, XMMRegName(dst), ADDR_ob(offset, base));
      } else {
        spew("%-11s" MEM_ob ", %s", name, ADDR_ob(offset, base),
             XMMRegName(dst));
      }
    } else {
      spew("%-11s" MEM_ob ", %s, %s", name, ADDR_ob(offset, base),
           XMMRegName(src0), XMMRegName(dst));
    }
    m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst);
  }
   4875 
  // As the base+offset form above, but forces a full 32-bit displacement
  // encoding (used when the displacement will be patched later).
  void twoByteOpSimd_disp32(const char* name, VexOperandType ty,
                            TwoByteOpcodeID opcode, int32_t offset,
                            RegisterID base, XMMRegisterID src0,
                            XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, " MEM_o32b, legacySSEOpName(name), XMMRegName(dst),
             ADDR_o32b(offset, base));
      } else {
        spew("%-11s" MEM_o32b ", %s", legacySSEOpName(name),
             ADDR_o32b(offset, base), XMMRegName(dst));
      }
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp_disp32(opcode, offset, base, dst);
      return;
    }

    if (src0 == invalid_xmm) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, " MEM_o32b, name, XMMRegName(dst),
             ADDR_o32b(offset, base));
      } else {
        spew("%-11s" MEM_o32b ", %s", name, ADDR_o32b(offset, base),
             XMMRegName(dst));
      }
    } else {
      spew("%-11s" MEM_o32b ", %s, %s", name, ADDR_o32b(offset, base),
           XMMRegName(src0), XMMRegName(dst));
    }
    m_formatter.twoByteOpVex_disp32(ty, opcode, offset, base, src0, dst);
  }
   4907 
  // Memory-operand (base+offset) variant of twoByteOpImmSimd: emits the
  // instruction followed by an 8-bit immediate.
  void twoByteOpImmSimd(const char* name, VexOperandType ty,
                        TwoByteOpcodeID opcode, uint32_t imm, int32_t offset,
                        RegisterID base, XMMRegisterID src0,
                        XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
           ADDR_ob(offset, base), XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, offset, base, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base),
         XMMRegName(src0), XMMRegName(dst));
    m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst);
    m_formatter.immediate8u(imm);
  }
   4926 
  // Two-byte opcode with a [base + index*scale + offset] memory operand.
  // useLegacySSEEncoding(src0, dst) selects between the legacy SSE form and
  // the three-operand VEX form; IsXMMReversedOperands(opcode) only changes
  // the operand order printed by spew(), not the bytes emitted.
  void twoByteOpSimd(const char* name, VexOperandType ty,
                     TwoByteOpcodeID opcode, int32_t offset, RegisterID base,
                     RegisterID index, int scale, XMMRegisterID src0,
                     XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, " MEM_obs, legacySSEOpName(name), XMMRegName(dst),
             ADDR_obs(offset, base, index, scale));
      } else {
        spew("%-11s" MEM_obs ", %s", legacySSEOpName(name),
             ADDR_obs(offset, base, index, scale), XMMRegName(dst));
      }
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, offset, base, index, scale, dst);
      return;
    }

    // VEX form: print only two operands when there is no vvvv source.
    if (src0 == invalid_xmm) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, " MEM_obs, name, XMMRegName(dst),
             ADDR_obs(offset, base, index, scale));
      } else {
        spew("%-11s" MEM_obs ", %s", name, ADDR_obs(offset, base, index, scale),
             XMMRegName(dst));
      }
    } else {
      spew("%-11s" MEM_obs ", %s, %s", name,
           ADDR_obs(offset, base, index, scale), XMMRegName(src0),
           XMMRegName(dst));
    }
    m_formatter.twoByteOpVex(ty, opcode, offset, base, index, scale, src0, dst);
  }
   4959 
  // Two-byte opcode with an absolute-address memory operand.
  // IsXMMReversedOperands(opcode) only affects the spew operand order.
  void twoByteOpSimd(const char* name, VexOperandType ty,
                     TwoByteOpcodeID opcode, const void* address,
                     XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, %p", legacySSEOpName(name), XMMRegName(dst), address);
      } else {
        spew("%-11s%p, %s", legacySSEOpName(name), address, XMMRegName(dst));
      }
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, address, dst);
      return;
    }

    // VEX form: two-operand spew when there is no vvvv source register.
    if (src0 == invalid_xmm) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, %p", name, XMMRegName(dst), address);
      } else {
        spew("%-11s%p, %s", name, address, XMMRegName(dst));
      }
    } else {
      spew("%-11s%p, %s, %s", name, address, XMMRegName(src0), XMMRegName(dst));
    }
    m_formatter.twoByteOpVex(ty, opcode, address, src0, dst);
  }
   4985 
   4986  void twoByteOpImmSimd(const char* name, VexOperandType ty,
   4987                        TwoByteOpcodeID opcode, uint32_t imm,
   4988                        const void* address, XMMRegisterID src0,
   4989                        XMMRegisterID dst) {
   4990    if (useLegacySSEEncoding(src0, dst)) {
   4991      spew("%-11s$0x%x, %p, %s", legacySSEOpName(name), imm, address,
   4992           XMMRegName(dst));
   4993      m_formatter.legacySSEPrefix(ty);
   4994      m_formatter.twoByteOp(opcode, address, dst);
   4995      m_formatter.immediate8u(imm);
   4996      return;
   4997    }
   4998 
   4999    spew("%-11s$0x%x, %p, %s, %s", name, imm, address, XMMRegName(src0),
   5000         XMMRegName(dst));
   5001    m_formatter.twoByteOpVex(ty, opcode, address, src0, dst);
   5002    m_formatter.immediate8u(imm);
   5003  }
   5004 
  // Two-byte opcode whose r/m operand is a 32-bit GPR and whose destination
  // is an XMM register. IsXMMReversedOperands(opcode) only swaps which name
  // is printed first; the emitted bytes are unaffected.
  void twoByteOpInt32Simd(const char* name, VexOperandType ty,
                          TwoByteOpcodeID opcode, RegisterID rm,
                          XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(dst),
             GPReg32Name(rm));
      } else {
        spew("%-11s%s, %s", legacySSEOpName(name), GPReg32Name(rm),
             XMMRegName(dst));
      }
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, rm, dst);
      return;
    }

    // VEX form: two-operand spew when there is no vvvv source register.
    if (src0 == invalid_xmm) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, %s", name, XMMRegName(dst), GPReg32Name(rm));
      } else {
        spew("%-11s%s, %s", name, GPReg32Name(rm), XMMRegName(dst));
      }
    } else {
      spew("%-11s%s, %s, %s", name, GPReg32Name(rm), XMMRegName(src0),
           XMMRegName(dst));
    }
    m_formatter.twoByteOpVex(ty, opcode, rm, src0, dst);
  }
   5033 
  // Two-byte opcode reading an XMM register and writing a GPR. Always uses
  // the legacy SSE encoding when useLegacySSEEncodingAlways() says so;
  // otherwise emits a VEX encoding with no vvvv source (invalid_xmm).
  void twoByteOpSimdInt32(const char* name, VexOperandType ty,
                          TwoByteOpcodeID opcode, XMMRegisterID rm,
                          RegisterID dst) {
    if (useLegacySSEEncodingAlways()) {
      if (IsXMMReversedOperands(opcode)) {
        spew("%-11s%s, %s", legacySSEOpName(name), GPReg32Name(dst),
             XMMRegName(rm));
      } else if (opcode == OP2_MOVD_EdVd) {
        // For movd GPR<-XMM the roles are swapped in the printed operands:
        // dst is shown as the XMM register and rm as the GPR.
        spew("%-11s%s, %s", legacySSEOpName(name),
             XMMRegName((XMMRegisterID)dst), GPReg32Name((RegisterID)rm));
      } else {
        spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm),
             GPReg32Name(dst));
      }
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, (RegisterID)rm, dst);
      return;
    }

    // Same spew special-casing for the VEX path.
    if (IsXMMReversedOperands(opcode)) {
      spew("%-11s%s, %s", name, GPReg32Name(dst), XMMRegName(rm));
    } else if (opcode == OP2_MOVD_EdVd) {
      spew("%-11s%s, %s", name, XMMRegName((XMMRegisterID)dst),
           GPReg32Name((RegisterID)rm));
    } else {
      spew("%-11s%s, %s", name, XMMRegName(rm), GPReg32Name(dst));
    }
    m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm, dst);
  }
   5063 
   5064  void twoByteOpImmSimdInt32(const char* name, VexOperandType ty,
   5065                             TwoByteOpcodeID opcode, uint32_t imm,
   5066                             XMMRegisterID rm, RegisterID dst) {
   5067    if (useLegacySSEEncodingAlways()) {
   5068      spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm),
   5069           GPReg32Name(dst));
   5070      m_formatter.legacySSEPrefix(ty);
   5071      m_formatter.twoByteOp(opcode, (RegisterID)rm, dst);
   5072      m_formatter.immediate8u(imm);
   5073      return;
   5074    }
   5075 
   5076    spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(rm), GPReg32Name(dst));
   5077    m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm, dst);
   5078    m_formatter.immediate8u(imm);
   5079  }
   5080 
   5081  void twoByteOpImmInt32Simd(const char* name, VexOperandType ty,
   5082                             TwoByteOpcodeID opcode, uint32_t imm,
   5083                             RegisterID rm, XMMRegisterID src0,
   5084                             XMMRegisterID dst) {
   5085    if (useLegacySSEEncodingAlways()) {
   5086      spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, GPReg32Name(rm),
   5087           XMMRegName(dst));
   5088      m_formatter.legacySSEPrefix(ty);
   5089      m_formatter.twoByteOp(opcode, rm, dst);
   5090      m_formatter.immediate8u(imm);
   5091      return;
   5092    }
   5093 
   5094    spew("%-11s$0x%x, %s, %s", name, imm, GPReg32Name(rm), XMMRegName(dst));
   5095    m_formatter.twoByteOpVex(ty, opcode, rm, src0, dst);
   5096    m_formatter.immediate8u(imm);
   5097  }
   5098 
  // imm8 two-byte op with a [base + offset] memory operand and an XMM
  // destination; emits legacy SSE or VEX encoding plus the immediate byte.
  void twoByteOpImmInt32Simd(const char* name, VexOperandType ty,
                             TwoByteOpcodeID opcode, uint32_t imm,
                             int32_t offset, RegisterID base,
                             XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncodingAlways()) {
      spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
           ADDR_ob(offset, base), XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, offset, base, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base),
         XMMRegName(src0), XMMRegName(dst));
    m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst);
    m_formatter.immediate8u(imm);
  }
   5117 
  // imm8 two-byte op with a [base + index*scale + offset] memory operand and
  // an XMM destination; emits legacy SSE or VEX encoding plus the immediate.
  void twoByteOpImmInt32Simd(const char* name, VexOperandType ty,
                             TwoByteOpcodeID opcode, uint32_t imm,
                             int32_t offset, RegisterID base, RegisterID index,
                             int scale, XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncodingAlways()) {
      spew("%-11s$0x%x, " MEM_obs ", %s", legacySSEOpName(name), imm,
           ADDR_obs(offset, base, index, scale), XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.twoByteOp(opcode, offset, base, index, scale, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    spew("%-11s$0x%x, " MEM_obs ", %s, %s", name, imm,
         ADDR_obs(offset, base, index, scale), XMMRegName(src0),
         XMMRegName(dst));
    m_formatter.twoByteOpVex(ty, opcode, offset, base, index, scale, src0, dst);
    m_formatter.immediate8u(imm);
  }
   5137 
   5138  void twoByteOpSimdFlags(const char* name, VexOperandType ty,
   5139                          TwoByteOpcodeID opcode, XMMRegisterID rm,
   5140                          XMMRegisterID reg) {
   5141    if (useLegacySSEEncodingAlways()) {
   5142      spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm),
   5143           XMMRegName(reg));
   5144      m_formatter.legacySSEPrefix(ty);
   5145      m_formatter.twoByteOp(opcode, (RegisterID)rm, reg);
   5146      return;
   5147    }
   5148 
   5149    spew("%-11s%s, %s", name, XMMRegName(rm), XMMRegName(reg));
   5150    m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, invalid_xmm,
   5151                             (XMMRegisterID)reg);
   5152  }
   5153 
   5154  void twoByteOpSimdFlags(const char* name, VexOperandType ty,
   5155                          TwoByteOpcodeID opcode, int32_t offset,
   5156                          RegisterID base, XMMRegisterID reg) {
   5157    if (useLegacySSEEncodingAlways()) {
   5158      spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), ADDR_ob(offset, base),
   5159           XMMRegName(reg));
   5160      m_formatter.legacySSEPrefix(ty);
   5161      m_formatter.twoByteOp(opcode, offset, base, reg);
   5162      return;
   5163    }
   5164 
   5165    spew("%-11s" MEM_ob ", %s", name, ADDR_ob(offset, base), XMMRegName(reg));
   5166    m_formatter.twoByteOpVex(ty, opcode, offset, base, invalid_xmm,
   5167                             (XMMRegisterID)reg);
   5168  }
   5169 
  // Three-byte opcode (with escape byte) in register/register form.
  // useLegacySSEEncoding(src0, dst) selects legacy SSE vs. VEX encoding.
  void threeByteOpSimd(const char* name, VexOperandType ty,
                       ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                       XMMRegisterID rm, XMMRegisterID src0,
                       XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm),
           XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst);
      return;
    }

    // VEX form: two-operand spew when there is no vvvv source register.
    if (src0 == invalid_xmm) {
      spew("%-11s%s, %s", name, XMMRegName(rm), XMMRegName(dst));
    } else {
      spew("%-11s%s, %s, %s", name, XMMRegName(rm), XMMRegName(src0),
           XMMRegName(dst));
    }
    m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst);
  }
   5190 
  // imm8 three-byte opcode in register/register form; the immediate byte is
  // emitted after the instruction in both encodings.
  void threeByteOpImmSimd(const char* name, VexOperandType ty,
                          ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                          uint32_t imm, XMMRegisterID rm, XMMRegisterID src0,
                          XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(rm),
           XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    // VEX form: two-operand spew when there is no vvvv source register.
    if (src0 == invalid_xmm) {
      spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(rm), XMMRegName(dst));
    } else {
      spew("%-11s$0x%x, %s, %s, %s", name, imm, XMMRegName(rm),
           XMMRegName(src0), XMMRegName(dst));
    }
    m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst);
    m_formatter.immediate8u(imm);
  }
   5213 
  // Three-byte opcode with a [base + offset] memory operand.
  void threeByteOpSimd(const char* name, VexOperandType ty,
                       ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                       int32_t offset, RegisterID base, XMMRegisterID src0,
                       XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), ADDR_ob(offset, base),
           XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, offset, base, dst);
      return;
    }

    // VEX form: two-operand spew when there is no vvvv source register.
    if (src0 == invalid_xmm) {
      spew("%-11s" MEM_ob ", %s", name, ADDR_ob(offset, base), XMMRegName(dst));
    } else {
      spew("%-11s" MEM_ob ", %s, %s", name, ADDR_ob(offset, base),
           XMMRegName(src0), XMMRegName(dst));
    }
    m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
  }
   5234 
  // Three-byte opcode with a [base + index*scale + offset] memory operand.
  void threeByteOpSimd(const char* name, VexOperandType ty,
                       ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                       int32_t offset, RegisterID base, RegisterID index,
                       int32_t scale, XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s" MEM_obs ", %s", legacySSEOpName(name),
           ADDR_obs(offset, base, index, scale), XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, offset, base, index, scale, dst);
      return;
    }

    // VEX form: two-operand spew when there is no vvvv source register.
    if (src0 == invalid_xmm) {
      spew("%-11s" MEM_obs ", %s", name, ADDR_obs(offset, base, index, scale),
           XMMRegName(dst));
    } else {
      spew("%-11s" MEM_obs ", %s, %s", name,
           ADDR_obs(offset, base, index, scale), XMMRegName(src0),
           XMMRegName(dst));
    }
    m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, index, scale,
                               src0, dst);
  }
   5258 
  // imm8 three-byte opcode with a [base + offset] memory operand.
  void threeByteOpImmSimd(const char* name, VexOperandType ty,
                          ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                          uint32_t imm, int32_t offset, RegisterID base,
                          XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
           ADDR_ob(offset, base), XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, offset, base, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    // VEX form: two-operand spew when there is no vvvv source register.
    if (src0 == invalid_xmm) {
      spew("%-11s$0x%x, " MEM_ob ", %s", name, imm, ADDR_ob(offset, base),
           XMMRegName(dst));
    } else {
      spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base),
           XMMRegName(src0), XMMRegName(dst));
    }
    m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
    m_formatter.immediate8u(imm);
  }
   5282 
  // imm8 three-byte opcode with a [base + index*scale + offset] operand.
  void threeByteOpImmSimd(const char* name, VexOperandType ty,
                          ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                          uint32_t imm, int32_t offset, RegisterID base,
                          RegisterID index, int scale, XMMRegisterID src0,
                          XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s$0x%x, " MEM_obs ", %s", legacySSEOpName(name), imm,
           ADDR_obs(offset, base, index, scale), XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, offset, base, index, scale, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    // VEX form: two-operand spew when there is no vvvv source register.
    if (src0 == invalid_xmm) {
      spew("%-11s$0x%x, " MEM_obs ", %s", name, imm,
           ADDR_obs(offset, base, index, scale), XMMRegName(dst));
    } else {
      spew("%-11s$0x%x, " MEM_obs ", %s, %s", name, imm,
           ADDR_obs(offset, base, index, scale), XMMRegName(src0),
           XMMRegName(dst));
    }
    m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, index, scale,
                               src0, dst);
    m_formatter.immediate8u(imm);
  }
   5309 
   5310  void threeByteOpSimd(const char* name, VexOperandType ty,
   5311                       ThreeByteOpcodeID opcode, ThreeByteEscape escape,
   5312                       const void* address, XMMRegisterID src0,
   5313                       XMMRegisterID dst) {
   5314    if (useLegacySSEEncoding(src0, dst)) {
   5315      spew("%-11s%p, %s", legacySSEOpName(name), address, XMMRegName(dst));
   5316      m_formatter.legacySSEPrefix(ty);
   5317      m_formatter.threeByteOp(opcode, escape, address, dst);
   5318      return;
   5319    }
   5320 
   5321    if (src0 == invalid_xmm) {
   5322      spew("%-11s%p, %s", name, address, XMMRegName(dst));
   5323    } else {
   5324      spew("%-11s%p, %s, %s", name, address, XMMRegName(src0), XMMRegName(dst));
   5325    }
   5326    m_formatter.threeByteOpVex(ty, opcode, escape, address, src0, dst);
   5327  }
   5328 
   5329  void threeByteOpImmInt32Simd(const char* name, VexOperandType ty,
   5330                               ThreeByteOpcodeID opcode, ThreeByteEscape escape,
   5331                               uint32_t imm, RegisterID src1,
   5332                               XMMRegisterID src0, XMMRegisterID dst) {
   5333    if (useLegacySSEEncoding(src0, dst)) {
   5334      spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, GPReg32Name(src1),
   5335           XMMRegName(dst));
   5336      m_formatter.legacySSEPrefix(ty);
   5337      m_formatter.threeByteOp(opcode, escape, src1, dst);
   5338      m_formatter.immediate8u(imm);
   5339      return;
   5340    }
   5341 
   5342    spew("%-11s$0x%x, %s, %s, %s", name, imm, GPReg32Name(src1),
   5343         XMMRegName(src0), XMMRegName(dst));
   5344    m_formatter.threeByteOpVex(ty, opcode, escape, src1, src0, dst);
   5345    m_formatter.immediate8u(imm);
   5346  }
   5347 
  // imm8 three-byte op with a [base + offset] memory operand and an XMM
  // destination.
  void threeByteOpImmInt32Simd(const char* name, VexOperandType ty,
                               ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                               uint32_t imm, int32_t offset, RegisterID base,
                               XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
           ADDR_ob(offset, base), XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, offset, base, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base),
         XMMRegName(src0), XMMRegName(dst));
    m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
    m_formatter.immediate8u(imm);
  }
   5366 
  // imm8 three-byte op with a [base + index*scale + offset] memory operand
  // and an XMM destination.
  void threeByteOpImmInt32Simd(const char* name, VexOperandType ty,
                               ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                               uint32_t imm, int32_t offset, RegisterID base,
                               RegisterID index, int scale, XMMRegisterID src0,
                               XMMRegisterID dst) {
    if (useLegacySSEEncoding(src0, dst)) {
      spew("%-11s$0x%x, " MEM_obs ", %s", legacySSEOpName(name), imm,
           ADDR_obs(offset, base, index, scale), XMMRegName(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, offset, base, index, scale, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    spew("%-11s$0x%x, " MEM_obs ", %s, %s", name, imm,
         ADDR_obs(offset, base, index, scale), XMMRegName(src0),
         XMMRegName(dst));
    m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, index, scale,
                               src0, dst);
    m_formatter.immediate8u(imm);
  }
   5388 
  // imm8 three-byte op reading an XMM register and writing a GPR. The VEX
  // form carries no vvvv source register (invalid_xmm).
  void threeByteOpImmSimdInt32(const char* name, VexOperandType ty,
                               ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                               uint32_t imm, XMMRegisterID src,
                               RegisterID dst) {
    if (useLegacySSEEncodingAlways()) {
      spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, XMMRegName(src),
           GPReg32Name(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, (RegisterID)src, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    // For pextrd the printed operands swap roles: dst is shown as the XMM
    // register and src as the GPR. Only the spew output differs.
    if (opcode == OP3_PEXTRD_EvVdqIb) {
      spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName((XMMRegisterID)dst),
           GPReg32Name((RegisterID)src));
    } else {
      spew("%-11s$0x%x, %s, %s", name, imm, XMMRegName(src), GPReg32Name(dst));
    }
    m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)src, invalid_xmm,
                               dst);
    m_formatter.immediate8u(imm);
  }
   5412 
  // imm8 three-byte op with a [base + offset] memory operand and a GPR
  // destination; no vvvv source register in the VEX form.
  void threeByteOpImmSimdInt32(const char* name, VexOperandType ty,
                               ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                               uint32_t imm, int32_t offset, RegisterID base,
                               RegisterID dst) {
    if (useLegacySSEEncodingAlways()) {
      spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
           ADDR_ob(offset, base), GPReg32Name(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, offset, base, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    spew("%-11s$0x%x, " MEM_ob ", %s", name, imm, ADDR_ob(offset, base),
         GPReg32Name(dst));
    m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, invalid_xmm,
                               dst);
    m_formatter.immediate8u(imm);
  }
   5432 
  // imm8 three-byte op with a [base + index*scale + offset] memory operand
  // and a GPR destination; no vvvv source register in the VEX form.
  void threeByteOpImmSimdInt32(const char* name, VexOperandType ty,
                               ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                               uint32_t imm, int32_t offset, RegisterID base,
                               RegisterID index, int scale, RegisterID dst) {
    if (useLegacySSEEncodingAlways()) {
      spew("%-11s$0x%x, " MEM_obs ", %s", legacySSEOpName(name), imm,
           ADDR_obs(offset, base, index, scale), GPReg32Name(dst));
      m_formatter.legacySSEPrefix(ty);
      m_formatter.threeByteOp(opcode, escape, offset, base, index, scale, dst);
      m_formatter.immediate8u(imm);
      return;
    }

    spew("%-11s$0x%x, " MEM_obs ", %s", name, imm,
         ADDR_obs(offset, base, index, scale), GPReg32Name(dst));
    m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, index, scale,
                               invalid_xmm, dst);
    m_formatter.immediate8u(imm);
  }
   5452 
   5453  // Blendv is a three-byte op, but the VEX encoding has a different opcode
   5454  // than the SSE encoding, so we handle it specially.
  // Register/register blendv. Note the legacy SSE and VEX encodings use
  // different opcodes (|opcode| with escape 0F38 vs. |vexOpcode| with escape
  // 0F3A), and only the VEX form encodes the |mask| register explicitly.
  void vblendvOpSimd(const char* name, ThreeByteOpcodeID opcode,
                     ThreeByteOpcodeID vexOpcode, XMMRegisterID mask,
                     XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst) {
    if (useLegacySSEEncodingForVblendv(mask, src0, dst)) {
      spew("%-11s%s, %s", legacySSEOpName(name), XMMRegName(rm),
           XMMRegName(dst));
      // Even though a "ps" instruction, vblendv is encoded with the "pd"
      // prefix.
      m_formatter.legacySSEPrefix(VEX_PD);
      m_formatter.threeByteOp(opcode, ESCAPE_38, (RegisterID)rm, dst);
      return;
    }

    spew("%-11s%s, %s, %s, %s", name, XMMRegName(mask), XMMRegName(rm),
         XMMRegName(src0), XMMRegName(dst));
    // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix.
    m_formatter.vblendvOpVex(VEX_PD, vexOpcode, ESCAPE_3A, mask, (RegisterID)rm,
                             src0, dst);
  }
   5474 
  // Memory-operand ([base + offset]) form of vblendvOpSimd; same
  // opcode/escape split between the legacy SSE and VEX encodings as the
  // register form.
  void vblendvOpSimd(const char* name, ThreeByteOpcodeID opcode,
                     ThreeByteOpcodeID vexOpcode, XMMRegisterID mask,
                     int32_t offset, RegisterID base, XMMRegisterID src0,
                     XMMRegisterID dst) {
    if (useLegacySSEEncodingForVblendv(mask, src0, dst)) {
      spew("%-11s" MEM_ob ", %s", legacySSEOpName(name), ADDR_ob(offset, base),
           XMMRegName(dst));
      // Even though a "ps" instruction, vblendv is encoded with the "pd"
      // prefix.
      m_formatter.legacySSEPrefix(VEX_PD);
      m_formatter.threeByteOp(opcode, ESCAPE_38, offset, base, dst);
      return;
    }

    spew("%-11s%s, " MEM_ob ", %s, %s", name, XMMRegName(mask),
         ADDR_ob(offset, base), XMMRegName(src0), XMMRegName(dst));
    // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix.
    m_formatter.vblendvOpVex(VEX_PD, vexOpcode, ESCAPE_3A, mask, offset, base,
                             src0, dst);
  }
   5495 
  // Immediate-count SIMD shift. |shiftKind| is encoded in the ModRM reg
  // field (it acts as a group opcode), and the shifted register goes in r/m.
  void shiftOpImmSimd(const char* name, TwoByteOpcodeID opcode,
                      ShiftID shiftKind, uint32_t imm, XMMRegisterID src,
                      XMMRegisterID dst) {
    if (useLegacySSEEncoding(src, dst)) {
      spew("%-11s$%d, %s", legacySSEOpName(name), int32_t(imm),
           XMMRegName(dst));
      m_formatter.legacySSEPrefix(VEX_PD);
      m_formatter.twoByteOp(opcode, (RegisterID)dst, (int)shiftKind);
      m_formatter.immediate8u(imm);
      return;
    }

    spew("%-11s$%d, %s, %s", name, int32_t(imm), XMMRegName(src),
         XMMRegName(dst));
    // For shift instructions, destination is stored in vvvv field.
    m_formatter.twoByteOpVex(VEX_PD, opcode, (RegisterID)src, dst,
                             (int)shiftKind);
    m_formatter.immediate8u(imm);
  }
   5515 
   5516  class X86InstructionFormatter {
   5517   public:
   5518    // Legacy prefix bytes:
   5519    //
    // These are emitted prior to the instruction.
   5521 
   5522    void prefix(OneByteOpcodeID pre) { m_buffer.putByte(pre); }
   5523 
    // Emit the legacy SSE prefix byte matching the VEX "pp" operand type:
    // PS emits nothing, PD emits 0x66, SS emits 0xF3, SD emits 0xF2.
    void legacySSEPrefix(VexOperandType ty) {
      switch (ty) {
        case VEX_PS:
          break;
        case VEX_PD:
          prefix(PRE_SSE_66);
          break;
        case VEX_SS:
          prefix(PRE_SSE_F3);
          break;
        case VEX_SD:
          prefix(PRE_SSE_F2);
          break;
      }
    }
   5539 
   5540    /* clang-format off */
   5541        //
   5542        // Word-sized operands / no operand instruction formatters.
   5543        //
   5544        // In addition to the opcode, the following operand permutations are supported:
   5545        //   * None - instruction takes no operands.
   5546        //   * One register - the low three bits of the RegisterID are added into the opcode.
   5547        //   * Two registers - encode a register form ModRm (for all ModRm formats, the reg field is passed first, and a GroupOpcodeID may be passed in its place).
   5548        //   * Three argument ModRM - a register, and a register and an offset describing a memory operand.
   5549        //   * Five argument ModRM - a register, and a base register, an index, scale, and offset describing a memory operand.
   5550        //
   5551        // For 32-bit x86 targets, the address operand may also be provided as a
   5552        // void*.  On 64-bit targets REX prefixes will be planted as necessary,
   5553        // where high numbered registers are used.
   5554        //
   5555        // The twoByteOp methods plant two-byte Intel instructions sequences
   5556        // (first opcode byte 0x0F).
   5557        //
   5558    /* clang-format on */
   5559 
    // No-operand instruction: just the opcode byte.
    void oneByteOp(OneByteOpcodeID opcode) {
      m_buffer.ensureSpace(MaxInstructionSize);
      m_buffer.putByteUnchecked(opcode);
    }
   5564 
    // One-register form: the low three bits of |reg| are folded into the
    // opcode byte itself; no ModRM byte is emitted.
    void oneByteOp(OneByteOpcodeID opcode, RegisterID reg) {
      m_buffer.ensureSpace(MaxInstructionSize);
      emitRexIfNeeded(0, 0, reg);
      m_buffer.putByteUnchecked(opcode + (reg & 7));
    }
   5570 
    // Register/register form: |rm| in the ModRM r/m field, |reg| in the reg
    // field (which may instead carry a group opcode).
    void oneByteOp(OneByteOpcodeID opcode, RegisterID rm, int reg) {
      m_buffer.ensureSpace(MaxInstructionSize);
      emitRexIfNeeded(reg, 0, rm);
      m_buffer.putByteUnchecked(opcode);
      registerModRM(rm, reg);
    }
   5577 
    // Memory form with a [base + offset] operand.
    void oneByteOp(OneByteOpcodeID opcode, int32_t offset, RegisterID base,
                   int reg) {
      m_buffer.ensureSpace(MaxInstructionSize);
      emitRexIfNeeded(reg, 0, base);
      m_buffer.putByteUnchecked(opcode);
      memoryModRM(offset, base, reg);
    }
   5585 
    // Same as oneByteOp(offset, base, reg) but always encodes a full 32-bit
    // displacement.
    void oneByteOp_disp32(OneByteOpcodeID opcode, int32_t offset,
                          RegisterID base, int reg) {
      m_buffer.ensureSpace(MaxInstructionSize);
      emitRexIfNeeded(reg, 0, base);
      m_buffer.putByteUnchecked(opcode);
      memoryModRM_disp32(offset, base, reg);
    }
   5593 
   5594    void oneByteOp(OneByteOpcodeID opcode, int32_t offset, RegisterID base,
   5595                   RegisterID index, int scale, int reg) {
   5596      m_buffer.ensureSpace(MaxInstructionSize);
   5597      emitRexIfNeeded(reg, index, base);
   5598      m_buffer.putByteUnchecked(opcode);
   5599      memoryModRM(offset, base, index, scale, reg);
   5600    }
   5601 
   5602    void oneByteOp_disp32(OneByteOpcodeID opcode, int32_t offset,
   5603                          RegisterID index, int scale, int reg) {
   5604      m_buffer.ensureSpace(MaxInstructionSize);
   5605      emitRexIfNeeded(reg, index, 0);
   5606      m_buffer.putByteUnchecked(opcode);
   5607      memoryModRM_disp32(offset, index, scale, reg);
   5608    }
   5609 
   5610    void oneByteOp(OneByteOpcodeID opcode, const void* address, int reg) {
   5611      m_buffer.ensureSpace(MaxInstructionSize);
   5612      emitRexIfNeeded(reg, 0, 0);
   5613      m_buffer.putByteUnchecked(opcode);
   5614      memoryModRM_disp32(address, reg);
   5615    }
   5616 
   5617    void oneByteOp_disp32(OneByteOpcodeID opcode, const void* address,
   5618                          int reg) {
   5619      m_buffer.ensureSpace(MaxInstructionSize);
   5620      emitRexIfNeeded(reg, 0, 0);
   5621      m_buffer.putByteUnchecked(opcode);
   5622      memoryModRM_disp32(address, reg);
   5623    }
#ifdef JS_CODEGEN_X64
  // RIP-relative one-byte opcode: ModRM with mod=00 and no base register
  // selects RIP-relative addressing on x64; a 32-bit offset follows.
  void oneByteRipOp(OneByteOpcodeID opcode, int ripOffset, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, 0);
    m_buffer.putByteUnchecked(opcode);
    putModRm(ModRmMemoryNoDisp, noBase, reg);
    m_buffer.putIntUnchecked(ripOffset);
  }

  // As oneByteRipOp, but with a mandatory REX.W prefix (64-bit operand).
  void oneByteRipOp64(OneByteOpcodeID opcode, int ripOffset, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, 0);
    m_buffer.putByteUnchecked(opcode);
    putModRm(ModRmMemoryNoDisp, noBase, reg);
    m_buffer.putIntUnchecked(ripOffset);
  }

  // RIP-relative two-byte (0x0F-escaped) opcode.
  void twoByteRipOp(TwoByteOpcodeID opcode, int ripOffset, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, 0);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    putModRm(ModRmMemoryNoDisp, noBase, reg);
    m_buffer.putIntUnchecked(ripOffset);
  }

  // RIP-relative VEX-encoded two-byte opcode (map field m=1 selects the
  // 0x0F opcode table).  NOTE(review): no explicit ensureSpace here —
  // presumably threeOpVex reserves buffer space, consistent with the
  // other *Vex formatters.
  void twoByteRipOpVex(VexOperandType ty, TwoByteOpcodeID opcode,
                       int ripOffset, XMMRegisterID src0, XMMRegisterID reg) {
    int r = (reg >> 3), x = 0, b = 0;
    int m = 1;  // 0x0F
    int w = 0, v = src0, l = 0;
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    putModRm(ModRmMemoryNoDisp, noBase, reg);
    m_buffer.putIntUnchecked(ripOffset);
  }
#endif
   5660 
  // Two-byte opcode (0x0F prefix), no operands.
  void twoByteOp(TwoByteOpcodeID opcode) {
    m_buffer.ensureSpace(MaxInstructionSize);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
  }

  // Register folded into the opcode's low three bits.
  void twoByteOp(TwoByteOpcodeID opcode, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(0, 0, reg);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode + (reg & 7));
  }

  // Register-direct ModRM form.
  void twoByteOp(TwoByteOpcodeID opcode, RegisterID rm, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, rm);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, reg);
  }

  // VEX-encoded two-byte opcode (map field m=1 selects the 0x0F table),
  // register-direct form; reg/rm high bits feed VEX.R / VEX.B.
  void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, RegisterID rm,
                    XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = (rm >> 3);
    int m = 1;  // 0x0F
    int w = 0, v = src0, l = 0;
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    registerModRM(rm, reg);
  }

  // Memory operand [base + offset].
  void twoByteOp(TwoByteOpcodeID opcode, int32_t offset, RegisterID base,
                 int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, reg);
  }

  // VEX-encoded, memory operand [base + offset].
  void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, int32_t offset,
                    RegisterID base, XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = (base >> 3);
    int m = 1;  // 0x0F
    int w = 0, v = src0, l = 0;
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(offset, base, reg);
  }

  // Memory operand [base + offset] with a forced 32-bit displacement.
  void twoByteOp_disp32(TwoByteOpcodeID opcode, int32_t offset,
                        RegisterID base, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM_disp32(offset, base, reg);
  }

  // VEX-encoded, [base + offset] with a forced 32-bit displacement.
  void twoByteOpVex_disp32(VexOperandType ty, TwoByteOpcodeID opcode,
                           int32_t offset, RegisterID base,
                           XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = (base >> 3);
    int m = 1;  // 0x0F
    int w = 0, v = src0, l = 0;
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM_disp32(offset, base, reg);
  }

  // Memory operand [base + index*scale + offset].
  void twoByteOp(TwoByteOpcodeID opcode, int32_t offset, RegisterID base,
                 RegisterID index, int scale, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, index, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, index, scale, reg);
  }

  // VEX-encoded, [base + index*scale + offset]; the index's high bit
  // feeds VEX.X.
  void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode, int32_t offset,
                    RegisterID base, RegisterID index, int scale,
                    XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = (index >> 3), b = (base >> 3);
    int m = 1;  // 0x0F
    int w = 0, v = src0, l = 0;
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(offset, base, index, scale, reg);
  }

  // Absolute-address memory operand.
  void twoByteOp(TwoByteOpcodeID opcode, const void* address, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, 0);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(address, reg);
  }

  // VEX-encoded, absolute-address memory operand.
  void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode,
                    const void* address, XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = 0;
    int m = 1;  // 0x0F
    int w = 0, v = src0, l = 0;
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(address, reg);
  }
   5763 
  // Three-byte opcode (0x0F <escape> <opcode>), register-direct form.
  void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                   RegisterID rm, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, rm);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(escape);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, reg);
  }
   5773 
   5774    void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode,
   5775                        ThreeByteEscape escape, RegisterID rm,
   5776                        XMMRegisterID src0, int reg) {
   5777      int r = (reg >> 3), x = 0, b = (rm >> 3);
   5778      int m = 0, w = 0, v = src0, l = 0;
   5779      switch (escape) {
   5780        case ESCAPE_38:
   5781          m = 2;
   5782          break;
   5783        case ESCAPE_3A:
   5784          m = 3;
   5785          break;
   5786        default:
   5787          MOZ_CRASH("unexpected escape");
   5788      }
   5789      threeOpVex(ty, r, x, b, m, w, v, l, opcode);
   5790      registerModRM(rm, reg);
   5791    }
   5792 
  // Three-byte opcode, memory operand [base + offset].
  void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                   int32_t offset, RegisterID base, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(escape);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, reg);
  }

  // Three-byte opcode, memory operand [base + index*scale + offset].
  void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                   int32_t offset, RegisterID base, RegisterID index,
                   int32_t scale, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, index, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(escape);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, index, scale, reg);
  }

  // VEX-encoded three-byte opcode, memory operand [base + offset].  The
  // escape byte maps to the VEX map-select field: 0F38 -> 2, 0F3A -> 3.
  void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode,
                      ThreeByteEscape escape, int32_t offset, RegisterID base,
                      XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = (base >> 3);
    int m = 0, w = 0, v = src0, l = 0;
    switch (escape) {
      case ESCAPE_38:
        m = 2;
        break;
      case ESCAPE_3A:
        m = 3;
        break;
      default:
        MOZ_CRASH("unexpected escape");
    }
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(offset, base, reg);
  }

  // VEX-encoded three-byte opcode, [base + index*scale + offset] operand;
  // the index's high bit feeds VEX.X.
  void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode,
                      ThreeByteEscape escape, int32_t offset, RegisterID base,
                      RegisterID index, int scale, XMMRegisterID src0,
                      int reg) {
    int r = (reg >> 3), x = (index >> 3), b = (base >> 3);
    int m = 0, w = 0, v = src0, l = 0;
    switch (escape) {
      case ESCAPE_38:
        m = 2;
        break;
      case ESCAPE_3A:
        m = 3;
        break;
      default:
        MOZ_CRASH("unexpected escape");
    }
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(offset, base, index, scale, reg);
  }
   5852 
  // Three-byte opcode, absolute-address memory operand.
  void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                   const void* address, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, 0);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(escape);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(address, reg);
  }

  // Three-byte opcode with a RIP-relative operand (ModRM mod=00 with no
  // base), followed by a 32-bit offset.
  void threeByteRipOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                      int ripOffset, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIfNeeded(reg, 0, 0);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(escape);
    m_buffer.putByteUnchecked(opcode);
    putModRm(ModRmMemoryNoDisp, noBase, reg);
    m_buffer.putIntUnchecked(ripOffset);
  }

  // VEX-encoded three-byte opcode, absolute-address operand.  The escape
  // selects the VEX map field: 0F38 -> 2, 0F3A -> 3.
  void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode,
                      ThreeByteEscape escape, const void* address,
                      XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = 0;
    int m = 0, w = 0, v = src0, l = 0;
    switch (escape) {
      case ESCAPE_38:
        m = 2;
        break;
      case ESCAPE_3A:
        m = 3;
        break;
      default:
        MOZ_CRASH("unexpected escape");
    }
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(address, reg);
  }

  // VEX-encoded three-byte opcode with a RIP-relative operand.
  void threeByteRipOpVex(VexOperandType ty, ThreeByteOpcodeID opcode,
                         ThreeByteEscape escape, int ripOffset,
                         XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = 0;
    int m = 0;
    switch (escape) {
      case ESCAPE_38:
        m = 2;
        break;
      case ESCAPE_3A:
        m = 3;
        break;
      default:
        MOZ_CRASH("unexpected escape");
    }
    int w = 0, v = src0, l = 0;
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    putModRm(ModRmMemoryNoDisp, noBase, reg);
    m_buffer.putIntUnchecked(ripOffset);
  }
   5913 
  // vblendv-style VEX op, register-direct form: the mask register number
  // is carried in the upper four bits of a trailing immediate byte.
  void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode,
                    ThreeByteEscape escape, XMMRegisterID mask, RegisterID rm,
                    XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = (rm >> 3);
    int m = 0, w = 0, v = src0, l = 0;
    switch (escape) {
      case ESCAPE_38:
        m = 2;
        break;
      case ESCAPE_3A:
        m = 3;
        break;
      default:
        MOZ_CRASH("unexpected escape");
    }
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    registerModRM(rm, reg);
    // Mask register goes in imm8[7:4].
    immediate8u(mask << 4);
  }

  // As above, with a [base + offset] memory operand.
  void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode,
                    ThreeByteEscape escape, XMMRegisterID mask,
                    int32_t offset, RegisterID base, XMMRegisterID src0,
                    int reg) {
    int r = (reg >> 3), x = 0, b = (base >> 3);
    int m = 0, w = 0, v = src0, l = 0;
    switch (escape) {
      case ESCAPE_38:
        m = 2;
        break;
      case ESCAPE_3A:
        m = 3;
        break;
      default:
        MOZ_CRASH("unexpected escape");
    }
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    memoryModRM(offset, base, reg);
    // Mask register goes in imm8[7:4].
    immediate8u(mask << 4);
  }
   5954 
   5955 #ifdef JS_CODEGEN_X64
   5956    // Quad-word-sized operands:
   5957    //
  // Used to format 64-bit operations, planting a REX.w prefix.  When
   5959    // planting d64 or f64 instructions, not requiring a REX.w prefix, the
   5960    // normal (non-'64'-postfixed) formatters should be used.
   5961 
  // 64-bit (REX.W) one-byte opcode, no ModRM operands.
  void oneByteOp64(OneByteOpcodeID opcode) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(0, 0, 0);
    m_buffer.putByteUnchecked(opcode);
  }

  // Register folded into the opcode's low three bits, REX.W set.
  void oneByteOp64(OneByteOpcodeID opcode, RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(0, 0, reg);
    m_buffer.putByteUnchecked(opcode + (reg & 7));
  }

  // Register-direct ModRM form, REX.W set.
  void oneByteOp64(OneByteOpcodeID opcode, RegisterID rm, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, rm);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, reg);
  }

  // Memory operand [base + offset], REX.W set.
  void oneByteOp64(OneByteOpcodeID opcode, int32_t offset, RegisterID base,
                   int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, base);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, reg);
  }

  // As above, but always encodes a 32-bit displacement.
  void oneByteOp64_disp32(OneByteOpcodeID opcode, int32_t offset,
                          RegisterID base, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, base);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM_disp32(offset, base, reg);
  }

  // Memory operand [base + index*scale + offset], REX.W set.
  void oneByteOp64(OneByteOpcodeID opcode, int32_t offset, RegisterID base,
                   RegisterID index, int scale, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, index, base);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, index, scale, reg);
  }

  // Memory operand [index*scale + offset] with no base register; forces a
  // 32-bit displacement.  REX.W set.
  void oneByteOp64_disp32(OneByteOpcodeID opcode, int32_t offset,
                          RegisterID index, int scale, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, index, 0);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM_disp32(offset, index, scale, reg);
  }

  // Absolute-address memory operand, REX.W set.
  void oneByteOp64(OneByteOpcodeID opcode, const void* address, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, 0);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(address, reg);
  }

  // Two-byte (0x0F-escaped) opcode, register folded into its low three
  // bits, REX.W set.
  void twoByteOp64(TwoByteOpcodeID opcode, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(0, 0, reg);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode + (reg & 7));
  }

  // Two-byte opcode, register-direct ModRM, REX.W set.
  void twoByteOp64(TwoByteOpcodeID opcode, RegisterID rm, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, rm);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, reg);
  }

  // Two-byte opcode, memory operand [base + offset], REX.W set.
  void twoByteOp64(TwoByteOpcodeID opcode, int offset, RegisterID base,
                   int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, reg);
  }

  // Two-byte opcode, memory operand [base + index*scale + offset], REX.W.
  void twoByteOp64(TwoByteOpcodeID opcode, int offset, RegisterID base,
                   RegisterID index, int scale, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, index, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, index, scale, reg);
  }

  // Two-byte opcode, absolute-address memory operand, REX.W set.
  void twoByteOp64(TwoByteOpcodeID opcode, const void* address, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, 0);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(address, reg);
  }

  // VEX-encoded two-byte opcode with VEX.W=1, register-direct form.
  void twoByteOpVex64(VexOperandType ty, TwoByteOpcodeID opcode,
                      RegisterID rm, XMMRegisterID src0, XMMRegisterID reg) {
    int r = (reg >> 3), x = 0, b = (rm >> 3);
    int m = 1;  // 0x0F
    int w = 1, v = src0, l = 0;
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    registerModRM(rm, reg);
  }

  // Three-byte opcode (0x0F <escape> <opcode>), register-direct, REX.W.
  void threeByteOp64(ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                     RegisterID rm, int reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexW(reg, 0, rm);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(escape);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, reg);
  }
   6079 #endif  // JS_CODEGEN_X64
   6080 
  // VEX-encoded three-byte opcode with VEX.W=1, register-direct form.
  // The escape selects the VEX map field: 0F38 -> 2, 0F3A -> 3.
  void threeByteOpVex64(VexOperandType ty, ThreeByteOpcodeID opcode,
                        ThreeByteEscape escape, RegisterID rm,
                        XMMRegisterID src0, int reg) {
    int r = (reg >> 3), x = 0, b = (rm >> 3);
    int m = 0, w = 1, v = src0, l = 0;
    switch (escape) {
      case ESCAPE_38:
        m = 2;
        break;
      case ESCAPE_3A:
        m = 3;
        break;
      default:
        MOZ_CRASH("unexpected escape");
    }
    threeOpVex(ty, r, x, b, m, w, v, l, opcode);
    registerModRM(rm, reg);
  }
   6099 
   6100    // Byte-operands:
   6101    //
   6102    // These methods format byte operations.  Byte operations differ from
   6103    // the normal formatters in the circumstances under which they will
   6104    // decide to emit REX prefixes.  These should be used where any register
   6105    // operand signifies a byte register.
   6106    //
  // The distinction is due to the handling of register numbers in the
   6108    // range 4..7 on x86-64.  These register numbers may either represent
   6109    // the second byte of the first four registers (ah..bh) or the first
   6110    // byte of the second four registers (spl..dil).
   6111    //
   6112    // Address operands should still be checked using regRequiresRex(),
   6113    // while byteRegRequiresRex() is provided to check byte register
   6114    // operands.
   6115 
  // Byte-operand form, no operands.
  void oneByteOp8(OneByteOpcodeID opcode) {
    m_buffer.ensureSpace(MaxInstructionSize);
    m_buffer.putByteUnchecked(opcode);
  }

  // Register folded into the opcode's low three bits; REX planted when
  // the byte register requires it.
  void oneByteOp8(OneByteOpcodeID opcode, RegisterID r) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(r), 0, 0, r);
    m_buffer.putByteUnchecked(opcode + (r & 7));
  }

  // Register-direct ModRM; either byte register may force a REX prefix.
  void oneByteOp8(OneByteOpcodeID opcode, RegisterID rm, RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(reg) || byteRegRequiresRex(rm), reg, 0, rm);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, reg);
  }

  // Group opcode: the ModRM reg field carries the sub-opcode.
  void oneByteOp8(OneByteOpcodeID opcode, RegisterID rm,
                  GroupOpcodeID groupOp) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(rm), 0, 0, rm);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, groupOp);
  }

  // Like oneByteOp8, but never emits a REX prefix.
  void oneByteOp8_norex(OneByteOpcodeID opcode, HRegisterID rm,
                        GroupOpcodeID groupOp) {
    MOZ_ASSERT(!regRequiresRex(RegisterID(rm)));
    m_buffer.ensureSpace(MaxInstructionSize);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(RegisterID(rm), groupOp);
  }

  // Memory operand [base + offset].
  void oneByteOp8(OneByteOpcodeID opcode, int32_t offset, RegisterID base,
                  RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(reg), reg, 0, base);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, reg);
  }

  // As above, but always encodes a 32-bit displacement.
  void oneByteOp8_disp32(OneByteOpcodeID opcode, int32_t offset,
                         RegisterID base, RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(reg), reg, 0, base);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM_disp32(offset, base, reg);
  }

  // Memory operand [base + index*scale + offset].
  void oneByteOp8(OneByteOpcodeID opcode, int32_t offset, RegisterID base,
                  RegisterID index, int scale, RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(reg), reg, index, base);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, index, scale, reg);
  }

  // Absolute-address memory operand.
  void oneByteOp8(OneByteOpcodeID opcode, const void* address,
                  RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(reg), reg, 0, 0);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM_disp32(address, reg);
  }

  // Two-byte (0x0F-escaped) byte-op, register-direct form.
  void twoByteOp8(TwoByteOpcodeID opcode, RegisterID rm, RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(reg) || byteRegRequiresRex(rm), reg, 0, rm);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, reg);
  }

  // Two-byte byte-op, memory operand [base + offset].  The address base
  // is checked with regRequiresRex() since it is not a byte register.
  void twoByteOp8(TwoByteOpcodeID opcode, int32_t offset, RegisterID base,
                  RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(reg) || regRequiresRex(base), reg, 0, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, reg);
  }

  // Two-byte byte-op, memory operand [base + index*scale + offset].
  void twoByteOp8(TwoByteOpcodeID opcode, int32_t offset, RegisterID base,
                  RegisterID index, int scale, RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(reg) || regRequiresRex(base) ||
                  regRequiresRex(index),
              reg, index, base);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    memoryModRM(offset, base, index, scale, reg);
  }

  // Like twoByteOp8 but doesn't add a REX prefix if the destination reg
  // is in esp..edi. This may be used when the destination is not an 8-bit
  // register (as in a movzbl instruction), so it doesn't need a REX
  // prefix to disambiguate it from ah..bh.
  void twoByteOp8_movx(TwoByteOpcodeID opcode, RegisterID rm,
                       RegisterID reg) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(regRequiresRex(reg) || byteRegRequiresRex(rm), reg, 0, rm);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, reg);
  }

  // Two-byte group opcode: the ModRM reg field carries the sub-opcode.
  void twoByteOp8(TwoByteOpcodeID opcode, RegisterID rm,
                  GroupOpcodeID groupOp) {
    m_buffer.ensureSpace(MaxInstructionSize);
    emitRexIf(byteRegRequiresRex(rm), 0, 0, rm);
    m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
    m_buffer.putByteUnchecked(opcode);
    registerModRM(rm, groupOp);
  }
   6232 
   6233    // Immediates:
   6234    //
  // An immediate should be appended where appropriate after an op has
   6236    // been emitted.  The writes are unchecked since the opcode formatters
   6237    // above will have ensured space.
   6238 
  // A signed 8-bit immediate.
  MOZ_ALWAYS_INLINE void immediate8s(int32_t imm) {
    MOZ_ASSERT(CAN_SIGN_EXTEND_8_32(imm));
    m_buffer.putByteUnchecked(imm);
  }

  // An unsigned 8-bit immediate.
  MOZ_ALWAYS_INLINE void immediate8u(uint32_t imm) {
    MOZ_ASSERT(CAN_ZERO_EXTEND_8_32(imm));
    m_buffer.putByteUnchecked(int32_t(imm));
  }

  // An 8-bit immediate which is either signed or unsigned, for use in
  // instructions which actually only operate on 8 bits.
  MOZ_ALWAYS_INLINE void immediate8(int32_t imm) {
    m_buffer.putByteUnchecked(imm);
  }

  // A signed 16-bit immediate.
  MOZ_ALWAYS_INLINE void immediate16s(int32_t imm) {
    MOZ_ASSERT(CAN_SIGN_EXTEND_16_32(imm));
    m_buffer.putShortUnchecked(imm);
  }

  // An unsigned 16-bit immediate.
  MOZ_ALWAYS_INLINE void immediate16u(int32_t imm) {
    MOZ_ASSERT(CAN_ZERO_EXTEND_16_32(imm));
    m_buffer.putShortUnchecked(imm);
  }

  // A 16-bit immediate which is either signed or unsigned, for use in
  // instructions which actually only operate on 16 bits.
  MOZ_ALWAYS_INLINE void immediate16(int32_t imm) {
    m_buffer.putShortUnchecked(imm);
  }

  // A 32-bit immediate.
  MOZ_ALWAYS_INLINE void immediate32(int32_t imm) {
    m_buffer.putIntUnchecked(imm);
  }

  // A 64-bit immediate.
  MOZ_ALWAYS_INLINE void immediate64(int64_t imm) {
    m_buffer.putInt64Unchecked(imm);
  }

  // Plant a zeroed 32-bit relative-offset slot and return a JmpSrc
  // recording the buffer position just past it.
  [[nodiscard]] MOZ_ALWAYS_INLINE JmpSrc immediateRel32() {
    m_buffer.putIntUnchecked(0);
    return JmpSrc(m_buffer.size());
  }
   6287 
   6288    // Data:
   6289 
  // Emit a pointer-sized table entry (8 bytes on x64, 4 on x86).
  void jumpTablePointer(uintptr_t ptr) {
    m_buffer.ensureSpace(sizeof(uintptr_t));
#ifdef JS_CODEGEN_X64
    m_buffer.putInt64Unchecked(ptr);
#else
    m_buffer.putIntUnchecked(ptr);
#endif
  }

  // Emit the raw 64-bit pattern of a double.
  void doubleConstant(double d) {
    m_buffer.ensureSpace(sizeof(double));
    m_buffer.putInt64Unchecked(mozilla::BitwiseCast<uint64_t>(d));
  }

  // Emit the raw 32-bit pattern of a float.
  void floatConstant(float f) {
    m_buffer.ensureSpace(sizeof(float));
    m_buffer.putIntUnchecked(mozilla::BitwiseCast<uint32_t>(f));
  }
   6308 
   6309    void simd128Constant(const void* data) {
   6310      const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
   6311      m_buffer.ensureSpace(16);
   6312      for (size_t i = 0; i < 16; ++i) {
   6313        m_buffer.putByteUnchecked(bytes[i]);
   6314      }
   6315    }
   6316 
  // Emit a raw 64-bit constant.
  void int64Constant(int64_t i) {
    m_buffer.ensureSpace(sizeof(int64_t));
    m_buffer.putInt64Unchecked(i);
  }

  // Emit a raw 32-bit constant.
  void int32Constant(int32_t i) {
    m_buffer.ensureSpace(sizeof(int32_t));
    m_buffer.putIntUnchecked(i);
  }
   6326 
   6327    // Administrative methods:
   6328 
  // Thin forwarders to the underlying buffer.
  size_t size() const { return m_buffer.size(); }
  const unsigned char* buffer() const { return m_buffer.buffer(); }
  unsigned char* data() { return m_buffer.data(); }
  bool oom() const { return m_buffer.oom(); }
  bool reserve(size_t size) { return m_buffer.reserve(size); }
  bool swapBuffer(wasm::Bytes& other) { return m_buffer.swap(other); }
  bool isAligned(int alignment) const {
    return m_buffer.isAligned(alignment);
  }

  // Append raw bytes; returns false on allocation failure.
  [[nodiscard]] bool append(const unsigned char* values, size_t size) {
    return m_buffer.append(values, size);
  }
   6342 
  private:
   // Internals; ModRm and REX formatters.

   // Byte operand registers spl & above require a REX prefix, which precludes
   // use of the h registers in the same instruction.
   static bool byteRegRequiresRex(RegisterID reg) {
#ifdef JS_CODEGEN_X64
     return reg >= rsp;
#else
     return false;
#endif
   }
   6355 
   // For non-byte sizes, registers r8 & above always require a REX prefix
   // (their register number needs the REX.R/X/B extension bit).
   static bool regRequiresRex(RegisterID reg) {
#ifdef JS_CODEGEN_X64
     return reg >= r8;
#else
     return false;
#endif
   }
   6364 
#ifdef JS_CODEGEN_X64
   // Format a REX prefix byte: 0100WRXB, where w selects 64-bit operand
   // size and r, x, b each contribute their high bit (bit 3) as the
   // REX.R, REX.X, and REX.B register-number extensions.
   void emitRex(bool w, int r, int x, int b) {
     m_buffer.putByteUnchecked(PRE_REX | ((int)w << 3) | ((r >> 3) << 2) |
                               ((x >> 3) << 1) | (b >> 3));
   }

   // Used to plant a REX byte with REX.w set (for 64-bit operations).
   void emitRexW(int r, int x, int b) { emitRex(true, r, x, b); }

   // Used for operations with byte operands - use byteRegRequiresRex() to
   // check register operands, regRequiresRex() to check other registers
   // (i.e. address base & index).
   //
   // NB: WebKit's use of emitRexIf() is limited such that the
   // regRequiresRex() checks are not needed. SpiderMonkey extends
   // oneByteOp8 and twoByteOp8 functionality such that r, x, and b
   // can all be used.
   void emitRexIf(bool condition, int r, int x, int b) {
     if (condition || regRequiresRex(RegisterID(r)) ||
         regRequiresRex(RegisterID(x)) || regRequiresRex(RegisterID(b))) {
       emitRex(false, r, x, b);
     }
   }

   // Used for word sized operations, will plant a REX prefix if necessary
   // (if any register is r8 or above).
   void emitRexIfNeeded(int r, int x, int b) { emitRexIf(false, r, x, b); }
#else
   // No REX prefix bytes on 32-bit x86.
   void emitRexIf(bool condition, int, int, int) {
     MOZ_ASSERT(!condition, "32-bit x86 should never use a REX prefix");
   }
   void emitRexIfNeeded(int, int, int) {}
#endif
   6400 
   // Emit a ModR/M byte: mod in bits 7-6, reg/opcode in bits 5-3,
   // r/m in bits 2-0.  Only the low 3 bits of each register number are
   // used here; bit 3 goes in the REX prefix.
   void putModRm(ModRmMode mode, RegisterID rm, int reg) {
     m_buffer.putByteUnchecked((mode << 6) | ((reg & 7) << 3) | (rm & 7));
   }
   6404 
   // Emit a ModR/M byte whose r/m field selects a SIB byte, followed by
   // the SIB byte itself: scale in bits 7-6, index in 5-3, base in 2-0.
   void putModRmSib(ModRmMode mode, RegisterID base, RegisterID index,
                    int scale, int reg) {
     MOZ_ASSERT(mode != ModRmRegister);

     putModRm(mode, hasSib, reg);
     m_buffer.putByteUnchecked((scale << 6) | ((index & 7) << 3) | (base & 7));
   }
   6412 
   // Emit a ModR/M byte for a register-direct operand (mod == 11).
   void registerModRM(RegisterID rm, int reg) {
     putModRm(ModRmRegister, rm, reg);
   }
   6416 
   // Emit the ModR/M byte (plus SIB and displacement as required) for a
   // [base + offset] memory operand, picking the shortest displacement
   // encoding (none, disp8, or disp32).
   void memoryModRM(int32_t offset, RegisterID base, int reg) {
     // A base of esp or r12 would be interpreted as a sib, so force a
     // sib with no index & put the base in there.
#ifdef JS_CODEGEN_X64
     if ((base == hasSib) || (base == hasSib2)) {
#else
     if (base == hasSib) {
#endif
       if (!offset) {  // No need to check if the base is noBase, since we know
                       // it is hasSib!
         putModRmSib(ModRmMemoryNoDisp, base, noIndex, 0, reg);
       } else if (CAN_SIGN_EXTEND_8_32(offset)) {
         putModRmSib(ModRmMemoryDisp8, base, noIndex, 0, reg);
         m_buffer.putByteUnchecked(offset);
       } else {
         putModRmSib(ModRmMemoryDisp32, base, noIndex, 0, reg);
         m_buffer.putIntUnchecked(offset);
       }
     } else {
       // A base of noBase (== ebp, and noBase2 == r13 on x64) with mod == 00
       // would mean "disp32, no base", so those bases always need an
       // explicit displacement byte even when the offset is zero.
#ifdef JS_CODEGEN_X64
       if (!offset && (base != noBase) && (base != noBase2)) {
#else
       if (!offset && (base != noBase)) {
#endif
         putModRm(ModRmMemoryNoDisp, base, reg);
       } else if (CAN_SIGN_EXTEND_8_32(offset)) {
         putModRm(ModRmMemoryDisp8, base, reg);
         m_buffer.putByteUnchecked(offset);
       } else {
         putModRm(ModRmMemoryDisp32, base, reg);
         m_buffer.putIntUnchecked(offset);
       }
     }
   }
   6451 
   // Like memoryModRM(offset, base, reg), but always emits a full 32-bit
   // displacement — used when the displacement may be patched later.
   void memoryModRM_disp32(int32_t offset, RegisterID base, int reg) {
     // A base of esp or r12 would be interpreted as a sib, so force a
     // sib with no index & put the base in there.
#ifdef JS_CODEGEN_X64
     if ((base == hasSib) || (base == hasSib2)) {
#else
     if (base == hasSib) {
#endif
       putModRmSib(ModRmMemoryDisp32, base, noIndex, 0, reg);
       m_buffer.putIntUnchecked(offset);
     } else {
       putModRm(ModRmMemoryDisp32, base, reg);
       m_buffer.putIntUnchecked(offset);
     }
   }
   6467 
   // Emit ModR/M + SIB (+ displacement) for a [base + index*scale + offset]
   // operand, choosing the shortest displacement encoding.
   void memoryModRM(int32_t offset, RegisterID base, RegisterID index,
                    int scale, int reg) {
     MOZ_ASSERT(index != noIndex);

     // A base of noBase (== ebp; noBase2 == r13 on x64) with mod == 00 would
     // mean "no base", so such bases always take an explicit displacement.
#ifdef JS_CODEGEN_X64
     if (!offset && (base != noBase) && (base != noBase2)) {
#else
     if (!offset && (base != noBase)) {
#endif
       putModRmSib(ModRmMemoryNoDisp, base, index, scale, reg);
     } else if (CAN_SIGN_EXTEND_8_32(offset)) {
       putModRmSib(ModRmMemoryDisp8, base, index, scale, reg);
       m_buffer.putByteUnchecked(offset);
     } else {
       putModRmSib(ModRmMemoryDisp32, base, index, scale, reg);
       m_buffer.putIntUnchecked(offset);
     }
   }
   6486 
   // Emit ModR/M + SIB + disp32 for a base-less [index*scale + disp32]
   // effective address.
   void memoryModRM_disp32(int32_t offset, RegisterID index, int scale,
                           int reg) {
     MOZ_ASSERT(index != noIndex);

     // NB: the base-less memoryModRM overloads generate different code
     // than the base-full memoryModRM overloads in the base == noBase
     // case. The base-less overloads assume that the desired effective
     // address is:
     //
     //   reg := [scaled index] + disp32
     //
     // which means the mod needs to be ModRmMemoryNoDisp. The base-full
     // overloads pass ModRmMemoryDisp32 in all cases and thus, when
     // base == noBase (== ebp), the effective address is:
     //
     //   reg := [scaled index] + disp32 + [ebp]
     //
     // See Intel developer manual, Vol 2, 2.1.5, Table 2-3.
     putModRmSib(ModRmMemoryNoDisp, noBase, index, scale, reg);
     m_buffer.putIntUnchecked(offset);
   }
   6508 
   // Emit ModR/M (+ SIB on x64) + disp32 for an absolute-address operand.
   void memoryModRM_disp32(const void* address, int reg) {
     int32_t disp = AddressImmediate(address);

#ifdef JS_CODEGEN_X64
     // On x86-64, non-RIP-relative absolute mode requires a SIB.
     putModRmSib(ModRmMemoryNoDisp, noBase, noIndex, 0, reg);
#else
     // noBase + ModRmMemoryNoDisp means noBase + ModRmMemoryDisp32!
     putModRm(ModRmMemoryNoDisp, noBase, reg);
#endif
     m_buffer.putIntUnchecked(disp);
   }
   6521 
   // Convenience alias for the absolute-address form above.
   void memoryModRM(const void* address, int reg) {
     memoryModRM_disp32(address, reg);
   }
   6525 
   // Emit a VEX prefix followed by the opcode byte.  The compact two-byte
   // C5 form is used when X, B, and W are clear and the opcode map is
   // m == 1 (the 0x0F map); otherwise the three-byte C4 form is emitted.
   // The XOR masks (0xf8 / 0xe0 / 0x78) invert the fields that the VEX
   // encoding stores in complemented form (R, X, B, and vvvv).
   void threeOpVex(VexOperandType p, int r, int x, int b, int m, int w, int v,
                   int l, int opcode) {
     m_buffer.ensureSpace(MaxInstructionSize);

     // An unused vvvv operand must be encoded as register 0 (all-ones
     // after inversion).
     if (v == invalid_xmm) {
       v = XMMRegisterID(0);
     }

     if (x == 0 && b == 0 && m == 1 && w == 0) {
       // Two byte VEX.
       m_buffer.putByteUnchecked(PRE_VEX_C5);
       m_buffer.putByteUnchecked(((r << 7) | (v << 3) | (l << 2) | p) ^ 0xf8);
     } else {
       // Three byte VEX.
       m_buffer.putByteUnchecked(PRE_VEX_C4);
       m_buffer.putByteUnchecked(((r << 7) | (x << 6) | (b << 5) | m) ^ 0xe0);
       m_buffer.putByteUnchecked(((w << 7) | (v << 3) | (l << 2) | p) ^ 0x78);
     }

     m_buffer.putByteUnchecked(opcode);
   }
   6547 
   6548    x86_shared::AssemblerBuffer m_buffer;
   6549  } m_formatter;
   6550 
   6551  bool useVEX_;
   6552 };
   6553 
   6554 }  // namespace X86Encoding
   6555 
   6556 }  // namespace jit
   6557 }  // namespace js
   6558 
   6559 #endif /* jit_x86_shared_BaseAssembler_x86_shared_h */