tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Assembler-vixl.h (140719B)


      1 // Copyright 2015, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #ifndef VIXL_A64_ASSEMBLER_A64_H_
     28 #define VIXL_A64_ASSEMBLER_A64_H_
     29 
     30 #include "jit/arm64/vixl/Cpu-vixl.h"
     31 #include "jit/arm64/vixl/Globals-vixl.h"
     32 #include "jit/arm64/vixl/Instructions-vixl.h"
     33 #include "jit/arm64/vixl/MozBaseAssembler-vixl.h"
     34 #include "jit/arm64/vixl/Operands-vixl.h"
     35 #include "jit/arm64/vixl/Registers-vixl.h"
     36 #include "jit/arm64/vixl/Utils-vixl.h"
     37 
     38 #include "jit/JitSpewer.h"
     39 
     40 #include "jit/shared/Assembler-shared.h"
     41 #include "jit/shared/Disassembler-shared.h"
     42 #include "jit/shared/IonAssemblerBufferWithConstantPools.h"
     43 
     44 #if defined(_M_ARM64)
     45 #ifdef mvn
     46 #undef mvn
     47 #endif
     48 #endif
     49 
     50 namespace vixl {
     51 
     52 using js::jit::BufferOffset;
     53 using js::jit::Label;
     54 using js::jit::Address;
     55 using js::jit::BaseIndex;
     56 using js::jit::DisassemblerSpew;
     57 
     58 using LabelDoc = DisassemblerSpew::LabelDoc;
     59 
     60 
     61 // Control whether or not position-independent code should be emitted.
     62 enum PositionIndependentCodeOption {
     63  // All code generated will be position-independent; all branches and
     64  // references to labels generated with the Label class will use PC-relative
     65  // addressing.
     66  PositionIndependentCode,
     67 
     68  // Allow VIXL to generate code that refers to absolute addresses. With this
     69  // option, it will not be possible to copy the code buffer and run it from a
     70  // different address; code must be generated in its final location.
     71  PositionDependentCode,
     72 
     73  // Allow VIXL to assume that the bottom 12 bits of the address will be
     74  // constant, but that the top 48 bits may change. This allows `adrp` to
     75  // function in systems which copy code between pages, but otherwise maintain
     76  // 4KB page alignment.
     77  PageOffsetDependentCode
     78 };
     79 
     80 
     81 // Control how scaled- and unscaled-offset loads and stores are generated.
     82 enum LoadStoreScalingOption {
     83  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
     84  // register-offset, pre-index or post-index instructions if necessary.
     85  PreferScaledOffset,
     86 
     87  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
     88  // register-offset, pre-index or post-index instructions if necessary.
     89  PreferUnscaledOffset,
     90 
     91  // Require scaled-immediate-offset instructions.
     92  RequireScaledOffset,
     93 
     94  // Require unscaled-immediate-offset instructions.
     95  RequireUnscaledOffset
     96 };
     97 
     98 
     99 // Assembler.
    100 class Assembler : public MozBaseAssembler {
    101 public:
    102  Assembler(PositionIndependentCodeOption pic = PositionIndependentCode);
    103 
    104  // System functions.
    105 
    106  // Finalize a code buffer of generated instructions. This function must be
    107  // called before executing or copying code from the buffer.
    108  void FinalizeCode();
    109 
    110 #define COPYENUM(v) static const Condition v = vixl::v
    111 #define COPYENUM_(v) static const Condition v = vixl::v##_
    112  COPYENUM(Equal);
    113  COPYENUM(Zero);
    114  COPYENUM(NotEqual);
    115  COPYENUM(NonZero);
    116  COPYENUM(AboveOrEqual);
    117  COPYENUM(CarrySet);
    118  COPYENUM(Below);
    119  COPYENUM(CarryClear);
    120  COPYENUM(Signed);
    121  COPYENUM(NotSigned);
    122  COPYENUM(Overflow);
    123  COPYENUM(NoOverflow);
    124  COPYENUM(Above);
    125  COPYENUM(BelowOrEqual);
    126  COPYENUM_(GreaterThanOrEqual);
    127  COPYENUM_(LessThan);
    128  COPYENUM_(GreaterThan);
    129  COPYENUM_(LessThanOrEqual);
    130  COPYENUM(Always);
    131  COPYENUM(Never);
    132 #undef COPYENUM
    133 #undef COPYENUM_
    134 
    135  // Bit set when a DoubleCondition does not map to a single ARM condition.
    136  // The MacroAssembler must special-case these conditions, or else
    137  // ConditionFromDoubleCondition will complain.
    138  static const int DoubleConditionBitSpecial = 0x100;
    139 
    140  enum DoubleCondition {
    141    DoubleOrdered                        = Condition::vc,
    142    DoubleEqual                          = Condition::eq,
    143    DoubleNotEqual                       = Condition::ne | DoubleConditionBitSpecial,
    144    DoubleGreaterThan                    = Condition::gt,
    145    DoubleGreaterThanOrEqual             = Condition::ge,
    146    DoubleLessThan                       = Condition::lo, // Could also use Condition::mi.
    147    DoubleLessThanOrEqual                = Condition::ls,
    148 
    149    // If either operand is NaN, these conditions always evaluate to true.
    150    DoubleUnordered                      = Condition::vs,
    151    DoubleEqualOrUnordered               = Condition::eq | DoubleConditionBitSpecial,
    152    DoubleNotEqualOrUnordered            = Condition::ne,
    153    DoubleGreaterThanOrUnordered         = Condition::hi,
    154    DoubleGreaterThanOrEqualOrUnordered  = Condition::hs,
    155    DoubleLessThanOrUnordered            = Condition::lt,
    156    DoubleLessThanOrEqualOrUnordered     = Condition::le
    157  };
    158 
    159  static inline Condition InvertCondition(Condition cond) {
    160    // Conditions al and nv behave identically, as "always true". They can't be
    161    // inverted, because there is no "always false" condition.
    162    VIXL_ASSERT((cond != al) && (cond != nv));
    163    return static_cast<Condition>(cond ^ 1);
    164  }
    165 
    166  // This is chaging the condition codes for cmp a, b to the same codes for cmp b, a.
    167  static inline Condition SwapCmpOperandsCondition(Condition cond) {
    168    // Conditions al and nv behave identically, as "always true". They can't be
    169    // inverted, because there is no "always false" condition.
    170    switch (cond) {
    171    case eq:
    172    case ne:
    173      return cond;
    174    case gt:
    175      return lt;
    176    case le:
    177      return ge;
    178    case ge:
    179      return le;
    180    case lt:
    181      return gt;
    182    case hi:
    183      return lo;
    184    case lo:
    185      return hi;
    186    case hs:
    187      return ls;
    188    case ls:
    189      return hs;
    190    case mi:
    191      return pl;
    192    case pl:
    193      return mi;
    194    default:
    195      MOZ_CRASH("TODO: figure this case out.");
    196    }
    197    return static_cast<Condition>(cond ^ 1);
    198  }
    199 
    200  static inline DoubleCondition InvertCondition(DoubleCondition cond) {
    201      switch (cond) {
    202 case DoubleOrdered:
    203   return DoubleUnordered;
    204 case DoubleEqual:
    205   return DoubleNotEqualOrUnordered;
    206 case DoubleNotEqual:
    207   return DoubleEqualOrUnordered;
    208 case DoubleGreaterThan:
    209   return DoubleLessThanOrEqualOrUnordered;
    210 case DoubleGreaterThanOrEqual:
    211   return DoubleLessThanOrUnordered;
    212 case DoubleLessThan:
    213   return DoubleGreaterThanOrEqualOrUnordered;
    214 case DoubleLessThanOrEqual:
    215   return DoubleGreaterThanOrUnordered;
    216 case DoubleUnordered:
    217   return DoubleOrdered;
    218 case DoubleEqualOrUnordered:
    219   return DoubleNotEqual;
    220 case DoubleNotEqualOrUnordered:
    221   return DoubleEqual;
    222 case DoubleGreaterThanOrUnordered:
    223   return DoubleLessThanOrEqual;
    224 case DoubleGreaterThanOrEqualOrUnordered:
    225   return DoubleLessThan;
    226 case DoubleLessThanOrUnordered:
    227   return DoubleGreaterThanOrEqual;
    228 case DoubleLessThanOrEqualOrUnordered:
    229   return DoubleGreaterThan;
    230 default:
    231   MOZ_CRASH("Bad condition");
    232    }
    233  }
    234 
    235  static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) {
    236    VIXL_ASSERT(!(cond & DoubleConditionBitSpecial));
    237    return static_cast<Condition>(cond);
    238  }
    239 
    240  // Instruction set functions.
    241 
    242  // Branch / Jump instructions.
    243  // Branch to register.
    244  void br(const Register& xn);
    245  static void br(Instruction* at, const Register& xn);
    246 
    247  // Branch with link to register.
    248  void blr(const Register& xn);
    249  static void blr(Instruction* at, const Register& blr);
    250 
    251  // Branch to register with return hint.
    252  void ret(const Register& xn = lr);
    253 
    254  // Unconditional branch to label.
    255  BufferOffset b(Label* label);
    256 
    257  // Conditional branch to label.
    258  BufferOffset b(Label* label, Condition cond);
    259 
    260  // Unconditional branch to PC offset.
    261  BufferOffset b(int imm26, const LabelDoc& doc);
    262  static void b(Instruction* at, int imm26);
    263 
    264  // Conditional branch to PC offset.
    265  BufferOffset b(int imm19, Condition cond, const LabelDoc& doc);
    266  static void b(Instruction*at, int imm19, Condition cond);
    267 
    268  // Branch with link to label.
    269  void bl(Label* label);
    270 
    271  // Branch with link to PC offset.
    272  void bl(int imm26, const LabelDoc& doc);
    273  static void bl(Instruction* at, int imm26);
    274 
    275  // Compare and branch to label if zero.
    276  void cbz(const Register& rt, Label* label);
    277 
    278  // Compare and branch to PC offset if zero.
    279  void cbz(const Register& rt, int imm19, const LabelDoc& doc);
    280  static void cbz(Instruction* at, const Register& rt, int imm19);
    281 
    282  // Compare and branch to label if not zero.
    283  void cbnz(const Register& rt, Label* label);
    284 
    285  // Compare and branch to PC offset if not zero.
    286  void cbnz(const Register& rt, int imm19, const LabelDoc& doc);
    287  static void cbnz(Instruction* at, const Register& rt, int imm19);
    288 
    289  // Table lookup from one register.
    290  void tbl(const VRegister& vd,
    291           const VRegister& vn,
    292           const VRegister& vm);
    293 
    294  // Table lookup from two registers.
    295  void tbl(const VRegister& vd,
    296           const VRegister& vn,
    297           const VRegister& vn2,
    298           const VRegister& vm);
    299 
    300  // Table lookup from three registers.
    301  void tbl(const VRegister& vd,
    302           const VRegister& vn,
    303           const VRegister& vn2,
    304           const VRegister& vn3,
    305           const VRegister& vm);
    306 
    307  // Table lookup from four registers.
    308  void tbl(const VRegister& vd,
    309           const VRegister& vn,
    310           const VRegister& vn2,
    311           const VRegister& vn3,
    312           const VRegister& vn4,
    313           const VRegister& vm);
    314 
    315  // Table lookup extension from one register.
    316  void tbx(const VRegister& vd,
    317           const VRegister& vn,
    318           const VRegister& vm);
    319 
    320  // Table lookup extension from two registers.
    321  void tbx(const VRegister& vd,
    322           const VRegister& vn,
    323           const VRegister& vn2,
    324           const VRegister& vm);
    325 
    326  // Table lookup extension from three registers.
    327  void tbx(const VRegister& vd,
    328           const VRegister& vn,
    329           const VRegister& vn2,
    330           const VRegister& vn3,
    331           const VRegister& vm);
    332 
    333  // Table lookup extension from four registers.
    334  void tbx(const VRegister& vd,
    335           const VRegister& vn,
    336           const VRegister& vn2,
    337           const VRegister& vn3,
    338           const VRegister& vn4,
    339           const VRegister& vm);
    340 
    341  // Test bit and branch to label if zero.
    342  void tbz(const Register& rt, unsigned bit_pos, Label* label);
    343 
    344  // Test bit and branch to PC offset if zero.
    345  void tbz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc);
    346  static void tbz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14);
    347 
    348  // Test bit and branch to label if not zero.
    349  void tbnz(const Register& rt, unsigned bit_pos, Label* label);
    350 
    351  // Test bit and branch to PC offset if not zero.
    352  void tbnz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc);
    353  static void tbnz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14);
    354 
    355  // Address calculation instructions.
    356  // Calculate a PC-relative address. Unlike for branches the offset in adr is
    357  // unscaled (i.e. the result can be unaligned).
    358 
    359  // Calculate the address of a label.
    360  void adr(const Register& rd, Label* label);
    361 
    362  // Calculate the address of a PC offset.
    363  void adr(const Register& rd, int imm21, const LabelDoc& doc);
    364  static void adr(Instruction* at, const Register& rd, int imm21);
    365 
    366  // Calculate the page address of a label.
    367  void adrp(const Register& rd, Label* label);
    368 
    369  // Calculate the page address of a PC offset.
    370  void adrp(const Register& rd, int imm21, const LabelDoc& doc);
    371  static void adrp(Instruction* at, const Register& rd, int imm21);
    372 
    373  // Data Processing instructions.
    374  // Add.
    375  void add(const Register& rd,
    376           const Register& rn,
    377           const Operand& operand);
    378 
    379  // Add and update status flags.
    380  void adds(const Register& rd,
    381            const Register& rn,
    382            const Operand& operand);
    383 
    384  // Compare negative.
    385  void cmn(const Register& rn, const Operand& operand);
    386 
    387  // Subtract.
    388  void sub(const Register& rd,
    389           const Register& rn,
    390           const Operand& operand);
    391 
    392  // Subtract and update status flags.
    393  void subs(const Register& rd,
    394            const Register& rn,
    395            const Operand& operand);
    396 
    397  // Compare.
    398  void cmp(const Register& rn, const Operand& operand);
    399 
    400  // Negate.
    401  void neg(const Register& rd,
    402           const Operand& operand);
    403 
    404  // Negate and update status flags.
    405  void negs(const Register& rd,
    406            const Operand& operand);
    407 
    408  // Add with carry bit.
    409  void adc(const Register& rd,
    410           const Register& rn,
    411           const Operand& operand);
    412 
    413  // Add with carry bit and update status flags.
    414  void adcs(const Register& rd,
    415            const Register& rn,
    416            const Operand& operand);
    417 
    418  // Subtract with carry bit.
    419  void sbc(const Register& rd,
    420           const Register& rn,
    421           const Operand& operand);
    422 
    423  // Subtract with carry bit and update status flags.
    424  void sbcs(const Register& rd,
    425            const Register& rn,
    426            const Operand& operand);
    427 
    428  // Negate with carry bit.
    429  void ngc(const Register& rd,
    430           const Operand& operand);
    431 
    432  // Negate with carry bit and update status flags.
    433  void ngcs(const Register& rd,
    434            const Operand& operand);
    435 
    436  // Logical instructions.
    437  // Bitwise and (A & B).
    438  void and_(const Register& rd,
    439            const Register& rn,
    440            const Operand& operand);
    441 
    442  // Bitwise and (A & B) and update status flags.
    443  BufferOffset ands(const Register& rd,
    444                    const Register& rn,
    445                    const Operand& operand);
    446 
    447  // Bit test and set flags.
    448  BufferOffset tst(const Register& rn, const Operand& operand);
    449 
    450  // Bit clear (A & ~B).
    451  void bic(const Register& rd,
    452           const Register& rn,
    453           const Operand& operand);
    454 
    455  // Bit clear (A & ~B) and update status flags.
    456  void bics(const Register& rd,
    457            const Register& rn,
    458            const Operand& operand);
    459 
    460  // Bitwise or (A | B).
    461  void orr(const Register& rd, const Register& rn, const Operand& operand);
    462 
     463  // Bitwise or-not (A | ~B).
    464  void orn(const Register& rd, const Register& rn, const Operand& operand);
    465 
    466  // Bitwise eor/xor (A ^ B).
    467  void eor(const Register& rd, const Register& rn, const Operand& operand);
    468 
    469  // Bitwise enor/xnor (A ^ ~B).
    470  void eon(const Register& rd, const Register& rn, const Operand& operand);
    471 
    472  // Logical shift left by variable.
    473  void lslv(const Register& rd, const Register& rn, const Register& rm);
    474 
    475  // Logical shift right by variable.
    476  void lsrv(const Register& rd, const Register& rn, const Register& rm);
    477 
    478  // Arithmetic shift right by variable.
    479  void asrv(const Register& rd, const Register& rn, const Register& rm);
    480 
    481  // Rotate right by variable.
    482  void rorv(const Register& rd, const Register& rn, const Register& rm);
    483 
    484  // Bitfield instructions.
    485  // Bitfield move.
    486  void bfm(const Register& rd,
    487           const Register& rn,
    488           unsigned immr,
    489           unsigned imms);
    490 
    491  // Signed bitfield move.
    492  void sbfm(const Register& rd,
    493            const Register& rn,
    494            unsigned immr,
    495            unsigned imms);
    496 
    497  // Unsigned bitfield move.
    498  void ubfm(const Register& rd,
    499            const Register& rn,
    500            unsigned immr,
    501            unsigned imms);
    502 
    503  // Bfm aliases.
    504  // Bitfield insert.
    505  void bfi(const Register& rd,
    506           const Register& rn,
    507           unsigned lsb,
    508           unsigned width) {
    509    VIXL_ASSERT(width >= 1);
    510    VIXL_ASSERT(lsb + width <= rn.size());
    511    bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
    512  }
    513 
    514  // Bitfield extract and insert low.
    515  void bfxil(const Register& rd,
    516             const Register& rn,
    517             unsigned lsb,
    518             unsigned width) {
    519    VIXL_ASSERT(width >= 1);
    520    VIXL_ASSERT(lsb + width <= rn.size());
    521    bfm(rd, rn, lsb, lsb + width - 1);
    522  }
    523 
    524  // Sbfm aliases.
    525  // Arithmetic shift right.
    526  void asr(const Register& rd, const Register& rn, unsigned shift) {
    527    VIXL_ASSERT(shift < rd.size());
    528    sbfm(rd, rn, shift, rd.size() - 1);
    529  }
    530 
    531  // Signed bitfield insert with zero at right.
    532  void sbfiz(const Register& rd,
    533             const Register& rn,
    534             unsigned lsb,
    535             unsigned width) {
    536    VIXL_ASSERT(width >= 1);
    537    VIXL_ASSERT(lsb + width <= rn.size());
    538    sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
    539  }
    540 
    541  // Signed bitfield extract.
    542  void sbfx(const Register& rd,
    543            const Register& rn,
    544            unsigned lsb,
    545            unsigned width) {
    546    VIXL_ASSERT(width >= 1);
    547    VIXL_ASSERT(lsb + width <= rn.size());
    548    sbfm(rd, rn, lsb, lsb + width - 1);
    549  }
    550 
    551  // Signed extend byte.
    552  void sxtb(const Register& rd, const Register& rn) {
    553    sbfm(rd, rn, 0, 7);
    554  }
    555 
    556  // Signed extend halfword.
    557  void sxth(const Register& rd, const Register& rn) {
    558    sbfm(rd, rn, 0, 15);
    559  }
    560 
    561  // Signed extend word.
    562  void sxtw(const Register& rd, const Register& rn) {
    563    sbfm(rd, rn, 0, 31);
    564  }
    565 
    566  // Ubfm aliases.
    567  // Logical shift left.
    568  void lsl(const Register& rd, const Register& rn, unsigned shift) {
    569    unsigned reg_size = rd.size();
    570    VIXL_ASSERT(shift < reg_size);
    571    ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
    572  }
    573 
    574  // Logical shift right.
    575  void lsr(const Register& rd, const Register& rn, unsigned shift) {
    576    VIXL_ASSERT(shift < rd.size());
    577    ubfm(rd, rn, shift, rd.size() - 1);
    578  }
    579 
    580  // Unsigned bitfield insert with zero at right.
    581  void ubfiz(const Register& rd,
    582             const Register& rn,
    583             unsigned lsb,
    584             unsigned width) {
    585    VIXL_ASSERT(width >= 1);
    586    VIXL_ASSERT(lsb + width <= rn.size());
    587    ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
    588  }
    589 
    590  // Unsigned bitfield extract.
    591  void ubfx(const Register& rd,
    592            const Register& rn,
    593            unsigned lsb,
    594            unsigned width) {
    595    VIXL_ASSERT(width >= 1);
    596    VIXL_ASSERT(lsb + width <= rn.size());
    597    ubfm(rd, rn, lsb, lsb + width - 1);
    598  }
    599 
    600  // Unsigned extend byte.
    601  void uxtb(const Register& rd, const Register& rn) {
    602    ubfm(rd, rn, 0, 7);
    603  }
    604 
    605  // Unsigned extend halfword.
    606  void uxth(const Register& rd, const Register& rn) {
    607    ubfm(rd, rn, 0, 15);
    608  }
    609 
    610  // Unsigned extend word.
    611  void uxtw(const Register& rd, const Register& rn) {
    612    ubfm(rd, rn, 0, 31);
    613  }
    614 
    615  // Extract.
    616  void extr(const Register& rd,
    617            const Register& rn,
    618            const Register& rm,
    619            unsigned lsb);
    620 
    621  // Conditional select: rd = cond ? rn : rm.
    622  void csel(const Register& rd,
    623            const Register& rn,
    624            const Register& rm,
    625            Condition cond);
    626 
    627  // Conditional select increment: rd = cond ? rn : rm + 1.
    628  void csinc(const Register& rd,
    629             const Register& rn,
    630             const Register& rm,
    631             Condition cond);
    632 
    633  // Conditional select inversion: rd = cond ? rn : ~rm.
    634  void csinv(const Register& rd,
    635             const Register& rn,
    636             const Register& rm,
    637             Condition cond);
    638 
    639  // Conditional select negation: rd = cond ? rn : -rm.
    640  void csneg(const Register& rd,
    641             const Register& rn,
    642             const Register& rm,
    643             Condition cond);
    644 
    645  // Conditional set: rd = cond ? 1 : 0.
    646  void cset(const Register& rd, Condition cond);
    647 
    648  // Conditional set mask: rd = cond ? -1 : 0.
    649  void csetm(const Register& rd, Condition cond);
    650 
    651  // Conditional increment: rd = cond ? rn + 1 : rn.
    652  void cinc(const Register& rd, const Register& rn, Condition cond);
    653 
    654  // Conditional invert: rd = cond ? ~rn : rn.
    655  void cinv(const Register& rd, const Register& rn, Condition cond);
    656 
    657  // Conditional negate: rd = cond ? -rn : rn.
    658  void cneg(const Register& rd, const Register& rn, Condition cond);
    659 
    660  // Rotate right.
    661  void ror(const Register& rd, const Register& rs, unsigned shift) {
    662    extr(rd, rs, rs, shift);
    663  }
    664 
    665  // Conditional comparison.
    666  // Conditional compare negative.
    667  void ccmn(const Register& rn,
    668            const Operand& operand,
    669            StatusFlags nzcv,
    670            Condition cond);
    671 
    672  // Conditional compare.
    673  void ccmp(const Register& rn,
    674            const Operand& operand,
    675            StatusFlags nzcv,
    676            Condition cond);
    677 
    678  // CRC-32 checksum from byte.
    679  void crc32b(const Register& rd,
    680              const Register& rn,
    681              const Register& rm);
    682 
    683  // CRC-32 checksum from half-word.
    684  void crc32h(const Register& rd,
    685              const Register& rn,
    686              const Register& rm);
    687 
    688  // CRC-32 checksum from word.
    689  void crc32w(const Register& rd,
    690              const Register& rn,
    691              const Register& rm);
    692 
    693  // CRC-32 checksum from double word.
    694  void crc32x(const Register& rd,
    695              const Register& rn,
    696              const Register& rm);
    697 
     698  // CRC-32C checksum from byte.
    699  void crc32cb(const Register& rd,
    700               const Register& rn,
    701               const Register& rm);
    702 
     703  // CRC-32C checksum from half-word.
    704  void crc32ch(const Register& rd,
    705               const Register& rn,
    706               const Register& rm);
    707 
     708  // CRC-32C checksum from word.
    709  void crc32cw(const Register& rd,
    710               const Register& rn,
    711               const Register& rm);
    712 
    713  // CRC-32C checksum from double word.
    714  void crc32cx(const Register& rd,
    715               const Register& rn,
    716               const Register& rm);
    717 
    718  // Multiply.
    719  void mul(const Register& rd, const Register& rn, const Register& rm);
    720 
    721  // Negated multiply.
    722  void mneg(const Register& rd, const Register& rn, const Register& rm);
    723 
    724  // Signed long multiply: 32 x 32 -> 64-bit.
    725  void smull(const Register& rd, const Register& rn, const Register& rm);
    726 
    727  // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
    728  void smulh(const Register& xd, const Register& xn, const Register& xm);
    729 
    730  // Multiply and accumulate.
    731  void madd(const Register& rd,
    732            const Register& rn,
    733            const Register& rm,
    734            const Register& ra);
    735 
    736  // Multiply and subtract.
    737  void msub(const Register& rd,
    738            const Register& rn,
    739            const Register& rm,
    740            const Register& ra);
    741 
    742  // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
    743  void smaddl(const Register& rd,
    744              const Register& rn,
    745              const Register& rm,
    746              const Register& ra);
    747 
    748  // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
    749  void umaddl(const Register& rd,
    750              const Register& rn,
    751              const Register& rm,
    752              const Register& ra);
    753 
    754  // Unsigned long multiply: 32 x 32 -> 64-bit.
    755  void umull(const Register& rd,
    756             const Register& rn,
    757             const Register& rm) {
    758    umaddl(rd, rn, rm, xzr);
    759  }
    760 
    761  // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
    762  void umulh(const Register& xd,
    763             const Register& xn,
    764             const Register& xm);
    765 
    766  // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
    767  void smsubl(const Register& rd,
    768              const Register& rn,
    769              const Register& rm,
    770              const Register& ra);
    771 
    772  // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
    773  void umsubl(const Register& rd,
    774              const Register& rn,
    775              const Register& rm,
    776              const Register& ra);
    777 
    778  // Signed integer divide.
    779  void sdiv(const Register& rd, const Register& rn, const Register& rm);
    780 
    781  // Unsigned integer divide.
    782  void udiv(const Register& rd, const Register& rn, const Register& rm);
    783 
    784  // Bit reverse.
    785  void rbit(const Register& rd, const Register& rn);
    786 
    787  // Reverse bytes in 16-bit half words.
    788  void rev16(const Register& rd, const Register& rn);
    789 
    790  // Reverse bytes in 32-bit words.
    791  void rev32(const Register& rd, const Register& rn);
    792 
    793  // Reverse bytes.
    794  void rev(const Register& rd, const Register& rn);
    795 
    796  // Count leading zeroes.
    797  void clz(const Register& rd, const Register& rn);
    798 
    799  // Count leading sign bits.
    800  void cls(const Register& rd, const Register& rn);
    801 
    802  // Memory instructions.
    803  // Load integer or FP register.
    804  void ldr(const CPURegister& rt, const MemOperand& src,
    805           LoadStoreScalingOption option = PreferScaledOffset);
    806 
    807  // Store integer or FP register.
    808  void str(const CPURegister& rt, const MemOperand& dst,
    809           LoadStoreScalingOption option = PreferScaledOffset);
    810 
    811  // Load word with sign extension.
    812  void ldrsw(const Register& rt, const MemOperand& src,
    813             LoadStoreScalingOption option = PreferScaledOffset);
    814 
    815  // Load byte.
    816  void ldrb(const Register& rt, const MemOperand& src,
    817            LoadStoreScalingOption option = PreferScaledOffset);
    818 
    819  // Store byte.
    820  void strb(const Register& rt, const MemOperand& dst,
    821            LoadStoreScalingOption option = PreferScaledOffset);
    822 
    823  // Load byte with sign extension.
    824  void ldrsb(const Register& rt, const MemOperand& src,
    825             LoadStoreScalingOption option = PreferScaledOffset);
    826 
    827  // Load half-word.
    828  void ldrh(const Register& rt, const MemOperand& src,
    829            LoadStoreScalingOption option = PreferScaledOffset);
    830 
    831  // Store half-word.
    832  void strh(const Register& rt, const MemOperand& dst,
    833            LoadStoreScalingOption option = PreferScaledOffset);
    834 
    835  // Load half-word with sign extension.
    836  void ldrsh(const Register& rt, const MemOperand& src,
    837             LoadStoreScalingOption option = PreferScaledOffset);
    838 
    839  // Load integer or FP register (with unscaled offset).
    840  void ldur(const CPURegister& rt, const MemOperand& src,
    841            LoadStoreScalingOption option = PreferUnscaledOffset);
    842 
    843  // Store integer or FP register (with unscaled offset).
    844  void stur(const CPURegister& rt, const MemOperand& dst,
    845            LoadStoreScalingOption option = PreferUnscaledOffset);
    846 
    847  // Load word with sign extension (and unscaled offset).
    848  void ldursw(const Register& rt, const MemOperand& src,
    849              LoadStoreScalingOption option = PreferUnscaledOffset);
    850 
    851  // Load byte (with unscaled offset).
    852  void ldurb(const Register& rt, const MemOperand& src,
    853             LoadStoreScalingOption option = PreferUnscaledOffset);
    854 
    855  // Store byte (with unscaled offset).
    856  void sturb(const Register& rt, const MemOperand& dst,
    857             LoadStoreScalingOption option = PreferUnscaledOffset);
    858 
    859  // Load byte with sign extension (and unscaled offset).
    860  void ldursb(const Register& rt, const MemOperand& src,
    861              LoadStoreScalingOption option = PreferUnscaledOffset);
    862 
    863  // Load half-word (with unscaled offset).
    864  void ldurh(const Register& rt, const MemOperand& src,
    865             LoadStoreScalingOption option = PreferUnscaledOffset);
    866 
    867  // Store half-word (with unscaled offset).
    868  void sturh(const Register& rt, const MemOperand& dst,
    869             LoadStoreScalingOption option = PreferUnscaledOffset);
    870 
    871  // Load half-word with sign extension (and unscaled offset).
    872  void ldursh(const Register& rt, const MemOperand& src,
    873              LoadStoreScalingOption option = PreferUnscaledOffset);
    874 
    875  // Load integer or FP register pair.
    876  void ldp(const CPURegister& rt, const CPURegister& rt2,
    877           const MemOperand& src);
    878 
    879  // Store integer or FP register pair.
    880  void stp(const CPURegister& rt, const CPURegister& rt2,
    881           const MemOperand& dst);
    882 
    883  // Load word pair with sign extension.
    884  void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);
    885 
    886  // Load integer or FP register pair, non-temporal.
    887  void ldnp(const CPURegister& rt, const CPURegister& rt2,
    888            const MemOperand& src);
    889 
    890  // Store integer or FP register pair, non-temporal.
    891  void stnp(const CPURegister& rt, const CPURegister& rt2,
    892            const MemOperand& dst);
    893 
    894  // Load integer or FP register from pc + imm19 << 2.
    895  void ldr(const CPURegister& rt, int imm19);
         // Static overload of the above taking an explicit Instruction* `at`.
         // NOTE(review): presumably encodes the load at `at` instead of at the
         // assembler's current position -- confirm against the definition.
    896  static void ldr(Instruction* at, const CPURegister& rt, int imm19);
    897 
    898  // Load word with sign extension from pc + imm19 << 2.
    899  void ldrsw(const Register& rt, int imm19);
         // Static overload of the above taking an explicit Instruction* `at`.
         // NOTE(review): this overload accepts a CPURegister while the member
         // overload accepts a Register -- confirm the wider type is intended.
    900  static void ldrsw(Instruction* at, const CPURegister& rt, int imm19);
    901 
    902  // Store exclusive byte.
    903  void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
    904 
    905  // Store exclusive half-word.
    906  void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
    907 
    908  // Store exclusive register.
    909  void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
    910 
    911  // Load exclusive byte.
    912  void ldxrb(const Register& rt, const MemOperand& src);
    913 
    914  // Load exclusive half-word.
    915  void ldxrh(const Register& rt, const MemOperand& src);
    916 
    917  // Load exclusive register.
    918  void ldxr(const Register& rt, const MemOperand& src);
    919 
    920  // Store exclusive register pair.
    921  void stxp(const Register& rs,
    922            const Register& rt,
    923            const Register& rt2,
    924            const MemOperand& dst);
    925 
    926  // Load exclusive register pair.
    927  void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
    928 
    929  // Store-release exclusive byte.
    930  void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
    931 
    932  // Store-release exclusive half-word.
    933  void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
    934 
    935  // Store-release exclusive register.
    936  void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
    937 
    938  // Load-acquire exclusive byte.
    939  void ldaxrb(const Register& rt, const MemOperand& src);
    940 
    941  // Load-acquire exclusive half-word.
    942  void ldaxrh(const Register& rt, const MemOperand& src);
    943 
    944  // Load-acquire exclusive register.
    945  void ldaxr(const Register& rt, const MemOperand& src);
    946 
    947  // Store-release exclusive register pair.
    948  void stlxp(const Register& rs,
    949             const Register& rt,
    950             const Register& rt2,
    951             const MemOperand& dst);
    952 
    953  // Load-acquire exclusive register pair.
    954  void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
    955 
    956  // Store-release byte.
    957  void stlrb(const Register& rt, const MemOperand& dst);
    958 
    959  // Store-release half-word.
    960  void stlrh(const Register& rt, const MemOperand& dst);
    961 
    962  // Store-release register.
    963  void stlr(const Register& rt, const MemOperand& dst);
    964 
    965  // Load-acquire byte.
    966  void ldarb(const Register& rt, const MemOperand& src);
    967 
    968  // Load-acquire half-word.
    969  void ldarh(const Register& rt, const MemOperand& src);
    970 
    971  // Load-acquire register.
    972  void ldar(const Register& rt, const MemOperand& src);
    973 
    974  // Compare and Swap word or doubleword in memory [Armv8.1].
    975  void cas(const Register& rs, const Register& rt, const MemOperand& src);
    976 
    977  // Compare and Swap word or doubleword in memory, with Load-acquire
         // semantics [Armv8.1].
    978  void casa(const Register& rs, const Register& rt, const MemOperand& src);
    979 
    980  // Compare and Swap word or doubleword in memory, with Store-release
         // semantics [Armv8.1].
    981  void casl(const Register& rs, const Register& rt, const MemOperand& src);
    982 
    983  // Compare and Swap word or doubleword in memory, with Load-acquire and
         // Store-release semantics [Armv8.1].
    984  void casal(const Register& rs, const Register& rt, const MemOperand& src);
    985 
    986  // Compare and Swap byte in memory [Armv8.1].
    987  void casb(const Register& rs, const Register& rt, const MemOperand& src);
    988 
    989  // Compare and Swap byte in memory, with Load-acquire semantics [Armv8.1].
    990  void casab(const Register& rs, const Register& rt, const MemOperand& src);
    991 
    992  // Compare and Swap byte in memory, with Store-release semantics [Armv8.1].
    993  void caslb(const Register& rs, const Register& rt, const MemOperand& src);
    994 
    995  // Compare and Swap byte in memory, with Load-acquire and Store-release
         // semantics [Armv8.1].
    996  void casalb(const Register& rs, const Register& rt, const MemOperand& src);
    997 
    998  // Compare and Swap halfword in memory [Armv8.1].
    999  void cash(const Register& rs, const Register& rt, const MemOperand& src);
   1000 
   1001  // Compare and Swap halfword in memory, with Load-acquire semantics
         // [Armv8.1].
   1002  void casah(const Register& rs, const Register& rt, const MemOperand& src);
   1003 
   1004  // Compare and Swap halfword in memory, with Store-release semantics
         // [Armv8.1].
   1005  void caslh(const Register& rs, const Register& rt, const MemOperand& src);
   1006 
   1007  // Compare and Swap halfword in memory, with Load-acquire and
         // Store-release semantics [Armv8.1].
   1008  void casalh(const Register& rs, const Register& rt, const MemOperand& src);
   1009 
   1010  // Compare and Swap Pair of words or doublewords in memory [Armv8.1].
   1011  void casp(const Register& rs,
   1012            const Register& rs2,
   1013            const Register& rt,
   1014            const Register& rt2,
   1015            const MemOperand& src);
   1016 
   1017  // Compare and Swap Pair of words or doublewords in memory, with
         // Load-acquire semantics [Armv8.1].
   1018  void caspa(const Register& rs,
   1019             const Register& rs2,
   1020             const Register& rt,
   1021             const Register& rt2,
   1022             const MemOperand& src);
   1023 
   1024  // Compare and Swap Pair of words or doublewords in memory, with
         // Store-release semantics [Armv8.1].
   1025  void caspl(const Register& rs,
   1026             const Register& rs2,
   1027             const Register& rt,
   1028             const Register& rt2,
   1029             const MemOperand& src);
   1030 
   1031  // Compare and Swap Pair of words or doublewords in memory, with
         // Load-acquire and Store-release semantics [Armv8.1].
   1032  void caspal(const Register& rs,
   1033              const Register& rs2,
   1034              const Register& rt,
   1035              const Register& rt2,
   1036              const MemOperand& src);
   1037 
   1038  // Atomic add on byte in memory [Armv8.1]
   1039  void ldaddb(const Register& rs, const Register& rt, const MemOperand& src);
   1040 
   1041  // Atomic add on byte in memory, with Load-acquire semantics [Armv8.1]
   1042  void ldaddab(const Register& rs, const Register& rt, const MemOperand& src);
   1043 
   1044  // Atomic add on byte in memory, with Store-release semantics [Armv8.1]
   1045  void ldaddlb(const Register& rs, const Register& rt, const MemOperand& src);
   1046 
   1047  // Atomic add on byte in memory, with Load-acquire and Store-release semantics
   1048  // [Armv8.1]
   1049  void ldaddalb(const Register& rs, const Register& rt, const MemOperand& src);
   1050 
   1051  // Atomic add on halfword in memory [Armv8.1]
   1052  void ldaddh(const Register& rs, const Register& rt, const MemOperand& src);
   1053 
   1054  // Atomic add on halfword in memory, with Load-acquire semantics [Armv8.1]
   1055  void ldaddah(const Register& rs, const Register& rt, const MemOperand& src);
   1056 
   1057  // Atomic add on halfword in memory, with Store-release semantics [Armv8.1]
   1058  void ldaddlh(const Register& rs, const Register& rt, const MemOperand& src);
   1059 
   1060  // Atomic add on halfword in memory, with Load-acquire and Store-release
   1061  // semantics [Armv8.1]
   1062  void ldaddalh(const Register& rs, const Register& rt, const MemOperand& src);
   1063 
   1064  // Atomic add on word or doubleword in memory [Armv8.1]
   1065  void ldadd(const Register& rs, const Register& rt, const MemOperand& src);
   1066 
   1067  // Atomic add on word or doubleword in memory, with Load-acquire semantics
   1068  // [Armv8.1]
   1069  void ldadda(const Register& rs, const Register& rt, const MemOperand& src);
   1070 
   1071  // Atomic add on word or doubleword in memory, with Store-release semantics
   1072  // [Armv8.1]
   1073  void ldaddl(const Register& rs, const Register& rt, const MemOperand& src);
   1074 
   1075  // Atomic add on word or doubleword in memory, with Load-acquire and
   1076  // Store-release semantics [Armv8.1]
   1077  void ldaddal(const Register& rs, const Register& rt, const MemOperand& src);
   1078 
   1079  // Atomic bit clear on byte in memory [Armv8.1]
   1080  void ldclrb(const Register& rs, const Register& rt, const MemOperand& src);
   1081 
   1082  // Atomic bit clear on byte in memory, with Load-acquire semantics [Armv8.1]
   1083  void ldclrab(const Register& rs, const Register& rt, const MemOperand& src);
   1084 
   1085  // Atomic bit clear on byte in memory, with Store-release semantics [Armv8.1]
   1086  void ldclrlb(const Register& rs, const Register& rt, const MemOperand& src);
   1087 
   1088  // Atomic bit clear on byte in memory, with Load-acquire and Store-release
   1089  // semantics [Armv8.1]
   1090  void ldclralb(const Register& rs, const Register& rt, const MemOperand& src);
   1091 
   1092  // Atomic bit clear on halfword in memory [Armv8.1]
   1093  void ldclrh(const Register& rs, const Register& rt, const MemOperand& src);
   1094 
   1095  // Atomic bit clear on halfword in memory, with Load-acquire semantics
   1096  // [Armv8.1]
   1097  void ldclrah(const Register& rs, const Register& rt, const MemOperand& src);
   1098 
   1099  // Atomic bit clear on halfword in memory, with Store-release semantics
   1100  // [Armv8.1]
   1101  void ldclrlh(const Register& rs, const Register& rt, const MemOperand& src);
   1102 
   1103  // Atomic bit clear on halfword in memory, with Load-acquire and Store-release
   1104  // semantics [Armv8.1]
   1105  void ldclralh(const Register& rs, const Register& rt, const MemOperand& src);
   1106 
   1107  // Atomic bit clear on word or doubleword in memory [Armv8.1]
   1108  void ldclr(const Register& rs, const Register& rt, const MemOperand& src);
   1109 
   1110  // Atomic bit clear on word or doubleword in memory, with Load-acquire
   1111  // semantics [Armv8.1]
   1112  void ldclra(const Register& rs, const Register& rt, const MemOperand& src);
   1113 
   1114  // Atomic bit clear on word or doubleword in memory, with Store-release
   1115  // semantics [Armv8.1]
   1116  void ldclrl(const Register& rs, const Register& rt, const MemOperand& src);
   1117 
   1118  // Atomic bit clear on word or doubleword in memory, with Load-acquire and
   1119  // Store-release semantics [Armv8.1]
   1120  void ldclral(const Register& rs, const Register& rt, const MemOperand& src);
   1121 
   1122  // Atomic exclusive OR on byte in memory [Armv8.1]
   1123  void ldeorb(const Register& rs, const Register& rt, const MemOperand& src);
   1124 
   1125  // Atomic exclusive OR on byte in memory, with Load-acquire semantics
   1126  // [Armv8.1]
   1127  void ldeorab(const Register& rs, const Register& rt, const MemOperand& src);
   1128 
   1129  // Atomic exclusive OR on byte in memory, with Store-release semantics
   1130  // [Armv8.1]
   1131  void ldeorlb(const Register& rs, const Register& rt, const MemOperand& src);
   1132 
   1133  // Atomic exclusive OR on byte in memory, with Load-acquire and Store-release
   1134  // semantics [Armv8.1]
   1135  void ldeoralb(const Register& rs, const Register& rt, const MemOperand& src);
   1136 
   1137  // Atomic exclusive OR on halfword in memory [Armv8.1]
   1138  void ldeorh(const Register& rs, const Register& rt, const MemOperand& src);
   1139 
   1140  // Atomic exclusive OR on halfword in memory, with Load-acquire semantics
   1141  // [Armv8.1]
   1142  void ldeorah(const Register& rs, const Register& rt, const MemOperand& src);
   1143 
   1144  // Atomic exclusive OR on halfword in memory, with Store-release semantics
   1145  // [Armv8.1]
   1146  void ldeorlh(const Register& rs, const Register& rt, const MemOperand& src);
   1147 
   1148  // Atomic exclusive OR on halfword in memory, with Load-acquire and
   1149  // Store-release semantics [Armv8.1]
   1150  void ldeoralh(const Register& rs, const Register& rt, const MemOperand& src);
   1151 
   1152  // Atomic exclusive OR on word or doubleword in memory [Armv8.1]
   1153  void ldeor(const Register& rs, const Register& rt, const MemOperand& src);
   1154 
   1155  // Atomic exclusive OR on word or doubleword in memory, with Load-acquire
   1156  // semantics [Armv8.1]
   1157  void ldeora(const Register& rs, const Register& rt, const MemOperand& src);
   1158 
   1159  // Atomic exclusive OR on word or doubleword in memory, with Store-release
   1160  // semantics [Armv8.1]
   1161  void ldeorl(const Register& rs, const Register& rt, const MemOperand& src);
   1162 
   1163  // Atomic exclusive OR on word or doubleword in memory, with Load-acquire and
   1164  // Store-release semantics [Armv8.1]
   1165  void ldeoral(const Register& rs, const Register& rt, const MemOperand& src);
   1166 
   1167  // Atomic bit set on byte in memory [Armv8.1]
   1168  void ldsetb(const Register& rs, const Register& rt, const MemOperand& src);
   1169 
   1170  // Atomic bit set on byte in memory, with Load-acquire semantics [Armv8.1]
   1171  void ldsetab(const Register& rs, const Register& rt, const MemOperand& src);
   1172 
   1173  // Atomic bit set on byte in memory, with Store-release semantics [Armv8.1]
   1174  void ldsetlb(const Register& rs, const Register& rt, const MemOperand& src);
   1175 
   1176  // Atomic bit set on byte in memory, with Load-acquire and Store-release
   1177  // semantics [Armv8.1]
   1178  void ldsetalb(const Register& rs, const Register& rt, const MemOperand& src);
   1179 
   1180  // Atomic bit set on halfword in memory [Armv8.1]
   1181  void ldseth(const Register& rs, const Register& rt, const MemOperand& src);
   1182 
   1183  // Atomic bit set on halfword in memory, with Load-acquire semantics [Armv8.1]
   1184  void ldsetah(const Register& rs, const Register& rt, const MemOperand& src);
   1185 
   1186  // Atomic bit set on halfword in memory, with Store-release semantics
   1187  // [Armv8.1]
   1188  void ldsetlh(const Register& rs, const Register& rt, const MemOperand& src);
   1189 
   1190  // Atomic bit set on halfword in memory, with Load-acquire and Store-release
   1191  // semantics [Armv8.1]
   1192  void ldsetalh(const Register& rs, const Register& rt, const MemOperand& src);
   1193 
   1194  // Atomic bit set on word or doubleword in memory [Armv8.1]
   1195  void ldset(const Register& rs, const Register& rt, const MemOperand& src);
   1196 
   1197  // Atomic bit set on word or doubleword in memory, with Load-acquire semantics
   1198  // [Armv8.1]
   1199  void ldseta(const Register& rs, const Register& rt, const MemOperand& src);
   1200 
   1201  // Atomic bit set on word or doubleword in memory, with Store-release
   1202  // semantics [Armv8.1]
   1203  void ldsetl(const Register& rs, const Register& rt, const MemOperand& src);
   1204 
   1205  // Atomic bit set on word or doubleword in memory, with Load-acquire and
   1206  // Store-release semantics [Armv8.1]
   1207  void ldsetal(const Register& rs, const Register& rt, const MemOperand& src);
   1208 
   1209  // Atomic signed maximum on byte in memory [Armv8.1]
   1210  void ldsmaxb(const Register& rs, const Register& rt, const MemOperand& src);
   1211 
   1212  // Atomic signed maximum on byte in memory, with Load-acquire semantics
   1213  // [Armv8.1]
   1214  void ldsmaxab(const Register& rs, const Register& rt, const MemOperand& src);
   1215 
   1216  // Atomic signed maximum on byte in memory, with Store-release semantics
   1217  // [Armv8.1]
   1218  void ldsmaxlb(const Register& rs, const Register& rt, const MemOperand& src);
   1219 
   1220  // Atomic signed maximum on byte in memory, with Load-acquire and
   1221  // Store-release semantics [Armv8.1]
   1222  void ldsmaxalb(const Register& rs, const Register& rt, const MemOperand& src);
   1223 
   1224  // Atomic signed maximum on halfword in memory [Armv8.1]
   1225  void ldsmaxh(const Register& rs, const Register& rt, const MemOperand& src);
   1226 
   1227  // Atomic signed maximum on halfword in memory, with Load-acquire semantics
   1228  // [Armv8.1]
   1229  void ldsmaxah(const Register& rs, const Register& rt, const MemOperand& src);
   1230 
   1231  // Atomic signed maximum on halfword in memory, with Store-release semantics
   1232  // [Armv8.1]
   1233  void ldsmaxlh(const Register& rs, const Register& rt, const MemOperand& src);
   1234 
   1235  // Atomic signed maximum on halfword in memory, with Load-acquire and
   1236  // Store-release semantics [Armv8.1]
   1237  void ldsmaxalh(const Register& rs, const Register& rt, const MemOperand& src);
   1238 
   1239  // Atomic signed maximum on word or doubleword in memory [Armv8.1]
   1240  void ldsmax(const Register& rs, const Register& rt, const MemOperand& src);
   1241 
   1242  // Atomic signed maximum on word or doubleword in memory, with Load-acquire
   1243  // semantics [Armv8.1]
   1244  void ldsmaxa(const Register& rs, const Register& rt, const MemOperand& src);
   1245 
   1246  // Atomic signed maximum on word or doubleword in memory, with Store-release
   1247  // semantics [Armv8.1]
   1248  void ldsmaxl(const Register& rs, const Register& rt, const MemOperand& src);
   1249 
   1250  // Atomic signed maximum on word or doubleword in memory, with Load-acquire
   1251  // and Store-release semantics [Armv8.1]
   1252  void ldsmaxal(const Register& rs, const Register& rt, const MemOperand& src);
   1253 
   1254  // Atomic signed minimum on byte in memory [Armv8.1]
   1255  void ldsminb(const Register& rs, const Register& rt, const MemOperand& src);
   1256 
   1257  // Atomic signed minimum on byte in memory, with Load-acquire semantics
   1258  // [Armv8.1]
   1259  void ldsminab(const Register& rs, const Register& rt, const MemOperand& src);
   1260 
   1261  // Atomic signed minimum on byte in memory, with Store-release semantics
   1262  // [Armv8.1]
   1263  void ldsminlb(const Register& rs, const Register& rt, const MemOperand& src);
   1264 
   1265  // Atomic signed minimum on byte in memory, with Load-acquire and
   1266  // Store-release semantics [Armv8.1]
   1267  void ldsminalb(const Register& rs, const Register& rt, const MemOperand& src);
   1268 
   1269  // Atomic signed minimum on halfword in memory [Armv8.1]
   1270  void ldsminh(const Register& rs, const Register& rt, const MemOperand& src);
   1271 
   1272  // Atomic signed minimum on halfword in memory, with Load-acquire semantics
   1273  // [Armv8.1]
   1274  void ldsminah(const Register& rs, const Register& rt, const MemOperand& src);
   1275 
   1276  // Atomic signed minimum on halfword in memory, with Store-release semantics
   1277  // [Armv8.1]
   1278  void ldsminlh(const Register& rs, const Register& rt, const MemOperand& src);
   1279 
   1280  // Atomic signed minimum on halfword in memory, with Load-acquire and
   1281  // Store-release semantics [Armv8.1]
   1282  void ldsminalh(const Register& rs, const Register& rt, const MemOperand& src);
   1283 
   1284  // Atomic signed minimum on word or doubleword in memory [Armv8.1]
   1285  void ldsmin(const Register& rs, const Register& rt, const MemOperand& src);
   1286 
   1287  // Atomic signed minimum on word or doubleword in memory, with Load-acquire
   1288  // semantics [Armv8.1]
   1289  void ldsmina(const Register& rs, const Register& rt, const MemOperand& src);
   1290 
   1291  // Atomic signed minimum on word or doubleword in memory, with Store-release
   1292  // semantics [Armv8.1]
   1293  void ldsminl(const Register& rs, const Register& rt, const MemOperand& src);
   1294 
   1295  // Atomic signed minimum on word or doubleword in memory, with Load-acquire
   1296  // and Store-release semantics [Armv8.1]
   1297  void ldsminal(const Register& rs, const Register& rt, const MemOperand& src);
   1298 
   1299  // Atomic unsigned maximum on byte in memory [Armv8.1]
   1300  void ldumaxb(const Register& rs, const Register& rt, const MemOperand& src);
   1301 
   1302  // Atomic unsigned maximum on byte in memory, with Load-acquire semantics
   1303  // [Armv8.1]
   1304  void ldumaxab(const Register& rs, const Register& rt, const MemOperand& src);
   1305 
   1306  // Atomic unsigned maximum on byte in memory, with Store-release semantics
   1307  // [Armv8.1]
   1308  void ldumaxlb(const Register& rs, const Register& rt, const MemOperand& src);
   1309 
   1310  // Atomic unsigned maximum on byte in memory, with Load-acquire and
   1311  // Store-release semantics [Armv8.1]
   1312  void ldumaxalb(const Register& rs, const Register& rt, const MemOperand& src);
   1313 
   1314  // Atomic unsigned maximum on halfword in memory [Armv8.1]
   1315  void ldumaxh(const Register& rs, const Register& rt, const MemOperand& src);
   1316 
   1317  // Atomic unsigned maximum on halfword in memory, with Load-acquire semantics
   1318  // [Armv8.1]
   1319  void ldumaxah(const Register& rs, const Register& rt, const MemOperand& src);
   1320 
   1321  // Atomic unsigned maximum on halfword in memory, with Store-release semantics
   1322  // [Armv8.1]
   1323  void ldumaxlh(const Register& rs, const Register& rt, const MemOperand& src);
   1324 
   1325  // Atomic unsigned maximum on halfword in memory, with Load-acquire and
   1326  // Store-release semantics [Armv8.1]
   1327  void ldumaxalh(const Register& rs, const Register& rt, const MemOperand& src);
   1328 
   1329  // Atomic unsigned maximum on word or doubleword in memory [Armv8.1]
   1330  void ldumax(const Register& rs, const Register& rt, const MemOperand& src);
   1331 
   1332  // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
   1333  // semantics [Armv8.1]
   1334  void ldumaxa(const Register& rs, const Register& rt, const MemOperand& src);
   1335 
   1336  // Atomic unsigned maximum on word or doubleword in memory, with Store-release
   1337  // semantics [Armv8.1]
   1338  void ldumaxl(const Register& rs, const Register& rt, const MemOperand& src);
   1339 
   1340  // Atomic unsigned maximum on word or doubleword in memory, with Load-acquire
   1341  // and Store-release semantics [Armv8.1]
   1342  void ldumaxal(const Register& rs, const Register& rt, const MemOperand& src);
   1343 
   1344  // Atomic unsigned minimum on byte in memory [Armv8.1]
   1345  void lduminb(const Register& rs, const Register& rt, const MemOperand& src);
   1346 
   1347  // Atomic unsigned minimum on byte in memory, with Load-acquire semantics
   1348  // [Armv8.1]
   1349  void lduminab(const Register& rs, const Register& rt, const MemOperand& src);
   1350 
   1351  // Atomic unsigned minimum on byte in memory, with Store-release semantics
   1352  // [Armv8.1]
   1353  void lduminlb(const Register& rs, const Register& rt, const MemOperand& src);
   1354 
   1355  // Atomic unsigned minimum on byte in memory, with Load-acquire and
   1356  // Store-release semantics [Armv8.1]
   1357  void lduminalb(const Register& rs, const Register& rt, const MemOperand& src);
   1358 
   1359  // Atomic unsigned minimum on halfword in memory [Armv8.1]
   1360  void lduminh(const Register& rs, const Register& rt, const MemOperand& src);
   1361 
   1362  // Atomic unsigned minimum on halfword in memory, with Load-acquire semantics
   1363  // [Armv8.1]
   1364  void lduminah(const Register& rs, const Register& rt, const MemOperand& src);
   1365 
   1366  // Atomic unsigned minimum on halfword in memory, with Store-release semantics
   1367  // [Armv8.1]
   1368  void lduminlh(const Register& rs, const Register& rt, const MemOperand& src);
   1369 
   1370  // Atomic unsigned minimum on halfword in memory, with Load-acquire and
   1371  // Store-release semantics [Armv8.1]
   1372  void lduminalh(const Register& rs, const Register& rt, const MemOperand& src);
   1373 
   1374  // Atomic unsigned minimum on word or doubleword in memory [Armv8.1]
   1375  void ldumin(const Register& rs, const Register& rt, const MemOperand& src);
   1376 
   1377  // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
   1378  // semantics [Armv8.1]
   1379  void ldumina(const Register& rs, const Register& rt, const MemOperand& src);
   1380 
   1381  // Atomic unsigned minimum on word or doubleword in memory, with Store-release
   1382  // semantics [Armv8.1]
   1383  void lduminl(const Register& rs, const Register& rt, const MemOperand& src);
   1384 
   1385  // Atomic unsigned minimum on word or doubleword in memory, with Load-acquire
   1386  // and Store-release semantics [Armv8.1]
   1387  void lduminal(const Register& rs, const Register& rt, const MemOperand& src);
   1388 
   1389  // Atomic add on byte in memory, without return. [Armv8.1]
   1390  void staddb(const Register& rs, const MemOperand& src);
   1391 
   1392  // Atomic add on byte in memory, with Store-release semantics and without
   1393  // return. [Armv8.1]
   1394  void staddlb(const Register& rs, const MemOperand& src);
   1395 
   1396  // Atomic add on halfword in memory, without return. [Armv8.1]
   1397  void staddh(const Register& rs, const MemOperand& src);
   1398 
   1399  // Atomic add on halfword in memory, with Store-release semantics and without
   1400  // return. [Armv8.1]
   1401  void staddlh(const Register& rs, const MemOperand& src);
   1402 
   1403  // Atomic add on word or doubleword in memory, without return. [Armv8.1]
   1404  void stadd(const Register& rs, const MemOperand& src);
   1405 
   1406  // Atomic add on word or doubleword in memory, with Store-release semantics
   1407  // and without return. [Armv8.1]
   1408  void staddl(const Register& rs, const MemOperand& src);
   1409 
   1410  // Atomic bit clear on byte in memory, without return. [Armv8.1]
   1411  void stclrb(const Register& rs, const MemOperand& src);
   1412 
   1413  // Atomic bit clear on byte in memory, with Store-release semantics and
   1414  // without return. [Armv8.1]
   1415  void stclrlb(const Register& rs, const MemOperand& src);
   1416 
   1417  // Atomic bit clear on halfword in memory, without return. [Armv8.1]
   1418  void stclrh(const Register& rs, const MemOperand& src);
   1419 
   1420  // Atomic bit clear on halfword in memory, with Store-release semantics and
   1421  // without return. [Armv8.1]
   1422  void stclrlh(const Register& rs, const MemOperand& src);
   1423 
   1424  // Atomic bit clear on word or doubleword in memory, without return. [Armv8.1]
   1425  void stclr(const Register& rs, const MemOperand& src);
   1426 
   1427  // Atomic bit clear on word or doubleword in memory, with Store-release
   1428  // semantics and without return. [Armv8.1]
   1429  void stclrl(const Register& rs, const MemOperand& src);
   1430 
   1431  // Atomic exclusive OR on byte in memory, without return. [Armv8.1]
   1432  void steorb(const Register& rs, const MemOperand& src);
   1433 
   1434  // Atomic exclusive OR on byte in memory, with Store-release semantics and
   1435  // without return. [Armv8.1]
   1436  void steorlb(const Register& rs, const MemOperand& src);
   1437 
   1438  // Atomic exclusive OR on halfword in memory, without return. [Armv8.1]
   1439  void steorh(const Register& rs, const MemOperand& src);
   1440 
   1441  // Atomic exclusive OR on halfword in memory, with Store-release semantics
   1442  // and without return. [Armv8.1]
   1443  void steorlh(const Register& rs, const MemOperand& src);
   1444 
   1445  // Atomic exclusive OR on word or doubleword in memory, without return.
   1446  // [Armv8.1]
   1447  void steor(const Register& rs, const MemOperand& src);
   1448 
   1449  // Atomic exclusive OR on word or doubleword in memory, with Store-release
   1450  // semantics and without return. [Armv8.1]
   1451  void steorl(const Register& rs, const MemOperand& src);
   1452 
   1453  // Atomic bit set on byte in memory, without return. [Armv8.1]
   1454  void stsetb(const Register& rs, const MemOperand& src);
   1455 
   1456  // Atomic bit set on byte in memory, with Store-release semantics and without
   1457  // return. [Armv8.1]
   1458  void stsetlb(const Register& rs, const MemOperand& src);
   1459 
   1460  // Atomic bit set on halfword in memory, without return. [Armv8.1]
   1461  void stseth(const Register& rs, const MemOperand& src);
   1462 
   1463  // Atomic bit set on halfword in memory, with Store-release semantics and
   1464  // without return. [Armv8.1]
   1465  void stsetlh(const Register& rs, const MemOperand& src);
   1466 
   1467  // Atomic bit set on word or doubleword in memory, without return. [Armv8.1]
   1468  void stset(const Register& rs, const MemOperand& src);
   1469 
   1470  // Atomic bit set on word or doubleword in memory, with Store-release
   1471  // semantics and without return. [Armv8.1]
   1472  void stsetl(const Register& rs, const MemOperand& src);
   1473 
   1474  // Atomic signed maximum on byte in memory, without return. [Armv8.1]
   1475  void stsmaxb(const Register& rs, const MemOperand& src);
   1476 
   1477  // Atomic signed maximum on byte in memory, with Store-release semantics and
   1478  // without return. [Armv8.1]
   1479  void stsmaxlb(const Register& rs, const MemOperand& src);
   1480 
   1481  // Atomic signed maximum on halfword in memory, without return. [Armv8.1]
   1482  void stsmaxh(const Register& rs, const MemOperand& src);
   1483 
   1484  // Atomic signed maximum on halfword in memory, with Store-release semantics
   1485  // and without return. [Armv8.1]
   1486  void stsmaxlh(const Register& rs, const MemOperand& src);
   1487 
   1488  // Atomic signed maximum on word or doubleword in memory, without return.
   1489  // [Armv8.1]
   1490  void stsmax(const Register& rs, const MemOperand& src);
   1491 
   1492  // Atomic signed maximum on word or doubleword in memory, with Store-release
   1493  // semantics and without return. [Armv8.1]
   1494  void stsmaxl(const Register& rs, const MemOperand& src);
   1495 
   1496  // Atomic signed minimum on byte in memory, without return. [Armv8.1]
   1497  void stsminb(const Register& rs, const MemOperand& src);
   1498 
   1499  // Atomic signed minimum on byte in memory, with Store-release semantics and
   1500  // without return. [Armv8.1]
   1501  void stsminlb(const Register& rs, const MemOperand& src);
   1502 
   1503  // Atomic signed minimum on halfword in memory, without return. [Armv8.1]
   1504  void stsminh(const Register& rs, const MemOperand& src);
   1505 
   1506  // Atomic signed minimum on halfword in memory, with Store-release semantics
   1507  // and without return. [Armv8.1]
   1508  void stsminlh(const Register& rs, const MemOperand& src);
   1509 
   1510  // Atomic signed minimum on word or doubleword in memory, without return.
   1511  // [Armv8.1]
   1512  void stsmin(const Register& rs, const MemOperand& src);
   1513 
   1514  // Atomic signed minimum on word or doubleword in memory, with Store-release
   1515  // semantics and without return. [Armv8.1]
   1516  void stsminl(const Register& rs, const MemOperand& src);
   1517 
   1518  // Atomic unsigned maximum on byte in memory, without return. [Armv8.1]
   1519  void stumaxb(const Register& rs, const MemOperand& src);
   1520 
   1521  // Atomic unsigned maximum on byte in memory, with Store-release semantics and
   1522  // without return. [Armv8.1]
   1523  void stumaxlb(const Register& rs, const MemOperand& src);
   1524 
   1525  // Atomic unsigned maximum on halfword in memory, without return. [Armv8.1]
   1526  void stumaxh(const Register& rs, const MemOperand& src);
   1527 
   1528  // Atomic unsigned maximum on halfword in memory, with Store-release semantics
   1529  // and without return. [Armv8.1]
   1530  void stumaxlh(const Register& rs, const MemOperand& src);
   1531 
   1532  // Atomic unsigned maximum on word or doubleword in memory, without return.
   1533  // [Armv8.1]
   1534  void stumax(const Register& rs, const MemOperand& src);
   1535 
   1536  // Atomic unsigned maximum on word or doubleword in memory, with Store-release
   1537  // semantics and without return. [Armv8.1]
   1538  void stumaxl(const Register& rs, const MemOperand& src);
   1539 
   1540  // Atomic unsigned minimum on byte in memory, without return. [Armv8.1]
   1541  void stuminb(const Register& rs, const MemOperand& src);
   1542 
   1543  // Atomic unsigned minimum on byte in memory, with Store-release semantics and
   1544  // without return. [Armv8.1]
   1545  void stuminlb(const Register& rs, const MemOperand& src);
   1546 
   1547  // Atomic unsigned minimum on halfword in memory, without return. [Armv8.1]
   1548  void stuminh(const Register& rs, const MemOperand& src);
   1549 
   1550  // Atomic unsigned minimum on halfword in memory, with Store-release semantics
   1551  // and without return. [Armv8.1]
   1552  void stuminlh(const Register& rs, const MemOperand& src);
   1553 
   1554  // Atomic unsigned minimum on word or doubleword in memory, without return.
   1555  // [Armv8.1]
   1556  void stumin(const Register& rs, const MemOperand& src);
   1557 
   1558  // Atomic unsigned minimum on word or doubleword in memory, with Store-release
   1559  // semantics and without return. [Armv8.1]
   1560  void stuminl(const Register& rs, const MemOperand& src);
   1561 
   1562  // Swap byte in memory [Armv8.1]
   1563  void swpb(const Register& rs, const Register& rt, const MemOperand& src);
   1564 
   1565  // Swap byte in memory, with Load-acquire semantics [Armv8.1]
   1566  void swpab(const Register& rs, const Register& rt, const MemOperand& src);
   1567 
   1568  // Swap byte in memory, with Store-release semantics [Armv8.1]
   1569  void swplb(const Register& rs, const Register& rt, const MemOperand& src);
   1570 
   1571  // Swap byte in memory, with Load-acquire and Store-release semantics
   1572  // [Armv8.1]
   1573  void swpalb(const Register& rs, const Register& rt, const MemOperand& src);
   1574 
   1575  // Swap halfword in memory [Armv8.1]
   1576  void swph(const Register& rs, const Register& rt, const MemOperand& src);
   1577 
   1578  // Swap halfword in memory, with Load-acquire semantics [Armv8.1]
   1579  void swpah(const Register& rs, const Register& rt, const MemOperand& src);
   1580 
   1581  // Swap halfword in memory, with Store-release semantics [Armv8.1]
   1582  void swplh(const Register& rs, const Register& rt, const MemOperand& src);
   1583 
   1584  // Swap halfword in memory, with Load-acquire and Store-release semantics
   1585  // [Armv8.1]
   1586  void swpalh(const Register& rs, const Register& rt, const MemOperand& src);
   1587 
   1588  // Swap word or doubleword in memory [Armv8.1]
   1589  void swp(const Register& rs, const Register& rt, const MemOperand& src);
   1590 
   1591  // Swap word or doubleword in memory, with Load-acquire semantics [Armv8.1]
   1592  void swpa(const Register& rs, const Register& rt, const MemOperand& src);
   1593 
   1594  // Swap word or doubleword in memory, with Store-release semantics [Armv8.1]
   1595  void swpl(const Register& rs, const Register& rt, const MemOperand& src);
   1596 
   1597  // Swap word or doubleword in memory, with Load-acquire and Store-release
   1598  // semantics [Armv8.1]
   1599  void swpal(const Register& rs, const Register& rt, const MemOperand& src);
   1600 
   1601  // Prefetch memory.
   1602  void prfm(PrefetchOperation op, const MemOperand& addr,
   1603            LoadStoreScalingOption option = PreferScaledOffset);
   1604 
   1605  // Prefetch memory (with unscaled offset).
   1606  void prfum(PrefetchOperation op, const MemOperand& addr,
   1607             LoadStoreScalingOption option = PreferUnscaledOffset);
   1608 
   1609  // Prefetch from pc + imm19 << 2.
   1610  void prfm(PrefetchOperation op, int imm19);
   1611 
   1612  // Move instructions. The default shift of -1 indicates that the move
   1613  // instruction will calculate an appropriate 16-bit immediate and left shift
   1614  // that is equal to the 64-bit immediate argument. If an explicit left shift
   1615  // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
   1616  //
   1617  // For movk, an explicit shift can be used to indicate which half word should
   1618  // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
   1619  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
   1620  // most-significant.
   1621 
   1622  // Move immediate and keep.
   1623  void movk(const Register& rd, uint64_t imm, int shift = -1) {  // shift: -1 (default) or an explicit 0, 16, 32 or 48.
   1624    MoveWide(rd, imm, shift, MOVK);  // Forwards all arguments unchanged to MoveWide, tagged with MOVK.
   1625  }
   1626 
   1627  // Move inverted immediate.
   1628  void movn(const Register& rd, uint64_t imm, int shift = -1) {  // shift: -1 (default) or an explicit 0, 16, 32 or 48.
   1629    MoveWide(rd, imm, shift, MOVN);  // Forwards all arguments unchanged to MoveWide, tagged with MOVN.
   1630  }
   1631 
   1632  // Move immediate.
   1633  void movz(const Register& rd, uint64_t imm, int shift = -1) {  // shift: -1 (default) or an explicit 0, 16, 32 or 48.
   1634    MoveWide(rd, imm, shift, MOVZ);  // Forwards all arguments unchanged to MoveWide, tagged with MOVZ.
   1635  }
   1636 
   1637  // Misc instructions.
   1638  // Monitor debug-mode breakpoint.
   1639  void brk(int code);
   1640 
   1641  // Halting debug-mode breakpoint.
   1642  void hlt(int code);
   1643 
   1644  // Generate exception targeting EL1.
   1645  void svc(int code);
   1646  static void svc(Instruction* at, int code);
   1647 
   1648  // Move register to register.
   1649  void mov(const Register& rd, const Register& rn);
   1650 
   1651  // Move inverted operand to register.
   1652  void mvn(const Register& rd, const Operand& operand);
   1653 
   1654  // System instructions.
   1655  // Move to register from system register.
   1656  void mrs(const Register& rt, SystemRegister sysreg);
   1657 
   1658  // Move from register to system register.
   1659  void msr(SystemRegister sysreg, const Register& rt);
   1660 
   1661  // System instruction.
   1662  void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr);
   1663 
   1664  // System instruction with pre-encoded op (op1:crn:crm:op2).
   1665  void sys(int op, const Register& rt = xzr);
   1666 
   1667  // System data cache operation.
   1668  void dc(DataCacheOp op, const Register& rt);
   1669 
   1670  // System instruction cache operation.
   1671  void ic(InstructionCacheOp op, const Register& rt);
   1672 
   1673  // System hint.
   1674  BufferOffset hint(SystemHint code);
   1675  static void hint(Instruction* at, SystemHint code);
   1676 
   1677  // Clear exclusive monitor.
   1678  void clrex(int imm4 = 0xf);
   1679 
   1680  // Data memory barrier.
   1681  void dmb(BarrierDomain domain, BarrierType type);
   1682 
   1683  // Data synchronization barrier.
   1684  void dsb(BarrierDomain domain, BarrierType type);
   1685 
   1686  // Instruction synchronization barrier.
   1687  void isb();
   1688 
   1689  // Alias for system instructions.
   1690  // No-op.
   1691  BufferOffset nop() {  // Alias: a no-op is issued as the NOP system hint.
   1692    return hint(NOP);  // Hands back the BufferOffset that hint() returns.
   1693  }
   1694  static void nop(Instruction* at);
   1695 
   1696  // Alias for system instructions.
   1697  // Conditional speculation barrier.
   1698  BufferOffset csdb() {  // Alias: the barrier is issued as the CSDB system hint.
   1699    return hint(CSDB);  // Hands back the BufferOffset that hint() returns.
   1700  }
   1701  static void csdb(Instruction* at);
   1702 
   1703  // FP and NEON instructions.
   1704  // Move double precision immediate to FP register.
   1705  void fmov(const VRegister& vd, double imm);
   1706 
   1707  // Move single precision immediate to FP register.
   1708  void fmov(const VRegister& vd, float imm);
   1709 
   1710  // Move FP register to register.
   1711  void fmov(const Register& rd, const VRegister& fn);
   1712 
   1713  // Move register to FP register.
   1714  void fmov(const VRegister& vd, const Register& rn);
   1715 
   1716  // Move FP register to FP register.
   1717  void fmov(const VRegister& vd, const VRegister& fn);
   1718 
   1719  // Move 64-bit register to top half of 128-bit FP register.
   1720  void fmov(const VRegister& vd, int index, const Register& rn);
   1721 
   1722  // Move top half of 128-bit FP register to 64-bit register.
   1723  void fmov(const Register& rd, const VRegister& vn, int index);
   1724 
   1725  // FP add.
   1726  void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1727 
   1728  // FP subtract.
   1729  void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1730 
   1731  // FP multiply.
   1732  void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
   1733 
   1734  // FP fused multiply-add.
   1735  void fmadd(const VRegister& vd,
   1736             const VRegister& vn,
   1737             const VRegister& vm,
   1738             const VRegister& va);
   1739 
   1740  // FP fused multiply-subtract.
   1741  void fmsub(const VRegister& vd,
   1742             const VRegister& vn,
   1743             const VRegister& vm,
   1744             const VRegister& va);
   1745 
   1746  // FP fused multiply-add and negate.
   1747  void fnmadd(const VRegister& vd,
   1748              const VRegister& vn,
   1749              const VRegister& vm,
   1750              const VRegister& va);
   1751 
   1752  // FP fused multiply-subtract and negate.
   1753  void fnmsub(const VRegister& vd,
   1754              const VRegister& vn,
   1755              const VRegister& vm,
   1756              const VRegister& va);
   1757 
   1758  // FP multiply-negate scalar.
   1759  void fnmul(const VRegister& vd,
   1760             const VRegister& vn,
   1761             const VRegister& vm);
   1762 
   1763  // FP reciprocal exponent scalar.
   1764  void frecpx(const VRegister& vd,
   1765              const VRegister& vn);
   1766 
   1767  // FP divide.
   1768  void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1769 
   1770  // FP maximum.
   1771  void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1772 
   1773  // FP minimum.
   1774  void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1775 
   1776  // FP maximum number.
   1777  void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1778 
   1779  // FP minimum number.
   1780  void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
   1781 
   1782  // FP absolute.
   1783  void fabs(const VRegister& vd, const VRegister& vn);
   1784 
   1785  // FP negate.
   1786  void fneg(const VRegister& vd, const VRegister& vn);
   1787 
   1788  // FP square root.
   1789  void fsqrt(const VRegister& vd, const VRegister& vn);
   1790 
   1791  // FP round to integer, nearest with ties to away.
   1792  void frinta(const VRegister& vd, const VRegister& vn);
   1793 
   1794  // FP round to integer, implicit rounding.
   1795  void frinti(const VRegister& vd, const VRegister& vn);
   1796 
   1797  // FP round to integer, toward minus infinity.
   1798  void frintm(const VRegister& vd, const VRegister& vn);
   1799 
   1800  // FP round to integer, nearest with ties to even.
   1801  void frintn(const VRegister& vd, const VRegister& vn);
   1802 
   1803  // FP round to integer, toward plus infinity.
   1804  void frintp(const VRegister& vd, const VRegister& vn);
   1805 
   1806  // FP round to integer, exact, implicit rounding.
   1807  void frintx(const VRegister& vd, const VRegister& vn);
   1808 
   1809  // FP round to integer, towards zero.
   1810  void frintz(const VRegister& vd, const VRegister& vn);
   1811 
   1812  void FPCompareMacro(const VRegister& vn,
   1813                      double value,
   1814                      FPTrapFlags trap);
   1815 
   1816  void FPCompareMacro(const VRegister& vn,
   1817                      const VRegister& vm,
   1818                      FPTrapFlags trap);
   1819 
   1820  // FP compare registers.
   1821  void fcmp(const VRegister& vn, const VRegister& vm);
   1822 
   1823  // FP compare immediate.
   1824  void fcmp(const VRegister& vn, double value);
   1825 
   1826  void FPCCompareMacro(const VRegister& vn,
   1827                       const VRegister& vm,
   1828                       StatusFlags nzcv,
   1829                       Condition cond,
   1830                       FPTrapFlags trap);
   1831 
   1832  // FP conditional compare.
   1833  void fccmp(const VRegister& vn,
   1834             const VRegister& vm,
   1835             StatusFlags nzcv,
   1836             Condition cond);
   1837 
   1838  // FP signaling compare registers.
   1839  void fcmpe(const VRegister& vn, const VRegister& vm);
   1840 
   1841  // FP signaling compare immediate.
   1842  void fcmpe(const VRegister& vn, double value);
   1843 
   1844  // FP conditional signaling compare.
   1845  void fccmpe(const VRegister& vn,
   1846              const VRegister& vm,
   1847              StatusFlags nzcv,
   1848              Condition cond);
   1849 
   1850  // FP conditional select.
   1851  void fcsel(const VRegister& vd,
   1852             const VRegister& vn,
   1853             const VRegister& vm,
   1854             Condition cond);
   1855 
   1856  // Common FP Convert functions.
   1857  void NEONFPConvertToInt(const Register& rd,
   1858                          const VRegister& vn,
   1859                          Instr op);
   1860  void NEONFPConvertToInt(const VRegister& vd,
   1861                          const VRegister& vn,
   1862                          Instr op);
   1863 
   1864  // FP convert between precisions.
   1865  void fcvt(const VRegister& vd, const VRegister& vn);
   1866 
   1867  // FP convert to higher precision.
   1868  void fcvtl(const VRegister& vd, const VRegister& vn);
   1869 
   1870  // FP convert to higher precision (second part).
   1871  void fcvtl2(const VRegister& vd, const VRegister& vn);
   1872 
   1873  // FP convert to lower precision.
   1874  void fcvtn(const VRegister& vd, const VRegister& vn);
   1875 
   1876  // FP convert to lower precision (second part).
   1877  void fcvtn2(const VRegister& vd, const VRegister& vn);
   1878 
   1879  // FP convert to lower precision, rounding to odd.
   1880  void fcvtxn(const VRegister& vd, const VRegister& vn);
   1881 
   1882  // FP convert to lower precision, rounding to odd (second part).
   1883  void fcvtxn2(const VRegister& vd, const VRegister& vn);
   1884 
   1885  // FP convert to signed integer, nearest with ties to away.
   1886  void fcvtas(const Register& rd, const VRegister& vn);
   1887 
   1888  // FP convert to unsigned integer, nearest with ties to away.
   1889  void fcvtau(const Register& rd, const VRegister& vn);
   1890 
   1891  // FP convert to signed integer, nearest with ties to away.
   1892  void fcvtas(const VRegister& vd, const VRegister& vn);
   1893 
   1894  // FP convert to unsigned integer, nearest with ties to away.
   1895  void fcvtau(const VRegister& vd, const VRegister& vn);
   1896 
   1897  // FP convert to signed integer, round towards -infinity.
   1898  void fcvtms(const Register& rd, const VRegister& vn);
   1899 
   1900  // FP convert to unsigned integer, round towards -infinity.
   1901  void fcvtmu(const Register& rd, const VRegister& vn);
   1902 
   1903  // FP convert to signed integer, round towards -infinity.
   1904  void fcvtms(const VRegister& vd, const VRegister& vn);
   1905 
   1906  // FP convert to unsigned integer, round towards -infinity.
   1907  void fcvtmu(const VRegister& vd, const VRegister& vn);
   1908 
   1909  // FP convert to signed integer, nearest with ties to even.
   1910  void fcvtns(const Register& rd, const VRegister& vn);
   1911 
   1912  // FP convert to unsigned integer, nearest with ties to even.
   1913  void fcvtnu(const Register& rd, const VRegister& vn);
   1914 
   1915  // FP convert to signed integer, nearest with ties to even.
   1916  void fcvtns(const VRegister& rd, const VRegister& vn);
   1917 
   1918  // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
   1919  void fjcvtzs(const Register& rd, const VRegister& vn);
   1920 
   1921  // FP convert to unsigned integer, nearest with ties to even.
   1922  void fcvtnu(const VRegister& rd, const VRegister& vn);
   1923 
   1924  // FP convert to signed integer or fixed-point, round towards zero.
   1925  void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
   1926 
   1927  // FP convert to unsigned integer or fixed-point, round towards zero.
   1928  void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
   1929 
   1930  // FP convert to signed integer or fixed-point, round towards zero.
   1931  void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
   1932 
   1933  // FP convert to unsigned integer or fixed-point, round towards zero.
   1934  void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
   1935 
   1936  // FP convert to signed integer, round towards +infinity.
   1937  void fcvtps(const Register& rd, const VRegister& vn);
   1938 
   1939  // FP convert to unsigned integer, round towards +infinity.
   1940  void fcvtpu(const Register& rd, const VRegister& vn);
   1941 
   1942  // FP convert to signed integer, round towards +infinity.
   1943  void fcvtps(const VRegister& vd, const VRegister& vn);
   1944 
   1945  // FP convert to unsigned integer, round towards +infinity.
   1946  void fcvtpu(const VRegister& vd, const VRegister& vn);
   1947 
   1948  // Convert signed integer or fixed-point to FP.
   1949  void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
   1950 
   1951  // Convert unsigned integer or fixed-point to FP.
   1952  void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
   1953 
   1954  // Convert signed integer or fixed-point to FP.
   1955  void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
   1956 
   1957  // Convert unsigned integer or fixed-point to FP.
   1958  void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
   1959 
   1960  // Unsigned absolute difference.
   1961  void uabd(const VRegister& vd,
   1962            const VRegister& vn,
   1963            const VRegister& vm);
   1964 
   1965  // Signed absolute difference.
   1966  void sabd(const VRegister& vd,
   1967            const VRegister& vn,
   1968            const VRegister& vm);
   1969 
   1970  // Unsigned absolute difference and accumulate.
   1971  void uaba(const VRegister& vd,
   1972            const VRegister& vn,
   1973            const VRegister& vm);
   1974 
   1975  // Signed absolute difference and accumulate.
   1976  void saba(const VRegister& vd,
   1977            const VRegister& vn,
   1978            const VRegister& vm);
   1979 
   1980  // Add.
   1981  void add(const VRegister& vd,
   1982           const VRegister& vn,
   1983           const VRegister& vm);
   1984 
   1985  // Subtract.
   1986  void sub(const VRegister& vd,
   1987           const VRegister& vn,
   1988           const VRegister& vm);
   1989 
   1990  // Unsigned halving add.
   1991  void uhadd(const VRegister& vd,
   1992             const VRegister& vn,
   1993             const VRegister& vm);
   1994 
   1995  // Signed halving add.
   1996  void shadd(const VRegister& vd,
   1997             const VRegister& vn,
   1998             const VRegister& vm);
   1999 
   2000  // Unsigned rounding halving add.
   2001  void urhadd(const VRegister& vd,
   2002              const VRegister& vn,
   2003              const VRegister& vm);
   2004 
   2005  // Signed rounding halving add.
   2006  void srhadd(const VRegister& vd,
   2007              const VRegister& vn,
   2008              const VRegister& vm);
   2009 
   2010  // Unsigned halving sub.
   2011  void uhsub(const VRegister& vd,
   2012             const VRegister& vn,
   2013             const VRegister& vm);
   2014 
   2015  // Signed halving sub.
   2016  void shsub(const VRegister& vd,
   2017             const VRegister& vn,
   2018             const VRegister& vm);
   2019 
   2020  // Unsigned saturating add.
   2021  void uqadd(const VRegister& vd,
   2022             const VRegister& vn,
   2023             const VRegister& vm);
   2024 
   2025  // Signed saturating add.
   2026  void sqadd(const VRegister& vd,
   2027             const VRegister& vn,
   2028             const VRegister& vm);
   2029 
   2030  // Unsigned saturating subtract.
   2031  void uqsub(const VRegister& vd,
   2032             const VRegister& vn,
   2033             const VRegister& vm);
   2034 
   2035  // Signed saturating subtract.
   2036  void sqsub(const VRegister& vd,
   2037             const VRegister& vn,
   2038             const VRegister& vm);
   2039 
   2040  // Add pairwise.
   2041  void addp(const VRegister& vd,
   2042            const VRegister& vn,
   2043            const VRegister& vm);
   2044 
   2045  // Add pair of elements scalar.
   2046  void addp(const VRegister& vd,
   2047            const VRegister& vn);
   2048 
   2049  // Multiply-add to accumulator.
   2050  void mla(const VRegister& vd,
   2051           const VRegister& vn,
   2052           const VRegister& vm);
   2053 
   2054  // Multiply-subtract to accumulator.
   2055  void mls(const VRegister& vd,
   2056           const VRegister& vn,
   2057           const VRegister& vm);
   2058 
   2059  // Multiply.
   2060  void mul(const VRegister& vd,
   2061           const VRegister& vn,
   2062           const VRegister& vm);
   2063 
   2064  // Multiply by scalar element.
   2065  void mul(const VRegister& vd,
   2066           const VRegister& vn,
   2067           const VRegister& vm,
   2068           int vm_index);
   2069 
   2070  // Multiply-add by scalar element.
   2071  void mla(const VRegister& vd,
   2072           const VRegister& vn,
   2073           const VRegister& vm,
   2074           int vm_index);
   2075 
   2076  // Multiply-subtract by scalar element.
   2077  void mls(const VRegister& vd,
   2078           const VRegister& vn,
   2079           const VRegister& vm,
   2080           int vm_index);
   2081 
   2082  // Signed long multiply-add by scalar element.
   2083  void smlal(const VRegister& vd,
   2084             const VRegister& vn,
   2085             const VRegister& vm,
   2086             int vm_index);
   2087 
   2088  // Signed long multiply-add by scalar element (second part).
   2089  void smlal2(const VRegister& vd,
   2090              const VRegister& vn,
   2091              const VRegister& vm,
   2092              int vm_index);
   2093 
   2094  // Unsigned long multiply-add by scalar element.
   2095  void umlal(const VRegister& vd,
   2096             const VRegister& vn,
   2097             const VRegister& vm,
   2098             int vm_index);
   2099 
   2100  // Unsigned long multiply-add by scalar element (second part).
   2101  void umlal2(const VRegister& vd,
   2102              const VRegister& vn,
   2103              const VRegister& vm,
   2104              int vm_index);
   2105 
   2106  // Signed long multiply-sub by scalar element.
   2107  void smlsl(const VRegister& vd,
   2108             const VRegister& vn,
   2109             const VRegister& vm,
   2110             int vm_index);
   2111 
   2112  // Signed long multiply-sub by scalar element (second part).
   2113  void smlsl2(const VRegister& vd,
   2114              const VRegister& vn,
   2115              const VRegister& vm,
   2116              int vm_index);
   2117 
   2118  // Unsigned long multiply-sub by scalar element.
   2119  void umlsl(const VRegister& vd,
   2120             const VRegister& vn,
   2121             const VRegister& vm,
   2122             int vm_index);
   2123 
   2124  // Unsigned long multiply-sub by scalar element (second part).
   2125  void umlsl2(const VRegister& vd,
   2126              const VRegister& vn,
   2127              const VRegister& vm,
   2128              int vm_index);
   2129 
   2130  // Signed long multiply by scalar element.
   2131  void smull(const VRegister& vd,
   2132             const VRegister& vn,
   2133             const VRegister& vm,
   2134             int vm_index);
   2135 
   2136  // Signed long multiply by scalar element (second part).
   2137  void smull2(const VRegister& vd,
   2138              const VRegister& vn,
   2139              const VRegister& vm,
   2140              int vm_index);
   2141 
   2142  // Unsigned long multiply by scalar element.
   2143  void umull(const VRegister& vd,
   2144             const VRegister& vn,
   2145             const VRegister& vm,
   2146             int vm_index);
   2147 
   2148  // Unsigned long multiply by scalar element (second part).
   2149  void umull2(const VRegister& vd,
   2150              const VRegister& vn,
   2151              const VRegister& vm,
   2152              int vm_index);
   2153 
   2154  // Signed saturating doubling long multiply by element.
   2155  void sqdmull(const VRegister& vd,
   2156               const VRegister& vn,
   2157               const VRegister& vm,
   2158               int vm_index);
   2159 
   2160  // Signed saturating doubling long multiply by element (second part).
   2161  void sqdmull2(const VRegister& vd,
   2162                const VRegister& vn,
   2163                const VRegister& vm,
   2164                int vm_index);
   2165 
   2166  // Signed saturating doubling long multiply-add by element.
   2167  void sqdmlal(const VRegister& vd,
   2168               const VRegister& vn,
   2169               const VRegister& vm,
   2170               int vm_index);
   2171 
   2172  // Signed saturating doubling long multiply-add by element (second part).
   2173  void sqdmlal2(const VRegister& vd,
   2174                const VRegister& vn,
   2175                const VRegister& vm,
   2176                int vm_index);
   2177 
   2178  // Signed saturating doubling long multiply-sub by element.
   2179  void sqdmlsl(const VRegister& vd,
   2180               const VRegister& vn,
   2181               const VRegister& vm,
   2182               int vm_index);
   2183 
   2184  // Signed saturating doubling long multiply-sub by element (second part).
   2185  void sqdmlsl2(const VRegister& vd,
   2186                const VRegister& vn,
   2187                const VRegister& vm,
   2188                int vm_index);
   2189 
   2190  // Compare equal.
   2191  void cmeq(const VRegister& vd,
   2192            const VRegister& vn,
   2193            const VRegister& vm);
   2194 
   2195  // Compare signed greater than or equal.
   2196  void cmge(const VRegister& vd,
   2197            const VRegister& vn,
   2198            const VRegister& vm);
   2199 
   2200  // Compare signed greater than.
   2201  void cmgt(const VRegister& vd,
   2202            const VRegister& vn,
   2203            const VRegister& vm);
   2204 
   2205  // Compare unsigned higher.
   2206  void cmhi(const VRegister& vd,
   2207            const VRegister& vn,
   2208            const VRegister& vm);
   2209 
   2210  // Compare unsigned higher or same.
   2211  void cmhs(const VRegister& vd,
   2212            const VRegister& vn,
   2213            const VRegister& vm);
   2214 
   2215  // Compare bitwise test bits nonzero.
   2216  void cmtst(const VRegister& vd,
   2217             const VRegister& vn,
   2218             const VRegister& vm);
   2219 
   2220  // Compare bitwise to zero.
   2221  void cmeq(const VRegister& vd,
   2222            const VRegister& vn,
   2223            int value);
   2224 
   2225  // Compare signed greater than or equal to zero.
   2226  void cmge(const VRegister& vd,
   2227            const VRegister& vn,
   2228            int value);
   2229 
   2230  // Compare signed greater than zero.
   2231  void cmgt(const VRegister& vd,
   2232            const VRegister& vn,
   2233            int value);
   2234 
   2235  // Compare signed less than or equal to zero.
   2236  void cmle(const VRegister& vd,
   2237            const VRegister& vn,
   2238            int value);
   2239 
   2240  // Compare signed less than zero.
   2241  void cmlt(const VRegister& vd,
   2242            const VRegister& vn,
   2243            int value);
   2244 
   2245  // Signed shift left by register.
   2246  void sshl(const VRegister& vd,
   2247            const VRegister& vn,
   2248            const VRegister& vm);
   2249 
   2250  // Unsigned shift left by register.
   2251  void ushl(const VRegister& vd,
   2252            const VRegister& vn,
   2253            const VRegister& vm);
   2254 
   2255  // Signed saturating shift left by register.
   2256  void sqshl(const VRegister& vd,
   2257             const VRegister& vn,
   2258             const VRegister& vm);
   2259 
   2260  // Unsigned saturating shift left by register.
   2261  void uqshl(const VRegister& vd,
   2262             const VRegister& vn,
   2263             const VRegister& vm);
   2264 
   2265  // Signed rounding shift left by register.
   2266  void srshl(const VRegister& vd,
   2267             const VRegister& vn,
   2268             const VRegister& vm);
   2269 
   2270  // Unsigned rounding shift left by register.
   2271  void urshl(const VRegister& vd,
   2272             const VRegister& vn,
   2273             const VRegister& vm);
   2274 
   2275  // Signed saturating rounding shift left by register.
   2276  void sqrshl(const VRegister& vd,
   2277              const VRegister& vn,
   2278              const VRegister& vm);
   2279 
   2280  // Unsigned saturating rounding shift left by register.
   2281  void uqrshl(const VRegister& vd,
   2282              const VRegister& vn,
   2283              const VRegister& vm);
   2284 
   2285  // Bitwise and.
         // The trailing underscore is needed because `and` is a C++ alternative
         // token and cannot be used as an identifier.
   2286  void and_(const VRegister& vd,
   2287            const VRegister& vn,
   2288            const VRegister& vm);
   2289 
   2290  // Bitwise or.
   2291  void orr(const VRegister& vd,
   2292           const VRegister& vn,
   2293           const VRegister& vm);
   2294 
   2295  // Bitwise or immediate.
         // Only a destination register is given (no separate source register).
         // `left_shift` defaults to 0 when omitted.
         // NOTE(review): the accepted ranges of `imm8` and `left_shift` are not
         // visible in this header -- confirm against the implementation.
   2296  void orr(const VRegister& vd,
   2297           const int imm8,
   2298           const int left_shift = 0);
   2299 
   2300  // Move register to register (both operands are vector registers).
   2301  void mov(const VRegister& vd,
   2302           const VRegister& vn);
   2303 
   2304  // Bitwise orn.
   2305  void orn(const VRegister& vd,
   2306           const VRegister& vn,
   2307           const VRegister& vm);
   2308 
   2309  // Bitwise eor.
   2310  void eor(const VRegister& vd,
   2311           const VRegister& vn,
   2312           const VRegister& vm);
   2313 
   2314  // Bit clear immediate.
         // Only a destination register is given (no separate source register).
         // `left_shift` defaults to 0 when omitted.
         // NOTE(review): the accepted ranges of `imm8` and `left_shift` are not
         // visible in this header -- confirm against the implementation.
   2315  void bic(const VRegister& vd,
   2316           const int imm8,
   2317           const int left_shift = 0);
   2318 
   2319  // Bit clear.
   2320  void bic(const VRegister& vd,
   2321           const VRegister& vn,
   2322           const VRegister& vm);
   2323 
   2324  // Bitwise insert if false.
   2325  void bif(const VRegister& vd,
   2326           const VRegister& vn,
   2327           const VRegister& vm);
   2328 
   2329  // Bitwise insert if true.
   2330  void bit(const VRegister& vd,
   2331           const VRegister& vn,
   2332           const VRegister& vm);
   2333 
   2334  // Bitwise select.
   2335  void bsl(const VRegister& vd,
   2336           const VRegister& vn,
   2337           const VRegister& vm);
   2338 
   2339  // Polynomial multiply.
   2340  void pmul(const VRegister& vd,
   2341            const VRegister& vn,
   2342            const VRegister& vm);
   2343 
   2344  // Vector move immediate.
         // `imm` is a full 64-bit value here, unlike mvni() which takes an
         // `int imm8`. `shift` defaults to LSL and `shift_amount` to 0.
         // NOTE(review): which immediates and shift combinations are encodable
         // is not visible in this header -- confirm against the implementation.
   2345  void movi(const VRegister& vd,
   2346            const uint64_t imm,
   2347            Shift shift = LSL,
   2348            const int shift_amount = 0);
   2349 
   2350  // Bitwise not.
   2351  void mvn(const VRegister& vd,
   2352           const VRegister& vn);
   2353 
   2354  // Vector move inverted immediate.
         // `shift` defaults to LSL and `shift_amount` to 0.
         // NOTE(review): the accepted ranges of `imm8` and `shift_amount` are
         // not visible in this header -- confirm against the implementation.
   2355  void mvni(const VRegister& vd,
   2356            const int imm8,
   2357            Shift shift = LSL,
   2358            const int shift_amount = 0);
   2359 
   2360  // Signed saturating accumulate of unsigned value.
   2361  void suqadd(const VRegister& vd,
   2362              const VRegister& vn);
   2363 
   2364  // Unsigned saturating accumulate of signed value.
   2365  void usqadd(const VRegister& vd,
   2366              const VRegister& vn);
   2367 
   2368  // Absolute value.
   2369  void abs(const VRegister& vd,
   2370           const VRegister& vn);
   2371 
   2372  // Signed saturating absolute value.
   2373  void sqabs(const VRegister& vd,
   2374             const VRegister& vn);
   2375 
   2376  // Negate.
   2377  void neg(const VRegister& vd,
   2378           const VRegister& vn);
   2379 
   2380  // Signed saturating negate.
   2381  void sqneg(const VRegister& vd,
   2382             const VRegister& vn);
   2383 
   2384  // Bitwise not.
         // The trailing underscore is needed because `not` is a C++ alternative
         // token and cannot be used as an identifier. mvn(vd, vn) is declared
         // with the same description and operand list.
   2385  void not_(const VRegister& vd,
   2386            const VRegister& vn);
   2387 
   2388  // Extract narrow.
   2389  void xtn(const VRegister& vd,
   2390           const VRegister& vn);
   2391 
   2392  // Extract narrow (second part).
   2393  void xtn2(const VRegister& vd,
   2394            const VRegister& vn);
   2395 
   2396  // Signed saturating extract narrow.
   2397  void sqxtn(const VRegister& vd,
   2398             const VRegister& vn);
   2399 
   2400  // Signed saturating extract narrow (second part).
   2401  void sqxtn2(const VRegister& vd,
   2402              const VRegister& vn);
   2403 
   2404  // Unsigned saturating extract narrow.
   2405  void uqxtn(const VRegister& vd,
   2406             const VRegister& vn);
   2407 
   2408  // Unsigned saturating extract narrow (second part).
   2409  void uqxtn2(const VRegister& vd,
   2410              const VRegister& vn);
   2411 
   2412  // Signed saturating extract unsigned narrow.
   2413  void sqxtun(const VRegister& vd,
   2414              const VRegister& vn);
   2415 
   2416  // Signed saturating extract unsigned narrow (second part).
   2417  void sqxtun2(const VRegister& vd,
   2418               const VRegister& vn);
   2419 
   2420  // Extract vector from pair of vectors.
         // NOTE(review): `index` presumably selects where in the vn:vm pair the
         // extraction starts -- confirm units and range in the implementation.
   2421  void ext(const VRegister& vd,
   2422           const VRegister& vn,
   2423           const VRegister& vm,
   2424           int index);
   2425 
   2426  // Duplicate vector element to vector or scalar.
         // `vn_index` is the index of the source element within vn.
   2427  void dup(const VRegister& vd,
   2428           const VRegister& vn,
   2429           int vn_index);
   2430 
   2431  // Move vector element to scalar.
         // Same operand list as dup(vd, vn, vn_index).
         // NOTE(review): presumably an alias of that dup form -- confirm.
   2432  void mov(const VRegister& vd,
   2433           const VRegister& vn,
   2434           int vn_index);
   2435 
   2436  // Duplicate general-purpose register to vector.
   2437  void dup(const VRegister& vd,
   2438           const Register& rn);
   2439 
   2440  // Insert vector element from another vector element.
         // `vd_index` is the destination element index within vd and `vn_index`
         // the source element index within vn.
   2441  void ins(const VRegister& vd,
   2442           int vd_index,
   2443           const VRegister& vn,
   2444           int vn_index);
   2445 
   2446  // Move vector element to another vector element.
         // Same operand list as ins(vd, vd_index, vn, vn_index).
         // NOTE(review): presumably an alias of that ins form -- confirm.
   2447  void mov(const VRegister& vd,
   2448           int vd_index,
   2449           const VRegister& vn,
   2450           int vn_index);
   2451 
   2452  // Insert vector element from general-purpose register.
         // `vd_index` is the destination element index within vd.
   2453  void ins(const VRegister& vd,
   2454           int vd_index,
   2455           const Register& rn);
   2456 
   2457  // Move general-purpose register to a vector element.
         // Same operand list as ins(vd, vd_index, rn).
         // NOTE(review): presumably an alias of that ins form -- confirm.
   2458  void mov(const VRegister& vd,
   2459           int vd_index,
   2460           const Register& rn);
   2461 
   2462  // Unsigned move vector element to general-purpose register.
   2463  void umov(const Register& rd,
   2464            const VRegister& vn,
   2465            int vn_index);
   2466 
   2467  // Move vector element to general-purpose register.
         // Same operand list as umov() and smov().
         // NOTE(review): presumably behaves like umov (no sign extension) --
         // confirm against the implementation.
   2468  void mov(const Register& rd,
   2469           const VRegister& vn,
   2470           int vn_index);
   2471 
   2472  // Signed move vector element to general-purpose register.
   2473  void smov(const Register& rd,
   2474            const VRegister& vn,
   2475            int vn_index);
   2476 
   2477  // One-element structure load to one register.
   2478  void ld1(const VRegister& vt,
   2479           const MemOperand& src);
   2480 
   2481  // One-element structure load to two registers.
   2482  void ld1(const VRegister& vt,
   2483           const VRegister& vt2,
   2484           const MemOperand& src);
   2485 
   2486  // One-element structure load to three registers.
   2487  void ld1(const VRegister& vt,
   2488           const VRegister& vt2,
   2489           const VRegister& vt3,
   2490           const MemOperand& src);
   2491 
   2492  // One-element structure load to four registers.
   2493  void ld1(const VRegister& vt,
   2494           const VRegister& vt2,
   2495           const VRegister& vt3,
   2496           const VRegister& vt4,
   2497           const MemOperand& src);
   2498 
   2499  // One-element single structure load to one lane.
   2500  void ld1(const VRegister& vt,
   2501           int lane,
   2502           const MemOperand& src);
   2503 
   2504  // One-element single structure load to all lanes.
   2505  void ld1r(const VRegister& vt,
   2506            const MemOperand& src);
   2507 
   2508  // Two-element structure load.
   2509  void ld2(const VRegister& vt,
   2510           const VRegister& vt2,
   2511           const MemOperand& src);
   2512 
   2513  // Two-element single structure load to one lane.
         // A single `lane` index is passed for both vt and vt2.
   2514  void ld2(const VRegister& vt,
   2515           const VRegister& vt2,
   2516           int lane,
   2517           const MemOperand& src);
   2518 
   2519  // Two-element single structure load to all lanes.
   2520  void ld2r(const VRegister& vt,
   2521            const VRegister& vt2,
   2522            const MemOperand& src);
   2523 
   2524  // Three-element structure load.
   2525  void ld3(const VRegister& vt,
   2526           const VRegister& vt2,
   2527           const VRegister& vt3,
   2528           const MemOperand& src);
   2529 
   2530  // Three-element single structure load to one lane.
         // A single `lane` index is passed for vt, vt2 and vt3.
   2531  void ld3(const VRegister& vt,
   2532           const VRegister& vt2,
   2533           const VRegister& vt3,
   2534           int lane,
   2535           const MemOperand& src);
   2536 
   2537  // Three-element single structure load to all lanes.
   2538  void ld3r(const VRegister& vt,
   2539            const VRegister& vt2,
   2540            const VRegister& vt3,
   2541            const MemOperand& src);
   2542 
   2543  // Four-element structure load.
   2544  void ld4(const VRegister& vt,
   2545           const VRegister& vt2,
   2546           const VRegister& vt3,
   2547           const VRegister& vt4,
   2548           const MemOperand& src);
   2549 
   2550  // Four-element single structure load to one lane.
         // A single `lane` index is passed for vt, vt2, vt3 and vt4.
   2551  void ld4(const VRegister& vt,
   2552           const VRegister& vt2,
   2553           const VRegister& vt3,
   2554           const VRegister& vt4,
   2555           int lane,
   2556           const MemOperand& src);
   2557 
   2558  // Four-element single structure load to all lanes.
   2559  void ld4r(const VRegister& vt,
   2560            const VRegister& vt2,
   2561            const VRegister& vt3,
   2562            const VRegister& vt4,
   2563            const MemOperand& src);
   2564 
   2565  // Count leading sign bits.
   2566  void cls(const VRegister& vd,
   2567           const VRegister& vn);
   2568 
   2569  // Count leading zero bits (vector).
   2570  void clz(const VRegister& vd,
   2571           const VRegister& vn);
   2572 
   2573  // Population count per byte.
   2574  void cnt(const VRegister& vd,
   2575           const VRegister& vn);
   2576 
   2577  // Reverse bit order.
   2578  void rbit(const VRegister& vd,
   2579            const VRegister& vn);
   2580 
   2581  // Reverse elements in 16-bit halfwords.
   2582  void rev16(const VRegister& vd,
   2583             const VRegister& vn);
   2584 
   2585  // Reverse elements in 32-bit words.
   2586  void rev32(const VRegister& vd,
   2587             const VRegister& vn);
   2588 
   2589  // Reverse elements in 64-bit doublewords.
   2590  void rev64(const VRegister& vd,
   2591             const VRegister& vn);
   2592 
   2593  // Unsigned reciprocal square root estimate.
   2594  void ursqrte(const VRegister& vd,
   2595               const VRegister& vn);
   2596 
   2597  // Unsigned reciprocal estimate.
   2598  void urecpe(const VRegister& vd,
   2599              const VRegister& vn);
   2600 
   2601  // Signed pairwise long add.
   2602  void saddlp(const VRegister& vd,
   2603              const VRegister& vn);
   2604 
   2605  // Unsigned pairwise long add.
   2606  void uaddlp(const VRegister& vd,
   2607              const VRegister& vn);
   2608 
   2609  // Signed pairwise long add and accumulate.
   2610  void sadalp(const VRegister& vd,
   2611              const VRegister& vn);
   2612 
   2613  // Unsigned pairwise long add and accumulate.
   2614  void uadalp(const VRegister& vd,
   2615              const VRegister& vn);
   2616 
   2617  // Shift left by immediate.
   2618  void shl(const VRegister& vd,
   2619           const VRegister& vn,
   2620           int shift);
   2621 
   2622  // Signed saturating shift left by immediate.
   2623  void sqshl(const VRegister& vd,
   2624             const VRegister& vn,
   2625             int shift);
   2626 
   2627  // Signed saturating shift left unsigned by immediate.
   2628  void sqshlu(const VRegister& vd,
   2629              const VRegister& vn,
   2630              int shift);
   2631 
   2632  // Unsigned saturating shift left by immediate.
   2633  void uqshl(const VRegister& vd,
   2634             const VRegister& vn,
   2635             int shift);
   2636 
   2637  // Signed shift left long by immediate.
   2638  void sshll(const VRegister& vd,
   2639             const VRegister& vn,
   2640             int shift);
   2641 
   2642  // Signed shift left long by immediate (second part).
   2643  void sshll2(const VRegister& vd,
   2644              const VRegister& vn,
   2645              int shift);
   2646 
   2647  // Signed extend long.
   2648  void sxtl(const VRegister& vd,
   2649            const VRegister& vn);
   2650 
   2651  // Signed extend long (second part).
   2652  void sxtl2(const VRegister& vd,
   2653             const VRegister& vn);
   2654 
   2655  // Unsigned shift left long by immediate.
   2656  void ushll(const VRegister& vd,
   2657             const VRegister& vn,
   2658             int shift);
   2659 
   2660  // Unsigned shift left long by immediate (second part).
   2661  void ushll2(const VRegister& vd,
   2662              const VRegister& vn,
   2663              int shift);
   2664 
   2665  // Shift left long by element size.
         // NOTE(review): the description fixes the shift to the element size,
         // yet an explicit `shift` is taken; presumably it must equal the source
         // element size in bits -- confirm against the implementation.
   2666  void shll(const VRegister& vd,
   2667            const VRegister& vn,
   2668            int shift);
   2669 
   2670  // Shift left long by element size (second part).
         // NOTE(review): the description fixes the shift to the element size,
         // yet an explicit `shift` is taken; presumably it must equal the source
         // element size in bits -- confirm against the implementation.
   2671  void shll2(const VRegister& vd,
   2672             const VRegister& vn,
   2673             int shift);
   2674 
   2675  // Unsigned extend long.
   2676  void uxtl(const VRegister& vd,
   2677            const VRegister& vn);
   2678 
   2679  // Unsigned extend long (second part).
   2680  void uxtl2(const VRegister& vd,
   2681             const VRegister& vn);
   2682 
   2683  // Shift left by immediate and insert.
   2684  void sli(const VRegister& vd,
   2685           const VRegister& vn,
   2686           int shift);
   2687 
   2688  // Shift right by immediate and insert.
   2689  void sri(const VRegister& vd,
   2690           const VRegister& vn,
   2691           int shift);
   2692 
   2693  // Signed maximum.
   2694  void smax(const VRegister& vd,
   2695            const VRegister& vn,
   2696            const VRegister& vm);
   2697 
   2698  // Signed pairwise maximum.
   2699  void smaxp(const VRegister& vd,
   2700             const VRegister& vn,
   2701             const VRegister& vm);
   2702 
   2703  // Add across vector.
   2704  void addv(const VRegister& vd,
   2705            const VRegister& vn);
   2706 
   2707  // Signed add long across vector.
   2708  void saddlv(const VRegister& vd,
   2709              const VRegister& vn);
   2710 
   2711  // Unsigned add long across vector.
   2712  void uaddlv(const VRegister& vd,
   2713              const VRegister& vn);
   2714 
   2715  // FP maximum number across vector.
   2716  void fmaxnmv(const VRegister& vd,
   2717               const VRegister& vn);
   2718 
   2719  // FP maximum across vector.
   2720  void fmaxv(const VRegister& vd,
   2721             const VRegister& vn);
   2722 
   2723  // FP minimum number across vector.
   2724  void fminnmv(const VRegister& vd,
   2725               const VRegister& vn);
   2726 
   2727  // FP minimum across vector.
   2728  void fminv(const VRegister& vd,
   2729             const VRegister& vn);
   2730 
   2731  // Signed maximum across vector.
   2732  void smaxv(const VRegister& vd,
   2733             const VRegister& vn);
   2734 
   2735  // Signed minimum.
   2736  void smin(const VRegister& vd,
   2737            const VRegister& vn,
   2738            const VRegister& vm);
   2739 
   2740  // Signed pairwise minimum.
   2741  void sminp(const VRegister& vd,
   2742             const VRegister& vn,
   2743             const VRegister& vm);
   2744 
   2745  // Signed minimum across vector.
   2746  void sminv(const VRegister& vd,
   2747             const VRegister& vn);
   2748 
   2749  // One-element structure store from one register.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2750  void st1(const VRegister& vt,
   2751           const MemOperand& src);
   2752 
   2753  // One-element structure store from two registers.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2754  void st1(const VRegister& vt,
   2755           const VRegister& vt2,
   2756           const MemOperand& src);
   2757 
   2758  // One-element structure store from three registers.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2759  void st1(const VRegister& vt,
   2760           const VRegister& vt2,
   2761           const VRegister& vt3,
   2762           const MemOperand& src);
   2763 
   2764  // One-element structure store from four registers.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2765  void st1(const VRegister& vt,
   2766           const VRegister& vt2,
   2767           const VRegister& vt3,
   2768           const VRegister& vt4,
   2769           const MemOperand& src);
   2770 
   2771  // One-element single structure store from one lane.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2772  void st1(const VRegister& vt,
   2773           int lane,
   2774           const MemOperand& src);
   2775 
   2776  // Two-element structure store from two registers.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2777  void st2(const VRegister& vt,
   2778           const VRegister& vt2,
   2779           const MemOperand& src);
   2780 
   2781  // Two-element single structure store from two lanes.
         // A single `lane` index is passed for both vt and vt2.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2782  void st2(const VRegister& vt,
   2783           const VRegister& vt2,
   2784           int lane,
   2785           const MemOperand& src);
   2786 
   2787  // Three-element structure store from three registers.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2788  void st3(const VRegister& vt,
   2789           const VRegister& vt2,
   2790           const VRegister& vt3,
   2791           const MemOperand& src);
   2792 
   2793  // Three-element single structure store from three lanes.
         // A single `lane` index is passed for vt, vt2 and vt3.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2794  void st3(const VRegister& vt,
   2795           const VRegister& vt2,
   2796           const VRegister& vt3,
   2797           int lane,
   2798           const MemOperand& src);
   2799 
   2800  // Four-element structure store from four registers.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2801  void st4(const VRegister& vt,
   2802           const VRegister& vt2,
   2803           const VRegister& vt3,
   2804           const VRegister& vt4,
   2805           const MemOperand& src);
   2806 
   2807  // Four-element single structure store from four lanes.
         // A single `lane` index is passed for vt, vt2, vt3 and vt4.
         // NOTE(review): the memory operand is named `src` although this is
         // documented as a store; consider renaming it to `dst`.
   2808  void st4(const VRegister& vt,
   2809           const VRegister& vt2,
   2810           const VRegister& vt3,
   2811           const VRegister& vt4,
   2812           int lane,
   2813           const MemOperand& src);
   2814 
   2815  // Unsigned add long.
   2816  void uaddl(const VRegister& vd,
   2817             const VRegister& vn,
   2818             const VRegister& vm);
   2819 
   2820  // Unsigned add long (second part).
   2821  void uaddl2(const VRegister& vd,
   2822              const VRegister& vn,
   2823              const VRegister& vm);
   2824 
   2825  // Unsigned add wide.
   2826  void uaddw(const VRegister& vd,
   2827             const VRegister& vn,
   2828             const VRegister& vm);
   2829 
   2830  // Unsigned add wide (second part).
   2831  void uaddw2(const VRegister& vd,
   2832              const VRegister& vn,
   2833              const VRegister& vm);
   2834 
   2835  // Signed add long.
   2836  void saddl(const VRegister& vd,
   2837             const VRegister& vn,
   2838             const VRegister& vm);
   2839 
   2840  // Signed add long (second part).
   2841  void saddl2(const VRegister& vd,
   2842              const VRegister& vn,
   2843              const VRegister& vm);
   2844 
   2845  // Signed add wide.
   2846  void saddw(const VRegister& vd,
   2847             const VRegister& vn,
   2848             const VRegister& vm);
   2849 
   2850  // Signed add wide (second part).
   2851  void saddw2(const VRegister& vd,
   2852              const VRegister& vn,
   2853              const VRegister& vm);
   2854 
   2855  // Unsigned subtract long.
   2856  void usubl(const VRegister& vd,
   2857             const VRegister& vn,
   2858             const VRegister& vm);
   2859 
   2860  // Unsigned subtract long (second part).
   2861  void usubl2(const VRegister& vd,
   2862              const VRegister& vn,
   2863              const VRegister& vm);
   2864 
   2865  // Unsigned subtract wide.
   2866  void usubw(const VRegister& vd,
   2867             const VRegister& vn,
   2868             const VRegister& vm);
   2869 
   2870  // Unsigned subtract wide (second part).
   2871  void usubw2(const VRegister& vd,
   2872              const VRegister& vn,
   2873              const VRegister& vm);
   2874 
   2875  // Signed subtract long.
   2876  void ssubl(const VRegister& vd,
   2877             const VRegister& vn,
   2878             const VRegister& vm);
   2879 
   2880  // Signed subtract long (second part).
   2881  void ssubl2(const VRegister& vd,
   2882              const VRegister& vn,
   2883              const VRegister& vm);
   2884 
   2885  // Signed integer subtract wide.
   2886  void ssubw(const VRegister& vd,
   2887             const VRegister& vn,
   2888             const VRegister& vm);
   2889 
   2890  // Signed integer subtract wide (second part).
   2891  void ssubw2(const VRegister& vd,
   2892              const VRegister& vn,
   2893              const VRegister& vm);
   2894 
   2895  // Unsigned maximum.
   2896  void umax(const VRegister& vd,
   2897            const VRegister& vn,
   2898            const VRegister& vm);
   2899 
   2900  // Unsigned pairwise maximum.
   2901  void umaxp(const VRegister& vd,
   2902             const VRegister& vn,
   2903             const VRegister& vm);
   2904 
   2905  // Unsigned maximum across vector.
   2906  void umaxv(const VRegister& vd,
   2907             const VRegister& vn);
   2908 
   2909  // Unsigned minimum.
   2910  void umin(const VRegister& vd,
   2911            const VRegister& vn,
   2912            const VRegister& vm);
   2913 
   2914  // Unsigned pairwise minimum.
   2915  void uminp(const VRegister& vd,
   2916             const VRegister& vn,
   2917             const VRegister& vm);
   2918 
   2919  // Unsigned minimum across vector.
   2920  void uminv(const VRegister& vd,
   2921             const VRegister& vn);
   2922 
   2923  // Transpose vectors (primary).
   2924  void trn1(const VRegister& vd,
   2925            const VRegister& vn,
   2926            const VRegister& vm);
   2927 
   2928  // Transpose vectors (secondary).
   2929  void trn2(const VRegister& vd,
   2930            const VRegister& vn,
   2931            const VRegister& vm);
   2932 
   2933  // Unzip vectors (primary).
   2934  void uzp1(const VRegister& vd,
   2935            const VRegister& vn,
   2936            const VRegister& vm);
   2937 
   2938  // Unzip vectors (secondary).
   2939  void uzp2(const VRegister& vd,
   2940            const VRegister& vn,
   2941            const VRegister& vm);
   2942 
   2943  // Zip vectors (primary).
   2944  void zip1(const VRegister& vd,
   2945            const VRegister& vn,
   2946            const VRegister& vm);
   2947 
   2948  // Zip vectors (secondary).
   2949  void zip2(const VRegister& vd,
   2950            const VRegister& vn,
   2951            const VRegister& vm);
   2952 
   2953  // Signed shift right by immediate.
   2954  void sshr(const VRegister& vd,
   2955            const VRegister& vn,
   2956            int shift);
   2957 
   2958  // Unsigned shift right by immediate.
   2959  void ushr(const VRegister& vd,
   2960            const VRegister& vn,
   2961            int shift);
   2962 
   2963  // Signed rounding shift right by immediate.
   2964  void srshr(const VRegister& vd,
   2965             const VRegister& vn,
   2966             int shift);
   2967 
   2968  // Unsigned rounding shift right by immediate.
   2969  void urshr(const VRegister& vd,
   2970             const VRegister& vn,
   2971             int shift);
   2972 
   2973  // Signed shift right by immediate and accumulate.
   2974  void ssra(const VRegister& vd,
   2975            const VRegister& vn,
   2976            int shift);
   2977 
   2978  // Unsigned shift right by immediate and accumulate.
   2979  void usra(const VRegister& vd,
   2980            const VRegister& vn,
   2981            int shift);
   2982 
   2983  // Signed rounding shift right by immediate and accumulate.
   2984  void srsra(const VRegister& vd,
   2985             const VRegister& vn,
   2986             int shift);
   2987 
   2988  // Unsigned rounding shift right by immediate and accumulate.
   2989  void ursra(const VRegister& vd,
   2990             const VRegister& vn,
   2991             int shift);
   2992 
   2993  // Shift right narrow by immediate.
   2994  void shrn(const VRegister& vd,
   2995            const VRegister& vn,
   2996            int shift);
   2997 
   2998  // Shift right narrow by immediate (second part).
   2999  void shrn2(const VRegister& vd,
   3000             const VRegister& vn,
   3001             int shift);
   3002 
   3003  // Rounding shift right narrow by immediate.
   3004  void rshrn(const VRegister& vd,
   3005             const VRegister& vn,
   3006             int shift);
   3007 
   3008  // Rounding shift right narrow by immediate (second part).
   3009  void rshrn2(const VRegister& vd,
   3010              const VRegister& vn,
   3011              int shift);
   3012 
   3013  // Unsigned saturating shift right narrow by immediate.
   3014  void uqshrn(const VRegister& vd,
   3015              const VRegister& vn,
   3016              int shift);
   3017 
   3018  // Unsigned saturating shift right narrow by immediate (second part).
   3019  void uqshrn2(const VRegister& vd,
   3020               const VRegister& vn,
   3021               int shift);
   3022 
   3023  // Unsigned saturating rounding shift right narrow by immediate.
   3024  void uqrshrn(const VRegister& vd,
   3025               const VRegister& vn,
   3026               int shift);
   3027 
   3028  // Unsigned saturating rounding shift right narrow by immediate (second part).
   3029  void uqrshrn2(const VRegister& vd,
   3030                const VRegister& vn,
   3031                int shift);
   3032 
   3033  // Signed saturating shift right narrow by immediate.
   3034  void sqshrn(const VRegister& vd,
   3035              const VRegister& vn,
   3036              int shift);
   3037 
   3038  // Signed saturating shift right narrow by immediate (second part).
   3039  void sqshrn2(const VRegister& vd,
   3040               const VRegister& vn,
   3041               int shift);
   3042 
   3043  // Signed saturating rounded shift right narrow by immediate.
   3044  void sqrshrn(const VRegister& vd,
   3045               const VRegister& vn,
   3046               int shift);
   3047 
   3048  // Signed saturating rounded shift right narrow by immediate (second part).
   3049  void sqrshrn2(const VRegister& vd,
   3050                const VRegister& vn,
   3051                int shift);
   3052 
   3053  // Signed saturating shift right unsigned narrow by immediate.
   3054  void sqshrun(const VRegister& vd,
   3055               const VRegister& vn,
   3056               int shift);
   3057 
   3058  // Signed saturating shift right unsigned narrow by immediate (second part).
   3059  void sqshrun2(const VRegister& vd,
   3060                const VRegister& vn,
   3061                int shift);
   3062 
   3063  // Signed saturating rounded shift right unsigned narrow by immediate.
   3064  void sqrshrun(const VRegister& vd,
   3065                const VRegister& vn,
   3066                int shift);
   3067 
   3068  // Signed saturating rounded shift right unsigned narrow by immediate
         // (second part).
   3069  void sqrshrun2(const VRegister& vd,
   3070                 const VRegister& vn,
   3071                 int shift);
   3072 
   3073  // FP reciprocal step.
   3074  void frecps(const VRegister& vd,
   3075              const VRegister& vn,
   3076              const VRegister& vm);
   3077 
   3078  // FP reciprocal estimate.
   3079  void frecpe(const VRegister& vd,
   3080              const VRegister& vn);
   3081 
   3082  // FP reciprocal square root estimate.
   3083  void frsqrte(const VRegister& vd,
   3084               const VRegister& vn);
   3085 
   3086  // FP reciprocal square root step.
   3087  void frsqrts(const VRegister& vd,
   3088               const VRegister& vn,
   3089               const VRegister& vm);
   3090 
   3091  // Signed absolute difference and accumulate long.
   3092  void sabal(const VRegister& vd,
   3093             const VRegister& vn,
   3094             const VRegister& vm);
   3095 
   3096  // Signed absolute difference and accumulate long (second part).
   3097  void sabal2(const VRegister& vd,
   3098              const VRegister& vn,
   3099              const VRegister& vm);
   3100 
   3101  // Unsigned absolute difference and accumulate long.
   3102  void uabal(const VRegister& vd,
   3103             const VRegister& vn,
   3104             const VRegister& vm);
   3105 
   3106  // Unsigned absolute difference and accumulate long (second part).
   3107  void uabal2(const VRegister& vd,
   3108              const VRegister& vn,
   3109              const VRegister& vm);
   3110 
   3111  // Signed absolute difference long.
   3112  void sabdl(const VRegister& vd,
   3113             const VRegister& vn,
   3114             const VRegister& vm);
   3115 
   3116  // Signed absolute difference long (second part).
   3117  void sabdl2(const VRegister& vd,
   3118              const VRegister& vn,
   3119              const VRegister& vm);
   3120 
   3121  // Unsigned absolute difference long.
   3122  void uabdl(const VRegister& vd,
   3123             const VRegister& vn,
   3124             const VRegister& vm);
   3125 
   3126  // Unsigned absolute difference long (second part).
   3127  void uabdl2(const VRegister& vd,
   3128              const VRegister& vn,
   3129              const VRegister& vm);
   3130 
   3131  // Polynomial multiply long.
   3132  void pmull(const VRegister& vd,
   3133             const VRegister& vn,
   3134             const VRegister& vm);
   3135 
   3136  // Polynomial multiply long (second part).
   3137  void pmull2(const VRegister& vd,
   3138              const VRegister& vn,
   3139              const VRegister& vm);
   3140 
   3141  // Signed long multiply-add.
   3142  void smlal(const VRegister& vd,
   3143             const VRegister& vn,
   3144             const VRegister& vm);
   3145 
   3146  // Signed long multiply-add (second part).
   3147  void smlal2(const VRegister& vd,
   3148              const VRegister& vn,
   3149              const VRegister& vm);
   3150 
   3151  // Unsigned long multiply-add.
   3152  void umlal(const VRegister& vd,
   3153             const VRegister& vn,
   3154             const VRegister& vm);
   3155 
   3156  // Unsigned long multiply-add (second part).
   3157  void umlal2(const VRegister& vd,
   3158              const VRegister& vn,
   3159              const VRegister& vm);
   3160 
   3161  // Signed long multiply-sub.
   3162  void smlsl(const VRegister& vd,
   3163             const VRegister& vn,
   3164             const VRegister& vm);
   3165 
   3166  // Signed long multiply-sub (second part).
   3167  void smlsl2(const VRegister& vd,
   3168              const VRegister& vn,
   3169              const VRegister& vm);
   3170 
   3171  // Unsigned long multiply-sub.
   3172  void umlsl(const VRegister& vd,
   3173             const VRegister& vn,
   3174             const VRegister& vm);
   3175 
   3176  // Unsigned long multiply-sub (second part).
   3177  void umlsl2(const VRegister& vd,
   3178              const VRegister& vn,
   3179              const VRegister& vm);
   3180 
   3181  // Signed long multiply.
   3182  void smull(const VRegister& vd,
   3183             const VRegister& vn,
   3184             const VRegister& vm);
   3185 
   3186  // Signed long multiply (second part).
   3187  void smull2(const VRegister& vd,
   3188              const VRegister& vn,
   3189              const VRegister& vm);
   3190 
   3191  // Signed saturating doubling long multiply-add.
   3192  void sqdmlal(const VRegister& vd,
   3193               const VRegister& vn,
   3194               const VRegister& vm);
   3195 
   3196  // Signed saturating doubling long multiply-add (second part).
   3197  void sqdmlal2(const VRegister& vd,
   3198                const VRegister& vn,
   3199                const VRegister& vm);
   3200 
   3201  // Signed saturating doubling long multiply-subtract.
   3202  void sqdmlsl(const VRegister& vd,
   3203               const VRegister& vn,
   3204               const VRegister& vm);
   3205 
   3206  // Signed saturating doubling long multiply-subtract (second part).
   3207  void sqdmlsl2(const VRegister& vd,
   3208                const VRegister& vn,
   3209                const VRegister& vm);
   3210 
   3211  // Signed saturating doubling long multiply.
   3212  void sqdmull(const VRegister& vd,
   3213               const VRegister& vn,
   3214               const VRegister& vm);
   3215 
   3216  // Signed saturating doubling long multiply (second part).
   3217  void sqdmull2(const VRegister& vd,
   3218                const VRegister& vn,
   3219                const VRegister& vm);
   3220 
   3221  // Signed saturating doubling multiply returning high half.
   3222  void sqdmulh(const VRegister& vd,
   3223               const VRegister& vn,
   3224               const VRegister& vm);
   3225 
   3226  // Signed saturating rounding doubling multiply returning high half.
   3227  void sqrdmulh(const VRegister& vd,
   3228                const VRegister& vn,
   3229                const VRegister& vm);
   3230 
   3231  // Signed saturating doubling multiply element returning high half.
   3232  void sqdmulh(const VRegister& vd,
   3233               const VRegister& vn,
   3234               const VRegister& vm,
   3235               int vm_index);
   3236 
   3237  // Signed saturating rounding doubling multiply element returning high half.
   3238  void sqrdmulh(const VRegister& vd,
   3239                const VRegister& vn,
   3240                const VRegister& vm,
   3241                int vm_index);
   3242 
  // Unsigned long multiply.
   3244  void umull(const VRegister& vd,
   3245             const VRegister& vn,
   3246             const VRegister& vm);
   3247 
   3248  // Unsigned long multiply (second part).
   3249  void umull2(const VRegister& vd,
   3250              const VRegister& vn,
   3251              const VRegister& vm);
   3252 
   3253  // Add narrow returning high half.
   3254  void addhn(const VRegister& vd,
   3255             const VRegister& vn,
   3256             const VRegister& vm);
   3257 
   3258  // Add narrow returning high half (second part).
   3259  void addhn2(const VRegister& vd,
   3260              const VRegister& vn,
   3261              const VRegister& vm);
   3262 
   3263  // Rounding add narrow returning high half.
   3264  void raddhn(const VRegister& vd,
   3265              const VRegister& vn,
   3266              const VRegister& vm);
   3267 
   3268  // Rounding add narrow returning high half (second part).
   3269  void raddhn2(const VRegister& vd,
   3270               const VRegister& vn,
   3271               const VRegister& vm);
   3272 
   3273  // Subtract narrow returning high half.
   3274  void subhn(const VRegister& vd,
   3275             const VRegister& vn,
   3276             const VRegister& vm);
   3277 
   3278  // Subtract narrow returning high half (second part).
   3279  void subhn2(const VRegister& vd,
   3280              const VRegister& vn,
   3281              const VRegister& vm);
   3282 
   3283  // Rounding subtract narrow returning high half.
   3284  void rsubhn(const VRegister& vd,
   3285              const VRegister& vn,
   3286              const VRegister& vm);
   3287 
   3288  // Rounding subtract narrow returning high half (second part).
   3289  void rsubhn2(const VRegister& vd,
   3290               const VRegister& vn,
   3291               const VRegister& vm);
   3292 
   3293  // FP vector multiply accumulate.
   3294  void fmla(const VRegister& vd,
   3295            const VRegister& vn,
   3296            const VRegister& vm);
   3297 
   3298  // FP vector multiply subtract.
   3299  void fmls(const VRegister& vd,
   3300            const VRegister& vn,
   3301            const VRegister& vm);
   3302 
   3303  // FP vector multiply extended.
   3304  void fmulx(const VRegister& vd,
   3305             const VRegister& vn,
   3306             const VRegister& vm);
   3307 
   3308  // FP absolute greater than or equal.
   3309  void facge(const VRegister& vd,
   3310             const VRegister& vn,
   3311             const VRegister& vm);
   3312 
   3313  // FP absolute greater than.
   3314  void facgt(const VRegister& vd,
   3315             const VRegister& vn,
   3316             const VRegister& vm);
   3317 
   3318  // FP multiply by element.
   3319  void fmul(const VRegister& vd,
   3320            const VRegister& vn,
   3321            const VRegister& vm,
   3322            int vm_index);
   3323 
   3324  // FP fused multiply-add to accumulator by element.
   3325  void fmla(const VRegister& vd,
   3326            const VRegister& vn,
   3327            const VRegister& vm,
   3328            int vm_index);
   3329 
   3330  // FP fused multiply-sub from accumulator by element.
   3331  void fmls(const VRegister& vd,
   3332            const VRegister& vn,
   3333            const VRegister& vm,
   3334            int vm_index);
   3335 
   3336  // FP multiply extended by element.
   3337  void fmulx(const VRegister& vd,
   3338             const VRegister& vn,
   3339             const VRegister& vm,
   3340             int vm_index);
   3341 
   3342  // FP compare equal.
   3343  void fcmeq(const VRegister& vd,
   3344             const VRegister& vn,
   3345             const VRegister& vm);
   3346 
   3347  // FP greater than.
   3348  void fcmgt(const VRegister& vd,
   3349             const VRegister& vn,
   3350             const VRegister& vm);
   3351 
   3352  // FP greater than or equal.
   3353  void fcmge(const VRegister& vd,
   3354             const VRegister& vn,
   3355             const VRegister& vm);
   3356 
   3357  // FP compare equal to zero.
   3358  void fcmeq(const VRegister& vd,
   3359             const VRegister& vn,
   3360             double imm);
   3361 
   3362  // FP greater than zero.
   3363  void fcmgt(const VRegister& vd,
   3364             const VRegister& vn,
   3365             double imm);
   3366 
   3367  // FP greater than or equal to zero.
   3368  void fcmge(const VRegister& vd,
   3369             const VRegister& vn,
   3370             double imm);
   3371 
   3372  // FP less than or equal to zero.
   3373  void fcmle(const VRegister& vd,
   3374             const VRegister& vn,
   3375             double imm);
   3376 
  // FP less than zero.
   3378  void fcmlt(const VRegister& vd,
   3379             const VRegister& vn,
   3380             double imm);
   3381 
   3382  // FP absolute difference.
   3383  void fabd(const VRegister& vd,
   3384            const VRegister& vn,
   3385            const VRegister& vm);
   3386 
   3387  // FP pairwise add vector.
   3388  void faddp(const VRegister& vd,
   3389             const VRegister& vn,
   3390             const VRegister& vm);
   3391 
   3392  // FP pairwise add scalar.
   3393  void faddp(const VRegister& vd,
   3394             const VRegister& vn);
   3395 
   3396  // FP pairwise maximum vector.
   3397  void fmaxp(const VRegister& vd,
   3398             const VRegister& vn,
   3399             const VRegister& vm);
   3400 
   3401  // FP pairwise maximum scalar.
   3402  void fmaxp(const VRegister& vd,
   3403             const VRegister& vn);
   3404 
   3405  // FP pairwise minimum vector.
   3406  void fminp(const VRegister& vd,
   3407             const VRegister& vn,
   3408             const VRegister& vm);
   3409 
   3410  // FP pairwise minimum scalar.
   3411  void fminp(const VRegister& vd,
   3412             const VRegister& vn);
   3413 
   3414  // FP pairwise maximum number vector.
   3415  void fmaxnmp(const VRegister& vd,
   3416               const VRegister& vn,
   3417               const VRegister& vm);
   3418 
   3419  // FP pairwise maximum number scalar.
   3420  void fmaxnmp(const VRegister& vd,
   3421               const VRegister& vn);
   3422 
   3423  // FP pairwise minimum number vector.
   3424  void fminnmp(const VRegister& vd,
   3425               const VRegister& vn,
   3426               const VRegister& vm);
   3427 
   3428  // FP pairwise minimum number scalar.
   3429  void fminnmp(const VRegister& vd,
   3430               const VRegister& vn);
   3431 
   3432  // Absolute value.
   3433  void abs(const Register& rd, const Register& rn);
   3434 
   3435  // Count bits.
   3436  void cnt(const Register& rd, const Register& rn);
   3437 
   3438  // Count Trailing Zeros.
   3439  void ctz(const Register& rd, const Register& rn);
   3440 
   3441  // Signed Maximum.
   3442  void smax(const Register& rd, const Register& rn, const Operand& op);
   3443 
   3444  // Signed Minimum.
   3445  void smin(const Register& rd, const Register& rn, const Operand& op);
   3446 
   3447  // Unsigned Maximum.
   3448  void umax(const Register& rd, const Register& rn, const Operand& op);
   3449 
   3450  // Unsigned Minimum.
   3451  void umin(const Register& rd, const Register& rn, const Operand& op);
   3452 
   3453  // Emit generic instructions.
   3454 
   3455  // Emit raw instructions into the instruction stream.
   3456  void dci(Instr raw_inst) { Emit(raw_inst); }
   3457 
   3458  // Emit 32 bits of data into the instruction stream.
   3459  void dc32(uint32_t data) {
   3460    EmitData(&data, sizeof(data));
   3461  }
   3462 
   3463  // Emit 64 bits of data into the instruction stream.
   3464  void dc64(uint64_t data) {
   3465    EmitData(&data, sizeof(data));
   3466  }
   3467 
   3468  // Code generation helpers.
   3469 
   3470  // Register encoding.
   3471  static Instr Rd(CPURegister rd) {
   3472    VIXL_ASSERT(rd.code() != kSPRegInternalCode);
   3473    return rd.code() << Rd_offset;
   3474  }
   3475 
   3476  static Instr Rn(CPURegister rn) {
   3477    VIXL_ASSERT(rn.code() != kSPRegInternalCode);
   3478    return rn.code() << Rn_offset;
   3479  }
   3480 
   3481  static Instr Rm(CPURegister rm) {
   3482    VIXL_ASSERT(rm.code() != kSPRegInternalCode);
   3483    return rm.code() << Rm_offset;
   3484  }
   3485 
   3486  static Instr RmNot31(CPURegister rm) {
   3487    VIXL_ASSERT(rm.code() != kSPRegInternalCode);
   3488    VIXL_ASSERT(!rm.IsZero());
   3489    return Rm(rm);
   3490  }
   3491 
   3492  static Instr Ra(CPURegister ra) {
   3493    VIXL_ASSERT(ra.code() != kSPRegInternalCode);
   3494    return ra.code() << Ra_offset;
   3495  }
   3496 
   3497  static Instr Rt(CPURegister rt) {
   3498    VIXL_ASSERT(rt.code() != kSPRegInternalCode);
   3499    return rt.code() << Rt_offset;
   3500  }
   3501 
   3502  static Instr Rt2(CPURegister rt2) {
   3503    VIXL_ASSERT(rt2.code() != kSPRegInternalCode);
   3504    return rt2.code() << Rt2_offset;
   3505  }
   3506 
   3507  static Instr Rs(CPURegister rs) {
   3508    VIXL_ASSERT(rs.code() != kSPRegInternalCode);
   3509    return rs.code() << Rs_offset;
   3510  }
   3511 
   3512  // These encoding functions allow the stack pointer to be encoded, and
   3513  // disallow the zero register.
   3514  static Instr RdSP(Register rd) {
   3515    VIXL_ASSERT(!rd.IsZero());
   3516    return (rd.code() & kRegCodeMask) << Rd_offset;
   3517  }
   3518 
   3519  static Instr RnSP(Register rn) {
   3520    VIXL_ASSERT(!rn.IsZero());
   3521    return (rn.code() & kRegCodeMask) << Rn_offset;
   3522  }
   3523 
   3524  // Flags encoding.
   3525  static Instr Flags(FlagsUpdate S) {
   3526    if (S == SetFlags) {
   3527      return 1 << FlagsUpdate_offset;
   3528    } else if (S == LeaveFlags) {
   3529      return 0 << FlagsUpdate_offset;
   3530    }
   3531    VIXL_UNREACHABLE();
   3532    return 0;
   3533  }
   3534 
   3535  static Instr Cond(Condition cond) {
   3536    return cond << Condition_offset;
   3537  }
   3538 
   3539  // Generic immediate encoding.
   3540  template <int hibit, int lobit>
   3541  static Instr ImmField(int64_t imm) {
   3542    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
   3543    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
   3544    int fieldsize = hibit - lobit + 1;
   3545    VIXL_ASSERT(IsIntN(fieldsize, imm));
   3546    return static_cast<Instr>(TruncateToUintN(fieldsize, imm) << lobit);
   3547  }
   3548 
   3549  // For unsigned immediate encoding.
   3550  // TODO: Handle signed and unsigned immediate in satisfactory way.
   3551  template <int hibit, int lobit>
   3552  static Instr ImmUnsignedField(uint64_t imm) {
   3553    VIXL_STATIC_ASSERT((hibit >= lobit) && (lobit >= 0));
   3554    VIXL_STATIC_ASSERT(hibit < (sizeof(Instr) * kBitsPerByte));
   3555    VIXL_ASSERT(IsUintN(hibit - lobit + 1, imm));
   3556    return static_cast<Instr>(imm << lobit);
   3557  }
   3558 
   3559  // PC-relative address encoding.
   3560  static Instr ImmPCRelAddress(int imm21) {
   3561    VIXL_ASSERT(IsInt21(imm21));
   3562    Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
   3563    Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
   3564    Instr immlo = imm << ImmPCRelLo_offset;
   3565    return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
   3566  }
   3567 
   3568  // Branch encoding.
   3569  static Instr ImmUncondBranch(int imm26) {
   3570    VIXL_ASSERT(IsInt26(imm26));
   3571    return TruncateToUint26(imm26) << ImmUncondBranch_offset;
   3572  }
   3573 
   3574  static Instr ImmCondBranch(int imm19) {
   3575    VIXL_ASSERT(IsInt19(imm19));
   3576    return TruncateToUint19(imm19) << ImmCondBranch_offset;
   3577  }
   3578 
   3579  static Instr ImmCmpBranch(int imm19) {
   3580    VIXL_ASSERT(IsInt19(imm19));
   3581    return TruncateToUint19(imm19) << ImmCmpBranch_offset;
   3582  }
   3583 
   3584  static Instr ImmTestBranch(int imm14) {
   3585    VIXL_ASSERT(IsInt14(imm14));
   3586    return TruncateToUint14(imm14) << ImmTestBranch_offset;
   3587  }
   3588 
   3589  static Instr ImmTestBranchBit(unsigned bit_pos) {
   3590    VIXL_ASSERT(IsUint6(bit_pos));
   3591    // Subtract five from the shift offset, as we need bit 5 from bit_pos.
   3592    unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
   3593    unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
   3594    b5 &= ImmTestBranchBit5_mask;
   3595    b40 &= ImmTestBranchBit40_mask;
   3596    return b5 | b40;
   3597  }
   3598 
   3599  // Data Processing encoding.
   3600  static Instr SF(Register rd) {
   3601      return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
   3602  }
   3603 
   3604  static Instr ImmAddSub(int imm) {
   3605    VIXL_ASSERT(IsImmAddSub(imm));
   3606    if (IsUint12(imm)) {  // No shift required.
   3607      imm <<= ImmAddSub_offset;
   3608    } else {
   3609      imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ImmAddSubShift_offset);
   3610    }
   3611    return imm;
   3612  }
   3613 
   3614  static Instr ImmS(unsigned imms, unsigned reg_size) {
   3615    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
   3616           ((reg_size == kWRegSize) && IsUint5(imms)));
   3617    USE(reg_size);
   3618    return imms << ImmS_offset;
   3619  }
   3620 
   3621  static Instr ImmR(unsigned immr, unsigned reg_size) {
   3622    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
   3623           ((reg_size == kWRegSize) && IsUint5(immr)));
   3624    USE(reg_size);
   3625    VIXL_ASSERT(IsUint6(immr));
   3626    return immr << ImmR_offset;
   3627  }
   3628 
   3629  static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
   3630    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
   3631    VIXL_ASSERT(IsUint6(imms));
   3632    VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
   3633    USE(reg_size);
   3634    return imms << ImmSetBits_offset;
   3635  }
   3636 
   3637  static Instr ImmRotate(unsigned immr, unsigned reg_size) {
   3638    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
   3639    VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
   3640           ((reg_size == kWRegSize) && IsUint5(immr)));
   3641    USE(reg_size);
   3642    return immr << ImmRotate_offset;
   3643  }
   3644 
   3645  static Instr ImmLLiteral(int imm19) {
   3646    VIXL_ASSERT(IsInt19(imm19));
   3647    return TruncateToUint19(imm19) << ImmLLiteral_offset;
   3648  }
   3649 
   3650  static Instr BitN(unsigned bitn, unsigned reg_size) {
   3651    VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
   3652    VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
   3653    USE(reg_size);
   3654    return bitn << BitN_offset;
   3655  }
   3656 
   3657  static Instr ShiftDP(Shift shift) {
   3658    VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
   3659    return shift << ShiftDP_offset;
   3660  }
   3661 
   3662  static Instr ImmDPShift(unsigned amount) {
   3663    VIXL_ASSERT(IsUint6(amount));
   3664    return amount << ImmDPShift_offset;
   3665  }
   3666 
   3667  static Instr ExtendMode(Extend extend) {
   3668    return extend << ExtendMode_offset;
   3669  }
   3670 
   3671  static Instr ImmExtendShift(unsigned left_shift) {
   3672    VIXL_ASSERT(left_shift <= 4);
   3673    return left_shift << ImmExtendShift_offset;
   3674  }
   3675 
   3676  static Instr ImmCondCmp(unsigned imm) {
   3677    VIXL_ASSERT(IsUint5(imm));
   3678    return imm << ImmCondCmp_offset;
   3679  }
   3680 
   3681  static Instr Nzcv(StatusFlags nzcv) {
   3682    return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
   3683  }
   3684 
   3685  // MemOperand offset encoding.
   3686  static Instr ImmLSUnsigned(int imm12) {
   3687    VIXL_ASSERT(IsUint12(imm12));
   3688    return imm12 << ImmLSUnsigned_offset;
   3689  }
   3690 
   3691  static Instr ImmLS(int imm9) {
   3692    VIXL_ASSERT(IsInt9(imm9));
   3693    return TruncateToUint9(imm9) << ImmLS_offset;
   3694  }
   3695 
   3696  static Instr ImmLSPair(int imm7, unsigned access_size) {
   3697    VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7);
   3698    int scaled_imm7 = imm7 >> access_size;
   3699    VIXL_ASSERT(IsInt7(scaled_imm7));
   3700    return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
   3701  }
   3702 
   3703  static Instr ImmShiftLS(unsigned shift_amount) {
   3704    VIXL_ASSERT(IsUint1(shift_amount));
   3705    return shift_amount << ImmShiftLS_offset;
   3706  }
   3707 
   3708  static Instr ImmPrefetchOperation(int imm5) {
   3709    VIXL_ASSERT(IsUint5(imm5));
   3710    return imm5 << ImmPrefetchOperation_offset;
   3711  }
   3712 
   3713  static Instr ImmException(int imm16) {
   3714    VIXL_ASSERT(IsUint16(imm16));
   3715    return imm16 << ImmException_offset;
   3716  }
   3717 
   3718  static Instr ImmSystemRegister(int imm15) {
   3719    VIXL_ASSERT(IsUint15(imm15));
   3720    return imm15 << ImmSystemRegister_offset;
   3721  }
   3722 
   3723  static Instr ImmHint(int imm7) {
   3724    VIXL_ASSERT(IsUint7(imm7));
   3725    return imm7 << ImmHint_offset;
   3726  }
   3727 
   3728  static Instr CRm(int imm4) {
   3729    VIXL_ASSERT(IsUint4(imm4));
   3730    return imm4 << CRm_offset;
   3731  }
   3732 
   3733  static Instr CRn(int imm4) {
   3734    VIXL_ASSERT(IsUint4(imm4));
   3735    return imm4 << CRn_offset;
   3736  }
   3737 
   3738  static Instr SysOp(int imm14) {
   3739    VIXL_ASSERT(IsUint14(imm14));
   3740    return imm14 << SysOp_offset;
   3741  }
   3742 
   3743  static Instr ImmSysOp1(int imm3) {
   3744    VIXL_ASSERT(IsUint3(imm3));
   3745    return imm3 << SysOp1_offset;
   3746  }
   3747 
   3748  static Instr ImmSysOp2(int imm3) {
   3749    VIXL_ASSERT(IsUint3(imm3));
   3750    return imm3 << SysOp2_offset;
   3751  }
   3752 
   3753  static Instr ImmBarrierDomain(int imm2) {
   3754    VIXL_ASSERT(IsUint2(imm2));
   3755    return imm2 << ImmBarrierDomain_offset;
   3756  }
   3757 
   3758  static Instr ImmBarrierType(int imm2) {
   3759    VIXL_ASSERT(IsUint2(imm2));
   3760    return imm2 << ImmBarrierType_offset;
   3761  }
   3762 
   3763  // Move immediates encoding.
   3764  static Instr ImmMoveWide(uint64_t imm) {
   3765    VIXL_ASSERT(IsUint16(imm));
   3766    return static_cast<Instr>(imm << ImmMoveWide_offset);
   3767  }
   3768 
   3769  static Instr ShiftMoveWide(int64_t shift) {
   3770    VIXL_ASSERT(IsUint2(shift));
   3771    return static_cast<Instr>(shift << ShiftMoveWide_offset);
   3772  }
   3773 
   3774  // FP Immediates.
   3775  static Instr ImmFP32(float imm);
   3776  static Instr ImmFP64(double imm);
   3777 
   3778  // FP register type.
   3779  static Instr FPType(FPRegister fd) {
   3780    return fd.Is64Bits() ? FP64 : FP32;
   3781  }
   3782 
   3783  static Instr FPScale(unsigned scale) {
   3784    VIXL_ASSERT(IsUint6(scale));
   3785    return scale << FPScale_offset;
   3786  }
   3787 
   3788  // Immediate field checking helpers.
   3789  static bool IsImmAddSub(int64_t immediate);
   3790  static bool IsImmConditionalCompare(int64_t immediate);
   3791  static bool IsImmFP32(float imm);
   3792  static bool IsImmFP64(double imm);
   3793  static bool IsImmLogical(uint64_t value,
   3794                           unsigned width,
   3795                           unsigned* n = NULL,
   3796                           unsigned* imm_s = NULL,
   3797                           unsigned* imm_r = NULL);
   3798  static bool IsImmLSPair(int64_t offset, unsigned access_size);
   3799  static bool IsImmLSScaled(int64_t offset, unsigned access_size);
   3800  static bool IsImmLSUnscaled(int64_t offset);
   3801  static bool IsImmMovn(uint64_t imm, unsigned reg_size);
   3802  static bool IsImmMovz(uint64_t imm, unsigned reg_size);
   3803 
   3804  // Instruction bits for vector format in data processing operations.
   3805  static Instr VFormat(VRegister vd) {
   3806    if (vd.Is64Bits()) {
   3807      switch (vd.lanes()) {
   3808        case 2: return NEON_2S;
   3809        case 4: return NEON_4H;
   3810        case 8: return NEON_8B;
   3811        default: return 0xffffffff;
   3812      }
   3813    } else {
   3814      VIXL_ASSERT(vd.Is128Bits());
   3815      switch (vd.lanes()) {
   3816        case 2: return NEON_2D;
   3817        case 4: return NEON_4S;
   3818        case 8: return NEON_8H;
   3819        case 16: return NEON_16B;
   3820        default: return 0xffffffff;
   3821      }
   3822    }
   3823  }
   3824 
   3825  // Instruction bits for vector format in floating point data processing
   3826  // operations.
   3827  static Instr FPFormat(VRegister vd) {
   3828    if (vd.lanes() == 1) {
   3829      // Floating point scalar formats.
   3830      VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
   3831      return vd.Is64Bits() ? FP64 : FP32;
   3832    }
   3833 
   3834    // Two lane floating point vector formats.
   3835    if (vd.lanes() == 2) {
   3836      VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
   3837      return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
   3838    }
   3839 
   3840    // Four lane floating point vector format.
   3841    VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits());
   3842    return NEON_FP_4S;
   3843  }
   3844 
   3845  // Instruction bits for vector format in load and store operations.
   3846  static Instr LSVFormat(VRegister vd) {
   3847    if (vd.Is64Bits()) {
   3848      switch (vd.lanes()) {
   3849        case 1: return LS_NEON_1D;
   3850        case 2: return LS_NEON_2S;
   3851        case 4: return LS_NEON_4H;
   3852        case 8: return LS_NEON_8B;
   3853        default: return 0xffffffff;
   3854      }
   3855    } else {
   3856      VIXL_ASSERT(vd.Is128Bits());
   3857      switch (vd.lanes()) {
   3858        case 2: return LS_NEON_2D;
   3859        case 4: return LS_NEON_4S;
   3860        case 8: return LS_NEON_8H;
   3861        case 16: return LS_NEON_16B;
   3862        default: return 0xffffffff;
   3863      }
   3864    }
   3865  }
   3866 
   3867  // Instruction bits for scalar format in data processing operations.
   3868  static Instr SFormat(VRegister vd) {
   3869    VIXL_ASSERT(vd.lanes() == 1);
   3870    switch (vd.SizeInBytes()) {
   3871      case 1: return NEON_B;
   3872      case 2: return NEON_H;
   3873      case 4: return NEON_S;
   3874      case 8: return NEON_D;
   3875      default: return 0xffffffff;
   3876    }
   3877  }
   3878 
   3879  static Instr ImmNEONHLM(int index, int num_bits) {
   3880    int h, l, m;
   3881    if (num_bits == 3) {
   3882      VIXL_ASSERT(IsUint3(index));
   3883      h  = (index >> 2) & 1;
   3884      l  = (index >> 1) & 1;
   3885      m  = (index >> 0) & 1;
   3886    } else if (num_bits == 2) {
   3887      VIXL_ASSERT(IsUint2(index));
   3888      h  = (index >> 1) & 1;
   3889      l  = (index >> 0) & 1;
   3890      m  = 0;
   3891    } else {
   3892      VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
   3893      h  = (index >> 0) & 1;
   3894      l  = 0;
   3895      m  = 0;
   3896    }
   3897    return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
   3898  }
   3899 
   3900  static Instr ImmNEONExt(int imm4) {
   3901    VIXL_ASSERT(IsUint4(imm4));
   3902    return imm4 << ImmNEONExt_offset;
   3903  }
   3904 
   3905  static Instr ImmNEON5(Instr format, int index) {
   3906    VIXL_ASSERT(IsUint4(index));
   3907    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
   3908    int imm5 = (index << (s + 1)) | (1 << s);
   3909    return imm5 << ImmNEON5_offset;
   3910  }
   3911 
   3912  static Instr ImmNEON4(Instr format, int index) {
   3913    VIXL_ASSERT(IsUint4(index));
   3914    int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
   3915    int imm4 = index << s;
   3916    return imm4 << ImmNEON4_offset;
   3917  }
   3918 
   3919  static Instr ImmNEONabcdefgh(int imm8) {
   3920    VIXL_ASSERT(IsUint8(imm8));
   3921    Instr instr;
   3922    instr  = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
   3923    instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
   3924    return instr;
   3925  }
   3926 
   3927  static Instr NEONCmode(int cmode) {
   3928    VIXL_ASSERT(IsUint4(cmode));
   3929    return cmode << NEONCmode_offset;
   3930  }
   3931 
   3932  static Instr NEONModImmOp(int op) {
   3933    VIXL_ASSERT(IsUint1(op));
   3934    return op << NEONModImmOp_offset;
   3935  }
   3936 
   3937  size_t size() const {
   3938    return SizeOfCodeGenerated();
   3939  }
   3940 
   3941  size_t SizeOfCodeGenerated() const {
   3942    return armbuffer_.size();
   3943  }
   3944 
   3945  PositionIndependentCodeOption pic() const {
   3946    return pic_;
   3947  }
   3948 
   3949  CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
   3950 
   3951  void SetCPUFeatures(const CPUFeatures& cpu_features) {
   3952    cpu_features_ = cpu_features;
   3953  }
   3954 
   3955  bool AllowPageOffsetDependentCode() const {
   3956    return (pic() == PageOffsetDependentCode) ||
   3957           (pic() == PositionDependentCode);
   3958  }
   3959 
   3960  static const Register& AppropriateZeroRegFor(const CPURegister& reg) {
   3961    return reg.Is64Bits() ? xzr : wzr;
   3962  }
   3963 
   3964 
   3965 protected:
   3966  void LoadStore(const CPURegister& rt,
   3967                 const MemOperand& addr,
   3968                 LoadStoreOp op,
   3969                 LoadStoreScalingOption option = PreferScaledOffset);
   3970 
   3971  void LoadStorePair(const CPURegister& rt,
   3972                     const CPURegister& rt2,
   3973                     const MemOperand& addr,
   3974                     LoadStorePairOp op);
   3975  void LoadStoreStruct(const VRegister& vt,
   3976                       const MemOperand& addr,
   3977                       NEONLoadStoreMultiStructOp op);
   3978  void LoadStoreStruct1(const VRegister& vt,
   3979                        int reg_count,
   3980                        const MemOperand& addr);
   3981  void LoadStoreStructSingle(const VRegister& vt,
   3982                             uint32_t lane,
   3983                             const MemOperand& addr,
   3984                             NEONLoadStoreSingleStructOp op);
   3985  void LoadStoreStructSingleAllLanes(const VRegister& vt,
   3986                                     const MemOperand& addr,
   3987                                     NEONLoadStoreSingleStructOp op);
   3988  void LoadStoreStructVerify(const VRegister& vt,
   3989                             const MemOperand& addr,
   3990                             Instr op);
   3991 
   3992  void Prefetch(PrefetchOperation op,
   3993                const MemOperand& addr,
   3994                LoadStoreScalingOption option = PreferScaledOffset);
   3995 
   3996  BufferOffset Logical(const Register& rd,
   3997                       const Register& rn,
   3998                       const Operand& operand,
   3999                       LogicalOp op);
   4000  BufferOffset LogicalImmediate(const Register& rd,
   4001                                const Register& rn,
   4002                                unsigned n,
   4003                                unsigned imm_s,
   4004                                unsigned imm_r,
   4005                                LogicalOp op);
   4006 
   4007  void ConditionalCompare(const Register& rn,
   4008                          const Operand& operand,
   4009                          StatusFlags nzcv,
   4010                          Condition cond,
   4011                          ConditionalCompareOp op);
   4012 
   4013  void AddSubWithCarry(const Register& rd,
   4014                       const Register& rn,
   4015                       const Operand& operand,
   4016                       FlagsUpdate S,
   4017                       AddSubWithCarryOp op);
   4018 
   4019 
   4020  // Functions for emulating operands not directly supported by the instruction
   4021  // set.
         // EmitShift() takes a Shift kind and an |amount|; EmitExtendShift() takes
         // an Extend kind and a |left_shift|.
         // NOTE(review): how the two are combined is not visible from these
         // declarations -- confirm against the definitions.
   4022  void EmitShift(const Register& rd,
   4023                 const Register& rn,
   4024                 Shift shift,
   4025                 unsigned amount);
   4026  void EmitExtendShift(const Register& rd,
   4027                       const Register& rn,
   4028                       Extend extend,
   4029                       unsigned left_shift);
   4030 
   // Add/subtract helper taking a generic Operand; |op| (an AddSubOp) selects
   // the instruction.
   // NOTE(review): |S| presumably selects the flag-setting variant -- confirm.
   4031  void AddSub(const Register& rd,
   4032              const Register& rn,
   4033              const Operand& operand,
   4034              FlagsUpdate S,
   4035              AddSubOp op);
   4036 
   // NEON table helper taking three vector registers; |op| is a NEONTableOp.
   // NOTE(review): presumably backs the table-lookup (TBL/TBX) emitters --
   // confirm against the definition.
   4037  void NEONTable(const VRegister& vd,
   4038                 const VRegister& vn,
   4039                 const VRegister& vm,
   4040                 NEONTableOp op);
   4041 
   4042  // Find a suitable LoadStoreOp, LoadStorePairOp, LoadStorePairNonTemporalOp
   4043  // or LoadLiteralOp for the given registers. Only simple loads are supported;
   4044  // sign- and zero-extension (such as in LDPSW_x or LDRB_w) are not supported.
   4045  static LoadStoreOp LoadOpFor(const CPURegister& rt);
   4046  static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
   4047                                       const CPURegister& rt2);
   4048  static LoadStoreOp StoreOpFor(const CPURegister& rt);
   4049  static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
   4050                                        const CPURegister& rt2);
   4051  static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
   4052    const CPURegister& rt, const CPURegister& rt2);
   4053  static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
   4054    const CPURegister& rt, const CPURegister& rt2);
   4055  static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
   4056 
   4057  // Convenience pass-through for CPU feature checks.
         // Forwards up to four features to cpu_features_.Has() and returns its
         // result; the trailing three default to CPUFeatures::kNone when omitted.
   4058  bool CPUHas(CPUFeatures::Feature feature0,
   4059              CPUFeatures::Feature feature1 = CPUFeatures::kNone,
   4060              CPUFeatures::Feature feature2 = CPUFeatures::kNone,
   4061              CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
   4062    return cpu_features_.Has(feature0, feature1, feature2, feature3);
   4063  }
   4064 
   4065  // Determine whether the target CPU has the specified registers, based on the
   4066  // currently-enabled CPU features. Presence of a register does not imply
   4067  // support for arbitrary operations on it. For example, CPUs with FP have H
   4068  // registers, but most half-precision operations require the FPHalf feature.
   4069  //
   4070  // These are used to check CPU features in loads and stores that have the same
   4071  // entry point for both integer and FP registers.
         //
         // The two-register overload takes |rt| and |rt2| together.
         // NOTE(review): presumably true only when both are available -- confirm
         // against the definition.
   4072  bool CPUHas(const CPURegister& rt) const;
   4073  bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;
   4074 
   // System-register counterpart of the register-based CPUHas() overloads.
   // NOTE(review): presumably reports whether the enabled CPU features provide
   // |sysreg| -- confirm against the definition.
   4075  bool CPUHas(SystemRegister sysreg) const;
   4076 
   4077 private:
   // Map a float / double immediate to a uint32_t.
   // NOTE(review): presumably the 8-bit encoded floating-point immediate, as
   // the names suggest -- confirm against the definitions, including how
   // values without such an encoding are handled.
   4078  static uint32_t FP32ToImm8(float imm);
   4079  static uint32_t FP64ToImm8(double imm);
   4080 
   4081  // Instruction helpers.
         // Declarations only; the definitions are outside this part of the header.
         // Each helper takes its operands plus an op value selecting the
         // instruction. DataProcShiftedRegister() is the only one in this group
         // that returns a value (a BufferOffset).
   4082  void MoveWide(const Register& rd,
   4083                uint64_t imm,
   4084                int shift,
   4085                MoveWideImmediateOp mov_op);
   4086  BufferOffset DataProcShiftedRegister(const Register& rd,
   4087                                       const Register& rn,
   4088                                       const Operand& operand,
   4089                                       FlagsUpdate S,
   4090                                       Instr op);
   4091  void DataProcExtendedRegister(const Register& rd,
   4092                                const Register& rn,
   4093                                const Operand& operand,
   4094                                FlagsUpdate S,
   4095                                Instr op);
   4096  void LoadStorePairNonTemporal(const CPURegister& rt,
   4097                                const CPURegister& rt2,
   4098                                const MemOperand& addr,
   4099                                LoadStorePairNonTemporalOp op);
   4100  void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
   4101  void ConditionalSelect(const Register& rd,
   4102                         const Register& rn,
   4103                         const Register& rm,
   4104                         Condition cond,
   4105                         ConditionalSelectOp op);
   4106  void DataProcessing1Source(const Register& rd,
   4107                             const Register& rn,
   4108                             DataProcessing1SourceOp op);
   4109  void DataProcessing3Source(const Register& rd,
   4110                             const Register& rn,
   4111                             const Register& rm,
   4112                             const Register& ra,
   4113                             DataProcessing3SourceOp op);
   // FP data-processing helpers for the one-source and three-source forms;
   // |op| selects the instruction. All register operands are VRegisters.
   4114  void FPDataProcessing1Source(const VRegister& fd,
   4115                               const VRegister& fn,
   4116                               FPDataProcessing1SourceOp op);
   4117  void FPDataProcessing3Source(const VRegister& fd,
   4118                               const VRegister& fn,
   4119                               const VRegister& fm,
   4120                               const VRegister& fa,
   4121                               FPDataProcessing3SourceOp op);
   // NEON across-lanes helpers; both take a NEONAcrossLanesOp.
   // NOTE(review): the L suffix presumably marks the long (widening) forms --
   // confirm against the definitions.
   4122  void NEONAcrossLanesL(const VRegister& vd,
   4123                        const VRegister& vn,
   4124                        NEONAcrossLanesOp op);
   4125  void NEONAcrossLanes(const VRegister& vd,
   4126                       const VRegister& vn,
   4127                       NEONAcrossLanesOp op);
   // NEON modified-immediate helpers taking an |imm8| value and a shift.
   // NOTE(review): the Lsl/Msl suffixes presumably name the shift type applied
   // to |imm8| -- confirm against the definitions.
   4128  void NEONModifiedImmShiftLsl(const VRegister& vd,
   4129                               const int imm8,
   4130                               const int left_shift,
   4131                               NEONModifiedImmediateOp op);
   4132  void NEONModifiedImmShiftMsl(const VRegister& vd,
   4133                               const int imm8,
   4134                               const int shift_amount,
   4135                               NEONModifiedImmediateOp op);
   // NEON two- and three-register "Same" helpers. NEON3Same() takes a typed
   // NEON3SameOp, while the two FP variants take a raw Instr.
   4136  void NEONFP2Same(const VRegister& vd,
   4137                   const VRegister& vn,
   4138                   Instr vop);
   4139  void NEON3Same(const VRegister& vd,
   4140                 const VRegister& vn,
   4141                 const VRegister& vm,
   4142                 NEON3SameOp vop);
   4143  void NEONFP3Same(const VRegister& vd,
   4144                   const VRegister& vn,
   4145                   const VRegister& vm,
   4146                   Instr op);
   // NEON three-register "Different" helpers; all take a NEON3DifferentOp.
   // NOTE(review): L/W/HN presumably denote the long, wide and high-narrowing
   // forms -- confirm against the definitions.
   4147  void NEON3DifferentL(const VRegister& vd,
   4148                       const VRegister& vn,
   4149                       const VRegister& vm,
   4150                       NEON3DifferentOp vop);
   4151  void NEON3DifferentW(const VRegister& vd,
   4152                       const VRegister& vn,
   4153                       const VRegister& vm,
   4154                       NEON3DifferentOp vop);
   4155  void NEON3DifferentHN(const VRegister& vd,
   4156                        const VRegister& vn,
   4157                        const VRegister& vm,
   4158                        NEON3DifferentOp vop);
   // NEON two-register miscellaneous helpers. NEONFP2RegMisc() is overloaded:
   // one form takes a NEON2RegMiscOp plus an optional double |value|
   // (default 0.0), the other takes a raw Instr. NEON2RegMisc() takes an
   // optional int |value| (default 0).
   // NOTE(review): which NEONFP2RegMisc() overload is chosen depends on the
   // static type of the third argument -- keep call sites explicit.
   4159  void NEONFP2RegMisc(const VRegister& vd,
   4160                      const VRegister& vn,
   4161                      NEON2RegMiscOp vop,
   4162                      double value = 0.0);
   4163  void NEON2RegMisc(const VRegister& vd,
   4164                    const VRegister& vn,
   4165                    NEON2RegMiscOp vop,
   4166                    int value = 0);
   4167  void NEONFP2RegMisc(const VRegister& vd,
   4168                      const VRegister& vn,
   4169                      Instr op);
   4170  void NEONAddlp(const VRegister& vd,
   4171                 const VRegister& vn,
   4172                 NEON2RegMiscOp op);
   // NEON helper taking three vector registers; |op| (a NEONPermOp) selects
   // the instruction.
   4173  void NEONPerm(const VRegister& vd,
   4174                const VRegister& vn,
   4175                const VRegister& vm,
   4176                NEONPermOp op);
   // NEON by-indexed-element helpers; all three share the same parameter list
   // and take a NEONByIndexedElementOp.
   // NOTE(review): |vm_index| is presumably the lane of |vm| that is used, and
   // the L suffix presumably marks the long forms -- confirm against the
   // definitions.
   4177  void NEONFPByElement(const VRegister& vd,
   4178                       const VRegister& vn,
   4179                       const VRegister& vm,
   4180                       int vm_index,
   4181                       NEONByIndexedElementOp op);
   4182  void NEONByElement(const VRegister& vd,
   4183                     const VRegister& vn,
   4184                     const VRegister& vm,
   4185                     int vm_index,
   4186                     NEONByIndexedElementOp op);
   4187  void NEONByElementL(const VRegister& vd,
   4188                      const VRegister& vn,
   4189                      const VRegister& vm,
   4190                      int vm_index,
   4191                      NEONByIndexedElementOp op);
   // NEON shift-by-immediate helpers. Mind the parameter order:
   // NEONShiftImmediate() takes (op, immh_immb), whereas the Left/Right/L/N
   // variants take (shift, op). NEONXtn() takes a NEON2RegMiscOp rather than a
   // NEONShiftImmediateOp.
   4192  void NEONShiftImmediate(const VRegister& vd,
   4193                          const VRegister& vn,
   4194                          NEONShiftImmediateOp op,
   4195                          int immh_immb);
   4196  void NEONShiftLeftImmediate(const VRegister& vd,
   4197                              const VRegister& vn,
   4198                              int shift,
   4199                              NEONShiftImmediateOp op);
   4200  void NEONShiftRightImmediate(const VRegister& vd,
   4201                               const VRegister& vn,
   4202                               int shift,
   4203                               NEONShiftImmediateOp op);
   4204  void NEONShiftImmediateL(const VRegister& vd,
   4205                           const VRegister& vn,
   4206                           int shift,
   4207                           NEONShiftImmediateOp op);
   4208  void NEONShiftImmediateN(const VRegister& vd,
   4209                           const VRegister& vn,
   4210                           int shift,
   4211                           NEONShiftImmediateOp op);
   4212  void NEONXtn(const VRegister& vd,
   4213               const VRegister& vn,
   4214               NEON2RegMiscOp vop);
   4215 
   // Returns an Instr value computed from |addr|.
   // NOTE(review): presumably the addressing-mode bits for NEON structure
   // loads/stores -- confirm against the definition.
   4216  Instr LoadStoreStructAddrModeField(const MemOperand& addr);
   4217 
   4218  // Encode the specified MemOperand for the specified access size and scaling
   4219  // preference.
         // NOTE(review): the unit of |access_size| (bytes vs. log2 of bytes) is not
         // visible from this declaration -- confirm against the definition.
   4220  Instr LoadStoreMemOperand(const MemOperand& addr,
   4221                            unsigned access_size,
   4222                            LoadStoreScalingOption option);
   4223 
   4224 protected:
   4225  // Prevent generation of a literal pool for the next |maxInst| instructions.
   4226  // Guarantees instruction linearity.
         //
         // Scoped guard: the constructor calls enterNoPool(maxInst) on the
         // assembler's armbuffer_ and the destructor calls leaveNoPool(). Copying
         // is disabled so that every enterNoPool() is matched by exactly one
         // leaveNoPool(); a copied guard would otherwise leave the region twice.
   4227  class AutoBlockLiteralPool {
   4228    ARMBuffer* armbuffer_;
   4229 
   4230   public:
   4231    AutoBlockLiteralPool(Assembler* assembler, size_t maxInst)
   4232      : armbuffer_(&assembler->armbuffer_) {
   4233      armbuffer_->enterNoPool(maxInst);
   4234    }
   4235    ~AutoBlockLiteralPool() {
   4236      armbuffer_->leaveNoPool();
   4237    }

           AutoBlockLiteralPool(const AutoBlockLiteralPool&) = delete;
           AutoBlockLiteralPool& operator=(const AutoBlockLiteralPool&) = delete;
   4238  };
   4239 
   4240 protected:
   4241  // Position-independent-code option (a PositionIndependentCodeOption value).
   4242  PositionIndependentCodeOption pic_;
   4243 
         // CPU features queried by the feature-based CPUHas() overload.
   4244  CPUFeatures cpu_features_;
   4245 
   4246 #ifdef DEBUG
         // Only present in DEBUG builds.
         // NOTE(review): presumably records whether the assembler has been
         // finalized -- confirm against its users.
   4247  bool finalized_;
   4248 #endif
   4249 };
   4250 
   4251 }  // namespace vixl
   4252 
   4253 #endif  // VIXL_A64_ASSEMBLER_A64_H_