tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Simulator-vixl.cpp (150989B)


      1 // Copyright 2015, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #include "jstypes.h"
     28 
     29 #ifdef JS_SIMULATOR_ARM64
     30 
     31 #include "jit/arm64/vixl/Simulator-vixl.h"
     32 
     33 #include <cmath>
     34 #include <string.h>
     35 
     36 #include "jit/AtomicOperations.h"
     37 
     38 namespace vixl {
     39 
     40 const Instruction* Simulator::kEndOfSimAddress = NULL;
     41 
     42 void SimSystemRegister::SetBits(int msb, int lsb, uint32_t bits) {
     43  int width = msb - lsb + 1;
     44  VIXL_ASSERT(IsUintN(width, bits) || IsIntN(width, bits));
     45 
     46  bits <<= lsb;
     47  uint32_t mask = ((1 << width) - 1) << lsb;
     48  VIXL_ASSERT((mask & write_ignore_mask_) == 0);
     49 
     50  value_ = (value_ & ~mask) | (bits & mask);
     51 }
     52 
     53 
     54 SimSystemRegister SimSystemRegister::DefaultValueFor(SystemRegister id) {
     55  switch (id) {
     56    case NZCV:
     57      return SimSystemRegister(0x00000000, NZCVWriteIgnoreMask);
     58    case FPCR:
     59      return SimSystemRegister(0x00000000, FPCRWriteIgnoreMask);
     60    default:
     61      VIXL_UNREACHABLE();
     62      return SimSystemRegister();
     63  }
     64 }
     65 
     66 void Simulator::enable_single_stepping(SingleStepCallback cb, void* arg) {
     67  single_stepping_ = true;
     68  single_step_callback_ = cb;
     69  single_step_callback_arg_ = arg;
     70  single_step_callback_(single_step_callback_arg_, this, (void*)get_pc());
     71 }
     72 
     73 void Simulator::disable_single_stepping() {
     74  if (!single_stepping_) {
     75    return;
     76  }
     77  single_step_callback_(single_step_callback_arg_, this, (void*)get_pc());
     78  single_stepping_ = false;
     79  single_step_callback_ = nullptr;
     80  single_step_callback_arg_ = nullptr;
     81 }
     82 
     83 void Simulator::Run() {
     84  if (single_stepping_) {
     85    single_step_callback_(single_step_callback_arg_, this, nullptr);
     86  }
     87 
     88  pc_modified_ = false;
     89  while (pc_ != kEndOfSimAddress) {
     90    if (single_stepping_) {
     91      single_step_callback_(single_step_callback_arg_, this, (void*)pc_);
     92    }
     93 
     94    ExecuteInstruction();
     95    LogAllWrittenRegisters();
     96  }
     97 
     98  if (single_stepping_) {
     99    single_step_callback_(single_step_callback_arg_, this, nullptr);
    100  }
    101 }
    102 
    103 
    104 void Simulator::RunFrom(const Instruction* first) {
    105  set_pc(first);
    106  Run();
    107 }
    108 
    109 
    110 const char* Simulator::xreg_names[] = {
    111 "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
    112 "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    113 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    114 "x24", "x25", "x26", "x27", "x28", "x29", "lr",  "xzr", "sp"};
    115 
    116 const char* Simulator::wreg_names[] = {
    117 "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
    118 "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
    119 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
    120 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr", "wsp"};
    121 
    122 const char* Simulator::sreg_names[] = {
    123 "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
    124 "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
    125 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
    126 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"};
    127 
    128 const char* Simulator::dreg_names[] = {
    129 "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
    130 "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
    131 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
    132 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
    133 
    134 const char* Simulator::vreg_names[] = {
    135 "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    136 "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    137 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    138 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"};
    139 
    140 
    141 
    142 const char* Simulator::WRegNameForCode(unsigned code, Reg31Mode mode) {
    143  VIXL_ASSERT(code < kNumberOfRegisters);
    144  // If the code represents the stack pointer, index the name after zr.
    145  if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
    146    code = kZeroRegCode + 1;
    147  }
    148  return wreg_names[code];
    149 }
    150 
    151 
    152 const char* Simulator::XRegNameForCode(unsigned code, Reg31Mode mode) {
    153  VIXL_ASSERT(code < kNumberOfRegisters);
    154  // If the code represents the stack pointer, index the name after zr.
    155  if ((code == kZeroRegCode) && (mode == Reg31IsStackPointer)) {
    156    code = kZeroRegCode + 1;
    157  }
    158  return xreg_names[code];
    159 }
    160 
    161 
    162 const char* Simulator::SRegNameForCode(unsigned code) {
    163  VIXL_ASSERT(code < kNumberOfFPRegisters);
    164  return sreg_names[code];
    165 }
    166 
    167 
    168 const char* Simulator::DRegNameForCode(unsigned code) {
    169  VIXL_ASSERT(code < kNumberOfFPRegisters);
    170  return dreg_names[code];
    171 }
    172 
    173 
    174 const char* Simulator::VRegNameForCode(unsigned code) {
    175  VIXL_ASSERT(code < kNumberOfVRegisters);
    176  return vreg_names[code];
    177 }
    178 
    179 
    180 #define COLOUR(colour_code)       "\033[0;" colour_code "m"
    181 #define COLOUR_BOLD(colour_code)  "\033[1;" colour_code "m"
    182 #define NORMAL  ""
    183 #define GREY    "30"
    184 #define RED     "31"
    185 #define GREEN   "32"
    186 #define YELLOW  "33"
    187 #define BLUE    "34"
    188 #define MAGENTA "35"
    189 #define CYAN    "36"
    190 #define WHITE   "37"
    191 void Simulator::set_coloured_trace(bool value) {
    192  coloured_trace_ = value;
    193 
    194  clr_normal          = value ? COLOUR(NORMAL)        : "";
    195  clr_flag_name       = value ? COLOUR_BOLD(WHITE)    : "";
    196  clr_flag_value      = value ? COLOUR(NORMAL)        : "";
    197  clr_reg_name        = value ? COLOUR_BOLD(CYAN)     : "";
    198  clr_reg_value       = value ? COLOUR(CYAN)          : "";
    199  clr_vreg_name       = value ? COLOUR_BOLD(MAGENTA)  : "";
    200  clr_vreg_value      = value ? COLOUR(MAGENTA)       : "";
    201  clr_memory_address  = value ? COLOUR_BOLD(BLUE)     : "";
    202  clr_warning         = value ? COLOUR_BOLD(YELLOW)   : "";
    203  clr_warning_message = value ? COLOUR(YELLOW)        : "";
    204  clr_printf          = value ? COLOUR(GREEN)         : "";
    205 }
    206 #undef COLOUR
    207 #undef COLOUR_BOLD
    208 #undef NORMAL
    209 #undef GREY
    210 #undef RED
    211 #undef GREEN
    212 #undef YELLOW
    213 #undef BLUE
    214 #undef MAGENTA
    215 #undef CYAN
    216 #undef WHITE
    217 
    218 
    219 void Simulator::set_trace_parameters(int parameters) {
    220  bool disasm_before = trace_parameters_ & LOG_DISASM;
    221  trace_parameters_ = parameters;
    222  bool disasm_after = trace_parameters_ & LOG_DISASM;
    223 
    224  if (disasm_before != disasm_after) {
    225    if (disasm_after) {
    226      decoder_->InsertVisitorBefore(print_disasm_, this);
    227    } else {
    228      decoder_->RemoveVisitor(print_disasm_);
    229    }
    230  }
    231 }
    232 
    233 
    234 void Simulator::set_instruction_stats(bool value) {
    235  if (instrumentation_ == nullptr) {
    236    return;
    237  }
    238 
    239  if (value != instruction_stats_) {
    240    if (value) {
    241      decoder_->AppendVisitor(instrumentation_);
    242    } else {
    243      decoder_->RemoveVisitor(instrumentation_);
    244    }
    245    instruction_stats_ = value;
    246  }
    247 }
    248 
    249 // Helpers ---------------------------------------------------------------------
    250 uint64_t Simulator::AddWithCarry(unsigned reg_size,
    251                                 bool set_flags,
    252                                 uint64_t left,
    253                                 uint64_t right,
    254                                 int carry_in) {
    255  VIXL_ASSERT((carry_in == 0) || (carry_in == 1));
    256  VIXL_ASSERT((reg_size == kXRegSize) || (reg_size == kWRegSize));
    257 
    258  uint64_t max_uint = (reg_size == kWRegSize) ? kWMaxUInt : kXMaxUInt;
    259  uint64_t reg_mask = (reg_size == kWRegSize) ? kWRegMask : kXRegMask;
    260  uint64_t sign_mask = (reg_size == kWRegSize) ? kWSignMask : kXSignMask;
    261 
    262  left &= reg_mask;
    263  right &= reg_mask;
    264  uint64_t result = (left + right + carry_in) & reg_mask;
    265 
    266  if (set_flags) {
    267    nzcv().SetN(CalcNFlag(result, reg_size));
    268    nzcv().SetZ(CalcZFlag(result));
    269 
    270    // Compute the C flag by comparing the result to the max unsigned integer.
    271    uint64_t max_uint_2op = max_uint - carry_in;
    272    bool C = (left > max_uint_2op) || ((max_uint_2op - left) < right);
    273    nzcv().SetC(C ? 1 : 0);
    274 
    275    // Overflow iff the sign bit is the same for the two inputs and different
    276    // for the result.
    277    uint64_t left_sign = left & sign_mask;
    278    uint64_t right_sign = right & sign_mask;
    279    uint64_t result_sign = result & sign_mask;
    280    bool V = (left_sign == right_sign) && (left_sign != result_sign);
    281    nzcv().SetV(V ? 1 : 0);
    282 
    283    LogSystemRegister(NZCV);
    284  }
    285  return result;
    286 }
    287 
    288 
    289 int64_t Simulator::ShiftOperand(unsigned reg_size,
    290                                int64_t value,
    291                                Shift shift_type,
    292                                unsigned amount) {
    293  if (amount == 0) {
    294    return value;
    295  }
    296  int64_t mask = reg_size == kXRegSize ? kXRegMask : kWRegMask;
    297  switch (shift_type) {
    298    case LSL:
    299      return (value << amount) & mask;
    300    case LSR:
    301      return static_cast<uint64_t>(value) >> amount;
    302    case ASR: {
    303      // Shift used to restore the sign.
    304      unsigned s_shift = kXRegSize - reg_size;
    305      // Value with its sign restored.
    306      int64_t s_value = (value << s_shift) >> s_shift;
    307      return (s_value >> amount) & mask;
    308    }
    309    case ROR: {
    310      if (reg_size == kWRegSize) {
    311        value &= kWRegMask;
    312      }
    313      return (static_cast<uint64_t>(value) >> amount) |
    314             ((value & ((INT64_C(1) << amount) - 1)) <<
    315              (reg_size - amount));
    316    }
    317    default:
    318      VIXL_UNIMPLEMENTED();
    319      return 0;
    320  }
    321 }
    322 
    323 
    324 int64_t Simulator::ExtendValue(unsigned reg_size,
    325                               int64_t value,
    326                               Extend extend_type,
    327                               unsigned left_shift) {
    328  switch (extend_type) {
    329    case UXTB:
    330      value &= kByteMask;
    331      break;
    332    case UXTH:
    333      value &= kHalfWordMask;
    334      break;
    335    case UXTW:
    336      value &= kWordMask;
    337      break;
    338    case SXTB:
    339      value = (value << 56) >> 56;
    340      break;
    341    case SXTH:
    342      value = (value << 48) >> 48;
    343      break;
    344    case SXTW:
    345      value = (value << 32) >> 32;
    346      break;
    347    case UXTX:
    348    case SXTX:
    349      break;
    350    default:
    351      VIXL_UNREACHABLE();
    352  }
    353  int64_t mask = (reg_size == kXRegSize) ? kXRegMask : kWRegMask;
    354  return (value << left_shift) & mask;
    355 }
    356 
    357 
    358 void Simulator::FPCompare(double val0, double val1, FPTrapFlags trap) {
    359  AssertSupportedFPCR();
    360 
    361  // TODO: This assumes that the C++ implementation handles comparisons in the
    362  // way that we expect (as per AssertSupportedFPCR()).
    363  bool process_exception = false;
    364  if ((std::isnan(val0) != 0) || (std::isnan(val1) != 0)) {
    365    nzcv().SetRawValue(FPUnorderedFlag);
    366    if (IsSignallingNaN(val0) || IsSignallingNaN(val1) ||
    367        (trap == EnableTrap)) {
    368      process_exception = true;
    369    }
    370  } else if (val0 < val1) {
    371    nzcv().SetRawValue(FPLessThanFlag);
    372  } else if (val0 > val1) {
    373    nzcv().SetRawValue(FPGreaterThanFlag);
    374  } else if (val0 == val1) {
    375    nzcv().SetRawValue(FPEqualFlag);
    376  } else {
    377    VIXL_UNREACHABLE();
    378  }
    379  LogSystemRegister(NZCV);
    380  if (process_exception) FPProcessException();
    381 }
    382 
    383 
    384 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormatForSize(
    385    unsigned reg_size, unsigned lane_size) {
    386  VIXL_ASSERT(reg_size >= lane_size);
    387 
    388  uint32_t format = 0;
    389  if (reg_size != lane_size) {
    390    switch (reg_size) {
    391      default: VIXL_UNREACHABLE(); break;
    392      case kQRegSizeInBytes: format = kPrintRegAsQVector; break;
    393      case kDRegSizeInBytes: format = kPrintRegAsDVector; break;
    394    }
    395  }
    396 
    397  switch (lane_size) {
    398    default: VIXL_UNREACHABLE(); break;
    399    case kQRegSizeInBytes: format |= kPrintReg1Q; break;
    400    case kDRegSizeInBytes: format |= kPrintReg1D; break;
    401    case kSRegSizeInBytes: format |= kPrintReg1S; break;
    402    case kHRegSizeInBytes: format |= kPrintReg1H; break;
    403    case kBRegSizeInBytes: format |= kPrintReg1B; break;
    404  }
    405  // These sizes would be duplicate case labels.
    406  VIXL_STATIC_ASSERT(kXRegSizeInBytes == kDRegSizeInBytes);
    407  VIXL_STATIC_ASSERT(kWRegSizeInBytes == kSRegSizeInBytes);
    408  VIXL_STATIC_ASSERT(kPrintXReg == kPrintReg1D);
    409  VIXL_STATIC_ASSERT(kPrintWReg == kPrintReg1S);
    410 
    411  return static_cast<PrintRegisterFormat>(format);
    412 }
    413 
    414 
    415 Simulator::PrintRegisterFormat Simulator::GetPrintRegisterFormat(
    416    VectorFormat vform) {
    417  switch (vform) {
    418    default: VIXL_UNREACHABLE(); return kPrintReg16B;
    419    case kFormat16B: return kPrintReg16B;
    420    case kFormat8B: return kPrintReg8B;
    421    case kFormat8H: return kPrintReg8H;
    422    case kFormat4H: return kPrintReg4H;
    423    case kFormat4S: return kPrintReg4S;
    424    case kFormat2S: return kPrintReg2S;
    425    case kFormat2D: return kPrintReg2D;
    426    case kFormat1D: return kPrintReg1D;
    427  }
    428 }
    429 
    430 
    431 void Simulator::PrintWrittenRegisters() {
    432  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    433    if (registers_[i].WrittenSinceLastLog()) PrintRegister(i);
    434  }
    435 }
    436 
    437 
    438 void Simulator::PrintWrittenVRegisters() {
    439  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    440    // At this point there is no type information, so print as a raw 1Q.
    441    if (vregisters_[i].WrittenSinceLastLog()) PrintVRegister(i, kPrintReg1Q);
    442  }
    443 }
    444 
    445 
    446 void Simulator::PrintSystemRegisters() {
    447  PrintSystemRegister(NZCV);
    448  PrintSystemRegister(FPCR);
    449 }
    450 
    451 
    452 void Simulator::PrintRegisters() {
    453  for (unsigned i = 0; i < kNumberOfRegisters; i++) {
    454    PrintRegister(i);
    455  }
    456 }
    457 
    458 
    459 void Simulator::PrintVRegisters() {
    460  for (unsigned i = 0; i < kNumberOfVRegisters; i++) {
    461    // At this point there is no type information, so print as a raw 1Q.
    462    PrintVRegister(i, kPrintReg1Q);
    463  }
    464 }
    465 
    466 
    467 // Print a register's name and raw value.
    468 //
    469 // Only the least-significant `size_in_bytes` bytes of the register are printed,
    470 // but the value is aligned as if the whole register had been printed.
    471 //
    472 // For typical register updates, size_in_bytes should be set to kXRegSizeInBytes
    473 // -- the default -- so that the whole register is printed. Other values of
    474 // size_in_bytes are intended for use when the register hasn't actually been
    475 // updated (such as in PrintWrite).
    476 //
    477 // No newline is printed. This allows the caller to print more details (such as
    478 // a memory access annotation).
    479 void Simulator::PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode,
    480                                       int size_in_bytes) {
    481  // The template for all supported sizes.
    482  //   "# x{code}: 0xffeeddccbbaa9988"
    483  //   "# w{code}:         0xbbaa9988"
    484  //   "# w{code}<15:0>:       0x9988"
    485  //   "# w{code}<7:0>:          0x88"
    486  unsigned padding_chars = (kXRegSizeInBytes - size_in_bytes) * 2;
    487 
    488  const char * name = "";
    489  const char * suffix = "";
    490  switch (size_in_bytes) {
    491    case kXRegSizeInBytes: name = XRegNameForCode(code, r31mode); break;
    492    case kWRegSizeInBytes: name = WRegNameForCode(code, r31mode); break;
    493    case 2:
    494      name = WRegNameForCode(code, r31mode);
    495      suffix = "<15:0>";
    496      padding_chars -= strlen(suffix);
    497      break;
    498    case 1:
    499      name = WRegNameForCode(code, r31mode);
    500      suffix = "<7:0>";
    501      padding_chars -= strlen(suffix);
    502      break;
    503    default:
    504      VIXL_UNREACHABLE();
    505  }
    506  fprintf(stream_, "# %s%5s%s: ", clr_reg_name, name, suffix);
    507 
    508  // Print leading padding spaces.
    509  VIXL_ASSERT(padding_chars < (kXRegSizeInBytes * 2));
    510  for (unsigned i = 0; i < padding_chars; i++) {
    511    putc(' ', stream_);
    512  }
    513 
    514  // Print the specified bits in hexadecimal format.
    515  uint64_t bits = reg<uint64_t>(code, r31mode);
    516  bits &= kXRegMask >> ((kXRegSizeInBytes - size_in_bytes) * 8);
    517  VIXL_STATIC_ASSERT(sizeof(bits) == kXRegSizeInBytes);
    518 
    519  int chars = size_in_bytes * 2;
    520  fprintf(stream_, "%s0x%0*" PRIx64 "%s",
    521          clr_reg_value, chars, bits, clr_normal);
    522 }
    523 
    524 
    525 void Simulator::PrintRegister(unsigned code, Reg31Mode r31mode) {
    526  registers_[code].NotifyRegisterLogged();
    527 
    528  // Don't print writes into xzr.
    529  if ((code == kZeroRegCode) && (r31mode == Reg31IsZeroRegister)) {
    530    return;
    531  }
    532 
    533  // The template for all x and w registers:
    534  //   "# x{code}: 0x{value}"
    535  //   "# w{code}: 0x{value}"
    536 
    537  PrintRegisterRawHelper(code, r31mode);
    538  fprintf(stream_, "\n");
    539 }
    540 
    541 
    542 // Print a register's name and raw value.
    543 //
    544 // The `bytes` and `lsb` arguments can be used to limit the bytes that are
    545 // printed. These arguments are intended for use in cases where register hasn't
    546 // actually been updated (such as in PrintVWrite).
    547 //
    548 // No newline is printed. This allows the caller to print more details (such as
    549 // a floating-point interpretation or a memory access annotation).
    550 void Simulator::PrintVRegisterRawHelper(unsigned code, int bytes, int lsb) {
    551  // The template for vector types:
    552  //   "# v{code}: 0xffeeddccbbaa99887766554433221100".
    553  // An example with bytes=4 and lsb=8:
    554  //   "# v{code}:         0xbbaa9988                ".
    555  fprintf(stream_, "# %s%5s: %s",
    556          clr_vreg_name, VRegNameForCode(code), clr_vreg_value);
    557 
    558  int msb = lsb + bytes - 1;
    559  int byte = kQRegSizeInBytes - 1;
    560 
    561  // Print leading padding spaces. (Two spaces per byte.)
    562  while (byte > msb) {
    563    fprintf(stream_, "  ");
    564    byte--;
    565  }
    566 
    567  // Print the specified part of the value, byte by byte.
    568  qreg_t rawbits = qreg(code);
    569  fprintf(stream_, "0x");
    570  while (byte >= lsb) {
    571    fprintf(stream_, "%02x", rawbits.val[byte]);
    572    byte--;
    573  }
    574 
    575  // Print trailing padding spaces.
    576  while (byte >= 0) {
    577    fprintf(stream_, "  ");
    578    byte--;
    579  }
    580  fprintf(stream_, "%s", clr_normal);
    581 }
    582 
    583 
    584 // Print each of the specified lanes of a register as a float or double value.
    585 //
    586 // The `lane_count` and `lslane` arguments can be used to limit the lanes that
    587 // are printed. These arguments are intended for use in cases where register
    588 // hasn't actually been updated (such as in PrintVWrite).
    589 //
    590 // No newline is printed. This allows the caller to print more details (such as
    591 // a memory access annotation).
    592 void Simulator::PrintVRegisterFPHelper(unsigned code,
    593                                       unsigned lane_size_in_bytes,
    594                                       int lane_count,
    595                                       int rightmost_lane) {
    596  VIXL_ASSERT((lane_size_in_bytes == kSRegSizeInBytes) ||
    597              (lane_size_in_bytes == kDRegSizeInBytes));
    598 
    599  unsigned msb = ((lane_count + rightmost_lane) * lane_size_in_bytes);
    600  VIXL_ASSERT(msb <= kQRegSizeInBytes);
    601 
    602  // For scalar types ((lane_count == 1) && (rightmost_lane == 0)), a register
    603  // name is used:
    604  //   " (s{code}: {value})"
    605  //   " (d{code}: {value})"
    606  // For vector types, "..." is used to represent one or more omitted lanes.
    607  //   " (..., {value}, {value}, ...)"
    608  if ((lane_count == 1) && (rightmost_lane == 0)) {
    609    const char * name =
    610        (lane_size_in_bytes == kSRegSizeInBytes) ? SRegNameForCode(code)
    611                                                 : DRegNameForCode(code);
    612    fprintf(stream_, " (%s%s: ", clr_vreg_name, name);
    613  } else {
    614    if (msb < (kQRegSizeInBytes - 1)) {
    615      fprintf(stream_, " (..., ");
    616    } else {
    617      fprintf(stream_, " (");
    618    }
    619  }
    620 
    621  // Print the list of values.
    622  const char * separator = "";
    623  int leftmost_lane = rightmost_lane + lane_count - 1;
    624  for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) {
    625    double value =
    626        (lane_size_in_bytes == kSRegSizeInBytes) ? vreg(code).Get<float>(lane)
    627                                                 : vreg(code).Get<double>(lane);
    628    fprintf(stream_, "%s%s%#g%s", separator, clr_vreg_value, value, clr_normal);
    629    separator = ", ";
    630  }
    631 
    632  if (rightmost_lane > 0) {
    633    fprintf(stream_, ", ...");
    634  }
    635  fprintf(stream_, ")");
    636 }
    637 
    638 
    639 void Simulator::PrintVRegister(unsigned code, PrintRegisterFormat format) {
    640  vregisters_[code].NotifyRegisterLogged();
    641 
    642  int lane_size_log2 = format & kPrintRegLaneSizeMask;
    643 
    644  int reg_size_log2;
    645  if (format & kPrintRegAsQVector) {
    646    reg_size_log2 = kQRegSizeInBytesLog2;
    647  } else if (format & kPrintRegAsDVector) {
    648    reg_size_log2 = kDRegSizeInBytesLog2;
    649  } else {
    650    // Scalar types.
    651    reg_size_log2 = lane_size_log2;
    652  }
    653 
    654  int lane_count = 1 << (reg_size_log2 - lane_size_log2);
    655  int lane_size = 1 << lane_size_log2;
    656 
    657  // The template for vector types:
    658  //   "# v{code}: 0x{rawbits} (..., {value}, ...)".
    659  // The template for scalar types:
    660  //   "# v{code}: 0x{rawbits} ({reg}:{value})".
    661  // The values in parentheses after the bit representations are floating-point
    662  // interpretations. They are displayed only if the kPrintVRegAsFP bit is set.
    663 
    664  PrintVRegisterRawHelper(code);
    665  if (format & kPrintRegAsFP) {
    666    PrintVRegisterFPHelper(code, lane_size, lane_count);
    667  }
    668 
    669  fprintf(stream_, "\n");
    670 }
    671 
    672 
    673 void Simulator::PrintSystemRegister(SystemRegister id) {
    674  switch (id) {
    675    case NZCV:
    676      fprintf(stream_, "# %sNZCV: %sN:%d Z:%d C:%d V:%d%s\n",
    677              clr_flag_name, clr_flag_value,
    678              nzcv().N(), nzcv().Z(), nzcv().C(), nzcv().V(),
    679              clr_normal);
    680      break;
    681    case FPCR: {
    682      static const char * rmode[] = {
    683        "0b00 (Round to Nearest)",
    684        "0b01 (Round towards Plus Infinity)",
    685        "0b10 (Round towards Minus Infinity)",
    686        "0b11 (Round towards Zero)"
    687      };
    688      VIXL_ASSERT(fpcr().RMode() < (sizeof(rmode) / sizeof(rmode[0])));
    689      fprintf(stream_,
    690              "# %sFPCR: %sAHP:%d DN:%d FZ:%d RMode:%s%s\n",
    691              clr_flag_name, clr_flag_value,
    692              fpcr().AHP(), fpcr().DN(), fpcr().FZ(), rmode[fpcr().RMode()],
    693              clr_normal);
    694      break;
    695    }
    696    default:
    697      VIXL_UNREACHABLE();
    698  }
    699 }
    700 
    701 
    702 void Simulator::PrintRead(uintptr_t address,
    703                          unsigned reg_code,
    704                          PrintRegisterFormat format) {
    705  registers_[reg_code].NotifyRegisterLogged();
    706 
    707  USE(format);
    708 
    709  // The template is "# {reg}: 0x{value} <- {address}".
    710  PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister);
    711  fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
    712          clr_memory_address, address, clr_normal);
    713 }
    714 
    715 
    716 void Simulator::PrintVRead(uintptr_t address,
    717                           unsigned reg_code,
    718                           PrintRegisterFormat format,
    719                           unsigned lane) {
    720  vregisters_[reg_code].NotifyRegisterLogged();
    721 
    722  // The template is "# v{code}: 0x{rawbits} <- address".
    723  PrintVRegisterRawHelper(reg_code);
    724  if (format & kPrintRegAsFP) {
    725    PrintVRegisterFPHelper(reg_code, GetPrintRegLaneSizeInBytes(format),
    726                           GetPrintRegLaneCount(format), lane);
    727  }
    728  fprintf(stream_, " <- %s0x%016" PRIxPTR "%s\n",
    729          clr_memory_address, address, clr_normal);
    730 }
    731 
    732 
    733 void Simulator::PrintWrite(uintptr_t address,
    734                           unsigned reg_code,
    735                           PrintRegisterFormat format) {
    736  VIXL_ASSERT(GetPrintRegLaneCount(format) == 1);
    737 
    738  // The template is "# v{code}: 0x{value} -> {address}". To keep the trace tidy
    739  // and readable, the value is aligned with the values in the register trace.
    740  PrintRegisterRawHelper(reg_code, Reg31IsZeroRegister,
    741                         GetPrintRegSizeInBytes(format));
    742  fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
    743          clr_memory_address, address, clr_normal);
    744 }
    745 
    746 
    747 void Simulator::PrintVWrite(uintptr_t address,
    748                            unsigned reg_code,
    749                            PrintRegisterFormat format,
    750                            unsigned lane) {
    751  // The templates:
    752  //   "# v{code}: 0x{rawbits} -> {address}"
    753  //   "# v{code}: 0x{rawbits} (..., {value}, ...) -> {address}".
    754  //   "# v{code}: 0x{rawbits} ({reg}:{value}) -> {address}"
    755  // Because this trace doesn't represent a change to the source register's
    756  // value, only the relevant part of the value is printed. To keep the trace
    757  // tidy and readable, the raw value is aligned with the other values in the
    758  // register trace.
    759  int lane_count = GetPrintRegLaneCount(format);
    760  int lane_size = GetPrintRegLaneSizeInBytes(format);
    761  int reg_size = GetPrintRegSizeInBytes(format);
    762  PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane);
    763  if (format & kPrintRegAsFP) {
    764    PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane);
    765  }
    766  fprintf(stream_, " -> %s0x%016" PRIxPTR "%s\n",
    767          clr_memory_address, address, clr_normal);
    768 }
    769 
    770 
    771 // Visitors---------------------------------------------------------------------
    772 
    773 void Simulator::VisitUnimplemented(const Instruction* instr) {
    774  printf("Unimplemented instruction at %p: 0x%08" PRIx32 "\n",
    775         reinterpret_cast<const void*>(instr), instr->InstructionBits());
    776  VIXL_UNIMPLEMENTED();
    777 }
    778 
    779 
    780 void Simulator::VisitUnallocated(const Instruction* instr) {
    781  printf("Unallocated instruction at %p: 0x%08" PRIx32 "\n",
    782         reinterpret_cast<const void*>(instr), instr->InstructionBits());
    783  VIXL_UNIMPLEMENTED();
    784 }
    785 
    786 
    787 void Simulator::VisitPCRelAddressing(const Instruction* instr) {
    788  VIXL_ASSERT((instr->Mask(PCRelAddressingMask) == ADR) ||
    789              (instr->Mask(PCRelAddressingMask) == ADRP));
    790 
    791  set_reg(instr->Rd(), instr->ImmPCOffsetTarget());
    792 }
    793 
    794 
    795 void Simulator::VisitUnconditionalBranch(const Instruction* instr) {
    796  switch (instr->Mask(UnconditionalBranchMask)) {
    797    case BL:
    798      set_lr(instr->NextInstruction());
    799      VIXL_FALLTHROUGH();
    800    case B:
    801      set_pc(instr->ImmPCOffsetTarget());
    802      break;
    803    default: VIXL_UNREACHABLE();
    804  }
    805 }
    806 
    807 
    808 void Simulator::VisitConditionalBranch(const Instruction* instr) {
    809  VIXL_ASSERT(instr->Mask(ConditionalBranchMask) == B_cond);
    810  if (ConditionPassed(instr->ConditionBranch())) {
    811    set_pc(instr->ImmPCOffsetTarget());
    812  }
    813 }
    814 
    815 
    816 void Simulator::VisitUnconditionalBranchToRegister(const Instruction* instr) {
    817  const Instruction* target = Instruction::Cast(xreg(instr->Rn()));
    818 
    819  switch (instr->Mask(UnconditionalBranchToRegisterMask)) {
    820    case BLR:
    821      set_lr(instr->NextInstruction());
    822      VIXL_FALLTHROUGH();
    823    case BR:
    824    case RET: set_pc(target); break;
    825    default: VIXL_UNREACHABLE();
    826  }
    827 }
    828 
    829 
    830 void Simulator::VisitTestBranch(const Instruction* instr) {
    831  unsigned bit_pos = (instr->ImmTestBranchBit5() << 5) |
    832                     instr->ImmTestBranchBit40();
    833  bool bit_zero = ((xreg(instr->Rt()) >> bit_pos) & 1) == 0;
    834  bool take_branch = false;
    835  switch (instr->Mask(TestBranchMask)) {
    836    case TBZ: take_branch = bit_zero; break;
    837    case TBNZ: take_branch = !bit_zero; break;
    838    default: VIXL_UNIMPLEMENTED();
    839  }
    840  if (take_branch) {
    841    set_pc(instr->ImmPCOffsetTarget());
    842  }
    843 }
    844 
    845 
    846 void Simulator::VisitCompareBranch(const Instruction* instr) {
    847  unsigned rt = instr->Rt();
    848  bool take_branch = false;
    849  switch (instr->Mask(CompareBranchMask)) {
    850    case CBZ_w: take_branch = (wreg(rt) == 0); break;
    851    case CBZ_x: take_branch = (xreg(rt) == 0); break;
    852    case CBNZ_w: take_branch = (wreg(rt) != 0); break;
    853    case CBNZ_x: take_branch = (xreg(rt) != 0); break;
    854    default: VIXL_UNIMPLEMENTED();
    855  }
    856  if (take_branch) {
    857    set_pc(instr->ImmPCOffsetTarget());
    858  }
    859 }
    860 
    861 
    862 void Simulator::AddSubHelper(const Instruction* instr, int64_t op2) {
    863  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    864  bool set_flags = instr->FlagsUpdate();
    865  int64_t new_val = 0;
    866  Instr operation = instr->Mask(AddSubOpMask);
    867 
    868  switch (operation) {
    869    case ADD:
    870    case ADDS: {
    871      new_val = AddWithCarry(reg_size,
    872                             set_flags,
    873                             reg(reg_size, instr->Rn(), instr->RnMode()),
    874                             op2);
    875      break;
    876    }
    877    case SUB:
    878    case SUBS: {
    879      new_val = AddWithCarry(reg_size,
    880                             set_flags,
    881                             reg(reg_size, instr->Rn(), instr->RnMode()),
    882                             ~op2,
    883                             1);
    884      break;
    885    }
    886    default: VIXL_UNREACHABLE();
    887  }
    888 
    889  set_reg(reg_size, instr->Rd(), new_val, LogRegWrites, instr->RdMode());
    890 }
    891 
    892 
    893 void Simulator::VisitAddSubShifted(const Instruction* instr) {
    894  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    895  int64_t op2 = ShiftOperand(reg_size,
    896                             reg(reg_size, instr->Rm()),
    897                             static_cast<Shift>(instr->ShiftDP()),
    898                             instr->ImmDPShift());
    899  AddSubHelper(instr, op2);
    900 }
    901 
    902 
    903 void Simulator::VisitAddSubImmediate(const Instruction* instr) {
    904  int64_t op2 = instr->ImmAddSub() << ((instr->GetImmAddSubShift() == 1) ? 12 : 0);
    905  AddSubHelper(instr, op2);
    906 }
    907 
    908 
    909 void Simulator::VisitAddSubExtended(const Instruction* instr) {
    910  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    911  int64_t op2 = ExtendValue(reg_size,
    912                            reg(reg_size, instr->Rm()),
    913                            static_cast<Extend>(instr->ExtendMode()),
    914                            instr->ImmExtendShift());
    915  AddSubHelper(instr, op2);
    916 }
    917 
    918 
    919 void Simulator::VisitAddSubWithCarry(const Instruction* instr) {
    920  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    921  int64_t op2 = reg(reg_size, instr->Rm());
    922  int64_t new_val;
    923 
    924  if ((instr->Mask(AddSubOpMask) == SUB) || instr->Mask(AddSubOpMask) == SUBS) {
    925    op2 = ~op2;
    926  }
    927 
    928  new_val = AddWithCarry(reg_size,
    929                         instr->FlagsUpdate(),
    930                         reg(reg_size, instr->Rn()),
    931                         op2,
    932                         C());
    933 
    934  set_reg(reg_size, instr->Rd(), new_val);
    935 }
    936 
    937 
    938 void Simulator::VisitLogicalShifted(const Instruction* instr) {
    939  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    940  Shift shift_type = static_cast<Shift>(instr->ShiftDP());
    941  unsigned shift_amount = instr->ImmDPShift();
    942  int64_t op2 = ShiftOperand(reg_size, reg(reg_size, instr->Rm()), shift_type,
    943                             shift_amount);
    944  if (instr->Mask(NOT) == NOT) {
    945    op2 = ~op2;
    946  }
    947  LogicalHelper(instr, op2);
    948 }
    949 
    950 
    951 void Simulator::VisitLogicalImmediate(const Instruction* instr) {
    952  LogicalHelper(instr, instr->ImmLogical());
    953 }
    954 
    955 
    956 void Simulator::LogicalHelper(const Instruction* instr, int64_t op2) {
    957  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    958  int64_t op1 = reg(reg_size, instr->Rn());
    959  int64_t result = 0;
    960  bool update_flags = false;
    961 
    962  // Switch on the logical operation, stripping out the NOT bit, as it has a
    963  // different meaning for logical immediate instructions.
    964  switch (instr->Mask(LogicalOpMask & ~NOT)) {
    965    case ANDS: update_flags = true; VIXL_FALLTHROUGH();
    966    case AND: result = op1 & op2; break;
    967    case ORR: result = op1 | op2; break;
    968    case EOR: result = op1 ^ op2; break;
    969    default:
    970      VIXL_UNIMPLEMENTED();
    971  }
    972 
    973  if (update_flags) {
    974    nzcv().SetN(CalcNFlag(result, reg_size));
    975    nzcv().SetZ(CalcZFlag(result));
    976    nzcv().SetC(0);
    977    nzcv().SetV(0);
    978    LogSystemRegister(NZCV);
    979  }
    980 
    981  set_reg(reg_size, instr->Rd(), result, LogRegWrites, instr->RdMode());
    982 }
    983 
    984 
    985 void Simulator::VisitConditionalCompareRegister(const Instruction* instr) {
    986  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    987  ConditionalCompareHelper(instr, reg(reg_size, instr->Rm()));
    988 }
    989 
    990 
    991 void Simulator::VisitConditionalCompareImmediate(const Instruction* instr) {
    992  ConditionalCompareHelper(instr, instr->ImmCondCmp());
    993 }
    994 
    995 
    996 void Simulator::ConditionalCompareHelper(const Instruction* instr,
    997                                         int64_t op2) {
    998  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
    999  int64_t op1 = reg(reg_size, instr->Rn());
   1000 
   1001  if (ConditionPassed(instr->Condition())) {
   1002    // If the condition passes, set the status flags to the result of comparing
   1003    // the operands.
   1004    if (instr->Mask(ConditionalCompareMask) == CCMP) {
   1005      AddWithCarry(reg_size, true, op1, ~op2, 1);
   1006    } else {
   1007      VIXL_ASSERT(instr->Mask(ConditionalCompareMask) == CCMN);
   1008      AddWithCarry(reg_size, true, op1, op2, 0);
   1009    }
   1010  } else {
   1011    // If the condition fails, set the status flags to the nzcv immediate.
   1012    nzcv().SetFlags(instr->Nzcv());
   1013    LogSystemRegister(NZCV);
   1014  }
   1015 }
   1016 
   1017 
   1018 void Simulator::VisitLoadStoreUnsignedOffset(const Instruction* instr) {
   1019  int offset = instr->ImmLSUnsigned() << instr->SizeLS();
   1020  LoadStoreHelper(instr, offset, Offset);
   1021 }
   1022 
   1023 
   1024 void Simulator::VisitLoadStoreUnscaledOffset(const Instruction* instr) {
   1025  LoadStoreHelper(instr, instr->ImmLS(), Offset);
   1026 }
   1027 
   1028 
   1029 void Simulator::VisitLoadStorePreIndex(const Instruction* instr) {
   1030  LoadStoreHelper(instr, instr->ImmLS(), PreIndex);
   1031 }
   1032 
   1033 
   1034 void Simulator::VisitLoadStorePostIndex(const Instruction* instr) {
   1035  LoadStoreHelper(instr, instr->ImmLS(), PostIndex);
   1036 }
   1037 
   1038 
   1039 void Simulator::VisitLoadStoreRegisterOffset(const Instruction* instr) {
   1040  Extend ext = static_cast<Extend>(instr->ExtendMode());
   1041  VIXL_ASSERT((ext == UXTW) || (ext == UXTX) || (ext == SXTW) || (ext == SXTX));
   1042  unsigned shift_amount = instr->ImmShiftLS() * instr->SizeLS();
   1043 
   1044  int64_t offset = ExtendValue(kXRegSize, xreg(instr->Rm()), ext,
   1045                               shift_amount);
   1046  LoadStoreHelper(instr, offset, Offset);
   1047 }
   1048 
   1049 template<typename T>
   1050 static T Faulted() {
   1051    return ~0;
   1052 }
   1053 
   1054 template<>
   1055 Simulator::qreg_t Faulted() {
   1056    static_assert(kQRegSizeInBytes == 16, "Known constraint");
   1057    static Simulator::qreg_t dummy = { {
   1058 255, 255, 255, 255, 255, 255, 255, 255,
   1059 255, 255, 255, 255, 255, 255, 255, 255
   1060    } };
   1061    return dummy;
   1062 }
   1063 
   1064 template<typename T> T
   1065 Simulator::Read(uintptr_t address)
   1066 {
   1067    address = Memory::AddressUntag(address);
   1068    if (handle_wasm_seg_fault(address, sizeof(T)))
   1069 return Faulted<T>();
   1070    return Memory::Read<T>(address);
   1071 }
   1072 
   1073 template <typename T> void
   1074 Simulator::Write(uintptr_t address, T value)
   1075 {
   1076    address = Memory::AddressUntag(address);
   1077    if (handle_wasm_seg_fault(address, sizeof(T)))
   1078 return;
   1079    Memory::Write<T>(address, value);
   1080 }
   1081 
// Common implementation for all single-register load/store instructions.
//
// AddressModeHelper computes the effective address from base register Rn
// plus `offset`, applying pre-/post-index writeback when requested by
// `addrmode`. The opcode then selects the register file (integer w/x,
// FP s/d, or vector b/h/q), access width and sign extension. All memory is
// accessed through the checked Read<>()/Write<>() wrappers, so wasm
// out-of-bounds accesses are handled rather than crashing the host.
void Simulator::LoadStoreHelper(const Instruction* instr,
                               int64_t offset,
                               AddrMode addrmode) {
 unsigned srcdst = instr->Rt();
 uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);

 LoadStoreOp op = static_cast<LoadStoreOp>(instr->Mask(LoadStoreMask));
 switch (op) {
   // Loads use NoRegLog to suppress the basic register trace; a more
   // detailed trace (including the memory address) is printed below.
   case LDRB_w:
     set_wreg(srcdst, Read<uint8_t>(address), NoRegLog); break;
   case LDRH_w:
     set_wreg(srcdst, Read<uint16_t>(address), NoRegLog); break;
   case LDR_w:
     set_wreg(srcdst, Read<uint32_t>(address), NoRegLog); break;
   case LDR_x:
     set_xreg(srcdst, Read<uint64_t>(address), NoRegLog); break;
   case LDRSB_w:
     set_wreg(srcdst, Read<int8_t>(address), NoRegLog); break;
   case LDRSH_w:
     set_wreg(srcdst, Read<int16_t>(address), NoRegLog); break;
   case LDRSB_x:
     set_xreg(srcdst, Read<int8_t>(address), NoRegLog); break;
   case LDRSH_x:
     set_xreg(srcdst, Read<int16_t>(address), NoRegLog); break;
   case LDRSW_x:
     set_xreg(srcdst, Read<int32_t>(address), NoRegLog); break;
   case LDR_b:
     set_breg(srcdst, Read<uint8_t>(address), NoRegLog); break;
   case LDR_h:
     set_hreg(srcdst, Read<uint16_t>(address), NoRegLog); break;
   case LDR_s:
     set_sreg(srcdst, Read<float>(address), NoRegLog); break;
   case LDR_d:
     set_dreg(srcdst, Read<double>(address), NoRegLog); break;
   case LDR_q:
     set_qreg(srcdst, Read<qreg_t>(address), NoRegLog); break;

   case STRB_w:  Write<uint8_t>(address, wreg(srcdst)); break;
   case STRH_w:  Write<uint16_t>(address, wreg(srcdst)); break;
   case STR_w:   Write<uint32_t>(address, wreg(srcdst)); break;
   case STR_x:   Write<uint64_t>(address, xreg(srcdst)); break;
   case STR_b:   Write<uint8_t>(address, breg(srcdst)); break;
   case STR_h:   Write<uint16_t>(address, hreg(srcdst)); break;
   case STR_s:   Write<float>(address, sreg(srcdst)); break;
   case STR_d:   Write<double>(address, dreg(srcdst)); break;
   case STR_q:   Write<qreg_t>(address, qreg(srcdst)); break;

   // Ignore prfm hint instructions.
   case PRFM: break;

   default: VIXL_UNIMPLEMENTED();
 }

 // Print a detailed trace (including the memory address) instead of the
 // basic register:value trace generated by set_*reg(). The FP forms use the
 // FP print format; the b/h/q forms use the raw vector format.
 unsigned access_size = 1 << instr->SizeLS();
 if (instr->IsLoad()) {
   if ((op == LDR_s) || (op == LDR_d)) {
     LogVRead(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
   } else if ((op == LDR_b) || (op == LDR_h) || (op == LDR_q)) {
     LogVRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
   } else {
     LogRead(address, srcdst, GetPrintRegisterFormatForSize(access_size));
   }
 } else {
   if ((op == STR_s) || (op == STR_d)) {
     LogVWrite(address, srcdst, GetPrintRegisterFormatForSizeFP(access_size));
   } else if ((op == STR_b) || (op == STR_h) || (op == STR_q)) {
     LogVWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
   } else {
     LogWrite(address, srcdst, GetPrintRegisterFormatForSize(access_size));
   }
 }

 // A non-exclusive access may clear the local exclusive monitor as a side
 // effect (see the same pattern in VisitLoadStoreExclusive).
 local_monitor_.MaybeClear();
}
   1156 
   1157 
   1158 void Simulator::VisitLoadStorePairOffset(const Instruction* instr) {
   1159  LoadStorePairHelper(instr, Offset);
   1160 }
   1161 
   1162 
   1163 void Simulator::VisitLoadStorePairPreIndex(const Instruction* instr) {
   1164  LoadStorePairHelper(instr, PreIndex);
   1165 }
   1166 
   1167 
   1168 void Simulator::VisitLoadStorePairPostIndex(const Instruction* instr) {
   1169  LoadStorePairHelper(instr, PostIndex);
   1170 }
   1171 
   1172 
   1173 void Simulator::VisitLoadStorePairNonTemporal(const Instruction* instr) {
   1174  LoadStorePairHelper(instr, Offset);
   1175 }
   1176 
   1177 
// Common implementation for all load/store-pair instructions.
//
// The pair occupies two consecutive memory slots of `element_size` bytes:
// Rt maps to `address` and Rt2 to `address2 = address + element_size`. The
// immediate offset is scaled by the element size, and AddressModeHelper
// applies any pre-/post-index writeback to Rn. Memory goes through the
// checked Read<>()/Write<>() wrappers.
void Simulator::LoadStorePairHelper(const Instruction* instr,
                                   AddrMode addrmode) {
 unsigned rt = instr->Rt();
 unsigned rt2 = instr->Rt2();
 int element_size = 1 << instr->SizeLSPair();
 int64_t offset = instr->ImmLSPair() * element_size;
 uintptr_t address = AddressModeHelper(instr->Rn(), offset, addrmode);
 uintptr_t address2 = address + element_size;

 LoadStorePairOp op =
   static_cast<LoadStorePairOp>(instr->Mask(LoadStorePairMask));

 // 'rt' and 'rt2' can only be aliased for stores.
 VIXL_ASSERT(((op & LoadStorePairLBit) == 0) || (rt != rt2));

 switch (op) {
   // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS). We
   // will print a more detailed log.
   case LDP_w: {
     set_wreg(rt, Read<uint32_t>(address), NoRegLog);
     set_wreg(rt2, Read<uint32_t>(address2), NoRegLog);
     break;
   }
   case LDP_s: {
     set_sreg(rt, Read<float>(address), NoRegLog);
     set_sreg(rt2, Read<float>(address2), NoRegLog);
     break;
   }
   case LDP_x: {
     set_xreg(rt, Read<uint64_t>(address), NoRegLog);
     set_xreg(rt2, Read<uint64_t>(address2), NoRegLog);
     break;
   }
   case LDP_d: {
     set_dreg(rt, Read<double>(address), NoRegLog);
     set_dreg(rt2, Read<double>(address2), NoRegLog);
     break;
   }
   case LDP_q: {
     set_qreg(rt, Read<qreg_t>(address), NoRegLog);
     set_qreg(rt2, Read<qreg_t>(address2), NoRegLog);
     break;
   }
   // LDPSW loads two words and sign-extends each into an x register.
   case LDPSW_x: {
     set_xreg(rt, Read<int32_t>(address), NoRegLog);
     set_xreg(rt2, Read<int32_t>(address2), NoRegLog);
     break;
   }
   case STP_w: {
     Write<uint32_t>(address, wreg(rt));
     Write<uint32_t>(address2, wreg(rt2));
     break;
   }
   case STP_s: {
     Write<float>(address, sreg(rt));
     Write<float>(address2, sreg(rt2));
     break;
   }
   case STP_x: {
     Write<uint64_t>(address, xreg(rt));
     Write<uint64_t>(address2, xreg(rt2));
     break;
   }
   case STP_d: {
     Write<double>(address, dreg(rt));
     Write<double>(address2, dreg(rt2));
     break;
   }
   case STP_q: {
     Write<qreg_t>(address, qreg(rt));
     Write<qreg_t>(address2, qreg(rt2));
     break;
   }
   default: VIXL_UNREACHABLE();
 }

 // Print a detailed trace (including the memory address) instead of the basic
 // register:value trace generated by set_*reg().
 if (instr->IsLoad()) {
   if ((op == LDP_s) || (op == LDP_d)) {
     LogVRead(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
     LogVRead(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
   } else if (op == LDP_q) {
     LogVRead(address, rt, GetPrintRegisterFormatForSize(element_size));
     LogVRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
   } else {
     LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
     LogRead(address2, rt2, GetPrintRegisterFormatForSize(element_size));
   }
 } else {
   if ((op == STP_s) || (op == STP_d)) {
     LogVWrite(address, rt, GetPrintRegisterFormatForSizeFP(element_size));
     LogVWrite(address2, rt2, GetPrintRegisterFormatForSizeFP(element_size));
   } else if (op == STP_q) {
     LogVWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
     LogVWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
   } else {
     LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
     LogWrite(address2, rt2, GetPrintRegisterFormatForSize(element_size));
   }
 }

 // A non-exclusive access may clear the local exclusive monitor as a side
 // effect.
 local_monitor_.MaybeClear();
}
   1282 
   1283 
   1284 void Simulator::PrintExclusiveAccessWarning() {
   1285  if (print_exclusive_access_warning_) {
   1286    fprintf(
   1287        stderr,
   1288        "%sWARNING:%s VIXL simulator support for load-/store-/clear-exclusive "
   1289        "instructions is limited. Refer to the README for details.%s\n",
   1290        clr_warning, clr_warning_message, clr_normal);
   1291    print_exclusive_access_warning_ = false;
   1292  }
   1293 }
   1294 
// Implements CAS{,A,L,AL}{B,H,_w,_x}: compare the value at [Rn] against Rs
// and, if equal, store Rt at [Rn]. Rs always receives the value that was
// read from memory.
//
// Bit 22 selects acquire semantics and bit 15 release semantics; both are
// approximated with full barriers around an otherwise plain
// read-compare-write sequence (i.e. the operation is not atomic with
// respect to other host threads).
template <typename T>
void Simulator::CompareAndSwapHelper(const Instruction* instr) {
 unsigned rs = instr->Rs();
 unsigned rt = instr->Rt();
 unsigned rn = instr->Rn();

 unsigned element_size = sizeof(T);
 uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);

 // Verify that the address is available to the host.
 VIXL_ASSERT(address == static_cast<uintptr_t>(address));

 // Handle wasm out-of-bounds accesses before touching memory.
 address = Memory::AddressUntag(address);
 if (handle_wasm_seg_fault(address, element_size))
   return;

 bool is_acquire = instr->Bit(22) == 1;
 bool is_release = instr->Bit(15) == 1;

 T comparevalue = reg<T>(rs);
 T newvalue = reg<T>(rt);

 // The architecture permits that the data read clears any exclusive monitors
 // associated with that location, even if the compare subsequently fails.
 local_monitor_.Clear();

 T data = Memory::Read<T>(address);
 if (is_acquire) {
   // Approximate load-acquire by issuing a full barrier after the load.
   __sync_synchronize();
 }

 if (data == comparevalue) {
   if (is_release) {
     // Approximate store-release by issuing a full barrier before the store.
     __sync_synchronize();
   }
   Memory::Write<T>(address, newvalue);
   LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
 }
 // Rs gets the old memory value whether or not the swap happened.
 set_reg<T>(rs, data);
 LogRead(address, rs, GetPrintRegisterFormatForSize(element_size));
}
   1338 
   1339 template <typename T>
   1340 void Simulator::CompareAndSwapPairHelper(const Instruction* instr) {
   1341  VIXL_ASSERT((sizeof(T) == 4) || (sizeof(T) == 8));
   1342  unsigned rs = instr->Rs();
   1343  unsigned rt = instr->Rt();
   1344  unsigned rn = instr->Rn();
   1345 
   1346  VIXL_ASSERT((rs % 2 == 0) && (rs % 2 == 0));
   1347 
   1348  unsigned element_size = sizeof(T);
   1349  uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
   1350 
   1351  // Verify that the address is available to the host.
   1352  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   1353 
   1354  address = Memory::AddressUntag(address);
   1355  if (handle_wasm_seg_fault(address, element_size))
   1356    return;
   1357 
   1358  uint64_t address2 = address + element_size;
   1359 
   1360  bool is_acquire = instr->Bit(22) == 1;
   1361  bool is_release = instr->Bit(15) == 1;
   1362 
   1363  T comparevalue_high = reg<T>(rs + 1);
   1364  T comparevalue_low = reg<T>(rs);
   1365  T newvalue_high = reg<T>(rt + 1);
   1366  T newvalue_low = reg<T>(rt);
   1367 
   1368  // The architecture permits that the data read clears any exclusive monitors
   1369  // associated with that location, even if the compare subsequently fails.
   1370  local_monitor_.Clear();
   1371 
   1372  T data_high = Memory::Read<T>(address);
   1373  T data_low = Memory::Read<T>(address2);
   1374 
   1375  if (is_acquire) {
   1376    // Approximate load-acquire by issuing a full barrier after the load.
   1377    __sync_synchronize();
   1378  }
   1379 
   1380  bool same =
   1381      (data_high == comparevalue_high) && (data_low == comparevalue_low);
   1382  if (same) {
   1383    if (is_release) {
   1384      // Approximate store-release by issuing a full barrier before the store.
   1385      __sync_synchronize();
   1386    }
   1387 
   1388    Memory::Write<T>(address, newvalue_high);
   1389    Memory::Write<T>(address2, newvalue_low);
   1390  }
   1391 
   1392  set_reg<T>(rs + 1, data_high);
   1393  set_reg<T>(rs, data_low);
   1394 
   1395  LogRead(address, rs + 1, GetPrintRegisterFormatForSize(element_size));
   1396  LogRead(address2, rs, GetPrintRegisterFormatForSize(element_size));
   1397 
   1398  if (same) {
   1399    LogWrite(address, rt + 1, GetPrintRegisterFormatForSize(element_size));
   1400    LogWrite(address2, rt, GetPrintRegisterFormatForSize(element_size));
   1401  }
   1402 }
   1403 
// Dispatcher for the load/store-exclusive encoding space. The CAS/CASP
// compare-and-swap forms are delegated to the helpers above; everything
// else (load-/store-exclusive, load-acquire/store-release, and their pair
// variants) is handled inline in the default branch using the local and
// global exclusive monitors.
void Simulator::VisitLoadStoreExclusive(const Instruction* instr) {
 LoadStoreExclusive op =
     static_cast<LoadStoreExclusive>(instr->Mask(LoadStoreExclusiveMask));

 switch (op) {
   case CAS_w:
   case CASA_w:
   case CASL_w:
   case CASAL_w:
     CompareAndSwapHelper<uint32_t>(instr);
     break;
   case CAS_x:
   case CASA_x:
   case CASL_x:
   case CASAL_x:
     CompareAndSwapHelper<uint64_t>(instr);
     break;
   case CASB:
   case CASAB:
   case CASLB:
   case CASALB:
     CompareAndSwapHelper<uint8_t>(instr);
     break;
   case CASH:
   case CASAH:
   case CASLH:
   case CASALH:
     CompareAndSwapHelper<uint16_t>(instr);
     break;
   case CASP_w:
   case CASPA_w:
   case CASPL_w:
   case CASPAL_w:
     CompareAndSwapPairHelper<uint32_t>(instr);
     break;
   case CASP_x:
   case CASPA_x:
   case CASPL_x:
   case CASPAL_x:
     CompareAndSwapPairHelper<uint64_t>(instr);
     break;
   // All remaining ops: LDXR*/STXR*, LDAXR*/STLXR*, LDAR*/STLR* and the
   // pair forms.
   default:
     PrintExclusiveAccessWarning();

     unsigned rs = instr->Rs();
     unsigned rt = instr->Rt();
     unsigned rt2 = instr->Rt2();
     unsigned rn = instr->Rn();

     // Decode the behavioral properties from the opcode bits.
     bool is_exclusive = !instr->LdStXNotExclusive();
     bool is_acquire_release = !is_exclusive || instr->LdStXAcquireRelease();
     bool is_load = instr->LdStXLoad();
     bool is_pair = instr->LdStXPair();

     unsigned element_size = 1 << instr->LdStXSizeLog2();
     unsigned access_size = is_pair ? element_size * 2 : element_size;
     uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);

     // Verify that the address is available to the host.
     VIXL_ASSERT(address == static_cast<uintptr_t>(address));

     // Check the alignment of `address`.
     if (AlignDown(address, access_size) != address) {
       VIXL_ALIGNMENT_EXCEPTION();
     }

     // The sp must be aligned to 16 bytes when it is accessed.
     if ((rn == 31) && (AlignDown(address, 16) != address)) {
       VIXL_ALIGNMENT_EXCEPTION();
     }

     if (is_load) {
       if (is_exclusive) {
         // Mark the accessed region for a later matching store-exclusive.
         local_monitor_.MarkExclusive(address, access_size);
       } else {
         // Any non-exclusive load can clear the local monitor as a side
         // effect. We don't need to do this, but it is useful to stress the
         // simulated code.
         local_monitor_.Clear();
       }

       // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_FP_REGS).
       // We will print a more detailed log.
       switch (op) {
         case LDXRB_w:
         case LDAXRB_w:
         case LDARB_w:
           set_wreg(rt, Read<uint8_t>(address), NoRegLog);
           break;
         case LDXRH_w:
         case LDAXRH_w:
         case LDARH_w:
           set_wreg(rt, Read<uint16_t>(address), NoRegLog);
           break;
         case LDXR_w:
         case LDAXR_w:
         case LDAR_w:
           set_wreg(rt, Read<uint32_t>(address), NoRegLog);
           break;
         case LDXR_x:
         case LDAXR_x:
         case LDAR_x:
           set_xreg(rt, Read<uint64_t>(address), NoRegLog);
           break;
         case LDXP_w:
         case LDAXP_w:
           set_wreg(rt, Read<uint32_t>(address), NoRegLog);
           set_wreg(rt2, Read<uint32_t>(address + element_size), NoRegLog);
           break;
         case LDXP_x:
         case LDAXP_x:
           set_xreg(rt, Read<uint64_t>(address), NoRegLog);
           set_xreg(rt2, Read<uint64_t>(address + element_size), NoRegLog);
           break;
         default:
           VIXL_UNREACHABLE();
       }

       if (is_acquire_release) {
         // Approximate load-acquire by issuing a full barrier after the load.
         js::jit::AtomicOperations::fenceSeqCst();
       }

       LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
       if (is_pair) {
         LogRead(address + element_size, rt2,
                 GetPrintRegisterFormatForSize(element_size));
       }
     } else {
       if (is_acquire_release) {
         // Approximate store-release by issuing a full barrier before the
         // store.
         js::jit::AtomicOperations::fenceSeqCst();
       }

       bool do_store = true;
       if (is_exclusive) {
         // The store succeeds only if both monitors still cover the region.
         // Rs receives the status result: 0 on success, 1 on failure.
         do_store = local_monitor_.IsExclusive(address, access_size) &&
                    global_monitor_.IsExclusive(address, access_size);
         set_wreg(rs, do_store ? 0 : 1);

         //  - All exclusive stores explicitly clear the local monitor.
         local_monitor_.Clear();
       } else {
         //  - Any other store can clear the local monitor as a side effect.
         local_monitor_.MaybeClear();
       }

       if (do_store) {
         switch (op) {
           case STXRB_w:
           case STLXRB_w:
           case STLRB_w:
             Write<uint8_t>(address, wreg(rt));
             break;
           case STXRH_w:
           case STLXRH_w:
           case STLRH_w:
             Write<uint16_t>(address, wreg(rt));
             break;
           case STXR_w:
           case STLXR_w:
           case STLR_w:
             Write<uint32_t>(address, wreg(rt));
             break;
           case STXR_x:
           case STLXR_x:
           case STLR_x:
             Write<uint64_t>(address, xreg(rt));
             break;
           case STXP_w:
           case STLXP_w:
             Write<uint32_t>(address, wreg(rt));
             Write<uint32_t>(address + element_size, wreg(rt2));
             break;
           case STXP_x:
           case STLXP_x:
             Write<uint64_t>(address, xreg(rt));
             Write<uint64_t>(address + element_size, xreg(rt2));
             break;
           default:
             VIXL_UNREACHABLE();
         }

         LogWrite(address, rt, GetPrintRegisterFormatForSize(element_size));
         if (is_pair) {
           LogWrite(address + element_size, rt2,
                    GetPrintRegisterFormatForSize(element_size));
         }
       }
     }
 }
}
   1597 
// Implements the LD<op> family of atomic read-modify-write instructions
// (LDADD, LDCLR, LDEOR, LDSET, LDSMAX/LDSMIN, LDUMAX/LDUMIN, and their
// acquire/release variants): read the old value at [Rn], combine it with
// Rs, write the result back, and return the old value in Rt.
//
// Signed vs unsigned min/max is selected by instantiating T as a signed or
// unsigned type. The read-modify-write sequence is not atomic with respect
// to other host threads; acquire/release are approximated with barriers.
template <typename T>
void Simulator::AtomicMemorySimpleHelper(const Instruction* instr) {
 unsigned rs = instr->Rs();
 unsigned rt = instr->Rt();
 unsigned rn = instr->Rn();

 // No acquire barrier is issued when the destination is the zero register.
 bool is_acquire = (instr->Bit(23) == 1) && (rt != kZeroRegCode);
 bool is_release = instr->Bit(22) == 1;

 unsigned element_size = sizeof(T);
 uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);

 // Verify that the address is available to the host.
 VIXL_ASSERT(address == static_cast<uintptr_t>(address));

 // Handle wasm out-of-bounds accesses before touching memory.
 address = Memory::AddressUntag(address);
 if (handle_wasm_seg_fault(address, sizeof(T)))
   return;

 T value = reg<T>(rs);

 T data = Memory::Read<T>(address);

 if (is_acquire) {
   // Approximate load-acquire by issuing a full barrier after the load.
   __sync_synchronize();
 }

 // Combine the old memory value with Rs according to the operation.
 T result = 0;
 switch (instr->Mask(AtomicMemorySimpleOpMask)) {
   case LDADDOp:
     result = data + value;
     break;
   case LDCLROp:
     VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
     result = data & ~value;
     break;
   case LDEOROp:
     VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
     result = data ^ value;
     break;
   case LDSETOp:
     VIXL_ASSERT(!std::numeric_limits<T>::is_signed);
     result = data | value;
     break;

   // Signed/Unsigned difference is done via the templated type T.
   case LDSMAXOp:
   case LDUMAXOp:
     result = (data > value) ? data : value;
     break;
   case LDSMINOp:
   case LDUMINOp:
     result = (data > value) ? value : data;
     break;
 }

 if (is_release) {
   // Approximate store-release by issuing a full barrier before the store.
   __sync_synchronize();
 }

 // Write back the combined value; Rt receives the original memory value.
 Memory::Write<T>(address, result);
 set_reg<T>(rt, data, NoRegLog);

 LogRead(address, rt, GetPrintRegisterFormatForSize(element_size));
 LogWrite(address, rs, GetPrintRegisterFormatForSize(element_size));
}
   1666 
   1667 template <typename T>
   1668 void Simulator::AtomicMemorySwapHelper(const Instruction* instr) {
   1669  unsigned rs = instr->Rs();
   1670  unsigned rt = instr->Rt();
   1671  unsigned rn = instr->Rn();
   1672 
   1673  bool is_acquire = (instr->Bit(23) == 1) && (rt != kZeroRegCode);
   1674  bool is_release = instr->Bit(22) == 1;
   1675 
   1676  unsigned element_size = sizeof(T);
   1677  uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
   1678 
   1679  // Verify that the address is available to the host.
   1680  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   1681 
   1682  address = Memory::AddressUntag(address);
   1683  if (handle_wasm_seg_fault(address, sizeof(T)))
   1684    return;
   1685 
   1686  T data = Memory::Read<T>(address);
   1687  if (is_acquire) {
   1688    // Approximate load-acquire by issuing a full barrier after the load.
   1689    __sync_synchronize();
   1690  }
   1691 
   1692  if (is_release) {
   1693    // Approximate store-release by issuing a full barrier before the store.
   1694    __sync_synchronize();
   1695  }
   1696  Memory::Write<T>(address, reg<T>(rs));
   1697 
   1698  set_reg<T>(rt, data);
   1699 
   1700  LogRead(address, rt, GetPrintRegisterFormat(element_size));
   1701  LogWrite(address, rs, GetPrintRegisterFormat(element_size));
   1702 }
   1703 
   1704 template <typename T>
   1705 void Simulator::LoadAcquireRCpcHelper(const Instruction* instr) {
   1706  unsigned rt = instr->Rt();
   1707  unsigned rn = instr->Rn();
   1708 
   1709  unsigned element_size = sizeof(T);
   1710  uint64_t address = reg<uint64_t>(rn, Reg31IsStackPointer);
   1711 
   1712  // Verify that the address is available to the host.
   1713  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   1714 
   1715  address = Memory::AddressUntag(address);
   1716  if (handle_wasm_seg_fault(address, sizeof(T)))
   1717    return;
   1718 
   1719  set_reg<T>(rt, Memory::Read<T>(address));
   1720 
   1721  // Approximate load-acquire by issuing a full barrier after the load.
   1722  __sync_synchronize();
   1723 
   1724  LogRead(address, rt, GetPrintRegisterFormat(element_size));
   1725 }
   1726 
// X-macro lists of the single-register atomic read-modify-write mnemonics.
// The UINT list covers operations simulated on unsigned types; the INT list
// covers LDSMAX/LDSMIN, which need signed comparisons in the helper.
#define ATOMIC_MEMORY_SIMPLE_UINT_LIST(V) \
 V(LDADD)                                \
 V(LDCLR)                                \
 V(LDEOR)                                \
 V(LDSET)                                \
 V(LDUMAX)                               \
 V(LDUMIN)

#define ATOMIC_MEMORY_SIMPLE_INT_LIST(V) \
 V(LDSMAX)                              \
 V(LDSMIN)

// Decodes and dispatches the Armv8.1 atomic memory instructions. The
// SIM_FUNC_* macros expand each mnemonic into its four case labels (plain,
// acquire, release, acquire-release) for one access size, so each list
// invocation below covers a whole family of encodings.
void Simulator::VisitAtomicMemory(const Instruction* instr) {
 switch (instr->Mask(AtomicMemoryMask)) {
// clang-format off
#define SIM_FUNC_B(A) \
   case A##B:        \
   case A##AB:       \
   case A##LB:       \
   case A##ALB:
#define SIM_FUNC_H(A) \
   case A##H:        \
   case A##AH:       \
   case A##LH:       \
   case A##ALH:
#define SIM_FUNC_w(A) \
   case A##_w:       \
   case A##A_w:      \
   case A##L_w:      \
   case A##AL_w:
#define SIM_FUNC_x(A) \
   case A##_x:       \
   case A##A_x:      \
   case A##L_x:      \
   case A##AL_x:

   ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_B)
     AtomicMemorySimpleHelper<uint8_t>(instr);
     break;
   ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_B)
     AtomicMemorySimpleHelper<int8_t>(instr);
     break;
   ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_H)
     AtomicMemorySimpleHelper<uint16_t>(instr);
     break;
   ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_H)
     AtomicMemorySimpleHelper<int16_t>(instr);
     break;
   ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_w)
     AtomicMemorySimpleHelper<uint32_t>(instr);
     break;
   ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_w)
     AtomicMemorySimpleHelper<int32_t>(instr);
     break;
   ATOMIC_MEMORY_SIMPLE_UINT_LIST(SIM_FUNC_x)
     AtomicMemorySimpleHelper<uint64_t>(instr);
     break;
   ATOMIC_MEMORY_SIMPLE_INT_LIST(SIM_FUNC_x)
     AtomicMemorySimpleHelper<int64_t>(instr);
     break;
     // clang-format on

   // Swap instructions are always simulated on unsigned types.
   case SWPB:
   case SWPAB:
   case SWPLB:
   case SWPALB:
     AtomicMemorySwapHelper<uint8_t>(instr);
     break;
   case SWPH:
   case SWPAH:
   case SWPLH:
   case SWPALH:
     AtomicMemorySwapHelper<uint16_t>(instr);
     break;
   case SWP_w:
   case SWPA_w:
   case SWPL_w:
   case SWPAL_w:
     AtomicMemorySwapHelper<uint32_t>(instr);
     break;
   case SWP_x:
   case SWPA_x:
   case SWPL_x:
   case SWPAL_x:
     AtomicMemorySwapHelper<uint64_t>(instr);
     break;
   // RCpc load-acquire instructions.
   case LDAPRB:
     LoadAcquireRCpcHelper<uint8_t>(instr);
     break;
   case LDAPRH:
     LoadAcquireRCpcHelper<uint16_t>(instr);
     break;
   case LDAPR_w:
     LoadAcquireRCpcHelper<uint32_t>(instr);
     break;
   case LDAPR_x:
     LoadAcquireRCpcHelper<uint64_t>(instr);
     break;
 }
}
   1827 
   1828 void Simulator::VisitLoadLiteral(const Instruction* instr) {
   1829  unsigned rt = instr->Rt();
   1830  uint64_t address = instr->LiteralAddress<uint64_t>();
   1831 
   1832  // Verify that the calculated address is available to the host.
   1833  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   1834 
   1835  switch (instr->Mask(LoadLiteralMask)) {
   1836    // Use NoRegLog to suppress the register trace (LOG_REGS, LOG_VREGS), then
   1837    // print a more detailed log.
   1838    case LDR_w_lit:
   1839      set_wreg(rt, Read<uint32_t>(address), NoRegLog);
   1840      LogRead(address, rt, kPrintWReg);
   1841      break;
   1842    case LDR_x_lit:
   1843      set_xreg(rt, Read<uint64_t>(address), NoRegLog);
   1844      LogRead(address, rt, kPrintXReg);
   1845      break;
   1846    case LDR_s_lit:
   1847      set_sreg(rt, Read<float>(address), NoRegLog);
   1848      LogVRead(address, rt, kPrintSReg);
   1849      break;
   1850    case LDR_d_lit:
   1851      set_dreg(rt, Read<double>(address), NoRegLog);
   1852      LogVRead(address, rt, kPrintDReg);
   1853      break;
   1854    case LDR_q_lit:
   1855      set_qreg(rt, Read<qreg_t>(address), NoRegLog);
   1856      LogVRead(address, rt, kPrintReg1Q);
   1857      break;
   1858    case LDRSW_x_lit:
   1859      set_xreg(rt, Read<int32_t>(address), NoRegLog);
   1860      LogRead(address, rt, kPrintWReg);
   1861      break;
   1862 
   1863    // Ignore prfm hint instructions.
   1864    case PRFM_lit: break;
   1865 
   1866    default: VIXL_UNREACHABLE();
   1867  }
   1868 
   1869  local_monitor_.MaybeClear();
   1870 }
   1871 
   1872 
   1873 uintptr_t Simulator::AddressModeHelper(unsigned addr_reg,
   1874                                       int64_t offset,
   1875                                       AddrMode addrmode) {
   1876  uint64_t address = xreg(addr_reg, Reg31IsStackPointer);
   1877 
   1878  if ((addr_reg == 31) && ((address % 16) != 0)) {
   1879    // When the base register is SP the stack pointer is required to be
   1880    // quadword aligned prior to the address calculation and write-backs.
   1881    // Misalignment will cause a stack alignment fault.
   1882    VIXL_ALIGNMENT_EXCEPTION();
   1883  }
   1884 
   1885  if ((addrmode == PreIndex) || (addrmode == PostIndex)) {
   1886    VIXL_ASSERT(offset != 0);
   1887    // Only preindex should log the register update here. For Postindex, the
   1888    // update will be printed automatically by LogWrittenRegisters _after_ the
   1889    // memory access itself is logged.
   1890    RegLogMode log_mode = (addrmode == PreIndex) ? LogRegWrites : NoRegLog;
   1891    set_xreg(addr_reg, address + offset, log_mode, Reg31IsStackPointer);
   1892  }
   1893 
   1894  if ((addrmode == Offset) || (addrmode == PreIndex)) {
   1895    address += offset;
   1896  }
   1897 
   1898  // Verify that the calculated address is available to the host.
   1899  VIXL_ASSERT(address == static_cast<uintptr_t>(address));
   1900 
   1901  return static_cast<uintptr_t>(address);
   1902 }
   1903 
   1904 
   1905 void Simulator::VisitMoveWideImmediate(const Instruction* instr) {
   1906  MoveWideImmediateOp mov_op =
   1907    static_cast<MoveWideImmediateOp>(instr->Mask(MoveWideImmediateMask));
   1908  int64_t new_xn_val = 0;
   1909 
   1910  bool is_64_bits = instr->SixtyFourBits() == 1;
   1911  // Shift is limited for W operations.
   1912  VIXL_ASSERT(is_64_bits || (instr->ShiftMoveWide() < 2));
   1913 
   1914  // Get the shifted immediate.
   1915  int64_t shift = instr->ShiftMoveWide() * 16;
   1916  int64_t shifted_imm16 = static_cast<int64_t>(instr->ImmMoveWide()) << shift;
   1917 
   1918  // Compute the new value.
   1919  switch (mov_op) {
   1920    case MOVN_w:
   1921    case MOVN_x: {
   1922        new_xn_val = ~shifted_imm16;
   1923        if (!is_64_bits) new_xn_val &= kWRegMask;
   1924      break;
   1925    }
   1926    case MOVK_w:
   1927    case MOVK_x: {
   1928        unsigned reg_code = instr->Rd();
   1929        int64_t prev_xn_val = is_64_bits ? xreg(reg_code)
   1930                                         : wreg(reg_code);
   1931        new_xn_val =
   1932            (prev_xn_val & ~(INT64_C(0xffff) << shift)) | shifted_imm16;
   1933      break;
   1934    }
   1935    case MOVZ_w:
   1936    case MOVZ_x: {
   1937        new_xn_val = shifted_imm16;
   1938      break;
   1939    }
   1940    default:
   1941      VIXL_UNREACHABLE();
   1942  }
   1943 
   1944  // Update the destination register.
   1945  set_xreg(instr->Rd(), new_xn_val);
   1946 }
   1947 
   1948 
   1949 void Simulator::VisitConditionalSelect(const Instruction* instr) {
   1950  uint64_t new_val = xreg(instr->Rn());
   1951 
   1952  if (ConditionFailed(static_cast<Condition>(instr->Condition()))) {
   1953    new_val = xreg(instr->Rm());
   1954    switch (instr->Mask(ConditionalSelectMask)) {
   1955      case CSEL_w:
   1956      case CSEL_x: break;
   1957      case CSINC_w:
   1958      case CSINC_x: new_val++; break;
   1959      case CSINV_w:
   1960      case CSINV_x: new_val = ~new_val; break;
   1961      case CSNEG_w:
   1962      case CSNEG_x: new_val = -new_val; break;
   1963      default: VIXL_UNIMPLEMENTED();
   1964    }
   1965  }
   1966  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   1967  set_reg(reg_size, instr->Rd(), new_val);
   1968 }
   1969 
   1970 
   1971 void Simulator::VisitDataProcessing1Source(const Instruction* instr) {
   1972  unsigned dst = instr->Rd();
   1973  unsigned src = instr->Rn();
   1974 
   1975  switch (instr->Mask(DataProcessing1SourceMask)) {
   1976    case RBIT_w: set_wreg(dst, ReverseBits(wreg(src))); break;
   1977    case RBIT_x: set_xreg(dst, ReverseBits(xreg(src))); break;
   1978    case REV16_w: set_wreg(dst, ReverseBytes(wreg(src), 1)); break;
   1979    case REV16_x: set_xreg(dst, ReverseBytes(xreg(src), 1)); break;
   1980    case REV_w: set_wreg(dst, ReverseBytes(wreg(src), 2)); break;
   1981    case REV32_x: set_xreg(dst, ReverseBytes(xreg(src), 2)); break;
   1982    case REV_x: set_xreg(dst, ReverseBytes(xreg(src), 3)); break;
   1983    case CLZ_w: set_wreg(dst, CountLeadingZeros(wreg(src))); break;
   1984    case CLZ_x: set_xreg(dst, CountLeadingZeros(xreg(src))); break;
   1985    case CLS_w: {
   1986      set_wreg(dst, CountLeadingSignBits(wreg(src)));
   1987      break;
   1988    }
   1989    case CLS_x: {
   1990      set_xreg(dst, CountLeadingSignBits(xreg(src)));
   1991      break;
   1992    }
   1993    case ABS_w:
   1994      set_wreg(dst, Abs(wreg(src)));
   1995      break;
   1996    case ABS_x:
   1997      set_xreg(dst, Abs(xreg(src)));
   1998      break;
   1999    case CNT_w:
   2000      set_wreg(dst, CountSetBits(wreg(src)));
   2001      break;
   2002    case CNT_x:
   2003      set_xreg(dst, CountSetBits(xreg(src)));
   2004      break;
   2005    case CTZ_w:
   2006      set_wreg(dst, CountTrailingZeros(wreg(src)));
   2007      break;
   2008    case CTZ_x:
   2009      set_xreg(dst, CountTrailingZeros(xreg(src)));
   2010      break;
   2011    default: VIXL_UNIMPLEMENTED();
   2012  }
   2013 }
   2014 
   2015 
   2016 uint32_t Simulator::Poly32Mod2(unsigned n, uint64_t data, uint32_t poly) {
   2017  VIXL_ASSERT((n > 32) && (n <= 64));
   2018  for (unsigned i = (n - 1); i >= 32; i--) {
   2019    if (((data >> i) & 1) != 0) {
   2020      uint64_t polysh32 = (uint64_t)poly << (i - 32);
   2021      uint64_t mask = (UINT64_C(1) << i) - 1;
   2022      data = ((data & mask) ^ polysh32);
   2023    }
   2024  }
   2025  return data & 0xffffffff;
   2026 }
   2027 
   2028 
   2029 template <typename T>
   2030 uint32_t Simulator::Crc32Checksum(uint32_t acc, T val, uint32_t poly) {
   2031  unsigned size = sizeof(val) * 8;  // Number of bits in type T.
   2032  VIXL_ASSERT((size == 8) || (size == 16) || (size == 32));
   2033  uint64_t tempacc = static_cast<uint64_t>(ReverseBits(acc)) << size;
   2034  uint64_t tempval = static_cast<uint64_t>(ReverseBits(val)) << 32;
   2035  return ReverseBits(Poly32Mod2(32 + size, tempacc ^ tempval, poly));
   2036 }
   2037 
   2038 
   2039 uint32_t Simulator::Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly) {
   2040  // Poly32Mod2 cannot handle inputs with more than 32 bits, so compute
   2041  // the CRC of each 32-bit word sequentially.
   2042  acc = Crc32Checksum(acc, (uint32_t)(val & 0xffffffff), poly);
   2043  return Crc32Checksum(acc, (uint32_t)(val >> 32), poly);
   2044 }
   2045 
   2046 
// Simulates the two-source data-processing instructions: integer division,
// register-controlled variable shifts, CRC32 checksums and the scalar
// min/max instructions. Shift operations record the shift kind here and are
// evaluated after the switch.
void Simulator::VisitDataProcessing2Source(const Instruction* instr) {
 Shift shift_op = NO_SHIFT;
 int64_t result = 0;
 unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;

 switch (instr->Mask(DataProcessing2SourceMask)) {
   case SDIV_w: {
     int32_t rn = wreg(instr->Rn());
     int32_t rm = wreg(instr->Rm());
     // INT_MIN / -1 would overflow in C++; Arm defines the result as
     // INT_MIN, so special-case it before dividing.
     if ((rn == kWMinInt) && (rm == -1)) {
       result = kWMinInt;
     } else if (rm == 0) {
       // Division by zero can be trapped, but not on A-class processors.
       result = 0;
     } else {
       result = rn / rm;
     }
     break;
   }
   case SDIV_x: {
     int64_t rn = xreg(instr->Rn());
     int64_t rm = xreg(instr->Rm());
     if ((rn == kXMinInt) && (rm == -1)) {
       result = kXMinInt;
     } else if (rm == 0) {
       // Division by zero can be trapped, but not on A-class processors.
       result = 0;
     } else {
       result = rn / rm;
     }
     break;
   }
   case UDIV_w: {
     uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn()));
     uint32_t rm = static_cast<uint32_t>(wreg(instr->Rm()));
     if (rm == 0) {
       // Division by zero can be trapped, but not on A-class processors.
       result = 0;
     } else {
       result = rn / rm;
     }
     break;
   }
   case UDIV_x: {
     uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn()));
     uint64_t rm = static_cast<uint64_t>(xreg(instr->Rm()));
     if (rm == 0) {
       // Division by zero can be trapped, but not on A-class processors.
       result = 0;
     } else {
       result = rn / rm;
     }
     break;
   }
   // Variable shifts: remember the kind; the shift itself happens below.
   case LSLV_w:
   case LSLV_x: shift_op = LSL; break;
   case LSRV_w:
   case LSRV_x: shift_op = LSR; break;
   case ASRV_w:
   case ASRV_x: shift_op = ASR; break;
   case RORV_w:
   case RORV_x: shift_op = ROR; break;
   // CRC32 (ISO polynomial) over 8/16/32/64 bits of input.
   case CRC32B: {
     uint32_t acc = reg<uint32_t>(instr->Rn());
     uint8_t  val = reg<uint8_t>(instr->Rm());
     result = Crc32Checksum(acc, val, CRC32_POLY);
     break;
   }
   case CRC32H: {
     uint32_t acc = reg<uint32_t>(instr->Rn());
     uint16_t val = reg<uint16_t>(instr->Rm());
     result = Crc32Checksum(acc, val, CRC32_POLY);
     break;
   }
   case CRC32W: {
     uint32_t acc = reg<uint32_t>(instr->Rn());
     uint32_t val = reg<uint32_t>(instr->Rm());
     result = Crc32Checksum(acc, val, CRC32_POLY);
     break;
   }
   case CRC32X: {
     uint32_t acc = reg<uint32_t>(instr->Rn());
     uint64_t val = reg<uint64_t>(instr->Rm());
     result = Crc32Checksum(acc, val, CRC32_POLY);
     // CRC results are always 32 bits wide, even for 64-bit input.
     reg_size = kWRegSize;
     break;
   }
   // CRC32C (Castagnoli polynomial) variants.
   case CRC32CB: {
     uint32_t acc = reg<uint32_t>(instr->Rn());
     uint8_t  val = reg<uint8_t>(instr->Rm());
     result = Crc32Checksum(acc, val, CRC32C_POLY);
     break;
   }
   case CRC32CH: {
     uint32_t acc = reg<uint32_t>(instr->Rn());
     uint16_t val = reg<uint16_t>(instr->Rm());
     result = Crc32Checksum(acc, val, CRC32C_POLY);
     break;
   }
   case CRC32CW: {
     uint32_t acc = reg<uint32_t>(instr->Rn());
     uint32_t val = reg<uint32_t>(instr->Rm());
     result = Crc32Checksum(acc, val, CRC32C_POLY);
     break;
   }
   case CRC32CX: {
     uint32_t acc = reg<uint32_t>(instr->Rn());
     uint64_t val = reg<uint64_t>(instr->Rm());
     result = Crc32Checksum(acc, val, CRC32C_POLY);
     reg_size = kWRegSize;
     break;
   }
   // Scalar min/max; signedness is selected by the operand types.
   case SMAX_w: {
     int32_t rn = wreg(instr->Rn());
     int32_t rm = wreg(instr->Rm());
     result = std::max(rn, rm);
     break;
   }
   case SMAX_x: {
     int64_t rn = xreg(instr->Rn());
     int64_t rm = xreg(instr->Rm());
     result = std::max(rn, rm);
     break;
   }
   case SMIN_w: {
     int32_t rn = wreg(instr->Rn());
     int32_t rm = wreg(instr->Rm());
     result = std::min(rn, rm);
     break;
   }
   case SMIN_x: {
     int64_t rn = xreg(instr->Rn());
     int64_t rm = xreg(instr->Rm());
     result = std::min(rn, rm);
     break;
   }
   case UMAX_w: {
     uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn()));
     uint32_t rm = static_cast<uint32_t>(wreg(instr->Rm()));
     result = std::max(rn, rm);
     break;
   }
   case UMAX_x: {
     uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn()));
     uint64_t rm = static_cast<uint64_t>(xreg(instr->Rm()));
     result = std::max(rn, rm);
     break;
   }
   case UMIN_w: {
     uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn()));
     uint32_t rm = static_cast<uint32_t>(wreg(instr->Rm()));
     result = std::min(rn, rm);
     break;
   }
   case UMIN_x: {
     uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn()));
     uint64_t rm = static_cast<uint64_t>(xreg(instr->Rm()));
     result = std::min(rn, rm);
     break;
   }
   default: VIXL_UNIMPLEMENTED();
 }

 if (shift_op != NO_SHIFT) {
   // Shift distance encoded in the least-significant five/six bits of the
   // register.
   int mask = (instr->SixtyFourBits() == 1) ? 0x3f : 0x1f;
   unsigned shift = wreg(instr->Rm()) & mask;
   result = ShiftOperand(reg_size, reg(reg_size, instr->Rn()), shift_op,
                         shift);
 }
 set_reg(reg_size, instr->Rd(), result);
}
   2220 
   2221 
   2222 void Simulator::VisitMaxMinImmediate(const Instruction *instr) {
   2223  int64_t result = 0;
   2224  unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
   2225 
   2226  int32_t imm = instr->ExtractSignedBits(17, 10);
   2227 
   2228  switch (instr->Mask(MaxMinImmediateMask)) {
   2229    case SMAX_w_imm: {
   2230      int32_t rn = wreg(instr->Rn());
   2231      int32_t rm = imm;
   2232      result = std::max(rn, rm);
   2233      break;
   2234    }
   2235    case SMAX_x_imm: {
   2236      int64_t rn = xreg(instr->Rn());
   2237      int64_t rm = imm;
   2238      result = std::max(rn, rm);
   2239      break;
   2240    }
   2241    case SMIN_w_imm: {
   2242      int32_t rn = wreg(instr->Rn());
   2243      int32_t rm = imm;
   2244      result = std::min(rn, rm);
   2245      break;
   2246    }
   2247    case SMIN_x_imm: {
   2248      int64_t rn = xreg(instr->Rn());
   2249      int64_t rm = imm;
   2250      result = std::min(rn, rm);
   2251      break;
   2252    }
   2253    case UMAX_w_imm: {
   2254      uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn()));
   2255      uint32_t rm = static_cast<uint32_t>(imm);
   2256      result = std::max(rn, rm);
   2257      break;
   2258    }
   2259    case UMAX_x_imm: {
   2260      uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn()));
   2261      uint64_t rm = static_cast<uint64_t>(imm);
   2262      result = std::max(rn, rm);
   2263      break;
   2264    }
   2265    case UMIN_w_imm: {
   2266      uint32_t rn = static_cast<uint32_t>(wreg(instr->Rn()));
   2267      uint32_t rm = static_cast<uint32_t>(imm);
   2268      result = std::min(rn, rm);
   2269      break;
   2270    }
   2271    case UMIN_x_imm: {
   2272      uint64_t rn = static_cast<uint64_t>(xreg(instr->Rn()));
   2273      uint64_t rm = static_cast<uint64_t>(imm);
   2274      result = std::min(rn, rm);
   2275      break;
   2276    }
   2277    default: VIXL_UNIMPLEMENTED();
   2278  }
   2279  set_reg(reg_size, instr->Rd(), result);
   2280 }
   2281 
   2282 
// Simulates the three-source multiply-accumulate instructions: MADD/MSUB,
// the widening 32x32->64 forms (SMADDL/SMSUBL/UMADDL/UMSUBL) and the
// high-half 64x64 multiplies (UMULH/SMULH).
void Simulator::VisitDataProcessing3Source(const Instruction* instr) {
 unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;

 int64_t result = 0;
 // Extract and sign- or zero-extend 32-bit arguments for widening operations.
 uint64_t rn_u32 = reg<uint32_t>(instr->Rn());
 uint64_t rm_u32 = reg<uint32_t>(instr->Rm());
 int64_t rn_s32 = reg<int32_t>(instr->Rn());
 int64_t rm_s32 = reg<int32_t>(instr->Rm());
 switch (instr->Mask(DataProcessing3SourceMask)) {
   case MADD_w:
   case MADD_x:
     // Both W and X forms compute in 64 bits; reg_size (used by set_reg
     // below) selects the destination width for the W form.
     result = xreg(instr->Ra()) + (xreg(instr->Rn()) * xreg(instr->Rm()));
     break;
   case MSUB_w:
   case MSUB_x:
     result = xreg(instr->Ra()) - (xreg(instr->Rn()) * xreg(instr->Rm()));
     break;
   // Widening multiplies: 32x32 -> 64, accumulated into Ra.
   case SMADDL_x: result = xreg(instr->Ra()) + (rn_s32 * rm_s32); break;
   case SMSUBL_x: result = xreg(instr->Ra()) - (rn_s32 * rm_s32); break;
   case UMADDL_x: result = xreg(instr->Ra()) + (rn_u32 * rm_u32); break;
   case UMSUBL_x: result = xreg(instr->Ra()) - (rn_u32 * rm_u32); break;
   case UMULH_x:
     // Upper 64 bits of the unsigned 64x64 -> 128-bit product.
     result = internal::MultiplyHigh<64>(reg<uint64_t>(instr->Rn()),
                                         reg<uint64_t>(instr->Rm()));
     break;
   case SMULH_x:
     // Upper 64 bits of the signed 64x64 -> 128-bit product.
     result = internal::MultiplyHigh<64>(xreg(instr->Rn()), xreg(instr->Rm()));
     break;
   default: VIXL_UNIMPLEMENTED();
 }
 set_reg(reg_size, instr->Rd(), result);
}
   2316 
   2317 
// Simulates BFM/SBFM/UBFM: rotate Rn right by ImmR, select the bitfield
// identified by ImmS with a mask, and merge it into either the destination
// register (BFM) or zero (SBFM/UBFM), sign-extending for SBFM.
void Simulator::VisitBitfield(const Instruction* instr) {
 unsigned reg_size = instr->SixtyFourBits() ? kXRegSize : kWRegSize;
 int64_t reg_mask = instr->SixtyFourBits() ? kXRegMask : kWRegMask;
 int64_t R = instr->ImmR();
 int64_t S = instr->ImmS();
 int64_t diff = S - R;
 int64_t mask;
 if (diff >= 0) {
   // The field does not wrap: the mask covers bits [diff:0].
   mask = (diff < (reg_size - 1)) ? (INT64_C(1) << (diff + 1)) - 1
                                  : reg_mask;
 } else {
   // The field wraps around the register: build the mask for bits [S:0],
   // then rotate it right by R.
   mask = (INT64_C(1) << (S + 1)) - 1;
   mask = (static_cast<uint64_t>(mask) >> R) | (mask << (reg_size - R));
   diff += reg_size;
 }

 // inzero indicates if the extracted bitfield is inserted into the
 // destination register value or in zero.
 // If extend is true, extend the sign of the extracted bitfield.
 bool inzero = false;
 bool extend = false;
 switch (instr->Mask(BitfieldMask)) {
   case BFM_x:
   case BFM_w:
     break;
   case SBFM_x:
   case SBFM_w:
     inzero = true;
     extend = true;
     break;
   case UBFM_x:
   case UBFM_w:
     inzero = true;
     break;
   default:
     VIXL_UNIMPLEMENTED();
 }

 int64_t dst = inzero ? 0 : reg(reg_size, instr->Rd());
 int64_t src = reg(reg_size, instr->Rn());
 // Rotate source bitfield into place.
 int64_t result = (static_cast<uint64_t>(src) >> R) | (src << (reg_size - R));
 // Determine the sign extension.
 int64_t topbits = ((INT64_C(1) << (reg_size - diff - 1)) - 1) << (diff + 1);
 // Replicate bit S of the source across the top bits when sign-extending.
 int64_t signbits = extend && ((src >> S) & 1) ? topbits : 0;

 // Merge sign extension, dest/zero and bitfield.
 result = signbits | (result & mask) | (dst & ~mask);

 set_reg(reg_size, instr->Rd(), result);
}
   2369 
   2370 
   2371 void Simulator::VisitExtract(const Instruction* instr) {
   2372  unsigned lsb = instr->ImmS();
   2373  unsigned reg_size = (instr->SixtyFourBits() == 1) ? kXRegSize
   2374                                                    : kWRegSize;
   2375  uint64_t low_res = static_cast<uint64_t>(reg(reg_size, instr->Rm())) >> lsb;
   2376  uint64_t high_res =
   2377      (lsb == 0) ? 0 : reg(reg_size, instr->Rn()) << (reg_size - lsb);
   2378  set_reg(reg_size, instr->Rd(), low_res | high_res);
   2379 }
   2380 
   2381 
   2382 void Simulator::VisitFPImmediate(const Instruction* instr) {
   2383  AssertSupportedFPCR();
   2384 
   2385  unsigned dest = instr->Rd();
   2386  switch (instr->Mask(FPImmediateMask)) {
   2387    case FMOV_s_imm: set_sreg(dest, instr->ImmFP32()); break;
   2388    case FMOV_d_imm: set_dreg(dest, instr->ImmFP64()); break;
   2389    default: VIXL_UNREACHABLE();
   2390  }
   2391 }
   2392 
   2393 
   2394 void Simulator::VisitFPIntegerConvert(const Instruction* instr) {
   2395  AssertSupportedFPCR();
   2396 
   2397  unsigned dst = instr->Rd();
   2398  unsigned src = instr->Rn();
   2399 
   2400  FPRounding round = RMode();
   2401 
   2402  switch (instr->Mask(FPIntegerConvertMask)) {
   2403    case FCVTAS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieAway)); break;
   2404    case FCVTAS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieAway)); break;
   2405    case FCVTAS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieAway)); break;
   2406    case FCVTAS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieAway)); break;
   2407    case FCVTAU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieAway)); break;
   2408    case FCVTAU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieAway)); break;
   2409    case FCVTAU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieAway)); break;
   2410    case FCVTAU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieAway)); break;
   2411    case FCVTMS_ws:
   2412      set_wreg(dst, FPToInt32(sreg(src), FPNegativeInfinity));
   2413      break;
   2414    case FCVTMS_xs:
   2415      set_xreg(dst, FPToInt64(sreg(src), FPNegativeInfinity));
   2416      break;
   2417    case FCVTMS_wd:
   2418      set_wreg(dst, FPToInt32(dreg(src), FPNegativeInfinity));
   2419      break;
   2420    case FCVTMS_xd:
   2421      set_xreg(dst, FPToInt64(dreg(src), FPNegativeInfinity));
   2422      break;
   2423    case FCVTMU_ws:
   2424      set_wreg(dst, FPToUInt32(sreg(src), FPNegativeInfinity));
   2425      break;
   2426    case FCVTMU_xs:
   2427      set_xreg(dst, FPToUInt64(sreg(src), FPNegativeInfinity));
   2428      break;
   2429    case FCVTMU_wd:
   2430      set_wreg(dst, FPToUInt32(dreg(src), FPNegativeInfinity));
   2431      break;
   2432    case FCVTMU_xd:
   2433      set_xreg(dst, FPToUInt64(dreg(src), FPNegativeInfinity));
   2434      break;
   2435    case FCVTPS_ws:
   2436      set_wreg(dst, FPToInt32(sreg(src), FPPositiveInfinity));
   2437      break;
   2438    case FCVTPS_xs:
   2439      set_xreg(dst, FPToInt64(sreg(src), FPPositiveInfinity));
   2440      break;
   2441    case FCVTPS_wd:
   2442      set_wreg(dst, FPToInt32(dreg(src), FPPositiveInfinity));
   2443      break;
   2444    case FCVTPS_xd:
   2445      set_xreg(dst, FPToInt64(dreg(src), FPPositiveInfinity));
   2446      break;
   2447    case FCVTPU_ws:
   2448      set_wreg(dst, FPToUInt32(sreg(src), FPPositiveInfinity));
   2449      break;
   2450    case FCVTPU_xs:
   2451      set_xreg(dst, FPToUInt64(sreg(src), FPPositiveInfinity));
   2452      break;
   2453    case FCVTPU_wd:
   2454      set_wreg(dst, FPToUInt32(dreg(src), FPPositiveInfinity));
   2455      break;
   2456    case FCVTPU_xd:
   2457      set_xreg(dst, FPToUInt64(dreg(src), FPPositiveInfinity));
   2458      break;
   2459    case FCVTNS_ws: set_wreg(dst, FPToInt32(sreg(src), FPTieEven)); break;
   2460    case FCVTNS_xs: set_xreg(dst, FPToInt64(sreg(src), FPTieEven)); break;
   2461    case FCVTNS_wd: set_wreg(dst, FPToInt32(dreg(src), FPTieEven)); break;
   2462    case FCVTNS_xd: set_xreg(dst, FPToInt64(dreg(src), FPTieEven)); break;
   2463    case FCVTNU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPTieEven)); break;
   2464    case FCVTNU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPTieEven)); break;
   2465    case FCVTNU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPTieEven)); break;
   2466    case FCVTNU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPTieEven)); break;
   2467    case FCVTZS_ws: set_wreg(dst, FPToInt32(sreg(src), FPZero)); break;
   2468    case FCVTZS_xs: set_xreg(dst, FPToInt64(sreg(src), FPZero)); break;
   2469    case FCVTZS_wd: set_wreg(dst, FPToInt32(dreg(src), FPZero)); break;
   2470    case FCVTZS_xd: set_xreg(dst, FPToInt64(dreg(src), FPZero)); break;
   2471    case FCVTZU_ws: set_wreg(dst, FPToUInt32(sreg(src), FPZero)); break;
   2472    case FCVTZU_xs: set_xreg(dst, FPToUInt64(sreg(src), FPZero)); break;
   2473    case FCVTZU_wd: set_wreg(dst, FPToUInt32(dreg(src), FPZero)); break;
   2474    case FCVTZU_xd: set_xreg(dst, FPToUInt64(dreg(src), FPZero)); break;
   2475    case FJCVTZS: set_wreg(dst, FPToFixedJS(dreg(src))); break;
   2476    case FMOV_ws: set_wreg(dst, sreg_bits(src)); break;
   2477    case FMOV_xd: set_xreg(dst, dreg_bits(src)); break;
   2478    case FMOV_sw: set_sreg_bits(dst, wreg(src)); break;
   2479    case FMOV_dx: set_dreg_bits(dst, xreg(src)); break;
   2480    case FMOV_d1_x:
   2481      LogicVRegister(vreg(dst)).SetUint(kFormatD, 1, xreg(src));
   2482      break;
   2483    case FMOV_x_d1:
   2484      set_xreg(dst, LogicVRegister(vreg(src)).Uint(kFormatD, 1));
   2485      break;
   2486 
   2487    // A 32-bit input can be handled in the same way as a 64-bit input, since
   2488    // the sign- or zero-extension will not affect the conversion.
   2489    case SCVTF_dx: set_dreg(dst, FixedToDouble(xreg(src), 0, round)); break;
   2490    case SCVTF_dw: set_dreg(dst, FixedToDouble(wreg(src), 0, round)); break;
   2491    case UCVTF_dx: set_dreg(dst, UFixedToDouble(xreg(src), 0, round)); break;
   2492    case UCVTF_dw: {
   2493      set_dreg(dst, UFixedToDouble(static_cast<uint32_t>(wreg(src)), 0, round));
   2494      break;
   2495    }
   2496    case SCVTF_sx: set_sreg(dst, FixedToFloat(xreg(src), 0, round)); break;
   2497    case SCVTF_sw: set_sreg(dst, FixedToFloat(wreg(src), 0, round)); break;
   2498    case UCVTF_sx: set_sreg(dst, UFixedToFloat(xreg(src), 0, round)); break;
   2499    case UCVTF_sw: {
   2500      set_sreg(dst, UFixedToFloat(static_cast<uint32_t>(wreg(src)), 0, round));
   2501      break;
   2502    }
   2503 
   2504    default: VIXL_UNREACHABLE();
   2505  }
   2506 }
   2507 
   2508 
   2509 void Simulator::VisitFPFixedPointConvert(const Instruction* instr) {
   2510  AssertSupportedFPCR();
   2511 
   2512  unsigned dst = instr->Rd();
   2513  unsigned src = instr->Rn();
   2514  int fbits = 64 - instr->FPScale();
   2515 
   2516  FPRounding round = RMode();
   2517 
   2518  switch (instr->Mask(FPFixedPointConvertMask)) {
   2519    // A 32-bit input can be handled in the same way as a 64-bit input, since
   2520    // the sign- or zero-extension will not affect the conversion.
   2521    case SCVTF_dx_fixed:
   2522      set_dreg(dst, FixedToDouble(xreg(src), fbits, round));
   2523      break;
   2524    case SCVTF_dw_fixed:
   2525      set_dreg(dst, FixedToDouble(wreg(src), fbits, round));
   2526      break;
   2527    case UCVTF_dx_fixed:
   2528      set_dreg(dst, UFixedToDouble(xreg(src), fbits, round));
   2529      break;
   2530    case UCVTF_dw_fixed: {
   2531      set_dreg(dst,
   2532               UFixedToDouble(static_cast<uint32_t>(wreg(src)), fbits, round));
   2533      break;
   2534    }
   2535    case SCVTF_sx_fixed:
   2536      set_sreg(dst, FixedToFloat(xreg(src), fbits, round));
   2537      break;
   2538    case SCVTF_sw_fixed:
   2539      set_sreg(dst, FixedToFloat(wreg(src), fbits, round));
   2540      break;
   2541    case UCVTF_sx_fixed:
   2542      set_sreg(dst, UFixedToFloat(xreg(src), fbits, round));
   2543      break;
   2544    case UCVTF_sw_fixed: {
   2545      set_sreg(dst,
   2546               UFixedToFloat(static_cast<uint32_t>(wreg(src)), fbits, round));
   2547      break;
   2548    }
   2549    case FCVTZS_xd_fixed:
   2550      set_xreg(dst, FPToInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
   2551      break;
   2552    case FCVTZS_wd_fixed:
   2553      set_wreg(dst, FPToInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
   2554      break;
   2555    case FCVTZU_xd_fixed:
   2556      set_xreg(dst, FPToUInt64(dreg(src) * std::pow(2.0, fbits), FPZero));
   2557      break;
   2558    case FCVTZU_wd_fixed:
   2559      set_wreg(dst, FPToUInt32(dreg(src) * std::pow(2.0, fbits), FPZero));
   2560      break;
   2561    case FCVTZS_xs_fixed:
   2562      set_xreg(dst, FPToInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
   2563      break;
   2564    case FCVTZS_ws_fixed:
   2565      set_wreg(dst, FPToInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
   2566      break;
   2567    case FCVTZU_xs_fixed:
   2568      set_xreg(dst, FPToUInt64(sreg(src) * std::pow(2.0f, fbits), FPZero));
   2569      break;
   2570    case FCVTZU_ws_fixed:
   2571      set_wreg(dst, FPToUInt32(sreg(src) * std::pow(2.0f, fbits), FPZero));
   2572      break;
   2573    default: VIXL_UNREACHABLE();
   2574  }
   2575 }
   2576 
   2577 
   2578 void Simulator::VisitFPCompare(const Instruction* instr) {
   2579  AssertSupportedFPCR();
   2580 
   2581  FPTrapFlags trap = DisableTrap;
   2582  switch (instr->Mask(FPCompareMask)) {
   2583    case FCMPE_s: trap = EnableTrap; VIXL_FALLTHROUGH();
   2584    case FCMP_s: FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap); break;
   2585    case FCMPE_d: trap = EnableTrap; VIXL_FALLTHROUGH();
   2586    case FCMP_d: FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap); break;
   2587    case FCMPE_s_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
   2588    case FCMP_s_zero: FPCompare(sreg(instr->Rn()), 0.0f, trap); break;
   2589    case FCMPE_d_zero: trap = EnableTrap; VIXL_FALLTHROUGH();
   2590    case FCMP_d_zero: FPCompare(dreg(instr->Rn()), 0.0, trap); break;
   2591    default: VIXL_UNIMPLEMENTED();
   2592  }
   2593 }
   2594 
   2595 
   2596 void Simulator::VisitFPConditionalCompare(const Instruction* instr) {
   2597  AssertSupportedFPCR();
   2598 
   2599  FPTrapFlags trap = DisableTrap;
   2600  switch (instr->Mask(FPConditionalCompareMask)) {
   2601    case FCCMPE_s: trap = EnableTrap;
   2602      VIXL_FALLTHROUGH();
   2603    case FCCMP_s:
   2604      if (ConditionPassed(instr->Condition())) {
   2605        FPCompare(sreg(instr->Rn()), sreg(instr->Rm()), trap);
   2606      } else {
   2607        nzcv().SetFlags(instr->Nzcv());
   2608        LogSystemRegister(NZCV);
   2609      }
   2610      break;
   2611    case FCCMPE_d: trap = EnableTrap;
   2612      VIXL_FALLTHROUGH();
   2613    case FCCMP_d:
   2614      if (ConditionPassed(instr->Condition())) {
   2615        FPCompare(dreg(instr->Rn()), dreg(instr->Rm()), trap);
   2616      } else {
   2617        nzcv().SetFlags(instr->Nzcv());
   2618        LogSystemRegister(NZCV);
   2619      }
   2620      break;
   2621    default: VIXL_UNIMPLEMENTED();
   2622  }
   2623 }
   2624 
   2625 
   2626 void Simulator::VisitFPConditionalSelect(const Instruction* instr) {
   2627  AssertSupportedFPCR();
   2628 
   2629  Instr selected;
   2630  if (ConditionPassed(instr->Condition())) {
   2631    selected = instr->Rn();
   2632  } else {
   2633    selected = instr->Rm();
   2634  }
   2635 
   2636  switch (instr->Mask(FPConditionalSelectMask)) {
   2637    case FCSEL_s: set_sreg(instr->Rd(), sreg(selected)); break;
   2638    case FCSEL_d: set_dreg(instr->Rd(), dreg(selected)); break;
   2639    default: VIXL_UNIMPLEMENTED();
   2640  }
   2641 }
   2642 
   2643 
// Simulate the scalar FP one-source data-processing group: register moves,
// absolute value, negation, precision conversions (half/single/double),
// square root, and the FRINT* round-to-integral family.
//
// Note the control-flow convention: every non-FRINT case returns directly,
// while the FRINT* cases only set up the rounding mode (and, for FRINTX,
// the inexact flag) and then break, falling through to the shared frint()
// call at the bottom.
void Simulator::VisitFPDataProcessing1Source(const Instruction* instr) {
  AssertSupportedFPCR();

  // fpcr_rounding/vform/rd/rn feed fsqrt and the FRINT* tail call; fd/fn
  // serve the scalar move and convert cases.
  FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
  VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS;
  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  bool inexact_exception = false;

  unsigned fd = instr->Rd();
  unsigned fn = instr->Rn();

  switch (instr->Mask(FPDataProcessing1SourceMask)) {
    case FMOV_s: set_sreg(fd, sreg(fn)); return;
    case FMOV_d: set_dreg(fd, dreg(fn)); return;
    case FABS_s: fabs_(kFormatS, vreg(fd), vreg(fn)); return;
    case FABS_d: fabs_(kFormatD, vreg(fd), vreg(fn)); return;
    case FNEG_s: fneg(kFormatS, vreg(fd), vreg(fn)); return;
    case FNEG_d: fneg(kFormatD, vreg(fd), vreg(fn)); return;
    // Precision conversions. ReadDN() is forwarded so the helpers can apply
    // the FPCR DN ("default NaN") behaviour — see the FPToFloat/FPToDouble
    // implementations for the details.
    case FCVT_ds:
      set_dreg(fd, FPToDouble(sreg(fn), ReadDN()));
      return;
    case FCVT_sd:
      set_sreg(fd, FPToFloat(dreg(fn), FPTieEven, ReadDN()));
      return;
    case FCVT_hs:
      set_hreg(fd, Float16ToRawbits(FPToFloat16(sreg(fn), FPTieEven, ReadDN())));
      return;
    case FCVT_sh:
      set_sreg(fd, FPToFloat(RawbitsToFloat16(hreg(fn)), ReadDN()));
      return;
    case FCVT_dh:
      set_dreg(fd, FPToDouble(RawbitsToFloat16(hreg(fn)), ReadDN()));
      return;
    case FCVT_hd:
      set_hreg(fd, Float16ToRawbits(FPToFloat16(dreg(fn), FPTieEven, ReadDN())));
      return;
    case FSQRT_s:
    case FSQRT_d: fsqrt(vform, rd, rn); return;
    // FRINT* cases break (not return) so they reach the frint() call below.
    case FRINTI_s:
    case FRINTI_d: break;  // Use FPCR rounding mode.
    case FRINTX_s:
    case FRINTX_d: inexact_exception = true; break;
    case FRINTA_s:
    case FRINTA_d: fpcr_rounding = FPTieAway; break;
    case FRINTM_s:
    case FRINTM_d: fpcr_rounding = FPNegativeInfinity; break;
    case FRINTN_s:
    case FRINTN_d: fpcr_rounding = FPTieEven; break;
    case FRINTP_s:
    case FRINTP_d: fpcr_rounding = FPPositiveInfinity; break;
    case FRINTZ_s:
    case FRINTZ_d: fpcr_rounding = FPZero; break;
    default: VIXL_UNIMPLEMENTED();
  }

  // Only FRINT* instructions fall through the switch above.
  frint(vform, rd, rn, fpcr_rounding, inexact_exception);
}
   2703 
   2704 
   2705 void Simulator::VisitFPDataProcessing2Source(const Instruction* instr) {
   2706  AssertSupportedFPCR();
   2707 
   2708  VectorFormat vform = (instr->Mask(FP64) == FP64) ? kFormatD : kFormatS;
   2709  SimVRegister& rd = vreg(instr->Rd());
   2710  SimVRegister& rn = vreg(instr->Rn());
   2711  SimVRegister& rm = vreg(instr->Rm());
   2712 
   2713  switch (instr->Mask(FPDataProcessing2SourceMask)) {
   2714    case FADD_s:
   2715    case FADD_d: fadd(vform, rd, rn, rm); break;
   2716    case FSUB_s:
   2717    case FSUB_d: fsub(vform, rd, rn, rm); break;
   2718    case FMUL_s:
   2719    case FMUL_d: fmul(vform, rd, rn, rm); break;
   2720    case FNMUL_s:
   2721    case FNMUL_d: fnmul(vform, rd, rn, rm); break;
   2722    case FDIV_s:
   2723    case FDIV_d: fdiv(vform, rd, rn, rm); break;
   2724    case FMAX_s:
   2725    case FMAX_d: fmax(vform, rd, rn, rm); break;
   2726    case FMIN_s:
   2727    case FMIN_d: fmin(vform, rd, rn, rm); break;
   2728    case FMAXNM_s:
   2729    case FMAXNM_d: fmaxnm(vform, rd, rn, rm); break;
   2730    case FMINNM_s:
   2731    case FMINNM_d: fminnm(vform, rd, rn, rm); break;
   2732    default:
   2733      VIXL_UNREACHABLE();
   2734  }
   2735 }
   2736 
   2737 
   2738 void Simulator::VisitFPDataProcessing3Source(const Instruction* instr) {
   2739  AssertSupportedFPCR();
   2740 
   2741  unsigned fd = instr->Rd();
   2742  unsigned fn = instr->Rn();
   2743  unsigned fm = instr->Rm();
   2744  unsigned fa = instr->Ra();
   2745 
   2746  switch (instr->Mask(FPDataProcessing3SourceMask)) {
   2747    // fd = fa +/- (fn * fm)
   2748    case FMADD_s: set_sreg(fd, FPMulAdd(sreg(fa), sreg(fn), sreg(fm))); break;
   2749    case FMSUB_s: set_sreg(fd, FPMulAdd(sreg(fa), -sreg(fn), sreg(fm))); break;
   2750    case FMADD_d: set_dreg(fd, FPMulAdd(dreg(fa), dreg(fn), dreg(fm))); break;
   2751    case FMSUB_d: set_dreg(fd, FPMulAdd(dreg(fa), -dreg(fn), dreg(fm))); break;
   2752    // Negated variants of the above.
   2753    case FNMADD_s:
   2754      set_sreg(fd, FPMulAdd(-sreg(fa), -sreg(fn), sreg(fm)));
   2755      break;
   2756    case FNMSUB_s:
   2757      set_sreg(fd, FPMulAdd(-sreg(fa), sreg(fn), sreg(fm)));
   2758      break;
   2759    case FNMADD_d:
   2760      set_dreg(fd, FPMulAdd(-dreg(fa), -dreg(fn), dreg(fm)));
   2761      break;
   2762    case FNMSUB_d:
   2763      set_dreg(fd, FPMulAdd(-dreg(fa), dreg(fn), dreg(fm)));
   2764      break;
   2765    default: VIXL_UNIMPLEMENTED();
   2766  }
   2767 }
   2768 
   2769 
   2770 bool Simulator::FPProcessNaNs(const Instruction* instr) {
   2771  unsigned fd = instr->Rd();
   2772  unsigned fn = instr->Rn();
   2773  unsigned fm = instr->Rm();
   2774  bool done = false;
   2775 
   2776  if (instr->Mask(FP64) == FP64) {
   2777    double result = FPProcessNaNs(dreg(fn), dreg(fm));
   2778    if (std::isnan(result)) {
   2779      set_dreg(fd, result);
   2780      done = true;
   2781    }
   2782  } else {
   2783    float result = FPProcessNaNs(sreg(fn), sreg(fm));
   2784    if (std::isnan(result)) {
   2785      set_sreg(fd, result);
   2786      done = true;
   2787    }
   2788  }
   2789 
   2790  return done;
   2791 }
   2792 
   2793 
   2794 void Simulator::SysOp_W(int op, int64_t val) {
   2795  switch (op) {
   2796    case IVAU:
   2797    case CVAC:
   2798    case CVAU:
   2799    case CIVAC: {
   2800      // Perform a dummy memory access to ensure that we have read access
   2801      // to the specified address.
   2802      volatile uint8_t y = Read<uint8_t>(val);
   2803      USE(y);
   2804      // TODO: Implement "case ZVA:".
   2805      break;
   2806    }
   2807    default:
   2808      VIXL_UNIMPLEMENTED();
   2809  }
   2810 }
   2811 
   2812 
// Simulate the system-instruction group: CLREX, MRS/MSR, hints (NOP/CSDB),
// memory barriers, and SYS cache operations.
void Simulator::VisitSystem(const Instruction* instr) {
  // Some system instructions hijack their Op and Cp fields to represent a
  // range of immediates instead of indicating a different instruction. This
  // makes the decoding tricky.
  if (instr->Mask(SystemExclusiveMonitorFMask) == SystemExclusiveMonitorFixed) {
    // Exclusive-monitor management: CLREX is the only encoding handled.
    VIXL_ASSERT(instr->Mask(SystemExclusiveMonitorMask) == CLREX);
    switch (instr->Mask(SystemExclusiveMonitorMask)) {
      case CLREX: {
        PrintExclusiveAccessWarning();
        ClearLocalMonitor();
        break;
      }
    }
  } else if (instr->Mask(SystemSysRegFMask) == SystemSysRegFixed) {
    // System-register moves: only NZCV and FPCR are modelled.
    switch (instr->Mask(SystemSysRegMask)) {
      case MRS: {
        switch (instr->ImmSystemRegister()) {
          case NZCV: set_xreg(instr->Rt(), nzcv().RawValue()); break;
          case FPCR: set_xreg(instr->Rt(), fpcr().RawValue()); break;
          default: VIXL_UNIMPLEMENTED();
        }
        break;
      }
      case MSR: {
        switch (instr->ImmSystemRegister()) {
          case NZCV:
            nzcv().SetRawValue(wreg(instr->Rt()));
            LogSystemRegister(NZCV);
            break;
          case FPCR:
            fpcr().SetRawValue(wreg(instr->Rt()));
            LogSystemRegister(FPCR);
            break;
          default: VIXL_UNIMPLEMENTED();
        }
        break;
      }
    }
  } else if (instr->Mask(SystemHintFMask) == SystemHintFixed) {
    // Hint instructions: NOP and CSDB are no-ops in the simulator.
    VIXL_ASSERT(instr->Mask(SystemHintMask) == HINT);
    switch (instr->ImmHint()) {
      case NOP: break;
      case CSDB: break;
      default: VIXL_UNIMPLEMENTED();
    }
  } else if (instr->Mask(MemBarrierFMask) == MemBarrierFixed) {
    // All barrier variants are modelled with a full sequentially-consistent
    // fence on the host.
    js::jit::AtomicOperations::fenceSeqCst();
  } else if ((instr->Mask(SystemSysFMask) == SystemSysFixed)) {
    switch (instr->Mask(SystemSysMask)) {
      case SYS: SysOp_W(instr->SysOp(), xreg(instr->Rt())); break;
      default: VIXL_UNIMPLEMENTED();
    }
  } else {
    VIXL_UNIMPLEMENTED();
  }
}
   2869 
   2870 
// Two-register SHA crypto instructions are not supported by this simulator.
void Simulator::VisitCrypto2RegSHA(const Instruction* instr) {
  VisitUnimplemented(instr);
}
   2874 
   2875 
// Three-register SHA crypto instructions are not supported by this
// simulator.
void Simulator::VisitCrypto3RegSHA(const Instruction* instr) {
  VisitUnimplemented(instr);
}
   2879 
   2880 
// AES crypto instructions are not supported by this simulator.
void Simulator::VisitCryptoAES(const Instruction* instr) {
  VisitUnimplemented(instr);
}
   2884 
   2885 
   2886 void Simulator::VisitNEON2RegMisc(const Instruction* instr) {
   2887  NEONFormatDecoder nfd(instr);
   2888  VectorFormat vf = nfd.GetVectorFormat();
   2889 
   2890  static const NEONFormatMap map_lp = {
   2891    {23, 22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S, NF_1D, NF_2D}
   2892  };
   2893  VectorFormat vf_lp = nfd.GetVectorFormat(&map_lp);
   2894 
   2895  static const NEONFormatMap map_fcvtl = {
   2896    {22}, {NF_4S, NF_2D}
   2897  };
   2898  VectorFormat vf_fcvtl = nfd.GetVectorFormat(&map_fcvtl);
   2899 
   2900  static const NEONFormatMap map_fcvtn = {
   2901    {22, 30}, {NF_4H, NF_8H, NF_2S, NF_4S}
   2902  };
   2903  VectorFormat vf_fcvtn = nfd.GetVectorFormat(&map_fcvtn);
   2904 
   2905  SimVRegister& rd = vreg(instr->Rd());
   2906  SimVRegister& rn = vreg(instr->Rn());
   2907 
   2908  if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_opcode) {
   2909    // These instructions all use a two bit size field, except NOT and RBIT,
   2910    // which use the field to encode the operation.
   2911    switch (instr->Mask(NEON2RegMiscMask)) {
   2912      case NEON_REV64:     rev64(vf, rd, rn); break;
   2913      case NEON_REV32:     rev32(vf, rd, rn); break;
   2914      case NEON_REV16:     rev16(vf, rd, rn); break;
   2915      case NEON_SUQADD:    suqadd(vf, rd, rn); break;
   2916      case NEON_USQADD:    usqadd(vf, rd, rn); break;
   2917      case NEON_CLS:       cls(vf, rd, rn); break;
   2918      case NEON_CLZ:       clz(vf, rd, rn); break;
   2919      case NEON_CNT:       cnt(vf, rd, rn); break;
   2920      case NEON_SQABS:     abs(vf, rd, rn).SignedSaturate(vf); break;
   2921      case NEON_SQNEG:     neg(vf, rd, rn).SignedSaturate(vf); break;
   2922      case NEON_CMGT_zero: cmp(vf, rd, rn, 0, gt); break;
   2923      case NEON_CMGE_zero: cmp(vf, rd, rn, 0, ge); break;
   2924      case NEON_CMEQ_zero: cmp(vf, rd, rn, 0, eq); break;
   2925      case NEON_CMLE_zero: cmp(vf, rd, rn, 0, le); break;
   2926      case NEON_CMLT_zero: cmp(vf, rd, rn, 0, lt); break;
   2927      case NEON_ABS:       abs(vf, rd, rn); break;
   2928      case NEON_NEG:       neg(vf, rd, rn); break;
   2929      case NEON_SADDLP:    saddlp(vf_lp, rd, rn); break;
   2930      case NEON_UADDLP:    uaddlp(vf_lp, rd, rn); break;
   2931      case NEON_SADALP:    sadalp(vf_lp, rd, rn); break;
   2932      case NEON_UADALP:    uadalp(vf_lp, rd, rn); break;
   2933      case NEON_RBIT_NOT:
   2934        vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
   2935        switch (instr->FPType()) {
   2936          case 0: not_(vf, rd, rn); break;
   2937          case 1: rbit(vf, rd, rn);; break;
   2938          default:
   2939            VIXL_UNIMPLEMENTED();
   2940        }
   2941        break;
   2942    }
   2943  } else {
   2944    VectorFormat fpf = nfd.GetVectorFormat(nfd.FPFormatMap());
   2945    FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());
   2946    bool inexact_exception = false;
   2947 
   2948    // These instructions all use a one bit size field, except XTN, SQXTUN,
   2949    // SHLL, SQXTN and UQXTN, which use a two bit size field.
   2950    switch (instr->Mask(NEON2RegMiscFPMask)) {
   2951      case NEON_FABS:   fabs_(fpf, rd, rn); return;
   2952      case NEON_FNEG:   fneg(fpf, rd, rn); return;
   2953      case NEON_FSQRT:  fsqrt(fpf, rd, rn); return;
   2954      case NEON_FCVTL:
   2955        if (instr->Mask(NEON_Q)) {
   2956          fcvtl2(vf_fcvtl, rd, rn);
   2957        } else {
   2958          fcvtl(vf_fcvtl, rd, rn);
   2959        }
   2960        return;
   2961      case NEON_FCVTN:
   2962        if (instr->Mask(NEON_Q)) {
   2963          fcvtn2(vf_fcvtn, rd, rn);
   2964        } else {
   2965          fcvtn(vf_fcvtn, rd, rn);
   2966        }
   2967        return;
   2968      case NEON_FCVTXN:
   2969        if (instr->Mask(NEON_Q)) {
   2970          fcvtxn2(vf_fcvtn, rd, rn);
   2971        } else {
   2972          fcvtxn(vf_fcvtn, rd, rn);
   2973        }
   2974        return;
   2975 
   2976      // The following instructions break from the switch statement, rather
   2977      // than return.
   2978      case NEON_FRINTI:     break;  // Use FPCR rounding mode.
   2979      case NEON_FRINTX:     inexact_exception = true; break;
   2980      case NEON_FRINTA:     fpcr_rounding = FPTieAway; break;
   2981      case NEON_FRINTM:     fpcr_rounding = FPNegativeInfinity; break;
   2982      case NEON_FRINTN:     fpcr_rounding = FPTieEven; break;
   2983      case NEON_FRINTP:     fpcr_rounding = FPPositiveInfinity; break;
   2984      case NEON_FRINTZ:     fpcr_rounding = FPZero; break;
   2985 
   2986      case NEON_FCVTNS:     fcvts(fpf, rd, rn, FPTieEven); return;
   2987      case NEON_FCVTNU:     fcvtu(fpf, rd, rn, FPTieEven); return;
   2988      case NEON_FCVTPS:     fcvts(fpf, rd, rn, FPPositiveInfinity); return;
   2989      case NEON_FCVTPU:     fcvtu(fpf, rd, rn, FPPositiveInfinity); return;
   2990      case NEON_FCVTMS:     fcvts(fpf, rd, rn, FPNegativeInfinity); return;
   2991      case NEON_FCVTMU:     fcvtu(fpf, rd, rn, FPNegativeInfinity); return;
   2992      case NEON_FCVTZS:     fcvts(fpf, rd, rn, FPZero); return;
   2993      case NEON_FCVTZU:     fcvtu(fpf, rd, rn, FPZero); return;
   2994      case NEON_FCVTAS:     fcvts(fpf, rd, rn, FPTieAway); return;
   2995      case NEON_FCVTAU:     fcvtu(fpf, rd, rn, FPTieAway); return;
   2996      case NEON_SCVTF:      scvtf(fpf, rd, rn, 0, fpcr_rounding); return;
   2997      case NEON_UCVTF:      ucvtf(fpf, rd, rn, 0, fpcr_rounding); return;
   2998      case NEON_URSQRTE:    ursqrte(fpf, rd, rn); return;
   2999      case NEON_URECPE:     urecpe(fpf, rd, rn); return;
   3000      case NEON_FRSQRTE:    frsqrte(fpf, rd, rn); return;
   3001      case NEON_FRECPE:     frecpe(fpf, rd, rn, fpcr_rounding); return;
   3002      case NEON_FCMGT_zero: fcmp_zero(fpf, rd, rn, gt); return;
   3003      case NEON_FCMGE_zero: fcmp_zero(fpf, rd, rn, ge); return;
   3004      case NEON_FCMEQ_zero: fcmp_zero(fpf, rd, rn, eq); return;
   3005      case NEON_FCMLE_zero: fcmp_zero(fpf, rd, rn, le); return;
   3006      case NEON_FCMLT_zero: fcmp_zero(fpf, rd, rn, lt); return;
   3007      default:
   3008        if ((NEON_XTN_opcode <= instr->Mask(NEON2RegMiscOpcode)) &&
   3009            (instr->Mask(NEON2RegMiscOpcode) <= NEON_UQXTN_opcode)) {
   3010          switch (instr->Mask(NEON2RegMiscMask)) {
   3011            case NEON_XTN: xtn(vf, rd, rn); return;
   3012            case NEON_SQXTN: sqxtn(vf, rd, rn); return;
   3013            case NEON_UQXTN: uqxtn(vf, rd, rn); return;
   3014            case NEON_SQXTUN: sqxtun(vf, rd, rn); return;
   3015            case NEON_SHLL:
   3016              vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());
   3017              if (instr->Mask(NEON_Q)) {
   3018                shll2(vf, rd, rn);
   3019              } else {
   3020                shll(vf, rd, rn);
   3021              }
   3022              return;
   3023            default:
   3024              VIXL_UNIMPLEMENTED();
   3025          }
   3026        } else {
   3027          VIXL_UNIMPLEMENTED();
   3028        }
   3029    }
   3030 
   3031    // Only FRINT* instructions fall through the switch above.
   3032    frint(fpf, rd, rn, fpcr_rounding, inexact_exception);
   3033  }
   3034 }
   3035 
   3036 
// Simulate NEON "three same" instructions: element-wise operations where
// both sources and the destination share a single vector format. The group
// splits into bitwise-logical, FP, and integer sub-groups, distinguished by
// the fixed-bit masks tested below.
void Simulator::VisitNEON3Same(const Instruction* instr) {
  NEONFormatDecoder nfd(instr);
  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  SimVRegister& rm = vreg(instr->Rm());

  if (instr->Mask(NEON3SameLogicalFMask) == NEON3SameLogicalFixed) {
    // Bitwise logical operations; the format map only encodes register
    // width, since these operate on raw bits rather than typed lanes.
    VectorFormat vf = nfd.GetVectorFormat(nfd.LogicalFormatMap());
    switch (instr->Mask(NEON3SameLogicalMask)) {
      case NEON_AND: and_(vf, rd, rn, rm); break;
      case NEON_ORR: orr(vf, rd, rn, rm); break;
      case NEON_ORN: orn(vf, rd, rn, rm); break;
      case NEON_EOR: eor(vf, rd, rn, rm); break;
      case NEON_BIC: bic(vf, rd, rn, rm); break;
      case NEON_BIF: bif(vf, rd, rn, rm); break;
      case NEON_BIT: bit(vf, rd, rn, rm); break;
      case NEON_BSL: bsl(vf, rd, rn, rm); break;
      default:
        VIXL_UNIMPLEMENTED();
    }
  } else if (instr->Mask(NEON3SameFPFMask) == NEON3SameFPFixed) {
    // Floating-point operations, including pairwise and compare forms.
    VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
    switch (instr->Mask(NEON3SameFPMask)) {
      case NEON_FADD:    fadd(vf, rd, rn, rm); break;
      case NEON_FSUB:    fsub(vf, rd, rn, rm); break;
      case NEON_FMUL:    fmul(vf, rd, rn, rm); break;
      case NEON_FDIV:    fdiv(vf, rd, rn, rm); break;
      case NEON_FMAX:    fmax(vf, rd, rn, rm); break;
      case NEON_FMIN:    fmin(vf, rd, rn, rm); break;
      case NEON_FMAXNM:  fmaxnm(vf, rd, rn, rm); break;
      case NEON_FMINNM:  fminnm(vf, rd, rn, rm); break;
      case NEON_FMLA:    fmla(vf, rd, rn, rm); break;
      case NEON_FMLS:    fmls(vf, rd, rn, rm); break;
      case NEON_FMULX:   fmulx(vf, rd, rn, rm); break;
      case NEON_FACGE:   fabscmp(vf, rd, rn, rm, ge); break;
      case NEON_FACGT:   fabscmp(vf, rd, rn, rm, gt); break;
      case NEON_FCMEQ:   fcmp(vf, rd, rn, rm, eq); break;
      case NEON_FCMGE:   fcmp(vf, rd, rn, rm, ge); break;
      case NEON_FCMGT:   fcmp(vf, rd, rn, rm, gt); break;
      case NEON_FRECPS:  frecps(vf, rd, rn, rm); break;
      case NEON_FRSQRTS: frsqrts(vf, rd, rn, rm); break;
      case NEON_FABD:    fabd(vf, rd, rn, rm); break;
      case NEON_FADDP:   faddp(vf, rd, rn, rm); break;
      case NEON_FMAXP:   fmaxp(vf, rd, rn, rm); break;
      case NEON_FMAXNMP: fmaxnmp(vf, rd, rn, rm); break;
      case NEON_FMINP:   fminp(vf, rd, rn, rm); break;
      case NEON_FMINNMP: fminnmp(vf, rd, rn, rm); break;
      default:
        VIXL_UNIMPLEMENTED();
    }
  } else {
    // Integer operations. Saturation, halving, and rounding variants are
    // modelled by post-processing the raw result via the LogicVRegister
    // helpers (SignedSaturate, Uhalve, Round, ...).
    VectorFormat vf = nfd.GetVectorFormat();
    switch (instr->Mask(NEON3SameMask)) {
      case NEON_ADD:   add(vf, rd, rn, rm);  break;
      case NEON_ADDP:  addp(vf, rd, rn, rm); break;
      case NEON_CMEQ:  cmp(vf, rd, rn, rm, eq); break;
      case NEON_CMGE:  cmp(vf, rd, rn, rm, ge); break;
      case NEON_CMGT:  cmp(vf, rd, rn, rm, gt); break;
      case NEON_CMHI:  cmp(vf, rd, rn, rm, hi); break;
      case NEON_CMHS:  cmp(vf, rd, rn, rm, hs); break;
      case NEON_CMTST: cmptst(vf, rd, rn, rm); break;
      case NEON_MLS:   mls(vf, rd, rn, rm); break;
      case NEON_MLA:   mla(vf, rd, rn, rm); break;
      case NEON_MUL:   mul(vf, rd, rn, rm); break;
      case NEON_PMUL:  pmul(vf, rd, rn, rm); break;
      case NEON_SMAX:  smax(vf, rd, rn, rm); break;
      case NEON_SMAXP: smaxp(vf, rd, rn, rm); break;
      case NEON_SMIN:  smin(vf, rd, rn, rm); break;
      case NEON_SMINP: sminp(vf, rd, rn, rm); break;
      case NEON_SUB:   sub(vf, rd, rn, rm);  break;
      case NEON_UMAX:  umax(vf, rd, rn, rm); break;
      case NEON_UMAXP: umaxp(vf, rd, rn, rm); break;
      case NEON_UMIN:  umin(vf, rd, rn, rm); break;
      case NEON_UMINP: uminp(vf, rd, rn, rm); break;
      case NEON_SSHL:  sshl(vf, rd, rn, rm); break;
      case NEON_USHL:  ushl(vf, rd, rn, rm); break;
      case NEON_SABD:  absdiff(vf, rd, rn, rm, true); break;
      case NEON_UABD:  absdiff(vf, rd, rn, rm, false); break;
      case NEON_SABA:  saba(vf, rd, rn, rm); break;
      case NEON_UABA:  uaba(vf, rd, rn, rm); break;
      case NEON_UQADD: add(vf, rd, rn, rm).UnsignedSaturate(vf); break;
      case NEON_SQADD: add(vf, rd, rn, rm).SignedSaturate(vf); break;
      case NEON_UQSUB: sub(vf, rd, rn, rm).UnsignedSaturate(vf); break;
      case NEON_SQSUB: sub(vf, rd, rn, rm).SignedSaturate(vf); break;
      case NEON_SQDMULH:  sqdmulh(vf, rd, rn, rm); break;
      case NEON_SQRDMULH: sqrdmulh(vf, rd, rn, rm); break;
      case NEON_UQSHL: ushl(vf, rd, rn, rm).UnsignedSaturate(vf); break;
      case NEON_SQSHL: sshl(vf, rd, rn, rm).SignedSaturate(vf); break;
      case NEON_URSHL: ushl(vf, rd, rn, rm).Round(vf); break;
      case NEON_SRSHL: sshl(vf, rd, rn, rm).Round(vf); break;
      case NEON_UQRSHL:
        ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
        break;
      case NEON_SQRSHL:
        sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
        break;
      case NEON_UHADD:
        add(vf, rd, rn, rm).Uhalve(vf);
        break;
      case NEON_URHADD:
        add(vf, rd, rn, rm).Uhalve(vf).Round(vf);
        break;
      case NEON_SHADD:
        add(vf, rd, rn, rm).Halve(vf);
        break;
      case NEON_SRHADD:
        add(vf, rd, rn, rm).Halve(vf).Round(vf);
        break;
      case NEON_UHSUB:
        sub(vf, rd, rn, rm).Uhalve(vf);
        break;
      case NEON_SHSUB:
        sub(vf, rd, rn, rm).Halve(vf);
        break;
      default:
        VIXL_UNIMPLEMENTED();
    }
  }
}
   3156 
   3157 
// Simulate NEON "three different" instructions, where the source and
// destination vector formats differ: widening operations (the ...L/...L2
// and ...W/...W2 families use the long format vf_l) and narrowing
// "high-half" operations (the ...HN/...HN2 family uses the plain format
// vf). The "2" variants operate on the upper half of the source registers.
void Simulator::VisitNEON3Different(const Instruction* instr) {
  NEONFormatDecoder nfd(instr);
  VectorFormat vf = nfd.GetVectorFormat();
  VectorFormat vf_l = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());

  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  SimVRegister& rm = vreg(instr->Rm());

  switch (instr->Mask(NEON3DifferentMask)) {
    case NEON_PMULL:    pmull(vf_l, rd, rn, rm); break;
    case NEON_PMULL2:   pmull2(vf_l, rd, rn, rm); break;
    case NEON_UADDL:    uaddl(vf_l, rd, rn, rm); break;
    case NEON_UADDL2:   uaddl2(vf_l, rd, rn, rm); break;
    case NEON_SADDL:    saddl(vf_l, rd, rn, rm); break;
    case NEON_SADDL2:   saddl2(vf_l, rd, rn, rm); break;
    case NEON_USUBL:    usubl(vf_l, rd, rn, rm); break;
    case NEON_USUBL2:   usubl2(vf_l, rd, rn, rm); break;
    case NEON_SSUBL:    ssubl(vf_l, rd, rn, rm); break;
    case NEON_SSUBL2:   ssubl2(vf_l, rd, rn, rm); break;
    case NEON_SABAL:    sabal(vf_l, rd, rn, rm); break;
    case NEON_SABAL2:   sabal2(vf_l, rd, rn, rm); break;
    case NEON_UABAL:    uabal(vf_l, rd, rn, rm); break;
    case NEON_UABAL2:   uabal2(vf_l, rd, rn, rm); break;
    case NEON_SABDL:    sabdl(vf_l, rd, rn, rm); break;
    case NEON_SABDL2:   sabdl2(vf_l, rd, rn, rm); break;
    case NEON_UABDL:    uabdl(vf_l, rd, rn, rm); break;
    case NEON_UABDL2:   uabdl2(vf_l, rd, rn, rm); break;
    case NEON_SMLAL:    smlal(vf_l, rd, rn, rm); break;
    case NEON_SMLAL2:   smlal2(vf_l, rd, rn, rm); break;
    case NEON_UMLAL:    umlal(vf_l, rd, rn, rm); break;
    case NEON_UMLAL2:   umlal2(vf_l, rd, rn, rm); break;
    case NEON_SMLSL:    smlsl(vf_l, rd, rn, rm); break;
    case NEON_SMLSL2:   smlsl2(vf_l, rd, rn, rm); break;
    case NEON_UMLSL:    umlsl(vf_l, rd, rn, rm); break;
    case NEON_UMLSL2:   umlsl2(vf_l, rd, rn, rm); break;
    case NEON_SMULL:    smull(vf_l, rd, rn, rm); break;
    case NEON_SMULL2:   smull2(vf_l, rd, rn, rm); break;
    case NEON_UMULL:    umull(vf_l, rd, rn, rm); break;
    case NEON_UMULL2:   umull2(vf_l, rd, rn, rm); break;
    case NEON_SQDMLAL:  sqdmlal(vf_l, rd, rn, rm); break;
    case NEON_SQDMLAL2: sqdmlal2(vf_l, rd, rn, rm); break;
    case NEON_SQDMLSL:  sqdmlsl(vf_l, rd, rn, rm); break;
    case NEON_SQDMLSL2: sqdmlsl2(vf_l, rd, rn, rm); break;
    case NEON_SQDMULL:  sqdmull(vf_l, rd, rn, rm); break;
    case NEON_SQDMULL2: sqdmull2(vf_l, rd, rn, rm); break;
    case NEON_UADDW:    uaddw(vf_l, rd, rn, rm); break;
    case NEON_UADDW2:   uaddw2(vf_l, rd, rn, rm); break;
    case NEON_SADDW:    saddw(vf_l, rd, rn, rm); break;
    case NEON_SADDW2:   saddw2(vf_l, rd, rn, rm); break;
    case NEON_USUBW:    usubw(vf_l, rd, rn, rm); break;
    case NEON_USUBW2:   usubw2(vf_l, rd, rn, rm); break;
    case NEON_SSUBW:    ssubw(vf_l, rd, rn, rm); break;
    case NEON_SSUBW2:   ssubw2(vf_l, rd, rn, rm); break;
    case NEON_ADDHN:    addhn(vf, rd, rn, rm); break;
    case NEON_ADDHN2:   addhn2(vf, rd, rn, rm); break;
    case NEON_RADDHN:   raddhn(vf, rd, rn, rm); break;
    case NEON_RADDHN2:  raddhn2(vf, rd, rn, rm); break;
    case NEON_SUBHN:    subhn(vf, rd, rn, rm); break;
    case NEON_SUBHN2:   subhn2(vf, rd, rn, rm); break;
    case NEON_RSUBHN:   rsubhn(vf, rd, rn, rm); break;
    case NEON_RSUBHN2:  rsubhn2(vf, rd, rn, rm); break;
    default:
      VIXL_UNIMPLEMENTED();
  }
}
   3224 
   3225 
   3226 void Simulator::VisitNEONAcrossLanes(const Instruction* instr) {
   3227  NEONFormatDecoder nfd(instr);
   3228 
   3229  SimVRegister& rd = vreg(instr->Rd());
   3230  SimVRegister& rn = vreg(instr->Rn());
   3231 
   3232  // The input operand's VectorFormat is passed for these instructions.
   3233  if (instr->Mask(NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
   3234    VectorFormat vf = nfd.GetVectorFormat(nfd.FPFormatMap());
   3235 
   3236    switch (instr->Mask(NEONAcrossLanesFPMask)) {
   3237      case NEON_FMAXV: fmaxv(vf, rd, rn); break;
   3238      case NEON_FMINV: fminv(vf, rd, rn); break;
   3239      case NEON_FMAXNMV: fmaxnmv(vf, rd, rn); break;
   3240      case NEON_FMINNMV: fminnmv(vf, rd, rn); break;
   3241      default:
   3242        VIXL_UNIMPLEMENTED();
   3243    }
   3244  } else {
   3245    VectorFormat vf = nfd.GetVectorFormat();
   3246 
   3247    switch (instr->Mask(NEONAcrossLanesMask)) {
   3248      case NEON_ADDV:   addv(vf, rd, rn); break;
   3249      case NEON_SMAXV:  smaxv(vf, rd, rn); break;
   3250      case NEON_SMINV:  sminv(vf, rd, rn); break;
   3251      case NEON_UMAXV:  umaxv(vf, rd, rn); break;
   3252      case NEON_UMINV:  uminv(vf, rd, rn); break;
   3253      case NEON_SADDLV: saddlv(vf, rd, rn); break;
   3254      case NEON_UADDLV: uaddlv(vf, rd, rn); break;
   3255      default:
   3256        VIXL_UNIMPLEMENTED();
   3257    }
   3258  }
   3259 }
   3260 
   3261 
// Simulate NEON "by element" (indexed) instructions, e.g. MUL Vd, Vn,
// Vm.H[i]. Decodes the element index from the H:L(:M) bits, selects the
// operation via a member-function pointer, then applies it with the chosen
// lane of rm as the repeated operand.
void Simulator::VisitNEONByIndexedElement(const Instruction* instr) {
  NEONFormatDecoder nfd(instr);
  // vf_r is the regular (same-width) format; vf defaults to the long
  // format used by the widening multiplies, and is overwritten per-case.
  VectorFormat vf_r = nfd.GetVectorFormat();
  VectorFormat vf = nfd.GetVectorFormat(nfd.LongIntegerFormatMap());

  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());

  ByElementOp Op = NULL;

  int rm_reg = instr->Rm();
  int index = (instr->NEONH() << 1) | instr->NEONL();
  if (instr->NEONSize() == 1) {
    // Half-word elements: Rm is restricted to V0-V15 and the M bit extends
    // the index to three bits.
    rm_reg &= 0xf;
    index = (index << 1) | instr->NEONM();
  }

  switch (instr->Mask(NEONByIndexedElementMask)) {
    // Same-width operations use the regular format.
    case NEON_MUL_byelement: Op = &Simulator::mul; vf = vf_r; break;
    case NEON_MLA_byelement: Op = &Simulator::mla; vf = vf_r; break;
    case NEON_MLS_byelement: Op = &Simulator::mls; vf = vf_r; break;
    case NEON_SQDMULH_byelement: Op = &Simulator::sqdmulh; vf = vf_r; break;
    case NEON_SQRDMULH_byelement: Op = &Simulator::sqrdmulh; vf = vf_r; break;
    // Widening multiplies: the Q bit selects the lower- or upper-half
    // ("2" suffix) source lanes.
    case NEON_SMULL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::smull2;
      } else {
        Op = &Simulator::smull;
      }
      break;
    case NEON_UMULL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::umull2;
      } else {
        Op = &Simulator::umull;
      }
      break;
    case NEON_SMLAL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::smlal2;
      } else {
        Op = &Simulator::smlal;
      }
      break;
    case NEON_UMLAL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::umlal2;
      } else {
        Op = &Simulator::umlal;
      }
      break;
    case NEON_SMLSL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::smlsl2;
      } else {
        Op = &Simulator::smlsl;
      }
      break;
    case NEON_UMLSL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::umlsl2;
      } else {
        Op = &Simulator::umlsl;
      }
      break;
    case NEON_SQDMULL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::sqdmull2;
      } else {
        Op = &Simulator::sqdmull;
      }
      break;
    case NEON_SQDMLAL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::sqdmlal2;
      } else {
        Op = &Simulator::sqdmlal;
      }
      break;
    case NEON_SQDMLSL_byelement:
      if (instr->Mask(NEON_Q)) {
        Op = &Simulator::sqdmlsl2;
      } else {
        Op = &Simulator::sqdmlsl;
      }
      break;
    default:
      // Floating-point by-element forms: the index is encoded differently
      // (H, and L only for single-precision), and the FP format map applies.
      index = instr->NEONH();
      if ((instr->FPType() & 1) == 0) {
        index = (index << 1) | instr->NEONL();
      }

      vf = nfd.GetVectorFormat(nfd.FPFormatMap());

      switch (instr->Mask(NEONByIndexedElementFPMask)) {
        case NEON_FMUL_byelement: Op = &Simulator::fmul; break;
        case NEON_FMLA_byelement: Op = &Simulator::fmla; break;
        case NEON_FMLS_byelement: Op = &Simulator::fmls; break;
        case NEON_FMULX_byelement: Op = &Simulator::fmulx; break;
        default: VIXL_UNIMPLEMENTED();
      }
  }

  // Dispatch through the selected member-function pointer.
  (this->*Op)(vf, rd, rn, vreg(rm_reg), index);
}
   3367 
   3368 
// Simulate the NEON copy group: INS (element and general), UMOV/SMOV
// (vector element to general register) and DUP (element and general).
// The element size and index are encoded in imm5: the lowest set bit
// selects the lane size, and the bits above it form the lane index.
void Simulator::VisitNEONCopy(const Instruction* instr) {
  NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularFormatMap());
  VectorFormat vf = nfd.GetVectorFormat();

  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  int imm5 = instr->ImmNEON5();
  // tz (trailing zeros) gives log2 of the lane size in bytes.
  int tz = CountTrailingZeros(imm5, 32);
  int reg_index = imm5 >> (tz + 1);

  if (instr->Mask(NEONCopyInsElementMask) == NEON_INS_ELEMENT) {
    // INS (element): imm4 holds the source lane index.
    int imm4 = instr->ImmNEON4();
    int rn_index = imm4 >> tz;
    ins_element(vf, rd, reg_index, rn, rn_index);
  } else if (instr->Mask(NEONCopyInsGeneralMask) == NEON_INS_GENERAL) {
    // INS (general): insert from a general-purpose register.
    ins_immediate(vf, rd, reg_index, xreg(instr->Rn()));
  } else if (instr->Mask(NEONCopyUmovMask) == NEON_UMOV) {
    // UMOV: zero-extending move of a lane to a general register.
    uint64_t value = LogicVRegister(rn).Uint(vf, reg_index);
    value &= MaxUintFromFormat(vf);
    set_xreg(instr->Rd(), value);
  } else if (instr->Mask(NEONCopyUmovMask) == NEON_SMOV) {
    // SMOV: sign-extending move; Q selects W or X destination width.
    int64_t value = LogicVRegister(rn).Int(vf, reg_index);
    if (instr->NEONQ()) {
      set_xreg(instr->Rd(), value);
    } else {
      set_wreg(instr->Rd(), (int32_t)value);
    }
  } else if (instr->Mask(NEONCopyDupElementMask) == NEON_DUP_ELEMENT) {
    // DUP (element): broadcast one lane of rn into every lane of rd.
    dup_element(vf, rd, rn, reg_index);
  } else if (instr->Mask(NEONCopyDupGeneralMask) == NEON_DUP_GENERAL) {
    // DUP (general): broadcast a general register into every lane of rd.
    dup_immediate(vf, rd, xreg(instr->Rn()));
  } else {
    VIXL_UNIMPLEMENTED();
  }
}
   3404 
   3405 
   3406 void Simulator::VisitNEONExtract(const Instruction* instr) {
   3407  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
   3408  VectorFormat vf = nfd.GetVectorFormat();
   3409  SimVRegister& rd = vreg(instr->Rd());
   3410  SimVRegister& rn = vreg(instr->Rn());
   3411  SimVRegister& rm = vreg(instr->Rm());
   3412  if (instr->Mask(NEONExtractMask) == NEON_EXT) {
   3413    int index = instr->ImmNEONExt();
   3414    ext(vf, rd, rn, rm, index);
   3415  } else {
   3416    VIXL_UNIMPLEMENTED();
   3417  }
   3418 }
   3419 
   3420 
   3421 void Simulator::NEONLoadStoreMultiStructHelper(const Instruction* instr,
   3422                                               AddrMode addr_mode) {
   3423  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
   3424  VectorFormat vf = nfd.GetVectorFormat();
   3425 
   3426  uint64_t addr_base = xreg(instr->Rn(), Reg31IsStackPointer);
   3427  int reg_size = RegisterSizeInBytesFromFormat(vf);
   3428 
   3429  int reg[4];
   3430  uint64_t addr[4];
   3431  for (int i = 0; i < 4; i++) {
   3432    reg[i] = (instr->Rt() + i) % kNumberOfVRegisters;
   3433    addr[i] = addr_base + (i * reg_size);
   3434  }
   3435  int count = 1;
   3436  bool log_read = true;
   3437 
   3438  Instr itype = instr->Mask(NEONLoadStoreMultiStructMask);
   3439  if (((itype == NEON_LD1_1v) || (itype == NEON_LD1_2v) ||
   3440       (itype == NEON_LD1_3v) || (itype == NEON_LD1_4v) ||
   3441       (itype == NEON_ST1_1v) || (itype == NEON_ST1_2v) ||
   3442       (itype == NEON_ST1_3v) || (itype == NEON_ST1_4v)) &&
   3443      (instr->Bits(20, 16) != 0)) {
   3444    VIXL_UNREACHABLE();
   3445  }
   3446 
   3447  // We use the PostIndex mask here, as it works in this case for both Offset
   3448  // and PostIndex addressing.
   3449  switch (instr->Mask(NEONLoadStoreMultiStructPostIndexMask)) {
   3450    case NEON_LD1_4v:
   3451    case NEON_LD1_4v_post: ld1(vf, vreg(reg[3]), addr[3]); count++;
   3452      VIXL_FALLTHROUGH();
   3453    case NEON_LD1_3v:
   3454    case NEON_LD1_3v_post: ld1(vf, vreg(reg[2]), addr[2]); count++;
   3455      VIXL_FALLTHROUGH();
   3456    case NEON_LD1_2v:
   3457    case NEON_LD1_2v_post: ld1(vf, vreg(reg[1]), addr[1]); count++;
   3458      VIXL_FALLTHROUGH();
   3459    case NEON_LD1_1v:
   3460    case NEON_LD1_1v_post:
   3461      ld1(vf, vreg(reg[0]), addr[0]);
   3462      log_read = true;
   3463      break;
   3464    case NEON_ST1_4v:
   3465    case NEON_ST1_4v_post: st1(vf, vreg(reg[3]), addr[3]); count++;
   3466      VIXL_FALLTHROUGH();
   3467    case NEON_ST1_3v:
   3468    case NEON_ST1_3v_post: st1(vf, vreg(reg[2]), addr[2]); count++;
   3469      VIXL_FALLTHROUGH();
   3470    case NEON_ST1_2v:
   3471    case NEON_ST1_2v_post: st1(vf, vreg(reg[1]), addr[1]); count++;
   3472      VIXL_FALLTHROUGH();
   3473    case NEON_ST1_1v:
   3474    case NEON_ST1_1v_post:
   3475      st1(vf, vreg(reg[0]), addr[0]);
   3476      log_read = false;
   3477      break;
   3478    case NEON_LD2_post:
   3479    case NEON_LD2:
   3480      ld2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]);
   3481      count = 2;
   3482      break;
   3483    case NEON_ST2:
   3484    case NEON_ST2_post:
   3485      st2(vf, vreg(reg[0]), vreg(reg[1]), addr[0]);
   3486      count = 2;
   3487      break;
   3488    case NEON_LD3_post:
   3489    case NEON_LD3:
   3490      ld3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]);
   3491      count = 3;
   3492      break;
   3493    case NEON_ST3:
   3494    case NEON_ST3_post:
   3495      st3(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), addr[0]);
   3496      count = 3;
   3497      break;
   3498    case NEON_ST4:
   3499    case NEON_ST4_post:
   3500      st4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]),
   3501          addr[0]);
   3502      count = 4;
   3503      break;
   3504    case NEON_LD4_post:
   3505    case NEON_LD4:
   3506      ld4(vf, vreg(reg[0]), vreg(reg[1]), vreg(reg[2]), vreg(reg[3]),
   3507          addr[0]);
   3508      count = 4;
   3509      break;
   3510    default: VIXL_UNIMPLEMENTED();
   3511  }
   3512 
   3513  // Explicitly log the register update whilst we have type information.
   3514  for (int i = 0; i < count; i++) {
   3515    // For de-interleaving loads, only print the base address.
   3516    int lane_size = LaneSizeInBytesFromFormat(vf);
   3517    PrintRegisterFormat format = GetPrintRegisterFormatTryFP(
   3518        GetPrintRegisterFormatForSize(reg_size, lane_size));
   3519    if (log_read) {
   3520      LogVRead(addr_base, reg[i], format);
   3521    } else {
   3522      LogVWrite(addr_base, reg[i], format);
   3523    }
   3524  }
   3525 
   3526  if (addr_mode == PostIndex) {
   3527    int rm = instr->Rm();
   3528    // The immediate post index addressing mode is indicated by rm = 31.
   3529    // The immediate is implied by the number of vector registers used.
   3530    addr_base += (rm == 31) ? RegisterSizeInBytesFromFormat(vf) * count
   3531                            : xreg(rm);
   3532    set_xreg(instr->Rn(), addr_base);
   3533  } else {
   3534    VIXL_ASSERT(addr_mode == Offset);
   3535  }
   3536 }
   3537 
   3538 
// LD1-LD4/ST1-ST4 (multiple structures), register-offset form: no
// base-register writeback.
void Simulator::VisitNEONLoadStoreMultiStruct(const Instruction* instr) {
  NEONLoadStoreMultiStructHelper(instr, Offset);
}
   3542 
   3543 
// LD1-LD4/ST1-ST4 (multiple structures), post-index form: the base
// register is updated after the access.
void Simulator::VisitNEONLoadStoreMultiStructPostIndex(
    const Instruction* instr) {
  NEONLoadStoreMultiStructHelper(instr, PostIndex);
}
   3548 
   3549 
// Simulate the NEON load/store single-structure group: LDn/STn to/from one
// lane of each register, and the replicating LDnR forms. Shared between the
// Offset and PostIndex addressing modes.
void Simulator::NEONLoadStoreSingleStructHelper(const Instruction* instr,
                                                AddrMode addr_mode) {
  uint64_t addr = xreg(instr->Rn(), Reg31IsStackPointer);
  int rt = instr->Rt();

  // For the non-post-index LD1 forms, bits <20:16> must be zero.
  Instr itype = instr->Mask(NEONLoadStoreSingleStructMask);
  if (((itype == NEON_LD1_b) || (itype == NEON_LD1_h) ||
       (itype == NEON_LD1_s) || (itype == NEON_LD1_d)) &&
      (instr->Bits(20, 16) != 0)) {
    VIXL_UNREACHABLE();
  }

  // We use the PostIndex mask here, as it works in this case for both Offset
  // and PostIndex addressing.
  bool do_load = false;

  bool replicating = false;

  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LoadStoreFormatMap());
  VectorFormat vf_t = nfd.GetVectorFormat();

  // This switch determines the lane format (vf) and whether the operation
  // is a load; the LDn cases fall through into the matching STn cases.
  VectorFormat vf = kFormat16B;
  switch (instr->Mask(NEONLoadStoreSingleStructPostIndexMask)) {
    case NEON_LD1_b:
    case NEON_LD1_b_post:
    case NEON_LD2_b:
    case NEON_LD2_b_post:
    case NEON_LD3_b:
    case NEON_LD3_b_post:
    case NEON_LD4_b:
    case NEON_LD4_b_post: do_load = true;
      VIXL_FALLTHROUGH();
    case NEON_ST1_b:
    case NEON_ST1_b_post:
    case NEON_ST2_b:
    case NEON_ST2_b_post:
    case NEON_ST3_b:
    case NEON_ST3_b_post:
    case NEON_ST4_b:
    case NEON_ST4_b_post: break;

    case NEON_LD1_h:
    case NEON_LD1_h_post:
    case NEON_LD2_h:
    case NEON_LD2_h_post:
    case NEON_LD3_h:
    case NEON_LD3_h_post:
    case NEON_LD4_h:
    case NEON_LD4_h_post: do_load = true;
      VIXL_FALLTHROUGH();
    case NEON_ST1_h:
    case NEON_ST1_h_post:
    case NEON_ST2_h:
    case NEON_ST2_h_post:
    case NEON_ST3_h:
    case NEON_ST3_h_post:
    case NEON_ST4_h:
    case NEON_ST4_h_post: vf = kFormat8H; break;
    case NEON_LD1_s:
    case NEON_LD1_s_post:
    case NEON_LD2_s:
    case NEON_LD2_s_post:
    case NEON_LD3_s:
    case NEON_LD3_s_post:
    case NEON_LD4_s:
    case NEON_LD4_s_post: do_load = true;
      VIXL_FALLTHROUGH();
    case NEON_ST1_s:
    case NEON_ST1_s_post:
    case NEON_ST2_s:
    case NEON_ST2_s_post:
    case NEON_ST3_s:
    case NEON_ST3_s_post:
    case NEON_ST4_s:
    case NEON_ST4_s_post: {
      // The S and D lane forms share encodings, differing only in
      // NEONLSSize<0>; these asserts pin that relationship.
      VIXL_STATIC_ASSERT((NEON_LD1_s | (1 << NEONLSSize_offset)) == NEON_LD1_d);
      VIXL_STATIC_ASSERT(
          (NEON_LD1_s_post | (1 << NEONLSSize_offset)) == NEON_LD1_d_post);
      VIXL_STATIC_ASSERT((NEON_ST1_s | (1 << NEONLSSize_offset)) == NEON_ST1_d);
      VIXL_STATIC_ASSERT(
          (NEON_ST1_s_post | (1 << NEONLSSize_offset)) == NEON_ST1_d_post);
      vf = ((instr->NEONLSSize() & 1) == 0) ? kFormat4S : kFormat2D;
      break;
    }

    case NEON_LD1R:
    case NEON_LD1R_post:
    case NEON_LD2R:
    case NEON_LD2R_post:
    case NEON_LD3R:
    case NEON_LD3R_post:
    case NEON_LD4R:
    case NEON_LD4R_post: {
      // Load-and-replicate: use the full load/store format.
      vf = vf_t;
      do_load = true;
      replicating = true;
      break;
    }
    default: VIXL_UNIMPLEMENTED();
  }

  PrintRegisterFormat print_format =
      GetPrintRegisterFormatTryFP(GetPrintRegisterFormat(vf));
  // Make sure that the print_format only includes a single lane.
  print_format =
      static_cast<PrintRegisterFormat>(print_format & ~kPrintRegAsVectorMask);

  int esize = LaneSizeInBytesFromFormat(vf);
  int index_shift = LaneSizeInBytesLog2FromFormat(vf);
  int lane = instr->NEONLSIndex(index_shift);
  int scale = 0;
  // Consecutive (mod 32) transfer registers for the 2/3/4-register forms.
  int rt2 = (rt + 1) % kNumberOfVRegisters;
  int rt3 = (rt2 + 1) % kNumberOfVRegisters;
  int rt4 = (rt3 + 1) % kNumberOfVRegisters;
  switch (instr->Mask(NEONLoadStoreSingleLenMask)) {
    case NEONLoadStoreSingle1:
      scale = 1;
      if (do_load) {
        if (replicating) {
          ld1r(vf, vreg(rt), addr);
        } else  {
          ld1(vf, vreg(rt), lane, addr);
        }
        LogVRead(addr, rt, print_format, lane);
      } else {
        st1(vf, vreg(rt), lane, addr);
        LogVWrite(addr, rt, print_format, lane);
      }
      break;
    case NEONLoadStoreSingle2:
      scale = 2;
      if (do_load) {
        if (replicating) {
          ld2r(vf, vreg(rt), vreg(rt2), addr);
        } else {
          ld2(vf, vreg(rt), vreg(rt2), lane, addr);
        }
        LogVRead(addr, rt, print_format, lane);
        LogVRead(addr + esize, rt2, print_format, lane);
      } else {
        st2(vf, vreg(rt), vreg(rt2), lane, addr);
        LogVWrite(addr, rt, print_format, lane);
        LogVWrite(addr + esize, rt2, print_format, lane);
      }
      break;
    case NEONLoadStoreSingle3:
      scale = 3;
      if (do_load) {
        if (replicating) {
          ld3r(vf, vreg(rt), vreg(rt2), vreg(rt3), addr);
        } else {
          ld3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr);
        }
        LogVRead(addr, rt, print_format, lane);
        LogVRead(addr + esize, rt2, print_format, lane);
        LogVRead(addr + (2 * esize), rt3, print_format, lane);
      } else {
        st3(vf, vreg(rt), vreg(rt2), vreg(rt3), lane, addr);
        LogVWrite(addr, rt, print_format, lane);
        LogVWrite(addr + esize, rt2, print_format, lane);
        LogVWrite(addr + (2 * esize), rt3, print_format, lane);
      }
      break;
    case NEONLoadStoreSingle4:
      scale = 4;
      if (do_load) {
        if (replicating) {
          ld4r(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), addr);
        } else {
          ld4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr);
        }
        LogVRead(addr, rt, print_format, lane);
        LogVRead(addr + esize, rt2, print_format, lane);
        LogVRead(addr + (2 * esize), rt3, print_format, lane);
        LogVRead(addr + (3 * esize), rt4, print_format, lane);
      } else {
        st4(vf, vreg(rt), vreg(rt2), vreg(rt3), vreg(rt4), lane, addr);
        LogVWrite(addr, rt, print_format, lane);
        LogVWrite(addr + esize, rt2, print_format, lane);
        LogVWrite(addr + (2 * esize), rt3, print_format, lane);
        LogVWrite(addr + (3 * esize), rt4, print_format, lane);
      }
      break;
    default: VIXL_UNIMPLEMENTED();
  }

  if (addr_mode == PostIndex) {
    int rm = instr->Rm();
    // rm == 31 selects the immediate post-index form; the immediate is
    // implied by the number of lanes transferred (scale * lane_size).
    int lane_size = LaneSizeInBytesFromFormat(vf);
    set_xreg(instr->Rn(), addr + ((rm == 31) ? (scale * lane_size) : xreg(rm)));
  }
}
   3742 
   3743 
// LDn/STn (single structure) and LDnR, register-offset form: no
// base-register writeback.
void Simulator::VisitNEONLoadStoreSingleStruct(const Instruction* instr) {
  NEONLoadStoreSingleStructHelper(instr, Offset);
}
   3747 
   3748 
// LDn/STn (single structure) and LDnR, post-index form: the base register
// is updated after the access.
void Simulator::VisitNEONLoadStoreSingleStructPostIndex(
    const Instruction* instr) {
  NEONLoadStoreSingleStructHelper(instr, PostIndex);
}
   3753 
   3754 
// Simulate the NEON modified-immediate group: MOVI, MVNI, ORR and BIC with
// an encoded immediate, plus the FMOV (vector, immediate) forms. The cmode
// and op fields select both the lane format and how the abcdefgh immediate
// byte is expanded, following the AdvSIMD modified-immediate tables.
void Simulator::VisitNEONModifiedImmediate(const Instruction* instr) {
  SimVRegister& rd = vreg(instr->Rd());
  int cmode = instr->NEONCmode();
  int cmode_3_1 = (cmode >> 1) & 7;
  int cmode_3 = (cmode >> 3) & 1;
  int cmode_2 = (cmode >> 2) & 1;
  int cmode_1 = (cmode >> 1) & 1;
  int cmode_0 = cmode & 1;
  int q = instr->NEONQ();
  int op_bit = instr->NEONModImmOp();
  uint64_t imm8  = instr->ImmNEONabcdefgh();

  // Find the format and immediate value
  uint64_t imm = 0;
  VectorFormat vform = kFormatUndefined;
  switch (cmode_3_1) {
    case 0x0:
    case 0x1:
    case 0x2:
    case 0x3:
      // 32-bit lanes, immediate shifted left by 0, 8, 16 or 24 bits.
      vform = (q == 1) ? kFormat4S : kFormat2S;
      imm = imm8 << (8 * cmode_3_1);
      break;
    case 0x4:
    case 0x5:
      // 16-bit lanes, immediate shifted left by 0 or 8 bits.
      vform = (q == 1) ? kFormat8H : kFormat4H;
      imm = imm8 << (8 * cmode_1);
      break;
    case 0x6:
      // 32-bit lanes, "shift ones" forms: low bits filled with ones.
      vform = (q == 1) ? kFormat4S : kFormat2S;
      if (cmode_0 == 0) {
        imm = imm8 << 8  | 0x000000ff;
      } else {
        imm = imm8 << 16 | 0x0000ffff;
      }
      break;
    case 0x7:
      if (cmode_0 == 0 && op_bit == 0) {
        // MOVI (byte lanes).
        vform = q ? kFormat16B : kFormat8B;
        imm = imm8;
      } else if (cmode_0 == 0 && op_bit == 1) {
        // 64-bit immediate: each bit of imm8 expands to a whole byte.
        vform = q ? kFormat2D : kFormat1D;
        imm = 0;
        for (int i = 0; i < 8; ++i) {
          if (imm8 & (1ULL << i)) {
            imm |= (UINT64_C(0xff) << (8 * i));
          }
        }
      } else {  // cmode_0 == 1, cmode == 0xf.
        // FMOV (vector, immediate), single or double precision.
        if (op_bit == 0) {
          vform = q ? kFormat4S : kFormat2S;
          imm = FloatToRawbits(instr->ImmNEONFP32());
        } else if (q == 1) {
          vform = kFormat2D;
          imm = DoubleToRawbits(instr->ImmNEONFP64());
        } else {
          // q == 0, op == 1, cmode == 0xf is an unallocated encoding.
          VIXL_ASSERT((q == 0) && (op_bit == 1) && (cmode == 0xf));
          VisitUnallocated(instr);
        }
      }
      break;
    default: VIXL_UNREACHABLE(); break;
  }

  // Find the operation
  // (This mirrors the architecture's cmode decode table, including the
  // redundant-looking final branch where both arms select MOVI.)
  NEONModifiedImmediateOp op;
  if (cmode_3 == 0) {
    if (cmode_0 == 0) {
      op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
    } else {  // cmode<0> == '1'
      op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
    }
  } else {  // cmode<3> == '1'
    if (cmode_2 == 0) {
      if (cmode_0 == 0) {
        op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
      } else {  // cmode<0> == '1'
        op = op_bit ? NEONModifiedImmediate_BIC : NEONModifiedImmediate_ORR;
      }
    } else {  // cmode<2> == '1'
       if (cmode_1 == 0) {
         op = op_bit ? NEONModifiedImmediate_MVNI : NEONModifiedImmediate_MOVI;
       } else {  // cmode<1> == '1'
         if (cmode_0 == 0) {
           op = NEONModifiedImmediate_MOVI;
         } else {  // cmode<0> == '1'
           op = NEONModifiedImmediate_MOVI;
         }
       }
    }
  }

  // Call the logic function
  if (op == NEONModifiedImmediate_ORR) {
    orr(vform, rd, rd, imm);
  } else if (op == NEONModifiedImmediate_BIC) {
    bic(vform, rd, rd, imm);
  } else  if (op == NEONModifiedImmediate_MOVI) {
    movi(vform, rd, imm);
  } else if (op == NEONModifiedImmediate_MVNI) {
    mvni(vform, rd, imm);
  } else {
    VisitUnimplemented(instr);
  }
}
   3860 
   3861 
// Simulate the NEON scalar two-register miscellaneous group: scalar
// compares against zero, ABS/NEG (with saturating variants), the scalar FP
// conversions and reciprocal estimates, and the saturating narrows.
void Simulator::VisitNEONScalar2RegMisc(const Instruction* instr) {
  NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
  VectorFormat vf = nfd.GetVectorFormat();

  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());

  if (instr->Mask(NEON2RegMiscOpcode) <= NEON_NEG_scalar_opcode) {
    // These instructions all use a two bit size field, except NOT and RBIT,
    // which use the field to encode the operation.
    switch (instr->Mask(NEONScalar2RegMiscMask)) {
      case NEON_CMEQ_zero_scalar: cmp(vf, rd, rn, 0, eq); break;
      case NEON_CMGE_zero_scalar: cmp(vf, rd, rn, 0, ge); break;
      case NEON_CMGT_zero_scalar: cmp(vf, rd, rn, 0, gt); break;
      case NEON_CMLT_zero_scalar: cmp(vf, rd, rn, 0, lt); break;
      case NEON_CMLE_zero_scalar: cmp(vf, rd, rn, 0, le); break;
      case NEON_ABS_scalar:       abs(vf, rd, rn); break;
      case NEON_SQABS_scalar:     abs(vf, rd, rn).SignedSaturate(vf); break;
      case NEON_NEG_scalar:       neg(vf, rd, rn); break;
      case NEON_SQNEG_scalar:     neg(vf, rd, rn).SignedSaturate(vf); break;
      case NEON_SUQADD_scalar:    suqadd(vf, rd, rn); break;
      case NEON_USQADD_scalar:    usqadd(vf, rd, rn); break;
      default: VIXL_UNIMPLEMENTED(); break;
    }
  } else {
    VectorFormat fpf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
    // Rounding mode from FPCR, for the instructions that honour it.
    FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());

    // These instructions all use a one bit size field, except SQXTUN, SQXTN
    // and UQXTN, which use a two bit size field.
    switch (instr->Mask(NEONScalar2RegMiscFPMask)) {
      case NEON_FRECPE_scalar:     frecpe(fpf, rd, rn, fpcr_rounding); break;
      case NEON_FRECPX_scalar:     frecpx(fpf, rd, rn); break;
      case NEON_FRSQRTE_scalar:    frsqrte(fpf, rd, rn); break;
      case NEON_FCMGT_zero_scalar: fcmp_zero(fpf, rd, rn, gt); break;
      case NEON_FCMGE_zero_scalar: fcmp_zero(fpf, rd, rn, ge); break;
      case NEON_FCMEQ_zero_scalar: fcmp_zero(fpf, rd, rn, eq); break;
      case NEON_FCMLE_zero_scalar: fcmp_zero(fpf, rd, rn, le); break;
      case NEON_FCMLT_zero_scalar: fcmp_zero(fpf, rd, rn, lt); break;
      case NEON_SCVTF_scalar:      scvtf(fpf, rd, rn, 0, fpcr_rounding); break;
      case NEON_UCVTF_scalar:      ucvtf(fpf, rd, rn, 0, fpcr_rounding); break;
      // FCVT{N,P,M,Z,A}{S,U}: float-to-int with an explicit rounding mode.
      case NEON_FCVTNS_scalar: fcvts(fpf, rd, rn, FPTieEven); break;
      case NEON_FCVTNU_scalar: fcvtu(fpf, rd, rn, FPTieEven); break;
      case NEON_FCVTPS_scalar: fcvts(fpf, rd, rn, FPPositiveInfinity); break;
      case NEON_FCVTPU_scalar: fcvtu(fpf, rd, rn, FPPositiveInfinity); break;
      case NEON_FCVTMS_scalar: fcvts(fpf, rd, rn, FPNegativeInfinity); break;
      case NEON_FCVTMU_scalar: fcvtu(fpf, rd, rn, FPNegativeInfinity); break;
      case NEON_FCVTZS_scalar: fcvts(fpf, rd, rn, FPZero); break;
      case NEON_FCVTZU_scalar: fcvtu(fpf, rd, rn, FPZero); break;
      case NEON_FCVTAS_scalar: fcvts(fpf, rd, rn, FPTieAway); break;
      case NEON_FCVTAU_scalar: fcvtu(fpf, rd, rn, FPTieAway); break;
      case NEON_FCVTXN_scalar:
        // Unlike all of the other FP instructions above, fcvtxn encodes dest
        // size S as size<0>=1. There's only one case, so we ignore the form.
        VIXL_ASSERT(instr->Bit(22) == 1);
        fcvtxn(kFormatS, rd, rn);
        break;
      default:
        // Saturating narrows, which use the two-bit scalar size field.
        switch (instr->Mask(NEONScalar2RegMiscMask)) {
          case NEON_SQXTN_scalar:  sqxtn(vf, rd, rn); break;
          case NEON_UQXTN_scalar:  uqxtn(vf, rd, rn); break;
          case NEON_SQXTUN_scalar: sqxtun(vf, rd, rn); break;
          default:
            VIXL_UNIMPLEMENTED();
        }
    }
  }
}
   3930 
   3931 
   3932 void Simulator::VisitNEONScalar3Diff(const Instruction* instr) {
   3933  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
   3934  VectorFormat vf = nfd.GetVectorFormat();
   3935 
   3936  SimVRegister& rd = vreg(instr->Rd());
   3937  SimVRegister& rn = vreg(instr->Rn());
   3938  SimVRegister& rm = vreg(instr->Rm());
   3939  switch (instr->Mask(NEONScalar3DiffMask)) {
   3940    case NEON_SQDMLAL_scalar: sqdmlal(vf, rd, rn, rm); break;
   3941    case NEON_SQDMLSL_scalar: sqdmlsl(vf, rd, rn, rm); break;
   3942    case NEON_SQDMULL_scalar: sqdmull(vf, rd, rn, rm); break;
   3943    default:
   3944      VIXL_UNIMPLEMENTED();
   3945  }
   3946 }
   3947 
   3948 
// Visit a scalar three-register-same-widths NEON instruction. Floating-point
// and integer encodings are distinguished first, then the opcode is
// dispatched to the corresponding logic helper.
void Simulator::VisitNEONScalar3Same(const Instruction* instr) {
  NEONFormatDecoder nfd(instr, NEONFormatDecoder::ScalarFormatMap());
  VectorFormat vf = nfd.GetVectorFormat();

  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  SimVRegister& rm = vreg(instr->Rm());

  if (instr->Mask(NEONScalar3SameFPFMask) == NEONScalar3SameFPFixed) {
    // Floating-point variants use the FP scalar format map instead of the
    // integer one selected above.
    vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
    switch (instr->Mask(NEONScalar3SameFPMask)) {
      case NEON_FMULX_scalar:   fmulx(vf, rd, rn, rm); break;
      case NEON_FACGE_scalar:   fabscmp(vf, rd, rn, rm, ge); break;
      case NEON_FACGT_scalar:   fabscmp(vf, rd, rn, rm, gt); break;
      case NEON_FCMEQ_scalar:   fcmp(vf, rd, rn, rm, eq); break;
      case NEON_FCMGE_scalar:   fcmp(vf, rd, rn, rm, ge); break;
      case NEON_FCMGT_scalar:   fcmp(vf, rd, rn, rm, gt); break;
      case NEON_FRECPS_scalar:  frecps(vf, rd, rn, rm); break;
      case NEON_FRSQRTS_scalar: frsqrts(vf, rd, rn, rm); break;
      case NEON_FABD_scalar:    fabd(vf, rd, rn, rm); break;
      default:
        VIXL_UNIMPLEMENTED();
    }
  } else {
    switch (instr->Mask(NEONScalar3SameMask)) {
      case NEON_ADD_scalar:      add(vf, rd, rn, rm); break;
      case NEON_SUB_scalar:      sub(vf, rd, rn, rm); break;
      case NEON_CMEQ_scalar:     cmp(vf, rd, rn, rm, eq); break;
      case NEON_CMGE_scalar:     cmp(vf, rd, rn, rm, ge); break;
      case NEON_CMGT_scalar:     cmp(vf, rd, rn, rm, gt); break;
      case NEON_CMHI_scalar:     cmp(vf, rd, rn, rm, hi); break;
      case NEON_CMHS_scalar:     cmp(vf, rd, rn, rm, hs); break;
      case NEON_CMTST_scalar:    cmptst(vf, rd, rn, rm); break;
      case NEON_USHL_scalar:     ushl(vf, rd, rn, rm); break;
      case NEON_SSHL_scalar:     sshl(vf, rd, rn, rm); break;
      case NEON_SQDMULH_scalar:  sqdmulh(vf, rd, rn, rm); break;
      case NEON_SQRDMULH_scalar: sqrdmulh(vf, rd, rn, rm); break;
      // Saturating and rounding variants apply the base operation, then
      // saturate and/or round the widened intermediate result held in rd.
      case NEON_UQADD_scalar:
        add(vf, rd, rn, rm).UnsignedSaturate(vf);
        break;
      case NEON_SQADD_scalar:
        add(vf, rd, rn, rm).SignedSaturate(vf);
        break;
      case NEON_UQSUB_scalar:
        sub(vf, rd, rn, rm).UnsignedSaturate(vf);
        break;
      case NEON_SQSUB_scalar:
        sub(vf, rd, rn, rm).SignedSaturate(vf);
        break;
      case NEON_UQSHL_scalar:
        ushl(vf, rd, rn, rm).UnsignedSaturate(vf);
        break;
      case NEON_SQSHL_scalar:
        sshl(vf, rd, rn, rm).SignedSaturate(vf);
        break;
      case NEON_URSHL_scalar:
        ushl(vf, rd, rn, rm).Round(vf);
        break;
      case NEON_SRSHL_scalar:
        sshl(vf, rd, rn, rm).Round(vf);
        break;
      case NEON_UQRSHL_scalar:
        ushl(vf, rd, rn, rm).Round(vf).UnsignedSaturate(vf);
        break;
      case NEON_SQRSHL_scalar:
        sshl(vf, rd, rn, rm).Round(vf).SignedSaturate(vf);
        break;
      default:
        VIXL_UNIMPLEMENTED();
    }
  }
}
   4021 
   4022 
   4023 void Simulator::VisitNEONScalarByIndexedElement(const Instruction* instr) {
   4024  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LongScalarFormatMap());
   4025  VectorFormat vf = nfd.GetVectorFormat();
   4026  VectorFormat vf_r = nfd.GetVectorFormat(nfd.ScalarFormatMap());
   4027 
   4028  SimVRegister& rd = vreg(instr->Rd());
   4029  SimVRegister& rn = vreg(instr->Rn());
   4030  ByElementOp Op = NULL;
   4031 
   4032  int rm_reg = instr->Rm();
   4033  int index = (instr->NEONH() << 1) | instr->NEONL();
   4034  if (instr->NEONSize() == 1) {
   4035    rm_reg &= 0xf;
   4036    index = (index << 1) | instr->NEONM();
   4037  }
   4038 
   4039  switch (instr->Mask(NEONScalarByIndexedElementMask)) {
   4040    case NEON_SQDMULL_byelement_scalar: Op = &Simulator::sqdmull; break;
   4041    case NEON_SQDMLAL_byelement_scalar: Op = &Simulator::sqdmlal; break;
   4042    case NEON_SQDMLSL_byelement_scalar: Op = &Simulator::sqdmlsl; break;
   4043    case NEON_SQDMULH_byelement_scalar:
   4044      Op = &Simulator::sqdmulh;
   4045      vf = vf_r;
   4046      break;
   4047    case NEON_SQRDMULH_byelement_scalar:
   4048      Op = &Simulator::sqrdmulh;
   4049      vf = vf_r;
   4050      break;
   4051    default:
   4052      vf = nfd.GetVectorFormat(nfd.FPScalarFormatMap());
   4053      index = instr->NEONH();
   4054      if ((instr->FPType() & 1) == 0) {
   4055        index = (index << 1) | instr->NEONL();
   4056      }
   4057      switch (instr->Mask(NEONScalarByIndexedElementFPMask)) {
   4058        case NEON_FMUL_byelement_scalar: Op = &Simulator::fmul; break;
   4059        case NEON_FMLA_byelement_scalar: Op = &Simulator::fmla; break;
   4060        case NEON_FMLS_byelement_scalar: Op = &Simulator::fmls; break;
   4061        case NEON_FMULX_byelement_scalar: Op = &Simulator::fmulx; break;
   4062        default: VIXL_UNIMPLEMENTED();
   4063      }
   4064  }
   4065 
   4066  (this->*Op)(vf, rd, rn, vreg(rm_reg), index);
   4067 }
   4068 
   4069 
   4070 void Simulator::VisitNEONScalarCopy(const Instruction* instr) {
   4071  NEONFormatDecoder nfd(instr, NEONFormatDecoder::TriangularScalarFormatMap());
   4072  VectorFormat vf = nfd.GetVectorFormat();
   4073 
   4074  SimVRegister& rd = vreg(instr->Rd());
   4075  SimVRegister& rn = vreg(instr->Rn());
   4076 
   4077  if (instr->Mask(NEONScalarCopyMask) == NEON_DUP_ELEMENT_scalar) {
   4078    int imm5 = instr->ImmNEON5();
   4079    int tz = CountTrailingZeros(imm5, 32);
   4080    int rn_index = imm5 >> (tz + 1);
   4081    dup_element(vf, rd, rn, rn_index);
   4082  } else {
   4083    VIXL_UNIMPLEMENTED();
   4084  }
   4085 }
   4086 
   4087 
   4088 void Simulator::VisitNEONScalarPairwise(const Instruction* instr) {
   4089  NEONFormatDecoder nfd(instr, NEONFormatDecoder::FPScalarFormatMap());
   4090  VectorFormat vf = nfd.GetVectorFormat();
   4091 
   4092  SimVRegister& rd = vreg(instr->Rd());
   4093  SimVRegister& rn = vreg(instr->Rn());
   4094  switch (instr->Mask(NEONScalarPairwiseMask)) {
   4095    case NEON_ADDP_scalar:    addp(vf, rd, rn); break;
   4096    case NEON_FADDP_scalar:   faddp(vf, rd, rn); break;
   4097    case NEON_FMAXP_scalar:   fmaxp(vf, rd, rn); break;
   4098    case NEON_FMAXNMP_scalar: fmaxnmp(vf, rd, rn); break;
   4099    case NEON_FMINP_scalar:   fminp(vf, rd, rn); break;
   4100    case NEON_FMINNMP_scalar: fminnmp(vf, rd, rn); break;
   4101    default:
   4102      VIXL_UNIMPLEMENTED();
   4103  }
   4104 }
   4105 
   4106 
// Visit a scalar shift-by-immediate NEON instruction (shifts, saturating
// shifts, shift-and-accumulate, narrowing shifts and fixed-point converts).
void Simulator::VisitNEONScalarShiftImmediate(const Instruction* instr) {
  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  // SCVTF/UCVTF honour the rounding mode currently selected in FPCR.
  FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());

  // Map immh (bits 22-19) to the scalar element format: the position of the
  // highest set bit of immh selects B, H, S or D.
  static const NEONFormatMap map = {
    {22, 21, 20, 19},
    {NF_UNDEF, NF_B, NF_H, NF_H, NF_S, NF_S, NF_S, NF_S,
     NF_D,     NF_D, NF_D, NF_D, NF_D, NF_D, NF_D, NF_D}
  };
  NEONFormatDecoder nfd(instr, &map);
  VectorFormat vf = nfd.GetVectorFormat();

  // Decode the shift amount from immh:immb. A right shift is encoded as
  // (2 * esize) - immhimmb; a left shift as immhimmb - esize, where esize is
  // implied by the highest set bit of immh.
  int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
  int immhimmb = instr->ImmNEONImmhImmb();
  int right_shift = (16 << highestSetBit) - immhimmb;
  int left_shift = immhimmb - (8 << highestSetBit);
  switch (instr->Mask(NEONScalarShiftImmediateMask)) {
    case NEON_SHL_scalar:       shl(vf, rd, rn, left_shift); break;
    case NEON_SLI_scalar:       sli(vf, rd, rn, left_shift); break;
    case NEON_SQSHL_imm_scalar: sqshl(vf, rd, rn, left_shift); break;
    case NEON_UQSHL_imm_scalar: uqshl(vf, rd, rn, left_shift); break;
    case NEON_SQSHLU_scalar:    sqshlu(vf, rd, rn, left_shift); break;
    case NEON_SRI_scalar:       sri(vf, rd, rn, right_shift); break;
    case NEON_SSHR_scalar:      sshr(vf, rd, rn, right_shift); break;
    case NEON_USHR_scalar:      ushr(vf, rd, rn, right_shift); break;
    case NEON_SRSHR_scalar:     sshr(vf, rd, rn, right_shift).Round(vf); break;
    case NEON_URSHR_scalar:     ushr(vf, rd, rn, right_shift).Round(vf); break;
    case NEON_SSRA_scalar:      ssra(vf, rd, rn, right_shift); break;
    case NEON_USRA_scalar:      usra(vf, rd, rn, right_shift); break;
    case NEON_SRSRA_scalar:     srsra(vf, rd, rn, right_shift); break;
    case NEON_URSRA_scalar:     ursra(vf, rd, rn, right_shift); break;
    case NEON_UQSHRN_scalar:    uqshrn(vf, rd, rn, right_shift); break;
    case NEON_UQRSHRN_scalar:   uqrshrn(vf, rd, rn, right_shift); break;
    case NEON_SQSHRN_scalar:    sqshrn(vf, rd, rn, right_shift); break;
    case NEON_SQRSHRN_scalar:   sqrshrn(vf, rd, rn, right_shift); break;
    case NEON_SQSHRUN_scalar:   sqshrun(vf, rd, rn, right_shift); break;
    case NEON_SQRSHRUN_scalar:  sqrshrun(vf, rd, rn, right_shift); break;
    case NEON_FCVTZS_imm_scalar: fcvts(vf, rd, rn, FPZero, right_shift); break;
    case NEON_FCVTZU_imm_scalar: fcvtu(vf, rd, rn, FPZero, right_shift); break;
    case NEON_SCVTF_imm_scalar:
      scvtf(vf, rd, rn, right_shift, fpcr_rounding);
      break;
    case NEON_UCVTF_imm_scalar:
      ucvtf(vf, rd, rn, right_shift, fpcr_rounding);
      break;
    default:
      VIXL_UNIMPLEMENTED();
  }
}
   4157 
   4158 
// Visit a vector shift-by-immediate NEON instruction. This covers the
// same-width shifts as well as the lengthening (SSHLL/USHLL) and narrowing
// (SHRN and friends) variants, which switch to the "long" format and select
// the upper-half ("2") form when the Q bit is set.
void Simulator::VisitNEONShiftImmediate(const Instruction* instr) {
  SimVRegister& rd = vreg(instr->Rd());
  SimVRegister& rn = vreg(instr->Rn());
  // SCVTF/UCVTF honour the rounding mode currently selected in FPCR.
  FPRounding fpcr_rounding = static_cast<FPRounding>(fpcr().RMode());

  // 00010->8B, 00011->16B, 001x0->4H, 001x1->8H,
  // 01xx0->2S, 01xx1->4S, 1xxx1->2D, all others undefined.
  static const NEONFormatMap map = {
    {22, 21, 20, 19, 30},
    {NF_UNDEF, NF_UNDEF, NF_8B,    NF_16B, NF_4H,    NF_8H, NF_4H,    NF_8H,
     NF_2S,    NF_4S,    NF_2S,    NF_4S,  NF_2S,    NF_4S, NF_2S,    NF_4S,
     NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,  NF_UNDEF, NF_2D, NF_UNDEF, NF_2D,
     NF_UNDEF, NF_2D,    NF_UNDEF, NF_2D,  NF_UNDEF, NF_2D, NF_UNDEF, NF_2D}
  };
  NEONFormatDecoder nfd(instr, &map);
  VectorFormat vf = nfd.GetVectorFormat();

  // 0001->8H, 001x->4S, 01xx->2D, all others undefined.
  static const NEONFormatMap map_l = {
    {22, 21, 20, 19},
    {NF_UNDEF, NF_8H, NF_4S, NF_4S, NF_2D, NF_2D, NF_2D, NF_2D}
  };
  VectorFormat vf_l = nfd.GetVectorFormat(&map_l);

  // Decode the shift amount from immh:immb. A right shift is encoded as
  // (2 * esize) - immhimmb; a left shift as immhimmb - esize, where esize is
  // implied by the highest set bit of immh.
  int highestSetBit = HighestSetBitPosition(instr->ImmNEONImmh());
  int immhimmb = instr->ImmNEONImmhImmb();
  int right_shift = (16 << highestSetBit) - immhimmb;
  int left_shift = immhimmb - (8 << highestSetBit);

  switch (instr->Mask(NEONShiftImmediateMask)) {
    case NEON_SHL:    shl(vf, rd, rn, left_shift); break;
    case NEON_SLI:    sli(vf, rd, rn, left_shift); break;
    case NEON_SQSHLU: sqshlu(vf, rd, rn, left_shift); break;
    case NEON_SRI:    sri(vf, rd, rn, right_shift); break;
    case NEON_SSHR:   sshr(vf, rd, rn, right_shift); break;
    case NEON_USHR:   ushr(vf, rd, rn, right_shift); break;
    case NEON_SRSHR:  sshr(vf, rd, rn, right_shift).Round(vf); break;
    case NEON_URSHR:  ushr(vf, rd, rn, right_shift).Round(vf); break;
    case NEON_SSRA:   ssra(vf, rd, rn, right_shift); break;
    case NEON_USRA:   usra(vf, rd, rn, right_shift); break;
    case NEON_SRSRA:  srsra(vf, rd, rn, right_shift); break;
    case NEON_URSRA:  ursra(vf, rd, rn, right_shift); break;
    case NEON_SQSHL_imm: sqshl(vf, rd, rn, left_shift); break;
    case NEON_UQSHL_imm: uqshl(vf, rd, rn, left_shift); break;
    case NEON_SCVTF_imm: scvtf(vf, rd, rn, right_shift, fpcr_rounding); break;
    case NEON_UCVTF_imm: ucvtf(vf, rd, rn, right_shift, fpcr_rounding); break;
    case NEON_FCVTZS_imm: fcvts(vf, rd, rn, FPZero, right_shift); break;
    case NEON_FCVTZU_imm: fcvtu(vf, rd, rn, FPZero, right_shift); break;
    // Lengthening shifts write a double-width result; the Q bit selects
    // whether the low or high half of the source is consumed.
    case NEON_SSHLL:
      vf = vf_l;
      if (instr->Mask(NEON_Q)) {
        sshll2(vf, rd, rn, left_shift);
      } else {
        sshll(vf, rd, rn, left_shift);
      }
      break;
    case NEON_USHLL:
      vf = vf_l;
      if (instr->Mask(NEON_Q)) {
        ushll2(vf, rd, rn, left_shift);
      } else {
        ushll(vf, rd, rn, left_shift);
      }
      break;
    // Narrowing shifts write a half-width result; the Q bit selects whether
    // the low or high half of the destination is written.
    case NEON_SHRN:
      if (instr->Mask(NEON_Q)) {
        shrn2(vf, rd, rn, right_shift);
      } else {
        shrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_RSHRN:
      if (instr->Mask(NEON_Q)) {
        rshrn2(vf, rd, rn, right_shift);
      } else {
        rshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_UQSHRN:
      if (instr->Mask(NEON_Q)) {
        uqshrn2(vf, rd, rn, right_shift);
      } else {
        uqshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_UQRSHRN:
      if (instr->Mask(NEON_Q)) {
        uqrshrn2(vf, rd, rn, right_shift);
      } else {
        uqrshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_SQSHRN:
      if (instr->Mask(NEON_Q)) {
        sqshrn2(vf, rd, rn, right_shift);
      } else {
        sqshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_SQRSHRN:
      if (instr->Mask(NEON_Q)) {
        sqrshrn2(vf, rd, rn, right_shift);
      } else {
        sqrshrn(vf, rd, rn, right_shift);
      }
      break;
    case NEON_SQSHRUN:
      if (instr->Mask(NEON_Q)) {
        sqshrun2(vf, rd, rn, right_shift);
      } else {
        sqshrun(vf, rd, rn, right_shift);
      }
      break;
    case NEON_SQRSHRUN:
      if (instr->Mask(NEON_Q)) {
        sqrshrun2(vf, rd, rn, right_shift);
      } else {
        sqrshrun(vf, rd, rn, right_shift);
      }
      break;
    default:
      VIXL_UNIMPLEMENTED();
  }
}
   4283 
   4284 
   4285 void Simulator::VisitNEONTable(const Instruction* instr) {
   4286  NEONFormatDecoder nfd(instr, NEONFormatDecoder::LogicalFormatMap());
   4287  VectorFormat vf = nfd.GetVectorFormat();
   4288 
   4289  SimVRegister& rd = vreg(instr->Rd());
   4290  SimVRegister& rn = vreg(instr->Rn());
   4291  SimVRegister& rn2 = vreg((instr->Rn() + 1) % kNumberOfVRegisters);
   4292  SimVRegister& rn3 = vreg((instr->Rn() + 2) % kNumberOfVRegisters);
   4293  SimVRegister& rn4 = vreg((instr->Rn() + 3) % kNumberOfVRegisters);
   4294  SimVRegister& rm = vreg(instr->Rm());
   4295 
   4296  switch (instr->Mask(NEONTableMask)) {
   4297    case NEON_TBL_1v: tbl(vf, rd, rn, rm); break;
   4298    case NEON_TBL_2v: tbl(vf, rd, rn, rn2, rm); break;
   4299    case NEON_TBL_3v: tbl(vf, rd, rn, rn2, rn3, rm); break;
   4300    case NEON_TBL_4v: tbl(vf, rd, rn, rn2, rn3, rn4, rm); break;
   4301    case NEON_TBX_1v: tbx(vf, rd, rn, rm); break;
   4302    case NEON_TBX_2v: tbx(vf, rd, rn, rn2, rm); break;
   4303    case NEON_TBX_3v: tbx(vf, rd, rn, rn2, rn3, rm); break;
   4304    case NEON_TBX_4v: tbx(vf, rd, rn, rn2, rn3, rn4, rm); break;
   4305    default:
   4306      VIXL_UNIMPLEMENTED();
   4307  }
   4308 }
   4309 
   4310 
   4311 void Simulator::VisitNEONPerm(const Instruction* instr) {
   4312  NEONFormatDecoder nfd(instr);
   4313  VectorFormat vf = nfd.GetVectorFormat();
   4314 
   4315  SimVRegister& rd = vreg(instr->Rd());
   4316  SimVRegister& rn = vreg(instr->Rn());
   4317  SimVRegister& rm = vreg(instr->Rm());
   4318 
   4319  switch (instr->Mask(NEONPermMask)) {
   4320    case NEON_TRN1: trn1(vf, rd, rn, rm); break;
   4321    case NEON_TRN2: trn2(vf, rd, rn, rm); break;
   4322    case NEON_UZP1: uzp1(vf, rd, rn, rm); break;
   4323    case NEON_UZP2: uzp2(vf, rd, rn, rm); break;
   4324    case NEON_ZIP1: zip1(vf, rd, rn, rm); break;
   4325    case NEON_ZIP2: zip2(vf, rd, rn, rm); break;
   4326    default:
   4327      VIXL_UNIMPLEMENTED();
   4328  }
   4329 }
   4330 
   4331 
   4332 void Simulator::DoUnreachable(const Instruction* instr) {
   4333  VIXL_ASSERT(instr->InstructionBits() == UNDEFINED_INST_PATTERN);
   4334 
   4335  fprintf(stream_, "Hit UNREACHABLE marker at pc=%p.\n",
   4336          reinterpret_cast<const void*>(instr));
   4337  abort();
   4338 }
   4339 
   4340 
   4341 void Simulator::DoTrace(const Instruction* instr) {
   4342  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
   4343              (instr->ImmException() == kTraceOpcode));
   4344 
   4345  // Read the arguments encoded inline in the instruction stream.
   4346  uint32_t parameters;
   4347  uint32_t command;
   4348 
   4349  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
   4350  memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
   4351  memcpy(&command, instr + kTraceCommandOffset, sizeof(command));
   4352 
   4353  switch (command) {
   4354    case TRACE_ENABLE:
   4355      set_trace_parameters(trace_parameters() | parameters);
   4356      break;
   4357    case TRACE_DISABLE:
   4358      set_trace_parameters(trace_parameters() & ~parameters);
   4359      break;
   4360    default:
   4361      VIXL_UNREACHABLE();
   4362  }
   4363 
   4364  set_pc(instr->InstructionAtOffset(kTraceLength));
   4365 }
   4366 
   4367 
   4368 void Simulator::DoLog(const Instruction* instr) {
   4369  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
   4370              (instr->ImmException() == kLogOpcode));
   4371 
   4372  // Read the arguments encoded inline in the instruction stream.
   4373  uint32_t parameters;
   4374 
   4375  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
   4376  memcpy(&parameters, instr + kTraceParamsOffset, sizeof(parameters));
   4377 
   4378  // We don't support a one-shot LOG_DISASM.
   4379  VIXL_ASSERT((parameters & LOG_DISASM) == 0);
   4380  // Print the requested information.
   4381  if (parameters & LOG_SYSREGS) PrintSystemRegisters();
   4382  if (parameters & LOG_REGS) PrintRegisters();
   4383  if (parameters & LOG_VREGS) PrintVRegisters();
   4384 
   4385  set_pc(instr->InstructionAtOffset(kLogLength));
   4386 }
   4387 
   4388 
   4389 void Simulator::DoPrintf(const Instruction* instr) {
   4390  VIXL_ASSERT((instr->Mask(ExceptionMask) == HLT) &&
   4391              (instr->ImmException() == kPrintfOpcode));
   4392 
   4393  // Read the arguments encoded inline in the instruction stream.
   4394  uint32_t arg_count;
   4395  uint32_t arg_pattern_list;
   4396  VIXL_STATIC_ASSERT(sizeof(*instr) == 1);
   4397  memcpy(&arg_count,
   4398         instr + kPrintfArgCountOffset,
   4399         sizeof(arg_count));
   4400  memcpy(&arg_pattern_list,
   4401         instr + kPrintfArgPatternListOffset,
   4402         sizeof(arg_pattern_list));
   4403 
   4404  VIXL_ASSERT(arg_count <= kPrintfMaxArgCount);
   4405  VIXL_ASSERT((arg_pattern_list >> (kPrintfArgPatternBits * arg_count)) == 0);
   4406 
   4407  // We need to call the host printf function with a set of arguments defined by
   4408  // arg_pattern_list. Because we don't know the types and sizes of the
   4409  // arguments, this is very difficult to do in a robust and portable way. To
   4410  // work around the problem, we pick apart the format string, and print one
   4411  // format placeholder at a time.
   4412 
   4413  // Allocate space for the format string. We take a copy, so we can modify it.
   4414  // Leave enough space for one extra character per expected argument (plus the
   4415  // '\0' termination).
   4416  const char * format_base = reg<const char *>(0);
   4417  VIXL_ASSERT(format_base != NULL);
   4418  size_t length = strlen(format_base) + 1;
   4419  char * const format = (char *)js_calloc(length + arg_count);
   4420 
   4421  // A list of chunks, each with exactly one format placeholder.
   4422  const char * chunks[kPrintfMaxArgCount];
   4423 
   4424  // Copy the format string and search for format placeholders.
   4425  uint32_t placeholder_count = 0;
   4426  char * format_scratch = format;
   4427  for (size_t i = 0; i < length; i++) {
   4428    if (format_base[i] != '%') {
   4429      *format_scratch++ = format_base[i];
   4430    } else {
   4431      if (format_base[i + 1] == '%') {
   4432        // Ignore explicit "%%" sequences.
   4433        *format_scratch++ = format_base[i];
   4434        i++;
   4435        // Chunks after the first are passed as format strings to printf, so we
   4436        // need to escape '%' characters in those chunks.
   4437        if (placeholder_count > 0) *format_scratch++ = format_base[i];
   4438      } else {
   4439        VIXL_CHECK(placeholder_count < arg_count);
   4440        // Insert '\0' before placeholders, and store their locations.
   4441        *format_scratch++ = '\0';
   4442        chunks[placeholder_count++] = format_scratch;
   4443        *format_scratch++ = format_base[i];
   4444      }
   4445    }
   4446  }
   4447  VIXL_CHECK(placeholder_count == arg_count);
   4448 
   4449  // Finally, call printf with each chunk, passing the appropriate register
   4450  // argument. Normally, printf returns the number of bytes transmitted, so we
   4451  // can emulate a single printf call by adding the result from each chunk. If
   4452  // any call returns a negative (error) value, though, just return that value.
   4453 
   4454  printf("%s", clr_printf);
   4455 
   4456  // Because '\0' is inserted before each placeholder, the first string in
   4457  // 'format' contains no format placeholders and should be printed literally.
   4458  int result = printf("%s", format);
   4459  int pcs_r = 1;      // Start at x1. x0 holds the format string.
   4460  int pcs_f = 0;      // Start at d0.
   4461  if (result >= 0) {
   4462    for (uint32_t i = 0; i < placeholder_count; i++) {
   4463      int part_result = -1;
   4464 
   4465      uint32_t arg_pattern = arg_pattern_list >> (i * kPrintfArgPatternBits);
   4466      arg_pattern &= (1 << kPrintfArgPatternBits) - 1;
   4467      switch (arg_pattern) {
   4468        case kPrintfArgW: part_result = printf(chunks[i], wreg(pcs_r++)); break;
   4469        case kPrintfArgX: part_result = printf(chunks[i], xreg(pcs_r++)); break;
   4470        case kPrintfArgD: part_result = printf(chunks[i], dreg(pcs_f++)); break;
   4471        default: VIXL_UNREACHABLE();
   4472      }
   4473 
   4474      if (part_result < 0) {
   4475        // Handle error values.
   4476        result = part_result;
   4477        break;
   4478      }
   4479 
   4480      result += part_result;
   4481    }
   4482  }
   4483 
   4484  printf("%s", clr_normal);
   4485 
   4486  // Printf returns its result in x0 (just like the C library's printf).
   4487  set_xreg(0, result);
   4488 
   4489  // The printf parameters are inlined in the code, so skip them.
   4490  set_pc(instr->InstructionAtOffset(kPrintfLength));
   4491 
   4492  // Set LR as if we'd just called a native printf function.
   4493  set_lr(pc());
   4494 
   4495  js_free(format);
   4496 }
   4497 
   4498 }  // namespace vixl
   4499 
   4500 #endif  // JS_SIMULATOR_ARM64