tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

neon.h (4031B)


      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 //  NEON common code.
     11 
     12 #ifndef WEBP_DSP_NEON_H_
     13 #define WEBP_DSP_NEON_H_
     14 
     15 #include "src/dsp/dsp.h"
     16 
     17 #if defined(WEBP_USE_NEON)
     18 
     19 #include <arm_neon.h>
     20 
     21 // Right now, some intrinsics functions seem slower, so we disable them
     22 // everywhere except newer clang/gcc or aarch64 where the inline assembly is
     23 // incompatible.
     24 #if LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 9) || WEBP_AARCH64
     25 #define WEBP_USE_INTRINSICS   // use intrinsics when possible
     26 #endif
     27 
     28 #define INIT_VECTOR2(v, a, b) do {  \
     29  v.val[0] = a;                     \
     30  v.val[1] = b;                     \
     31 } while (0)
     32 
     33 #define INIT_VECTOR3(v, a, b, c) do {  \
     34  v.val[0] = a;                        \
     35  v.val[1] = b;                        \
     36  v.val[2] = c;                        \
     37 } while (0)
     38 
     39 #define INIT_VECTOR4(v, a, b, c, d) do {  \
     40  v.val[0] = a;                           \
     41  v.val[1] = b;                           \
     42  v.val[2] = c;                           \
     43  v.val[3] = d;                           \
     44 } while (0)
     45 
     46 // if using intrinsics, this flag avoids some functions that make gcc-4.6.3
     47 // crash ("internal compiler error: in immed_double_const, at emit-rtl.").
     48 // (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
     49 #if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
     50 #define WORK_AROUND_GCC
     51 #endif
     52 
     53 static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
     54  uint64x2x2_t row01, row23;
     55 
     56  row01.val[0] = vreinterpretq_u64_s32(rows.val[0]);
     57  row01.val[1] = vreinterpretq_u64_s32(rows.val[1]);
     58  row23.val[0] = vreinterpretq_u64_s32(rows.val[2]);
     59  row23.val[1] = vreinterpretq_u64_s32(rows.val[3]);
     60  // Transpose 64-bit values (there's no vswp equivalent)
     61  {
     62    const uint64x1_t row0h = vget_high_u64(row01.val[0]);
     63    const uint64x1_t row2l = vget_low_u64(row23.val[0]);
     64    const uint64x1_t row1h = vget_high_u64(row01.val[1]);
     65    const uint64x1_t row3l = vget_low_u64(row23.val[1]);
     66    row01.val[0] = vcombine_u64(vget_low_u64(row01.val[0]), row2l);
     67    row23.val[0] = vcombine_u64(row0h, vget_high_u64(row23.val[0]));
     68    row01.val[1] = vcombine_u64(vget_low_u64(row01.val[1]), row3l);
     69    row23.val[1] = vcombine_u64(row1h, vget_high_u64(row23.val[1]));
     70  }
     71  {
     72    const int32x4x2_t out01 = vtrnq_s32(vreinterpretq_s32_u64(row01.val[0]),
     73                                        vreinterpretq_s32_u64(row01.val[1]));
     74    const int32x4x2_t out23 = vtrnq_s32(vreinterpretq_s32_u64(row23.val[0]),
     75                                        vreinterpretq_s32_u64(row23.val[1]));
     76    int32x4x4_t out;
     77    out.val[0] = out01.val[0];
     78    out.val[1] = out01.val[1];
     79    out.val[2] = out23.val[0];
     80    out.val[3] = out23.val[1];
     81    return out;
     82  }
     83 }
     84 
     85 #if 0     // Useful debug macro.
     86 #include <stdio.h>
     87 #define PRINT_REG(REG, SIZE) do {                       \
     88  int i;                                                \
     89  printf("%s \t[%d]: 0x", #REG, SIZE);                  \
     90  if (SIZE == 8) {                                      \
     91    uint8_t _tmp[8];                                    \
     92    vst1_u8(_tmp, (REG));                               \
     93    for (i = 0; i < 8; ++i) printf("%.2x ", _tmp[i]);   \
     94  } else if (SIZE == 16) {                              \
     95    uint16_t _tmp[4];                                   \
     96    vst1_u16(_tmp, (REG));                              \
     97    for (i = 0; i < 4; ++i) printf("%.4x ", _tmp[i]);   \
     98  }                                                     \
     99  printf("\n");                                         \
    100 } while (0)
    101 #endif
    102 
    103 #endif  // WEBP_USE_NEON
    104 #endif  // WEBP_DSP_NEON_H_