tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

non_temporal_arm_intrinsics.h (3255B)


      1 // Copyright 2022 The Abseil Authors
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //     https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #ifndef ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_
     16 #define ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_
     17 
     18 #include "absl/base/config.h"
     19 
     20 #ifdef __aarch64__
     21 #include <arm_neon.h>
     22 
     23 typedef int64x2_t __m128i; /* 128-bit vector containing integers */
     24 #define vreinterpretq_m128i_s32(x) vreinterpretq_s64_s32(x)
     25 #define vreinterpretq_s64_m128i(x) (x)
     26 
     27 // Guarantees that every preceding store is globally visible before any
     28 // subsequent store.
     29 // https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx
     30 static inline __attribute__((always_inline)) void _mm_sfence(void) {
     31  __sync_synchronize();
     32 }
     33 
     34 // Load 128-bits of integer data from unaligned memory into dst. This intrinsic
     35 // may perform better than _mm_loadu_si128 when the data crosses a cache line
     36 // boundary.
     37 //
     38 //   dst[127:0] := MEM[mem_addr+127:mem_addr]
     39 //
     40 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128
     41 #define _mm_lddqu_si128 _mm_loadu_si128
     42 
     43 // Loads 128-bit value. :
     44 // https://msdn.microsoft.com/zh-cn/library/f4k12ae8(v=vs.90).aspx
     45 static inline __attribute__((always_inline)) __m128i _mm_loadu_si128(
     46    const __m128i *p) {
     47  return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *)p));
     48 }
     49 
     50 // Stores the data in a to the address p without polluting the caches.  If the
     51 // cache line containing address p is already in the cache, the cache will be
     52 // updated.
     53 // https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx
     54 static inline __attribute__((always_inline)) void _mm_stream_si128(__m128i *p,
     55                                                                   __m128i a) {
     56 #if ABSL_HAVE_BUILTIN(__builtin_nontemporal_store)
     57  __builtin_nontemporal_store(a, p);
     58 #else
     59  vst1q_s64((int64_t *)p, vreinterpretq_s64_m128i(a));
     60 #endif
     61 }
     62 
     63 // Sets the 16 signed 8-bit integer values.
     64 // https://msdn.microsoft.com/en-us/library/x0cx8zd3(v=vs.90).aspx
     65 static inline __attribute__((always_inline)) __m128i _mm_set_epi8(
     66    signed char b15, signed char b14, signed char b13, signed char b12,
     67    signed char b11, signed char b10, signed char b9, signed char b8,
     68    signed char b7, signed char b6, signed char b5, signed char b4,
     69    signed char b3, signed char b2, signed char b1, signed char b0) {
     70  int8_t __attribute__((aligned(16)))
     71  data[16] = {(int8_t)b0,  (int8_t)b1,  (int8_t)b2,  (int8_t)b3,
     72              (int8_t)b4,  (int8_t)b5,  (int8_t)b6,  (int8_t)b7,
     73              (int8_t)b8,  (int8_t)b9,  (int8_t)b10, (int8_t)b11,
     74              (int8_t)b12, (int8_t)b13, (int8_t)b14, (int8_t)b15};
     75  return (__m128i)vld1q_s8(data);
     76 }
     77 #endif  // __aarch64__
     78 
     79 #endif  // ABSL_CRC_INTERNAL_NON_TEMPORAL_ARM_INTRINSICS_H_