tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

differ_vector_sse2.cc (3219B)


      1 /*
      2 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
      3 *
      4 *  Use of this source code is governed by a BSD-style license
      5 *  that can be found in the LICENSE file in the root of the source
      6 *  tree. An additional intellectual property rights grant can be found
      7 *  in the file PATENTS.  All contributing project authors may
      8 *  be found in the AUTHORS file in the root of the source tree.
      9 */
     10 
     11 #include "modules/desktop_capture/differ_vector_sse2.h"
     12 
     13 #include <cstdint>
     14 
     15 #if defined(_MSC_VER)
     16 #include <intrin.h>
     17 #else
     18 #include <emmintrin.h>
     19 #include <mmintrin.h>
     20 #endif
     21 
     22 namespace webrtc {
     23 
     24 extern bool VectorDifference_SSE2_W16(const uint8_t* image1,
     25                                      const uint8_t* image2) {
     26  __m128i acc = _mm_setzero_si128();
     27  __m128i v0;
     28  __m128i v1;
     29  __m128i sad;
     30  const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
     31  const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
     32  v0 = _mm_loadu_si128(i1);
     33  v1 = _mm_loadu_si128(i2);
     34  sad = _mm_sad_epu8(v0, v1);
     35  acc = _mm_adds_epu16(acc, sad);
     36  v0 = _mm_loadu_si128(i1 + 1);
     37  v1 = _mm_loadu_si128(i2 + 1);
     38  sad = _mm_sad_epu8(v0, v1);
     39  acc = _mm_adds_epu16(acc, sad);
     40  v0 = _mm_loadu_si128(i1 + 2);
     41  v1 = _mm_loadu_si128(i2 + 2);
     42  sad = _mm_sad_epu8(v0, v1);
     43  acc = _mm_adds_epu16(acc, sad);
     44  v0 = _mm_loadu_si128(i1 + 3);
     45  v1 = _mm_loadu_si128(i2 + 3);
     46  sad = _mm_sad_epu8(v0, v1);
     47  acc = _mm_adds_epu16(acc, sad);
     48 
     49  // This essential means sad = acc >> 64. We only care about the lower 16
     50  // bits.
     51  sad = _mm_shuffle_epi32(acc, 0xEE);
     52  sad = _mm_adds_epu16(sad, acc);
     53  return _mm_cvtsi128_si32(sad) != 0;
     54 }
     55 
     56 extern bool VectorDifference_SSE2_W32(const uint8_t* image1,
     57                                      const uint8_t* image2) {
     58  __m128i acc = _mm_setzero_si128();
     59  __m128i v0;
     60  __m128i v1;
     61  __m128i sad;
     62  const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
     63  const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
     64  v0 = _mm_loadu_si128(i1);
     65  v1 = _mm_loadu_si128(i2);
     66  sad = _mm_sad_epu8(v0, v1);
     67  acc = _mm_adds_epu16(acc, sad);
     68  v0 = _mm_loadu_si128(i1 + 1);
     69  v1 = _mm_loadu_si128(i2 + 1);
     70  sad = _mm_sad_epu8(v0, v1);
     71  acc = _mm_adds_epu16(acc, sad);
     72  v0 = _mm_loadu_si128(i1 + 2);
     73  v1 = _mm_loadu_si128(i2 + 2);
     74  sad = _mm_sad_epu8(v0, v1);
     75  acc = _mm_adds_epu16(acc, sad);
     76  v0 = _mm_loadu_si128(i1 + 3);
     77  v1 = _mm_loadu_si128(i2 + 3);
     78  sad = _mm_sad_epu8(v0, v1);
     79  acc = _mm_adds_epu16(acc, sad);
     80  v0 = _mm_loadu_si128(i1 + 4);
     81  v1 = _mm_loadu_si128(i2 + 4);
     82  sad = _mm_sad_epu8(v0, v1);
     83  acc = _mm_adds_epu16(acc, sad);
     84  v0 = _mm_loadu_si128(i1 + 5);
     85  v1 = _mm_loadu_si128(i2 + 5);
     86  sad = _mm_sad_epu8(v0, v1);
     87  acc = _mm_adds_epu16(acc, sad);
     88  v0 = _mm_loadu_si128(i1 + 6);
     89  v1 = _mm_loadu_si128(i2 + 6);
     90  sad = _mm_sad_epu8(v0, v1);
     91  acc = _mm_adds_epu16(acc, sad);
     92  v0 = _mm_loadu_si128(i1 + 7);
     93  v1 = _mm_loadu_si128(i2 + 7);
     94  sad = _mm_sad_epu8(v0, v1);
     95  acc = _mm_adds_epu16(acc, sad);
     96 
     97  // This essential means sad = acc >> 64. We only care about the lower 16
     98  // bits.
     99  sad = _mm_shuffle_epi32(acc, 0xEE);
    100  sad = _mm_adds_epu16(sad, acc);
    101  return _mm_cvtsi128_si32(sad) != 0;
    102 }
    103 
    104 }  // namespace webrtc