tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

gfxAlphaRecoveryGeneric.h (4867B)


      1 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 #ifndef _GFXALPHARECOVERY_GENERIC_H_
      6 #define _GFXALPHARECOVERY_GENERIC_H_
      7 
      8 #include "gfxAlphaRecovery.h"
      9 #include "gfxImageSurface.h"
     10 #include "nsDebug.h"
     11 #include <xsimd/xsimd.hpp>
     12 
     13 template <typename Arch>
     14 bool gfxAlphaRecovery::RecoverAlphaGeneric(gfxImageSurface* blackSurf,
     15                                           const gfxImageSurface* whiteSurf) {
     16  mozilla::gfx::IntSize size = blackSurf->GetSize();
     17 
     18  if (size != whiteSurf->GetSize() ||
     19      (blackSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
     20       blackSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32) ||
     21      (whiteSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 &&
     22       whiteSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32))
     23    return false;
     24 
     25  blackSurf->Flush();
     26  whiteSurf->Flush();
     27 
     28  unsigned char* blackData = blackSurf->Data();
     29  unsigned char* whiteData = whiteSurf->Data();
     30 
     31  if ((NS_PTR_TO_UINT32(blackData) & 0xf) !=
     32          (NS_PTR_TO_UINT32(whiteData) & 0xf) ||
     33      (blackSurf->Stride() - whiteSurf->Stride()) & 0xf) {
     34    // Cannot keep these in alignment.
     35    return false;
     36  }
     37 
     38  alignas(Arch::alignment()) static const uint8_t greenMaski[] = {
     39      0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
     40      0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
     41  };
     42  alignas(Arch::alignment()) static const uint8_t alphaMaski[] = {
     43      0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
     44      0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
     45  };
     46 
     47  using batch_type = xsimd::batch<uint8_t, Arch>;
     48  constexpr size_t batch_size = batch_type::size;
     49  static_assert(batch_size == 16);
     50 
     51  batch_type greenMask = batch_type::load_aligned(greenMaski);
     52  batch_type alphaMask = batch_type::load_aligned(alphaMaski);
     53 
     54  for (int32_t i = 0; i < size.height; ++i) {
     55    int32_t j = 0;
     56    // Loop single pixels until at 4 byte alignment.
     57    while (NS_PTR_TO_UINT32(blackData) & 0xf && j < size.width) {
     58      *((uint32_t*)blackData) =
     59          RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
     60                       *reinterpret_cast<uint32_t*>(whiteData));
     61      blackData += 4;
     62      whiteData += 4;
     63      j++;
     64    }
     65    // This extra loop allows the compiler to do some more clever registry
     66    // management and makes it about 5% faster than with only the 4 pixel
     67    // at a time loop.
     68    for (; j < size.width - 8; j += 8) {
     69      auto black1 = batch_type::load_aligned(blackData);
     70      auto white1 = batch_type::load_aligned(whiteData);
     71      auto black2 = batch_type::load_aligned(blackData + batch_size);
     72      auto white2 = batch_type::load_aligned(whiteData + batch_size);
     73 
     74      // Execute the same instructions as described in RecoverPixel, only
     75      // using an SSE2 packed saturated subtract.
     76      white1 = xsimd::ssub(white1, black1);
     77      white2 = xsimd::ssub(white2, black2);
     78      white1 = xsimd::ssub(greenMask, white1);
     79      white2 = xsimd::ssub(greenMask, white2);
     80      // Producing the final black pixel in an XMM register and storing
     81      // that is actually faster than doing a masked store since that
     82      // does an unaligned storage. We have the black pixel in a register
     83      // anyway.
     84      black1 = xsimd::bitwise_andnot(black1, alphaMask);
     85      black2 = xsimd::bitwise_andnot(black2, alphaMask);
     86      white1 = xsimd::slide_left<2>(white1);
     87      white2 = xsimd::slide_left<2>(white2);
     88      white1 &= alphaMask;
     89      white2 &= alphaMask;
     90      black1 |= white1;
     91      black2 |= white2;
     92 
     93      black1.store_aligned(blackData);
     94      black2.store_aligned(blackData + batch_size);
     95      blackData += 2 * batch_size;
     96      whiteData += 2 * batch_size;
     97    }
     98    for (; j < size.width - 4; j += 4) {
     99      auto black = batch_type::load_aligned(blackData);
    100      auto white = batch_type::load_aligned(whiteData);
    101 
    102      white = xsimd::ssub(white, black);
    103      white = xsimd::ssub(greenMask, white);
    104      black = xsimd::bitwise_andnot(black, alphaMask);
    105      white = xsimd::slide_left<2>(white);
    106      white &= alphaMask;
    107      black |= white;
    108      black.store_aligned(blackData);
    109      blackData += batch_size;
    110      whiteData += batch_size;
    111    }
    112    // Loop single pixels until we're done.
    113    while (j < size.width) {
    114      *((uint32_t*)blackData) =
    115          RecoverPixel(*reinterpret_cast<uint32_t*>(blackData),
    116                       *reinterpret_cast<uint32_t*>(whiteData));
    117      blackData += 4;
    118      whiteData += 4;
    119      j++;
    120    }
    121    blackData += blackSurf->Stride() - j * 4;
    122    whiteData += whiteSurf->Stride() - j * 4;
    123  }
    124 
    125  blackSurf->MarkDirty();
    126 
    127  return true;
    128 }
    129 #endif