gfxAlphaRecoveryGeneric.h (4867B)
1 /* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*- 2 * This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 #ifndef _GFXALPHARECOVERY_GENERIC_H_ 6 #define _GFXALPHARECOVERY_GENERIC_H_ 7 8 #include "gfxAlphaRecovery.h" 9 #include "gfxImageSurface.h" 10 #include "nsDebug.h" 11 #include <xsimd/xsimd.hpp> 12 13 template <typename Arch> 14 bool gfxAlphaRecovery::RecoverAlphaGeneric(gfxImageSurface* blackSurf, 15 const gfxImageSurface* whiteSurf) { 16 mozilla::gfx::IntSize size = blackSurf->GetSize(); 17 18 if (size != whiteSurf->GetSize() || 19 (blackSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 && 20 blackSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32) || 21 (whiteSurf->Format() != mozilla::gfx::SurfaceFormat::A8R8G8B8_UINT32 && 22 whiteSurf->Format() != mozilla::gfx::SurfaceFormat::X8R8G8B8_UINT32)) 23 return false; 24 25 blackSurf->Flush(); 26 whiteSurf->Flush(); 27 28 unsigned char* blackData = blackSurf->Data(); 29 unsigned char* whiteData = whiteSurf->Data(); 30 31 if ((NS_PTR_TO_UINT32(blackData) & 0xf) != 32 (NS_PTR_TO_UINT32(whiteData) & 0xf) || 33 (blackSurf->Stride() - whiteSurf->Stride()) & 0xf) { 34 // Cannot keep these in alignment. 35 return false; 36 } 37 38 alignas(Arch::alignment()) static const uint8_t greenMaski[] = { 39 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 40 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 41 }; 42 alignas(Arch::alignment()) static const uint8_t alphaMaski[] = { 43 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 44 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, 45 }; 46 47 using batch_type = xsimd::batch<uint8_t, Arch>; 48 constexpr size_t batch_size = batch_type::size; 49 static_assert(batch_size == 16); 50 51 batch_type greenMask = batch_type::load_aligned(greenMaski); 52 batch_type alphaMask = batch_type::load_aligned(alphaMaski); 53 54 for (int32_t i = 0; i < size.height; ++i) { 55 int32_t j = 0; 56 // Loop single pixels until at 4 byte alignment. 57 while (NS_PTR_TO_UINT32(blackData) & 0xf && j < size.width) { 58 *((uint32_t*)blackData) = 59 RecoverPixel(*reinterpret_cast<uint32_t*>(blackData), 60 *reinterpret_cast<uint32_t*>(whiteData)); 61 blackData += 4; 62 whiteData += 4; 63 j++; 64 } 65 // This extra loop allows the compiler to do some more clever registry 66 // management and makes it about 5% faster than with only the 4 pixel 67 // at a time loop. 68 for (; j < size.width - 8; j += 8) { 69 auto black1 = batch_type::load_aligned(blackData); 70 auto white1 = batch_type::load_aligned(whiteData); 71 auto black2 = batch_type::load_aligned(blackData + batch_size); 72 auto white2 = batch_type::load_aligned(whiteData + batch_size); 73 74 // Execute the same instructions as described in RecoverPixel, only 75 // using an SSE2 packed saturated subtract. 76 white1 = xsimd::ssub(white1, black1); 77 white2 = xsimd::ssub(white2, black2); 78 white1 = xsimd::ssub(greenMask, white1); 79 white2 = xsimd::ssub(greenMask, white2); 80 // Producing the final black pixel in an XMM register and storing 81 // that is actually faster than doing a masked store since that 82 // does an unaligned storage. We have the black pixel in a register 83 // anyway. 84 black1 = xsimd::bitwise_andnot(black1, alphaMask); 85 black2 = xsimd::bitwise_andnot(black2, alphaMask); 86 white1 = xsimd::slide_left<2>(white1); 87 white2 = xsimd::slide_left<2>(white2); 88 white1 &= alphaMask; 89 white2 &= alphaMask; 90 black1 |= white1; 91 black2 |= white2; 92 93 black1.store_aligned(blackData); 94 black2.store_aligned(blackData + batch_size); 95 blackData += 2 * batch_size; 96 whiteData += 2 * batch_size; 97 } 98 for (; j < size.width - 4; j += 4) { 99 auto black = batch_type::load_aligned(blackData); 100 auto white = batch_type::load_aligned(whiteData); 101 102 white = xsimd::ssub(white, black); 103 white = xsimd::ssub(greenMask, white); 104 black = xsimd::bitwise_andnot(black, alphaMask); 105 white = xsimd::slide_left<2>(white); 106 white &= alphaMask; 107 black |= white; 108 black.store_aligned(blackData); 109 blackData += batch_size; 110 whiteData += batch_size; 111 } 112 // Loop single pixels until we're done. 113 while (j < size.width) { 114 *((uint32_t*)blackData) = 115 RecoverPixel(*reinterpret_cast<uint32_t*>(blackData), 116 *reinterpret_cast<uint32_t*>(whiteData)); 117 blackData += 4; 118 whiteData += 4; 119 j++; 120 } 121 blackData += blackSurf->Stride() - j * 4; 122 whiteData += whiteSurf->Stride() - j * 4; 123 } 124 125 blackSurf->MarkDirty(); 126 127 return true; 128 } 129 #endif