Swizzle.cpp (64516B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "Swizzle.h"
#include "Logging.h"
#include "Orientation.h"
#include "Tools.h"
#include "mozilla/CheckedInt.h"
#include "mozilla/EndianUtils.h"
#include "mozilla/UniquePtr.h"

#ifdef USE_SSE2
#  include "mozilla/SSE.h"
#endif

#ifdef USE_NEON
#  include "mozilla/arm.h"
#endif

#include <new>

namespace mozilla {
namespace gfx {

/**
 * Convenience macros for dispatching to various format combinations.
 */

// Hash the formats to a relatively dense value to optimize jump table
// generation. The first 6 formats in SurfaceFormat are the 32-bit BGRA variants
// and are the most common formats dispatched here. Room is reserved in the
// lowish bits for up to these 6 destination formats. If a destination format is
// >= 6, the 6th bit is set to avoid collisions.
#define FORMAT_KEY(aSrcFormat, aDstFormat) \
  (int(aSrcFormat) * 6 + int(aDstFormat) + (int(int(aDstFormat) >= 6) << 6))

// Expands to a switch case for a (src, dst) format pair. The actual invocation
// is produced by FORMAT_CASE_CALL, which each dispatcher function #defines
// locally before its switch and #undefs afterwards.
#define FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, ...) \
  case FORMAT_KEY(aSrcFormat, aDstFormat):            \
    __VA_ARGS__;                                      \
    return true;

#define FORMAT_CASE(aSrcFormat, aDstFormat, ...) \
  FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, FORMAT_CASE_CALL(__VA_ARGS__))

// Row variants return a pointer to the selected row function rather than
// invoking it.
#define FORMAT_CASE_ROW(aSrcFormat, aDstFormat, ...) \
  case FORMAT_KEY(aSrcFormat, aDstFormat):           \
    return &__VA_ARGS__;

/**
 * Constexpr functions for analyzing format attributes in templates.
 */

// Whether B comes before R in pixel memory layout.
57 static constexpr bool IsBGRFormat(SurfaceFormat aFormat) { 58 return aFormat == SurfaceFormat::B8G8R8A8 || 59 #if MOZ_LITTLE_ENDIAN() 60 aFormat == SurfaceFormat::R5G6B5_UINT16 || 61 #endif 62 aFormat == SurfaceFormat::B8G8R8X8 || aFormat == SurfaceFormat::B8G8R8; 63 } 64 65 // Whether the order of B and R need to be swapped to map from src to dst. 66 static constexpr bool ShouldSwapRB(SurfaceFormat aSrcFormat, 67 SurfaceFormat aDstFormat) { 68 return IsBGRFormat(aSrcFormat) != IsBGRFormat(aDstFormat); 69 } 70 71 // The starting byte of the RGB components in pixel memory. 72 static constexpr uint32_t RGBByteIndex(SurfaceFormat aFormat) { 73 return aFormat == SurfaceFormat::A8R8G8B8 || 74 aFormat == SurfaceFormat::X8R8G8B8 75 ? 1 76 : 0; 77 } 78 79 // The byte of the alpha component, which just comes after RGB. 80 static constexpr uint32_t AlphaByteIndex(SurfaceFormat aFormat) { 81 return (RGBByteIndex(aFormat) + 3) % 4; 82 } 83 84 // The endian-dependent bit shift to access RGB of a UINT32 pixel. 85 static constexpr uint32_t RGBBitShift(SurfaceFormat aFormat) { 86 #if MOZ_LITTLE_ENDIAN() 87 return 8 * RGBByteIndex(aFormat); 88 #else 89 return 8 - 8 * RGBByteIndex(aFormat); 90 #endif 91 } 92 93 // The endian-dependent bit shift to access alpha of a UINT32 pixel. 94 static constexpr uint32_t AlphaBitShift(SurfaceFormat aFormat) { 95 return (RGBBitShift(aFormat) + 24) % 32; 96 } 97 98 // Whether the pixel format should ignore the value of the alpha channel and 99 // treat it as opaque. 100 static constexpr bool IgnoreAlpha(SurfaceFormat aFormat) { 101 return aFormat == SurfaceFormat::B8G8R8X8 || 102 aFormat == SurfaceFormat::R8G8B8X8 || 103 aFormat == SurfaceFormat::X8R8G8B8; 104 } 105 106 // Whether to force alpha to opaque to map from src to dst. 
static constexpr bool ShouldForceOpaque(SurfaceFormat aSrcFormat,
                                        SurfaceFormat aDstFormat) {
  return IgnoreAlpha(aSrcFormat) != IgnoreAlpha(aDstFormat);
}

#ifdef USE_SSE2
/**
 * SSE2 optimizations
 */

// Forward declarations only; the bodies live in the arch-specific
// translation units (presumably SwizzleSSE2.cpp et al. — confirm in build
// files). Template parameters are resolved at compile time from the
// constexpr format-attribute helpers above.
template <bool aSwapRB, bool aOpaqueAlpha>
void Premultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define PREMULTIPLY_SSE2(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                                \
                Premultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void PremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define PREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                              \
        aSrcFormat, aDstFormat,                                   \
        PremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                            ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void Unpremultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define UNPREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
    FORMAT_CASE(aSrcFormat, aDstFormat,              \
                Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpremultiplyRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define UNPREMULTIPLY_ROW_SSE2(aSrcFormat, aDstFormat) \
    FORMAT_CASE_ROW(                                     \
        aSrcFormat, aDstFormat,                          \
        UnpremultiplyRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define SWIZZLE_SSE2(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                            \
                Swizzle_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                             ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void SwizzleRow_SSE2(const uint8_t*, uint8_t*, int32_t);

#  define SWIZZLE_ROW_SSE2(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                          \
        aSrcFormat, aDstFormat,                               \
        SwizzleRow_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        ShouldForceOpaque(aSrcFormat, aDstFormat)>)

// RGB24 unpack only ever has R8G8B8 as its source format; the swap flag is
// derived from the destination alone.
template <bool aSwapRB>
void UnpackRowRGB24_SSSE3(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_SSSE3(aDstFormat) \
    FORMAT_CASE_ROW(                       \
        SurfaceFormat::R8G8B8, aDstFormat, \
        UnpackRowRGB24_SSSE3<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)

template <bool aSwapRB>
void UnpackRowRGB24_AVX2(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_AVX2(aDstFormat)  \
    FORMAT_CASE_ROW(                       \
        SurfaceFormat::R8G8B8, aDstFormat, \
        UnpackRowRGB24_AVX2<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)

#endif

#ifdef USE_NEON
/**
 * ARM NEON optimizations
 */

// As with the SSE2 section above, these are forward declarations resolved in
// the NEON-specific translation unit.
template <bool aSwapRB, bool aOpaqueAlpha>
void Premultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define PREMULTIPLY_NEON(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                                \
                Premultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void PremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define PREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                              \
        aSrcFormat, aDstFormat,                                   \
        PremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                            ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void Unpremultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define UNPREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
    FORMAT_CASE(aSrcFormat, aDstFormat,              \
                Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpremultiplyRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define UNPREMULTIPLY_ROW_NEON(aSrcFormat, aDstFormat) \
    FORMAT_CASE_ROW(                                     \
        aSrcFormat, aDstFormat,                          \
        UnpremultiplyRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)
template <bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#  define SWIZZLE_NEON(aSrcFormat, aDstFormat)                     \
    FORMAT_CASE(aSrcFormat, aDstFormat,                            \
                Swizzle_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                             ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB, bool aOpaqueAlpha>
void SwizzleRow_NEON(const uint8_t*, uint8_t*, int32_t);

#  define SWIZZLE_ROW_NEON(aSrcFormat, aDstFormat)            \
    FORMAT_CASE_ROW(                                          \
        aSrcFormat, aDstFormat,                               \
        SwizzleRow_NEON<ShouldSwapRB(aSrcFormat, aDstFormat), \
                        ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template <bool aSwapRB>
void UnpackRowRGB24_NEON(const uint8_t*, uint8_t*, int32_t);

#  define UNPACK_ROW_RGB_NEON(aDstFormat)  \
    FORMAT_CASE_ROW(                       \
        SurfaceFormat::R8G8B8, aDstFormat, \
        UnpackRowRGB24_NEON<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>)
#endif

/**
 * Premultiplying
 */

// Fallback premultiply implementation that uses splayed pixel math to reduce
// the multiplications used. That is, the R and B components are isolated from
// the G and A components, which then can be multiplied as if they were two
// 2-component vectors. Otherwise, an approximation of divide-by-255 is used
// which is faster than an actual division. These optimizations are also used
// for the SSE2 and NEON implementations.
//
// Note: aSrc and aDst are taken by reference and advanced past the processed
// chunk, so the surface-level loop below can resume at the next row. aLength
// is in pixels (4 bytes each).
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                     int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    // Load and process 1 entire pixel at a time.
    uint32_t color = *reinterpret_cast<const uint32_t*>(aSrc);

    // aSrcAShift == 0 means alpha lives in the low byte; otherwise shift it
    // down from its position (the high byte for the formats dispatched here).
    uint32_t a = aSrcAShift ? color >> aSrcAShift : color & 0xFF;

    // Isolate the R and B components.
    uint32_t rb = (color >> aSrcRGBShift) & 0x00FF00FF;
    // Swap the order of R and B if necessary.
    if (aSwapRB) {
      rb = (rb >> 16) | (rb << 16);
    }
    // Approximate the multiply by alpha and divide by 255 which is
    // essentially:
    // c = c*a + 255; c = (c + (c >> 8)) >> 8;
    // However, we omit the final >> 8 to fold it with the final shift into
    // place depending on desired output format.
    rb = rb * a + 0x00FF00FF;
    rb = (rb + ((rb >> 8) & 0x00FF00FF)) & 0xFF00FF00;

    // Use same approximation as above, but G is shifted 8 bits left.
    // Alpha is left out and handled separately.
    uint32_t g = color & (0xFF00 << aSrcRGBShift);
    g = g * a + (0xFF00 << aSrcRGBShift);
    g = (g + (g >> 8)) & (0xFF0000 << aSrcRGBShift);

    // The above math leaves RGB shifted left by 8 bits.
    // Shift them right if required for the output format.
    // then combine them back together to produce output pixel.
    // Add the alpha back on if the output format is not opaque.
    *reinterpret_cast<uint32_t*>(aDst) =
        (rb >> (8 - aDstRGBShift)) | (g >> (8 + aSrcRGBShift - aDstRGBShift)) |
        (aOpaqueAlpha ? 0xFF << aDstAShift : a << aDstAShift);

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

// Premultiply a single row of aLength pixels.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
                                   int32_t aLength) {
  PremultiplyChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
                           aDstRGBShift, aDstAShift>(aSrc, aDst, aLength);
}

// Premultiply an entire surface. aSrcGap/aDstGap are the bytes to skip from
// the end of one row's pixel data to the start of the next (stride minus
// used bytes), as computed by GetStrideGap below.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void PremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
                                uint8_t* aDst, int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    PremultiplyChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
                             aDstRGBShift, aDstAShift>(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define PREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat)                   \
  FORMAT_CASE(                                                              \
      aSrcFormat, aDstFormat,                                               \
      PremultiplyFallback<ShouldSwapRB(aSrcFormat, aDstFormat),             \
                          ShouldForceOpaque(aSrcFormat, aDstFormat),        \
                          RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                          RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

#define PREMULTIPLY_FALLBACK(aSrcFormat)                           \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8)   \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8)   \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8)   \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8)   \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)   \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)

#define PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat)               \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,                                   \
                  PremultiplyRowFallback<                                   \
                      ShouldSwapRB(aSrcFormat, aDstFormat),                 \
                      ShouldForceOpaque(aSrcFormat, aDstFormat),            \
                      RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat),   \
                      RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
#define PREMULTIPLY_ROW_FALLBACK(aSrcFormat)                           \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8)   \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8)   \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8)   \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8)   \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)   \
  PREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)

// If rows are tightly packed, and the size of the total area will fit within
// the precision range of a single row, then process all the data as if it was
// a single row.
static inline IntSize CollapseSize(const IntSize& aSize, int32_t aSrcStride,
                                   int32_t aDstStride) {
  if (aSrcStride == aDstStride && (aSrcStride & 3) == 0 &&
      aSrcStride / 4 == aSize.width) {
    CheckedInt32 area = CheckedInt32(aSize.width) * CheckedInt32(aSize.height);
    if (area.isValid()) {
      return IntSize(area.value(), 1);
    }
  }
  return aSize;
}

// Bytes from the end of one row's pixel data to the start of the next row.
// Returns -1 if the width overflows or the computed usage is negative; the
// callers treat a negative gap (stride smaller than the row) as failure.
static inline int32_t GetStrideGap(int32_t aWidth, SurfaceFormat aFormat,
                                   int32_t aStride) {
  CheckedInt32 used = CheckedInt32(aWidth) * BytesPerPixel(aFormat);
  if (!used.isValid() || used.value() < 0) {
    return -1;
  }
  return aStride - used.value();
}

// Premultiply a whole surface from aSrc to aDst (which may alias), choosing a
// SIMD implementation where the CPU supports one and a supported format pair
// is requested, otherwise the splayed-math fallback. Returns false if the
// format combination is unsupported or the strides are inconsistent.
bool PremultiplyData(const uint8_t* aSrc, int32_t aSrcStride,
                     SurfaceFormat aSrcFormat, uint8_t* aDst,
                     int32_t aDstStride, SurfaceFormat aDstFormat,
                     const IntSize& aSize) {
  if (aSize.IsEmpty()) {
    return true;
  }
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride);
  int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride);
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);
  if (srcGap < 0 || dstGap < 0) {
    return false;
  }

#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
      PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
      PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    PREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8)
    default:
      break;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported premultiply formats");
  return false;
}

// Select a row-level premultiply function for the given format pair, or
// nullptr if the combination is unsupported.
SwizzleRowFn PremultiplyRow(SurfaceFormat aSrcFormat,
                            SurfaceFormat aDstFormat) {
#ifdef USE_SSE2
  if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      default:
        break;
    }
#endif

#ifdef USE_NEON
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
      PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
      PREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
      default:
        break;
    }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8)
    default:
      break;
  }

  MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats");
  return nullptr;
}
/**
 * Unpremultiplying
 */

// Generate a table of 8.16 fixed-point reciprocals representing 1/alpha.
// Entry 0 is 0 so that fully transparent pixels map to 0.
#define UNPREMULQ(x) (0xFF00FFU / (x))
#define UNPREMULQ_2(x) UNPREMULQ(x), UNPREMULQ((x) + 1)
#define UNPREMULQ_4(x) UNPREMULQ_2(x), UNPREMULQ_2((x) + 2)
#define UNPREMULQ_8(x) UNPREMULQ_4(x), UNPREMULQ_4((x) + 4)
#define UNPREMULQ_16(x) UNPREMULQ_8(x), UNPREMULQ_8((x) + 8)
#define UNPREMULQ_32(x) UNPREMULQ_16(x), UNPREMULQ_16((x) + 16)
static const uint32_t sUnpremultiplyTable[256] = {0,
                                                  UNPREMULQ(1),
                                                  UNPREMULQ_2(2),
                                                  UNPREMULQ_4(4),
                                                  UNPREMULQ_8(8),
                                                  UNPREMULQ_16(16),
                                                  UNPREMULQ_32(32),
                                                  UNPREMULQ_32(64),
                                                  UNPREMULQ_32(96),
                                                  UNPREMULQ_32(128),
                                                  UNPREMULQ_32(160),
                                                  UNPREMULQ_32(192),
                                                  UNPREMULQ_32(224)};

// Fallback unpremultiply implementation that uses 8.16 fixed-point reciprocal
// math to eliminate any division by the alpha component. This optimization is
// used for the SSE2 and NEON implementations, with some adaptations. This
// implementation also accesses color components using individual byte accesses
// as this profiles faster than accessing the pixel as a uint32_t and
// shifting/masking to access components.
//
// aSrc and aDst are advanced past the chunk (passed by reference); aLength is
// in pixels.
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                       int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
    uint8_t g = aSrc[aSrcRGBIndex + 1];
    uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];
    uint8_t a = aSrc[aSrcAIndex];

    // Access the 8.16 reciprocal from the table based on alpha. Multiply by
    // the reciprocal and shift off the fraction bits to approximate the
    // division by alpha.
    uint32_t q = sUnpremultiplyTable[a];
    aDst[aDstRGBIndex + 0] = (r * q) >> 16;
    aDst[aDstRGBIndex + 1] = (g * q) >> 16;
    aDst[aDstRGBIndex + 2] = (b * q) >> 16;
    aDst[aDstAIndex] = a;

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

// Unpremultiply a single row of aLength pixels.
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyRowFallback(const uint8_t* aSrc, uint8_t* aDst,
                                     int32_t aLength) {
  UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
                             aDstAIndex>(aSrc, aDst, aLength);
}

// Unpremultiply an entire surface, skipping the stride gaps between rows.
template <bool aSwapRB, uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
          uint32_t aDstRGBIndex, uint32_t aDstAIndex>
static void UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
                                  uint8_t* aDst, int32_t aDstGap,
                                  IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    UnpremultiplyChunkFallback<aSwapRB, aSrcRGBIndex, aSrcAIndex, aDstRGBIndex,
                               aDstAIndex>(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat)               \
  FORMAT_CASE(aSrcFormat, aDstFormat,                                     \
              UnpremultiplyFallback<                                      \
                  ShouldSwapRB(aSrcFormat, aDstFormat),                   \
                  RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat),   \
                  RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)

#define UNPREMULTIPLY_FALLBACK(aSrcFormat)                          \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8)  \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8)  \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)

#define UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, aDstFormat)           \
  FORMAT_CASE_ROW(aSrcFormat, aDstFormat,                                 \
                  UnpremultiplyRowFallback<                               \
                      ShouldSwapRB(aSrcFormat, aDstFormat),               \
                      RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
                      RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)
579 #define UNPREMULTIPLY_ROW_FALLBACK(aSrcFormat) \ 580 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \ 581 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \ 582 UNPREMULTIPLY_ROW_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) 583 584 bool UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride, 585 SurfaceFormat aSrcFormat, uint8_t* aDst, 586 int32_t aDstStride, SurfaceFormat aDstFormat, 587 const IntSize& aSize) { 588 if (aSize.IsEmpty()) { 589 return true; 590 } 591 IntSize size = CollapseSize(aSize, aSrcStride, aDstStride); 592 // Find gap from end of row to the start of the next row. 593 int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride); 594 int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride); 595 MOZ_ASSERT(srcGap >= 0 && dstGap >= 0); 596 if (srcGap < 0 || dstGap < 0) { 597 return false; 598 } 599 600 #define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size) 601 602 #ifdef USE_SSE2 603 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 604 UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8) 605 UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 606 UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8) 607 UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 608 default: 609 break; 610 } 611 #endif 612 613 #ifdef USE_NEON 614 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 615 UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8) 616 UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 617 UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8) 618 UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 619 default: 620 break; 621 } 622 #endif 623 624 switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 625 UNPREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8) 626 
UNPREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8) 627 UNPREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8) 628 default: 629 break; 630 } 631 632 #undef FORMAT_CASE_CALL 633 634 MOZ_ASSERT(false, "Unsupported unpremultiply formats"); 635 return false; 636 } 637 638 SwizzleRowFn UnpremultiplyRow(SurfaceFormat aSrcFormat, 639 SurfaceFormat aDstFormat) { 640 #ifdef USE_SSE2 641 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 642 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8) 643 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 644 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8) 645 UNPREMULTIPLY_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 646 default: 647 break; 648 } 649 #endif 650 651 #ifdef USE_NEON 652 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 653 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8) 654 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 655 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8) 656 UNPREMULTIPLY_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 657 default: 658 break; 659 } 660 #endif 661 662 switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 663 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::B8G8R8A8) 664 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::R8G8B8A8) 665 UNPREMULTIPLY_ROW_FALLBACK(SurfaceFormat::A8R8G8B8) 666 default: 667 break; 668 } 669 670 MOZ_ASSERT_UNREACHABLE("Unsupported premultiply formats"); 671 return nullptr; 672 } 673 674 /** 675 * Swizzling 676 */ 677 678 // Fallback swizzle implementation that uses shifting and masking to reorder 679 // pixels. 
// Reorder the channels of each 32-bit pixel, optionally swapping R and B and
// optionally forcing alpha opaque. aSrc and aDst are advanced past the chunk
// (passed by reference); aLength is in pixels.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleChunkFallback(const uint8_t*& aSrc, uint8_t*& aDst,
                                 int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);

    if (aSwapRB) {
      // Handle R and B swaps by exchanging words and masking.
      uint32_t rb =
          ((rgba << 16) | (rgba >> 16)) & (0x00FF00FF << aSrcRGBShift);
      uint32_t ga = rgba & ((0xFF << aSrcAShift) | (0xFF00 << aSrcRGBShift));
      rgba = rb | ga;
    }

    // If src and dst shifts differ, rotate left or right to move RGB into
    // place, i.e. ARGB -> RGBA or RGBA -> ARGB.
    if (aDstRGBShift > aSrcRGBShift) {
      rgba = (rgba << 8) | (aOpaqueAlpha ? 0x000000FF : rgba >> 24);
    } else if (aSrcRGBShift > aDstRGBShift) {
      rgba = (rgba >> 8) | (aOpaqueAlpha ? 0xFF000000 : rgba << 24);
    } else if (aOpaqueAlpha) {
      rgba |= 0xFF << aDstAShift;
    }

    *reinterpret_cast<uint32_t*>(aDst) = rgba;

    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

// Swizzle a single row of aLength pixels.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleRowFallback(const uint8_t* aSrc, uint8_t* aDst,
                               int32_t aLength) {
  SwizzleChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
                       aDstRGBShift, aDstAShift>(aSrc, aDst, aLength);
}

// Swizzle an entire surface, skipping the stride gaps between rows.
template <bool aSwapRB, bool aOpaqueAlpha, uint32_t aSrcRGBShift,
          uint32_t aSrcAShift, uint32_t aDstRGBShift, uint32_t aDstAShift>
static void SwizzleFallback(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                            int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    SwizzleChunkFallback<aSwapRB, aOpaqueAlpha, aSrcRGBShift, aSrcAShift,
                         aDstRGBShift, aDstAShift>(aSrc, aDst, aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define SWIZZLE_FALLBACK(aSrcFormat, aDstFormat)                          \
  FORMAT_CASE(                                                            \
      aSrcFormat, aDstFormat,                                             \
      SwizzleFallback<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                      ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                      RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                      RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

#define SWIZZLE_ROW_FALLBACK(aSrcFormat, aDstFormat)                         \
  FORMAT_CASE_ROW(                                                           \
      aSrcFormat, aDstFormat,                                                \
      SwizzleRowFallback<ShouldSwapRB(aSrcFormat, aDstFormat),               \
                         ShouldForceOpaque(aSrcFormat, aDstFormat),          \
                         RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                         RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

// Fast-path for matching formats.
template <int32_t aBytesPerPixel>
static void SwizzleRowCopy(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  if (aSrc != aDst) {
    memcpy(aDst, aSrc, aLength * aBytesPerPixel);
  }
}

// Fast-path for matching formats.
static void SwizzleCopy(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize, int32_t aBPP) {
  if (aSrc != aDst) {
    int32_t rowLength = aBPP * aSize.width;
    for (int32_t height = aSize.height; height > 0; height--) {
      memcpy(aDst, aSrc, rowLength);
      aSrc += rowLength + aSrcGap;
      aDst += rowLength + aDstGap;
    }
  }
}

// Fast-path for conversions that swap all bytes.
template <bool aOpaqueAlpha, uint32_t aSrcAShift, uint32_t aDstAShift>
static void SwizzleChunkSwap(const uint8_t*& aSrc, uint8_t*& aDst,
                             int32_t aLength) {
  const uint8_t* end = aSrc + 4 * aLength;
  do {
    // Use an endian swap to move the bytes, i.e. BGRA -> ARGB.
    uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
#if MOZ_LITTLE_ENDIAN()
    rgba = NativeEndian::swapToBigEndian(rgba);
#else
    rgba = NativeEndian::swapToLittleEndian(rgba);
#endif
    if (aOpaqueAlpha) {
      rgba |= 0xFF << aDstAShift;
    }
    *reinterpret_cast<uint32_t*>(aDst) = rgba;
    aSrc += 4;
    aDst += 4;
  } while (aSrc < end);
}

template <bool aOpaqueAlpha, uint32_t aSrcAShift, uint32_t aDstAShift>
static void SwizzleRowSwap(const uint8_t* aSrc, uint8_t* aDst,
                           int32_t aLength) {
  SwizzleChunkSwap<aOpaqueAlpha, aSrcAShift, aDstAShift>(aSrc, aDst, aLength);
}

template <bool aOpaqueAlpha, uint32_t aSrcAShift, uint32_t aDstAShift>
static void SwizzleSwap(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst,
                        int32_t aDstGap, IntSize aSize) {
  for (int32_t height = aSize.height; height > 0; height--) {
    SwizzleChunkSwap<aOpaqueAlpha, aSrcAShift, aDstAShift>(aSrc, aDst,
                                                           aSize.width);
    aSrc += aSrcGap;
    aDst += aDstGap;
  }
}

#define SWIZZLE_SWAP(aSrcFormat, aDstFormat)                      \
  FORMAT_CASE(                                                    \
      aSrcFormat, aDstFormat,                                     \
      SwizzleSwap<ShouldForceOpaque(aSrcFormat, aDstFormat),      \
                  AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)

#define SWIZZLE_ROW_SWAP(aSrcFormat, aDstFormat)                    \
  FORMAT_CASE_ROW(                                                  \
      aSrcFormat, aDstFormat,                                       \
      SwizzleRowSwap<ShouldForceOpaque(aSrcFormat, aDstFormat),     \
                     AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)

// Swap R and B in 3-byte RGB24 pixels; aLength is in pixels (3 bytes each).
static void SwizzleChunkSwapRGB24(const uint8_t*& aSrc, uint8_t*& aDst,
                                  int32_t aLength) {
  const uint8_t* end = aSrc + 3 * aLength;
  do {
    uint8_t r = aSrc[0];
    uint8_t g = aSrc[1];
    uint8_t b = aSrc[2];
    aDst[0] = b;
    aDst[1] = g;
    aDst[2] = r;
    aSrc += 3;
    aDst += 3;
  } while (aSrc < end);
}

static void SwizzleRowSwapRGB24(const uint8_t* aSrc, uint8_t* aDst,
                                int32_t aLength) {
  SwizzleChunkSwapRGB24(aSrc, aDst, aLength);
}
841 842 static void SwizzleSwapRGB24(const uint8_t* aSrc, int32_t aSrcGap, 843 uint8_t* aDst, int32_t aDstGap, IntSize aSize) { 844 for (int32_t height = aSize.height; height > 0; height--) { 845 SwizzleChunkSwapRGB24(aSrc, aDst, aSize.width); 846 aSrc += aSrcGap; 847 aDst += aDstGap; 848 } 849 } 850 851 #define SWIZZLE_SWAP_RGB24(aSrcFormat, aDstFormat) \ 852 FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleSwapRGB24) 853 854 #define SWIZZLE_ROW_SWAP_RGB24(aSrcFormat, aDstFormat) \ 855 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, SwizzleRowSwapRGB24) 856 857 // Fast-path for conversions that force alpha to opaque. 858 template <uint32_t aDstAShift> 859 static void SwizzleChunkOpaqueUpdate(uint8_t*& aBuffer, int32_t aLength) { 860 const uint8_t* end = aBuffer + 4 * aLength; 861 do { 862 uint32_t rgba = *reinterpret_cast<const uint32_t*>(aBuffer); 863 // Just add on the alpha bits to the source. 864 rgba |= 0xFF << aDstAShift; 865 *reinterpret_cast<uint32_t*>(aBuffer) = rgba; 866 aBuffer += 4; 867 } while (aBuffer < end); 868 } 869 870 template <uint32_t aDstAShift> 871 static void SwizzleChunkOpaqueCopy(const uint8_t*& aSrc, uint8_t* aDst, 872 int32_t aLength) { 873 const uint8_t* end = aSrc + 4 * aLength; 874 do { 875 uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc); 876 // Just add on the alpha bits to the source. 
877 rgba |= 0xFF << aDstAShift; 878 *reinterpret_cast<uint32_t*>(aDst) = rgba; 879 aSrc += 4; 880 aDst += 4; 881 } while (aSrc < end); 882 } 883 884 template <uint32_t aDstAShift> 885 static void SwizzleRowOpaque(const uint8_t* aSrc, uint8_t* aDst, 886 int32_t aLength) { 887 if (aSrc == aDst) { 888 SwizzleChunkOpaqueUpdate<aDstAShift>(aDst, aLength); 889 } else { 890 SwizzleChunkOpaqueCopy<aDstAShift>(aSrc, aDst, aLength); 891 } 892 } 893 894 template <uint32_t aDstAShift> 895 static void SwizzleOpaque(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst, 896 int32_t aDstGap, IntSize aSize) { 897 if (aSrc == aDst) { 898 // Modifying in-place, so just write out the alpha. 899 for (int32_t height = aSize.height; height > 0; height--) { 900 SwizzleChunkOpaqueUpdate<aDstAShift>(aDst, aSize.width); 901 aDst += aDstGap; 902 } 903 } else { 904 for (int32_t height = aSize.height; height > 0; height--) { 905 SwizzleChunkOpaqueCopy<aDstAShift>(aSrc, aDst, aSize.width); 906 aSrc += aSrcGap; 907 aDst += aDstGap; 908 } 909 } 910 } 911 912 #define SWIZZLE_OPAQUE(aSrcFormat, aDstFormat) \ 913 FORMAT_CASE(aSrcFormat, aDstFormat, SwizzleOpaque<AlphaBitShift(aDstFormat)>) 914 915 #define SWIZZLE_ROW_OPAQUE(aSrcFormat, aDstFormat) \ 916 FORMAT_CASE_ROW(aSrcFormat, aDstFormat, \ 917 SwizzleRowOpaque<AlphaBitShift(aDstFormat)>) 918 919 // Packing of 32-bit formats to RGB565. 920 template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex> 921 static void PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst, 922 int32_t aDstGap, IntSize aSize) { 923 for (int32_t height = aSize.height; height > 0; height--) { 924 const uint8_t* end = aSrc + 4 * aSize.width; 925 do { 926 uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc); 927 928 // Isolate the R, G, and B components and shift to final endian-dependent 929 // locations. 
930 uint16_t rgb565; 931 if (aSwapRB) { 932 rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) << (8 - aSrcRGBShift)) | 933 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) | 934 ((rgba & (0xF80000 << aSrcRGBShift)) >> (19 + aSrcRGBShift)); 935 } else { 936 rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) >> (3 + aSrcRGBShift)) | 937 ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) | 938 ((rgba & (0xF80000 << aSrcRGBShift)) >> (8 + aSrcRGBShift)); 939 } 940 941 *reinterpret_cast<uint16_t*>(aDst) = rgb565; 942 943 aSrc += 4; 944 aDst += 2; 945 } while (aSrc < end); 946 947 aSrc += aSrcGap; 948 aDst += aDstGap; 949 } 950 } 951 952 // Packing of 32-bit formats to 24-bit formats. 953 template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex> 954 static void PackChunkToRGB24(const uint8_t*& aSrc, uint8_t*& aDst, 955 int32_t aLength) { 956 const uint8_t* end = aSrc + 4 * aLength; 957 do { 958 uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)]; 959 uint8_t g = aSrc[aSrcRGBIndex + 1]; 960 uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 
0 : 2)]; 961 962 aDst[0] = r; 963 aDst[1] = g; 964 aDst[2] = b; 965 966 aSrc += 4; 967 aDst += 3; 968 } while (aSrc < end); 969 } 970 971 template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex> 972 static void PackRowToRGB24(const uint8_t* aSrc, uint8_t* aDst, 973 int32_t aLength) { 974 PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst, aLength); 975 } 976 977 template <bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex> 978 static void PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst, 979 int32_t aDstGap, IntSize aSize) { 980 for (int32_t height = aSize.height; height > 0; height--) { 981 PackChunkToRGB24<aSwapRB, aSrcRGBShift, aSrcRGBIndex>(aSrc, aDst, 982 aSize.width); 983 aSrc += aSrcGap; 984 aDst += aDstGap; 985 } 986 } 987 988 #define PACK_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc) \ 989 FORMAT_CASE(aSrcFormat, aDstFormat, \ 990 aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), \ 991 RGBBitShift(aSrcFormat), RGBByteIndex(aSrcFormat)>) 992 993 #define PACK_RGB(aDstFormat, aPackFunc) \ 994 PACK_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \ 995 PACK_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \ 996 PACK_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \ 997 PACK_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \ 998 PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \ 999 PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc) 1000 1001 #define PACK_ROW_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc) \ 1002 FORMAT_CASE_ROW( \ 1003 aSrcFormat, aDstFormat, \ 1004 aPackFunc<ShouldSwapRB(aSrcFormat, aDstFormat), RGBBitShift(aSrcFormat), \ 1005 RGBByteIndex(aSrcFormat)>) 1006 1007 #define PACK_ROW_RGB(aDstFormat, aPackFunc) \ 1008 PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \ 1009 PACK_ROW_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \ 1010 PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \ 1011 
PACK_ROW_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \ 1012 PACK_ROW_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \ 1013 PACK_ROW_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc) 1014 1015 // Packing of 32-bit formats to A8. 1016 template <uint32_t aSrcAIndex> 1017 static void PackToA8(const uint8_t* aSrc, int32_t aSrcGap, uint8_t* aDst, 1018 int32_t aDstGap, IntSize aSize) { 1019 for (int32_t height = aSize.height; height > 0; height--) { 1020 const uint8_t* end = aSrc + 4 * aSize.width; 1021 do { 1022 *aDst++ = aSrc[aSrcAIndex]; 1023 aSrc += 4; 1024 } while (aSrc < end); 1025 aSrc += aSrcGap; 1026 aDst += aDstGap; 1027 } 1028 } 1029 1030 #define PACK_ALPHA_CASE(aSrcFormat, aDstFormat, aPackFunc) \ 1031 FORMAT_CASE(aSrcFormat, aDstFormat, aPackFunc<AlphaByteIndex(aSrcFormat)>) 1032 1033 #define PACK_ALPHA(aDstFormat, aPackFunc) \ 1034 PACK_ALPHA_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \ 1035 PACK_ALPHA_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \ 1036 PACK_ALPHA_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) 1037 1038 template <bool aSwapRB> 1039 void UnpackRowRGB24(const uint8_t* aSrc, uint8_t* aDst, int32_t aLength) { 1040 // Because we are expanding, we can only process the data back to front in 1041 // case we are performing this in place. 1042 const uint8_t* src = aSrc + 3 * (aLength - 1); 1043 uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + 4 * aLength); 1044 while (src >= aSrc) { 1045 uint8_t r = src[aSwapRB ? 2 : 0]; 1046 uint8_t g = src[1]; 1047 uint8_t b = src[aSwapRB ? 0 : 2]; 1048 #if MOZ_LITTLE_ENDIAN() 1049 *--dst = 0xFF000000 | (b << 16) | (g << 8) | r; 1050 #else 1051 *--dst = 0x000000FF | (b << 8) | (g << 16) | (r << 24); 1052 #endif 1053 src -= 3; 1054 } 1055 } 1056 1057 // Force instantiation of swizzle variants here. 
1058 template void UnpackRowRGB24<false>(const uint8_t*, uint8_t*, int32_t); 1059 template void UnpackRowRGB24<true>(const uint8_t*, uint8_t*, int32_t); 1060 1061 #define UNPACK_ROW_RGB(aDstFormat) \ 1062 FORMAT_CASE_ROW( \ 1063 SurfaceFormat::R8G8B8, aDstFormat, \ 1064 UnpackRowRGB24<ShouldSwapRB(SurfaceFormat::R8G8B8, aDstFormat)>) 1065 1066 static void UnpackRowRGB24_To_ARGB(const uint8_t* aSrc, uint8_t* aDst, 1067 int32_t aLength) { 1068 // Because we are expanding, we can only process the data back to front in 1069 // case we are performing this in place. 1070 const uint8_t* src = aSrc + 3 * (aLength - 1); 1071 uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + 4 * aLength); 1072 while (src >= aSrc) { 1073 uint8_t r = src[0]; 1074 uint8_t g = src[1]; 1075 uint8_t b = src[2]; 1076 #if MOZ_LITTLE_ENDIAN() 1077 *--dst = 0x000000FF | (r << 8) | (g << 16) | (b << 24); 1078 #else 1079 *--dst = 0xFF000000 | (r << 24) | (g << 16) | b; 1080 #endif 1081 src -= 3; 1082 } 1083 } 1084 1085 #define UNPACK_ROW_RGB_TO_ARGB(aDstFormat) \ 1086 FORMAT_CASE_ROW(SurfaceFormat::R8G8B8, aDstFormat, UnpackRowRGB24_To_ARGB) 1087 1088 bool SwizzleData(const uint8_t* aSrc, int32_t aSrcStride, 1089 SurfaceFormat aSrcFormat, uint8_t* aDst, int32_t aDstStride, 1090 SurfaceFormat aDstFormat, const IntSize& aSize) { 1091 if (aSize.IsEmpty()) { 1092 return true; 1093 } 1094 IntSize size = CollapseSize(aSize, aSrcStride, aDstStride); 1095 // Find gap from end of row to the start of the next row. 1096 int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride); 1097 int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride); 1098 MOZ_ASSERT(srcGap >= 0 && dstGap >= 0); 1099 if (srcGap < 0 || dstGap < 0) { 1100 return false; 1101 } 1102 1103 #define FORMAT_CASE_CALL(...) 
__VA_ARGS__(aSrc, srcGap, aDst, dstGap, size) 1104 1105 #ifdef USE_SSE2 1106 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 1107 SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 1108 SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8) 1109 SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8) 1110 SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8) 1111 SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 1112 SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8) 1113 SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8) 1114 SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8) 1115 default: 1116 break; 1117 } 1118 #endif 1119 1120 #ifdef USE_NEON 1121 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 1122 SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 1123 SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8) 1124 SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8) 1125 SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8) 1126 SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 1127 SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8) 1128 SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8) 1129 SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8) 1130 default: 1131 break; 1132 } 1133 #endif 1134 1135 switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 1136 SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 1137 SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8) 1138 SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8) 1139 SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8) 1140 1141 SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 1142 SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8) 1143 SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, 
SurfaceFormat::B8G8R8X8) 1144 SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8) 1145 SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8) 1146 SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8) 1147 1148 SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8) 1149 SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8) 1150 SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8) 1151 SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8) 1152 1153 SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8) 1154 SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8) 1155 SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8) 1156 SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8) 1157 SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8) 1158 SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8) 1159 SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8) 1160 SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8) 1161 1162 SWIZZLE_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8) 1163 SWIZZLE_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8) 1164 1165 SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8) 1166 SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8) 1167 SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8) 1168 SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8) 1169 SWIZZLE_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8) 1170 SWIZZLE_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8) 1171 1172 PACK_RGB(SurfaceFormat::R5G6B5_UINT16, PackToRGB565) 1173 PACK_RGB(SurfaceFormat::B8G8R8, PackToRGB24) 1174 PACK_RGB(SurfaceFormat::R8G8B8, PackToRGB24) 1175 PACK_ALPHA(SurfaceFormat::A8, PackToA8) 1176 1177 default: 1178 break; 1179 } 1180 1181 if (aSrcFormat == aDstFormat) { 1182 // If the formats match, just do a generic copy. 
1183 SwizzleCopy(aSrc, srcGap, aDst, dstGap, size, BytesPerPixel(aSrcFormat)); 1184 return true; 1185 } 1186 1187 #undef FORMAT_CASE_CALL 1188 1189 MOZ_ASSERT(false, "Unsupported swizzle formats"); 1190 return false; 1191 } 1192 1193 static bool SwizzleYFlipDataInternal(const uint8_t* aSrc, int32_t aSrcStride, 1194 SurfaceFormat aSrcFormat, uint8_t* aDst, 1195 int32_t aDstStride, 1196 SurfaceFormat aDstFormat, 1197 const IntSize& aSize, 1198 SwizzleRowFn aSwizzleFn) { 1199 if (!aSwizzleFn) { 1200 return false; 1201 } 1202 1203 // Guarantee our width and height are both greater than zero. 1204 if (aSize.IsEmpty()) { 1205 return true; 1206 } 1207 1208 // Unlike SwizzleData/PremultiplyData, we don't use the stride gaps directly, 1209 // but we can use it to verify that the stride is valid for our width and 1210 // format. 1211 int32_t srcGap = GetStrideGap(aSize.width, aSrcFormat, aSrcStride); 1212 int32_t dstGap = GetStrideGap(aSize.width, aDstFormat, aDstStride); 1213 MOZ_ASSERT(srcGap >= 0 && dstGap >= 0); 1214 if (srcGap < 0 || dstGap < 0) { 1215 return false; 1216 } 1217 1218 // Swapping/swizzling to a new buffer is trivial. 1219 if (aSrc != aDst) { 1220 const uint8_t* src = aSrc; 1221 const uint8_t* srcEnd = aSrc + aSize.height * aSrcStride; 1222 uint8_t* dst = aDst + (aSize.height - 1) * aDstStride; 1223 while (src < srcEnd) { 1224 aSwizzleFn(src, dst, aSize.width); 1225 src += aSrcStride; 1226 dst -= aDstStride; 1227 } 1228 return true; 1229 } 1230 1231 if (aSrcStride != aDstStride) { 1232 return false; 1233 } 1234 1235 // If we are swizzling in place, then we need a temporary row buffer. 1236 UniquePtr<uint8_t[]> rowBuffer(new (std::nothrow) uint8_t[aDstStride]); 1237 if (!rowBuffer) { 1238 return false; 1239 } 1240 1241 // Swizzle and swap the top and bottom rows until we meet in the middle. 
1242 int32_t middleRow = aSize.height / 2; 1243 uint8_t* top = aDst; 1244 uint8_t* bottom = aDst + (aSize.height - 1) * aDstStride; 1245 for (int32_t row = 0; row < middleRow; ++row) { 1246 memcpy(rowBuffer.get(), bottom, aDstStride); 1247 aSwizzleFn(top, bottom, aSize.width); 1248 aSwizzleFn(rowBuffer.get(), top, aSize.width); 1249 top += aDstStride; 1250 bottom -= aDstStride; 1251 } 1252 1253 // If there is an odd numbered row, we haven't swizzled it yet. 1254 if (aSize.height % 2 == 1) { 1255 top = aDst + middleRow * aDstStride; 1256 aSwizzleFn(top, top, aSize.width); 1257 } 1258 return true; 1259 } 1260 1261 bool SwizzleYFlipData(const uint8_t* aSrc, int32_t aSrcStride, 1262 SurfaceFormat aSrcFormat, uint8_t* aDst, 1263 int32_t aDstStride, SurfaceFormat aDstFormat, 1264 const IntSize& aSize) { 1265 return SwizzleYFlipDataInternal(aSrc, aSrcStride, aSrcFormat, aDst, 1266 aDstStride, aDstFormat, aSize, 1267 SwizzleRow(aSrcFormat, aDstFormat)); 1268 } 1269 1270 bool PremultiplyYFlipData(const uint8_t* aSrc, int32_t aSrcStride, 1271 SurfaceFormat aSrcFormat, uint8_t* aDst, 1272 int32_t aDstStride, SurfaceFormat aDstFormat, 1273 const IntSize& aSize) { 1274 return SwizzleYFlipDataInternal(aSrc, aSrcStride, aSrcFormat, aDst, 1275 aDstStride, aDstFormat, aSize, 1276 PremultiplyRow(aSrcFormat, aDstFormat)); 1277 } 1278 1279 SwizzleRowFn SwizzleRow(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat) { 1280 #ifdef USE_SSE2 1281 if (mozilla::supports_avx2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 1282 UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8X8) 1283 UNPACK_ROW_RGB_AVX2(SurfaceFormat::R8G8B8A8) 1284 UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8X8) 1285 UNPACK_ROW_RGB_AVX2(SurfaceFormat::B8G8R8A8) 1286 default: 1287 break; 1288 } 1289 1290 if (mozilla::supports_ssse3()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 1291 UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8X8) 1292 UNPACK_ROW_RGB_SSSE3(SurfaceFormat::R8G8B8A8) 1293 UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8X8) 1294 
UNPACK_ROW_RGB_SSSE3(SurfaceFormat::B8G8R8A8) 1295 default: 1296 break; 1297 } 1298 1299 if (mozilla::supports_sse2()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 1300 SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 1301 SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8) 1302 SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8) 1303 SWIZZLE_ROW_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8) 1304 SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 1305 SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8) 1306 SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8) 1307 SWIZZLE_ROW_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8) 1308 default: 1309 break; 1310 } 1311 #endif 1312 1313 #ifdef USE_NEON 1314 if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 1315 UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8X8) 1316 UNPACK_ROW_RGB_NEON(SurfaceFormat::R8G8B8A8) 1317 UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8X8) 1318 UNPACK_ROW_RGB_NEON(SurfaceFormat::B8G8R8A8) 1319 SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 1320 SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8) 1321 SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8) 1322 SWIZZLE_ROW_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8) 1323 SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 1324 SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8) 1325 SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8) 1326 SWIZZLE_ROW_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8) 1327 default: 1328 break; 1329 } 1330 #endif 1331 1332 switch (FORMAT_KEY(aSrcFormat, aDstFormat)) { 1333 SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8) 1334 SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8) 1335 SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8) 
1336 SWIZZLE_ROW_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8) 1337 1338 SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8) 1339 SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8) 1340 SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8) 1341 SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8) 1342 SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8) 1343 SWIZZLE_ROW_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8) 1344 1345 SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8) 1346 SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8) 1347 SWIZZLE_ROW_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8) 1348 SWIZZLE_ROW_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8) 1349 1350 SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8) 1351 SWIZZLE_ROW_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8) 1352 SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8) 1353 SWIZZLE_ROW_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8) 1354 SWIZZLE_ROW_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8) 1355 SWIZZLE_ROW_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8) 1356 1357 SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8) 1358 SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8) 1359 SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8) 1360 SWIZZLE_ROW_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8) 1361 SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8) 1362 SWIZZLE_ROW_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8) 1363 SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8) 1364 SWIZZLE_ROW_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8) 1365 1366 SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::R8G8B8, SurfaceFormat::B8G8R8) 1367 
SWIZZLE_ROW_SWAP_RGB24(SurfaceFormat::B8G8R8, SurfaceFormat::R8G8B8) 1368 1369 UNPACK_ROW_RGB(SurfaceFormat::R8G8B8X8) 1370 UNPACK_ROW_RGB(SurfaceFormat::R8G8B8A8) 1371 UNPACK_ROW_RGB(SurfaceFormat::B8G8R8X8) 1372 UNPACK_ROW_RGB(SurfaceFormat::B8G8R8A8) 1373 UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::A8R8G8B8) 1374 UNPACK_ROW_RGB_TO_ARGB(SurfaceFormat::X8R8G8B8) 1375 1376 PACK_ROW_RGB(SurfaceFormat::R8G8B8, PackRowToRGB24) 1377 PACK_ROW_RGB(SurfaceFormat::B8G8R8, PackRowToRGB24) 1378 1379 default: 1380 break; 1381 } 1382 1383 if (aSrcFormat == aDstFormat) { 1384 switch (BytesPerPixel(aSrcFormat)) { 1385 case 4: 1386 return &SwizzleRowCopy<4>; 1387 case 3: 1388 return &SwizzleRowCopy<3>; 1389 default: 1390 break; 1391 } 1392 } 1393 1394 MOZ_ASSERT_UNREACHABLE("Unsupported swizzle formats"); 1395 return nullptr; 1396 } 1397 1398 static IntRect ReorientRowRotate0FlipFallback(const uint8_t* aSrc, 1399 int32_t aSrcRow, uint8_t* aDst, 1400 const IntSize& aDstSize, 1401 int32_t aDstStride) { 1402 // Reverse order of pixels in the row. 1403 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc); 1404 const uint32_t* end = src + aDstSize.width; 1405 uint32_t* dst = reinterpret_cast<uint32_t*>(aDst + aSrcRow * aDstStride) + 1406 aDstSize.width - 1; 1407 do { 1408 *dst-- = *src++; 1409 } while (src < end); 1410 1411 return IntRect(0, aSrcRow, aDstSize.width, 1); 1412 } 1413 1414 static IntRect ReorientRowRotate90FlipFallback(const uint8_t* aSrc, 1415 int32_t aSrcRow, uint8_t* aDst, 1416 const IntSize& aDstSize, 1417 int32_t aDstStride) { 1418 // Copy row of pixels from top to bottom, into left to right columns. 
1419 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc); 1420 const uint32_t* end = src + aDstSize.height; 1421 uint32_t* dst = reinterpret_cast<uint32_t*>(aDst) + aSrcRow; 1422 int32_t stride = aDstStride / sizeof(uint32_t); 1423 do { 1424 *dst = *src++; 1425 dst += stride; 1426 } while (src < end); 1427 1428 return IntRect(aSrcRow, 0, 1, aDstSize.height); 1429 } 1430 1431 static IntRect ReorientRowRotate180FlipFallback(const uint8_t* aSrc, 1432 int32_t aSrcRow, uint8_t* aDst, 1433 const IntSize& aDstSize, 1434 int32_t aDstStride) { 1435 // Copy row of pixels from top to bottom, into bottom to top rows. 1436 uint8_t* dst = aDst + (aDstSize.height - aSrcRow - 1) * aDstStride; 1437 memcpy(dst, aSrc, aDstSize.width * sizeof(uint32_t)); 1438 return IntRect(0, aDstSize.height - aSrcRow - 1, aDstSize.width, 1); 1439 } 1440 1441 static IntRect ReorientRowRotate270FlipFallback(const uint8_t* aSrc, 1442 int32_t aSrcRow, uint8_t* aDst, 1443 const IntSize& aDstSize, 1444 int32_t aDstStride) { 1445 // Copy row of pixels in reverse order from top to bottom, into right to left 1446 // columns. 1447 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc); 1448 const uint32_t* end = src + aDstSize.height; 1449 uint32_t* dst = 1450 reinterpret_cast<uint32_t*>(aDst + (aDstSize.height - 1) * aDstStride) + 1451 aDstSize.width - aSrcRow - 1; 1452 int32_t stride = aDstStride / sizeof(uint32_t); 1453 do { 1454 *dst = *src++; 1455 dst -= stride; 1456 } while (src < end); 1457 1458 return IntRect(aDstSize.width - aSrcRow - 1, 0, 1, aDstSize.height); 1459 } 1460 1461 static IntRect ReorientRowRotate0Fallback(const uint8_t* aSrc, int32_t aSrcRow, 1462 uint8_t* aDst, 1463 const IntSize& aDstSize, 1464 int32_t aDstStride) { 1465 // Copy row of pixels into the destination. 
1466 uint8_t* dst = aDst + aSrcRow * aDstStride; 1467 memcpy(dst, aSrc, aDstSize.width * sizeof(uint32_t)); 1468 return IntRect(0, aSrcRow, aDstSize.width, 1); 1469 } 1470 1471 static IntRect ReorientRowRotate90Fallback(const uint8_t* aSrc, int32_t aSrcRow, 1472 uint8_t* aDst, 1473 const IntSize& aDstSize, 1474 int32_t aDstStride) { 1475 // Copy row of pixels from top to bottom, into right to left columns. 1476 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc); 1477 const uint32_t* end = src + aDstSize.height; 1478 uint32_t* dst = 1479 reinterpret_cast<uint32_t*>(aDst) + aDstSize.width - aSrcRow - 1; 1480 int32_t stride = aDstStride / sizeof(uint32_t); 1481 do { 1482 *dst = *src++; 1483 dst += stride; 1484 } while (src < end); 1485 1486 return IntRect(aDstSize.width - aSrcRow - 1, 0, 1, aDstSize.height); 1487 } 1488 1489 static IntRect ReorientRowRotate180Fallback(const uint8_t* aSrc, 1490 int32_t aSrcRow, uint8_t* aDst, 1491 const IntSize& aDstSize, 1492 int32_t aDstStride) { 1493 // Copy row of pixels in reverse order from top to bottom, into bottom to top 1494 // rows. 1495 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc); 1496 const uint32_t* end = src + aDstSize.width; 1497 uint32_t* dst = reinterpret_cast<uint32_t*>( 1498 aDst + (aDstSize.height - aSrcRow - 1) * aDstStride) + 1499 aDstSize.width - 1; 1500 do { 1501 *dst-- = *src++; 1502 } while (src < end); 1503 1504 return IntRect(0, aDstSize.height - aSrcRow - 1, aDstSize.width, 1); 1505 } 1506 1507 static IntRect ReorientRowRotate270Fallback(const uint8_t* aSrc, 1508 int32_t aSrcRow, uint8_t* aDst, 1509 const IntSize& aDstSize, 1510 int32_t aDstStride) { 1511 // Copy row of pixels in reverse order from top to bottom, into left to right 1512 // column. 
1513 const uint32_t* src = reinterpret_cast<const uint32_t*>(aSrc); 1514 const uint32_t* end = src + aDstSize.height; 1515 uint32_t* dst = 1516 reinterpret_cast<uint32_t*>(aDst + (aDstSize.height - 1) * aDstStride) + 1517 aSrcRow; 1518 int32_t stride = aDstStride / sizeof(uint32_t); 1519 do { 1520 *dst = *src++; 1521 dst -= stride; 1522 } while (src < end); 1523 1524 return IntRect(aSrcRow, 0, 1, aDstSize.height); 1525 } 1526 1527 ReorientRowFn ReorientRow(const struct image::Orientation& aOrientation) { 1528 switch (aOrientation.flip) { 1529 case image::Flip::Unflipped: 1530 switch (aOrientation.rotation) { 1531 case image::Angle::D0: 1532 return &ReorientRowRotate0Fallback; 1533 case image::Angle::D90: 1534 return &ReorientRowRotate90Fallback; 1535 case image::Angle::D180: 1536 return &ReorientRowRotate180Fallback; 1537 case image::Angle::D270: 1538 return &ReorientRowRotate270Fallback; 1539 default: 1540 break; 1541 } 1542 break; 1543 case image::Flip::Horizontal: 1544 switch (aOrientation.rotation) { 1545 case image::Angle::D0: 1546 return &ReorientRowRotate0FlipFallback; 1547 case image::Angle::D90: 1548 if (aOrientation.flipFirst) { 1549 return &ReorientRowRotate270FlipFallback; 1550 } else { 1551 return &ReorientRowRotate90FlipFallback; 1552 } 1553 case image::Angle::D180: 1554 return &ReorientRowRotate180FlipFallback; 1555 case image::Angle::D270: 1556 if (aOrientation.flipFirst) { 1557 return &ReorientRowRotate90FlipFallback; 1558 } else { 1559 return &ReorientRowRotate270FlipFallback; 1560 } 1561 default: 1562 break; 1563 } 1564 break; 1565 default: 1566 break; 1567 } 1568 1569 MOZ_ASSERT_UNREACHABLE("Unhandled orientation!"); 1570 return nullptr; 1571 } 1572 1573 } // namespace gfx 1574 } // namespace mozilla