SkConvolver.cpp (26681B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 // Copyright (c) 2011-2016 Google Inc. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the gfx/skia/LICENSE file. 6 7 #include "SkConvolver.h" 8 9 #ifdef USE_SSE2 10 # include "mozilla/SSE.h" 11 #endif 12 13 #ifdef USE_NEON 14 # include "mozilla/arm.h" 15 #endif 16 17 namespace skia { 18 19 using mozilla::gfx::BytesPerPixel; 20 using mozilla::gfx::IsOpaque; 21 using mozilla::gfx::SurfaceFormat; 22 23 // Converts the argument to an 8-bit unsigned value by clamping to the range 24 // 0-255. 25 static inline unsigned char ClampTo8(int a) { 26 if (static_cast<unsigned>(a) < 256) { 27 return a; // Avoid the extra check in the common case. 28 } 29 if (a < 0) { 30 return 0; 31 } 32 return 255; 33 } 34 35 // Convolves horizontally along a single row. The row data is given in 36 // |srcData| and continues for the numValues() of the filter. 37 template <bool hasAlpha> 38 void ConvolveHorizontally(const unsigned char* srcData, 39 const SkConvolutionFilter1D& filter, 40 unsigned char* outRow) { 41 // Loop over each pixel on this row in the output image. 42 int numValues = filter.numValues(); 43 for (int outX = 0; outX < numValues; outX++) { 44 // Get the filter that determines the current output pixel. 45 int filterOffset, filterLength; 46 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 47 filter.FilterForValue(outX, &filterOffset, &filterLength); 48 49 // Compute the first pixel in this row that the filter affects. It will 50 // touch |filterLength| pixels (4 bytes each) after this. 51 const unsigned char* rowToFilter = &srcData[filterOffset * 4]; 52 53 // Apply the filter to the row to get the destination pixel in |accum|. 54 int accum[4] = {0}; 55 for (int filterX = 0; filterX < filterLength; filterX++) { 56 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX]; 57 accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; 58 accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; 59 accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; 60 if (hasAlpha) { 61 accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; 62 } 63 } 64 65 // Bring this value back in range. All of the filter scaling factors 66 // are in fixed point with kShiftBits bits of fractional part. 67 accum[0] >>= SkConvolutionFilter1D::kShiftBits; 68 accum[1] >>= SkConvolutionFilter1D::kShiftBits; 69 accum[2] >>= SkConvolutionFilter1D::kShiftBits; 70 71 if (hasAlpha) { 72 accum[3] >>= SkConvolutionFilter1D::kShiftBits; 73 } 74 75 // Store the new pixel. 76 outRow[outX * 4 + 0] = ClampTo8(accum[0]); 77 outRow[outX * 4 + 1] = ClampTo8(accum[1]); 78 outRow[outX * 4 + 2] = ClampTo8(accum[2]); 79 if (hasAlpha) { 80 outRow[outX * 4 + 3] = ClampTo8(accum[3]); 81 } 82 } 83 } 84 85 // Does vertical convolution to produce one output row. The filter values and 86 // length are given in the first two parameters. These are applied to each 87 // of the rows pointed to in the |sourceDataRows| array, with each row 88 // being |pixelWidth| wide. 89 // 90 // The output must have room for |pixelWidth * 4| bytes. 91 template <bool hasAlpha> 92 void ConvolveVertically( 93 const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 94 int filterLength, unsigned char* const* sourceDataRows, int pixelWidth, 95 unsigned char* outRow) { 96 // We go through each column in the output and do a vertical convolution, 97 // generating one output pixel each time. 98 for (int outX = 0; outX < pixelWidth; outX++) { 99 // Compute the number of bytes over in each row that the current column 100 // we're convolving starts at. The pixel will cover the next 4 bytes. 101 int byteOffset = outX * 4; 102 103 // Apply the filter to one column of pixels. 104 int accum[4] = {0}; 105 for (int filterY = 0; filterY < filterLength; filterY++) { 106 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY]; 107 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; 108 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; 109 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; 110 if (hasAlpha) { 111 accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3]; 112 } 113 } 114 115 // Bring this value back in range. All of the filter scaling factors 116 // are in fixed point with kShiftBits bits of precision. 117 accum[0] >>= SkConvolutionFilter1D::kShiftBits; 118 accum[1] >>= SkConvolutionFilter1D::kShiftBits; 119 accum[2] >>= SkConvolutionFilter1D::kShiftBits; 120 if (hasAlpha) { 121 accum[3] >>= SkConvolutionFilter1D::kShiftBits; 122 } 123 124 // Store the new pixel. 125 outRow[byteOffset + 0] = ClampTo8(accum[0]); 126 outRow[byteOffset + 1] = ClampTo8(accum[1]); 127 outRow[byteOffset + 2] = ClampTo8(accum[2]); 128 129 if (hasAlpha) { 130 unsigned char alpha = ClampTo8(accum[3]); 131 132 // Make sure the alpha channel doesn't come out smaller than any of the 133 // color channels. We use premultipled alpha channels, so this should 134 // never happen, but rounding errors will cause this from time to time. 135 // These "impossible" colors will cause overflows (and hence random pixel 136 // values) when the resulting bitmap is drawn to the screen. 137 // 138 // We only need to do this when generating the final output row (here). 139 int maxColorChannel = 140 std::max(outRow[byteOffset + 0], 141 std::max(outRow[byteOffset + 1], outRow[byteOffset + 2])); 142 if (alpha < maxColorChannel) { 143 outRow[byteOffset + 3] = maxColorChannel; 144 } else { 145 outRow[byteOffset + 3] = alpha; 146 } 147 } else { 148 // No alpha channel, the image is opaque. 149 outRow[byteOffset + 3] = 0xff; 150 } 151 } 152 } 153 154 // Convolves horizontally along a single row. The row data is given in 155 // |srcData| and continues for the numValues() of the filter. 156 void ConvolveHorizontallyA8(const unsigned char* srcData, 157 const SkConvolutionFilter1D& filter, 158 unsigned char* outRow) { 159 // Loop over each pixel on this row in the output image. 160 int numValues = filter.numValues(); 161 for (int outX = 0; outX < numValues; outX++) { 162 // Get the filter that determines the current output pixel. 163 int filterOffset, filterLength; 164 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 165 filter.FilterForValue(outX, &filterOffset, &filterLength); 166 167 // Compute the first pixel in this row that the filter affects. It will 168 // touch |filterLength| pixels (4 bytes each) after this. 169 const unsigned char* rowToFilter = &srcData[filterOffset]; 170 171 // Apply the filter to the row to get the destination pixel in |accum|. 172 int accum = 0; 173 for (int filterX = 0; filterX < filterLength; filterX++) { 174 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX]; 175 accum += curFilter * rowToFilter[filterX]; 176 } 177 178 // Bring this value back in range. All of the filter scaling factors 179 // are in fixed point with kShiftBits bits of fractional part. 180 accum >>= SkConvolutionFilter1D::kShiftBits; 181 182 // Store the new pixel. 183 outRow[outX] = ClampTo8(accum); 184 } 185 } 186 187 // Does vertical convolution to produce one output row. The filter values and 188 // length are given in the first two parameters. These are applied to each 189 // of the rows pointed to in the |sourceDataRows| array, with each row 190 // being |pixelWidth| wide. 191 // 192 // The output must have room for |pixelWidth| bytes. 193 void ConvolveVerticallyA8( 194 const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 195 int filterLength, unsigned char* const* sourceDataRows, int pixelWidth, 196 unsigned char* outRow) { 197 // We go through each column in the output and do a vertical convolution, 198 // generating one output pixel each time. 199 for (int outX = 0; outX < pixelWidth; outX++) { 200 // Apply the filter to one column of pixels. 201 int accum = 0; 202 for (int filterY = 0; filterY < filterLength; filterY++) { 203 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY]; 204 accum += curFilter * sourceDataRows[filterY][outX]; 205 } 206 207 // Bring this value back in range. All of the filter scaling factors 208 // are in fixed point with kShiftBits bits of precision. 209 accum >>= SkConvolutionFilter1D::kShiftBits; 210 211 // Store the new pixel. 212 outRow[outX] = ClampTo8(accum); 213 } 214 } 215 216 #ifdef USE_SSE2 217 void convolve_vertically_avx2(const int16_t* filter, int filterLen, 218 uint8_t* const* srcRows, int width, uint8_t* out, 219 bool hasAlpha); 220 void convolve_horizontally_sse2(const unsigned char* srcData, 221 const SkConvolutionFilter1D& filter, 222 unsigned char* outRow, bool hasAlpha); 223 void convolve_vertically_sse2(const int16_t* filter, int filterLen, 224 uint8_t* const* srcRows, int width, uint8_t* out, 225 bool hasAlpha); 226 #elif defined(USE_NEON) 227 void convolve_horizontally_neon(const unsigned char* srcData, 228 const SkConvolutionFilter1D& filter, 229 unsigned char* outRow, bool hasAlpha); 230 void convolve_vertically_neon(const int16_t* filter, int filterLen, 231 uint8_t* const* srcRows, int width, uint8_t* out, 232 bool hasAlpha); 233 #endif 234 235 void convolve_horizontally(const unsigned char* srcData, 236 const SkConvolutionFilter1D& filter, 237 unsigned char* outRow, SurfaceFormat format) { 238 if (format == SurfaceFormat::A8) { 239 ConvolveHorizontallyA8(srcData, filter, outRow); 240 return; 241 } 242 243 bool hasAlpha = !IsOpaque(format); 244 #ifdef USE_SSE2 245 if (mozilla::supports_sse2()) { 246 convolve_horizontally_sse2(srcData, filter, outRow, hasAlpha); 247 return; 248 } 249 #elif defined(USE_NEON) 250 if (mozilla::supports_neon()) { 251 convolve_horizontally_neon(srcData, filter, outRow, hasAlpha); 252 return; 253 } 254 #endif 255 if (hasAlpha) { 256 ConvolveHorizontally<true>(srcData, filter, outRow); 257 } else { 258 ConvolveHorizontally<false>(srcData, filter, outRow); 259 } 260 } 261 262 void convolve_vertically( 263 const SkConvolutionFilter1D::ConvolutionFixed* filterValues, 264 int filterLength, unsigned char* const* sourceDataRows, int pixelWidth, 265 unsigned char* outRow, SurfaceFormat format) { 266 if (format == SurfaceFormat::A8) { 267 ConvolveVerticallyA8(filterValues, filterLength, sourceDataRows, pixelWidth, 268 outRow); 269 return; 270 } 271 272 bool hasAlpha = !IsOpaque(format); 273 #ifdef USE_SSE2 274 if (mozilla::supports_avx2()) { 275 convolve_vertically_avx2(filterValues, filterLength, sourceDataRows, 276 pixelWidth, outRow, hasAlpha); 277 return; 278 } 279 if (mozilla::supports_sse2()) { 280 convolve_vertically_sse2(filterValues, filterLength, sourceDataRows, 281 pixelWidth, outRow, hasAlpha); 282 return; 283 } 284 #elif defined(USE_NEON) 285 if (mozilla::supports_neon()) { 286 convolve_vertically_neon(filterValues, filterLength, sourceDataRows, 287 pixelWidth, outRow, hasAlpha); 288 return; 289 } 290 #endif 291 if (hasAlpha) { 292 ConvolveVertically<true>(filterValues, filterLength, sourceDataRows, 293 pixelWidth, outRow); 294 } else { 295 ConvolveVertically<false>(filterValues, filterLength, sourceDataRows, 296 pixelWidth, outRow); 297 } 298 } 299 300 // Stores a list of rows in a circular buffer. The usage is you write into it 301 // by calling AdvanceRow. It will keep track of which row in the buffer it 302 // should use next, and the total number of rows added. 303 class CircularRowBuffer { 304 public: 305 // The number of pixels in each row is given in |sourceRowPixelWidth|. 306 // The maximum number of rows needed in the buffer is |maxYFilterSize| 307 // (we only need to store enough rows for the biggest filter). 308 // 309 // We use the |firstInputRow| to compute the coordinates of all of the 310 // following rows returned by Advance(). 311 CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize, 312 int firstInputRow) 313 : fRowByteWidth(destRowPixelWidth * 4), 314 fNumRows(maxYFilterSize), 315 fNextRow(0), 316 fNextRowCoordinate(firstInputRow) {} 317 318 bool AllocBuffer() { 319 return fBuffer.resize(fRowByteWidth * fNumRows) && 320 fRowAddresses.resize(fNumRows); 321 } 322 323 // Moves to the next row in the buffer, returning a pointer to the beginning 324 // of it. 325 unsigned char* advanceRow() { 326 unsigned char* row = &fBuffer[fNextRow * fRowByteWidth]; 327 fNextRowCoordinate++; 328 329 // Set the pointer to the next row to use, wrapping around if necessary. 330 fNextRow++; 331 if (fNextRow == fNumRows) { 332 fNextRow = 0; 333 } 334 return row; 335 } 336 337 // Returns a pointer to an "unrolled" array of rows. These rows will start 338 // at the y coordinate placed into |*firstRowIndex| and will continue in 339 // order for the maximum number of rows in this circular buffer. 340 // 341 // The |firstRowIndex_| may be negative. This means the circular buffer 342 // starts before the top of the image (it hasn't been filled yet). 343 unsigned char* const* GetRowAddresses(int* firstRowIndex) { 344 // Example for a 4-element circular buffer holding coords 6-9. 345 // Row 0 Coord 8 346 // Row 1 Coord 9 347 // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10. 348 // Row 3 Coord 7 349 // 350 // The "next" row is also the first (lowest) coordinate. This computation 351 // may yield a negative value, but that's OK, the math will work out 352 // since the user of this buffer will compute the offset relative 353 // to the firstRowIndex and the negative rows will never be used. 354 *firstRowIndex = fNextRowCoordinate - fNumRows; 355 356 int curRow = fNextRow; 357 for (int i = 0; i < fNumRows; i++) { 358 fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth]; 359 360 // Advance to the next row, wrapping if necessary. 361 curRow++; 362 if (curRow == fNumRows) { 363 curRow = 0; 364 } 365 } 366 return &fRowAddresses[0]; 367 } 368 369 private: 370 // The buffer storing the rows. They are packed, each one fRowByteWidth. 371 mozilla::Vector<unsigned char> fBuffer; 372 373 // Number of bytes per row in the |buffer|. 374 int fRowByteWidth; 375 376 // The number of rows available in the buffer. 377 int fNumRows; 378 379 // The next row index we should write into. This wraps around as the 380 // circular buffer is used. 381 int fNextRow; 382 383 // The y coordinate of the |fNextRow|. This is incremented each time a 384 // new row is appended and does not wrap. 385 int fNextRowCoordinate; 386 387 // Buffer used by GetRowAddresses(). 388 mozilla::Vector<unsigned char*> fRowAddresses; 389 }; 390 391 SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {} 392 393 SkConvolutionFilter1D::~SkConvolutionFilter1D() = default; 394 395 bool SkConvolutionFilter1D::AddFilter(int filterOffset, 396 const ConvolutionFixed* filterValues, 397 int filterLength) { 398 // It is common for leading/trailing filter values to be zeros. In such 399 // cases it is beneficial to only store the central factors. 400 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on 401 // a 1080p image this optimization gives a ~10% speed improvement. 402 int filterSize = filterLength; 403 int firstNonZero = 0; 404 while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) { 405 firstNonZero++; 406 } 407 408 if (firstNonZero < filterLength) { 409 // Here we have at least one non-zero factor. 410 int lastNonZero = filterLength - 1; 411 while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) { 412 lastNonZero--; 413 } 414 415 filterOffset += firstNonZero; 416 filterLength = lastNonZero + 1 - firstNonZero; 417 MOZ_ASSERT(filterLength > 0); 418 419 if (!fFilterValues.append(&filterValues[firstNonZero], filterLength)) { 420 return false; 421 } 422 } else { 423 // Here all the factors were zeroes. 424 filterLength = 0; 425 } 426 427 FilterInstance instance = { 428 // We pushed filterLength elements onto fFilterValues 429 int(fFilterValues.length()) - filterLength, filterOffset, filterLength, 430 filterSize}; 431 if (!fFilters.append(instance)) { 432 if (filterLength > 0) { 433 fFilterValues.shrinkBy(filterLength); 434 } 435 return false; 436 } 437 438 fMaxFilter = std::max(fMaxFilter, filterLength); 439 return true; 440 } 441 442 bool SkConvolutionFilter1D::ComputeFilterValues( 443 const SkBitmapFilter& aBitmapFilter, int32_t aSrcSize, int32_t aDstSize) { 444 // When we're doing a magnification, the scale will be larger than one. This 445 // means the destination pixels are much smaller than the source pixels, and 446 // that the range covered by the filter won't necessarily cover any source 447 // pixel boundaries. Therefore, we use these clamped values (max of 1) for 448 // some computations. 449 float scale = float(aDstSize) / float(aSrcSize); 450 float clampedScale = std::min(1.0f, scale); 451 // This is how many source pixels from the center we need to count 452 // to support the filtering function. 453 float srcSupport = aBitmapFilter.width() / clampedScale; 454 float invScale = 1.0f / scale; 455 456 mozilla::Vector<float, 64> filterValues; 457 mozilla::Vector<ConvolutionFixed, 64> fixedFilterValues; 458 459 // Loop over all pixels in the output range. We will generate one set of 460 // filter values for each one. Those values will tell us how to blend the 461 // source pixels to compute the destination pixel. 462 463 // This value is computed based on how SkTDArray::resizeStorageToAtLeast works 464 // in order to ensure that it does not overflow or assert. That functions 465 // computes 466 // n+4 + (n+4)/4 467 // and we want to to fit in a 32 bit signed int. Equating that to 2^31-1 and 468 // solving n gives n = (2^31-6)*4/5 = 1717986913.6 469 const int32_t maxToPassToReserveAdditional = 1717986913; 470 471 int32_t filterValueCount = int32_t(ceilf(aDstSize * srcSupport * 2)); 472 if (aDstSize > maxToPassToReserveAdditional || filterValueCount < 0 || 473 filterValueCount > maxToPassToReserveAdditional || 474 !reserveAdditional(aDstSize, filterValueCount)) { 475 return false; 476 } 477 size_t oldFiltersLength = fFilters.length(); 478 size_t oldFilterValuesLength = fFilterValues.length(); 479 int oldMaxFilter = fMaxFilter; 480 for (int32_t destI = 0; destI < aDstSize; destI++) { 481 // This is the pixel in the source directly under the pixel in the dest. 482 // Note that we base computations on the "center" of the pixels. To see 483 // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x 484 // downscale should "cover" the pixels around the pixel with *its center* 485 // at coordinates (2.5, 2.5) in the source, not those around (0, 0). 486 // Hence we need to scale coordinates (0.5, 0.5), not (0, 0). 487 float srcPixel = (static_cast<float>(destI) + 0.5f) * invScale; 488 489 // Compute the (inclusive) range of source pixels the filter covers. 490 float srcBegin = std::max(0.0f, floorf(srcPixel - srcSupport)); 491 float srcEnd = std::min(aSrcSize - 1.0f, ceilf(srcPixel + srcSupport)); 492 493 // Compute the unnormalized filter value at each location of the source 494 // it covers. 495 496 // Sum of the filter values for normalizing. 497 // Distance from the center of the filter, this is the filter coordinate 498 // in source space. We also need to consider the center of the pixel 499 // when comparing distance against 'srcPixel'. In the 5x downscale 500 // example used above the distance from the center of the filter to 501 // the pixel with coordinates (2, 2) should be 0, because its center 502 // is at (2.5, 2.5). 503 int32_t filterCount = int32_t(srcEnd - srcBegin) + 1; 504 if (filterCount <= 0 || !filterValues.resize(filterCount) || 505 !fixedFilterValues.resize(filterCount)) { 506 return false; 507 } 508 509 float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale; 510 float filterSum = 0.0f; 511 for (int32_t index = 0; index < filterCount; index++) { 512 float filterValue = aBitmapFilter.evaluate(destFilterDist); 513 filterValues[index] = filterValue; 514 filterSum += filterValue; 515 destFilterDist += clampedScale; 516 } 517 518 // The filter must be normalized so that we don't affect the brightness of 519 // the image. Convert to normalized fixed point. 520 ConvolutionFixed fixedSum = 0; 521 float invFilterSum = 1.0f / filterSum; 522 for (int32_t fixedI = 0; fixedI < filterCount; fixedI++) { 523 ConvolutionFixed curFixed = ToFixed(filterValues[fixedI] * invFilterSum); 524 fixedSum += curFixed; 525 fixedFilterValues[fixedI] = curFixed; 526 } 527 528 // The conversion to fixed point will leave some rounding errors, which 529 // we add back in to avoid affecting the brightness of the image. We 530 // arbitrarily add this to the center of the filter array (this won't always 531 // be the center of the filter function since it could get clipped on the 532 // edges, but it doesn't matter enough to worry about that case). 533 ConvolutionFixed leftovers = ToFixed(1) - fixedSum; 534 fixedFilterValues[filterCount / 2] += leftovers; 535 536 if (!AddFilter(int32_t(srcBegin), fixedFilterValues.begin(), filterCount)) { 537 fFilters.shrinkTo(oldFiltersLength); 538 fFilterValues.shrinkTo(oldFilterValuesLength); 539 fMaxFilter = oldMaxFilter; 540 return false; 541 } 542 } 543 544 return maxFilter() > 0 && numValues() == aDstSize; 545 } 546 547 // Does a two-dimensional convolution on the given source image. 548 // 549 // It is assumed the source pixel offsets referenced in the input filters 550 // reference only valid pixels, so the source image size is not required. Each 551 // row of the source image starts |sourceByteRowStride| after the previous 552 // one (this allows you to have rows with some padding at the end). 553 // 554 // The result will be put into the given output buffer. The destination image 555 // size will be xfilter.numValues() * yfilter.numValues() pixels. It will be 556 // in rows of exactly xfilter.numValues() * 4 bytes. 557 // 558 // |sourceHasAlpha| is a hint that allows us to avoid doing computations on 559 // the alpha channel if the image is opaque. If you don't know, set this to 560 // true and it will work properly, but setting this to false will be a few 561 // percent faster if you know the image is opaque. 562 // 563 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order 564 // (this is ARGB when loaded into 32-bit words on a little-endian machine). 565 /** 566 * Returns false if it was unable to perform the convolution/rescale. in which 567 * case the output buffer is assumed to be undefined. 568 */ 569 bool BGRAConvolve2D(const unsigned char* sourceData, int sourceByteRowStride, 570 SurfaceFormat format, const SkConvolutionFilter1D& filterX, 571 const SkConvolutionFilter1D& filterY, 572 int outputByteRowStride, unsigned char* output) { 573 int maxYFilterSize = filterY.maxFilter(); 574 575 // The next row in the input that we will generate a horizontally 576 // convolved row for. If the filter doesn't start at the beginning of the 577 // image (this is the case when we are only resizing a subset), then we 578 // don't want to generate any output rows before that. Compute the starting 579 // row for convolution as the first pixel for the first vertical filter. 580 int filterOffset = 0, filterLength = 0; 581 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 582 filterY.FilterForValue(0, &filterOffset, &filterLength); 583 int nextXRow = filterOffset; 584 585 // We loop over each row in the input doing a horizontal convolution. This 586 // will result in a horizontally convolved image. We write the results into 587 // a circular buffer of convolved rows and do vertical convolution as rows 588 // are available. This prevents us from having to store the entire 589 // intermediate image and helps cache coherency. 590 // We will need four extra rows to allow horizontal convolution could be done 591 // simultaneously. We also pad each row in row buffer to be aligned-up to 592 // 32 bytes. 593 // TODO(jiesun): We do not use aligned load from row buffer in vertical 594 // convolution pass yet. Somehow Windows does not like it. 595 int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F; 596 int rowBufferHeight = maxYFilterSize; 597 598 // check for too-big allocation requests : crbug.com/528628 599 { 600 int64_t size = int64_t(rowBufferWidth) * int64_t(rowBufferHeight); 601 // need some limit, to avoid over-committing success from malloc, but then 602 // crashing when we try to actually use the memory. 603 // 100meg seems big enough to allow "normal" zoom factors and image sizes 604 // through while avoiding the crash seen by the bug (crbug.com/528628) 605 if (size > 100 * 1024 * 1024) { 606 // printf_stderr("BGRAConvolve2D: tmp allocation [%lld] too 607 // big\n", size); 608 return false; 609 } 610 } 611 612 CircularRowBuffer rowBuffer(rowBufferWidth, rowBufferHeight, filterOffset); 613 if (!rowBuffer.AllocBuffer()) { 614 return false; 615 } 616 617 // Loop over every possible output row, processing just enough horizontal 618 // convolutions to run each subsequent vertical convolution. 619 MOZ_ASSERT(outputByteRowStride >= 620 filterX.numValues() * BytesPerPixel(format)); 621 int numOutputRows = filterY.numValues(); 622 623 // We need to check which is the last line to convolve before we advance 4 624 // lines in one iteration. 625 int lastFilterOffset, lastFilterLength; 626 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, 627 &lastFilterLength); 628 629 for (int outY = 0; outY < numOutputRows; outY++) { 630 filterValues = filterY.FilterForValue(outY, &filterOffset, &filterLength); 631 632 // Generate output rows until we have enough to run the current filter. 633 while (nextXRow < filterOffset + filterLength) { 634 convolve_horizontally( 635 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], filterX, 636 rowBuffer.advanceRow(), format); 637 nextXRow++; 638 } 639 640 // Compute where in the output image this row of final data will go. 641 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride]; 642 643 // Get the list of rows that the circular buffer has, in order. 644 int firstRowInCircularBuffer; 645 unsigned char* const* rowsToConvolve = 646 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); 647 648 // Now compute the start of the subset of those rows that the filter needs. 649 unsigned char* const* firstRowForFilter = 650 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; 651 652 convolve_vertically(filterValues, filterLength, firstRowForFilter, 653 filterX.numValues(), curOutputRow, format); 654 } 655 return true; 656 } 657 658 } // namespace skia