tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SkConvolver.cpp (26681B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 // Copyright (c) 2011-2016 Google Inc.
      4 // Use of this source code is governed by a BSD-style license that can be
      5 // found in the gfx/skia/LICENSE file.
      6 
      7 #include "SkConvolver.h"
      8 
      9 #ifdef USE_SSE2
     10 #  include "mozilla/SSE.h"
     11 #endif
     12 
     13 #ifdef USE_NEON
     14 #  include "mozilla/arm.h"
     15 #endif
     16 
     17 namespace skia {
     18 
     19 using mozilla::gfx::BytesPerPixel;
     20 using mozilla::gfx::IsOpaque;
     21 using mozilla::gfx::SurfaceFormat;
     22 
     23 // Converts the argument to an 8-bit unsigned value by clamping to the range
     24 // 0-255.
     25 static inline unsigned char ClampTo8(int a) {
     26  if (static_cast<unsigned>(a) < 256) {
     27    return a;  // Avoid the extra check in the common case.
     28  }
     29  if (a < 0) {
     30    return 0;
     31  }
     32  return 255;
     33 }
     34 
     35 // Convolves horizontally along a single row. The row data is given in
     36 // |srcData| and continues for the numValues() of the filter.
     37 template <bool hasAlpha>
     38 void ConvolveHorizontally(const unsigned char* srcData,
     39                          const SkConvolutionFilter1D& filter,
     40                          unsigned char* outRow) {
     41  // Loop over each pixel on this row in the output image.
     42  int numValues = filter.numValues();
     43  for (int outX = 0; outX < numValues; outX++) {
     44    // Get the filter that determines the current output pixel.
     45    int filterOffset, filterLength;
     46    const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
     47        filter.FilterForValue(outX, &filterOffset, &filterLength);
     48 
     49    // Compute the first pixel in this row that the filter affects. It will
     50    // touch |filterLength| pixels (4 bytes each) after this.
     51    const unsigned char* rowToFilter = &srcData[filterOffset * 4];
     52 
     53    // Apply the filter to the row to get the destination pixel in |accum|.
     54    int accum[4] = {0};
     55    for (int filterX = 0; filterX < filterLength; filterX++) {
     56      SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
     57      accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
     58      accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
     59      accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
     60      if (hasAlpha) {
     61        accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
     62      }
     63    }
     64 
     65    // Bring this value back in range. All of the filter scaling factors
     66    // are in fixed point with kShiftBits bits of fractional part.
     67    accum[0] >>= SkConvolutionFilter1D::kShiftBits;
     68    accum[1] >>= SkConvolutionFilter1D::kShiftBits;
     69    accum[2] >>= SkConvolutionFilter1D::kShiftBits;
     70 
     71    if (hasAlpha) {
     72      accum[3] >>= SkConvolutionFilter1D::kShiftBits;
     73    }
     74 
     75    // Store the new pixel.
     76    outRow[outX * 4 + 0] = ClampTo8(accum[0]);
     77    outRow[outX * 4 + 1] = ClampTo8(accum[1]);
     78    outRow[outX * 4 + 2] = ClampTo8(accum[2]);
     79    if (hasAlpha) {
     80      outRow[outX * 4 + 3] = ClampTo8(accum[3]);
     81    }
     82  }
     83 }
     84 
     85 // Does vertical convolution to produce one output row. The filter values and
     86 // length are given in the first two parameters. These are applied to each
     87 // of the rows pointed to in the |sourceDataRows| array, with each row
     88 // being |pixelWidth| wide.
     89 //
     90 // The output must have room for |pixelWidth * 4| bytes.
     91 template <bool hasAlpha>
     92 void ConvolveVertically(
     93    const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
     94    int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
     95    unsigned char* outRow) {
     96  // We go through each column in the output and do a vertical convolution,
     97  // generating one output pixel each time.
     98  for (int outX = 0; outX < pixelWidth; outX++) {
     99    // Compute the number of bytes over in each row that the current column
    100    // we're convolving starts at. The pixel will cover the next 4 bytes.
    101    int byteOffset = outX * 4;
    102 
    103    // Apply the filter to one column of pixels.
    104    int accum[4] = {0};
    105    for (int filterY = 0; filterY < filterLength; filterY++) {
    106      SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
    107      accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
    108      accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
    109      accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
    110      if (hasAlpha) {
    111        accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
    112      }
    113    }
    114 
    115    // Bring this value back in range. All of the filter scaling factors
    116    // are in fixed point with kShiftBits bits of precision.
    117    accum[0] >>= SkConvolutionFilter1D::kShiftBits;
    118    accum[1] >>= SkConvolutionFilter1D::kShiftBits;
    119    accum[2] >>= SkConvolutionFilter1D::kShiftBits;
    120    if (hasAlpha) {
    121      accum[3] >>= SkConvolutionFilter1D::kShiftBits;
    122    }
    123 
    124    // Store the new pixel.
    125    outRow[byteOffset + 0] = ClampTo8(accum[0]);
    126    outRow[byteOffset + 1] = ClampTo8(accum[1]);
    127    outRow[byteOffset + 2] = ClampTo8(accum[2]);
    128 
    129    if (hasAlpha) {
    130      unsigned char alpha = ClampTo8(accum[3]);
    131 
    132      // Make sure the alpha channel doesn't come out smaller than any of the
    133      // color channels. We use premultipled alpha channels, so this should
    134      // never happen, but rounding errors will cause this from time to time.
    135      // These "impossible" colors will cause overflows (and hence random pixel
    136      // values) when the resulting bitmap is drawn to the screen.
    137      //
    138      // We only need to do this when generating the final output row (here).
    139      int maxColorChannel =
    140          std::max(outRow[byteOffset + 0],
    141                   std::max(outRow[byteOffset + 1], outRow[byteOffset + 2]));
    142      if (alpha < maxColorChannel) {
    143        outRow[byteOffset + 3] = maxColorChannel;
    144      } else {
    145        outRow[byteOffset + 3] = alpha;
    146      }
    147    } else {
    148      // No alpha channel, the image is opaque.
    149      outRow[byteOffset + 3] = 0xff;
    150    }
    151  }
    152 }
    153 
    154 // Convolves horizontally along a single row. The row data is given in
    155 // |srcData| and continues for the numValues() of the filter.
    156 void ConvolveHorizontallyA8(const unsigned char* srcData,
    157                            const SkConvolutionFilter1D& filter,
    158                            unsigned char* outRow) {
    159  // Loop over each pixel on this row in the output image.
    160  int numValues = filter.numValues();
    161  for (int outX = 0; outX < numValues; outX++) {
    162    // Get the filter that determines the current output pixel.
    163    int filterOffset, filterLength;
    164    const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
    165        filter.FilterForValue(outX, &filterOffset, &filterLength);
    166 
    167    // Compute the first pixel in this row that the filter affects. It will
    168    // touch |filterLength| pixels (4 bytes each) after this.
    169    const unsigned char* rowToFilter = &srcData[filterOffset];
    170 
    171    // Apply the filter to the row to get the destination pixel in |accum|.
    172    int accum = 0;
    173    for (int filterX = 0; filterX < filterLength; filterX++) {
    174      SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
    175      accum += curFilter * rowToFilter[filterX];
    176    }
    177 
    178    // Bring this value back in range. All of the filter scaling factors
    179    // are in fixed point with kShiftBits bits of fractional part.
    180    accum >>= SkConvolutionFilter1D::kShiftBits;
    181 
    182    // Store the new pixel.
    183    outRow[outX] = ClampTo8(accum);
    184  }
    185 }
    186 
    187 // Does vertical convolution to produce one output row. The filter values and
    188 // length are given in the first two parameters. These are applied to each
    189 // of the rows pointed to in the |sourceDataRows| array, with each row
    190 // being |pixelWidth| wide.
    191 //
    192 // The output must have room for |pixelWidth| bytes.
    193 void ConvolveVerticallyA8(
    194    const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
    195    int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
    196    unsigned char* outRow) {
    197  // We go through each column in the output and do a vertical convolution,
    198  // generating one output pixel each time.
    199  for (int outX = 0; outX < pixelWidth; outX++) {
    200    // Apply the filter to one column of pixels.
    201    int accum = 0;
    202    for (int filterY = 0; filterY < filterLength; filterY++) {
    203      SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
    204      accum += curFilter * sourceDataRows[filterY][outX];
    205    }
    206 
    207    // Bring this value back in range. All of the filter scaling factors
    208    // are in fixed point with kShiftBits bits of precision.
    209    accum >>= SkConvolutionFilter1D::kShiftBits;
    210 
    211    // Store the new pixel.
    212    outRow[outX] = ClampTo8(accum);
    213  }
    214 }
    215 
// Forward declarations of the SIMD-specific convolution routines. Only the
// prototypes appear here; the definitions are not in this part of the file.
// convolve_horizontally() and convolve_vertically() below call them after
// checking the matching mozilla::supports_*() predicate.
#ifdef USE_SSE2
// AVX2 vertical pass; tried before the SSE2 one by convolve_vertically().
void convolve_vertically_avx2(const int16_t* filter, int filterLen,
                              uint8_t* const* srcRows, int width, uint8_t* out,
                              bool hasAlpha);
// SSE2 horizontal pass.
void convolve_horizontally_sse2(const unsigned char* srcData,
                                const SkConvolutionFilter1D& filter,
                                unsigned char* outRow, bool hasAlpha);
// SSE2 vertical pass.
void convolve_vertically_sse2(const int16_t* filter, int filterLen,
                              uint8_t* const* srcRows, int width, uint8_t* out,
                              bool hasAlpha);
#elif defined(USE_NEON)
// NEON horizontal pass.
void convolve_horizontally_neon(const unsigned char* srcData,
                                const SkConvolutionFilter1D& filter,
                                unsigned char* outRow, bool hasAlpha);
// NEON vertical pass.
void convolve_vertically_neon(const int16_t* filter, int filterLen,
                              uint8_t* const* srcRows, int width, uint8_t* out,
                              bool hasAlpha);
#endif
    234 
    235 void convolve_horizontally(const unsigned char* srcData,
    236                           const SkConvolutionFilter1D& filter,
    237                           unsigned char* outRow, SurfaceFormat format) {
    238  if (format == SurfaceFormat::A8) {
    239    ConvolveHorizontallyA8(srcData, filter, outRow);
    240    return;
    241  }
    242 
    243  bool hasAlpha = !IsOpaque(format);
    244 #ifdef USE_SSE2
    245  if (mozilla::supports_sse2()) {
    246    convolve_horizontally_sse2(srcData, filter, outRow, hasAlpha);
    247    return;
    248  }
    249 #elif defined(USE_NEON)
    250  if (mozilla::supports_neon()) {
    251    convolve_horizontally_neon(srcData, filter, outRow, hasAlpha);
    252    return;
    253  }
    254 #endif
    255  if (hasAlpha) {
    256    ConvolveHorizontally<true>(srcData, filter, outRow);
    257  } else {
    258    ConvolveHorizontally<false>(srcData, filter, outRow);
    259  }
    260 }
    261 
    262 void convolve_vertically(
    263    const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
    264    int filterLength, unsigned char* const* sourceDataRows, int pixelWidth,
    265    unsigned char* outRow, SurfaceFormat format) {
    266  if (format == SurfaceFormat::A8) {
    267    ConvolveVerticallyA8(filterValues, filterLength, sourceDataRows, pixelWidth,
    268                         outRow);
    269    return;
    270  }
    271 
    272  bool hasAlpha = !IsOpaque(format);
    273 #ifdef USE_SSE2
    274  if (mozilla::supports_avx2()) {
    275    convolve_vertically_avx2(filterValues, filterLength, sourceDataRows,
    276                             pixelWidth, outRow, hasAlpha);
    277    return;
    278  }
    279  if (mozilla::supports_sse2()) {
    280    convolve_vertically_sse2(filterValues, filterLength, sourceDataRows,
    281                             pixelWidth, outRow, hasAlpha);
    282    return;
    283  }
    284 #elif defined(USE_NEON)
    285  if (mozilla::supports_neon()) {
    286    convolve_vertically_neon(filterValues, filterLength, sourceDataRows,
    287                             pixelWidth, outRow, hasAlpha);
    288    return;
    289  }
    290 #endif
    291  if (hasAlpha) {
    292    ConvolveVertically<true>(filterValues, filterLength, sourceDataRows,
    293                             pixelWidth, outRow);
    294  } else {
    295    ConvolveVertically<false>(filterValues, filterLength, sourceDataRows,
    296                              pixelWidth, outRow);
    297  }
    298 }
    299 
    300 // Stores a list of rows in a circular buffer. The usage is you write into it
    301 // by calling AdvanceRow. It will keep track of which row in the buffer it
    302 // should use next, and the total number of rows added.
    303 class CircularRowBuffer {
    304 public:
    305  // The number of pixels in each row is given in |sourceRowPixelWidth|.
    306  // The maximum number of rows needed in the buffer is |maxYFilterSize|
    307  // (we only need to store enough rows for the biggest filter).
    308  //
    309  // We use the |firstInputRow| to compute the coordinates of all of the
    310  // following rows returned by Advance().
    311  CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
    312                    int firstInputRow)
    313      : fRowByteWidth(destRowPixelWidth * 4),
    314        fNumRows(maxYFilterSize),
    315        fNextRow(0),
    316        fNextRowCoordinate(firstInputRow) {}
    317 
    318  bool AllocBuffer() {
    319    return fBuffer.resize(fRowByteWidth * fNumRows) &&
    320           fRowAddresses.resize(fNumRows);
    321  }
    322 
    323  // Moves to the next row in the buffer, returning a pointer to the beginning
    324  // of it.
    325  unsigned char* advanceRow() {
    326    unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];
    327    fNextRowCoordinate++;
    328 
    329    // Set the pointer to the next row to use, wrapping around if necessary.
    330    fNextRow++;
    331    if (fNextRow == fNumRows) {
    332      fNextRow = 0;
    333    }
    334    return row;
    335  }
    336 
    337  // Returns a pointer to an "unrolled" array of rows. These rows will start
    338  // at the y coordinate placed into |*firstRowIndex| and will continue in
    339  // order for the maximum number of rows in this circular buffer.
    340  //
    341  // The |firstRowIndex_| may be negative. This means the circular buffer
    342  // starts before the top of the image (it hasn't been filled yet).
    343  unsigned char* const* GetRowAddresses(int* firstRowIndex) {
    344    // Example for a 4-element circular buffer holding coords 6-9.
    345    //   Row 0   Coord 8
    346    //   Row 1   Coord 9
    347    //   Row 2   Coord 6  <- fNextRow = 2, fNextRowCoordinate = 10.
    348    //   Row 3   Coord 7
    349    //
    350    // The "next" row is also the first (lowest) coordinate. This computation
    351    // may yield a negative value, but that's OK, the math will work out
    352    // since the user of this buffer will compute the offset relative
    353    // to the firstRowIndex and the negative rows will never be used.
    354    *firstRowIndex = fNextRowCoordinate - fNumRows;
    355 
    356    int curRow = fNextRow;
    357    for (int i = 0; i < fNumRows; i++) {
    358      fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];
    359 
    360      // Advance to the next row, wrapping if necessary.
    361      curRow++;
    362      if (curRow == fNumRows) {
    363        curRow = 0;
    364      }
    365    }
    366    return &fRowAddresses[0];
    367  }
    368 
    369 private:
    370  // The buffer storing the rows. They are packed, each one fRowByteWidth.
    371  mozilla::Vector<unsigned char> fBuffer;
    372 
    373  // Number of bytes per row in the |buffer|.
    374  int fRowByteWidth;
    375 
    376  // The number of rows available in the buffer.
    377  int fNumRows;
    378 
    379  // The next row index we should write into. This wraps around as the
    380  // circular buffer is used.
    381  int fNextRow;
    382 
    383  // The y coordinate of the |fNextRow|. This is incremented each time a
    384  // new row is appended and does not wrap.
    385  int fNextRowCoordinate;
    386 
    387  // Buffer used by GetRowAddresses().
    388  mozilla::Vector<unsigned char*> fRowAddresses;
    389 };
    390 
// Default constructor: sets fMaxFilter to 0. All other members are
// default-constructed.
SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {}
    392 
// Compiler-generated destructor, defined out of line in this file.
SkConvolutionFilter1D::~SkConvolutionFilter1D() = default;
    394 
    395 bool SkConvolutionFilter1D::AddFilter(int filterOffset,
    396                                      const ConvolutionFixed* filterValues,
    397                                      int filterLength) {
    398  // It is common for leading/trailing filter values to be zeros. In such
    399  // cases it is beneficial to only store the central factors.
    400  // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
    401  // a 1080p image this optimization gives a ~10% speed improvement.
    402  int filterSize = filterLength;
    403  int firstNonZero = 0;
    404  while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
    405    firstNonZero++;
    406  }
    407 
    408  if (firstNonZero < filterLength) {
    409    // Here we have at least one non-zero factor.
    410    int lastNonZero = filterLength - 1;
    411    while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
    412      lastNonZero--;
    413    }
    414 
    415    filterOffset += firstNonZero;
    416    filterLength = lastNonZero + 1 - firstNonZero;
    417    MOZ_ASSERT(filterLength > 0);
    418 
    419    if (!fFilterValues.append(&filterValues[firstNonZero], filterLength)) {
    420      return false;
    421    }
    422  } else {
    423    // Here all the factors were zeroes.
    424    filterLength = 0;
    425  }
    426 
    427  FilterInstance instance = {
    428      // We pushed filterLength elements onto fFilterValues
    429      int(fFilterValues.length()) - filterLength, filterOffset, filterLength,
    430      filterSize};
    431  if (!fFilters.append(instance)) {
    432    if (filterLength > 0) {
    433      fFilterValues.shrinkBy(filterLength);
    434    }
    435    return false;
    436  }
    437 
    438  fMaxFilter = std::max(fMaxFilter, filterLength);
    439  return true;
    440 }
    441 
    442 bool SkConvolutionFilter1D::ComputeFilterValues(
    443    const SkBitmapFilter& aBitmapFilter, int32_t aSrcSize, int32_t aDstSize) {
    444  // When we're doing a magnification, the scale will be larger than one. This
    445  // means the destination pixels are much smaller than the source pixels, and
    446  // that the range covered by the filter won't necessarily cover any source
    447  // pixel boundaries. Therefore, we use these clamped values (max of 1) for
    448  // some computations.
    449  float scale = float(aDstSize) / float(aSrcSize);
    450  float clampedScale = std::min(1.0f, scale);
    451  // This is how many source pixels from the center we need to count
    452  // to support the filtering function.
    453  float srcSupport = aBitmapFilter.width() / clampedScale;
    454  float invScale = 1.0f / scale;
    455 
    456  mozilla::Vector<float, 64> filterValues;
    457  mozilla::Vector<ConvolutionFixed, 64> fixedFilterValues;
    458 
    459  // Loop over all pixels in the output range. We will generate one set of
    460  // filter values for each one. Those values will tell us how to blend the
    461  // source pixels to compute the destination pixel.
    462 
    463  // This value is computed based on how SkTDArray::resizeStorageToAtLeast works
    464  // in order to ensure that it does not overflow or assert. That functions
    465  // computes
    466  //   n+4 + (n+4)/4
    467  // and we want to to fit in a 32 bit signed int. Equating that to 2^31-1 and
    468  // solving n gives n = (2^31-6)*4/5 = 1717986913.6
    469  const int32_t maxToPassToReserveAdditional = 1717986913;
    470 
    471  int32_t filterValueCount = int32_t(ceilf(aDstSize * srcSupport * 2));
    472  if (aDstSize > maxToPassToReserveAdditional || filterValueCount < 0 ||
    473      filterValueCount > maxToPassToReserveAdditional ||
    474      !reserveAdditional(aDstSize, filterValueCount)) {
    475    return false;
    476  }
    477  size_t oldFiltersLength = fFilters.length();
    478  size_t oldFilterValuesLength = fFilterValues.length();
    479  int oldMaxFilter = fMaxFilter;
    480  for (int32_t destI = 0; destI < aDstSize; destI++) {
    481    // This is the pixel in the source directly under the pixel in the dest.
    482    // Note that we base computations on the "center" of the pixels. To see
    483    // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
    484    // downscale should "cover" the pixels around the pixel with *its center*
    485    // at coordinates (2.5, 2.5) in the source, not those around (0, 0).
    486    // Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
    487    float srcPixel = (static_cast<float>(destI) + 0.5f) * invScale;
    488 
    489    // Compute the (inclusive) range of source pixels the filter covers.
    490    float srcBegin = std::max(0.0f, floorf(srcPixel - srcSupport));
    491    float srcEnd = std::min(aSrcSize - 1.0f, ceilf(srcPixel + srcSupport));
    492 
    493    // Compute the unnormalized filter value at each location of the source
    494    // it covers.
    495 
    496    // Sum of the filter values for normalizing.
    497    // Distance from the center of the filter, this is the filter coordinate
    498    // in source space. We also need to consider the center of the pixel
    499    // when comparing distance against 'srcPixel'. In the 5x downscale
    500    // example used above the distance from the center of the filter to
    501    // the pixel with coordinates (2, 2) should be 0, because its center
    502    // is at (2.5, 2.5).
    503    int32_t filterCount = int32_t(srcEnd - srcBegin) + 1;
    504    if (filterCount <= 0 || !filterValues.resize(filterCount) ||
    505        !fixedFilterValues.resize(filterCount)) {
    506      return false;
    507    }
    508 
    509    float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
    510    float filterSum = 0.0f;
    511    for (int32_t index = 0; index < filterCount; index++) {
    512      float filterValue = aBitmapFilter.evaluate(destFilterDist);
    513      filterValues[index] = filterValue;
    514      filterSum += filterValue;
    515      destFilterDist += clampedScale;
    516    }
    517 
    518    // The filter must be normalized so that we don't affect the brightness of
    519    // the image. Convert to normalized fixed point.
    520    ConvolutionFixed fixedSum = 0;
    521    float invFilterSum = 1.0f / filterSum;
    522    for (int32_t fixedI = 0; fixedI < filterCount; fixedI++) {
    523      ConvolutionFixed curFixed = ToFixed(filterValues[fixedI] * invFilterSum);
    524      fixedSum += curFixed;
    525      fixedFilterValues[fixedI] = curFixed;
    526    }
    527 
    528    // The conversion to fixed point will leave some rounding errors, which
    529    // we add back in to avoid affecting the brightness of the image. We
    530    // arbitrarily add this to the center of the filter array (this won't always
    531    // be the center of the filter function since it could get clipped on the
    532    // edges, but it doesn't matter enough to worry about that case).
    533    ConvolutionFixed leftovers = ToFixed(1) - fixedSum;
    534    fixedFilterValues[filterCount / 2] += leftovers;
    535 
    536    if (!AddFilter(int32_t(srcBegin), fixedFilterValues.begin(), filterCount)) {
    537      fFilters.shrinkTo(oldFiltersLength);
    538      fFilterValues.shrinkTo(oldFilterValuesLength);
    539      fMaxFilter = oldMaxFilter;
    540      return false;
    541    }
    542  }
    543 
    544  return maxFilter() > 0 && numValues() == aDstSize;
    545 }
    546 
    547 // Does a two-dimensional convolution on the given source image.
    548 //
    549 // It is assumed the source pixel offsets referenced in the input filters
    550 // reference only valid pixels, so the source image size is not required. Each
    551 // row of the source image starts |sourceByteRowStride| after the previous
    552 // one (this allows you to have rows with some padding at the end).
    553 //
    554 // The result will be put into the given output buffer. The destination image
    555 // size will be xfilter.numValues() * yfilter.numValues() pixels. It will be
    556 // in rows of exactly xfilter.numValues() * 4 bytes.
    557 //
    558 // |sourceHasAlpha| is a hint that allows us to avoid doing computations on
    559 // the alpha channel if the image is opaque. If you don't know, set this to
    560 // true and it will work properly, but setting this to false will be a few
    561 // percent faster if you know the image is opaque.
    562 //
    563 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
    564 // (this is ARGB when loaded into 32-bit words on a little-endian machine).
    565 /**
    566 *  Returns false if it was unable to perform the convolution/rescale. in which
    567 * case the output buffer is assumed to be undefined.
    568 */
    569 bool BGRAConvolve2D(const unsigned char* sourceData, int sourceByteRowStride,
    570                    SurfaceFormat format, const SkConvolutionFilter1D& filterX,
    571                    const SkConvolutionFilter1D& filterY,
    572                    int outputByteRowStride, unsigned char* output) {
    573  int maxYFilterSize = filterY.maxFilter();
    574 
    575  // The next row in the input that we will generate a horizontally
    576  // convolved row for. If the filter doesn't start at the beginning of the
    577  // image (this is the case when we are only resizing a subset), then we
    578  // don't want to generate any output rows before that. Compute the starting
    579  // row for convolution as the first pixel for the first vertical filter.
    580  int filterOffset = 0, filterLength = 0;
    581  const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
    582      filterY.FilterForValue(0, &filterOffset, &filterLength);
    583  int nextXRow = filterOffset;
    584 
    585  // We loop over each row in the input doing a horizontal convolution. This
    586  // will result in a horizontally convolved image. We write the results into
    587  // a circular buffer of convolved rows and do vertical convolution as rows
    588  // are available. This prevents us from having to store the entire
    589  // intermediate image and helps cache coherency.
    590  // We will need four extra rows to allow horizontal convolution could be done
    591  // simultaneously. We also pad each row in row buffer to be aligned-up to
    592  // 32 bytes.
    593  // TODO(jiesun): We do not use aligned load from row buffer in vertical
    594  // convolution pass yet. Somehow Windows does not like it.
    595  int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F;
    596  int rowBufferHeight = maxYFilterSize;
    597 
    598  // check for too-big allocation requests : crbug.com/528628
    599  {
    600    int64_t size = int64_t(rowBufferWidth) * int64_t(rowBufferHeight);
    601    // need some limit, to avoid over-committing success from malloc, but then
    602    // crashing when we try to actually use the memory.
    603    // 100meg seems big enough to allow "normal" zoom factors and image sizes
    604    // through while avoiding the crash seen by the bug (crbug.com/528628)
    605    if (size > 100 * 1024 * 1024) {
    606      //            printf_stderr("BGRAConvolve2D: tmp allocation [%lld] too
    607      //            big\n", size);
    608      return false;
    609    }
    610  }
    611 
    612  CircularRowBuffer rowBuffer(rowBufferWidth, rowBufferHeight, filterOffset);
    613  if (!rowBuffer.AllocBuffer()) {
    614    return false;
    615  }
    616 
    617  // Loop over every possible output row, processing just enough horizontal
    618  // convolutions to run each subsequent vertical convolution.
    619  MOZ_ASSERT(outputByteRowStride >=
    620             filterX.numValues() * BytesPerPixel(format));
    621  int numOutputRows = filterY.numValues();
    622 
    623  // We need to check which is the last line to convolve before we advance 4
    624  // lines in one iteration.
    625  int lastFilterOffset, lastFilterLength;
    626  filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
    627                         &lastFilterLength);
    628 
    629  for (int outY = 0; outY < numOutputRows; outY++) {
    630    filterValues = filterY.FilterForValue(outY, &filterOffset, &filterLength);
    631 
    632    // Generate output rows until we have enough to run the current filter.
    633    while (nextXRow < filterOffset + filterLength) {
    634      convolve_horizontally(
    635          &sourceData[(uint64_t)nextXRow * sourceByteRowStride], filterX,
    636          rowBuffer.advanceRow(), format);
    637      nextXRow++;
    638    }
    639 
    640    // Compute where in the output image this row of final data will go.
    641    unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];
    642 
    643    // Get the list of rows that the circular buffer has, in order.
    644    int firstRowInCircularBuffer;
    645    unsigned char* const* rowsToConvolve =
    646        rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
    647 
    648    // Now compute the start of the subset of those rows that the filter needs.
    649    unsigned char* const* firstRowForFilter =
    650        &rowsToConvolve[filterOffset - firstRowInCircularBuffer];
    651 
    652    convolve_vertically(filterValues, filterLength, firstRowForFilter,
    653                        filterX.numValues(), curOutputRow, format);
    654  }
    655  return true;
    656 }
    657 
    658 }  // namespace skia