Bug 486918. Part 1: Import Chromium's higher-quality image scalers, since we know those to be good and shippable. r=jrmuizel

Joe Drew 2012-08-23 15:36:04 -04:00
parent 1827b6489f
commit b4317922d2
8 changed files with 1800 additions and 0 deletions


@@ -10,6 +10,7 @@
#include "skia/SkCanvas.h" #include "skia/SkCanvas.h"
#include "skia/SkDashPathEffect.h" #include "skia/SkDashPathEffect.h"
#include "mozilla/Assertions.h" #include "mozilla/Assertions.h"
#include <vector>
namespace mozilla { namespace mozilla {
namespace gfx { namespace gfx {


@@ -29,6 +29,7 @@ EXPORTS_mozilla/gfx = \
Point.h \
Matrix.h \
Rect.h \
+Scale.h \
Types.h \
Tools.h \
UserData.h \
@@ -46,6 +47,7 @@ CPPSRCS = \
RecordedEvent.cpp \
DrawEventRecorder.cpp \
Blur.cpp \
+Scale.cpp \
ScaledFontBase.cpp \
DrawTargetDual.cpp \
ImageScaling.cpp \
@@ -76,6 +78,8 @@ CPPSRCS += \
SourceSurfaceSkia.cpp \
DrawTargetSkia.cpp \
PathSkia.cpp \
+convolver.cpp \
+image_operations.cpp \
$(NULL)
DEFINES += -DUSE_SKIA
@@ -135,6 +139,12 @@ endif
endif
include $(topsrcdir)/config/rules.mk
+include $(topsrcdir)/ipc/chromium/chromium-config.mk
+# Due to bug 796023, we can't have -DUNICODE and -D_UNICODE; defining those
+# macros changes the type of LOGFONT to LOGFONTW instead of LOGFONTA. This
+# changes the symbol names of exported C++ functions that use LOGFONT.
+DEFINES := $(filter-out -DUNICODE -D_UNICODE,$(DEFINES))
#ifeq ($(MOZ_WIDGET_TOOLKIT),cocoa)
#CPPSRCS += \

gfx/2d/Scale.cpp (new file, 54 lines)
@@ -0,0 +1,54 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "Scale.h"
#ifdef USE_SKIA
#include "HelpersSkia.h"
#include "skia/SkBitmap.h"
#include "image_operations.h"
#endif
namespace mozilla {
namespace gfx {
bool Scale(uint8_t* srcData, int32_t srcWidth, int32_t srcHeight, int32_t srcStride,
uint8_t* dstData, int32_t dstWidth, int32_t dstHeight, int32_t dstStride,
SurfaceFormat format)
{
#ifdef USE_SKIA
bool opaque;
if (format == FORMAT_B8G8R8A8) {
opaque = false;
} else {
opaque = true;
}
SkBitmap::Config config = GfxFormatToSkiaConfig(format);
SkBitmap imgSrc;
imgSrc.setConfig(config, srcWidth, srcHeight, srcStride);
imgSrc.setPixels(srcData);
imgSrc.setIsOpaque(opaque);
// Rescaler is compatible with 32 bpp only. Convert to RGB32 if needed.
if (config != SkBitmap::kARGB_8888_Config) {
imgSrc.copyTo(&imgSrc, SkBitmap::kARGB_8888_Config);
}
// This returns an SkBitmap backed by dstData; since it also wrote to dstData,
// we don't need to look at that SkBitmap.
SkBitmap result = skia::ImageOperations::Resize(imgSrc,
skia::ImageOperations::RESIZE_BEST,
dstWidth, dstHeight,
dstData);
return result.readyToDraw();
#else
return false;
#endif
}
}
}

gfx/2d/Scale.h (new file, 36 lines)
@@ -0,0 +1,36 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef MOZILLA_GFX_SCALE_H_
#define MOZILLA_GFX_SCALE_H_
#include "Types.h"
namespace mozilla {
namespace gfx {
/**
* Scale an image using a high-quality filter.
*
* Synchronously scales an image and writes the output to the destination in
* 32-bit format. The destination must be pre-allocated by the caller.
*
* Returns true if scaling was successful, and false otherwise. Currently, this
* function is implemented using Skia. If Skia is not enabled when building,
* calling this function will always return false.
*
* IMPLEMENTATION NOTES:
* This API is not currently easily hardware acceleratable. A better API might
* take a SourceSurface and return a SourceSurface; the Direct2D backend, for
* example, could simply set a status bit on a copy of the image, and use
* Direct2D's high-quality scaler at draw time.
*/
GFX2D_API bool Scale(uint8_t* srcData, int32_t srcWidth, int32_t srcHeight, int32_t srcStride,
uint8_t* dstData, int32_t dstWidth, int32_t dstHeight, int32_t dstStride,
SurfaceFormat format);
}
}
#endif /* MOZILLA_GFX_SCALE_H_ */
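For illustration, a minimal sketch of how a caller might use this API; the function name, image dimensions, and the unpadded stride (width * 4) are assumptions made for the example, not part of the patch:

#include <stdint.h>
#include <vector>
#include "Scale.h"

using namespace mozilla::gfx;

// Downscale a 640x480 BGRA image to 320x240. The destination buffer is
// pre-allocated by the caller, as the documentation above requires.
bool DownscaleExample(uint8_t* srcData)
{
  const int32_t srcW = 640, srcH = 480;
  const int32_t dstW = 320, dstH = 240;
  std::vector<uint8_t> dst(dstW * dstH * 4);  // 32 bpp, no row padding
  return Scale(srcData, srcW, srcH, srcW * 4,
               &dst[0], dstW, dstH, dstW * 4,
               FORMAT_B8G8R8A8);
}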

gfx/2d/convolver.cpp (new file, 864 lines)
@@ -0,0 +1,864 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "convolver.h"
#include <algorithm>
#include "nsAlgorithm.h"
#include "skia/SkTypes.h"
// note: SIMD_SSE2 is not enabled because of bugs, apparently
#if defined(SIMD_SSE2)
#include <emmintrin.h> // ARCH_CPU_X86_FAMILY was defined in build/config.h
#endif
namespace skia {
namespace {
// Converts the argument to an 8-bit unsigned value by clamping to the range
// 0-255.
inline unsigned char ClampTo8(int a) {
if (static_cast<unsigned>(a) < 256)
return a; // Avoid the extra check in the common case.
if (a < 0)
return 0;
return 255;
}
// Stores a list of rows in a circular buffer. The usage is you write into it
// by calling AdvanceRow. It will keep track of which row in the buffer it
// should use next, and the total number of rows added.
class CircularRowBuffer {
public:
// The number of pixels in each row is given in |source_row_pixel_width|.
// The maximum number of rows needed in the buffer is |max_y_filter_size|
// (we only need to store enough rows for the biggest filter).
//
// We use the |first_input_row| to compute the coordinates of all of the
// following rows returned by Advance().
CircularRowBuffer(int dest_row_pixel_width, int max_y_filter_size,
int first_input_row)
: row_byte_width_(dest_row_pixel_width * 4),
num_rows_(max_y_filter_size),
next_row_(0),
next_row_coordinate_(first_input_row) {
buffer_.resize(row_byte_width_ * max_y_filter_size);
row_addresses_.resize(num_rows_);
}
// Moves to the next row in the buffer, returning a pointer to the beginning
// of it.
unsigned char* AdvanceRow() {
unsigned char* row = &buffer_[next_row_ * row_byte_width_];
next_row_coordinate_++;
// Set the pointer to the next row to use, wrapping around if necessary.
next_row_++;
if (next_row_ == num_rows_)
next_row_ = 0;
return row;
}
// Returns a pointer to an "unrolled" array of rows. These rows will start
// at the y coordinate placed into |*first_row_index| and will continue in
// order for the maximum number of rows in this circular buffer.
//
// The |first_row_index| may be negative. This means the circular buffer
// starts before the top of the image (it hasn't been filled yet).
unsigned char* const* GetRowAddresses(int* first_row_index) {
// Example for a 4-element circular buffer holding coords 6-9.
// Row 0 Coord 8
// Row 1 Coord 9
// Row 2 Coord 6 <- next_row_ = 2, next_row_coordinate_ = 10.
// Row 3 Coord 7
//
// The "next" row is also the first (lowest) coordinate. This computation
// may yield a negative value, but that's OK, the math will work out
// since the user of this buffer will compute the offset relative
// to the first_row_index and the negative rows will never be used.
*first_row_index = next_row_coordinate_ - num_rows_;
int cur_row = next_row_;
for (int i = 0; i < num_rows_; i++) {
row_addresses_[i] = &buffer_[cur_row * row_byte_width_];
// Advance to the next row, wrapping if necessary.
cur_row++;
if (cur_row == num_rows_)
cur_row = 0;
}
return &row_addresses_[0];
}
private:
// The buffer storing the rows. They are packed, each one row_byte_width_.
std::vector<unsigned char> buffer_;
// Number of bytes per row in the |buffer_|.
int row_byte_width_;
// The number of rows available in the buffer.
int num_rows_;
// The next row index we should write into. This wraps around as the
// circular buffer is used.
int next_row_;
// The y coordinate of the |next_row_|. This is incremented each time a
// new row is appended and does not wrap.
int next_row_coordinate_;
// Buffer used by GetRowAddresses().
std::vector<unsigned char*> row_addresses_;
};
// Convolves horizontally along a single row. The row data is given in
// |src_data| and continues for the num_values() of the filter.
template<bool has_alpha>
void ConvolveHorizontally(const unsigned char* src_data,
const ConvolutionFilter1D& filter,
unsigned char* out_row) {
// Loop over each pixel on this row in the output image.
int num_values = filter.num_values();
for (int out_x = 0; out_x < num_values; out_x++) {
// Get the filter that determines the current output pixel.
int filter_offset, filter_length;
const ConvolutionFilter1D::Fixed* filter_values =
filter.FilterForValue(out_x, &filter_offset, &filter_length);
// Compute the first pixel in this row that the filter affects. It will
// touch |filter_length| pixels (4 bytes each) after this.
const unsigned char* row_to_filter = &src_data[filter_offset * 4];
// Apply the filter to the row to get the destination pixel in |accum|.
int accum[4] = {0};
for (int filter_x = 0; filter_x < filter_length; filter_x++) {
ConvolutionFilter1D::Fixed cur_filter = filter_values[filter_x];
accum[0] += cur_filter * row_to_filter[filter_x * 4 + 0];
accum[1] += cur_filter * row_to_filter[filter_x * 4 + 1];
accum[2] += cur_filter * row_to_filter[filter_x * 4 + 2];
if (has_alpha)
accum[3] += cur_filter * row_to_filter[filter_x * 4 + 3];
}
// Bring this value back in range. All of the filter scaling factors
// are in fixed point with kShiftBits bits of fractional part.
accum[0] >>= ConvolutionFilter1D::kShiftBits;
accum[1] >>= ConvolutionFilter1D::kShiftBits;
accum[2] >>= ConvolutionFilter1D::kShiftBits;
if (has_alpha)
accum[3] >>= ConvolutionFilter1D::kShiftBits;
// Store the new pixel.
out_row[out_x * 4 + 0] = ClampTo8(accum[0]);
out_row[out_x * 4 + 1] = ClampTo8(accum[1]);
out_row[out_x * 4 + 2] = ClampTo8(accum[2]);
if (has_alpha)
out_row[out_x * 4 + 3] = ClampTo8(accum[3]);
}
}
// Does vertical convolution to produce one output row. The filter values and
// length are given in the first two parameters. These are applied to each
// of the rows pointed to in the |source_data_rows| array, with each row
// being |pixel_width| wide.
//
// The output must have room for |pixel_width * 4| bytes.
template<bool has_alpha>
void ConvolveVertically(const ConvolutionFilter1D::Fixed* filter_values,
int filter_length,
unsigned char* const* source_data_rows,
int pixel_width,
unsigned char* out_row) {
// We go through each column in the output and do a vertical convolution,
// generating one output pixel each time.
for (int out_x = 0; out_x < pixel_width; out_x++) {
// Compute the number of bytes over in each row that the current column
// we're convolving starts at. The pixel will cover the next 4 bytes.
int byte_offset = out_x * 4;
// Apply the filter to one column of pixels.
int accum[4] = {0};
for (int filter_y = 0; filter_y < filter_length; filter_y++) {
ConvolutionFilter1D::Fixed cur_filter = filter_values[filter_y];
accum[0] += cur_filter * source_data_rows[filter_y][byte_offset + 0];
accum[1] += cur_filter * source_data_rows[filter_y][byte_offset + 1];
accum[2] += cur_filter * source_data_rows[filter_y][byte_offset + 2];
if (has_alpha)
accum[3] += cur_filter * source_data_rows[filter_y][byte_offset + 3];
}
// Bring this value back in range. All of the filter scaling factors
// are in fixed point with kShiftBits bits of precision.
accum[0] >>= ConvolutionFilter1D::kShiftBits;
accum[1] >>= ConvolutionFilter1D::kShiftBits;
accum[2] >>= ConvolutionFilter1D::kShiftBits;
if (has_alpha)
accum[3] >>= ConvolutionFilter1D::kShiftBits;
// Store the new pixel.
out_row[byte_offset + 0] = ClampTo8(accum[0]);
out_row[byte_offset + 1] = ClampTo8(accum[1]);
out_row[byte_offset + 2] = ClampTo8(accum[2]);
if (has_alpha) {
unsigned char alpha = ClampTo8(accum[3]);
// Make sure the alpha channel doesn't come out smaller than any of the
// color channels. We use premultiplied alpha channels, so this should
// never happen, but rounding errors will cause this from time to time.
// These "impossible" colors will cause overflows (and hence random pixel
// values) when the resulting bitmap is drawn to the screen.
//
// We only need to do this when generating the final output row (here).
int max_color_channel = NS_MAX(out_row[byte_offset + 0],
NS_MAX(out_row[byte_offset + 1], out_row[byte_offset + 2]));
if (alpha < max_color_channel)
out_row[byte_offset + 3] = max_color_channel;
else
out_row[byte_offset + 3] = alpha;
} else {
// No alpha channel, the image is opaque.
out_row[byte_offset + 3] = 0xff;
}
}
}
// Convolves horizontally along a single row. The row data is given in
// |src_data| and continues for the num_values() of the filter.
void ConvolveHorizontally_SSE2(const unsigned char* src_data,
const ConvolutionFilter1D& filter,
unsigned char* out_row) {
#if defined(SIMD_SSE2)
int num_values = filter.num_values();
int filter_offset, filter_length;
__m128i zero = _mm_setzero_si128();
__m128i mask[4];
// |mask| will be used to zero out any extra filter coefficients that are
// loaded by SIMD when |filter_length| is not divisible by 4.
// mask[0] is not used in the following algorithm.
mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
mask[3] = _mm_set_epi16(0, 0, 0, 0, 0, -1, -1, -1);
// Output one pixel each iteration, calculating all channels (RGBA) together.
for (int out_x = 0; out_x < num_values; out_x++) {
const ConvolutionFilter1D::Fixed* filter_values =
filter.FilterForValue(out_x, &filter_offset, &filter_length);
__m128i accum = _mm_setzero_si128();
// Compute the first pixel in this row that the filter affects. It will
// touch |filter_length| pixels (4 bytes each) after this.
const __m128i* row_to_filter =
reinterpret_cast<const __m128i*>(&src_data[filter_offset << 2]);
// We will load and accumulate with four coefficients per iteration.
for (int filter_x = 0; filter_x < filter_length >> 2; filter_x++) {
// Load 4 coefficients => duplicate 1st and 2nd of them for all channels.
__m128i coeff, coeff16;
// [16] xx xx xx xx c3 c2 c1 c0
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
// [16] xx xx xx xx c1 c1 c0 c0
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
// [16] c1 c1 c1 c1 c0 c0 c0 c0
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
// Load four pixels => unpack the first two pixels to 16 bits =>
// multiply with coefficients => accumulate the convolution result.
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
__m128i src8 = _mm_loadu_si128(row_to_filter);
// [16] a1 b1 g1 r1 a0 b0 g0 r0
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
// [32] a0*c0 b0*c0 g0*c0 r0*c0
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
// [32] a1*c1 b1*c1 g1*c1 r1*c1
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
// Duplicate 3rd and 4th coefficients for all channels =>
// unpack the 3rd and 4th pixels to 16 bits => multiply with coefficients
// => accumulate the convolution results.
// [16] xx xx xx xx c3 c3 c2 c2
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
// [16] c3 c3 c3 c3 c2 c2 c2 c2
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
// [16] a3 g3 b3 r3 a2 g2 b2 r2
src16 = _mm_unpackhi_epi8(src8, zero);
mul_hi = _mm_mulhi_epi16(src16, coeff16);
mul_lo = _mm_mullo_epi16(src16, coeff16);
// [32] a2*c2 b2*c2 g2*c2 r2*c2
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
// [32] a3*c3 b3*c3 g3*c3 r3*c3
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
// Advance the pixel and coefficients pointers.
row_to_filter += 1;
filter_values += 4;
}
// When |filter_length| is not divisible by 4, we need to mask out the extra
// filter coefficients that were loaded, zeroing them. Other than that, the
// algorithm is the same as above, except that the 4th pixel will always be
// absent.
int r = filter_length&3;
if (r) {
// Note: filter_values must be padded to align_up(filter_offset, 8).
__m128i coeff, coeff16;
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
// Mask out extra filter taps.
coeff = _mm_and_si128(coeff, mask[r]);
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
// Note: line buffer must be padded to align_up(filter_offset, 16).
// We resolve this by using the C version for the last horizontal line.
__m128i src8 = _mm_loadu_si128(row_to_filter);
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
src16 = _mm_unpackhi_epi8(src8, zero);
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
mul_hi = _mm_mulhi_epi16(src16, coeff16);
mul_lo = _mm_mullo_epi16(src16, coeff16);
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
}
// Shift right for fixed point implementation.
accum = _mm_srai_epi32(accum, ConvolutionFilter1D::kShiftBits);
// Packing 32 bits |accum| to 16 bits per channel (signed saturation).
accum = _mm_packs_epi32(accum, zero);
// Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
accum = _mm_packus_epi16(accum, zero);
// Store the pixel value of 32 bits.
*(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum);
out_row += 4;
}
#endif
}
// Convolves horizontally along four rows. The row data is given in
// |src_data| and continues for the num_values() of the filter.
// The algorithm is almost the same as |ConvolveHorizontally_SSE2|. Please
// refer to that function for detailed comments.
void ConvolveHorizontally4_SSE2(const unsigned char* src_data[4],
const ConvolutionFilter1D& filter,
unsigned char* out_row[4]) {
#if defined(SIMD_SSE2)
int num_values = filter.num_values();
int filter_offset, filter_length;
__m128i zero = _mm_setzero_si128();
__m128i mask[4];
// |mask| will be used to zero out any extra filter coefficients that are
// loaded by SIMD when |filter_length| is not divisible by 4.
// mask[0] is not used in the following algorithm.
mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
mask[3] = _mm_set_epi16(0, 0, 0, 0, 0, -1, -1, -1);
// Output one pixel each iteration, calculating all channels (RGBA) together.
for (int out_x = 0; out_x < num_values; out_x++) {
const ConvolutionFilter1D::Fixed* filter_values =
filter.FilterForValue(out_x, &filter_offset, &filter_length);
// four pixels in a column per iteration.
__m128i accum0 = _mm_setzero_si128();
__m128i accum1 = _mm_setzero_si128();
__m128i accum2 = _mm_setzero_si128();
__m128i accum3 = _mm_setzero_si128();
int start = (filter_offset<<2);
// We will load and accumulate with four coefficients per iteration.
for (int filter_x = 0; filter_x < (filter_length >> 2); filter_x++) {
__m128i coeff, coeff16lo, coeff16hi;
// [16] xx xx xx xx c3 c2 c1 c0
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
// [16] xx xx xx xx c1 c1 c0 c0
coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
// [16] c1 c1 c1 c1 c0 c0 c0 c0
coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
// [16] xx xx xx xx c3 c3 c2 c2
coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
// [16] c3 c3 c3 c3 c2 c2 c2 c2
coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
__m128i src8, src16, mul_hi, mul_lo, t;
#define ITERATION(src, accum) \
src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); \
src16 = _mm_unpacklo_epi8(src8, zero); \
mul_hi = _mm_mulhi_epi16(src16, coeff16lo); \
mul_lo = _mm_mullo_epi16(src16, coeff16lo); \
t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
accum = _mm_add_epi32(accum, t); \
t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
accum = _mm_add_epi32(accum, t); \
src16 = _mm_unpackhi_epi8(src8, zero); \
mul_hi = _mm_mulhi_epi16(src16, coeff16hi); \
mul_lo = _mm_mullo_epi16(src16, coeff16hi); \
t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
accum = _mm_add_epi32(accum, t); \
t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
accum = _mm_add_epi32(accum, t)
ITERATION(src_data[0] + start, accum0);
ITERATION(src_data[1] + start, accum1);
ITERATION(src_data[2] + start, accum2);
ITERATION(src_data[3] + start, accum3);
start += 16;
filter_values += 4;
}
int r = filter_length & 3;
if (r) {
// Note: filter_values must be padded to align_up(filter_offset, 8);
__m128i coeff;
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
// Mask out extra filter taps.
coeff = _mm_and_si128(coeff, mask[r]);
__m128i coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
/* c1 c1 c1 c1 c0 c0 c0 c0 */
coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
__m128i coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
__m128i src8, src16, mul_hi, mul_lo, t;
ITERATION(src_data[0] + start, accum0);
ITERATION(src_data[1] + start, accum1);
ITERATION(src_data[2] + start, accum2);
ITERATION(src_data[3] + start, accum3);
}
accum0 = _mm_srai_epi32(accum0, ConvolutionFilter1D::kShiftBits);
accum0 = _mm_packs_epi32(accum0, zero);
accum0 = _mm_packus_epi16(accum0, zero);
accum1 = _mm_srai_epi32(accum1, ConvolutionFilter1D::kShiftBits);
accum1 = _mm_packs_epi32(accum1, zero);
accum1 = _mm_packus_epi16(accum1, zero);
accum2 = _mm_srai_epi32(accum2, ConvolutionFilter1D::kShiftBits);
accum2 = _mm_packs_epi32(accum2, zero);
accum2 = _mm_packus_epi16(accum2, zero);
accum3 = _mm_srai_epi32(accum3, ConvolutionFilter1D::kShiftBits);
accum3 = _mm_packs_epi32(accum3, zero);
accum3 = _mm_packus_epi16(accum3, zero);
*(reinterpret_cast<int*>(out_row[0])) = _mm_cvtsi128_si32(accum0);
*(reinterpret_cast<int*>(out_row[1])) = _mm_cvtsi128_si32(accum1);
*(reinterpret_cast<int*>(out_row[2])) = _mm_cvtsi128_si32(accum2);
*(reinterpret_cast<int*>(out_row[3])) = _mm_cvtsi128_si32(accum3);
out_row[0] += 4;
out_row[1] += 4;
out_row[2] += 4;
out_row[3] += 4;
}
#endif
}
// Does vertical convolution to produce one output row. The filter values and
// length are given in the first two parameters. These are applied to each
// of the rows pointed to in the |source_data_rows| array, with each row
// being |pixel_width| wide.
//
// The output must have room for |pixel_width * 4| bytes.
template<bool has_alpha>
void ConvolveVertically_SSE2(const ConvolutionFilter1D::Fixed* filter_values,
int filter_length,
unsigned char* const* source_data_rows,
int pixel_width,
unsigned char* out_row) {
#if defined(SIMD_SSE2)
int width = pixel_width & ~3;
__m128i zero = _mm_setzero_si128();
__m128i accum0, accum1, accum2, accum3, coeff16;
const __m128i* src;
// Output four pixels per iteration (16 bytes).
for (int out_x = 0; out_x < width; out_x += 4) {
// Accumulated result for each pixel. 32 bits per RGBA channel.
accum0 = _mm_setzero_si128();
accum1 = _mm_setzero_si128();
accum2 = _mm_setzero_si128();
accum3 = _mm_setzero_si128();
// Convolve with one filter coefficient per iteration.
for (int filter_y = 0; filter_y < filter_length; filter_y++) {
// Duplicate the filter coefficient 8 times.
// [16] cj cj cj cj cj cj cj cj
coeff16 = _mm_set1_epi16(filter_values[filter_y]);
// Load four pixels (16 bytes) together.
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
src = reinterpret_cast<const __m128i*>(
&source_data_rows[filter_y][out_x << 2]);
__m128i src8 = _mm_loadu_si128(src);
// Unpack 1st and 2nd pixels from 8 bits to 16 bits for each channel =>
// multiply with current coefficient => accumulate the result.
// [16] a1 b1 g1 r1 a0 b0 g0 r0
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
// [32] a0 b0 g0 r0
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum0 = _mm_add_epi32(accum0, t);
// [32] a1 b1 g1 r1
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
accum1 = _mm_add_epi32(accum1, t);
// Unpack 3rd and 4th pixels from 8 bits to 16 bits for each channel =>
// multiply with current coefficient => accumulate the result.
// [16] a3 b3 g3 r3 a2 b2 g2 r2
src16 = _mm_unpackhi_epi8(src8, zero);
mul_hi = _mm_mulhi_epi16(src16, coeff16);
mul_lo = _mm_mullo_epi16(src16, coeff16);
// [32] a2 b2 g2 r2
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum2 = _mm_add_epi32(accum2, t);
// [32] a3 b3 g3 r3
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
accum3 = _mm_add_epi32(accum3, t);
}
// Shift right for fixed point implementation.
accum0 = _mm_srai_epi32(accum0, ConvolutionFilter1D::kShiftBits);
accum1 = _mm_srai_epi32(accum1, ConvolutionFilter1D::kShiftBits);
accum2 = _mm_srai_epi32(accum2, ConvolutionFilter1D::kShiftBits);
accum3 = _mm_srai_epi32(accum3, ConvolutionFilter1D::kShiftBits);
// Packing 32 bits |accum| to 16 bits per channel (signed saturation).
// [16] a1 b1 g1 r1 a0 b0 g0 r0
accum0 = _mm_packs_epi32(accum0, accum1);
// [16] a3 b3 g3 r3 a2 b2 g2 r2
accum2 = _mm_packs_epi32(accum2, accum3);
// Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
accum0 = _mm_packus_epi16(accum0, accum2);
if (has_alpha) {
// Compute the max(ri, gi, bi) for each pixel.
// [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
__m128i a = _mm_srli_epi32(accum0, 8);
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
__m128i b = _mm_max_epu8(a, accum0); // Max of r and g.
// [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
a = _mm_srli_epi32(accum0, 16);
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
b = _mm_max_epu8(a, b); // Max of r and g and b.
// [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
b = _mm_slli_epi32(b, 24);
// Make sure the value of the alpha channel is never smaller than the
// maximum value of the color channels.
accum0 = _mm_max_epu8(b, accum0);
} else {
// Set value of alpha channels to 0xFF.
__m128i mask = _mm_set1_epi32(0xff000000);
accum0 = _mm_or_si128(accum0, mask);
}
// Store the convolution result (16 bytes) and advance the pixel pointers.
_mm_storeu_si128(reinterpret_cast<__m128i*>(out_row), accum0);
out_row += 16;
}
// When the width of the output is not divisible by 4, we need to store one
// pixel (4 bytes) at a time, and the fourth pixel is always absent.
if (pixel_width & 3) {
accum0 = _mm_setzero_si128();
accum1 = _mm_setzero_si128();
accum2 = _mm_setzero_si128();
for (int filter_y = 0; filter_y < filter_length; ++filter_y) {
coeff16 = _mm_set1_epi16(filter_values[filter_y]);
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
src = reinterpret_cast<const __m128i*>(
&source_data_rows[filter_y][width<<2]);
__m128i src8 = _mm_loadu_si128(src);
// [16] a1 b1 g1 r1 a0 b0 g0 r0
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
// [32] a0 b0 g0 r0
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum0 = _mm_add_epi32(accum0, t);
// [32] a1 b1 g1 r1
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
accum1 = _mm_add_epi32(accum1, t);
// [16] a3 b3 g3 r3 a2 b2 g2 r2
src16 = _mm_unpackhi_epi8(src8, zero);
mul_hi = _mm_mulhi_epi16(src16, coeff16);
mul_lo = _mm_mullo_epi16(src16, coeff16);
// [32] a2 b2 g2 r2
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum2 = _mm_add_epi32(accum2, t);
}
accum0 = _mm_srai_epi32(accum0, ConvolutionFilter1D::kShiftBits);
accum1 = _mm_srai_epi32(accum1, ConvolutionFilter1D::kShiftBits);
accum2 = _mm_srai_epi32(accum2, ConvolutionFilter1D::kShiftBits);
// [16] a1 b1 g1 r1 a0 b0 g0 r0
accum0 = _mm_packs_epi32(accum0, accum1);
// [16] a3 b3 g3 r3 a2 b2 g2 r2
accum2 = _mm_packs_epi32(accum2, zero);
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
accum0 = _mm_packus_epi16(accum0, accum2);
if (has_alpha) {
// [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
__m128i a = _mm_srli_epi32(accum0, 8);
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
__m128i b = _mm_max_epu8(a, accum0); // Max of r and g.
// [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
a = _mm_srli_epi32(accum0, 16);
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
b = _mm_max_epu8(a, b); // Max of r and g and b.
// [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
b = _mm_slli_epi32(b, 24);
accum0 = _mm_max_epu8(b, accum0);
} else {
__m128i mask = _mm_set1_epi32(0xff000000);
accum0 = _mm_or_si128(accum0, mask);
}
for (int out_x = width; out_x < pixel_width; out_x++) {
*(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum0);
accum0 = _mm_srli_si128(accum0, 4);
out_row += 4;
}
}
#endif
}
} // namespace
// ConvolutionFilter1D ---------------------------------------------------------
ConvolutionFilter1D::ConvolutionFilter1D()
: max_filter_(0) {
}
ConvolutionFilter1D::~ConvolutionFilter1D() {
}
void ConvolutionFilter1D::AddFilter(int filter_offset,
const float* filter_values,
int filter_length) {
SkASSERT(filter_length > 0);
std::vector<Fixed> fixed_values;
fixed_values.reserve(filter_length);
for (int i = 0; i < filter_length; ++i)
fixed_values.push_back(FloatToFixed(filter_values[i]));
AddFilter(filter_offset, &fixed_values[0], filter_length);
}
void ConvolutionFilter1D::AddFilter(int filter_offset,
const Fixed* filter_values,
int filter_length) {
// It is common for leading/trailing filter values to be zeros. In such
// cases it is beneficial to only store the central factors.
// For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
// a 1080p image this optimization gives a ~10% speed improvement.
int first_non_zero = 0;
while (first_non_zero < filter_length && filter_values[first_non_zero] == 0)
first_non_zero++;
if (first_non_zero < filter_length) {
// Here we have at least one non-zero factor.
int last_non_zero = filter_length - 1;
while (last_non_zero >= 0 && filter_values[last_non_zero] == 0)
last_non_zero--;
filter_offset += first_non_zero;
filter_length = last_non_zero + 1 - first_non_zero;
SkASSERT(filter_length > 0);
for (int i = first_non_zero; i <= last_non_zero; i++)
filter_values_.push_back(filter_values[i]);
} else {
// Here all the factors were zeroes.
filter_length = 0;
}
FilterInstance instance;
// We pushed filter_length elements onto filter_values_
instance.data_location = (static_cast<int>(filter_values_.size()) -
filter_length);
instance.offset = filter_offset;
instance.length = filter_length;
filters_.push_back(instance);
max_filter_ = NS_MAX(max_filter_, filter_length);
}
void BGRAConvolve2D(const unsigned char* source_data,
int source_byte_row_stride,
bool source_has_alpha,
const ConvolutionFilter1D& filter_x,
const ConvolutionFilter1D& filter_y,
int output_byte_row_stride,
unsigned char* output,
bool use_sse2) {
#if !defined(SIMD_SSE2)
// Even if we have runtime support for SSE2 instructions, since the binary
// was not built with SSE2 support we have to fall back to the C version.
use_sse2 = false;
#endif
int max_y_filter_size = filter_y.max_filter();
// The next row in the input that we will generate a horizontally
// convolved row for. If the filter doesn't start at the beginning of the
// image (this is the case when we are only resizing a subset), then we
// don't want to generate any output rows before that. Compute the starting
// row for convolution as the first pixel for the first vertical filter.
int filter_offset, filter_length;
const ConvolutionFilter1D::Fixed* filter_values =
filter_y.FilterForValue(0, &filter_offset, &filter_length);
int next_x_row = filter_offset;
// We loop over each row in the input doing a horizontal convolution. This
// will result in a horizontally convolved image. We write the results into
// a circular buffer of convolved rows and do vertical convolution as rows
// are available. This prevents us from having to store the entire
// intermediate image and helps cache coherency.
// We will need four extra rows so that the horizontal convolution of four
// rows can be done simultaneously. We also pad each row in the row buffer
// to be aligned up to 16 bytes.
// TODO(jiesun): We do not use aligned load from row buffer in vertical
// convolution pass yet. Somehow Windows does not like it.
int row_buffer_width = (filter_x.num_values() + 15) & ~0xF;
int row_buffer_height = max_y_filter_size + (use_sse2 ? 4 : 0);
CircularRowBuffer row_buffer(row_buffer_width,
row_buffer_height,
filter_offset);
// Loop over every possible output row, processing just enough horizontal
// convolutions to run each subsequent vertical convolution.
SkASSERT(output_byte_row_stride >= filter_x.num_values() * 4);
int num_output_rows = filter_y.num_values();
// We need to check which is the last line to convolve before we advance 4
// lines in one iteration.
int last_filter_offset, last_filter_length;
filter_y.FilterForValue(num_output_rows - 1, &last_filter_offset,
&last_filter_length);
for (int out_y = 0; out_y < num_output_rows; out_y++) {
filter_values = filter_y.FilterForValue(out_y,
&filter_offset, &filter_length);
// Generate output rows until we have enough to run the current filter.
if (use_sse2) {
while (next_x_row < filter_offset + filter_length) {
if (next_x_row + 3 < last_filter_offset + last_filter_length - 1) {
const unsigned char* src[4];
unsigned char* out_row[4];
for (int i = 0; i < 4; ++i) {
src[i] = &source_data[(next_x_row + i) * source_byte_row_stride];
out_row[i] = row_buffer.AdvanceRow();
}
ConvolveHorizontally4_SSE2(src, filter_x, out_row);
next_x_row += 4;
} else {
// For the last row, the SSE2 load may access data beyond the
// image area, so we use the C version here.
if (next_x_row == last_filter_offset + last_filter_length - 1) {
if (source_has_alpha) {
ConvolveHorizontally<true>(
&source_data[next_x_row * source_byte_row_stride],
filter_x, row_buffer.AdvanceRow());
} else {
ConvolveHorizontally<false>(
&source_data[next_x_row * source_byte_row_stride],
filter_x, row_buffer.AdvanceRow());
}
} else {
ConvolveHorizontally_SSE2(
&source_data[next_x_row * source_byte_row_stride],
filter_x, row_buffer.AdvanceRow());
}
next_x_row++;
}
}
} else {
while (next_x_row < filter_offset + filter_length) {
if (source_has_alpha) {
ConvolveHorizontally<true>(
&source_data[next_x_row * source_byte_row_stride],
filter_x, row_buffer.AdvanceRow());
} else {
ConvolveHorizontally<false>(
&source_data[next_x_row * source_byte_row_stride],
filter_x, row_buffer.AdvanceRow());
}
next_x_row++;
}
}
// Compute where in the output image this row of final data will go.
unsigned char* cur_output_row = &output[out_y * output_byte_row_stride];
// Get the list of rows that the circular buffer has, in order.
int first_row_in_circular_buffer;
unsigned char* const* rows_to_convolve =
row_buffer.GetRowAddresses(&first_row_in_circular_buffer);
// Now compute the start of the subset of those rows that the filter
// needs.
unsigned char* const* first_row_for_filter =
&rows_to_convolve[filter_offset - first_row_in_circular_buffer];
if (source_has_alpha) {
if (use_sse2) {
ConvolveVertically_SSE2<true>(filter_values, filter_length,
first_row_for_filter,
filter_x.num_values(), cur_output_row);
} else {
ConvolveVertically<true>(filter_values, filter_length,
first_row_for_filter,
filter_x.num_values(), cur_output_row);
}
} else {
if (use_sse2) {
ConvolveVertically_SSE2<false>(filter_values, filter_length,
first_row_for_filter,
filter_x.num_values(), cur_output_row);
} else {
ConvolveVertically<false>(filter_values, filter_length,
first_row_for_filter,
filter_x.num_values(), cur_output_row);
}
}
}
}
} // namespace skia
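To make the fixed-point scheme concrete, here is a small standalone sketch of the per-channel arithmetic that ConvolveHorizontally performs: weights are converted to 14-bit fixed point, multiplied and accumulated in 32 bits, shifted back, and clamped. The three-tap filter and sample values are invented for the example:

#include <stdio.h>

typedef short Fixed;               // matches ConvolutionFilter1D::Fixed
static const int kShiftBits = 14;  // fractional bits, as in convolver.h

static Fixed FloatToFixed(float f) {
  return static_cast<Fixed>(f * (1 << kShiftBits));
}

static unsigned char ClampTo8(int a) {
  if (static_cast<unsigned>(a) < 256)
    return a;
  return a < 0 ? 0 : 255;
}

int main() {
  // A normalized 3-tap filter (0.25, 0.5, 0.25) applied to three samples of
  // one color channel.
  Fixed weights[3] = { FloatToFixed(0.25f), FloatToFixed(0.5f),
                       FloatToFixed(0.25f) };
  unsigned char src[3] = { 100, 200, 100 };
  int accum = 0;
  for (int i = 0; i < 3; ++i)
    accum += weights[i] * src[i];   // 16-bit weight * 8-bit sample fits in int
  accum >>= kShiftBits;             // shift back to pixel range
  printf("%d\n", ClampTo8(accum));  // prints 150 = 0.25*100 + 0.5*200 + 0.25*100
  return 0;
}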

gfx/2d/convolver.h (new file, 166 lines)
@@ -0,0 +1,166 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SKIA_EXT_CONVOLVER_H_
#define SKIA_EXT_CONVOLVER_H_
#include <cmath>
#include <vector>
#include "base/basictypes.h"
#include "prtypes.h"
#include "base/cpu.h"
#include "skia/SkTypes.h"
// avoid confusion with Mac OS X's math library (Carbon)
#if defined(__APPLE__)
#undef FloatToFixed
#undef FixedToFloat
#endif
namespace skia {
// Represents a filter in one dimension. Each output pixel has one entry in this
// object for the filter values contributing to it. You build up the filter
// list by calling AddFilter for each output pixel (in order).
//
// We do 2-dimensional convolution by first convolving each row by one
// ConvolutionFilter1D, then convolving each column by another one.
//
// Entries are stored in fixed point, shifted left by kShiftBits.
class ConvolutionFilter1D {
public:
typedef short Fixed;
// The number of bits that fixed point values are shifted by.
enum { kShiftBits = 14 };
ConvolutionFilter1D();
~ConvolutionFilter1D();
// Convert between floating point and our fixed point representation.
static Fixed FloatToFixed(float f) {
return static_cast<Fixed>(f * (1 << kShiftBits));
}
static unsigned char FixedToChar(Fixed x) {
return static_cast<unsigned char>(x >> kShiftBits);
}
static float FixedToFloat(Fixed x) {
// The cast relies on Fixed being a short, implying that on
// the platforms we care about all (16) bits will fit into
// the mantissa of a (32-bit) float.
COMPILE_ASSERT(sizeof(Fixed) == 2, fixed_type_should_fit_in_float_mantissa);
float raw = static_cast<float>(x);
return ldexpf(raw, -kShiftBits);
}
// Returns the maximum pixel span of a filter.
int max_filter() const { return max_filter_; }
// Returns the number of filters in this filter. This is the dimension of the
// output image.
int num_values() const { return static_cast<int>(filters_.size()); }
// Appends the given list of scaling values for generating a given output
// pixel. |filter_offset| is the distance from the edge of the image to where
// the scaling factors start. The scaling factors apply to the source pixels
// starting from this position, and going for the next |filter_length| pixels.
//
// You will probably want to make sure your input is normalized (that is,
// all entries in |filter_values| sum to one) to prevent affecting the overall
// brightness of the image.
//
// The filter_length must be > 0.
//
// This version will automatically convert your input to fixed point.
void AddFilter(int filter_offset,
const float* filter_values,
int filter_length);
// Same as the above version, but the input is already fixed point.
void AddFilter(int filter_offset,
const Fixed* filter_values,
int filter_length);
// Retrieves a filter for the given |value_offset|, a position in the output
// image in the direction we're convolving. The offset and length of the
// filter values are put into the corresponding out arguments (see AddFilter
// above for what these mean), and a pointer to the first scaling factor is
// returned. There will be |filter_length| values in this array.
inline const Fixed* FilterForValue(int value_offset,
int* filter_offset,
int* filter_length) const {
const FilterInstance& filter = filters_[value_offset];
*filter_offset = filter.offset;
*filter_length = filter.length;
if (filter.length == 0) {
return NULL;
}
return &filter_values_[filter.data_location];
}
inline void PaddingForSIMD(int padding_count) {
// Pads with |padding_count| more dummy coefficients after the coefficients
// of the last filter, to prevent SIMD instructions, which load 8 or 16 bytes
// together, from accessing invalid memory areas. We are not trying to align
// the coefficients right now due to the opaqueness of the <vector>
// implementation. This has to be done after all |AddFilter| calls.
for (int i = 0; i < padding_count; ++i)
filter_values_.push_back(static_cast<Fixed>(0));
}
private:
struct FilterInstance {
// Offset within filter_values for this instance of the filter.
int data_location;
// Distance from the left of the filter to the center. IN PIXELS
int offset;
// Number of values in this filter instance.
int length;
};
// Stores the information for each filter added to this class.
std::vector<FilterInstance> filters_;
// We store all the filter values in this flat list, indexed by
// |FilterInstance.data_location| to avoid the mallocs required for storing
// each one separately.
std::vector<Fixed> filter_values_;
// The maximum size of any filter we've added.
int max_filter_;
};
// Does a two-dimensional convolution on the given source image.
//
// It is assumed the source pixel offsets referenced in the input filters
// reference only valid pixels, so the source image size is not required. Each
// row of the source image starts |source_byte_row_stride| after the previous
// one (this allows you to have rows with some padding at the end).
//
// The result will be put into the given output buffer. The destination image
// size will be xfilter.num_values() * yfilter.num_values() pixels. It will be
// in rows of exactly xfilter.num_values() * 4 bytes.
//
// |source_has_alpha| is a hint that allows us to avoid doing computations on
// the alpha channel if the image is opaque. If you don't know, set this to
// true and it will work properly, but setting this to false will be a few
// percent faster if you know the image is opaque.
//
// The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
// (this is ARGB when loaded into 32-bit words on a little-endian machine).
void BGRAConvolve2D(const unsigned char* source_data,
int source_byte_row_stride,
bool source_has_alpha,
const ConvolutionFilter1D& xfilter,
const ConvolutionFilter1D& yfilter,
int output_byte_row_stride,
unsigned char* output,
bool use_sse2);
} // namespace skia
#endif // SKIA_EXT_CONVOLVER_H_
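As an illustration of the AddFilter()/BGRAConvolve2D() contract, the following sketch (an example, not part of the patch) downscales a 4x4 BGRA image to 2x2 with a box filter: each output pixel averages a 2x2 source block, so each 1-D filter gets two taps of 0.5 per output pixel:

#include "convolver.h"

// src: 4x4 BGRA pixels, row stride 16 bytes; out: 2x2 BGRA, row stride 8.
void BoxDownscale4x4To2x2(const unsigned char* src, unsigned char* out) {
  skia::ConvolutionFilter1D filter_x, filter_y;
  const float taps[2] = { 0.5f, 0.5f };
  // Output pixel i covers source pixels 2*i and 2*i+1 in each dimension.
  for (int i = 0; i < 2; ++i) {
    filter_x.AddFilter(2 * i, taps, 2);
    filter_y.AddFilter(2 * i, taps, 2);
  }
  // Must follow all AddFilter calls before SSE2 use; harmless here since
  // use_sse2 is false.
  filter_x.PaddingForSIMD(8);
  filter_y.PaddingForSIMD(8);
  skia::BGRAConvolve2D(src, 16, /* source_has_alpha = */ true,
                       filter_x, filter_y, 8, out,
                       /* use_sse2 = */ false);
}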

gfx/2d/image_operations.cpp (new file, 536 lines)
@@ -0,0 +1,536 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/basictypes.h"
#define _USE_MATH_DEFINES
#include <algorithm>
#include <cmath>
#include <limits>
#include "image_operations.h"
#include "nsAlgorithm.h"
#include "base/stack_container.h"
#include "convolver.h"
#include "skia/SkColorPriv.h"
#include "skia/SkBitmap.h"
#include "skia/SkRect.h"
#include "skia/SkFontHost.h"
namespace skia {
namespace {
// Returns the ceiling/floor as an integer.
inline int CeilInt(float val) {
return static_cast<int>(ceil(val));
}
inline int FloorInt(float val) {
return static_cast<int>(floor(val));
}
// Filter function computation -------------------------------------------------
// Evaluates the box filter, which goes from -0.5 to +0.5.
float EvalBox(float x) {
return (x >= -0.5f && x < 0.5f) ? 1.0f : 0.0f;
}
// Evaluates the Lanczos filter of the given filter size window for the given
// position.
//
// |filter_size| is the width of the filter (the "window"), outside of which
// the value of the function is 0. Inside of the window, the value is the
// normalized sinc function:
// lanczos(x) = sinc(x) * sinc(x / filter_size);
// where
// sinc(x) = sin(pi*x) / (pi*x);
float EvalLanczos(int filter_size, float x) {
if (x <= -filter_size || x >= filter_size)
return 0.0f; // Outside of the window.
if (x > -std::numeric_limits<float>::epsilon() &&
x < std::numeric_limits<float>::epsilon())
return 1.0f; // Special case the discontinuity at the origin.
float xpi = x * static_cast<float>(M_PI);
return (sin(xpi) / xpi) * // sinc(x)
sin(xpi / filter_size) / (xpi / filter_size); // sinc(x/filter_size)
}
// Evaluates the Hamming filter of the given filter size window for the given
// position.
//
// The filter covers [-filter_size, +filter_size]. Outside of this window
// the value of the function is 0. Inside of the window, the value is sinus
// cardinal multiplied by a recentered Hamming function. The traditional
// Hamming formula for a window of size N and n ranging in [0, N-1] is:
// hamming(n) = 0.54 - 0.46 * cos(2 * pi * n / (N-1)))
// In our case we want the function centered for x == 0 and at its minimum
// on both ends of the window (x == +/- filter_size), hence the adjusted
// formula:
// hamming(x) = (0.54 -
// 0.46 * cos(2 * pi * (x - filter_size)/ (2 * filter_size)))
// = 0.54 - 0.46 * cos(pi * x / filter_size - pi)
// = 0.54 + 0.46 * cos(pi * x / filter_size)
float EvalHamming(int filter_size, float x) {
if (x <= -filter_size || x >= filter_size)
return 0.0f; // Outside of the window.
if (x > -std::numeric_limits<float>::epsilon() &&
x < std::numeric_limits<float>::epsilon())
return 1.0f; // Special case the sinc discontinuity at the origin.
const float xpi = x * static_cast<float>(M_PI);
return ((sin(xpi) / xpi) * // sinc(x)
(0.54f + 0.46f * cos(xpi / filter_size))); // hamming(x)
}
// ResizeFilter ----------------------------------------------------------------
// Encapsulates computation and storage of the filters required for one complete
// resize operation.
class ResizeFilter {
public:
ResizeFilter(ImageOperations::ResizeMethod method,
int src_full_width, int src_full_height,
int dest_width, int dest_height,
const SkIRect& dest_subset);
// Returns the filled filter values.
const ConvolutionFilter1D& x_filter() { return x_filter_; }
const ConvolutionFilter1D& y_filter() { return y_filter_; }
private:
// Returns the number of pixels that the filter spans, in filter space (the
// destination image).
float GetFilterSupport(float scale) {
switch (method_) {
case ImageOperations::RESIZE_BOX:
// The box filter just scales with the image scaling.
return 0.5f; // Only want one side of the filter = /2.
case ImageOperations::RESIZE_HAMMING1:
// The Hamming filter takes as much space in the source image in
// each direction as the size of the window = 1 for Hamming1.
return 1.0f;
case ImageOperations::RESIZE_LANCZOS2:
// The Lanczos filter takes as much space in the source image in
// each direction as the size of the window = 2 for Lanczos2.
return 2.0f;
case ImageOperations::RESIZE_LANCZOS3:
// The Lanczos filter takes as much space in the source image in
// each direction as the size of the window = 3 for Lanczos3.
return 3.0f;
default:
return 1.0f;
}
}
// Computes one set of filters either horizontally or vertically. The caller
// will specify the "min" and "max" rather than the bottom/top and
// right/bottom so that the same code can be re-used in each dimension.
//
// |src_depend_lo| and |src_depend_size| gives the range for the source
// depend rectangle (horizontally or vertically at the caller's discretion
// -- see above for what this means).
//
// Likewise, the range of destination values to compute and the scale factor
// for the transform is also specified.
void ComputeFilters(int src_size,
int dest_subset_lo, int dest_subset_size,
float scale, float src_support,
ConvolutionFilter1D* output);
// Computes the filter value given the coordinate in filter space.
inline float ComputeFilter(float pos) {
switch (method_) {
case ImageOperations::RESIZE_BOX:
return EvalBox(pos);
case ImageOperations::RESIZE_HAMMING1:
return EvalHamming(1, pos);
case ImageOperations::RESIZE_LANCZOS2:
return EvalLanczos(2, pos);
case ImageOperations::RESIZE_LANCZOS3:
return EvalLanczos(3, pos);
default:
return 0;
}
}
ImageOperations::ResizeMethod method_;
// Size of the filter support on one side only in the destination space.
// See GetFilterSupport.
float x_filter_support_;
float y_filter_support_;
// Subset of scaled destination bitmap to compute.
SkIRect out_bounds_;
ConvolutionFilter1D x_filter_;
ConvolutionFilter1D y_filter_;
DISALLOW_COPY_AND_ASSIGN(ResizeFilter);
};
ResizeFilter::ResizeFilter(ImageOperations::ResizeMethod method,
int src_full_width, int src_full_height,
int dest_width, int dest_height,
const SkIRect& dest_subset)
: method_(method),
out_bounds_(dest_subset) {
// method_ will only ever refer to an "algorithm method".
SkASSERT((ImageOperations::RESIZE_FIRST_ALGORITHM_METHOD <= method) &&
(method <= ImageOperations::RESIZE_LAST_ALGORITHM_METHOD));
float scale_x = static_cast<float>(dest_width) /
static_cast<float>(src_full_width);
float scale_y = static_cast<float>(dest_height) /
static_cast<float>(src_full_height);
x_filter_support_ = GetFilterSupport(scale_x);
y_filter_support_ = GetFilterSupport(scale_y);
// Support of the filter in source space.
float src_x_support = x_filter_support_ / scale_x;
float src_y_support = y_filter_support_ / scale_y;
ComputeFilters(src_full_width, dest_subset.fLeft, dest_subset.width(),
scale_x, src_x_support, &x_filter_);
ComputeFilters(src_full_height, dest_subset.fTop, dest_subset.height(),
scale_y, src_y_support, &y_filter_);
}
// TODO(egouriou): Take advantage of periods in the convolution.
// Practical resizing filters are periodic outside of the border area.
// For Lanczos, a scaling by a (reduced) factor of p/q (q pixels in the
// source become p pixels in the destination) will have a period of p.
// A nice consequence is a period of 1 when downscaling by an integral
// factor. Downscaling from typical display resolutions is also bound
// to produce interesting periods as those are chosen to have multiple
// small factors.
// Small periods reduce computational load and improve cache usage if
// the coefficients can be shared. For periods of 1 we can consider
// loading the factors only once outside the borders.
void ResizeFilter::ComputeFilters(int src_size,
int dest_subset_lo, int dest_subset_size,
float scale, float src_support,
ConvolutionFilter1D* output) {
int dest_subset_hi = dest_subset_lo + dest_subset_size; // [lo, hi)
// When we're doing a magnification, the scale will be larger than one. This
// means the destination pixels are much smaller than the source pixels, and
// that the range covered by the filter won't necessarily cover any source
// pixel boundaries. Therefore, we use these clamped values (max of 1) for
// some computations.
float clamped_scale = NS_MIN(1.0f, scale);
// Speed up the divisions below by turning them into multiplies.
float inv_scale = 1.0f / scale;
StackVector<float, 64> filter_values;
StackVector<int16_t, 64> fixed_filter_values;
// Loop over all pixels in the output range. We will generate one set of
// filter values for each one. Those values will tell us how to blend the
// source pixels to compute the destination pixel.
for (int dest_subset_i = dest_subset_lo; dest_subset_i < dest_subset_hi;
dest_subset_i++) {
// Reset the arrays. We don't declare them inside so they can re-use the
// same malloc-ed buffer.
filter_values->clear();
fixed_filter_values->clear();
// This is the pixel in the source directly under the pixel in the dest.
// Note that we base computations on the "center" of the pixels. To see
// why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
// downscale should "cover" the pixels around the pixel with *its center*
// at coordinates (2.5, 2.5) in the source, not those around (0, 0).
// Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
// TODO(evannier): this code is therefore incorrect and should read:
// float src_pixel = (static_cast<float>(dest_subset_i) + 0.5f) * inv_scale;
// I leave it incorrect, because changing it would require modifying
// the results for the webkit test, which I will do in a subsequent checkin.
float src_pixel = dest_subset_i * inv_scale;
// Compute the (inclusive) range of source pixels the filter covers.
int src_begin = NS_MAX(0, FloorInt(src_pixel - src_support));
int src_end = NS_MIN(src_size - 1, CeilInt(src_pixel + src_support));
// Compute the unnormalized filter value at each location of the source
// it covers.
float filter_sum = 0.0f; // Sum of the filter values for normalizing.
for (int cur_filter_pixel = src_begin; cur_filter_pixel <= src_end;
cur_filter_pixel++) {
// Distance from the center of the filter, this is the filter coordinate
// in source space. We also need to consider the center of the pixel
// when comparing distance against 'src_pixel'. In the 5x downscale
// example used above the distance from the center of the filter to
// the pixel with coordinates (2, 2) should be 0, because its center
// is at (2.5, 2.5).
// TODO(evannier): as above (in regards to the 0.5 pixel error),
// this code is incorrect, but it is left for the same reasons.
// float src_filter_dist =
// ((static_cast<float>(cur_filter_pixel) + 0.5f) - src_pixel);
float src_filter_dist = cur_filter_pixel - src_pixel;
// Since the filter really exists in dest space, map it there.
float dest_filter_dist = src_filter_dist * clamped_scale;
// Compute the filter value at that location.
float filter_value = ComputeFilter(dest_filter_dist);
filter_values->push_back(filter_value);
filter_sum += filter_value;
}
// The filter must be normalized so that we don't affect the brightness of
// the image. Convert to normalized fixed point.
int16_t fixed_sum = 0;
for (size_t i = 0; i < filter_values->size(); i++) {
int16_t cur_fixed = output->FloatToFixed(filter_values[i] / filter_sum);
fixed_sum += cur_fixed;
fixed_filter_values->push_back(cur_fixed);
}
// The conversion to fixed point will leave some rounding errors, which
// we add back in to avoid affecting the brightness of the image. We
// arbitrarily add this to the center of the filter array (this won't always
// be the center of the filter function since it could get clipped on the
// edges, but it doesn't matter enough to worry about that case).
int16_t leftovers = output->FloatToFixed(1.0f) - fixed_sum;
fixed_filter_values[fixed_filter_values->size() / 2] += leftovers;
// Now it's ready to go.
output->AddFilter(src_begin, &fixed_filter_values[0],
static_cast<int>(fixed_filter_values->size()));
}
output->PaddingForSIMD(8);
}
ImageOperations::ResizeMethod ResizeMethodToAlgorithmMethod(
ImageOperations::ResizeMethod method) {
// Convert any "Quality Method" into an "Algorithm Method"
if (method >= ImageOperations::RESIZE_FIRST_ALGORITHM_METHOD &&
method <= ImageOperations::RESIZE_LAST_ALGORITHM_METHOD) {
return method;
}
// The call to ImageOperationsGtv::Resize() above took care of
// GPU-acceleration in the cases where it is possible. So now we just
// pick the appropriate software method for each resize quality.
switch (method) {
// Users of RESIZE_GOOD are willing to trade a lot of quality to
// get speed, allowing the use of linear resampling to get hardware
// acceleration (SRB). Hence any of our "good" software filters
// will be acceptable, and we use the fastest one, Hamming-1.
case ImageOperations::RESIZE_GOOD:
// Users of RESIZE_BETTER are willing to trade some quality in order
// to improve performance, but are guaranteed not to devolve to a linear
// resampling. In visual tests we see that Hamming-1 is not as good as
// Lanczos-2, however it is about 40% faster and Lanczos-2 itself is
// about 30% faster than Lanczos-3. The use of Hamming-1 has been deemed
// an acceptable trade-off between quality and speed.
case ImageOperations::RESIZE_BETTER:
return ImageOperations::RESIZE_HAMMING1;
default:
return ImageOperations::RESIZE_LANCZOS3;
}
}
} // namespace
// Resize ----------------------------------------------------------------------
// static
SkBitmap ImageOperations::Resize(const SkBitmap& source,
ResizeMethod method,
int dest_width, int dest_height,
const SkIRect& dest_subset,
void* dest_pixels /* = nullptr */) {
if (method == ImageOperations::RESIZE_SUBPIXEL)
return ResizeSubpixel(source, dest_width, dest_height, dest_subset);
else
return ResizeBasic(source, method, dest_width, dest_height, dest_subset,
dest_pixels);
}
// static
SkBitmap ImageOperations::ResizeSubpixel(const SkBitmap& source,
int dest_width, int dest_height,
const SkIRect& dest_subset) {
// Currently only works on Linux/BSD because these are the only platforms
// where SkFontHost::GetSubpixelOrder is defined.
#if defined(XP_UNIX)
// Understand the display.
const SkFontHost::LCDOrder order = SkFontHost::GetSubpixelOrder();
const SkFontHost::LCDOrientation orientation =
SkFontHost::GetSubpixelOrientation();
// Decide on which dimension, if any, to deploy subpixel rendering.
int w = 1;
int h = 1;
switch (orientation) {
case SkFontHost::kHorizontal_LCDOrientation:
w = dest_width < source.width() ? 3 : 1;
break;
case SkFontHost::kVertical_LCDOrientation:
h = dest_height < source.height() ? 3 : 1;
break;
}
// Resize the image.
const int width = dest_width * w;
const int height = dest_height * h;
SkIRect subset = { dest_subset.fLeft, dest_subset.fTop,
dest_subset.fLeft + dest_subset.width() * w,
dest_subset.fTop + dest_subset.height() * h };
SkBitmap img = ResizeBasic(source, ImageOperations::RESIZE_LANCZOS3, width,
height, subset);
const int row_words = img.rowBytes() / 4;
if (w == 1 && h == 1)
return img;
// Render into subpixels.
SkBitmap result;
result.setConfig(SkBitmap::kARGB_8888_Config, dest_subset.width(),
dest_subset.height());
result.allocPixels();
if (!result.readyToDraw())
return img;
SkAutoLockPixels locker(img);
if (!img.readyToDraw())
return img;
uint32_t* src_row = img.getAddr32(0, 0);
uint32_t* dst_row = result.getAddr32(0, 0);
for (int y = 0; y < dest_subset.height(); y++) {
uint32_t* src = src_row;
uint32_t* dst = dst_row;
for (int x = 0; x < dest_subset.width(); x++, src += w, dst++) {
uint8_t r = 0, g = 0, b = 0, a = 0;
switch (order) {
case SkFontHost::kRGB_LCDOrder:
switch (orientation) {
case SkFontHost::kHorizontal_LCDOrientation:
r = SkGetPackedR32(src[0]);
g = SkGetPackedG32(src[1]);
b = SkGetPackedB32(src[2]);
a = SkGetPackedA32(src[1]);
break;
case SkFontHost::kVertical_LCDOrientation:
r = SkGetPackedR32(src[0 * row_words]);
g = SkGetPackedG32(src[1 * row_words]);
b = SkGetPackedB32(src[2 * row_words]);
a = SkGetPackedA32(src[1 * row_words]);
break;
}
break;
case SkFontHost::kBGR_LCDOrder:
switch (orientation) {
case SkFontHost::kHorizontal_LCDOrientation:
b = SkGetPackedB32(src[0]);
g = SkGetPackedG32(src[1]);
r = SkGetPackedR32(src[2]);
a = SkGetPackedA32(src[1]);
break;
case SkFontHost::kVertical_LCDOrientation:
b = SkGetPackedB32(src[0 * row_words]);
g = SkGetPackedG32(src[1 * row_words]);
r = SkGetPackedR32(src[2 * row_words]);
a = SkGetPackedA32(src[1 * row_words]);
break;
}
break;
case SkFontHost::kNONE_LCDOrder:
break;
}
// Premultiplied alpha is very fragile: no color channel may exceed the
// alpha channel. Since r, g and b were sampled from different subpixels
// (with potentially different alphas), raise a to the largest of the
// channels to keep the pixel valid.
a = a > r ? a : r;
a = a > g ? a : g;
a = a > b ? a : b;
*dst = SkPackARGB32(a, r, g, b);
}
src_row += h * row_words;
dst_row += result.rowBytes() / 4;
}
result.setIsOpaque(img.isOpaque());
return result;
#else
return SkBitmap();
#endif // XP_UNIX
}
// static
SkBitmap ImageOperations::ResizeBasic(const SkBitmap& source,
ResizeMethod method,
int dest_width, int dest_height,
const SkIRect& dest_subset,
void* dest_pixels /* = nullptr */) {
// Ensure that the ResizeMethod enumeration is sound.
SkASSERT(((RESIZE_FIRST_QUALITY_METHOD <= method) &&
(method <= RESIZE_LAST_QUALITY_METHOD)) ||
((RESIZE_FIRST_ALGORITHM_METHOD <= method) &&
(method <= RESIZE_LAST_ALGORITHM_METHOD)));
// If the size of source or destination is 0, i.e. 0x0, 0xN or Nx0, just
// return empty.
if (source.width() < 1 || source.height() < 1 ||
dest_width < 1 || dest_height < 1)
return SkBitmap();
method = ResizeMethodToAlgorithmMethod(method);
// Check that we deal only with "algorithm methods" from this point onward.
SkASSERT((ImageOperations::RESIZE_FIRST_ALGORITHM_METHOD <= method) &&
(method <= ImageOperations::RESIZE_LAST_ALGORITHM_METHOD));
SkAutoLockPixels locker(source);
if (!source.readyToDraw())
return SkBitmap();
ResizeFilter filter(method, source.width(), source.height(),
dest_width, dest_height, dest_subset);
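// The filter holds one ConvolutionFilter1D per axis; for each pixel of
// dest_subset it records which contiguous span of source pixels contributes
// and with what fixed-point weights.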
// Point directly at the source pixels. The filter offsets computed above
// already confine the convolution to the region of the source that affects
// dest_subset, so no separate sub-bitmap needs to be constructed.
const uint8_t* source_subset =
reinterpret_cast<const uint8_t*>(source.getPixels());
// Convolve into the result.
SkBitmap result;
result.setConfig(SkBitmap::kARGB_8888_Config,
dest_subset.width(), dest_subset.height());
if (dest_pixels) {
result.setPixels(dest_pixels);
} else {
result.allocPixels();
}
if (!result.readyToDraw())
return SkBitmap();
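// BGRAConvolve2D applies the two 1D filters separably: every output pixel
// is a weighted sum over a horizontal span of pixels (x_filter), each of
// which is itself a weighted sum over a vertical span (y_filter).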
BGRAConvolve2D(source_subset, static_cast<int>(source.rowBytes()),
!source.isOpaque(), filter.x_filter(), filter.y_filter(),
static_cast<int>(result.rowBytes()),
static_cast<unsigned char*>(result.getPixels()),
/* sse = */ false);
// Preserve the "opaque" flag for use as an optimization later.
result.setIsOpaque(source.isOpaque());
return result;
}
// static
SkBitmap ImageOperations::Resize(const SkBitmap& source,
ResizeMethod method,
int dest_width, int dest_height,
void* dest_pixels /* = nullptr */) {
SkIRect dest_subset = { 0, 0, dest_width, dest_height };
return Resize(source, method, dest_width, dest_height, dest_subset,
dest_pixels);
}
} // namespace skia

133
gfx/2d/image_operations.h Normal file
View File

@ -0,0 +1,133 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SKIA_EXT_IMAGE_OPERATIONS_H_
#define SKIA_EXT_IMAGE_OPERATIONS_H_
#include "skia/SkTypes.h"
#include "Types.h"
class SkBitmap;
struct SkIRect;
namespace skia {
class ImageOperations {
public:
enum ResizeMethod {
//
// Quality Methods
//
// These enumeration values express a desired quality/speed tradeoff.
// They are translated into an algorithm-specific method that depends
// on the capabilities (CPU, GPU) of the underlying platform.
// It is possible for all three methods to be mapped to the same
// algorithm on a given platform.
// Good quality resizing. Fastest resizing with acceptable visual quality.
// This is typically intended for use during interactive layouts
// where slower platforms may want to trade image quality for a large
// increase in resizing performance.
//
// For example the resizing implementation may devolve to linear
// filtering if this enables GPU acceleration to be used.
//
// Note that the underlying resizing method may be determined
// on the fly based on the parameters for a given resize call.
// For example an implementation using a GPU-based linear filter
// in the common case may still use a higher-quality software-based
// filter in cases where using the GPU would actually be slower - due
// to too much latency - or impossible - due to image format or size
// constraints.
RESIZE_GOOD,
// Medium quality resizing. Close to high quality resizing (better
// than linear interpolation) with potentially some quality being
// traded off for additional speed compared to RESIZE_BEST.
//
// This is intended, for example, for generation of large thumbnails
// (hundreds of pixels in each dimension) from large sources, where
// a linear filter would produce too many artifacts but where
// a RESIZE_BEST might be too costly time-wise.
RESIZE_BETTER,
// High quality resizing. The algorithm is picked to favor image quality.
RESIZE_BEST,
//
// Algorithm-specific enumerations
//
// Box filter. This is a weighted average of all of the pixels touching
// the destination pixel. For enlargement, this is nearest neighbor.
//
// You probably don't want this; it is here for testing since it is easy to
// compute. Use RESIZE_LANCZOS3 instead.
RESIZE_BOX,
// 1-cycle Hamming filter. This is tall in the middle and falls off towards
// the window edges but without going to 0. This is about 40% faster than
// a 2-cycle Lanczos.
RESIZE_HAMMING1,
// 2-cycle Lanczos filter. This is tall in the middle, goes negative on
// each side, then returns to zero. Does not provide as good a frequency
// response as a 3-cycle Lanczos but is roughly 30% faster.
RESIZE_LANCZOS2,
// 3-cycle Lanczos filter. This is tall in the middle, goes negative on
// each side, then oscillates 2 more times. It gives nice sharp edges.
RESIZE_LANCZOS3,
// Lanczos filter + subpixel interpolation. If subpixel rendering is not
// appropriate we automatically fall back to Lanczos.
RESIZE_SUBPIXEL,
// Enum aliases for the first and last methods, by quality and by algorithm.
RESIZE_FIRST_QUALITY_METHOD = RESIZE_GOOD,
RESIZE_LAST_QUALITY_METHOD = RESIZE_BEST,
RESIZE_FIRST_ALGORITHM_METHOD = RESIZE_BOX,
RESIZE_LAST_ALGORITHM_METHOD = RESIZE_SUBPIXEL,
};
// Resizes the given source bitmap using the specified resize method, so that
// the entire image is (dest_width, dest_height) big. The dest_subset is
// the rectangle in this destination image that should actually be returned.
//
// The output image will be (dest_subset.width(), dest_subset.height()). This
// will save work if you do not need the entire bitmap.
//
// The destination subset must be smaller than the destination image.
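//
// If dest_pixels is non-null it is assumed to point at caller-owned storage
// large enough for a 32-bit image of dest_subset's size; the returned
// bitmap is then backed by that buffer rather than freshly allocated pixels.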
static SkBitmap Resize(const SkBitmap& source,
ResizeMethod method,
int dest_width, int dest_height,
const SkIRect& dest_subset,
void* dest_pixels = nullptr);
// Alternate version for resizing and returning the entire bitmap rather than
// a subset.
static SkBitmap Resize(const SkBitmap& source,
ResizeMethod method,
int dest_width, int dest_height,
void* dest_pixels = nullptr);
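//
// A minimal usage sketch (illustrative only; assumes "src" is a
// ready-to-draw 32-bit SkBitmap):
//
//   SkBitmap thumb = skia::ImageOperations::Resize(
//       src, skia::ImageOperations::RESIZE_BEST, 128, 128);
//   if (thumb.readyToDraw()) {
//     // use thumb
//   }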
private:
ImageOperations(); // Class for scoping only.
// Supports all methods except RESIZE_SUBPIXEL.
static SkBitmap ResizeBasic(const SkBitmap& source,
ResizeMethod method,
int dest_width, int dest_height,
const SkIRect& dest_subset,
void* dest_pixels = nullptr);
// Subpixel renderer.
static SkBitmap ResizeSubpixel(const SkBitmap& source,
int dest_width, int dest_height,
const SkIRect& dest_subset);
};
} // namespace skia
#endif // SKIA_EXT_IMAGE_OPERATIONS_H_