mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-25 13:51:41 +00:00
Bug 1250037 - part 3 - optimize the Moz2d fallback box blur implementation. r=bas
MozReview-Commit-ID: 70YnDEI20ow
This commit is contained in:
parent
2b779e5359
commit
58e976f33d
426
gfx/2d/Blur.cpp
426
gfx/2d/Blur.cpp
@ -26,159 +26,268 @@ namespace mozilla {
|
||||
namespace gfx {
|
||||
|
||||
/**
|
||||
* Box blur involves looking at one pixel, and setting its value to the average
|
||||
* of its neighbouring pixels.
|
||||
* @param aInput The input buffer.
|
||||
* @param aOutput The output buffer.
|
||||
* @param aLeftLobe The number of pixels to blend on the left.
|
||||
* @param aRightLobe The number of pixels to blend on the right.
|
||||
* @param aWidth The number of columns in the buffers.
|
||||
* @param aRows The number of rows in the buffers.
|
||||
* @param aSkipRect An area to skip blurring in.
|
||||
* XXX shouldn't we pass stride in separately here?
|
||||
* Helper function to process each row of the box blur.
|
||||
* It takes care of transposing the data on input or output depending
|
||||
* on whether we intend a horizontal or vertical blur, and whether we're
|
||||
* reading from the initial source or writing to the final destination.
|
||||
* It allows starting or ending anywhere within the row to accomodate
|
||||
* a skip rect.
|
||||
*/
|
||||
static void
|
||||
BoxBlurHorizontal(unsigned char* aInput,
|
||||
unsigned char* aOutput,
|
||||
int32_t aLeftLobe,
|
||||
int32_t aRightLobe,
|
||||
int32_t aWidth,
|
||||
int32_t aRows,
|
||||
const IntRect& aSkipRect)
|
||||
template<bool aTransposeInput, bool aTransposeOutput>
|
||||
static inline void
|
||||
BoxBlurRow(const uint8_t* aInput,
|
||||
uint8_t* aOutput,
|
||||
int32_t aLeftLobe,
|
||||
int32_t aRightLobe,
|
||||
int32_t aWidth,
|
||||
int32_t aStride,
|
||||
int32_t aStart,
|
||||
int32_t aEnd)
|
||||
{
|
||||
MOZ_ASSERT(aWidth > 0);
|
||||
// If the input or output is transposed, then we will move down a row
|
||||
// for each step, instead of moving over a column. Since these values
|
||||
// only depend on a template parameter, they will more easily get
|
||||
// copy-propagated in the non-transposed case, which is why they
|
||||
// are not passed as parameters.
|
||||
const int32_t inputStep = aTransposeInput ? aStride : 1;
|
||||
const int32_t outputStep = aTransposeOutput ? aStride : 1;
|
||||
|
||||
int32_t boxSize = aLeftLobe + aRightLobe + 1;
|
||||
bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
|
||||
aWidth <= aSkipRect.XMost();
|
||||
if (boxSize == 1) {
|
||||
memcpy(aOutput, aInput, aWidth*aRows);
|
||||
return;
|
||||
// We need to sample aLeftLobe pixels to the left and aRightLobe pixels
|
||||
// to the right of the current position, then average them. So this is
|
||||
// the size of the total width of this filter.
|
||||
const int32_t boxSize = aLeftLobe + aRightLobe + 1;
|
||||
|
||||
// Instead of dividing the pixel sum by boxSize to average, we can just
|
||||
// compute a scale that will normalize the result so that it can be quickly
|
||||
// shifted into the desired range.
|
||||
const uint32_t reciprocal = (1 << 24) / boxSize;
|
||||
|
||||
// The shift would normally truncate the result, whereas we would rather
|
||||
// prefer to round the result to the closest increment. By adding 0.5 units
|
||||
// to the initial sum, we bias the sum so that it will be rounded by the
|
||||
// truncation instead.
|
||||
uint32_t alphaSum = (boxSize + 1) / 2;
|
||||
|
||||
// We process the row with a moving filter, keeping a sum (alphaSum) of
|
||||
// boxSize pixels. As we move over a pixel, we need to add on a pixel
|
||||
// from the right extreme of the window that moved into range, and subtract
|
||||
// off a pixel from the left extreme of window that moved out of range.
|
||||
// But first, we need to initialization alphaSum to the contents of
|
||||
// the window before we can get going. If the window moves out of bounds
|
||||
// of the row, we clamp each sample to be the closest pixel from within
|
||||
// row bounds, so the 0th and aWidth-1th pixel.
|
||||
int32_t initLeft = aStart - aLeftLobe;
|
||||
if (initLeft < 0) {
|
||||
// If the left lobe samples before the row, add in clamped samples.
|
||||
alphaSum += -initLeft * aInput[0];
|
||||
initLeft = 0;
|
||||
}
|
||||
int32_t initRight = aStart + boxSize - aLeftLobe;
|
||||
if (initRight > aWidth) {
|
||||
// If the right lobe samples after the row, add in clamped samples.
|
||||
alphaSum += (initRight - aWidth) * aInput[(aWidth - 1) * inputStep];
|
||||
initRight = aWidth;
|
||||
}
|
||||
// Finally, add in all the valid, non-clamped samples to fill up the
|
||||
// rest of the window.
|
||||
const uint8_t* src = &aInput[initLeft * inputStep];
|
||||
const uint8_t* iterEnd = &aInput[initRight * inputStep];
|
||||
|
||||
#define INIT_ITER \
|
||||
alphaSum += *src; \
|
||||
src += inputStep;
|
||||
|
||||
// We unroll the per-pixel loop here substantially. The amount of work
|
||||
// done per sample is so small that the cost of a loop condition check
|
||||
// and a branch can substantially add to or even dominate the performance
|
||||
// of the loop.
|
||||
while (src + 16 * inputStep <= iterEnd) {
|
||||
INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
|
||||
INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
|
||||
INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
|
||||
INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
|
||||
}
|
||||
while (src < iterEnd) {
|
||||
INIT_ITER;
|
||||
}
|
||||
|
||||
// Now we start moving the window over the row. We will be accessing
|
||||
// pixels form aStart - aLeftLobe up to aEnd + aRightLobe, which may be
|
||||
// out of bounds of the row. To avoid having to check within the inner
|
||||
// loops if we are in bound, we instead compute the points at which
|
||||
// we will move out of bounds of the row on the left side (splitLeft)
|
||||
// and right side (splitRight).
|
||||
int32_t splitLeft = min(max(aLeftLobe, aStart), aEnd);
|
||||
int32_t splitRight = min(max(aWidth - (boxSize - aLeftLobe), aStart), aEnd);
|
||||
// If the filter window is actually large than the size of the row,
|
||||
// there will be a middle area of overlap where the leftmost and rightmost
|
||||
// pixel of the filter will both be outside the row. In this case, we need
|
||||
// to invert the splits so that splitLeft <= splitRight.
|
||||
if (boxSize > aWidth) {
|
||||
swap(splitLeft, splitRight);
|
||||
}
|
||||
|
||||
// Process all pixels up to splitLeft that would sample before the start of the row.
|
||||
// Note that because inputStep and outputStep may not be a const 1 value, it is more
|
||||
// performant to increment pointers here for the source and destination rather than
|
||||
// use a loop counter, since doing so would entail an expensive multiplication that
|
||||
// significantly slows down the loop.
|
||||
uint8_t* dst = &aOutput[aStart * outputStep];
|
||||
iterEnd = &aOutput[splitLeft * outputStep];
|
||||
src = &aInput[(aStart + boxSize - aLeftLobe) * inputStep];
|
||||
uint8_t firstVal = aInput[0];
|
||||
|
||||
#define LEFT_ITER \
|
||||
*dst = (alphaSum * reciprocal) >> 24; \
|
||||
alphaSum += *src - firstVal; \
|
||||
dst += outputStep; \
|
||||
src += inputStep;
|
||||
|
||||
while (dst + 16 * outputStep <= iterEnd) {
|
||||
LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
|
||||
LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
|
||||
LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
|
||||
LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
|
||||
}
|
||||
while (dst < iterEnd) {
|
||||
LEFT_ITER;
|
||||
}
|
||||
|
||||
// Process all pixels between splitLeft and splitRight.
|
||||
iterEnd = &aOutput[splitRight * outputStep];
|
||||
if (boxSize <= aWidth) {
|
||||
// The filter window is smaller than the row size, so the leftmost and rightmost
|
||||
// samples are both within row bounds.
|
||||
src = &aInput[(splitLeft - aLeftLobe) * inputStep];
|
||||
int32_t boxStep = boxSize * inputStep;
|
||||
|
||||
#define CENTER_ITER \
|
||||
*dst = (alphaSum * reciprocal) >> 24; \
|
||||
alphaSum += src[boxStep] - *src; \
|
||||
dst += outputStep; \
|
||||
src += inputStep;
|
||||
|
||||
while (dst + 16 * outputStep <= iterEnd) {
|
||||
CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
|
||||
CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
|
||||
CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
|
||||
CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
|
||||
}
|
||||
uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
|
||||
|
||||
for (int32_t y = 0; y < aRows; y++) {
|
||||
// Check whether the skip rect intersects this row. If the skip
|
||||
// rect covers the whole surface in this row, we can avoid
|
||||
// this row entirely (and any others along the skip rect).
|
||||
bool inSkipRectY = y >= aSkipRect.y &&
|
||||
y < aSkipRect.YMost();
|
||||
if (inSkipRectY && skipRectCoversWholeRow) {
|
||||
y = aSkipRect.YMost() - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t alphaSum = 0;
|
||||
for (int32_t i = 0; i < boxSize; i++) {
|
||||
int32_t pos = i - aLeftLobe;
|
||||
// See assertion above; if aWidth is zero, then we would have no
|
||||
// valid position to clamp to.
|
||||
pos = max(pos, 0);
|
||||
pos = min(pos, aWidth - 1);
|
||||
alphaSum += aInput[aWidth * y + pos];
|
||||
}
|
||||
for (int32_t x = 0; x < aWidth; x++) {
|
||||
// Check whether we are within the skip rect. If so, go
|
||||
// to the next point outside the skip rect.
|
||||
if (inSkipRectY && x >= aSkipRect.x &&
|
||||
x < aSkipRect.XMost()) {
|
||||
x = aSkipRect.XMost();
|
||||
if (x >= aWidth)
|
||||
break;
|
||||
|
||||
// Recalculate the neighbouring alpha values for
|
||||
// our new point on the surface.
|
||||
alphaSum = 0;
|
||||
for (int32_t i = 0; i < boxSize; i++) {
|
||||
int32_t pos = x + i - aLeftLobe;
|
||||
// See assertion above; if aWidth is zero, then we would have no
|
||||
// valid position to clamp to.
|
||||
pos = max(pos, 0);
|
||||
pos = min(pos, aWidth - 1);
|
||||
alphaSum += aInput[aWidth * y + pos];
|
||||
}
|
||||
}
|
||||
int32_t tmp = x - aLeftLobe;
|
||||
int32_t last = max(tmp, 0);
|
||||
int32_t next = min(tmp + boxSize, aWidth - 1);
|
||||
|
||||
aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
|
||||
|
||||
alphaSum += aInput[aWidth * y + next] -
|
||||
aInput[aWidth * y + last];
|
||||
}
|
||||
while (dst < iterEnd) {
|
||||
CENTER_ITER;
|
||||
}
|
||||
} else {
|
||||
// The filter window is larger than the row size, and we're in the area of split
|
||||
// overlap. So the leftmost and rightmost samples are both out of bounds and need
|
||||
// to be clamped. We can just precompute the difference here consequently.
|
||||
int32_t firstLastDiff = aInput[(aWidth -1) * inputStep] - aInput[0];
|
||||
while (dst < iterEnd) {
|
||||
*dst = (alphaSum * reciprocal) >> 24;
|
||||
alphaSum += firstLastDiff;
|
||||
dst += outputStep;
|
||||
}
|
||||
}
|
||||
|
||||
// Process all remaining pixels after splitRight that would sample after the row end.
|
||||
iterEnd = &aOutput[aEnd * outputStep];
|
||||
src = &aInput[(splitRight - aLeftLobe) * inputStep];
|
||||
uint8_t lastVal = aInput[(aWidth - 1) * inputStep];
|
||||
|
||||
#define RIGHT_ITER \
|
||||
*dst = (alphaSum * reciprocal) >> 24; \
|
||||
alphaSum += lastVal - *src; \
|
||||
dst += outputStep; \
|
||||
src += inputStep;
|
||||
|
||||
while (dst + 16 * outputStep <= iterEnd) {
|
||||
RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
|
||||
RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
|
||||
RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
|
||||
RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
|
||||
}
|
||||
while (dst < iterEnd) {
|
||||
RIGHT_ITER;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to BoxBlurHorizontal, except it blurs top and bottom instead of
|
||||
* left and right.
|
||||
* XXX shouldn't we pass stride in separately here?
|
||||
* Box blur involves looking at one pixel, and setting its value to the average
|
||||
* of its neighbouring pixels. This is meant to provide a 3-pass approximation of a
|
||||
* Gaussian blur.
|
||||
* @param aTranspose Whether to transpose the buffer when reading and writing to it.
|
||||
* @param aData The buffer to be blurred.
|
||||
* @param aLobes The number of pixels to blend on the left and right for each of 3 passes.
|
||||
* @param aWidth The number of columns in the buffers.
|
||||
* @param aRows The number of rows in the buffers.
|
||||
* @param aStride The stride of the buffer.
|
||||
*/
|
||||
template<bool aTranspose>
|
||||
static void
|
||||
BoxBlurVertical(unsigned char* aInput,
|
||||
unsigned char* aOutput,
|
||||
int32_t aTopLobe,
|
||||
int32_t aBottomLobe,
|
||||
int32_t aWidth,
|
||||
int32_t aRows,
|
||||
const IntRect& aSkipRect)
|
||||
BoxBlur(uint8_t* aData,
|
||||
const int32_t aLobes[3][2],
|
||||
int32_t aWidth,
|
||||
int32_t aRows,
|
||||
int32_t aStride,
|
||||
IntRect aSkipRect)
|
||||
{
|
||||
MOZ_ASSERT(aRows > 0);
|
||||
if (aTranspose) {
|
||||
swap(aWidth, aRows);
|
||||
swap(aSkipRect.x, aSkipRect.y);
|
||||
swap(aSkipRect.width, aSkipRect.height);
|
||||
}
|
||||
|
||||
int32_t boxSize = aTopLobe + aBottomLobe + 1;
|
||||
bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
|
||||
aRows <= aSkipRect.YMost();
|
||||
if (boxSize == 1) {
|
||||
memcpy(aOutput, aInput, aWidth*aRows);
|
||||
return;
|
||||
MOZ_ASSERT(aWidth > 0);
|
||||
|
||||
// All three passes of the box blur that approximate the Gaussian are done
|
||||
// on each row in turn, so we only need two temporary row buffers to process
|
||||
// each row, instead of a full-sized buffer. Data moves from the source to the
|
||||
// first temporary, from the first temporary to the second, then from the second
|
||||
// back to the destination. This way is more cache-friendly than processing whe
|
||||
// whole buffer in each pass and thus yields a nice speedup.
|
||||
uint8_t* tmpRow = new (std::nothrow) uint8_t[2 * aWidth];
|
||||
if (!tmpRow) {
|
||||
return;
|
||||
}
|
||||
uint8_t* tmpRow2 = tmpRow + aWidth;
|
||||
|
||||
const int32_t stride = aTranspose ? 1 : aStride;
|
||||
bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
|
||||
aWidth <= aSkipRect.XMost();
|
||||
|
||||
for (int32_t y = 0; y < aRows; y++) {
|
||||
// Check whether the skip rect intersects this row. If the skip
|
||||
// rect covers the whole surface in this row, we can avoid
|
||||
// this row entirely (and any others along the skip rect).
|
||||
bool inSkipRectY = y >= aSkipRect.y &&
|
||||
y < aSkipRect.YMost();
|
||||
if (inSkipRectY && skipRectCoversWholeRow) {
|
||||
aData += stride * (aSkipRect.YMost() - y);
|
||||
y = aSkipRect.YMost() - 1;
|
||||
continue;
|
||||
}
|
||||
uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
|
||||
|
||||
for (int32_t x = 0; x < aWidth; x++) {
|
||||
bool inSkipRectX = x >= aSkipRect.x &&
|
||||
x < aSkipRect.XMost();
|
||||
if (inSkipRectX && skipRectCoversWholeColumn) {
|
||||
x = aSkipRect.XMost() - 1;
|
||||
continue;
|
||||
}
|
||||
// Read in data from the source transposed if necessary.
|
||||
BoxBlurRow<aTranspose, false>(aData, tmpRow, aLobes[0][0], aLobes[0][1], aWidth, aStride, 0, aWidth);
|
||||
|
||||
uint32_t alphaSum = 0;
|
||||
for (int32_t i = 0; i < boxSize; i++) {
|
||||
int32_t pos = i - aTopLobe;
|
||||
// See assertion above; if aRows is zero, then we would have no
|
||||
// valid position to clamp to.
|
||||
pos = max(pos, 0);
|
||||
pos = min(pos, aRows - 1);
|
||||
alphaSum += aInput[aWidth * pos + x];
|
||||
}
|
||||
for (int32_t y = 0; y < aRows; y++) {
|
||||
if (inSkipRectX && y >= aSkipRect.y &&
|
||||
y < aSkipRect.YMost()) {
|
||||
y = aSkipRect.YMost();
|
||||
if (y >= aRows)
|
||||
break;
|
||||
// For the middle pass, the data is already pre-transposed and does not need to be post-transposed yet.
|
||||
BoxBlurRow<false, false>(tmpRow, tmpRow2, aLobes[1][0], aLobes[1][1], aWidth, aStride, 0, aWidth);
|
||||
|
||||
alphaSum = 0;
|
||||
for (int32_t i = 0; i < boxSize; i++) {
|
||||
int32_t pos = y + i - aTopLobe;
|
||||
// See assertion above; if aRows is zero, then we would have no
|
||||
// valid position to clamp to.
|
||||
pos = max(pos, 0);
|
||||
pos = min(pos, aRows - 1);
|
||||
alphaSum += aInput[aWidth * pos + x];
|
||||
}
|
||||
}
|
||||
int32_t tmp = y - aTopLobe;
|
||||
int32_t last = max(tmp, 0);
|
||||
int32_t next = min(tmp + boxSize, aRows - 1);
|
||||
|
||||
aOutput[aWidth * y + x] = (uint64_t(alphaSum) * reciprocal) >> 32;
|
||||
|
||||
alphaSum += aInput[aWidth * next + x] -
|
||||
aInput[aWidth * last + x];
|
||||
}
|
||||
// Write back data to the destination transposed if necessary too.
|
||||
// Make sure not to overwrite the skip rect by only outputting to the
|
||||
// destination before and after the skip rect, if requested.
|
||||
int32_t skipStart = inSkipRectY ? min(max(aSkipRect.x, 0), aWidth) : aWidth;
|
||||
int32_t skipEnd = max(skipStart, aSkipRect.XMost());
|
||||
if (skipStart > 0) {
|
||||
BoxBlurRow<false, aTranspose>(tmpRow2, aData, aLobes[2][0], aLobes[2][1], aWidth, aStride, 0, skipStart);
|
||||
}
|
||||
if (skipEnd < aWidth) {
|
||||
BoxBlurRow<false, aTranspose>(tmpRow2, aData, aLobes[2][0], aLobes[2][1], aWidth, aStride, skipEnd, aWidth);
|
||||
}
|
||||
|
||||
aData += stride;
|
||||
}
|
||||
|
||||
delete[] tmpRow;
|
||||
}
|
||||
|
||||
static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
|
||||
@ -227,8 +336,8 @@ static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
|
||||
}
|
||||
|
||||
static void
|
||||
SpreadHorizontal(unsigned char* aInput,
|
||||
unsigned char* aOutput,
|
||||
SpreadHorizontal(uint8_t* aInput,
|
||||
uint8_t* aOutput,
|
||||
int32_t aRadius,
|
||||
int32_t aWidth,
|
||||
int32_t aRows,
|
||||
@ -275,8 +384,8 @@ SpreadHorizontal(unsigned char* aInput,
|
||||
}
|
||||
|
||||
static void
|
||||
SpreadVertical(unsigned char* aInput,
|
||||
unsigned char* aOutput,
|
||||
SpreadVertical(uint8_t* aInput,
|
||||
uint8_t* aOutput,
|
||||
int32_t aRadius,
|
||||
int32_t aWidth,
|
||||
int32_t aRows,
|
||||
@ -481,7 +590,7 @@ AlphaBoxBlur::Blur(uint8_t* aData)
|
||||
if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) {
|
||||
// No need to use CheckedInt here - we have validated it in the constructor.
|
||||
size_t szB = stride * size.height;
|
||||
unsigned char* tmpData = new (std::nothrow) uint8_t[szB];
|
||||
uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
|
||||
|
||||
if (!tmpData) {
|
||||
return;
|
||||
@ -489,8 +598,8 @@ AlphaBoxBlur::Blur(uint8_t* aData)
|
||||
|
||||
memset(tmpData, 0, szB);
|
||||
|
||||
SpreadHorizontal(aData, tmpData, mSpreadRadius.width, GetSize().width, GetSize().height, stride, mSkipRect);
|
||||
SpreadVertical(tmpData, aData, mSpreadRadius.height, GetSize().width, GetSize().height, stride, mSkipRect);
|
||||
SpreadHorizontal(aData, tmpData, mSpreadRadius.width, size.width, size.height, stride, mSkipRect);
|
||||
SpreadVertical(tmpData, aData, mSpreadRadius.height, size.width, size.height, stride, mSkipRect);
|
||||
|
||||
delete [] tmpData;
|
||||
}
|
||||
@ -509,39 +618,12 @@ AlphaBoxBlur::Blur(uint8_t* aData)
|
||||
if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) {
|
||||
// Fallback to old blurring code when the surface is so large it may
|
||||
// overflow our integral image!
|
||||
|
||||
// No need to use CheckedInt here - we have validated it in the constructor.
|
||||
size_t szB = stride * size.height;
|
||||
uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
|
||||
if (!tmpData) {
|
||||
return;
|
||||
}
|
||||
|
||||
memset(tmpData, 0, szB);
|
||||
|
||||
uint8_t* a = aData;
|
||||
uint8_t* b = tmpData;
|
||||
if (mBlurRadius.width > 0) {
|
||||
BoxBlurHorizontal(a, b, horizontalLobes[0][0], horizontalLobes[0][1], stride, GetSize().height, mSkipRect);
|
||||
BoxBlurHorizontal(b, a, horizontalLobes[1][0], horizontalLobes[1][1], stride, GetSize().height, mSkipRect);
|
||||
BoxBlurHorizontal(a, b, horizontalLobes[2][0], horizontalLobes[2][1], stride, GetSize().height, mSkipRect);
|
||||
} else {
|
||||
a = tmpData;
|
||||
b = aData;
|
||||
BoxBlur<false>(aData, horizontalLobes, size.width, size.height, stride, mSkipRect);
|
||||
}
|
||||
// The result is in 'b' here.
|
||||
if (mBlurRadius.height > 0) {
|
||||
BoxBlurVertical(b, a, verticalLobes[0][0], verticalLobes[0][1], stride, GetSize().height, mSkipRect);
|
||||
BoxBlurVertical(a, b, verticalLobes[1][0], verticalLobes[1][1], stride, GetSize().height, mSkipRect);
|
||||
BoxBlurVertical(b, a, verticalLobes[2][0], verticalLobes[2][1], stride, GetSize().height, mSkipRect);
|
||||
} else {
|
||||
a = b;
|
||||
BoxBlur<true>(aData, verticalLobes, size.width, size.height, stride, mSkipRect);
|
||||
}
|
||||
// The result is in 'a' here.
|
||||
if (a == tmpData) {
|
||||
memcpy(aData, tmpData, szB);
|
||||
}
|
||||
delete [] tmpData;
|
||||
} else {
|
||||
size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width, 4);
|
||||
if (integralImageStride == 0) {
|
||||
|
Loading…
Reference in New Issue
Block a user