root/src/servers/app/drawing/Painter/bitmap_painter/DrawBitmapBilinear.h
/*
 * Copyright 2009, Christian Packmann.
 * Copyright 2008, Andrej Spielmann <andrej.spielmann@seh.ox.ac.uk>.
 * Copyright 2005-2014, Stephan Aßmus <superstippi@gmx.de>.
 * Copyright 2015, Julian Harnath <julian.harnath@rwth-aachen.de>
 * All rights reserved. Distributed under the terms of the MIT License.
 */
#ifndef DRAW_BITMAP_BILINEAR_H
#define DRAW_BITMAP_BILINEAR_H

#include "Painter.h"

#include <typeinfo>


// Prototypes for assembler routines
//
// bilinear_scale_xloop_mmxsse() is implemented outside of this header. Its
// only caller in this file, BilinearSimd::DrawToClipRect(), passes the top
// source row, the start of the destination row, the horizontal FilterInfo
// array, the first and last weight index to process, the weight of the top
// row and the source bytes per row. After the call it assumes that
// (xmax - xmin + 1) destination pixels of 4 bytes each have been handled.
// NOTE(review): xWeights is handed over as untyped memory, so the routine
// presumably depends on the exact layout of FilterInfo -- confirm before
// changing that struct.
extern "C" {
        void bilinear_scale_xloop_mmxsse(const uint8* src, void* dst,
                void* xWeights, uint32 xmin, uint32 xmax, uint32 wTop, uint32 srcBPR);
}


// Bit mask of SIMD flags, defined elsewhere. DrawBitmapBilinear::Draw()
// checks it for both APPSERVER_SIMD_MMX and APPSERVER_SIMD_SSE before it
// selects the SIMD code path.
extern uint32 gSIMDFlags;


namespace BitmapPainterPrivate {


// Precomputed filter entry for one destination column or row.
// DrawBitmapBilinear::Draw() fills one array of these per axis. Entries of
// the horizontal array store the index as a byte offset into a source row
// (pixel index * 4), entries of the vertical array store the source row
// number that is passed to row_ptr().
// NOTE(review): the horizontal array is passed to the assembler routine as
// raw memory, so the field order and sizes should presumably stay as they are.
struct FilterInfo {
        uint16 index;   // index into source bitmap row/column
        uint16 weight;  // weight of the pixel at index [0..255]
                                        // (the drawing loops weight the neighbouring
                                        // pixel to the right/below with 255 - weight)
};


// Bundles the filter weights that DrawBitmapBilinear::Draw() hands to the
// drawing variants.
struct FilterData {
        // One entry per destination column that may be drawn.
        FilterInfo* fWeightsX;
        // One entry per destination row that may be drawn.
        FilterInfo* fWeightsY;
        // Number of destination columns/rows between the left/top edge of the
        // destination rect and the first column/row that has an entry in the
        // weight arrays. Non-zero only when the clipping frame starts to the
        // right of/below the destination rect's left/top edge.
        uint32 fIndexOffsetX;
        uint32 fIndexOffsetY;
};


template<class OptimizedVersion>
struct DrawBitmapBilinearOptimized {
        void Draw(PainterAggInterface& aggInterface, const BRect& destinationRect,
                agg::rendering_buffer* bitmap, const FilterData& filterData)
        {
                fSource = bitmap;
                fSourceBytesPerRow = bitmap->stride();
                fDestination = NULL;
                fDestinationBytesPerRow = aggInterface.fBuffer.stride();
                fWeightsX = filterData.fWeightsX;
                fWeightsY = filterData.fWeightsY;

                const int32 left = (int32)destinationRect.left;
                const int32 top = (int32)destinationRect.top;
                const int32 right = (int32)destinationRect.right;
                const int32 bottom = (int32)destinationRect.bottom;

                renderer_base& baseRenderer = aggInterface.fBaseRenderer;

                // iterate over clipping boxes
                baseRenderer.first_clip_box();
                do {
                        const int32 x1 = max_c(baseRenderer.xmin(), left);
                        const int32 x2 = min_c(baseRenderer.xmax(), right);
                        if (x1 > x2)
                                continue;

                        int32 y1 = max_c(baseRenderer.ymin(), top);
                        int32 y2 = min_c(baseRenderer.ymax(), bottom);
                        if (y1 > y2)
                                continue;

                        // buffer offset into destination
                        fDestination = aggInterface.fBuffer.row_ptr(y1) + x1 * 4;

                        // x and y are needed as indices into the weight arrays, so the
                        // offset into the target buffer needs to be compensated
                        const int32 xIndexL = x1 - left - filterData.fIndexOffsetX;
                        const int32 xIndexR = x2 - left - filterData.fIndexOffsetX;
                        y1 -= top + filterData.fIndexOffsetY;
                        y2 -= top + filterData.fIndexOffsetY;

                        //printf("x: %ld - %ld\n", xIndexL, xIndexR);
                        //printf("y: %ld - %ld\n", y1, y2);

                        static_cast<OptimizedVersion*>(this)->DrawToClipRect(
                                xIndexL, xIndexR, y1, y2);

                } while (baseRenderer.next_clip_box());
        }

protected:
        agg::rendering_buffer*  fSource;
        uint32                                  fSourceBytesPerRow;
        uint8*                                  fDestination;
        uint32                                  fDestinationBytesPerRow;
        FilterInfo*                             fWeightsX;
        FilterInfo*                             fWeightsY;
};


struct ColorTypeRgb {
        static void
        Interpolate(uint32* t, const uint8* s, uint32 sourceBytesPerRow,
                uint16 wLeft, uint16 wTop, uint16 wRight, uint16 wBottom)
        {
                // left and right of top row
                t[0] = (s[0] * wLeft + s[4] * wRight) * wTop;
                t[1] = (s[1] * wLeft + s[5] * wRight) * wTop;
                t[2] = (s[2] * wLeft + s[6] * wRight) * wTop;

                // left and right of bottom row
                s += sourceBytesPerRow;
                t[0] += (s[0] * wLeft + s[4] * wRight) * wBottom;
                t[1] += (s[1] * wLeft + s[5] * wRight) * wBottom;
                t[2] += (s[2] * wLeft + s[6] * wRight) * wBottom;

                t[0] >>= 16;
                t[1] >>= 16;
                t[2] >>= 16;
        }

        static void
        InterpolateLastColumn(uint32* t, const uint8* s, const uint8* sBottom,
                uint16 wTop, uint16 wBottom)
        {
                t[0] = (s[0] * wTop + sBottom[0] * wBottom) >> 8;
                t[1] = (s[1] * wTop + sBottom[1] * wBottom) >> 8;
                t[2] = (s[2] * wTop + sBottom[2] * wBottom) >> 8;
        }

        static void
        InterpolateLastRow(uint32* t, const uint8* s, uint16 wLeft,
                uint16 wRight)
        {
                t[0] = (s[0] * wLeft + s[4] * wRight) >> 8;
                t[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
                t[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
        }
};


struct ColorTypeRgba {
        static void
        Interpolate(uint32* t, const uint8* s, uint32 sourceBytesPerRow,
                uint16 wLeft, uint16 wTop, uint16 wRight, uint16 wBottom)
        {
                // left and right of top row
                t[0] = (s[0] * wLeft + s[4] * wRight) * wTop;
                t[1] = (s[1] * wLeft + s[5] * wRight) * wTop;
                t[2] = (s[2] * wLeft + s[6] * wRight) * wTop;
                t[3] = (s[3] * wLeft + s[7] * wRight) * wTop;

                // left and right of bottom row
                s += sourceBytesPerRow;

                t[0] += (s[0] * wLeft + s[4] * wRight) * wBottom;
                t[1] += (s[1] * wLeft + s[5] * wRight) * wBottom;
                t[2] += (s[2] * wLeft + s[6] * wRight) * wBottom;
                t[3] += (s[3] * wLeft + s[7] * wRight) * wBottom;

                t[0] >>= 16;
                t[1] >>= 16;
                t[2] >>= 16;
                t[3] >>= 16;
        }

        static void
        InterpolateLastColumn(uint32* t, const uint8* s, const uint8* sBottom,
                uint16 wTop, uint16 wBottom)
        {
                t[0] = (s[0] * wTop + sBottom[0] * wBottom) >> 8;
                t[1] = (s[1] * wTop + sBottom[1] * wBottom) >> 8;
                t[2] = (s[2] * wTop + sBottom[2] * wBottom) >> 8;
                t[3] = (s[3] * wTop + sBottom[3] * wBottom) >> 8;
        }

        static void
        InterpolateLastRow(uint32* t, const uint8* s, uint16 wLeft,
                uint16 wRight)
        {
                t[0] = (s[0] * wLeft + s[4] * wRight) >> 8;
                t[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
                t[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
                t[3] = (s[3] * wLeft + s[7] * wRight) >> 8;
        }
};


// Draw mode that overwrites the destination with the interpolated color.
struct DrawModeCopy {
        // Stores t[0..2] into the first three bytes of the pixel at d and
        // advances d to the next pixel. The fourth destination byte is left
        // untouched.
        static void
        Blend(uint8*& d, uint32* t)
        {
                *d++ = t[0];
                *d++ = t[1];
                *d++ = t[2];
                // skip the fourth byte of the pixel
                d++;
        }
};


struct DrawModeAlphaOverlay {
        static void
        Blend(uint8*& d, uint32* t)
        {
                uint8 t0 = t[0];
                uint8 t1 = t[1];
                uint8 t2 = t[2];
                uint8 t3 = t[3];

                if (t3 == 255) {
                        d[0] = t0;
                        d[1] = t1;
                        d[2] = t2;
                } else {
                        d[0] = ((t0 - d[0]) * t3 + (d[0] << 8)) >> 8;
                        d[1] = ((t1 - d[1]) * t3 + (d[1] << 8)) >> 8;
                        d[2] = ((t2 - d[2]) * t3 + (d[2] << 8)) >> 8;
                }

                d += 4;
        }
};


// Generic bilinear drawing variant. ColorType supplies the interpolation of
// the source pixels, DrawMode decides how the interpolated color is combined
// with the destination pixel.
template<class ColorType, class DrawMode>
struct BilinearDefault :
        DrawBitmapBilinearOptimized<BilinearDefault<ColorType, DrawMode> > {

        // Draws the destination rows y1..y2 and columns xIndexL..xIndexR
        // (inclusive, given as indices into fWeightsY/fWeightsX), starting at
        // fDestination. fDestination is advanced by one destination row for
        // every row handled by the main loop.
        void DrawToClipRect(int32 xIndexL, int32 xIndexR, int32 y1, int32 y2)
        {
                // In this mode we anticipate many pixels which need filtering,
                // there are no special cases for direct hit pixels except for
                // the last column/row and the right/bottom corner pixel.

                // A source with a single row has no row below to blend with.
                // The source does not change while drawing, so this is checked
                // once instead of per pixel.
                const bool hasBottomRow = this->fSource->height() > 1;

                // The last column/row handling does not need to be performed
                // for all clipping rects!
                int32 yMax = y2;
                if (this->fWeightsY[yMax].weight == 255)
                        yMax--;
                int32 xIndexMax = xIndexR;
                if (this->fWeightsX[xIndexMax].weight == 255)
                        xIndexMax--;

                for (; y1 <= yMax; y1++) {
                        // cache the weight of the top and bottom row
                        const uint16 wTop = this->fWeightsY[y1].weight;
                        const uint16 wBottom = 255 - this->fWeightsY[y1].weight;

                        // buffer offset into source (top row)
                        const uint8* src = this->fSource->row_ptr(
                                this->fWeightsY[y1].index);

                        // buffer handle for destination to be incremented per
                        // pixel
                        uint8* d = this->fDestination;

                        for (int32 x = xIndexL; x <= xIndexMax; x++) {
                                const uint8* s = src + this->fWeightsX[x].index;

                                // calculate the weighted sum of all four
                                // interpolated pixels
                                const uint16 wLeft = this->fWeightsX[x].weight;
                                const uint16 wRight = 255 - wLeft;

                                uint32 t[4];

                                if (hasBottomRow) {
                                        ColorType::Interpolate(&t[0], s, this->fSourceBytesPerRow,
                                                wLeft, wTop, wRight, wBottom);
                                } else {
                                        ColorType::InterpolateLastRow(&t[0], s, wLeft, wRight);
                                }
                                DrawMode::Blend(d, &t[0]);
                        }
                        // last column of pixels if necessary
                        if (xIndexMax < xIndexR) {
                                const uint8* s = src + this->fWeightsX[xIndexR].index;
                                // Without a row below, the top row stands in for
                                // the bottom row. This keeps the read inside the
                                // source and still draws the column; skipping it
                                // would leave the right-most destination column
                                // untouched on all rows but the last.
                                const uint8* sBottom
                                        = hasBottomRow ? s + this->fSourceBytesPerRow : s;

                                uint32 t[4];
                                ColorType::InterpolateLastColumn(&t[0], s, sBottom, wTop,
                                        wBottom);
                                DrawMode::Blend(d, &t[0]);
                        }

                        this->fDestination += this->fDestinationBytesPerRow;
                }

                // last row of pixels if necessary
                // buffer offset into source (bottom row)
                const uint8* src
                        = this->fSource->row_ptr(this->fWeightsY[y2].index);
                // buffer handle for destination to be incremented per pixel
                uint8* d = this->fDestination;

                if (yMax < y2) {
                        for (int32 x = xIndexL; x <= xIndexMax; x++) {
                                const uint8* s = src + this->fWeightsX[x].index;
                                const uint16 wLeft = this->fWeightsX[x].weight;
                                const uint16 wRight = 255 - wLeft;
                                uint32 t[4];
                                ColorType::InterpolateLastRow(&t[0], s, wLeft, wRight);
                                DrawMode::Blend(d, &t[0]);
                        }
                }

                // pixel in bottom right corner if necessary
                // NOTE(review): this copies all four source bytes directly and
                // does not go through DrawMode::Blend() -- confirm that this is
                // intended for draw modes other than plain copy.
                if (yMax < y2 && xIndexMax < xIndexR) {
                        const uint8* s = src + this->fWeightsX[xIndexR].index;
                        *(uint32*)d = *(uint32*)s;
                }
        }
};


// Drawing variant for scales at which many destination pixels map directly
// onto a source pixel. It spends more branches per pixel than the other
// variants, which pays off only if enough pixels take one of the shortcuts.
// Colors are written straight into the first three bytes of each destination
// pixel.
struct BilinearLowFilterRatio :
        DrawBitmapBilinearOptimized<BilinearLowFilterRatio> {
        // Draws the destination rows y1..y2 and columns xIndexL..xIndexR
        // (inclusive, given as indices into fWeightsY/fWeightsX), starting at
        // fDestination, which is advanced by one destination row per row.
        void DrawToClipRect(int32 xIndexL, int32 xIndexR, int32 y1, int32 y2)
        {
                for (int32 y = y1; y <= y2; y++) {
                        const FilterInfo& rowInfo = fWeightsY[y];
                        const uint16 wTop = rowInfo.weight;
                        const uint16 wBottom = 255 - rowInfo.weight;

                        // top source row for this destination row
                        const uint8* src = fSource->row_ptr(rowInfo.index);

                        if (wTop == 255)
                                _DrawTopRowOnly(src, fDestination, xIndexL, xIndexR);
                        else {
                                _DrawTwoRows(src, fDestination, xIndexL, xIndexR, wTop,
                                        wBottom);
                        }

                        fDestination += fDestinationBytesPerRow;
                }
        }

private:
        // Handles a destination row whose top source row has full weight, so
        // the row below is never read. This matters at the bottom edge of the
        // source bitmap, where there is no row below, and speeds up low
        // integer scales.
        void _DrawTopRowOnly(const uint8* src, uint8* d, int32 xIndexL,
                int32 xIndexR)
        {
                for (int32 x = xIndexL; x <= xIndexR; x++, d += 4) {
                        const uint8* s = src + fWeightsX[x].index;
                        const uint16 wLeft = fWeightsX[x].weight;

                        if (wLeft == 255) {
                                // Direct hit: copy the whole pixel. This also
                                // avoids reading past the right edge.
                                *(uint32*)d = *(uint32*)s;
                                continue;
                        }

                        // blend with the right neighbour only
                        const uint16 wRight = 255 - wLeft;
                        d[0] = (s[0] * wLeft + s[4] * wRight) >> 8;
                        d[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
                        d[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
                }
        }

        // Handles a destination row that is blended from two source rows.
        void _DrawTwoRows(const uint8* src, uint8* d, int32 xIndexL,
                int32 xIndexR, uint16 wTop, uint16 wBottom)
        {
                for (int32 x = xIndexL; x <= xIndexR; x++, d += 4) {
                        const uint8* s = src + fWeightsX[x].index;
                        const uint8* sBottom = s + fSourceBytesPerRow;
                        const uint16 wLeft = fWeightsX[x].weight;

                        if (wLeft == 255) {
                                // The left pixel has full weight: blend
                                // vertically only. This avoids reading past the
                                // right edge or simply saves time.
                                d[0] = (s[0] * wTop + sBottom[0] * wBottom) >> 8;
                                d[1] = (s[1] * wTop + sBottom[1] * wBottom) >> 8;
                                d[2] = (s[2] * wTop + sBottom[2] * wBottom) >> 8;
                                continue;
                        }

                        // weighted sum of all four pixels
                        const uint16 wRight = 255 - wLeft;

                        // left and right of top row
                        uint32 t0 = (s[0] * wLeft + s[4] * wRight) * wTop;
                        uint32 t1 = (s[1] * wLeft + s[5] * wRight) * wTop;
                        uint32 t2 = (s[2] * wLeft + s[6] * wRight) * wTop;

                        // left and right of bottom row
                        t0 += (sBottom[0] * wLeft + sBottom[4] * wRight) * wBottom;
                        t1 += (sBottom[1] * wLeft + sBottom[5] * wRight) * wBottom;
                        t2 += (sBottom[2] * wLeft + sBottom[6] * wRight) * wBottom;

                        d[0] = t0 >> 16;
                        d[1] = t1 >> 16;
                        d[2] = t2 >> 16;
                }
        }
};


#ifdef __i386__

// Drawing variant that hands the bulk of every destination row to the
// assembler routine bilinear_scale_xloop_mmxsse(). The last column, the last
// row and the bottom right corner pixel are handled here whenever their
// weight is 255, so that no source pixel to the right of or below them is
// read by this code.
struct BilinearSimd : DrawBitmapBilinearOptimized<BilinearSimd> {
        // Draws the destination rows y1..y2 and columns xIndexL..xIndexR
        // (inclusive, given as indices into fWeightsY/fWeightsX), starting at
        // fDestination, which is advanced by one destination row for every
        // row handled by the main loop.
        void DrawToClipRect(int32 xIndexL, int32 xIndexR, int32 y1, int32 y2)
        {
                // The last column/row handling does not need to be performed
                // for all clipping rects!
                const bool ownLastRow = fWeightsY[y2].weight == 255;
                const bool ownLastColumn = fWeightsX[xIndexR].weight == 255;
                const int32 yMax = ownLastRow ? y2 - 1 : y2;
                const int32 xIndexMax = ownLastColumn ? xIndexR - 1 : xIndexR;

                for (int32 y = y1; y <= yMax; y++) {
                        const uint16 wTop = fWeightsY[y].weight;
                        const uint16 wBottom = 255 - fWeightsY[y].weight;

                        // top source row for this destination row
                        const uint8* src = fSource->row_ptr(fWeightsY[y].index);

                        bilinear_scale_xloop_mmxsse(src, fDestination, fWeightsX, xIndexL,
                                xIndexMax, wTop, fSourceBytesPerRow);

                        if (ownLastColumn) {
                                // continue right after the pixels processed above
                                uint8* d = fDestination + (xIndexMax - xIndexL + 1) * 4;
                                const uint8* s = src + fWeightsX[xIndexR].index;
                                const uint8* sBottom = s + fSourceBytesPerRow;
                                // blend vertically only
                                d[0] = (s[0] * wTop + sBottom[0] * wBottom) >> 8;
                                d[1] = (s[1] * wTop + sBottom[1] * wBottom) >> 8;
                                d[2] = (s[2] * wTop + sBottom[2] * wBottom) >> 8;
                        }

                        fDestination += fDestinationBytesPerRow;
                }

                if (!ownLastRow)
                        return;

                // last row of pixels: blend horizontally only
                const uint8* src = fSource->row_ptr(fWeightsY[y2].index);
                uint8* d = fDestination;

                for (int32 x = xIndexL; x <= xIndexMax; x++, d += 4) {
                        const uint8* s = src + fWeightsX[x].index;
                        const uint16 wLeft = fWeightsX[x].weight;
                        const uint16 wRight = 255 - wLeft;
                        d[0] = (s[0] * wLeft + s[4] * wRight) >> 8;
                        d[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
                        d[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
                }

                // pixel in bottom right corner: copied as is
                if (ownLastColumn) {
                        const uint8* s = src + fWeightsX[xIndexR].index;
                        *(uint32*)d = *(uint32*)s;
                }
        }
};

#endif  // __i386__


// Entry point for drawing a scaled bitmap with bilinear filtering. ColorType
// selects how source pixels are interpolated, DrawMode how the result is
// combined with the destination.
template<class ColorType, class DrawMode>
struct DrawBitmapBilinear {
        // Precomputes one FilterInfo per destination column and row that can
        // be drawn, then hands the actual drawing to one of the drawing
        // variants.
        //
        // painter:         only used to query the clipping region
        // aggInterface:    provides the destination buffer and the base
        //                  renderer with its clipping boxes
        // bitmap:          source pixels, addressed with 4 bytes per pixel
        // offset:          differs from destinationRect's left top corner if
        //                  only a part of the source bitmap is drawn
        // scaleX, scaleY:  scale factors from source to destination
        // destinationRect: area covered by the scaled bitmap
        void
        Draw(const Painter* painter, PainterAggInterface& aggInterface,
                agg::rendering_buffer& bitmap, BPoint offset,
                double scaleX, double scaleY, BRect destinationRect)
        {
                //bigtime_t now = system_time();
                uint32 dstWidth = destinationRect.IntegerWidth() + 1;
                uint32 dstHeight = destinationRect.IntegerHeight() + 1;
                uint32 srcWidth = bitmap.width();
                uint32 srcHeight = bitmap.height();

                // Do not calculate more filter weights than necessary and also
                // keep the stack based allocations reasonably sized
                const BRegion& clippingRegion = *painter->ClippingRegion();
                if (clippingRegion.Frame().IntegerWidth() + 1 < (int32)dstWidth)
                        dstWidth = clippingRegion.Frame().IntegerWidth() + 1;
                if (clippingRegion.Frame().IntegerHeight() + 1 < (int32)dstHeight)
                        dstHeight = clippingRegion.Frame().IntegerHeight() + 1;

                // When calculating less filter weights than specified by
                // destinationRect, we need to compensate the offset.
                FilterData filterData;
                filterData.fIndexOffsetX = 0;
                filterData.fIndexOffsetY = 0;
                if (clippingRegion.Frame().left > destinationRect.left) {
                        filterData.fIndexOffsetX = (int32)(clippingRegion.Frame().left
                                - destinationRect.left);
                }
                if (clippingRegion.Frame().top > destinationRect.top) {
                        filterData.fIndexOffsetY = (int32)(clippingRegion.Frame().top
                                - destinationRect.top);
                }

//#define FILTER_INFOS_ON_HEAP
#ifdef FILTER_INFOS_ON_HEAP
                filterData.fWeightsX = new (nothrow) FilterInfo[dstWidth];
                filterData.fWeightsY = new (nothrow) FilterInfo[dstHeight];
                if (filterData.fWeightsX == NULL || filterData.fWeightsY == NULL) {
                        delete[] filterData.fWeightsX;
                        delete[] filterData.fWeightsY;
                        return;
                }
#else
                // stack based saves about 200µs on 1.85 GHz Core 2 Duo
                // should not pose a problem with stack overflows
                // (needs around 12Kb for 1920x1200)
                // NOTE: arrays with a runtime size are a compiler extension in C++.
                FilterInfo xWeights[dstWidth];
                FilterInfo yWeights[dstHeight];
                filterData.fWeightsX = &xWeights[0];
                filterData.fWeightsY = &yWeights[0];
#endif

                // Extract the cropping information for the source bitmap,
                // If only a part of the source bitmap is to be drawn with scale,
                // the offset will be different from the destinationRect left top
                // corner.
                const int32 xBitmapShift = (int32)(destinationRect.left - offset.x);
                const int32 yBitmapShift = (int32)(destinationRect.top - offset.y);

                for (uint32 i = 0; i < dstWidth; i++) {
                        // fractional index into source
                        // NOTE: It is very important to calculate the fractional index
                        // into the source pixel grid like this to prevent out of bounds
                        // access! It will result in the rightmost pixel of the destination
                        // to access the rightmost pixel of the source with a weighting
                        // of 255. This in turn will trigger an optimization in the loop
                        // that also prevents out of bounds access.
                        float index = (i + filterData.fIndexOffsetX) * (srcWidth - 1)
                                / (srcWidth * scaleX - 1);
                        // round down to get the left pixel
                        filterData.fWeightsX[i].index = (uint16)index;
                        filterData.fWeightsX[i].weight =
                                255 - (uint16)((index - filterData.fWeightsX[i].index) * 255);
                        // handle cropped source bitmap
                        filterData.fWeightsX[i].index += xBitmapShift;
                        // precompute index for 32 bit pixels
                        filterData.fWeightsX[i].index *= 4;
                }

                for (uint32 i = 0; i < dstHeight; i++) {
                        // fractional index into source
                        // NOTE: It is very important to calculate the fractional index
                        // into the source pixel grid like this to prevent out of bounds
                        // access! It will result in the bottommost pixel of the
                        // destination to access the bottommost pixel of the source with a
                        // weighting of 255. This in turn will trigger an optimization in
                        // the loop that also prevents out of bounds access.
                        float index = (i + filterData.fIndexOffsetY) * (srcHeight - 1)
                                / (srcHeight * scaleY - 1);
                        // round down to get the top pixel
                        filterData.fWeightsY[i].index = (uint16)index;
                        filterData.fWeightsY[i].weight =
                                255 - (uint16)((index - filterData.fWeightsY[i].index) * 255);
                        // handle cropped source bitmap
                        filterData.fWeightsY[i].index += yBitmapShift;
                }
                //printf("X: %d/%d ... %d/%d, %d/%d (%ld)\n",
                //      xWeights[0].index, xWeights[0].weight,
                //      xWeights[dstWidth - 2].index, xWeights[dstWidth - 2].weight,
                //      xWeights[dstWidth - 1].index, xWeights[dstWidth - 1].weight,
                //      dstWidth);
                //printf("Y: %d/%d ... %d/%d, %d/%d (%ld)\n",
                //      yWeights[0].index, yWeights[0].weight,
                //      yWeights[dstHeight - 2].index, yWeights[dstHeight - 2].weight,
                //      yWeights[dstHeight - 1].index, yWeights[dstHeight - 1].weight,
                //      dstHeight);

                // Figure out which version of the code we want to use...
                enum {
                        kOptimizeForLowFilterRatio = 0,
                        kUseDefaultVersion,
                        kUseSIMDVersion
                };

                int codeSelect = kUseDefaultVersion;

                // The specialized versions only implement copying of RGB pixels.
                if (typeid(ColorType) == typeid(ColorTypeRgb)
                        && typeid(DrawMode) == typeid(DrawModeCopy)) {
#ifdef __i386__
                        const uint32 neededSIMDFlags
                                = APPSERVER_SIMD_MMX | APPSERVER_SIMD_SSE;
                        const bool useSIMD
                                = (gSIMDFlags & neededSIMDFlags) == neededSIMDFlags;
#else
                        // The SIMD version is only compiled for i386. Selecting it
                        // elsewhere would match no case below and draw nothing.
                        const bool useSIMD = false;
#endif
                        if (useSIMD)
                                codeSelect = kUseSIMDVersion;
                        else if (scaleX == scaleY && (scaleX == 1.5 || scaleX == 2.0
                                || scaleX == 2.5 || scaleX == 3.0)) {
                                codeSelect = kOptimizeForLowFilterRatio;
                        }
                }

                switch (codeSelect) {
                        case kOptimizeForLowFilterRatio:
                        {
                                BilinearLowFilterRatio bilinearPainter;
                                bilinearPainter.Draw(aggInterface, destinationRect,
                                        &bitmap, filterData);
                                break;
                        }

#ifdef __i386__
                        case kUseSIMDVersion:
                        {
                                BilinearSimd bilinearPainter;
                                bilinearPainter.Draw(aggInterface, destinationRect, &bitmap,
                                        filterData);
                                break;
                        }
#endif  // __i386__

                        case kUseDefaultVersion:
                        default:
                        {
                                // Also the fallback for any selection that has no
                                // implementation in this build, so that the bitmap
                                // is always drawn.
                                BilinearDefault<ColorType, DrawMode> bilinearPainter;
                                bilinearPainter.Draw(aggInterface, destinationRect, &bitmap,
                                        filterData);
                                break;
                        }
                }

#ifdef FILTER_INFOS_ON_HEAP
                delete[] filterData.fWeightsX;
                delete[] filterData.fWeightsY;
#endif
                //printf("draw bitmap %.5fx%.5f: %lld\n", scaleX, scaleY,
                //      system_time() - now);
        }
};


} // namespace BitmapPainterPrivate


#endif // DRAW_BITMAP_BILINEAR_H