tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pixman-inlines.h (49829B)


      1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
      2 /*
      3 * Copyright © 2000 SuSE, Inc.
      4 * Copyright © 2007 Red Hat, Inc.
      5 *
      6 * Permission to use, copy, modify, distribute, and sell this software and its
      7 * documentation for any purpose is hereby granted without fee, provided that
      8 * the above copyright notice appear in all copies and that both that
      9 * copyright notice and this permission notice appear in supporting
     10 * documentation, and that the name of SuSE not be used in advertising or
     11 * publicity pertaining to distribution of the software without specific,
     12 * written prior permission.  SuSE makes no representations about the
     13 * suitability of this software for any purpose.  It is provided "as is"
     14 * without express or implied warranty.
     15 *
     16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
     17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
     18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
     20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
     21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     22 *
     23 * Author:  Keith Packard, SuSE, Inc.
     24 */
     25 
     26 #ifndef PIXMAN_FAST_PATH_H__
     27 #define PIXMAN_FAST_PATH_H__
     28 
     29 #include "pixman-private.h"
     30 
     31 #define PIXMAN_REPEAT_COVER -1
     32 
     33 /* Flags describing input parameters to fast path macro template.
     34 * Turning on some flag values may indicate that
     35 * "some property X is available so template can use this" or
     36 * "some property X should be handled by template".
     37 *
     38 * FLAG_HAVE_SOLID_MASK
     39 *  Input mask is solid so template should handle this.
     40 *
     41 * FLAG_HAVE_NON_SOLID_MASK
     42 *  Input mask is bits mask so template should handle this.
     43 *
     44 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
     45 * exclusive. (It's not allowed to turn both flags on)
     46 */
     47 #define FLAG_NONE				(0)
     48 #define FLAG_HAVE_SOLID_MASK			(1 <<   1)
     49 #define FLAG_HAVE_NON_SOLID_MASK		(1 <<   2)
     50 
     51 /* To avoid too short repeated scanline function calls, extend source
     52 * scanlines having width less than below constant value.
     53 */
     54 #define REPEAT_NORMAL_MIN_WIDTH			64
     55 
     56 static force_inline pixman_bool_t
     57 repeat (pixman_repeat_t repeat, int *c, int size)
     58 {
     59    if (repeat == PIXMAN_REPEAT_NONE)
     60    {
     61 if (*c < 0 || *c >= size)
     62     return FALSE;
     63    }
     64    else if (repeat == PIXMAN_REPEAT_NORMAL)
     65    {
     66 while (*c >= size)
     67     *c -= size;
     68 while (*c < 0)
     69     *c += size;
     70    }
     71    else if (repeat == PIXMAN_REPEAT_PAD)
     72    {
     73 *c = CLIP (*c, 0, size - 1);
     74    }
     75    else /* REFLECT */
     76    {
     77 *c = MOD (*c, size * 2);
     78 if (*c >= size)
     79     *c = size * 2 - *c - 1;
     80    }
     81    return TRUE;
     82 }
     83 
     84 static force_inline int
     85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
     86 {
     87    return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
     88    ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
     89 }
     90 
     91 #if BILINEAR_INTERPOLATION_BITS <= 4
     92 /* Inspired by Filter_32_opaque from Skia */
     93 static force_inline uint32_t
     94 bilinear_interpolation (uint32_t tl, uint32_t tr,
     95 		uint32_t bl, uint32_t br,
     96 		int distx, int disty)
     97 {
     98    int distxy, distxiy, distixy, distixiy;
     99    uint32_t lo, hi;
    100 
    101    distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
    102    disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
    103 
    104    distxy = distx * disty;
    105    distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */
    106    distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */
    107    distixiy =
    108 16 * 16 - (disty << 4) -
    109 (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
    110 
    111    lo = (tl & 0xff00ff) * distixiy;
    112    hi = ((tl >> 8) & 0xff00ff) * distixiy;
    113 
    114    lo += (tr & 0xff00ff) * distxiy;
    115    hi += ((tr >> 8) & 0xff00ff) * distxiy;
    116 
    117    lo += (bl & 0xff00ff) * distixy;
    118    hi += ((bl >> 8) & 0xff00ff) * distixy;
    119 
    120    lo += (br & 0xff00ff) * distxy;
    121    hi += ((br >> 8) & 0xff00ff) * distxy;
    122 
    123    return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
    124 }
    125 
    126 #else
    127 #if SIZEOF_LONG > 4
    128 
    129 static force_inline uint32_t
    130 bilinear_interpolation (uint32_t tl, uint32_t tr,
    131 		uint32_t bl, uint32_t br,
    132 		int distx, int disty)
    133 {
    134    uint64_t distxy, distxiy, distixy, distixiy;
    135    uint64_t tl64, tr64, bl64, br64;
    136    uint64_t f, r;
    137 
    138    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
    139    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
    140 
    141    distxy = distx * disty;
    142    distxiy = distx * (256 - disty);
    143    distixy = (256 - distx) * disty;
    144    distixiy = (256 - distx) * (256 - disty);
    145 
    146    /* Alpha and Blue */
    147    tl64 = tl & 0xff0000ff;
    148    tr64 = tr & 0xff0000ff;
    149    bl64 = bl & 0xff0000ff;
    150    br64 = br & 0xff0000ff;
    151 
    152    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
    153    r = f & 0x0000ff0000ff0000ull;
    154 
    155    /* Red and Green */
    156    tl64 = tl;
    157    tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
    158 
    159    tr64 = tr;
    160    tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
    161 
    162    bl64 = bl;
    163    bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
    164 
    165    br64 = br;
    166    br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
    167 
    168    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
    169    r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
    170 
    171    return (uint32_t)(r >> 16);
    172 }
    173 
    174 #else
    175 
    176 static force_inline uint32_t
    177 bilinear_interpolation (uint32_t tl, uint32_t tr,
    178 		uint32_t bl, uint32_t br,
    179 		int distx, int disty)
    180 {
    181    int distxy, distxiy, distixy, distixiy;
    182    uint32_t f, r;
    183 
    184    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
    185    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
    186 
    187    distxy = distx * disty;
    188    distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
    189    distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
    190    distixiy =
    191 256 * 256 - (disty << 8) -
    192 (distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
    193 
    194    /* Blue */
    195    r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
    196      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
    197 
    198    /* Green */
    199    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
    200      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
    201    r |= f & 0xff000000;
    202 
    203    tl >>= 16;
    204    tr >>= 16;
    205    bl >>= 16;
    206    br >>= 16;
    207    r >>= 16;
    208 
    209    /* Red */
    210    f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
    211      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
    212    r |= f & 0x00ff0000;
    213 
    214    /* Alpha */
    215    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
    216      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
    217    r |= f & 0xff000000;
    218 
    219    return r;
    220 }
    221 
    222 #endif
    223 #endif // BILINEAR_INTERPOLATION_BITS <= 4
    224 
    225 static force_inline argb_t
    226 bilinear_interpolation_float (argb_t tl, argb_t tr,
    227 		      argb_t bl, argb_t br,
    228 		      float distx, float disty)
    229 {
    230    float distxy, distxiy, distixy, distixiy;
    231    argb_t r;
    232 
    233    distxy = distx * disty;
    234    distxiy = distx * (1.f - disty);
    235    distixy = (1.f - distx) * disty;
    236    distixiy = (1.f - distx) * (1.f - disty);
    237 
    238    r.a = tl.a * distixiy + tr.a * distxiy +
    239          bl.a * distixy  + br.a * distxy;
    240    r.r = tl.r * distixiy + tr.r * distxiy +
    241          bl.r * distixy  + br.r * distxy;
    242    r.g = tl.g * distixiy + tr.g * distxiy +
    243          bl.g * distixy  + br.g * distxy;
    244    r.b = tl.b * distixiy + tr.b * distxiy +
    245          bl.b * distixy  + br.b * distxy;
    246 
    247    return r;
    248 }
    249 
    250 /*
    251 * For each scanline fetched from source image with PAD repeat:
    252 * - calculate how many pixels need to be padded on the left side
    253 * - calculate how many pixels need to be padded on the right side
    254 * - update width to only count pixels which are fetched from the image
    255 * All this information is returned via 'width', 'left_pad', 'right_pad'
    256 * arguments. The code is assuming that 'unit_x' is positive.
    257 *
    258 * Note: 64-bit math is used in order to avoid potential overflows, which
    259 *       is probably excessive in many cases. This particular function
    260 *       may need its own correctness test and performance tuning.
    261 */
    262 static force_inline void
    263 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
    264 			pixman_fixed_t  vx,
    265 			pixman_fixed_t  unit_x,
    266 			int32_t *       width,
    267 			int32_t *       left_pad,
    268 			int32_t *       right_pad)
    269 {
    270    int64_t max_vx = (int64_t) source_image_width << 16;
    271    int64_t tmp;
    272    if (vx < 0)
    273    {
    274 tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
    275 if (tmp > *width)
    276 {
    277     *left_pad = *width;
    278     *width = 0;
    279 }
    280 else
    281 {
    282     *left_pad = (int32_t) tmp;
    283     *width -= (int32_t) tmp;
    284 }
    285    }
    286    else
    287    {
    288 *left_pad = 0;
    289    }
    290    tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
    291    if (tmp < 0)
    292    {
    293 *right_pad = *width;
    294 *width = 0;
    295    }
    296    else if (tmp >= *width)
    297    {
    298 *right_pad = 0;
    299    }
    300    else
    301    {
    302 *right_pad = *width - (int32_t) tmp;
    303 *width = (int32_t) tmp;
    304    }
    305 }
    306 
    307 /* A macroified version of specialized nearest scalers for some
    308 * common 8888 and 565 formats. It supports SRC and OVER ops.
    309 *
    310 * There are two repeat versions, one that handles repeat normal,
    311 * and one without repeat handling that only works if the src region
    312 * used is completely covered by the pre-repeated source samples.
    313 *
 * The loops are unrolled to process two pixels per iteration for better
 * performance on most CPU architectures (superscalar processors can issue
 * several operations simultaneously, while other processors can hide
 * instruction latencies by pipelining operations). Unrolling further
 * does not make much sense because the compiler would soon start running
 * out of spare registers.
    320 */
    321 
/* Alpha accessors, selected by token pasting as GET_ ## SRC_FORMAT ## _ALPHA
 * in FAST_NEAREST_SCANLINE.  For 8888 pixels the alpha value is taken from
 * bits 24 and up of the pixel. */
#define GET_8888_ALPHA(s) ((s) >> 24)
/* This is not actually used since we don't have an OVER with
   565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
/* Always reports 0xff, regardless of the pixel value (presumably because
 * the x888 format carries no alpha channel). */
#define GET_x888_ALPHA(s) 0xff
    327 
    328 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\
    329 		      src_type_t, dst_type_t, OP, repeat_mode)				\
    330 static force_inline void									\
    331 scanline_func_name (dst_type_t       *dst,							\
    332 	    const src_type_t *src,							\
    333 	    int32_t           w,							\
    334 	    pixman_fixed_t    vx,							\
    335 	    pixman_fixed_t    unit_x,							\
    336 	    pixman_fixed_t    src_width_fixed,						\
    337 	    pixman_bool_t     fully_transparent_src)					\
    338 {												\
    339 uint32_t   d;										\
    340 src_type_t s1, s2;									\
    341 uint8_t    a1, a2;									\
    342 int        x1, x2;									\
    343 											\
    344 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)			\
    345     return;										\
    346 											\
    347 if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\
    348     abort();										\
    349 											\
    350 while ((w -= 2) >= 0)									\
    351 {											\
    352     x1 = pixman_fixed_to_int (vx);							\
    353     vx += unit_x;									\
    354     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
    355     {											\
    356 	/* This works because we know that unit_x is positive */			\
    357 	while (vx >= 0)									\
    358 	    vx -= src_width_fixed;							\
    359     }											\
    360     s1 = *(src + x1);									\
    361 											\
    362     x2 = pixman_fixed_to_int (vx);							\
    363     vx += unit_x;									\
    364     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
    365     {											\
    366 	/* This works because we know that unit_x is positive */			\
    367 	while (vx >= 0)									\
    368 	    vx -= src_width_fixed;							\
    369     }											\
    370     s2 = *(src + x2);									\
    371 											\
    372     if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
    373     {											\
    374 	a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
    375 	a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);						\
    376 											\
    377 	if (a1 == 0xff)									\
    378 	{										\
    379 	    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
    380 	}										\
    381 	else if (s1)									\
    382 	{										\
    383 	    d = convert_ ## DST_FORMAT ## _to_8888 (*dst);				\
    384 	    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
    385 	    a1 ^= 0xff;									\
    386 	    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
    387 	    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
    388 	}										\
    389 	dst++;										\
    390 											\
    391 	if (a2 == 0xff)									\
    392 	{										\
    393 	    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
    394 	}										\
    395 	else if (s2)									\
    396 	{										\
    397 	    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
    398 	    s2 = convert_## SRC_FORMAT ## _to_8888 (s2);				\
    399 	    a2 ^= 0xff;									\
    400 	    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\
    401 	    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
    402 	}										\
    403 	dst++;										\
    404     }											\
    405     else /* PIXMAN_OP_SRC */								\
    406     {											\
    407 	*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
    408 	*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
    409     }											\
    410 }											\
    411 											\
    412 if (w & 1)										\
    413 {											\
    414     x1 = pixman_fixed_to_int (vx);							\
    415     s1 = *(src + x1);									\
    416 											\
    417     if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
    418     {											\
    419 	a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
    420 											\
    421 	if (a1 == 0xff)									\
    422 	{										\
    423 	    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
    424 	}										\
    425 	else if (s1)									\
    426 	{										\
    427 	    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
    428 	    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
    429 	    a1 ^= 0xff;									\
    430 	    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
    431 	    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
    432 	}										\
    433 	dst++;										\
    434     }											\
    435     else /* PIXMAN_OP_SRC */								\
    436     {											\
    437 	*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
    438     }											\
    439 }											\
    440 }
    441 
/*
 * Template that defines the composite entry point
 * 'fast_composite_scaled_nearest ## scale_func_name (imp, info)' for
 * nearest-neighbour scaling.
 *
 *   scanline_func  called per scanline as
 *                  (mask, dst, src, w, vx, unit_x, src_width_fixed,
 *                   fully_transparent_src)
 *   src_type_t, mask_type_t, dst_type_t
 *                  pixel types of the three images
 *   repeat_mode    NORMAL, PAD, NONE or COVER (pasted onto PIXMAN_REPEAT_)
 *   have_mask, mask_is_solid
 *                  whether a mask is used, and whether it is a single
 *                  solid value or a per-pixel image
 *
 * The generated function transforms the centre of the first destination
 * pixel into source space, takes the horizontal and vertical steps from
 * the diagonal of the source transform, and then walks the destination
 * rows.  The source row pointer is handed to scanline_func advanced by the
 * image width while vx is reduced by src_width_fixed, i.e. scanline_func
 * sees negative coordinates relative to the end of the row.  For PAD and
 * NONE the scanline is split into left padding, image pixels and right
 * padding; padding is produced with unit_x == 0 and vx == -pixman_fixed_e
 * (presumably index -1 after pixman_fixed_to_int -- TODO confirm) from a
 * pointer one element past the pixel to replicate.
 *
 * NOTE(review): src_image->common.transform is dereferenced without a
 * NULL check, so callers presumably guarantee that a transform is set.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,     \
                                  dst_type_t, repeat_mode, have_mask, mask_is_solid)           \
static void                                                                                    \
fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,               \
                                                   pixman_composite_info_t *info)              \
{                                                                                              \
    PIXMAN_COMPOSITE_ARGS (info);                                                              \
    dst_type_t *dst_line;                                                                      \
    mask_type_t *mask_line;                                                                    \
    src_type_t *src_first_line;                                                                \
    int       y;                                                                               \
    pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width);              \
    pixman_fixed_t max_vy;                                                                     \
    pixman_vector_t v;                                                                         \
    pixman_fixed_t vx, vy;                                                                     \
    pixman_fixed_t unit_x, unit_y;                                                             \
    int32_t left_pad, right_pad;                                                               \
                                                                                               \
    src_type_t *src;                                                                           \
    dst_type_t *dst;                                                                           \
    mask_type_t solid_mask;                                                                    \
    /* Points at solid_mask unless a per-pixel mask row is selected below */                   \
    const mask_type_t *mask = &solid_mask;                                                     \
    int src_stride, mask_stride, dst_stride;                                                   \
                                                                                               \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);   \
    if (have_mask)                                                                             \
    {                                                                                          \
        if (mask_is_solid)                                                                     \
            solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);   \
        else                                                                                   \
            PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,                    \
                                   mask_stride, mask_line, 1);                                 \
    }                                                                                          \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                 \
     * transformed from destination space to source space */                                   \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);        \
                                                                                               \
    /* reference point is the center of the pixel */                                           \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                            \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                            \
    v.vector[2] = pixman_fixed_1;                                                              \
                                                                                               \
    if (!pixman_transform_point_3d (src_image->common.transform, &v))                          \
        return;                                                                                \
                                                                                               \
    /* Per-pixel steps in source space: the diagonal of the transform */                       \
    unit_x = src_image->common.transform->matrix[0][0];                                        \
    unit_y = src_image->common.transform->matrix[1][1];                                        \
                                                                                               \
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                  \
    v.vector[0] -= pixman_fixed_e;                                                             \
    v.vector[1] -= pixman_fixed_e;                                                             \
                                                                                               \
    vx = v.vector[0];                                                                          \
    vy = v.vector[1];                                                                          \
                                                                                               \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                 \
    {                                                                                          \
        max_vy = pixman_int_to_fixed (src_image->bits.height);                                 \
                                                                                               \
        /* Clamp repeating positions inside the actual samples */                              \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);                                   \
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                            \
    }                                                                                          \
                                                                                               \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||                                  \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                                   \
    {                                                                                          \
        /* Split 'width' into left_pad / width / right_pad once; the split is                  \
         * the same for every row.  vx is then moved past the left padding. */                 \
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,                     \
                                        &width, &left_pad, &right_pad);                        \
        vx += left_pad * unit_x;                                                               \
    }                                                                                          \
                                                                                               \
    while (--height >= 0)                                                                      \
    {                                                                                          \
        dst = dst_line;                                                                        \
        dst_line += dst_stride;                                                                \
        if (have_mask && !mask_is_solid)                                                       \
        {                                                                                      \
            mask = mask_line;                                                                  \
            mask_line += mask_stride;                                                          \
        }                                                                                      \
                                                                                               \
        /* Source row for this destination row, then advance to the next */                    \
        y = pixman_fixed_to_int (vy);                                                          \
        vy += unit_y;                                                                          \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                             \
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                        \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)                                \
        {                                                                                      \
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);                            \
            src = src_first_line + src_stride * y;                                             \
            if (left_pad > 0)                                                                  \
            {                                                                                  \
                /* Pointer is src + 1: the left padding replicates one pixel */                \
                scanline_func (mask, dst,                                                      \
                               src + src_image->bits.width - src_image->bits.width + 1,        \
                               left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);          \
            }                                                                                  \
            if (width > 0)                                                                     \
            {                                                                                  \
                scanline_func (mask + (mask_is_solid ? 0 : left_pad),                          \
                               dst + left_pad, src + src_image->bits.width, width,             \
                               vx - src_width_fixed, unit_x, src_width_fixed, FALSE);          \
            }                                                                                  \
            if (right_pad > 0)                                                                 \
            {                                                                                  \
                /* Pointer is one past the row end: replicates one pixel */                    \
                scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                  \
                               dst + left_pad + width, src + src_image->bits.width,            \
                               right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);         \
            }                                                                                  \
        }                                                                                      \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)                          \
        {                                                                                      \
            /* Single zero pixel that stands in for everything outside the image */            \
            static const src_type_t zero[1] = { 0 };                                           \
            if (y < 0 || y >= src_image->bits.height)                                          \
            {                                                                                  \
                /* Row entirely outside the source: whole scanline is zero */                  \
                scanline_func (mask, dst, zero + 1, left_pad + width + right_pad,              \
                               -pixman_fixed_e, 0, src_width_fixed, TRUE);                     \
                continue;                                                                      \
            }                                                                                  \
            src = src_first_line + src_stride * y;                                             \
            if (left_pad > 0)                                                                  \
            {                                                                                  \
                scanline_func (mask, dst, zero + 1, left_pad,                                  \
                               -pixman_fixed_e, 0, src_width_fixed, TRUE);                     \
            }                                                                                  \
            if (width > 0)                                                                     \
            {                                                                                  \
                scanline_func (mask + (mask_is_solid ? 0 : left_pad),                          \
                               dst + left_pad, src + src_image->bits.width, width,             \
                               vx - src_width_fixed, unit_x, src_width_fixed, FALSE);          \
            }                                                                                  \
            if (right_pad > 0)                                                                 \
            {                                                                                  \
                scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),                  \
                               dst + left_pad + width, zero + 1, right_pad,                    \
                               -pixman_fixed_e, 0, src_width_fixed, TRUE);                     \
            }                                                                                  \
        }                                                                                      \
        else                                                                                   \
        {                                                                                      \
            /* Remaining repeat modes: one call covers the whole scanline */                   \
            src = src_first_line + src_stride * y;                                             \
            scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
                           unit_x, src_width_fixed, FALSE);                                    \
        }                                                                                      \
    }                                                                                          \
}
    587 
    588 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
    589 #define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
    590 			  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
    591 FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,	\
    592 			  dst_type_t, repeat_mode, have_mask, mask_is_solid)
    593 
    594 #define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t,	\
    595 		      repeat_mode)							\
    596    static force_inline void									\
    597    scanline_func##scale_func_name##_wrapper (							\
    598 	    const uint8_t    *mask,							\
    599 	    dst_type_t       *dst,							\
    600 	    const src_type_t *src,							\
    601 	    int32_t          w,								\
    602 	    pixman_fixed_t   vx,							\
    603 	    pixman_fixed_t   unit_x,							\
    604 	    pixman_fixed_t   max_vx,							\
    605 	    pixman_bool_t    fully_transparent_src)					\
    606    {												\
    607 scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src);			\
    608    }												\
    609    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,	\
    610 		       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
    611 
    612 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\
    613 		      repeat_mode)							\
    614 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t,		\
    615 		      dst_type_t, repeat_mode)
    616 
    617 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,				\
    618 	     src_type_t, dst_type_t, OP, repeat_mode)				\
    619    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
    620 		  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,		\
    621 		  OP, repeat_mode)						\
    622    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP,			\
    623 		  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
    624 		  src_type_t, dst_type_t, repeat_mode)
    625 
    626 
    627 #define SCALED_NEAREST_FLAGS						\
    628    (FAST_PATH_SCALE_TRANSFORM	|					\
    629     FAST_PATH_NO_ALPHA_MAP	|					\
    630     FAST_PATH_NEAREST_FILTER	|					\
    631     FAST_PATH_NO_ACCESSORS	|					\
    632     FAST_PATH_NARROW_FORMAT)
    633 
    634 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)			\
    635    {   PIXMAN_OP_ ## op,						\
    636 PIXMAN_ ## s,							\
    637 (SCALED_NEAREST_FLAGS		|				\
    638  FAST_PATH_NORMAL_REPEAT	|				\
    639  FAST_PATH_X_UNIT_POSITIVE),					\
    640 PIXMAN_null, 0,							\
    641 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    642 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
    643    }
    644 
    645 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)			\
    646    {   PIXMAN_OP_ ## op,						\
    647 PIXMAN_ ## s,							\
    648 (SCALED_NEAREST_FLAGS		|				\
    649  FAST_PATH_PAD_REPEAT		|				\
    650  FAST_PATH_X_UNIT_POSITIVE),					\
    651 PIXMAN_null, 0,							\
    652 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    653 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
    654    }
    655 
    656 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)			\
    657    {   PIXMAN_OP_ ## op,						\
    658 PIXMAN_ ## s,							\
    659 (SCALED_NEAREST_FLAGS		|				\
    660  FAST_PATH_NONE_REPEAT		|				\
    661  FAST_PATH_X_UNIT_POSITIVE),					\
    662 PIXMAN_null, 0,							\
    663 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    664 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
    665    }
    666 
    667 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\
    668    {   PIXMAN_OP_ ## op,						\
    669 PIXMAN_ ## s,							\
    670 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
    671 PIXMAN_null, 0,							\
    672 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    673 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
    674    }
    675 
    676 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    677    {   PIXMAN_OP_ ## op,						\
    678 PIXMAN_ ## s,							\
    679 (SCALED_NEAREST_FLAGS		|				\
    680  FAST_PATH_NORMAL_REPEAT	|				\
    681  FAST_PATH_X_UNIT_POSITIVE),					\
    682 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
    683 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    684 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
    685    }
    686 
    687 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
    688    {   PIXMAN_OP_ ## op,						\
    689 PIXMAN_ ## s,							\
    690 (SCALED_NEAREST_FLAGS		|				\
    691  FAST_PATH_PAD_REPEAT		|				\
    692  FAST_PATH_X_UNIT_POSITIVE),					\
    693 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
    694 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    695 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
    696    }
    697 
    698 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
    699    {   PIXMAN_OP_ ## op,						\
    700 PIXMAN_ ## s,							\
    701 (SCALED_NEAREST_FLAGS		|				\
    702  FAST_PATH_NONE_REPEAT		|				\
    703  FAST_PATH_X_UNIT_POSITIVE),					\
    704 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
    705 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    706 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
    707    }
    708 
    709 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
    710    {   PIXMAN_OP_ ## op,						\
    711 PIXMAN_ ## s,							\
    712 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
    713 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
    714 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    715 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
    716    }
    717 
    718 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    719    {   PIXMAN_OP_ ## op,						\
    720 PIXMAN_ ## s,							\
    721 (SCALED_NEAREST_FLAGS		|				\
    722  FAST_PATH_NORMAL_REPEAT	|				\
    723  FAST_PATH_X_UNIT_POSITIVE),					\
    724 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
    725 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    726 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
    727    }
    728 
    729 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
    730    {   PIXMAN_OP_ ## op,						\
    731 PIXMAN_ ## s,							\
    732 (SCALED_NEAREST_FLAGS		|				\
    733  FAST_PATH_PAD_REPEAT		|				\
    734  FAST_PATH_X_UNIT_POSITIVE),					\
    735 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
    736 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    737 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
    738    }
    739 
    740 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
    741    {   PIXMAN_OP_ ## op,						\
    742 PIXMAN_ ## s,							\
    743 (SCALED_NEAREST_FLAGS		|				\
    744  FAST_PATH_NONE_REPEAT		|				\
    745  FAST_PATH_X_UNIT_POSITIVE),					\
    746 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
    747 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    748 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
    749    }
    750 
    751 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
    752    {   PIXMAN_OP_ ## op,						\
    753 PIXMAN_ ## s,							\
    754 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
    755 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
    756 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
    757 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
    758    }
    759 
    760 /* Prefer the use of 'cover' variant, because it is faster */
    /*
     * Expands to the four unmasked nearest entries for (op, s, d, func) as a
     * comma-separated list with no trailing comma, in the order COVER, NONE,
     * PAD, NORMAL, so the cover entry is listed first.
     */
    761 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\
    762    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\
    763    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),			\
    764    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\
    765    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
    766 
    /*
     * Expands to the a8-mask nearest entries in the order COVER, NONE, PAD.
     * Note that, unlike the unmasked and solid-mask lists, no NORMAL entry is
     * emitted here.
     */
    767 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)			\
    768    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
    769    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
    770    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
    771 
    /*
     * Expands to the four solid-mask nearest entries for (op, s, d, func) in
     * the order COVER, NONE, PAD, NORMAL (comma-separated, no trailing comma).
     */
    772 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)		\
    773    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
    774    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
    775    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),              \
    776    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
    777 
    778 /*****************************************************************************/
    779 
    780 /*
    781 * Identify 5 zones in each scanline for bilinear scaling, depending on
    782 * whether the 2 pixels to be interpolated are fetched from the image itself,
    783 * from the padding area around it or from both image and padding area.
     *
     * pad_repeat_get_scanline_bounds () is evaluated twice, each time on a
     * private copy of *width: once for vx and once for vx + pixman_fixed_1
     * (presumably the positions of the left and right pixel of each
     * interpolated pair; the helper is defined outside this part of the file).
     * The differences between the two results give the transition zones.
     *
     * On return:
     *  *left_pad  - left padding count reported for vx + pixman_fixed_1
     *  *left_tz   - left padding for vx minus left padding for
     *               vx + pixman_fixed_1
     *  *right_tz  - right padding for vx + pixman_fixed_1 minus right padding
     *               for vx
     *  *right_pad - right padding count reported for vx
     *  *width     - its input value reduced by the sum of the four counts above
    784 */
    785 static force_inline void
    786 bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
    787 				 pixman_fixed_t  vx,
    788 				 pixman_fixed_t  unit_x,
    789 				 int32_t *       left_pad,
    790 				 int32_t *       left_tz,
    791 				 int32_t *       width,
    792 				 int32_t *       right_tz,
    793 				 int32_t *       right_pad)
    794 {
    /* width1/width2 are scratch copies; only the pad counts are used below */
    795 int width1 = *width, left_pad1, right_pad1;
    796 int width2 = *width, left_pad2, right_pad2;
    797 
    798 pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
    799 				&width1, &left_pad1, &right_pad1);
    800 pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
    801 				unit_x, &width2, &left_pad2, &right_pad2);
    802 
    803 *left_pad = left_pad2;
    804 *left_tz = left_pad1 - left_pad2;
    805 *right_tz = right_pad2 - right_pad1;
    806 *right_pad = right_pad1;
    /* what remains of the caller's width after removing the four zones */
    807 *width -= *left_pad + *left_tz + *right_tz + *right_pad;
    808 }
    809 
    810 /*
    811 * Main loop template for single pass bilinear scaling. It needs to be
    812 * provided with 'scanline_func' which should do the compositing operation.
    813 * The needed function has the following prototype:
    814 *
    815 *	scanline_func (dst_type_t *       dst,
    816 *		       const mask_type_t * mask,
    817 *		       const src_type_t * src_top,
    818 *		       const src_type_t * src_bottom,
    819 *		       int32_t            width,
    820 *		       int                weight_top,
    821 *		       int                weight_bottom,
    822 *		       pixman_fixed_t     vx,
    823 *		       pixman_fixed_t     unit_x,
    824 *		       pixman_fixed_t     max_vx,
    825 *		       pixman_bool_t      zero_src)
    826 *
    827 * Where:
    828 *  dst                 - destination scanline buffer for storing results
    829 *  mask                - mask buffer (or single value for solid mask)
    830 *  src_top, src_bottom - two source scanlines
    831 *  width               - number of pixels to process
    832 *  weight_top          - weight of the top row for interpolation
    833 *  weight_bottom       - weight of the bottom row for interpolation
    834 *  vx                  - initial position for fetching the first pair of
    835 *                        pixels from the source buffer
    836 *  unit_x              - position increment needed to move to the next pair
    837 *                        of pixels
    838 *  max_vx              - image size as a fixed point value, can be used for
    839 *                        implementing NORMAL repeat (when it is supported)
    840 *  zero_src            - boolean hint variable, which is set to TRUE when
    841 *                        all source pixels are fetched from zero padding
    842 *                        zone for NONE repeat
    843 *
    844 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
    845 *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
    846 *       for NONE repeat when handling fuzzy antialiased top or bottom image
    847 *       edges. Also, both the top and bottom weights are guaranteed to
    848 *       be less than BILINEAR_INTERPOLATION_RANGE.
    849 *       For example, the weights can fit into unsigned byte or be used
    850 *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
    851 *       precision.
    852 */
    853 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
    854 			  dst_type_t, repeat_mode, flags)				\
    855 static void											\
    856 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
    857 					   pixman_composite_info_t *info)		\
    858 {												\
    859    PIXMAN_COMPOSITE_ARGS (info);								\
    860    dst_type_t *dst_line;									\
    861    mask_type_t *mask_line;									\
    862    src_type_t *src_first_line;									\
    863    int       y1, y2;										\
    864    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
    865    pixman_vector_t v;										\
    866    pixman_fixed_t vx, vy;									\
    867    pixman_fixed_t unit_x, unit_y;								\
    868    int32_t left_pad, left_tz, right_tz, right_pad;						\
    869 											\
    870    dst_type_t *dst;										\
    871    mask_type_t solid_mask;									\
    872    const mask_type_t *mask = &solid_mask;							\
    873    int src_stride, mask_stride, dst_stride;							\
    874 											\
    875    int src_width;										\
    876    pixman_fixed_t src_width_fixed;								\
    877    int max_x;											\
    878    pixman_bool_t need_src_extension;								\
    879 											\
    880    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
    /* 'mask' starts out pointing at solid_mask; it is only redirected (per scanline) */	\
    /* when FLAG_HAVE_NON_SOLID_MASK is set */							\
    881    if (flags & FLAG_HAVE_SOLID_MASK)								\
    882    {												\
    883 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
    884 mask_stride = 0;									\
    885    }												\
    886    else if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
    887    {												\
    888 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,				\
    889 		       mask_stride, mask_line, 1);					\
    890    }												\
    891 											\
    892    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
    893     * transformed from destination space to source space */					\
    894    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
    895 											\
    896    /* reference point is the center of the pixel */						\
    897    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
    898    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
    899    v.vector[2] = pixman_fixed_1;								\
    900 											\
    901    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
    902 return;											\
    903 											\
    904    unit_x = src_image->common.transform->matrix[0][0];						\
    905    unit_y = src_image->common.transform->matrix[1][1];						\
    906 											\
    /* take the half pixel offset added before the transform back out */			\
    907    v.vector[0] -= pixman_fixed_1 / 2;								\
    908    v.vector[1] -= pixman_fixed_1 / 2;								\
    909 											\
    910    vy = v.vector[1];										\
    911 											\
    912    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
    913 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
    914    {												\
    /* split the scanline into padding, transition and in-image zones; */			\
    /* 'width' is reduced to the in-image part */						\
    915 bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
    916 				&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
    917 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
    918 {											\
    919     /* PAD repeat does not need special handling for 'transition zones' and */		\
    920     /* they can be combined with 'padding zones' safely */				\
    921     left_pad += left_tz;								\
    922     right_pad += right_tz;								\
    923     left_tz = right_tz = 0;								\
    924 }											\
    /* move the start position past the left padding pixels */				\
    925 v.vector[0] += left_pad * unit_x;							\
    926    }												\
    927 											\
    928    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
    929    {												\
    930 vx = v.vector[0];									\
    931 repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));		\
    932 max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;			\
    933 											\
    /* narrow sources: work with a whole multiple of the image width; the rows are */	\
    /* replicated into temporary buffers for every scanline further below */		\
    934 if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)					\
    935 {											\
    936     src_width = 0;									\
    937 											\
    938     while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)			\
    939 	src_width += src_image->bits.width;						\
    940 											\
    941     need_src_extension = TRUE;								\
    942 }											\
    943 else											\
    944 {											\
    945     src_width = src_image->bits.width;							\
    946     need_src_extension = FALSE;								\
    947 }											\
    948 											\
    949 src_width_fixed = pixman_int_to_fixed (src_width);					\
    950    }												\
    951 											\
    952    while (--height >= 0)									\
    953    {												\
    954 int weight1, weight2;									\
    955 dst = dst_line;										\
    956 dst_line += dst_stride;									\
    957 vx = v.vector[0];									\
    958 if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
    959 {											\
    960     mask = mask_line;									\
    961     mask_line += mask_stride;								\
    962 }											\
    963 											\
    /* y1/weight1 describe the top row and y2/weight2 the bottom row handed to */		\
    /* scanline_func */									\
    964 y1 = pixman_fixed_to_int (vy);								\
    965 weight2 = pixman_fixed_to_bilinear_weight (vy);						\
    966 if (weight2)										\
    967 {											\
    968     /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */	\
    969     y2 = y1 + 1;									\
    970     weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;					\
    971 }											\
    972 else											\
    973 {											\
    974     /* set both top and bottom row to the same scanline and tweak weights */		\
    975     y2 = y1;										\
    976     weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;				\
    977 }											\
    978 vy += unit_y;										\
    979 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
    980 {											\
    981     src_type_t *src1, *src2;								\
    982     src_type_t buf1[2];									\
    983     src_type_t buf2[2];									\
    984     repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
    985     repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
    986     src1 = src_first_line + src_stride * y1;						\
    987     src2 = src_first_line + src_stride * y2;						\
    988 											\
    989     if (left_pad > 0)									\
    990     {											\
    /* left padding: both samples are the first pixel of the row; vx and */	\
    /* unit_x are passed as 0 so the position never leaves the 2-pixel buffer */	\
    991 	buf1[0] = buf1[1] = src1[0];							\
    992 	buf2[0] = buf2[1] = src2[0];							\
    993 	scanline_func (dst, mask,							\
    994 		       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);		\
    995 	dst += left_pad;								\
    996 	if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
    997 	    mask += left_pad;								\
    998     }											\
    999     if (width > 0)									\
   1000     {											\
   1001 	scanline_func (dst, mask,							\
   1002 		       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
   1003 	dst += width;									\
   1004 	if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1005 	    mask += width;								\
   1006     }											\
   1007     if (right_pad > 0)									\
   1008     {											\
    /* right padding: both samples are the last pixel of the row */		\
   1009 	buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
   1010 	buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
   1011 	scanline_func (dst, mask,							\
   1012 		       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);	\
   1013     }											\
   1014 }											\
   1015 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
   1016 {											\
   1017     src_type_t *src1, *src2;								\
   1018     src_type_t buf1[2];									\
   1019     src_type_t buf2[2];									\
   1020     /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
   1021     if (y1 < 0)										\
   1022     {											\
   1023 	weight1 = 0;									\
   1024 	y1 = 0;										\
   1025     }											\
   1026     if (y1 >= src_image->bits.height)							\
   1027     {											\
   1028 	weight1 = 0;									\
   1029 	y1 = src_image->bits.height - 1;						\
   1030     }											\
   1031     if (y2 < 0)										\
   1032     {											\
   1033 	weight2 = 0;									\
   1034 	y2 = 0;										\
   1035     }											\
   1036     if (y2 >= src_image->bits.height)							\
   1037     {											\
   1038 	weight2 = 0;									\
   1039 	y2 = src_image->bits.height - 1;						\
   1040     }											\
   1041     src1 = src_first_line + src_stride * y1;						\
   1042     src2 = src_first_line + src_stride * y2;						\
   1043 											\
   1044     if (left_pad > 0)									\
   1045     {											\
   1046 	buf1[0] = buf1[1] = 0;								\
   1047 	buf2[0] = buf2[1] = 0;								\
   1048 	scanline_func (dst, mask,							\
   1049 		       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);		\
   1050 	dst += left_pad;								\
   1051 	if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1052 	    mask += left_pad;								\
   1053     }											\
   1054     if (left_tz > 0)									\
   1055     {											\
    /* left transition zone: the left sample is zero padding, the right */	\
    /* sample is the first image pixel; only the fraction of vx is passed */	\
   1056 	buf1[0] = 0;									\
   1057 	buf1[1] = src1[0];								\
   1058 	buf2[0] = 0;									\
   1059 	buf2[1] = src2[0];								\
   1060 	scanline_func (dst, mask,							\
   1061 		       buf1, buf2, left_tz, weight1, weight2,				\
   1062 		       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
   1063 	dst += left_tz;									\
   1064 	if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1065 	    mask += left_tz;								\
   1066 	vx += left_tz * unit_x;								\
   1067     }											\
   1068     if (width > 0)									\
   1069     {											\
   1070 	scanline_func (dst, mask,							\
   1071 		       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
   1072 	dst += width;									\
   1073 	if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1074 	    mask += width;								\
   1075 	vx += width * unit_x;								\
   1076     }											\
   1077     if (right_tz > 0)									\
   1078     {											\
    /* right transition zone: the left sample is the last image pixel, */	\
    /* the right sample is zero padding */					\
   1079 	buf1[0] = src1[src_image->bits.width - 1];					\
   1080 	buf1[1] = 0;									\
   1081 	buf2[0] = src2[src_image->bits.width - 1];					\
   1082 	buf2[1] = 0;									\
   1083 	scanline_func (dst, mask,							\
   1084 		       buf1, buf2, right_tz, weight1, weight2,				\
   1085 		       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
   1086 	dst += right_tz;								\
   1087 	if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
   1088 	    mask += right_tz;								\
   1089     }											\
   1090     if (right_pad > 0)									\
   1091     {											\
   1092 	buf1[0] = buf1[1] = 0;								\
   1093 	buf2[0] = buf2[1] = 0;								\
   1094 	scanline_func (dst, mask,							\
   1095 		       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);		\
   1096     }											\
   1097 }											\
   1098 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
   1099 {											\
   1100     int32_t	    num_pixels;								\
   1101     int32_t	    width_remain;							\
   1102     src_type_t *    src_line_top;							\
   1103     src_type_t *    src_line_bottom;							\
   1104     src_type_t	    buf1[2];								\
   1105     src_type_t	    buf2[2];								\
   1106     src_type_t	    extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];			\
   1107     src_type_t	    extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];			\
   1108     int		    i, j;								\
   1109 											\
   1110     repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);				\
   1111     repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);				\
   1112     src_line_top = src_first_line + src_stride * y1;					\
   1113     src_line_bottom = src_first_line + src_stride * y2;					\
   1114 											\
   1115     if (need_src_extension)								\
   1116     {											\
    /* copy the two source rows repeatedly until src_width pixels are filled */	\
   1117 	for (i=0; i<src_width;)								\
   1118 	{										\
   1119 	    for (j=0; j<src_image->bits.width; j++, i++)				\
   1120 	    {										\
   1121 		extended_src_line0[i] = src_line_top[j];				\
   1122 		extended_src_line1[i] = src_line_bottom[j];				\
   1123 	    }										\
   1124 	}										\
   1125 											\
   1126 	src_line_top = &extended_src_line0[0];						\
   1127 	src_line_bottom = &extended_src_line1[0];					\
   1128     }											\
   1129 											\
   1130     /* Top & Bottom wrap around buffer */						\
   1131     buf1[0] = src_line_top[src_width - 1];						\
   1132     buf1[1] = src_line_top[0];								\
   1133     buf2[0] = src_line_bottom[src_width - 1];						\
   1134     buf2[1] = src_line_bottom[0];							\
   1135 											\
   1136     width_remain = width;								\
   1137 											\
   1138     while (width_remain > 0)								\
   1139     {											\
   1140 	/* We use src_width_fixed because it can make vx in original source range */	\
   1141 	repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);				\
   1142 											\
   1143 	/* Wrap around part */								\
   1144 	if (pixman_fixed_to_int (vx) == src_width - 1)					\
   1145 	{										\
   1146 	    /* for positive unit_x							\
   1147 	     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed		\
   1148 	     *										\
   1149 	     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
   1150 	     * So we are safe from overflow.						\
   1151 	     */										\
   1152 	    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;	\
   1153 											\
   1154 	    if (num_pixels > width_remain)						\
   1155 		num_pixels = width_remain;						\
   1156 											\
   1157 	    scanline_func (dst, mask, buf1, buf2, num_pixels,				\
   1158 			   weight1, weight2, pixman_fixed_frac(vx),			\
   1159 			   unit_x, src_width_fixed, FALSE);				\
   1160 											\
   1161 	    width_remain -= num_pixels;							\
   1162 	    vx += num_pixels * unit_x;							\
   1163 	    dst += num_pixels;								\
   1164 											\
   1165 	    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
   1166 		mask += num_pixels;							\
   1167 											\
   1168 	    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);			\
   1169 	}										\
   1170 											\
   1171 	/* Normal scanline composite */							\
   1172 	if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)		\
   1173 	{										\
   1174 	    /* for positive unit_x							\
   1175 	     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)	\
   1176 	     *										\
   1177 	     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
   1178 	     * So we are safe from overflow here.					\
   1179 	     */										\
   1180 	    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)	\
   1181 			  / unit_x) + 1;						\
   1182 											\
   1183 	    if (num_pixels > width_remain)						\
   1184 		num_pixels = width_remain;						\
   1185 											\
   1186 	    scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,	\
   1187 			   weight1, weight2, vx, unit_x, src_width_fixed, FALSE);	\
   1188 											\
   1189 	    width_remain -= num_pixels;							\
   1190 	    vx += num_pixels * unit_x;							\
   1191 	    dst += num_pixels;								\
   1192 											\
   1193 	    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
   1194 	        mask += num_pixels;							\
   1195 	}										\
   1196     }											\
   1197 }											\
   1198 else											\
   1199 {											\
    /* any other repeat_mode (e.g. COVER): no padding or wrapping is handled */	\
    /* here, the whole scanline goes through a single call */				\
   1200     scanline_func (dst, mask, src_first_line + src_stride * y1,				\
   1201 		   src_first_line + src_stride * y2, width,				\
   1202 		   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
   1203 }											\
   1204    }												\
   1205 }
   1206 
   1207 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
    /*
     * Forwards all arguments to FAST_BILINEAR_MAINLOOP_INT, prefixing
     * scale_func_name with '_' so that the generated function is named
     * fast_composite_scaled_bilinear_<scale_func_name>.
     */
   1208 #define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
   1209 			  dst_type_t, repeat_mode, flags)				\
   1210 FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
   1211 			  dst_type_t, repeat_mode, flags)
   1212 
    /*
     * Flag set shared by every SIMPLE_BILINEAR_* entry macro below; each entry
     * ORs in either a repeat flag plus FAST_PATH_X_UNIT_POSITIVE, or
     * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR.
     */
   1213 #define SCALED_BILINEAR_FLAGS						\
   1214    (FAST_PATH_SCALE_TRANSFORM	|					\
   1215     FAST_PATH_NO_ALPHA_MAP	|					\
   1216     FAST_PATH_BILINEAR_FILTER	|					\
   1217     FAST_PATH_NO_ACCESSORS	|					\
   1218     FAST_PATH_NARROW_FORMAT)
   1219 
    /*
     * Bilinear scaling, no mask (PIXMAN_null, flags 0), PAD repeat.
     * Expands to one brace-enclosed initializer (presumably a fast path table
     * entry) for PIXMAN_OP_<op>, PIXMAN_<s> -> PIXMAN_<d>, naming the function
     * fast_composite_scaled_bilinear_<func>_pad_<op>.
     */
   1220 #define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
   1221    {   PIXMAN_OP_ ## op,						\
   1222 PIXMAN_ ## s,							\
   1223 (SCALED_BILINEAR_FLAGS		|				\
   1224  FAST_PATH_PAD_REPEAT		|				\
   1225  FAST_PATH_X_UNIT_POSITIVE),					\
   1226 PIXMAN_null, 0,							\
   1227 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1228 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
   1229    }
   1230 
    /*
     * Bilinear scaling, no mask (PIXMAN_null, flags 0), NONE repeat.
     * Names the function fast_composite_scaled_bilinear_<func>_none_<op>.
     */
   1231 #define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
   1232    {   PIXMAN_OP_ ## op,						\
   1233 PIXMAN_ ## s,							\
   1234 (SCALED_BILINEAR_FLAGS		|				\
   1235  FAST_PATH_NONE_REPEAT		|				\
   1236  FAST_PATH_X_UNIT_POSITIVE),					\
   1237 PIXMAN_null, 0,							\
   1238 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1239 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
   1240    }
   1241 
    /*
     * Bilinear scaling, no mask (PIXMAN_null, flags 0), COVER variant.
     * The source flags carry FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR instead of a
     * repeat flag and omit FAST_PATH_X_UNIT_POSITIVE. Names the function
     * fast_composite_scaled_bilinear_<func>_cover_<op>.
     */
   1242 #define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
   1243    {   PIXMAN_OP_ ## op,						\
   1244 PIXMAN_ ## s,							\
   1245 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
   1246 PIXMAN_null, 0,							\
   1247 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1248 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
   1249    }
   1250 
    /*
     * Bilinear scaling, no mask (PIXMAN_null, flags 0), NORMAL repeat.
     * Names the function fast_composite_scaled_bilinear_<func>_normal_<op>.
     */
   1251 #define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)			\
   1252    {   PIXMAN_OP_ ## op,						\
   1253 PIXMAN_ ## s,							\
   1254 (SCALED_BILINEAR_FLAGS		|				\
   1255  FAST_PATH_NORMAL_REPEAT	|				\
   1256  FAST_PATH_X_UNIT_POSITIVE),					\
   1257 PIXMAN_null, 0,							\
   1258 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1259 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
   1260    }
   1261 
    /*
     * Bilinear scaling, PIXMAN_a8 mask with unified-alpha MASK_FLAGS, PAD
     * repeat. Names the function
     * fast_composite_scaled_bilinear_<func>_pad_<op>.
     */
   1262 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
   1263    {   PIXMAN_OP_ ## op,						\
   1264 PIXMAN_ ## s,							\
   1265 (SCALED_BILINEAR_FLAGS		|				\
   1266  FAST_PATH_PAD_REPEAT		|				\
   1267  FAST_PATH_X_UNIT_POSITIVE),					\
   1268 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
   1269 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1270 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
   1271    }
   1272 
    /*
     * Bilinear scaling, PIXMAN_a8 mask with unified-alpha MASK_FLAGS, NONE
     * repeat. Names the function
     * fast_composite_scaled_bilinear_<func>_none_<op>.
     */
   1273 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
   1274    {   PIXMAN_OP_ ## op,						\
   1275 PIXMAN_ ## s,							\
   1276 (SCALED_BILINEAR_FLAGS		|				\
   1277  FAST_PATH_NONE_REPEAT		|				\
   1278  FAST_PATH_X_UNIT_POSITIVE),					\
   1279 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
   1280 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1281 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
   1282    }
   1283 
    /*
     * Bilinear scaling, PIXMAN_a8 mask with unified-alpha MASK_FLAGS, COVER
     * variant (FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, no repeat flag). Names
     * the function fast_composite_scaled_bilinear_<func>_cover_<op>.
     */
   1284 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
   1285    {   PIXMAN_OP_ ## op,						\
   1286 PIXMAN_ ## s,							\
   1287 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
   1288 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
   1289 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1290 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
   1291    }
   1292 
    /*
     * Bilinear scaling, PIXMAN_a8 mask with unified-alpha MASK_FLAGS, NORMAL
     * repeat. Names the function
     * fast_composite_scaled_bilinear_<func>_normal_<op>.
     */
   1293 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
   1294    {   PIXMAN_OP_ ## op,						\
   1295 PIXMAN_ ## s,							\
   1296 (SCALED_BILINEAR_FLAGS		|				\
   1297  FAST_PATH_NORMAL_REPEAT	|				\
   1298  FAST_PATH_X_UNIT_POSITIVE),					\
   1299 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
   1300 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1301 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
   1302    }
   1303 
    /*
     * Bilinear scaling, PIXMAN_solid mask with unified-alpha MASK_FLAGS, PAD
     * repeat. Names the function
     * fast_composite_scaled_bilinear_<func>_pad_<op>.
     */
   1304 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
   1305    {   PIXMAN_OP_ ## op,						\
   1306 PIXMAN_ ## s,							\
   1307 (SCALED_BILINEAR_FLAGS		|				\
   1308  FAST_PATH_PAD_REPEAT		|				\
   1309  FAST_PATH_X_UNIT_POSITIVE),					\
   1310 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   1311 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1312 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
   1313    }
   1314 
    /*
     * Bilinear scaling, PIXMAN_solid mask with unified-alpha MASK_FLAGS, NONE
     * repeat. Names the function
     * fast_composite_scaled_bilinear_<func>_none_<op>.
     */
   1315 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
   1316    {   PIXMAN_OP_ ## op,						\
   1317 PIXMAN_ ## s,							\
   1318 (SCALED_BILINEAR_FLAGS		|				\
   1319  FAST_PATH_NONE_REPEAT		|				\
   1320  FAST_PATH_X_UNIT_POSITIVE),					\
   1321 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   1322 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1323 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
   1324    }
   1325 
    /*
     * Bilinear scaling, PIXMAN_solid mask with unified-alpha MASK_FLAGS, COVER
     * variant (FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, no repeat flag). Names
     * the function fast_composite_scaled_bilinear_<func>_cover_<op>.
     */
   1326 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
   1327    {   PIXMAN_OP_ ## op,						\
   1328 PIXMAN_ ## s,							\
   1329 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR,	\
   1330 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   1331 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1332 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
   1333    }
   1334 
    /*
     * Bilinear scaling, PIXMAN_solid mask with unified-alpha MASK_FLAGS, NORMAL
     * repeat. Names the function
     * fast_composite_scaled_bilinear_<func>_normal_<op>.
     */
   1335 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)	\
   1336    {   PIXMAN_OP_ ## op,						\
   1337 PIXMAN_ ## s,							\
   1338 (SCALED_BILINEAR_FLAGS		|				\
   1339  FAST_PATH_NORMAL_REPEAT	|				\
   1340  FAST_PATH_X_UNIT_POSITIVE),					\
   1341 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
   1342 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
   1343 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
   1344    }
   1345 
   1346 /* Prefer the use of 'cover' variant, because it is faster */
    /*
     * Expands to the four unmasked bilinear entries for (op, s, d, func) as a
     * comma-separated list with no trailing comma, in the order COVER, NONE,
     * PAD, NORMAL, so the cover entry is listed first.
     */
   1347 #define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)				\
   1348    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),			\
   1349    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),			\
   1350    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),			\
   1351    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
   1352 
    /*
     * Expands to the four a8-mask bilinear entries for (op, s, d, func) in the
     * order COVER, NONE, PAD, NORMAL (comma-separated, no trailing comma).
     */
   1353 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)			\
   1354    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
   1355    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
   1356    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),		\
   1357    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
   1358 
    /*
     * Expands to the four solid-mask bilinear entries for (op, s, d, func) in
     * the order COVER, NONE, PAD, NORMAL (comma-separated, no trailing comma).
     */
   1359 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)		\
   1360    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
   1361    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
   1362    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),		\
   1363    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
   1364 
   1365 #endif