pixman-inlines.h (49829B)
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ 2 /* 3 * Copyright © 2000 SuSE, Inc. 4 * Copyright © 2007 Red Hat, Inc. 5 * 6 * Permission to use, copy, modify, distribute, and sell this software and its 7 * documentation for any purpose is hereby granted without fee, provided that 8 * the above copyright notice appear in all copies and that both that 9 * copyright notice and this permission notice appear in supporting 10 * documentation, and that the name of SuSE not be used in advertising or 11 * publicity pertaining to distribution of the software without specific, 12 * written prior permission. SuSE makes no representations about the 13 * suitability of this software for any purpose. It is provided "as is" 14 * without express or implied warranty. 15 * 16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE 18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 22 * 23 * Author: Keith Packard, SuSE, Inc. 24 */ 25 26 #ifndef PIXMAN_FAST_PATH_H__ 27 #define PIXMAN_FAST_PATH_H__ 28 29 #include "pixman-private.h" 30 31 #define PIXMAN_REPEAT_COVER -1 32 33 /* Flags describing input parameters to fast path macro template. 34 * Turning on some flag values may indicate that 35 * "some property X is available so template can use this" or 36 * "some property X should be handled by template". 37 * 38 * FLAG_HAVE_SOLID_MASK 39 * Input mask is solid so template should handle this. 40 * 41 * FLAG_HAVE_NON_SOLID_MASK 42 * Input mask is bits mask so template should handle this. 43 * 44 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually 45 * exclusive. 
(It's not allowed to turn both flags on) 46 */ 47 #define FLAG_NONE (0) 48 #define FLAG_HAVE_SOLID_MASK (1 << 1) 49 #define FLAG_HAVE_NON_SOLID_MASK (1 << 2) 50 51 /* To avoid too short repeated scanline function calls, extend source 52 * scanlines having width less than below constant value. 53 */ 54 #define REPEAT_NORMAL_MIN_WIDTH 64 55 56 static force_inline pixman_bool_t 57 repeat (pixman_repeat_t repeat, int *c, int size) 58 { 59 if (repeat == PIXMAN_REPEAT_NONE) 60 { 61 if (*c < 0 || *c >= size) 62 return FALSE; 63 } 64 else if (repeat == PIXMAN_REPEAT_NORMAL) 65 { 66 while (*c >= size) 67 *c -= size; 68 while (*c < 0) 69 *c += size; 70 } 71 else if (repeat == PIXMAN_REPEAT_PAD) 72 { 73 *c = CLIP (*c, 0, size - 1); 74 } 75 else /* REFLECT */ 76 { 77 *c = MOD (*c, size * 2); 78 if (*c >= size) 79 *c = size * 2 - *c - 1; 80 } 81 return TRUE; 82 } 83 84 static force_inline int 85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x) 86 { 87 return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & 88 ((1 << BILINEAR_INTERPOLATION_BITS) - 1); 89 } 90 91 #if BILINEAR_INTERPOLATION_BITS <= 4 92 /* Inspired by Filter_32_opaque from Skia */ 93 static force_inline uint32_t 94 bilinear_interpolation (uint32_t tl, uint32_t tr, 95 uint32_t bl, uint32_t br, 96 int distx, int disty) 97 { 98 int distxy, distxiy, distixy, distixiy; 99 uint32_t lo, hi; 100 101 distx <<= (4 - BILINEAR_INTERPOLATION_BITS); 102 disty <<= (4 - BILINEAR_INTERPOLATION_BITS); 103 104 distxy = distx * disty; 105 distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ 106 distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ 107 distixiy = 108 16 * 16 - (disty << 4) - 109 (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ 110 111 lo = (tl & 0xff00ff) * distixiy; 112 hi = ((tl >> 8) & 0xff00ff) * distixiy; 113 114 lo += (tr & 0xff00ff) * distxiy; 115 hi += ((tr >> 8) & 0xff00ff) * distxiy; 116 117 lo += (bl & 0xff00ff) * distixy; 118 hi += ((bl >> 8) & 0xff00ff) * distixy; 119 120 lo += (br & 
0xff00ff) * distxy; 121 hi += ((br >> 8) & 0xff00ff) * distxy; 122 123 return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); 124 } 125 126 #else 127 #if SIZEOF_LONG > 4 128 129 static force_inline uint32_t 130 bilinear_interpolation (uint32_t tl, uint32_t tr, 131 uint32_t bl, uint32_t br, 132 int distx, int disty) 133 { 134 uint64_t distxy, distxiy, distixy, distixiy; 135 uint64_t tl64, tr64, bl64, br64; 136 uint64_t f, r; 137 138 distx <<= (8 - BILINEAR_INTERPOLATION_BITS); 139 disty <<= (8 - BILINEAR_INTERPOLATION_BITS); 140 141 distxy = distx * disty; 142 distxiy = distx * (256 - disty); 143 distixy = (256 - distx) * disty; 144 distixiy = (256 - distx) * (256 - disty); 145 146 /* Alpha and Blue */ 147 tl64 = tl & 0xff0000ff; 148 tr64 = tr & 0xff0000ff; 149 bl64 = bl & 0xff0000ff; 150 br64 = br & 0xff0000ff; 151 152 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; 153 r = f & 0x0000ff0000ff0000ull; 154 155 /* Red and Green */ 156 tl64 = tl; 157 tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); 158 159 tr64 = tr; 160 tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); 161 162 bl64 = bl; 163 bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); 164 165 br64 = br; 166 br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); 167 168 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; 169 r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); 170 171 return (uint32_t)(r >> 16); 172 } 173 174 #else 175 176 static force_inline uint32_t 177 bilinear_interpolation (uint32_t tl, uint32_t tr, 178 uint32_t bl, uint32_t br, 179 int distx, int disty) 180 { 181 int distxy, distxiy, distixy, distixiy; 182 uint32_t f, r; 183 184 distx <<= (8 - BILINEAR_INTERPOLATION_BITS); 185 disty <<= (8 - BILINEAR_INTERPOLATION_BITS); 186 187 distxy = distx * disty; 188 distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ 189 distixy = (disty << 8) - distxy; /* disty 
* (256 - distx) */ 190 distixiy = 191 256 * 256 - (disty << 8) - 192 (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ 193 194 /* Blue */ 195 r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy 196 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; 197 198 /* Green */ 199 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy 200 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; 201 r |= f & 0xff000000; 202 203 tl >>= 16; 204 tr >>= 16; 205 bl >>= 16; 206 br >>= 16; 207 r >>= 16; 208 209 /* Red */ 210 f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy 211 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; 212 r |= f & 0x00ff0000; 213 214 /* Alpha */ 215 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy 216 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; 217 r |= f & 0xff000000; 218 219 return r; 220 } 221 222 #endif 223 #endif // BILINEAR_INTERPOLATION_BITS <= 4 224 225 static force_inline argb_t 226 bilinear_interpolation_float (argb_t tl, argb_t tr, 227 argb_t bl, argb_t br, 228 float distx, float disty) 229 { 230 float distxy, distxiy, distixy, distixiy; 231 argb_t r; 232 233 distxy = distx * disty; 234 distxiy = distx * (1.f - disty); 235 distixy = (1.f - distx) * disty; 236 distixiy = (1.f - distx) * (1.f - disty); 237 238 r.a = tl.a * distixiy + tr.a * distxiy + 239 bl.a * distixy + br.a * distxy; 240 r.r = tl.r * distixiy + tr.r * distxiy + 241 bl.r * distixy + br.r * distxy; 242 r.g = tl.g * distixiy + tr.g * distxiy + 243 bl.g * distixy + br.g * distxy; 244 r.b = tl.b * distixiy + tr.b * distxiy + 245 bl.b * distixy + br.b * distxy; 246 247 return r; 248 } 249 250 /* 251 * For each scanline fetched from source image with PAD repeat: 252 * - calculate how many pixels need to be padded on the left side 253 * - calculate how many pixels need to be padded on the right side 254 * - update width to only count pixels which are fetched from the image 255 * All this 
information is returned via 'width', 'left_pad', 'right_pad' 256 * arguments. The code is assuming that 'unit_x' is positive. 257 * 258 * Note: 64-bit math is used in order to avoid potential overflows, which 259 * is probably excessive in many cases. This particular function 260 * may need its own correctness test and performance tuning. 261 */ 262 static force_inline void 263 pad_repeat_get_scanline_bounds (int32_t source_image_width, 264 pixman_fixed_t vx, 265 pixman_fixed_t unit_x, 266 int32_t * width, 267 int32_t * left_pad, 268 int32_t * right_pad) 269 { 270 int64_t max_vx = (int64_t) source_image_width << 16; 271 int64_t tmp; 272 if (vx < 0) 273 { 274 tmp = ((int64_t) unit_x - 1 - vx) / unit_x; 275 if (tmp > *width) 276 { 277 *left_pad = *width; 278 *width = 0; 279 } 280 else 281 { 282 *left_pad = (int32_t) tmp; 283 *width -= (int32_t) tmp; 284 } 285 } 286 else 287 { 288 *left_pad = 0; 289 } 290 tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; 291 if (tmp < 0) 292 { 293 *right_pad = *width; 294 *width = 0; 295 } 296 else if (tmp >= *width) 297 { 298 *right_pad = 0; 299 } 300 else 301 { 302 *right_pad = *width - (int32_t) tmp; 303 *width = (int32_t) tmp; 304 } 305 } 306 307 /* A macroified version of specialized nearest scalers for some 308 * common 8888 and 565 formats. It supports SRC and OVER ops. 309 * 310 * There are two repeat versions, one that handles repeat normal, 311 * and one without repeat handling that only works if the src region 312 * used is completely covered by the pre-repeated source samples. 313 * 314 * The loops are unrolled to process two pixels per iteration for better 315 * performance on most CPU architectures (superscalar processors 316 * can issue several operations simultaneously, other processors can hide 317 * instructions latencies by pipelining operations). Unrolling more 318 * does not make much sense because the compiler will start running out 319 * of spare registers soon. 
320 */ 321 322 #define GET_8888_ALPHA(s) ((s) >> 24) 323 /* This is not actually used since we don't have an OVER with 324 565 source, but it is needed to build. */ 325 #define GET_0565_ALPHA(s) 0xff 326 #define GET_x888_ALPHA(s) 0xff 327 328 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ 329 src_type_t, dst_type_t, OP, repeat_mode) \ 330 static force_inline void \ 331 scanline_func_name (dst_type_t *dst, \ 332 const src_type_t *src, \ 333 int32_t w, \ 334 pixman_fixed_t vx, \ 335 pixman_fixed_t unit_x, \ 336 pixman_fixed_t src_width_fixed, \ 337 pixman_bool_t fully_transparent_src) \ 338 { \ 339 uint32_t d; \ 340 src_type_t s1, s2; \ 341 uint8_t a1, a2; \ 342 int x1, x2; \ 343 \ 344 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ 345 return; \ 346 \ 347 if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ 348 abort(); \ 349 \ 350 while ((w -= 2) >= 0) \ 351 { \ 352 x1 = pixman_fixed_to_int (vx); \ 353 vx += unit_x; \ 354 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ 355 { \ 356 /* This works because we know that unit_x is positive */ \ 357 while (vx >= 0) \ 358 vx -= src_width_fixed; \ 359 } \ 360 s1 = *(src + x1); \ 361 \ 362 x2 = pixman_fixed_to_int (vx); \ 363 vx += unit_x; \ 364 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ 365 { \ 366 /* This works because we know that unit_x is positive */ \ 367 while (vx >= 0) \ 368 vx -= src_width_fixed; \ 369 } \ 370 s2 = *(src + x2); \ 371 \ 372 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ 373 { \ 374 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ 375 a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ 376 \ 377 if (a1 == 0xff) \ 378 { \ 379 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ 380 } \ 381 else if (s1) \ 382 { \ 383 d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \ 384 s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ 385 a1 ^= 0xff; \ 386 UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ 387 *dst = convert_8888_to_ ## DST_FORMAT 
(d); \ 388 } \ 389 dst++; \ 390 \ 391 if (a2 == 0xff) \ 392 { \ 393 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ 394 } \ 395 else if (s2) \ 396 { \ 397 d = convert_## DST_FORMAT ## _to_8888 (*dst); \ 398 s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \ 399 a2 ^= 0xff; \ 400 UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ 401 *dst = convert_8888_to_ ## DST_FORMAT (d); \ 402 } \ 403 dst++; \ 404 } \ 405 else /* PIXMAN_OP_SRC */ \ 406 { \ 407 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ 408 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \ 409 } \ 410 } \ 411 \ 412 if (w & 1) \ 413 { \ 414 x1 = pixman_fixed_to_int (vx); \ 415 s1 = *(src + x1); \ 416 \ 417 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ 418 { \ 419 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ 420 \ 421 if (a1 == 0xff) \ 422 { \ 423 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ 424 } \ 425 else if (s1) \ 426 { \ 427 d = convert_## DST_FORMAT ## _to_8888 (*dst); \ 428 s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \ 429 a1 ^= 0xff; \ 430 UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ 431 *dst = convert_8888_to_ ## DST_FORMAT (d); \ 432 } \ 433 dst++; \ 434 } \ 435 else /* PIXMAN_OP_SRC */ \ 436 { \ 437 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \ 438 } \ 439 } \ 440 } 441 442 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ 443 dst_type_t, repeat_mode, have_mask, mask_is_solid) \ 444 static void \ 445 fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ 446 pixman_composite_info_t *info) \ 447 { \ 448 PIXMAN_COMPOSITE_ARGS (info); \ 449 dst_type_t *dst_line; \ 450 mask_type_t *mask_line; \ 451 src_type_t *src_first_line; \ 452 int y; \ 453 pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \ 454 pixman_fixed_t max_vy; \ 455 pixman_vector_t v; \ 456 pixman_fixed_t vx, vy; \ 457 pixman_fixed_t unit_x, unit_y; \ 458 int32_t left_pad, right_pad; \ 459 \ 460 
src_type_t *src; \ 461 dst_type_t *dst; \ 462 mask_type_t solid_mask; \ 463 const mask_type_t *mask = &solid_mask; \ 464 int src_stride, mask_stride, dst_stride; \ 465 \ 466 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ 467 if (have_mask) \ 468 { \ 469 if (mask_is_solid) \ 470 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ 471 else \ 472 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ 473 mask_stride, mask_line, 1); \ 474 } \ 475 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ 476 * transformed from destination space to source space */ \ 477 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ 478 \ 479 /* reference point is the center of the pixel */ \ 480 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ 481 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ 482 v.vector[2] = pixman_fixed_1; \ 483 \ 484 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ 485 return; \ 486 \ 487 unit_x = src_image->common.transform->matrix[0][0]; \ 488 unit_y = src_image->common.transform->matrix[1][1]; \ 489 \ 490 /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ 491 v.vector[0] -= pixman_fixed_e; \ 492 v.vector[1] -= pixman_fixed_e; \ 493 \ 494 vx = v.vector[0]; \ 495 vy = v.vector[1]; \ 496 \ 497 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ 498 { \ 499 max_vy = pixman_int_to_fixed (src_image->bits.height); \ 500 \ 501 /* Clamp repeating positions inside the actual samples */ \ 502 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ 503 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ 504 } \ 505 \ 506 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ 507 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ 508 { \ 509 pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ 510 &width, &left_pad, &right_pad); 
\ 511 vx += left_pad * unit_x; \ 512 } \ 513 \ 514 while (--height >= 0) \ 515 { \ 516 dst = dst_line; \ 517 dst_line += dst_stride; \ 518 if (have_mask && !mask_is_solid) \ 519 { \ 520 mask = mask_line; \ 521 mask_line += mask_stride; \ 522 } \ 523 \ 524 y = pixman_fixed_to_int (vy); \ 525 vy += unit_y; \ 526 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ 527 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ 528 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ 529 { \ 530 repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ 531 src = src_first_line + src_stride * y; \ 532 if (left_pad > 0) \ 533 { \ 534 scanline_func (mask, dst, \ 535 src + src_image->bits.width - src_image->bits.width + 1, \ 536 left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \ 537 } \ 538 if (width > 0) \ 539 { \ 540 scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ 541 dst + left_pad, src + src_image->bits.width, width, \ 542 vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \ 543 } \ 544 if (right_pad > 0) \ 545 { \ 546 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ 547 dst + left_pad + width, src + src_image->bits.width, \ 548 right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \ 549 } \ 550 } \ 551 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ 552 { \ 553 static const src_type_t zero[1] = { 0 }; \ 554 if (y < 0 || y >= src_image->bits.height) \ 555 { \ 556 scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \ 557 -pixman_fixed_e, 0, src_width_fixed, TRUE); \ 558 continue; \ 559 } \ 560 src = src_first_line + src_stride * y; \ 561 if (left_pad > 0) \ 562 { \ 563 scanline_func (mask, dst, zero + 1, left_pad, \ 564 -pixman_fixed_e, 0, src_width_fixed, TRUE); \ 565 } \ 566 if (width > 0) \ 567 { \ 568 scanline_func (mask + (mask_is_solid ? 
0 : left_pad), \ 569 dst + left_pad, src + src_image->bits.width, width, \ 570 vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \ 571 } \ 572 if (right_pad > 0) \ 573 { \ 574 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ 575 dst + left_pad + width, zero + 1, right_pad, \ 576 -pixman_fixed_e, 0, src_width_fixed, TRUE); \ 577 } \ 578 } \ 579 else \ 580 { \ 581 src = src_first_line + src_stride * y; \ 582 scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \ 583 unit_x, src_width_fixed, FALSE); \ 584 } \ 585 } \ 586 } 587 588 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ 589 #define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ 590 dst_type_t, repeat_mode, have_mask, mask_is_solid) \ 591 FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ 592 dst_type_t, repeat_mode, have_mask, mask_is_solid) 593 594 #define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ 595 repeat_mode) \ 596 static force_inline void \ 597 scanline_func##scale_func_name##_wrapper ( \ 598 const uint8_t *mask, \ 599 dst_type_t *dst, \ 600 const src_type_t *src, \ 601 int32_t w, \ 602 pixman_fixed_t vx, \ 603 pixman_fixed_t unit_x, \ 604 pixman_fixed_t max_vx, \ 605 pixman_bool_t fully_transparent_src) \ 606 { \ 607 scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ 608 } \ 609 FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ 610 src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) 611 612 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ 613 repeat_mode) \ 614 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ 615 dst_type_t, repeat_mode) 616 617 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ 618 src_type_t, dst_type_t, OP, 
repeat_mode) \ 619 FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ 620 SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ 621 OP, repeat_mode) \ 622 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ 623 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ 624 src_type_t, dst_type_t, repeat_mode) 625 626 627 #define SCALED_NEAREST_FLAGS \ 628 (FAST_PATH_SCALE_TRANSFORM | \ 629 FAST_PATH_NO_ALPHA_MAP | \ 630 FAST_PATH_NEAREST_FILTER | \ 631 FAST_PATH_NO_ACCESSORS | \ 632 FAST_PATH_NARROW_FORMAT) 633 634 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ 635 { PIXMAN_OP_ ## op, \ 636 PIXMAN_ ## s, \ 637 (SCALED_NEAREST_FLAGS | \ 638 FAST_PATH_NORMAL_REPEAT | \ 639 FAST_PATH_X_UNIT_POSITIVE), \ 640 PIXMAN_null, 0, \ 641 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 642 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ 643 } 644 645 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ 646 { PIXMAN_OP_ ## op, \ 647 PIXMAN_ ## s, \ 648 (SCALED_NEAREST_FLAGS | \ 649 FAST_PATH_PAD_REPEAT | \ 650 FAST_PATH_X_UNIT_POSITIVE), \ 651 PIXMAN_null, 0, \ 652 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 653 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ 654 } 655 656 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ 657 { PIXMAN_OP_ ## op, \ 658 PIXMAN_ ## s, \ 659 (SCALED_NEAREST_FLAGS | \ 660 FAST_PATH_NONE_REPEAT | \ 661 FAST_PATH_X_UNIT_POSITIVE), \ 662 PIXMAN_null, 0, \ 663 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 664 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ 665 } 666 667 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ 668 { PIXMAN_OP_ ## op, \ 669 PIXMAN_ ## s, \ 670 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ 671 PIXMAN_null, 0, \ 672 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 673 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ 674 } 675 676 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ 677 { PIXMAN_OP_ ## op, \ 678 PIXMAN_ 
## s, \ 679 (SCALED_NEAREST_FLAGS | \ 680 FAST_PATH_NORMAL_REPEAT | \ 681 FAST_PATH_X_UNIT_POSITIVE), \ 682 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ 683 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 684 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ 685 } 686 687 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ 688 { PIXMAN_OP_ ## op, \ 689 PIXMAN_ ## s, \ 690 (SCALED_NEAREST_FLAGS | \ 691 FAST_PATH_PAD_REPEAT | \ 692 FAST_PATH_X_UNIT_POSITIVE), \ 693 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ 694 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 695 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ 696 } 697 698 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ 699 { PIXMAN_OP_ ## op, \ 700 PIXMAN_ ## s, \ 701 (SCALED_NEAREST_FLAGS | \ 702 FAST_PATH_NONE_REPEAT | \ 703 FAST_PATH_X_UNIT_POSITIVE), \ 704 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ 705 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 706 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ 707 } 708 709 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ 710 { PIXMAN_OP_ ## op, \ 711 PIXMAN_ ## s, \ 712 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ 713 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ 714 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 715 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ 716 } 717 718 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ 719 { PIXMAN_OP_ ## op, \ 720 PIXMAN_ ## s, \ 721 (SCALED_NEAREST_FLAGS | \ 722 FAST_PATH_NORMAL_REPEAT | \ 723 FAST_PATH_X_UNIT_POSITIVE), \ 724 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 725 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 726 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ 727 } 728 729 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ 730 { PIXMAN_OP_ ## op, \ 731 PIXMAN_ ## s, \ 732 (SCALED_NEAREST_FLAGS | \ 733 FAST_PATH_PAD_REPEAT | \ 734 
FAST_PATH_X_UNIT_POSITIVE), \ 735 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 736 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 737 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ 738 } 739 740 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ 741 { PIXMAN_OP_ ## op, \ 742 PIXMAN_ ## s, \ 743 (SCALED_NEAREST_FLAGS | \ 744 FAST_PATH_NONE_REPEAT | \ 745 FAST_PATH_X_UNIT_POSITIVE), \ 746 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 747 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 748 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ 749 } 750 751 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ 752 { PIXMAN_OP_ ## op, \ 753 PIXMAN_ ## s, \ 754 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ 755 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 756 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 757 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ 758 } 759 760 /* Prefer the use of 'cover' variant, because it is faster */ 761 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ 762 SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ 763 SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ 764 SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ 765 SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) 766 767 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ 768 SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ 769 SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ 770 SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) 771 772 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ 773 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ 774 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ 775 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ 776 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) 777 778 /*****************************************************************************/ 779 780 /* 781 * Identify 5 zones in each scanline for 
bilinear scaling. Depending on
 * whether 2 pixels to be interpolated are fetched from the image itself,
 * from the padding area around it or from both image and padding area.
 *
 * The zones are obtained by running pad_repeat_get_scanline_bounds() twice,
 * once for 'vx' and once for 'vx + pixman_fixed_1' (the position of the
 * second pixel of each pair), and comparing the two sets of paddings.
 */
static force_inline void
bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
                                         pixman_fixed_t  vx,
                                         pixman_fixed_t  unit_x,
                                         int32_t *       left_pad,
                                         int32_t *       left_tz,
                                         int32_t *       width,
                                         int32_t *       right_tz,
                                         int32_t *       right_pad)
{
    /* Bounds for the first (left) pixel of every interpolated pair */
    int first_width = *width, first_left, first_right;
    /* Bounds for the second (right) pixel of every interpolated pair */
    int second_width = *width, second_left, second_right;
    int pad_l, tz_l, tz_r, pad_r;

    pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
                                    &first_width, &first_left, &first_right);
    pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
                                    unit_x, &second_width, &second_left,
                                    &second_right);

    pad_l = second_left;
    tz_l = first_left - second_left;
    tz_r = second_right - first_right;
    pad_r = first_right;

    *left_pad = pad_l;
    *left_tz = tz_l;
    *right_tz = tz_r;
    *right_pad = pad_r;
    *width -= pad_l + tz_l + tz_r + pad_r;
}

/*
 * Main loop template for single pass bilinear scaling. It needs to be
 * provided with 'scanline_func' which should do the compositing operation.
 * The needed function has the following prototype:
 *
 *      scanline_func (dst_type_t *        dst,
 *                     const mask_type_t * mask,
 *                     const src_type_t *  src_top,
 *                     const src_type_t *  src_bottom,
 *                     int32_t             width,
 *                     int                 weight_top,
 *                     int                 weight_bottom,
 *                     pixman_fixed_t      vx,
 *                     pixman_fixed_t      unit_x,
 *                     pixman_fixed_t      max_vx,
 *                     pixman_bool_t       zero_src)
 *
 * Where:
 *  dst                 - destination scanline buffer for storing results
 *  mask                - mask buffer (or single value for solid mask)
 *  src_top, src_bottom - two source scanlines
 *  width               - number of pixels to process
 *  weight_top          - weight of the top row for interpolation
 *  weight_bottom       - weight of the bottom row for interpolation
 *  vx                  - initial position for fetching the first pair of
 *                        pixels from the source buffer
 *  unit_x              - position increment needed to move to the next pair
 *                        of pixels
 *  max_vx              - image size as a fixed point value, can be used for
 *                        implementing NORMAL repeat (when it is supported)
 *  zero_src            - boolean hint variable, which is set to TRUE when
 *                        all source pixels are fetched from zero padding
 *                        zone for NONE repeat
 *
 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
 *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
 *       for NONE repeat when handling fuzzy antialiased top or bottom image
 *       edges. Also both top and bottom weight variables are guaranteed to
 *       have a value that is less than BILINEAR_INTERPOLATION_RANGE.
 *       For example, the weights can fit into unsigned byte or be used
 *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
 *       precision.
*/
/*
 * Reviewer notes on FAST_BILINEAR_MAINLOOP_INT (derived from the macro body):
 *
 * Expands to a static function
 *     fast_composite_scaled_bilinear<scale_func_name> (imp, info)
 * that walks the destination rows and calls scanline_func one or more times
 * per row.
 *
 *   scale_func_name - pasted directly onto the generated function name
 *   scanline_func   - invoked as
 *                     scanline_func (dst, mask, top row, bottom row, count,
 *                                    weight1, weight2, vx, unit_x,
 *                                    <10th arg>, <boolean>)
 *                     The 10th argument is 0 for PAD/NONE, src_width_fixed
 *                     for NORMAL and max_vx otherwise; the boolean is TRUE
 *                     only for the zero padding of the NONE repeat.
 *   src_type_t, mask_type_t, dst_type_t
 *                   - element types used for the scanline pointers
 *   repeat_mode     - pasted as PIXMAN_REPEAT_<repeat_mode> and compared with
 *                     PAD, NONE and NORMAL; any other value takes the final
 *                     'else' branch of the row loop
 *   flags           - tested against FLAG_HAVE_SOLID_MASK and
 *                     FLAG_HAVE_NON_SOLID_MASK
 *
 * All repeat_mode/flags tests are written as ordinary 'if' statements on
 * macro arguments; presumably they are compile-time constants at every
 * expansion site so the unused branches are discarded - confirm.
 */
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                   dst_type_t, repeat_mode, flags) \
static void \
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
                                                   pixman_composite_info_t *info) \
{ \
    PIXMAN_COMPOSITE_ARGS (info); \
    dst_type_t *dst_line; \
    mask_type_t *mask_line; \
    src_type_t *src_first_line; \
    int y1, y2; \
    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
    pixman_vector_t v; \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad, left_tz, right_tz, right_pad; \
    \
    dst_type_t *dst; \
    mask_type_t solid_mask; \
    /* mask defaults to the solid value; it is re-pointed at the current */ \
    /* mask scanline in the row loop when FLAG_HAVE_NON_SOLID_MASK is set */ \
    const mask_type_t *mask = &solid_mask; \
    int src_stride, mask_stride, dst_stride; \
    \
    /* The following four are only assigned and read for NORMAL repeat */ \
    int src_width; \
    pixman_fixed_t src_width_fixed; \
    int max_x; \
    pixman_bool_t need_src_extension; \
    \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
    /* Solid mask: fetch the single mask value once. */ \
    /* Non-solid mask: locate the first mask scanline and its stride. */ \
    /* With neither flag, solid_mask stays uninitialized while 'mask' still */ \
    /* points at it; scanline_func is presumably expected to ignore it. */ \
    if (flags & FLAG_HAVE_SOLID_MASK) \
    { \
        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
        mask_stride = 0; \
    } \
    else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
    { \
        PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
                               mask_stride, mask_line, 1); \
    } \
    \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
    \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
    \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
        return; \
    \
    /* Source-space step per destination pixel / row, read from the */ \
    /* diagonal of the transform; off-diagonal terms are not consulted. */ \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
    \
    /* Remove the half-pixel offset again, so that the integer part of a */ \
    /* coordinate indexes the first of the two blended rows and the */ \
    /* fraction yields the weight of the second (see y1/weight2 below). */ \
    v.vector[0] -= pixman_fixed_1 / 2; \
    v.vector[1] -= pixman_fixed_1 / 2; \
    \
    vy = v.vector[1]; \
    \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
    { \
        /* Split the scanline into five consecutive pixel counts: */ \
        /* left_pad, left_tz, width, right_tz, right_pad. */ \
        /* Note that 'width' is passed by address, so the helper may */ \
        /* overwrite it; below it is used as the count of the middle part. */ \
        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
                                                 &left_pad, &left_tz, &width, &right_tz, &right_pad); \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
        { \
            /* PAD repeat does not need special handling for 'transition zones' and */ \
            /* they can be combined with 'padding zones' safely */ \
            left_pad += left_tz; \
            right_pad += right_tz; \
            left_tz = right_tz = 0; \
        } \
        /* Skip the starting x past the left padding pixels; those are */ \
        /* drawn from constant two-pixel buffers with vx = 0. */ \
        v.vector[0] += left_pad * unit_x; \
    } \
    \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
    { \
        /* Wrap the starting x into the image, then compute the integer */ \
        /* source x of the last pixel plus one (product formed in 64 bits). */ \
        vx = v.vector[0]; \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
        max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \
        \
        if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
        { \
            /* Narrow source: grow src_width in whole multiples of the */ \
            /* image width until it reaches REPEAT_NORMAL_MIN_WIDTH or */ \
            /* exceeds max_x. Each row is replicated to this width in the */ \
            /* row loop so that scanline_func is called less often. */ \
            src_width = 0; \
            \
            while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
                src_width += src_image->bits.width; \
            \
            need_src_extension = TRUE; \
        } \
        else \
        { \
            src_width = src_image->bits.width; \
            need_src_extension = FALSE; \
        } \
        \
        src_width_fixed = pixman_int_to_fixed (src_width); \
    } \
    \
    /* One iteration per destination row */ \
    while (--height >= 0) \
    { \
        int weight1, weight2; \
        dst = dst_line; \
        dst_line += dst_stride; \
        vx = v.vector[0]; \
        if (flags & FLAG_HAVE_NON_SOLID_MASK) \
        { \
            mask = mask_line; \
            mask_line += mask_stride; \
        } \
        \
        /* y1/y2: the two source rows to blend; weight1/weight2: their weights */ \
        y1 = pixman_fixed_to_int (vy); \
        weight2 = pixman_fixed_to_bilinear_weight (vy); \
        if (weight2) \
        { \
            /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \
            y2 = y1 + 1; \
            weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \
        } \
        else \
        { \
            /* set both top and bottom row to the same scanline and tweak weights */ \
            y2 = y1; \
            weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \
        } \
        vy += unit_y; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
        { \
            src_type_t *src1, *src2; \
            /* two-pixel scratch rows holding a replicated edge pixel */ \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            /* clamp both rows into the image */ \
            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
            src1 = src_first_line + src_stride * y1; \
            src2 = src_first_line + src_stride * y2; \
            \
            if (left_pad > 0) \
            { \
                /* left padding: leftmost pixel of each row, vx = unit_x = 0 */ \
                buf1[0] = buf1[1] = src1[0]; \
                buf2[0] = buf2[1] = src2[0]; \
                scanline_func (dst, mask, \
                               buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
                dst += left_pad; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += left_pad; \
            } \
            if (width > 0) \
            { \
                /* middle part: sample the real source rows */ \
                scanline_func (dst, mask, \
                               src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
                dst += width; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += width; \
            } \
            if (right_pad > 0) \
            { \
                /* right padding: rightmost pixel of each row, vx = unit_x = 0 */ \
                buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
                buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
                scanline_func (dst, mask, \
                               buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
            } \
        } \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
        { \
            src_type_t *src1, *src2; \
            /* two-pixel scratch rows for the padding and transition zones */ \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
            if (y1 < 0) \
            { \
                weight1 = 0; \
                y1 = 0; \
            } \
            if (y1 >= src_image->bits.height) \
            { \
                weight1 = 0; \
                y1 = src_image->bits.height - 1; \
            } \
            if (y2 < 0) \
            { \
                weight2 = 0; \
                y2 = 0; \
            } \
            if (y2 >= src_image->bits.height) \
            { \
                weight2 = 0; \
                y2 = src_image->bits.height - 1; \
            } \
            src1 = src_first_line + src_stride * y1; \
            src2 = src_first_line + src_stride * y2; \
            \
            if (left_pad > 0) \
            { \
                /* left padding: all-zero taps; the last argument is TRUE */ \
                /* here (presumably marking the source as all zero) */ \
                buf1[0] = buf1[1] = 0; \
                buf2[0] = buf2[1] = 0; \
                scanline_func (dst, mask, \
                               buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
                dst += left_pad; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += left_pad; \
            } \
            if (left_tz > 0) \
            { \
                /* left transition zone: blend zero with the leftmost pixel; */ \
                /* only the fractional part of vx is passed because the */ \
                /* buffer holds just this one pixel pair */ \
                buf1[0] = 0; \
                buf1[1] = src1[0]; \
                buf2[0] = 0; \
                buf2[1] = src2[0]; \
                scanline_func (dst, mask, \
                               buf1, buf2, left_tz, weight1, weight2, \
                               pixman_fixed_frac (vx), unit_x, 0, FALSE); \
                dst += left_tz; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += left_tz; \
                vx += left_tz * unit_x; \
            } \
            if (width > 0) \
            { \
                /* middle part: sample the real source rows */ \
                scanline_func (dst, mask, \
                               src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
                dst += width; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += width; \
                vx += width * unit_x; \
            } \
            if (right_tz > 0) \
            { \
                /* right transition zone: blend the rightmost pixel with zero */ \
                buf1[0] = src1[src_image->bits.width - 1]; \
                buf1[1] = 0; \
                buf2[0] = src2[src_image->bits.width - 1]; \
                buf2[1] = 0; \
                scanline_func (dst, mask, \
                               buf1, buf2, right_tz, weight1, weight2, \
                               pixman_fixed_frac (vx), unit_x, 0, FALSE); \
                dst += right_tz; \
                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                    mask += right_tz; \
            } \
            if (right_pad > 0) \
            { \
                /* right padding: all-zero taps, last argument TRUE again */ \
                buf1[0] = buf1[1] = 0; \
                buf2[0] = buf2[1] = 0; \
                scanline_func (dst, mask, \
                               buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
            } \
        } \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            int32_t num_pixels; \
            int32_t width_remain; \
            src_type_t * src_line_top; \
            src_type_t * src_line_bottom; \
            src_type_t buf1[2]; \
            src_type_t buf2[2]; \
            /* Temporary rows for narrow sources. src_width stays below */ \
            /* REPEAT_NORMAL_MIN_WIDTH*2 because the extension loop above */ \
            /* stops once src_width >= REPEAT_NORMAL_MIN_WIDTH and each */ \
            /* step adds an image width smaller than that constant. */ \
            src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
            src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
            int i, j; \
            \
            /* wrap both rows into the image */ \
            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
            src_line_top = src_first_line + src_stride * y1; \
            src_line_bottom = src_first_line + src_stride * y2; \
            \
            if (need_src_extension) \
            { \
                /* Tile both rows into the temporary buffers; 'i' is */ \
                /* advanced by the inner loop only, one image width at a time */ \
                for (i=0; i<src_width;) \
                { \
                    for (j=0; j<src_image->bits.width; j++, i++) \
                    { \
                        extended_src_line0[i] = src_line_top[j]; \
                        extended_src_line1[i] = src_line_bottom[j]; \
                    } \
                } \
                \
                src_line_top = &extended_src_line0[0]; \
                src_line_bottom = &extended_src_line1[0]; \
            } \
            \
            /* Top & Bottom wrap around buffer */ \
            /* (last pixel followed by first pixel of each row) */ \
            buf1[0] = src_line_top[src_width - 1]; \
            buf1[1] = src_line_top[0]; \
            buf2[0] = src_line_bottom[src_width - 1]; \
            buf2[1] = src_line_bottom[0]; \
            \
            width_remain = width; \
            \
            /* Emit the row in pieces, alternating between the wrap-around */ \
            /* pair and the ordinary (possibly extended) source row */ \
            while (width_remain > 0) \
            { \
                /* We use src_width_fixed because it can make vx in original source range */ \
                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
                \
                /* Wrap around part */ \
                if (pixman_fixed_to_int (vx) == src_width - 1) \
                { \
                    /* for positive unit_x \
                     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
                     * \
                     * vx is in range [0, src_width_fixed - pixman_fixed_e] \
                     * So we are safe from overflow. \
                     */ \
                    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
                    \
                    if (num_pixels > width_remain) \
                        num_pixels = width_remain; \
                    \
                    /* only the fraction of vx is passed: buf1/buf2 hold */ \
                    /* just the {last, first} pixel pair */ \
                    scanline_func (dst, mask, buf1, buf2, num_pixels, \
                                   weight1, weight2, pixman_fixed_frac(vx), \
                                   unit_x, src_width_fixed, FALSE); \
                    \
                    width_remain -= num_pixels; \
                    vx += num_pixels * unit_x; \
                    dst += num_pixels; \
                    \
                    if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                        mask += num_pixels; \
                    \
                    /* vx may have stepped past the row end; wrap it back */ \
                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
                } \
                \
                /* Normal scanline composite */ \
                if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
                { \
                    /* for positive unit_x \
                     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
                     * \
                     * vx is in range [0, src_width_fixed - pixman_fixed_e] \
                     * So we are safe from overflow here. \
                     */ \
                    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
                                  / unit_x) + 1; \
                    \
                    if (num_pixels > width_remain) \
                        num_pixels = width_remain; \
                    \
                    scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
                                   weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
                    \
                    width_remain -= num_pixels; \
                    vx += num_pixels * unit_x; \
                    dst += num_pixels; \
                    \
                    if (flags & FLAG_HAVE_NON_SOLID_MASK) \
                        mask += num_pixels; \
                } \
            } \
        } \
        else \
        { \
            /* Any other repeat_mode (e.g. COVER, see PIXMAN_REPEAT_COVER): */ \
            /* the whole row is composited in one call, with no clamping */ \
            /* or wrapping of y1/y2 done here */ \
            scanline_func (dst, mask, src_first_line + src_stride * y1, \
                           src_first_line + src_stride * y2, width, \
                           weight1, weight2, vx, unit_x, max_vx, FALSE); \
        } \
    } \
}

/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
/* Forwards to FAST_BILINEAR_MAINLOOP_INT with '_' pasted in front of scale_func_name. */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                      dst_type_t, repeat_mode, flags) \
    FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
                               dst_type_t, repeat_mode, flags)

/*
 * Flag set shared by every scaled bilinear table entry below; each entry
 * ORs it into the flags field that follows PIXMAN_<s>.
 */
#define SCALED_BILINEAR_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | \
     FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_BILINEAR_FILTER | \
     FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)

/*
 * Table entry initializers for scaled bilinear fast paths.
 *
 * Every SIMPLE_BILINEAR_*_FAST_PATH_<REPEAT> (op, s, d, func) macro expands
 * to one brace-enclosed initializer listing, in order:
 *     PIXMAN_OP_<op>,
 *     PIXMAN_<s> and its flags,
 *     a mask format and its flags,
 *     PIXMAN_<d> and FAST_PATH_STD_DEST_FLAGS,
 *     fast_composite_scaled_bilinear_<func>_<repeat>_<op>
 * (presumably the fields of a fast path table element - confirm against the
 * table type).
 *
 * The PAD, NONE and NORMAL variants require the matching FAST_PATH_*_REPEAT
 * flag plus FAST_PATH_X_UNIT_POSITIVE; the COVER variants require
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR instead.
 *
 * The first group uses PIXMAN_null with flags 0 in the mask position.
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
    }

#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
    }

/* Same entries with PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA) in the mask position. */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
    }

#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
    }

/* Same entries with PIXMAN_solid and MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA) in the mask position. */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
    }

#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_BILINEAR_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
    }

/* Prefer the use of 'cover' variant, because it is faster */
/*
 * Each aggregate below expands to four comma-separated entries in the order
 * COVER, NONE, PAD, NORMAL. Listing COVER first presumably relies on the
 * table being searched in order - confirm against the lookup code.
 */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)

#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)

#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)

#endif