tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pixman-fast-path.c (95444B)


      1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
      2 /*
      3 * Copyright © 2000 SuSE, Inc.
      4 * Copyright © 2007 Red Hat, Inc.
      5 *
      6 * Permission to use, copy, modify, distribute, and sell this software and its
      7 * documentation for any purpose is hereby granted without fee, provided that
      8 * the above copyright notice appear in all copies and that both that
      9 * copyright notice and this permission notice appear in supporting
     10 * documentation, and that the name of SuSE not be used in advertising or
     11 * publicity pertaining to distribution of the software without specific,
     12 * written prior permission.  SuSE makes no representations about the
     13 * suitability of this software for any purpose.  It is provided "as is"
     14 * without express or implied warranty.
     15 *
     16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
     17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
     18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
     20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
     21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     22 *
     23 * Author:  Keith Packard, SuSE, Inc.
     24 */
     25 
     26 #ifdef HAVE_CONFIG_H
     27 #include <pixman-config.h>
     28 #endif
     29 #include <string.h>
     30 #include <stdlib.h>
     31 #include "pixman-private.h"
     32 #include "pixman-combine32.h"
     33 #include "pixman-inlines.h"
     34 
     35 static force_inline uint32_t
     36 fetch_24 (uint8_t *a)
     37 {
     38    if (((uintptr_t)a) & 1)
     39    {
     40 #ifdef WORDS_BIGENDIAN
     41 return (*a << 16) | (*(uint16_t *)(a + 1));
     42 #else
     43 return *a | (*(uint16_t *)(a + 1) << 8);
     44 #endif
     45    }
     46    else
     47    {
     48 #ifdef WORDS_BIGENDIAN
     49 return (*(uint16_t *)a << 8) | *(a + 2);
     50 #else
     51 return *(uint16_t *)a | (*(a + 2) << 16);
     52 #endif
     53    }
     54 }
     55 
     56 static force_inline void
     57 store_24 (uint8_t *a,
     58          uint32_t v)
     59 {
     60    if (((uintptr_t)a) & 1)
     61    {
     62 #ifdef WORDS_BIGENDIAN
     63 *a = (uint8_t) (v >> 16);
     64 *(uint16_t *)(a + 1) = (uint16_t) (v);
     65 #else
     66 *a = (uint8_t) (v);
     67 *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
     68 #endif
     69    }
     70    else
     71    {
     72 #ifdef WORDS_BIGENDIAN
     73 *(uint16_t *)a = (uint16_t)(v >> 8);
     74 *(a + 2) = (uint8_t)v;
     75 #else
     76 *(uint16_t *)a = (uint16_t)v;
     77 *(a + 2) = (uint8_t)(v >> 16);
     78 #endif
     79    }
     80 }
     81 
/* Porter-Duff OVER for premultiplied a8r8g8b8 pixels:
 *
 *     result = src + dest * (1 - src.alpha)
 *
 * computed per 8-bit component by the UN8x4_* macro from
 * pixman-combine32.h. */
static force_inline uint32_t
over (uint32_t src,
      uint32_t dest)
{
    /* ~src >> 24 == 255 - src.alpha: the factor applied to dest. */
    uint32_t a = ~src >> 24;

    /* dest = dest * a / 255 + src, all four components in parallel. */
    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);

    return dest;
}
     92 
/* Porter-Duff IN: scale all four 8-bit components of x by the 8-bit
 * value y (x * y / 255 per component).  Used to apply an a8 mask to a
 * premultiplied source pixel. */
static force_inline uint32_t
in (uint32_t x,
    uint8_t  y)
{
    /* Widened so the macro's intermediate products don't overflow. */
    uint16_t a = y;

    UN8x4_MUL_UN8 (x, a);

    return x;
}
    103 
/*
 * Naming convention:
 *
 *  op_src_mask_dest
 */

/* OVER with an x8r8g8b8 source (x byte forced opaque), an a8 mask and
 * an a8r8g8b8 destination. */
static void
fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
                                 pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *src, *src_line;
    uint32_t    *dst, *dst_line;
    uint8_t     *mask, *mask_line;
    int src_stride, mask_stride, dst_stride;
    uint8_t m;
    uint32_t s, d;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	src = src_line;
	src_line += src_stride;
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;

	w = width;
	while (w--)
	{
	    m = *mask++;
	    if (m)
	    {
		/* x888 source: ignore the x byte, treat as opaque. */
		s = *src | 0xff000000;

		if (m == 0xff)
		{
		    /* Fully-on mask over an opaque pixel: plain copy. */
		    *dst = s;
		}
		else
		{
		    /* Scale the source by the mask, then OVER. */
		    d = in (s, m);
		    *dst = over (d, *dst);
		}
	    }
	    /* m == 0 contributes nothing; just advance. */
	    src++;
	    dst++;
	}
    }
}
    158 
/* IN with a solid source, an a8 mask and an a8 destination:
 * dest = dest * (src.alpha * mask). */
static void
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
                         pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;
    uint16_t t;	/* scratch for the MUL_UN8 macro */

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    if (srca == 0xff)
    {
	/* Opaque source: the effective factor is just the mask. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    while (w--)
	    {
		m = *mask++;

		if (m == 0)
		    *dst = 0;
		else if (m != 0xff)	/* m == 0xff leaves dst unchanged */
		    *dst = MUL_UN8 (m, *dst, t);

		dst++;
	    }
	}
    }
    else
    {
	/* General case: fold the source alpha into the mask first. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    while (w--)
	    {
		m = *mask++;
		m = MUL_UN8 (m, srca, t);

		if (m == 0)
		    *dst = 0;
		else if (m != 0xff)
		    *dst = MUL_UN8 (m, *dst, t);

		dst++;
	    }
	}
    }
}
    226 
    227 static void
    228 fast_composite_in_8_8 (pixman_implementation_t *imp,
    229                       pixman_composite_info_t *info)
    230 {
    231    PIXMAN_COMPOSITE_ARGS (info);
    232    uint8_t     *dst_line, *dst;
    233    uint8_t     *src_line, *src;
    234    int dst_stride, src_stride;
    235    int32_t w;
    236    uint8_t s;
    237    uint16_t t;
    238 
    239    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    240    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    241 
    242    while (height--)
    243    {
    244 dst = dst_line;
    245 dst_line += dst_stride;
    246 src = src_line;
    247 src_line += src_stride;
    248 w = width;
    249 
    250 while (w--)
    251 {
    252     s = *src++;
    253 
    254     if (s == 0)
    255 	*dst = 0;
    256     else if (s != 0xff)
    257 	*dst = MUL_UN8 (s, *dst, t);
    258 
    259     dst++;
    260 }
    261    }
    262 }
    263 
/* OVER with a solid source, an a8 mask and an a8r8g8b8 destination. */
static void
fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint32_t    *dst_line, *dst, d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    /* Fully transparent solid source: OVER leaves dest untouched. */
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		/* Full mask coverage: opaque source is a plain copy,
		 * otherwise a straight OVER. */
		if (srca == 0xff)
		    *dst = src;
		else
		    *dst = over (src, *dst);
	    }
	    else if (m)
	    {
		/* Partial coverage: scale source by mask, then OVER. */
		d = in (src, m);
		*dst = over (d, *dst);
	    }
	    dst++;
	}
    }
}
    311 
/* ADD with a solid source, an a8r8g8b8 component-alpha mask and an
 * a8r8g8b8 destination: dest = sat(dest + src * mask), per component. */
static void
fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
			   pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, s;
    uint32_t    *dst_line, *dst, d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    /* Adding zero is a no-op. */
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;

	    if (ma)
	    {
		d = *dst;
		s = src;

		/* s = s * ma + d, each 8-bit lane saturating. */
		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);

		*dst = s;
	    }

	    dst++;
	}
    }
}
    357 
/* OVER with a solid source, an a8r8g8b8 component-alpha mask and an
 * a8r8g8b8 destination:
 *
 *     dest = src * mask + dest * (1 - src.alpha * mask)
 *
 * evaluated per component. */
static void
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca, s;
    uint32_t    *dst_line, *dst, d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    /* Transparent solid source: OVER is a no-op. */
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;
	    if (ma == 0xffffffff)
	    {
		/* Full coverage in every component. */
		if (srca == 0xff)
		    *dst = src;
		else
		    *dst = over (src, *dst);
	    }
	    else if (ma)
	    {
		d = *dst;
		s = src;

		/* The macros mutate their first argument in place, so
		 * the order below is significant. */
		UN8x4_MUL_UN8x4 (s, ma);	/* s  = src * mask        */
		UN8x4_MUL_UN8 (ma, srca);	/* ma = mask * src.alpha  */
		ma = ~ma;			/* ma = 1 - ma (per lane) */
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); /* d = d*ma + s */

		*dst = d;
	    }

	    dst++;
	}
    }
}
    413 
/* OVER with a solid source, an a8 mask and a 24-bit (r8g8b8) packed
 * destination accessed through fetch_24/store_24. */
static void
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    /* Transparent solid source: nothing to do. */
    if (src == 0)
	return;

    /* Note bpp of 3: dst advances in byte units, 3 per pixel. */
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		if (srca == 0xff)
		{
		    /* Opaque: skip the read-modify, just store. */
		    d = src;
		}
		else
		{
		    d = fetch_24 (dst);
		    d = over (src, d);
		}
		store_24 (dst, d);
	    }
	    else if (m)
	    {
		/* Partial mask: scale source first, then OVER. */
		d = over (in (src, m), fetch_24 (dst));
		store_24 (dst, d);
	    }
	    dst += 3;	/* 3 bytes per 24-bit pixel */
	}
    }
}
    468 
/* OVER with a solid source, an a8 mask and an r5g6b5 destination.
 * Compositing is done in 8888 space; pixels are converted on the fly. */
static void
fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    /* Transparent solid source: nothing to do. */
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		if (srca == 0xff)
		{
		    /* Opaque source: no need to read the destination. */
		    d = src;
		}
		else
		{
		    d = *dst;
		    d = over (src, convert_0565_to_0888 (d));
		}
		*dst = convert_8888_to_0565 (d);
	    }
	    else if (m)
	    {
		d = *dst;
		d = over (in (src, m), convert_0565_to_0888 (d));
		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}
    524 
/* OVER with a solid source, an a8r8g8b8 component-alpha mask and an
 * r5g6b5 destination.  Same math as the 8888_8888_ca variant, with
 * 0565 <-> 0888 conversion around the destination access. */
static void
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t  src, srca, s;
    uint16_t  src16;	/* source pre-converted to 0565 for the fast path */
    uint16_t *dst_line, *dst;
    uint32_t  d;
    uint32_t *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    /* Transparent solid source: nothing to do. */
    if (src == 0)
	return;

    src16 = convert_8888_to_0565 (src);

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;
	    if (ma == 0xffffffff)
	    {
		if (srca == 0xff)
		{
		    /* Opaque full-coverage: store the cached 0565 pixel. */
		    *dst = src16;
		}
		else
		{
		    d = *dst;
		    d = over (src, convert_0565_to_0888 (d));
		    *dst = convert_8888_to_0565 (d);
		}
	    }
	    else if (ma)
	    {
		d = *dst;
		d = convert_0565_to_0888 (d);

		s = src;

		/* dest = src*mask + dest*(1 - src.alpha*mask); the
		 * macros mutate in place, so order matters. */
		UN8x4_MUL_UN8x4 (s, ma);
		UN8x4_MUL_UN8 (ma, srca);
		ma = ~ma;
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}
    591 
/* OVER with an a8r8g8b8 source and destination, no mask. */
static void
fast_composite_over_8888_8888 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src, s;
    int dst_stride, src_stride;
    uint8_t a;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (a == 0xff)
		*dst = s;		/* opaque: plain copy */
	    else if (s)
		*dst = over (s, *dst);	/* translucent: full OVER */
	    /* s == 0: fully transparent, dest unchanged. */
	    dst++;
	}
    }
}
    626 
    627 static void
    628 fast_composite_src_x888_8888 (pixman_implementation_t *imp,
    629 		      pixman_composite_info_t *info)
    630 {
    631    PIXMAN_COMPOSITE_ARGS (info);
    632    uint32_t    *dst_line, *dst;
    633    uint32_t    *src_line, *src;
    634    int dst_stride, src_stride;
    635    int32_t w;
    636 
    637    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    638    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    639 
    640    while (height--)
    641    {
    642 dst = dst_line;
    643 dst_line += dst_stride;
    644 src = src_line;
    645 src_line += src_stride;
    646 w = width;
    647 
    648 while (w--)
    649     *dst++ = (*src++) | 0xff000000;
    650    }
    651 }
    652 
#if 0
/* OVER with an a8r8g8b8 source and a 24-bit (r8g8b8) destination.
 * NOTE: currently compiled out (#if 0) — kept for reference only. */
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
		       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (a)
	    {
		/* Opaque pixels copy straight through; translucent
		 * ones are composited against the 24-bit dest. */
		if (a == 0xff)
		    d = s;
		else
		    d = over (s, fetch_24 (dst));

		store_24 (dst, d);
	    }
	    dst += 3;	/* 3 bytes per 24-bit pixel */
	}
    }
}
#endif
    695 
/* OVER with an a8r8g8b8 source and an r5g6b5 destination. */
static void
fast_composite_over_8888_0565 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (s)	/* skip fully transparent (zero) pixels */
	    {
		if (a == 0xff)
		{
		    /* Opaque: no need to read the destination. */
		    d = s;
		}
		else
		{
		    d = *dst;
		    d = over (s, convert_0565_to_0888 (d));
		}
		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}
    740 
/* ADD with a8 source and destination: dest = sat(dest + src). */
static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
		pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint8_t s, d;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    if (s)
	    {
		if (s != 0xff)	/* s == 0xff always saturates to 0xff */
		{
		    d = *dst;
		    t = d + s;
		    /* Branchless saturation: if the 16-bit sum carried
		     * past 8 bits, (t >> 8) == 1 and (0 - 1) == 0xffff,
		     * so the OR forces the stored byte to 0xff. */
		    s = t | (0 - (t >> 8));
		}
		*dst = s;
	    }
	    dst++;
	}
    }
}
    781 
/* ADD with r5g6b5 source and destination.  Both pixels are widened to
 * 8888, added with per-component saturation, then narrowed back. */
static void
fast_composite_add_0565_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint16_t    *dst_line, *dst;
    uint32_t	d;
    uint16_t    *src_line, *src;
    uint32_t	s;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    if (s)	/* adding zero changes nothing */
	    {
		d = *dst;
		s = convert_0565_to_8888 (s);
		if (d)	/* only widen/add when dest is non-zero */
		{
		    d = convert_0565_to_8888 (d);
		    UN8x4_ADD_UN8x4 (s, d);	/* saturating per lane */
		}
		*dst = convert_8888_to_0565 (s);
	    }
	    dst++;
	}
    }
}
    823 
/* ADD with a8r8g8b8 source and destination:
 * dest = sat(dest + src), per 8-bit component. */
static void
fast_composite_add_8888_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint32_t s, d;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    if (s)	/* adding zero changes nothing */
	    {
		/* 0xffffffff saturates every lane; store directly. */
		if (s != 0xffffffff)
		{
		    d = *dst;
		    if (d)
			UN8x4_ADD_UN8x4 (s, d);	/* saturating add */
		}
		*dst = s;
	    }
	    dst++;
	}
    }
}
    863 
/* ADD with a solid source, an a8 mask and an a8 destination:
 * dest = sat(dest + src.alpha * mask). */
static void
fast_composite_add_n_8_8 (pixman_implementation_t *imp,
		  pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask;
    int dst_stride, mask_stride;
    int32_t w;
    uint32_t src;
    uint8_t sa;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    sa = (src >> 24);	/* only the alpha of the solid matters for a8 */

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    uint16_t tmp;	/* scratch required by the macros */
	    uint16_t a;
	    uint32_t m, d;
	    uint32_t r;

	    a = *mask++;
	    d = *dst;

	    m = MUL_UN8 (sa, a, tmp);	/* m = src.alpha * mask / 255 */
	    r = ADD_UN8 (m, d, tmp);	/* r = sat(m + dest)          */

	    *dst++ = r;
	}
    }
}
    906 
    907 #ifdef WORDS_BIGENDIAN
    908 #define CREATE_BITMASK(n) (0x80000000 >> (n))
    909 #define UPDATE_BITMASK(n) ((n) >> 1)
    910 #else
    911 #define CREATE_BITMASK(n) (1U << (n))
    912 #define UPDATE_BITMASK(n) ((n) << 1)
    913 #endif
    914 
    915 #define TEST_BIT(p, n)					\
    916    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
    917 #define SET_BIT(p, n)							\
    918    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
    919 
/* ADD with a1 source and destination: OR the source bits into the
 * destination bitmap.  src_x/dest_x are bit offsets, so the lines are
 * fetched at x == 0 and the offsets applied per bit below. */
static void
fast_composite_add_1_1 (pixman_implementation_t *imp,
		pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     *dst_line, *dst;
    uint32_t     *src_line, *src;
    int           dst_stride, src_stride;
    int32_t       w;

    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
                           src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
                           dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	/* Bits are visited from width-1 down to 0; order is irrelevant
	 * since each bit is independent. */
	while (w--)
	{
	    /*
	     * TODO: improve performance by processing uint32_t data instead
	     *       of individual bits
	     */
	    if (TEST_BIT (src, src_x + w))
		SET_BIT (dst, dest_x + w);
	}
    }
}
    954 
/* OVER with a solid source, an a1 mask and an a8r8g8b8 destination.
 * The mask is streamed 32 bits at a time through 'bitcache'; 'bitmask'
 * selects the current bit and UPDATE_BITMASK advances it, becoming 0
 * when the word is exhausted (which triggers a reload). */
static void
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     src, srca;
    uint32_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int          mask_stride, dst_stride;
    uint32_t     bitcache, bitmask;
    int32_t      w;

    if (width <= 0)
	return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    /* Transparent solid source: OVER is a no-op. */
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    /* Skip whole 32-bit words of mask; the residue (mask_x & 31) is
     * handled by the initial bitmask below. */
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
	/* Opaque source: set bits become plain copies. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    /* Current mask word exhausted: fetch the next. */
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = src;
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
    else
    {
	/* Translucent source: set bits are composited with OVER. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = over (src, *dst);
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
}
   1036 
/* OVER with a solid source, an a1 mask and an r5g6b5 destination.
 * Same mask-streaming scheme as fast_composite_over_n_1_8888. */
static void
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t     src, srca;
    uint16_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int          mask_stride, dst_stride;
    uint32_t     bitcache, bitmask;
    int32_t      w;
    uint32_t     d;
    uint16_t     src565;	/* source pre-converted for the opaque path */

    if (width <= 0)
	return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    /* Transparent solid source: nothing to do. */
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    /* Skip whole mask words; the bit residue is applied below. */
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
	/* Opaque source: set bits store the pre-converted pixel. */
	src565 = convert_8888_to_0565 (src);
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    /* Current mask word exhausted: fetch the next. */
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = src565;
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
    else
    {
	/* Translucent source: composite in 8888 space per set bit. */
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		{
		    d = over (src, convert_0565_to_0888 (*dst));
		    *dst = convert_8888_to_0565 (d);
		}
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
}
   1124 
   1125 /*
   1126 * Simple bitblt
   1127 */
   1128 
/* SRC with a solid source and no mask: reduce the a8r8g8b8 solid to the
 * destination's pixel format and delegate to pixman_fill. */
static void
fast_composite_solid_fill (pixman_implementation_t *imp,
                           pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (dest_image->bits.format == PIXMAN_a1)
    {
	/* a1 keeps only the top bit of the alpha byte. */
	src = src >> 31;
    }
    else if (dest_image->bits.format == PIXMAN_a8)
    {
	/* a8 keeps the alpha byte. */
	src = src >> 24;
    }
    else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
             dest_image->bits.format == PIXMAN_b5g6r5)
    {
	src = convert_8888_to_0565 (src);
    }
    /* Other formats: pixman_fill receives the 8888 value as-is. */

    pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
                 PIXMAN_FORMAT_BPP (dest_image->bits.format),
                 dest_x, dest_y,
                 width, height,
                 src);
}
   1158 
   1159 static void
   1160 fast_composite_src_memcpy (pixman_implementation_t *imp,
   1161 		   pixman_composite_info_t *info)
   1162 {
   1163    PIXMAN_COMPOSITE_ARGS (info);
   1164    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
   1165    uint32_t n_bytes = width * bpp;
   1166    int dst_stride, src_stride;
   1167    uint8_t    *dst;
   1168    uint8_t    *src;
   1169 
   1170    src_stride = src_image->bits.rowstride * 4;
   1171    dst_stride = dest_image->bits.rowstride * 4;
   1172 
   1173    src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
   1174    dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
   1175 
   1176    while (height--)
   1177    {
   1178 memcpy (dst, src, n_bytes);
   1179 
   1180 dst += dst_stride;
   1181 src += src_stride;
   1182    }
   1183 }
   1184 
/* Instantiate nearest-neighbour scaling fast paths via the FAST_NEAREST
 * template (provided by pixman-inlines.h, included above) for the SRC and
 * OVER operators across the COVER/NONE/PAD/NORMAL repeat modes, covering
 * 8888 -> 8888, x888 -> 8888, 8888 -> 0565 and 0565 -> 0565.
 */
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
   1205 
/* Sources narrower than this get pre-replicated into a wider temporary
 * buffer so the delegate fast path is not called on tiny spans. */
#define REPEAT_MIN_WIDTH    32

/* Composite with a NORMAL (tiled) repeating source by splitting the
 * destination into one-scanline spans that each stay inside a single
 * horizontal repetition of the source, then delegating every span to the
 * non-repeating fast path looked up below.
 *
 * For very narrow sources (< REPEAT_MIN_WIDTH pixels, 8/16/32 bpp,
 * non-indexed) the relevant source row is first replicated side by side
 * into the on-stack extended_src buffer, wrapped in a temporary
 * pixman_image_t, so the delegate sees a reasonably wide source.
 */
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
		     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    pixman_composite_func_t func;
    pixman_format_code_t mask_format;
    uint32_t src_flags, mask_flags;
    int32_t sx, sy;
    int32_t width_remain;
    int32_t num_pixels;
    int32_t src_width;
    int32_t i, j;
    pixman_image_t extended_src_image;
    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
    pixman_bool_t need_src_extension;
    uint32_t *src_line;
    int32_t src_stride;
    int32_t src_bpp;
    pixman_composite_info_t info2 = *info;   /* per-span copy we mutate below */

    /* Each delegated span fully covers its clip, so swap the repeat flag
     * for the samples-cover flag when looking up the delegate. */
    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
	    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;

    if (mask_image)
    {
	mask_format = mask_image->common.extended_format_code;
	mask_flags = info->mask_flags;
    }
    else
    {
	mask_format = PIXMAN_null;
	mask_flags = FAST_PATH_IS_OPAQUE;
    }

    /* Find the non-repeating composite function to delegate spans to.
     * Note: may replace 'imp' with the implementation that owns 'func'. */
    _pixman_implementation_lookup_composite (
	imp->toplevel, info->op,
	src_image->common.extended_format_code, src_flags,
	mask_format, mask_flags,
	dest_image->common.extended_format_code, info->dest_flags,
	&imp, &func);

    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);

    if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
	!src_image->bits.indexed)
    {
	/* Choose src_width: enough repetitions of the source to reach
	 * REPEAT_MIN_WIDTH and to cover the widest span we will draw
	 * (starting offset within the tile plus the composite width). */
	sx = src_x;
	sx = MOD (sx, src_image->bits.width);
	sx += width;
	src_width = 0;

	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
	    src_width += src_image->bits.width;

	/* stride in uint32_t units, rounded up to a whole word */
	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);

	/* Initialize/validate stack-allocated temporary image */
	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
				 src_width, 1, &extended_src[0], src_stride,
				 FALSE);
	_pixman_image_validate (&extended_src_image);

	info2.src_image = &extended_src_image;
	need_src_extension = TRUE;
    }
    else
    {
	src_width = src_image->bits.width;
	need_src_extension = FALSE;
    }

    sx = src_x;
    sy = src_y;

    while (--height >= 0)
    {
	/* Wrap the source coordinates into the (possibly extended) tile. */
	sx = MOD (sx, src_width);
	sy = MOD (sy, src_image->bits.height);

	if (need_src_extension)
	{
	    /* Replicate the current source row into extended_src,
	     * element width chosen by the source bpp. */
	    if (src_bpp == 32)
	    {
		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			extended_src[i] = src_line[j];
		}
	    }
	    else if (src_bpp == 16)
	    {
		uint16_t *src_line_16;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
				       src_line_16, 1);
		src_line = (uint32_t*)src_line_16;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
		}
	    }
	    else if (src_bpp == 8)
	    {
		uint8_t *src_line_8;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
				       src_line_8, 1);
		src_line = (uint32_t*)src_line_8;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
		}
	    }

	    /* The temporary image is one row tall; always sample row 0. */
	    info2.src_y = 0;
	}
	else
	{
	    info2.src_y = sy;
	}

	width_remain = width;

	/* Walk the scanline, one tile-aligned span at a time. */
	while (width_remain > 0)
	{
	    /* pixels left before the source wraps around */
	    num_pixels = src_width - sx;

	    if (num_pixels > width_remain)
		num_pixels = width_remain;

	    info2.src_x = sx;
	    info2.width = num_pixels;
	    info2.height = 1;

	    func (imp, &info2);

	    width_remain -= num_pixels;
	    info2.mask_x += num_pixels;
	    info2.dest_x += num_pixels;
	    sx = 0;   /* subsequent spans start at the tile's left edge */
	}

	/* Advance to the next scanline, resetting the x positions. */
	sx = src_x;
	sy++;
	info2.mask_x = info->mask_x;
	info2.mask_y++;
	info2.dest_x = info->dest_x;
	info2.dest_y++;
    }

    if (need_src_extension)
	_pixman_image_fini (&extended_src_image);
}
   1369 
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
/* Nearest-neighbour SRC scanline for 0565 -> 0565: steps the fixed-point
 * source position vx by unit_x per output pixel, four pixels per
 * iteration.  max_vx and fully_transparent_src are unused here (they are
 * part of the common scanline-function signature).
 *
 * Remainder handling relies on two's complement: the main loop exits with
 * w in [-4, -1], i.e. w == (original remainder) - 4, whose low two bits
 * equal the remainder's low two bits, so (w & 2) and (w & 1) select the
 * 2-pixel and 1-pixel tails.
 */
static force_inline void
scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
				     const uint16_t * src,
				     int32_t          w,
				     pixman_fixed_t   vx,
				     pixman_fixed_t   unit_x,
				     pixman_fixed_t   max_vx,
				     pixman_bool_t    fully_transparent_src)
{
    uint16_t tmp1, tmp2, tmp3, tmp4;
    while ((w -= 4) >= 0)
    {
	/* fetch all four pixels before storing, to decouple loads/stores */
	tmp1 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	tmp2 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	tmp3 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	tmp4 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	*dst++ = tmp1;
	*dst++ = tmp2;
	*dst++ = tmp3;
	*dst++ = tmp4;
    }
    if (w & 2)
    {
	tmp1 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	tmp2 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	*dst++ = tmp1;
	*dst++ = tmp2;
    }
    if (w & 1)
	*dst = *(src + pixman_fixed_to_int (vx));
}
   1408 
/* Wrap the unrolled scanline above into full composite functions for the
 * COVER/NONE/PAD repeat modes (template provided by pixman-inlines.h). */
FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, PAD)
   1418 
   1419 static force_inline uint32_t
   1420 fetch_nearest (pixman_repeat_t src_repeat,
   1421        pixman_format_code_t format,
   1422        uint32_t *src, int x, int src_width)
   1423 {
   1424    if (repeat (src_repeat, &x, src_width))
   1425    {
   1426 if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
   1427     return *(src + x) | 0xff000000;
   1428 else
   1429     return *(src + x);
   1430    }
   1431    else
   1432    {
   1433 return 0;
   1434    }
   1435 }
   1436 
   1437 static force_inline void
   1438 combine_over (uint32_t s, uint32_t *dst)
   1439 {
   1440    if (s)
   1441    {
   1442 uint8_t ia = 0xff - (s >> 24);
   1443 
   1444 if (ia)
   1445     UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
   1446 else
   1447     *dst = s;
   1448    }
   1449 }
   1450 
   1451 static force_inline void
   1452 combine_src (uint32_t s, uint32_t *dst)
   1453 {
   1454    *dst = s;
   1455 }
   1456 
/* Generic nearest-neighbour scaler for 32 bpp sources with SRC or OVER.
 * Only the diagonal transform entries are used for stepping, so this
 * assumes a scale + translation transform (callers' fast-path flags are
 * expected to guarantee that -- NOTE(review): not checkable from here).
 * Repeat handling is done per pixel via fetch_nearest(); rows that fall
 * entirely outside the source are cleared for SRC and skipped for OVER.
 */
static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
		       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t       *dst_line;
    uint32_t       *src_line;
    int             dst_stride, src_stride;
    int		    src_width, src_height;
    pixman_repeat_t src_repeat;
    pixman_fixed_t unit_x, unit_y;
    pixman_format_code_t src_format;
    pixman_vector_t v;
    pixman_fixed_t vy;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
     * transformed from destination space to source space
     */
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (src_image->common.transform, &v))
	return;

    /* per-pixel / per-row fixed-point increments in source space */
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];

    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
    v.vector[0] -= pixman_fixed_e;
    v.vector[1] -= pixman_fixed_e;

    src_height = src_image->bits.height;
    src_width = src_image->bits.width;
    src_repeat = src_image->common.repeat;
    src_format = src_image->bits.format;

    vy = v.vector[1];
    while (height--)
    {
	pixman_fixed_t vx = v.vector[0];
	int y = pixman_fixed_to_int (vy);
	uint32_t *dst = dst_line;

	dst_line += dst_stride;

	/* adjust the y location by a unit vector in the y direction
	 * this is equivalent to transforming y+1 of the destination point to source space */
	vy += unit_y;

	if (!repeat (src_repeat, &y, src_height))
	{
	    /* row outside the source: SRC clears, OVER leaves dst alone */
	    if (op == PIXMAN_OP_SRC)
		memset (dst, 0, sizeof (*dst) * width);
	}
	else
	{
	    int w = width;

	    uint32_t *src = src_line + y * src_stride;

	    /* 2x unrolled main loop */
	    while (w >= 2)
	    {
		uint32_t s1, s2;
		int x1, x2;

		x1 = pixman_fixed_to_int (vx);
		vx += unit_x;

		x2 = pixman_fixed_to_int (vx);
		vx += unit_x;

		w -= 2;

		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);

		if (op == PIXMAN_OP_OVER)
		{
		    combine_over (s1, dst++);
		    combine_over (s2, dst++);
		}
		else
		{
		    combine_src (s1, dst++);
		    combine_src (s2, dst++);
		}
	    }

	    /* remaining single pixel, if width was odd */
	    while (w--)
	    {
		uint32_t s;
		int x;

		x = pixman_fixed_to_int (vx);
		vx += unit_x;

		s = fetch_nearest (src_repeat, src_format, src, x, src_width);

		if (op == PIXMAN_OP_OVER)
		    combine_over (s, dst++);
		else
		    combine_src (s, dst++);
	    }
	}
    }
}
   1568 
   1569 #define CACHE_LINE_SIZE 64
   1570 
   1571 #define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
   1572                                                                              \
   1573 static void                                                                   \
   1574 blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
   1575 			 int             dst_stride,                  \
   1576 			 const pix_type *src,                         \
   1577 			 int             src_stride,                  \
   1578 			 int             w,                           \
   1579 			 int             h)                           \
   1580 {                                                                             \
   1581    int x, y;                                                                 \
   1582    for (y = 0; y < h; y++)                                                   \
   1583    {                                                                         \
   1584 const pix_type *s = src + (h - y - 1);                                \
   1585 pix_type *d = dst + dst_stride * y;                                   \
   1586 for (x = 0; x < w; x++)                                               \
   1587 {                                                                     \
   1588     *d++ = *s;                                                        \
   1589     s += src_stride;                                                  \
   1590 }                                                                     \
   1591    }                                                                         \
   1592 }                                                                             \
   1593                                                                              \
   1594 static void                                                                   \
   1595 blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
   1596 			  int             dst_stride,                 \
   1597 			  const pix_type *src,                        \
   1598 			  int             src_stride,                 \
   1599 			  int             w,                          \
   1600 			  int             h)                          \
   1601 {                                                                             \
   1602    int x, y;                                                                 \
   1603    for (y = 0; y < h; y++)                                                   \
   1604    {                                                                         \
   1605 const pix_type *s = src + src_stride * (w - 1) + y;                   \
   1606 pix_type *d = dst + dst_stride * y;                                   \
   1607 for (x = 0; x < w; x++)                                               \
   1608 {                                                                     \
   1609     *d++ = *s;                                                        \
   1610     s -= src_stride;                                                  \
   1611 }                                                                     \
   1612    }                                                                         \
   1613 }                                                                             \
   1614                                                                              \
   1615 static void                                                                   \
   1616 blt_rotated_90_##suffix (pix_type       *dst,                                 \
   1617 		 int             dst_stride,                          \
   1618 		 const pix_type *src,                                 \
   1619 		 int             src_stride,                          \
   1620 		 int             W,                                   \
   1621 		 int             H)                                   \
   1622 {                                                                             \
   1623    int x;                                                                    \
   1624    int leading_pixels = 0, trailing_pixels = 0;                              \
   1625    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
   1626                                                                              \
   1627    /*                                                                        \
   1628     * split processing into handling destination as TILE_SIZExH cache line   \
   1629     * aligned vertical stripes (optimistically assuming that destination     \
   1630     * stride is a multiple of cache line, if not - it will be just a bit     \
   1631     * slower)                                                                \
   1632     */                                                                       \
   1633                                                                              \
   1634    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
   1635    {                                                                         \
   1636 leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
   1637 		    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
   1638 if (leading_pixels > W)                                               \
   1639     leading_pixels = W;                                               \
   1640                                                                              \
   1641 /* unaligned leading part NxH (where N < TILE_SIZE) */                \
   1642 blt_rotated_90_trivial_##suffix (                                     \
   1643     dst,                                                              \
   1644     dst_stride,                                                       \
   1645     src,                                                              \
   1646     src_stride,                                                       \
   1647     leading_pixels,                                                   \
   1648     H);                                                               \
   1649                                                                       \
   1650 dst += leading_pixels;                                                \
   1651 src += leading_pixels * src_stride;                                   \
   1652 W -= leading_pixels;                                                  \
   1653    }                                                                         \
   1654                                                                              \
   1655    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
   1656    {                                                                         \
   1657 trailing_pixels = (((uintptr_t)(dst + W) &                            \
   1658 		    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
   1659 if (trailing_pixels > W)                                              \
   1660     trailing_pixels = W;                                              \
   1661 W -= trailing_pixels;                                                 \
   1662    }                                                                         \
   1663                                                                              \
   1664    for (x = 0; x < W; x += TILE_SIZE)                                        \
   1665    {                                                                         \
   1666 /* aligned middle part TILE_SIZExH */                                 \
   1667 blt_rotated_90_trivial_##suffix (                                     \
   1668     dst + x,                                                          \
   1669     dst_stride,                                                       \
   1670     src + src_stride * x,                                             \
   1671     src_stride,                                                       \
   1672     TILE_SIZE,                                                        \
   1673     H);                                                               \
   1674    }                                                                         \
   1675                                                                              \
   1676    if (trailing_pixels)                                                      \
   1677    {                                                                         \
   1678 /* unaligned trailing part NxH (where N < TILE_SIZE) */               \
   1679 blt_rotated_90_trivial_##suffix (                                     \
   1680     dst + W,                                                          \
   1681     dst_stride,                                                       \
   1682     src + W * src_stride,                                             \
   1683     src_stride,                                                       \
   1684     trailing_pixels,                                                  \
   1685     H);                                                               \
   1686    }                                                                         \
   1687 }                                                                             \
   1688                                                                              \
   1689 static void                                                                   \
   1690 blt_rotated_270_##suffix (pix_type       *dst,                                \
   1691 		  int             dst_stride,                         \
   1692 		  const pix_type *src,                                \
   1693 		  int             src_stride,                         \
   1694 		  int             W,                                  \
   1695 		  int             H)                                  \
   1696 {                                                                             \
   1697    int x;                                                                    \
   1698    int leading_pixels = 0, trailing_pixels = 0;                              \
   1699    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
   1700                                                                              \
   1701    /*                                                                        \
   1702     * split processing into handling destination as TILE_SIZExH cache line   \
   1703     * aligned vertical stripes (optimistically assuming that destination     \
   1704     * stride is a multiple of cache line, if not - it will be just a bit     \
   1705     * slower)                                                                \
   1706     */                                                                       \
   1707                                                                              \
   1708    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
   1709    {                                                                         \
   1710 leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
   1711 		    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
   1712 if (leading_pixels > W)                                               \
   1713     leading_pixels = W;                                               \
   1714                                                                              \
   1715 /* unaligned leading part NxH (where N < TILE_SIZE) */                \
   1716 blt_rotated_270_trivial_##suffix (                                    \
   1717     dst,                                                              \
   1718     dst_stride,                                                       \
   1719     src + src_stride * (W - leading_pixels),                          \
   1720     src_stride,                                                       \
   1721     leading_pixels,                                                   \
   1722     H);                                                               \
   1723                                                                       \
   1724 dst += leading_pixels;                                                \
   1725 W -= leading_pixels;                                                  \
   1726    }                                                                         \
   1727                                                                              \
   1728    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
   1729    {                                                                         \
   1730 trailing_pixels = (((uintptr_t)(dst + W) &                            \
   1731 		    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
   1732 if (trailing_pixels > W)                                              \
   1733     trailing_pixels = W;                                              \
   1734 W -= trailing_pixels;                                                 \
   1735 src += trailing_pixels * src_stride;                                  \
   1736    }                                                                         \
   1737                                                                              \
   1738    for (x = 0; x < W; x += TILE_SIZE)                                        \
   1739    {                                                                         \
   1740 /* aligned middle part TILE_SIZExH */                                 \
   1741 blt_rotated_270_trivial_##suffix (                                    \
   1742     dst + x,                                                          \
   1743     dst_stride,                                                       \
   1744     src + src_stride * (W - x - TILE_SIZE),                           \
   1745     src_stride,                                                       \
   1746     TILE_SIZE,                                                        \
   1747     H);                                                               \
   1748    }                                                                         \
   1749                                                                              \
   1750    if (trailing_pixels)                                                      \
   1751    {                                                                         \
   1752 /* unaligned trailing part NxH (where N < TILE_SIZE) */               \
   1753 blt_rotated_270_trivial_##suffix (                                    \
   1754     dst + W,                                                          \
   1755     dst_stride,                                                       \
   1756     src - trailing_pixels * src_stride,                               \
   1757     src_stride,                                                       \
   1758     trailing_pixels,                                                  \
   1759     H);                                                               \
   1760    }                                                                         \
   1761 }                                                                             \
   1762                                                                              \
   1763 static void                                                                   \
   1764 fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
   1765 			   pixman_composite_info_t *info)	      \
   1766 {									      \
   1767    PIXMAN_COMPOSITE_ARGS (info);					      \
   1768    pix_type       *dst_line;						      \
   1769    pix_type       *src_line;                                                 \
   1770    int             dst_stride, src_stride;                                   \
   1771    int             src_x_t, src_y_t;                                         \
   1772                                                                              \
   1773    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
   1774 		   dst_stride, dst_line, 1);                          \
   1775    src_x_t = -src_y + pixman_fixed_to_int (                                  \
   1776 			src_image->common.transform->matrix[0][2] +   \
   1777 			pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
   1778    src_y_t = src_x + pixman_fixed_to_int (                                   \
   1779 			src_image->common.transform->matrix[1][2] +   \
   1780 			pixman_fixed_1 / 2 - pixman_fixed_e);         \
   1781    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
   1782 		   src_stride, src_line, 1);                          \
   1783    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
   1784 		     width, height);                                  \
   1785 }                                                                             \
   1786                                                                              \
   1787 static void                                                                   \
   1788 fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
   1789 			    pixman_composite_info_t *info)            \
   1790 {                                                                             \
   1791    PIXMAN_COMPOSITE_ARGS (info);					      \
   1792    pix_type       *dst_line;						      \
   1793    pix_type       *src_line;                                                 \
   1794    int             dst_stride, src_stride;                                   \
   1795    int             src_x_t, src_y_t;                                         \
   1796                                                                              \
   1797    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
   1798 		   dst_stride, dst_line, 1);                          \
   1799    src_x_t = src_y + pixman_fixed_to_int (                                   \
   1800 			src_image->common.transform->matrix[0][2] +   \
   1801 			pixman_fixed_1 / 2 - pixman_fixed_e);         \
   1802    src_y_t = -src_x + pixman_fixed_to_int (                                  \
   1803 			src_image->common.transform->matrix[1][2] +   \
   1804 			pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
   1805    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
   1806 		   src_stride, src_line, 1);                          \
   1807    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
   1808 		      width, height);                                 \
   1809 }
   1810 
/* Instantiate the 90/270-degree rotation blitters and their composite
 * wrappers for 8 bpp, 16 bpp (565) and 32 bpp (8888) pixel sizes. */
FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)
   1814 
/* Table of scalar ("C") compositing fast paths.  Each entry names an
 * operator plus the source, mask and destination formats (and flag
 * requirements) it applies to, together with the function implementing
 * that exact combination.  The table is terminated by the
 * PIXMAN_OP_NONE sentinel entry. */
static const pixman_fast_path_t c_fast_paths[] =
{
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
    /* Component-alpha (per-channel mask) variants. */
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
    /* Same-layout copies reduce to a straight memcpy per scanline. */
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),

    /* Nearest-neighbour scaling paths. */
    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),

    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),

    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),

/* Generic (per-pixel) nearest-neighbour scaling entry for one
 * operator/source/destination combination. */
#define NEAREST_FAST_PATH(op,s,d)		\
    {   PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_scaled_nearest,		\
    }

    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),

/* Flag set required for the simple 90/270-degree rotation blitters. */
#define SIMPLE_ROTATE_FLAGS(angle)					  \
    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
     FAST_PATH_NEAREST_FILTER			|			  \
     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|			  \
     FAST_PATH_STANDARD_FLAGS)

/* Emit the pair of 90- and 270-degree rotation entries for one
 * operator/source/destination combination. */
#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
    {   PIXMAN_OP_ ## op,						  \
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),				  \
	PIXMAN_null, 0,							  \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
	fast_composite_rotate_90_##suffix,				  \
    },									  \
    {   PIXMAN_OP_ ## op,						  \
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),			  \
	PIXMAN_null, 0,							  \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
	fast_composite_rotate_270_##suffix,				  \
    }

    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),

    /* Simple repeat fast path entry. */
    {	PIXMAN_OP_any,
	PIXMAN_any,
	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
	 FAST_PATH_NORMAL_REPEAT),
	PIXMAN_any, 0,
	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
	fast_composite_tiled_repeat
    },

    /* End-of-table sentinel. */
    {   PIXMAN_OP_NONE	},
};
   1976 
/* Build a 32-bit mask covering `n` consecutive a1 pixels starting at bit
 * offset `offs` within one word, honouring the platform's bit order.
 * Requires 0 < n <= 31 and offs + n <= 32 (1U << 32 would be undefined). */
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif
   1982 
   1983 static force_inline void
   1984 pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
   1985 {
   1986    if (offs)
   1987    {
   1988 int leading_pixels = 32 - offs;
   1989 if (leading_pixels >= width)
   1990 {
   1991     if (v)
   1992 	*dst |= A1_FILL_MASK (width, offs);
   1993     else
   1994 	*dst &= ~A1_FILL_MASK (width, offs);
   1995     return;
   1996 }
   1997 else
   1998 {
   1999     if (v)
   2000 	*dst++ |= A1_FILL_MASK (leading_pixels, offs);
   2001     else
   2002 	*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
   2003     width -= leading_pixels;
   2004 }
   2005    }
   2006    while (width >= 32)
   2007    {
   2008 if (v)
   2009     *dst++ = 0xFFFFFFFF;
   2010 else
   2011     *dst++ = 0;
   2012 width -= 32;
   2013    }
   2014    if (width > 0)
   2015    {
   2016 if (v)
   2017     *dst |= A1_FILL_MASK (width, 0);
   2018 else
   2019     *dst &= ~A1_FILL_MASK (width, 0);
   2020    }
   2021 }
   2022 
   2023 static void
   2024 pixman_fill1 (uint32_t *bits,
   2025              int       stride,
   2026              int       x,
   2027              int       y,
   2028              int       width,
   2029              int       height,
   2030              uint32_t  filler)
   2031 {
   2032    uint32_t *dst = bits + y * stride + (x >> 5);
   2033    int offs = x & 31;
   2034 
   2035    if (filler & 1)
   2036    {
   2037 while (height--)
   2038 {
   2039     pixman_fill1_line (dst, offs, width, 1);
   2040     dst += stride;
   2041 }
   2042    }
   2043    else
   2044    {
   2045 while (height--)
   2046 {
   2047     pixman_fill1_line (dst, offs, width, 0);
   2048     dst += stride;
   2049 }
   2050    }
   2051 }
   2052 
   2053 static void
   2054 pixman_fill8 (uint32_t *bits,
   2055              int       stride,
   2056              int       x,
   2057              int       y,
   2058              int       width,
   2059              int       height,
   2060              uint32_t  filler)
   2061 {
   2062    int byte_stride = stride * (int) sizeof (uint32_t);
   2063    uint8_t *dst = (uint8_t *) bits;
   2064    uint8_t v = filler & 0xff;
   2065    int i;
   2066 
   2067    dst = dst + y * byte_stride + x;
   2068 
   2069    while (height--)
   2070    {
   2071 for (i = 0; i < width; ++i)
   2072     dst[i] = v;
   2073 
   2074 dst += byte_stride;
   2075    }
   2076 }
   2077 
   2078 static void
   2079 pixman_fill16 (uint32_t *bits,
   2080               int       stride,
   2081               int       x,
   2082               int       y,
   2083               int       width,
   2084               int       height,
   2085               uint32_t  filler)
   2086 {
   2087    int short_stride =
   2088 (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
   2089    uint16_t *dst = (uint16_t *)bits;
   2090    uint16_t v = filler & 0xffff;
   2091    int i;
   2092 
   2093    dst = dst + y * short_stride + x;
   2094 
   2095    while (height--)
   2096    {
   2097 for (i = 0; i < width; ++i)
   2098     dst[i] = v;
   2099 
   2100 dst += short_stride;
   2101    }
   2102 }
   2103 
   2104 static void
   2105 pixman_fill32 (uint32_t *bits,
   2106               int       stride,
   2107               int       x,
   2108               int       y,
   2109               int       width,
   2110               int       height,
   2111               uint32_t  filler)
   2112 {
   2113    int i;
   2114 
   2115    bits = bits + y * stride + x;
   2116 
   2117    while (height--)
   2118    {
   2119 for (i = 0; i < width; ++i)
   2120     bits[i] = filler;
   2121 
   2122 bits += stride;
   2123    }
   2124 }
   2125 
   2126 static pixman_bool_t
   2127 fast_path_fill (pixman_implementation_t *imp,
   2128                uint32_t *               bits,
   2129                int                      stride,
   2130                int                      bpp,
   2131                int                      x,
   2132                int                      y,
   2133                int                      width,
   2134                int                      height,
   2135                uint32_t		 filler)
   2136 {
   2137    switch (bpp)
   2138    {
   2139    case 1:
   2140 pixman_fill1 (bits, stride, x, y, width, height, filler);
   2141 break;
   2142 
   2143    case 8:
   2144 pixman_fill8 (bits, stride, x, y, width, height, filler);
   2145 break;
   2146 
   2147    case 16:
   2148 pixman_fill16 (bits, stride, x, y, width, height, filler);
   2149 break;
   2150 
   2151    case 32:
   2152 pixman_fill32 (bits, stride, x, y, width, height, filler);
   2153 break;
   2154 
   2155    default:
   2156 return FALSE;
   2157    }
   2158 
   2159    return TRUE;
   2160 }
   2161 
   2162 /*****************************************************************************/
   2163 
/* Source-iterator scanline fetch for r5g6b5: expands one row of 16-bit
 * 5-6-5 pixels into 32-bit pixels (alpha forced to 0xff in the unrolled
 * loop) in iter->buffer and returns that buffer.  `mask` is unused. */
static uint32_t *
fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
    int32_t w = iter->width;
    uint32_t *dst = iter->buffer;
    const uint16_t *src = (const uint16_t *)iter->bits;

    /* Advance to the next scanline for the following call. */
    iter->bits += iter->stride;

    /* Align the source buffer at 4 bytes boundary so the main loop can
     * load two pixels with a single 32-bit read. */
    if (w > 0 && ((uintptr_t)src & 3))
    {
	*dst++ = convert_0565_to_8888 (*src++);
	w--;
    }
    /* Process two pixels per iteration: both pixels' 5/6-bit channels are
     * widened to 8 bits in parallel by replicating the top bits. */
    while ((w -= 2) >= 0)
    {
	uint32_t sr, sb, sg, t0, t1;
	uint32_t s = *(const uint32_t *)src;
	src += 2;
	sr = (s >> 8) & 0x00F800F8;   /* both red fields */
	sb = (s << 3) & 0x00F800F8;   /* both blue fields */
	sg = (s >> 3) & 0x00FC00FC;   /* both green fields */
	sr |= sr >> 5;                /* replicate high bits into low bits */
	sb |= sb >> 5;
	sg |= sg >> 6;
	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
	     (sb & 0xFF) | 0xFF000000;
	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
	     (sb >> 16) | 0xFF000000;
	/* t0/t1 hold the low/high halves of the 32-bit load; store order
	 * therefore depends on byte order. */
#ifdef WORDS_BIGENDIAN
	*dst++ = t1;
	*dst++ = t0;
#else
	*dst++ = t0;
	*dst++ = t1;
#endif
    }
    /* After the loop w is -1 (one pixel left) or -2 (none); (w & 1) is
     * nonzero exactly in the odd-remainder case. */
    if (w & 1)
    {
	*dst = convert_0565_to_8888 (*src);
    }

    return iter->buffer;
}
   2210 
   2211 static uint32_t *
   2212 fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
   2213 {
   2214    iter->bits += iter->stride;
   2215    return iter->buffer;
   2216 }
   2217 
   2218 /* Helper function for a workaround, which tries to ensure that 0x1F001F
   2219 * constant is always allocated in a register on RISC architectures.
   2220 */
   2221 static force_inline uint32_t
   2222 convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
   2223 {
   2224    uint32_t a, b;
   2225    a = (s >> 3) & x1F001F;
   2226    b = s & 0xFC00;
   2227    a |= a >> 5;
   2228    a |= b >> 5;
   2229    return a;
   2230 }
   2231 
/* Destination-iterator write-back for r5g6b5: converts the 32-bit pixels
 * in iter->buffer back to 5-6-5 and stores them into the scanline fetched
 * last (iter->bits was already advanced by the fetch, hence the -stride). */
static void
fast_write_back_r5g6b5 (pixman_iter_t *iter)
{
    int32_t w = iter->width;
    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
    const uint32_t *src = iter->buffer;
    /* Workaround to ensure that x1F001F variable is allocated in a register;
     * the volatile load stops the compiler from re-materializing the
     * constant inside the loop (see convert_8888_to_0565_workaround). */
    static volatile uint32_t volatile_x1F001F = 0x1F001F;
    uint32_t x1F001F = volatile_x1F001F;

    /* Main loop, unrolled by four pixels. */
    while ((w -= 4) >= 0)
    {
	uint32_t s1 = *src++;
	uint32_t s2 = *src++;
	uint32_t s3 = *src++;
	uint32_t s4 = *src++;
	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
    }
    /* After the loop w is in [-4, -1]; the bit tests below handle the
     * 0..3 remaining pixels. */
    if (w & 2)
    {
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
    }
    if (w & 1)
    {
	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
    }
}
   2263 
/* One cached, horizontally-interpolated source row for the bilinear
 * COVER iterator (see fetch_horizontal / fast_fetch_bilinear_cover). */
typedef struct
{
    int		y;       /* source row currently cached, or -1 if empty */
    uint64_t *	buffer;  /* iter->width entries of widened channel data */
} line_t;

typedef struct
{
    line_t		lines[2];  /* two-row cache, indexed by (row & 1) */
    pixman_fixed_t	y;         /* current source y, 16.16 fixed point */
    pixman_fixed_t	x;         /* source x of the first pixel */
    uint64_t		data[1];   /* trailing storage for both line buffers;
                                    * [1] is the pre-C99 flexible-array idiom
                                    * and is accounted for in the malloc size
                                    * in fast_bilinear_cover_iter_init. */
} bilinear_info_t;
   2277 
/* Horizontal pass of the bilinear filter: read source row `y` and produce
 * `n` horizontally-interpolated samples starting at fixed-point `x` with
 * increment `ux`, leaving the channels widened in line->buffer so the
 * vertical pass can finish the filtering.  Reads columns x0 and x0 + 1;
 * the COVER clip checked by the caller is presumably what keeps both
 * in-bounds (NOTE(review): confirm against the fast-path flags). */
static void
fetch_horizontal (bits_image_t *image, line_t *line,
		  int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
{
    uint32_t *bits = image->bits + y * image->rowstride;
    int i;

    for (i = 0; i < n; ++i)
    {
	int x0 = pixman_fixed_to_int (x);
	int x1 = x0 + 1;
	int32_t dist_x;

	uint32_t left = *(bits + x0);
	uint32_t right = *(bits + x1);

	dist_x = pixman_fixed_to_bilinear_weight (x);
	dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS);   /* scale weight to 8 bits */

#if SIZEOF_LONG <= 4
	{
	    /* 32-bit machines: interpolate the alpha/green and red/blue
	     * channel pairs in two 32-bit registers and store them as the
	     * two uint32_t halves of the 64-bit buffer entry (the vertical
	     * pass reads them back the same way, so byte order is moot). */
	    uint32_t lag, rag, ag;
	    uint32_t lrb, rrb, rb;

	    lag = (left & 0xff00ff00) >> 8;
	    rag = (right & 0xff00ff00) >> 8;
	    ag = (lag << 8) + dist_x * (rag - lag);

	    lrb = (left & 0x00ff00ff);
	    rrb = (right & 0x00ff00ff);
	    rb = (lrb << 8) + dist_x * (rrb - lrb);

	    *((uint32_t *)(line->buffer + i)) = ag;
	    *((uint32_t *)(line->buffer + i) + 1) = rb;
	}
#else
	{
	    /* 64-bit machines: pack all four channels into one 64-bit
	     * value and interpolate them with a single multiply. */
	    uint64_t lagrb, ragrb;
	    uint32_t lag, rag;
	    uint32_t lrb, rrb;

	    lag = (left & 0xff00ff00);
	    lrb = (left & 0x00ff00ff);
	    rag = (right & 0xff00ff00);
	    rrb = (right & 0x00ff00ff);
	    lagrb = (((uint64_t)lag) << 24) | lrb;
	    ragrb = (((uint64_t)rag) << 24) | rrb;

	    line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb);
	}
#endif

	x += ux;
    }

    /* Remember which source row this buffer now caches. */
    line->y = y;
}
   2335 
/* Scanline fetch for the bilinear COVER iterator: combine the two cached,
 * horizontally-interpolated rows (see fetch_horizontal) with the vertical
 * weight and write finished a8r8g8b8 pixels to iter->buffer.  `mask` is
 * unused. */
static uint32_t *
fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
{
    pixman_fixed_t fx, ux;
    bilinear_info_t *info = iter->data;
    line_t *line0, *line1;
    int y0, y1;
    int32_t dist_y;
    int i;

    /* The weight scaling below assumes the interpolation precision fits
     * in fewer than 8 bits. */
    COMPILE_TIME_ASSERT (BILINEAR_INTERPOLATION_BITS < 8);

    fx = info->x;
    ux = iter->image->common.transform->matrix[0][0];

    y0 = pixman_fixed_to_int (info->y);
    y1 = y0 + 1;
    dist_y = pixman_fixed_to_bilinear_weight (info->y);
    dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS);   /* scale weight to 8 bits */

    /* The two-line cache is indexed by row parity, so adjacent scanlines
     * can reuse a previously fetched row. */
    line0 = &info->lines[y0 & 0x01];
    line1 = &info->lines[y1 & 0x01];

    /* (Re)fetch whichever cache slot does not hold the row we need. */
    if (line0->y != y0)
    {
	fetch_horizontal (
	    &iter->image->bits, line0, y0, fx, ux, iter->width);
    }

    if (line1->y != y1)
    {
	fetch_horizontal (
	    &iter->image->bits, line1, y1, fx, ux, iter->width);
    }

    for (i = 0; i < iter->width; ++i)
    {
#if SIZEOF_LONG <= 4
	/* 32-bit machines: finish the vertical interpolation on the two
	 * 16-bit-per-channel halves stored by fetch_horizontal. */
	uint32_t ta, tr, tg, tb;
	uint32_t ba, br, bg, bb;
	uint32_t tag, trb;
	uint32_t bag, brb;
	uint32_t a, r, g, b;

	tag = *((uint32_t *)(line0->buffer + i));
	trb = *((uint32_t *)(line0->buffer + i) + 1);
	bag = *((uint32_t *)(line1->buffer + i));
	brb = *((uint32_t *)(line1->buffer + i) + 1);

	ta = tag >> 16;
	ba = bag >> 16;
	a = (ta << 8) + dist_y * (ba - ta);

	tr = trb >> 16;
	br = brb >> 16;
	r = (tr << 8) + dist_y * (br - tr);

	tg = tag & 0xffff;
	bg = bag & 0xffff;
	g = (tg << 8) + dist_y * (bg - tg);

	tb = trb & 0xffff;
	bb = brb & 0xffff;
	b = (tb << 8) + dist_y * (bb - tb);

	/* Reposition each 16.?-bit intermediate into its 8888 byte lane. */
	a = (a <<  8) & 0xff000000;
	r = (r <<  0) & 0x00ff0000;
	g = (g >>  8) & 0x0000ff00;
	b = (b >> 16) & 0x000000ff;
#else
	/* 64-bit machines: interpolate the a/r and g/b channel pairs with
	 * two multiplies on packed 64-bit values. */
	uint64_t top = line0->buffer[i];
	uint64_t bot = line1->buffer[i];
	uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16;
	uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16;
	uint64_t tgb = (top & 0x0000ffff0000ffffULL);
	uint64_t bgb = (bot & 0x0000ffff0000ffffULL);
	uint64_t ar, gb;
	uint32_t a, r, g, b;

	ar = (tar << 8) + dist_y * (bar - tar);
	gb = (tgb << 8) + dist_y * (bgb - tgb);

	a = ((ar >> 24) & 0xff000000);
	r = ((ar >>  0) & 0x00ff0000);
	g = ((gb >> 40) & 0x0000ff00);
	b = ((gb >> 16) & 0x000000ff);
#endif

	iter->buffer[i] = a | r | g | b;
    }

    /* Step to the next source row for the next scanline. */
    info->y += iter->image->common.transform->matrix[1][1];

    return iter->buffer;
}
   2431 
   2432 static void
   2433 bilinear_cover_iter_fini (pixman_iter_t *iter)
   2434 {
   2435    free (iter->data);
   2436 }
   2437 
/* Initialize a source iterator for the bilinear COVER fast path: map the
 * first destination pixel into source space and allocate the two-line
 * interpolation cache.  `iter_info` is unused.  On a bad matrix or OOM
 * the iterator degrades to a noop scanline getter. */
static void
fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
{
    int width = iter->width;
    bilinear_info_t *info;
    pixman_vector_t v;

    /* Reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (iter->image->common.transform, &v))
	goto fail;

    /* bilinear_info_t already embeds one uint64_t (data[1]), so
     * 2 * width - 1 extra elements yield the two width-sized line buffers. */
    info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t));
    if (!info)
	goto fail;

    info->x = v.vector[0] - pixman_fixed_1 / 2;
    info->y = v.vector[1] - pixman_fixed_1 / 2;

    /* It is safe to set the y coordinates to -1 initially
     * because COVER_CLIP_BILINEAR ensures that we will only
     * be asked to fetch lines in the [0, height) interval
     */
    info->lines[0].y = -1;
    info->lines[0].buffer = &(info->data[0]);
    info->lines[1].y = -1;
    info->lines[1].buffer = &(info->data[width]);

    iter->get_scanline = fast_fetch_bilinear_cover;
    iter->fini = bilinear_cover_iter_fini;

    iter->data = info;
    return;

fail:
    /* Something went wrong, either a bad matrix or OOM; in such cases,
     * we don't guarantee any particular rendering.
     */
    _pixman_log_error (
	FUNC, "Allocation failure or bad matrix, skipping rendering\n");

    iter->get_scanline = _pixman_iter_get_scanline_noop;
    iter->fini = NULL;
}
   2485 
/* Bilinear source fetch for 8888-format images with no repeat: samples
 * outside the image read as transparent zero.  Produces one destination
 * scanline in iter->buffer (honouring `mask` when given) and returns it.
 * The scanline is processed in five phases: zero fill left of the image,
 * left edge, interior, right edge, zero fill right of the image. */
static uint32_t *
bits_image_fetch_bilinear_no_repeat_8888 (pixman_iter_t *iter,
					  const uint32_t *mask)
{

    pixman_image_t * ima = iter->image;
    int              offset = iter->x;
    int              line = iter->y++;
    int              width = iter->width;
    uint32_t *       buffer = iter->buffer;

    bits_image_t *bits = &ima->bits;
    pixman_fixed_t x_top, x_bottom, x;
    pixman_fixed_t ux_top, ux_bottom, ux;
    pixman_vector_t v;
    uint32_t top_mask, bottom_mask;
    uint32_t *top_row;
    uint32_t *bottom_row;
    uint32_t *end;
    uint32_t zero[2] = { 0, 0 };
    uint32_t one = 1;
    int y, y1, y2;
    int disty;
    int mask_inc;
    int w;

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (bits->common.transform, &v))
	return iter->buffer;

    ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
    x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;

    y = v.vector[1] - pixman_fixed_1/2;
    disty = pixman_fixed_to_bilinear_weight (y);

    /* Load the pointers to the first and second lines from the source
     * image that bilinear code must read.
     *
     * The main trick in this code is about the check if any line is
     * outside of the image;
     *
     * When a line (either one) is outside, its pointer is redirected to
     * a dummy area with zeros. Once redirected, the pointer must not
     * advance, so the per-line x position and increment are zeroed and
     * kept in separate variables used inside the loop.
     */
    y1 = pixman_fixed_to_int (y);
    y2 = y1 + 1;

    if (y1 < 0 || y1 >= bits->height)
    {
	top_row = zero;
	x_top = 0;
	ux_top = 0;
    }
    else
    {
	top_row = bits->bits + y1 * bits->rowstride;
	x_top = x;
	ux_top = ux;
    }

    if (y2 < 0 || y2 >= bits->height)
    {
	bottom_row = zero;
	x_bottom = 0;
	ux_bottom = 0;
    }
    else
    {
	bottom_row = bits->bits + y2 * bits->rowstride;
	x_bottom = x;
	ux_bottom = ux;
    }

    /* Instead of checking whether the operation uses the mask in
     * each loop iteration, verify this only once and prepare the
     * variables to make the code smaller inside the loop.
     */
    if (!mask)
    {
        /* No mask: point at a constant nonzero word and never advance. */
        mask_inc = 0;
        mask = &one;
    }
    else
    {
        /* If we have a mask, prepare the variables to check it */
        mask_inc = 1;
    }

    /* If both are zero, then the whole thing is zero */
    if (top_row == zero && bottom_row == zero)
    {
	memset (buffer, 0, width * sizeof (uint32_t));
	return iter->buffer;
    }
    else if (bits->format == PIXMAN_x8r8g8b8)
    {
	/* For x8r8g8b8 force the undefined alpha byte to 0xff on every
	 * row that is actually read from the image (not on the dummy
	 * zero row, which must stay transparent). */
	if (top_row == zero)
	{
	    top_mask = 0;
	    bottom_mask = 0xff000000;
	}
	else if (bottom_row == zero)
	{
	    top_mask = 0xff000000;
	    bottom_mask = 0;
	}
	else
	{
	    top_mask = 0xff000000;
	    bottom_mask = 0xff000000;
	}
    }
    else
    {
	top_mask = 0;
	bottom_mask = 0;
    }

    end = buffer + width;

    /* Zero fill to the left of the image */
    while (buffer < end && x < pixman_fixed_minus_1)
    {
	*buffer++ = 0;
	x += ux;
	x_top += ux_top;
	x_bottom += ux_bottom;
	mask += mask_inc;
    }

    /* Left edge: interpolate against an implicit transparent pixel on
     * the left side.
     */
    while (buffer < end && x < 0)
    {
	uint32_t tr, br;
	int32_t distx;

	tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
	br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;

	distx = pixman_fixed_to_bilinear_weight (x);

	*buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);

	x += ux;
	x_top += ux_top;
	x_bottom += ux_bottom;
	mask += mask_inc;
    }

    /* Main part */
    w = pixman_int_to_fixed (bits->width - 1);

    while (buffer < end  &&  x < w)
    {
	if (*mask)
	{
	    uint32_t tl, tr, bl, br;
	    int32_t distx;

	    tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
	    tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
	    bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
	    br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;

	    distx = pixman_fixed_to_bilinear_weight (x);

	    *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
	}

	buffer++;
	x += ux;
	x_top += ux_top;
	x_bottom += ux_bottom;
	mask += mask_inc;
    }

    /* Right Edge: interpolate against an implicit transparent pixel on
     * the right side. */
    w = pixman_int_to_fixed (bits->width);
    while (buffer < end  &&  x < w)
    {
	if (*mask)
	{
	    uint32_t tl, bl;
	    int32_t distx;

	    tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
	    bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;

	    distx = pixman_fixed_to_bilinear_weight (x);

	    *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
	}

	buffer++;
	x += ux;
	x_top += ux_top;
	x_bottom += ux_bottom;
	mask += mask_inc;
    }

    /* Zero fill to the right of the image */
    while (buffer < end)
	*buffer++ = 0;

    return iter->buffer;
}
   2700 
/* Reads the pixel at index x from a row of pixels and returns it widened
 * to a 32-bit value (see convert_a8r8g8b8/convert_a8/convert_r5g6b5 below). */
typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
   2702 
/* Fetch one scanline of `width` pixels from `image`, sampling through the
 * image's affine transform with a separable-convolution filter, and write
 * a8r8g8b8 results into `buffer`.
 *
 * Filter parameter block layout (from the pointer arithmetic below):
 *   params[0], params[1] - kernel width / height (as pixman_fixed_t)
 *   params[2], params[3] - number of x / y subpixel phase bits
 *   followed by (1 << x_phase_bits) x-kernels of cwidth taps each,
 *   followed by the y-kernels of cheight taps each (indexed by y phase).
 *
 * `mask` may be NULL; when non-NULL, pixels whose mask entry is 0 are
 * skipped and their buffer slot is left untouched.  `convert_pixel`,
 * `format` and `repeat_mode` become compile-time constants through the
 * force_inline instantiation macros further down in this file.
 */
static force_inline void
bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
				       int              offset,
				       int              line,
				       int              width,
				       uint32_t *       buffer,
				       const uint32_t * mask,

				       convert_pixel_t	convert_pixel,
				       pixman_format_code_t	format,
				       pixman_repeat_t	repeat_mode)
{
   bits_image_t *bits = &image->bits;
   pixman_fixed_t *params = image->common.filter_params;
   int cwidth = pixman_fixed_to_int (params[0]);
   int cheight = pixman_fixed_to_int (params[1]);
   /* Half the kernel extent in 16.16 fixed point; used to center the
    * kernel footprint on the sample position. */
   int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
   int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
   int x_phase_bits = pixman_fixed_to_int (params[2]);
   int y_phase_bits = pixman_fixed_to_int (params[3]);
   int x_phase_shift = 16 - x_phase_bits;
   int y_phase_shift = 16 - y_phase_bits;
   pixman_fixed_t vx, vy;
   pixman_fixed_t ux, uy;
   pixman_vector_t v;
   int k;

   /* reference point is the center of the pixel */
   v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
   v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
   v.vector[2] = pixman_fixed_1;

   if (!pixman_transform_point_3d (image->common.transform, &v))
return;

   /* Per-destination-pixel step in source space: first column of the
    * affine matrix. */
   ux = image->common.transform->matrix[0][0];
   uy = image->common.transform->matrix[1][0];

   vx = v.vector[0];
   vy = v.vector[1];

   for (k = 0; k < width; ++k)
   {
pixman_fixed_t *y_params;
int satot, srtot, sgtot, sbtot;
pixman_fixed_t x, y;
int32_t x1, x2, y1, y2;
int32_t px, py;
int i, j;

if (mask && !mask[k])
    goto next;

/* Round x and y to the middle of the closest phase before continuing. This
 * ensures that the convolution matrix is aligned right, since it was
 * positioned relative to a particular phase (and not relative to whatever
 * exact fraction we happen to get here).
 */
x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);

/* Subpixel phase indices selecting which precomputed kernel to use */
px = (x & 0xffff) >> x_phase_shift;
py = (y & 0xffff) >> y_phase_shift;

/* Top-left corner of the kernel footprint, in whole source pixels */
x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
x2 = x1 + cwidth;
y2 = y1 + cheight;

satot = srtot = sgtot = sbtot = 0;

/* The y-kernels are stored after the 4-entry header and all x-kernels */
y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;

for (i = y1; i < y2; ++i)
{
    pixman_fixed_t fy = *y_params++;

    if (fy)
    {
	pixman_fixed_t *x_params = params + 4 + px * cwidth;

	for (j = x1; j < x2; ++j)
	{
	    pixman_fixed_t fx = *x_params++;
	    int rx = j;
	    int ry = i;
	    
	    if (fx)
	    {
		pixman_fixed_t f;
		uint32_t pixel, mask;
		uint8_t *row;

		/* Formats without an alpha channel read as fully opaque */
		mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;

		if (repeat_mode != PIXMAN_REPEAT_NONE)
		{
		    repeat (repeat_mode, &rx, bits->width);
		    repeat (repeat_mode, &ry, bits->height);

		    row = (uint8_t *)(bits->bits + bits->rowstride * ry);
		    pixel = convert_pixel (row, rx) | mask;
		}
		else
		{
		    /* REPEAT_NONE: taps outside the image contribute
		     * transparent black */
		    if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
		    {
			pixel = 0;
		    }
		    else
		    {
			row = (uint8_t *)(bits->bits + bits->rowstride * ry);
			pixel = convert_pixel (row, rx) | mask;
		    }
		}

		/* Combined 2D tap weight, rounded back down to 16.16 */
		f = ((pixman_fixed_32_32_t)fx * fy + 0x8000) >> 16;
		srtot += (int)RED_8 (pixel) * f;
		sgtot += (int)GREEN_8 (pixel) * f;
		sbtot += (int)BLUE_8 (pixel) * f;
		satot += (int)ALPHA_8 (pixel) * f;
	    }
	}
    }
}

/* Round the 16.16 accumulators to integer channel values and clamp */
satot = (satot + 0x8000) >> 16;
srtot = (srtot + 0x8000) >> 16;
sgtot = (sgtot + 0x8000) >> 16;
sbtot = (sbtot + 0x8000) >> 16;

satot = CLIP (satot, 0, 0xff);
srtot = CLIP (srtot, 0, 0xff);
sgtot = CLIP (sgtot, 0, 0xff);
sbtot = CLIP (sbtot, 0, 0xff);

buffer[k] = (satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot << 0);

   next:
vx += ux;
vy += uy;
   }
}
   2846 
/* Two transparent-black pixels.  Used by bits_image_fetch_bilinear_affine
 * as a substitute source row when a bilinear fetch would read above or
 * below the image (two entries because both the left and right sample of
 * the 2x2 footprint may be read from it). */
static const uint32_t zero[2] = { 0, 0 };
   2848 
/* Fetch one scanline of `width` pixels from `image`, sampling through the
 * image's affine transform with bilinear filtering, and write a8r8g8b8
 * results into `buffer`.
 *
 * `mask` may be NULL; when non-NULL, pixels whose mask entry is 0 are
 * skipped and their buffer slot is left untouched.  For REPEAT_NONE,
 * samples falling outside the image contribute transparent black (partly
 * via the shared `zero` row above).  `convert_pixel`, `format` and
 * `repeat_mode` become compile-time constants through the force_inline
 * instantiation macros further down in this file.
 */
static force_inline void
bits_image_fetch_bilinear_affine (pixman_image_t * image,
			  int              offset,
			  int              line,
			  int              width,
			  uint32_t *       buffer,
			  const uint32_t * mask,

			  convert_pixel_t	convert_pixel,
			  pixman_format_code_t	format,
			  pixman_repeat_t	repeat_mode)
{
   pixman_fixed_t x, y;
   pixman_fixed_t ux, uy;
   pixman_vector_t v;
   bits_image_t *bits = &image->bits;
   int i;

   /* reference point is the center of the pixel */
   v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
   v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
   v.vector[2] = pixman_fixed_1;

   if (!pixman_transform_point_3d (image->common.transform, &v))
return;

   /* Per-destination-pixel step in source space: first column of the
    * affine matrix. */
   ux = image->common.transform->matrix[0][0];
   uy = image->common.transform->matrix[1][0];

   x = v.vector[0];
   y = v.vector[1];

   for (i = 0; i < width; ++i)
   {
/* (x1, y1)..(x2, y2) is the 2x2 texel footprint; tl/tr/bl/br its
 * four corners. */
int x1, y1, x2, y2;
uint32_t tl, tr, bl, br;
int32_t distx, disty;
int width = image->bits.width;
int height = image->bits.height;
const uint8_t *row1;
const uint8_t *row2;

if (mask && !mask[i])
    goto next;

x1 = x - pixman_fixed_1 / 2;
y1 = y - pixman_fixed_1 / 2;

distx = pixman_fixed_to_bilinear_weight (x1);
disty = pixman_fixed_to_bilinear_weight (y1);

y1 = pixman_fixed_to_int (y1);
y2 = y1 + 1;
x1 = pixman_fixed_to_int (x1);
x2 = x1 + 1;

if (repeat_mode != PIXMAN_REPEAT_NONE)
{
    uint32_t mask;

    /* Formats without an alpha channel read as fully opaque */
    mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;

    repeat (repeat_mode, &x1, width);
    repeat (repeat_mode, &y1, height);
    repeat (repeat_mode, &x2, width);
    repeat (repeat_mode, &y2, height);

    row1 = (uint8_t *)(bits->bits + bits->rowstride * y1);
    row2 = (uint8_t *)(bits->bits + bits->rowstride * y2);

    tl = convert_pixel (row1, x1) | mask;
    tr = convert_pixel (row1, x2) | mask;
    bl = convert_pixel (row2, x1) | mask;
    br = convert_pixel (row2, x2) | mask;
}
else
{
    uint32_t mask1, mask2;
    int bpp;

    /* Note: PIXMAN_FORMAT_BPP() returns an unsigned value,
     * which means if you use it in expressions, those
     * expressions become unsigned themselves. Since
     * the variables below can be negative in some cases,
     * that will lead to crashes on 64 bit architectures.
     *
     * So this line makes sure bpp is signed
     */
    bpp = PIXMAN_FORMAT_BPP (format);

    /* Footprint entirely outside the image: transparent black */
    if (x1 >= width || x2 < 0 || y1 >= height || y2 < 0)
    {
	buffer[i] = 0;
	goto next;
    }

    /* After the check above, y2 >= 0, so y2 == 0 means y1 == -1:
     * the top sample row lies above the image. */
    if (y2 == 0)
    {
	row1 = (const uint8_t *)zero;
	mask1 = 0;
    }
    else
    {
	row1 = (uint8_t *)(bits->bits + bits->rowstride * y1);
	row1 += bpp / 8 * x1;

	mask1 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
    }

    /* y1 == height - 1 means y2 == height: the bottom sample row
     * lies below the image. */
    if (y1 == height - 1)
    {
	row2 = (const uint8_t *)zero;
	mask2 = 0;
    }
    else
    {
	row2 = (uint8_t *)(bits->bits + bits->rowstride * y2);
	row2 += bpp / 8 * x1;

	mask2 = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
    }

    /* x2 == 0 means x1 == -1: left column outside the image */
    if (x2 == 0)
    {
	tl = 0;
	bl = 0;
    }
    else
    {
	tl = convert_pixel (row1, 0) | mask1;
	bl = convert_pixel (row2, 0) | mask2;
    }

    /* x1 == width - 1 means x2 == width: right column outside */
    if (x1 == width - 1)
    {
	tr = 0;
	br = 0;
    }
    else
    {
	tr = convert_pixel (row1, 1) | mask1;
	br = convert_pixel (row2, 1) | mask2;
    }
}

buffer[i] = bilinear_interpolation (
    tl, tr, bl, br, distx, disty);

   next:
x += ux;
y += uy;
   }
}
   3002 
   3003 static force_inline void
   3004 bits_image_fetch_nearest_affine (pixman_image_t * image,
   3005 			 int              offset,
   3006 			 int              line,
   3007 			 int              width,
   3008 			 uint32_t *       buffer,
   3009 			 const uint32_t * mask,
   3010 			 
   3011 			 convert_pixel_t	convert_pixel,
   3012 			 pixman_format_code_t	format,
   3013 			 pixman_repeat_t	repeat_mode)
   3014 {
   3015    pixman_fixed_t x, y;
   3016    pixman_fixed_t ux, uy;
   3017    pixman_vector_t v;
   3018    bits_image_t *bits = &image->bits;
   3019    int i;
   3020 
   3021    /* reference point is the center of the pixel */
   3022    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
   3023    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
   3024    v.vector[2] = pixman_fixed_1;
   3025 
   3026    if (!pixman_transform_point_3d (image->common.transform, &v))
   3027 return;
   3028 
   3029    ux = image->common.transform->matrix[0][0];
   3030    uy = image->common.transform->matrix[1][0];
   3031 
   3032    x = v.vector[0];
   3033    y = v.vector[1];
   3034 
   3035    for (i = 0; i < width; ++i)
   3036    {
   3037 int width, height, x0, y0;
   3038 const uint8_t *row;
   3039 
   3040 if (mask && !mask[i])
   3041     goto next;
   3042 
   3043 width = image->bits.width;
   3044 height = image->bits.height;
   3045 x0 = pixman_fixed_to_int (x - pixman_fixed_e);
   3046 y0 = pixman_fixed_to_int (y - pixman_fixed_e);
   3047 
   3048 if (repeat_mode == PIXMAN_REPEAT_NONE &&
   3049     (y0 < 0 || y0 >= height || x0 < 0 || x0 >= width))
   3050 {
   3051     buffer[i] = 0;
   3052 }
   3053 else
   3054 {
   3055     uint32_t mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
   3056 
   3057     if (repeat_mode != PIXMAN_REPEAT_NONE)
   3058     {
   3059 	repeat (repeat_mode, &x0, width);
   3060 	repeat (repeat_mode, &y0, height);
   3061     }
   3062 
   3063     row = (uint8_t *)(bits->bits + bits->rowstride * y0);
   3064 
   3065     buffer[i] = convert_pixel (row, x0) | mask;
   3066 }
   3067 
   3068    next:
   3069 x += ux;
   3070 y += uy;
   3071    }
   3072 }
   3073 
   3074 static force_inline uint32_t
   3075 convert_a8r8g8b8 (const uint8_t *row, int x)
   3076 {
   3077    return *(((uint32_t *)row) + x);
   3078 }
   3079 
   3080 static force_inline uint32_t
   3081 convert_x8r8g8b8 (const uint8_t *row, int x)
   3082 {
   3083    return *(((uint32_t *)row) + x);
   3084 }
   3085 
   3086 static force_inline uint32_t
   3087 convert_a8 (const uint8_t *row, int x)
   3088 {
   3089    return (uint32_t) *(row + x) << 24;
   3090 }
   3091 
   3092 static force_inline uint32_t
   3093 convert_r5g6b5 (const uint8_t *row, int x)
   3094 {
   3095    return convert_0565_to_0888 (*((uint16_t *)row + x));
   3096 }
   3097 
/* Instantiate a pixman_iter_get_scanline_t named
 * bits_image_fetch_separable_convolution_affine_<name> that fetches one
 * scanline with the given pixel format and repeat mode, advancing the
 * iterator to the next line (iter->y++) on each call. */
#define MAKE_SEPARABLE_CONVOLUTION_FETCHER(name, format, repeat_mode)  \
   static uint32_t *							\
   bits_image_fetch_separable_convolution_affine_ ## name (pixman_iter_t   *iter, \
						    const uint32_t * mask) \
   {									\
bits_image_fetch_separable_convolution_affine (                 \
    iter->image,                                                \
    iter->x, iter->y++,                                         \
    iter->width,                                                \
    iter->buffer, mask,                                         \
    convert_ ## format,                                         \
    PIXMAN_ ## format,                                          \
    repeat_mode);                                               \
								\
return iter->buffer;                                            \
   }
   3114 
/* Instantiate a pixman_iter_get_scanline_t named
 * bits_image_fetch_bilinear_affine_<name> that fetches one scanline with
 * the given pixel format and repeat mode, advancing the iterator to the
 * next line (iter->y++) on each call. */
#define MAKE_BILINEAR_FETCHER(name, format, repeat_mode)		\
   static uint32_t *							\
   bits_image_fetch_bilinear_affine_ ## name (pixman_iter_t   *iter,	\
				       const uint32_t * mask)	\
   {									\
bits_image_fetch_bilinear_affine (iter->image,			\
				  iter->x, iter->y++,		\
				  iter->width,			\
				  iter->buffer, mask,		\
				  convert_ ## format,		\
				  PIXMAN_ ## format,		\
				  repeat_mode);			\
return iter->buffer;						\
   }
   3129 
/* Instantiate a pixman_iter_get_scanline_t named
 * bits_image_fetch_nearest_affine_<name> that fetches one scanline with
 * the given pixel format and repeat mode, advancing the iterator to the
 * next line (iter->y++) on each call. */
#define MAKE_NEAREST_FETCHER(name, format, repeat_mode)			\
   static uint32_t *							\
   bits_image_fetch_nearest_affine_ ## name (pixman_iter_t   *iter,	\
				      const uint32_t * mask)	\
   {									\
bits_image_fetch_nearest_affine (iter->image,			\
				 iter->x, iter->y++,		\
				 iter->width,			\
				 iter->buffer, mask,		\
				 convert_ ## format,		\
				 PIXMAN_ ## format,		\
				 repeat_mode);			\
return iter->buffer;						\
   }
   3144 
/* Instantiate all three affine fetchers (nearest, bilinear, separable
 * convolution) for one format/repeat-mode combination. */
#define MAKE_FETCHERS(name, format, repeat_mode)			\
   MAKE_NEAREST_FETCHER (name, format, repeat_mode)			\
   MAKE_BILINEAR_FETCHER (name, format, repeat_mode)			\
   MAKE_SEPARABLE_CONVOLUTION_FETCHER (name, format, repeat_mode)

/* One fetcher triple per supported format x repeat mode; these are
 * referenced by the fast_iters table below via the AFFINE_FAST_PATHS
 * entries. */
MAKE_FETCHERS (pad_a8r8g8b8,     a8r8g8b8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_a8r8g8b8,    a8r8g8b8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_a8r8g8b8, a8r8g8b8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_a8r8g8b8,  a8r8g8b8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_x8r8g8b8,     x8r8g8b8, PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_x8r8g8b8,    x8r8g8b8, PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_x8r8g8b8, x8r8g8b8, PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_x8r8g8b8,  x8r8g8b8, PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_a8,           a8,       PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_a8,          a8,       PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_a8,	 a8,       PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_a8,	 a8,       PIXMAN_REPEAT_NORMAL)
MAKE_FETCHERS (pad_r5g6b5,       r5g6b5,   PIXMAN_REPEAT_PAD)
MAKE_FETCHERS (none_r5g6b5,      r5g6b5,   PIXMAN_REPEAT_NONE)
MAKE_FETCHERS (reflect_r5g6b5,   r5g6b5,   PIXMAN_REPEAT_REFLECT)
MAKE_FETCHERS (normal_r5g6b5,    r5g6b5,   PIXMAN_REPEAT_NORMAL)
   3166 
/* Flags for untransformed bits-image sources whose samples fully cover
 * the clip region. */
#define IMAGE_FLAGS							\
   (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
    FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
   3170 
/* Iterator dispatch table for the C fast paths, matched in order against
 * (format, flags, iter-flags); terminated by the PIXMAN_null sentinel.
 * Installed into the implementation by
 * _pixman_implementation_create_fast_path below. */
static const pixman_iter_info_t fast_iters[] = 
{
   { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC,
     _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL },

   { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
     ITER_NARROW | ITER_DEST,
     _pixman_iter_init_bits_stride,
     fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
   
   { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
     ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA,
     _pixman_iter_init_bits_stride,
     fast_dest_fetch_noop, fast_write_back_r5g6b5 },

   { PIXMAN_a8r8g8b8,
     (FAST_PATH_STANDARD_FLAGS			|
      FAST_PATH_SCALE_TRANSFORM		|
      FAST_PATH_BILINEAR_FILTER		|
      FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
     ITER_NARROW | ITER_SRC,
     fast_bilinear_cover_iter_init,
     NULL, NULL
   },

/* Axis-aligned bilinear scaling with no repeat: handled by the
 * dedicated no-repeat fetcher. */
#define FAST_BILINEAR_FLAGS						\
   (FAST_PATH_NO_ALPHA_MAP		|				\
    FAST_PATH_NO_ACCESSORS		|				\
    FAST_PATH_HAS_TRANSFORM		|				\
    FAST_PATH_AFFINE_TRANSFORM		|				\
    FAST_PATH_X_UNIT_POSITIVE		|				\
    FAST_PATH_Y_UNIT_ZERO		|				\
    FAST_PATH_NONE_REPEAT		|				\
    FAST_PATH_BILINEAR_FILTER)

   { PIXMAN_a8r8g8b8,
     FAST_BILINEAR_FLAGS,
     ITER_NARROW | ITER_SRC,
     NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
   },

   { PIXMAN_x8r8g8b8,
     FAST_BILINEAR_FLAGS,
     ITER_NARROW | ITER_SRC,
     NULL, bits_image_fetch_bilinear_no_repeat_8888, NULL
   },

/* General affine transforms: dispatched to the per-format fetchers
 * instantiated by MAKE_FETCHERS above. */
#define GENERAL_BILINEAR_FLAGS						\
   (FAST_PATH_NO_ALPHA_MAP		|				\
    FAST_PATH_NO_ACCESSORS		|				\
    FAST_PATH_HAS_TRANSFORM		|				\
    FAST_PATH_AFFINE_TRANSFORM		|				\
    FAST_PATH_BILINEAR_FILTER)

#define GENERAL_NEAREST_FLAGS						\
   (FAST_PATH_NO_ALPHA_MAP		|				\
    FAST_PATH_NO_ACCESSORS		|				\
    FAST_PATH_HAS_TRANSFORM		|				\
    FAST_PATH_AFFINE_TRANSFORM		|				\
    FAST_PATH_NEAREST_FILTER)

#define GENERAL_SEPARABLE_CONVOLUTION_FLAGS				\
   (FAST_PATH_NO_ALPHA_MAP            |				\
    FAST_PATH_NO_ACCESSORS            |				\
    FAST_PATH_HAS_TRANSFORM           |				\
    FAST_PATH_AFFINE_TRANSFORM        |				\
    FAST_PATH_SEPARABLE_CONVOLUTION_FILTER)
   
#define SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)   \
   { PIXMAN_ ## format,						\
     GENERAL_SEPARABLE_CONVOLUTION_FLAGS | FAST_PATH_ ## repeat ## _REPEAT, \
     ITER_NARROW | ITER_SRC,						\
     NULL, bits_image_fetch_separable_convolution_affine_ ## name, NULL \
   },

#define BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
   { PIXMAN_ ## format,						\
     GENERAL_BILINEAR_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
     ITER_NARROW | ITER_SRC,						\
     NULL, bits_image_fetch_bilinear_affine_ ## name, NULL,		\
   },

#define NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
   { PIXMAN_ ## format,						\
     GENERAL_NEAREST_FLAGS | FAST_PATH_ ## repeat ## _REPEAT,		\
     ITER_NARROW | ITER_SRC,						\
     NULL, bits_image_fetch_nearest_affine_ ## name, NULL		\
   },

#define AFFINE_FAST_PATHS(name, format, repeat)				\
   NEAREST_AFFINE_FAST_PATH(name, format, repeat)			\
   BILINEAR_AFFINE_FAST_PATH(name, format, repeat)			\
   SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)
   
   AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
   AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
   AFFINE_FAST_PATHS (reflect_a8r8g8b8, a8r8g8b8, REFLECT)
   AFFINE_FAST_PATHS (normal_a8r8g8b8, a8r8g8b8, NORMAL)
   AFFINE_FAST_PATHS (pad_x8r8g8b8, x8r8g8b8, PAD)
   AFFINE_FAST_PATHS (none_x8r8g8b8, x8r8g8b8, NONE)
   AFFINE_FAST_PATHS (reflect_x8r8g8b8, x8r8g8b8, REFLECT)
   AFFINE_FAST_PATHS (normal_x8r8g8b8, x8r8g8b8, NORMAL)
   AFFINE_FAST_PATHS (pad_a8, a8, PAD)
   AFFINE_FAST_PATHS (none_a8, a8, NONE)
   AFFINE_FAST_PATHS (reflect_a8, a8, REFLECT)
   AFFINE_FAST_PATHS (normal_a8, a8, NORMAL)
   AFFINE_FAST_PATHS (pad_r5g6b5, r5g6b5, PAD)
   AFFINE_FAST_PATHS (none_r5g6b5, r5g6b5, NONE)
   AFFINE_FAST_PATHS (reflect_r5g6b5, r5g6b5, REFLECT)
   AFFINE_FAST_PATHS (normal_r5g6b5, r5g6b5, NORMAL)

   { PIXMAN_null },
};
   3284 
   3285 pixman_implementation_t *
   3286 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
   3287 {
   3288    pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
   3289 
   3290    imp->fill = fast_path_fill;
   3291    imp->iter_info = fast_iters;
   3292 
   3293    return imp;
   3294 }