tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

pixman-mips-dspr2-asm.S (120733B)


      1 /*
      2 * Copyright (c) 2012
      3 *      MIPS Technologies, Inc., California.
      4 *
      5 * Redistribution and use in source and binary forms, with or without
      6 * modification, are permitted provided that the following conditions
      7 * are met:
      8 * 1. Redistributions of source code must retain the above copyright
      9 *    notice, this list of conditions and the following disclaimer.
     10 * 2. Redistributions in binary form must reproduce the above copyright
     11 *    notice, this list of conditions and the following disclaimer in the
     12 *    documentation and/or other materials provided with the distribution.
     13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
     14 *    contributors may be used to endorse or promote products derived from
     15 *    this software without specific prior written permission.
     16 *
     17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
     18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
     21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27 * SUCH DAMAGE.
     28 *
     29 * Author:  Nemanja Lukic (nemanja.lukic@rt-rk.com)
     30 */
     31 
     32 #include "pixman-private.h"
     33 #include "pixman-mips-dspr2-asm.h"
     34 
     35 LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
     36 /*
        * Fills the buffer at a0 with the 16-bit value in a2: an optional
        * leading halfword store (when bit 1 of a0 is set), then 32-byte blocks
        * of word stores, then a halfword-at-a-time tail loop.
        *
        * The instruction indented by one extra space after a branch is that
        * branch's delay slot (assumes the LEAF_* macro selects ".set noreorder";
        * the macro is defined outside this listing - TODO confirm).
        *
     37 * a0 - *dest
     38 * a1 - count (bytes)
     39 * a2 - value to fill buffer with
     40 */
     41 
     42    beqz     a1, 3f          /* zero byte count: return */
     43     andi    t1, a0, 0x0002  /* t1 = bit 1 of the destination address */
     44    beqz     t1, 0f          /* check if address is 4-byte aligned */
     45     nop
     46    sh       a2, 0(a0)       /* bit 1 set: store one halfword first */
     47    addiu    a0, a0, 2
     48    addiu    a1, a1, -2
     49 0:
     50    srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
     51    replv.ph a2, a2          /* replicate fill value (16bit) in a2 */
     52    beqz     t1, 2f          /* fewer than 32 bytes: tail loop only */
     53     nop
     54 1:
     55    addiu    t1, t1, -1
     56    beqz     t1, 11f         /* last 32-byte block: use the copy at 11 (no pref) */
     57     addiu   a1, a1, -32     /* 32 bytes accounted for (runs on both paths) */
     58    pref     30, 32(a0)      /* prefetch hint 30 on the following 32-byte block */
     59    sw       a2, 0(a0)
     60    sw       a2, 4(a0)
     61    sw       a2, 8(a0)
     62    sw       a2, 12(a0)
     63    sw       a2, 16(a0)
     64    sw       a2, 20(a0)
     65    sw       a2, 24(a0)
     66    sw       a2, 28(a0)
     67    b        1b
     68     addiu   a0, a0, 32
     69 11:
     70    sw       a2, 0(a0)
     71    sw       a2, 4(a0)
     72    sw       a2, 8(a0)
     73    sw       a2, 12(a0)
     74    sw       a2, 16(a0)
     75    sw       a2, 20(a0)
     76    sw       a2, 24(a0)
     77    sw       a2, 28(a0)
     78    addiu    a0, a0, 32
     79 2:
     80    blez     a1, 3f          /* stop once no bytes remain */
     81     addiu   a1, a1, -2      /* delay slot: also executes when the branch is taken */
     82    sh       a2, 0(a0)       /* one halfword per iteration */
     83    b        2b
     84     addiu   a0, a0, 2
     85 3:
     86    jr       ra
     87     nop
     88 
     89 END(pixman_fill_buff16_mips)
     90 
     91 LEAF_MIPS32R2(pixman_fill_buff32_mips)
     92 /*
        * Fills the buffer at a0 with the 32-bit value in a2: 32-byte blocks of
        * eight word stores, then a word-at-a-time tail loop.  No alignment
        * fix-up is performed here; every store is a word store at a0 + 4k.
        *
        * The instruction indented by one extra space after a branch is that
        * branch's delay slot (assumes the LEAF_* macro selects ".set noreorder";
        * the macro is defined outside this listing - TODO confirm).
        *
     93 * a0 - *dest
     94 * a1 - count (bytes)
     95 * a2 - value to fill buffer with
     96 */
     97 
     98    beqz     a1, 3f    /* zero byte count: return */
     99     nop
    100    srl      t1, a1, 5 /* t1 how many multiples of 32 bytes */
    101    beqz     t1, 2f    /* fewer than 32 bytes: tail loop only */
    102     nop
    103 1:
    104    addiu    t1, t1, -1
    105    beqz     t1, 11f        /* last 32-byte block: use the copy at 11 (no pref) */
    106     addiu   a1, a1, -32    /* 32 bytes accounted for (runs on both paths) */
    107    pref     30, 32(a0)     /* prefetch hint 30 on the following 32-byte block */
    108    sw       a2, 0(a0)
    109    sw       a2, 4(a0)
    110    sw       a2, 8(a0)
    111    sw       a2, 12(a0)
    112    sw       a2, 16(a0)
    113    sw       a2, 20(a0)
    114    sw       a2, 24(a0)
    115    sw       a2, 28(a0)
    116    b        1b
    117     addiu   a0, a0, 32
    118 11:
    119    sw       a2, 0(a0)
    120    sw       a2, 4(a0)
    121    sw       a2, 8(a0)
    122    sw       a2, 12(a0)
    123    sw       a2, 16(a0)
    124    sw       a2, 20(a0)
    125    sw       a2, 24(a0)
    126    sw       a2, 28(a0)
    127    addiu    a0, a0, 32
    128 2:
    129    blez     a1, 3f         /* stop once no bytes remain */
    130     addiu   a1, a1, -4     /* delay slot: also executes when the branch is taken */
    131    sw       a2, 0(a0)      /* one word per iteration */
    132    b        2b
    133     addiu   a0, a0, 4
    134 3:
    135    jr       ra
    136     nop
    137 
    138 END(pixman_fill_buff32_mips)
    139 
    140 LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
    141 /*
        * Converts a2 pixels from 32-bit words at a1 into 16-bit halfwords at a0:
        * two pixels per iteration of loop 1, then at most one trailing pixel at 2.
        * The bit-level conversion is done by the CONVERT_2x8888_TO_2x0565 and
        * CONVERT_1x8888_TO_1x0565 macros, which are defined outside this listing
        * (presumably pixman-mips-dspr2-asm.h - TODO confirm operand roles there).
        *
    142 * a0 - dst (r5g6b5)
    143 * a1 - src (a8r8g8b8)
    144 * a2 - w
    145 */
    146 
    147    beqz     a2, 3f          /* w == 0: nothing to do */
    148     nop
    149    addiu    t1, a2, -1
    150    beqz     t1, 2f          /* w == 1: skip the pair loop (constants below not loaded) */
    151     nop
    152    li       t4, 0xf800f800  /* bits 15..11 of each halfword */
    153    li       t5, 0x07e007e0  /* bits 10..5 of each halfword */
    154    li       t6, 0x001f001f  /* bits 4..0 of each halfword */
    155 1:
    156    lw       t0, 0(a1)       /* two source pixels */
    157    lw       t1, 4(a1)
    158    addiu    a1, a1, 8
    159    addiu    a2, a2, -2      /* two pixels consumed */
    160 
    161    CONVERT_2x8888_TO_2x0565 t0, t1, t2, t3, t4, t5, t6, t7, t8
    162 
    163    sh       t2, 0(a0)       /* t2/t3 are the halfwords stored as the two results */
    164    sh       t3, 2(a0)
    165 
    166    addiu    t2, a2, -1
    167    bgtz     t2, 1b          /* loop while at least two pixels remain */
    168     addiu   a0, a0, 4
    169 2:
    170    beqz     a2, 3f          /* no odd pixel left */
    171     nop
    172    lw       t0, 0(a1)
    173 
    174    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
    175 
    176    sh       t1, 0(a0)       /* t1 is the halfword stored as the result */
    177 3:
    178    j        ra              /* return (written as j with a register operand) */
    179     nop
    180 
    181 END(pixman_composite_src_8888_0565_asm_mips)
    182 
    183 LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
    184 /*
        * Converts a2 pixels from 16-bit halfwords at a1 into 32-bit words at a0:
        * two pixels per iteration of loop 1, then at most one trailing pixel at 2.
        * The bit-level conversion is done by the CONVERT_2x0565_TO_2x8888 and
        * CONVERT_1x0565_TO_1x8888 macros, which are defined outside this listing
        * (presumably pixman-mips-dspr2-asm.h - TODO confirm operand roles there).
        *
    185 * a0 - dst (a8r8g8b8)
    186 * a1 - src (r5g6b5)
    187 * a2 - w
    188 */
    189 
    190    beqz     a2, 3f          /* w == 0: nothing to do */
    191     nop
    192    addiu    t1, a2, -1
    193    beqz     t1, 2f          /* w == 1: skip the pair loop (constants below not loaded) */
    194     nop
    195    li       t4, 0x07e007e0  /* bits 10..5 of each halfword */
    196    li       t5, 0x001F001F  /* bits 4..0 of each halfword */
    197 1:
    198    lhu      t0, 0(a1)       /* two source pixels, zero-extended */
    199    lhu      t1, 2(a1)
    200    addiu    a1, a1, 4
    201    addiu    a2, a2, -2      /* two pixels consumed */
    202 
    203    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
    204 
    205    sw       t2, 0(a0)       /* t2/t3 are the words stored as the two results */
    206    sw       t3, 4(a0)
    207 
    208    addiu    t2, a2, -1
    209    bgtz     t2, 1b          /* loop while at least two pixels remain */
    210     addiu   a0, a0, 8
    211 2:
    212    beqz     a2, 3f          /* no odd pixel left */
    213     nop
    214    lhu      t0, 0(a1)
    215 
    216    CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
    217 
    218    sw       t1, 0(a0)       /* t1 is the word stored as the result */
    219 3:
    220    j        ra              /* return (written as j with a register operand) */
    221     nop
    222 
    223 END(pixman_composite_src_0565_8888_asm_mips)
    224 
    225 LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
    226 /*
        * Copies a2 32-bit pixels from a1 to a0 with 0xff000000 ORed into every
        * word (i.e. the top byte is forced to 0xff).  Pixels are handled in
        * groups of eight (loop 1 with prefetch, final group at 2 without), then
        * one at a time in loop 3.
        *
    227 * a0 - dst (a8r8g8b8)
    228 * a1 - src (x8r8g8b8)
    229 * a2 - w
    230 */
    231 
    232    beqz     a2, 4f       /* w == 0: nothing to do */
    233     nop
    234    li       t9, 0xff000000
    235    srl      t8, a2, 3    /* t8 = how many multiples of 8 src pixels */
    236    beqz     t8, 3f       /* branch if less than 8 src pixels */
    237     nop
    238 1:
    239    addiu    t8, t8, -1
    240    beqz     t8, 2f       /* last group of eight: use the copy at 2 (no pref) */
    241     addiu   a2, a2, -8   /* eight pixels accounted for (runs on both paths) */
    242    pref     0, 32(a1)    /* prefetch hint 0 on the following source block */
    243    lw       t0, 0(a1)
    244    lw       t1, 4(a1)
    245    lw       t2, 8(a1)
    246    lw       t3, 12(a1)
    247    lw       t4, 16(a1)
    248    lw       t5, 20(a1)
    249    lw       t6, 24(a1)
    250    lw       t7, 28(a1)
    251    addiu    a1, a1, 32
    252    or       t0, t0, t9
    253    or       t1, t1, t9
    254    or       t2, t2, t9
    255    or       t3, t3, t9
    256    or       t4, t4, t9
    257    or       t5, t5, t9
    258    or       t6, t6, t9
    259    or       t7, t7, t9
    260    pref     30, 32(a0)   /* prefetch hint 30 on the following destination block */
    261    sw       t0, 0(a0)
    262    sw       t1, 4(a0)
    263    sw       t2, 8(a0)
    264    sw       t3, 12(a0)
    265    sw       t4, 16(a0)
    266    sw       t5, 20(a0)
    267    sw       t6, 24(a0)
    268    sw       t7, 28(a0)
    269    b        1b
    270     addiu   a0, a0, 32
    271 2:
    272    lw       t0, 0(a1)
    273    lw       t1, 4(a1)
    274    lw       t2, 8(a1)
    275    lw       t3, 12(a1)
    276    lw       t4, 16(a1)
    277    lw       t5, 20(a1)
    278    lw       t6, 24(a1)
    279    lw       t7, 28(a1)
    280    addiu    a1, a1, 32
    281    or       t0, t0, t9
    282    or       t1, t1, t9
    283    or       t2, t2, t9
    284    or       t3, t3, t9
    285    or       t4, t4, t9
    286    or       t5, t5, t9
    287    or       t6, t6, t9
    288    or       t7, t7, t9
    289    sw       t0, 0(a0)
    290    sw       t1, 4(a0)
    291    sw       t2, 8(a0)
    292    sw       t3, 12(a0)
    293    sw       t4, 16(a0)
    294    sw       t5, 20(a0)
    295    sw       t6, 24(a0)
    296    sw       t7, 28(a0)
    297    beqz     a2, 4f       /* w was a multiple of 8: done */
    298     addiu   a0, a0, 32
    299 3:
    300    lw       t0, 0(a1)    /* remaining pixels (a2 is non-zero here), one per iteration */
    301    addiu    a1, a1, 4
    302    addiu    a2, a2, -1
    303    or       t1, t0, t9
    304    sw       t1, 0(a0)
    305    bnez     a2, 3b
    306     addiu   a0, a0, 4
    307 4:
    308    jr       ra
    309     nop
    310 
    311 END(pixman_composite_src_x888_8888_asm_mips)
    312 
    313 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
    314 LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
    315 /*
        * Expands a2 packed 3-byte pixels at a1 into 32-bit words at a0 with the
        * top byte set to 0xff.  Only assembled for little-endian targets (see
        * the enclosing #if).
        *
        * Four pixels (12 source bytes, 16 destination bytes) are produced per
        * iteration using three word loads.  Because the source is only byte
        * aligned, there are four copies of the loop, selected by (a1 & 3):
        *   0: -> word loads at 0/4/8(a1)
        *   1: -> first pixel preloaded with lbu/lhu, word loads at 3/7/11(a1)
        *   2: -> first two bytes preloaded with lhu, word loads at 2/6/10(a1)
        *   3: -> first byte preloaded with lbu,     word loads at 1/5/9(a1)
        * so that every lw address is a multiple of 4.  In variants 1-3 the
        * bytes of the following pixel picked up by the last word load are
        * carried to the next iteration in t7.
        * NOTE(review): in variants 1/2/3 that last word load covers 3/2/1 bytes
        * beyond the 12 bytes consumed by the iteration (same aligned word).
        *
        * Leftover pixels (w & 3, or all of them when w < 4) go through the
        * byte-wise loop at 5.
        *
    316 * a0 - dst (a8r8g8b8)
    317 * a1 - src (b8g8r8)
    318 * a2 - w
    319 */
    320 
    321    beqz              a2, 6f
    322     nop
    323 
    324    lui               t8, 0xff00; /* t8 = 0xff000000, ORed into the top byte of each result */
    325    srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
    326    beqz              t9, 4f      /* branch if less than 4 src pixels */
    327     nop
    328 
    329    li                t0, 0x1
    330    li                t1, 0x2
    331    li                t2, 0x3
    332    andi              t3, a1, 0x3 /* t3 = source address modulo 4 */
    333    beq               t3, t0, 1f
    334     nop
    335    beq               t3, t1, 2f
    336     nop
    337    beq               t3, t2, 3f
    338     nop
    339 
    340 0:
    341    beqz              t9, 4f
    342     addiu            t9, t9, -1
    343    lw                t0, 0(a1)            /* t0 = R2 | B1 | G1 | R1 */
    344    lw                t1, 4(a1)            /* t1 = G3 | R3 | B2 | G2 */
    345    lw                t2, 8(a1)            /* t2 = B4 | G4 | R4 | B3 */
    346 
    347    addiu             a1, a1, 12
    348    addiu             a2, a2, -4
    349 
    350    wsbh              t0, t0               /* t0 = B1 | R2 | R1 | G1 */
    351    wsbh              t1, t1               /* t1 = R3 | G3 | G2 | B2 */
    352    wsbh              t2, t2               /* t2 = G4 | B4 | B3 | R4 */
    353 
    354    packrl.ph         t3, t1, t0           /* t3 = G2 | B2 | B1 | R2 */
    355    packrl.ph         t4, t0, t0           /* t4 = R1 | G1 | B1 | R2 */
    356    rotr              t3, t3, 16           /* t3 = B1 | R2 | G2 | B2 */
    357    or                t3, t3, t8           /* t3 = FF | R2 | G2 | B2 */
    358    srl               t4, t4, 8            /* t4 =  0 | R1 | G1 | B1 */
    359    or                t4, t4, t8           /* t4 = FF | R1 | G1 | B1 */
    360    packrl.ph         t5, t2, t1           /* t5 = B3 | R4 | R3 | G3 */
    361    rotr              t5, t5, 24           /* t5 = R4 | R3 | G3 | B3 */
    362    or                t5, t5, t8           /* t5 = FF | R3 | G3 | B3 */
    363    rotr              t2, t2, 16           /* t2 = B3 | R4 | G4 | B4 */
    364    or                t2, t2, t8           /* t2 = FF | R4 | G4 | B4 */
    365 
    366    sw                t4, 0(a0)
    367    sw                t3, 4(a0)
    368    sw                t5, 8(a0)
    369    sw                t2, 12(a0)
    370    b                 0b
    371     addiu            a0, a0, 16
    372 
    373 1:
    374    lbu               t6, 0(a1)            /* t6 =  0 |  0 |  0 | R1 */
    375    lhu               t7, 1(a1)            /* t7 =  0 |  0 | B1 | G1 */
    376    sll               t6, t6, 16           /* t6 =  0 | R1 |  0 | 0  */
    377    wsbh              t7, t7               /* t7 =  0 |  0 | G1 | B1 */
    378    or                t7, t6, t7           /* t7 =  0 | R1 | G1 | B1 */
    379 11:
    380    beqz              t9, 4f
    381     addiu            t9, t9, -1
    382    lw                t0, 3(a1)            /* t0 = R3 | B2 | G2 | R2 */
    383    lw                t1, 7(a1)            /* t1 = G4 | R4 | B3 | G3 */
    384    lw                t2, 11(a1)           /* t2 = B5 | G5 | R5 | B4 */
    385 
    386    addiu             a1, a1, 12
    387    addiu             a2, a2, -4
    388 
    389    wsbh              t0, t0               /* t0 = B2 | R3 | R2 | G2 */
    390    wsbh              t1, t1               /* t1 = R4 | G4 | G3 | B3 */
    391    wsbh              t2, t2               /* t2 = G5 | B5 | B4 | R5 */
    392 
    393    packrl.ph         t3, t1, t0           /* t3 = G3 | B3 | B2 | R3 */
    394    packrl.ph         t4, t2, t1           /* t4 = B4 | R5 | R4 | G4 */
    395    rotr              t0, t0, 24           /* t0 = R3 | R2 | G2 | B2 */
    396    rotr              t3, t3, 16           /* t3 = B2 | R3 | G3 | B3 */
    397    rotr              t4, t4, 24           /* t4 = R5 | R4 | G4 | B4 */
    398    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
    399    or                t0, t0, t8           /* t0 = FF | R2 | G2 | B2 */
    400    or                t3, t3, t8           /* t3 = FF | R3 | G3 | B3 */
    401    or                t4, t4, t8           /* t4 = FF | R4 | G4 | B4 */
    402 
    403    sw                t7, 0(a0)
    404    sw                t0, 4(a0)
    405    sw                t3, 8(a0)
    406    sw                t4, 12(a0)
    407    rotr              t7, t2, 16           /* t7 = xx | R5 | G5 | B5 */
    408    b                 11b
    409     addiu            a0, a0, 16
    410 
    411 2:
    412    lhu               t7, 0(a1)            /* t7 =  0 |  0 | G1 | R1 */
    413    wsbh              t7, t7               /* t7 =  0 |  0 | R1 | G1 */
    414 21:
    415    beqz              t9, 4f
    416     addiu            t9, t9, -1
    417    lw                t0, 2(a1)            /* t0 = B2 | G2 | R2 | B1 */
    418    lw                t1, 6(a1)            /* t1 = R4 | B3 | G3 | R3 */
    419    lw                t2, 10(a1)           /* t2 = G5 | R5 | B4 | G4 */
    420 
    421    addiu             a1, a1, 12
    422    addiu             a2, a2, -4
    423 
    424    wsbh              t0, t0               /* t0 = G2 | B2 | B1 | R2 */
    425    wsbh              t1, t1               /* t1 = B3 | R4 | R3 | G3 */
    426    wsbh              t2, t2               /* t2 = R5 | G5 | G4 | B4 */
    427 
    428    precr_sra.ph.w    t7, t0, 0            /* t7 = R1 | G1 | B1 | R2 */
    429    rotr              t0, t0, 16           /* t0 = B1 | R2 | G2 | B2 */
    430    packrl.ph         t3, t2, t1           /* t3 = G4 | B4 | B3 | R4 */
    431    rotr              t1, t1, 24           /* t1 = R4 | R3 | G3 | B3 */
    432    srl               t7, t7, 8            /* t7 =  0 | R1 | G1 | B1 */
    433    rotr              t3, t3, 16           /* t3 = B3 | R4 | G4 | B4 */
    434    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
    435    or                t0, t0, t8           /* t0 = FF | R2 | G2 | B2 */
    436    or                t1, t1, t8           /* t1 = FF | R3 | G3 | B3 */
    437    or                t3, t3, t8           /* t3 = FF | R4 | G4 | B4 */
    438 
    439    sw                t7, 0(a0)
    440    sw                t0, 4(a0)
    441    sw                t1, 8(a0)
    442    sw                t3, 12(a0)
    443    srl               t7, t2, 16           /* t7 =  0 |  0 | R5 | G5 */
    444    b                 21b
    445     addiu            a0, a0, 16
    446 
    447 3:
    448    lbu               t7, 0(a1)            /* t7 =  0 |  0 |  0 | R1 */
    449 31:
    450    beqz              t9, 4f
    451     addiu            t9, t9, -1
    452    lw                t0, 1(a1)            /* t0 = G2 | R2 | B1 | G1 */
    453    lw                t1, 5(a1)            /* t1 = B3 | G3 | R3 | B2 */
    454    lw                t2, 9(a1)            /* t2 = R5 | B4 | G4 | R4 */
    455 
    456    addiu             a1, a1, 12
    457    addiu             a2, a2, -4
    458 
    459    wsbh              t0, t0               /* t0 = R2 | G2 | G1 | B1 */
    460    wsbh              t1, t1               /* t1 = G3 | B3 | B2 | R3 */
    461    wsbh              t2, t2               /* t2 = B4 | R5 | R4 | G4 */
    462 
    463    precr_sra.ph.w    t7, t0, 0            /* t7 = xx | R1 | G1 | B1 */
    464    packrl.ph         t3, t1, t0           /* t3 = B2 | R3 | R2 | G2 */
    465    rotr              t1, t1, 16           /* t1 = B2 | R3 | G3 | B3 */
    466    rotr              t4, t2, 24           /* t4 = R5 | R4 | G4 | B4 */
    467    rotr              t3, t3, 24           /* t3 = R3 | R2 | G2 | B2 */
    468    or                t7, t7, t8           /* t7 = FF | R1 | G1 | B1 */
    469    or                t3, t3, t8           /* t3 = FF | R2 | G2 | B2 */
    470    or                t1, t1, t8           /* t1 = FF | R3 | G3 | B3 */
    471    or                t4, t4, t8           /* t4 = FF | R4 | G4 | B4 */
    472 
    473    sw                t7, 0(a0)
    474    sw                t3, 4(a0)
    475    sw                t1, 8(a0)
    476    sw                t4, 12(a0)
    477    srl               t7, t2, 16           /* t7 =  0 |  0 | xx | R5 */
    478    b                 31b
    479     addiu            a0, a0, 16
    480 
    481 4:
    482    beqz              a2, 6f               /* no leftover pixels */
    483     nop
    484 5:
    485    lbu               t0, 0(a1)            /* t0 =  0 | 0 | 0 | R */
    486    lbu               t1, 1(a1)            /* t1 =  0 | 0 | 0 | G */
    487    lbu               t2, 2(a1)            /* t2 =  0 | 0 | 0 | B */
    488    addiu             a1, a1, 3
    489 
    490    sll               t0, t0, 16           /* t0 =  0 | R | 0 | 0 */
    491    sll               t1, t1, 8            /* t1 =  0 | 0 | G | 0 */
    492 
    493    or                t2, t2, t1           /* t2 =  0 | 0 | G | B */
    494    or                t2, t2, t0           /* t2 =  0 | R | G | B */
    495    or                t2, t2, t8           /* t2 = FF | R | G | B */
    496 
    497    sw                t2, 0(a0)
    498    addiu             a2, a2, -1
    499    bnez              a2, 5b
    500     addiu            a0, a0, 4
    501 6:
    502    j                 ra                   /* return (written as j with a register operand) */
    503     nop
    504 
    505 END(pixman_composite_src_0888_8888_rev_asm_mips)
    506 
    507 LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
    508 /*
        * Converts a2 packed 3-byte pixels at a1 into 16-bit halfwords at a0.
        * Only assembled for little-endian targets (see the enclosing #if).
        *
        * Four pixels (12 source bytes, 8 destination bytes) are produced per
        * iteration: three word loads are rearranged into four x|R|G|B words,
        * which are then narrowed pairwise by CONVERT_2x8888_TO_2x0565 (macro
        * defined outside this listing - TODO confirm operand roles there).
        * As the source is only byte aligned there are four copies of the loop,
        * selected by (a1 & 3):
        *   0: -> word loads at 0/4/8(a1)
        *   1: -> first pixel preloaded with lbu/lhu, word loads at 3/7/11(a1)
        *   2: -> first two bytes preloaded with lhu, word loads at 2/6/10(a1)
        *   3: -> first byte preloaded with lbu,     word loads at 1/5/9(a1)
        * so that every lw address is a multiple of 4.  In variants 1-3 the
        * bytes of the following pixel picked up by the last word load are
        * carried to the next iteration in t5.
        * NOTE(review): in variants 1/2/3 that last word load covers 3/2/1 bytes
        * beyond the 12 bytes consumed by the iteration (same aligned word).
        *
        * Leftover pixels (w & 3, or all of them when w < 4) go through the
        * byte-wise loop at 5.  v0/v1 are handed to SAVE_REGS_ON_STACK /
        * RESTORE_REGS_FROM_STACK (macros defined elsewhere; presumably they
        * spill and reload those registers) and used as macro scratch operands.
        *
    509 * a0 - dst (r5g6b5)
    510 * a1 - src (b8g8r8)
    511 * a2 - w
    512 */
    513 
    514    SAVE_REGS_ON_STACK 0, v0, v1
    515    beqz              a2, 6f      /* w == 0: go straight to the restore + return */
    516     nop
    517 
    518    li                t6, 0xf800f800 /* bits 15..11 of each halfword */
    519    li                t7, 0x07e007e0 /* bits 10..5 of each halfword */
    520    li                t8, 0x001F001F /* bits 4..0 of each halfword */
    521    srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
    522    beqz              t9, 4f      /* branch if less than 4 src pixels */
    523     nop
    524 
    525    li                t0, 0x1
    526    li                t1, 0x2
    527    li                t2, 0x3
    528    andi              t3, a1, 0x3 /* t3 = source address modulo 4 */
    529    beq               t3, t0, 1f
    530     nop
    531    beq               t3, t1, 2f
    532     nop
    533    beq               t3, t2, 3f
    534     nop
    535 
    536 0:
    537    beqz              t9, 4f
    538     addiu            t9, t9, -1
    539    lw                t0, 0(a1)            /* t0 = R2 | B1 | G1 | R1 */
    540    lw                t1, 4(a1)            /* t1 = G3 | R3 | B2 | G2 */
    541    lw                t2, 8(a1)            /* t2 = B4 | G4 | R4 | B3 */
    542 
    543    addiu             a1, a1, 12
    544    addiu             a2, a2, -4
    545 
    546    wsbh              t0, t0               /* t0 = B1 | R2 | R1 | G1 */
    547    wsbh              t1, t1               /* t1 = R3 | G3 | G2 | B2 */
    548    wsbh              t2, t2               /* t2 = G4 | B4 | B3 | R4 */
    549 
    550    packrl.ph         t3, t1, t0           /* t3 = G2 | B2 | B1 | R2 */
    551    packrl.ph         t4, t0, t0           /* t4 = R1 | G1 | B1 | R2 */
    552    rotr              t3, t3, 16           /* t3 = B1 | R2 | G2 | B2 */
    553    srl               t4, t4, 8            /* t4 =  0 | R1 | G1 | B1 */
    554    packrl.ph         t5, t2, t1           /* t5 = B3 | R4 | R3 | G3 */
    555    rotr              t5, t5, 24           /* t5 = R4 | R3 | G3 | B3 */
    556    rotr              t2, t2, 16           /* t2 = B3 | R4 | G4 | B4 */
    557 
    558    CONVERT_2x8888_TO_2x0565 t4, t3, t4, t3, t6, t7, t8, v0, v1
    559    CONVERT_2x8888_TO_2x0565 t5, t2, t5, t2, t6, t7, t8, v0, v1
    560 
    561    sh                t4, 0(a0)
    562    sh                t3, 2(a0)
    563    sh                t5, 4(a0)
    564    sh                t2, 6(a0)
    565    b                 0b
    566     addiu            a0, a0, 8
    567 
    568 1:
    569    lbu               t4, 0(a1)            /* t4 =  0 |  0 |  0 | R1 */
    570    lhu               t5, 1(a1)            /* t5 =  0 |  0 | B1 | G1 */
    571    sll               t4, t4, 16           /* t4 =  0 | R1 |  0 | 0  */
    572    wsbh              t5, t5               /* t5 =  0 |  0 | G1 | B1 */
    573    or                t5, t4, t5           /* t5 =  0 | R1 | G1 | B1 */
    574 11:
    575    beqz              t9, 4f
    576     addiu            t9, t9, -1
    577    lw                t0, 3(a1)            /* t0 = R3 | B2 | G2 | R2 */
    578    lw                t1, 7(a1)            /* t1 = G4 | R4 | B3 | G3 */
    579    lw                t2, 11(a1)           /* t2 = B5 | G5 | R5 | B4 */
    580 
    581    addiu             a1, a1, 12
    582    addiu             a2, a2, -4
    583 
    584    wsbh              t0, t0               /* t0 = B2 | R3 | R2 | G2 */
    585    wsbh              t1, t1               /* t1 = R4 | G4 | G3 | B3 */
    586    wsbh              t2, t2               /* t2 = G5 | B5 | B4 | R5 */
    587 
    588    packrl.ph         t3, t1, t0           /* t3 = G3 | B3 | B2 | R3 */
    589    packrl.ph         t4, t2, t1           /* t4 = B4 | R5 | R4 | G4 */
    590    rotr              t0, t0, 24           /* t0 = R3 | R2 | G2 | B2 */
    591    rotr              t3, t3, 16           /* t3 = B2 | R3 | G3 | B3 */
    592    rotr              t4, t4, 24           /* t4 = R5 | R4 | G4 | B4 */
    593 
    594    CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
    595    CONVERT_2x8888_TO_2x0565 t3, t4, t3, t4, t6, t7, t8, v0, v1
    596 
    597    sh                t5, 0(a0)
    598    sh                t0, 2(a0)
    599    sh                t3, 4(a0)
    600    sh                t4, 6(a0)
    601    rotr              t5, t2, 16           /* t5 = xx | R5 | G5 | B5 */
    602    b                 11b
    603     addiu            a0, a0, 8
    604 
    605 2:
    606    lhu               t5, 0(a1)            /* t5 =  0 |  0 | G1 | R1 */
    607    wsbh              t5, t5               /* t5 =  0 |  0 | R1 | G1 */
    608 21:
    609    beqz              t9, 4f
    610     addiu            t9, t9, -1
    611    lw                t0, 2(a1)            /* t0 = B2 | G2 | R2 | B1 */
    612    lw                t1, 6(a1)            /* t1 = R4 | B3 | G3 | R3 */
    613    lw                t2, 10(a1)           /* t2 = G5 | R5 | B4 | G4 */
    614 
    615    addiu             a1, a1, 12
    616    addiu             a2, a2, -4
    617 
    618    wsbh              t0, t0               /* t0 = G2 | B2 | B1 | R2 */
    619    wsbh              t1, t1               /* t1 = B3 | R4 | R3 | G3 */
    620    wsbh              t2, t2               /* t2 = R5 | G5 | G4 | B4 */
    621 
    622    precr_sra.ph.w    t5, t0, 0            /* t5 = R1 | G1 | B1 | R2 */
    623    rotr              t0, t0, 16           /* t0 = B1 | R2 | G2 | B2 */
    624    packrl.ph         t3, t2, t1           /* t3 = G4 | B4 | B3 | R4 */
    625    rotr              t1, t1, 24           /* t1 = R4 | R3 | G3 | B3 */
    626    srl               t5, t5, 8            /* t5 =  0 | R1 | G1 | B1 */
    627    rotr              t3, t3, 16           /* t3 = B3 | R4 | G4 | B4 */
    628 
    629    CONVERT_2x8888_TO_2x0565 t5, t0, t5, t0, t6, t7, t8, v0, v1
    630    CONVERT_2x8888_TO_2x0565 t1, t3, t1, t3, t6, t7, t8, v0, v1
    631 
    632    sh                t5, 0(a0)
    633    sh                t0, 2(a0)
    634    sh                t1, 4(a0)
    635    sh                t3, 6(a0)
    636    srl               t5, t2, 16           /* t5 =  0 |  0 | R5 | G5 */
    637    b                 21b
    638     addiu            a0, a0, 8
    639 
    640 3:
    641    lbu               t5, 0(a1)            /* t5 =  0 |  0 |  0 | R1 */
    642 31:
    643    beqz              t9, 4f
    644     addiu            t9, t9, -1
    645    lw                t0, 1(a1)            /* t0 = G2 | R2 | B1 | G1 */
    646    lw                t1, 5(a1)            /* t1 = B3 | G3 | R3 | B2 */
    647    lw                t2, 9(a1)            /* t2 = R5 | B4 | G4 | R4 */
    648 
    649    addiu             a1, a1, 12
    650    addiu             a2, a2, -4
    651 
    652    wsbh              t0, t0               /* t0 = R2 | G2 | G1 | B1 */
    653    wsbh              t1, t1               /* t1 = G3 | B3 | B2 | R3 */
    654    wsbh              t2, t2               /* t2 = B4 | R5 | R4 | G4 */
    655 
    656    precr_sra.ph.w    t5, t0, 0            /* t5 = xx | R1 | G1 | B1 */
    657    packrl.ph         t3, t1, t0           /* t3 = B2 | R3 | R2 | G2 */
    658    rotr              t1, t1, 16           /* t1 = B2 | R3 | G3 | B3 */
    659    rotr              t4, t2, 24           /* t4 = R5 | R4 | G4 | B4 */
    660    rotr              t3, t3, 24           /* t3 = R3 | R2 | G2 | B2 */
    661 
    662    CONVERT_2x8888_TO_2x0565 t5, t3, t5, t3, t6, t7, t8, v0, v1
    663    CONVERT_2x8888_TO_2x0565 t1, t4, t1, t4, t6, t7, t8, v0, v1
    664 
    665    sh                t5, 0(a0)
    666    sh                t3, 2(a0)
    667    sh                t1, 4(a0)
    668    sh                t4, 6(a0)
    669    srl               t5, t2, 16           /* t5 =  0 |  0 | xx | R5 */
    670    b                 31b
    671     addiu            a0, a0, 8
    672 
    673 4:
    674    beqz              a2, 6f               /* no leftover pixels */
    675     nop
    676 5:
    677    lbu               t0, 0(a1)            /* t0 =  0 | 0 | 0 | R */
    678    lbu               t1, 1(a1)            /* t1 =  0 | 0 | 0 | G */
    679    lbu               t2, 2(a1)            /* t2 =  0 | 0 | 0 | B */
    680    addiu             a1, a1, 3
    681 
    682    sll               t0, t0, 16           /* t0 =  0 | R | 0 | 0 */
    683    sll               t1, t1, 8            /* t1 =  0 | 0 | G | 0 */
    684 
    685    or                t2, t2, t1           /* t2 =  0 | 0 | G | B */
    686    or                t2, t2, t0           /* t2 =  0 | R | G | B */
    687 
    688    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
    689 
    690    sh                t3, 0(a0)            /* t3 is the halfword stored as the result */
    691    addiu             a2, a2, -1
    692    bnez              a2, 5b
    693     addiu            a0, a0, 2
    694 6:
    695    RESTORE_REGS_FROM_STACK 0, v0, v1
    696    j                 ra                   /* return (written as j with a register operand) */
    697     nop
    698 
    699 END(pixman_composite_src_0888_0565_rev_asm_mips)
    700 #endif
    701 
    702 LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
    703 /*
        * Processes a2 32-bit pixels from a1 to a0, two per iteration of loop 1
        * plus at most one trailing pixel at 2.  Per pixel:
        *   1. the top byte of the source word is extracted (srl by 24);
        *   2. the source word and that byte go through MIPS_2xUN8x4_MUL_2xUN8 /
        *      MIPS_UN8x4_MUL_UN8 (macros defined outside this listing;
        *      presumably a per-channel multiply by that byte - TODO confirm);
        *   3. the macro result is shifted left by 8 and the original top byte
        *      is ORed into the low byte;
        *   4. wsbh (swap bytes within each halfword) followed by rotr 16
        *      (swap halfwords) reverses the byte order of the word, which is
        *      then stored.
        * v0 holds the constant 0x00ff00ff passed to the macros and is wrapped
        * in SAVE_REGS_ON_STACK / RESTORE_REGS_FROM_STACK (defined elsewhere).
        *
    704 * a0 - dst  (a8b8g8r8)
    705 * a1 - src  (a8r8g8b8)
    706 * a2 - w
    707 */
    708 
    709    SAVE_REGS_ON_STACK 0, v0
    710    li       v0, 0x00ff00ff
    711 
    712    beqz     a2, 3f          /* w == 0: go straight to the restore + return */
    713     nop
    714    addiu    t1, a2, -1
    715    beqz     t1, 2f          /* w == 1: skip the pair loop */
    716     nop
    717 1:
    718    lw       t0, 0(a1)
    719    lw       t1, 4(a1)
    720    addiu    a1, a1, 8
    721    addiu    a2, a2, -2      /* two pixels consumed */
    722    srl      t2, t0, 24      /* t2 = top byte of first pixel */
    723    srl      t3, t1, 24      /* t3 = top byte of second pixel */
    724 
    725    MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
    726 
    727    sll      t0, t0, 8       /* drop the result's top byte, make room in the low byte */
    728    sll      t1, t1, 8
    729    andi     t2, t2, 0xff
    730    andi     t3, t3, 0xff
    731    or       t0, t0, t2      /* low byte = original top byte */
    732    or       t1, t1, t3
    733    wsbh     t0, t0          /* wsbh + rotr 16 = full byte reversal of the word */
    734    wsbh     t1, t1
    735    rotr     t0, t0, 16
    736    rotr     t1, t1, 16
    737    sw       t0, 0(a0)
    738    sw       t1, 4(a0)
    739 
    740    addiu    t2, a2, -1
    741    bgtz     t2, 1b          /* loop while at least two pixels remain */
    742     addiu   a0, a0, 8
    743 2:
    744    beqz     a2, 3f          /* no odd pixel left */
    745     nop
    746    lw       t0, 0(a1)
    747    srl      t1, t0, 24      /* t1 = top byte of the pixel */
    748 
    749    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
    750 
    751    sll      t0, t0, 8
    752    andi     t1, t1, 0xff
    753    or       t0, t0, t1      /* low byte = original top byte */
    754    wsbh     t0, t0          /* wsbh + rotr 16 = full byte reversal of the word */
    755    rotr     t0, t0, 16
    756    sw       t0, 0(a0)
    757 3:
    758    RESTORE_REGS_FROM_STACK 0, v0
    759    j        ra              /* return (written as j with a register operand) */
    760     nop
    761 
    762 END(pixman_composite_src_pixbuf_8888_asm_mips)
    763 
    764 LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
    765 /*
        * Processes a2 32-bit pixels from a1 to a0, two per iteration of loop 1
        * plus at most one trailing pixel at 2.  Per pixel:
        *   1. the top byte of the source word is extracted (srl by 24);
        *   2. the source word and that byte go through MIPS_2xUN8x4_MUL_2xUN8 /
        *      MIPS_UN8x4_MUL_UN8 (macros defined outside this listing;
        *      presumably a per-channel multiply by that byte - TODO confirm);
        *   3. the macro result is shifted left by 8, the original top byte is
        *      ORed into the low byte, and the word is rotated right by 8, so
        *      the stored word is the macro result's low 24 bits with the
        *      original top byte back in the top position.
        * v0 holds the constant 0x00ff00ff passed to the macros and is wrapped
        * in SAVE_REGS_ON_STACK / RESTORE_REGS_FROM_STACK (defined elsewhere).
        *
    766 * a0 - dst  (a8r8g8b8)
    767 * a1 - src  (a8r8g8b8)
    768 * a2 - w
    769 */
    770 
    771    SAVE_REGS_ON_STACK 0, v0
    772    li       v0, 0x00ff00ff
    773 
    774    beqz     a2, 3f          /* w == 0: go straight to the restore + return */
    775     nop
    776    addiu    t1, a2, -1
    777    beqz     t1, 2f          /* w == 1: skip the pair loop */
    778     nop
    779 1:
    780    lw       t0, 0(a1)
    781    lw       t1, 4(a1)
    782    addiu    a1, a1, 8
    783    addiu    a2, a2, -2      /* two pixels consumed */
    784    srl      t2, t0, 24      /* t2 = top byte of first pixel */
    785    srl      t3, t1, 24      /* t3 = top byte of second pixel */
    786 
    787    MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
    788 
    789    sll      t0, t0, 8       /* drop the result's top byte, make room in the low byte */
    790    sll      t1, t1, 8
    791    andi     t2, t2, 0xff
    792    andi     t3, t3, 0xff
    793    or       t0, t0, t2      /* low byte = original top byte */
    794    or       t1, t1, t3
    795    rotr     t0, t0, 8       /* rotate that byte back into the top position */
    796    rotr     t1, t1, 8
    797    sw       t0, 0(a0)
    798    sw       t1, 4(a0)
    799 
    800    addiu    t2, a2, -1
    801    bgtz     t2, 1b          /* loop while at least two pixels remain */
    802     addiu   a0, a0, 8
    803 2:
    804    beqz     a2, 3f          /* no odd pixel left */
    805     nop
    806    lw       t0, 0(a1)
    807    srl      t1, t0, 24      /* t1 = top byte of the pixel */
    808 
    809    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
    810 
    811    sll      t0, t0, 8
    812    andi     t1, t1, 0xff
    813    or       t0, t0, t1      /* low byte = original top byte */
    814    rotr     t0, t0, 8       /* rotate that byte back into the top position */
    815    sw       t0, 0(a0)
    816 3:
    817    RESTORE_REGS_FROM_STACK 0, v0
    818    j        ra              /* return (written as j with a register operand) */
    819     nop
    820 
    821 END(pixman_composite_src_rpixbuf_8888_asm_mips)
    822 
    823 LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
    824 /*
        * For each of the a3 mask bytes at a2, combines the constant in a1 with
        * that byte via MIPS_2xUN8x4_MUL_2xUN8 / MIPS_UN8x4_MUL_UN8 (macros
        * defined outside this listing; presumably a per-channel multiply of a1
        * by the mask byte - TODO confirm) and stores the 32-bit result at a0.
        * Two pixels per iteration of loop 1, then at most one trailing pixel
        * at 2.  v0 holds the constant 0x00ff00ff passed to the macros and is
        * wrapped in SAVE_REGS_ON_STACK / RESTORE_REGS_FROM_STACK (defined
        * elsewhere).
        *
    825 * a0 - dst  (a8r8g8b8)
    826 * a1 - src  (32bit constant)
    827 * a2 - mask (a8)
    828 * a3 - w
    829 */
    830 
    831 
    832    SAVE_REGS_ON_STACK 0, v0
    833    li       v0, 0x00ff00ff
    834 
    835    beqz     a3, 3f    /* w == 0: go straight to the restore + return */
    836     nop
    837    addiu    t1, a3, -1
    838    beqz     t1, 2f    /* w == 1: skip the pair loop */
    839     nop
    840 
    841 1:
    842                       /* a1 = source      (32bit constant) */
    843    lbu      t0, 0(a2) /* t0 = mask        (a8) */
    844    lbu      t1, 1(a2) /* t1 = mask        (a8) */
    845    addiu    a2, a2, 2
    846 
    847    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9
    848 
    849    sw       t2, 0(a0) /* t2/t3 are the words stored as the two results */
    850    sw       t3, 4(a0)
    851    addiu    a3, a3, -2
    852    addiu    t2, a3, -1
    853    bgtz     t2, 1b    /* loop while at least two pixels remain */
    854     addiu   a0, a0, 8
    855 
    856    beqz     a3, 3f    /* even w: done; otherwise fall through to the last pixel */
    857     nop
    858 
    859 2:
    860    lbu      t0, 0(a2)
    861    addiu    a2, a2, 1
    862 
    863    MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5
    864 
    865    sw       t1, 0(a0) /* t1 is the word stored as the result */
    866    addiu    a3, a3, -1 /* a3/a0 updates below are not read again before the return */
    867    addiu    a0, a0, 4
    868 
    869 3:
    870    RESTORE_REGS_FROM_STACK 0, v0
    871    j        ra        /* return (written as j with a register operand) */
    872     nop
    873 
    874 END(pixman_composite_src_n_8_8888_asm_mips)
    875 
    876 LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
    877 /*
    878 * a0 - dst  (a8)
    879 * a1 - src  (32bit constant)
    880 * a2 - mask (a8)
    881 * a3 - w
 *
 * Writes w bytes to dst; each byte is the mask byte multiplied by the top
 * byte of src (a1 >> 24) and scaled back to 8 bits (dst is never read).
 *
 * Loop 0 handles four pixels per iteration with packed DSP arithmetic;
 * loop 2 handles the remaining (w % 4) pixels one at a time.
 * Only caller-saved t registers are used, so nothing is saved on the stack.
    882 */
    883 
    884    li                t9, 0x00ff00ff
    885    beqz              a3, 3f
    886     nop
    887    srl               t7, a3, 2   /* t7 = how many multiples of 4 dst pixels */
    888    beqz              t7, 1f      /* branch if less than 4 src pixels */
    889     nop
    890 
    /* t8 = src alpha, replicated into both halfwords for the packed multiply */
    891    srl               t8, a1, 24
    892    replv.ph          t8, t8
    893 
    894 0:
    895    beqz              t7, 1f
    896     addiu            t7, t7, -1
    897    lbu               t0, 0(a2)
    898    lbu               t1, 1(a2)
    899    lbu               t2, 2(a2)
    900    lbu               t3, 3(a2)
    901 
    902    addiu             a2, a2, 4
    903 
    /* pack the four mask bytes into the single word t0 */
    904    precr_sra.ph.w    t1, t0, 0
    905    precr_sra.ph.w    t3, t2, 0
    906    precr.qb.ph       t0, t3, t1
    907 
    /* per byte: x = mask * alpha; result = (x + (round(x >> 8) & 0xff)),
     * then a rounding shift right by 8 (appears to be the usual x / 255
     * approximation) */
    908    muleu_s.ph.qbl    t2, t0, t8
    909    muleu_s.ph.qbr    t3, t0, t8
    910    shra_r.ph         t4, t2, 8
    911    shra_r.ph         t5, t3, 8
    912    and               t4, t4, t9
    913    and               t5, t5, t9
    914    addq.ph           t2, t2, t4
    915    addq.ph           t3, t3, t5
    916    shra_r.ph         t2, t2, 8
    917    shra_r.ph         t3, t3, 8
    918    precr.qb.ph       t2, t2, t3
    919 
    /* store the four result bytes one at a time, lowest byte first */
    920    sb                t2, 0(a0)
    921    srl               t2, t2, 8
    922    sb                t2, 1(a0)
    923    srl               t2, t2, 8
    924    sb                t2, 2(a0)
    925    srl               t2, t2, 8
    926    sb                t2, 3(a0)
    927    addiu             a3, a3, -4
    928    b                 0b
    929     addiu            a0, a0, 4
    930 
    931 1:
    932    beqz              a3, 3f
    933     nop
    /* reload the scalar alpha: t8 may hold the replicated value from above */
    934    srl               t8, a1, 24
    935 2:
    936    lbu               t0, 0(a2)
    937    addiu             a2, a2, 1
    938 
    /* same multiply-and-scale sequence as above, on a single value */
    939    mul               t2, t0, t8
    940    shra_r.ph         t3, t2, 8
    941    andi              t3, t3, 0x00ff
    942    addq.ph           t2, t2, t3
    943    shra_r.ph         t2, t2, 8
    944 
    945    sb                t2, 0(a0)
    946    addiu             a3, a3, -1
    947    bnez              a3, 2b
    948     addiu            a0, a0, 1
    949 
    950 3:
    951    j                 ra
    952     nop
    953 
    954 END(pixman_composite_src_n_8_8_asm_mips)
    955 
    956 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
    957 /*
    958 * a0 - dst  (a8r8g8b8)
    959 * a1 - src  (32bit constant)
    960 * a2 - mask (a8r8g8b8)
    961 * a3 - w
 *
 * Blends the constant src onto dst using a 32-bit mask word per pixel.
 * Loop 0 handles two pixels per iteration, label 4 handles the last single
 * pixel.  Three cases are distinguished per pixel (pair):
 *   - mask == 0:          dst is left untouched
 *   - mask == 0xffffffff: dst = src if srca == 0xff, otherwise
 *                         dst = src + dst * (~src >> 24)  (saturating add)
 *   - anything else:      dst = src * mask + dst * ~(mask * srca)
 *                         (saturating add)
 * The multiplies are done by MIPS_*UN8x4_MUL_* macros defined outside this
 * section; the formulas above follow the macro names -- TODO confirm
 * against the macro definitions.
    962 */
    963 
    /* w == 0: return through label 8, before any registers are saved */
    964    beqz         a3, 8f
    965     nop
    966    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
    967 
    968    li           t6, 0xff
    969    addiu        t7, zero, -1 /* t7 = 0xffffffff */
    970    srl          t8, a1, 24   /* t8 = srca */
    971    li           t9, 0x00ff00ff
    972 
    973    addiu        t1, a3, -1
    974    beqz         t1, 4f       /* last pixel */
    975     nop
    976 
    977 0:
    978    lw           t0, 0(a2)    /* t0 = mask */
    979    lw           t1, 4(a2)    /* t1 = mask */
    980    addiu        a3, a3, -2   /* w = w - 2 */
    981    or           t2, t0, t1
    982    beqz         t2, 3f      /* if (t0 == 0) && (t1 == 0) */
    983     addiu       a2, a2, 8
    984    and          t2, t0, t1
    985    beq          t2, t7, 1f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
    986     nop
    987 
    988 //if(ma)
    989    lw           t2, 0(a0)    /* t2 = dst */
    990    lw           t3, 4(a0)    /* t3 = dst */
    /* t4/t5 = src * mask;  t0/t1 = ~(mask * srca);  t2/t3 = dst * t0/t1 */
    991    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
    992    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
    993    not          t0, t0
    994    not          t1, t1
    995    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
    996    addu_s.qb    t2, t4, t2
    997    addu_s.qb    t3, t5, t3
    998    sw           t2, 0(a0)
    999    sw           t3, 4(a0)
   1000    addiu        t1, a3, -1
   1001    bgtz         t1, 0b
   1002     addiu       a0, a0, 8
   1003    b            4f
   1004     nop
   1005 1:
   1006 //if (t0 == 0xffffffff) && (t1 == 0xffffffff):
   1007    beq          t8, t6, 2f   /* if (srca == 0xff) */
   1008     nop
   1009    lw           t2, 0(a0)    /* t2 = dst */
   1010    lw           t3, 4(a0)    /* t3 = dst */
    /* t0 = t1 = ~src >> 24, used to scale dst before adding src */
   1011    not          t0, a1
   1012    not          t1, a1
   1013    srl          t0, t0, 24
   1014    srl          t1, t1, 24
   1015    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
   1016    addu_s.qb    t2, a1, t2
   1017    addu_s.qb    t3, a1, t3
   1018    sw           t2, 0(a0)
   1019    sw           t3, 4(a0)
   1020    addiu        t1, a3, -1
   1021    bgtz         t1, 0b
   1022     addiu       a0, a0, 8
   1023    b            4f
   1024     nop
   1025 2:
    /* full mask and srca == 0xff: store src directly */
   1026    sw           a1, 0(a0)
   1027    sw           a1, 4(a0)
   1028 3:
    /* shared loop tail; also reached directly when both masks are zero */
   1029    addiu        t1, a3, -1
   1030    bgtz         t1, 0b
   1031     addiu       a0, a0, 8
   1032 
   1033 4:
    /* single remaining pixel (if any); same three cases as the loop */
   1034    beqz         a3, 7f
   1035     nop
   1036                              /* a1 = src */
   1037    lw           t0, 0(a2)    /* t0 = mask */
   1038    beqz         t0, 7f       /* if (t0 == 0) */
   1039     nop
   1040    beq          t0, t7, 5f  /* if (t0 == 0xffffffff) */
   1041     nop
   1042 //if(ma)
   1043    lw           t1, 0(a0)    /* t1 = dst */
   1044    MIPS_UN8x4_MUL_UN8x4  a1, t0, t2, t9, t3, t4, t5, s0
   1045    MIPS_UN8x4_MUL_UN8    t0, t8, t0, t9, t3, t4, t5
   1046    not          t0, t0
   1047    MIPS_UN8x4_MUL_UN8x4  t1, t0, t1, t9, t3, t4, t5, s0
   1048    addu_s.qb    t1, t2, t1
   1049    sw           t1, 0(a0)
   1050    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
   1051    j            ra
   1052     nop
   1053 5:
   1054 //if (t0 == 0xffffffff)
   1055    beq          t8, t6, 6f   /* if (srca == 0xff) */
   1056     nop
   1057    lw           t1, 0(a0)    /* t1 = dst */
   1058    not          t0, a1
   1059    srl          t0, t0, 24
   1060    MIPS_UN8x4_MUL_UN8 t1, t0, t1, t9, t2, t3, t4
   1061    addu_s.qb    t1, a1, t1
   1062    sw           t1, 0(a0)
   1063    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
   1064    j            ra
   1065     nop
   1066 6:
   1067    sw           a1, 0(a0)
   1068 7:
   1069    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
   1070 8:
   1071    j            ra
   1072     nop
   1073 
   1074 END(pixman_composite_over_n_8888_8888_ca_asm_mips)
   1075 
   1076 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
   1077 /*
   1078 * a0 - dst  (r5g6b5)
   1079 * a1 - src  (32bit constant)
   1080 * a2 - mask (a8r8g8b8)
   1081 * a3 - w
 *
 * Blends the constant src onto a 16-bit dst using a 32-bit mask word per
 * pixel.  dst pixels are expanded with CONVERT_*0565_TO_*8888, blended as
 * 32-bit values and packed back with CONVERT_*8888_TO_*0565 before the
 * halfword store.  Loop 0 handles two pixels per iteration, label 4 the
 * last single pixel.  Per pixel (pair):
 *   - mask == 0:          dst is left untouched
 *   - mask == 0xffffffff: dst = src if srca == 0xff, otherwise
 *                         dst = src + dst * (~src >> 24)  (saturating add)
 *   - anything else:      dst = src * mask + dst * ~(mask * srca)
 *                         (saturating add)
 * All MIPS_* / CONVERT_* macros are defined outside this section; the
 * formulas above follow the macro names -- TODO confirm against the macro
 * definitions.
   1082 */
   1083 
    /* w == 0: return through label 8, before any registers are saved */
   1084    beqz         a3, 8f
   1085     nop
   1086    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
   1087 
   1088    li           t6, 0xff
   1089    addiu        t7, zero, -1 /* t7 = 0xffffffff */
   1090    srl          t8, a1, 24   /* t8 = srca */
   1091    li           t9, 0x00ff00ff
    /* s6/s7/s8: constants handed to the CONVERT_2x* macros (they match the
     * r5, g6 and b5 field positions of two packed 0565 pixels) */
   1092    li           s6, 0xf800f800
   1093    li           s7, 0x07e007e0
   1094    li           s8, 0x001F001F
   1095 
   1096    addiu        t1, a3, -1
   1097    beqz         t1, 4f       /* last pixel */
   1098     nop
   1099 
   1100 0:
   1101    lw           t0, 0(a2)    /* t0 = mask */
   1102    lw           t1, 4(a2)    /* t1 = mask */
   1103    addiu        a3, a3, -2   /* w = w - 2 */
   1104    or           t2, t0, t1
   1105    beqz         t2, 3f      /* if (t0 == 0) && (t1 == 0) */
   1106     addiu       a2, a2, 8
   1107    and          t2, t0, t1
   1108    beq          t2, t7, 1f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
   1109     nop
   1110 
   1111 //if(ma)
   1112    lhu          t2, 0(a0)    /* t2 = dst */
   1113    lhu          t3, 2(a0)    /* t3 = dst */
    /* t4/t5 = src * mask;  t0/t1 = ~(mask * srca);  t2/t3 = dst8888 * t0/t1 */
   1114    MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
   1115    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
   1116    not          t0, t0
   1117    not          t1, t1
   1118    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
   1119    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
   1120    addu_s.qb    t2, t4, t2
   1121    addu_s.qb    t3, t5, t3
   1122    CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
   1123    sh           t2, 0(a0)
   1124    sh           t3, 2(a0)
   1125    addiu        t1, a3, -1
   1126    bgtz         t1, 0b
   1127     addiu       a0, a0, 4
   1128    b            4f
   1129     nop
   1130 1:
   1131 //if (t0 == 0xffffffff) && (t1 == 0xffffffff):
   1132    beq          t8, t6, 2f   /* if (srca == 0xff) */
   1133     nop
   1134    lhu          t2, 0(a0)    /* t2 = dst */
   1135    lhu          t3, 2(a0)    /* t3 = dst */
    /* t0 = t1 = ~src >> 24, used to scale dst before adding src */
   1136    not          t0, a1
   1137    not          t1, a1
   1138    srl          t0, t0, 24
   1139    srl          t1, t1, 24
   1140    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, s7, s8, s0, s1, s2, s3
   1141    MIPS_2xUN8x4_MUL_2xUN8   t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
   1142    addu_s.qb    t2, a1, t2
   1143    addu_s.qb    t3, a1, t3
   1144    CONVERT_2x8888_TO_2x0565 t2, t3, t2, t3, s6, s7, s8, s0, s1
   1145    sh           t2, 0(a0)
   1146    sh           t3, 2(a0)
   1147    addiu        t1, a3, -1
   1148    bgtz         t1, 0b
   1149     addiu       a0, a0, 4
   1150    b            4f
   1151     nop
   1152 2:
    /* full mask and srca == 0xff: store the converted src to both pixels */
   1153    CONVERT_1x8888_TO_1x0565 a1, t2, s0, s1
   1154    sh           t2, 0(a0)
   1155    sh           t2, 2(a0)
   1156 3:
    /* shared loop tail; also reached directly when both masks are zero */
   1157    addiu        t1, a3, -1
   1158    bgtz         t1, 0b
   1159     addiu       a0, a0, 4
   1160 
   1161 4:
    /* single remaining pixel (if any); same three cases as the loop */
   1162    beqz         a3, 7f
   1163     nop
   1164                              /* a1 = src */
   1165    lw           t0, 0(a2)    /* t0 = mask */
   1166    beqz         t0, 7f       /* if (t0 == 0) */
   1167     nop
   1168    beq          t0, t7, 5f  /* if (t0 == 0xffffffff) */
   1169     nop
   1170 //if(ma)
   1171    lhu          t1, 0(a0)    /* t1 = dst */
   1172    MIPS_UN8x4_MUL_UN8x4     a1, t0, t2, t9, t3, t4, t5, s0
   1173    MIPS_UN8x4_MUL_UN8       t0, t8, t0, t9, t3, t4, t5
   1174    not          t0, t0
   1175    CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
   1176    MIPS_UN8x4_MUL_UN8x4     s1, t0, s1, t9, t3, t4, t5, s0
   1177    addu_s.qb    s1, t2, s1
   1178    CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
   1179    sh           t1, 0(a0)
   1180    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
   1181    j            ra
   1182     nop
   1183 5:
   1184 //if (t0 == 0xffffffff)
   1185    beq          t8, t6, 6f   /* if (srca == 0xff) */
   1186     nop
   1187    lhu          t1, 0(a0)    /* t1 = dst */
   1188    not          t0, a1
   1189    srl          t0, t0, 24
   1190    CONVERT_1x0565_TO_1x8888 t1, s1, s2, s3
   1191    MIPS_UN8x4_MUL_UN8       s1, t0, s1, t9, t2, t3, t4
   1192    addu_s.qb    s1, a1, s1
   1193    CONVERT_1x8888_TO_1x0565 s1, t1, s0, s2
   1194    sh           t1, 0(a0)
   1195    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
   1196    j            ra
   1197     nop
   1198 6:
   1199    CONVERT_1x8888_TO_1x0565 a1, t1, s0, s2
   1200    sh           t1, 0(a0)
   1201 7:
   1202    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7, s8
   1203 8:
   1204    j            ra
   1205     nop
   1206 
   1207 END(pixman_composite_over_n_8888_0565_ca_asm_mips)
   1208 
   1209 LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
   1210 /*
   1211 * a0 - dst  (a8)
   1212 * a1 - src  (32bit constant)
   1213 * a2 - mask (a8)
   1214 * a3 - w
 *
 * For each of the w bytes:
 *   s   = mask * (a1 >> 24), scaled back to 8 bits
 *   dst = s + dst * ~s (scaled back to 8 bits), with a saturating add
 *
 * Loop 0 handles four pixels per iteration with packed DSP arithmetic;
 * loop 2 handles the remaining (w % 4) pixels one at a time.
   1215 */
   1216 
   1217    SAVE_REGS_ON_STACK 0, v0
   1218    li                t9, 0x00ff00ff
   1219    beqz              a3, 3f
   1220     nop
   1221    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
   1222    beqz              v0, 1f      /* branch if less than 4 src pixels */
   1223     nop
   1224 
    /* t8 = src alpha, replicated into both halfwords for the packed multiply */
   1225    srl               t8, a1, 24
   1226    replv.ph          t8, t8
   1227 
   1228 0:
   1229    beqz              v0, 1f
   1230     addiu            v0, v0, -1
   1231    lbu               t0, 0(a2)
   1232    lbu               t1, 1(a2)
   1233    lbu               t2, 2(a2)
   1234    lbu               t3, 3(a2)
   1235    lbu               t4, 0(a0)
   1236    lbu               t5, 1(a0)
   1237    lbu               t6, 2(a0)
   1238    lbu               t7, 3(a0)
   1239 
   1240    addiu             a2, a2, 4
   1241 
   1242    precr_sra.ph.w    t1, t0, 0
   1243    precr_sra.ph.w    t3, t2, 0
   1244    precr_sra.ph.w    t5, t4, 0
   1245    precr_sra.ph.w    t7, t6, 0
   1246 
    /* t0 = four packed mask bytes, t1 = four packed dst bytes */
   1247    precr.qb.ph       t0, t3, t1
   1248    precr.qb.ph       t1, t7, t5
   1249 
    /* t0 = mask * alpha, scaled back to 8 bits per byte */
   1250    muleu_s.ph.qbl    t2, t0, t8
   1251    muleu_s.ph.qbr    t3, t0, t8
   1252    shra_r.ph         t4, t2, 8
   1253    shra_r.ph         t5, t3, 8
   1254    and               t4, t4, t9
   1255    and               t5, t5, t9
   1256    addq.ph           t2, t2, t4
   1257    addq.ph           t3, t3, t5
   1258    shra_r.ph         t2, t2, 8
   1259    shra_r.ph         t3, t3, 8
   1260    precr.qb.ph       t0, t2, t3
   1261    not               t6, t0
   1262 
    /* expand the inverted values to halfwords: t7 = upper two bytes,
     * t6 = lower two bytes */
   1263    preceu.ph.qbl     t7, t6
   1264    preceu.ph.qbr     t6, t6
   1265 
    /* t1 = dst * ~(mask * alpha), scaled back to 8 bits per byte */
   1266    muleu_s.ph.qbl    t2, t1, t7
   1267    muleu_s.ph.qbr    t3, t1, t6
   1268    shra_r.ph         t4, t2, 8
   1269    shra_r.ph         t5, t3, 8
   1270    and               t4, t4, t9
   1271    and               t5, t5, t9
   1272    addq.ph           t2, t2, t4
   1273    addq.ph           t3, t3, t5
   1274    shra_r.ph         t2, t2, 8
   1275    shra_r.ph         t3, t3, 8
   1276    precr.qb.ph       t1, t2, t3
   1277 
   1278    addu_s.qb         t2, t0, t1
   1279 
    /* store the four result bytes one at a time, lowest byte first */
   1280    sb                t2, 0(a0)
   1281    srl               t2, t2, 8
   1282    sb                t2, 1(a0)
   1283    srl               t2, t2, 8
   1284    sb                t2, 2(a0)
   1285    srl               t2, t2, 8
   1286    sb                t2, 3(a0)
   1287    addiu             a3, a3, -4
   1288    b                 0b
   1289     addiu            a0, a0, 4
   1290 
   1291 1:
   1292    beqz              a3, 3f
   1293     nop
    /* reload the scalar alpha: t8 may hold the replicated value from above */
   1294    srl               t8, a1, 24
   1295 2:
   1296    lbu               t0, 0(a2)
   1297    lbu               t1, 0(a0)
   1298    addiu             a2, a2, 1
   1299 
    /* t2 = mask * alpha scaled to 8 bits;  t3 = ~t2 & 0xff */
   1300    mul               t2, t0, t8
   1301    shra_r.ph         t3, t2, 8
   1302    andi              t3, t3, 0x00ff
   1303    addq.ph           t2, t2, t3
   1304    shra_r.ph         t2, t2, 8
   1305    not               t3, t2
   1306    andi              t3, t3, 0x00ff
   1307 
   1308 
    /* t4 = dst * t3 scaled to 8 bits */
   1309    mul               t4, t1, t3
   1310    shra_r.ph         t5, t4, 8
   1311    andi              t5, t5, 0x00ff
   1312    addq.ph           t4, t4, t5
   1313    shra_r.ph         t4, t4, 8
   1314    andi              t4, t4, 0x00ff
   1315 
   1316    addu_s.qb         t2, t2, t4
   1317    sb                t2, 0(a0)
   1318    addiu             a3, a3, -1
   1319    bnez              a3, 2b
   1320     addiu            a0, a0, 1
   1321 
   1322 3:
   1323    RESTORE_REGS_FROM_STACK 0, v0
   1324    j                 ra
   1325     nop
   1326 
   1327 END(pixman_composite_over_n_8_8_asm_mips)
   1328 
   1329 LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
   1330 /*
   1331 * a0 - dst  (a8r8g8b8)
   1332 * a1 - src  (32bit constant)
   1333 * a2 - mask (a8)
   1334 * a3 - w
 *
 * Blends the constant src onto dst using one mask byte per pixel.
 * Two separate two-pixel loops are used depending on the src alpha:
 *   loop 1 - srca != 0xff
 *   loop 2 - srca == 0xff (a full mask then stores src directly)
 * Label 3 handles the single remaining pixel when w is odd (or w == 1).
 * In every loop a pair of zero mask bytes leaves dst untouched.
 * The MIPS_* / OVER_* macros are defined outside this section; what they
 * compute is inferred from their names -- TODO confirm.
   1335 */
   1336 
   1337    SAVE_REGS_ON_STACK 4, s0, s1, s2, s3, s4
   1338    beqz      a3, 4f
   1339     nop
   1340    li        t4, 0x00ff00ff
   1341    li        t5, 0xff
   1342    addiu     t0, a3, -1
   1343    beqz      t0, 3f         /* last pixel */
   1344     srl      t6, a1, 24     /* t6 = srca */
    /* s4 = ~src >> 24, used in loop 1 to scale dst when the mask is full */
   1345    not       s4, a1
   1346    beq       t5, t6, 2f     /* if (srca == 0xff) */
   1347     srl      s4, s4, 24
   1348 1:
   1349                             /* a1 = src */
   1350    lbu       t0, 0(a2)      /* t0 = mask */
   1351    lbu       t1, 1(a2)      /* t1 = mask */
   1352    or        t2, t0, t1
   1353    beqz      t2, 111f       /* if (t0 == 0) && (t1 == 0) */
   1354     addiu    a2, a2, 2
   1355    and       t3, t0, t1
   1356 
   1357    lw        t2, 0(a0)      /* t2 = dst */
   1358    beq       t3, t5, 11f    /* if (t0 == 0xff) && (t1 == 0xff) */
   1359     lw       t3, 4(a0)      /* t3 = dst */
   1360 
    /* s0/s1 = src combined with the mask (t6 is reused as scratch from here
     * on);  s2/s3 = ~s0/s1 >> 24;  dst is scaled by s2/s3 and added to
     * s0/s1 with saturation */
   1361    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, s0, s1, t4, t6, t7, t8, t9, s2, s3
   1362    not       s2, s0
   1363    not       s3, s1
   1364    srl       s2, s2, 24
   1365    srl       s3, s3, 24
   1366    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s2, s3, t2, t3, t4, t0, t1, t6, t7, t8, t9
   1367    addu_s.qb s2, t2, s0
   1368    addu_s.qb s3, t3, s1
   1369    sw        s2, 0(a0)
   1370    b         111f
   1371     sw       s3, 4(a0)
   1372 11:
    /* both masks are 0xff: dst = src + dst scaled by s4 (saturating add) */
   1373    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, s4, s4, t2, t3, t4, t0, t1, t6, t7, t8, t9
   1374    addu_s.qb s2, t2, a1
   1375    addu_s.qb s3, t3, a1
   1376    sw        s2, 0(a0)
   1377    sw        s3, 4(a0)
   1378 
   1379 111:
   1380    addiu     a3, a3, -2
   1381    addiu     t0, a3, -1
   1382    bgtz      t0, 1b
   1383     addiu    a0, a0, 8
   1384    b         3f
   1385     nop
   1386 2:
   1387                             /* a1 = src */
   1388    lbu       t0, 0(a2)      /* t0 = mask */
   1389    lbu       t1, 1(a2)      /* t1 = mask */
   1390    or        t2, t0, t1
   1391    beqz      t2, 222f       /* if (t0 == 0) && (t1 == 0) */
   1392     addiu    a2, a2, 2
   1393    and       t3, t0, t1
   1394    beq       t3, t5, 22f    /* if (t0 == 0xff) && (t1 == 0xff) */
   1395     nop
   1396    lw        t2, 0(a0)      /* t2 = dst */
   1397    lw        t3, 4(a0)      /* t3 = dst */
   1398 
   1399    OVER_2x8888_2x8_2x8888 a1, a1, t0, t1, t2, t3, \
   1400                           t6, t7, t4, t8, t9, s0, s1, s2, s3
   1401    sw        t6, 0(a0)
   1402    b         222f
   1403     sw        t7, 4(a0)
   1404 22:
    /* srca == 0xff and both masks are 0xff: store src directly */
   1405    sw        a1, 0(a0)
   1406    sw        a1, 4(a0)
   1407 222:
   1408    addiu     a3, a3, -2
   1409    addiu     t0, a3, -1
   1410    bgtz      t0, 2b
   1411     addiu    a0, a0, 8
   1412 3:
   1413    blez      a3, 4f
   1414     nop
   1415                             /* a1 = src */
   1416    lbu       t0, 0(a2)      /* t0 = mask */
   1417    beqz      t0, 4f         /* if (t0 == 0) */
   1418     addiu    a2, a2, 1
    /* t3 = src when the mask is 0xff, otherwise src combined with the mask */
   1419    move      t3, a1
   1420    beq       t0, t5, 31f    /* if (t0 == 0xff) */
   1421     lw       t1, 0(a0)      /* t1 = dst */
   1422 
   1423    MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8
   1424 31:
    /* dst = t3 + dst scaled by (~t3 >> 24), with a saturating add */
   1425    not       t2, t3
   1426    srl       t2, t2, 24
   1427    MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8
   1428    addu_s.qb t2, t1, t3
   1429    sw        t2, 0(a0)
   1430 4:
   1431    RESTORE_REGS_FROM_STACK 4, s0, s1, s2, s3, s4
   1432    j         ra
   1433     nop
   1434 
   1435 END(pixman_composite_over_n_8_8888_asm_mips)
   1436 
   1437 LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
   1438 /*
   1439 * a0 - dst  (r5g6b5)
   1440 * a1 - src  (32bit constant)
   1441 * a2 - mask (a8)
   1442 * a3 - w
 *
 * Blends the constant src onto a 16-bit dst using one mask byte per pixel.
 * dst pixels are expanded with CONVERT_*0565_TO_*8888, blended as 32-bit
 * values and packed back with CONVERT_*8888_TO_*0565.
 * Two separate two-pixel loops are used depending on the src alpha:
 *   loop 1  - srca != 0xff
 *   loop 21 - srca == 0xff (a full mask then stores the converted src)
 * Label 3 handles the single remaining pixel when w is odd (or w == 1).
 * In every loop a pair of zero mask bytes leaves dst untouched.
 * The MIPS_* / OVER_* / CONVERT_* macros are defined outside this section;
 * what they compute is inferred from their names -- TODO confirm.
   1443 */
   1444    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
   1445    beqz     a3, 4f
   1446     nop
   1447    li       t4, 0x00ff00ff
   1448    li       t5, 0xff
    /* t6/t7/t8: constants handed to the CONVERT_2x* macros */
   1449    li       t6, 0xf800f800
   1450    li       t7, 0x07e007e0
   1451    li       t8, 0x001F001F
   1452    addiu    t1, a3, -1
   1453    beqz     t1, 3f         /* last pixel */
   1454     srl     t0, a1, 24     /* t0 = srca */
    /* v0 = ~src >> 24, used in loop 1 to scale dst when the mask is full */
   1455    not      v0, a1
   1456    beq      t0, t5, 2f     /* if (srca == 0xff) */
   1457     srl     v0, v0, 24
   1458 1:
   1459                            /* a1 = src */
   1460    lbu      t0, 0(a2)      /* t0 = mask */
   1461    lbu      t1, 1(a2)      /* t1 = mask */
   1462    or       t2, t0, t1
   1463    beqz     t2, 111f       /* if (t0 == 0) && (t1 == 0) */
   1464     addiu   a2, a2, 2
   1465    lhu      t2, 0(a0)      /* t2 = dst */
   1466    lhu      t3, 2(a0)      /* t3 = dst */
   1467    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t7, t8, t9, s2, s3, s4
   1468    and      t9, t0, t1
   1469    beq      t9, t5, 11f    /* if (t0 == 0xff) && (t1 == 0xff) */
   1470     nop
   1471 
    /* s2/s3 = src combined with the mask;  s4/s5 = ~s2/s3 >> 24;
     * dst (s0/s1) is scaled by s4/s5 and added to s2/s3 with saturation */
   1472    MIPS_2xUN8x4_MUL_2xUN8   a1, a1, t0, t1, s2, s3, t4, t9, s4, s5, s6, s7, s8
   1473    not      s4, s2
   1474    not      s5, s3
   1475    srl      s4, s4, 24
   1476    srl      s5, s5, 24
   1477    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, s4, s5, s0, s1, t4, t9, t0, t1, s6, s7, s8
   1478    addu_s.qb                s4, s2, s0
   1479    addu_s.qb                s5, s3, s1
   1480    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
   1481    sh       t2, 0(a0)
   1482    b        111f
   1483     sh      t3, 2(a0)
   1484 11:
    /* both masks are 0xff: dst = src + dst scaled by v0 (saturating add) */
   1485    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, v0, v0, s0, s1, t4, t9, t0, t1, s6, s7, s8
   1486    addu_s.qb                s4, a1, s0
   1487    addu_s.qb                s5, a1, s1
   1488    CONVERT_2x8888_TO_2x0565 s4, s5, t2, t3, t6, t7, t8, s0, s1
   1489    sh       t2, 0(a0)
   1490    sh       t3, 2(a0)
   1491 111:
   1492    addiu    a3, a3, -2
   1493    addiu    t0, a3, -1
   1494    bgtz     t0, 1b
   1495     addiu   a0, a0, 4
   1496    b        3f
   1497     nop
   1498 2:
    /* srca == 0xff: convert src to 0565 once (s0), outside loop 21 */
   1499    CONVERT_1x8888_TO_1x0565 a1, s0, s1, s2
   1500 21:
   1501                            /* a1 = src */
   1502    lbu      t0, 0(a2)      /* t0 = mask */
   1503    lbu      t1, 1(a2)      /* t1 = mask */
   1504    or       t2, t0, t1
   1505    beqz     t2, 222f       /* if (t0 == 0) && (t1 == 0) */
   1506     addiu   a2, a2, 2
   1507    and      t9, t0, t1
    /* preload s2/s3 with the converted src; they are stored as-is at 22
     * when both masks are 0xff, and overwritten otherwise */
   1508    move     s2, s0
   1509    beq      t9, t5, 22f    /* if (t0 == 0xff) && (t1 == 0xff) */
   1510     move    s3, s0
   1511    lhu      t2, 0(a0)      /* t2 = dst */
   1512    lhu      t3, 2(a0)      /* t3 = dst */
   1513 
   1514    CONVERT_2x0565_TO_2x8888 t2, t3, s2, s3, t7, t8, s4, s5, s6, s7
   1515    OVER_2x8888_2x8_2x8888   a1, a1, t0, t1, s2, s3, \
   1516                             t2, t3, t4, t9, s4, s5, s6, s7, s8
   1517    CONVERT_2x8888_TO_2x0565 t2, t3, s2, s3, t6, t7, t8, s4, s5
   1518 22:
   1519    sh       s2, 0(a0)
   1520    sh       s3, 2(a0)
   1521 222:
   1522    addiu    a3, a3, -2
   1523    addiu    t0, a3, -1
   1524    bgtz     t0, 21b
   1525     addiu   a0, a0, 4
   1526 3:
   1527    blez      a3, 4f
   1528     nop
   1529                            /* a1 = src */
   1530    lbu      t0, 0(a2)      /* t0 = mask */
   1531    beqz     t0, 4f         /* if (t0 == 0) */
   1532     nop
   1533    lhu      t1, 0(a0)      /* t1 = dst */
   1534    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7
    /* t3 = src when the mask is 0xff, otherwise src combined with the mask */
   1535    beq      t0, t5, 31f    /* if (t0 == 0xff) */
   1536     move    t3, a1
   1537 
   1538    MIPS_UN8x4_MUL_UN8       a1, t0, t3, t4, t7, t8, t9
   1539 31:
    /* t6 is overwritten here; its 0xf800f800 constant is not used again */
   1540    not      t6, t3
   1541    srl      t6, t6, 24
   1542    MIPS_UN8x4_MUL_UN8       t2, t6, t2, t4, t7, t8, t9
   1543    addu_s.qb                t1, t2, t3
   1544    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7
   1545    sh       t2, 0(a0)
   1546 4:
   1547    RESTORE_REGS_FROM_STACK  24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
   1548    j        ra
   1549     nop
   1550 
   1551 END(pixman_composite_over_n_8_0565_asm_mips)
   1552 
   1553 LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
   1554 /*
   1555 * a0 - dst  (a8r8g8b8)
   1556 * a1 - src  (a8r8g8b8)
   1557 * a2 - mask (32bit constant)
   1558 * a3 - w
 *
 * Blends w src pixels onto dst using the top byte of the constant mask
 * (a2 >> 24) as the mask value for every pixel.  The blend itself is done
 * by OVER_2x8888_2x8_2x8888 / OVER_8888_8_8888, which are defined outside
 * this section.
 * Loop 1 handles two pixels per iteration; label 2 handles the single
 * remaining pixel when w is odd.
   1559 */
   1560 
   1561    SAVE_REGS_ON_STACK 0, s0
   1562    li       t4, 0x00ff00ff
   1563    beqz     a3, 3f
   1564     nop
   1565    addiu    t1, a3, -1
    /* from here on a2 holds only the top byte of the mask constant */
   1566    srl      a2, a2, 24
   1567    beqz     t1, 2f
   1568     nop
   1569 
   1570 1:
   1571    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1572    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   1573                       /* a2 = mask        (32bit constant) */
   1574    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   1575    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
   1576    addiu    a1, a1, 8
   1577 
    /* results are returned in t5/t6 */
   1578    OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \
   1579                           t5, t6, t4, t7, t8, t9, t0, t1, s0
   1580 
   1581    sw       t5, 0(a0)
   1582    sw       t6, 4(a0)
   1583    addiu    a3, a3, -2
    /* loop again only while at least two pixels remain (a3 - 1 > 0) */
   1584    addiu    t1, a3, -1
   1585    bgtz     t1, 1b
   1586     addiu   a0, a0, 8
   1587 2:
   1588    beqz     a3, 3f
   1589     nop
   1590    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1591                       /* a2 = mask        (32bit constant) */
   1592    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
   1593 
   1594    OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8
   1595 
   1596    sw       t3, 0(a0)
   1597 3:
   1598    RESTORE_REGS_FROM_STACK 0, s0
   1599    j        ra
   1600     nop
   1601 
   1602 END(pixman_composite_over_8888_n_8888_asm_mips)
   1603 
   1604 LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
   1605 /*
   1606 * a0 - dst  (r5g6b5)
   1607 * a1 - src  (a8r8g8b8)
   1608 * a2 - mask (32bit constant)
   1609 * a3 - w
 *
 * Blends w 32-bit src pixels onto a 16-bit dst using the top byte of the
 * constant mask (a2 >> 24) as the mask value for every pixel.  dst pixels
 * are expanded with CONVERT_*0565_TO_*8888, blended by the OVER_* macros
 * and packed back with CONVERT_*8888_TO_*0565 (all macros are defined
 * outside this section).
 * Loop 1 handles two pixels per iteration; label 2 handles the single
 * remaining pixel when w is odd.
   1610 */
   1611 
   1612    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
   1613    li       t6, 0x00ff00ff
    /* t7/t8/t9: constants handed to the CONVERT_2x* macros */
   1614    li       t7, 0xf800f800
   1615    li       t8, 0x07e007e0
   1616    li       t9, 0x001F001F
   1617    beqz     a3, 3f
   1618     nop
    /* from here on a2 holds only the top byte of the mask constant */
   1619    srl      a2, a2, 24
   1620    addiu    t1, a3, -1
   1621    beqz     t1, 2f
   1622     nop
   1623 1:
   1624    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1625    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   1626                       /* a2 = mask        (32bit constant) */
   1627    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
   1628    lhu      t3, 2(a0) /* t3 = destination (r5g6b5) */
   1629    addiu    a1, a1, 8
   1630 
   1631    CONVERT_2x0565_TO_2x8888 t2, t3, t4, t5, t8, t9, s0, s1, t2, t3
   1632    OVER_2x8888_2x8_2x8888   t0, t1, a2, a2, t4, t5, \
   1633                             t2, t3, t6, t0, t1, s0, s1, s2, s3
   1634    CONVERT_2x8888_TO_2x0565 t2, t3, t4, t5, t7, t8, t9, s0, s1
   1635 
   1636    sh       t4, 0(a0)
   1637    sh       t5, 2(a0)
   1638    addiu    a3, a3, -2
    /* loop again only while at least two pixels remain (a3 - 1 > 0) */
   1639    addiu    t1, a3, -1
   1640    bgtz     t1, 1b
   1641     addiu   a0, a0, 4
   1642 2:
   1643    beqz     a3, 3f
   1644     nop
   1645    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1646                       /* a2 = mask        (32bit constant) */
   1647    lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
   1648 
   1649    CONVERT_1x0565_TO_1x8888 t1, t2, t4, t5
   1650    OVER_8888_8_8888         t0, a2, t2, t1, t6, t3, t4, t5, t7
   1651    CONVERT_1x8888_TO_1x0565 t1, t3, t4, t5
   1652 
   1653    sh       t3, 0(a0)
   1654 3:
   1655    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
   1656    j                 ra
   1657     nop
   1658 
   1659 END(pixman_composite_over_8888_n_0565_asm_mips)
   1660 
   1661 LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
   1662 /*
   1663 * a0 - dst  (r5g6b5)
   1664 * a1 - src  (r5g6b5)
   1665 * a2 - mask (32bit constant)
   1666 * a3 - w
 *
 * Blends w 16-bit src pixels onto a 16-bit dst using the top byte of the
 * constant mask (a2 >> 24) as the mask value for every pixel.  Both src and
 * dst are expanded with CONVERT_*0565_TO_*8888, blended by the OVER_*
 * macros and the result packed back with CONVERT_*8888_TO_*0565 (all macros
 * are defined outside this section).
 * Loop 1 handles two pixels per iteration; label 2 handles the single
 * remaining pixel when w is odd.
   1667 */
   1668 
   1669    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5
   1670    li       t6, 0x00ff00ff
    /* t7/t8/t9: constants handed to the CONVERT_2x* macros */
   1671    li       t7, 0xf800f800
   1672    li       t8, 0x07e007e0
   1673    li       t9, 0x001F001F
   1674    beqz     a3, 3f
   1675     nop
    /* from here on a2 holds only the top byte of the mask constant */
   1676    srl      a2, a2, 24
   1677    addiu    t1, a3, -1
   1678    beqz     t1, 2f
   1679     nop
   1680 1:
   1681    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
   1682    lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
   1683                       /* a2 = mask        (32bit constant) */
   1684    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
   1685    lhu      t3, 2(a0) /* t3 = destination (r5g6b5) */
   1686    addiu    a1, a1, 4
   1687 
    /* t4/t5 = expanded src, s0/s1 = expanded dst, t0/t1 = blended result */
   1688    CONVERT_2x0565_TO_2x8888 t0, t1, t4, t5, t8, t9, s0, s1, s2, s3
   1689    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, t8, t9, s2, s3, s4, s5
   1690    OVER_2x8888_2x8_2x8888   t4, t5, a2, a2, s0, s1, \
   1691                             t0, t1, t6, s2, s3, s4, s5, t4, t5
   1692    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t7, t8, t9, s2, s3
   1693 
   1694    sh       s0, 0(a0)
   1695    sh       s1, 2(a0)
   1696    addiu    a3, a3, -2
    /* loop again only while at least two pixels remain (a3 - 1 > 0) */
   1697    addiu    t1, a3, -1
   1698    bgtz     t1, 1b
   1699     addiu   a0, a0, 4
   1700 2:
   1701    beqz     a3, 3f
   1702     nop
   1703    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
   1704                       /* a2 = mask        (32bit constant) */
   1705    lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
   1706 
   1707    CONVERT_1x0565_TO_1x8888 t0, t2, t4, t5
   1708    CONVERT_1x0565_TO_1x8888 t1, t3, t4, t5
   1709    OVER_8888_8_8888         t2, a2, t3, t0, t6, t1, t4, t5, t7
   1710    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
   1711 
   1712    sh       t3, 0(a0)
   1713 3:
   1714    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
   1715    j        ra
   1716     nop
   1717 
   1718 END(pixman_composite_over_0565_n_0565_asm_mips)
   1719 
   1720 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
   1721 /*
   1722 * a0 - dst  (a8r8g8b8)
   1723 * a1 - src  (a8r8g8b8)
   1724 * a2 - mask (a8)
   1725 * a3 - w
 *
 * Blends w src pixels onto dst using one mask byte per pixel.  The blend
 * itself is done by OVER_2x8888_2x8_2x8888 / OVER_8888_8_8888, which are
 * defined outside this section.
 * Loop 1 handles two pixels per iteration; label 2 handles the single
 * remaining pixel when w is odd.
   1726 */
   1727 
   1728    SAVE_REGS_ON_STACK 0, s0, s1
   1729    li       t4, 0x00ff00ff
   1730    beqz     a3, 3f
   1731     nop
    /* w == 1: skip the two-pixel loop and go straight to the tail */
   1732    addiu    t1, a3, -1
   1733    beqz     t1, 2f
   1734     nop
   1735 1:
   1736    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1737    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   1738    lbu      t2, 0(a2) /* t2 = mask        (a8) */
   1739    lbu      t3, 1(a2) /* t3 = mask        (a8) */
   1740    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
   1741    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
   1742    addiu    a1, a1, 8
   1743    addiu    a2, a2, 2
   1744 
    /* results are returned in t7/t8 */
   1745    OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \
   1746                           t7, t8, t4, t9, s0, s1, t0, t1, t2
   1747 
   1748    sw       t7, 0(a0)
   1749    sw       t8, 4(a0)
   1750    addiu    a3, a3, -2
    /* loop again only while at least two pixels remain (a3 - 1 > 0) */
   1751    addiu    t1, a3, -1
   1752    bgtz     t1, 1b
   1753     addiu   a0, a0, 8
   1754 2:
   1755    beqz     a3, 3f
   1756     nop
   1757    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1758    lbu      t1, 0(a2) /* t1 = mask        (a8) */
   1759    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   1760 
   1761    OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
   1762 
   1763    sw       t3, 0(a0)
   1764 3:
   1765    RESTORE_REGS_FROM_STACK 0, s0, s1
   1766    j        ra
   1767     nop
   1768 
   1769 END(pixman_composite_over_8888_8_8888_asm_mips)
   1770 
   1771 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
   1772 /* Scanline worker: widens r5g6b5 dst via CONVERT_*0565_TO_*8888, runs the OVER_* macros, narrows back via CONVERT_*8888_TO_*0565; two pixels per loop pass plus one leftover.
   1773 * a0 - dst  (r5g6b5), read and rewritten in place
   1774 * a1 - src  (a8r8g8b8), only read
   1775 * a2 - mask (a8), one byte per pixel
   1776 * a3 - w, pixel count; w == 0 returns without touching memory
   1777 */
   1778 
   1779    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5 /* s0-s5 are handed to the two-pixel macros below */
   1780    li       t6, 0x00ff00ff /* constant operand for the OVER macros */
   1781    li       t7, 0xf800f800 /* top 5 bits of each halfword (r field of r5g6b5) */
   1782    li       t8, 0x07e007e0 /* middle 6 bits of each halfword (g field) */
   1783    li       t9, 0x001F001F /* low 5 bits of each halfword (b field) */
   1784    beqz     a3, 3f /* w == 0: nothing to do */
   1785     nop
   1786    addiu    t1, a3, -1
   1787    beqz     t1, 2f /* w == 1: skip the pair loop */
   1788     nop
   1789 1: /* pair loop: two pixels per pass */
   1790    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1791    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   1792    lbu      t2, 0(a2) /* t2 = mask        (a8) */
   1793    lbu      t3, 1(a2) /* t3 = mask        (a8) */
   1794    lhu      t4, 0(a0) /* t4 = destination (r5g6b5) */
   1795    lhu      t5, 2(a0) /* t5 = destination (r5g6b5) */
   1796    addiu    a1, a1, 8 /* src += 2 pixels */
   1797    addiu    a2, a2, 2 /* mask += 2 pixels */
   1798 
   1799    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5 /* widened dst goes to s0/s1 (fed to OVER next) */
   1800    OVER_2x8888_2x8_2x8888   t0, t1, t2, t3, s0, s1, \
   1801                             t4, t5, t6, s2, s3, s4, s5, t0, t1 /* blended pixels arrive in t4/t5 */
   1802    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3 /* narrowed results arrive in s0/s1 (stored next) */
   1803 
   1804    sh       s0, 0(a0)
   1805    sh       s1, 2(a0)
   1806    addiu    a3, a3, -2 /* two pixels consumed */
   1807    addiu    t1, a3, -1
   1808    bgtz     t1, 1b /* loop again while at least two pixels remain */
   1809     addiu   a0, a0, 4 /* delay slot: dst += 2 pixels on both paths */
   1810 2: /* leftover: zero or one pixel */
   1811    beqz     a3, 3f
   1812     nop
   1813    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1814    lbu      t1, 0(a2) /* t1 = mask        (a8) */
   1815    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
   1816 
   1817    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5 /* widened dst goes to t3 */
   1818    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8 /* blended pixel arrives in t2 */
   1819    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5 /* narrowed result arrives in t3 (stored next) */
   1820 
   1821    sh       t3, 0(a0)
   1822 3: /* common exit */
   1823    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
   1824    j        ra
   1825     nop
   1826 
   1827 END(pixman_composite_over_8888_8_0565_asm_mips)
   1828 
   1829 LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
   1830 /* Scanline worker: widens r5g6b5 src and dst via CONVERT_*0565_TO_*8888, runs the OVER_* macros, narrows the result back; two pixels per loop pass plus one leftover.
   1831 * a0 - dst  (r5g6b5), read and rewritten in place
   1832 * a1 - src  (r5g6b5), only read
   1833 * a2 - mask (a8), one byte per pixel
   1834 * a3 - w, pixel count; w == 0 returns without touching memory
   1835 */
   1836 
   1837    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5 /* s0-s5 are handed to the two-pixel macros below */
   1838    li       t4, 0xf800f800 /* top 5 bits of each halfword (r field of r5g6b5) */
   1839    li       t5, 0x07e007e0 /* middle 6 bits of each halfword (g field) */
   1840    li       t6, 0x001F001F /* low 5 bits of each halfword (b field) */
   1841    li       t7, 0x00ff00ff /* constant operand for the OVER macros */
   1842    beqz     a3, 3f /* w == 0: nothing to do */
   1843     nop
   1844    addiu    t1, a3, -1
   1845    beqz     t1, 2f /* w == 1: skip the pair loop */
   1846     nop
   1847 1: /* pair loop: two pixels per pass */
   1848    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
   1849    lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
   1850    lbu      t2, 0(a2) /* t2 = mask        (a8) */
   1851    lbu      t3, 1(a2) /* t3 = mask        (a8) */
   1852    lhu      t8, 0(a0) /* t8 = destination (r5g6b5) */
   1853    lhu      t9, 2(a0) /* t9 = destination (r5g6b5) */
   1854    addiu    a1, a1, 4 /* src += 2 pixels */
   1855    addiu    a2, a2, 2 /* mask += 2 pixels */
   1856 
   1857    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 /* widened src goes to s0/s1 */
   1858    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1 /* widened dst goes to s2/s3 */
   1859    OVER_2x8888_2x8_2x8888   s0, s1, t2, t3, s2, s3, \
   1860                             t0, t1, t7, s4, s5, t8, t9, s0, s1 /* blended pixels arrive in t0/t1 */
   1861    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3 /* narrowed results arrive in s0/s1 (stored next) */
   1862 
   1863    sh       s0, 0(a0)
   1864    sh       s1, 2(a0)
   1865    addiu    a3, a3, -2 /* two pixels consumed */
   1866    addiu    t1, a3, -1
   1867    bgtz     t1, 1b /* loop again while at least two pixels remain */
   1868     addiu   a0, a0, 4 /* delay slot: dst += 2 pixels on both paths */
   1869 2: /* leftover: zero or one pixel */
   1870    beqz     a3, 3f
   1871     nop
   1872    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
   1873    lbu      t1, 0(a2) /* t1 = mask        (a8) */
   1874    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
   1875 
   1876    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5 /* widened src goes to t3; t4/t5 constants are no longer needed here */
   1877    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6 /* widened dst goes to t4 */
   1878    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8 /* blended pixel arrives in t0 */
   1879    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5 /* narrowed result arrives in t3 (stored next) */
   1880 
   1881    sh       t3, 0(a0)
   1882 3: /* common exit */
   1883    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5
   1884    j        ra
   1885     nop
   1886 
   1887 END(pixman_composite_over_0565_8_0565_asm_mips)
   1888 
   1889 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
   1890 /* Scanline worker: like the a8-mask variant, but the mask is a 32-bit pixel of which only the top byte is used (srl by 24) before the OVER_* macros run.
   1891 * a0 - dst  (a8r8g8b8), read and rewritten in place
   1892 * a1 - src  (a8r8g8b8), only read
   1893 * a2 - mask (a8r8g8b8), only bits 31..24 are used
   1894 * a3 - w, pixel count; w == 0 returns without touching memory
   1895 */
   1896 
   1897    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the two-pixel macro below */
   1898    li       t4, 0x00ff00ff /* constant operand for both OVER macros */
   1899    beqz     a3, 3f /* w == 0: nothing to do */
   1900     nop
   1901    addiu    t1, a3, -1
   1902    beqz     t1, 2f /* w == 1: skip the pair loop */
   1903     nop
   1904 1: /* pair loop: two pixels per pass */
   1905    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1906    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   1907    lw       t2, 0(a2) /* t2 = mask        (a8r8g8b8) */
   1908    lw       t3, 4(a2) /* t3 = mask        (a8r8g8b8) */
   1909    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
   1910    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
   1911    addiu    a1, a1, 8 /* src += 2 pixels */
   1912    addiu    a2, a2, 8 /* mask += 2 pixels */
   1913    srl      t2, t2, 24 /* keep only the top byte of mask pixel 0 */
   1914    srl      t3, t3, 24 /* keep only the top byte of mask pixel 1 */
   1915 
   1916    OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t0, t1 /* results arrive in t7/t8 (stored next) */
   1917 
   1918    sw       t7, 0(a0)
   1919    sw       t8, 4(a0)
   1920    addiu    a3, a3, -2 /* two pixels consumed */
   1921    addiu    t1, a3, -1
   1922    bgtz     t1, 1b /* loop again while at least two pixels remain */
   1923     addiu   a0, a0, 8 /* delay slot: dst += 2 pixels on both paths */
   1924 2: /* leftover: zero or one pixel */
   1925    beqz     a3, 3f
   1926     nop
   1927    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1928    lw       t1, 0(a2) /* t1 = mask        (a8r8g8b8) */
   1929    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   1930    srl      t1, t1, 24 /* keep only the top byte of the mask pixel */
   1931 
   1932    OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8 /* result arrives in t3 (stored next) */
   1933 
   1934    sw       t3, 0(a0)
   1935 3: /* common exit */
   1936    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
   1937    j        ra
   1938     nop
   1939 
   1940 END(pixman_composite_over_8888_8888_8888_asm_mips)
   1941 
   1942 LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
   1943 /* Scanline worker, unmasked: dst = src + dst scaled by the inverted top byte of src (byte-wise saturating add); two pixels per loop pass plus one leftover, with shortcuts when the source top bytes are all 0xff or the source words are all zero.
   1944 * a0 - dst  (a8r8g8b8), read and rewritten in place
   1945 * a1 - src  (a8r8g8b8), only read
   1946 * a2 - w, pixel count; w == 0 returns without touching memory
   1947 */
   1948 
   1949    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the two-pixel multiply macro below */
   1950    li           t4, 0x00ff00ff /* constant operand for the multiply macros */
   1951    beqz         a2, 3f /* w == 0: nothing to do */
   1952     nop
   1953    addiu        t1, a2, -1
   1954    beqz         t1, 2f /* w == 1: skip the pair loop */
   1955     nop
   1956 1: /* pair loop: two pixels per pass */
   1957    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1958    lw           t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   1959    lw           t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   1960    lw           t3, 4(a0) /* t3 = destination (a8r8g8b8) */
   1961    addiu        a1, a1, 8 /* src += 2 pixels */
   1962 
   1963    not          t5, t0
   1964    srl          t5, t5, 24 /* t5 = inverted top byte of source 0 */
   1965    not          t6, t1
   1966    srl          t6, t6, 24 /* t6 = inverted top byte of source 1 */
   1967 
   1968    or           t7, t5, t6
   1969    beqz         t7, 11f /* both top bytes are 0xff: store the sources unchanged */
   1970     or          t8, t0, t1 /* delay slot: t8 == 0 only if both source words are zero */
   1971    beqz         t8, 12f /* both sources zero: leave dst as it is. NOTE(review): no filler follows, so the first instruction of the macro below presumably sits in the delay slot - confirm it is harmless on the taken path */
   1972 
   1973    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t5, t6, t7, t8, t4, t9, s0, s1, s2, t2, t3 /* scaled dst pixels arrive in t7/t8 */
   1974 
   1975    addu_s.qb    t0, t7, t0 /* per-byte saturating add of source 0 */
   1976    addu_s.qb    t1, t8, t1 /* per-byte saturating add of source 1 */
   1977 11: /* store both result pixels */
   1978    sw           t0, 0(a0)
   1979    sw           t1, 4(a0)
   1980 12: /* loop bookkeeping, also reached when the store is skipped */
   1981    addiu        a2, a2, -2 /* two pixels consumed */
   1982    addiu        t1, a2, -1
   1983    bgtz         t1, 1b /* loop again while at least two pixels remain */
   1984     addiu       a0, a0, 8 /* delay slot: dst += 2 pixels on both paths */
   1985 2: /* leftover: zero or one pixel */
   1986    beqz         a2, 3f
   1987     nop
   1988 
   1989    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   1990    lw           t1, 0(a0) /* t1 = destination (a8r8g8b8) */
   1991    addiu        a1, a1, 4
   1992 
   1993    not          t2, t0
   1994    srl          t2, t2, 24 /* t2 = inverted top byte of the source */
   1995 
   1996    beqz         t2, 21f /* top byte is 0xff: store the source unchanged */
   1997     nop
   1998    beqz         t0, 3f /* source is zero: leave dst as it is. NOTE(review): no filler follows, so the first instruction of the macro below presumably sits in the delay slot - confirm it is harmless on the taken path */
   1999 
   2000    MIPS_UN8x4_MUL_UN8 t1, t2, t3, t4, t5, t6, t7 /* scaled dst pixel arrives in t3 */
   2001 
   2002    addu_s.qb    t0, t3, t0 /* per-byte saturating add of the source */
   2003 21: /* store the single result pixel */
   2004    sw           t0, 0(a0)
   2005 
   2006 3: /* common exit */
   2007    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
   2008    j            ra
   2009     nop
   2010 
   2011 END(pixman_composite_over_8888_8888_asm_mips)
   2012 
   2013 LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
   2014 /* Scanline worker, unmasked, r5g6b5 destination: widens dst, scales it by the inverted top byte of src, saturating-adds src and narrows back; two pixels per loop pass plus one leftover, with shortcuts when the source top bytes are all 0xff or the source words are all zero.
   2015 * a0 - dst  (r5g6b5), read and rewritten in place
   2016 * a1 - src  (a8r8g8b8), only read
   2017 * a2 - w, pixel count; w == 0 returns without touching memory
   2018 */
   2019 
   2020    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 /* s0-s2 go to the macros; s3-s5 hold constants */
   2021    li           t4, 0x00ff00ff /* constant operand for the multiply macros */
   2022    li           s3, 0xf800f800 /* top 5 bits of each halfword (r field of r5g6b5) */
   2023    li           s4, 0x07e007e0 /* middle 6 bits of each halfword (g field) */
   2024    li           s5, 0x001F001F /* low 5 bits of each halfword (b field) */
   2025    beqz         a2, 3f /* w == 0: nothing to do */
   2026     nop
   2027    addiu        t1, a2, -1
   2028    beqz         t1, 2f /* w == 1: skip the pair loop */
   2029     nop
   2030 1: /* pair loop: two pixels per pass */
   2031    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   2032    lw           t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   2033    lhu          t2, 0(a0) /* t2 = destination (r5g6b5) */
   2034    lhu          t3, 2(a0) /* t3 = destination (r5g6b5) */
   2035    addiu        a1, a1, 8 /* src += 2 pixels */
   2036 
   2037    not          t5, t0
   2038    srl          t5, t5, 24 /* t5 = inverted top byte of source 0 */
   2039    not          t6, t1
   2040    srl          t6, t6, 24 /* t6 = inverted top byte of source 1 */
   2041 
   2042    or           t7, t5, t6
   2043    beqz         t7, 11f /* both top bytes are 0xff: just convert and store the sources */
   2044     or          t8, t0, t1 /* delay slot: t8 == 0 only if both source words are zero */
   2045    beqz         t8, 12f /* both sources zero: leave dst as it is. NOTE(review): no filler follows, so the first instruction of the macro below presumably sits in the delay slot - confirm it is harmless on the taken path */
   2046 
   2047    CONVERT_2x0565_TO_2x8888 t2, t3, s0, s1, s4, s5, t7, t8, t9, s2 /* widened dst goes to s0/s1 */
   2048    MIPS_2xUN8x4_MUL_2xUN8   s0, s1, t5, t6, t7, t8, t4, t9, t2, t3, s2, s0, s1 /* scaled dst pixels arrive in t7/t8 */
   2049 
   2050    addu_s.qb    t0, t7, t0 /* per-byte saturating add of source 0 */
   2051    addu_s.qb    t1, t8, t1 /* per-byte saturating add of source 1 */
   2052 11: /* narrow t0/t1 to r5g6b5 and store */
   2053    CONVERT_2x8888_TO_2x0565 t0, t1, t7, t8, s3, s4, s5, t2, t3 /* narrowed results arrive in t7/t8 (stored next) */
   2054    sh           t7, 0(a0)
   2055    sh           t8, 2(a0)
   2056 12: /* loop bookkeeping, also reached when the store is skipped */
   2057    addiu        a2, a2, -2 /* two pixels consumed */
   2058    addiu        t1, a2, -1
   2059    bgtz         t1, 1b /* loop again while at least two pixels remain */
   2060     addiu       a0, a0, 4 /* delay slot: dst += 2 pixels on both paths */
   2061 2: /* leftover: zero or one pixel */
   2062    beqz         a2, 3f
   2063     nop
   2064 
   2065    lw           t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   2066    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
   2067    addiu        a1, a1, 4
   2068 
   2069    not          t2, t0
   2070    srl          t2, t2, 24 /* t2 = inverted top byte of the source */
   2071 
   2072    beqz         t2, 21f /* top byte is 0xff: just convert and store the source */
   2073     nop
   2074    beqz         t0, 3f /* source is zero: leave dst as it is. NOTE(review): no filler follows, so the first instruction of the macro below presumably sits in the delay slot - confirm it is harmless on the taken path */
   2075 
   2076    CONVERT_1x0565_TO_1x8888 t1, s0, t8, t9 /* widened dst goes to s0 */
   2077    MIPS_UN8x4_MUL_UN8       s0, t2, t3, t4, t5, t6, t7 /* scaled dst pixel arrives in t3 */
   2078 
   2079    addu_s.qb    t0, t3, t0 /* per-byte saturating add of the source */
   2080 21: /* narrow t0 to r5g6b5 and store */
   2081    CONVERT_1x8888_TO_1x0565 t0, s0, t8, t9 /* narrowed result arrives in s0 (stored next) */
   2082    sh           s0, 0(a0)
   2083 
   2084 3: /* common exit */
   2085    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
   2086    j            ra
   2087     nop
   2088 
   2089 END(pixman_composite_over_8888_0565_asm_mips)
   2090 
   2091 LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
   2092 /* Scanline worker with a constant source: if the top byte of src is 0xff the converted colour is simply written w times; otherwise each dst pixel is widened, scaled by the inverted top byte, saturating-added to src and narrowed back.
   2093 * a0 - dst  (r5g6b5), rewritten in place
   2094 * a1 - src  (32bit constant), never modified
   2095 * a2 - w, pixel count; w == 0 returns without touching memory
   2096 */
   2097 
   2098    beqz         a2, 5f /* w == 0: return; nothing has been saved yet */
   2099     nop
   2100 
   2101    not          t0, a1
   2102    srl          t0, t0, 24 /* t0 = inverted top byte of the constant source */
   2103    bgtz         t0, 1f /* top byte != 0xff: take the blending path */
   2104     nop
   2105    CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3 /* convert the constant once; result in t1 */
   2106 0: /* fill loop: one halfword store per pixel */
   2107    sh           t1, 0(a0)
   2108    addiu        a2, a2, -1
   2109    bgtz         a2, 0b
   2110     addiu       a0, a0, 2 /* delay slot: dst += 1 pixel */
   2111    j            ra /* this path saved no registers, so it returns directly */
   2112     nop
   2113 
   2114 1: /* blending path */
   2115    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the macros below */
   2116    li           t4, 0x00ff00ff /* constant operand for the multiply macros */
   2117    li           t5, 0xf800f800 /* top 5 bits of each halfword (r field of r5g6b5) */
   2118    li           t6, 0x07e007e0 /* middle 6 bits of each halfword (g field) */
   2119    li           t7, 0x001F001F /* low 5 bits of each halfword (b field) */
   2120    addiu        t1, a2, -1
   2121    beqz         t1, 3f /* w == 1: skip the pair loop */
   2122     nop
   2123 2: /* pair loop: two pixels per pass */
   2124    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
   2125    lhu          t2, 2(a0) /* t2 = destination (r5g6b5) */
   2126 
   2127    CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2 /* widened dst goes to t3/t8 */
   2128    MIPS_2xUN8x4_MUL_2xUN8   t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8 /* t0 is the multiplier for both pixels; products arrive in t1/t2 */
   2129    addu_s.qb                t1, t1, a1 /* per-byte saturating add of the constant source */
   2130    addu_s.qb                t2, t2, a1
   2131    CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1 /* narrowed results arrive in t3/t8 (stored next) */
   2132 
   2133    sh           t3, 0(a0)
   2134    sh           t8, 2(a0)
   2135 
   2136    addiu        a2, a2, -2 /* two pixels consumed */
   2137    addiu        t1, a2, -1
   2138    bgtz         t1, 2b /* loop again while at least two pixels remain */
   2139     addiu       a0, a0, 4 /* delay slot: dst += 2 pixels on both paths */
   2140 3: /* leftover: zero or one pixel */
   2141    beqz         a2, 4f
   2142     nop
   2143 
   2144    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
   2145 
   2146    CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1 /* widened dst goes to t2 */
   2147    MIPS_UN8x4_MUL_UN8       t2, t0, t1, t4, s0, s1, s2 /* scaled dst pixel arrives in t1 */
   2148    addu_s.qb                t1, t1, a1 /* per-byte saturating add of the constant source */
   2149    CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1 /* narrowed result arrives in t2 (stored next) */
   2150 
   2151    sh           t2, 0(a0)
   2152 
   2153 4: /* exit of the blending path: undo the save at label 1 */
   2154    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
   2155 5: /* exit for w == 0, which never saved anything */
   2156    j            ra
   2157     nop
   2158 
   2159 END(pixman_composite_over_n_0565_asm_mips)
   2160 
   2161 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
   2162 /* Scanline worker with a constant source: if the top byte of src is 0xff the constant is simply written w times; otherwise each dst pixel is scaled by the inverted top byte and saturating-added to src.
   2163 * a0 - dst  (a8r8g8b8), rewritten in place
   2164 * a1 - src  (32bit constant), never modified
   2165 * a2 - w, pixel count; w == 0 returns without touching memory
   2166 */
   2167 
   2168    beqz         a2, 5f /* w == 0: return; nothing has been saved yet */
   2169     nop
   2170 
   2171    not          t0, a1
   2172    srl          t0, t0, 24 /* t0 = inverted top byte of the constant source */
   2173    bgtz         t0, 1f /* top byte != 0xff: take the blending path */
   2174     nop
   2175 0: /* fill loop: one word store per pixel */
   2176    sw           a1, 0(a0)
   2177    addiu        a2, a2, -1
   2178    bgtz         a2, 0b
   2179     addiu       a0, a0, 4 /* delay slot: dst += 1 pixel */
   2180    j            ra /* this path saved no registers, so it returns directly */
   2181     nop
   2182 
   2183 1: /* blending path */
   2184    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the two-pixel macro below */
   2185    li           t4, 0x00ff00ff /* constant operand for the multiply macros */
   2186    addiu        t1, a2, -1
   2187    beqz         t1, 3f /* w == 1: skip the pair loop */
   2188     nop
   2189 2: /* pair loop: two pixels per pass */
   2190    lw           t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   2191    lw           t3, 4(a0) /* t3 = destination (a8r8g8b8) */
   2192 
   2193    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3 /* t0 is the multiplier for both pixels; products arrive in t7/t8 */
   2194 
   2195    addu_s.qb    t7, t7, a1 /* per-byte saturating add of the constant source */
   2196    addu_s.qb    t8, t8, a1
   2197 
   2198    sw           t7, 0(a0)
   2199    sw           t8, 4(a0)
   2200 
   2201    addiu        a2, a2, -2 /* two pixels consumed */
   2202    addiu        t1, a2, -1
   2203    bgtz         t1, 2b /* loop again while at least two pixels remain */
   2204     addiu       a0, a0, 8 /* delay slot: dst += 2 pixels on both paths */
   2205 3: /* leftover: zero or one pixel */
   2206    beqz         a2, 4f
   2207     nop
   2208 
   2209    lw           t1, 0(a0) /* t1 = destination (a8r8g8b8) */
   2210 
   2211    MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7 /* scaled dst pixel arrives in t3 */
   2212 
   2213    addu_s.qb    t3, t3, a1 /* per-byte saturating add of the constant source */
   2214 
   2215    sw           t3, 0(a0)
   2216 
   2217 4: /* exit of the blending path: undo the save at label 1 */
   2218    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
   2219 5: /* exit for w == 0, which never saved anything */
   2220    j            ra
   2221     nop
   2222 
   2223 END(pixman_composite_over_n_8888_asm_mips)
   2224 
   2225 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
   2226 /* Scanline worker on a8 data: dst = saturating_add(dst, src * mask scaled back to 8 bits); four pixels per pass in loop 0, then one pixel per pass in loop 2.
   2227 * a0 - dst  (a8), read and rewritten in place
   2228 * a1 - src  (a8), only read
   2229 * a2 - mask (a8), only read
   2230 * a3 - w, pixel count; w == 0 returns without touching memory
   2231 */
   2232 
   2233    SAVE_REGS_ON_STACK 0, v0, v1 /* v0 is the group counter, v1 a temporary below */
   2234    li                t9, 0x00ff00ff /* keeps the low byte of each halfword */
   2235    beqz              a3, 3f /* w == 0: nothing to do */
   2236     nop
   2237 
   2238    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
   2239    beqz              v0, 1f      /* branch if less than 4 src pixels */
   2240     nop
   2241 
   2242 0: /* four-pixel loop */
   2243    beqz              v0, 1f /* no full group left */
   2244     addiu            v0, v0, -1 /* delay slot: count this group */
   2245    lbu               t0, 0(a2) /* t0..t3 = four mask bytes */
   2246    lbu               t1, 1(a2)
   2247    lbu               t2, 2(a2)
   2248    lbu               t3, 3(a2)
   2249    lbu               t4, 0(a0) /* t4..t7 = four destination bytes */
   2250    lbu               t5, 1(a0)
   2251    lbu               t6, 2(a0)
   2252    lbu               t7, 3(a0)
   2253 
   2254    addiu             a2, a2, 4 /* mask += 4 pixels */
   2255 
   2256    precr_sra.ph.w    t1, t0, 0 /* t1 = mask1:mask0 as two halfwords */
   2257    precr_sra.ph.w    t3, t2, 0 /* t3 = mask3:mask2 */
   2258    precr_sra.ph.w    t5, t4, 0 /* t5 = dst1:dst0 */
   2259    precr_sra.ph.w    t7, t6, 0 /* t7 = dst3:dst2 */
   2260 
   2261    precr.qb.ph       t0, t3, t1 /* t0 = four mask bytes packed in one word */
   2262    precr.qb.ph       t1, t7, t5 /* t1 = four dst bytes packed in one word */
   2263 
   2264    lbu               t4, 0(a1) /* t4, v1, t7, t8 = four source bytes */
   2265    lbu               v1, 1(a1)
   2266    lbu               t7, 2(a1)
   2267    lbu               t8, 3(a1)
   2268 
   2269    addiu             a1, a1, 4 /* src += 4 pixels */
   2270 
   2271    precr_sra.ph.w    v1, t4, 0 /* v1 = src1:src0 as two halfwords */
   2272    precr_sra.ph.w    t8, t7, 0 /* t8 = src3:src2 */
   2273 
   2274    muleu_s.ph.qbl    t2, t0, t8 /* t2 = mask3*src3 : mask2*src2 (16-bit products) */
   2275    muleu_s.ph.qbr    t3, t0, v1 /* t3 = mask1*src1 : mask0*src0 */
   2276    shra_r.ph         t4, t2, 8 /* rounded product >> 8 per halfword */
   2277    shra_r.ph         t5, t3, 8
   2278    and               t4, t4, t9 /* drop the sign bits the arithmetic shift brings in */
   2279    and               t5, t5, t9
   2280    addq.ph           t2, t2, t4 /* product + (product >> 8) */
   2281    addq.ph           t3, t3, t5
   2282    shra_r.ph         t2, t2, 8 /* second rounded shift brings each value back to 8 bits */
   2283    shra_r.ph         t3, t3, 8
   2284    precr.qb.ph       t0, t2, t3 /* repack the four low bytes into one word */
   2285 
   2286    addu_s.qb         t2, t0, t1 /* per-byte saturating add with dst */
   2287 
   2288    sb                t2, 0(a0) /* write the four result bytes, lowest byte first */
   2289    srl               t2, t2, 8
   2290    sb                t2, 1(a0)
   2291    srl               t2, t2, 8
   2292    sb                t2, 2(a0)
   2293    srl               t2, t2, 8
   2294    sb                t2, 3(a0)
   2295    addiu             a3, a3, -4 /* four pixels consumed */
   2296    b                 0b
   2297     addiu            a0, a0, 4 /* delay slot: dst += 4 pixels */
   2298 
   2299 1: /* fewer than four pixels remain */
   2300    beqz              a3, 3f
   2301     nop
   2302 2: /* single-pixel loop */
   2303    lbu               t8, 0(a1) /* t8 = source byte */
   2304    lbu               t0, 0(a2) /* t0 = mask byte */
   2305    lbu               t1, 0(a0) /* t1 = destination byte */
   2306    addiu             a1, a1, 1
   2307    addiu             a2, a2, 1
   2308 
   2309    mul               t2, t0, t8 /* 16-bit product of mask and source */
   2310    shra_r.ph         t3, t2, 8 /* same two-step rounded scaling as in the four-pixel loop */
   2311    andi              t3, t3, 0xff
   2312    addq.ph           t2, t2, t3
   2313    shra_r.ph         t2, t2, 8
   2314    andi              t2, t2, 0xff
   2315 
   2316    addu_s.qb         t2, t2, t1 /* saturating add with dst */
   2317    sb                t2, 0(a0)
   2318    addiu             a3, a3, -1
   2319    bnez              a3, 2b
   2320     addiu            a0, a0, 1 /* delay slot: dst += 1 pixel */
   2321 
   2322 3: /* common exit */
   2323    RESTORE_REGS_FROM_STACK 0, v0, v1
   2324    j                 ra
   2325     nop
   2326 
   2327 END(pixman_composite_add_8_8_8_asm_mips)
   2328 
   2329 LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
   2330 /* Scanline worker on a8 data with a constant source: dst = saturating_add(dst, top byte of src * mask scaled back to 8 bits); four pixels per pass in loop 0, then one pixel per pass in loop 2.
   2331 * a0 - dst  (a8), read and rewritten in place
   2332 * a1 - src  (32bit constant), only bits 31..24 are used
   2333 * a2 - mask (a8), only read
   2334 * a3 - w, pixel count; w == 0 returns without touching memory
   2335 */
   2336 
   2337    SAVE_REGS_ON_STACK 0, v0 /* v0 is the group counter below */
   2338    li                t9, 0x00ff00ff /* keeps the low byte of each halfword */
   2339    beqz              a3, 3f /* w == 0: nothing to do */
   2340     nop
   2341 
   2342    srl               v0, a3, 2   /* v0 = how many multiples of 4 dst pixels */
   2343    beqz              v0, 1f      /* branch if less than 4 src pixels */
   2344     nop
   2345 
   2346    srl               t8, a1, 24 /* t8 = top byte of the constant source */
   2347    replv.ph          t8, t8 /* replicate it into both halfwords */
   2348 
   2349 0: /* four-pixel loop */
   2350    beqz              v0, 1f /* no full group left */
   2351     addiu            v0, v0, -1 /* delay slot: count this group */
   2352    lbu               t0, 0(a2) /* t0..t3 = four mask bytes */
   2353    lbu               t1, 1(a2)
   2354    lbu               t2, 2(a2)
   2355    lbu               t3, 3(a2)
   2356    lbu               t4, 0(a0) /* t4..t7 = four destination bytes */
   2357    lbu               t5, 1(a0)
   2358    lbu               t6, 2(a0)
   2359    lbu               t7, 3(a0)
   2360 
   2361    addiu             a2, a2, 4 /* mask += 4 pixels */
   2362 
   2363    precr_sra.ph.w    t1, t0, 0 /* t1 = mask1:mask0 as two halfwords */
   2364    precr_sra.ph.w    t3, t2, 0 /* t3 = mask3:mask2 */
   2365    precr_sra.ph.w    t5, t4, 0 /* t5 = dst1:dst0 */
   2366    precr_sra.ph.w    t7, t6, 0 /* t7 = dst3:dst2 */
   2367 
   2368    precr.qb.ph       t0, t3, t1 /* t0 = four mask bytes packed in one word */
   2369    precr.qb.ph       t1, t7, t5 /* t1 = four dst bytes packed in one word */
   2370 
   2371    muleu_s.ph.qbl    t2, t0, t8 /* t2 = mask3*src : mask2*src (16-bit products) */
   2372    muleu_s.ph.qbr    t3, t0, t8 /* t3 = mask1*src : mask0*src */
   2373    shra_r.ph         t4, t2, 8 /* rounded product >> 8 per halfword */
   2374    shra_r.ph         t5, t3, 8
   2375    and               t4, t4, t9 /* drop the sign bits the arithmetic shift brings in */
   2376    and               t5, t5, t9
   2377    addq.ph           t2, t2, t4 /* product + (product >> 8) */
   2378    addq.ph           t3, t3, t5
   2379    shra_r.ph         t2, t2, 8 /* second rounded shift brings each value back to 8 bits */
   2380    shra_r.ph         t3, t3, 8
   2381    precr.qb.ph       t0, t2, t3 /* repack the four low bytes into one word */
   2382 
   2383    addu_s.qb         t2, t0, t1 /* per-byte saturating add with dst */
   2384 
   2385    sb                t2, 0(a0) /* write the four result bytes, lowest byte first */
   2386    srl               t2, t2, 8
   2387    sb                t2, 1(a0)
   2388    srl               t2, t2, 8
   2389    sb                t2, 2(a0)
   2390    srl               t2, t2, 8
   2391    sb                t2, 3(a0)
   2392    addiu             a3, a3, -4 /* four pixels consumed */
   2393    b                 0b
   2394     addiu            a0, a0, 4 /* delay slot: dst += 4 pixels */
   2395 
   2396 1: /* fewer than four pixels remain */
   2397    beqz              a3, 3f
   2398     nop
   2399    srl               t8, a1, 24 /* reload the plain top byte of src for the scalar multiply */
   2400 2: /* single-pixel loop */
   2401    lbu               t0, 0(a2) /* t0 = mask byte */
   2402    lbu               t1, 0(a0) /* t1 = destination byte */
   2403    addiu             a2, a2, 1
   2404 
   2405    mul               t2, t0, t8 /* 16-bit product of mask and source byte */
   2406    shra_r.ph         t3, t2, 8 /* same two-step rounded scaling as in the four-pixel loop */
   2407    andi              t3, t3, 0xff
   2408    addq.ph           t2, t2, t3
   2409    shra_r.ph         t2, t2, 8
   2410    andi              t2, t2, 0xff
   2411 
   2412    addu_s.qb         t2, t2, t1 /* saturating add with dst */
   2413    sb                t2, 0(a0)
   2414    addiu             a3, a3, -1
   2415    bnez              a3, 2b
   2416     addiu            a0, a0, 1 /* delay slot: dst += 1 pixel */
   2417 
   2418 3: /* common exit */
   2419    RESTORE_REGS_FROM_STACK 0, v0
   2420    j                 ra
   2421     nop
   2422 
   2423 END(pixman_composite_add_n_8_8_asm_mips)
   2424 
   2425 LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
   2426 /* Scanline worker: feeds the constant source, an a8 mask and dst through the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4 macros (defined elsewhere); two pixels per loop pass plus one leftover.
   2427 * a0 - dst  (a8r8g8b8), read and rewritten in place
   2428 * a1 - src  (32bit constant), used for every pixel
   2429 * a2 - mask (a8), one byte per pixel
   2430 * a3 - w, pixel count; w == 0 returns without touching memory
   2431 */
   2432 
   2433    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the two-pixel macro below */
   2434    li       t4, 0x00ff00ff /* constant operand for both macros */
   2435    beqz     a3, 3f /* w == 0: nothing to do */
   2436     nop
   2437    addiu    t1, a3, -1
   2438    beqz     t1, 2f /* w == 1: skip the pair loop */
   2439     nop
   2440 1: /* pair loop: two pixels per pass */
   2441                       /* a1 = source      (32bit constant) */
   2442    lbu      t0, 0(a2) /* t0 = mask        (a8) */
   2443    lbu      t1, 1(a2) /* t1 = mask        (a8) */
   2444    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   2445    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
   2446    addiu    a2, a2, 2 /* mask += 2 pixels */
   2447 
   2448    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 a1, a1, \
   2449                                       t0, t1, \
   2450                                       t2, t3, \
   2451                                       t5, t6, \
   2452                                       t4, t7, t8, t9, s0, s1, s2 /* results arrive in t5/t6 (stored next) */
   2453 
   2454    sw       t5, 0(a0)
   2455    sw       t6, 4(a0)
   2456    addiu    a3, a3, -2 /* two pixels consumed */
   2457    addiu    t1, a3, -1
   2458    bgtz     t1, 1b /* loop again while at least two pixels remain */
   2459     addiu   a0, a0, 8 /* delay slot: dst += 2 pixels on both paths */
   2460 2: /* leftover: zero or one pixel */
   2461    beqz     a3, 3f
   2462     nop
   2463                       /* a1 = source      (32bit constant) */
   2464    lbu      t0, 0(a2) /* t0 = mask        (a8) */
   2465    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
   2466 
   2467    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 a1, t0, t1, t2, t4, t3, t5, t6 /* result arrives in t2 (stored next) */
   2468 
   2469    sw       t2, 0(a0)
   2470 3: /* common exit */
   2471    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
   2472    j        ra
   2473     nop
   2474 
   2475 END(pixman_composite_add_n_8_8888_asm_mips)
   2476 
   2477 LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
   2478 /* Scanline worker: widens r5g6b5 src and dst via CONVERT_*0565_TO_*8888, runs the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4 macros with the a8 mask, narrows the result back; two pixels per loop pass plus one leftover.
   2479 * a0 - dst  (r5g6b5), read and rewritten in place
   2480 * a1 - src  (r5g6b5), only read
   2481 * a2 - mask (a8), one byte per pixel
   2482 * a3 - w, pixel count; w == 0 returns without touching memory
   2483 */
   2484 
   2485    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7 /* s0-s7 are handed to the two-pixel macros below */
   2486    li       t4, 0xf800f800 /* top 5 bits of each halfword (r field of r5g6b5) */
   2487    li       t5, 0x07e007e0 /* middle 6 bits of each halfword (g field) */
   2488    li       t6, 0x001F001F /* low 5 bits of each halfword (b field) */
   2489    li       t7, 0x00ff00ff /* constant operand for the multiply-add macros */
   2490    beqz     a3, 3f /* w == 0: nothing to do */
   2491     nop
   2492    addiu    t1, a3, -1
   2493    beqz     t1, 2f /* w == 1: skip the pair loop */
   2494     nop
   2495 1: /* pair loop: two pixels per pass */
   2496    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
   2497    lhu      t1, 2(a1) /* t1 = source      (r5g6b5) */
   2498    lbu      t2, 0(a2) /* t2 = mask        (a8) */
   2499    lbu      t3, 1(a2) /* t3 = mask        (a8) */
   2500    lhu      t8, 0(a0) /* t8 = destination (r5g6b5) */
   2501    lhu      t9, 2(a0) /* t9 = destination (r5g6b5) */
   2502    addiu    a1, a1, 4 /* src += 2 pixels */
   2503    addiu    a2, a2, 2 /* mask += 2 pixels */
   2504 
   2505    CONVERT_2x0565_TO_2x8888  t0, t1, s0, s1, t5, t6, s2, s3, s4, s5 /* widened src goes to s0/s1 */
   2506    CONVERT_2x0565_TO_2x8888  t8, t9, s2, s3, t5, t6, s4, s5, s6, s7 /* widened dst goes to s2/s3 */
   2507    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4  s0, s1, \
   2508                                        t2, t3, \
   2509                                        s2, s3, \
   2510                                        t0, t1, \
   2511                                        t7, s4, s5, s6, s7, t8, t9 /* results arrive in t0/t1 */
   2512    CONVERT_2x8888_TO_2x0565  t0, t1, s0, s1, t4, t5, t6, s2, s3 /* narrowed results arrive in s0/s1 (stored next) */
   2513 
   2514    sh       s0, 0(a0)
   2515    sh       s1, 2(a0)
   2516    addiu    a3, a3, -2 /* two pixels consumed */
   2517    addiu    t1, a3, -1
   2518    bgtz     t1, 1b /* loop again while at least two pixels remain */
   2519     addiu   a0, a0, 4 /* delay slot: dst += 2 pixels on both paths */
   2520 2: /* leftover: zero or one pixel */
   2521    beqz     a3, 3f
   2522     nop
   2523    lhu      t0, 0(a1) /* t0 = source      (r5g6b5) */
   2524    lbu      t1, 0(a2) /* t1 = mask        (a8) */
   2525    lhu      t2, 0(a0) /* t2 = destination (r5g6b5) */
   2526 
   2527    CONVERT_1x0565_TO_1x8888  t0, t3, t4, t5 /* widened src goes to t3; t4/t5 constants are no longer needed here */
   2528    CONVERT_1x0565_TO_1x8888  t2, t4, t5, t6 /* widened dst goes to t4 */
   2529    MIPS_UN8x4_MUL_UN8_ADD_UN8x4  t3, t1, t4, t0, t7, t2, t5, t6 /* result arrives in t0 */
   2530    CONVERT_1x8888_TO_1x0565  t0, t3, t4, t5 /* narrowed result arrives in t3 (stored next) */
   2531 
   2532    sh       t3, 0(a0)
   2533 3: /* common exit */
   2534    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
   2535    j        ra
   2536     nop
   2537 
   2538 END(pixman_composite_add_0565_8_0565_asm_mips)
   2539 
   2540 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
   2541 /* Scanline worker: feeds src, an a8 mask and dst through the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4 macros (defined elsewhere); two pixels per loop pass plus one leftover.
   2542 * a0 - dst  (a8r8g8b8), read and rewritten in place
   2543 * a1 - src  (a8r8g8b8), only read
   2544 * a2 - mask (a8), one byte per pixel
   2545 * a3 - w, pixel count; w == 0 returns without touching memory
   2546 */
   2547 
   2548    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the two-pixel macro below */
   2549    li       t4, 0x00ff00ff /* constant operand for both macros */
   2550    beqz     a3, 3f /* w == 0: nothing to do */
   2551     nop
   2552    addiu    t1, a3, -1
   2553    beqz     t1, 2f /* w == 1: skip the pair loop */
   2554     nop
   2555 1: /* pair loop: two pixels per pass */
   2556    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   2557    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   2558    lbu      t2, 0(a2) /* t2 = mask        (a8) */
   2559    lbu      t3, 1(a2) /* t3 = mask        (a8) */
   2560    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
   2561    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
   2562    addiu    a1, a1, 8 /* src += 2 pixels */
   2563    addiu    a2, a2, 2 /* mask += 2 pixels */
   2564 
   2565    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
   2566                                       t2, t3, \
   2567                                       t5, t6, \
   2568                                       t7, t8, \
   2569                                       t4, t9, s0, s1, s2, t0, t1 /* results arrive in t7/t8 (stored next) */
   2570 
   2571    sw       t7, 0(a0)
   2572    sw       t8, 4(a0)
   2573    addiu    a3, a3, -2 /* two pixels consumed */
   2574    addiu    t1, a3, -1
   2575    bgtz     t1, 1b /* loop again while at least two pixels remain */
   2576     addiu   a0, a0, 8 /* delay slot: dst += 2 pixels on both paths */
   2577 2: /* leftover: zero or one pixel */
   2578    beqz     a3, 3f
   2579     nop
   2580    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   2581    lbu      t1, 0(a2) /* t1 = mask        (a8) */
   2582    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   2583 
   2584    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7 /* result arrives in t3 (stored next) */
   2585 
   2586    sw       t3, 0(a0)
   2587 3: /* common exit */
   2588    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
   2589    j        ra
   2590     nop
   2591 
   2592 END(pixman_composite_add_8888_8_8888_asm_mips)
   2593 
   2594 LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
   2595 /* Scanline worker: feeds src, the top byte of a constant mask and dst through the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4 macros (defined elsewhere); two pixels per loop pass plus one leftover.
   2596 * a0 - dst  (a8r8g8b8), read and rewritten in place
   2597 * a1 - src  (a8r8g8b8), only read
   2598 * a2 - mask (32bit constant), reduced to its top byte on entry
   2599 * a3 - w, pixel count; w == 0 returns without touching memory
   2600 */
   2601 
   2602    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the two-pixel macro below */
   2603    li       t4, 0x00ff00ff /* constant operand for both macros */
   2604    beqz     a3, 3f /* w == 0: nothing to do */
   2605     nop
   2606    srl      a2, a2, 24 /* keep only the top byte of the constant mask */
   2607    addiu    t1, a3, -1
   2608    beqz     t1, 2f /* w == 1: skip the pair loop */
   2609     nop
   2610 1: /* pair loop: two pixels per pass */
   2611    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   2612    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   2613                       /* a2 = mask        (32bit constant) */
   2614    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   2615    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
   2616    addiu    a1, a1, 8 /* src += 2 pixels */
   2617 
   2618    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
   2619                                       a2, a2, \
   2620                                       t2, t3, \
   2621                                       t5, t6, \
   2622                                       t4, t7, t8, t9, s0, s1, s2 /* a2 is the mask for both pixels; results arrive in t5/t6 */
   2623 
   2624    sw       t5, 0(a0)
   2625    sw       t6, 4(a0)
   2626    addiu    a3, a3, -2 /* two pixels consumed */
   2627    addiu    t1, a3, -1
   2628    bgtz     t1, 1b /* loop again while at least two pixels remain */
   2629     addiu   a0, a0, 8 /* delay slot: dst += 2 pixels on both paths */
   2630 2: /* leftover: zero or one pixel */
   2631    beqz     a3, 3f
   2632     nop
   2633    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   2634                       /* a2 = mask        (32bit constant) */
   2635    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
   2636 
   2637    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, a2, t1, t3, t4, t5, t6, t7 /* result arrives in t3 (stored next) */
   2638 
   2639    sw       t3, 0(a0)
   2640 3: /* common exit */
   2641    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
   2642    j        ra
   2643     nop
   2644 
   2645 END(pixman_composite_add_8888_n_8888_asm_mips)
   2646 
   2647 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
   2648 /* Scanline worker: like the a8-mask variant, but the mask is a 32-bit pixel of which only the top byte is used (srl by 24) before the MIPS_*UN8x4_MUL_*UN8_ADD_*UN8x4 macros run.
   2649 * a0 - dst  (a8r8g8b8), read and rewritten in place
   2650 * a1 - src  (a8r8g8b8), only read
   2651 * a2 - mask (a8r8g8b8), only bits 31..24 are used
   2652 * a3 - w, pixel count; w == 0 returns without touching memory
   2653 */
   2654 
   2655    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the two-pixel macro below */
   2656    li       t4, 0x00ff00ff /* constant operand for both macros */
   2657    beqz     a3, 3f /* w == 0: nothing to do */
   2658     nop
   2659    addiu    t1, a3, -1
   2660    beqz     t1, 2f /* w == 1: skip the pair loop */
   2661     nop
   2662 1: /* pair loop: two pixels per pass */
   2663    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   2664    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
   2665    lw       t2, 0(a2) /* t2 = mask        (a8r8g8b8) */
   2666    lw       t3, 4(a2) /* t3 = mask        (a8r8g8b8) */
   2667    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
   2668    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
   2669    addiu    a1, a1, 8 /* src += 2 pixels */
   2670    addiu    a2, a2, 8 /* mask += 2 pixels */
   2671    srl      t2, t2, 24 /* keep only the top byte of mask pixel 0 */
   2672    srl      t3, t3, 24 /* keep only the top byte of mask pixel 1 */
   2673 
   2674    MIPS_2xUN8x4_MUL_2xUN8_ADD_2xUN8x4 t0, t1, \
   2675                                       t2, t3, \
   2676                                       t5, t6, \
   2677                                       t7, t8, \
   2678                                       t4, t9, s0, s1, s2, t0, t1 /* results arrive in t7/t8 (stored next) */
   2679 
   2680    sw       t7, 0(a0)
   2681    sw       t8, 4(a0)
   2682    addiu    a3, a3, -2 /* two pixels consumed */
   2683    addiu    t1, a3, -1
   2684    bgtz     t1, 1b /* loop again while at least two pixels remain */
   2685     addiu   a0, a0, 8 /* delay slot: dst += 2 pixels on both paths */
   2686 2: /* leftover: zero or one pixel */
   2687    beqz     a3, 3f
   2688     nop
   2689    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
   2690    lw       t1, 0(a2) /* t1 = mask        (a8r8g8b8) */
   2691    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   2692    srl      t1, t1, 24 /* keep only the top byte of the mask pixel */
   2693 
   2694    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t3, t4, t5, t6, t7 /* result arrives in t3 (stored next) */
   2695 
   2696    sw       t3, 0(a0)
   2697 3: /* common exit */
   2698    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
   2699    j        ra
   2700     nop
   2701 
   2702 END(pixman_composite_add_8888_8888_8888_asm_mips)
   2703 
   2704 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
   2705 /* Scanline worker on a8 data: dst = saturating_add(dst, src); four pixels per pass in loop 0, then one pixel per pass in loop 2. Uses only t0-t9, so nothing is saved on the stack.
   2706 * a0 - dst  (a8), read and rewritten in place
   2707 * a1 - src  (a8), only read
   2708 * a2 - w, pixel count; w == 0 returns without touching memory
   2709 */
   2710 
   2711    beqz              a2, 3f /* w == 0: nothing to do */
   2712     nop
   2713    srl               t9, a2, 2   /* t9 = how many multiples of 4 dst pixels */
   2714    beqz              t9, 1f      /* branch if less than 4 src pixels */
   2715     nop
   2716 
   2717 0: /* four-pixel loop */
   2718    beqz              t9, 1f /* no full group left */
   2719     addiu            t9, t9, -1 /* delay slot: count this group */
   2720    lbu               t0, 0(a1) /* t0..t3 = four source bytes */
   2721    lbu               t1, 1(a1)
   2722    lbu               t2, 2(a1)
   2723    lbu               t3, 3(a1)
   2724    lbu               t4, 0(a0) /* t4..t7 = four destination bytes */
   2725    lbu               t5, 1(a0)
   2726    lbu               t6, 2(a0)
   2727    lbu               t7, 3(a0)
   2728 
   2729    addiu             a1, a1, 4 /* src += 4 pixels */
   2730 
   2731    precr_sra.ph.w    t1, t0, 0 /* t1 = src1:src0 as two halfwords */
   2732    precr_sra.ph.w    t3, t2, 0 /* t3 = src3:src2 */
   2733    precr_sra.ph.w    t5, t4, 0 /* t5 = dst1:dst0 */
   2734    precr_sra.ph.w    t7, t6, 0 /* t7 = dst3:dst2 */
   2735 
   2736    precr.qb.ph       t0, t3, t1 /* t0 = four source bytes packed in one word */
   2737    precr.qb.ph       t1, t7, t5 /* t1 = four dst bytes packed in one word */
   2738 
   2739    addu_s.qb         t2, t0, t1 /* per-byte saturating add */
   2740 
   2741    sb                t2, 0(a0) /* write the four result bytes, lowest byte first */
   2742    srl               t2, t2, 8
   2743    sb                t2, 1(a0)
   2744    srl               t2, t2, 8
   2745    sb                t2, 2(a0)
   2746    srl               t2, t2, 8
   2747    sb                t2, 3(a0)
   2748    addiu             a2, a2, -4 /* four pixels consumed */
   2749    b                 0b
   2750     addiu            a0, a0, 4 /* delay slot: dst += 4 pixels */
   2751 
   2752 1: /* fewer than four pixels remain */
   2753    beqz              a2, 3f
   2754     nop
   2755 2: /* single-pixel loop */
   2756    lbu               t0, 0(a1) /* t0 = source byte */
   2757    lbu               t1, 0(a0) /* t1 = destination byte */
   2758    addiu             a1, a1, 1
   2759 
   2760    addu_s.qb         t2, t0, t1 /* saturating add */
   2761    sb                t2, 0(a0)
   2762    addiu             a2, a2, -1
   2763    bnez              a2, 2b
   2764     addiu            a0, a0, 1 /* delay slot: dst += 1 pixel */
   2765 
   2766 3: /* common exit; no registers to restore */
   2767    j                 ra
   2768     nop
   2769 
   2770 END(pixman_composite_add_8_8_asm_mips)
   2771 
   2772 LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
   2773 /* ADD on a8r8g8b8: each dst word becomes the per-byte unsigned saturating sum dst + src.
   2774 * a0 - dst (a8r8g8b8), read and updated in place
   2775 * a1 - src (a8r8g8b8), read only
   2776 * a2 - w, number of 32-bit pixels to process
   2777 */
   2778 
   2779    beqz         a2, 4f         /* w == 0: nothing to do */
   2780     nop
   2781 
   2782    srl          t9, a2, 2      /* t9 = how many multiples of 4 src pixels */
   2783    beqz         t9, 3f         /* branch if less than 4 src pixels */
   2784     nop
   2785 1: /* 4-pixel groups; the final group is peeled off and handled at 2: */
   2786    addiu        t9, t9, -1
   2787    beqz         t9, 2f         /* this is the last full group */
   2788     addiu       a2, a2, -4     /* delay slot: runs on both paths, accounts for the group about to be done */
   2789 
   2790    lw           t0, 0(a1)      /* t0..t3 = four source pixels */
   2791    lw           t1, 4(a1)
   2792    lw           t2, 8(a1)
   2793    lw           t3, 12(a1)
   2794    lw           t4, 0(a0)      /* t4..t7 = four destination pixels */
   2795    lw           t5, 4(a0)
   2796    lw           t6, 8(a0)
   2797    lw           t7, 12(a0)
   2798    addiu        a1, a1, 16
   2799 
   2800    addu_s.qb    t4, t4, t0     /* per-byte unsigned saturating add */
   2801    addu_s.qb    t5, t5, t1
   2802    addu_s.qb    t6, t6, t2
   2803    addu_s.qb    t7, t7, t3
   2804 
   2805    sw           t4, 0(a0)
   2806    sw           t5, 4(a0)
   2807    sw           t6, 8(a0)
   2808    sw           t7, 12(a0)
   2809    b            1b
   2810     addiu       a0, a0, 16     /* delay slot: advance dst */
   2811 2: /* last full group of 4 pixels, same work as the loop body above */
   2812    lw           t0, 0(a1)
   2813    lw           t1, 4(a1)
   2814    lw           t2, 8(a1)
   2815    lw           t3, 12(a1)
   2816    lw           t4, 0(a0)
   2817    lw           t5, 4(a0)
   2818    lw           t6, 8(a0)
   2819    lw           t7, 12(a0)
   2820    addiu        a1, a1, 16
   2821 
   2822    addu_s.qb    t4, t4, t0
   2823    addu_s.qb    t5, t5, t1
   2824    addu_s.qb    t6, t6, t2
   2825    addu_s.qb    t7, t7, t3
   2826 
   2827    sw           t4, 0(a0)
   2828    sw           t5, 4(a0)
   2829    sw           t6, 8(a0)
   2830    sw           t7, 12(a0)
   2831 
   2832    beqz         a2, 4f         /* done if w was a multiple of 4 */
   2833     addiu       a0, a0, 16     /* delay slot: advance dst */
   2834 3: /* tail loop: one pixel per pass, a2 = pixels left (1..3) */
   2835    lw           t0, 0(a1)
   2836    lw           t1, 0(a0)
   2837    addiu        a1, a1, 4
   2838    addiu        a2, a2, -1
   2839    addu_s.qb    t1, t1, t0
   2840    sw           t1, 0(a0)
   2841    bnez         a2, 3b
   2842     addiu       a0, a0, 4      /* delay slot: advance dst */
   2843 4:
   2844    jr           ra
   2845     nop
   2846 
   2847 END(pixman_composite_add_8888_8888_asm_mips)
   2848 
   2849 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
   2850 /* OUT_REVERSE, a8 source onto r5g6b5: dst is scaled by the inverted source alpha (255 - src).
   2851 * a0 - dst  (r5g6b5), read and updated in place
   2852 * a1 - src  (a8), read only
   2853 * a2 - w, number of pixels to process
   2854 */
   2855 
   2856    beqz     a2, 4f             /* w == 0: return without touching the stack */
   2857     nop
   2858 
   2859    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
   2860    li       t2, 0xf800f800     /* bits 15..11 of each halfword (red field of r5g6b5) */
   2861    li       t3, 0x07e007e0     /* bits 10..5 of each halfword (green field) */
   2862    li       t4, 0x001F001F     /* bits 4..0 of each halfword (blue field) */
   2863    li       t5, 0x00ff00ff     /* low byte of each halfword */
   2864 
   2865    addiu    t1, a2, -1
   2866    beqz     t1, 2f             /* exactly one pixel: go straight to the single-pixel tail */
   2867     nop
   2868 1: /* main loop: two pixels per pass */
   2869    lbu      t0, 0(a1) /* t0 = source      (a8) */
   2870    lbu      t1, 1(a1) /* t1 = source      (a8) */
   2871    lhu      t6, 0(a0) /* t6 = destination (r5g6b5) */
   2872    lhu      t7, 2(a0) /* t7 = destination (r5g6b5) */
   2873    addiu    a1, a1, 2
   2874 
   2875    not      t0, t0
   2876    not      t1, t1
   2877    andi     t0, 0xff  /* t0 = neg source1 */
   2878    andi     t1, 0xff  /* t1 = neg source2 */
   2879    CONVERT_2x0565_TO_2x8888 t6, t7, t8, t9, t3, t4, s0, s1, s2, s3
   2880    MIPS_2xUN8x4_MUL_2xUN8   t8, t9, t0, t1, t6, t7, t5, s0, s1, s2, s3, t8, t9
   2881    CONVERT_2x8888_TO_2x0565 t6, t7, t8, t9, t2, t3, t4, s0, s1
   2882    /* The three macros above are defined outside this section; by name they widen dst to 8888, multiply by the inverted alpha and narrow back - TODO confirm against their definitions. The results stored below are t8 and t9. */
   2883    sh       t8, 0(a0)
   2884    sh       t9, 2(a0)
   2885    addiu    a2, a2, -2
   2886    addiu    t1, a2, -1
   2887    bgtz     t1, 1b             /* loop while at least two pixels remain */
   2888     addiu   a0, a0, 4          /* delay slot: advance dst by two pixels */
   2889 2: /* tail: zero or one pixel left */
   2890    beqz     a2, 3f
   2891     nop
   2892    lbu      t0, 0(a1) /* t0 = source      (a8) */
   2893    lhu      t1, 0(a0) /* t1 = destination (r5g6b5) */
   2894 
   2895    not      t0, t0
   2896    andi     t0, 0xff  /* t0 = neg source */
   2897    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t4
   2898    MIPS_UN8x4_MUL_UN8        t2, t0, t1, t5, t3, t4, t6
   2899    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t4
   2900    /* t2, t3 and t4 are reused as macro operands here, so the field masks loaded above are gone; nothing after this point reads them */
   2901    sh       t2, 0(a0)
   2902 3:
   2903    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
   2904 4:
   2905    j        ra
   2906     nop
   2907 
   2908 END(pixman_composite_out_reverse_8_0565_asm_mips)
   2909 
   2910 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
   2911 /* OUT_REVERSE, a8 source onto a8r8g8b8: dst is scaled by the inverted source alpha (255 - src).
   2912 * a0 - dst  (a8r8g8b8), read and updated in place
   2913 * a1 - src  (a8), read only
   2914 * a2 - w, number of pixels to process
   2915 */
   2916 
   2917    beqz     a2, 3f             /* w == 0: nothing to do */
   2918     nop
   2919    li       t4, 0x00ff00ff     /* low byte of each halfword; passed to the multiply macros */
   2920    addiu    t1, a2, -1
   2921    beqz     t1, 2f             /* exactly one pixel: go straight to the single-pixel tail */
   2922     nop
   2923 1: /* main loop: two pixels per pass */
   2924    lbu      t0, 0(a1) /* t0 = source      (a8) */
   2925    lbu      t1, 1(a1) /* t1 = source      (a8) */
   2926    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
   2927    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
   2928    addiu    a1, a1, 2
   2929    not      t0, t0
   2930    not      t1, t1
   2931    andi     t0, 0xff  /* t0 = neg source */
   2932    andi     t1, 0xff  /* t1 = neg source */
   2933    /* macro defined outside this section; by name it multiplies the channels of t2/t3 by t0/t1 - TODO confirm. Results are taken from t5/t6. */
   2934    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t1, t5, t6, t4, t7, t8, t9, t2, t3, t0
   2935 
   2936    sw       t5, 0(a0)
   2937    sw       t6, 4(a0)
   2938    addiu    a2, a2, -2
   2939    addiu    t1, a2, -1
   2940    bgtz     t1, 1b             /* loop while at least two pixels remain */
   2941     addiu   a0, a0, 8          /* delay slot: advance dst by two pixels */
   2942 2: /* tail: zero or one pixel left */
   2943    beqz     a2, 3f
   2944     nop
   2945    lbu      t0, 0(a1) /* t0 = source      (a8) */
   2946    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
   2947    not      t0, t0
   2948    andi     t0, 0xff  /* t0 = neg source */
   2949    /* single-pixel variant of the multiply above; result is taken from t2 */
   2950    MIPS_UN8x4_MUL_UN8 t1, t0, t2, t4, t3, t5, t6
   2951 
   2952    sw       t2, 0(a0)
   2953 3:
   2954    j        ra
   2955     nop
   2956 
   2957 END(pixman_composite_out_reverse_8_8888_asm_mips)
   2958 
   2959 LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
   2960 /* OVER_REVERSE with a solid source: dst = dst + src * (255 - dst.alpha) / 255, per byte, saturating.
   2961 * a0 - dst  (a8r8g8b8), read and updated in place
   2962 * a1 - src  (32bit constant), the same colour for every pixel
   2963 * a2 - w, number of pixels to process
   2964 */
   2965 
   2966    beqz              a2, 5f      /* w == 0: return without touching the stack */
   2967     nop
   2968 
   2969    SAVE_REGS_ON_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
   2970    li                t0, 0x00ff00ff /* low byte of each halfword */
   2971    srl               t9, a2, 2   /* t9 = how many multiples of 4 src pixels */
   2972    beqz              t9, 2f      /* branch if less than 4 src pixels */
   2973     nop
   2974 1: /* main loop: 4 pixels per pass, t9 = passes still to run */
   2975    beqz              t9, 2f
   2976     addiu            t9, t9, -1  /* delay slot, executed on both paths */
   2977 
   2978    lw                t1, 0(a0)   /* t1..t4 = four destination pixels */
   2979    lw                t2, 4(a0)
   2980    lw                t3, 8(a0)
   2981    lw                t4, 12(a0)
   2982 
   2983    addiu             a2, a2, -4
   2984 
   2985    not               t5, t1      /* t5..t8 = inverted destination pixels */
   2986    not               t6, t2
   2987    not               t7, t3
   2988    not               t8, t4
   2989    srl               t5, t5, 24  /* keep the top byte: 255 - dst.alpha */
   2990    srl               t6, t6, 24
   2991    srl               t7, t7, 24
   2992    srl               t8, t8, 24
   2993    replv.ph          t5, t5      /* replicate the factor into both halfwords */
   2994    replv.ph          t6, t6
   2995    replv.ph          t7, t7
   2996    replv.ph          t8, t8
   2997    muleu_s.ph.qbl    s0, a1, t5  /* s0 = two upper bytes of src, each times the factor (pixel 0) */
   2998    muleu_s.ph.qbr    s1, a1, t5  /* s1 = two lower bytes of src, each times the factor (pixel 0) */
   2999    muleu_s.ph.qbl    s2, a1, t6  /* s2/s3: same for pixel 1 */
   3000    muleu_s.ph.qbr    s3, a1, t6
   3001    muleu_s.ph.qbl    s4, a1, t7  /* s4/s5: same for pixel 2 */
   3002    muleu_s.ph.qbr    s5, a1, t7
   3003    muleu_s.ph.qbl    s6, a1, t8  /* s6/s7: same for pixel 3 */
   3004    muleu_s.ph.qbr    s7, a1, t8
   3005    /* Divide every 16-bit product x by 255 with rounding: t = ((x + 128) >> 8) & 0xff, result = (x + t + 128) >> 8. First for s0..s3: */
   3006    shra_r.ph         t5, s0, 8
   3007    shra_r.ph         t6, s1, 8
   3008    shra_r.ph         t7, s2, 8
   3009    shra_r.ph         t8, s3, 8
   3010    and               t5, t5, t0
   3011    and               t6, t6, t0
   3012    and               t7, t7, t0
   3013    and               t8, t8, t0
   3014    addq.ph           s0, s0, t5
   3015    addq.ph           s1, s1, t6
   3016    addq.ph           s2, s2, t7
   3017    addq.ph           s3, s3, t8
   3018    shra_r.ph         s0, s0, 8
   3019    shra_r.ph         s1, s1, 8
   3020    shra_r.ph         s2, s2, 8
   3021    shra_r.ph         s3, s3, 8
   3022    shra_r.ph         t5, s4, 8   /* same rounding division for s4..s7 */
   3023    shra_r.ph         t6, s5, 8
   3024    shra_r.ph         t7, s6, 8
   3025    shra_r.ph         t8, s7, 8
   3026    and               t5, t5, t0
   3027    and               t6, t6, t0
   3028    and               t7, t7, t0
   3029    and               t8, t8, t0
   3030    addq.ph           s4, s4, t5
   3031    addq.ph           s5, s5, t6
   3032    addq.ph           s6, s6, t7
   3033    addq.ph           s7, s7, t8
   3034    shra_r.ph         s4, s4, 8
   3035    shra_r.ph         s5, s5, 8
   3036    shra_r.ph         s6, s6, 8
   3037    shra_r.ph         s7, s7, 8
   3038 
   3039    precr.qb.ph       t5, s0, s1  /* repack the four scaled channels of each pixel into one word */
   3040    precr.qb.ph       t6, s2, s3
   3041    precr.qb.ph       t7, s4, s5
   3042    precr.qb.ph       t8, s6, s7
   3043    addu_s.qb         t5, t1, t5  /* dst + scaled src, per-byte unsigned saturating */
   3044    addu_s.qb         t6, t2, t6
   3045    addu_s.qb         t7, t3, t7
   3046    addu_s.qb         t8, t4, t8
   3047 
   3048    sw                t5, 0(a0)
   3049    sw                t6, 4(a0)
   3050    sw                t7, 8(a0)
   3051    sw                t8, 12(a0)
   3052    b                 1b
   3053     addiu            a0, a0, 16  /* delay slot: advance dst */
   3054 
   3055 2: /* a2 = pixels left over after the 4-pixel passes (0..3) */
   3056    beqz              a2, 4f
   3057     nop
   3058 3: /* tail loop: one pixel per pass, same arithmetic as above */
   3059    lw                t1, 0(a0)
   3060 
   3061    not               t2, t1
   3062    srl               t2, t2, 24  /* t2 = 255 - dst.alpha */
   3063    replv.ph          t2, t2
   3064 
   3065    muleu_s.ph.qbl    t4, a1, t2
   3066    muleu_s.ph.qbr    t5, a1, t2
   3067    shra_r.ph         t6, t4, 8
   3068    shra_r.ph         t7, t5, 8
   3069 
   3070    and               t6,t6,t0
   3071    and               t7,t7,t0
   3072 
   3073    addq.ph           t8, t4, t6
   3074    addq.ph           t9, t5, t7
   3075 
   3076    shra_r.ph         t8, t8, 8
   3077    shra_r.ph         t9, t9, 8
   3078 
   3079    precr.qb.ph       t9, t8, t9
   3080 
   3081    addu_s.qb         t9, t1, t9
   3082    sw                t9, 0(a0)
   3083 
   3084    addiu             a2, a2, -1
   3085    bnez              a2, 3b
   3086     addiu            a0, a0, 4   /* delay slot: advance dst */
   3087 4:
   3088    RESTORE_REGS_FROM_STACK 20, s0, s1, s2, s3, s4, s5, s6, s7
   3089 5:
   3090    j                 ra
   3091     nop
   3092 
   3093 END(pixman_composite_over_reverse_n_8888_asm_mips)
   3094 
   3095 LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
   3096 /* IN with a solid source on a8: every dst byte is scaled by the source alpha, dst * (src >> 24) / 255 with rounding.
   3097 * a0 - dst  (a8), read and updated in place
   3098 * a1 - src  (32bit constant); only its top byte is used
   3099 * a2 - w, number of a8 pixels (bytes) to process
   3100 */
   3101 
   3102    li                t9, 0x00ff00ff /* low byte of each halfword */
   3103    beqz              a2, 3f      /* w == 0: nothing to do */
   3104     nop
   3105    srl               t7, a2, 2   /* t7 = how many multiples of 4 dst pixels */
   3106    beqz              t7, 1f      /* branch if less than 4 src pixels */
   3107     nop
   3108 
   3109    srl               t8, a1, 24  /* t8 = source alpha */
   3110    replv.ph          t8, t8      /* replicated into both halfwords */
   3111 
   3112 0: /* main loop: 4 pixels per pass, t7 = passes still to run */
   3113    beqz              t7, 1f
   3114     addiu            t7, t7, -1  /* delay slot, executed on both paths */
   3115    lbu               t0, 0(a0)   /* t0..t3 = four destination bytes */
   3116    lbu               t1, 1(a0)
   3117    lbu               t2, 2(a0)
   3118    lbu               t3, 3(a0)
   3119 
   3120    precr_sra.ph.w    t1, t0, 0   /* t1 = dst1 : dst0 as halfwords */
   3121    precr_sra.ph.w    t3, t2, 0   /* t3 = dst3 : dst2 as halfwords */
   3122    precr.qb.ph       t0, t3, t1  /* t0 = dst3:dst2:dst1:dst0 as packed bytes */
   3123 
   3124    muleu_s.ph.qbl    t2, t0, t8  /* t2 = two upper bytes, each times alpha */
   3125    muleu_s.ph.qbr    t3, t0, t8  /* t3 = two lower bytes, each times alpha */
   3126    shra_r.ph         t4, t2, 8   /* rounding division by 255: t = ((x + 128) >> 8) & 0xff ... */
   3127    shra_r.ph         t5, t3, 8
   3128    and               t4, t4, t9
   3129    and               t5, t5, t9
   3130    addq.ph           t2, t2, t4
   3131    addq.ph           t3, t3, t5
   3132    shra_r.ph         t2, t2, 8   /* ... result = (x + t + 128) >> 8 */
   3133    shra_r.ph         t3, t3, 8
   3134    precr.qb.ph       t2, t2, t3  /* repack the four results into one word */
   3135 
   3136    sb                t2, 0(a0)   /* write the result back one byte at a time, lowest byte first */
   3137    srl               t2, t2, 8
   3138    sb                t2, 1(a0)
   3139    srl               t2, t2, 8
   3140    sb                t2, 2(a0)
   3141    srl               t2, t2, 8
   3142    sb                t2, 3(a0)
   3143    addiu             a2, a2, -4
   3144    b                 0b
   3145     addiu            a0, a0, 4   /* delay slot: advance dst */
   3146 
   3147 1: /* a2 = pixels left over after the 4-pixel passes (0..3) */
   3148    beqz              a2, 3f
   3149     nop
   3150    srl               t8, a1, 24  /* t8 = source alpha as a plain scalar (not replicated) */
   3151 2: /* tail loop: one pixel per pass */
   3152    lbu               t0, 0(a0)
   3153 
   3154    mul               t2, t0, t8  /* x = dst * alpha */
   3155    shra_r.ph         t3, t2, 8   /* same rounding division by 255 as in the main loop */
   3156    andi              t3, t3, 0x00ff
   3157    addq.ph           t2, t2, t3
   3158    shra_r.ph         t2, t2, 8
   3159 
   3160    sb                t2, 0(a0)
   3161    addiu             a2, a2, -1
   3162    bnez              a2, 2b
   3163     addiu            a0, a0, 1   /* delay slot: advance dst */
   3164 
   3165 3:
   3166    j                 ra
   3167     nop
   3168 
   3169 END(pixman_composite_in_n_8_asm_mips)
   3170 
   3171 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
   3172 /* Nearest-neighbour scaled scanline, OVER: fetch src[vx >> 16], composite it onto dst, step vx by unit_x.
   3173 * a0     - dst  (a8r8g8b8), read and updated in place
   3174 * a1     - src  (a8r8g8b8), indexed by the upper 16 bits of vx
   3175 * a2     - w, number of destination pixels
   3176 * a3     - vx, source position; pixel index in bits 31..16
   3177 * 16(sp) - unit_x, added to vx after every fetched pixel
   3178 */
   3179 
   3180    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
   3181    lw       t8, 16(sp) /* t8 = unit_x */
   3182    li       t6, 0x00ff00ff
   3183    beqz     a2, 3f     /* w == 0: just restore and return */
   3184     nop
   3185    addiu    t1, a2, -1
   3186    beqz     t1, 2f     /* exactly one pixel: go straight to the single-pixel tail */
   3187     nop
   3188 1: /* main loop: two pixels per pass */
   3189    sra      t0, a3, 16 /* t0 = vx >> 16 */
   3190    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
   3191    addu     t0, a1, t0
   3192    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
   3193    addu     a3, a3, t8 /* a3 = vx + unit_x */
   3194 
   3195    sra      t1, a3, 16 /* t1 = vx >> 16 */
   3196    sll      t1, t1, 2  /* t1 = t1 * 4 (a8r8g8b8) */
   3197    addu     t1, a1, t1
   3198    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
   3199    addu     a3, a3, t8 /* a3 = vx + unit_x */
   3200 
   3201    lw       t2, 0(a0)  /* t2 = destination (a8r8g8b8) */
   3202    lw       t3, 4(a0)  /* t3 = destination (a8r8g8b8) */
   3203    /* macro defined outside this section; by name it composites t0/t1 OVER t2/t3 - TODO confirm. Results are taken from t4/t5. */
   3204    OVER_2x8888_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t9, s0, s1, s2, s3
   3205 
   3206    sw       t4, 0(a0)
   3207    sw       t5, 4(a0)
   3208    addiu    a2, a2, -2
   3209    addiu    t1, a2, -1
   3210    bgtz     t1, 1b     /* loop while at least two pixels remain */
   3211     addiu   a0, a0, 8  /* delay slot: advance dst by two pixels */
   3212 2: /* tail: zero or one pixel left */
   3213    beqz     a2, 3f
   3214     nop
   3215    sra      t0, a3, 16 /* t0 = vx >> 16 */
   3216    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
   3217    addu     t0, a1, t0
   3218    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
   3219    lw       t1, 0(a0)  /* t1 = destination (a8r8g8b8) */
   3220    addu     a3, a3, t8 /* a3 = vx + unit_x */
   3221    /* single-pixel variant of the OVER macro; result is taken from t2 */
   3222    OVER_8888_8888 t0, t1, t2, t6, t4, t5, t3, t7
   3223 
   3224    sw       t2, 0(a0)
   3225 3:
   3226    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
   3227    j        ra
   3228     nop
   3229 
   3230 END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
   3231 
   3232 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
   3233 /* Nearest-neighbour scaled scanline, OVER onto r5g6b5: fetch src[vx >> 16], composite onto dst, step vx by unit_x.
   3234 * a0     - dst  (r5g6b5), read and updated in place
   3235 * a1     - src  (a8r8g8b8), indexed by the upper 16 bits of vx
   3236 * a2     - w, number of destination pixels
   3237 * a3     - vx, source position; pixel index in bits 31..16
   3238 * 16(sp) - unit_x on entry; read at 40(sp) below, presumably because the save macro lowers sp by 24 - TODO confirm
   3239 */
   3240 
   3241    SAVE_REGS_ON_STACK 24, s0, s1, s2, s3, s4, v0, v1
   3242    lw       t8, 40(sp) /* t8 = unit_x */
   3243    li       t4, 0x00ff00ff /* low byte of each halfword */
   3244    li       t5, 0xf800f800 /* bits 15..11 of each halfword (red field of r5g6b5) */
   3245    li       t6, 0x07e007e0 /* bits 10..5 of each halfword (green field) */
   3246    li       t7, 0x001F001F /* bits 4..0 of each halfword (blue field) */
   3247    beqz     a2, 3f     /* w == 0: just restore and return */
   3248     nop
   3249    addiu    t1, a2, -1
   3250    beqz     t1, 2f     /* exactly one pixel: go straight to the single-pixel tail */
   3251     nop
   3252 1: /* main loop: two pixels per pass */
   3253    sra      t0, a3, 16 /* t0 = vx >> 16 */
   3254    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
   3255    addu     t0, a1, t0
   3256    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
   3257    addu     a3, a3, t8 /* a3 = vx + unit_x */
   3258    sra      t1, a3, 16 /* t1 = vx >> 16 */
   3259    sll      t1, t1, 2  /* t1 = t1 * 4 (a8r8g8b8) */
   3260    addu     t1, a1, t1
   3261    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
   3262    addu     a3, a3, t8 /* a3 = vx + unit_x */
   3263    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
   3264    lhu      t3, 2(a0)  /* t3 = destination (r5g6b5) */
   3265    /* macros defined outside this section; by name: widen dst to 8888, composite src OVER it, narrow back to 0565 - TODO confirm. Results are taken from v0/v1. */
   3266    CONVERT_2x0565_TO_2x8888 t2, t3, v0, v1, t6, t7, s0, s1, s2, s3
   3267    OVER_2x8888_2x8888       t0, t1, v0, v1, t2, t3, t4, t9, s0, s1, s2, s3, s4
   3268    CONVERT_2x8888_TO_2x0565 t2, t3, v0, v1, t5, t6, t7, t9, s2
   3269 
   3270    sh       v0, 0(a0)
   3271    sh       v1, 2(a0)
   3272    addiu    a2, a2, -2
   3273    addiu    t1, a2, -1
   3274    bgtz     t1, 1b     /* loop while at least two pixels remain */
   3275     addiu   a0, a0, 4  /* delay slot: advance dst by two pixels */
   3276 2: /* tail: zero or one pixel left */
   3277    beqz     a2, 3f
   3278     nop
   3279    sra      t0, a3, 16 /* t0 = vx >> 16 */
   3280    sll      t0, t0, 2  /* t0 = t0 * 4 (a8r8g8b8) */
   3281    addu     t0, a1, t0
   3282    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
   3283    lhu      t1, 0(a0)  /* t1 = destination (r5g6b5) */
   3284    addu     a3, a3, t8 /* a3 = vx + unit_x */
   3285    /* t5, t6 and t7 are reused as macro operands here, so the field masks loaded above are gone; nothing after this point reads them. Result is taken from t2. */
   3286    CONVERT_1x0565_TO_1x8888 t1, t2, t5, t6
   3287    OVER_8888_8888           t0, t2, t1, t4, t3, t5, t6, t7
   3288    CONVERT_1x8888_TO_1x0565 t1, t2, t5, t6
   3289 
   3290    sh       t2, 0(a0)
   3291 3:
   3292    RESTORE_REGS_FROM_STACK 24, s0, s1, s2, s3, s4, v0, v1
   3293    j        ra
   3294     nop
   3295 
   3296 END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
   3297 
   3298 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
   3299 /* Nearest-neighbour scaled scanline, SRC: fetch src[vx >> 16], convert it to 8888 and store it; dst is never read.
   3300 * a0     - dst (a8r8g8b8), written only
   3301 * a1     - src (r5g6b5), indexed by the upper 16 bits of vx
   3302 * a2     - w, number of destination pixels
   3303 * a3     - vx, source position; pixel index in bits 31..16
   3304 * 16(sp) - unit_x, added to vx after every fetched pixel
   3305 */
   3306 
   3307    SAVE_REGS_ON_STACK 0, v0
   3308    beqz     a2, 3f     /* w == 0: just restore and return */
   3309     nop
   3310 
   3311    lw       v0, 16(sp) /* v0 = unit_x */
   3312    addiu    t1, a2, -1
   3313    beqz     t1, 2f     /* exactly one pixel: skip the mask setup, the tail does not pass t4/t5 to its macro */
   3314     nop
   3315 
   3316    li       t4, 0x07e007e0 /* bits 10..5 of each halfword (green field of r5g6b5) */
   3317    li       t5, 0x001F001F /* bits 4..0 of each halfword (blue field) */
   3318 1: /* main loop: two pixels per pass */
   3319    sra      t0, a3, 16 /* t0 = vx >> 16 */
   3320    sll      t0, t0, 1  /* t0 = t0 * 2 ((r5g6b5)) */
   3321    addu     t0, a1, t0
   3322    lhu      t0, 0(t0)  /* t0 = source ((r5g6b5)) */
   3323    addu     a3, a3, v0 /* a3 = vx + unit_x */
   3324    sra      t1, a3, 16 /* t1 = vx >> 16 */
   3325    sll      t1, t1, 1  /* t1 = t1 * 2 ((r5g6b5)) */
   3326    addu     t1, a1, t1
   3327    lhu      t1, 0(t1)  /* t1 = source ((r5g6b5)) */
   3328    addu     a3, a3, v0 /* a3 = vx + unit_x */
   3329    addiu    a2, a2, -2
   3330    /* macro defined outside this section; by name it converts two 0565 pixels to 8888 - TODO confirm. Results are taken from t2/t3. */
   3331    CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
   3332 
   3333    sw       t2, 0(a0)
   3334    sw       t3, 4(a0)
   3335 
   3336    addiu    t2, a2, -1
   3337    bgtz     t2, 1b     /* loop while at least two pixels remain */
   3338     addiu   a0, a0, 8  /* delay slot: advance dst by two pixels */
   3339 2: /* tail: zero or one pixel left; vx is not advanced because nothing follows */
   3340    beqz     a2, 3f
   3341     nop
   3342    sra      t0, a3, 16 /* t0 = vx >> 16 */
   3343    sll      t0, t0, 1  /* t0 = t0 * 2 ((r5g6b5)) */
   3344    addu     t0, a1, t0
   3345    lhu      t0, 0(t0)  /* t0 = source ((r5g6b5)) */
   3346    /* single-pixel conversion; result is taken from t1 */
   3347    CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
   3348 
   3349    sw       t1, 0(a0)
   3350 3:
   3351    RESTORE_REGS_FROM_STACK 0, v0
   3352    j        ra
   3353     nop
   3354 
   3355 END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
   3356 
   3357 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
   3358 /* Nearest-neighbour scaled scanline, masked OVER onto r5g6b5: fetch src[vx >> 16], apply the a8 mask, composite onto dst.
   3359 * a0     - dst  (r5g6b5), read and updated in place
   3360 * a1     - src  (a8r8g8b8), indexed by the upper 16 bits of vx
   3361 * a2     - mask (a8), read sequentially, one byte per destination pixel
   3362 * a3     - w, number of destination pixels
   3363 * 16(sp) - vx on entry; read at 36(sp) below, presumably because the save macro lowers sp by 20 - TODO confirm
   3364 * 20(sp) - unit_x on entry; read at 40(sp) below
   3365 */
   3366    beqz     a3, 4f     /* w == 0: return without touching the stack */
   3367     nop
   3368 
   3369    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
   3370    lw       v0, 36(sp) /* v0 = vx */
   3371    lw       v1, 40(sp) /* v1 = unit_x */
   3372    li       t6, 0x00ff00ff /* low byte of each halfword */
   3373    li       t7, 0xf800f800 /* bits 15..11 of each halfword (red field of r5g6b5) */
   3374    li       t8, 0x07e007e0 /* bits 10..5 of each halfword (green field) */
   3375    li       t9, 0x001F001F /* bits 4..0 of each halfword (blue field) */
   3376 
   3377    addiu    t1, a3, -1
   3378    beqz     t1, 2f     /* exactly one pixel: go straight to the single-pixel tail */
   3379     nop
   3380 1: /* main loop: two pixels per pass */
   3381    sra      t0, v0, 16 /* t0 = vx >> 16 */
   3382    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
   3383    addu     t0, a1, t0
   3384    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
   3385    addu     v0, v0, v1 /* v0 = vx + unit_x */
   3386    sra      t1, v0, 16 /* t1 = vx >> 16 */
   3387    sll      t1, t1, 2  /* t1 = t1 * 4      (a8r8g8b8) */
   3388    addu     t1, a1, t1
   3389    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
   3390    addu     v0, v0, v1 /* v0 = vx + unit_x */
   3391    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
   3392    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
   3393    lhu      t4, 0(a0)  /* t4 = destination (r5g6b5) */
   3394    lhu      t5, 2(a0)  /* t5 = destination (r5g6b5) */
   3395    addiu    a2, a2, 2
   3396    /* macros defined outside this section; by name: widen dst to 8888, composite masked src OVER it, narrow back to 0565 - TODO confirm. Results are taken from s0/s1. */
   3397    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
   3398    OVER_2x8888_2x8_2x8888   t0, t1, \
   3399                             t2, t3, \
   3400                             s0, s1, \
   3401                             t4, t5, \
   3402                             t6, s2, s3, s4, s5, t2, t3
   3403    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
   3404 
   3405    sh       s0, 0(a0)
   3406    sh       s1, 2(a0)
   3407    addiu    a3, a3, -2
   3408    addiu    t1, a3, -1
   3409    bgtz     t1, 1b     /* loop while at least two pixels remain */
   3410     addiu   a0, a0, 4  /* delay slot: advance dst by two pixels */
   3411 2: /* tail: zero or one pixel left */
   3412    beqz     a3, 3f
   3413     nop
   3414    sra      t0, v0, 16 /* t0 = vx >> 16 */
   3415    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
   3416    addu     t0, a1, t0
   3417    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
   3418    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
   3419    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
   3420    /* t7 and t8 are reused as macro operands here, so those field masks are gone; nothing after this point reads them. Result is taken from t3. */
   3421    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
   3422    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
   3423    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
   3424 
   3425    sh       t3, 0(a0)
   3426 3:
   3427    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
   3428 4:
   3429    j        ra
   3430     nop
   3431 
   3432 END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
   3433 
   3434 LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
   3435 /* Nearest-neighbour scaled scanline, masked OVER, r5g6b5 onto r5g6b5: fetch src[vx >> 16], apply the a8 mask, composite onto dst.
   3436 * a0     - dst  (r5g6b5), read and updated in place
   3437 * a1     - src  (r5g6b5), indexed by the upper 16 bits of vx
   3438 * a2     - mask (a8), read sequentially, one byte per destination pixel
   3439 * a3     - w, number of destination pixels
   3440 * 16(sp) - vx on entry; read at 36(sp) below, presumably because the save macro lowers sp by 20 - TODO confirm
   3441 * 20(sp) - unit_x on entry; read at 40(sp) below
   3442 */
   3443 
   3444    beqz     a3, 4f     /* w == 0: return without touching the stack */
   3445     nop
   3446    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
   3447    lw       v0, 36(sp) /* v0 = vx */
   3448    lw       v1, 40(sp) /* v1 = unit_x */
   3449    li       t4, 0xf800f800 /* bits 15..11 of each halfword (red field of r5g6b5) */
   3450    li       t5, 0x07e007e0 /* bits 10..5 of each halfword (green field) */
   3451    li       t6, 0x001F001F /* bits 4..0 of each halfword (blue field) */
   3452    li       t7, 0x00ff00ff /* low byte of each halfword */
   3453 
   3454    addiu    t1, a3, -1
   3455    beqz     t1, 2f     /* exactly one pixel: go straight to the single-pixel tail */
   3456     nop
   3457 1: /* main loop: two pixels per pass */
   3458    sra      t0, v0, 16 /* t0 = vx >> 16 */
   3459    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
   3460    addu     t0, a1, t0
   3461    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
   3462    addu     v0, v0, v1 /* v0 = vx + unit_x */
   3463    sra      t1, v0, 16 /* t1 = vx >> 16 */
   3464    sll      t1, t1, 1  /* t1 = t1 * 2      (r5g6b5) */
   3465    addu     t1, a1, t1
   3466    lhu      t1, 0(t1)  /* t1 = source      (r5g6b5) */
   3467    addu     v0, v0, v1 /* v0 = vx + unit_x */
   3468    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
   3469    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
   3470    lhu      t8, 0(a0)  /* t8 = destination (r5g6b5) */
   3471    lhu      t9, 2(a0)  /* t9 = destination (r5g6b5) */
   3472    addiu    a2, a2, 2
   3473    /* macros defined outside this section; by name: widen src and dst to 8888, composite masked src OVER dst, narrow back to 0565 - TODO confirm. Results are taken from s0/s1. */
   3474    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
   3475    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
   3476    OVER_2x8888_2x8_2x8888   s0, s1, \
   3477                             t2, t3, \
   3478                             s2, s3, \
   3479                             t0, t1, \
   3480                             t7, t8, t9, s4, s5, s0, s1
   3481    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
   3482 
   3483    sh       s0, 0(a0)
   3484    sh       s1, 2(a0)
   3485    addiu    a3, a3, -2
   3486    addiu    t1, a3, -1
   3487    bgtz     t1, 1b     /* loop while at least two pixels remain */
   3488     addiu   a0, a0, 4  /* delay slot: advance dst by two pixels */
   3489 2: /* tail: zero or one pixel left */
   3490    beqz     a3, 3f
   3491     nop
   3492    sra      t0, v0, 16 /* t0 = vx >> 16 */
   3493    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
   3494    addu     t0, a1, t0
   3495 
   3496    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
   3497    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
   3498    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
   3499    /* t4, t5 and t6 are reused as macro operands here, so the field masks loaded above are gone; nothing after this point reads them. Result is taken from t3. */
   3500    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
   3501    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
   3502    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
   3503    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
   3504 
   3505    sh       t3, 0(a0)
   3506 3:
   3507    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
   3508 4:
   3509    j        ra
   3510     nop
   3511 
   3512 END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
   3513 
   3514 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
   3515 /* Bilinear scaled scanline, SRC: blend the 2x2 source neighbourhood at vx for every dst pixel; dst is never read.
   3516 * a0     - *dst, written only, 4 bytes per pixel
   3517 * a1     - *src_top, upper source row, 4 bytes per pixel
   3518 * a2     - *src_bottom, lower source row, 4 bytes per pixel
   3519 * a3     - w, number of destination pixels
   3520 * 16(sp) - wt, weight of the top row (read at 36(sp) after the register save)
   3521 * 20(sp) - wb, weight of the bottom row (read at 40(sp))
   3522 * 24(sp) - vx, source position; pixel index in bits 31..16, fraction in bits 15..0 (read at 44(sp))
   3523 * 28(sp) - unit_x, added to vx after every pixel (read at 48(sp))
   3524 */
   3525 
   3526    beqz     a3, 1f         /* w == 0: return without touching the stack */
   3527     nop
   3528 
   3529    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
   3530 
   3531    lw       s0, 36(sp)     /* s0 = wt */
   3532    lw       s1, 40(sp)     /* s1 = wb */
   3533    lw       s2, 44(sp)     /* s2 = vx */
   3534    lw       s3, 48(sp)     /* s3 = unit_x */
   3535    li       v0, BILINEAR_INTERPOLATION_RANGE
   3536    /* pre-shift both row weights by 2 * (8 - BILINEAR_INTERPOLATION_BITS); presumably this normalises the weight products for the interpolation macro - TODO confirm */
   3537    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
   3538    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
   3539 0: /* one destination pixel per pass */
   3540    andi     t4, s2, 0xffff /* t4 = low 16 bits of vx (the fraction) */
   3541    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
   3542    subu     t5, v0, t4     /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   3543 
   3544    mul      s4, s0, t5     /* s4 = wt * t5 */
   3545    mul      s5, s0, t4     /* s5 = wt * t4 */
   3546    mul      s6, s1, t5     /* s6 = wb * t5 */
   3547    mul      s7, s1, t4     /* s7 = wb * t4 */
   3548 
   3549    sra      t9, s2, 16     /* t9 = source pixel index */
   3550    sll      t9, t9, 2      /* t9 = byte offset of the left pixel */
   3551    addiu    t8, t9, 4      /* t8 = byte offset of the right pixel */
   3552    lwx      t0, t9(a1)     /* t0 = tl */
   3553    lwx      t1, t8(a1)     /* t1 = tr */
   3554    addiu    a3, a3, -1
   3555    lwx      t2, t9(a2)     /* t2 = bl */
   3556    lwx      t3, t8(a2)     /* t3 = br */
   3557    /* macro defined outside this section; by name it blends the four pixels with the weights s4..s7 - TODO confirm. The result is taken from t0. */
   3558    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   3559 
   3560    addu     s2, s2, s3     /* vx += unit_x; */
   3561    sw       t0, 0(a0)
   3562    bnez     a3, 0b
   3563     addiu   a0, a0, 4      /* delay slot: advance dst */
   3564 
   3565    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
   3566 1:
   3567    j        ra
   3568     nop
   3569 
   3570 END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
   3571 
   3572 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
   3573 /* Bilinear scaled scanline, SRC to 16-bit dst: blend the 2x2 source neighbourhood at vx, convert and store; dst is never read.
   3574 * a0     - *dst, written only, 2 bytes per pixel
   3575 * a1     - *src_top, upper source row, 4 bytes per pixel
   3576 * a2     - *src_bottom, lower source row, 4 bytes per pixel
   3577 * a3     - w, number of destination pixels
   3578 * 16(sp) - wt, weight of the top row (read at 36(sp) after the register save)
   3579 * 20(sp) - wb, weight of the bottom row (read at 40(sp))
   3580 * 24(sp) - vx, source position; pixel index in bits 31..16, fraction in bits 15..0 (read at 44(sp))
   3581 * 28(sp) - unit_x, added to vx after every pixel (read at 48(sp))
   3582 */
   3583 
   3584    beqz     a3, 1f         /* w == 0: return without touching the stack */
   3585     nop
   3586 
   3587    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
   3588 
   3589    lw       s0, 36(sp)     /* s0 = wt */
   3590    lw       s1, 40(sp)     /* s1 = wb */
   3591    lw       s2, 44(sp)     /* s2 = vx */
   3592    lw       s3, 48(sp)     /* s3 = unit_x */
   3593    li       v0, BILINEAR_INTERPOLATION_RANGE
   3594    /* pre-shift both row weights by 2 * (8 - BILINEAR_INTERPOLATION_BITS); presumably this normalises the weight products for the interpolation macro - TODO confirm */
   3595    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
   3596    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
   3597 0: /* one destination pixel per pass */
   3598    andi     t4, s2, 0xffff /* t4 = low 16 bits of vx (the fraction) */
   3599    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
   3600    subu     t5, v0, t4     /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   3601 
   3602    mul      s4, s0, t5     /* s4 = wt * t5 */
   3603    mul      s5, s0, t4     /* s5 = wt * t4 */
   3604    mul      s6, s1, t5     /* s6 = wb * t5 */
   3605    mul      s7, s1, t4     /* s7 = wb * t4 */
   3606 
   3607    sra      t9, s2, 16     /* t9 = source pixel index */
   3608    sll      t9, t9, 2      /* t9 = byte offset of the left pixel */
   3609    addiu    t8, t9, 4      /* t8 = byte offset of the right pixel */
   3610    lwx      t0, t9(a1)     /* t0 = tl */
   3611    lwx      t1, t8(a1)     /* t1 = tr */
   3612    addiu    a3, a3, -1
   3613    lwx      t2, t9(a2)     /* t2 = bl */
   3614    lwx      t3, t8(a2)     /* t3 = br */
   3615    /* macros defined outside this section; by name: blend the four pixels, then narrow the result to 0565 - TODO confirm. The value stored below is t1. */
   3616    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   3617    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
   3618 
   3619    addu     s2, s2, s3     /* vx += unit_x; */
   3620    sh       t1, 0(a0)
   3621    bnez     a3, 0b
   3622     addiu   a0, a0, 2      /* delay slot: advance dst */
   3623 
   3624    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
   3625 1:
   3626    j        ra
   3627     nop
   3628 
   3629 END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
   3630 
   3631 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
   3632 /* Bilinear scaled scanline, SRC from 16-bit src: widen the 2x2 source neighbourhood at vx to 8888, blend and store; dst is never read.
   3633 * a0     - *dst, written only, 4 bytes per pixel
   3634 * a1     - *src_top, upper source row, 2 bytes per pixel
   3635 * a2     - *src_bottom, lower source row, 2 bytes per pixel
   3636 * a3     - w, number of destination pixels
   3637 * 16(sp) - wt, weight of the top row (read at 44(sp) after the register save)
   3638 * 20(sp) - wb, weight of the bottom row (read at 48(sp))
   3639 * 24(sp) - vx, source position; pixel index in bits 31..16, fraction in bits 15..0 (read at 52(sp))
   3640 * 28(sp) - unit_x, added to vx after every pixel (read at 56(sp))
   3641 */
   3642 
   3643    beqz     a3, 1f         /* w == 0: return without touching the stack */
   3644     nop
   3645 
   3646    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3647 
   3648    lw       s0, 44(sp)     /* s0 = wt */
   3649    lw       s1, 48(sp)     /* s1 = wb */
   3650    lw       s2, 52(sp)     /* s2 = vx */
   3651    lw       s3, 56(sp)     /* s3 = unit_x */
   3652    li       v0, BILINEAR_INTERPOLATION_RANGE
   3653    li       v1, 0x07e007e0 /* bits 10..5 of each halfword (green field of r5g6b5) */
   3654    li       s8, 0x001f001f /* bits 4..0 of each halfword (blue field) */
   3655    /* pre-shift both row weights by 2 * (8 - BILINEAR_INTERPOLATION_BITS); presumably this normalises the weight products for the interpolation macro - TODO confirm */
   3656    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
   3657    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
   3658 0: /* one destination pixel per pass */
   3659    andi     t4, s2, 0xffff /* t4 = low 16 bits of vx (the fraction) */
   3660    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
   3661    subu     t5, v0, t4     /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   3662 
   3663    mul      s4, s0, t5     /* s4 = wt * t5 */
   3664    mul      s5, s0, t4     /* s5 = wt * t4 */
   3665    mul      s6, s1, t5     /* s6 = wb * t5 */
   3666    mul      s7, s1, t4     /* s7 = wb * t4 */
   3667 
   3668    sra      t9, s2, 16     /* t9 = source pixel index */
   3669    sll      t9, t9, 1      /* t9 = byte offset of the left pixel (2 bytes per pixel) */
   3670    addiu    t8, t9, 2      /* t8 = byte offset of the right pixel */
   3671    lhx      t0, t9(a1)     /* t0 = tl */
   3672    lhx      t1, t8(a1)     /* t1 = tr */
   3673    andi     t1, t1, 0xffff /* drop the sign extension done by lhx; only the second macro input is masked - presumably the macro discards the upper half of its first input, TODO confirm */
   3674    addiu    a3, a3, -1
   3675    lhx      t2, t9(a2)     /* t2 = bl */
   3676    lhx      t3, t8(a2)     /* t3 = br */
   3677    andi     t3, t3, 0xffff /* same masking for the bottom-right pixel */
   3678    /* macros defined outside this section; by name: widen each row pair to 8888 in place, then blend the four pixels - TODO confirm. The result is taken from t0. */
   3679    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
   3680    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
   3681    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   3682 
   3683    addu     s2, s2, s3     /* vx += unit_x; */
   3684    sw       t0, 0(a0)
   3685    bnez     a3, 0b
   3686     addiu   a0, a0, 4      /* delay slot: advance dst */
   3687 
   3688    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3689 1:
   3690    j        ra
   3691     nop
   3692 
   3693 END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
   3694 
   3695 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
   3696 /*
        * Bilinear scanline scaler: 16-bit source pixels in, 16-bit pixels out.
        * Each iteration loads two adjacent 16-bit pixels at index (vx >> 16)
        * from both source rows, widens them with CONVERT_2x0565_TO_2x8888,
        * blends them with BILINEAR_INTERPOLATE_SINGLE_PIXEL, narrows the
        * result with CONVERT_1x8888_TO_1x0565 and stores t1 to dst as one
        * halfword.  dst is only written, never read.
        *
   3697 * a0     - *dst         (one halfword stored per pixel)
   3698 * a1     - *src_top     (16-bit pixels, read with lhx)
   3699 * a2     - *src_bottom  (16-bit pixels, read with lhx)
   3700 * a3     - w            (pixel count; 0 returns immediately)
   3701 * 16(sp) - wt
   3702 * 20(sp) - wb
   3703 * 24(sp) - vx           (bits 31..16 index the source pixel, bits 15..0 give the weight)
   3704 * 28(sp) - unit_x       (added to vx after every pixel)
   3705 */
   3706 
   3707    beqz     a3, 1f         /* w == 0: nothing to do */
   3708     nop                    /* branch delay slot */
   3709 
            /* From here on the stack arguments sit 28 bytes higher (16 -> 44, ...). */
   3710    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3711 
   3712    lw       s0, 44(sp)     /* s0 = wt */
   3713    lw       s1, 48(sp)     /* s1 = wb */
   3714    lw       s2, 52(sp)     /* s2 = vx */
   3715    lw       s3, 56(sp)     /* s3 = unit_x */
   3716    li       v0, BILINEAR_INTERPOLATION_RANGE /* loop-invariant constant */
   3717    li       v1, 0x07e007e0 /* 0x07e0 in both halfwords; 5th CONVERT_2x0565_TO_2x8888 operand */
   3718    li       s8, 0x001f001f /* 0x001f in both halfwords; 6th CONVERT_2x0565_TO_2x8888 operand */
   3719 
   3720    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   3721    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   3722 0:                         /* per-pixel loop */
   3723    andi     t4, s2, 0xffff /* t4 = vx & 0xffff (low 16 bits of vx) */
   3724    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   3725    subu     t5, v0, t4     /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   3726 
   3727    mul      s4, s0, t5     /* s4 = wt*(RANGE - t4) */
   3728    mul      s5, s0, t4     /* s5 = wt*t4 */
   3729    mul      s6, s1, t5     /* s6 = wb*(RANGE - t4) */
   3730    mul      s7, s1, t4     /* s7 = wb*t4 */
   3731 
   3732    sra      t9, s2, 16     /* t9 = vx >> 16 (source pixel index) */
   3733    sll      t9, t9, 1      /* t9 = byte offset, 2 bytes per pixel */
   3734    addiu    t8, t9, 2      /* t8 = byte offset of the following pixel */
   3735    lhx      t0, t9(a1)     /* t0 = tl */
   3736    lhx      t1, t8(a1)     /* t1 = tr */
   3737    andi     t1, t1, 0xffff /* drop the sign extension done by lhx */
   3738    addiu    a3, a3, -1     /* one pixel fewer to go */
   3739    lhx      t2, t9(a2)     /* t2 = bl */
   3740    lhx      t3, t8(a2)     /* t3 = br */
   3741    andi     t3, t3, 0xffff /* drop the sign extension done by lhx */
   3742 
            /* Widen, blend, then narrow again; the halfword stored below is t1. */
   3743    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
   3744    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
   3745    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   3746    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
   3747 
   3748    addu     s2, s2, s3     /* vx += unit_x; */
   3749    sh       t1, 0(a0)      /* *dst = converted 16-bit pixel */
   3750    bnez     a3, 0b         /* loop while pixels remain */
   3751     addiu   a0, a0, 2      /* delay slot: dst += 2 bytes */
   3752 
   3753    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3754 1:
   3755    j        ra             /* return */
   3756     nop                    /* branch delay slot */
   3757 
   3758 END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
   3759 
   3760 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
   3761 /*
        * Bilinear scanline scaler with OVER compositing, 32-bit source and
        * 32-bit destination.  Each iteration loads two adjacent 32-bit
        * pixels at index (vx >> 16) from both source rows, blends them with
        * BILINEAR_INTERPOLATE_SINGLE_PIXEL, reads the current dst word,
        * combines the two through OVER_8888_8888 and stores t2 back to dst.
        *
   3762 * a0     - *dst         (32-bit word read and rewritten per pixel)
   3763 * a1     - *src_top     (32-bit pixels, read with lwx)
   3764 * a2     - *src_bottom  (32-bit pixels, read with lwx)
   3765 * a3     - w            (pixel count; 0 returns immediately)
   3766 * 16(sp) - wt
   3767 * 20(sp) - wb
   3768 * 24(sp) - vx           (bits 31..16 index the source pixel, bits 15..0 give the weight)
   3769 * 28(sp) - unit_x       (added to vx after every pixel)
   3770 */
   3771 
   3772    beqz     a3, 1f         /* w == 0: nothing to do */
   3773     nop                    /* branch delay slot */
   3774 
            /* From here on the stack arguments sit 24 bytes higher (16 -> 40, ...). */
   3775    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3776 
   3777    lw       s0, 40(sp)     /* s0 = wt */
   3778    lw       s1, 44(sp)     /* s1 = wb */
   3779    lw       s2, 48(sp)     /* s2 = vx */
   3780    lw       s3, 52(sp)     /* s3 = unit_x */
   3781    li       v0, BILINEAR_INTERPOLATION_RANGE /* loop-invariant constant */
   3782    li       s8, 0x00ff00ff /* constant operand handed to OVER_8888_8888 */
   3783 
   3784    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   3785    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   3786 0:                         /* per-pixel loop */
   3787    andi     t4, s2, 0xffff /* t4 = vx & 0xffff (low 16 bits of vx) */
   3788    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   3789    subu     t5, v0, t4     /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   3790 
   3791    mul      s4, s0, t5     /* s4 = wt*(RANGE - t4) */
   3792    mul      s5, s0, t4     /* s5 = wt*t4 */
   3793    mul      s6, s1, t5     /* s6 = wb*(RANGE - t4) */
   3794    mul      s7, s1, t4     /* s7 = wb*t4 */
   3795 
   3796    sra      t9, s2, 16     /* t9 = vx >> 16 (source pixel index) */
   3797    sll      t9, t9, 2      /* t9 = byte offset, 4 bytes per pixel */
   3798    addiu    t8, t9, 4      /* t8 = byte offset of the following pixel */
   3799    lwx      t0, t9(a1)     /* t0 = tl */
   3800    lwx      t1, t8(a1)     /* t1 = tr */
   3801    addiu    a3, a3, -1     /* one pixel fewer to go */
   3802    lwx      t2, t9(a2)     /* t2 = bl */
   3803    lwx      t3, t8(a2)     /* t3 = br */
   3804 
            /* Blend the four source pixels (t0 is used as the source below). */
   3805    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   3806    lw       t1, 0(a0)      /* t1 = dest */
            /* Composite t0 over t1; the word stored below is t2. */
   3807    OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6
   3808 
   3809    addu     s2, s2, s3     /* vx += unit_x; */
   3810    sw       t2, 0(a0)      /* *dst = composited pixel */
   3811    bnez     a3, 0b         /* loop while pixels remain */
   3812     addiu   a0, a0, 4      /* delay slot: dst += 4 bytes */
   3813 
   3814    RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3815 1:
   3816    j        ra             /* return */
   3817     nop                    /* branch delay slot */
   3818 
   3819 END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
   3820 
   3821 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
   3822 /*
        * Bilinear scanline scaler with ADD compositing, 32-bit source and
        * 32-bit destination.  Each iteration loads two adjacent 32-bit
        * pixels at index (vx >> 16) from both source rows, blends them with
        * BILINEAR_INTERPOLATE_SINGLE_PIXEL and adds the result to the
        * current dst word with addu_s.qb (per-byte unsigned saturating add).
        *
   3823 * a0     - *dst         (32-bit word read and rewritten per pixel)
   3824 * a1     - *src_top     (32-bit pixels, read with lwx)
   3825 * a2     - *src_bottom  (32-bit pixels, read with lwx)
   3826 * a3     - w            (pixel count; 0 returns immediately)
   3827 * 16(sp) - wt
   3828 * 20(sp) - wb
   3829 * 24(sp) - vx           (bits 31..16 index the source pixel, bits 15..0 give the weight)
   3830 * 28(sp) - unit_x       (added to vx after every pixel)
   3831 */
   3832 
   3833    beqz         a3, 1f         /* w == 0: nothing to do */
   3834     nop                        /* branch delay slot */
   3835 
            /* From here on the stack arguments sit 20 bytes higher (16 -> 36, ...). */
   3836    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
   3837 
   3838    lw           s0, 36(sp)     /* s0 = wt */
   3839    lw           s1, 40(sp)     /* s1 = wb */
   3840    lw           s2, 44(sp)     /* s2 = vx */
   3841    lw           s3, 48(sp)     /* s3 = unit_x */
   3842    li           v0, BILINEAR_INTERPOLATION_RANGE /* loop-invariant constant */
   3843 
   3844    sll          s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   3845    sll          s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   3846 0:                             /* per-pixel loop */
   3847    andi         t4, s2, 0xffff /* t4 = vx & 0xffff (low 16 bits of vx) */
   3848    srl          t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   3849    subu         t5, v0, t4     /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   3850 
   3851    mul          s4, s0, t5     /* s4 = wt*(RANGE - t4) */
   3852    mul          s5, s0, t4     /* s5 = wt*t4 */
   3853    mul          s6, s1, t5     /* s6 = wb*(RANGE - t4) */
   3854    mul          s7, s1, t4     /* s7 = wb*t4 */
   3855 
   3856    sra          t9, s2, 16     /* t9 = vx >> 16 (source pixel index) */
   3857    sll          t9, t9, 2      /* t9 = byte offset, 4 bytes per pixel */
   3858    addiu        t8, t9, 4      /* t8 = byte offset of the following pixel */
   3859    lwx          t0, t9(a1)     /* t0 = tl */
   3860    lwx          t1, t8(a1)     /* t1 = tr */
   3861    addiu        a3, a3, -1     /* one pixel fewer to go */
   3862    lwx          t2, t9(a2)     /* t2 = bl */
   3863    lwx          t3, t8(a2)     /* t3 = br */
   3864 
            /* Blend the four source pixels (t0 is used as the source below). */
   3865    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   3866    lw           t1, 0(a0)      /* t1 = current dst pixel */
   3867    addu_s.qb    t2, t0, t1     /* t2 = t0 + t1, each byte saturating at 255 */
   3868 
   3869    addu         s2, s2, s3     /* vx += unit_x; */
   3870    sw           t2, 0(a0)      /* *dst = summed pixel */
   3871    bnez         a3, 0b         /* loop while pixels remain */
   3872     addiu       a0, a0, 4      /* delay slot: dst += 4 bytes */
   3873 
   3874    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
   3875 1:
   3876    j            ra             /* return */
   3877     nop                        /* branch delay slot */
   3878 
   3879 END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
   3880 
   3881 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
   3882 /*
        * Masked bilinear scanline scaler: 32-bit source, 8-bit mask, 32-bit
        * destination.  Each iteration loads two adjacent 32-bit pixels at
        * index (vx >> 16) from both source rows, blends them with
        * BILINEAR_INTERPOLATE_SINGLE_PIXEL, reads one mask byte, applies it
        * through MIPS_UN8x4_MUL_UN8 and stores t0 to dst.  dst is only
        * written, never read.
        *
   3883 * a0     - *dst         (one 32-bit word stored per pixel)
   3884 * a1     - *mask        (one byte consumed per pixel)
   3885 * a2     - *src_top     (32-bit pixels, read with lwx)
   3886 * a3     - *src_bottom  (32-bit pixels, read with lwx)
   3887 * 16(sp) - wt
   3888 * 20(sp) - wb
   3889 * 24(sp) - vx           (bits 31..16 index the source pixel, bits 15..0 give the weight)
   3890 * 28(sp) - unit_x       (added to vx after every pixel)
   3891 * 32(sp) - w            (pixel count; 0 returns immediately)
   3892 */
   3893 
   3894    lw       v1, 32(sp)        /* v1 = w, kept as the loop counter */
   3895    beqz     v1, 1f            /* w == 0: nothing to do */
   3896     nop                       /* branch delay slot */
   3897 
            /* From here on the stack arguments sit 28 bytes higher (16 -> 44, ...). */
   3898    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3899 
   3900    lw       s0, 44(sp)        /* s0 = wt */
   3901    lw       s1, 48(sp)        /* s1 = wb */
   3902    lw       s2, 52(sp)        /* s2 = vx */
   3903    lw       s3, 56(sp)        /* s3 = unit_x */
   3904    li       v0, BILINEAR_INTERPOLATION_RANGE /* loop-invariant constant */
   3905    li       s8, 0x00ff00ff    /* constant operand handed to MIPS_UN8x4_MUL_UN8 */
   3906 
   3907    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   3908    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   3909 0:                            /* per-pixel loop */
   3910    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff (low 16 bits of vx) */
   3911    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   3912    subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   3913 
   3914    mul      s4, s0, t5        /* s4 = wt*(RANGE - t4) */
   3915    mul      s5, s0, t4        /* s5 = wt*t4 */
   3916    mul      s6, s1, t5        /* s6 = wb*(RANGE - t4) */
   3917    mul      s7, s1, t4        /* s7 = wb*t4 */
   3918 
   3919    sra      t9, s2, 16        /* t9 = vx >> 16 (source pixel index) */
   3920    sll      t9, t9, 2         /* t9 = byte offset, 4 bytes per pixel */
   3921    addiu    t8, t9, 4         /* t8 = byte offset of the following pixel */
   3922    lwx      t0, t9(a2)        /* t0 = tl */
   3923    lwx      t1, t8(a2)        /* t1 = tr */
   3924    addiu    v1, v1, -1        /* one pixel fewer to go */
   3925    lwx      t2, t9(a3)        /* t2 = bl */
   3926    lwx      t3, t8(a3)        /* t3 = br */
   3927 
            /* Blend the four source pixels (t0 is used as the source below). */
   3928    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   3929    lbu      t1, 0(a1)         /* t1 = mask */
   3930    addiu    a1, a1, 1         /* mask++ */
            /* Apply the mask byte t1 to t0; the word stored below is t0. */
   3931    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
   3932 
   3933    addu     s2, s2, s3        /* vx += unit_x; */
   3934    sw       t0, 0(a0)         /* *dst = masked pixel */
   3935    bnez     v1, 0b            /* loop while pixels remain */
   3936     addiu   a0, a0, 4         /* delay slot: dst += 4 bytes */
   3937 
   3938    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3939 1:
   3940    j        ra                /* return */
   3941     nop                       /* branch delay slot */
   3942 
   3943 END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
   3944 
   3945 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
   3946 /*
        * Masked bilinear scanline scaler: 32-bit source, 8-bit mask, 16-bit
        * destination.  Each iteration loads two adjacent 32-bit pixels at
        * index (vx >> 16) from both source rows, blends them with
        * BILINEAR_INTERPOLATE_SINGLE_PIXEL, applies one mask byte through
        * MIPS_UN8x4_MUL_UN8, narrows the result with
        * CONVERT_1x8888_TO_1x0565 and stores t1 to dst as one halfword.
        * dst is only written, never read.
        *
   3947 * a0     - *dst         (one halfword stored per pixel)
   3948 * a1     - *mask        (one byte consumed per pixel)
   3949 * a2     - *src_top     (32-bit pixels, read with lwx)
   3950 * a3     - *src_bottom  (32-bit pixels, read with lwx)
   3951 * 16(sp) - wt
   3952 * 20(sp) - wb
   3953 * 24(sp) - vx           (bits 31..16 index the source pixel, bits 15..0 give the weight)
   3954 * 28(sp) - unit_x       (added to vx after every pixel)
   3955 * 32(sp) - w            (pixel count; 0 returns immediately)
   3956 */
   3957 
   3958    lw       v1, 32(sp)        /* v1 = w, kept as the loop counter */
   3959    beqz     v1, 1f            /* w == 0: nothing to do */
   3960     nop                       /* branch delay slot */
   3961 
            /* From here on the stack arguments sit 28 bytes higher (16 -> 44, ...). */
   3962    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   3963 
   3964    lw       s0, 44(sp)        /* s0 = wt */
   3965    lw       s1, 48(sp)        /* s1 = wb */
   3966    lw       s2, 52(sp)        /* s2 = vx */
   3967    lw       s3, 56(sp)        /* s3 = unit_x */
   3968    li       v0, BILINEAR_INTERPOLATION_RANGE /* loop-invariant constant */
   3969    li       s8, 0x00ff00ff    /* constant operand handed to MIPS_UN8x4_MUL_UN8 */
   3970 
   3971    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   3972    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   3973 0:                            /* per-pixel loop */
   3974    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff (low 16 bits of vx) */
   3975    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   3976    subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   3977 
   3978    mul      s4, s0, t5        /* s4 = wt*(RANGE - t4) */
   3979    mul      s5, s0, t4        /* s5 = wt*t4 */
   3980    mul      s6, s1, t5        /* s6 = wb*(RANGE - t4) */
   3981    mul      s7, s1, t4        /* s7 = wb*t4 */
   3982 
   3983    sra      t9, s2, 16        /* t9 = vx >> 16 (source pixel index) */
   3984    sll      t9, t9, 2         /* t9 = byte offset, 4 bytes per pixel */
   3985    addiu    t8, t9, 4         /* t8 = byte offset of the following pixel */
   3986    lwx      t0, t9(a2)        /* t0 = tl */
   3987    lwx      t1, t8(a2)        /* t1 = tr */
   3988    addiu    v1, v1, -1        /* one pixel fewer to go */
   3989    lwx      t2, t9(a3)        /* t2 = bl */
   3990    lwx      t3, t8(a3)        /* t3 = br */
   3991 
            /* Blend the four source pixels (t0 is used as the source below). */
   3992    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   3993    lbu      t1, 0(a1)         /* t1 = mask */
   3994    addiu    a1, a1, 1         /* mask++ */
            /* Apply the mask byte to t0, then narrow; the halfword stored below is t1. */
   3995    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
   3996    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
   3997 
   3998    addu     s2, s2, s3        /* vx += unit_x; */
   3999    sh       t1, 0(a0)         /* *dst = converted 16-bit pixel */
   4000    bnez     v1, 0b            /* loop while pixels remain */
   4001     addiu   a0, a0, 2         /* delay slot: dst += 2 bytes */
   4002 
   4003    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   4004 1:
   4005    j        ra                /* return */
   4006     nop                       /* branch delay slot */
   4007 
   4008 END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
   4009 
   4010 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
   4011 /*
        * Masked bilinear scanline scaler: 16-bit source, 8-bit mask, 32-bit
        * destination.  Each iteration loads two adjacent 16-bit pixels at
        * index (vx >> 16) from both source rows, widens them with
        * CONVERT_2x0565_TO_2x8888, blends them with
        * BILINEAR_INTERPOLATE_SINGLE_PIXEL, applies one mask byte through
        * MIPS_UN8x4_MUL_UN8 and stores t0 to dst.  dst is only written,
        * never read.
        *
        * ra is saved on the stack and reused as the pixel counter inside the
        * loop; it is restored before the final "j ra".
        *
   4012 * a0     - *dst         (one 32-bit word stored per pixel)
   4013 * a1     - *mask        (one byte consumed per pixel)
   4014 * a2     - *src_top     (16-bit pixels, read with lhx)
   4015 * a3     - *src_bottom  (16-bit pixels, read with lhx)
   4016 * 16(sp) - wt
   4017 * 20(sp) - wb
   4018 * 24(sp) - vx           (bits 31..16 index the source pixel, bits 15..0 give the weight)
   4019 * 28(sp) - unit_x       (added to vx after every pixel)
   4020 * 32(sp) - w            (pixel count; 0 returns immediately)
   4021 */
   4022 
   4023    lw       t0, 32(sp)        /* t0 = w, used only for the zero test */
   4024    beqz     t0, 1f            /* w == 0: nothing to do, ra untouched */
   4025     nop                       /* branch delay slot */
   4026 
            /* From here on the stack arguments sit 32 bytes higher (16 -> 48, ...). */
   4027    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
   4028 
   4029    lw       s0, 48(sp)        /* s0 = wt */
   4030    lw       s1, 52(sp)        /* s1 = wb */
   4031    lw       s2, 56(sp)        /* s2 = vx */
   4032    lw       s3, 60(sp)        /* s3 = unit_x */
   4033    lw       ra, 64(sp)        /* ra = w */
   4034    li       v0, 0x00ff00ff    /* constant operand handed to MIPS_UN8x4_MUL_UN8 */
   4035    li       v1, 0x07e007e0    /* 0x07e0 in both halfwords; 5th CONVERT_2x0565_TO_2x8888 operand */
   4036    li       s8, 0x001f001f    /* 0x001f in both halfwords; 6th CONVERT_2x0565_TO_2x8888 operand */
   4037 
   4038    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   4039    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   4040 0:                            /* per-pixel loop */
   4041    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff (low 16 bits of vx) */
   4042    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   4043    li       t5, BILINEAR_INTERPOLATION_RANGE /* reloaded every pass: v0 holds 0x00ff00ff here */
   4044    subu     t5, t5, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   4045 
   4046    mul      s4, s0, t5        /* s4 = wt*(RANGE - t4) */
   4047    mul      s5, s0, t4        /* s5 = wt*t4 */
   4048    mul      s6, s1, t5        /* s6 = wb*(RANGE - t4) */
   4049    mul      s7, s1, t4        /* s7 = wb*t4 */
   4050 
   4051    sra      t9, s2, 16        /* t9 = vx >> 16 (source pixel index) */
   4052    sll      t9, t9, 1         /* t9 = byte offset, 2 bytes per pixel */
   4053    addiu    t8, t9, 2         /* t8 = byte offset of the following pixel */
   4054    lhx      t0, t9(a2)        /* t0 = tl */
   4055    lhx      t1, t8(a2)        /* t1 = tr */
   4056    andi     t1, t1, 0xffff    /* drop the sign extension done by lhx */
   4057    addiu    ra, ra, -1        /* one pixel fewer to go */
   4058    lhx      t2, t9(a3)        /* t2 = bl */
   4059    lhx      t3, t8(a3)        /* t3 = br */
   4060    andi     t3, t3, 0xffff    /* drop the sign extension done by lhx */
   4061 
            /* Widen each row's pixel pair in place, blend, then apply the mask byte. */
   4062    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
   4063    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
   4064    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   4065    lbu      t1, 0(a1)         /* t1 = mask */
   4066    addiu    a1, a1, 1         /* mask++ */
   4067    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
   4068 
   4069    addu     s2, s2, s3        /* vx += unit_x; */
   4070    sw       t0, 0(a0)         /* *dst = masked pixel */
   4071    bnez     ra, 0b            /* loop while pixels remain */
   4072     addiu   a0, a0, 4         /* delay slot: dst += 4 bytes */
   4073 
            /* Restores ra (the real return address) along with the saved registers. */
   4074    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
   4075 1:
   4076    j        ra                /* return */
   4077     nop                       /* branch delay slot */
   4078 
   4079 END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
   4080 
   4081 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
   4082 /*
        * Masked bilinear scanline scaler: 16-bit source, 8-bit mask, 16-bit
        * destination.  Each iteration loads two adjacent 16-bit pixels at
        * index (vx >> 16) from both source rows, widens them with
        * CONVERT_2x0565_TO_2x8888, blends them with
        * BILINEAR_INTERPOLATE_SINGLE_PIXEL, applies one mask byte through
        * MIPS_UN8x4_MUL_UN8, narrows the result with
        * CONVERT_1x8888_TO_1x0565 and stores t1 to dst as one halfword.
        * dst is only written, never read.
        *
        * ra is saved on the stack and reused as the pixel counter inside the
        * loop; it is restored before the final "j ra".
        *
   4083 * a0     - *dst         (one halfword stored per pixel)
   4084 * a1     - *mask        (one byte consumed per pixel)
   4085 * a2     - *src_top     (16-bit pixels, read with lhx)
   4086 * a3     - *src_bottom  (16-bit pixels, read with lhx)
   4087 * 16(sp) - wt
   4088 * 20(sp) - wb
   4089 * 24(sp) - vx           (bits 31..16 index the source pixel, bits 15..0 give the weight)
   4090 * 28(sp) - unit_x       (added to vx after every pixel)
   4091 * 32(sp) - w            (pixel count; 0 returns immediately)
   4092 */
   4093 
   4094    lw       t0, 32(sp)        /* t0 = w, used only for the zero test */
   4095    beqz     t0, 1f            /* w == 0: nothing to do, ra untouched */
   4096     nop                       /* branch delay slot */
   4097 
            /* From here on the stack arguments sit 32 bytes higher (16 -> 48, ...). */
   4098    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
   4099 
   4100    lw       s0, 48(sp)        /* s0 = wt */
   4101    lw       s1, 52(sp)        /* s1 = wb */
   4102    lw       s2, 56(sp)        /* s2 = vx */
   4103    lw       s3, 60(sp)        /* s3 = unit_x */
   4104    lw       ra, 64(sp)        /* ra = w */
   4105    li       v0, 0x00ff00ff    /* constant operand handed to MIPS_UN8x4_MUL_UN8 */
   4106    li       v1, 0x07e007e0    /* 0x07e0 in both halfwords; 5th CONVERT_2x0565_TO_2x8888 operand */
   4107    li       s8, 0x001f001f    /* 0x001f in both halfwords; 6th CONVERT_2x0565_TO_2x8888 operand */
   4108 
   4109    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   4110    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   4111 0:                            /* per-pixel loop */
   4112    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff (low 16 bits of vx) */
   4113    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   4114    li       t5, BILINEAR_INTERPOLATION_RANGE /* reloaded every pass: v0 holds 0x00ff00ff here */
   4115    subu     t5, t5, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   4116 
   4117    mul      s4, s0, t5        /* s4 = wt*(RANGE - t4) */
   4118    mul      s5, s0, t4        /* s5 = wt*t4 */
   4119    mul      s6, s1, t5        /* s6 = wb*(RANGE - t4) */
   4120    mul      s7, s1, t4        /* s7 = wb*t4 */
   4121 
   4122    sra      t9, s2, 16        /* t9 = vx >> 16 (source pixel index) */
   4123    sll      t9, t9, 1         /* t9 = byte offset, 2 bytes per pixel */
   4124    addiu    t8, t9, 2         /* t8 = byte offset of the following pixel */
   4125    lhx      t0, t9(a2)        /* t0 = tl */
   4126    lhx      t1, t8(a2)        /* t1 = tr */
   4127    andi     t1, t1, 0xffff    /* drop the sign extension done by lhx */
   4128    addiu    ra, ra, -1        /* one pixel fewer to go */
   4129    lhx      t2, t9(a3)        /* t2 = bl */
   4130    lhx      t3, t8(a3)        /* t3 = br */
   4131    andi     t3, t3, 0xffff    /* drop the sign extension done by lhx */
   4132 
            /* Widen, blend, apply the mask byte, narrow; the halfword stored below is t1. */
   4133    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
   4134    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
   4135    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   4136    lbu      t1, 0(a1)         /* t1 = mask */
   4137    addiu    a1, a1, 1         /* mask++ */
   4138    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
   4139    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
   4140 
   4141    addu     s2, s2, s3        /* vx += unit_x; */
   4142    sh       t1, 0(a0)         /* *dst = converted 16-bit pixel */
   4143    bnez     ra, 0b            /* loop while pixels remain */
   4144     addiu   a0, a0, 2         /* delay slot: dst += 2 bytes */
   4145 
            /* Restores ra (the real return address) along with the saved registers. */
   4146    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
   4147 1:
   4148    j        ra                /* return */
   4149     nop                       /* branch delay slot */
   4150 
   4151 END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
   4152 
   4153 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
   4154 /*
        * Masked bilinear scanline scaler with OVER compositing.  Each
        * iteration loads two adjacent 32-bit pixels at index (vx >> 16) from
        * both source rows, blends them with
        * BILINEAR_INTERPOLATE_SINGLE_PIXEL, reads one mask byte and the
        * current dst word, combines them through OVER_8888_8_8888 and
        * stores t0 back to dst.
        *
        * Unlike the sibling routines, registers are saved before w is
        * tested, so the w == 0 path also runs the restore at label 1.
        *
   4155 * a0     - dst        (a8r8g8b8)
   4156 * a1     - mask       (a8)
   4157 * a2     - src_top    (a8r8g8b8)
   4158 * a3     - src_bottom (a8r8g8b8)
   4159 * 16(sp) - wt
   4160 * 20(sp) - wb
   4161 * 24(sp) - vx         (bits 31..16 index the source pixel, bits 15..0 give the weight)
   4162 * 28(sp) - unit_x     (added to vx after every pixel)
   4163 * 32(sp) - w          (pixel count; 0 skips the loop)
   4164 */
   4165 
            /* From here on the stack arguments sit 28 bytes higher (16 -> 44, ...). */
   4166    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   4167 
   4168    lw       v1, 60(sp)        /* v1 = w: 32(sp) at entry, 28 bytes higher after the save */
   4169    beqz     v1, 1f            /* w == 0: skip straight to the restore */
   4170     nop                       /* branch delay slot */
   4171 
   4172    lw       s0, 44(sp)        /* s0 = wt */
   4173    lw       s1, 48(sp)        /* s1 = wb */
   4174    lw       s2, 52(sp)        /* s2 = vx */
   4175    lw       s3, 56(sp)        /* s3 = unit_x */
   4176    li       v0, BILINEAR_INTERPOLATION_RANGE /* loop-invariant constant */
   4177    li       s8, 0x00ff00ff    /* constant operand handed to OVER_8888_8_8888 */
   4178 
   4179    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   4180    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   4181 
   4182 0:                            /* per-pixel loop */
   4183    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff (low 16 bits of vx) */
   4184    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   4185    subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   4186 
   4187    mul      s4, s0, t5        /* s4 = wt*(RANGE - t4) */
   4188    mul      s5, s0, t4        /* s5 = wt*t4 */
   4189    mul      s6, s1, t5        /* s6 = wb*(RANGE - t4) */
   4190    mul      s7, s1, t4        /* s7 = wb*t4 */
   4191 
   4192    sra      t9, s2, 16        /* t9 = vx >> 16 (source pixel index) */
   4193    sll      t9, t9, 2         /* t9 = byte offset, 4 bytes per pixel */
   4194    addiu    t8, t9, 4         /* t8 = byte offset of the following pixel */
   4195    lwx      t0, t9(a2)        /* t0 = tl */
   4196    lwx      t1, t8(a2)        /* t1 = tr */
   4197    addiu    v1, v1, -1        /* one pixel fewer to go */
   4198    lwx      t2, t9(a3)        /* t2 = bl */
   4199    lwx      t3, t8(a3)        /* t3 = br */
   4200 
            /* Blend the four source pixels (t0 is used as the source below). */
   4201    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, \
   4202                                      t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   4203    lbu      t1, 0(a1)         /* t1 = mask */
   4204    lw       t2, 0(a0)         /* t2 = dst */
   4205    addiu    a1, a1, 1         /* mask++ */
            /* Combine source t0, mask t1 and dst t2; the word stored below is t0. */
   4206    OVER_8888_8_8888 t0, t1, t2, t0, s8, t3, t4, t5, t6
   4207 
   4208    addu     s2, s2, s3        /* vx += unit_x; */
   4209    sw       t0, 0(a0)         /* *dst = composited pixel */
   4210    bnez     v1, 0b            /* loop while pixels remain */
   4211     addiu   a0, a0, 4         /* delay slot: dst += 4 bytes */
   4212 
   4213 1:                            /* common exit, also reached when w == 0 */
   4214    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   4215    j        ra                /* return */
   4216     nop                       /* branch delay slot */
   4217 
   4218 END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
   4219 
   4220 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
   4221 /*
        * Masked bilinear scanline scaler with ADD compositing: 32-bit source,
        * 8-bit mask, 32-bit destination.  Each iteration loads two adjacent
        * 32-bit pixels at index (vx >> 16) from both source rows, blends
        * them with BILINEAR_INTERPOLATE_SINGLE_PIXEL, reads one mask byte
        * and the current dst word, combines them through
        * MIPS_UN8x4_MUL_UN8_ADD_UN8x4 and stores t0 back to dst.
        *
   4222 * a0     - *dst         (32-bit word read and rewritten per pixel)
   4223 * a1     - *mask        (one byte consumed per pixel)
   4224 * a2     - *src_top     (32-bit pixels, read with lwx)
   4225 * a3     - *src_bottom  (32-bit pixels, read with lwx)
   4226 * 16(sp) - wt
   4227 * 20(sp) - wb
   4228 * 24(sp) - vx           (bits 31..16 index the source pixel, bits 15..0 give the weight)
   4229 * 28(sp) - unit_x       (added to vx after every pixel)
   4230 * 32(sp) - w            (pixel count; 0 returns immediately)
   4231 */
   4232 
   4233    lw       v1, 32(sp)        /* v1 = w, kept as the loop counter */
   4234    beqz     v1, 1f            /* w == 0: nothing to do */
   4235     nop                       /* branch delay slot */
   4236 
            /* From here on the stack arguments sit 28 bytes higher (16 -> 44, ...). */
   4237    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   4238 
   4239    lw       s0, 44(sp)        /* s0 = wt */
   4240    lw       s1, 48(sp)        /* s1 = wb */
   4241    lw       s2, 52(sp)        /* s2 = vx */
   4242    lw       s3, 56(sp)        /* s3 = unit_x */
   4243    li       v0, BILINEAR_INTERPOLATION_RANGE /* loop-invariant constant */
   4244    li       s8, 0x00ff00ff    /* constant operand handed to MIPS_UN8x4_MUL_UN8_ADD_UN8x4 */
   4245 
   4246    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wt once, outside the loop */
   4247    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* pre-shift wb once, outside the loop */
   4248 0:                            /* per-pixel loop */
   4249    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff (low 16 bits of vx) */
   4250    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = its top BILINEAR_INTERPOLATION_BITS bits */
   4251    subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
   4252 
   4253    mul      s4, s0, t5        /* s4 = wt*(RANGE - t4) */
   4254    mul      s5, s0, t4        /* s5 = wt*t4 */
   4255    mul      s6, s1, t5        /* s6 = wb*(RANGE - t4) */
   4256    mul      s7, s1, t4        /* s7 = wb*t4 */
   4257 
   4258    sra      t9, s2, 16        /* t9 = vx >> 16 (source pixel index) */
   4259    sll      t9, t9, 2         /* t9 = byte offset, 4 bytes per pixel */
   4260    addiu    t8, t9, 4         /* t8 = byte offset of the following pixel */
   4261    lwx      t0, t9(a2)        /* t0 = tl */
   4262    lwx      t1, t8(a2)        /* t1 = tr */
   4263    addiu    v1, v1, -1        /* one pixel fewer to go */
   4264    lwx      t2, t9(a3)        /* t2 = bl */
   4265    lwx      t3, t8(a3)        /* t3 = br */
   4266 
            /* Blend the four source pixels (t0 is used as the source below). */
   4267    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
   4268    lbu      t1, 0(a1)         /* t1 = mask */
   4269    lw       t2, 0(a0)         /* t2 = dst */
   4270    addiu    a1, a1, 1         /* mask++ */
            /* Combine source t0, mask t1 and dst t2; the word stored below is t0. */
   4271    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5
   4272 
   4273    addu     s2, s2, s3        /* vx += unit_x; */
   4274    sw       t0, 0(a0)         /* *dst = combined pixel */
   4275    bnez     v1, 0b            /* loop while pixels remain */
   4276     addiu   a0, a0, 4         /* delay slot: dst += 4 bytes */
   4277 
   4278    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
   4279 1:
   4280    j        ra                /* return */
   4281     nop                       /* branch delay slot */
   4282 
   4283 END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)