tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lossless_mips_dsp_r2.c (39181B)


      1 // Copyright 2014 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // Image transforms and color space conversion methods for lossless decoder.
     11 //
     12 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
     13 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
     14 
     15 #include "src/dsp/dsp.h"
     16 
     17 #if defined(WEBP_USE_MIPS_DSP_R2)
     18 
     19 #include "src/dsp/lossless.h"
     20 #include "src/dsp/lossless_common.h"
     21 
     // MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) defines a static
     // function FUNC_NAME that, for every row y in [y_start, y_end), reads
     // 'width' elements of TYPE from 'src', looks each one up in 'color_map' and
     // writes the result to 'dst'. 'src' and 'dst' are advanced continuously,
     // so consecutive rows are read and written back to back.
     // Four elements are processed per iteration in inline assembly. The '.ifc'
     // assembler directives compare the stringified TYPE and keep only the
     // matching code path:
     //  - uint8_t:  the source byte is the table index, and bits 8..15 of the
     //              loaded table entry are stored as one byte.
     //  - uint32_t: bits 8..15 of the source word are the table index, and the
     //              whole loaded table entry is stored.
     // In both paths the index is scaled by 4 ('sll' by 2) before the indexed
     // word load 'lwx'. The remaining (width & 3) elements of each row are
     // handled in C through GET_INDEX / GET_VALUE.
     22 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
     23 static void FUNC_NAME(const TYPE* src,                                         \
     24                      const uint32_t* const color_map,                         \
     25                      TYPE* dst, int y_start, int y_end,                       \
     26                      int width) {                                             \
     27  int y;                                                                       \
     28  for (y = y_start; y < y_end; ++y) {                                          \
     29    int x;                                                                     \
     30    for (x = 0; x < (width >> 2); ++x) {                                       \
     31      int tmp1, tmp2, tmp3, tmp4;                                              \
     32      __asm__ volatile (                                                       \
     33      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
     34        "lbu       %[tmp1],  0(%[src])                  \n\t"                  \
     35        "lbu       %[tmp2],  1(%[src])                  \n\t"                  \
     36        "lbu       %[tmp3],  2(%[src])                  \n\t"                  \
     37        "lbu       %[tmp4],  3(%[src])                  \n\t"                  \
     38        "addiu     %[src],   %[src],      4             \n\t"                  \
     39      ".endif                                           \n\t"                  \
     40      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
     41        "lw        %[tmp1],  0(%[src])                  \n\t"                  \
     42        "lw        %[tmp2],  4(%[src])                  \n\t"                  \
     43        "lw        %[tmp3],  8(%[src])                  \n\t"                  \
     44        "lw        %[tmp4],  12(%[src])                 \n\t"                  \
     45        "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                  \
     46        "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
     47        "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
     48        "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                  \
     49        "addiu     %[src],   %[src],      16            \n\t"                  \
     50      ".endif                                           \n\t"                  \
     51        "sll       %[tmp1],  %[tmp1],     2             \n\t"                  \
     52        "sll       %[tmp2],  %[tmp2],     2             \n\t"                  \
     53        "sll       %[tmp3],  %[tmp3],     2             \n\t"                  \
     54        "sll       %[tmp4],  %[tmp4],     2             \n\t"                  \
     55        "lwx       %[tmp1],  %[tmp1](%[color_map])      \n\t"                  \
     56        "lwx       %[tmp2],  %[tmp2](%[color_map])      \n\t"                  \
     57        "lwx       %[tmp3],  %[tmp3](%[color_map])      \n\t"                  \
     58        "lwx       %[tmp4],  %[tmp4](%[color_map])      \n\t"                  \
     59      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
     60        "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                  \
     61        "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
     62        "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
     63        "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                  \
     64        "sb        %[tmp1],  0(%[dst])                  \n\t"                  \
     65        "sb        %[tmp2],  1(%[dst])                  \n\t"                  \
     66        "sb        %[tmp3],  2(%[dst])                  \n\t"                  \
     67        "sb        %[tmp4],  3(%[dst])                  \n\t"                  \
     68        "addiu     %[dst],   %[dst],      4             \n\t"                  \
     69      ".endif                                           \n\t"                  \
     70      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
     71        "sw        %[tmp1],  0(%[dst])                  \n\t"                  \
     72        "sw        %[tmp2],  4(%[dst])                  \n\t"                  \
     73        "sw        %[tmp3],  8(%[dst])                  \n\t"                  \
     74        "sw        %[tmp4],  12(%[dst])                 \n\t"                  \
     75        "addiu     %[dst],   %[dst],      16            \n\t"                  \
     76      ".endif                                           \n\t"                  \
     77        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),             \
     78          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst)                   \
     79        : [color_map]"r"(color_map)                                            \
     80        : "memory"                                                             \
     81      );                                                                       \
     82    }                                                                          \
     83    for (x = 0; x < (width & 3); ++x) {                                        \
     84      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
     85    }                                                                          \
     86  }                                                                            \
     87 }
     88 
     // Instantiate the uint32_t and the uint8_t variants of the mapping function.
     89 MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
     90 MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
     91 
     92 #undef MAP_COLOR_FUNCS
     93 
     // Computes, independently for each of the four 8-bit channels of the
     // inputs, c0 + c1 - c2 clamped to [0, 255], and returns the four results
     // repacked into one 32-bit word.
     // Each input is first widened into two registers of 16-bit lanes
     // (preceu.ph.qbr for the low two bytes, preceu.ph.qbl for the high two) so
     // that the intermediate sum cannot wrap. The saturating left shift by 7
     // (shll_s.ph) followed by the saturating narrowing precrqu_s.qb.ph performs
     // the clamp while packing the lanes back into bytes.
     94 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
     95                                                   uint32_t c2) {
     96  int temp0, temp1, temp2, temp3, temp4, temp5;
     97  __asm__ volatile (
     98    "preceu.ph.qbr   %[temp1],   %[c0]                 \n\t"
     99    "preceu.ph.qbl   %[temp2],   %[c0]                 \n\t"
    100    "preceu.ph.qbr   %[temp3],   %[c1]                 \n\t"
    101    "preceu.ph.qbl   %[temp4],   %[c1]                 \n\t"
    102    "preceu.ph.qbr   %[temp5],   %[c2]                 \n\t"
    103    "preceu.ph.qbl   %[temp0],   %[c2]                 \n\t"
    104    "subq.ph         %[temp3],   %[temp3],   %[temp5]  \n\t"
    105    "subq.ph         %[temp4],   %[temp4],   %[temp0]  \n\t"
    106    "addq.ph         %[temp1],   %[temp1],   %[temp3]  \n\t"
    107    "addq.ph         %[temp2],   %[temp2],   %[temp4]  \n\t"
    108    "shll_s.ph       %[temp1],   %[temp1],   7         \n\t"
    109    "shll_s.ph       %[temp2],   %[temp2],   7         \n\t"
    110    "precrqu_s.qb.ph %[temp2],   %[temp2],   %[temp1]  \n\t"
    111    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
    112      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
    113    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
    114    : "memory"
    115  );
    116  return temp2;
    117 }
    118 
    // For each of the four 8-bit channels, with avg = (c0 + c1) >> 1 (computed
    // by adduh.qb), computes avg + (avg - c2) / 2 clamped to [0, 255] and
    // returns the four results repacked into one 32-bit word.
    // The halving of (avg - c2) rounds toward zero: the sign bit of each 16-bit
    // lane (shrl.ph by 15) is added before the arithmetic shift right by 1.
    // As in ClampedAddSubtractFull, the saturating shift by 7 plus
    // precrqu_s.qb.ph clamps the lanes and narrows them back to bytes.
    119 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
    120                                                   uint32_t c2) {
    121  int temp0, temp1, temp2, temp3, temp4, temp5;
    122  __asm__ volatile (
    123    "adduh.qb         %[temp5],   %[c0],      %[c1]       \n\t"
    124    "preceu.ph.qbr    %[temp3],   %[c2]                   \n\t"
    125    "preceu.ph.qbr    %[temp1],   %[temp5]                \n\t"
    126    "preceu.ph.qbl    %[temp2],   %[temp5]                \n\t"
    127    "preceu.ph.qbl    %[temp4],   %[c2]                   \n\t"
    128    "subq.ph          %[temp3],   %[temp1],   %[temp3]    \n\t"
    129    "subq.ph          %[temp4],   %[temp2],   %[temp4]    \n\t"
    130    "shrl.ph          %[temp5],   %[temp3],   15          \n\t"
    131    "shrl.ph          %[temp0],   %[temp4],   15          \n\t"
    132    "addq.ph          %[temp3],   %[temp3],   %[temp5]    \n\t"
    133    "addq.ph          %[temp4],   %[temp0],   %[temp4]    \n\t"
    134    "shra.ph          %[temp3],   %[temp3],   1           \n\t"
    135    "shra.ph          %[temp4],   %[temp4],   1           \n\t"
    136    "addq.ph          %[temp1],   %[temp1],   %[temp3]    \n\t"
    137    "addq.ph          %[temp2],   %[temp2],   %[temp4]    \n\t"
    138    "shll_s.ph        %[temp1],   %[temp1],   7           \n\t"
    139    "shll_s.ph        %[temp2],   %[temp2],   7           \n\t"
    140    "precrqu_s.qb.ph  %[temp1],   %[temp2],   %[temp1]    \n\t"
    141    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
    142      [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
    143    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
    144    : "memory"
    145  );
    146  return temp1;
    147 }
    148 
    // Returns 'b' when the sum over the four bytes of |b - c| is strictly
    // greater than the sum over the four bytes of |a - c|; returns 'a' otherwise.
    // Each per-byte absolute difference is formed as max - min: cmpgdu.lt.qb
    // sets the per-byte condition bits, and the two pick.qb instructions that
    // follow use them to select the larger and the smaller byte respectively.
    // raddu.w.qb then sums the four byte differences of a register.
    // 'slti ..., 0x1' yields 1 when the difference of the sums is <= 0, and
    // 'movz' replaces 'a' with 'b' only when that flag is 0.
    149 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
    150  int temp0, temp1, temp2, temp3, temp4, temp5;
    151  __asm__ volatile (
    152    "cmpgdu.lt.qb %[temp1], %[c],     %[b]             \n\t"
    153    "pick.qb      %[temp1], %[b],     %[c]             \n\t"
    154    "pick.qb      %[temp2], %[c],     %[b]             \n\t"
    155    "cmpgdu.lt.qb %[temp4], %[c],     %[a]             \n\t"
    156    "pick.qb      %[temp4], %[a],     %[c]             \n\t"
    157    "pick.qb      %[temp5], %[c],     %[a]             \n\t"
    158    "subu.qb      %[temp3], %[temp1], %[temp2]         \n\t"
    159    "subu.qb      %[temp0], %[temp4], %[temp5]         \n\t"
    160    "raddu.w.qb   %[temp3], %[temp3]                   \n\t"
    161    "raddu.w.qb   %[temp0], %[temp0]                   \n\t"
    162    "subu         %[temp3], %[temp3], %[temp0]         \n\t"
    163    "slti         %[temp0], %[temp3], 0x1              \n\t"
    164    "movz         %[a],     %[b],     %[temp0]         \n\t"
    165    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
    166      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
    167      [a]"+&r"(a)
    168    : [b]"r"(b), [c]"r"(c)
    169  );
    170  return a;
    171 }
    172 
    // Returns the per-byte average (a0 + a1) >> 1 of the four 8-bit channels of
    // the two inputs, computed with the single DSP instruction adduh.qb.
    // 'a0' is an in/out asm operand and carries the result.
    173 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
    174  __asm__ volatile (
    175    "adduh.qb    %[a0], %[a0], %[a1]       \n\t"
    176    : [a0]"+r"(a0)
    177    : [a1]"r"(a1)
    178  );
    179  return a0;
    180 }
    181 
    182 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
    183  return Average2(Average2(a0, a2), a1);
    184 }
    185 
    186 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
    187                                     uint32_t a2, uint32_t a3) {
    188  return Average2(Average2(a0, a1), Average2(a2, a3));
    189 }
    190 
    // Predictor 5: Average3 of *left, top[0] and top[1].
    static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
                                         const uint32_t* const top) {
      const uint32_t left_px = left[0];
      const uint32_t top_px = top[0];
      const uint32_t top_next = top[1];
      return Average3(left_px, top_px, top_next);
    }
    195 
    // Predictor 6: Average2 of *left and top[-1].
    static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
                                         const uint32_t* const top) {
      const uint32_t left_px = left[0];
      const uint32_t top_prev = top[-1];
      return Average2(left_px, top_prev);
    }
    200 
    // Predictor 7: Average2 of *left and top[0].
    static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
                                         const uint32_t* const top) {
      const uint32_t left_px = left[0];
      const uint32_t top_px = top[0];
      return Average2(left_px, top_px);
    }
    205 
    // Predictor 8: Average2 of top[-1] and top[0]. 'left' is not used.
    static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
                                         const uint32_t* const top) {
      const uint32_t top_prev = top[-1];
      const uint32_t top_px = top[0];
      (void)left;
      return Average2(top_prev, top_px);
    }
    211 
    // Predictor 9: Average2 of top[0] and top[1]. 'left' is not used.
    static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
                                         const uint32_t* const top) {
      const uint32_t top_px = top[0];
      const uint32_t top_next = top[1];
      (void)left;
      return Average2(top_px, top_next);
    }
    217 
    // Predictor 10: Average4 of *left, top[-1], top[0] and top[1].
    static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
                                          const uint32_t* const top) {
      const uint32_t left_px = left[0];
      const uint32_t top_prev = top[-1];
      const uint32_t top_px = top[0];
      const uint32_t top_next = top[1];
      return Average4(left_px, top_prev, top_px, top_next);
    }
    222 
    // Predictor 11: Select() applied to top[0], *left and top[-1], in that
    // argument order.
    static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
                                          const uint32_t* const top) {
      const uint32_t top_px = top[0];
      const uint32_t left_px = left[0];
      const uint32_t top_prev = top[-1];
      return Select(top_px, left_px, top_prev);
    }
    227 
    // Predictor 12: ClampedAddSubtractFull() of *left, top[0] and top[-1].
    static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
                                          const uint32_t* const top) {
      const uint32_t left_px = left[0];
      const uint32_t top_px = top[0];
      const uint32_t top_prev = top[-1];
      return ClampedAddSubtractFull(left_px, top_px, top_prev);
    }
    232 
    // Predictor 13: ClampedAddSubtractHalf() of *left, top[0] and top[-1].
    static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
                                          const uint32_t* const top) {
      const uint32_t left_px = left[0];
      const uint32_t top_px = top[0];
      const uint32_t top_prev = top[-1];
      return ClampedAddSubtractHalf(left_px, top_px, top_prev);
    }
    237 
    238 // Add green to blue and red channels (i.e. perform the inverse transform of
    239 // 'subtract green').
    // Reads 'num_pixels' 32-bit pixels from 'src' and writes them to 'dst'.
    // For each pixel, bits 8..15 are extracted ('ext'), replicated into both
    // 16-bit halves (replv.ph) and added byte-wise (addu.qb) to the pixel, so
    // only byte 0 and byte 2 of the word change.
    // The first loop (label 0) handles four pixels per iteration up to
    // src + (num_pixels & ~3); the second loop (label 1) handles the remaining
    // pixels one at a time up to src + num_pixels. Stores use negative offsets
    // because 'dst' is advanced before the stores are issued, and the last
    // store of each loop sits in the branch delay slot (.set noreorder).
    240 static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,
    241                                           uint32_t* dst) {
    242  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
    243  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
    244  const uint32_t* const p_loop2_end = src + num_pixels;
    245  __asm__ volatile (
    246    ".set       push                                          \n\t"
    247    ".set       noreorder                                     \n\t"
    248    "beq        %[src],          %[p_loop1_end],     3f       \n\t"
    249    " nop                                                     \n\t"
    250  "0:                                                         \n\t"
    251    "lw         %[temp0],        0(%[src])                    \n\t"
    252    "lw         %[temp1],        4(%[src])                    \n\t"
    253    "lw         %[temp2],        8(%[src])                    \n\t"
    254    "lw         %[temp3],        12(%[src])                   \n\t"
    255    "ext        %[temp4],        %[temp0],           8,    8  \n\t"
    256    "ext        %[temp5],        %[temp1],           8,    8  \n\t"
    257    "ext        %[temp6],        %[temp2],           8,    8  \n\t"
    258    "ext        %[temp7],        %[temp3],           8,    8  \n\t"
    259    "addiu      %[src],          %[src],             16       \n\t"
    260    "addiu      %[dst],          %[dst],             16       \n\t"
    261    "replv.ph   %[temp4],        %[temp4]                     \n\t"
    262    "replv.ph   %[temp5],        %[temp5]                     \n\t"
    263    "replv.ph   %[temp6],        %[temp6]                     \n\t"
    264    "replv.ph   %[temp7],        %[temp7]                     \n\t"
    265    "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
    266    "addu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
    267    "addu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
    268    "addu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
    269    "sw         %[temp0],        -16(%[dst])                  \n\t"
    270    "sw         %[temp1],        -12(%[dst])                  \n\t"
    271    "sw         %[temp2],        -8(%[dst])                   \n\t"
    272    "bne        %[src],          %[p_loop1_end],     0b       \n\t"
    273    " sw        %[temp3],        -4(%[dst])                   \n\t"
    274  "3:                                                         \n\t"
    275    "beq        %[src],          %[p_loop2_end],     2f       \n\t"
    276    " nop                                                     \n\t"
    277  "1:                                                         \n\t"
    278    "lw         %[temp0],        0(%[src])                    \n\t"
    279    "addiu      %[src],          %[src],             4        \n\t"
    280    "addiu      %[dst],          %[dst],             4        \n\t"
    281    "ext        %[temp4],        %[temp0],           8,    8  \n\t"
    282    "replv.ph   %[temp4],        %[temp4]                     \n\t"
    283    "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
    284    "bne        %[src],          %[p_loop2_end],     1b       \n\t"
    285    " sw        %[temp0],        -4(%[dst])                   \n\t"
    286  "2:                                                         \n\t"
    287    ".set       pop                                           \n\t"
    288    : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
    289      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
    290      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
    291      [temp7]"=&r"(temp7)
    292    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    293    : "memory"
    294  );
    295 }
    296 
    // Applies the inverse color transform described by 'm' (green_to_red,
    // green_to_blue, red_to_blue) to 'num_pixels' pixels read from 'src' and
    // writes the result to 'dst'.
    // Before the loop, the three multipliers are replicated into both 16-bit
    // lanes (replv.ph) and sign-extended from 8 bits (shll.ph / shra.ph by 8).
    // The assembly loop handles two pixels per iteration, up to
    // src + (num_pixels & ~1): it first stores both source words to 'dst'
    // unchanged and then overwrites the bytes at offsets 2 and 0 of each stored
    // pixel with the recomputed values, using 'sb' at negative offsets from the
    // already advanced 'dst' (the last one in the branch delay slot).
    // 'src' and 'dst' are in/out asm operands, so after the loop they point at
    // the remaining odd pixel, if any, which is handled by the C fallback.
    // "hi" and "lo" are listed as clobbers alongside "memory".
    297 static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
    298                                            const uint32_t* src, int num_pixels,
    299                                            uint32_t* dst) {
    300  int temp0, temp1, temp2, temp3, temp4, temp5;
    301  uint32_t argb, argb1, new_red;
    302  const uint32_t G_to_R = m->green_to_red;
    303  const uint32_t G_to_B = m->green_to_blue;
    304  const uint32_t R_to_B = m->red_to_blue;
    305  const uint32_t* const p_loop_end = src + (num_pixels & ~1);
    306  __asm__ volatile (
    307    ".set            push                                    \n\t"
    308    ".set            noreorder                               \n\t"
    309    "beq             %[src],       %[p_loop_end],  1f        \n\t"
    310    " nop                                                    \n\t"
    311    "replv.ph        %[temp0],     %[G_to_R]                 \n\t"
    312    "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
    313    "replv.ph        %[temp2],     %[R_to_B]                 \n\t"
    314    "shll.ph         %[temp0],     %[temp0],       8         \n\t"
    315    "shll.ph         %[temp1],     %[temp1],       8         \n\t"
    316    "shll.ph         %[temp2],     %[temp2],       8         \n\t"
    317    "shra.ph         %[temp0],     %[temp0],       8         \n\t"
    318    "shra.ph         %[temp1],     %[temp1],       8         \n\t"
    319    "shra.ph         %[temp2],     %[temp2],       8         \n\t"
    320  "0:                                                        \n\t"
    321    "lw              %[argb],      0(%[src])                 \n\t"
    322    "lw              %[argb1],     4(%[src])                 \n\t"
    323    "sw              %[argb],      0(%[dst])                 \n\t"
    324    "sw              %[argb1],     4(%[dst])                 \n\t"
    325    "addiu           %[src],       %[src],         8         \n\t"
    326    "addiu           %[dst],       %[dst],         8         \n\t"
    327    "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"
    328    "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"
    329    "shll.ph         %[temp3],     %[temp3],       8         \n\t"
    330    "shra.ph         %[temp3],     %[temp3],       8         \n\t"
    331    "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t"
    332    "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t"
    333    "precrq.ph.w     %[new_red],   %[argb],        %[argb1]  \n\t"
    334    "ins             %[argb1],     %[argb],        16,   16  \n\t"
    335    "shra.ph         %[temp5],     %[temp5],       5         \n\t"
    336    "shra.ph         %[temp3],     %[temp3],       5         \n\t"
    337    "addu.ph         %[new_red],   %[new_red],     %[temp5]  \n\t"
    338    "addu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t"
    339    "preceu.ph.qbra  %[temp5],     %[new_red]                \n\t"
    340    "shll.ph         %[temp4],     %[temp5],       8         \n\t"
    341    "shra.ph         %[temp4],     %[temp4],       8         \n\t"
    342    "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"
    343    "sb              %[temp5],     -2(%[dst])                \n\t"
    344    "sra             %[temp5],     %[temp5],       16        \n\t"
    345    "shra.ph         %[temp4],     %[temp4],       5         \n\t"
    346    "addu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"
    347    "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"
    348    "sb              %[temp5],     -6(%[dst])                \n\t"
    349    "sb              %[temp3],     -4(%[dst])                \n\t"
    350    "sra             %[temp3],     %[temp3],       16        \n\t"
    351    "bne             %[src],       %[p_loop_end],  0b        \n\t"
    352    " sb             %[temp3],     -8(%[dst])                \n\t"
    353  "1:                                                        \n\t"
    354    ".set            pop                                     \n\t"
    355    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
    356      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
    357      [new_red]"=&r"(new_red), [argb]"=&r"(argb),
    358      [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
    359    : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
    360      [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
    361    : "memory", "hi", "lo"
    362  );
    363 
    364  // Fall-back to C-version for left-overs.
    365  if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
    366 }
    367 
    // Converts 'num_pixels' 32-bit pixels from 'src' into 3 bytes per pixel at
    // 'dst'; bits 24..31 of each source word are not written.
    // The first loop (label 0) consumes four pixels per iteration, up to
    // src + (num_pixels & ~3), and packs them into three unaligned word stores
    // (usw), advancing 'dst' by 12 in the branch delay slot.
    // The tail loop (label 1) emits one pixel at a time: a byte store of bits
    // 16..23 at dst[0], and an unaligned halfword store (ush) of the
    // byte-swapped low half at dst[1..2].
    // NOTE(review): the resulting byte order in memory (bits 16..23, then
    // 8..15, then 0..7) assumes a little-endian target — confirm.
    368 static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,
    369                                       int num_pixels, uint8_t* dst) {
    370  int temp0, temp1, temp2, temp3;
    371  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
    372  const uint32_t* const p_loop2_end = src + num_pixels;
    373  __asm__ volatile (
    374    ".set       push                                       \n\t"
    375    ".set       noreorder                                  \n\t"
    376    "beq        %[src],      %[p_loop1_end],    3f         \n\t"
    377    " nop                                                  \n\t"
    378  "0:                                                      \n\t"
    379    "lw         %[temp3],    12(%[src])                    \n\t"
    380    "lw         %[temp2],    8(%[src])                     \n\t"
    381    "lw         %[temp1],    4(%[src])                     \n\t"
    382    "lw         %[temp0],    0(%[src])                     \n\t"
    383    "ins        %[temp3],    %[temp2],          24,   8    \n\t"
    384    "sll        %[temp2],    %[temp2],          8          \n\t"
    385    "rotr       %[temp3],    %[temp3],          16         \n\t"
    386    "ins        %[temp2],    %[temp1],          0,    16   \n\t"
    387    "sll        %[temp1],    %[temp1],          8          \n\t"
    388    "wsbh       %[temp3],    %[temp3]                      \n\t"
    389    "balign     %[temp0],    %[temp1],          1          \n\t"
    390    "wsbh       %[temp2],    %[temp2]                      \n\t"
    391    "wsbh       %[temp0],    %[temp0]                      \n\t"
    392    "usw        %[temp3],    8(%[dst])                     \n\t"
    393    "rotr       %[temp0],    %[temp0],          16         \n\t"
    394    "usw        %[temp2],    4(%[dst])                     \n\t"
    395    "addiu      %[src],      %[src],            16         \n\t"
    396    "usw        %[temp0],    0(%[dst])                     \n\t"
    397    "bne        %[src],      %[p_loop1_end],    0b         \n\t"
    398    " addiu     %[dst],      %[dst],            12         \n\t"
    399  "3:                                                      \n\t"
    400    "beq        %[src],      %[p_loop2_end],    2f         \n\t"
    401    " nop                                                  \n\t"
    402  "1:                                                      \n\t"
    403    "lw         %[temp0],    0(%[src])                     \n\t"
    404    "addiu      %[src],      %[src],            4          \n\t"
    405    "wsbh       %[temp1],    %[temp0]                      \n\t"
    406    "addiu      %[dst],      %[dst],            3          \n\t"
    407    "ush        %[temp1],    -2(%[dst])                    \n\t"
    408    "sra        %[temp0],    %[temp0],          16         \n\t"
    409    "bne        %[src],      %[p_loop2_end],    1b         \n\t"
    410    " sb        %[temp0],    -3(%[dst])                    \n\t"
    411  "2:                                                      \n\t"
    412    ".set       pop                                        \n\t"
    413    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
    414      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    415    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    416    : "memory"
    417  );
    418 }
    419 
    // Converts 'num_pixels' 32-bit pixels from 'src' into 4 bytes per pixel at
    // 'dst'. Every word goes through the same two steps before being written
    // with an unaligned word store (usw): wsbh swaps the two bytes inside each
    // halfword, then 'balign reg, reg, 1' is applied with the same register as
    // both operands.
    // NOTE(review): with identical operands, 'balign ..., 1' should act as a
    // rotate left by 8 bits, so the net effect is to exchange byte 0 and byte 2
    // of each word while bytes 1 and 3 stay in place — confirm against the DSP
    // ASE definition of balign.
    // The first loop (label 0) handles four pixels per iteration up to
    // src + (num_pixels & ~3); the second loop (label 1) handles the rest one
    // pixel at a time. 'dst' is advanced in the branch delay slot of each loop.
    420 static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,
    421                                        int num_pixels, uint8_t* dst) {
    422  int temp0, temp1, temp2, temp3;
    423  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
    424  const uint32_t* const p_loop2_end = src + num_pixels;
    425  __asm__ volatile (
    426    ".set       push                                       \n\t"
    427    ".set       noreorder                                  \n\t"
    428    "beq        %[src],      %[p_loop1_end],    3f         \n\t"
    429    " nop                                                  \n\t"
    430  "0:                                                      \n\t"
    431    "lw         %[temp0],    0(%[src])                     \n\t"
    432    "lw         %[temp1],    4(%[src])                     \n\t"
    433    "lw         %[temp2],    8(%[src])                     \n\t"
    434    "lw         %[temp3],    12(%[src])                    \n\t"
    435    "wsbh       %[temp0],    %[temp0]                      \n\t"
    436    "wsbh       %[temp1],    %[temp1]                      \n\t"
    437    "wsbh       %[temp2],    %[temp2]                      \n\t"
    438    "wsbh       %[temp3],    %[temp3]                      \n\t"
    439    "addiu      %[src],      %[src],            16         \n\t"
    440    "balign     %[temp0],    %[temp0],          1          \n\t"
    441    "balign     %[temp1],    %[temp1],          1          \n\t"
    442    "balign     %[temp2],    %[temp2],          1          \n\t"
    443    "balign     %[temp3],    %[temp3],          1          \n\t"
    444    "usw        %[temp0],    0(%[dst])                     \n\t"
    445    "usw        %[temp1],    4(%[dst])                     \n\t"
    446    "usw        %[temp2],    8(%[dst])                     \n\t"
    447    "usw        %[temp3],    12(%[dst])                    \n\t"
    448    "bne        %[src],      %[p_loop1_end],    0b         \n\t"
    449    " addiu     %[dst],      %[dst],            16         \n\t"
    450  "3:                                                      \n\t"
    451    "beq        %[src],      %[p_loop2_end],    2f         \n\t"
    452    " nop                                                  \n\t"
    453  "1:                                                      \n\t"
    454    "lw         %[temp0],    0(%[src])                     \n\t"
    455    "wsbh       %[temp0],    %[temp0]                      \n\t"
    456    "addiu      %[src],      %[src],            4          \n\t"
    457    "balign     %[temp0],    %[temp0],          1          \n\t"
    458    "usw        %[temp0],    0(%[dst])                     \n\t"
    459    "bne        %[src],      %[p_loop2_end],    1b         \n\t"
    460    " addiu     %[dst],      %[dst],            4          \n\t"
    461  "2:                                                      \n\t"
    462    ".set       pop                                        \n\t"
    463    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
    464      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    465    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    466    : "memory"
    467  );
    468 }
    469 
    470 static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,
    471                                            int num_pixels, uint8_t* dst) {
    472  int temp0, temp1, temp2, temp3, temp4, temp5;
    473  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
    474  const uint32_t* const p_loop2_end = src + num_pixels;
    475  __asm__ volatile (
    476    ".set           push                                       \n\t"
    477    ".set           noreorder                                  \n\t"
    478    "beq            %[src],      %[p_loop1_end],    3f         \n\t"
    479    " nop                                                      \n\t"
    480  "0:                                                          \n\t"
    481    "lw             %[temp0],    0(%[src])                     \n\t"
    482    "lw             %[temp1],    4(%[src])                     \n\t"
    483    "lw             %[temp2],    8(%[src])                     \n\t"
    484    "lw             %[temp3],    12(%[src])                    \n\t"
    485    "ext            %[temp4],    %[temp0],          28,   4    \n\t"
    486    "ext            %[temp5],    %[temp0],          12,   4    \n\t"
    487    "ins            %[temp0],    %[temp4],          0,    4    \n\t"
    488    "ext            %[temp4],    %[temp1],          28,   4    \n\t"
    489    "ins            %[temp0],    %[temp5],          16,   4    \n\t"
    490    "ext            %[temp5],    %[temp1],          12,   4    \n\t"
    491    "ins            %[temp1],    %[temp4],          0,    4    \n\t"
    492    "ext            %[temp4],    %[temp2],          28,   4    \n\t"
    493    "ins            %[temp1],    %[temp5],          16,   4    \n\t"
    494    "ext            %[temp5],    %[temp2],          12,   4    \n\t"
    495    "ins            %[temp2],    %[temp4],          0,    4    \n\t"
    496    "ext            %[temp4],    %[temp3],          28,   4    \n\t"
    497    "ins            %[temp2],    %[temp5],          16,   4    \n\t"
    498    "ext            %[temp5],    %[temp3],          12,   4    \n\t"
    499    "ins            %[temp3],    %[temp4],          0,    4    \n\t"
    500    "precr.qb.ph    %[temp1],    %[temp1],          %[temp0]   \n\t"
    501    "ins            %[temp3],    %[temp5],          16,   4    \n\t"
    502    "addiu          %[src],      %[src],            16         \n\t"
    503    "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"
    504 #if (WEBP_SWAP_16BIT_CSP == 1)
    505    "usw            %[temp1],    0(%[dst])                     \n\t"
    506    "usw            %[temp3],    4(%[dst])                     \n\t"
    507 #else
    508    "wsbh           %[temp1],    %[temp1]                      \n\t"
    509    "wsbh           %[temp3],    %[temp3]                      \n\t"
    510    "usw            %[temp1],    0(%[dst])                     \n\t"
    511    "usw            %[temp3],    4(%[dst])                     \n\t"
    512 #endif
    513    "bne            %[src],      %[p_loop1_end],    0b         \n\t"
    514    " addiu         %[dst],      %[dst],            8          \n\t"
    515  "3:                                                          \n\t"
    516    "beq            %[src],      %[p_loop2_end],    2f         \n\t"
    517    " nop                                                      \n\t"
    518  "1:                                                          \n\t"
    519    "lw             %[temp0],    0(%[src])                     \n\t"
    520    "ext            %[temp4],    %[temp0],          28,   4    \n\t"
    521    "ext            %[temp5],    %[temp0],          12,   4    \n\t"
    522    "ins            %[temp0],    %[temp4],          0,    4    \n\t"
    523    "ins            %[temp0],    %[temp5],          16,   4    \n\t"
    524    "addiu          %[src],      %[src],            4          \n\t"
    525    "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"
    526 #if (WEBP_SWAP_16BIT_CSP == 1)
    527    "ush            %[temp0],    0(%[dst])                     \n\t"
    528 #else
    529    "wsbh           %[temp0],    %[temp0]                      \n\t"
    530    "ush            %[temp0],    0(%[dst])                     \n\t"
    531 #endif
    532    "bne            %[src],      %[p_loop2_end],    1b         \n\t"
    533    " addiu         %[dst],      %[dst],            2          \n\t"
    534  "2:                                                          \n\t"
    535    ".set           pop                                        \n\t"
    536    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
    537      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
    538      [dst]"+&r"(dst), [src]"+&r"(src)
    539    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    540    : "memory"
    541  );
    542 }
    543 
    // Converts num_pixels 32-bit pixels read from src into 16-bit values written
    // to dst (2 bytes per pixel). For each input word w the 16-bit result is
    //   (w[23:19] << 11) | (w[15:10] << 5) | w[7:3]
    // i.e. a 5-6-5 packing of bytes 2, 1 and 0 of w (R, G and B going by the
    // function name -- the channel layout of w is not visible here).
    // Unless WEBP_SWAP_16BIT_CSP == 1, the two bytes of every 16-bit result are
    // swapped with wsbh before being stored.
    // Output is written with the usw/ush unaligned-store macros.
    // NOTE(review): both loops terminate on pointer equality with their end
    // pointer, so num_pixels is assumed to be non-negative -- confirm callers.
    544 static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,
    545                                          int num_pixels, uint8_t* dst) {
    546  int temp0, temp1, temp2, temp3, temp4, temp5;
         // p_loop1_end: end of the input handled four pixels per iteration
         // (num_pixels rounded down to a multiple of 4).
         // p_loop2_end: end of the whole input.
    547  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
    548  const uint32_t* const p_loop2_end = src + num_pixels;
    549  __asm__ volatile (
    550    ".set           push                                       \n\t"
    551    ".set           noreorder                                  \n\t"
           // Skip the 4-pixel loop when (num_pixels & ~3) == 0.
    552    "beq            %[src],      %[p_loop1_end],    3f         \n\t"
    553    " nop                                                      \n\t"
         // Main loop: four pixels per iteration. The work for the four pixels is
         // interleaved and registers are reused as soon as they are free; the
         // packed results end up in temp4 (pixel 0), temp0 (pixel 1),
         // temp1 (pixel 2) and temp2 (pixel 3).
    554  "0:                                                          \n\t"
    555    "lw             %[temp0],    0(%[src])                     \n\t"
    556    "lw             %[temp1],    4(%[src])                     \n\t"
    557    "lw             %[temp2],    8(%[src])                     \n\t"
    558    "lw             %[temp3],    12(%[src])                    \n\t"
           // Per pixel: ext ..,8,16 takes w[23:8]; ext ..,5,11 takes w[15:5] and
           // is inserted into bits 10..0; ext ..,3,5 takes w[7:3] and is inserted
           // into bits 4..0.
    559    "ext            %[temp4],    %[temp0],          8,    16   \n\t"
    560    "ext            %[temp5],    %[temp0],          5,    11   \n\t"
    561    "ext            %[temp0],    %[temp0],          3,    5    \n\t"
    562    "ins            %[temp4],    %[temp5],          0,    11   \n\t"
    563    "ext            %[temp5],    %[temp1],          5,    11   \n\t"
    564    "ins            %[temp4],    %[temp0],          0,    5    \n\t"
    565    "ext            %[temp0],    %[temp1],          8,    16   \n\t"
    566    "ext            %[temp1],    %[temp1],          3,    5    \n\t"
    567    "ins            %[temp0],    %[temp5],          0,    11   \n\t"
    568    "ext            %[temp5],    %[temp2],          5,    11   \n\t"
    569    "ins            %[temp0],    %[temp1],          0,    5    \n\t"
    570    "ext            %[temp1],    %[temp2],          8,    16   \n\t"
    571    "ext            %[temp2],    %[temp2],          3,    5    \n\t"
    572    "ins            %[temp1],    %[temp5],          0,    11   \n\t"
    573    "ext            %[temp5],    %[temp3],          5,    11   \n\t"
    574    "ins            %[temp1],    %[temp2],          0,    5    \n\t"
    575    "ext            %[temp2],    %[temp3],          8,    16   \n\t"
    576    "ext            %[temp3],    %[temp3],          3,    5    \n\t"
    577    "ins            %[temp2],    %[temp5],          0,    11   \n\t"
           // append: temp0 = (temp0 << 16) | (temp4 & 0xffff), i.e. pixel 1 in
           // the upper half-word and pixel 0 in the lower one.
    578    "append         %[temp0],    %[temp4],          16         \n\t"
    579    "ins            %[temp2],    %[temp3],          0,    5    \n\t"
    580    "addiu          %[src],      %[src],            16         \n\t"
           // Likewise: pixel 3 in the upper half-word, pixel 2 in the lower one.
    581    "append         %[temp2],    %[temp1],          16         \n\t"
           // Both branches store the same two words; the #else branch first
           // swaps the bytes inside each half-word.
    582 #if (WEBP_SWAP_16BIT_CSP == 1)
    583    "usw            %[temp0],    0(%[dst])                     \n\t"
    584    "usw            %[temp2],    4(%[dst])                     \n\t"
    585 #else
    586    "wsbh           %[temp0],    %[temp0]                      \n\t"
    587    "wsbh           %[temp2],    %[temp2]                      \n\t"
    588    "usw            %[temp0],    0(%[dst])                     \n\t"
    589    "usw            %[temp2],    4(%[dst])                     \n\t"
    590 #endif
           // noreorder: the addiu below sits in the branch delay slot, so dst is
           // advanced by 8 on every iteration, including the last one.
    591    "bne            %[src],      %[p_loop1_end],    0b         \n\t"
    592    " addiu         %[dst],      %[dst],            8          \n\t"
         // Tail: the remaining (num_pixels & 3) pixels, one per iteration.
    593  "3:                                                          \n\t"
    594    "beq            %[src],      %[p_loop2_end],    2f         \n\t"
    595    " nop                                                      \n\t"
    596  "1:                                                          \n\t"
    597    "lw             %[temp0],    0(%[src])                     \n\t"
    598    "ext            %[temp4],    %[temp0],          8,    16   \n\t"
    599    "ext            %[temp5],    %[temp0],          5,    11   \n\t"
    600    "ext            %[temp0],    %[temp0],          3,    5    \n\t"
    601    "ins            %[temp4],    %[temp5],          0,    11   \n\t"
    602    "addiu          %[src],      %[src],            4          \n\t"
    603    "ins            %[temp4],    %[temp0],          0,    5    \n\t"
    604 #if (WEBP_SWAP_16BIT_CSP == 1)
    605    "ush            %[temp4],    0(%[dst])                     \n\t"
    606 #else
    607    "wsbh           %[temp4],    %[temp4]                      \n\t"
    608    "ush            %[temp4],    0(%[dst])                     \n\t"
    609 #endif
           // Delay slot again: dst += 2 happens whether or not the branch is taken.
    610    "bne            %[src],      %[p_loop2_end],    1b         \n\t"
    611    " addiu         %[dst],      %[dst],            2          \n\t"
    612  "2:                                                          \n\t"
    613    ".set           pop                                        \n\t"
           // temps are early-clobber outputs; dst and src are read-write because
           // the asm advances them.
    614    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
    615      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
    616      [dst]"+&r"(dst), [src]"+&r"(src)
    617    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    618    : "memory"
    619  );
    620 }
    621 
    // Converts num_pixels 32-bit pixels read from src into 3 bytes per pixel
    // written to dst: bits 23..0 of every input word are kept and bits 31..24
    // are dropped.
    // The main loop packs four input words into three 32-bit output words; the
    // tail loop stores the low 16 bits with ush and bits 23..16 with sb.
    // NOTE(review): the resulting byte order in memory follows the CPU's
    // endianness; the word packing looks written for little-endian -- confirm.
    // NOTE(review): both loops terminate on pointer equality with their end
    // pointer, so num_pixels is assumed to be non-negative -- confirm callers.
    622 static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,
    623                                       int num_pixels, uint8_t* dst) {
    624  int temp0, temp1, temp2, temp3;
         // p_loop1_end: end of the input handled four pixels per iteration
         // (num_pixels rounded down to a multiple of 4).
         // p_loop2_end: end of the whole input.
    625  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
    626  const uint32_t* const p_loop2_end = src + num_pixels;
    627  __asm__ volatile (
    628    ".set       push                                         \n\t"
    629    ".set       noreorder                                    \n\t"
           // Skip the 4-pixel loop when (num_pixels & ~3) == 0.
    630    "beq        %[src],      %[p_loop1_end],    3f           \n\t"
    631    " nop                                                    \n\t"
         // Main loop: 4 input words (p0..p3) -> 3 output words (12 bytes).
    632  "0:                                                        \n\t"
    633    "lw         %[temp0],    0(%[src])                       \n\t"
    634    "lw         %[temp1],    4(%[src])                       \n\t"
    635    "lw         %[temp2],    8(%[src])                       \n\t"
    636    "lw         %[temp3],    12(%[src])                      \n\t"
           // temp0 = p0[23:0] | (p1[7:0] << 24)
    637    "ins        %[temp0],    %[temp1],          24,    8     \n\t"
           // temp1 = p1[23:8] | (p2[15:0] << 16)
    638    "sra        %[temp1],    %[temp1],          8            \n\t"
    639    "ins        %[temp1],    %[temp2],          16,    16    \n\t"
           // temp3 = (p3 << 8) | p2[23:16]: the sll moves p2[23:16] into the top
           // byte, and balign ..,1 computes (temp3 << 8) | (temp2 >> 24).
    640    "sll        %[temp2],    %[temp2],          8            \n\t"
    641    "balign     %[temp3],    %[temp2],          1            \n\t"
    642    "addiu      %[src],      %[src],            16           \n\t"
    643    "usw        %[temp0],    0(%[dst])                       \n\t"
    644    "usw        %[temp1],    4(%[dst])                       \n\t"
    645    "usw        %[temp3],    8(%[dst])                       \n\t"
           // noreorder: the addiu below sits in the branch delay slot, so dst is
           // advanced by 12 on every iteration, including the last one.
    646    "bne        %[src],      %[p_loop1_end],    0b           \n\t"
    647    " addiu     %[dst],      %[dst],            12           \n\t"
         // Tail: the remaining (num_pixels & 3) pixels, one per iteration.
    648  "3:                                                        \n\t"
    649    "beq        %[src],      %[p_loop2_end],    2f           \n\t"
    650    " nop                                                    \n\t"
    651  "1:                                                        \n\t"
    652    "lw         %[temp0],    0(%[src])                       \n\t"
    653    "addiu      %[src],      %[src],            4            \n\t"
           // dst is bumped first, hence the negative offsets on the two stores.
    654    "addiu      %[dst],      %[dst],            3            \n\t"
    655    "ush        %[temp0],    -3(%[dst])                      \n\t"
    656    "sra        %[temp0],    %[temp0],          16           \n\t"
           // The sb in the delay slot writes the third byte (bits 23..16 of the
           // input word) whether or not the branch is taken.
    657    "bne        %[src],      %[p_loop2_end],    1b           \n\t"
    658    " sb        %[temp0],    -1(%[dst])                      \n\t"
    659  "2:                                                        \n\t"
    660    ".set       pop                                          \n\t"
           // temps are early-clobber outputs; dst and src are read-write because
           // the asm advances them.
    661    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
    662      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    663    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    664    : "memory"
    665  );
    666 }
    667 
    668 //------------------------------------------------------------------------------
    669 // Entry point
    670 
    // Self-prototype for the entry point defined right below (presumably there
    // to keep missing-prototype warnings quiet -- confirm).
    671 extern void VP8LDspInitMIPSdspR2(void);
    672 
        // Installs the *_MIPSdspR2 routines into the VP8L* function pointers and
        // into the VP8LPredictors table. Only predictor slots 5..13 are assigned
        // here; every other slot is left as it was.
        // NOTE(review): WEBP_TSAN_IGNORE_FUNCTION is defined outside this file;
        // presumably it excludes this function from ThreadSanitizer -- confirm.
    673 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
         // Color-map lookups.
    674  VP8LMapColor32b = MapARGB_MIPSdspR2;
    675  VP8LMapColor8b = MapAlpha_MIPSdspR2;
    676 
         // Predictors 5..13.
    677  VP8LPredictors[5] = Predictor5_MIPSdspR2;
    678  VP8LPredictors[6] = Predictor6_MIPSdspR2;
    679  VP8LPredictors[7] = Predictor7_MIPSdspR2;
    680  VP8LPredictors[8] = Predictor8_MIPSdspR2;
    681  VP8LPredictors[9] = Predictor9_MIPSdspR2;
    682  VP8LPredictors[10] = Predictor10_MIPSdspR2;
    683  VP8LPredictors[11] = Predictor11_MIPSdspR2;
    684  VP8LPredictors[12] = Predictor12_MIPSdspR2;
    685  VP8LPredictors[13] = Predictor13_MIPSdspR2;
    686 
         // Inverse transforms.
    687  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;
    688  VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;
    689 
         // Output color-space conversions.
    690  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;
    691  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;
    692  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;
    693  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;
    694  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;
    695 }
    696 
    697 #else  // !WEBP_USE_MIPS_DSP_R2
    698 
        // Build without MIPS DSP R2 support: the init entry point is supplied by
        // WEBP_DSP_INIT_STUB instead of the definition above.
        // NOTE(review): the macro is defined outside this file; presumably it
        // expands to an empty VP8LDspInitMIPSdspR2() -- confirm in src/dsp/dsp.h.
    699 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
    700 
    701 #endif  // WEBP_USE_MIPS_DSP_R2