// lossless_mips_dsp_r2.c (39181B)
1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Image transforms and color space conversion methods for lossless decoder. 11 // 12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) 13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) 14 15 #include "src/dsp/dsp.h" 16 17 #if defined(WEBP_USE_MIPS_DSP_R2) 18 19 #include "src/dsp/lossless.h" 20 #include "src/dsp/lossless_common.h" 21 22 #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \ 23 static void FUNC_NAME(const TYPE* src, \ 24 const uint32_t* const color_map, \ 25 TYPE* dst, int y_start, int y_end, \ 26 int width) { \ 27 int y; \ 28 for (y = y_start; y < y_end; ++y) { \ 29 int x; \ 30 for (x = 0; x < (width >> 2); ++x) { \ 31 int tmp1, tmp2, tmp3, tmp4; \ 32 __asm__ volatile ( \ 33 ".ifc " #TYPE ", uint8_t \n\t" \ 34 "lbu %[tmp1], 0(%[src]) \n\t" \ 35 "lbu %[tmp2], 1(%[src]) \n\t" \ 36 "lbu %[tmp3], 2(%[src]) \n\t" \ 37 "lbu %[tmp4], 3(%[src]) \n\t" \ 38 "addiu %[src], %[src], 4 \n\t" \ 39 ".endif \n\t" \ 40 ".ifc " #TYPE ", uint32_t \n\t" \ 41 "lw %[tmp1], 0(%[src]) \n\t" \ 42 "lw %[tmp2], 4(%[src]) \n\t" \ 43 "lw %[tmp3], 8(%[src]) \n\t" \ 44 "lw %[tmp4], 12(%[src]) \n\t" \ 45 "ext %[tmp1], %[tmp1], 8, 8 \n\t" \ 46 "ext %[tmp2], %[tmp2], 8, 8 \n\t" \ 47 "ext %[tmp3], %[tmp3], 8, 8 \n\t" \ 48 "ext %[tmp4], %[tmp4], 8, 8 \n\t" \ 49 "addiu %[src], %[src], 16 \n\t" \ 50 ".endif \n\t" \ 51 "sll %[tmp1], %[tmp1], 2 \n\t" \ 52 "sll %[tmp2], %[tmp2], 2 \n\t" \ 53 "sll %[tmp3], %[tmp3], 2 \n\t" \ 54 "sll %[tmp4], %[tmp4], 2 \n\t" \ 55 "lwx %[tmp1], %[tmp1](%[color_map]) 
\n\t" \ 56 "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \ 57 "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \ 58 "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \ 59 ".ifc " #TYPE ", uint8_t \n\t" \ 60 "ext %[tmp1], %[tmp1], 8, 8 \n\t" \ 61 "ext %[tmp2], %[tmp2], 8, 8 \n\t" \ 62 "ext %[tmp3], %[tmp3], 8, 8 \n\t" \ 63 "ext %[tmp4], %[tmp4], 8, 8 \n\t" \ 64 "sb %[tmp1], 0(%[dst]) \n\t" \ 65 "sb %[tmp2], 1(%[dst]) \n\t" \ 66 "sb %[tmp3], 2(%[dst]) \n\t" \ 67 "sb %[tmp4], 3(%[dst]) \n\t" \ 68 "addiu %[dst], %[dst], 4 \n\t" \ 69 ".endif \n\t" \ 70 ".ifc " #TYPE ", uint32_t \n\t" \ 71 "sw %[tmp1], 0(%[dst]) \n\t" \ 72 "sw %[tmp2], 4(%[dst]) \n\t" \ 73 "sw %[tmp3], 8(%[dst]) \n\t" \ 74 "sw %[tmp4], 12(%[dst]) \n\t" \ 75 "addiu %[dst], %[dst], 16 \n\t" \ 76 ".endif \n\t" \ 77 : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \ 78 [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \ 79 : [color_map]"r"(color_map) \ 80 : "memory" \ 81 ); \ 82 } \ 83 for (x = 0; x < (width & 3); ++x) { \ 84 *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \ 85 } \ 86 } \ 87 } 88 89 MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue) 90 MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue) 91 92 #undef MAP_COLOR_FUNCS 93 94 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, 95 uint32_t c2) { 96 int temp0, temp1, temp2, temp3, temp4, temp5; 97 __asm__ volatile ( 98 "preceu.ph.qbr %[temp1], %[c0] \n\t" 99 "preceu.ph.qbl %[temp2], %[c0] \n\t" 100 "preceu.ph.qbr %[temp3], %[c1] \n\t" 101 "preceu.ph.qbl %[temp4], %[c1] \n\t" 102 "preceu.ph.qbr %[temp5], %[c2] \n\t" 103 "preceu.ph.qbl %[temp0], %[c2] \n\t" 104 "subq.ph %[temp3], %[temp3], %[temp5] \n\t" 105 "subq.ph %[temp4], %[temp4], %[temp0] \n\t" 106 "addq.ph %[temp1], %[temp1], %[temp3] \n\t" 107 "addq.ph %[temp2], %[temp2], %[temp4] \n\t" 108 "shll_s.ph %[temp1], %[temp1], 7 \n\t" 109 "shll_s.ph %[temp2], %[temp2], 7 \n\t" 110 "precrqu_s.qb.ph %[temp2], %[temp2], 
%[temp1] \n\t" 111 : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 112 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5) 113 : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2) 114 : "memory" 115 ); 116 return temp2; 117 } 118 119 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, 120 uint32_t c2) { 121 int temp0, temp1, temp2, temp3, temp4, temp5; 122 __asm__ volatile ( 123 "adduh.qb %[temp5], %[c0], %[c1] \n\t" 124 "preceu.ph.qbr %[temp3], %[c2] \n\t" 125 "preceu.ph.qbr %[temp1], %[temp5] \n\t" 126 "preceu.ph.qbl %[temp2], %[temp5] \n\t" 127 "preceu.ph.qbl %[temp4], %[c2] \n\t" 128 "subq.ph %[temp3], %[temp1], %[temp3] \n\t" 129 "subq.ph %[temp4], %[temp2], %[temp4] \n\t" 130 "shrl.ph %[temp5], %[temp3], 15 \n\t" 131 "shrl.ph %[temp0], %[temp4], 15 \n\t" 132 "addq.ph %[temp3], %[temp3], %[temp5] \n\t" 133 "addq.ph %[temp4], %[temp0], %[temp4] \n\t" 134 "shra.ph %[temp3], %[temp3], 1 \n\t" 135 "shra.ph %[temp4], %[temp4], 1 \n\t" 136 "addq.ph %[temp1], %[temp1], %[temp3] \n\t" 137 "addq.ph %[temp2], %[temp2], %[temp4] \n\t" 138 "shll_s.ph %[temp1], %[temp1], 7 \n\t" 139 "shll_s.ph %[temp2], %[temp2], 7 \n\t" 140 "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t" 141 : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 142 [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5) 143 : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2) 144 : "memory" 145 ); 146 return temp1; 147 } 148 149 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { 150 int temp0, temp1, temp2, temp3, temp4, temp5; 151 __asm__ volatile ( 152 "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t" 153 "pick.qb %[temp1], %[b], %[c] \n\t" 154 "pick.qb %[temp2], %[c], %[b] \n\t" 155 "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t" 156 "pick.qb %[temp4], %[a], %[c] \n\t" 157 "pick.qb %[temp5], %[c], %[a] \n\t" 158 "subu.qb %[temp3], %[temp1], %[temp2] \n\t" 159 "subu.qb %[temp0], %[temp4], %[temp5] \n\t" 160 "raddu.w.qb %[temp3], %[temp3] \n\t" 161 
"raddu.w.qb %[temp0], %[temp0] \n\t" 162 "subu %[temp3], %[temp3], %[temp0] \n\t" 163 "slti %[temp0], %[temp3], 0x1 \n\t" 164 "movz %[a], %[b], %[temp0] \n\t" 165 : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 166 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0), 167 [a]"+&r"(a) 168 : [b]"r"(b), [c]"r"(c) 169 ); 170 return a; 171 } 172 173 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { 174 __asm__ volatile ( 175 "adduh.qb %[a0], %[a0], %[a1] \n\t" 176 : [a0]"+r"(a0) 177 : [a1]"r"(a1) 178 ); 179 return a0; 180 } 181 182 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { 183 return Average2(Average2(a0, a2), a1); 184 } 185 186 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, 187 uint32_t a2, uint32_t a3) { 188 return Average2(Average2(a0, a1), Average2(a2, a3)); 189 } 190 191 static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left, 192 const uint32_t* const top) { 193 return Average3(*left, top[0], top[1]); 194 } 195 196 static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left, 197 const uint32_t* const top) { 198 return Average2(*left, top[-1]); 199 } 200 201 static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left, 202 const uint32_t* const top) { 203 return Average2(*left, top[0]); 204 } 205 206 static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left, 207 const uint32_t* const top) { 208 (void)left; 209 return Average2(top[-1], top[0]); 210 } 211 212 static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left, 213 const uint32_t* const top) { 214 (void)left; 215 return Average2(top[0], top[1]); 216 } 217 218 static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left, 219 const uint32_t* const top) { 220 return Average4(*left, top[-1], top[0], top[1]); 221 } 222 223 static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left, 224 const uint32_t* const top) { 225 return Select(top[0], *left, top[-1]); 226 } 227 228 static uint32_t 
Predictor12_MIPSdspR2(const uint32_t* const left, 229 const uint32_t* const top) { 230 return ClampedAddSubtractFull(*left, top[0], top[-1]); 231 } 232 233 static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left, 234 const uint32_t* const top) { 235 return ClampedAddSubtractHalf(*left, top[0], top[-1]); 236 } 237 238 // Add green to blue and red channels (i.e. perform the inverse transform of 239 // 'subtract green'). 240 static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels, 241 uint32_t* dst) { 242 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 243 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); 244 const uint32_t* const p_loop2_end = src + num_pixels; 245 __asm__ volatile ( 246 ".set push \n\t" 247 ".set noreorder \n\t" 248 "beq %[src], %[p_loop1_end], 3f \n\t" 249 " nop \n\t" 250 "0: \n\t" 251 "lw %[temp0], 0(%[src]) \n\t" 252 "lw %[temp1], 4(%[src]) \n\t" 253 "lw %[temp2], 8(%[src]) \n\t" 254 "lw %[temp3], 12(%[src]) \n\t" 255 "ext %[temp4], %[temp0], 8, 8 \n\t" 256 "ext %[temp5], %[temp1], 8, 8 \n\t" 257 "ext %[temp6], %[temp2], 8, 8 \n\t" 258 "ext %[temp7], %[temp3], 8, 8 \n\t" 259 "addiu %[src], %[src], 16 \n\t" 260 "addiu %[dst], %[dst], 16 \n\t" 261 "replv.ph %[temp4], %[temp4] \n\t" 262 "replv.ph %[temp5], %[temp5] \n\t" 263 "replv.ph %[temp6], %[temp6] \n\t" 264 "replv.ph %[temp7], %[temp7] \n\t" 265 "addu.qb %[temp0], %[temp0], %[temp4] \n\t" 266 "addu.qb %[temp1], %[temp1], %[temp5] \n\t" 267 "addu.qb %[temp2], %[temp2], %[temp6] \n\t" 268 "addu.qb %[temp3], %[temp3], %[temp7] \n\t" 269 "sw %[temp0], -16(%[dst]) \n\t" 270 "sw %[temp1], -12(%[dst]) \n\t" 271 "sw %[temp2], -8(%[dst]) \n\t" 272 "bne %[src], %[p_loop1_end], 0b \n\t" 273 " sw %[temp3], -4(%[dst]) \n\t" 274 "3: \n\t" 275 "beq %[src], %[p_loop2_end], 2f \n\t" 276 " nop \n\t" 277 "1: \n\t" 278 "lw %[temp0], 0(%[src]) \n\t" 279 "addiu %[src], %[src], 4 \n\t" 280 "addiu %[dst], %[dst], 4 \n\t" 281 "ext %[temp4], %[temp0], 8, 8 
\n\t" 282 "replv.ph %[temp4], %[temp4] \n\t" 283 "addu.qb %[temp0], %[temp0], %[temp4] \n\t" 284 "bne %[src], %[p_loop2_end], 1b \n\t" 285 " sw %[temp0], -4(%[dst]) \n\t" 286 "2: \n\t" 287 ".set pop \n\t" 288 : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0), 289 [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 290 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), 291 [temp7]"=&r"(temp7) 292 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) 293 : "memory" 294 ); 295 } 296 297 static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m, 298 const uint32_t* src, int num_pixels, 299 uint32_t* dst) { 300 int temp0, temp1, temp2, temp3, temp4, temp5; 301 uint32_t argb, argb1, new_red; 302 const uint32_t G_to_R = m->green_to_red; 303 const uint32_t G_to_B = m->green_to_blue; 304 const uint32_t R_to_B = m->red_to_blue; 305 const uint32_t* const p_loop_end = src + (num_pixels & ~1); 306 __asm__ volatile ( 307 ".set push \n\t" 308 ".set noreorder \n\t" 309 "beq %[src], %[p_loop_end], 1f \n\t" 310 " nop \n\t" 311 "replv.ph %[temp0], %[G_to_R] \n\t" 312 "replv.ph %[temp1], %[G_to_B] \n\t" 313 "replv.ph %[temp2], %[R_to_B] \n\t" 314 "shll.ph %[temp0], %[temp0], 8 \n\t" 315 "shll.ph %[temp1], %[temp1], 8 \n\t" 316 "shll.ph %[temp2], %[temp2], 8 \n\t" 317 "shra.ph %[temp0], %[temp0], 8 \n\t" 318 "shra.ph %[temp1], %[temp1], 8 \n\t" 319 "shra.ph %[temp2], %[temp2], 8 \n\t" 320 "0: \n\t" 321 "lw %[argb], 0(%[src]) \n\t" 322 "lw %[argb1], 4(%[src]) \n\t" 323 "sw %[argb], 0(%[dst]) \n\t" 324 "sw %[argb1], 4(%[dst]) \n\t" 325 "addiu %[src], %[src], 8 \n\t" 326 "addiu %[dst], %[dst], 8 \n\t" 327 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t" 328 "preceu.ph.qbra %[temp3], %[temp3] \n\t" 329 "shll.ph %[temp3], %[temp3], 8 \n\t" 330 "shra.ph %[temp3], %[temp3], 8 \n\t" 331 "mul.ph %[temp5], %[temp3], %[temp0] \n\t" 332 "mul.ph %[temp3], %[temp3], %[temp1] \n\t" 333 "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t" 334 "ins 
%[argb1], %[argb], 16, 16 \n\t" 335 "shra.ph %[temp5], %[temp5], 5 \n\t" 336 "shra.ph %[temp3], %[temp3], 5 \n\t" 337 "addu.ph %[new_red], %[new_red], %[temp5] \n\t" 338 "addu.ph %[argb1], %[argb1], %[temp3] \n\t" 339 "preceu.ph.qbra %[temp5], %[new_red] \n\t" 340 "shll.ph %[temp4], %[temp5], 8 \n\t" 341 "shra.ph %[temp4], %[temp4], 8 \n\t" 342 "mul.ph %[temp4], %[temp4], %[temp2] \n\t" 343 "sb %[temp5], -2(%[dst]) \n\t" 344 "sra %[temp5], %[temp5], 16 \n\t" 345 "shra.ph %[temp4], %[temp4], 5 \n\t" 346 "addu.ph %[argb1], %[argb1], %[temp4] \n\t" 347 "preceu.ph.qbra %[temp3], %[argb1] \n\t" 348 "sb %[temp5], -6(%[dst]) \n\t" 349 "sb %[temp3], -4(%[dst]) \n\t" 350 "sra %[temp3], %[temp3], 16 \n\t" 351 "bne %[src], %[p_loop_end], 0b \n\t" 352 " sb %[temp3], -8(%[dst]) \n\t" 353 "1: \n\t" 354 ".set pop \n\t" 355 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 356 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 357 [new_red]"=&r"(new_red), [argb]"=&r"(argb), 358 [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src) 359 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B), 360 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end) 361 : "memory", "hi", "lo" 362 ); 363 364 // Fall-back to C-version for left-overs. 
365 if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst); 366 } 367 368 static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src, 369 int num_pixels, uint8_t* dst) { 370 int temp0, temp1, temp2, temp3; 371 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); 372 const uint32_t* const p_loop2_end = src + num_pixels; 373 __asm__ volatile ( 374 ".set push \n\t" 375 ".set noreorder \n\t" 376 "beq %[src], %[p_loop1_end], 3f \n\t" 377 " nop \n\t" 378 "0: \n\t" 379 "lw %[temp3], 12(%[src]) \n\t" 380 "lw %[temp2], 8(%[src]) \n\t" 381 "lw %[temp1], 4(%[src]) \n\t" 382 "lw %[temp0], 0(%[src]) \n\t" 383 "ins %[temp3], %[temp2], 24, 8 \n\t" 384 "sll %[temp2], %[temp2], 8 \n\t" 385 "rotr %[temp3], %[temp3], 16 \n\t" 386 "ins %[temp2], %[temp1], 0, 16 \n\t" 387 "sll %[temp1], %[temp1], 8 \n\t" 388 "wsbh %[temp3], %[temp3] \n\t" 389 "balign %[temp0], %[temp1], 1 \n\t" 390 "wsbh %[temp2], %[temp2] \n\t" 391 "wsbh %[temp0], %[temp0] \n\t" 392 "usw %[temp3], 8(%[dst]) \n\t" 393 "rotr %[temp0], %[temp0], 16 \n\t" 394 "usw %[temp2], 4(%[dst]) \n\t" 395 "addiu %[src], %[src], 16 \n\t" 396 "usw %[temp0], 0(%[dst]) \n\t" 397 "bne %[src], %[p_loop1_end], 0b \n\t" 398 " addiu %[dst], %[dst], 12 \n\t" 399 "3: \n\t" 400 "beq %[src], %[p_loop2_end], 2f \n\t" 401 " nop \n\t" 402 "1: \n\t" 403 "lw %[temp0], 0(%[src]) \n\t" 404 "addiu %[src], %[src], 4 \n\t" 405 "wsbh %[temp1], %[temp0] \n\t" 406 "addiu %[dst], %[dst], 3 \n\t" 407 "ush %[temp1], -2(%[dst]) \n\t" 408 "sra %[temp0], %[temp0], 16 \n\t" 409 "bne %[src], %[p_loop2_end], 1b \n\t" 410 " sb %[temp0], -3(%[dst]) \n\t" 411 "2: \n\t" 412 ".set pop \n\t" 413 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 414 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src) 415 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) 416 : "memory" 417 ); 418 } 419 420 static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src, 421 int num_pixels, uint8_t* dst) { 422 int temp0, temp1, temp2, temp3; 423 const 
uint32_t* const p_loop1_end = src + (num_pixels & ~3); 424 const uint32_t* const p_loop2_end = src + num_pixels; 425 __asm__ volatile ( 426 ".set push \n\t" 427 ".set noreorder \n\t" 428 "beq %[src], %[p_loop1_end], 3f \n\t" 429 " nop \n\t" 430 "0: \n\t" 431 "lw %[temp0], 0(%[src]) \n\t" 432 "lw %[temp1], 4(%[src]) \n\t" 433 "lw %[temp2], 8(%[src]) \n\t" 434 "lw %[temp3], 12(%[src]) \n\t" 435 "wsbh %[temp0], %[temp0] \n\t" 436 "wsbh %[temp1], %[temp1] \n\t" 437 "wsbh %[temp2], %[temp2] \n\t" 438 "wsbh %[temp3], %[temp3] \n\t" 439 "addiu %[src], %[src], 16 \n\t" 440 "balign %[temp0], %[temp0], 1 \n\t" 441 "balign %[temp1], %[temp1], 1 \n\t" 442 "balign %[temp2], %[temp2], 1 \n\t" 443 "balign %[temp3], %[temp3], 1 \n\t" 444 "usw %[temp0], 0(%[dst]) \n\t" 445 "usw %[temp1], 4(%[dst]) \n\t" 446 "usw %[temp2], 8(%[dst]) \n\t" 447 "usw %[temp3], 12(%[dst]) \n\t" 448 "bne %[src], %[p_loop1_end], 0b \n\t" 449 " addiu %[dst], %[dst], 16 \n\t" 450 "3: \n\t" 451 "beq %[src], %[p_loop2_end], 2f \n\t" 452 " nop \n\t" 453 "1: \n\t" 454 "lw %[temp0], 0(%[src]) \n\t" 455 "wsbh %[temp0], %[temp0] \n\t" 456 "addiu %[src], %[src], 4 \n\t" 457 "balign %[temp0], %[temp0], 1 \n\t" 458 "usw %[temp0], 0(%[dst]) \n\t" 459 "bne %[src], %[p_loop2_end], 1b \n\t" 460 " addiu %[dst], %[dst], 4 \n\t" 461 "2: \n\t" 462 ".set pop \n\t" 463 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 464 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src) 465 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) 466 : "memory" 467 ); 468 } 469 470 static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src, 471 int num_pixels, uint8_t* dst) { 472 int temp0, temp1, temp2, temp3, temp4, temp5; 473 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); 474 const uint32_t* const p_loop2_end = src + num_pixels; 475 __asm__ volatile ( 476 ".set push \n\t" 477 ".set noreorder \n\t" 478 "beq %[src], %[p_loop1_end], 3f \n\t" 479 " nop \n\t" 480 "0: \n\t" 481 "lw %[temp0], 0(%[src]) 
\n\t" 482 "lw %[temp1], 4(%[src]) \n\t" 483 "lw %[temp2], 8(%[src]) \n\t" 484 "lw %[temp3], 12(%[src]) \n\t" 485 "ext %[temp4], %[temp0], 28, 4 \n\t" 486 "ext %[temp5], %[temp0], 12, 4 \n\t" 487 "ins %[temp0], %[temp4], 0, 4 \n\t" 488 "ext %[temp4], %[temp1], 28, 4 \n\t" 489 "ins %[temp0], %[temp5], 16, 4 \n\t" 490 "ext %[temp5], %[temp1], 12, 4 \n\t" 491 "ins %[temp1], %[temp4], 0, 4 \n\t" 492 "ext %[temp4], %[temp2], 28, 4 \n\t" 493 "ins %[temp1], %[temp5], 16, 4 \n\t" 494 "ext %[temp5], %[temp2], 12, 4 \n\t" 495 "ins %[temp2], %[temp4], 0, 4 \n\t" 496 "ext %[temp4], %[temp3], 28, 4 \n\t" 497 "ins %[temp2], %[temp5], 16, 4 \n\t" 498 "ext %[temp5], %[temp3], 12, 4 \n\t" 499 "ins %[temp3], %[temp4], 0, 4 \n\t" 500 "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t" 501 "ins %[temp3], %[temp5], 16, 4 \n\t" 502 "addiu %[src], %[src], 16 \n\t" 503 "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t" 504 #if (WEBP_SWAP_16BIT_CSP == 1) 505 "usw %[temp1], 0(%[dst]) \n\t" 506 "usw %[temp3], 4(%[dst]) \n\t" 507 #else 508 "wsbh %[temp1], %[temp1] \n\t" 509 "wsbh %[temp3], %[temp3] \n\t" 510 "usw %[temp1], 0(%[dst]) \n\t" 511 "usw %[temp3], 4(%[dst]) \n\t" 512 #endif 513 "bne %[src], %[p_loop1_end], 0b \n\t" 514 " addiu %[dst], %[dst], 8 \n\t" 515 "3: \n\t" 516 "beq %[src], %[p_loop2_end], 2f \n\t" 517 " nop \n\t" 518 "1: \n\t" 519 "lw %[temp0], 0(%[src]) \n\t" 520 "ext %[temp4], %[temp0], 28, 4 \n\t" 521 "ext %[temp5], %[temp0], 12, 4 \n\t" 522 "ins %[temp0], %[temp4], 0, 4 \n\t" 523 "ins %[temp0], %[temp5], 16, 4 \n\t" 524 "addiu %[src], %[src], 4 \n\t" 525 "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t" 526 #if (WEBP_SWAP_16BIT_CSP == 1) 527 "ush %[temp0], 0(%[dst]) \n\t" 528 #else 529 "wsbh %[temp0], %[temp0] \n\t" 530 "ush %[temp0], 0(%[dst]) \n\t" 531 #endif 532 "bne %[src], %[p_loop2_end], 1b \n\t" 533 " addiu %[dst], %[dst], 2 \n\t" 534 "2: \n\t" 535 ".set pop \n\t" 536 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 537 [temp3]"=&r"(temp3), 
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 538 [dst]"+&r"(dst), [src]"+&r"(src) 539 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) 540 : "memory" 541 ); 542 } 543 544 static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src, 545 int num_pixels, uint8_t* dst) { 546 int temp0, temp1, temp2, temp3, temp4, temp5; 547 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); 548 const uint32_t* const p_loop2_end = src + num_pixels; 549 __asm__ volatile ( 550 ".set push \n\t" 551 ".set noreorder \n\t" 552 "beq %[src], %[p_loop1_end], 3f \n\t" 553 " nop \n\t" 554 "0: \n\t" 555 "lw %[temp0], 0(%[src]) \n\t" 556 "lw %[temp1], 4(%[src]) \n\t" 557 "lw %[temp2], 8(%[src]) \n\t" 558 "lw %[temp3], 12(%[src]) \n\t" 559 "ext %[temp4], %[temp0], 8, 16 \n\t" 560 "ext %[temp5], %[temp0], 5, 11 \n\t" 561 "ext %[temp0], %[temp0], 3, 5 \n\t" 562 "ins %[temp4], %[temp5], 0, 11 \n\t" 563 "ext %[temp5], %[temp1], 5, 11 \n\t" 564 "ins %[temp4], %[temp0], 0, 5 \n\t" 565 "ext %[temp0], %[temp1], 8, 16 \n\t" 566 "ext %[temp1], %[temp1], 3, 5 \n\t" 567 "ins %[temp0], %[temp5], 0, 11 \n\t" 568 "ext %[temp5], %[temp2], 5, 11 \n\t" 569 "ins %[temp0], %[temp1], 0, 5 \n\t" 570 "ext %[temp1], %[temp2], 8, 16 \n\t" 571 "ext %[temp2], %[temp2], 3, 5 \n\t" 572 "ins %[temp1], %[temp5], 0, 11 \n\t" 573 "ext %[temp5], %[temp3], 5, 11 \n\t" 574 "ins %[temp1], %[temp2], 0, 5 \n\t" 575 "ext %[temp2], %[temp3], 8, 16 \n\t" 576 "ext %[temp3], %[temp3], 3, 5 \n\t" 577 "ins %[temp2], %[temp5], 0, 11 \n\t" 578 "append %[temp0], %[temp4], 16 \n\t" 579 "ins %[temp2], %[temp3], 0, 5 \n\t" 580 "addiu %[src], %[src], 16 \n\t" 581 "append %[temp2], %[temp1], 16 \n\t" 582 #if (WEBP_SWAP_16BIT_CSP == 1) 583 "usw %[temp0], 0(%[dst]) \n\t" 584 "usw %[temp2], 4(%[dst]) \n\t" 585 #else 586 "wsbh %[temp0], %[temp0] \n\t" 587 "wsbh %[temp2], %[temp2] \n\t" 588 "usw %[temp0], 0(%[dst]) \n\t" 589 "usw %[temp2], 4(%[dst]) \n\t" 590 #endif 591 "bne %[src], %[p_loop1_end], 0b \n\t" 592 " addiu %[dst], %[dst], 
8 \n\t" 593 "3: \n\t" 594 "beq %[src], %[p_loop2_end], 2f \n\t" 595 " nop \n\t" 596 "1: \n\t" 597 "lw %[temp0], 0(%[src]) \n\t" 598 "ext %[temp4], %[temp0], 8, 16 \n\t" 599 "ext %[temp5], %[temp0], 5, 11 \n\t" 600 "ext %[temp0], %[temp0], 3, 5 \n\t" 601 "ins %[temp4], %[temp5], 0, 11 \n\t" 602 "addiu %[src], %[src], 4 \n\t" 603 "ins %[temp4], %[temp0], 0, 5 \n\t" 604 #if (WEBP_SWAP_16BIT_CSP == 1) 605 "ush %[temp4], 0(%[dst]) \n\t" 606 #else 607 "wsbh %[temp4], %[temp4] \n\t" 608 "ush %[temp4], 0(%[dst]) \n\t" 609 #endif 610 "bne %[src], %[p_loop2_end], 1b \n\t" 611 " addiu %[dst], %[dst], 2 \n\t" 612 "2: \n\t" 613 ".set pop \n\t" 614 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 615 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 616 [dst]"+&r"(dst), [src]"+&r"(src) 617 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) 618 : "memory" 619 ); 620 } 621 622 static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src, 623 int num_pixels, uint8_t* dst) { 624 int temp0, temp1, temp2, temp3; 625 const uint32_t* const p_loop1_end = src + (num_pixels & ~3); 626 const uint32_t* const p_loop2_end = src + num_pixels; 627 __asm__ volatile ( 628 ".set push \n\t" 629 ".set noreorder \n\t" 630 "beq %[src], %[p_loop1_end], 3f \n\t" 631 " nop \n\t" 632 "0: \n\t" 633 "lw %[temp0], 0(%[src]) \n\t" 634 "lw %[temp1], 4(%[src]) \n\t" 635 "lw %[temp2], 8(%[src]) \n\t" 636 "lw %[temp3], 12(%[src]) \n\t" 637 "ins %[temp0], %[temp1], 24, 8 \n\t" 638 "sra %[temp1], %[temp1], 8 \n\t" 639 "ins %[temp1], %[temp2], 16, 16 \n\t" 640 "sll %[temp2], %[temp2], 8 \n\t" 641 "balign %[temp3], %[temp2], 1 \n\t" 642 "addiu %[src], %[src], 16 \n\t" 643 "usw %[temp0], 0(%[dst]) \n\t" 644 "usw %[temp1], 4(%[dst]) \n\t" 645 "usw %[temp3], 8(%[dst]) \n\t" 646 "bne %[src], %[p_loop1_end], 0b \n\t" 647 " addiu %[dst], %[dst], 12 \n\t" 648 "3: \n\t" 649 "beq %[src], %[p_loop2_end], 2f \n\t" 650 " nop \n\t" 651 "1: \n\t" 652 "lw %[temp0], 0(%[src]) \n\t" 653 "addiu 
%[src], %[src], 4 \n\t" 654 "addiu %[dst], %[dst], 3 \n\t" 655 "ush %[temp0], -3(%[dst]) \n\t" 656 "sra %[temp0], %[temp0], 16 \n\t" 657 "bne %[src], %[p_loop2_end], 1b \n\t" 658 " sb %[temp0], -1(%[dst]) \n\t" 659 "2: \n\t" 660 ".set pop \n\t" 661 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 662 [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src) 663 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) 664 : "memory" 665 ); 666 } 667 668 //------------------------------------------------------------------------------ 669 // Entry point 670 671 extern void VP8LDspInitMIPSdspR2(void); 672 673 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) { 674 VP8LMapColor32b = MapARGB_MIPSdspR2; 675 VP8LMapColor8b = MapAlpha_MIPSdspR2; 676 677 VP8LPredictors[5] = Predictor5_MIPSdspR2; 678 VP8LPredictors[6] = Predictor6_MIPSdspR2; 679 VP8LPredictors[7] = Predictor7_MIPSdspR2; 680 VP8LPredictors[8] = Predictor8_MIPSdspR2; 681 VP8LPredictors[9] = Predictor9_MIPSdspR2; 682 VP8LPredictors[10] = Predictor10_MIPSdspR2; 683 VP8LPredictors[11] = Predictor11_MIPSdspR2; 684 VP8LPredictors[12] = Predictor12_MIPSdspR2; 685 VP8LPredictors[13] = Predictor13_MIPSdspR2; 686 687 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2; 688 VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2; 689 690 VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2; 691 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2; 692 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2; 693 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2; 694 VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2; 695 } 696 697 #else // !WEBP_USE_MIPS_DSP_R2 698 699 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2) 700 701 #endif // WEBP_USE_MIPS_DSP_R2