// lossless_enc_mips_dsp_r2.c (13591B)
1 // Copyright 2015 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Image transform methods for lossless encoder. 11 // 12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) 13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com) 14 15 #include "src/dsp/dsp.h" 16 17 #if defined(WEBP_USE_MIPS_DSP_R2) 18 19 #include "src/dsp/lossless.h" 20 21 static void SubtractGreenFromBlueAndRed_MIPSdspR2(uint32_t* argb_data, 22 int num_pixels) { 23 uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; 24 uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3); 25 uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3); 26 __asm__ volatile ( 27 ".set push \n\t" 28 ".set noreorder \n\t" 29 "beq %[argb_data], %[p_loop1_end], 3f \n\t" 30 " nop \n\t" 31 "0: \n\t" 32 "lw %[temp0], 0(%[argb_data]) \n\t" 33 "lw %[temp1], 4(%[argb_data]) \n\t" 34 "lw %[temp2], 8(%[argb_data]) \n\t" 35 "lw %[temp3], 12(%[argb_data]) \n\t" 36 "ext %[temp4], %[temp0], 8, 8 \n\t" 37 "ext %[temp5], %[temp1], 8, 8 \n\t" 38 "ext %[temp6], %[temp2], 8, 8 \n\t" 39 "ext %[temp7], %[temp3], 8, 8 \n\t" 40 "addiu %[argb_data], %[argb_data], 16 \n\t" 41 "replv.ph %[temp4], %[temp4] \n\t" 42 "replv.ph %[temp5], %[temp5] \n\t" 43 "replv.ph %[temp6], %[temp6] \n\t" 44 "replv.ph %[temp7], %[temp7] \n\t" 45 "subu.qb %[temp0], %[temp0], %[temp4] \n\t" 46 "subu.qb %[temp1], %[temp1], %[temp5] \n\t" 47 "subu.qb %[temp2], %[temp2], %[temp6] \n\t" 48 "subu.qb %[temp3], %[temp3], %[temp7] \n\t" 49 "sw %[temp0], -16(%[argb_data]) \n\t" 50 "sw %[temp1], -12(%[argb_data]) \n\t" 51 "sw %[temp2], 
-8(%[argb_data]) \n\t" 52 "bne %[argb_data], %[p_loop1_end], 0b \n\t" 53 " sw %[temp3], -4(%[argb_data]) \n\t" 54 "3: \n\t" 55 "beq %[argb_data], %[p_loop2_end], 2f \n\t" 56 " nop \n\t" 57 "1: \n\t" 58 "lw %[temp0], 0(%[argb_data]) \n\t" 59 "addiu %[argb_data], %[argb_data], 4 \n\t" 60 "ext %[temp4], %[temp0], 8, 8 \n\t" 61 "replv.ph %[temp4], %[temp4] \n\t" 62 "subu.qb %[temp0], %[temp0], %[temp4] \n\t" 63 "bne %[argb_data], %[p_loop2_end], 1b \n\t" 64 " sw %[temp0], -4(%[argb_data]) \n\t" 65 "2: \n\t" 66 ".set pop \n\t" 67 : [argb_data]"+&r"(argb_data), [temp0]"=&r"(temp0), 68 [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), 69 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), 70 [temp7]"=&r"(temp7) 71 : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end) 72 : "memory" 73 ); 74 } 75 76 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, 77 int8_t color) { 78 return (uint32_t)((int)(color_pred) * color) >> 5; 79 } 80 81 static void TransformColor_MIPSdspR2( 82 const VP8LMultipliers* WEBP_RESTRICT const m, uint32_t* WEBP_RESTRICT data, 83 int num_pixels) { 84 int temp0, temp1, temp2, temp3, temp4, temp5; 85 uint32_t argb, argb1, new_red, new_red1; 86 const uint32_t G_to_R = m->green_to_red; 87 const uint32_t G_to_B = m->green_to_blue; 88 const uint32_t R_to_B = m->red_to_blue; 89 uint32_t* const p_loop_end = data + (num_pixels & ~1); 90 __asm__ volatile ( 91 ".set push \n\t" 92 ".set noreorder \n\t" 93 "beq %[data], %[p_loop_end], 1f \n\t" 94 " nop \n\t" 95 "replv.ph %[temp0], %[G_to_R] \n\t" 96 "replv.ph %[temp1], %[G_to_B] \n\t" 97 "replv.ph %[temp2], %[R_to_B] \n\t" 98 "shll.ph %[temp0], %[temp0], 8 \n\t" 99 "shll.ph %[temp1], %[temp1], 8 \n\t" 100 "shll.ph %[temp2], %[temp2], 8 \n\t" 101 "shra.ph %[temp0], %[temp0], 8 \n\t" 102 "shra.ph %[temp1], %[temp1], 8 \n\t" 103 "shra.ph %[temp2], %[temp2], 8 \n\t" 104 "0: \n\t" 105 "lw %[argb], 0(%[data]) \n\t" 106 "lw %[argb1], 4(%[data]) \n\t" 107 "lhu %[new_red], 
2(%[data]) \n\t" 108 "lhu %[new_red1], 6(%[data]) \n\t" 109 "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t" 110 "precr.qb.ph %[temp4], %[argb], %[argb1] \n\t" 111 "preceu.ph.qbra %[temp3], %[temp3] \n\t" 112 "preceu.ph.qbla %[temp4], %[temp4] \n\t" 113 "shll.ph %[temp3], %[temp3], 8 \n\t" 114 "shll.ph %[temp4], %[temp4], 8 \n\t" 115 "shra.ph %[temp3], %[temp3], 8 \n\t" 116 "shra.ph %[temp4], %[temp4], 8 \n\t" 117 "mul.ph %[temp5], %[temp3], %[temp0] \n\t" 118 "mul.ph %[temp3], %[temp3], %[temp1] \n\t" 119 "mul.ph %[temp4], %[temp4], %[temp2] \n\t" 120 "addiu %[data], %[data], 8 \n\t" 121 "ins %[new_red1], %[new_red], 16, 16 \n\t" 122 "ins %[argb1], %[argb], 16, 16 \n\t" 123 "shra.ph %[temp5], %[temp5], 5 \n\t" 124 "shra.ph %[temp3], %[temp3], 5 \n\t" 125 "shra.ph %[temp4], %[temp4], 5 \n\t" 126 "subu.ph %[new_red1], %[new_red1], %[temp5] \n\t" 127 "subu.ph %[argb1], %[argb1], %[temp3] \n\t" 128 "preceu.ph.qbra %[temp5], %[new_red1] \n\t" 129 "subu.ph %[argb1], %[argb1], %[temp4] \n\t" 130 "preceu.ph.qbra %[temp3], %[argb1] \n\t" 131 "sb %[temp5], -2(%[data]) \n\t" 132 "sb %[temp3], -4(%[data]) \n\t" 133 "sra %[temp5], %[temp5], 16 \n\t" 134 "sra %[temp3], %[temp3], 16 \n\t" 135 "sb %[temp5], -6(%[data]) \n\t" 136 "bne %[data], %[p_loop_end], 0b \n\t" 137 " sb %[temp3], -8(%[data]) \n\t" 138 "1: \n\t" 139 ".set pop \n\t" 140 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), 141 [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), 142 [new_red1]"=&r"(new_red1), [new_red]"=&r"(new_red), 143 [argb]"=&r"(argb), [argb1]"=&r"(argb1), [data]"+&r"(data) 144 : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B), 145 [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end) 146 : "memory", "hi", "lo" 147 ); 148 149 if (num_pixels & 1) { 150 const uint32_t argb_ = data[0]; 151 const uint32_t green = argb_ >> 8; 152 const uint32_t red = argb_ >> 16; 153 uint32_t new_blue = argb_; 154 new_red = red; 155 new_red -= ColorTransformDelta(m->green_to_red, green); 156 new_red &= 
0xff; 157 new_blue -= ColorTransformDelta(m->green_to_blue, green); 158 new_blue -= ColorTransformDelta(m->red_to_blue, red); 159 new_blue &= 0xff; 160 data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue); 161 } 162 } 163 164 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, 165 uint8_t red_to_blue, 166 uint32_t argb) { 167 const uint32_t green = argb >> 8; 168 const uint32_t red = argb >> 16; 169 uint8_t new_blue = argb; 170 new_blue -= ColorTransformDelta(green_to_blue, green); 171 new_blue -= ColorTransformDelta(red_to_blue, red); 172 return (new_blue & 0xff); 173 } 174 175 static void CollectColorBlueTransforms_MIPSdspR2( 176 const uint32_t* WEBP_RESTRICT argb, int stride, 177 int tile_width, int tile_height, 178 int green_to_blue, int red_to_blue, uint32_t histo[]) { 179 const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff); 180 const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff); 181 const uint32_t mask = 0xff00ffu; 182 while (tile_height-- > 0) { 183 int x; 184 const uint32_t* p_argb = argb; 185 argb += stride; 186 for (x = 0; x < (tile_width >> 1); ++x) { 187 int temp0, temp1, temp2, temp3, temp4, temp5, temp6; 188 __asm__ volatile ( 189 "lw %[temp0], 0(%[p_argb]) \n\t" 190 "lw %[temp1], 4(%[p_argb]) \n\t" 191 "precr.qb.ph %[temp2], %[temp0], %[temp1] \n\t" 192 "ins %[temp1], %[temp0], 16, 16 \n\t" 193 "shra.ph %[temp2], %[temp2], 8 \n\t" 194 "shra.ph %[temp3], %[temp1], 8 \n\t" 195 "mul.ph %[temp5], %[temp2], %[rtb] \n\t" 196 "mul.ph %[temp6], %[temp3], %[gtb] \n\t" 197 "and %[temp4], %[temp1], %[mask] \n\t" 198 "addiu %[p_argb], %[p_argb], 8 \n\t" 199 "shra.ph %[temp5], %[temp5], 5 \n\t" 200 "shra.ph %[temp6], %[temp6], 5 \n\t" 201 "subu.qb %[temp2], %[temp4], %[temp5] \n\t" 202 "subu.qb %[temp2], %[temp2], %[temp6] \n\t" 203 : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 204 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), 205 [temp5]"=&r"(temp5), [temp6]"=&r"(temp6) 206 : 
[rtb]"r"(rtb), [gtb]"r"(gtb), [mask]"r"(mask) 207 : "memory", "hi", "lo" 208 ); 209 ++histo[(uint8_t)(temp2 >> 16)]; 210 ++histo[(uint8_t)temp2]; 211 } 212 if (tile_width & 1) { 213 ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)]; 214 } 215 } 216 } 217 218 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, 219 uint32_t argb) { 220 const uint32_t green = argb >> 8; 221 uint32_t new_red = argb >> 16; 222 new_red -= ColorTransformDelta(green_to_red, green); 223 return (new_red & 0xff); 224 } 225 226 static void CollectColorRedTransforms_MIPSdspR2( 227 const uint32_t* WEBP_RESTRICT argb, int stride, 228 int tile_width, int tile_height, int green_to_red, uint32_t histo[]) { 229 const int gtr = (green_to_red << 16) | (green_to_red & 0xffff); 230 while (tile_height-- > 0) { 231 int x; 232 const uint32_t* p_argb = argb; 233 argb += stride; 234 for (x = 0; x < (tile_width >> 1); ++x) { 235 int temp0, temp1, temp2, temp3, temp4; 236 __asm__ volatile ( 237 "lw %[temp0], 0(%[p_argb]) \n\t" 238 "lw %[temp1], 4(%[p_argb]) \n\t" 239 "precrq.ph.w %[temp4], %[temp0], %[temp1] \n\t" 240 "ins %[temp1], %[temp0], 16, 16 \n\t" 241 "shra.ph %[temp3], %[temp1], 8 \n\t" 242 "mul.ph %[temp2], %[temp3], %[gtr] \n\t" 243 "addiu %[p_argb], %[p_argb], 8 \n\t" 244 "shra.ph %[temp2], %[temp2], 5 \n\t" 245 "subu.qb %[temp2], %[temp4], %[temp2] \n\t" 246 : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), 247 [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4) 248 : [gtr]"r"(gtr) 249 : "memory", "hi", "lo" 250 ); 251 ++histo[(uint8_t)(temp2 >> 16)]; 252 ++histo[(uint8_t)temp2]; 253 } 254 if (tile_width & 1) { 255 ++histo[TransformColorRed(green_to_red, *p_argb)]; 256 } 257 } 258 } 259 260 //------------------------------------------------------------------------------ 261 // Entry point 262 263 extern void VP8LEncDspInitMIPSdspR2(void); 264 265 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) { 266 
VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_MIPSdspR2; 267 VP8LTransformColor = TransformColor_MIPSdspR2; 268 VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_MIPSdspR2; 269 VP8LCollectColorRedTransforms = CollectColorRedTransforms_MIPSdspR2; 270 } 271 272 #else // !WEBP_USE_MIPS_DSP_R2 273 274 WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPSdspR2) 275 276 #endif // WEBP_USE_MIPS_DSP_R2