upsampling_mips_dsp_r2.c (13794B)
1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // YUV to RGB upsampling functions. 11 // 12 // Author(s): Branimir Vasic (branimir.vasic@imgtec.com) 13 // Djordje Pesut (djordje.pesut@imgtec.com) 14 15 #include "src/dsp/dsp.h" 16 17 #if defined(WEBP_USE_MIPS_DSP_R2) 18 19 #include <assert.h> 20 #include "src/dsp/yuv.h" 21 22 #define YUV_TO_RGB(Y, U, V, R, G, B) do { \ 23 const int t1 = MultHi(Y, 19077); \ 24 const int t2 = MultHi(V, 13320); \ 25 R = MultHi(V, 26149); \ 26 G = MultHi(U, 6419); \ 27 B = MultHi(U, 33050); \ 28 R = t1 + R; \ 29 G = t1 - G; \ 30 B = t1 + B; \ 31 R = R - 14234; \ 32 G = G - t2 + 8708; \ 33 B = B - 17685; \ 34 __asm__ volatile ( \ 35 "shll_s.w %[" #R "], %[" #R "], 17 \n\t" \ 36 "shll_s.w %[" #G "], %[" #G "], 17 \n\t" \ 37 "shll_s.w %[" #B "], %[" #B "], 17 \n\t" \ 38 "precrqu_s.qb.ph %[" #R "], %[" #R "], $zero \n\t" \ 39 "precrqu_s.qb.ph %[" #G "], %[" #G "], $zero \n\t" \ 40 "precrqu_s.qb.ph %[" #B "], %[" #B "], $zero \n\t" \ 41 "srl %[" #R "], %[" #R "], 24 \n\t" \ 42 "srl %[" #G "], %[" #G "], 24 \n\t" \ 43 "srl %[" #B "], %[" #B "], 24 \n\t" \ 44 : [R]"+r"(R), [G]"+r"(G), [B]"+r"(B) \ 45 : \ 46 ); \ 47 } while (0) 48 49 #if !defined(WEBP_REDUCE_CSP) 50 static WEBP_INLINE void YuvToRgb(int y, int u, int v, uint8_t* const rgb) { 51 int r, g, b; 52 YUV_TO_RGB(y, u, v, r, g, b); 53 rgb[0] = r; 54 rgb[1] = g; 55 rgb[2] = b; 56 } 57 static WEBP_INLINE void YuvToBgr(int y, int u, int v, uint8_t* const bgr) { 58 int r, g, b; 59 YUV_TO_RGB(y, u, v, r, g, b); 60 bgr[0] = b; 61 bgr[1] = g; 62 bgr[2] = r; 63 } 64 static WEBP_INLINE void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) { 65 int r, g, b; 66 YUV_TO_RGB(y, u, v, r, g, b); 67 { 68 const int rg = (r & 0xf8) | (g >> 5); 69 const int gb = ((g << 3) & 0xe0) | (b >> 3); 70 #if (WEBP_SWAP_16BIT_CSP == 1) 71 rgb[0] = gb; 72 rgb[1] = rg; 73 #else 74 rgb[0] = rg; 75 rgb[1] = gb; 76 #endif 77 } 78 } 79 static WEBP_INLINE void YuvToRgba4444(int y, int u, int v, 80 uint8_t* const argb) { 81 int r, g, b; 82 YUV_TO_RGB(y, u, v, r, g, b); 83 { 84 const int rg = (r & 0xf0) | (g >> 4); 85 const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits 86 #if (WEBP_SWAP_16BIT_CSP == 1) 87 argb[0] = ba; 88 argb[1] = rg; 89 #else 90 argb[0] = rg; 91 argb[1] = ba; 92 #endif 93 } 94 } 95 #endif // WEBP_REDUCE_CSP 96 97 //----------------------------------------------------------------------------- 98 // Alpha handling variants 99 100 #if !defined(WEBP_REDUCE_CSP) 101 static WEBP_INLINE void YuvToArgb(uint8_t y, uint8_t u, uint8_t v, 102 uint8_t* const argb) { 103 int r, g, b; 104 YUV_TO_RGB(y, u, v, r, g, b); 105 argb[0] = 0xff; 106 argb[1] = r; 107 argb[2] = g; 108 argb[3] = b; 109 } 110 #endif // WEBP_REDUCE_CSP 111 static WEBP_INLINE void YuvToBgra(uint8_t y, uint8_t u, uint8_t v, 112 uint8_t* const bgra) { 113 int r, g, b; 114 YUV_TO_RGB(y, u, v, r, g, b); 115 bgra[0] = b; 116 bgra[1] = g; 117 bgra[2] = r; 118 bgra[3] = 0xff; 119 } 120 static WEBP_INLINE void YuvToRgba(uint8_t y, uint8_t u, uint8_t v, 121 uint8_t* const rgba) { 122 int r, g, b; 123 YUV_TO_RGB(y, u, v, r, g, b); 124 rgba[0] = r; 125 rgba[1] = g; 126 rgba[2] = b; 127 rgba[3] = 0xff; 128 } 129 130 //------------------------------------------------------------------------------ 131 // Fancy upsampler 132 133 #ifdef FANCY_UPSAMPLING 134 135 // Given samples laid out in a square as: 136 // [a b] 137 // [c d] 138 // we interpolate u/v as: 139 // ([9*a + 3*b + 3*c + d 3*a + 9*b + 3*c + d] + [8 8]) / 16 140 // ([3*a + b + 9*c + 3*d a + 3*b + 3*c + 9*d] [8 8]) / 16 141 142 // We process u and v together stashed into 32bit (16bit each). 143 #define LOAD_UV(u, v) ((u) | ((v) << 16)) 144 145 #define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \ 146 static void FUNC_NAME(const uint8_t* WEBP_RESTRICT top_y, \ 147 const uint8_t* WEBP_RESTRICT bottom_y, \ 148 const uint8_t* WEBP_RESTRICT top_u, \ 149 const uint8_t* WEBP_RESTRICT top_v, \ 150 const uint8_t* WEBP_RESTRICT cur_u, \ 151 const uint8_t* WEBP_RESTRICT cur_v, \ 152 uint8_t* WEBP_RESTRICT top_dst, \ 153 uint8_t* WEBP_RESTRICT bottom_dst, int len) { \ 154 int x; \ 155 const int last_pixel_pair = (len - 1) >> 1; \ 156 uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]); /* top-left sample */ \ 157 uint32_t l_uv = LOAD_UV(cur_u[0], cur_v[0]); /* left-sample */ \ 158 assert(top_y != NULL); \ 159 { \ 160 const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ 161 FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \ 162 } \ 163 if (bottom_y != NULL) { \ 164 const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \ 165 FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst); \ 166 } \ 167 for (x = 1; x <= last_pixel_pair; ++x) { \ 168 const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]); /* top sample */ \ 169 const uint32_t uv = LOAD_UV(cur_u[x], cur_v[x]); /* sample */ \ 170 /* precompute invariant values associated with first and second diagonals*/\ 171 const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u; \ 172 const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3; \ 173 const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3; \ 174 { \ 175 const uint32_t uv0 = (diag_12 + tl_uv) >> 1; \ 176 const uint32_t uv1 = (diag_03 + t_uv) >> 1; \ 177 FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \ 178 top_dst + (2 * x - 1) * XSTEP); \ 179 FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16), \ 180 top_dst + (2 * x - 0) * XSTEP); \ 181 } \ 182 if (bottom_y != NULL) { \ 183 const uint32_t uv0 = (diag_03 + l_uv) >> 1; \ 184 const uint32_t uv1 = (diag_12 + uv) >> 1; \ 185 FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16), \ 186 bottom_dst + (2 * x - 1) * XSTEP); \ 187 FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16), \ 188 bottom_dst + (2 * x + 0) * XSTEP); \ 189 } \ 190 tl_uv = t_uv; \ 191 l_uv = uv; \ 192 } \ 193 if (!(len & 1)) { \ 194 { \ 195 const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ 196 FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16), \ 197 top_dst + (len - 1) * XSTEP); \ 198 } \ 199 if (bottom_y != NULL) { \ 200 const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2; \ 201 FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16), \ 202 bottom_dst + (len - 1) * XSTEP); \ 203 } \ 204 } \ 205 } 206 207 // All variants implemented. 208 UPSAMPLE_FUNC(UpsampleRgbaLinePair, YuvToRgba, 4) 209 UPSAMPLE_FUNC(UpsampleBgraLinePair, YuvToBgra, 4) 210 #if !defined(WEBP_REDUCE_CSP) 211 UPSAMPLE_FUNC(UpsampleRgbLinePair, YuvToRgb, 3) 212 UPSAMPLE_FUNC(UpsampleBgrLinePair, YuvToBgr, 3) 213 UPSAMPLE_FUNC(UpsampleArgbLinePair, YuvToArgb, 4) 214 UPSAMPLE_FUNC(UpsampleRgba4444LinePair, YuvToRgba4444, 2) 215 UPSAMPLE_FUNC(UpsampleRgb565LinePair, YuvToRgb565, 2) 216 #endif // WEBP_REDUCE_CSP 217 218 #undef LOAD_UV 219 #undef UPSAMPLE_FUNC 220 221 //------------------------------------------------------------------------------ 222 // Entry point 223 224 extern void WebPInitUpsamplersMIPSdspR2(void); 225 226 WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersMIPSdspR2(void) { 227 WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair; 228 WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair; 229 WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair; 230 WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair; 231 #if !defined(WEBP_REDUCE_CSP) 232 WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePair; 233 WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePair; 234 WebPUpsamplers[MODE_ARGB] = UpsampleArgbLinePair; 235 WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair; 236 WebPUpsamplers[MODE_RGB_565] = UpsampleRgb565LinePair; 237 WebPUpsamplers[MODE_Argb] = UpsampleArgbLinePair; 238 WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair; 239 #endif // WEBP_REDUCE_CSP 240 } 241 242 #endif // FANCY_UPSAMPLING 243 244 //------------------------------------------------------------------------------ 245 // YUV444 converter 246 247 #define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) \ 248 static void FUNC_NAME(const uint8_t* WEBP_RESTRICT y, \ 249 const uint8_t* WEBP_RESTRICT u, \ 250 const uint8_t* WEBP_RESTRICT v, \ 251 uint8_t* WEBP_RESTRICT dst, int len) { \ 252 int i; \ 253 for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]); \ 254 } 255 256 YUV444_FUNC(Yuv444ToRgba, YuvToRgba, 4) 257 YUV444_FUNC(Yuv444ToBgra, YuvToBgra, 4) 258 #if !defined(WEBP_REDUCE_CSP) 259 YUV444_FUNC(Yuv444ToRgb, YuvToRgb, 3) 260 YUV444_FUNC(Yuv444ToBgr, YuvToBgr, 3) 261 YUV444_FUNC(Yuv444ToArgb, YuvToArgb, 4) 262 YUV444_FUNC(Yuv444ToRgba4444, YuvToRgba4444, 2) 263 YUV444_FUNC(Yuv444ToRgb565, YuvToRgb565, 2) 264 #endif // WEBP_REDUCE_CSP 265 266 #undef YUV444_FUNC 267 268 //------------------------------------------------------------------------------ 269 // Entry point 270 271 extern void WebPInitYUV444ConvertersMIPSdspR2(void); 272 273 WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersMIPSdspR2(void) { 274 WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba; 275 WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra; 276 WebPYUV444Converters[MODE_rgbA] = Yuv444ToRgba; 277 WebPYUV444Converters[MODE_bgrA] = Yuv444ToBgra; 278 #if !defined(WEBP_REDUCE_CSP) 279 WebPYUV444Converters[MODE_RGB] = Yuv444ToRgb; 280 WebPYUV444Converters[MODE_BGR] = Yuv444ToBgr; 281 WebPYUV444Converters[MODE_ARGB] = Yuv444ToArgb; 282 WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444; 283 WebPYUV444Converters[MODE_RGB_565] = Yuv444ToRgb565; 284 WebPYUV444Converters[MODE_Argb] = Yuv444ToArgb; 285 WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444; 286 #endif // WEBP_REDUCE_CSP 287 } 288 289 #else // !WEBP_USE_MIPS_DSP_R2 290 291 WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersMIPSdspR2) 292 293 #endif // WEBP_USE_MIPS_DSP_R2 294 295 #if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_MIPS_DSP_R2)) 296 WEBP_DSP_INIT_STUB(WebPInitUpsamplersMIPSdspR2) 297 #endif