min_max_operations_mips.c (17045B)
1 /* 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /* 12 * This file contains the implementation of function 13 * WebRtcSpl_MaxAbsValueW16() 14 * 15 * The description header can be found in signal_processing_library.h. 16 * 17 */ 18 19 #include "common_audio/signal_processing/include/signal_processing_library.h" 20 #include "rtc_base/checks.h" 21 22 // Maximum absolute value of word16 vector. 23 int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) { 24 int32_t totMax = 0; 25 int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3; 26 size_t i, loop_size; 27 28 RTC_DCHECK_GT(length, 0); 29 30 #if defined(MIPS_DSP_R1) 31 const int32_t* tmpvec32 = (int32_t*)vector; 32 loop_size = length >> 4; 33 34 for (i = 0; i < loop_size; i++) { 35 __asm__ volatile( 36 "lw %[tmp32_0], 0(%[tmpvec32]) \n\t" 37 "lw %[tmp32_1], 4(%[tmpvec32]) \n\t" 38 "lw %[tmp32_2], 8(%[tmpvec32]) \n\t" 39 "lw %[tmp32_3], 12(%[tmpvec32]) \n\t" 40 41 "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" 42 "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" 43 "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" 44 "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" 45 46 "lw %[tmp32_0], 16(%[tmpvec32]) \n\t" 47 "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" 48 "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" 49 "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" 50 51 "lw %[tmp32_1], 20(%[tmpvec32]) \n\t" 52 "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" 53 "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" 54 "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" 55 56 "lw %[tmp32_2], 24(%[tmpvec32]) \n\t" 57 "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" 58 "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" 59 60 "lw %[tmp32_3], 28(%[tmpvec32]) \n\t" 61 "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" 62 "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" 63 "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" 64 "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" 65 66 "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" 67 "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" 68 "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" 69 "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" 70 "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" 71 "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" 72 73 "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" 74 "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" 75 76 "addiu %[tmpvec32], %[tmpvec32], 32 \n\t" 77 : [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1), 78 [tmp32_2] "=&r"(tmp32_2), [tmp32_3] "=&r"(tmp32_3), 79 [totMax] "+r"(totMax), [tmpvec32] "+r"(tmpvec32) 80 : 81 : "memory"); 82 } 83 __asm__ volatile( 84 "rotr %[tmp32_0], %[totMax], 16 \n\t" 85 "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" 86 "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" 87 "packrl.ph %[totMax], $0, %[totMax] \n\t" 88 : [tmp32_0] "=&r"(tmp32_0), [totMax] "+r"(totMax) 89 :); 90 loop_size = length & 0xf; 91 for (i = 0; i < loop_size; i++) { 92 __asm__ volatile( 93 "lh %[tmp32_0], 0(%[tmpvec32]) \n\t" 94 "addiu %[tmpvec32], %[tmpvec32], 2 \n\t" 95 "absq_s.w %[tmp32_0], %[tmp32_0] \n\t" 96 "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" 97 "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" 98 : [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1), 99 [tmpvec32] "+r"(tmpvec32), [totMax] "+r"(totMax) 100 : 101 : "memory"); 102 } 103 #else // #if defined(MIPS_DSP_R1) 104 int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX; 105 int32_t r, r1, r2, r3; 106 const int16_t* tmpvector = vector; 107 loop_size = length >> 4; 108 for (i = 0; i < loop_size; i++) { 109 __asm__ volatile( 110 "lh %[tmp32_0], 0(%[tmpvector]) \n\t" 111 "lh %[tmp32_1], 2(%[tmpvector]) \n\t" 112 "lh %[tmp32_2], 4(%[tmpvector]) \n\t" 113 "lh %[tmp32_3], 6(%[tmpvector]) \n\t" 114 115 "abs %[tmp32_0], %[tmp32_0] \n\t" 116 "abs %[tmp32_1], %[tmp32_1] \n\t" 117 "abs %[tmp32_2], %[tmp32_2] \n\t" 118 "abs %[tmp32_3], %[tmp32_3] \n\t" 119 120 "slt %[r], %[totMax], %[tmp32_0] \n\t" 121 "movn %[totMax], %[tmp32_0], %[r] \n\t" 122 "slt %[r1], %[totMax], %[tmp32_1] \n\t" 123 "movn %[totMax], %[tmp32_1], %[r1] \n\t" 124 "slt %[r2], %[totMax], %[tmp32_2] \n\t" 125 "movn %[totMax], %[tmp32_2], %[r2] \n\t" 126 "slt %[r3], %[totMax], %[tmp32_3] \n\t" 127 "movn %[totMax], %[tmp32_3], %[r3] \n\t" 128 129 "lh %[tmp32_0], 8(%[tmpvector]) \n\t" 130 "lh %[tmp32_1], 10(%[tmpvector]) \n\t" 131 "lh %[tmp32_2], 12(%[tmpvector]) \n\t" 132 "lh %[tmp32_3], 14(%[tmpvector]) \n\t" 133 134 "abs %[tmp32_0], %[tmp32_0] \n\t" 135 "abs %[tmp32_1], %[tmp32_1] \n\t" 136 "abs %[tmp32_2], %[tmp32_2] \n\t" 137 "abs %[tmp32_3], %[tmp32_3] \n\t" 138 139 "slt %[r], %[totMax], %[tmp32_0] \n\t" 140 "movn %[totMax], %[tmp32_0], %[r] \n\t" 141 "slt %[r1], %[totMax], %[tmp32_1] \n\t" 142 "movn %[totMax], %[tmp32_1], %[r1] \n\t" 143 "slt %[r2], %[totMax], %[tmp32_2] \n\t" 144 "movn %[totMax], %[tmp32_2], %[r2] \n\t" 145 "slt %[r3], %[totMax], %[tmp32_3] \n\t" 146 "movn %[totMax], %[tmp32_3], %[r3] \n\t" 147 148 "lh %[tmp32_0], 16(%[tmpvector]) \n\t" 149 "lh %[tmp32_1], 18(%[tmpvector]) \n\t" 150 "lh %[tmp32_2], 20(%[tmpvector]) \n\t" 151 "lh %[tmp32_3], 22(%[tmpvector]) \n\t" 152 153 "abs %[tmp32_0], %[tmp32_0] \n\t" 154 "abs %[tmp32_1], %[tmp32_1] \n\t" 155 "abs %[tmp32_2], %[tmp32_2] \n\t" 156 "abs %[tmp32_3], %[tmp32_3] \n\t" 157 158 "slt %[r], %[totMax], %[tmp32_0] \n\t" 159 "movn %[totMax], %[tmp32_0], %[r] \n\t" 160 "slt %[r1], %[totMax], %[tmp32_1] \n\t" 161 "movn %[totMax], %[tmp32_1], %[r1] \n\t" 162 "slt %[r2], %[totMax], %[tmp32_2] \n\t" 163 "movn %[totMax], %[tmp32_2], %[r2] \n\t" 164 "slt %[r3], %[totMax], %[tmp32_3] \n\t" 165 "movn %[totMax], %[tmp32_3], %[r3] \n\t" 166 167 "lh %[tmp32_0], 24(%[tmpvector]) \n\t" 168 "lh %[tmp32_1], 26(%[tmpvector]) \n\t" 169 "lh %[tmp32_2], 28(%[tmpvector]) \n\t" 170 "lh %[tmp32_3], 30(%[tmpvector]) \n\t" 171 172 "abs %[tmp32_0], %[tmp32_0] \n\t" 173 "abs %[tmp32_1], %[tmp32_1] \n\t" 174 "abs %[tmp32_2], %[tmp32_2] \n\t" 175 "abs %[tmp32_3], %[tmp32_3] \n\t" 176 177 "slt %[r], %[totMax], %[tmp32_0] \n\t" 178 "movn %[totMax], %[tmp32_0], %[r] \n\t" 179 "slt %[r1], %[totMax], %[tmp32_1] \n\t" 180 "movn %[totMax], %[tmp32_1], %[r1] \n\t" 181 "slt %[r2], %[totMax], %[tmp32_2] \n\t" 182 "movn %[totMax], %[tmp32_2], %[r2] \n\t" 183 "slt %[r3], %[totMax], %[tmp32_3] \n\t" 184 "movn %[totMax], %[tmp32_3], %[r3] \n\t" 185 186 "addiu %[tmpvector], %[tmpvector], 32 \n\t" 187 : [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1), 188 [tmp32_2] "=&r"(tmp32_2), [tmp32_3] "=&r"(tmp32_3), 189 [totMax] "+r"(totMax), [r] "=&r"(r), [tmpvector] "+r"(tmpvector), 190 [r1] "=&r"(r1), [r2] "=&r"(r2), [r3] "=&r"(r3) 191 : 192 : "memory"); 193 } 194 loop_size = length & 0xf; 195 for (i = 0; i < loop_size; i++) { 196 __asm__ volatile( 197 "lh %[tmp32_0], 0(%[tmpvector]) \n\t" 198 "addiu %[tmpvector], %[tmpvector], 2 \n\t" 199 "abs %[tmp32_0], %[tmp32_0] \n\t" 200 "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" 201 "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" 202 : [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1), 203 [tmpvector] "+r"(tmpvector), [totMax] "+r"(totMax) 204 : 205 : "memory"); 206 } 207 208 __asm__ volatile( 209 "slt %[r], %[v16MaxMax], %[totMax] \n\t" 210 "movn %[totMax], %[v16MaxMax], %[r] \n\t" 211 : [totMax] "+r"(totMax), [r] "=&r"(r) 212 : [v16MaxMax] "r"(v16MaxMax)); 213 #endif // #if defined(MIPS_DSP_R1) 214 return (int16_t)totMax; 215 } 216 217 #if defined(MIPS_DSP_R1_LE) 218 // Maximum absolute value of word32 vector. Version for MIPS platform. 219 int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) { 220 // Use uint32_t for the local variables, to accommodate the return value 221 // of abs(0x80000000), which is 0x80000000. 222 223 uint32_t absolute = 0, maximum = 0; 224 int tmp1 = 0, max_value = 0x7fffffff; 225 226 RTC_DCHECK_GT(length, 0); 227 228 __asm__ volatile( 229 ".set push \n\t" 230 ".set noreorder \n\t" 231 232 "1: \n\t" 233 "lw %[absolute], 0(%[vector]) \n\t" 234 "absq_s.w %[absolute], %[absolute] \n\t" 235 "addiu %[length], %[length], -1 \n\t" 236 "slt %[tmp1], %[maximum], %[absolute] \n\t" 237 "movn %[maximum], %[absolute], %[tmp1] \n\t" 238 "bgtz %[length], 1b \n\t" 239 " addiu %[vector], %[vector], 4 \n\t" 240 "slt %[tmp1], %[max_value], %[maximum] \n\t" 241 "movn %[maximum], %[max_value], %[tmp1] \n\t" 242 243 ".set pop \n\t" 244 245 : [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [absolute] "+r"(absolute) 246 : [vector] "r"(vector), [length] "r"(length), [max_value] "r"(max_value) 247 : "memory"); 248 249 return (int32_t)maximum; 250 } 251 #endif // #if defined(MIPS_DSP_R1_LE) 252 253 // Maximum value of word16 vector. Version for MIPS platform. 254 int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) { 255 int16_t maximum = WEBRTC_SPL_WORD16_MIN; 256 int tmp1; 257 int16_t value; 258 259 RTC_DCHECK_GT(length, 0); 260 261 __asm__ volatile( 262 ".set push \n\t" 263 ".set noreorder \n\t" 264 265 "1: \n\t" 266 "lh %[value], 0(%[vector]) \n\t" 267 "addiu %[length], %[length], -1 \n\t" 268 "slt %[tmp1], %[maximum], %[value] \n\t" 269 "movn %[maximum], %[value], %[tmp1] \n\t" 270 "bgtz %[length], 1b \n\t" 271 " addiu %[vector], %[vector], 2 \n\t" 272 ".set pop \n\t" 273 274 : [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [value] "=&r"(value) 275 : [vector] "r"(vector), [length] "r"(length) 276 : "memory"); 277 278 return maximum; 279 } 280 281 // Maximum value of word32 vector. Version for MIPS platform. 282 int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) { 283 int32_t maximum = WEBRTC_SPL_WORD32_MIN; 284 int tmp1, value; 285 286 RTC_DCHECK_GT(length, 0); 287 288 __asm__ volatile( 289 ".set push \n\t" 290 ".set noreorder \n\t" 291 292 "1: \n\t" 293 "lw %[value], 0(%[vector]) \n\t" 294 "addiu %[length], %[length], -1 \n\t" 295 "slt %[tmp1], %[maximum], %[value] \n\t" 296 "movn %[maximum], %[value], %[tmp1] \n\t" 297 "bgtz %[length], 1b \n\t" 298 " addiu %[vector], %[vector], 4 \n\t" 299 300 ".set pop \n\t" 301 302 : [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [value] "=&r"(value) 303 : [vector] "r"(vector), [length] "r"(length) 304 : "memory"); 305 306 return maximum; 307 } 308 309 // Minimum value of word16 vector. Version for MIPS platform. 310 int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) { 311 int16_t minimum = WEBRTC_SPL_WORD16_MAX; 312 int tmp1; 313 int16_t value; 314 315 RTC_DCHECK_GT(length, 0); 316 317 __asm__ volatile( 318 ".set push \n\t" 319 ".set noreorder \n\t" 320 321 "1: \n\t" 322 "lh %[value], 0(%[vector]) \n\t" 323 "addiu %[length], %[length], -1 \n\t" 324 "slt %[tmp1], %[value], %[minimum] \n\t" 325 "movn %[minimum], %[value], %[tmp1] \n\t" 326 "bgtz %[length], 1b \n\t" 327 " addiu %[vector], %[vector], 2 \n\t" 328 329 ".set pop \n\t" 330 331 : [tmp1] "=&r"(tmp1), [minimum] "+r"(minimum), [value] "=&r"(value) 332 : [vector] "r"(vector), [length] "r"(length) 333 : "memory"); 334 335 return minimum; 336 } 337 338 // Minimum value of word32 vector. Version for MIPS platform. 339 int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) { 340 int32_t minimum = WEBRTC_SPL_WORD32_MAX; 341 int tmp1, value; 342 343 RTC_DCHECK_GT(length, 0); 344 345 __asm__ volatile( 346 ".set push \n\t" 347 ".set noreorder \n\t" 348 349 "1: \n\t" 350 "lw %[value], 0(%[vector]) \n\t" 351 "addiu %[length], %[length], -1 \n\t" 352 "slt %[tmp1], %[value], %[minimum] \n\t" 353 "movn %[minimum], %[value], %[tmp1] \n\t" 354 "bgtz %[length], 1b \n\t" 355 " addiu %[vector], %[vector], 4 \n\t" 356 357 ".set pop \n\t" 358 359 : [tmp1] "=&r"(tmp1), [minimum] "+r"(minimum), [value] "=&r"(value) 360 : [vector] "r"(vector), [length] "r"(length) 361 : "memory"); 362 363 return minimum; 364 }