mathops.h (22889B)
1 /* Copyright (c) 2002-2008 Jean-Marc Valin 2 Copyright (c) 2007-2008 CSIRO 3 Copyright (c) 2007-2009 Xiph.Org Foundation 4 Copyright (c) 2024 Arm Limited 5 Written by Jean-Marc Valin, and Yunho Huh */ 6 /** 7 @file mathops.h 8 @brief Various math functions 9 */ 10 /* 11 Redistribution and use in source and binary forms, with or without 12 modification, are permitted provided that the following conditions 13 are met: 14 15 - Redistributions of source code must retain the above copyright 16 notice, this list of conditions and the following disclaimer. 17 18 - Redistributions in binary form must reproduce the above copyright 19 notice, this list of conditions and the following disclaimer in the 20 documentation and/or other materials provided with the distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 26 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 #ifndef MATHOPS_H 36 #define MATHOPS_H 37 38 #include "arch.h" 39 #include "entcode.h" 40 #include "os_support.h" 41 42 43 #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) 44 #include "arm/mathops_arm.h" 45 #endif 46 47 #define PI 3.1415926535897931 48 49 /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */ 50 #define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15) 51 52 unsigned isqrt32(opus_uint32 _val); 53 54 /* CELT doesn't need it for fixed-point, by analysis.c does. */ 55 #if !defined(FIXED_POINT) || defined(ANALYSIS_C) 56 #define cA 0.43157974f 57 #define cB 0.67848403f 58 #define cC 0.08595542f 59 #define cE ((float)PI/2) 60 static OPUS_INLINE float fast_atan2f(float y, float x) { 61 float x2, y2; 62 x2 = x*x; 63 y2 = y*y; 64 /* For very small values, we don't care about the answer, so 65 we can just return 0. */ 66 if (x2 + y2 < 1e-18f) 67 { 68 return 0; 69 } 70 if(x2<y2){ 71 float den = (y2 + cB*x2) * (y2 + cC*x2); 72 return -x*y*(y2 + cA*x2) / den + (y<0 ? -cE : cE); 73 }else{ 74 float den = (x2 + cB*y2) * (x2 + cC*y2); 75 return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE); 76 } 77 } 78 #undef cA 79 #undef cB 80 #undef cC 81 #undef cE 82 #endif 83 84 85 #ifndef OVERRIDE_CELT_MAXABS16 86 static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len) 87 { 88 int i; 89 opus_val16 maxval = 0; 90 opus_val16 minval = 0; 91 for (i=0;i<len;i++) 92 { 93 maxval = MAX16(maxval, x[i]); 94 minval = MIN16(minval, x[i]); 95 } 96 return MAX32(EXTEND32(maxval),-EXTEND32(minval)); 97 } 98 #endif 99 100 #if defined(ENABLE_RES24) && defined(FIXED_POINT) 101 static OPUS_INLINE opus_res celt_maxabs_res(const opus_res *x, int len) 102 { 103 int i; 104 opus_res maxval = 0; 105 opus_res minval = 0; 106 for (i=0;i<len;i++) 107 { 108 maxval = MAX32(maxval, x[i]); 109 minval = MIN32(minval, x[i]); 110 } 111 /* opus_res should never reach such amplitude, so we should be safe. */ 112 celt_sig_assert(minval != -2147483648); 113 return MAX32(maxval,-minval); 114 } 115 #else 116 #define celt_maxabs_res celt_maxabs16 117 #endif 118 119 120 #ifndef OVERRIDE_CELT_MAXABS32 121 #ifdef FIXED_POINT 122 static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len) 123 { 124 int i; 125 opus_val32 maxval = 0; 126 opus_val32 minval = 0; 127 for (i=0;i<len;i++) 128 { 129 maxval = MAX32(maxval, x[i]); 130 minval = MIN32(minval, x[i]); 131 } 132 return MAX32(maxval, -minval); 133 } 134 #else 135 #define celt_maxabs32(x,len) celt_maxabs16(x,len) 136 #endif 137 #endif 138 139 #ifndef FIXED_POINT 140 /* Calculates the arctangent of x using a Remez approximation of order 15, 141 * incorporating only odd-powered terms. */ 142 static OPUS_INLINE float celt_atan_norm(float x) 143 { 144 #define ATAN2_2_OVER_PI 0.636619772367581f 145 float x_sq = x * x; 146 147 /* Polynomial coefficients approximated in the [0, 1] range. 148 * Lolremez command: lolremez --degree 6 --range "0:1" 149 * "(atan(sqrt(x))-sqrt(x))/(x*sqrt(x))" "1/(sqrt(x)*x)" 150 * Please note that ATAN2_COEFF_A01 is fixed to 1.0f. */ 151 #define ATAN2_COEFF_A03 -3.3331659436225891113281250000e-01f 152 #define ATAN2_COEFF_A05 1.99627041816711425781250000000e-01f 153 #define ATAN2_COEFF_A07 -1.3976582884788513183593750000e-01f 154 #define ATAN2_COEFF_A09 9.79423448443412780761718750000e-02f 155 #define ATAN2_COEFF_A11 -5.7773590087890625000000000000e-02f 156 #define ATAN2_COEFF_A13 2.30401363223791122436523437500e-02f 157 #define ATAN2_COEFF_A15 -4.3554059229791164398193359375e-03f 158 return ATAN2_2_OVER_PI * (x + x * x_sq * (ATAN2_COEFF_A03 159 + x_sq * (ATAN2_COEFF_A05 160 + x_sq * (ATAN2_COEFF_A07 161 + x_sq * (ATAN2_COEFF_A09 162 + x_sq * (ATAN2_COEFF_A11 163 + x_sq * (ATAN2_COEFF_A13 164 + x_sq * (ATAN2_COEFF_A15)))))))); 165 } 166 167 /* Calculates the arctangent of y/x, returning an approximate value in radians. 168 * Please refer to the linked wiki page (https://en.wikipedia.org/wiki/Atan2) 169 * to learn how atan2 results are computed. */ 170 static OPUS_INLINE float celt_atan2p_norm(float y, float x) 171 { 172 celt_sig_assert(x>=0 && y>=0); 173 174 /* For very small values, we don't care about the answer. */ 175 if ((x*x + y*y) < 1e-18f) 176 { 177 return 0; 178 } 179 180 if (y < x) 181 { 182 return celt_atan_norm(y / x); 183 } else { 184 return 1.f - celt_atan_norm(x / y); 185 } 186 } 187 #endif 188 189 #if !defined(FIXED_POINT) || defined(ENABLE_QEXT) 190 /* Computes estimated cosine values for (PI/2 * x) using only terms with even 191 * exponents. */ 192 static OPUS_INLINE float celt_cos_norm2(float x) 193 { 194 float x_norm_sq; 195 int output_sign; 196 /* Restrict x to [-1, 3]. */ 197 x -= 4*floor(.25*(x+1)); 198 /* Negative sign for [1, 3]. */ 199 output_sign = 1 - 2*(x>1); 200 /* Restrict to [-1, 1]. */ 201 x -= 2*(x>1); 202 203 /* The cosine function, cos(x), has a Taylor series representation consisting 204 * exclusively of even-powered polynomial terms. */ 205 x_norm_sq = x * x; 206 207 /* Polynomial coefficients approximated in the [0, 1] range using only terms 208 * with even exponents. 209 * Lolremez command: lolremez --degree 4 --range 0:1 "cos(sqrt(x)*pi*0.5)" */ 210 #define COS_COEFF_A0 9.999999403953552246093750000000e-01f 211 #define COS_COEFF_A2 -1.233698248863220214843750000000000f 212 #define COS_COEFF_A4 2.536507546901702880859375000000e-01f 213 #define COS_COEFF_A6 -2.08106283098459243774414062500e-02f 214 #define COS_COEFF_A8 8.581906440667808055877685546875e-04f 215 return output_sign * (COS_COEFF_A0 + x_norm_sq * (COS_COEFF_A2 + 216 x_norm_sq * (COS_COEFF_A4 + 217 x_norm_sq * (COS_COEFF_A6 + 218 x_norm_sq * (COS_COEFF_A8))))); 219 } 220 221 #endif 222 223 #ifndef FIXED_POINT 224 225 #define celt_sqrt(x) ((float)sqrt(x)) 226 #define celt_sqrt32(x) ((float)sqrt(x)) 227 #define celt_rsqrt(x) (1.f/celt_sqrt(x)) 228 #define celt_rsqrt_norm(x) (celt_rsqrt(x)) 229 #define celt_rsqrt_norm32(x) (celt_rsqrt(x)) 230 #define celt_cos_norm(x) ((float)cos((.5f*PI)*(x))) 231 #define celt_rcp(x) (1.f/(x)) 232 #define celt_div(a,b) ((a)/(b)) 233 #define frac_div32(a,b) ((float)(a)/(b)) 234 #define frac_div32_q29(a,b) frac_div32(a,b) 235 236 #ifdef FLOAT_APPROX 237 /* Calculates the base-2 logarithm (log2(x)) of a number. It is designed for 238 * systems using radix-2 floating-point representation, with the exponent 239 * located at bits 23 to 30 and an offset of 127. Note that special cases like 240 * denormalized numbers, positive/negative infinity, and NaN are not handled. 241 * log2(x) = log2(x^exponent * mantissa) 242 * = exponent + log2(mantissa) */ 243 244 /* Log2 x normalization single precision coefficients calculated by 245 * 1 / (1 + 0.125 * index). 246 * Coefficients in Double Precision 247 * double log2_x_norm_coeff[8] = { 248 * 1.0000000000000000000, 8.888888888888888e-01, 249 * 8.000000000000000e-01, 7.272727272727273e-01, 250 * 6.666666666666666e-01, 6.153846153846154e-01, 251 * 5.714285714285714e-01, 5.333333333333333e-01} */ 252 static const float log2_x_norm_coeff[8] = { 253 1.000000000000000000000000000f, 8.88888895511627197265625e-01f, 254 8.00000000000000000000000e-01f, 7.27272748947143554687500e-01f, 255 6.66666686534881591796875e-01f, 6.15384638309478759765625e-01f, 256 5.71428596973419189453125e-01f, 5.33333361148834228515625e-01f}; 257 258 /* Log2 y normalization single precision coefficients calculated by 259 * log2(1 + 0.125 * index). 260 * Coefficients in Double Precision 261 * double log2_y_norm_coeff[8] = { 262 * 0.0000000000000000000, 1.699250014423124e-01, 263 * 3.219280948873623e-01, 4.594316186372973e-01, 264 * 5.849625007211562e-01, 7.004397181410922e-01, 265 * 8.073549220576041e-01, 9.068905956085185e-01}; */ 266 static const float log2_y_norm_coeff[8] = { 267 0.0000000000000000000000000000f, 1.699250042438507080078125e-01f, 268 3.219280838966369628906250e-01f, 4.594316184520721435546875e-01f, 269 5.849624872207641601562500e-01f, 7.004396915435791015625000e-01f, 270 8.073549270629882812500000e-01f, 9.068905711174011230468750e-01f}; 271 272 static OPUS_INLINE float celt_log2(float x) 273 { 274 opus_int32 integer; 275 opus_int32 range_idx; 276 union { 277 float f; 278 opus_uint32 i; 279 } in; 280 in.f = x; 281 integer = (opus_int32)(in.i>>23)-127; 282 in.i = (opus_int32)in.i - (opus_int32)((opus_uint32)integer<<23); 283 284 /* Normalize the mantissa range from [1, 2] to [1,1.125], and then shift x 285 * by 1.0625 to [-0.0625, 0.0625]. */ 286 range_idx = (in.i >> 20) & 0x7; 287 in.f = in.f * log2_x_norm_coeff[range_idx] - 1.0625f; 288 289 /* Polynomial coefficients approximated in the [1, 1.125] range. 290 * Lolremez command: lolremez --degree 4 --range -0.0625:0.0625 291 * "log(x+1.0625)/log(2)" 292 * Coefficients in Double Precision 293 * A0: 8.7462840624502679e-2 A1: 1.3578296070972002 294 * A2: -6.3897703690210047e-1 A3: 4.0197125617419959e-1 295 * A4: -2.8415445877832832e-1 */ 296 #define LOG2_COEFF_A0 8.74628424644470214843750000e-02f 297 #define LOG2_COEFF_A1 1.357829570770263671875000000000f 298 #define LOG2_COEFF_A2 -6.3897705078125000000000000e-01f 299 #define LOG2_COEFF_A3 4.01971250772476196289062500e-01f 300 #define LOG2_COEFF_A4 -2.8415444493293762207031250e-01f 301 in.f = LOG2_COEFF_A0 + in.f * (LOG2_COEFF_A1 302 + in.f * (LOG2_COEFF_A2 303 + in.f * (LOG2_COEFF_A3 304 + in.f * (LOG2_COEFF_A4)))); 305 return integer + in.f + log2_y_norm_coeff[range_idx]; 306 } 307 308 /* Calculates an approximation of 2^x. The approximation was achieved by 309 * employing a base-2 exponential function and utilizing a Remez approximation 310 * of order 5, ensuring a controlled relative error. 311 * exp2(x) = exp2(integer + fraction) 312 * = exp2(integer) * exp2(fraction) */ 313 static OPUS_INLINE float celt_exp2(float x) 314 { 315 opus_int32 integer; 316 float frac; 317 union { 318 float f; 319 opus_uint32 i; 320 } res; 321 integer = (int)floor(x); 322 if (integer < -50) 323 return 0; 324 frac = x-integer; 325 326 /* Polynomial coefficients approximated in the [0, 1] range. 327 * Lolremez command: lolremez --degree 5 --range 0:1 328 * "exp(x*0.693147180559945)" "exp(x*0.693147180559945)" 329 * NOTE: log(2) ~ 0.693147180559945 */ 330 #define EXP2_COEFF_A0 9.999999403953552246093750000000e-01f 331 #define EXP2_COEFF_A1 6.931530833244323730468750000000e-01f 332 #define EXP2_COEFF_A2 2.401536107063293457031250000000e-01f 333 #define EXP2_COEFF_A3 5.582631751894950866699218750000e-02f 334 #define EXP2_COEFF_A4 8.989339694380760192871093750000e-03f 335 #define EXP2_COEFF_A5 1.877576694823801517486572265625e-03f 336 res.f = EXP2_COEFF_A0 + frac * (EXP2_COEFF_A1 337 + frac * (EXP2_COEFF_A2 338 + frac * (EXP2_COEFF_A3 339 + frac * (EXP2_COEFF_A4 340 + frac * (EXP2_COEFF_A5))))); 341 res.i = (opus_uint32)((opus_int32)res.i + (opus_int32)((opus_uint32)integer<<23)) & 0x7fffffff; 342 return res.f; 343 } 344 345 #else 346 #define celt_log2(x) ((float)(1.442695040888963387*log(x))) 347 #define celt_exp2(x) ((float)exp(0.6931471805599453094*(x))) 348 #endif 349 350 #define celt_exp2_db celt_exp2 351 #define celt_log2_db celt_log2 352 353 #define celt_sin(x) celt_cos_norm2((0.5f*PI) * (x) - 1.0f) 354 #define celt_log(x) (celt_log2(x) * 0.6931471805599453f) 355 #define celt_exp(x) (celt_exp2((x) * 1.4426950408889634f)) 356 357 #endif 358 359 #ifdef FIXED_POINT 360 361 #include "os_support.h" 362 363 #ifndef OVERRIDE_CELT_ILOG2 364 /** Integer log in base2. Undefined for zero and negative numbers */ 365 static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x) 366 { 367 celt_sig_assert(x>0); 368 return EC_ILOG(x)-1; 369 } 370 #endif 371 372 373 /** Integer log in base2. Defined for zero, but not for negative numbers */ 374 static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x) 375 { 376 return x <= 0 ? 0 : celt_ilog2(x); 377 } 378 379 opus_val16 celt_rsqrt_norm(opus_val32 x); 380 381 opus_val32 celt_rsqrt_norm32(opus_val32 x); 382 383 opus_val32 celt_sqrt(opus_val32 x); 384 385 opus_val32 celt_sqrt32(opus_val32 x); 386 387 opus_val16 celt_cos_norm(opus_val32 x); 388 389 opus_val32 celt_cos_norm32(opus_val32 x); 390 391 /** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */ 392 static OPUS_INLINE opus_val16 celt_log2(opus_val32 x) 393 { 394 int i; 395 opus_val16 n, frac; 396 /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605, 397 0.15530808010959576, -0.08556153059057618 */ 398 static const opus_val16 C[5] = {-6801+(1<<(13-10)), 15746, -5217, 2545, -1401}; 399 if (x==0) 400 return -32767; 401 i = celt_ilog2(x); 402 n = VSHR32(x,i-15)-32768-16384; 403 frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4])))))))); 404 return SHL32(i-13,10)+SHR32(frac,14-10); 405 } 406 407 /* 408 K0 = 1 409 K1 = log(2) 410 K2 = 3-4*log(2) 411 K3 = 3*log(2) - 2 412 */ 413 #define D0 16383 414 #define D1 22804 415 #define D2 14819 416 #define D3 10204 417 418 static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x) 419 { 420 opus_val16 frac; 421 frac = SHL16(x, 4); 422 return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac)))))); 423 } 424 425 #undef D0 426 #undef D1 427 #undef D2 428 #undef D3 429 430 /** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */ 431 static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x) 432 { 433 int integer; 434 opus_val16 frac; 435 integer = SHR16(x,10); 436 if (integer>14) 437 return 0x7f000000; 438 else if (integer < -15) 439 return 0; 440 frac = celt_exp2_frac(x-SHL16(integer,10)); 441 return VSHR32(EXTEND32(frac), -integer-2); 442 } 443 444 #ifdef ENABLE_QEXT 445 446 /* Calculates the base-2 logarithm of a Q14 input value. The result is returned 447 * in Q(DB_SHIFT). If the input value is 0, the function will output -32.0f. */ 448 static OPUS_INLINE opus_val32 celt_log2_db(opus_val32 x) { 449 /* Q30 */ 450 static const opus_val32 log2_x_norm_coeff[8] = { 451 1073741824, 954437184, 858993472, 780903168, 452 715827904, 660764224, 613566784, 572662336}; 453 /* Q24 */ 454 static const opus_val32 log2_y_norm_coeff[8] = { 455 0, 2850868, 5401057, 7707983, 456 9814042, 11751428, 13545168, 15215099}; 457 static const opus_val32 LOG2_COEFF_A0 = 1467383; /* Q24 */ 458 static const opus_val32 LOG2_COEFF_A1 = 182244800; /* Q27 */ 459 static const opus_val32 LOG2_COEFF_A2 = -21440512; /* Q25 */ 460 static const opus_val32 LOG2_COEFF_A3 = 107903336; /* Q28 */ 461 static const opus_val32 LOG2_COEFF_A4 = -610217024; /* Q31 */ 462 463 opus_int32 integer, norm_coeff_idx, tmp; 464 opus_val32 mantissa; 465 if (x==0) { 466 return -536870912; /* -32.0f */ 467 } 468 integer = SUB32(celt_ilog2(x), 14); /* Q0 */ 469 mantissa = VSHR32(x, integer + 14 - 29); /* Q29 */ 470 norm_coeff_idx = SHR32(mantissa, 29 - 3) & 0x7; 471 /* mantissa is in Q28 (29 + Q_NORM_CONST - 31 where Q_NORM_CONST is Q30) 472 * 285212672 (Q28) is 1.0625f. */ 473 mantissa = SUB32(MULT32_32_Q31(mantissa, log2_x_norm_coeff[norm_coeff_idx]), 474 285212672); 475 476 /* q_a3(Q28): q_mantissa + q_a4 - 31 477 * q_a2(Q25): q_mantissa + q_a3 - 31 478 * q_a1(Q27): q_mantissa + q_a2 - 31 + 5 479 * q_a0(Q24): q_mantissa + q_a1 - 31 480 * where q_mantissa is Q28 */ 481 /* Split evaluation in steps to avoid exploding macro expansion. */ 482 tmp = MULT32_32_Q31(mantissa, LOG2_COEFF_A4); 483 tmp = MULT32_32_Q31(mantissa, ADD32(LOG2_COEFF_A3, tmp)); 484 tmp = SHL32(MULT32_32_Q31(mantissa, ADD32(LOG2_COEFF_A2, tmp)), 5 /* SHL32 for LOG2_COEFF_A1 */); 485 tmp = MULT32_32_Q31(mantissa, ADD32(LOG2_COEFF_A1, tmp)); 486 return ADD32(log2_y_norm_coeff[norm_coeff_idx], 487 ADD32(SHL32(integer, DB_SHIFT), 488 ADD32(LOG2_COEFF_A0, tmp))); 489 } 490 491 /* Calculates exp2 for Q28 within a specific range (0 to 1.0) using fixed-point 492 * arithmetic. The input number must be adjusted for Q DB_SHIFT. */ 493 static OPUS_INLINE opus_val32 celt_exp2_db_frac(opus_val32 x) 494 { 495 /* Approximation constants. */ 496 static const opus_int32 EXP2_COEFF_A0 = 268435440; /* Q28 */ 497 static const opus_int32 EXP2_COEFF_A1 = 744267456; /* Q30 */ 498 static const opus_int32 EXP2_COEFF_A2 = 1031451904; /* Q32 */ 499 static const opus_int32 EXP2_COEFF_A3 = 959088832; /* Q34 */ 500 static const opus_int32 EXP2_COEFF_A4 = 617742720; /* Q36 */ 501 static const opus_int32 EXP2_COEFF_A5 = 516104352; /* Q38 */ 502 opus_int32 tmp; 503 /* Converts input value from Q24 to Q29. */ 504 opus_val32 x_q29 = SHL32(x, 29 - 24); 505 /* Split evaluation in steps to avoid exploding macro expansion. */ 506 tmp = ADD32(EXP2_COEFF_A4, MULT32_32_Q31(x_q29, EXP2_COEFF_A5)); 507 tmp = ADD32(EXP2_COEFF_A3, MULT32_32_Q31(x_q29, tmp)); 508 tmp = ADD32(EXP2_COEFF_A2, MULT32_32_Q31(x_q29, tmp)); 509 tmp = ADD32(EXP2_COEFF_A1, MULT32_32_Q31(x_q29, tmp)); 510 return ADD32(EXP2_COEFF_A0, MULT32_32_Q31(x_q29, tmp)); 511 } 512 513 /* Calculates exp2 for Q16 using fixed-point arithmetic. The input number must 514 * be adjusted for Q DB_SHIFT. */ 515 static OPUS_INLINE opus_val32 celt_exp2_db(opus_val32 x) 516 { 517 int integer; 518 opus_val32 frac; 519 integer = SHR32(x,DB_SHIFT); 520 if (integer>14) 521 return 0x7f000000; 522 else if (integer <= -17) 523 return 0; 524 frac = celt_exp2_db_frac(x-SHL32(integer, DB_SHIFT)); /* Q28 */ 525 return VSHR32(frac, -integer + 28 - 16); /* Q16 */ 526 } 527 #else 528 529 #define celt_log2_db(x) SHL32(EXTEND32(celt_log2(x)), DB_SHIFT-10) 530 #define celt_exp2_db_frac(x) SHL32(celt_exp2_frac(PSHR32(x, DB_SHIFT-10)), 14) 531 #define celt_exp2_db(x) celt_exp2(PSHR32(x, DB_SHIFT-10)) 532 533 #endif 534 535 536 opus_val32 celt_rcp(opus_val32 x); 537 opus_val32 celt_rcp_norm32(opus_val32 x); 538 539 #define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b)) 540 541 opus_val32 frac_div32_q29(opus_val32 a, opus_val32 b); 542 opus_val32 frac_div32(opus_val32 a, opus_val32 b); 543 544 /* Computes atan(x) multiplied by 2/PI. The input value (x) should be within the 545 * range of -1 to 1 and represented in Q30 format. The function will return the 546 * result in Q30 format. */ 547 static OPUS_INLINE opus_val32 celt_atan_norm(opus_val32 x) 548 { 549 /* Approximation constants. */ 550 static const opus_int32 ATAN_2_OVER_PI = 1367130551; /* Q31 */ 551 static const opus_int32 ATAN_COEFF_A03 = -715791936; /* Q31 */ 552 static const opus_int32 ATAN_COEFF_A05 = 857391616; /* Q32 */ 553 static const opus_int32 ATAN_COEFF_A07 = -1200579328; /* Q33 */ 554 static const opus_int32 ATAN_COEFF_A09 = 1682636672; /* Q34 */ 555 static const opus_int32 ATAN_COEFF_A11 = -1985085440; /* Q35 */ 556 static const opus_int32 ATAN_COEFF_A13 = 1583306112; /* Q36 */ 557 static const opus_int32 ATAN_COEFF_A15 = -598602432; /* Q37 */ 558 opus_int32 x_sq_q30; 559 opus_int32 x_q31; 560 opus_int32 tmp; 561 /* The expected x is in the range of [-1.0f, 1.0f] */ 562 celt_sig_assert((x <= 1073741824) && (x >= -1073741824)); 563 564 /* If x = 1.0f, returns 0.5f */ 565 if (x == 1073741824) 566 { 567 return 536870912; /* 0.5f (Q30) */ 568 } 569 /* If x = 1.0f, returns 0.5f */ 570 if (x == -1073741824) 571 { 572 return -536870912; /* -0.5f (Q30) */ 573 } 574 x_q31 = SHL32(x, 1); 575 x_sq_q30 = MULT32_32_Q31(x_q31, x); 576 /* Split evaluation in steps to avoid exploding macro expansion. */ 577 tmp = MULT32_32_Q31(x_sq_q30, ATAN_COEFF_A15); 578 tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A13, tmp)); 579 tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A11, tmp)); 580 tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A09, tmp)); 581 tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A07, tmp)); 582 tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A05, tmp)); 583 tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A03, tmp)); 584 tmp = ADD32(x, MULT32_32_Q31(x_q31, tmp)); 585 return MULT32_32_Q31(ATAN_2_OVER_PI, tmp); 586 } 587 588 /* Calculates the arctangent of y/x, multiplies the result by 2/pi, and returns 589 * the value in Q30 format. Both input values (x and y) must be within the range 590 * of 0 to 1 and represented in Q30 format. Inputs must be zero or greater, and 591 * at least one input must be non-zero. */ 592 static OPUS_INLINE opus_val32 celt_atan2p_norm(opus_val32 y, opus_val32 x) 593 { 594 celt_sig_assert(x>=0 && y>=0); 595 if (y==0 && x==0) { 596 return 0; 597 } else if (y < x) { 598 return celt_atan_norm(SHR32(frac_div32(y, x), 1)); 599 } else { 600 celt_sig_assert(y > 0); 601 return 1073741824 /* 1.0f Q30 */ - 602 celt_atan_norm(SHR32(frac_div32(x, y), 1)); 603 } 604 } 605 606 #define M1 32767 607 #define M2 -21 608 #define M3 -11943 609 #define M4 4936 610 611 /* Atan approximation using a 4th order polynomial. Input is in Q15 format 612 and normalized by pi/4. Output is in Q15 format */ 613 static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x) 614 { 615 return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x))))))); 616 } 617 618 #undef M1 619 #undef M2 620 #undef M3 621 #undef M4 622 623 /* atan2() approximation valid for positive input values */ 624 static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x) 625 { 626 if (x==0 && y==0) { 627 return 0; 628 } else if (y < x) 629 { 630 opus_val32 arg; 631 arg = celt_div(SHL32(EXTEND32(y),15),x); 632 if (arg >= 32767) 633 arg = 32767; 634 return SHR16(celt_atan01(EXTRACT16(arg)),1); 635 } else { 636 opus_val32 arg; 637 arg = celt_div(SHL32(EXTEND32(x),15),y); 638 if (arg >= 32767) 639 arg = 32767; 640 return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1); 641 } 642 } 643 644 #endif /* FIXED_POINT */ 645 646 #ifndef DISABLE_FLOAT_API 647 648 void celt_float2int16_c(const float * OPUS_RESTRICT in, short * OPUS_RESTRICT out, int cnt); 649 650 #ifndef OVERRIDE_FLOAT2INT16 651 #define celt_float2int16(in, out, cnt, arch) ((void)(arch), celt_float2int16_c(in, out, cnt)) 652 #endif 653 654 int opus_limit2_checkwithin1_c(float *samples, int cnt); 655 656 #ifndef OVERRIDE_LIMIT2_CHECKWITHIN1 657 #define opus_limit2_checkwithin1(samples, cnt, arch) ((void)(arch), opus_limit2_checkwithin1_c(samples, cnt)) 658 #endif 659 660 #endif /* DISABLE_FLOAT_API */ 661 662 #endif /* MATHOPS_H */