vq.c (25048B)
1 /* Copyright (c) 2007-2008 CSIRO 2 Copyright (c) 2007-2009 Xiph.Org Foundation 3 Written by Jean-Marc Valin */ 4 /* 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions 7 are met: 8 9 - Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 12 - Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "mathops.h"
#include "cwrs.h"
#include "vq.h"
#include "arch.h"
#include "os_support.h"
#include "bands.h"
#include "rate.h"
#include "pitch.h"
#include "SigProc_FIX.h"

#if defined(FIXED_POINT)
/* Shift every element of X up by 'shift' bits (no rounding needed on a
   left shift).  Used to move normalized vectors between the NORM_SHIFT
   Q-format and the 14-bit working format of the legacy 16-bit kernels. */
void norm_scaleup(celt_norm *X, int N, int shift) {
   int i;
   celt_assert(shift >= 0);
   if (shift <= 0) return;
   for (i=0;i<N;i++) X[i] = SHL32(X[i], shift);
}

/* Shift every element of X down by 'shift' bits with rounding (PSHR32).
   Inverse companion of norm_scaleup(). */
void norm_scaledown(celt_norm *X, int N, int shift) {
   int i;
   celt_assert(shift >= 0);
   if (shift <= 0) return;
   for (i=0;i<N;i++) X[i] = PSHR32(X[i], shift);
}

/* Plain inner product of two celt_norm vectors with a 32-bit accumulator
   and no post-shift.  NOTE(review): callers are expected to have scaled
   the inputs down far enough that the sum cannot overflow 32 bits --
   confirm against call sites (e.g. renormalise_vector below). */
opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) {
   int i;
   opus_val32 sum = 0;
   (void)arch;   /* arch kept for signature parity with SIMD overrides */
   for (i=0;i<len;i++) sum += x[i]*y[i];
   return sum;
}
/* Inner product with a 64-bit accumulator, with the result shifted down
   by 2*(NORM_SHIFT-14) so it matches the scale a Q14 inner product would
   have.  Safe for full-precision NORM_SHIFT inputs. */
opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) {
   int i;
   opus_val64 sum = 0;
   (void)arch;
   for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i];
   return sum>>2*(NORM_SHIFT-14);
}
#endif

#ifndef OVERRIDE_vq_exp_rotation1
/* One pass of the spreading rotation: applies the 2x2 Givens rotation
   [c s; -s c] to pairs of samples 'stride' apart, first in a forward
   sweep and then in a backward sweep (so the rotation is spread evenly
   across the vector).  The vector is temporarily scaled down to the
   14-bit domain because the kernel uses 16x16 multiplies; in the float
   build the scale calls and EXTRACT16/PSHR32 are no-ops. */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
   ms = NEG16(s);
   norm_scaledown(X, len, NORM_SHIFT-14);
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
      *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
      *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   norm_scaleup(X, len, NORM_SHIFT-14);
}
#endif /* OVERRIDE_vq_exp_rotation1 */

104 void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 105 { 106 static const int SPREAD_FACTOR[3]={15,10,5}; 107 int i; 108 opus_val16 c, s; 109 opus_val16 gain, theta; 110 int stride2=0; 111 int factor; 112 113 if (2*K>=len || spread==SPREAD_NONE) 114 return; 115 factor = SPREAD_FACTOR[spread-1]; 116 117 gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); 118 theta = HALF16(MULT16_16_Q15(gain,gain)); 119 120 c = celt_cos_norm(EXTEND32(theta)); 121 s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ 122 123 if (len>=8*stride) 124 { 125 stride2 = 1; 126 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. 127 It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ 128 while ((stride2*stride2+stride2)*stride + (stride>>2) < len) 129 stride2++; 130 } 131 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 132 extract_collapse_mask().*/ 133 len = celt_udiv(len, stride); 134 for (i=0;i<stride;i++) 135 { 136 if (dir < 0) 137 { 138 if (stride2) 139 exp_rotation1(X+i*len, len, stride2, s, c); 140 exp_rotation1(X+i*len, len, 1, c, s); 141 } else { 142 exp_rotation1(X+i*len, len, 1, c, -s); 143 if (stride2) 144 exp_rotation1(X+i*len, len, stride2, s, -c); 145 } 146 } 147 } 148 149 /** Normalizes the decoded integer pvq codeword to unit norm. 
 */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val32 gain, int shift)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val32 g;

   /* g = gain/sqrt(Ryy), computed via a normalized reciprocal sqrt.
      In the float build VSHR32/MULT16_32_Q15 collapse to plain
      arithmetic and 'k' is never evaluated (its uses are discarded by
      the macro expansion), which is why it is only declared for
      FIXED_POINT. */
#ifdef FIXED_POINT
   k = celt_ilog2(Ryy)>>1;
#endif
   t = VSHR32(Ryy, 2*(k-7)-15);
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
   i=0;
   (void)shift;   /* only meaningful in the FIXED_POINT+QEXT path below */
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
   if (shift>0) {
      /* QEXT: iy was computed at an 'up'-scaled resolution; fold the
         extra 'shift' bits into the output scaling in one step. */
      int tot_shift = NORM_SHIFT+1-k-shift;
      if (tot_shift >= 0) {
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
         while (++i < N);
      } else {
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
         while (++i < N);
      }
   } else
#endif
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
   while (++i < N);
}

/* Returns a B-bit mask with bit b set iff interleaved block b of iy
   contains at least one nonzero pulse (used for collapse detection). */
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned collapse_mask;
   int N0;
   int i;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   N0 = celt_udiv(N, B);
   collapse_mask = 0;
   i=0; do {
      int j;
      unsigned tmp=0;
      j=0; do {
         tmp |= iy[i*N0+j];
      } while (++j<N0);
      collapse_mask |= (tmp!=0)<<i;
   } while (++i<B);
   return collapse_mask;
}

/* Greedy PVQ search: finds the vector iy of N integers with
   sum(|iy|)==K that (approximately) maximizes the correlation with X.
   Returns yy = sum(iy^2) (halved bookkeeping, see y[] *= 2 below) so the
   caller can normalize.  X is overwritten with |X| (and, in fixed point,
   scaled down) -- callers only use X again after resynthesis. */
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
   VARDECL(celt_norm, y);
   VARDECL(int, signx);
   int i, j;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;
   opus_val16 yy;
   SAVE_STACK;

   (void)arch;
   ALLOC(y, N, celt_norm);
   ALLOC(signx, N, int);
#ifdef FIXED_POINT
   {
      /* Scale X down so the 16-bit search arithmetic below cannot overflow. */
      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
      norm_scaledown(X, N, shift);
   }
#endif
   /* Get rid of the sign */
   sum = 0;
   j=0; do {
      signx[j] = X[j]<0;
      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
      X[j] = ABS16(X[j]);
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid */
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      } while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
#ifdef FIXED_POINT
      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
#else
      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
#endif
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         /* y holds 2*iy so the per-pulse update below needs no doubling. */
         y[j] *= 2;
         pulsesLeft -= iy[j];
      } while (++j<N);
   }
   celt_sig_assert(pulsesLeft>=0);

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_sig_assert(pulsesLeft<=N+3);
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

   /* Place the remaining pulses one at a time, each at the position that
      maximizes (Rxy^2)/Ryy after adding the pulse. */
   for (i=0;i<pulsesLeft;i++)
   {
      opus_val16 Rxy, Ryy;
      int best_id;
      opus_val32 best_num;
      opus_val16 best_den;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      /* Keep Rxy in 16 bits as the accumulated correlation grows.
         (rshift is only evaluated in fixed point; SHR32 discards it in
         the float build.) */
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);

      /* Calculations for position 0 are out of the loop, in part to reduce
         mispredicted branches (since the if condition is usually false)
         in the loop. */
      /* Temporary sums of the new pulse(s) */
      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
      /* We're multiplying y[j] by two so we don't have to do it here */
      Ryy = ADD16(yy, y[0]);

      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
         Rxy is positive because the sign is pre-computed) */
      Rxy = MULT16_16_Q15(Rxy,Rxy);
      best_den = Ryy;
      best_num = Rxy;
      j=1;
      do {
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: It's not clear whether a cmov is faster than a branch here
            since the condition is more often false than true and using
            a cmov introduces data dependencies across iterations. The optimal
            choice may be architecture-dependent. */
         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
      /* OPT: The is more likely to be compiled without a branch than the code above
         but has the same performance otherwise. */
      iy[j] = (iy[j]^-signx[j]) + signx[j];
   } while (++j<N);
   RESTORE_STACK;
   return yy;
}

#ifdef ENABLE_QEXT
#include "macros.h"

/* QEXT specialization of the PVQ search for N==2: iy gets the base
   codeword (sum |iy| == K) and up_iy the refined codeword at resolution
   up*K; *refine is the per-coefficient offset between them that gets
   coded separately.  Returns sum(up_iy^2), pre-shifted down by 2*shift
   in fixed point. */
static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
   opus_val32 sum;
   opus_val32 rcp_sum;
   int offset;
   sum = ABS32(X[0]) + ABS32(X[1]);
   /* Degenerate (near-silent) input: put everything on coefficient 0. */
   if (sum < EPSILON) {
      iy[0] = K;
      up_iy[0] = up*K;
      iy[1]=up_iy[1]=0;
      *refine=0;
#ifdef FIXED_POINT
      return (opus_val64)K*K*up*up>>2*shift;
#else
      (void)shift;
      return K*(float)K*up*up;
#endif
   }
#ifdef FIXED_POINT
   int sum_shift;
   opus_val32 X0;
   sum_shift = 30-celt_ilog2(sum);
   rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
   /* X0 = |X[0]|/(|X[0]|+|X[1]|) in Q31, then round to the two grids. */
   X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
   iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
   up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
#else
   rcp_sum = 1.f/sum;
   iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
   up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
#endif
   /* Clamp the refined value so the refinement offset fits in (up-1)/2. */
   up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
   offset = up_iy[0] - up*iy[0];
   iy[1] = K-abs(iy[0]);
   up_iy[1] = up*K-abs(up_iy[0]);
   if (X[1] < 0) {
      iy[1] = -iy[1];
      up_iy[1] = -up_iy[1];
      offset = -offset;
   }
   *refine = offset;
#ifdef FIXED_POINT
   return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
#else
   return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
#endif
}

/* Round the normalized magnitudes Xn (Q31 fractions summing to ~1) to an
   integer vector iy with sum(iy)==K, distributing the rounding error to
   the positions with the largest residual.  When iy != iy0, each iy[i]
   is kept within (up-1) of up*iy0[i] so the refinement stays codable.
   Returns 1 on failure (rounding error too large), 0 on success. */
static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
   int i;
   int dir;
   VARDECL(opus_val32, rounding);
   int iysum = 0;
   SAVE_STACK;
   ALLOC(rounding, N, opus_val32);
   for (i=0;i<N;i++) {
      opus_val32 tmp;
      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
#ifdef FIXED_POINT
      iy[i] = (tmp+64) >> 7;
#else
      iy[i] = (int)floor(.5+tmp);
#endif
      rounding[i] = tmp - SHL32(iy[i], 7);
   }
   if (iy != iy0) {
      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
   }
   for (i=0;i<N;i++) iysum += iy[i];
   /* Give up if we'd need to move too many pulses -- caller falls back. */
   if (abs(iysum - K) > 32) {
      RESTORE_STACK;
      return 1;
   }
   dir = iysum < K ? 1 : -1;
   while (iysum != K) {
      /* Pick the position whose rounding residual most favors a move in
         'dir', subject to the margin and non-negativity constraints. */
      opus_val32 roundval=-1000000*dir;
      int roundpos=0;
      for (i=0;i<N;i++) {
         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
            roundval = rounding[i];
            roundpos = i;
         }
      }
      iy[roundpos] += dir;
      rounding[roundpos] -= SHL32(dir, 15);
      iysum+=dir;
   }
   RESTORE_STACK;
   return 0;
}

/* QEXT PVQ search for N>2: computes the base codeword iy (K pulses) and
   the refined codeword up_iy (up*K pulses), with refine[] holding the
   coded per-coefficient offsets.  Falls back to a single pulse at
   position 0 if the input is near-silent or op_pvq_refine() fails.
   Returns sum(up_iy^2), shifted down by 2*shift in fixed point. */
static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) {
   opus_val32 rcp_sum;
   opus_val32 sum=0;
   int i;
   int failed=0;
   opus_val64 yy=0;
   VARDECL(opus_val32, Xn);
   SAVE_STACK;
   for (i=0;i<N;i++) sum += ABS32(X[i]);
   ALLOC(Xn, N, opus_val32);
   if (sum < EPSILON)
      failed = 1;
   else {
#ifdef FIXED_POINT
      int sum_shift = 30-celt_ilog2(sum);
      rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
      for (i=0;i<N;i++) {
         Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum);
      }
#else
      rcp_sum = celt_rcp(sum);
      for (i=0;i<N;i++) {
         Xn[i] = ABS32(X[i])*rcp_sum;
      }
#endif
   }
   /* Short-circuit keeps us from reading uninitialized Xn when failed==1. */
   failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N);
   failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N);
   if (failed) {
      iy[0] = K;
      for (i=1;i<N;i++) iy[i] = 0;
      up_iy[0] = up*K;
      for (i=1;i<N;i++) up_iy[i] = 0;
   }
   for (i=0;i<N;i++) {
      yy += up_iy[i]*(opus_val64)up_iy[i];
      /* The search worked on magnitudes; restore signs from X. */
      if (X[i] < 0) {
         iy[i] = -iy[i];
         up_iy[i] = -up_iy[i];
      }
      refine[i] = up_iy[i]-up*iy[i];
   }
   RESTORE_STACK;
#ifdef FIXED_POINT
   return (yy + (1<<2*shift>>1))>>2*shift;
#else
   (void)shift;
   return yy;
#endif
}
#endif

#ifdef ENABLE_QEXT
/* Take advantage of the fact that "large" refine values are much less likely
   than smaller ones.
 */
/* Encodes one refinement offset: a cheap flag says whether |refine|
   exceeds up/2 (rare), then either sign + excess in extra_bits-1 bits,
   or the offset value in extra_bits bits.  use_entropy selects a
   skewed probability (logp=3) for the flag when budget allows. */
static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) {
   int large;
   large = abs(refine)>up/2;
   ec_enc_bit_logp(enc, large, use_entropy ? 3 : 1);
   if (large) {
      ec_enc_bits(enc, refine < 0, 1);
      ec_enc_bits(enc, abs(refine)-up/2-1, extra_bits-1);
   } else {
      ec_enc_bits(enc, refine+up/2, extra_bits);
   }
}

/* Decoder mirror of ec_enc_refine(); must consume exactly the same bits. */
static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) {
   int large, refine;
   large = ec_dec_bit_logp(dec, use_entropy ? 3 : 1);
   if (large) {
      int sign = ec_dec_bits(dec, 1);
      refine = ec_dec_bits(dec, extra_bits-1) + up/2+1;
      if (sign) refine = -refine;
   } else {
      refine = (opus_int32)ec_dec_bits(dec, extra_bits)-up/2;
   }
   return refine;
}
#endif

/* Quantizes the unit-norm vector X with K pulses: applies the spreading
   rotation, runs the PVQ search, entropy-codes the result, and (when
   resynth is set) reconstructs the quantized X the decoder will see.
   With ENABLE_QEXT and extra_bits>=2, a higher-resolution codeword is
   additionally coded into ext_enc as per-coefficient refinements.
   Returns the collapse mask for the B interleaved blocks. */
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
      opus_val32 gain, int resynth
      ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
{
   VARDECL(int, iy);
   opus_val32 yy;
   unsigned collapse_mask;
#ifdef ENABLE_QEXT
   int yy_shift = 0;
#endif
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   /* Covers vectorization by up to 4. */
   ALLOC(iy, N+3, int);

   exp_rotation(X, N, 1, B, K, spread);

#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      int refine;
      int up_iy[2];
      int up;
      /* yy_shift keeps sum(up_iy^2) within 32 bits for large extra_bits. */
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      /* N==2 needs a single refinement value, coded uniformly. */
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else if (extra_bits >= 2) {
      int i;
      VARDECL(int, up_iy);
      VARDECL(int, refine);
      int up, use_entropy;
      ALLOC(up_iy, N, int);
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      /* Only use the skewed 'large' probability when there is room for
         the worst case; the decoder makes the same decision. */
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
      /* The last coefficient is implied by the pulse budget except for
         its sign when the base codeword leaves it at zero. */
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else
#endif
   {
      yy = op_pvq_search(X, iy, K, N, arch);
      collapse_mask = extract_collapse_mask(iy, N, B);
      encode_pulses(iy, N, K, enc);
      if (resynth)
         normalise_residual(iy, X, N, yy, gain, 0);
   }

   if (resynth)
      exp_rotation(X, N, -1, B, K, spread);

   RESTORE_STACK;
   return collapse_mask;
}

/** Decode pulse vector and combine the result with the pitch vector to produce
    the final normalised signal in the current band.
 */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val32 gain
      ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   int yy_shift=0;
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   Ryy = decode_pulses(iy, N, K, dec);
#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      int up;
      int refine;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
      /* Rebuild the up-scaled codeword from the base codeword and the
         single refinement offset, mirroring op_pvq_search_N2(). */
      iy[0] *= up;
      iy[1] *= up;
      if (iy[1] == 0) {
         iy[1] = (iy[0] > 0) ? -refine : refine;
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
      } else if (iy[1] > 0) {
         iy[0] += refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      } else {
         iy[0] -= refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      }
#ifdef FIXED_POINT
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
#endif
   } else if (extra_bits >= 2) {
      int i;
      opus_val64 yy64;
      VARDECL(int, refine);
      int up, use_entropy;
      int sign=0;
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      /* Must match the encoder's budget decision bit-for-bit. */
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
      /* Last coefficient's sign is explicit only when the base codeword
         left it at zero. */
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
      else sign = iy[N-1] < 0;
      for (i=0;i<N-1;i++) {
         iy[i] = iy[i]*up + refine[i];
      }
      /* The last coefficient's magnitude is implied by the pulse budget. */
      iy[N-1] = up*K;
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
      if (sign) iy[N-1] = -iy[N-1];
      yy64 = 0;
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
#ifdef FIXED_POINT
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = yy64;
#endif
   }
#endif
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}

#ifndef OVERRIDE_renormalise_vector
/* Rescales X to have norm 'gain' (X := gain*X/||X||).  Works in the
   14-bit domain for the 16-bit multiply; EPSILON keeps the reciprocal
   sqrt finite on an all-zero vector.  As in normalise_residual(), 'k'
   is only evaluated in fixed point. */
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
   norm_scaledown(X, N, NORM_SHIFT-14);
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
#ifdef FIXED_POINT
   k = celt_ilog2(E)>>1;
#endif
   t = VSHR32(E, 2*(k-7));
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
      xptr++;
   }
   norm_scaleup(X, N, NORM_SHIFT-14);
   /*return celt_sqrt(E);*/
}
#endif /* OVERRIDE_renormalise_vector */

/* Computes the stereo angle theta = atan2(||side||, ||mid||) used for
   mid/side energy splitting.  For stereo, mid/side are formed on the fly
   (scaled down to avoid overflow); otherwise X and Y already hold the
   two channels' vectors. */
opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
   int i;
   int itheta;
   opus_val32 mid, side;
   opus_val32 Emid, Eside;

   Emid = Eside = 0;
   if (stereo)
   {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
   }
   mid = celt_sqrt32(Emid);
   side = celt_sqrt32(Eside);
#if defined(FIXED_POINT)
   itheta = celt_atan2p_norm(side, mid);
#else
   /* Match the fixed-point Q scale of celt_atan2p_norm's output. */
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
#endif

   return itheta;
}

#ifdef ENABLE_QEXT

/* Reconstructs the unit vector (scaled by 'gain') from a cubic-lattice
   codeword: each coefficient maps to the cell center (1+2*iy[i])-K on
   [-K,K], the 'face' coefficient is pinned to +/-K, and the result is
   normalized.  'shift' (fixed point only) pre-scales the energy sum to
   avoid overflow; its float uses vanish inside PSHR32. */
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
   int i;
   opus_val32 sum=0;
   opus_val32 mag;
#ifdef FIXED_POINT
   int sum_shift;
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
#endif
   for (i=0;i<N;i++) {
      X[i] = (1+2*iy[i])-K;
   }
   X[face] = sign ? -K : K;
   for (i=0;i<N;i++) {
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
   }
#ifdef FIXED_POINT
   sum_shift = (29-celt_ilog2(sum))>>1;
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
   for (i=0;i<N;i++) {
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
   }
#else
   mag = 1.f/sqrt(sum);
   for (i=0;i<N;i++) {
      X[i] *= mag*gain;
   }
#endif
}

/* Cube-face quantizer: codes the index and sign of the largest
   coefficient ('face'), then quantizes every other coefficient to a
   K-level grid on that face with 'res' bits each.  Returns an all-ones
   collapse mask (this quantizer never collapses blocks). */
unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
   int i;
   int face=0;
   int K;
   VARDECL(int, iy);
   celt_norm faceval=-1;
   opus_val32 norm;
   int sign;
   SAVE_STACK;
   ALLOC(iy, N, int);
   K = 1<<res;
   /* Using odd K on transients to avoid adding pre-echo. */
   if (B!=1) K=IMAX(1, K-1);
   if (K==1) {
      if (resynth) OPUS_CLEAR(X, N);
      RESTORE_STACK;
      return 0;
   }
   for (i=0;i<N;i++) {
      if (ABS32(X[i]) > faceval) {
         faceval = ABS32(X[i]);
         face = i;
      }
   }
   sign = X[face]<0;
   ec_enc_uint(enc, face, N);
   ec_enc_bits(enc, sign, 1);
#ifdef FIXED_POINT
   if (faceval != 0) {
      int face_shift = 30-celt_ilog2(faceval);
      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
      norm = MULT16_32_Q15(K, norm);
      for (i=0;i<N;i++) {
         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
      }
   } else {
      OPUS_CLEAR(iy, N);
   }
#else
   norm = .5f*K/(faceval+EPSILON);
   for (i=0;i<N;i++) {
      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
   }
#endif
   for (i=0;i<N;i++) {
      if (i != face) ec_enc_bits(enc, iy[i], res);
   }
   if (resynth) {
      cubic_synthesis(X, iy, N, K, face, sign, gain);
   }
   RESTORE_STACK;
   return (1<<B)-1;
}

/* Decoder mirror of cubic_quant(): reads face index, sign and the
   remaining coefficients, then reconstructs X via cubic_synthesis(). */
unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) {
   int i;
   int face;
   int sign;
   int K;
   VARDECL(int, iy);
   SAVE_STACK;
   ALLOC(iy, N, int);
   K = 1<<res;
   /* Using odd K on transients to avoid adding pre-echo. */
   if (B!=1) K=IMAX(1, K-1);
   if (K==1) {
      OPUS_CLEAR(X, N);
      RESTORE_STACK;
      return 0;
   }
   face = ec_dec_uint(dec, N);
   sign = ec_dec_bits(dec, 1);
   for (i=0;i<N;i++) {
      if (i != face) iy[i] = ec_dec_bits(dec, res);
   }
   /* iy[face] is overwritten inside cubic_synthesis (X[face] = +/-K). */
   iy[face]=0;
   cubic_synthesis(X, iy, N, K, face, sign, gain);
   RESTORE_STACK;
   return (1<<B)-1;
}
#endif