celt_encoder.c (106982B)
1 /* Copyright (c) 2007-2008 CSIRO 2 Copyright (c) 2007-2010 Xiph.Org Foundation 3 Copyright (c) 2008 Gregory Maxwell 4 Written by Jean-Marc Valin and Gregory Maxwell */ 5 /* 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions 8 are met: 9 10 - Redistributions of source code must retain the above copyright 11 notice, this list of conditions and the following disclaimer. 12 13 - Redistributions in binary form must reproduce the above copyright 14 notice, this list of conditions and the following disclaimer in the 15 documentation and/or other materials provided with the distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 21 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 */ 29 30 #ifdef HAVE_CONFIG_H 31 #include "config.h" 32 #endif 33 34 #define CELT_ENCODER_C 35 36 #include "cpu_support.h" 37 #include "os_support.h" 38 #include "mdct.h" 39 #include <math.h> 40 #include "celt.h" 41 #include "pitch.h" 42 #include "bands.h" 43 #include "modes.h" 44 #include "entcode.h" 45 #include "quant_bands.h" 46 #include "rate.h" 47 #include "stack_alloc.h" 48 #include "mathops.h" 49 #include "float_cast.h" 50 #include <stdarg.h> 51 #include "celt_lpc.h" 52 #include "vq.h" 53 54 55 #ifndef M_PI 56 #define M_PI 3.141592653 57 #endif 58 59 60 /** @brief Encoder state. Configuration fields come first; every field from ENCODER_RESET_START (rng) onward is cleared on a reset. The struct ends with in_mem, and the arrays named in the trailing comments follow it in the same allocation, whose total size is computed by opus_custom_encoder_get_size(). */ 63 struct OpusCustomEncoder { 64 const OpusCustomMode *mode; /**< Mode used by the encoder */ 65 int channels; 66 int stream_channels; 67 68 int force_intra; 69 int clip; 70 int disable_pf; 71 int complexity; 72 int upsample; 73 int start, end; /* band range; init sets start=0 and end=mode->effEBands */ 74 75 opus_int32 bitrate; 76 int vbr; 77 int signalling; 78 int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ 79 int loss_rate; 80 int lsb_depth; 81 int lfe; 82 int disable_inv; 83 int arch; 84 #ifdef ENABLE_QEXT 85 int enable_qext; 86 int qext_scale; 87 #endif 88 89 /* Everything beyond this point gets cleared on a reset */ 90 #define ENCODER_RESET_START rng 91 92 opus_uint32 rng; 93 int spread_decision; 94 opus_val32 delayedIntra; 95 int tonal_average; 96 int lastCodedBands; 97 int hf_average; 98 int tapset_decision; 99 100 int prefilter_period; 101 opus_val16 prefilter_gain; 102 int prefilter_tapset; 103 #ifdef RESYNTH 104 int prefilter_period_old; 105 opus_val16 prefilter_gain_old; 106 int prefilter_tapset_old; 107 #endif 108 int consec_transient; 109 AnalysisInfo analysis; 110 SILKInfo silk_info; 111 112 opus_val32 preemph_memE[2]; 113 opus_val32 preemph_memD[2]; 114 115 /* VBR-related parameters */ 116 opus_int32 vbr_reservoir; 117 opus_int32 vbr_drift; 118 opus_int32 vbr_offset; 119 opus_int32 vbr_count; 120 opus_val32 overlap_max; 121 opus_val16 stereo_saving; 122 int intensity; 123 
celt_glog *energy_mask; 124 celt_glog spec_avg; 125 126 #ifdef RESYNTH 127 #ifdef ENABLE_QEXT 128 /* +MAX_PERIOD to make space for overlap; this is twice the non-QEXT size, 2*(DEC_PITCH_BUF_SIZE+MAX_PERIOD/2) */ 129 celt_sig syn_mem[2][2*DEC_PITCH_BUF_SIZE+MAX_PERIOD]; 130 #else 131 /* +MAX_PERIOD/2 to make space for overlap */ 132 celt_sig syn_mem[2][DEC_PITCH_BUF_SIZE+MAX_PERIOD/2]; 133 #endif 134 #endif 135 136 celt_sig in_mem[1]; /* Size = channels*mode->overlap; declared with one element, the remaining storage lies past the end of the struct */ 137 /* celt_sig prefilter_mem[], Size = channels*QEXT_SCALE(COMBFILTER_MAXPERIOD) */ 138 /* celt_glog oldBandE[], Size = channels*mode->nbEBands */ 139 /* celt_glog oldLogE[], Size = channels*mode->nbEBands */ 140 /* celt_glog oldLogE2[], Size = channels*mode->nbEBands */ 141 /* celt_glog energyError[], Size = channels*mode->nbEBands */ /* With ENABLE_QEXT, opus_custom_encoder_get_size() reserves a further channels*NB_QEXT_BANDS celt_glog values after these arrays */ 142 }; 143 /* Returns the size in bytes of an encoder state for the given channel count, using the built-in mode obtained from opus_custom_mode_create(): 96000 Hz / 1920 samples with ENABLE_QEXT, 48000 Hz / 960 samples otherwise. NOTE(review): the returned mode is not checked before opus_custom_encoder_get_size() dereferences it; presumably creating these built-in modes cannot fail -- confirm. */ 144 int celt_encoder_get_size(int channels) 145 { 146 #ifdef ENABLE_QEXT 147 CELTMode *mode = opus_custom_mode_create(96000, 1920, NULL); 148 #else 149 CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); 150 #endif 151 return opus_custom_encoder_get_size(mode, channels); 152 } 153 /* Returns the number of bytes needed for an encoder state using the given mode and channel count: the struct itself, the storage that follows in_mem (the -1 accounts for the one in_mem element already inside the struct), the prefilter memory, four energy arrays of channels*mode->nbEBands values each and, with ENABLE_QEXT, channels*NB_QEXT_BANDS further celt_glog values. mode is dereferenced without a NULL check. */ 154 OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels) 155 { 156 int extra=0; 157 int size; 158 #ifdef ENABLE_QEXT 159 int qext_scale; /* not read directly below; presumably consumed by the QEXT_SCALE() macro -- confirm */ 160 extra = channels*NB_QEXT_BANDS*sizeof(celt_glog); 161 if (mode->Fs == 96000 && (mode->shortMdctSize==240 || mode->shortMdctSize==180)) { 162 qext_scale = 2; 163 } else qext_scale = 1; 164 #endif 165 size = sizeof(struct CELTEncoder) 166 + (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */ 167 + channels*QEXT_SCALE(COMBFILTER_MAXPERIOD)*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*QEXT_SCALE(COMBFILTER_MAXPERIOD)]; */ 168 + 4*channels*mode->nbEBands*sizeof(celt_glog) /* celt_glog oldBandE[channels*mode->nbEBands]; */ 169 /* celt_glog oldLogE[channels*mode->nbEBands]; */ 170 /* celt_glog oldLogE2[channels*mode->nbEBands]; */ 171 /* celt_glog energyError[channels*mode->nbEBands]; */ 172 + extra; 173 return size; 174 } 175 176 #if 
defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API) /* Allocates and initialises an encoder for the given mode and channel count. Returns the new state, or NULL when initialisation fails (the allocation is released first). The status code from opus_custom_encoder_init() is stored in *error when error is non-NULL. NOTE(review): opus_custom_encoder_get_size() dereferences mode, so a NULL mode is read here before the init call can reject it. */ 177 CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error) 178 { 179 int ret; 180 CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels)); 181 /* A failed allocation (st == NULL) is reported by init as OPUS_ALLOC_FAIL */ 182 ret = opus_custom_encoder_init(st, mode, channels); 183 if (ret != OPUS_OK) 184 { 185 opus_custom_encoder_destroy(st); 186 st = NULL; 187 } 188 if (error) 189 *error = ret; 190 return st; 191 } 192 #endif /* CUSTOM_MODES || ENABLE_OPUS_CUSTOM_API */ 193 /* Initialises the caller-provided state st for the given mode, channel count and arch value. Returns OPUS_BAD_ARG when channels is negative or greater than 2 (note that 0 passes this check), OPUS_ALLOC_FAIL when st or mode is NULL, and OPUS_OK otherwise. The whole state (opus_custom_encoder_get_size() bytes) is zeroed, defaults are filled in, and an OPUS_RESET_STATE request is issued through opus_custom_encoder_ctl(). */ 194 static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, 195 int channels, int arch) 196 { 197 if (channels < 0 || channels > 2) 198 return OPUS_BAD_ARG; 199 200 if (st==NULL || mode==NULL) 201 return OPUS_ALLOC_FAIL; 202 203 OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels)); 204 205 st->mode = mode; 206 st->stream_channels = st->channels = channels; 207 /* Defaults: upsample factor 1, bands 0..effEBands, signalling and constrained VBR flags set, clipping on, vbr off at OPUS_BITRATE_MAX, complexity 5, lsb_depth 24 */ 208 st->upsample = 1; 209 st->start = 0; 210 st->end = st->mode->effEBands; 211 st->signalling = 1; 212 st->arch = arch; 213 214 st->constrained_vbr = 1; 215 st->clip = 1; 216 217 st->bitrate = OPUS_BITRATE_MAX; 218 st->vbr = 0; 219 st->force_intra = 0; 220 st->complexity = 5; 221 st->lsb_depth=24; 222 223 #ifdef ENABLE_QEXT 224 if (st->mode->Fs == 96000 && (mode->shortMdctSize==240 || mode->shortMdctSize==180)) st->qext_scale = 2; 225 else st->qext_scale = 1; 226 #endif 227 228 opus_custom_encoder_ctl(st, OPUS_RESET_STATE); 229 230 return OPUS_OK; 231 } 232 233 #if defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API) /* Public initialiser: same as opus_custom_encoder_init_arch() with the arch value returned by opus_select_arch(). */ 234 int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels) 235 { 236 return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch()); 237 } 238 #endif 239 /* Initialises st with a built-in mode chosen from the sampling rate. With ENABLE_QEXT, a 96000 Hz request uses the 96000/1920 mode; every other rate uses the 48000/960 mode and then sets st->upsample to resampling_factor(sampling_rate). Returns the status of opus_custom_encoder_init_arch(). */ 240 int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, 241 int arch) 242 { 243 int ret; 244 #ifdef ENABLE_QEXT 245 if (sampling_rate==96000) { /* NOTE(review): the store below happens before opus_custom_encoder_init_arch() has checked st for NULL, and that call zeroes the state and sets upsample to 1 itself, so the assignment looks redundant -- confirm */ 246 st->upsample = 1; 247 return 
opus_custom_encoder_init_arch(st, 248 opus_custom_mode_create(96000, 1920, NULL), channels, arch); 249 } 250 #endif 251 ret = opus_custom_encoder_init_arch(st, 252 opus_custom_mode_create(48000, 960, NULL), channels, arch); 253 if (ret != OPUS_OK) 254 return ret; /* The 48000 Hz mode is always used on this path; the input rate only selects the upsample factor */ 255 st->upsample = resampling_factor(sampling_rate); 256 return OPUS_OK; 257 } 258 259 #if defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API) /* Releases an encoder state by passing it to opus_free(). */ 260 void opus_custom_encoder_destroy(CELTEncoder *st) 261 { 262 opus_free(st); 263 } 264 #endif /* CUSTOM_MODES || ENABLE_OPUS_CUSTOM_API */ 265 266 /* Decides whether the frame in `in` (C channels of len samples, channel c starting at in[c*len]) should be coded as a transient. Each channel is high-pass filtered, forward and backward masking envelopes are built on pairs of samples, and a masking metric is accumulated through inv_table; the largest per-channel metric is compared with 200. Outputs: *tf_chan receives the channel with the largest metric (it is left untouched when every metric is 0); *weak_transient is set to 1 when allow_weak_transients is non-zero and a detected transient has a metric below 600, in which case the return value is 0; *tf_estimate receives a value derived from the metric. A highly tonal, very low frequency input (toneishness and tone_freq test) forces a non-transient result. Returns non-zero for a transient; FUZZING builds return a random decision instead. */ 267 static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, 268 opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients, 269 int *weak_transient, opus_val16 tone_freq, opus_val32 toneishness) 270 { 271 int i; 272 VARDECL(opus_val16, tmp); 273 opus_val32 mem0,mem1; 274 int is_transient = 0; 275 opus_int32 mask_metric = 0; 276 int c; 277 opus_val16 tf_max; 278 int len2; 279 /* Forward masking: 6.7 dB/ms. */ 280 #ifdef FIXED_POINT 281 int forward_shift = 4; 282 #else 283 opus_val16 forward_decay = QCONST16(.0625f,15); 284 #endif 285 /* Table of 6*64/x, trained on real data to minimize the average error */ 286 static const unsigned char inv_table[128] = { 287 255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25, 288 23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12, 289 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8, 290 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 291 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 292 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 293 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 294 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 295 }; 296 SAVE_STACK; 297 #ifdef FIXED_POINT 298 int in_shift = IMAX(0, celt_ilog2(1+celt_maxabs32(in, C*len))-14); 299 #endif 300 ALLOC(tmp, len, opus_val16); 301 302 *weak_transient = 0; 303 /* For lower bitrates, let's be more conservative and have a forward masking 304 decay of 3.3 dB/ms. 
This avoids having to code transients at very low 305 bitrate (mostly for hybrid), which can result in unstable energy and/or 306 partial collapse. */ 307 if (allow_weak_transients) 308 { 309 #ifdef FIXED_POINT 310 forward_shift = 5; 311 #else 312 forward_decay = QCONST16(.03125f,15); 313 #endif 314 } 315 len2=len/2; 316 for (c=0;c<C;c++) 317 { 318 opus_val32 mean; 319 opus_int32 unmask=0; 320 opus_val32 norm; 321 opus_val16 maxE; 322 mem0=0; 323 mem1=0; 324 /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */ 325 for (i=0;i<len;i++) 326 { 327 #ifndef FIXED_POINT 328 float mem00; 329 #endif 330 opus_val32 x,y; /* in_shift is only declared under FIXED_POINT; presumably SHR32 discards its shift argument in float builds -- confirm */ 331 x = SHR32(in[i+c*len],in_shift); 332 y = ADD32(mem0, x); 333 #ifdef FIXED_POINT 334 mem0 = mem1 + y - SHL32(x,1); 335 mem1 = x - SHR32(y,1); 336 #else 337 /* Original code: 338 mem0 = mem1 + y - 2*x; 339 mem1 = x - .5f*y; 340 Modified code to shorten dependency chains: */ 341 mem00=mem0; 342 mem0 = mem0 - x + .5f*mem1; 343 mem1 = x - mem00; 344 #endif 345 tmp[i] = SROUND16(y, 2); 346 /*printf("%f ", tmp[i]);*/ 347 } 348 /*printf("\n");*/ 349 /* First few samples are bad because we don't propagate the memory */ /* NOTE(review): 12 entries are cleared unconditionally; assumes len >= 12 -- confirm */ 350 OPUS_CLEAR(tmp, 12); 351 352 #ifdef FIXED_POINT 353 /* Normalize tmp to max range */ 354 { 355 int shift=0; 356 shift = 14-celt_ilog2(MAX16(1, celt_maxabs16(tmp, len))); 357 if (shift!=0) 358 { 359 for (i=0;i<len;i++) 360 tmp[i] = SHL16(tmp[i], shift); 361 } 362 } 363 #endif 364 365 mean=0; 366 mem0=0; 367 /* Grouping by two to reduce complexity */ 368 /* Forward pass to compute the post-echo threshold*/ 369 for (i=0;i<len2;i++) 370 { 371 opus_val32 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),4); 372 mean += PSHR32(x2, 12); 373 #ifdef FIXED_POINT 374 /* FIXME: Use PSHR16() instead */ 375 mem0 = mem0 + PSHR32(x2-mem0,forward_shift); 376 tmp[i] = PSHR32(mem0, 12); 377 #else 378 mem0 = x2 + (1.f-forward_decay)*mem0; 379 tmp[i] = forward_decay*mem0; 380 #endif 381 } 382 383 mem0=0; 384 maxE=0; 385 /* 
Backward pass to compute the pre-echo threshold */ 386 for (i=len2-1;i>=0;i--) 387 { 388 /* Backward masking: 13.9 dB/ms. */ 389 #ifdef FIXED_POINT 390 /* FIXME: Use PSHR16() instead */ 391 mem0 = mem0 + PSHR32(SHL32(tmp[i],4)-mem0,3); 392 tmp[i] = PSHR32(mem0, 4); 393 maxE = MAX16(maxE, tmp[i]); 394 #else 395 mem0 = tmp[i] + 0.875f*mem0; 396 tmp[i] = 0.125f*mem0; 397 maxE = MAX16(maxE, 0.125f*mem0); 398 #endif 399 } 400 /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/ 401 402 /* Compute the ratio of the "frame energy" over the harmonic mean of the energy. 403 This essentially corresponds to a bitrate-normalized temporal noise-to-mask 404 ratio */ 405 406 /* As a compromise with the old transient detector, frame energy is the 407 geometric mean of the energy and half the max */ 408 #ifdef FIXED_POINT 409 /* Costs two sqrt() to avoid overflows */ 410 mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1))); 411 #else 412 mean = celt_sqrt(mean * maxE*.5*len2); 413 #endif 414 /* Inverse of the mean energy in Q15+6 */ 415 norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1)); 416 /* Compute harmonic mean discarding the unreliable boundaries 417 The data is smooth, so we only take 1/4th of the samples */ 418 unmask=0; 419 /* We should never see NaNs here. If we find any, then something really bad happened and we better abort 420 before it does any damage later on. If these asserts are disabled (no hardening), then the table 421 lookup a few lines below (id = ...) is likely to crash due to an out-of-bounds read. DO NOT FIX 422 that crash on NaN since it could result in a worse issue later on. 
*/ 423 celt_assert(!celt_isnan(tmp[0])); 424 celt_assert(!celt_isnan(norm)); 425 for (i=12;i<len2-5;i+=4) 426 { 427 int id; 428 #ifdef FIXED_POINT 429 id = MAX32(0,MIN32(127,MULT16_32_Q15(tmp[i]+EPSILON,norm))); /* Do not round to nearest */ 430 #else 431 id = (int)MAX32(0,MIN32(127,floor(64*norm*(tmp[i]+EPSILON)))); /* Do not round to nearest */ 432 #endif 433 unmask += inv_table[id]; 434 } 435 /*printf("%d\n", unmask);*/ 436 /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */ 437 unmask = 64*unmask*4/(6*(len2-17)); 438 if (unmask>mask_metric) 439 { 440 *tf_chan = c; 441 mask_metric = unmask; 442 } 443 } 444 is_transient = mask_metric>200; 445 /* Prevent the transient detector from confusing the partial cycle of a 446 very low frequency tone with a transient. */ 447 if (toneishness > QCONST32(.98f, 29) && tone_freq < QCONST16(0.026f, 13)) 448 { 449 is_transient = 0; 450 mask_metric = 0; 451 } 452 /* For low bitrates, define "weak transients" that need to be 453 handled differently to avoid partial collapse. 
*/ 454 if (allow_weak_transients && is_transient && mask_metric<600) { 455 is_transient = 0; 456 *weak_transient = 1; 457 } 458 /* Arbitrary metric for VBR boost */ 459 tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); 460 /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ 461 *tf_estimate = celt_sqrt(MAX32(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); 462 /*printf("%d %f\n", tf_max, mask_metric);*/ 463 RESTORE_STACK; 464 #ifdef FUZZING 465 is_transient = rand()&0x1; 466 #endif 467 /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/ 468 return is_transient; 469 } 470 471 /* Looks for sudden increases of energy to decide whether we need to patch the transient decision. The old-frame energies oldE (merged across both channels by taking the maximum when C is not 1) are spread by 1.0 per band in both directions; the positive part of newE minus that spread value, with both clamped at 0, is then averaged over bands max(2,start)..end-2 and all C channels. Returns non-zero when that mean exceeds GCONST(1.f). */ 473 static int patch_transient_decision(celt_glog *newE, celt_glog *oldE, int nbEBands, 474 int start, int end, int C) 475 { 476 int i, c; 477 opus_val32 mean_diff=0; /* NOTE(review): fixed capacity of 26 bands; assumes end <= 26 -- confirm for custom modes */ 478 celt_glog spread_old[26]; 479 /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to 480 avoid false detection caused by irrelevant bands */ 481 if (C==1) 482 { 483 spread_old[start] = oldE[start]; 484 for (i=start+1;i<end;i++) 485 spread_old[i] = MAXG(spread_old[i-1]-GCONST(1.0f), oldE[i]); 486 } else { 487 spread_old[start] = MAXG(oldE[start],oldE[start+nbEBands]); 488 for (i=start+1;i<end;i++) 489 spread_old[i] = MAXG(spread_old[i-1]-GCONST(1.0f), 490 MAXG(oldE[i],oldE[i+nbEBands])); 491 } 492 for (i=end-2;i>=start;i--) 493 spread_old[i] = MAXG(spread_old[i], spread_old[i+1]-GCONST(1.0f)); 494 /* Compute mean increase */ 495 c=0; do { 496 for (i=IMAX(2,start);i<end-1;i++) 497 { /* NOTE(review): x1 and x2 are opus_val16 while newE and spread_old hold celt_glog values; if celt_glog is wider than opus_val16 in some build this narrows -- confirm */ 498 opus_val16 x1, x2; 499 x1 = MAXG(0, newE[i + c*nbEBands]); 500 x2 = MAXG(0, spread_old[i]); 501 mean_diff = ADD32(mean_diff, MAXG(0, SUB32(x1, x2))); 502 } 503 } while (++c<C); 504 mean_diff = DIV32(mean_diff, C*(end-1-IMAX(2,start))); 505 /*printf("%f %f %d\n", mean_diff, max_diff, count);*/ 506 return mean_diff > GCONST(1.f); 507 } 508 509 /** Apply window and 
compute the MDCT for all sub-frames and 510 all channels in a frame */ 511 static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, 512 celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample, 513 int arch) 514 { 515 const int overlap = mode->overlap; 516 int N; 517 int B; 518 int shift; 519 int i, b, c; /* B sub-frames of N samples each: shortBlocks short blocks of shortMdctSize when shortBlocks is non-zero, otherwise a single block of shortMdctSize<<LM */ 520 if (shortBlocks) 521 { 522 B = shortBlocks; 523 N = mode->shortMdctSize; 524 shift = mode->maxLM; 525 } else { 526 B = 1; 527 N = mode->shortMdctSize<<LM; 528 shift = mode->maxLM-LM; 529 } 530 c=0; do { 531 for (b=0;b<B;b++) 532 { 533 /* Interleaving the sub-frames while doing the MDCTs */ 534 clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, 535 &out[b+c*N*B], mode->window, overlap, shift, B, 536 arch); 537 } 538 } while (++c<CC); /* Two transformed channels but one coded channel: replace channel 0 by the average of both spectra */ 539 if (CC==2&&C==1) 540 { 541 for (i=0;i<B*N;i++) 542 out[i] = ADD32(HALF32(out[i]), HALF32(out[B*N+i])); 543 } /* When upsampling, keep the first B*N/upsample bins of each channel (scaled by upsample) and zero the rest */ 544 if (upsample != 1) 545 { 546 c=0; do 547 { 548 int bound = B*N/upsample; 549 for (i=0;i<bound;i++) 550 out[c*B*N+i] *= upsample; 551 OPUS_CLEAR(&out[c*B*N+bound], B*N-bound); 552 } while (++c<C); 553 } 554 } 555 556 /* Applies the pre-emphasis filter to one channel. Reads N/upsample samples from pcmp with a stride of CC, writes N samples to inp (zero-stuffed when upsample is not 1) and carries the filter memory across calls in *mem. coef[0] drives the first-order filter; a non-zero coef[1] selects the alternative filter that also uses coef[2] (and coef[3] in fixed-point QEXT builds) when that code is compiled in. clip requests clamping of the converted input to +/-65536 (scaled by SIG_SHIFT in fixed-point ENABLE_RES24 builds); other fixed-point builds ignore it. */ 557 void celt_preemphasis(const opus_res * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, 558 int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip) 559 { 560 int i; 561 opus_val16 coef0; 562 celt_sig m; 563 int Nu; 564 565 coef0 = coef[0]; 566 m = *mem; 567 568 /* Fast path for the normal 48kHz case and no clipping */ 569 if (coef[1] == 0 && upsample == 1 && !clip) 570 { 571 for (i=0;i<N;i++) 572 { 573 celt_sig x; 574 x = RES2SIG(pcmp[CC*i]); 575 /* Apply pre-emphasis */ 576 inp[i] = x - m; 577 m = MULT16_32_Q15(coef0, x); 578 } 579 *mem = m; 580 return; 581 } 582 /* General path: place each input sample at every upsample-th position of inp */ 583 Nu = N/upsample; 584 if (upsample!=1) 585 { 586 OPUS_CLEAR(inp, N); 587 } 588 for (i=0;i<Nu;i++) 589 inp[i*upsample] = RES2SIG(pcmp[CC*i]); 590 591 #ifndef FIXED_POINT 592 if (clip) 593 { 594 /* Clip input to avoid encoding non-portable files */ 595 for 
(i=0;i<Nu;i++) 596 inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample])); 597 } 598 #elif defined(ENABLE_RES24) 599 if (clip) 600 { 601 /* Clip input to avoid encoding non-portable files */ 602 for (i=0;i<Nu;i++) 603 inp[i*upsample] = MAX32(-(65536<<SIG_SHIFT), MIN32(65536<<SIG_SHIFT,inp[i*upsample])); 604 } 605 #else 606 (void)clip; /* Avoids a warning about clip being unused. */ 607 #endif 608 #if defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API) || defined(ENABLE_QEXT) 609 if (coef[1] != 0) 610 { 611 opus_val16 coef1 = coef[1]; 612 #if defined(FIXED_POINT) && defined(ENABLE_QEXT) 613 /* If we need the extra precision, we use the fact that coef[3] is exact to do a Newton-Raphson 614 iteration and get us more precision on coef[2]. */ 615 opus_val32 coef2_q30 = SHL32(coef[2], 18) + PSHR32(MULT16_16(QCONST32(1.f, 25) - MULT16_16(coef[3], coef[2]), coef[2]), 7); 616 celt_assert(SIG_SHIFT == 12); 617 #else 618 opus_val16 coef2 = coef[2]; 619 #endif 620 for (i=0;i<N;i++) 621 { 622 celt_sig x, tmp; 623 x = inp[i]; 624 /* Apply pre-emphasis */ 625 #if defined(FIXED_POINT) && defined(ENABLE_QEXT) 626 tmp = SHL32(MULT32_32_Q31(coef2_q30, x), 1); 627 #else 628 tmp = SHL32(MULT16_32_Q15(coef2, x), 15-SIG_SHIFT); 629 #endif 630 inp[i] = tmp + m; 631 m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp); 632 } 633 } else 634 #endif 635 { 636 for (i=0;i<N;i++) 637 { 638 celt_sig x; 639 x = inp[i]; 640 /* Apply pre-emphasis */ 641 inp[i] = x - m; 642 m = MULT16_32_Q15(coef0, x); 643 } 644 } 645 *mem = m; 646 } 647 648 649 /* Sums the absolute values of the N entries of tmp (each shifted right by NORM_SHIFT-14) and then passes the sum through MAC16_32_Q15 with the factor LM*bias; presumably this yields L1 + LM*bias*L1, so a larger LM costs more when bias is positive -- confirm against the macro. */ 650 static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias) 651 { 652 int i; 653 opus_val32 L1; 654 L1 = 0; 655 for (i=0;i<N;i++) 656 L1 += EXTEND32(ABS16(SHR32(tmp[i], NORM_SHIFT-14))); 657 /* When in doubt, prefer good freq resolution */ 658 L1 = MAC16_32_Q15(L1, LM*bias, L1); 659 return L1; 660 661 } 662 /* Chooses the per-band time-frequency resolution flags. For each of the len bands of channel tf_chan in X, l1_metric() is evaluated after successive haar1() transforms and the best level is recorded in metric[]; a two-state Viterbi search with switching cost lambda and per-band weights importance[] then fills tf_res[0..len-1] with 0 or 1. Returns tf_select, which outside FUZZING builds can only be 1 for transient frames. */ 663 static int tf_analysis(const CELTMode *m, int len, int isTransient, 664 int *tf_res, int lambda, 
celt_norm *X, int N0, int LM, 665 opus_val16 tf_estimate, int tf_chan, int *importance) 666 { 667 int i; 668 VARDECL(int, metric); 669 int cost0; 670 int cost1; 671 VARDECL(int, path0); 672 VARDECL(int, path1); 673 VARDECL(celt_norm, tmp); 674 VARDECL(celt_norm, tmp_1); 675 int sel; 676 int selcost[2]; 677 int tf_select=0; 678 opus_val16 bias; 679 680 SAVE_STACK; 681 bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate)); 682 /*printf("%f ", bias);*/ 683 684 ALLOC(metric, len, int); /* The scratch buffers are sized for the last band (index len-1); presumably that is the widest band -- confirm */ 685 ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); 686 ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); 687 ALLOC(path0, len, int); 688 ALLOC(path1, len, int); 689 /* Per band: find the transform level with the smallest l1_metric() */ 690 for (i=0;i<len;i++) 691 { 692 int k, N; 693 int narrow; 694 opus_val32 L1, best_L1; 695 int best_level=0; 696 N = (m->eBands[i+1]-m->eBands[i])<<LM; 697 /* band is too narrow to be split down to LM=-1 */ 698 narrow = (m->eBands[i+1]-m->eBands[i])==1; 699 OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N); 700 /* Just add the right channel if we're in stereo */ 701 /*if (C==2) 702 for (j=0;j<N;j++) 703 tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/ 704 L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias); 705 best_L1 = L1; 706 /* Check the -1 case for transients */ 707 if (isTransient && !narrow) 708 { 709 OPUS_COPY(tmp_1, tmp, N); 710 haar1(tmp_1, N>>LM, 1<<LM); 711 L1 = l1_metric(tmp_1, N, LM+1, bias); 712 if (L1<best_L1) 713 { 714 best_L1 = L1; 715 best_level = -1; 716 } 717 } 718 /*printf ("%f ", L1);*/ /* Try LM further levels, plus one extra level for non-transient bands that are not narrow */ 719 for (k=0;k<LM+!(isTransient||narrow);k++) 720 { 721 int B; 722 723 if (isTransient) 724 B = (LM-k-1); 725 else 726 B = k+1; 727 728 haar1(tmp, N>>k, 1<<k); 729 730 L1 = l1_metric(tmp, N, B, bias); 731 732 if (L1 < best_L1) 733 { 734 best_L1 = L1; 735 best_level = k+1; 736 } 737 } 738 /*printf ("%d ", isTransient ? 
LM-best_level : best_level);*/ 739 /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */ 740 if (isTransient) 741 metric[i] = 2*best_level; 742 else 743 metric[i] = -2*best_level; 744 /* For bands that can't be split to -1, set the metric to the half-way point to avoid 745 biasing the decision */ 746 if (narrow && (metric[i]==0 || metric[i]==-2*LM)) 747 metric[i]-=1; 748 /*printf("%d ", metric[i]/2 + (!isTransient)*LM);*/ 749 } 750 /*printf("\n");*/ 751 /* Search for the optimal tf resolution, including tf_select */ 752 tf_select = 0; /* First pass: total cost of the cheapest path for each of the two tf_select candidates */ 753 for (sel=0;sel<2;sel++) 754 { 755 cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+0]); 756 cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*sel+1]) + (isTransient ? 0 : lambda); 757 for (i=1;i<len;i++) 758 { 759 int curr0, curr1; 760 curr0 = IMIN(cost0, cost1 + lambda); 761 curr1 = IMIN(cost0 + lambda, cost1); 762 cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]); 763 cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]); 764 } 765 cost0 = IMIN(cost0, cost1); 766 selcost[sel]=cost0; 767 } 768 /* For now, we're conservative and only allow tf_select=1 for transients. 769 * If tests confirm it's useful for non-transients, we could allow it. */ 770 if (selcost[1]<selcost[0] && isTransient) 771 tf_select=1; /* Second pass: repeat the search for the chosen tf_select, this time recording the predecessor of each state */ 772 cost0 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); 773 cost1 = importance[0]*abs(metric[0]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]) + (isTransient ? 
0 : lambda); 774 /* Viterbi forward pass */ 775 for (i=1;i<len;i++) 776 { 777 int curr0, curr1; 778 int from0, from1; 779 780 from0 = cost0; 781 from1 = cost1 + lambda; 782 if (from0 < from1) 783 { 784 curr0 = from0; 785 path0[i]= 0; 786 } else { 787 curr0 = from1; 788 path0[i]= 1; 789 } 790 791 from0 = cost0 + lambda; 792 from1 = cost1; 793 if (from0 < from1) 794 { 795 curr1 = from0; 796 path1[i]= 0; 797 } else { 798 curr1 = from1; 799 path1[i]= 1; 800 } 801 cost0 = curr0 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); 802 cost1 = curr1 + importance[i]*abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]); 803 } 804 tf_res[len-1] = cost0 < cost1 ? 0 : 1; 805 /* Viterbi backward pass: trace the recorded predecessors back from the last band to fill tf_res */ 806 for (i=len-2;i>=0;i--) 807 { 808 if (tf_res[i+1] == 1) 809 tf_res[i] = path1[i+1]; 810 else 811 tf_res[i] = path0[i+1]; 812 } 813 /*printf("%d %f\n", *tf_sum, tf_estimate);*/ 814 RESTORE_STACK; 815 #ifdef FUZZING 816 tf_select = rand()&0x1; 817 tf_res[0] = rand()&0x1; 818 for (i=1;i<len;i++) 819 tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0); 820 #endif 821 return tf_select; 822 } 823 /* Entropy-codes the tf_res flags of bands start..end-1, each as a change relative to the previously coded flag, for as long as the bit budget (enc->storage*8, minus one bit reserved for tf_select when LM>0 and it fits) allows; a flag that cannot be coded is overwritten with the last coded value. tf_select is coded only when a bit was reserved and the two table entries it selects between differ. On return each tf_res[i] holds the tf_select_table entry chosen for that band. */ 824 static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc) 825 { 826 int curr, i; 827 int tf_select_rsv; 828 int tf_changed; 829 int logp; 830 opus_uint32 budget; 831 opus_uint32 tell; 832 budget = enc->storage*8; 833 tell = ec_tell(enc); 834 logp = isTransient ? 2 : 4; 835 /* Reserve space to code the tf_select decision. */ 836 tf_select_rsv = LM>0 && tell+logp+1 <= budget; 837 budget -= tf_select_rsv; 838 curr = tf_changed = 0; 839 for (i=start;i<end;i++) 840 { 841 if (tell+logp<=budget) 842 { 843 ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp); 844 tell = ec_tell(enc); 845 curr = tf_res[i]; 846 tf_changed |= curr; 847 } 848 else 849 tf_res[i] = curr; 850 logp = isTransient ? 4 : 5; 851 } 852 /* Only code tf_select if it would actually make a difference. 
*/ 853 if (tf_select_rsv && 854 tf_select_table[LM][4*isTransient+0+tf_changed]!= 855 tf_select_table[LM][4*isTransient+2+tf_changed]) 856 ec_enc_bit_logp(enc, tf_select, 1); 857 else 858 tf_select = 0; 859 for (i=start;i<end;i++) 860 tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; 861 /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/ 862 } 863 864 /* Computes the allocation trim index, an integer clamped to 0..10. The trim starts at 5 (Q8), or at a lower value when equiv_rate is below 80000. For C==2 it is adjusted by the inter-channel correlation of the first 8 bands and *stereo_saving is updated from the correlation of the bands up to intensity. The spectral tilt of bandLogE, surround_trim and tf_estimate are then subtracted, and, when the float API is compiled in and analysis->valid is set, a term derived from analysis->tonality_slope. FUZZING builds return a random index. */ 865 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, 866 const celt_glog *bandLogE, int end, int LM, int C, int N0, 867 AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, 868 int intensity, celt_glog surround_trim, opus_int32 equiv_rate, int arch) 869 { 870 int i; 871 opus_val32 diff=0; 872 int c; 873 int trim_index; 874 opus_val16 trim = QCONST16(5.f, 8); 875 opus_val16 logXC, logXC2; 876 /* At low bitrate, reducing the trim seems to help. At higher bitrates, it's less 877 clear what's best, so we're keeping it as it was before, at least for now. 
*/ 878 if (equiv_rate < 64000) { 879 trim = QCONST16(4.f, 8); 880 } else if (equiv_rate < 80000) { /* Ramp from 4 towards 5 in steps of 1/16 per 1024 units of equiv_rate above 64000 */ 881 opus_int32 frac = (equiv_rate-64000) >> 10; 882 trim = QCONST16(4.f, 8) + QCONST16(1.f/16.f, 8)*frac; 883 } 884 if (C==2) 885 { 886 opus_val16 sum = 0; /* Q10 */ 887 opus_val16 minXC; /* Q10 */ 888 /* Compute inter-channel correlation for low frequencies */ 889 for (i=0;i<8;i++) 890 { 891 opus_val32 partial; 892 partial = celt_inner_prod_norm_shift(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], 893 (m->eBands[i+1]-m->eBands[i])<<LM, arch); 894 sum = ADD16(sum, EXTRACT16(SHR32(partial, 18))); 895 } 896 sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum); 897 sum = MIN16(QCONST16(1.f, 10), ABS16(sum)); 898 minXC = sum; /* Track the smallest absolute correlation over the remaining bands below intensity */ 899 for (i=8;i<intensity;i++) 900 { 901 opus_val32 partial; 902 partial = celt_inner_prod_norm_shift(&X[m->eBands[i]<<LM], &X[N0+(m->eBands[i]<<LM)], 903 (m->eBands[i+1]-m->eBands[i])<<LM, arch); 904 minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18)))); 905 } 906 minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC)); 907 /*printf ("%f\n", sum);*/ 908 /* mid-side savings estimations based on the LF average*/ 909 logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum)); 910 /* mid-side savings estimations based on min correlation */ 911 logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC))); 912 #ifdef FIXED_POINT 913 /* Compensate for Q20 vs Q14 input and convert output to Q8 */ 914 logXC = PSHR32(logXC-QCONST16(6.f, 10),10-8); 915 logXC2 = PSHR32(logXC2-QCONST16(6.f, 10),10-8); 916 #endif 917 918 trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC)); 919 *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2)); 920 } 921 922 /* Estimate spectral tilt */ 923 c=0; do { 924 for (i=0;i<end-1;i++) 925 { 926 diff += SHR32(bandLogE[i+c*m->nbEBands], 5)*(opus_int32)(2+2*i-end); 927 } 928 } while (++c<C); /* NOTE(review): the divisor is C*(end-1); assumes end > 1 -- confirm */ 929 diff /= C*(end-1); 930 /*printf("%f\n", diff);*/ 931 trim -= MAX32(-QCONST16(2.f, 
8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST32(1.f, DB_SHIFT-5),DB_SHIFT-13)/6 )); 932 trim -= SHR16(surround_trim, DB_SHIFT-8); 933 trim -= 2*SHR16(tf_estimate, 14-8); 934 #ifndef DISABLE_FLOAT_API 935 if (analysis->valid) 936 { 937 trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 938 (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); 939 } 940 #else 941 (void)analysis; 942 #endif 943 /* Round the Q8 trim to the nearest integer and clamp it to 0..10 */ 944 #ifdef FIXED_POINT 945 trim_index = PSHR32(trim, 8); 946 #else 947 trim_index = (int)floor(.5f+trim); 948 #endif 949 trim_index = IMAX(0, IMIN(10, trim_index)); 950 /*printf("%d\n", trim_index);*/ 951 #ifdef FUZZING 952 trim_index = rand()%11; 953 #endif 954 return trim_index; 955 } 956 /* Compares L1-norm cost estimates of left/right versus mid/side coding over the first 13 bands of X (second channel at offset N0). Returns non-zero when the mid/side estimate, scaled by 0.707107 and weighted by the bin count plus a thetas overhead, exceeds the left/right estimate weighted by the bin count alone; presumably a non-zero result selects separate L/R coding -- confirm at the call site. */ 957 static int stereo_analysis(const CELTMode *m, const celt_norm *X, 958 int LM, int N0) 959 { 960 int i; 961 int thetas; 962 opus_val32 sumLR = EPSILON, sumMS = EPSILON; 963 964 /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */ 965 for (i=0;i<13;i++) 966 { 967 int j; 968 for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) 969 { 970 opus_val32 L, R, M, S; 971 /* We cast to 32-bit first because of the -32768 case */ 972 L = SHR32(X[j], NORM_SHIFT-14); 973 R = SHR32(X[N0+j], NORM_SHIFT-14); 974 M = ADD32(L, R); 975 S = SUB32(L, R); 976 sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R))); 977 sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S))); 978 } 979 } 980 sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS); 981 thetas = 13; 982 /* We don't need thetas for lower bands with LM<=1 */ 983 if (LM<=1) 984 thetas -= 8; 985 return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS) 986 > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); 987 } 988 /* Swaps two celt_glog lvalues */ 989 #define MSWAP(a,b) do {celt_glog tmp = a;a=b;b=tmp;} while(0) /* Returns the median of the five values x[0..4]; x itself is not modified. */ 990 static celt_glog median_of_5(const celt_glog *x) 991 { 992 celt_glog t0, t1, t2, t3, t4; 993 t2 = x[2]; /* Order the pair (x[0], x[1]) into t0 <= t1 */ 994 if (x[0] > x[1]) 995 { 996 t0 = x[1]; 997 t1 = x[0]; 998 } else { 999 t0 = x[0]; 1000 t1 = x[1]; 1001 } /* Order the pair (x[3], x[4]) into t3 <= t4 */ 1002 if (x[3] > x[4]) 1003 { 1004 t3 = 
x[4]; 1005 t4 = x[3]; 1006 } else { 1007 t3 = x[3]; 1008 t4 = x[4]; 1009 } /* Make (t0,t1) the pair with the smaller low element, so t0 is the smallest of t0, t1, t3 and t4 */ 1010 if (t0 > t3) 1011 { 1012 MSWAP(t0, t3); 1013 MSWAP(t1, t4); 1014 } /* The median is now the third smallest value; pick it from the position of t2 relative to t1 and t3 */ 1015 if (t2 > t1) 1016 { 1017 if (t1 < t3) 1018 return MING(t2, t3); 1019 else 1020 return MING(t4, t1); 1021 } else { 1022 if (t2 < t3) 1023 return MING(t1, t3); 1024 else 1025 return MING(t2, t4); 1026 } 1027 } 1028 /* Returns the median of the three values x[0..2]; x itself is not modified. */ 1029 static celt_glog median_of_3(const celt_glog *x) 1030 { 1031 celt_glog t0, t1, t2; /* Order the pair (x[0], x[1]) into t0 <= t1 */ 1032 if (x[0] > x[1]) 1033 { 1034 t0 = x[1]; 1035 t1 = x[0]; 1036 } else { 1037 t0 = x[0]; 1038 t1 = x[1]; 1039 } 1040 t2 = x[2]; /* x[2] is above both, between them, or not above the smaller one */ 1041 if (t1 < t2) 1042 return t1; 1043 else if (t0 < t2) 1044 return t2; 1045 else 1046 return t0; 1047 } 1048 1049 static celt_glog dynalloc_analysis(const celt_glog *bandLogE, const celt_glog *bandLogE2, const celt_glog *oldBandE, 1050 int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, 1051 int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, 1052 int effectiveBytes, opus_int32 *tot_boost_, int lfe, celt_glog *surround_dynalloc, 1053 AnalysisInfo *analysis, int *importance, int *spread_weight, opus_val16 tone_freq, opus_val32 toneishness 1054 ARG_QEXT(int qext_scale)) 1055 { 1056 int i, c; 1057 opus_int32 tot_boost=0; 1058 celt_glog maxDepth; 1059 VARDECL(celt_glog, follower); 1060 VARDECL(celt_glog, noise_floor); 1061 VARDECL(celt_glog, bandLogE3); 1062 SAVE_STACK; 1063 ALLOC(follower, C*nbEBands, celt_glog); 1064 ALLOC(noise_floor, C*nbEBands, celt_glog); 1065 ALLOC(bandLogE3, nbEBands, celt_glog); 1066 OPUS_CLEAR(offsets, nbEBands); 1067 /* Dynamic allocation code */ 1068 maxDepth=-GCONST(31.9f); 1069 for (i=0;i<end;i++) 1070 { 1071 /* Noise floor must take into account eMeans, the depth, the width of the bands 1072 and the preemphasis filter (approx. 
square of bark band ID) */ 1073 noise_floor[i] = GCONST(0.0625f)*logN[i] 1074 +GCONST(.5f)+SHL32(9-lsb_depth,DB_SHIFT)-SHL32(eMeans[i],DB_SHIFT-4) 1075 +GCONST(.0062f)*(i+5)*(i+5); 1076 } 1077 c=0;do 1078 { 1079 for (i=0;i<end;i++) 1080 maxDepth = MAXG(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]); 1081 } while (++c<C); 1082 { 1083 /* Compute a really simple masking model to avoid taking into account completely masked 1084 bands when computing the spreading decision. */ 1085 VARDECL(celt_glog, mask); 1086 VARDECL(celt_glog, sig); 1087 ALLOC(mask, nbEBands, celt_glog); 1088 ALLOC(sig, nbEBands, celt_glog); 1089 for (i=0;i<end;i++) 1090 mask[i] = bandLogE[i]-noise_floor[i]; 1091 if (C==2) 1092 { 1093 for (i=0;i<end;i++) 1094 mask[i] = MAXG(mask[i], bandLogE[nbEBands+i]-noise_floor[i]); 1095 } 1096 OPUS_COPY(sig, mask, end); 1097 for (i=1;i<end;i++) 1098 mask[i] = MAXG(mask[i], mask[i-1] - GCONST(2.f)); 1099 for (i=end-2;i>=0;i--) 1100 mask[i] = MAXG(mask[i], mask[i+1] - GCONST(3.f)); 1101 for (i=0;i<end;i++) 1102 { 1103 /* Compute SMR: Mask is never more than 72 dB below the peak and never below the noise floor.*/ 1104 celt_glog smr = sig[i]-MAXG(MAXG(0, maxDepth-GCONST(12.f)), mask[i]); 1105 /* Clamp SMR to make sure we're not shifting by something negative or too large. */ 1106 #ifdef FIXED_POINT 1107 /* FIXME: Use PSHR16() instead */ 1108 int shift = -PSHR32(MAXG(-GCONST(5.f), MING(0, smr)), DB_SHIFT); 1109 #else 1110 int shift = IMIN(5, IMAX(0, -(int)floor(.5f + smr))); 1111 #endif 1112 spread_weight[i] = 32 >> shift; 1113 } 1114 /*for (i=0;i<end;i++) 1115 printf("%d ", spread_weight[i]); 1116 printf("\n");*/ 1117 } 1118 /* Make sure that dynamic allocation can't make us bust the budget. 1119 We enable the feature starting at 24 kb/s for 20-ms frames 1120 and 96 kb/s for 2.5 ms frames. 
*/ 1121 if (effectiveBytes >= (30 + 5*LM) && !lfe) 1122 { 1123 int last=0; 1124 c=0;do 1125 { 1126 celt_glog offset; 1127 celt_glog tmp; 1128 celt_glog *f; 1129 OPUS_COPY(bandLogE3, &bandLogE2[c*nbEBands], end); 1130 if (LM==0) { 1131 /* For 2.5 ms frames, the first 8 bands have just one bin, so the 1132 energy is highly unreliable (high variance). For that reason, 1133 we take the max with the previous energy so that at least 2 bins 1134 are getting used. */ 1135 for (i=0;i<IMIN(8,end);i++) bandLogE3[i] = MAXG(bandLogE2[c*nbEBands+i], oldBandE[c*nbEBands+i]); 1136 } 1137 f = &follower[c*nbEBands]; 1138 f[0] = bandLogE3[0]; 1139 for (i=1;i<end;i++) 1140 { 1141 /* The last band to be at least 3 dB higher than the previous one 1142 is the last we'll consider. Otherwise, we run into problems on 1143 bandlimited signals. */ 1144 if (bandLogE3[i] > bandLogE3[i-1]+GCONST(.5f)) 1145 last=i; 1146 f[i] = MING(f[i-1]+GCONST(1.5f), bandLogE3[i]); 1147 } 1148 for (i=last-1;i>=0;i--) 1149 f[i] = MING(f[i], MING(f[i+1]+GCONST(2.f), bandLogE3[i])); 1150 1151 /* Combine with a median filter to avoid dynalloc triggering unnecessarily. 1152 The "offset" value controls how conservative we are -- a higher offset 1153 reduces the impact of the median filter and makes dynalloc use more bits. 
*/ 1154 offset = GCONST(1.f); 1155 for (i=2;i<end-2;i++) 1156 f[i] = MAXG(f[i], median_of_5(&bandLogE3[i-2])-offset); 1157 tmp = median_of_3(&bandLogE3[0])-offset; 1158 f[0] = MAXG(f[0], tmp); 1159 f[1] = MAXG(f[1], tmp); 1160 tmp = median_of_3(&bandLogE3[end-3])-offset; 1161 f[end-2] = MAXG(f[end-2], tmp); 1162 f[end-1] = MAXG(f[end-1], tmp); 1163 1164 for (i=0;i<end;i++) 1165 f[i] = MAXG(f[i], noise_floor[i]); 1166 } while (++c<C); 1167 if (C==2) 1168 { 1169 for (i=start;i<end;i++) 1170 { 1171 /* Consider 24 dB "cross-talk" */ 1172 follower[nbEBands+i] = MAXG(follower[nbEBands+i], follower[ i]-GCONST(4.f)); 1173 follower[ i] = MAXG(follower[ i], follower[nbEBands+i]-GCONST(4.f)); 1174 follower[i] = HALF32(MAXG(0, bandLogE[i]-follower[i]) + MAXG(0, bandLogE[nbEBands+i]-follower[nbEBands+i])); 1175 } 1176 } else { 1177 for (i=start;i<end;i++) 1178 { 1179 follower[i] = MAXG(0, bandLogE[i]-follower[i]); 1180 } 1181 } 1182 for (i=start;i<end;i++) 1183 follower[i] = MAXG(follower[i], surround_dynalloc[i]); 1184 for (i=start;i<end;i++) 1185 { 1186 #ifdef FIXED_POINT 1187 importance[i] = PSHR32(13*celt_exp2_db(MING(follower[i], GCONST(4.f))), 16); 1188 #else 1189 importance[i] = (int)floor(.5f+13*celt_exp2_db(MING(follower[i], GCONST(4.f)))); 1190 #endif 1191 } 1192 /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */ 1193 if ((!vbr || constrained_vbr)&&!isTransient) 1194 { 1195 for (i=start;i<end;i++) 1196 follower[i] = HALF32(follower[i]); 1197 } 1198 for (i=start;i<end;i++) 1199 { 1200 if (i<8) 1201 follower[i] *= 2; 1202 if (i>=12) 1203 follower[i] = HALF32(follower[i]); 1204 } 1205 /* Compensate for Opus' under-allocation on tones. 
*/ 1206 if (toneishness > QCONST32(.98f, 29)) { 1207 #ifdef FIXED_POINT 1208 int freq_bin = PSHR32(QEXT_SCALE((opus_val32)tone_freq)*QCONST16(120/M_PI, 9), 13+9); 1209 #else 1210 int freq_bin = (int)floor(.5 + QEXT_SCALE(tone_freq)*120/M_PI); 1211 #endif 1212 for (i=start;i<end;i++) { 1213 if (freq_bin >= eBands[i] && freq_bin <= eBands[i+1]) follower[i] += GCONST(2.f); 1214 if (freq_bin >= eBands[i]-1 && freq_bin <= eBands[i+1]+1) follower[i] += GCONST(1.f); 1215 if (freq_bin >= eBands[i]-2 && freq_bin <= eBands[i+1]+2) follower[i] += GCONST(1.f); 1216 if (freq_bin >= eBands[i]-3 && freq_bin <= eBands[i+1]+3) follower[i] += GCONST(.5f); 1217 } 1218 if (freq_bin >= eBands[end]) { 1219 follower[end-1] += GCONST(2.f); 1220 follower[end-2] += GCONST(1.f); 1221 } 1222 } 1223 #ifdef DISABLE_FLOAT_API 1224 (void)analysis; 1225 #else 1226 if (analysis->valid) 1227 { 1228 for (i=start;i<IMIN(LEAK_BANDS, end);i++) 1229 follower[i] = follower[i] + GCONST(1.f/64.f)*analysis->leak_boost[i]; 1230 } 1231 #endif 1232 for (i=start;i<end;i++) 1233 { 1234 int width; 1235 int boost; 1236 int boost_bits; 1237 1238 follower[i] = MING(follower[i], GCONST(4)); 1239 1240 follower[i] = SHR32(follower[i], 8); 1241 width = C*(eBands[i+1]-eBands[i])<<LM; 1242 if (width<6) 1243 { 1244 boost = (int)SHR32(follower[i],DB_SHIFT-8); 1245 boost_bits = boost*width<<BITRES; 1246 } else if (width > 48) { 1247 boost = (int)SHR32(follower[i]*8,DB_SHIFT-8); 1248 boost_bits = (boost*width<<BITRES)/8; 1249 } else { 1250 boost = (int)SHR32(follower[i]*width/6,DB_SHIFT-8); 1251 boost_bits = boost*6<<BITRES; 1252 } 1253 /* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */ 1254 if ((!vbr || (constrained_vbr&&!isTransient)) 1255 && (tot_boost+boost_bits)>>BITRES>>3 > 2*effectiveBytes/3) 1256 { 1257 opus_int32 cap = ((2*effectiveBytes/3)<<BITRES<<3); 1258 offsets[i] = cap-tot_boost; 1259 tot_boost = cap; 1260 break; 1261 } else { 1262 offsets[i] = boost; 1263 tot_boost += boost_bits; 
#ifdef FIXED_POINT
/* Fixed-point only: conditionally scales x[0..len-1] down, in place.
   An energy estimate ac0 is built starting from len and adding every squared
   sample shifted right by 10. The shift amount is then derived from
   celt_ilog2(ac0); samples are only modified when that shift is positive,
   otherwise x is left untouched.
   NOTE(review): presumably this keeps the correlation sums computed on x
   afterwards from overflowing 32 bits -- confirm.
   NOTE(review): the function has external linkage but no prototype is visible
   in this part of the file -- confirm whether it is meant to be static. */
void normalize_tone_input(opus_val16 *x, int len) {
   /* Starting at len (not 0) keeps ac0 strictly positive for celt_ilog2(). */
   opus_val32 ac0=len;
   int i;
   int shift;
   for (i=0;i<len;i++) {
      ac0 = ADD32(ac0, SHR32(MULT16_16(x[i], x[i]), 10));
   }
   /* Half of the energy's excess bit count, since energy grows with the
      square of the amplitude. */
   shift = 5 - (28-celt_ilog2(ac0))/2;
   if (shift > 0) {
      for (i=0;i<len;i++) {
         x[i] = PSHR32(x[i], shift);
      }
   }
}
/* Fixed-point only: integer approximation of the arc-cosine.
   The sign of x is removed first and restored at the end through
   acos(-x) = pi - acos(x), with pi represented by the constant 25736.
   The magnitude is mapped through a quadratic in x14 = |x|>>15 multiplied by
   celt_sqrt((1<<30) - 2|x|), the latter clamped at 0.
   NOTE(review): 25736 ~= pi*2^13 and (1<<30) matches 1.0 for an argument
   scaled by 2^29, so the input looks like Q29 and the result like radians in
   Q13 -- confirm against the caller.
   NOTE(review): external linkage without a visible prototype, as above. */
int acos_approx(opus_val32 x) {
   opus_val16 x14;
   opus_val32 tmp;
   int flip = x<0;
   x = abs(x);
   x14 = x>>15;
   /* Horner evaluation of the polynomial factor. */
   tmp = (762*x14>>14)-3308;
   tmp = (tmp*x14>>14)+25726;
   /* IMAX() guards the square root against a slightly negative argument. */
   tmp = tmp*celt_sqrt(IMAX(0, (1<<30) - (x<<1)))>>16;
   if (flip) tmp = 25736 - tmp;
   return tmp;
}
#endif
*/ 1327 { 1328 opus_val32 R00, R01, R11, R02, R12, R22; 1329 R00 = r00 + r22; 1330 R01 = r01 + r12; 1331 R11 = 2*r11; 1332 R02 = 2*r02; 1333 R12 = r12 + r01; 1334 R22 = r00 + r22; 1335 r00 = R00; 1336 r01 = R01; 1337 r11 = R11; 1338 r02 = R02; 1339 r12 = R12; 1340 r22 = R22; 1341 } 1342 /* Solve A*x=b, where A=[r00, r01; r01, r11] and b=[r02; r12]. */ 1343 den = MULT32_32_Q31(r00,r11) - MULT32_32_Q31(r01,r01); 1344 #ifdef FIXED_POINT 1345 if (den <= SHR32(MULT32_32_Q31(r00,r11), 10)) return 1; 1346 #else 1347 if (den < .001f*MULT32_32_Q31(r00,r11)) return 1; 1348 #endif 1349 num1 = MULT32_32_Q31(r02,r11) - MULT32_32_Q31(r01,r12); 1350 if (num1 >= den) lpc[1] = QCONST32(1.f, 29); 1351 else if (num1 <= -den) lpc[1] = -QCONST32(1.f, 29); 1352 else lpc[1] = frac_div32_q29(num1, den); 1353 num0 = MULT32_32_Q31(r00,r12) - MULT32_32_Q31(r02,r01); 1354 if (HALF32(num0) >= den) lpc[0] = QCONST32(1.999999f, 29); 1355 else if (HALF32(num0) <= -den) lpc[0] = -QCONST32(1.999999f, 29); 1356 else lpc[0] = frac_div32_q29(num0, den); 1357 /*printf("%f %f\n", lpc[0], lpc[1]);*/ 1358 return 0; 1359 } 1360 1361 /* Detects pure of nearly pure tones so we can prevent them from causing problems with the encoder. */ 1362 static opus_val16 tone_detect(const celt_sig *in, int CC, int N, opus_val32 *toneishness, opus_int32 Fs) { 1363 int i; 1364 int delay = 1; 1365 int fail; 1366 opus_val32 lpc[2]; 1367 opus_val16 freq; 1368 VARDECL(opus_val16, x); 1369 SAVE_STACK; 1370 ALLOC(x, N, opus_val16); 1371 /* Shift by SIG_SHIFT+2 (+3 for stereo) to account for HF gain of the preemphasis filter. */ 1372 if (CC==2) { 1373 for (i=0;i<N;i++) x[i] = PSHR32(ADD32(in[i], in[i+N]), SIG_SHIFT+3); 1374 } else { 1375 for (i=0;i<N;i++) x[i] = PSHR32(in[i], SIG_SHIFT+2); 1376 } 1377 #ifdef FIXED_POINT 1378 normalize_tone_input(x, N); 1379 #endif 1380 fail = tone_lpc(x, N, delay, lpc); 1381 /* If our LPC filter resonates too close to DC, retry the analysis with down-sampling. 
*/ 1382 while (delay <= Fs/3000 && (fail || (lpc[0] > QCONST32(1.f, 29) && lpc[1] < 0))) { 1383 delay *= 2; 1384 fail = tone_lpc(x, N, delay, lpc); 1385 } 1386 /* Check that our filter has complex roots. */ 1387 if (!fail && MULT32_32_Q31(lpc[0],lpc[0]) + MULT32_32_Q31(QCONST32(3.999999, 29), lpc[1]) < 0) { 1388 /* Squared radius of the poles. */ 1389 *toneishness = -lpc[1]; 1390 #ifdef FIXED_POINT 1391 freq = (acos_approx(lpc[0]>>1)+delay/2)/delay; 1392 #else 1393 freq = acos(.5f*lpc[0])/delay; 1394 #endif 1395 } else { 1396 freq = -1; 1397 *toneishness=0; 1398 } 1399 /*printf("%f %f %f %f\n", freq, lpc[0], lpc[1], *toneishness);*/ 1400 RESTORE_STACK; 1401 return freq; 1402 } 1403 1404 static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N, 1405 int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int complexity, opus_val16 tf_estimate, 1406 int nbAvailableBytes, AnalysisInfo *analysis, opus_val16 tone_freq, opus_val32 toneishness ARG_QEXT(int qext_scale)) 1407 { 1408 int c; 1409 VARDECL(celt_sig, _pre); 1410 celt_sig *pre[2]; 1411 const CELTMode *mode; 1412 int pitch_index; 1413 opus_val16 gain1; 1414 opus_val16 pf_threshold; 1415 int pf_on; 1416 int qg; 1417 int overlap; 1418 int min_period, max_period; 1419 opus_val32 before[2]={0}, after[2]={0}; 1420 int cancel_pitch=0; 1421 SAVE_STACK; 1422 1423 max_period = QEXT_SCALE(COMBFILTER_MAXPERIOD); 1424 min_period = QEXT_SCALE(COMBFILTER_MINPERIOD); 1425 mode = st->mode; 1426 overlap = mode->overlap; 1427 ALLOC(_pre, CC*(N+max_period), celt_sig); 1428 1429 pre[0] = _pre; 1430 pre[1] = _pre + (N+max_period); 1431 1432 1433 c=0; do { 1434 OPUS_COPY(pre[c], prefilter_mem+c*max_period, max_period); 1435 OPUS_COPY(pre[c]+max_period, in+c*(N+overlap)+overlap, N); 1436 } while (++c<CC); 1437 1438 /* If we detect that the signal is dominated by a single tone, don't rely on the standard pitch 1439 estimator, as it can become unreliable. 
*/ 1440 if (enabled && toneishness > QCONST32(.99f, 29)) { 1441 int multiple=1; 1442 /* Using aliased version of the postfilter above 24 kHz. 1443 First value is purposely slightly above pi to avoid triggering for Fs=48kHz. */ 1444 if (QEXT_SCALE(tone_freq) >= QCONST16(3.1416f, 13)) tone_freq = QCONST16(3.141593f, 13) - tone_freq; 1445 /* If the pitch is too high for our post-filter, apply pitch doubling until 1446 we can get something that fits (not ideal, but better than nothing). */ 1447 while (QEXT_SCALE(tone_freq) >= multiple*QCONST16(0.39f, 13)) multiple++; 1448 if (QEXT_SCALE(tone_freq) > QCONST16(0.006148f, 13)) { 1449 #ifdef FIXED_POINT 1450 pitch_index = IMIN((51472*multiple+QEXT_SCALE(tone_freq)/2)/QEXT_SCALE(tone_freq), COMBFILTER_MAXPERIOD-2); 1451 #else 1452 pitch_index = IMIN((int)floor(.5+2.f*M_PI*multiple/QEXT_SCALE(tone_freq)), COMBFILTER_MAXPERIOD-2); 1453 #endif 1454 } else { 1455 /* If the pitch is too low, using a very high pitch will actually give us an improvement 1456 due to the DC component of the filter that will be close to our tone. Again, not ideal, 1457 but if we only have a single tone, it's better than nothing. 
*/ 1458 pitch_index = COMBFILTER_MINPERIOD; 1459 } 1460 gain1 = QCONST16(.75f, 15); 1461 } else if (enabled && complexity >= 5) { 1462 VARDECL(opus_val16, pitch_buf); 1463 ALLOC(pitch_buf, (max_period+N)>>1, opus_val16); 1464 1465 pitch_downsample(pre, pitch_buf, (max_period+N)>>1, CC, 2, st->arch); 1466 /* Don't search for the fir last 1.5 octave of the range because 1467 there's too many false-positives due to short-term correlation */ 1468 pitch_search(pitch_buf+(max_period>>1), pitch_buf, N, 1469 max_period-3*min_period, &pitch_index, 1470 st->arch); 1471 pitch_index = max_period-pitch_index; 1472 1473 gain1 = remove_doubling(pitch_buf, max_period, min_period, 1474 N, &pitch_index, st->prefilter_period, st->prefilter_gain, st->arch); 1475 if (pitch_index > max_period-QEXT_SCALE(2)) 1476 pitch_index = max_period-QEXT_SCALE(2); 1477 #ifdef ENABLE_QEXT 1478 pitch_index /= qext_scale; 1479 #endif 1480 gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); 1481 /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/ 1482 if (st->loss_rate>2) 1483 gain1 = HALF32(gain1); 1484 if (st->loss_rate>4) 1485 gain1 = HALF32(gain1); 1486 if (st->loss_rate>8) 1487 gain1 = 0; 1488 } else { 1489 gain1 = 0; 1490 pitch_index = COMBFILTER_MINPERIOD; 1491 } 1492 #ifndef DISABLE_FLOAT_API 1493 if (analysis->valid) 1494 gain1 = (opus_val16)(gain1 * analysis->max_pitch_ratio); 1495 #else 1496 (void)analysis; 1497 #endif 1498 /* Gain threshold for enabling the prefilter/postfilter */ 1499 pf_threshold = QCONST16(.2f,15); 1500 1501 /* Adjusting the threshold based on rate and continuity */ 1502 if (abs(pitch_index-st->prefilter_period)*10>pitch_index) 1503 { 1504 pf_threshold += QCONST16(.2f,15); 1505 /* Completely disable the prefilter on strong transients without continuity. 
*/ 1506 if (tf_estimate > QCONST16(.98f, 14)) 1507 gain1 = 0; 1508 } 1509 if (nbAvailableBytes<25) 1510 pf_threshold += QCONST16(.1f,15); 1511 if (nbAvailableBytes<35) 1512 pf_threshold += QCONST16(.1f,15); 1513 if (st->prefilter_gain > QCONST16(.4f,15)) 1514 pf_threshold -= QCONST16(.1f,15); 1515 if (st->prefilter_gain > QCONST16(.55f,15)) 1516 pf_threshold -= QCONST16(.1f,15); 1517 1518 /* Hard threshold at 0.2 */ 1519 pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15)); 1520 if (gain1<pf_threshold) 1521 { 1522 gain1 = 0; 1523 pf_on = 0; 1524 qg = 0; 1525 } else { 1526 /*This block is not gated by a total bits check only because 1527 of the nbAvailableBytes check above.*/ 1528 if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15)) 1529 gain1=st->prefilter_gain; 1530 1531 #ifdef FIXED_POINT 1532 qg = ((gain1+1536)>>10)/3-1; 1533 #else 1534 qg = (int)floor(.5f+gain1*32/3)-1; 1535 #endif 1536 qg = IMAX(0, IMIN(7, qg)); 1537 gain1 = QCONST16(0.09375f,15)*(qg+1); 1538 pf_on = 1; 1539 } 1540 /*printf("%d %f\n", pitch_index, gain1);*/ 1541 1542 c=0; do { 1543 int i; 1544 int offset = mode->shortMdctSize-overlap; 1545 st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); 1546 OPUS_COPY(in+c*(N+overlap), st->in_mem+c*(overlap), overlap); 1547 for (i=0;i<N;i++) before[c] += ABS32(SHR32(in[c*(N+overlap)+overlap+i], 12)); 1548 if (offset) 1549 comb_filter(in+c*(N+overlap)+overlap, pre[c]+max_period, 1550 st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain, 1551 st->prefilter_tapset, st->prefilter_tapset, NULL, 0, st->arch); 1552 1553 comb_filter(in+c*(N+overlap)+overlap+offset, pre[c]+max_period+offset, 1554 st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1, 1555 st->prefilter_tapset, prefilter_tapset, mode->window, overlap, st->arch); 1556 for (i=0;i<N;i++) after[c] += ABS32(SHR32(in[c*(N+overlap)+overlap+i], 12)); 1557 } while (++c<CC); 1558 1559 if (CC==2) { 1560 opus_val16 thresh[2]; 1561 
/* Computes the VBR bit target for the current frame.
   Starting from base_target, the target is successively adjusted for signal
   activity, stereo redundancy, dynalloc boost, transients, tonality, surround
   masking and temporal VBR, then limited by a depth-based floor and finally
   capped at twice base_target.
   mode:             supplies eBands, nbEBands (and shortMdctSize with QEXT).
   analysis:         optional analysis results; only read when
                     DISABLE_FLOAT_API is not defined and analysis->valid is set.
   base_target:      starting target, in the same units as the return value.
   LM:               frame size shift applied to band edges.
   bitrate:          only used to scale the temporal VBR adjustment.
   lastCodedBands:   number of coded bands; 0 means "use all nbEBands".
   C:                number of coded channels (2 enables the stereo terms).
   intensity:        band index limiting the stereo bin count.
   constrained_vbr:  non-zero pulls the result back towards base_target.
   stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, lfe,
   has_surround_mask, surround_masking, temporal_vbr: per-frame inputs to the
                     individual adjustments below.
   Returns the adjusted target (in 1/8th bits per frame, per the comment on
   `target`). */
static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target,
      int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity,
      int constrained_vbr, opus_val16 stereo_saving, int tot_boost,
      opus_val16 tf_estimate, int pitch_change, celt_glog maxDepth,
      int lfe, int has_surround_mask, celt_glog surround_masking,
      celt_glog temporal_vbr ARG_QEXT(int enable_qext))
{
   /* The target rate in 8th bits per frame */
   opus_int32 target;
   int coded_bins;
   int coded_bands;
   opus_val16 tf_calibration;
   int nbEBands;
   const opus_int16 *eBands;

   nbEBands = mode->nbEBands;
   eBands = mode->eBands;

   /* Count the coded bins; for stereo, the bins below the intensity band are
      counted a second time. */
   coded_bands = lastCodedBands ? lastCodedBands : nbEBands;
   coded_bins = eBands[coded_bands]<<LM;
   if (C==2)
      coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM;

   target = base_target;

   /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
#ifndef DISABLE_FLOAT_API
   /* Lower the target when the analysis reports low activity (< 0.4). */
   if (analysis->valid && analysis->activity<.4)
      target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
#endif
   /* Stereo savings */
   if (C==2)
   {
      int coded_stereo_bands;
      int coded_stereo_dof;
      opus_val16 max_frac;
      coded_stereo_bands = IMIN(intensity, coded_bands);
      /* Bins below the intensity band, minus one per band. */
      coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
      /* Maximum fraction of the bits we can save if the signal is mono. */
      max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
      stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8));
      /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
      /* The saving is the smaller of max_frac*target and a term proportional
         to (stereo_saving - 0.1). */
      target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
                      SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
   }
   /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
   target += tot_boost-(19<<LM);
   /* Apply transient boost, compensating for average boost. */
   tf_calibration = QCONST16(0.044f,14);
   target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);

#ifndef DISABLE_FLOAT_API
   /* Apply tonality boost */
   if (analysis->valid && !lfe)
   {
      opus_int32 tonal_target;
      float tonal;

      /* Tonality boost (compensating for the average). */
      tonal = MAX16(0.f,analysis->tonality-.15f)-0.12f;
      tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal);
      /* Extra bits when the caller flagged a pitch change. */
      if (pitch_change)
         tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f);
      /*printf("%f %f ", analysis->tonality, tonal);*/
      target = tonal_target;
   }
#else
   /* Silence unused-parameter warnings when the float analysis is compiled out. */
   (void)analysis;
   (void)pitch_change;
#endif

   if (has_surround_mask&&!lfe)
   {
      /* Offset the target by surround_masking scaled by the coded bin count,
         but never drop below a quarter of the current target. */
      opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(SHR32(surround_masking,DB_SHIFT-10),coded_bins<<BITRES), 10);
      /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/
      target = IMAX(target/4, surround_target);
   }

   {
      /* Cap the target with a limit proportional to maxDepth and the number of
         bins; the cap itself is never lower than target/4. */
      opus_int32 floor_depth;
      int bins;
      bins = eBands[nbEBands-2]<<LM;
#ifdef ENABLE_QEXT
      if (enable_qext) bins = mode->shortMdctSize<<LM;
#endif
      /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
      floor_depth = (opus_int32)SHR32(MULT16_32_Q15((C*bins<<BITRES),maxDepth), DB_SHIFT-15);
      floor_depth = IMAX(floor_depth, target>>2);
      target = IMIN(target, floor_depth);
      /*printf("%f %d\n", maxDepth, floor_depth);*/
   }

   /* Make VBR less aggressive for constrained VBR because we can't keep a higher bitrate
      for long. Needs tuning. */
   if ((!has_surround_mask||lfe) && constrained_vbr)
   {
      /* Keep only 0.67 of the deviation from base_target. */
      target = base_target + (opus_int32)MULT16_32_Q15(QCONST16(0.67f, 15), target-base_target);
   }

   /* Temporal VBR: only without a surround mask and when tf_estimate is below 0.2. */
   if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14))
   {
      opus_val16 amount;
      opus_val16 tvbr_factor;
      /* amount grows linearly with (96000-bitrate), clamped to [0, 32000]. */
      amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate)));
      tvbr_factor = SHR32(MULT16_16(SHR32(temporal_vbr, DB_SHIFT-10), amount), 10);
      target += (opus_int32)MULT16_32_Q15(tvbr_factor, target);
   }

   /* Don't allow more than doubling the rate */
   target = IMIN(2*base_target, target);

   return target;
}
min_allowed; 1756 int start; 1757 int end; 1758 int effEnd; 1759 int codedBands; 1760 int alloc_trim; 1761 int pitch_index=COMBFILTER_MINPERIOD; 1762 opus_val16 gain1 = 0; 1763 int dual_stereo=0; 1764 int effectiveBytes; 1765 int dynalloc_logp; 1766 opus_int32 vbr_rate; 1767 opus_int32 total_bits; 1768 opus_int32 total_boost; 1769 opus_int32 balance; 1770 opus_int32 tell; 1771 opus_int32 tell0_frac; 1772 int prefilter_tapset=0; 1773 int pf_on; 1774 int anti_collapse_rsv; 1775 int anti_collapse_on=0; 1776 int silence=0; 1777 int tf_chan = 0; 1778 opus_val16 tf_estimate; 1779 int pitch_change=0; 1780 opus_int32 tot_boost; 1781 opus_val32 sample_max; 1782 celt_glog maxDepth; 1783 const OpusCustomMode *mode; 1784 int nbEBands; 1785 int overlap; 1786 const opus_int16 *eBands; 1787 int secondMdct; 1788 int signalBandwidth; 1789 int transient_got_disabled=0; 1790 celt_glog surround_masking=0; 1791 celt_glog temporal_vbr=0; 1792 celt_glog surround_trim = 0; 1793 opus_int32 equiv_rate; 1794 int hybrid; 1795 int weak_transient = 0; 1796 int enable_tf_analysis; 1797 opus_val16 tone_freq=-1; 1798 opus_val32 toneishness=0; 1799 VARDECL(celt_glog, surround_dynalloc); 1800 int qext_bytes=0; 1801 int packet_size_cap = 1275; 1802 #ifdef ENABLE_QEXT 1803 int qext_scale; 1804 int qext_end=0; 1805 int qext_intensity=0; 1806 int qext_dual_stereo=0; 1807 int padding_len_bytes=0; 1808 unsigned char *ext_payload; 1809 opus_int32 qext_bits; 1810 ec_enc ext_enc; 1811 VARDECL(int, extra_quant); 1812 VARDECL(int, extra_pulses); 1813 VARDECL(celt_glog, error_bak); 1814 const CELTMode *qext_mode = NULL; 1815 CELTMode qext_mode_struct; 1816 celt_ener qext_bandE[2*NB_QEXT_BANDS]; 1817 celt_glog qext_bandLogE[2*NB_QEXT_BANDS]; 1818 celt_glog *qext_oldBandE=NULL; 1819 celt_glog qext_error[2*NB_QEXT_BANDS]; 1820 #endif 1821 ALLOC_STACK; 1822 1823 mode = st->mode; 1824 nbEBands = mode->nbEBands; 1825 overlap = mode->overlap; 1826 eBands = mode->eBands; 1827 start = st->start; 1828 end = st->end; 1829 
hybrid = start != 0; 1830 tf_estimate = 0; 1831 if (nbCompressedBytes<2 || pcm==NULL) 1832 { 1833 RESTORE_STACK; 1834 return OPUS_BAD_ARG; 1835 } 1836 1837 frame_size *= st->upsample; 1838 for (LM=0;LM<=mode->maxLM;LM++) 1839 if (mode->shortMdctSize<<LM==frame_size) 1840 break; 1841 if (LM>mode->maxLM) 1842 { 1843 RESTORE_STACK; 1844 return OPUS_BAD_ARG; 1845 } 1846 M=1<<LM; 1847 N = M*mode->shortMdctSize; 1848 1849 #ifdef ENABLE_QEXT 1850 qext_scale = st->qext_scale; 1851 if (st->enable_qext) packet_size_cap = QEXT_PACKET_SIZE_CAP; 1852 #endif 1853 1854 prefilter_mem = st->in_mem+CC*(overlap); 1855 oldBandE = (celt_glog*)(st->in_mem+CC*(overlap+QEXT_SCALE(COMBFILTER_MAXPERIOD))); 1856 oldLogE = oldBandE + CC*nbEBands; 1857 oldLogE2 = oldLogE + CC*nbEBands; 1858 energyError = oldLogE2 + CC*nbEBands; 1859 1860 if (enc==NULL) 1861 { 1862 tell0_frac=tell=1; 1863 nbFilledBytes=0; 1864 } else { 1865 tell0_frac=ec_tell_frac(enc); 1866 tell=ec_tell(enc); 1867 nbFilledBytes=(tell+4)>>3; 1868 } 1869 1870 #if defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API) 1871 if (st->signalling && enc==NULL) 1872 { 1873 int tmp = (mode->effEBands-end)>>1; 1874 end = st->end = IMAX(1, mode->effEBands-tmp); 1875 compressed[0] = tmp<<5; 1876 compressed[0] |= LM<<3; 1877 compressed[0] |= (C==2)<<2; 1878 /* Convert "standard mode" to Opus header */ 1879 # ifndef ENABLE_QEXT 1880 if (mode->Fs==48000 && mode->shortMdctSize==120) 1881 # endif 1882 { 1883 int c0 = toOpus(compressed[0]); 1884 if (c0<0) 1885 { 1886 RESTORE_STACK; 1887 return OPUS_BAD_ARG; 1888 } 1889 compressed[0] = c0; 1890 } 1891 compressed++; 1892 nbCompressedBytes--; 1893 } 1894 #else 1895 celt_assert(st->signalling==0); 1896 #endif 1897 1898 /* Can't produce more than 1275 output bytes for the main payload, plus any QEXT extra data. 
*/ 1899 nbCompressedBytes = IMIN(nbCompressedBytes,packet_size_cap); 1900 1901 if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX) 1902 { 1903 vbr_rate = bitrate_to_bits(st->bitrate, mode->Fs, frame_size)<<BITRES; 1904 #if defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API) 1905 if (st->signalling) 1906 vbr_rate -= 8<<BITRES; 1907 #endif 1908 effectiveBytes = vbr_rate>>(3+BITRES); 1909 } else { 1910 opus_int32 tmp; 1911 vbr_rate = 0; 1912 tmp = st->bitrate*frame_size; 1913 if (tell>1) 1914 tmp += tell*mode->Fs; 1915 if (st->bitrate!=OPUS_BITRATE_MAX) 1916 { 1917 nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, 1918 (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); 1919 if (enc != NULL) 1920 ec_enc_shrink(enc, nbCompressedBytes); 1921 } 1922 effectiveBytes = nbCompressedBytes - nbFilledBytes; 1923 } 1924 nbAvailableBytes = nbCompressedBytes - nbFilledBytes; 1925 equiv_rate = ((opus_int32)nbCompressedBytes*8*50 << (3-LM)) - (40*C+20)*((400>>LM) - 50); 1926 if (st->bitrate != OPUS_BITRATE_MAX) 1927 equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50)); 1928 1929 if (enc==NULL) 1930 { 1931 ec_enc_init(&_enc, compressed, nbCompressedBytes); 1932 enc = &_enc; 1933 } 1934 1935 if (vbr_rate>0) 1936 { 1937 /* Computes the max bit-rate allowed in VBR mode to avoid violating the 1938 target rate and buffering. 1939 We must do this up front so that bust-prevention logic triggers 1940 correctly if we don't have enough bits. */ 1941 if (st->constrained_vbr) 1942 { 1943 opus_int32 vbr_bound; 1944 opus_int32 max_allowed; 1945 /* We could use any multiple of vbr_rate as bound (depending on the 1946 delay). 1947 This is clamped to ensure we use at least two bytes if the encoder 1948 was entirely empty, but to allow 0 in hybrid mode. 
*/ 1949 vbr_bound = vbr_rate; 1950 max_allowed = IMIN(IMAX(tell==1?2:0, 1951 (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)), 1952 nbAvailableBytes); 1953 if(max_allowed < nbAvailableBytes) 1954 { 1955 nbCompressedBytes = nbFilledBytes+max_allowed; 1956 nbAvailableBytes = max_allowed; 1957 ec_enc_shrink(enc, nbCompressedBytes); 1958 } 1959 } 1960 } 1961 total_bits = nbCompressedBytes*8; 1962 1963 effEnd = end; 1964 if (effEnd > mode->effEBands) 1965 effEnd = mode->effEBands; 1966 1967 ALLOC(in, CC*(N+overlap), celt_sig); 1968 1969 sample_max=MAX32(st->overlap_max, celt_maxabs_res(pcm, C*(N-overlap)/st->upsample)); 1970 st->overlap_max=celt_maxabs_res(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample); 1971 sample_max=MAX32(sample_max, st->overlap_max); 1972 #ifdef FIXED_POINT 1973 silence = (sample_max==0); 1974 #else 1975 silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth)); 1976 #endif 1977 #ifdef FUZZING 1978 if ((rand()&0x3F)==0) 1979 silence = 1; 1980 #endif 1981 if (tell==1) 1982 ec_enc_bit_logp(enc, silence, 15); 1983 else 1984 silence=0; 1985 if (silence) 1986 { 1987 /*In VBR mode there is no need to send more than the minimum. 
*/ 1988 if (vbr_rate>0) 1989 { 1990 effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2); 1991 total_bits=nbCompressedBytes*8; 1992 nbAvailableBytes=2; 1993 ec_enc_shrink(enc, nbCompressedBytes); 1994 } 1995 #ifdef ENABLE_QEXT 1996 else if (st->enable_qext) { 1997 nbCompressedBytes = IMIN(nbCompressedBytes, 1275); 1998 nbAvailableBytes = nbCompressedBytes - nbFilledBytes; 1999 total_bits = nbCompressedBytes*8; 2000 ec_enc_shrink(enc, nbCompressedBytes); 2001 } 2002 #endif 2003 /* Pretend we've filled all the remaining bits with zeros 2004 (that's what the initialiser did anyway) */ 2005 tell = nbCompressedBytes*8; 2006 enc->nbits_total+=tell-ec_tell(enc); 2007 } 2008 c=0; do { 2009 int need_clip=0; 2010 #ifdef FIXED_POINT 2011 need_clip = st->clip && sample_max>65536<<RES_SHIFT; 2012 #else 2013 need_clip = st->clip && sample_max>65536.f; 2014 #endif 2015 celt_preemphasis(pcm+c, in+c*(N+overlap)+overlap, N, CC, st->upsample, 2016 mode->preemph, st->preemph_memE+c, need_clip); 2017 OPUS_COPY(in+c*(N+overlap), &prefilter_mem[(1+c)*QEXT_SCALE(COMBFILTER_MAXPERIOD)-overlap], overlap); 2018 } while (++c<CC); 2019 2020 2021 tone_freq = tone_detect(in, CC, N+overlap, &toneishness, mode->Fs); 2022 isTransient = 0; 2023 shortBlocks = 0; 2024 if (st->complexity >= 1 && !st->lfe) 2025 { 2026 /* Reduces the likelihood of energy instability on fricatives at low bitrate 2027 in hybrid mode. It seems like we still want to have real transients on vowels 2028 though (small SILK quantization offset value). 
*/ 2029 int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.signalType != 2; 2030 isTransient = transient_analysis(in, N+overlap, CC, 2031 &tf_estimate, &tf_chan, allow_weak_transients, &weak_transient, tone_freq, toneishness); 2032 } 2033 toneishness = MIN32(toneishness, QCONST32(1.f, 29)-SHL32(tf_estimate, 15)); 2034 /* Find pitch period and gain */ 2035 { 2036 int enabled; 2037 int qg; 2038 enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && tell+16<=total_bits && !st->disable_pf; 2039 2040 prefilter_tapset = st->tapset_decision; 2041 pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, st->complexity, tf_estimate, nbAvailableBytes, &st->analysis, tone_freq, toneishness ARG_QEXT(qext_scale)); 2042 if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3) 2043 && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period)) 2044 pitch_change = 1; 2045 if (pf_on==0) 2046 { 2047 if(!hybrid && tell+16<=total_bits) 2048 ec_enc_bit_logp(enc, 0, 1); 2049 } else { 2050 /*This block is not gated by a total bits check only because 2051 of the nbAvailableBytes check above.*/ 2052 int octave; 2053 ec_enc_bit_logp(enc, 1, 1); 2054 pitch_index += 1; 2055 octave = EC_ILOG(pitch_index)-5; 2056 ec_enc_uint(enc, octave, 6); 2057 ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave); 2058 pitch_index -= 1; 2059 ec_enc_bits(enc, qg, 3); 2060 ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2); 2061 } 2062 } 2063 if (LM>0 && ec_tell(enc)+3<=total_bits) 2064 { 2065 if (isTransient) 2066 shortBlocks = M; 2067 } else { 2068 isTransient = 0; 2069 transient_got_disabled=1; 2070 } 2071 2072 ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */ 2073 ALLOC(bandE,nbEBands*CC, celt_ener); 2074 ALLOC(bandLogE,nbEBands*CC, celt_glog); 2075 2076 secondMdct = shortBlocks && 
st->complexity>=8; 2077 ALLOC(bandLogE2, C*nbEBands, celt_glog); 2078 if (secondMdct) 2079 { 2080 compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); 2081 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); 2082 amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); 2083 for (c=0;c<C;c++) 2084 { 2085 for (i=0;i<end;i++) 2086 bandLogE2[nbEBands*c+i] += HALF32(SHL32(LM, DB_SHIFT)); 2087 } 2088 } 2089 2090 compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); 2091 /* This should catch any NaN in the CELT input. Since we're not supposed to see any (they're filtered 2092 at the Opus layer), just abort. */ 2093 celt_assert(!celt_isnan(freq[0]) && (C==1 || !celt_isnan(freq[N]))); 2094 if (CC==2&&C==1) 2095 tf_chan = 0; 2096 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); 2097 2098 if (st->lfe) 2099 { 2100 for (i=2;i<end;i++) 2101 { 2102 bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); 2103 bandE[i] = MAX32(bandE[i], EPSILON); 2104 } 2105 } 2106 amp2Log2(mode, effEnd, end, bandE, bandLogE, C); 2107 2108 ALLOC(surround_dynalloc, C*nbEBands, celt_glog); 2109 OPUS_CLEAR(surround_dynalloc, end); 2110 /* This computes how much masking takes place between surround channels */ 2111 if (!hybrid&&st->energy_mask&&!st->lfe) 2112 { 2113 int mask_end; 2114 int midband; 2115 int count_dynalloc; 2116 opus_val32 mask_avg=0; 2117 opus_val32 diff=0; 2118 int count=0; 2119 mask_end = IMAX(2,st->lastCodedBands); 2120 for (c=0;c<C;c++) 2121 { 2122 for(i=0;i<mask_end;i++) 2123 { 2124 celt_glog mask; 2125 opus_val16 mask16; 2126 mask = MAXG(MING(st->energy_mask[nbEBands*c+i], 2127 GCONST(.25f)), -GCONST(2.0f)); 2128 if (mask > 0) 2129 mask = HALF32(mask); 2130 mask16 = SHR32(mask, DB_SHIFT-10); 2131 mask_avg += MULT16_16(mask16, eBands[i+1]-eBands[i]); 2132 count += eBands[i+1]-eBands[i]; 2133 diff += MULT16_16(mask16, 1+2*i-mask_end); 2134 } 2135 } 2136 celt_assert(count>0); 2137 mask_avg = 
SHL32(DIV32_16(mask_avg,count), DB_SHIFT-10); 2138 mask_avg += GCONST(.2f); 2139 diff = SHL32(diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end), DB_SHIFT-10); 2140 /* Again, being conservative */ 2141 diff = HALF32(diff); 2142 diff = MAX32(MIN32(diff, GCONST(.031f)), -GCONST(.031f)); 2143 /* Find the band that's in the middle of the coded spectrum */ 2144 for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++); 2145 count_dynalloc=0; 2146 for(i=0;i<mask_end;i++) 2147 { 2148 opus_val32 lin; 2149 celt_glog unmask; 2150 lin = mask_avg + diff*(i-midband); 2151 if (C==2) 2152 unmask = MAXG(st->energy_mask[i], st->energy_mask[nbEBands+i]); 2153 else 2154 unmask = st->energy_mask[i]; 2155 unmask = MING(unmask, GCONST(.0f)); 2156 unmask -= lin; 2157 if (unmask > GCONST(.25f)) 2158 { 2159 surround_dynalloc[i] = unmask - GCONST(.25f); 2160 count_dynalloc++; 2161 } 2162 } 2163 if (count_dynalloc>=3) 2164 { 2165 /* If we need dynalloc in many bands, it's probably because our 2166 initial masking rate was too low. */ 2167 mask_avg += GCONST(.25f); 2168 if (mask_avg>0) 2169 { 2170 /* Something went really wrong in the original calculations, 2171 disabling masking. 
*/ 2172 mask_avg = 0; 2173 diff = 0; 2174 OPUS_CLEAR(surround_dynalloc, mask_end); 2175 } else { 2176 for(i=0;i<mask_end;i++) 2177 surround_dynalloc[i] = MAXG(0, surround_dynalloc[i]-GCONST(.25f)); 2178 } 2179 } 2180 mask_avg += GCONST(.2f); 2181 /* Convert to 1/64th units used for the trim */ 2182 surround_trim = 64*diff; 2183 /*printf("%d %d ", mask_avg, surround_trim);*/ 2184 surround_masking = mask_avg; 2185 } 2186 /* Temporal VBR (but not for LFE) */ 2187 if (!st->lfe) 2188 { 2189 celt_glog follow=-QCONST32(10.0f, DB_SHIFT-5); 2190 opus_val32 frame_avg=0; 2191 celt_glog offset = shortBlocks?HALF32(SHL32(LM, DB_SHIFT-5)):0; 2192 for(i=start;i<end;i++) 2193 { 2194 follow = MAXG(follow-QCONST32(1.0f, DB_SHIFT-5), SHR32(bandLogE[i],5)-offset); 2195 if (C==2) 2196 follow = MAXG(follow, SHR32(bandLogE[i+nbEBands],5)-offset); 2197 frame_avg += follow; 2198 } 2199 frame_avg /= (end-start); 2200 temporal_vbr = SUB32(SHL32(frame_avg, 5),st->spec_avg); 2201 temporal_vbr = MING(GCONST(3.f), MAXG(-GCONST(1.5f), temporal_vbr)); 2202 st->spec_avg += MULT16_32_Q15(QCONST16(.02f, 15), temporal_vbr); 2203 } 2204 /*for (i=0;i<21;i++) 2205 printf("%f ", bandLogE[i]); 2206 printf("\n");*/ 2207 2208 if (!secondMdct) 2209 { 2210 OPUS_COPY(bandLogE2, bandLogE, C*nbEBands); 2211 } 2212 2213 /* Last chance to catch any transient we might have missed in the 2214 time-domain analysis */ 2215 if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe && !hybrid) 2216 { 2217 if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) 2218 { 2219 isTransient = 1; 2220 shortBlocks = M; 2221 compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); 2222 compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); 2223 amp2Log2(mode, effEnd, end, bandE, bandLogE, C); 2224 /* Compensate for the scaling of short vs long mdcts */ 2225 for (c=0;c<C;c++) 2226 { 2227 for (i=0;i<end;i++) 2228 bandLogE2[nbEBands*c+i] += 
HALF32(SHL32(LM, DB_SHIFT)); 2229 } 2230 tf_estimate = QCONST16(.2f,14); 2231 } 2232 } 2233 2234 if (LM>0 && ec_tell(enc)+3<=total_bits) 2235 ec_enc_bit_logp(enc, isTransient, 3); 2236 2237 ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ 2238 2239 /* Band normalisation */ 2240 normalise_bands(mode, freq, X, bandE, effEnd, C, M); 2241 2242 enable_tf_analysis = effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe && toneishness < QCONST32(.98f, 29); 2243 2244 ALLOC(offsets, nbEBands, int); 2245 ALLOC(importance, nbEBands, int); 2246 ALLOC(spread_weight, nbEBands, int); 2247 2248 maxDepth = dynalloc_analysis(bandLogE, bandLogE2, oldBandE, nbEBands, start, end, C, offsets, 2249 st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, 2250 eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight, tone_freq, toneishness ARG_QEXT(qext_scale)); 2251 2252 ALLOC(tf_res, nbEBands, int); 2253 /* Disable variable tf resolution for hybrid and at very low bitrate */ 2254 if (enable_tf_analysis) 2255 { 2256 int lambda; 2257 lambda = IMAX(80, 20480/effectiveBytes + 2); 2258 tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan, importance); 2259 for (i=effEnd;i<end;i++) 2260 tf_res[i] = tf_res[effEnd-1]; 2261 } else if (hybrid && weak_transient) 2262 { 2263 /* For weak transients, we rely on the fact that improving time resolution using 2264 TF on a long window is imperfect and will not result in an energy collapse at 2265 low bitrate. */ 2266 for (i=0;i<end;i++) 2267 tf_res[i] = 1; 2268 tf_select=0; 2269 } else if (hybrid && effectiveBytes<15 && st->silk_info.signalType != 2) 2270 { 2271 /* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. 
*/ 2272 for (i=0;i<end;i++) 2273 tf_res[i] = 0; 2274 tf_select=isTransient; 2275 } else { 2276 for (i=0;i<end;i++) 2277 tf_res[i] = isTransient; 2278 tf_select=0; 2279 } 2280 2281 ALLOC(error, C*nbEBands, celt_glog); 2282 c=0; 2283 do { 2284 for (i=start;i<end;i++) 2285 { 2286 /* When the energy is stable, slightly bias energy quantization towards 2287 the previous error to make the gain more stable (a constant offset is 2288 better than fluctuations). */ 2289 if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < GCONST(2.f)) 2290 { 2291 bandLogE[i+c*nbEBands] -= MULT16_32_Q15(QCONST16(0.25f, 15), energyError[i+c*nbEBands]); 2292 } 2293 } 2294 } while (++c < C); 2295 quant_coarse_energy(mode, start, end, effEnd, bandLogE, 2296 oldBandE, total_bits, error, enc, 2297 C, LM, nbAvailableBytes, st->force_intra, 2298 &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); 2299 2300 tf_encode(start, end, isTransient, tf_res, LM, tf_select, enc); 2301 2302 if (ec_tell(enc)+4<=total_bits) 2303 { 2304 if (st->lfe) 2305 { 2306 st->tapset_decision = 0; 2307 st->spread_decision = SPREAD_NORMAL; 2308 } else if (hybrid) 2309 { 2310 if (st->complexity == 0) 2311 st->spread_decision = SPREAD_NONE; 2312 else if (isTransient) 2313 st->spread_decision = SPREAD_NORMAL; 2314 else 2315 st->spread_decision = SPREAD_AGGRESSIVE; 2316 } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C) 2317 { 2318 if (st->complexity == 0) 2319 st->spread_decision = SPREAD_NONE; 2320 else 2321 st->spread_decision = SPREAD_NORMAL; 2322 } else { 2323 /* Disable new spreading+tapset estimator until we can show it works 2324 better than the old one. So far it seems like spreading_decision() 2325 works best. 
*/ 2326 #if 0 2327 if (st->analysis.valid) 2328 { 2329 static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)}; 2330 static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)}; 2331 static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)}; 2332 static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)}; 2333 st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision); 2334 st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision); 2335 } else 2336 #endif 2337 { 2338 st->spread_decision = spreading_decision(mode, X, 2339 &st->tonal_average, st->spread_decision, &st->hf_average, 2340 &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M, spread_weight); 2341 } 2342 /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ 2343 /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ 2344 } 2345 ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); 2346 } else { 2347 st->spread_decision = SPREAD_NORMAL; 2348 } 2349 2350 /* For LFE, everything interesting is in the first band */ 2351 if (st->lfe) 2352 offsets[0] = IMIN(8, effectiveBytes/3); 2353 ALLOC(cap, nbEBands, int); 2354 init_caps(mode,cap,LM,C); 2355 2356 dynalloc_logp = 6; 2357 total_bits<<=BITRES; 2358 total_boost = 0; 2359 tell = ec_tell_frac(enc); 2360 for (i=start;i<end;i++) 2361 { 2362 int width, quanta; 2363 int dynalloc_loop_logp; 2364 int boost; 2365 int j; 2366 width = C*(eBands[i+1]-eBands[i])<<LM; 2367 /* quanta is 6 bits, but no more than 1 bit/sample 2368 and no less than 1/8 bit/sample */ 2369 quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width)); 2370 dynalloc_loop_logp = dynalloc_logp; 2371 boost = 0; 2372 for (j = 0; 
tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost 2373 && boost < cap[i]; j++) 2374 { 2375 int flag; 2376 flag = j<offsets[i]; 2377 ec_enc_bit_logp(enc, flag, dynalloc_loop_logp); 2378 tell = ec_tell_frac(enc); 2379 if (!flag) 2380 break; 2381 boost += quanta; 2382 total_boost += quanta; 2383 dynalloc_loop_logp = 1; 2384 } 2385 /* Making dynalloc more likely */ 2386 if (j) 2387 dynalloc_logp = IMAX(2, dynalloc_logp-1); 2388 offsets[i] = boost; 2389 } 2390 2391 if (C==2) 2392 { 2393 static const opus_val16 intensity_thresholds[21]= 2394 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/ 2395 { 1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134}; 2396 static const opus_val16 intensity_histeresis[21]= 2397 { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 8, 8}; 2398 2399 /* Always use MS for 2.5 ms frames until we can do a better analysis */ 2400 if (LM!=0) 2401 dual_stereo = stereo_analysis(mode, X, LM, N); 2402 2403 st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000), 2404 intensity_thresholds, intensity_histeresis, 21, st->intensity); 2405 st->intensity = IMIN(end,IMAX(start, st->intensity)); 2406 } 2407 2408 alloc_trim = 5; 2409 if (tell+(6<<BITRES) <= total_bits - total_boost) 2410 { 2411 if (start > 0 || st->lfe) 2412 { 2413 st->stereo_saving = 0; 2414 alloc_trim = 5; 2415 } else { 2416 alloc_trim = alloc_trim_analysis(mode, X, bandLogE, 2417 end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, 2418 st->intensity, surround_trim, equiv_rate, st->arch); 2419 } 2420 ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); 2421 tell = ec_tell_frac(enc); 2422 } 2423 2424 /* In VBR mode the frame size must not be reduced so much that it would 2425 result in the encoder running out of bits. 2426 The margin of 2 bytes ensures that none of the bust-prevention logic 2427 in the decoder will have triggered so far. 
*/ 2428 min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2; 2429 /* Take into account the 37 bits we need to have left in the packet to 2430 signal a redundant frame in hybrid mode. Creating a shorter packet would 2431 create an entropy coder desync. */ 2432 if (hybrid) 2433 min_allowed = IMAX(min_allowed, (tell0_frac+(37<<BITRES)+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)); 2434 /* Variable bitrate */ 2435 if (vbr_rate>0) 2436 { 2437 opus_val16 alpha; 2438 opus_int32 delta; 2439 /* The target rate in 8th bits per frame */ 2440 opus_int32 target, base_target; 2441 int lm_diff = mode->maxLM - LM; 2442 2443 /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. 2444 The CELT allocator will just not be able to use more than that anyway. */ 2445 nbCompressedBytes = IMIN(nbCompressedBytes,packet_size_cap>>(3-LM)); 2446 if (!hybrid) 2447 { 2448 base_target = vbr_rate - ((40*C+20)<<BITRES); 2449 } else { 2450 base_target = IMAX(0, vbr_rate - ((9*C+4)<<BITRES)); 2451 } 2452 2453 if (st->constrained_vbr) 2454 base_target += (st->vbr_offset>>lm_diff); 2455 2456 if (!hybrid) 2457 { 2458 target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, 2459 st->lastCodedBands, C, st->intensity, st->constrained_vbr, 2460 st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, 2461 st->lfe, st->energy_mask!=NULL, surround_masking, 2462 temporal_vbr ARG_QEXT(st->enable_qext)); 2463 } else { 2464 target = base_target; 2465 /* Tonal frames (offset<100) need more bits than noisy (offset>100) ones. */ 2466 if (st->silk_info.offset < 100) target += 12 << BITRES >> (3-LM); 2467 if (st->silk_info.offset > 100) target -= 18 << BITRES >> (3-LM); 2468 /* Boosting bitrate on transients and vowels with significant temporal 2469 spikes. 
*/ 2470 target += (opus_int32)MULT16_16_Q14(tf_estimate-QCONST16(.25f,14), (50<<BITRES)); 2471 /* If we have a strong transient, let's make sure it has enough bits to code 2472 the first two bands, so that it can use folding rather than noise. */ 2473 if (tf_estimate > QCONST16(.7f,14)) 2474 target = IMAX(target, 50<<BITRES); 2475 } 2476 /* The current offset is removed from the target and the space used 2477 so far is added*/ 2478 target=target+tell; 2479 2480 nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3); 2481 nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes); 2482 nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes); 2483 2484 /* By how much did we "miss" the target on that frame */ 2485 delta = target - vbr_rate; 2486 2487 target=nbAvailableBytes<<(BITRES+3); 2488 2489 /*If the frame is silent we don't adjust our drift, otherwise 2490 the encoder will shoot to very high rates after hitting a 2491 span of silence, but we do allow the bitres to refill. 2492 This means that we'll undershoot our target in CVBR/VBR modes 2493 on files with lots of silence. 
*/ 2494 if(silence) 2495 { 2496 nbAvailableBytes = 2; 2497 target = 2*8<<BITRES; 2498 delta = 0; 2499 } 2500 2501 if (st->vbr_count < 970) 2502 { 2503 st->vbr_count++; 2504 alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16)); 2505 } else 2506 alpha = QCONST16(.001f,15); 2507 /* How many bits have we used in excess of what we're allowed */ 2508 if (st->constrained_vbr) 2509 st->vbr_reservoir += target - vbr_rate; 2510 /*printf ("%d\n", st->vbr_reservoir);*/ 2511 2512 /* Compute the offset we need to apply in order to reach the target */ 2513 if (st->constrained_vbr) 2514 { 2515 st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift); 2516 st->vbr_offset = -st->vbr_drift; 2517 } 2518 /*printf ("%d\n", st->vbr_drift);*/ 2519 2520 if (st->constrained_vbr && st->vbr_reservoir < 0) 2521 { 2522 /* We're under the min value -- increase rate */ 2523 int adjust = (-st->vbr_reservoir)/(8<<BITRES); 2524 /* Unless we're just coding silence */ 2525 nbAvailableBytes += silence?0:adjust; 2526 st->vbr_reservoir = 0; 2527 /*printf ("+%d\n", adjust);*/ 2528 } 2529 nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes); 2530 /*printf("%d\n", nbCompressedBytes*50*8);*/ 2531 /* This moves the raw bits to take into account the new compressed size */ 2532 ec_enc_shrink(enc, nbCompressedBytes); 2533 } 2534 #ifdef ENABLE_QEXT 2535 if (st->enable_qext) { 2536 int new_compressedBytes; 2537 /* Don't give any bits for the first 80 kb/s per channel. Then 80% of the excess. 
*/ 2538 opus_int32 offset = bitrate_to_bits(C*80000, mode->Fs, frame_size)/8; 2539 qext_bytes = IMAX(nbCompressedBytes-1275, IMAX(0, (nbCompressedBytes-offset)*4/5)); 2540 if (qext_bytes > 20) { 2541 opus_int32 target; 2542 opus_val16 scale; 2543 target = ((nbCompressedBytes-qext_bytes/3)*8<<BITRES); 2544 if (!vbr_rate) { 2545 opus_val16 tf_estimate2; 2546 target -= ((40*C+20)<<BITRES); 2547 tf_estimate2 = MIN32(QCONST16(1.f, 14), 2*EXTEND32(tf_estimate)); 2548 target = compute_vbr(mode, &st->analysis, target, LM, equiv_rate, 2549 st->lastCodedBands, C, st->intensity, st->constrained_vbr, 2550 st->stereo_saving, tot_boost, tf_estimate2, pitch_change, maxDepth, 2551 st->lfe, st->energy_mask!=NULL, surround_masking, 2552 temporal_vbr ARG_QEXT(st->enable_qext)); 2553 target += tell; 2554 } 2555 scale = PSHR32(toneishness,14); 2556 scale = Q15ONE - MULT16_16_Q15(scale, scale); 2557 qext_bytes += MULT16_32_Q15(scale, (nbCompressedBytes-(target/(8<<BITRES))) - qext_bytes); 2558 qext_bytes = IMAX(nbCompressedBytes-1275, IMAX(21, qext_bytes)); 2559 } 2560 padding_len_bytes = (qext_bytes+253)/254; 2561 qext_bytes = IMIN(qext_bytes, nbCompressedBytes-min_allowed-padding_len_bytes-1); 2562 padding_len_bytes = (qext_bytes+253)/254; 2563 if (qext_bytes > 20) { 2564 new_compressedBytes = nbCompressedBytes-qext_bytes-padding_len_bytes-1; 2565 ec_enc_shrink(enc, new_compressedBytes); 2566 if (compressed == NULL) { 2567 compressed = enc->buf; 2568 } 2569 compressed[-1] |= 0x03; /* Code 3 packet */ 2570 enc->buf += 1+padding_len_bytes; 2571 OPUS_MOVE(compressed+1+padding_len_bytes, compressed, new_compressedBytes); 2572 compressed[0] = 0x41; /* Set padding */ 2573 for (i=0;i<padding_len_bytes-1;i++) compressed[i+1] = 255; 2574 compressed[padding_len_bytes] = qext_bytes%254 == 0 ? 
254 : qext_bytes%254; 2575 ext_payload = compressed+padding_len_bytes+1+new_compressedBytes; 2576 ext_payload[0] = QEXT_EXTENSION_ID<<1; 2577 ext_payload += 1; 2578 qext_bytes -= 1; 2579 OPUS_CLEAR(ext_payload, qext_bytes); 2580 ec_enc_init(&ext_enc, ext_payload, qext_bytes); 2581 nbCompressedBytes = new_compressedBytes; 2582 if (end == nbEBands && (mode->Fs == 48000 || mode->Fs == 96000) && (mode->shortMdctSize==120*qext_scale || mode->shortMdctSize==90*qext_scale)) { 2583 compute_qext_mode(&qext_mode_struct, mode); 2584 qext_mode = &qext_mode_struct; 2585 qext_end = (qext_scale == 2) ? NB_QEXT_BANDS : 2; 2586 ec_enc_bit_logp(&ext_enc, qext_end == NB_QEXT_BANDS, 1); 2587 } 2588 } else { 2589 ec_enc_init(&ext_enc, NULL, 0); 2590 qext_bytes = 0; 2591 } 2592 } else { 2593 ec_enc_init(&ext_enc, NULL, 0); 2594 } 2595 #endif 2596 2597 /* Bit allocation */ 2598 ALLOC(fine_quant, nbEBands, int); 2599 ALLOC(pulses, nbEBands, int); 2600 ALLOC(fine_priority, nbEBands, int); 2601 2602 /* bits = packet size - where we are - safety*/ 2603 bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - (opus_int32)ec_tell_frac(enc) - 1; 2604 anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? 
(1<<BITRES) : 0; 2605 bits -= anti_collapse_rsv; 2606 signalBandwidth = end-1; 2607 #ifndef DISABLE_FLOAT_API 2608 if (st->analysis.valid) 2609 { 2610 int min_bandwidth; 2611 if (equiv_rate < (opus_int32)32000*C) 2612 min_bandwidth = 13; 2613 else if (equiv_rate < (opus_int32)48000*C) 2614 min_bandwidth = 16; 2615 else if (equiv_rate < (opus_int32)60000*C) 2616 min_bandwidth = 18; 2617 else if (equiv_rate < (opus_int32)80000*C) 2618 min_bandwidth = 19; 2619 else 2620 min_bandwidth = 20; 2621 signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth); 2622 } 2623 #endif 2624 if (st->lfe) 2625 signalBandwidth = 1; 2626 codedBands = clt_compute_allocation(mode, start, end, offsets, cap, 2627 alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, 2628 fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); 2629 if (st->lastCodedBands) 2630 st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands)); 2631 else 2632 st->lastCodedBands = codedBands; 2633 2634 quant_fine_energy(mode, start, end, oldBandE, error, NULL, fine_quant, enc, C); 2635 OPUS_CLEAR(energyError, nbEBands*CC); 2636 #ifdef ENABLE_QEXT 2637 if (qext_mode) 2638 { 2639 /* Don't bias for intra. 
*/ 2640 opus_val32 qext_delayedIntra=0; 2641 qext_oldBandE = energyError + CC*nbEBands; 2642 compute_band_energies(qext_mode, freq, qext_bandE, qext_end, C, LM, st->arch); 2643 normalise_bands(qext_mode, freq, X, qext_bandE, qext_end, C, M); 2644 amp2Log2(qext_mode, qext_end, qext_end, qext_bandE, qext_bandLogE, C); 2645 if (C==2) { 2646 qext_intensity = qext_end; 2647 qext_dual_stereo = dual_stereo; 2648 encode_qext_stereo_params(&ext_enc, qext_end, qext_intensity, qext_dual_stereo); 2649 } 2650 quant_coarse_energy(qext_mode, 0, qext_end, qext_end, qext_bandLogE, 2651 qext_oldBandE, qext_bytes*8, qext_error, &ext_enc, 2652 C, LM, qext_bytes, st->force_intra, 2653 &qext_delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); 2654 } 2655 ALLOC(extra_quant, nbEBands+NB_QEXT_BANDS, int); 2656 ALLOC(extra_pulses, nbEBands+NB_QEXT_BANDS, int); 2657 ALLOC(error_bak, C*nbEBands, celt_glog); 2658 2659 qext_bits = ((opus_int32)qext_bytes*8<<BITRES) - (opus_int32)ec_tell_frac(enc) - 1; 2660 clt_compute_extra_allocation(mode, qext_mode, start, end, qext_end, bandLogE, qext_bandLogE, 2661 qext_bits, extra_pulses, extra_quant, C, LM, &ext_enc, 1, tone_freq, toneishness); 2662 OPUS_COPY(error_bak, error, C*nbEBands); 2663 if (qext_bytes > 0) { 2664 quant_fine_energy(mode, start, end, oldBandE, error, fine_quant, extra_quant, &ext_enc, C); 2665 } 2666 #endif 2667 2668 /* Residual quantisation */ 2669 ALLOC(collapse_masks, C*nbEBands, unsigned char); 2670 quant_all_bands(1, mode, start, end, X, C==2 ? 
X+N : NULL, collapse_masks, 2671 bandE, pulses, shortBlocks, st->spread_decision, 2672 dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, 2673 balance, enc, LM, codedBands, &st->rng, st->complexity, st->arch, st->disable_inv 2674 ARG_QEXT(&ext_enc) ARG_QEXT(extra_pulses) 2675 ARG_QEXT(qext_bytes*(8<<BITRES)) ARG_QEXT(cap)); 2676 2677 #ifdef ENABLE_QEXT 2678 if (qext_mode) { 2679 VARDECL(int, zeros); 2680 VARDECL(unsigned char, qext_collapse_masks); 2681 ec_enc dummy_enc; 2682 int ext_balance; 2683 ALLOC(zeros, nbEBands, int); 2684 ALLOC(qext_collapse_masks, C*NB_QEXT_BANDS, unsigned char); 2685 ec_enc_init(&dummy_enc, NULL, 0); 2686 OPUS_CLEAR(zeros, end); 2687 ext_balance = qext_bytes*(8<<BITRES) - ec_tell_frac(&ext_enc); 2688 for (i=0;i<qext_end;i++) ext_balance -= extra_pulses[nbEBands+i] + C*(extra_quant[nbEBands+1]<<BITRES); 2689 quant_fine_energy(qext_mode, 0, qext_end, qext_oldBandE, qext_error, NULL, &extra_quant[nbEBands], &ext_enc, C); 2690 quant_all_bands(1, qext_mode, 0, qext_end, X, C==2 ? 
X+N : NULL, qext_collapse_masks, 2691 qext_bandE, &extra_pulses[nbEBands], shortBlocks, st->spread_decision, 2692 qext_dual_stereo, qext_intensity, zeros, qext_bytes*(8<<BITRES), 2693 ext_balance, &ext_enc, LM, qext_end, &st->rng, st->complexity, st->arch, st->disable_inv, &dummy_enc, zeros, 0, NULL); 2694 } 2695 #endif 2696 2697 if (anti_collapse_rsv > 0) 2698 { 2699 anti_collapse_on = st->consec_transient<2; 2700 #ifdef FUZZING 2701 anti_collapse_on = rand()&0x1; 2702 #endif 2703 ec_enc_bits(enc, anti_collapse_on, 1); 2704 } 2705 if (qext_bytes == 0) 2706 quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); 2707 c=0; 2708 do { 2709 for (i=start;i<end;i++) 2710 { 2711 energyError[i+c*nbEBands] = MAXG(-GCONST(0.5f), MING(GCONST(0.5f), error[i+c*nbEBands])); 2712 } 2713 } while (++c < C); 2714 #ifdef ENABLE_QEXT 2715 if (qext_bytes > 0) 2716 quant_energy_finalise(mode, start, end, NULL, error_bak, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); 2717 #endif 2718 if (silence) 2719 { 2720 for (i=0;i<C*nbEBands;i++) 2721 oldBandE[i] = -GCONST(28.f); 2722 } 2723 2724 #ifdef RESYNTH 2725 /* Re-synthesis of the coded audio if required */ 2726 { 2727 celt_sig *out_mem[2]; 2728 2729 if (anti_collapse_on) 2730 { 2731 anti_collapse(mode, X, collapse_masks, LM, C, N, 2732 start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, 1, st->arch); 2733 } 2734 2735 c=0; do { 2736 OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, QEXT_SCALE(DEC_PITCH_BUF_SIZE)-N+overlap/2); 2737 } while (++c<CC); 2738 2739 c=0; do { 2740 out_mem[c] = st->syn_mem[c]+QEXT_SCALE(DEC_PITCH_BUF_SIZE)-N; 2741 } while (++c<CC); 2742 2743 celt_synthesis(mode, X, out_mem, oldBandE, start, effEnd, 2744 C, CC, isTransient, LM, st->upsample, silence, st->arch ARG_QEXT(qext_mode) ARG_QEXT(qext_oldBandE) ARG_QEXT(qext_end)); 2745 2746 c=0; do { 2747 st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); 2748 
st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); 2749 comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize, 2750 st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, 2751 mode->window, overlap, st->arch); 2752 if (LM!=0) 2753 comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize, 2754 st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, 2755 mode->window, overlap, st->arch); 2756 } while (++c<CC); 2757 2758 /* We reuse freq[] as scratch space for the de-emphasis */ 2759 deemphasis(out_mem, (opus_res*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, 0); 2760 st->prefilter_period_old = st->prefilter_period; 2761 st->prefilter_gain_old = st->prefilter_gain; 2762 st->prefilter_tapset_old = st->prefilter_tapset; 2763 } 2764 #endif 2765 2766 st->prefilter_period = pitch_index; 2767 st->prefilter_gain = gain1; 2768 st->prefilter_tapset = prefilter_tapset; 2769 #ifdef RESYNTH 2770 if (LM!=0) 2771 { 2772 st->prefilter_period_old = st->prefilter_period; 2773 st->prefilter_gain_old = st->prefilter_gain; 2774 st->prefilter_tapset_old = st->prefilter_tapset; 2775 } 2776 #endif 2777 2778 if (CC==2&&C==1) { 2779 OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands); 2780 } 2781 2782 if (!isTransient) 2783 { 2784 OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands); 2785 OPUS_COPY(oldLogE, oldBandE, CC*nbEBands); 2786 } else { 2787 for (i=0;i<CC*nbEBands;i++) 2788 oldLogE[i] = MING(oldLogE[i], oldBandE[i]); 2789 } 2790 /* In case start or end were to change */ 2791 c=0; do 2792 { 2793 for (i=0;i<start;i++) 2794 { 2795 oldBandE[c*nbEBands+i]=0; 2796 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-GCONST(28.f); 2797 } 2798 for (i=end;i<nbEBands;i++) 2799 { 2800 oldBandE[c*nbEBands+i]=0; 2801 oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-GCONST(28.f); 2802 } 2803 } while (++c<CC); 
2804 2805 if (isTransient || transient_got_disabled) 2806 st->consec_transient++; 2807 else 2808 st->consec_transient=0; 2809 st->rng = enc->rng; 2810 2811 /* If there's any room left (can only happen for very high rates), 2812 it's already filled with zeros */ 2813 ec_enc_done(enc); 2814 #ifdef ENABLE_QEXT 2815 ec_enc_done(&ext_enc); 2816 if (qext_bytes > 0) 2817 nbCompressedBytes += padding_len_bytes+2+qext_bytes; 2818 if (qext_bytes) st->rng = st->rng ^ ext_enc.rng; 2819 if (ec_get_error(&ext_enc)) 2820 return OPUS_INTERNAL_ERROR; 2821 #endif 2822 #if defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API) 2823 if (st->signalling) 2824 nbCompressedBytes++; 2825 #endif 2826 2827 RESTORE_STACK; 2828 if (ec_get_error(enc)) 2829 return OPUS_INTERNAL_ERROR; 2830 else 2831 return nbCompressedBytes; 2832 } 2833 2834 2835 #if defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API) 2836 2837 #if defined(FIXED_POINT) && !defined(ENABLE_RES24) 2838 int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2839 { 2840 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); 2841 } 2842 #else 2843 int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2844 { 2845 int j, ret, C, N; 2846 VARDECL(opus_res, in); 2847 ALLOC_STACK; 2848 2849 if (pcm==NULL) 2850 return OPUS_BAD_ARG; 2851 2852 C = st->channels; 2853 N = frame_size; 2854 ALLOC(in, C*N, opus_res); 2855 2856 for (j=0;j<C*N;j++) 2857 in[j] = INT16TORES(pcm[j]); 2858 2859 ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); 2860 #ifdef RESYNTH 2861 for (j=0;j<C*N;j++) 2862 ((opus_int16*)pcm)[j]=RES2INT16(in[j]); 2863 #endif 2864 RESTORE_STACK; 2865 return ret; 2866 } 2867 #endif 2868 2869 2870 #if defined(FIXED_POINT) && defined(ENABLE_RES24) 2871 int opus_custom_encode24(CELTEncoder * 
OPUS_RESTRICT st, const opus_int32 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2872 { 2873 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); 2874 } 2875 #else 2876 int opus_custom_encode24(CELTEncoder * OPUS_RESTRICT st, const opus_int32 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2877 { 2878 int j, ret, C, N; 2879 VARDECL(opus_res, in); 2880 ALLOC_STACK; 2881 2882 if (pcm==NULL) 2883 return OPUS_BAD_ARG; 2884 2885 C = st->channels; 2886 N = frame_size; 2887 ALLOC(in, C*N, opus_res); 2888 2889 for (j=0;j<C*N;j++) 2890 in[j] = INT24TORES(pcm[j]); 2891 2892 ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); 2893 #ifdef RESYNTH 2894 for (j=0;j<C*N;j++) 2895 ((opus_int32*)pcm)[j]=RES2INT24(in[j]); 2896 #endif 2897 RESTORE_STACK; 2898 return ret; 2899 } 2900 #endif 2901 2902 2903 #ifndef DISABLE_FLOAT_API 2904 2905 # if !defined(FIXED_POINT) 2906 int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2907 { 2908 return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); 2909 } 2910 # else 2911 int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) 2912 { 2913 int j, ret, C, N; 2914 VARDECL(opus_res, in); 2915 ALLOC_STACK; 2916 2917 if (pcm==NULL) 2918 return OPUS_BAD_ARG; 2919 2920 C = st->channels; 2921 N = frame_size; 2922 ALLOC(in, C*N, opus_res); 2923 2924 for (j=0;j<C*N;j++) 2925 in[j] = FLOAT2RES(pcm[j]); 2926 2927 ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); 2928 #ifdef RESYNTH 2929 for (j=0;j<C*N;j++) 2930 ((float*)pcm)[j]=RES2FLOAT(in[j]); 2931 #endif 2932 RESTORE_STACK; 2933 return ret; 2934 } 2935 # endif 2936 2937 #endif 2938 2939 #endif /* CUSTOM_MODES */ 2940 2941 int 
opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
{
   /* Variadic control interface for the encoder state.
      `request` selects the operation; its single argument (an opus_int32
      for setters, a pointer for getters and info blocks) is read from the
      variable argument list.
      Returns OPUS_OK on success, OPUS_BAD_ARG when the argument fails the
      validation of the selected request, and OPUS_UNIMPLEMENTED when the
      request is not handled by this switch. */
   va_list ap;

   va_start(ap, request);
   switch (request)
   {
      case OPUS_SET_COMPLEXITY_REQUEST:
      {
         int value = va_arg(ap, opus_int32);
         if (value<0 || value>10)
            goto bad_arg;
         st->complexity = value;
      }
      break;
      case CELT_SET_START_BAND_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         if (value<0 || value>=st->mode->nbEBands)
            goto bad_arg;
         st->start = value;
      }
      break;
      case CELT_SET_END_BAND_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         if (value<1 || value>st->mode->nbEBands)
            goto bad_arg;
         st->end = value;
      }
      break;
      case CELT_SET_PREDICTION_REQUEST:
      {
         /* 0: disable_pf and force_intra both set;
            1: only disable_pf set;
            2: neither set. */
         int value = va_arg(ap, opus_int32);
         if (value<0 || value>2)
            goto bad_arg;
         st->disable_pf = value<=1;
         st->force_intra = value==0;
      }
      break;
      case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
      {
         int value = va_arg(ap, opus_int32);
         if (value<0 || value>100)
            goto bad_arg;
         st->loss_rate = value;
      }
      break;
      case OPUS_SET_VBR_CONSTRAINT_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         st->constrained_vbr = value;
      }
      break;
      case OPUS_SET_VBR_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         st->vbr = value;
      }
      break;
      case OPUS_SET_BITRATE_REQUEST:
      {
         /* Values of 500 or less are rejected unless they are the
            OPUS_BITRATE_MAX sentinel; the stored value is capped at
            750000 per channel. */
         opus_int32 value = va_arg(ap, opus_int32);
         if (value<=500 && value!=OPUS_BITRATE_MAX)
            goto bad_arg;
         value = IMIN(value, 750000*st->channels);
         st->bitrate = value;
      }
      break;
      case CELT_SET_CHANNELS_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         if (value<1 || value>2)
            goto bad_arg;
         st->stream_channels = value;
      }
      break;
      case OPUS_SET_LSB_DEPTH_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         if (value<8 || value>24)
            goto bad_arg;
         st->lsb_depth=value;
      }
      break;
      case OPUS_GET_LSB_DEPTH_REQUEST:
      {
         opus_int32 *value = va_arg(ap, opus_int32*);
         /* Reject a NULL destination, like the other getters below. */
         if (!value)
         {
            goto bad_arg;
         }
         *value=st->lsb_depth;
      }
      break;
      case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         if(value<0 || value>1)
         {
            goto bad_arg;
         }
         st->disable_inv = value;
      }
      break;
      case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
      {
         opus_int32 *value = va_arg(ap, opus_int32*);
         if (!value)
         {
            goto bad_arg;
         }
         *value = st->disable_inv;
      }
      break;
#ifdef ENABLE_QEXT
      case OPUS_SET_QEXT_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         if(value<0 || value>1)
         {
            goto bad_arg;
         }
         st->enable_qext = value;
      }
      break;
      case OPUS_GET_QEXT_REQUEST:
      {
         opus_int32 *value = va_arg(ap, opus_int32*);
         if (!value)
         {
            goto bad_arg;
         }
         *value = st->enable_qext;
      }
      break;
#endif
      case OPUS_RESET_STATE:
      {
         int i;
         celt_glog *oldBandE, *oldLogE, *oldLogE2;
         /* The band-energy history arrays are addressed relative to
            st->in_mem; compute their locations before clearing. */
         oldBandE = (celt_glog*)(st->in_mem+st->channels*(st->mode->overlap+QEXT_SCALE2(COMBFILTER_MAXPERIOD, st->qext_scale)));
         oldLogE = oldBandE + st->channels*st->mode->nbEBands;
         oldLogE2 = oldLogE + st->channels*st->mode->nbEBands;
         /* Zero everything from ENCODER_RESET_START up to the size reported
            by opus_custom_encoder_get_size(); fields before that point are
            left untouched. */
         OPUS_CLEAR((char*)&st->ENCODER_RESET_START,
               opus_custom_encoder_get_size(st->mode, st->channels)-
               ((char*)&st->ENCODER_RESET_START - (char*)st));
         for (i=0;i<st->channels*st->mode->nbEBands;i++)
            oldLogE[i]=oldLogE2[i]=-GCONST(28.f);
         /* Restore the non-zero defaults after the clear. */
         st->vbr_offset = 0;
         st->delayedIntra = 1;
         st->spread_decision = SPREAD_NORMAL;
         st->tonal_average = 256;
         st->hf_average = 0;
         st->tapset_decision = 0;
      }
      break;
#if defined(CUSTOM_MODES) || defined(ENABLE_OPUS_CUSTOM_API)
      case CELT_SET_INPUT_CLIPPING_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         st->clip = value;
      }
      break;
#endif
      case CELT_SET_SIGNALLING_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         st->signalling = value;
      }
      break;
      case CELT_SET_ANALYSIS_REQUEST:
      {
         /* A NULL info pointer is accepted and leaves st->analysis as is. */
         AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
         if (info)
            OPUS_COPY(&st->analysis, info, 1);
      }
      break;
      case CELT_SET_SILK_INFO_REQUEST:
      {
         /* A NULL info pointer is accepted and leaves st->silk_info as is. */
         SILKInfo *info = va_arg(ap, SILKInfo *);
         if (info)
            OPUS_COPY(&st->silk_info, info, 1);
      }
      break;
      case CELT_GET_MODE_REQUEST:
      {
         const CELTMode ** value = va_arg(ap, const CELTMode**);
         if (value==0)
            goto bad_arg;
         *value=st->mode;
      }
      break;
      case OPUS_GET_FINAL_RANGE_REQUEST:
      {
         opus_uint32 * value = va_arg(ap, opus_uint32 *);
         if (value==0)
            goto bad_arg;
         *value=st->rng;
      }
      break;
      case OPUS_SET_LFE_REQUEST:
      {
         opus_int32 value = va_arg(ap, opus_int32);
         st->lfe = value;
      }
      break;
      case OPUS_SET_ENERGY_MASK_REQUEST:
      {
         /* Only the pointer is stored; the data is not copied here. */
         celt_glog *value = va_arg(ap, celt_glog*);
         st->energy_mask = value;
      }
      break;
      default:
         goto bad_request;
   }
   va_end(ap);
   return OPUS_OK;
bad_arg:
   va_end(ap);
   return OPUS_BAD_ARG;
bad_request:
   va_end(ap);
   return OPUS_UNIMPLEMENTED;
}