TDStretch.cpp (34468B)
1 /////////////////////////////////////////////////////////////////////////////// 2 /// 3 /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 4 /// while maintaining the original pitch by using a time domain WSOLA-like 5 /// method with several performance-increasing tweaks. 6 /// 7 /// Notes : MMX optimized functions reside in a separate, platform-specific 8 /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'. 9 /// 10 /// This source file contains OpenMP optimizations that allow speeding up the 11 /// corss-correlation algorithm by executing it in several threads / CPU cores 12 /// in parallel. See the following article link for more detailed discussion 13 /// about SoundTouch OpenMP optimizations: 14 /// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices 15 /// 16 /// Author : Copyright (c) Olli Parviainen 17 /// Author e-mail : oparviai 'at' iki.fi 18 /// SoundTouch WWW: http://www.surina.net/soundtouch 19 /// 20 //////////////////////////////////////////////////////////////////////////////// 21 // 22 // License : 23 // 24 // SoundTouch audio processing library 25 // Copyright (c) Olli Parviainen 26 // 27 // This library is free software; you can redistribute it and/or 28 // modify it under the terms of the GNU Lesser General Public 29 // License as published by the Free Software Foundation; either 30 // version 2.1 of the License, or (at your option) any later version. 31 // 32 // This library is distributed in the hope that it will be useful, 33 // but WITHOUT ANY WARRANTY; without even the implied warranty of 34 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 35 // Lesser General Public License for more details. 36 // 37 // You should have received a copy of the GNU Lesser General Public 38 // License along with this library; if not, write to the Free Software 39 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 40 // 41 //////////////////////////////////////////////////////////////////////////////// 42 43 #include <string.h> 44 #include <limits.h> 45 #include <assert.h> 46 #include <math.h> 47 #include <float.h> 48 49 #include "STTypes.h" 50 #include "cpu_detect.h" 51 #include "TDStretch.h" 52 53 using namespace soundtouch; 54 55 #define max(x, y) (((x) > (y)) ? (x) : (y)) 56 57 /***************************************************************************** 58 * 59 * Constant definitions 60 * 61 *****************************************************************************/ 62 63 // Table for the hierarchical mixing position seeking algorithm 64 const short _scanOffsets[5][24]={ 65 { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, 66 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, 67 {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, 68 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 69 { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0, 70 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 71 { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0, 72 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 73 { 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111, 74 116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}}; 75 76 /***************************************************************************** 77 * 78 * Implementation of the class 'TDStretch' 79 * 80 *****************************************************************************/ 81 82 83 TDStretch::TDStretch() : FIFOProcessor(&outputBuffer) 84 { 85 bQuickSeek = false; 86 channels = 2; 87 88 pMidBuffer = NULL; 89 pMidBufferUnaligned = NULL; 90 overlapLength = 0; 91 92 bAutoSeqSetting = true; 93 bAutoSeekSetting = true; 94 95 tempo = 1.0f; 96 setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS); 97 setTempo(1.0f); 98 99 clear(); 100 } 101 102 103 104 TDStretch::~TDStretch() 105 { 106 delete[] pMidBufferUnaligned; 107 } 108 109 110 111 // Sets routine control parameters. These control are certain time constants 112 // defining how the sound is stretched to the desired duration. 113 // 114 // 'sampleRate' = sample rate of the sound 115 // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms) 116 // 'seekwindowMS' = seeking window length for scanning the best overlapping 117 // position (default = 28 ms) 118 // 'overlapMS' = overlapping length (default = 12 ms) 119 120 void TDStretch::setParameters(int aSampleRate, int aSequenceMS, 121 int aSeekWindowMS, int aOverlapMS) 122 { 123 // accept only positive parameter values - if zero or negative, use old values instead 124 if (aSampleRate > 0) 125 { 126 if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate"); 127 this->sampleRate = aSampleRate; 128 } 129 130 if (aOverlapMS > 0) this->overlapMs = aOverlapMS; 131 132 if (aSequenceMS > 0) 133 { 134 this->sequenceMs = aSequenceMS; 135 bAutoSeqSetting = false; 136 } 137 else if (aSequenceMS == 0) 138 { 139 // if zero, use automatic setting 140 bAutoSeqSetting = true; 141 } 142 143 if (aSeekWindowMS > 0) 144 { 145 this->seekWindowMs = aSeekWindowMS; 146 bAutoSeekSetting = false; 147 } 148 else if (aSeekWindowMS == 0) 149 { 150 // if zero, use automatic setting 151 bAutoSeekSetting = true; 152 } 153 154 calcSeqParameters(); 155 156 calculateOverlapLength(overlapMs); 157 158 // set tempo to recalculate 'sampleReq' 159 setTempo(tempo); 160 } 161 162 163 164 /// Get routine control parameters, see setParameters() function. 165 /// Any of the parameters to this function can be NULL, in such case corresponding parameter 166 /// value isn't returned. 167 void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const 168 { 169 if (pSampleRate) 170 { 171 *pSampleRate = sampleRate; 172 } 173 174 if (pSequenceMs) 175 { 176 *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs; 177 } 178 179 if (pSeekWindowMs) 180 { 181 *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs; 182 } 183 184 if (pOverlapMs) 185 { 186 *pOverlapMs = overlapMs; 187 } 188 } 189 190 191 // Overlaps samples in 'midBuffer' with the samples in 'pInput' 192 void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const 193 { 194 int i; 195 SAMPLETYPE m1, m2; 196 197 m1 = (SAMPLETYPE)0; 198 m2 = (SAMPLETYPE)overlapLength; 199 200 for (i = 0; i < overlapLength ; i ++) 201 { 202 pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength; 203 m1 += 1; 204 m2 -= 1; 205 } 206 } 207 208 209 210 void TDStretch::clearMidBuffer() 211 { 212 memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength); 213 } 214 215 216 void TDStretch::clearInput() 217 { 218 inputBuffer.clear(); 219 clearMidBuffer(); 220 isBeginning = true; 221 maxnorm = 0; 222 maxnormf = 1e8; 223 skipFract = 0; 224 } 225 226 227 // Clears the sample buffers 228 void TDStretch::clear() 229 { 230 outputBuffer.clear(); 231 clearInput(); 232 } 233 234 235 236 // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero 237 // to enable 238 void TDStretch::enableQuickSeek(bool enable) 239 { 240 bQuickSeek = enable; 241 } 242 243 244 // Returns nonzero if the quick seeking algorithm is enabled. 245 bool TDStretch::isQuickSeekEnabled() const 246 { 247 return bQuickSeek; 248 } 249 250 251 // Seeks for the optimal overlap-mixing position. 252 int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos) 253 { 254 if (bQuickSeek) 255 { 256 return seekBestOverlapPositionQuick(refPos); 257 } 258 else 259 { 260 return seekBestOverlapPositionFull(refPos); 261 } 262 } 263 264 265 // Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position 266 // of 'ovlPos'. 267 inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const 268 { 269 #ifndef USE_MULTICH_ALWAYS 270 if (channels == 1) 271 { 272 // mono sound. 273 overlapMono(pOutput, pInput + ovlPos); 274 } 275 else if (channels == 2) 276 { 277 // stereo sound 278 overlapStereo(pOutput, pInput + 2 * ovlPos); 279 } 280 else 281 #endif // USE_MULTICH_ALWAYS 282 { 283 assert(channels > 0); 284 overlapMulti(pOutput, pInput + channels * ovlPos); 285 } 286 } 287 288 289 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the 290 // routine 291 // 292 // The best position is determined as the position where the two overlapped 293 // sample sequences are 'most alike', in terms of the highest cross-correlation 294 // value over the overlapping period 295 int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos) 296 { 297 int bestOffs; 298 double bestCorr; 299 int i; 300 double norm; 301 302 bestCorr = -FLT_MAX; 303 bestOffs = 0; 304 305 // Scans for the best correlation value by testing each possible position 306 // over the permitted range. 307 bestCorr = calcCrossCorr(refPos, pMidBuffer, norm); 308 bestCorr = (bestCorr + 0.1) * 0.75; 309 310 #pragma omp parallel for 311 for (i = 1; i < seekLength; i ++) 312 { 313 double corr; 314 // Calculates correlation value for the mixing position corresponding to 'i' 315 #if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED) 316 // in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't 317 // iterate the loop in sequential order 318 // in SIMD mode, avoid accumulator version to allow avoiding unaligned positions 319 corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm); 320 #else 321 // In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same 322 // as "calcCrossCorr", but saves time by reusing & updating previously stored 323 // "norm" value 324 corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm); 325 #endif 326 // heuristic rule to slightly favour values close to mid of the range 327 double tmp = (double)(2 * i - seekLength) / (double)seekLength; 328 corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp)); 329 330 // Checks for the highest correlation value 331 if (corr > bestCorr) 332 { 333 // For optimal performance, enter critical section only in case that best value found. 334 // in such case repeat 'if' condition as it's possible that parallel execution may have 335 // updated the bestCorr value in the mean time 336 #pragma omp critical 337 if (corr > bestCorr) 338 { 339 bestCorr = corr; 340 bestOffs = i; 341 } 342 } 343 } 344 345 #ifdef SOUNDTOUCH_INTEGER_SAMPLES 346 adaptNormalizer(); 347 #endif 348 349 // clear cross correlation routine state if necessary (is so e.g. in MMX routines). 350 clearCrossCorrState(); 351 352 return bestOffs; 353 } 354 355 356 // Quick seek algorithm for improved runtime-performance: First roughly scans through the 357 // correlation area, and then scan surroundings of two best preliminary correlation candidates 358 // with improved precision 359 // 360 // Based on testing: 361 // - This algorithm gives on average 99% as good match as the full algorithm 362 // - this quick seek algorithm finds the best match on ~90% of cases 363 // - on those 10% of cases when this algorithm doesn't find best match, 364 // it still finds on average ~90% match vs. the best possible match 365 int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos) 366 { 367 #define _MIN(a, b) (((a) < (b)) ? (a) : (b)) 368 #define SCANSTEP 16 369 #define SCANWIND 8 370 371 int bestOffs; 372 int i; 373 int bestOffs2; 374 float bestCorr, corr; 375 float bestCorr2; 376 double norm; 377 378 // note: 'float' types used in this function in case that the platform would need to use software-fp 379 380 bestCorr = 381 bestCorr2 = -FLT_MAX; 382 bestOffs = 383 bestOffs2 = SCANWIND; 384 385 // Scans for the best correlation value by testing each possible position 386 // over the permitted range. Look for two best matches on the first pass to 387 // increase possibility of ideal match. 388 // 389 // Begin from "SCANSTEP" instead of SCANWIND to make the calculation 390 // catch the 'middlepoint' of seekLength vector as that's the a-priori 391 // expected best match position 392 // 393 // Roughly: 394 // - 15% of cases find best result directly on the first round, 395 // - 75% cases find better match on 2nd round around the best match from 1st round 396 // - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round 397 for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP) 398 { 399 // Calculates correlation value for the mixing position corresponding 400 // to 'i' 401 corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); 402 // heuristic rule to slightly favour values close to mid of the seek range 403 float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; 404 corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); 405 406 // Checks for the highest correlation value 407 if (corr > bestCorr) 408 { 409 // found new best match. keep the previous best as 2nd best match 410 bestCorr2 = bestCorr; 411 bestOffs2 = bestOffs; 412 bestCorr = corr; 413 bestOffs = i; 414 } 415 else if (corr > bestCorr2) 416 { 417 // not new best, but still new 2nd best match 418 bestCorr2 = corr; 419 bestOffs2 = i; 420 } 421 } 422 423 // Scans surroundings of the found best match with small stepping 424 int end = _MIN(bestOffs + SCANWIND + 1, seekLength); 425 for (i = bestOffs - SCANWIND; i < end; i++) 426 { 427 if (i == bestOffs) continue; // this offset already calculated, thus skip 428 429 // Calculates correlation value for the mixing position corresponding 430 // to 'i' 431 corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); 432 // heuristic rule to slightly favour values close to mid of the range 433 float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; 434 corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); 435 436 // Checks for the highest correlation value 437 if (corr > bestCorr) 438 { 439 bestCorr = corr; 440 bestOffs = i; 441 } 442 } 443 444 // Scans surroundings of the 2nd best match with small stepping 445 end = _MIN(bestOffs2 + SCANWIND + 1, seekLength); 446 for (i = bestOffs2 - SCANWIND; i < end; i++) 447 { 448 if (i == bestOffs2) continue; // this offset already calculated, thus skip 449 450 // Calculates correlation value for the mixing position corresponding 451 // to 'i' 452 corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm); 453 // heuristic rule to slightly favour values close to mid of the range 454 float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength; 455 corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp)); 456 457 // Checks for the highest correlation value 458 if (corr > bestCorr) 459 { 460 bestCorr = corr; 461 bestOffs = i; 462 } 463 } 464 465 // clear cross correlation routine state if necessary (is so e.g. in MMX routines). 466 clearCrossCorrState(); 467 468 #ifdef SOUNDTOUCH_INTEGER_SAMPLES 469 adaptNormalizer(); 470 #endif 471 472 return bestOffs; 473 } 474 475 476 477 478 /// For integer algorithm: adapt normalization factor divider with music so that 479 /// it'll not be pessimistically restrictive that can degrade quality on quieter sections 480 /// yet won't cause integer overflows either 481 void TDStretch::adaptNormalizer() 482 { 483 // Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to 484 // too low values during pauses in music 485 if ((maxnorm > 1000) || (maxnormf > 40000000)) 486 { 487 //norm averaging filter 488 maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm; 489 490 if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16)) 491 { 492 // large values, so increase divider 493 overlapDividerBitsNorm++; 494 if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase 495 } 496 else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0)) 497 { 498 // extra small values, decrease divider 499 overlapDividerBitsNorm--; 500 } 501 } 502 503 maxnorm = 0; 504 } 505 506 507 /// clear cross correlation routine state if necessary 508 void TDStretch::clearCrossCorrState() 509 { 510 // default implementation is empty. 511 } 512 513 514 /// Calculates processing sequence length according to tempo setting 515 void TDStretch::calcSeqParameters() 516 { 517 // Adjust tempo param according to tempo, so that variating processing sequence length is used 518 // at various tempo settings, between the given low...top limits 519 #define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%) 520 #define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%) 521 522 // sequence-ms setting values at above low & top tempo 523 #define AUTOSEQ_AT_MIN 90.0 524 #define AUTOSEQ_AT_MAX 40.0 525 #define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) 526 #define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW)) 527 528 // seek-window-ms setting values at above low & top tempoq 529 #define AUTOSEEK_AT_MIN 20.0 530 #define AUTOSEEK_AT_MAX 15.0 531 #define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW)) 532 #define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW)) 533 534 #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x))) 535 536 double seq, seek; 537 538 if (bAutoSeqSetting) 539 { 540 seq = AUTOSEQ_C + AUTOSEQ_K * tempo; 541 seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN); 542 sequenceMs = (int)(seq + 0.5); 543 } 544 545 if (bAutoSeekSetting) 546 { 547 seek = AUTOSEEK_C + AUTOSEEK_K * tempo; 548 seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN); 549 seekWindowMs = (int)(seek + 0.5); 550 } 551 552 // Update seek window lengths 553 seekWindowLength = (sampleRate * sequenceMs) / 1000; 554 if (seekWindowLength < 2 * overlapLength) 555 { 556 seekWindowLength = 2 * overlapLength; 557 } 558 seekLength = (sampleRate * seekWindowMs) / 1000; 559 } 560 561 562 563 // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower 564 // tempo, larger faster tempo. 565 void TDStretch::setTempo(double newTempo) 566 { 567 int intskip; 568 569 tempo = newTempo; 570 571 // Calculate new sequence duration 572 calcSeqParameters(); 573 574 // Calculate ideal skip length (according to tempo value) 575 nominalSkip = tempo * (seekWindowLength - overlapLength); 576 intskip = (int)(nominalSkip + 0.5); 577 578 // Calculate how many samples are needed in the 'inputBuffer' to 579 // process another batch of samples 580 //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2; 581 sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength; 582 } 583 584 585 586 // Sets the number of channels, 1 = mono, 2 = stereo 587 void TDStretch::setChannels(int numChannels) 588 { 589 if (!verifyNumberOfChannels(numChannels) || 590 (channels == numChannels)) return; 591 592 channels = numChannels; 593 inputBuffer.setChannels(channels); 594 outputBuffer.setChannels(channels); 595 596 // re-init overlap/buffer 597 overlapLength=0; 598 setParameters(sampleRate); 599 } 600 601 602 // nominal tempo, no need for processing, just pass the samples through 603 // to outputBuffer 604 /* 605 void TDStretch::processNominalTempo() 606 { 607 assert(tempo == 1.0f); 608 609 if (bMidBufferDirty) 610 { 611 // If there are samples in pMidBuffer waiting for overlapping, 612 // do a single sliding overlapping with them in order to prevent a 613 // clicking distortion in the output sound 614 if (inputBuffer.numSamples() < overlapLength) 615 { 616 // wait until we've got overlapLength input samples 617 return; 618 } 619 // Mix the samples in the beginning of 'inputBuffer' with the 620 // samples in 'midBuffer' using sliding overlapping 621 overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0); 622 outputBuffer.putSamples(overlapLength); 623 inputBuffer.receiveSamples(overlapLength); 624 clearMidBuffer(); 625 // now we've caught the nominal sample flow and may switch to 626 // bypass mode 627 } 628 629 // Simply bypass samples from input to output 630 outputBuffer.moveSamples(inputBuffer); 631 } 632 */ 633 634 635 // Processes as many processing frames of the samples 'inputBuffer', store 636 // the result into 'outputBuffer' 637 void TDStretch::processSamples() 638 { 639 int ovlSkip; 640 int offset = 0; 641 int temp; 642 643 /* Removed this small optimization - can introduce a click to sound when tempo setting 644 crosses the nominal value 645 if (tempo == 1.0f) 646 { 647 // tempo not changed from the original, so bypass the processing 648 processNominalTempo(); 649 return; 650 } 651 */ 652 653 // Process samples as long as there are enough samples in 'inputBuffer' 654 // to form a processing frame. 655 while ((int)inputBuffer.numSamples() >= sampleReq) 656 { 657 if (isBeginning == false) 658 { 659 // apart from the very beginning of the track, 660 // scan for the best overlapping position & do overlap-add 661 offset = seekBestOverlapPosition(inputBuffer.ptrBegin()); 662 663 // Mix the samples in the 'inputBuffer' at position of 'offset' with the 664 // samples in 'midBuffer' using sliding overlapping 665 // ... first partially overlap with the end of the previous sequence 666 // (that's in 'midBuffer') 667 overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset); 668 outputBuffer.putSamples((uint)overlapLength); 669 offset += overlapLength; 670 } 671 else 672 { 673 // Adjust processing offset at beginning of track by not perform initial overlapping 674 // and compensating that in the 'input buffer skip' calculation 675 isBeginning = false; 676 int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5); 677 678 #ifdef ST_SIMD_AVOID_UNALIGNED 679 // in SIMD mode, round the skip amount to value corresponding to aligned memory address 680 if (channels == 1) 681 { 682 skip &= -4; 683 } 684 else if (channels == 2) 685 { 686 skip &= -2; 687 } 688 #endif 689 skipFract -= skip; 690 if (skipFract <= -nominalSkip) 691 { 692 skipFract = -nominalSkip; 693 } 694 } 695 696 // ... then copy sequence samples from 'inputBuffer' to output: 697 698 // crosscheck that we don't have buffer overflow... 699 if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength)) 700 { 701 continue; // just in case, shouldn't really happen 702 } 703 704 // length of sequence 705 temp = (seekWindowLength - 2 * overlapLength); 706 outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp); 707 708 // Copies the end of the current sequence from 'inputBuffer' to 709 // 'midBuffer' for being mixed with the beginning of the next 710 // processing sequence and so on 711 assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples()); 712 memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp), 713 channels * sizeof(SAMPLETYPE) * overlapLength); 714 715 // Remove the processed samples from the input buffer. Update 716 // the difference between integer & nominal skip step to 'skipFract' 717 // in order to prevent the error from accumulating over time. 718 skipFract += nominalSkip; // real skip size 719 ovlSkip = (int)skipFract; // rounded to integer skip 720 skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip 721 inputBuffer.receiveSamples((uint)ovlSkip); 722 } 723 } 724 725 726 // Adds 'numsamples' pcs of samples from the 'samples' memory position into 727 // the input of the object. 728 void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples) 729 { 730 // Add the samples into the input buffer 731 inputBuffer.putSamples(samples, nSamples); 732 // Process the samples in input buffer 733 processSamples(); 734 } 735 736 737 738 /// Set new overlap length parameter & reallocate RefMidBuffer if necessary. 739 void TDStretch::acceptNewOverlapLength(int newOverlapLength) 740 { 741 int prevOvl; 742 743 assert(newOverlapLength >= 0); 744 prevOvl = overlapLength; 745 overlapLength = newOverlapLength; 746 747 if (overlapLength > prevOvl) 748 { 749 delete[] pMidBufferUnaligned; 750 751 pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)]; 752 // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency 753 pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned); 754 755 clearMidBuffer(); 756 } 757 } 758 759 760 // Operator 'new' is overloaded so that it automatically creates a suitable instance 761 // depending on if we've a MMX/SSE/etc-capable CPU available or not. 762 void * TDStretch::operator new(size_t s) 763 { 764 // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead! 765 ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!"); 766 return newInstance(); 767 } 768 769 770 TDStretch * TDStretch::newInstance() 771 { 772 #if defined(SOUNDTOUCH_ALLOW_MMX) || defined(SOUNDTOUCH_ALLOW_SSE) 773 uint uExtensions; 774 775 uExtensions = detectCPUextensions(); 776 #endif 777 778 // Check if MMX/SSE instruction set extensions supported by CPU 779 780 #ifdef SOUNDTOUCH_ALLOW_MMX 781 // MMX routines available only with integer sample types 782 if (uExtensions & SUPPORT_MMX) 783 { 784 return ::new TDStretchMMX; 785 } 786 else 787 #endif // SOUNDTOUCH_ALLOW_MMX 788 789 790 #ifdef SOUNDTOUCH_ALLOW_SSE 791 if (uExtensions & SUPPORT_SSE) 792 { 793 // SSE support 794 return ::new TDStretchSSE; 795 } 796 else 797 #endif // SOUNDTOUCH_ALLOW_SSE 798 799 { 800 // ISA optimizations not supported, use plain C version 801 return ::new TDStretch; 802 } 803 } 804 805 806 ////////////////////////////////////////////////////////////////////////////// 807 // 808 // Integer arithmetic specific algorithm implementations. 809 // 810 ////////////////////////////////////////////////////////////////////////////// 811 812 #ifdef SOUNDTOUCH_INTEGER_SAMPLES 813 814 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' 815 // version of the routine. 816 void TDStretch::overlapStereo(short *poutput, const short *input) const 817 { 818 int i; 819 short temp; 820 int cnt2; 821 822 for (i = 0; i < overlapLength ; i ++) 823 { 824 temp = (short)(overlapLength - i); 825 cnt2 = 2 * i; 826 poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength; 827 poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength; 828 } 829 } 830 831 832 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi' 833 // version of the routine. 834 void TDStretch::overlapMulti(short *poutput, const short *input) const 835 { 836 short m1; 837 int i = 0; 838 839 for (m1 = 0; m1 < overlapLength; m1 ++) 840 { 841 short m2 = (short)(overlapLength - m1); 842 for (int c = 0; c < channels; c ++) 843 { 844 poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength; 845 i++; 846 } 847 } 848 } 849 850 // Calculates the x having the closest 2^x value for the given value 851 static int _getClosest2Power(double value) 852 { 853 return (int)(log(value) / log(2.0) + 0.5); 854 } 855 856 857 /// Calculates overlap period length in samples. 858 /// Integer version rounds overlap length to closest power of 2 859 /// for a divide scaling operation. 860 void TDStretch::calculateOverlapLength(int aoverlapMs) 861 { 862 int newOvl; 863 864 assert(aoverlapMs >= 0); 865 866 // calculate overlap length so that it's power of 2 - thus it's easy to do 867 // integer division by right-shifting. Term "-1" at end is to account for 868 // the extra most significatnt bit left unused in result by signed multiplication 869 overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; 870 if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9; 871 if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3; 872 newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above 873 874 acceptNewOverlapLength(newOvl); 875 876 overlapDividerBitsNorm = overlapDividerBitsPure; 877 878 // calculate sloping divider so that crosscorrelation operation won't 879 // overflow 32-bit register. Max. sum of the crosscorrelation sum without 880 // divider would be 2^30*(N^3-N)/3, where N = overlap length 881 slopingDivider = (newOvl * newOvl - 1) / 3; 882 } 883 884 885 double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm) 886 { 887 long corr; 888 unsigned long lnorm; 889 int i; 890 891 #ifdef ST_SIMD_AVOID_UNALIGNED 892 // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary 893 if (((ulongptr)mixingPos) & 15) return -1e50; 894 #endif 895 896 // hint compiler autovectorization that loop length is divisible by 8 897 int ilength = (channels * overlapLength) & -8; 898 899 corr = lnorm = 0; 900 // Same routine for stereo and mono 901 for (i = 0; i < ilength; i += 2) 902 { 903 corr += (mixingPos[i] * compare[i] + 904 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; 905 lnorm += (mixingPos[i] * mixingPos[i] + 906 mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; 907 // do intermediate scalings to avoid integer overflow 908 } 909 910 if (lnorm > maxnorm) 911 { 912 // modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode 913 #pragma omp critical 914 if (lnorm > maxnorm) 915 { 916 maxnorm = lnorm; 917 } 918 } 919 // Normalize result by dividing by sqrt(norm) - this step is easiest 920 // done using floating point operation 921 norm = (double)lnorm; 922 return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); 923 } 924 925 926 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value 927 double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm) 928 { 929 long corr; 930 long lnorm; 931 int i; 932 933 // hint compiler autovectorization that loop length is divisible by 8 934 int ilength = (channels * overlapLength) & -8; 935 936 // cancel first normalizer tap from previous round 937 lnorm = 0; 938 for (i = 1; i <= channels; i ++) 939 { 940 lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm; 941 } 942 943 corr = 0; 944 // Same routine for stereo and mono. 945 for (i = 0; i < ilength; i += 2) 946 { 947 corr += (mixingPos[i] * compare[i] + 948 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; 949 } 950 951 // update normalizer with last samples of this round 952 for (int j = 0; j < channels; j ++) 953 { 954 i --; 955 lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm; 956 } 957 958 norm += (double)lnorm; 959 if (norm > maxnorm) 960 { 961 maxnorm = (unsigned long)norm; 962 } 963 964 // Normalize result by dividing by sqrt(norm) - this step is easiest 965 // done using floating point operation 966 return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm); 967 } 968 969 #endif // SOUNDTOUCH_INTEGER_SAMPLES 970 971 ////////////////////////////////////////////////////////////////////////////// 972 // 973 // Floating point arithmetic specific algorithm implementations. 974 // 975 976 #ifdef SOUNDTOUCH_FLOAT_SAMPLES 977 978 // Overlaps samples in 'midBuffer' with the samples in 'pInput' 979 void TDStretch::overlapStereo(float *pOutput, const float *pInput) const 980 { 981 int i; 982 float fScale; 983 float f1; 984 float f2; 985 986 fScale = 1.0f / (float)overlapLength; 987 988 f1 = 0; 989 f2 = 1.0f; 990 991 for (i = 0; i < 2 * (int)overlapLength ; i += 2) 992 { 993 pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2; 994 pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2; 995 996 f1 += fScale; 997 f2 -= fScale; 998 } 999 } 1000 1001 1002 // Overlaps samples in 'midBuffer' with the samples in 'input'. 1003 void TDStretch::overlapMulti(float *pOutput, const float *pInput) const 1004 { 1005 int i; 1006 float fScale; 1007 float f1; 1008 float f2; 1009 1010 fScale = 1.0f / (float)overlapLength; 1011 1012 f1 = 0; 1013 f2 = 1.0f; 1014 1015 i=0; 1016 for (int i2 = 0; i2 < overlapLength; i2 ++) 1017 { 1018 // note: Could optimize this slightly by taking into account that always channels > 2 1019 for (int c = 0; c < channels; c ++) 1020 { 1021 pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2; 1022 i++; 1023 } 1024 f1 += fScale; 1025 f2 -= fScale; 1026 } 1027 } 1028 1029 1030 /// Calculates overlapInMsec period length in samples. 1031 void TDStretch::calculateOverlapLength(int overlapInMsec) 1032 { 1033 int newOvl; 1034 1035 assert(overlapInMsec >= 0); 1036 newOvl = (sampleRate * overlapInMsec) / 1000; 1037 if (newOvl < 16) newOvl = 16; 1038 1039 // must be divisible by 8 1040 newOvl -= newOvl % 8; 1041 1042 acceptNewOverlapLength(newOvl); 1043 } 1044 1045 1046 /// Calculate cross-correlation 1047 double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm) 1048 { 1049 float corr; 1050 float norm; 1051 int i; 1052 1053 #ifdef ST_SIMD_AVOID_UNALIGNED 1054 // in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary 1055 if (((ulongptr)mixingPos) & 15) return -1e50; 1056 #endif 1057 1058 // hint compiler autovectorization that loop length is divisible by 8 1059 int ilength = (channels * overlapLength) & -8; 1060 1061 corr = norm = 0; 1062 // Same routine for stereo and mono 1063 for (i = 0; i < ilength; i ++) 1064 { 1065 corr += mixingPos[i] * compare[i]; 1066 norm += mixingPos[i] * mixingPos[i]; 1067 } 1068 1069 anorm = norm; 1070 return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); 1071 } 1072 1073 1074 /// Update cross-correlation by accumulating "norm" coefficient by previously calculated value 1075 double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm) 1076 { 1077 float corr; 1078 int i; 1079 1080 corr = 0; 1081 1082 // cancel first normalizer tap from previous round 1083 for (i = 1; i <= channels; i ++) 1084 { 1085 norm -= mixingPos[-i] * mixingPos[-i]; 1086 } 1087 1088 // hint compiler autovectorization that loop length is divisible by 8 1089 int ilength = (channels * overlapLength) & -8; 1090 1091 // Same routine for stereo and mono 1092 for (i = 0; i < ilength; i ++) 1093 { 1094 corr += mixingPos[i] * compare[i]; 1095 } 1096 1097 // update normalizer with last samples of this round 1098 for (int j = 0; j < channels; j ++) 1099 { 1100 i --; 1101 norm += mixingPos[i] * mixingPos[i]; 1102 } 1103 1104 return corr / sqrt((norm < 1e-9 ? 1.0 : norm)); 1105 } 1106 1107 1108 #endif // SOUNDTOUCH_FLOAT_SAMPLES