// HRTFPanner.cpp
1 /* 2 * Copyright (C) 2010, Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR 17 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 20 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 21 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 */ 24 25 #include "HRTFPanner.h" 26 27 #include "AudioBlock.h" 28 #include "FFTConvolver.h" 29 #include "HRTFDatabase.h" 30 #include "HRTFDatabaseLoader.h" 31 32 using namespace mozilla; 33 using dom::ChannelInterpretation; 34 35 namespace WebCore { 36 37 // The value of 2 milliseconds is larger than the largest delay which exists in 38 // any HRTFKernel from the default HRTFDatabase (0.0136 seconds). We ASSERT the 39 // delay values used in process() with this value. 
40 const float MaxDelayTimeSeconds = 0.002f; 41 42 const int UninitializedAzimuth = -1; 43 44 HRTFPanner::HRTFPanner(float sampleRate, 45 already_AddRefed<HRTFDatabaseLoader> databaseLoader) 46 : m_databaseLoader(databaseLoader), 47 m_sampleRate(sampleRate), 48 m_crossfadeSelection(CrossfadeSelection1), 49 m_azimuthIndex1(UninitializedAzimuth), 50 m_azimuthIndex2(UninitializedAzimuth) 51 // m_elevation1 and m_elevation2 are initialized in pan() 52 , 53 m_crossfadeX(0), 54 m_crossfadeIncr(0), 55 m_convolverL1(HRTFElevation::fftSizeForSampleRate(sampleRate)), 56 m_convolverR1(m_convolverL1.fftSize()), 57 m_convolverL2(m_convolverL1.fftSize()), 58 m_convolverR2(m_convolverL1.fftSize()), 59 m_delayLine(MaxDelayTimeSeconds * sampleRate) { 60 MOZ_ASSERT(m_databaseLoader); 61 MOZ_COUNT_CTOR(HRTFPanner); 62 } 63 64 HRTFPanner::~HRTFPanner() { MOZ_COUNT_DTOR(HRTFPanner); } 65 66 size_t HRTFPanner::sizeOfIncludingThis( 67 mozilla::MallocSizeOf aMallocSizeOf) const { 68 size_t amount = aMallocSizeOf(this); 69 70 // NB: m_databaseLoader can be shared, so it is not measured here 71 amount += m_convolverL1.sizeOfExcludingThis(aMallocSizeOf); 72 amount += m_convolverR1.sizeOfExcludingThis(aMallocSizeOf); 73 amount += m_convolverL2.sizeOfExcludingThis(aMallocSizeOf); 74 amount += m_convolverR2.sizeOfExcludingThis(aMallocSizeOf); 75 amount += m_delayLine.SizeOfExcludingThis(aMallocSizeOf); 76 77 return amount; 78 } 79 80 void HRTFPanner::reset() { 81 m_azimuthIndex1 = UninitializedAzimuth; 82 m_azimuthIndex2 = UninitializedAzimuth; 83 // m_elevation1 and m_elevation2 are initialized in pan() 84 m_crossfadeSelection = CrossfadeSelection1; 85 m_crossfadeX = 0.0f; 86 m_crossfadeIncr = 0.0f; 87 m_convolverL1.reset(); 88 m_convolverR1.reset(); 89 m_convolverL2.reset(); 90 m_convolverR2.reset(); 91 m_delayLine.Reset(); 92 } 93 94 int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, 95 double& azimuthBlend) { 96 // Convert the azimuth angle from the range -180 -> +180 into 
the range 0 -> 97 // 360. The azimuth index may then be calculated from this positive value. 98 if (azimuth < 0) azimuth += 360.0; 99 100 int numberOfAzimuths = HRTFDatabase::numberOfAzimuths(); 101 const double angleBetweenAzimuths = 360.0 / numberOfAzimuths; 102 103 // Calculate the azimuth index and the blend (0 -> 1) for interpolation. 104 double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths; 105 int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat); 106 azimuthBlend = 107 desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex); 108 109 // We don't immediately start using this azimuth index, but instead approach 110 // this index from the last index we rendered at. This minimizes the clicks 111 // and graininess for moving sources which occur otherwise. 112 desiredAzimuthIndex = std::max(0, desiredAzimuthIndex); 113 desiredAzimuthIndex = std::min(numberOfAzimuths - 1, desiredAzimuthIndex); 114 return desiredAzimuthIndex; 115 } 116 117 void HRTFPanner::pan(double desiredAzimuth, double elevation, 118 const AudioBlock* inputBus, AudioBlock* outputBus) { 119 #ifdef DEBUG 120 unsigned numInputChannels = inputBus->IsNull() ? 0 : inputBus->ChannelCount(); 121 122 MOZ_ASSERT(numInputChannels <= 2); 123 MOZ_ASSERT(inputBus->GetDuration() == WEBAUDIO_BLOCK_SIZE); 124 #endif 125 126 bool isOutputGood = outputBus && outputBus->ChannelCount() == 2 && 127 outputBus->GetDuration() == WEBAUDIO_BLOCK_SIZE; 128 MOZ_ASSERT(isOutputGood); 129 130 if (!isOutputGood) { 131 if (outputBus) outputBus->SetNull(outputBus->GetDuration()); 132 return; 133 } 134 135 HRTFDatabase* database = m_databaseLoader->database(); 136 if (!database) { // not yet loaded 137 outputBus->SetNull(outputBus->GetDuration()); 138 return; 139 } 140 141 // IRCAM HRTF azimuths values from the loaded database is reversed from the 142 // panner's notion of azimuth. 
143 double azimuth = -desiredAzimuth; 144 145 bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0; 146 MOZ_ASSERT(isAzimuthGood); 147 if (!isAzimuthGood) { 148 outputBus->SetNull(outputBus->GetDuration()); 149 return; 150 } 151 152 // Normally, we'll just be dealing with mono sources. 153 // If we have a stereo input, implement stereo panning with left source 154 // processed by left HRTF, and right source by right HRTF. 155 156 // Get destination pointers. 157 float* destinationL = 158 static_cast<float*>(const_cast<void*>(outputBus->mChannelData[0])); 159 float* destinationR = 160 static_cast<float*>(const_cast<void*>(outputBus->mChannelData[1])); 161 162 double azimuthBlend; 163 int desiredAzimuthIndex = 164 calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend); 165 166 // Initially snap azimuth and elevation values to first values encountered. 167 if (m_azimuthIndex1 == UninitializedAzimuth) { 168 m_azimuthIndex1 = desiredAzimuthIndex; 169 m_elevation1 = elevation; 170 } 171 if (m_azimuthIndex2 == UninitializedAzimuth) { 172 m_azimuthIndex2 = desiredAzimuthIndex; 173 m_elevation2 = elevation; 174 } 175 176 // Cross-fade / transition over a period of around 45 milliseconds. 177 // This is an empirical value tuned to be a reasonable trade-off between 178 // smoothness and speed. 179 const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096; 180 181 // Check for azimuth and elevation changes, initiating a cross-fade if needed. 
182 if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) { 183 if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) { 184 // Cross-fade from 1 -> 2 185 m_crossfadeIncr = 1 / fadeFrames; 186 m_azimuthIndex2 = desiredAzimuthIndex; 187 m_elevation2 = elevation; 188 } 189 } 190 if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) { 191 if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) { 192 // Cross-fade from 2 -> 1 193 m_crossfadeIncr = -1 / fadeFrames; 194 m_azimuthIndex1 = desiredAzimuthIndex; 195 m_elevation1 = elevation; 196 } 197 } 198 199 // Get the HRTFKernels and interpolated delays. 200 HRTFKernel* kernelL1; 201 HRTFKernel* kernelR1; 202 HRTFKernel* kernelL2; 203 HRTFKernel* kernelR2; 204 double frameDelayL1; 205 double frameDelayR1; 206 double frameDelayL2; 207 double frameDelayR2; 208 database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, 209 m_elevation1, kernelL1, kernelR1, 210 frameDelayL1, frameDelayR1); 211 database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, 212 m_elevation2, kernelL2, kernelR2, 213 frameDelayL2, frameDelayR2); 214 215 bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2; 216 MOZ_ASSERT(areKernelsGood); 217 if (!areKernelsGood) { 218 outputBus->SetNull(outputBus->GetDuration()); 219 return; 220 } 221 222 MOZ_ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && 223 frameDelayR1 / sampleRate() < MaxDelayTimeSeconds); 224 MOZ_ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && 225 frameDelayR2 / sampleRate() < MaxDelayTimeSeconds); 226 227 // Crossfade inter-aural delays based on transitions. 
228 float frameDelaysL[WEBAUDIO_BLOCK_SIZE]; 229 float frameDelaysR[WEBAUDIO_BLOCK_SIZE]; 230 { 231 float x = m_crossfadeX; 232 float incr = m_crossfadeIncr; 233 for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) { 234 frameDelaysL[i] = (1 - x) * frameDelayL1 + x * frameDelayL2; 235 frameDelaysR[i] = (1 - x) * frameDelayR1 + x * frameDelayR2; 236 x += incr; 237 } 238 } 239 240 // First run through delay lines for inter-aural time difference. 241 m_delayLine.Write(*inputBus); 242 // "Speakers" means a mono input is read into both outputs (with possibly 243 // different delays). 244 m_delayLine.ReadChannel(frameDelaysL, outputBus, 0, 245 ChannelInterpretation::Speakers); 246 m_delayLine.ReadChannel(frameDelaysR, outputBus, 1, 247 ChannelInterpretation::Speakers); 248 m_delayLine.NextBlock(); 249 250 bool needsCrossfading = m_crossfadeIncr; 251 252 const float* convolutionDestinationL1; 253 const float* convolutionDestinationR1; 254 const float* convolutionDestinationL2; 255 const float* convolutionDestinationR2; 256 257 // Now do the convolutions. 258 // Note that we avoid doing convolutions on both sets of convolvers if we're 259 // not currently cross-fading. 260 261 if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) { 262 convolutionDestinationL1 = 263 m_convolverL1.process(kernelL1->fftFrame(), destinationL); 264 convolutionDestinationR1 = 265 m_convolverR1.process(kernelR1->fftFrame(), destinationR); 266 } 267 268 if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) { 269 convolutionDestinationL2 = 270 m_convolverL2.process(kernelL2->fftFrame(), destinationL); 271 convolutionDestinationR2 = 272 m_convolverR2.process(kernelR2->fftFrame(), destinationR); 273 } 274 275 if (needsCrossfading) { 276 // Apply linear cross-fade. 
277 float x = m_crossfadeX; 278 float incr = m_crossfadeIncr; 279 for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) { 280 destinationL[i] = (1 - x) * convolutionDestinationL1[i] + 281 x * convolutionDestinationL2[i]; 282 destinationR[i] = (1 - x) * convolutionDestinationR1[i] + 283 x * convolutionDestinationR2[i]; 284 x += incr; 285 } 286 // Update cross-fade value from local. 287 m_crossfadeX = x; 288 289 if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) { 290 // We've fully made the crossfade transition from 1 -> 2. 291 m_crossfadeSelection = CrossfadeSelection2; 292 m_crossfadeX = 1; 293 m_crossfadeIncr = 0; 294 } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) { 295 // We've fully made the crossfade transition from 2 -> 1. 296 m_crossfadeSelection = CrossfadeSelection1; 297 m_crossfadeX = 0; 298 m_crossfadeIncr = 0; 299 } 300 } else { 301 const float* sourceL; 302 const float* sourceR; 303 if (m_crossfadeSelection == CrossfadeSelection1) { 304 sourceL = convolutionDestinationL1; 305 sourceR = convolutionDestinationR1; 306 } else { 307 sourceL = convolutionDestinationL2; 308 sourceR = convolutionDestinationR2; 309 } 310 PodCopy(destinationL, sourceL, WEBAUDIO_BLOCK_SIZE); 311 PodCopy(destinationR, sourceR, WEBAUDIO_BLOCK_SIZE); 312 } 313 } 314 315 int HRTFPanner::maxTailFrames() const { 316 // Although the ideal tail time would be the length of the impulse 317 // response, there is additional tail time from the approximations in the 318 // implementation. Because HRTFPanner is implemented with a DelayKernel 319 // and a FFTConvolver, the tailTime of the HRTFPanner is the sum of the 320 // tailTime of the DelayKernel and the tailTime of the FFTConvolver. The 321 // FFTs of the convolver are fftSize(), half of which is latency, but this 322 // is aligned with blocks and so is reduced by the one block which is 323 // processed immediately. 
324 return m_delayLine.MaxDelayTicks() + m_convolverL1.fftSize() / 2 + 325 m_convolverL1.latencyFrames(); 326 } 327 328 } // namespace WebCore