// HRTFPanner.cpp
1 /* 2 * Copyright (C) 2010, Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR 17 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 20 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 21 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 22 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 */ 24 25 #include "HRTFPanner.h" 26 27 #include "AudioBlock.h" 28 #include "FFTConvolver.h" 29 #include "HRTFDatabase.h" 30 #include "HRTFDatabaseLoader.h" 31 32 using namespace mozilla; 33 using dom::ChannelInterpretation; 34 35 namespace WebCore { 36 37 // The value of 2 milliseconds is larger than the largest delay which exists in 38 // any HRTFKernel from the default HRTFDatabase (0.0136 seconds). We ASSERT the 39 // delay values used in process() with this value. 
40 const float MaxDelayTimeSeconds = 0.002f; 41 42 const int UninitializedAzimuth = -1; 43 44 HRTFPanner::HRTFPanner(float sampleRate, 45 already_AddRefed<HRTFDatabaseLoader> databaseLoader) 46 : m_databaseLoader(databaseLoader), 47 m_sampleRate(sampleRate), 48 m_crossfadeSelection(CrossfadeSelection1), 49 m_azimuthIndex1(UninitializedAzimuth), 50 m_azimuthIndex2(UninitializedAzimuth) 51 // m_elevation1 and m_elevation2 are initialized in pan() 52 , 53 m_crossfadeX(0), 54 m_crossfadeIncr(0), 55 m_convolverL1(HRTFElevation::fftSizeForSampleRate(sampleRate)), 56 m_convolverR1(m_convolverL1.fftSize()), 57 m_convolverL2(m_convolverL1.fftSize()), 58 m_convolverR2(m_convolverL1.fftSize()), 59 m_delayLine(MaxDelayTimeSeconds * sampleRate) { 60 MOZ_ASSERT(m_databaseLoader); 61 MOZ_COUNT_CTOR(HRTFPanner); 62 } 63 64 HRTFPanner::~HRTFPanner() { MOZ_COUNT_DTOR(HRTFPanner); } 65 66 size_t HRTFPanner::sizeOfIncludingThis( 67 mozilla::MallocSizeOf aMallocSizeOf) const { 68 size_t amount = aMallocSizeOf(this); 69 70 // NB: m_databaseLoader can be shared, so it is not measured here 71 amount += m_convolverL1.sizeOfExcludingThis(aMallocSizeOf); 72 amount += m_convolverR1.sizeOfExcludingThis(aMallocSizeOf); 73 amount += m_convolverL2.sizeOfExcludingThis(aMallocSizeOf); 74 amount += m_convolverR2.sizeOfExcludingThis(aMallocSizeOf); 75 amount += m_delayLine.SizeOfExcludingThis(aMallocSizeOf); 76 77 return amount; 78 } 79 80 void HRTFPanner::reset() { 81 m_azimuthIndex1 = UninitializedAzimuth; 82 m_azimuthIndex2 = UninitializedAzimuth; 83 // m_elevation1 and m_elevation2 are initialized in pan() 84 m_crossfadeSelection = CrossfadeSelection1; 85 m_crossfadeX = 0.0f; 86 m_crossfadeIncr = 0.0f; 87 m_convolverL1.reset(); 88 m_convolverR1.reset(); 89 m_convolverL2.reset(); 90 m_convolverR2.reset(); 91 m_delayLine.Reset(); 92 } 93 94 int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, 95 double& azimuthBlend) { 96 // Convert the azimuth angle from the range -180 -> +180 into 
the range 0 -> 97 // 360. The azimuth index may then be calculated from this positive value. 98 if (azimuth < 0) azimuth += 360.0; 99 100 int numberOfAzimuths = HRTFDatabase::numberOfAzimuths(); 101 const double angleBetweenAzimuths = 360.0 / numberOfAzimuths; 102 103 // Calculate the azimuth index and the blend (0 -> 1) for interpolation. 104 double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths; 105 int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat); 106 azimuthBlend = 107 desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex); 108 109 // We don't immediately start using this azimuth index, but instead approach 110 // this index from the last index we rendered at. This minimizes the clicks 111 // and graininess for moving sources which occur otherwise. 112 desiredAzimuthIndex = std::max(0, desiredAzimuthIndex); 113 desiredAzimuthIndex = std::min(numberOfAzimuths - 1, desiredAzimuthIndex); 114 return desiredAzimuthIndex; 115 } 116 117 void HRTFPanner::pan(double desiredAzimuth, double elevation, 118 const AudioBlock* inputBus, AudioBlock* outputBus) { 119 #ifdef DEBUG 120 unsigned numInputChannels = inputBus->IsNull() ? 0 : inputBus->ChannelCount(); 121 122 MOZ_ASSERT(numInputChannels <= 2); 123 MOZ_ASSERT(inputBus->GetDuration() == WEBAUDIO_BLOCK_SIZE); 124 #endif 125 126 bool isOutputGood = outputBus && outputBus->ChannelCount() == 2 && 127 outputBus->GetDuration() == WEBAUDIO_BLOCK_SIZE; 128 MOZ_ASSERT(isOutputGood); 129 130 if (!isOutputGood) { 131 if (outputBus) outputBus->SetNull(outputBus->GetDuration()); 132 return; 133 } 134 135 HRTFDatabase* database = m_databaseLoader->database(); 136 if (!database) { // not yet loaded 137 outputBus->SetNull(outputBus->GetDuration()); 138 return; 139 } 140 141 // IRCAM HRTF azimuths values from the loaded database is reversed from the 142 // panner's notion of azimuth. 
143 double azimuth = -desiredAzimuth; 144 145 bool isAzimuthGood = azimuth >= -180.0 && azimuth <= 180.0; 146 MOZ_ASSERT(isAzimuthGood); 147 if (!isAzimuthGood) { 148 outputBus->SetNull(outputBus->GetDuration()); 149 return; 150 } 151 152 // Normally, we'll just be dealing with mono sources. 153 // If we have a stereo input, implement stereo panning with left source 154 // processed by left HRTF, and right source by right HRTF. 155 156 // Get destination pointers. 157 float* destinationL = 158 static_cast<float*>(const_cast<void*>(outputBus->mChannelData[0])); 159 float* destinationR = 160 static_cast<float*>(const_cast<void*>(outputBus->mChannelData[1])); 161 162 double azimuthBlend; 163 int desiredAzimuthIndex = 164 calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend); 165 166 // Initially snap azimuth and elevation values to first values encountered. 167 if (m_azimuthIndex1 == UninitializedAzimuth) { 168 m_azimuthIndex1 = desiredAzimuthIndex; 169 m_elevation1 = elevation; 170 } 171 if (m_azimuthIndex2 == UninitializedAzimuth) { 172 m_azimuthIndex2 = desiredAzimuthIndex; 173 m_elevation2 = elevation; 174 } 175 176 // Cross-fade / transition over a period of around 45 milliseconds. 177 // This is an empirical value tuned to be a reasonable trade-off between 178 // smoothness and speed. 179 const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096; 180 181 // Check for azimuth and elevation changes, initiating a cross-fade if needed. 
182 if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) { 183 if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) { 184 // Cross-fade from 1 -> 2 185 m_crossfadeIncr = 1 / fadeFrames; 186 m_azimuthIndex2 = desiredAzimuthIndex; 187 m_elevation2 = elevation; 188 } 189 } 190 if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) { 191 if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) { 192 // Cross-fade from 2 -> 1 193 m_crossfadeIncr = -1 / fadeFrames; 194 m_azimuthIndex1 = desiredAzimuthIndex; 195 m_elevation1 = elevation; 196 } 197 } 198 199 // Get the HRTFKernels and interpolated delays. 200 HRTFKernel* kernelL1; 201 HRTFKernel* kernelR1; 202 HRTFKernel* kernelL2; 203 HRTFKernel* kernelR2; 204 double frameDelayL1; 205 double frameDelayR1; 206 double frameDelayL2; 207 double frameDelayR2; 208 database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, 209 m_elevation1, kernelL1, kernelR1, 210 frameDelayL1, frameDelayR1); 211 database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, 212 m_elevation2, kernelL2, kernelR2, 213 frameDelayL2, frameDelayR2); 214 215 bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2; 216 MOZ_ASSERT(areKernelsGood); 217 if (!areKernelsGood) { 218 outputBus->SetNull(outputBus->GetDuration()); 219 return; 220 } 221 222 MOZ_ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && 223 frameDelayR1 / sampleRate() < MaxDelayTimeSeconds); 224 MOZ_ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && 225 frameDelayR2 / sampleRate() < MaxDelayTimeSeconds); 226 227 // Crossfade inter-aural delays based on transitions. 
228 float frameDelaysL[WEBAUDIO_BLOCK_SIZE]; 229 float frameDelaysR[WEBAUDIO_BLOCK_SIZE]; 230 { 231 float x = m_crossfadeX; 232 float incr = m_crossfadeIncr; 233 for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) { 234 frameDelaysL[i] = (1 - x) * frameDelayL1 + x * frameDelayL2; 235 frameDelaysR[i] = (1 - x) * frameDelayR1 + x * frameDelayR2; 236 x += incr; 237 } 238 } 239 240 // First run through delay lines for inter-aural time difference. 241 m_delayLine.Write(*inputBus); 242 // "Speakers" means a mono input is read into both outputs (with possibly 243 // different delays). 244 m_delayLine.ReadChannel(frameDelaysL, outputBus, 0, 245 ChannelInterpretation::Speakers); 246 m_delayLine.ReadChannel(frameDelaysR, outputBus, 1, 247 ChannelInterpretation::Speakers); 248 m_delayLine.NextBlock(); 249 250 bool needsCrossfading = m_crossfadeIncr; 251 252 const float* convolutionDestinationL1; 253 const float* convolutionDestinationR1; 254 const float* convolutionDestinationL2; 255 const float* convolutionDestinationR2; 256 257 // Now do the convolutions. 258 // Note that we avoid doing convolutions on both sets of convolvers if we're 259 // not currently cross-fading. 260 261 if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) { 262 convolutionDestinationL1 = 263 m_convolverL1.process(kernelL1->fftFrame(), destinationL); 264 convolutionDestinationR1 = 265 m_convolverR1.process(kernelR1->fftFrame(), destinationR); 266 } 267 268 if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) { 269 convolutionDestinationL2 = 270 m_convolverL2.process(kernelL2->fftFrame(), destinationL); 271 convolutionDestinationR2 = 272 m_convolverR2.process(kernelR2->fftFrame(), destinationR); 273 } 274 275 if (needsCrossfading) { 276 // Apply linear cross-fade. 
277 float x = m_crossfadeX; 278 float incr = m_crossfadeIncr; 279 for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; ++i) { 280 destinationL[i] = (1 - x) * convolutionDestinationL1[i] + 281 x * convolutionDestinationL2[i]; 282 destinationR[i] = (1 - x) * convolutionDestinationR1[i] + 283 x * convolutionDestinationR2[i]; 284 x += incr; 285 } 286 // Update cross-fade value from local. 287 m_crossfadeX = x; 288 289 if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) { 290 // We've fully made the crossfade transition from 1 -> 2. 291 m_crossfadeSelection = CrossfadeSelection2; 292 m_crossfadeX = 1; 293 m_crossfadeIncr = 0; 294 } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) { 295 // We've fully made the crossfade transition from 2 -> 1. 296 m_crossfadeSelection = CrossfadeSelection1; 297 m_crossfadeX = 0; 298 m_crossfadeIncr = 0; 299 } 300 } else { 301 const float* sourceL; 302 const float* sourceR; 303 if (m_crossfadeSelection == CrossfadeSelection1) { 304 sourceL = convolutionDestinationL1; 305 sourceR = convolutionDestinationR1; 306 } else { 307 sourceL = convolutionDestinationL2; 308 sourceR = convolutionDestinationR2; 309 } 310 PodCopy(destinationL, sourceL, WEBAUDIO_BLOCK_SIZE); 311 PodCopy(destinationR, sourceR, WEBAUDIO_BLOCK_SIZE); 312 } 313 } 314 315 int HRTFPanner::maxTailFrames() const { 316 // Although the ideal tail time would be the length of the impulse 317 // response, there is additional tail time from the approximations in the 318 // implementation. Because HRTFPanner is implemented with a DelayKernel 319 // and a FFTConvolver, the tailTime of the HRTFPanner is the sum of the 320 // tailTime of the DelayKernel and the tailTime of the FFTConvolver. The 321 // FFTs of the convolver are fftSize(), half of which is latency, but this 322 // is aligned with blocks and so is reduced by the one block which is 323 // processed immediately. 
324 return m_delayLine.MaxDelayTicks() + m_convolverL1.fftSize() / 2 + 325 m_convolverL1.latencyFrames(); 326 } 327 328 } // namespace WebCore