ReverbConvolver.cpp (11382B)
1 /* 2 * Copyright (C) 2010 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of 14 * its contributors may be used to endorse or promote products derived 15 * from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "ReverbConvolver.h" 30 31 #include "ReverbConvolverStage.h" 32 33 using namespace mozilla; 34 35 namespace WebCore { 36 37 const int InputBufferSize = 8 * 16384; 38 39 // We only process the leading portion of the impulse response in the real-time 40 // thread. We don't exceed this length. It turns out then, that the background 41 // thread has about 278msec of scheduling slop. Empirically, this has been found 42 // to be a good compromise between giving enough time for scheduling slop, while 43 // still minimizing the amount of processing done in the primary (high-priority) 44 // thread. This was found to be a good value on Mac OS X, and may work well on 45 // other platforms as well, assuming the very rough scheduling latencies are 46 // similar on these time-scales. Of course, this code may need to be tuned for 47 // individual platforms if this assumption is found to be incorrect. 48 const size_t RealtimeFrameLimit = 8192 + 4096 // ~278msec @ 44.1KHz 49 - WEBAUDIO_BLOCK_SIZE; 50 // First stage will have size MinFFTSize - successive stages will double in 51 // size each time until we hit the maximum size. 52 const size_t MinFFTSize = 256; 53 // If we are using background threads then don't exceed this FFT size for the 54 // stages which run in the real-time thread. This avoids having only one or 55 // two large stages (size 16384 or so) at the end which take a lot of time 56 // every several processing slices. This way we amortize the cost over more 57 // processing slices. 58 const size_t MaxRealtimeFFTSize = 4096; 59 60 ReverbConvolver::ReverbConvolver(const float* impulseResponseData, 61 size_t impulseResponseLength, 62 size_t maxFFTSize, size_t convolverRenderPhase, 63 bool useBackgroundThreads, 64 bool* aAllocationFailure) 65 : m_impulseResponseLength(impulseResponseLength), 66 m_inputBuffer(InputBufferSize), 67 m_backgroundThread("ConvolverWorker"), 68 m_backgroundThreadMonitor("ConvolverMonitor"), 69 m_useBackgroundThreads(useBackgroundThreads), 70 m_wantsToExit(false), 71 m_moreInputBuffered(false) { 72 *aAllocationFailure = !m_accumulationBuffer.allocate(impulseResponseLength + 73 WEBAUDIO_BLOCK_SIZE); 74 if (*aAllocationFailure) { 75 return; 76 } 77 // For the moment, a good way to know if we have real-time constraint is to 78 // check if we're using background threads. Otherwise, assume we're being run 79 // from a command-line tool. 80 bool hasRealtimeConstraint = useBackgroundThreads; 81 82 const float* response = impulseResponseData; 83 size_t totalResponseLength = impulseResponseLength; 84 85 // The total latency is zero because the first FFT stage is small enough 86 // to return output in the first block. 87 size_t reverbTotalLatency = 0; 88 89 size_t stageOffset = 0; 90 size_t stagePhase = 0; 91 size_t fftSize = MinFFTSize; 92 while (stageOffset < totalResponseLength) { 93 size_t stageSize = fftSize / 2; 94 95 // For the last stage, it's possible that stageOffset is such that we're 96 // straddling the end of the impulse response buffer (if we use stageSize), 97 // so reduce the last stage's length... 98 if (stageSize + stageOffset > totalResponseLength) { 99 stageSize = totalResponseLength - stageOffset; 100 // Use smallest FFT that is large enough to cover the last stage. 101 fftSize = MinFFTSize; 102 while (stageSize * 2 > fftSize) { 103 fftSize *= 2; 104 } 105 } 106 107 // This "staggers" the time when each FFT happens so they don't all happen 108 // at the same time 109 int renderPhase = convolverRenderPhase + stagePhase; 110 111 UniquePtr<ReverbConvolverStage> stage(new ReverbConvolverStage( 112 response, totalResponseLength, reverbTotalLatency, stageOffset, 113 stageSize, fftSize, renderPhase, &m_accumulationBuffer)); 114 115 bool isBackgroundStage = false; 116 117 if (this->useBackgroundThreads() && stageOffset > RealtimeFrameLimit) { 118 m_backgroundStages.AppendElement(std::move(stage)); 119 isBackgroundStage = true; 120 } else 121 m_stages.AppendElement(std::move(stage)); 122 123 // Figure out next FFT size 124 fftSize *= 2; 125 126 stageOffset += stageSize; 127 128 if (hasRealtimeConstraint && !isBackgroundStage && 129 fftSize > MaxRealtimeFFTSize) { 130 fftSize = MaxRealtimeFFTSize; 131 // Custom phase positions for all but the first of the realtime 132 // stages of largest size. These spread out the work of the 133 // larger realtime stages. None of the FFTs of size 1024, 2048 or 134 // 4096 are performed when processing the same block. The first 135 // MaxRealtimeFFTSize = 4096 stage, at the end of the doubling, 136 // performs its FFT at block 7. The FFTs of size 2048 are 137 // performed in blocks 3 + 8 * n and size 1024 at 1 + 4 * n. 138 const uint32_t phaseLookup[] = {14, 0, 10, 4}; 139 stagePhase = WEBAUDIO_BLOCK_SIZE * 140 phaseLookup[m_stages.Length() % std::size(phaseLookup)]; 141 } else if (fftSize > maxFFTSize) { 142 fftSize = maxFFTSize; 143 // A prime offset spreads out FFTs in a way that all 144 // available phase positions will be used if there are sufficient 145 // stages. 146 stagePhase += 5 * WEBAUDIO_BLOCK_SIZE; 147 } else if (stageSize > WEBAUDIO_BLOCK_SIZE) { 148 // As the stages are doubling in size, the next FFT will occur 149 // mid-way between FFTs for this stage. 150 stagePhase = stageSize - WEBAUDIO_BLOCK_SIZE; 151 } 152 } 153 154 // Start up background thread 155 // FIXME: would be better to up the thread priority here. It doesn't need to 156 // be real-time, but higher than the default... 157 if (this->useBackgroundThreads() && m_backgroundStages.Length() > 0) { 158 if (!m_backgroundThread.Start()) { 159 NS_WARNING("Cannot start convolver thread."); 160 return; 161 } 162 m_backgroundThread.message_loop()->PostTask(NewNonOwningRunnableMethod( 163 "WebCore::ReverbConvolver::backgroundThreadEntry", this, 164 &ReverbConvolver::backgroundThreadEntry)); 165 } 166 } 167 168 ReverbConvolver::~ReverbConvolver() { 169 // Wait for background thread to stop 170 if (useBackgroundThreads() && m_backgroundThread.IsRunning()) { 171 m_wantsToExit = true; 172 173 // Wake up thread so it can return 174 { 175 MonitorAutoLock locker(m_backgroundThreadMonitor); 176 m_moreInputBuffered = true; 177 m_backgroundThreadMonitor.Notify(); 178 } 179 180 m_backgroundThread.Stop(); 181 } 182 } 183 184 size_t ReverbConvolver::sizeOfIncludingThis( 185 mozilla::MallocSizeOf aMallocSizeOf) const { 186 size_t amount = aMallocSizeOf(this); 187 amount += m_stages.ShallowSizeOfExcludingThis(aMallocSizeOf); 188 for (size_t i = 0; i < m_stages.Length(); i++) { 189 if (m_stages[i]) { 190 amount += m_stages[i]->sizeOfIncludingThis(aMallocSizeOf); 191 } 192 } 193 194 amount += m_backgroundStages.ShallowSizeOfExcludingThis(aMallocSizeOf); 195 for (size_t i = 0; i < m_backgroundStages.Length(); i++) { 196 if (m_backgroundStages[i]) { 197 amount += m_backgroundStages[i]->sizeOfIncludingThis(aMallocSizeOf); 198 } 199 } 200 201 // NB: The buffer sizes are static, so even though they might be accessed 202 // in another thread it's safe to measure them. 203 amount += m_accumulationBuffer.sizeOfExcludingThis(aMallocSizeOf); 204 amount += m_inputBuffer.sizeOfExcludingThis(aMallocSizeOf); 205 206 // Possible future measurements: 207 // - m_backgroundThread 208 // - m_backgroundThreadMonitor 209 return amount; 210 } 211 212 void ReverbConvolver::backgroundThreadEntry() { 213 while (!m_wantsToExit) { 214 // Wait for realtime thread to give us more input 215 m_moreInputBuffered = false; 216 { 217 MonitorAutoLock locker(m_backgroundThreadMonitor); 218 while (!m_moreInputBuffered && !m_wantsToExit) 219 m_backgroundThreadMonitor.Wait(); 220 } 221 222 // Process all of the stages until their read indices reach the input 223 // buffer's write index 224 int writeIndex = m_inputBuffer.writeIndex(); 225 226 // Even though it doesn't seem like every stage needs to maintain its own 227 // version of readIndex we do this in case we want to run in more than one 228 // background thread. 229 int readIndex; 230 231 while ((readIndex = m_backgroundStages[0]->inputReadIndex()) != 232 writeIndex) { // FIXME: do better to detect buffer overrun... 233 // Accumulate contributions from each stage 234 for (size_t i = 0; i < m_backgroundStages.Length(); ++i) 235 m_backgroundStages[i]->processInBackground(this); 236 } 237 } 238 } 239 240 void ReverbConvolver::process(const float* sourceChannelData, 241 float* destinationChannelData) { 242 const float* source = sourceChannelData; 243 float* destination = destinationChannelData; 244 bool isDataSafe = source && destination; 245 MOZ_ASSERT(isDataSafe); 246 if (!isDataSafe) return; 247 248 // Feed input buffer (read by all threads) 249 m_inputBuffer.write(source, WEBAUDIO_BLOCK_SIZE); 250 251 // Accumulate contributions from each stage 252 for (size_t i = 0; i < m_stages.Length(); ++i) m_stages[i]->process(source); 253 254 // Finally read from accumulation buffer 255 m_accumulationBuffer.readAndClear(destination, WEBAUDIO_BLOCK_SIZE); 256 257 // Now that we've buffered more input, wake up our background thread. 258 259 // Not using a MonitorAutoLock looks strange, but we use a TryLock() instead 260 // because this is run on the real-time thread where it is a disaster for the 261 // lock to be contended (causes audio glitching). It's OK if we fail to 262 // signal from time to time, since we'll get to it the next time we're called. 263 // We're called repeatedly and frequently (around every 3ms). The background 264 // thread is processing well into the future and has a considerable amount of 265 // leeway here... 266 if (m_backgroundThreadMonitor.TryLock()) { 267 m_moreInputBuffered = true; 268 m_backgroundThreadMonitor.Notify(); 269 m_backgroundThreadMonitor.Unlock(); 270 } 271 } 272 273 } // namespace WebCore