AudioConverter.cpp (16818B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "AudioConverter.h" 8 9 #include <speex/speex_resampler.h> 10 #include <string.h> 11 12 #include <cmath> 13 14 /* 15 * Parts derived from MythTV AudioConvert Class 16 * Created by Jean-Yves Avenard. 17 * 18 * Copyright (C) Bubblestuff Pty Ltd 2013 19 * Copyright (C) foobum@gmail.com 2010 20 */ 21 22 namespace mozilla { 23 24 AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut) 25 : mIn(aIn), mOut(aOut), mResampler(nullptr) { 26 MOZ_DIAGNOSTIC_ASSERT(CanConvert(aIn, aOut), 27 "The conversion is not supported"); 28 mIn.Layout().MappingTable(mOut.Layout(), &mChannelOrderMap); 29 if (aIn.Rate() != aOut.Rate()) { 30 RecreateResampler(); 31 } 32 } 33 34 AudioConverter::~AudioConverter() { 35 if (mResampler) { 36 speex_resampler_destroy(mResampler); 37 mResampler = nullptr; 38 } 39 } 40 41 bool AudioConverter::CanConvert(const AudioConfig& aIn, 42 const AudioConfig& aOut) { 43 if (aIn.Format() != aOut.Format() || 44 aIn.Interleaved() != aOut.Interleaved()) { 45 NS_WARNING("No format conversion is supported at this stage"); 46 return false; 47 } 48 if (aIn.Channels() != aOut.Channels() && aOut.Channels() > 2) { 49 NS_WARNING( 50 "Only down/upmixing to mono or stereo is supported at this stage"); 51 return false; 52 } 53 if (!aOut.Interleaved()) { 54 NS_WARNING("planar audio format not supported"); 55 return false; 56 } 57 return true; 58 } 59 60 bool AudioConverter::CanWorkInPlace() const { 61 bool needDownmix = mIn.Channels() > mOut.Channels(); 62 bool needUpmix = mIn.Channels() < mOut.Channels(); 63 bool canDownmixInPlace = 64 mIn.Channels() * AudioConfig::SampleSize(mIn.Format()) >= 65 mOut.Channels() * AudioConfig::SampleSize(mOut.Format()); 66 bool needResample = mIn.Rate() != mOut.Rate(); 67 bool canResampleInPlace = mIn.Rate() >= mOut.Rate(); 68 // We should be able to work in place if 1s of audio input takes less space 69 // than 1s of audio output. However, as we downmix before resampling we can't 70 // perform any upsampling in place (e.g. if incoming rate >= outgoing rate) 71 return !needUpmix && (!needDownmix || canDownmixInPlace) && 72 (!needResample || canResampleInPlace); 73 } 74 75 size_t AudioConverter::ProcessInternal(void* aOut, const void* aIn, 76 size_t aFrames) { 77 if (!aFrames) { 78 return 0; 79 } 80 81 if (mIn.Channels() > mOut.Channels()) { 82 return DownmixAudio(aOut, aIn, aFrames); 83 } 84 85 if (mIn.Channels() < mOut.Channels()) { 86 return UpmixAudio(aOut, aIn, aFrames); 87 } 88 89 if (mIn.Layout() != mOut.Layout() && CanReorderAudio()) { 90 ReOrderInterleavedChannels(aOut, aIn, aFrames); 91 } else if (aIn != aOut) { 92 memmove(aOut, aIn, FramesOutToBytes(aFrames)); 93 } 94 return aFrames; 95 } 96 97 // Reorder interleaved channels. 98 // Can work in place (e.g aOut == aIn). 99 template <class AudioDataType> 100 void _ReOrderInterleavedChannels(AudioDataType* aOut, const AudioDataType* aIn, 101 uint32_t aFrames, uint32_t aChannels, 102 const uint8_t* aChannelOrderMap) { 103 MOZ_DIAGNOSTIC_ASSERT(aChannels <= AudioConfig::ChannelLayout::MAX_CHANNELS); 104 AudioDataType val[AudioConfig::ChannelLayout::MAX_CHANNELS]; 105 for (uint32_t i = 0; i < aFrames; i++) { 106 for (uint32_t j = 0; j < aChannels; j++) { 107 val[j] = aIn[aChannelOrderMap[j]]; 108 } 109 for (uint32_t j = 0; j < aChannels; j++) { 110 aOut[j] = val[j]; 111 } 112 aOut += aChannels; 113 aIn += aChannels; 114 } 115 } 116 117 void AudioConverter::ReOrderInterleavedChannels(void* aOut, const void* aIn, 118 size_t aFrames) const { 119 MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() == mOut.Channels()); 120 MOZ_DIAGNOSTIC_ASSERT(CanReorderAudio()); 121 122 if (mChannelOrderMap.IsEmpty() || mOut.Channels() == 1 || 123 mOut.Layout() == mIn.Layout()) { 124 // If channel count is 1, planar and non-planar formats are the same or 125 // there's nothing to reorder, or if we don't know how to re-order. 126 if (aOut != aIn) { 127 memmove(aOut, aIn, FramesOutToBytes(aFrames)); 128 } 129 return; 130 } 131 132 uint32_t bits = AudioConfig::FormatToBits(mOut.Format()); 133 switch (bits) { 134 case 8: 135 _ReOrderInterleavedChannels((uint8_t*)aOut, (const uint8_t*)aIn, aFrames, 136 mIn.Channels(), mChannelOrderMap.Elements()); 137 break; 138 case 16: 139 _ReOrderInterleavedChannels((int16_t*)aOut, (const int16_t*)aIn, aFrames, 140 mIn.Channels(), mChannelOrderMap.Elements()); 141 break; 142 default: 143 MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut.Format()) == 4); 144 _ReOrderInterleavedChannels((int32_t*)aOut, (const int32_t*)aIn, aFrames, 145 mIn.Channels(), mChannelOrderMap.Elements()); 146 break; 147 } 148 } 149 150 static inline int16_t clipTo15(int32_t aX) { 151 return aX < -32768 ? -32768 : aX <= 32767 ? aX : 32767; 152 } 153 154 template <typename TYPE> 155 static void dumbUpDownMix(TYPE* aOut, int32_t aOutChannels, const TYPE* aIn, 156 int32_t aInChannels, int32_t aFrames) { 157 if (aIn == aOut) { 158 return; 159 } 160 int32_t commonChannels = std::min(aInChannels, aOutChannels); 161 162 for (int32_t i = 0; i < aFrames; i++) { 163 for (int32_t j = 0; j < commonChannels; j++) { 164 aOut[i * aOutChannels + j] = aIn[i * aInChannels + j]; 165 } 166 if (aOutChannels > aInChannels) { 167 for (int32_t j = 0; j < aInChannels - aOutChannels; j++) { 168 aOut[i * aOutChannels + j] = 0; 169 } 170 } 171 } 172 } 173 174 size_t AudioConverter::DownmixAudio(void* aOut, const void* aIn, 175 size_t aFrames) const { 176 MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 || 177 mIn.Format() == AudioConfig::FORMAT_FLT); 178 MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() >= mOut.Channels()); 179 MOZ_DIAGNOSTIC_ASSERT(mOut.Layout() == AudioConfig::ChannelLayout(2) || 180 mOut.Layout() == AudioConfig::ChannelLayout(1)); 181 182 uint32_t inChannels = mIn.Channels(); 183 uint32_t outChannels = mOut.Channels(); 184 185 if (inChannels == outChannels) { 186 if (aOut != aIn) { 187 memmove(aOut, aIn, FramesOutToBytes(aFrames)); 188 } 189 return aFrames; 190 } 191 192 if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid()) { 193 // Dumb copy dropping extra channels. 194 if (mIn.Format() == AudioConfig::FORMAT_FLT) { 195 dumbUpDownMix(static_cast<float*>(aOut), outChannels, 196 static_cast<const float*>(aIn), inChannels, aFrames); 197 } else if (mIn.Format() == AudioConfig::FORMAT_S16) { 198 dumbUpDownMix(static_cast<int16_t*>(aOut), outChannels, 199 static_cast<const int16_t*>(aIn), inChannels, aFrames); 200 } else { 201 MOZ_DIAGNOSTIC_CRASH("Unsupported data type"); 202 } 203 return aFrames; 204 } 205 206 MOZ_ASSERT( 207 mIn.Layout() == AudioConfig::ChannelLayout::SMPTEDefault(mIn.Layout()), 208 "Can only downmix input data in SMPTE layout"); 209 if (inChannels > 2) { 210 if (mIn.Format() == AudioConfig::FORMAT_FLT) { 211 // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows 212 // 5-8. 213 static const float dmatrix[6][8][2] = { 214 /*3*/ {{0.5858f, 0}, {0, 0.5858f}, {0.4142f, 0.4142f}}, 215 /*4*/ 216 {{0.4226f, 0}, {0, 0.4226f}, {0.366f, 0.2114f}, {0.2114f, 0.366f}}, 217 /*5*/ 218 {{0.6510f, 0}, 219 {0, 0.6510f}, 220 {0.4600f, 0.4600f}, 221 {0.5636f, 0.3254f}, 222 {0.3254f, 0.5636f}}, 223 /*6*/ 224 {{0.5290f, 0}, 225 {0, 0.5290f}, 226 {0.3741f, 0.3741f}, 227 {0.3741f, 0.3741f}, 228 {0.4582f, 0.2645f}, 229 {0.2645f, 0.4582f}}, 230 /*7*/ 231 {{0.4553f, 0}, 232 {0, 0.4553f}, 233 {0.3220f, 0.3220f}, 234 {0.3220f, 0.3220f}, 235 {0.2788f, 0.2788f}, 236 {0.3943f, 0.2277f}, 237 {0.2277f, 0.3943f}}, 238 /*8*/ 239 {{0.3886f, 0}, 240 {0, 0.3886f}, 241 {0.2748f, 0.2748f}, 242 {0.2748f, 0.2748f}, 243 {0.3366f, 0.1943f}, 244 {0.1943f, 0.3366f}, 245 {0.3366f, 0.1943f}, 246 {0.1943f, 0.3366f}}, 247 }; 248 // Re-write the buffer with downmixed data 249 const float* in = static_cast<const float*>(aIn); 250 float* out = static_cast<float*>(aOut); 251 for (uint32_t i = 0; i < aFrames; i++) { 252 float sampL = 0.0; 253 float sampR = 0.0; 254 for (uint32_t j = 0; j < inChannels; j++) { 255 sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0]; 256 sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1]; 257 } 258 if (outChannels == 2) { 259 *out++ = sampL; 260 *out++ = sampR; 261 } else { 262 *out++ = (sampL + sampR) * 0.5; 263 } 264 } 265 } else if (mIn.Format() == AudioConfig::FORMAT_S16) { 266 // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows 267 // 5-8. Coefficients in Q14. 268 static const int16_t dmatrix[6][8][2] = { 269 /*3*/ {{9598, 0}, {0, 9598}, {6786, 6786}}, 270 /*4*/ {{6925, 0}, {0, 6925}, {5997, 3462}, {3462, 5997}}, 271 /*5*/ 272 {{10663, 0}, {0, 10663}, {7540, 7540}, {9234, 5331}, {5331, 9234}}, 273 /*6*/ 274 {{8668, 0}, 275 {0, 8668}, 276 {6129, 6129}, 277 {6129, 6129}, 278 {7507, 4335}, 279 {4335, 7507}}, 280 /*7*/ 281 {{7459, 0}, 282 {0, 7459}, 283 {5275, 5275}, 284 {5275, 5275}, 285 {4568, 4568}, 286 {6460, 3731}, 287 {3731, 6460}}, 288 /*8*/ 289 {{6368, 0}, 290 {0, 6368}, 291 {4502, 4502}, 292 {4502, 4502}, 293 {5514, 3184}, 294 {3184, 5514}, 295 {5514, 3184}, 296 {3184, 5514}}}; 297 // Re-write the buffer with downmixed data 298 const int16_t* in = static_cast<const int16_t*>(aIn); 299 int16_t* out = static_cast<int16_t*>(aOut); 300 for (uint32_t i = 0; i < aFrames; i++) { 301 int32_t sampL = 0; 302 int32_t sampR = 0; 303 for (uint32_t j = 0; j < inChannels; j++) { 304 sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0]; 305 sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1]; 306 } 307 sampL = clipTo15((sampL + 8192) >> 14); 308 sampR = clipTo15((sampR + 8192) >> 14); 309 if (outChannels == 2) { 310 *out++ = sampL; 311 *out++ = sampR; 312 } else { 313 *out++ = (sampL + sampR) * 0.5; 314 } 315 } 316 } else { 317 MOZ_DIAGNOSTIC_CRASH("Unsupported data type"); 318 } 319 return aFrames; 320 } 321 322 MOZ_DIAGNOSTIC_ASSERT(inChannels == 2 && outChannels == 1); 323 if (mIn.Format() == AudioConfig::FORMAT_FLT) { 324 const float* in = static_cast<const float*>(aIn); 325 float* out = static_cast<float*>(aOut); 326 for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) { 327 float sample = 0.0; 328 // The sample of the buffer would be interleaved. 329 sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5; 330 *out++ = sample; 331 } 332 } else if (mIn.Format() == AudioConfig::FORMAT_S16) { 333 const int16_t* in = static_cast<const int16_t*>(aIn); 334 int16_t* out = static_cast<int16_t*>(aOut); 335 for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) { 336 int32_t sample = 0.0; 337 // The sample of the buffer would be interleaved. 338 sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5; 339 *out++ = sample; 340 } 341 } else { 342 MOZ_DIAGNOSTIC_CRASH("Unsupported data type"); 343 } 344 return aFrames; 345 } 346 347 size_t AudioConverter::ResampleAudio(void* aOut, const void* aIn, 348 size_t aFrames) { 349 if (!mResampler) { 350 return 0; 351 } 352 uint32_t outframes = ResampleRecipientFrames(aFrames); 353 uint32_t inframes = aFrames; 354 355 int error; 356 if (mOut.Format() == AudioConfig::FORMAT_FLT) { 357 const float* in = reinterpret_cast<const float*>(aIn); 358 float* out = reinterpret_cast<float*>(aOut); 359 error = speex_resampler_process_interleaved_float(mResampler, in, &inframes, 360 out, &outframes); 361 } else if (mOut.Format() == AudioConfig::FORMAT_S16) { 362 const int16_t* in = reinterpret_cast<const int16_t*>(aIn); 363 int16_t* out = reinterpret_cast<int16_t*>(aOut); 364 error = speex_resampler_process_interleaved_int(mResampler, in, &inframes, 365 out, &outframes); 366 } else { 367 MOZ_DIAGNOSTIC_CRASH("Unsupported data type"); 368 error = RESAMPLER_ERR_ALLOC_FAILED; 369 } 370 MOZ_ASSERT(error == RESAMPLER_ERR_SUCCESS); 371 if (error != RESAMPLER_ERR_SUCCESS) { 372 speex_resampler_destroy(mResampler); 373 mResampler = nullptr; 374 return 0; 375 } 376 MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped"); 377 return outframes; 378 } 379 380 void AudioConverter::RecreateResampler() { 381 if (mResampler) { 382 speex_resampler_destroy(mResampler); 383 } 384 int error; 385 mResampler = speex_resampler_init(mOut.Channels(), mIn.Rate(), mOut.Rate(), 386 SPEEX_RESAMPLER_QUALITY_DEFAULT, &error); 387 388 if (error == RESAMPLER_ERR_SUCCESS) { 389 speex_resampler_skip_zeros(mResampler); 390 } else { 391 NS_WARNING("Failed to initialize resampler."); 392 mResampler = nullptr; 393 } 394 } 395 396 size_t AudioConverter::DrainResampler(void* aOut) { 397 if (!mResampler) { 398 return 0; 399 } 400 int frames = speex_resampler_get_input_latency(mResampler); 401 AlignedByteBuffer buffer(FramesOutToBytes(frames)); 402 if (!buffer) { 403 // OOM 404 return 0; 405 } 406 frames = ResampleAudio(aOut, buffer.Data(), frames); 407 // Tore down the resampler as it's easier than handling follow-up. 408 RecreateResampler(); 409 return frames; 410 } 411 412 size_t AudioConverter::UpmixAudio(void* aOut, const void* aIn, 413 size_t aFrames) const { 414 MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 || 415 mIn.Format() == AudioConfig::FORMAT_FLT); 416 MOZ_ASSERT(mIn.Channels() < mOut.Channels()); 417 MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now"); 418 MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now"); 419 420 if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid() || 421 mOut.Channels() != 2) { 422 // Dumb copy the channels and insert silence for the extra channels. 423 if (mIn.Format() == AudioConfig::FORMAT_FLT) { 424 dumbUpDownMix(static_cast<float*>(aOut), mOut.Channels(), 425 static_cast<const float*>(aIn), mIn.Channels(), aFrames); 426 } else if (mIn.Format() == AudioConfig::FORMAT_S16) { 427 dumbUpDownMix(static_cast<int16_t*>(aOut), mOut.Channels(), 428 static_cast<const int16_t*>(aIn), mIn.Channels(), aFrames); 429 } else { 430 MOZ_DIAGNOSTIC_CRASH("Unsupported data type"); 431 } 432 return aFrames; 433 } 434 435 // Upmix mono to stereo. 436 // This is a very dumb mono to stereo upmixing, power levels are preserved 437 // following the calculation: left = right = -3dB*mono. 438 if (mIn.Format() == AudioConfig::FORMAT_FLT) { 439 const float m3db = std::sqrt(0.5); // -3dB = sqrt(1/2) 440 const float* in = static_cast<const float*>(aIn); 441 float* out = static_cast<float*>(aOut); 442 for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) { 443 float sample = in[fIdx] * m3db; 444 // The samples of the buffer would be interleaved. 445 *out++ = sample; 446 *out++ = sample; 447 } 448 } else if (mIn.Format() == AudioConfig::FORMAT_S16) { 449 const int16_t* in = static_cast<const int16_t*>(aIn); 450 int16_t* out = static_cast<int16_t*>(aOut); 451 for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) { 452 int16_t sample = 453 ((int32_t)in[fIdx] * 11585) >> 14; // close enough to i*sqrt(0.5) 454 // The samples of the buffer would be interleaved. 455 *out++ = sample; 456 *out++ = sample; 457 } 458 } else { 459 MOZ_DIAGNOSTIC_CRASH("Unsupported data type"); 460 } 461 462 return aFrames; 463 } 464 465 size_t AudioConverter::ResampleRecipientFrames(size_t aFrames) const { 466 if (!aFrames && mIn.Rate() != mOut.Rate()) { 467 if (!mResampler) { 468 return 0; 469 } 470 // We drain by pushing in get_input_latency() samples of 0 471 aFrames = speex_resampler_get_input_latency(mResampler); 472 } 473 return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1; 474 } 475 476 size_t AudioConverter::FramesOutToSamples(size_t aFrames) const { 477 return aFrames * mOut.Channels(); 478 } 479 480 size_t AudioConverter::SamplesInToFrames(size_t aSamples) const { 481 return aSamples / mIn.Channels(); 482 } 483 484 size_t AudioConverter::FramesOutToBytes(size_t aFrames) const { 485 return FramesOutToSamples(aFrames) * AudioConfig::SampleSize(mOut.Format()); 486 } 487 } // namespace mozilla