tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

AudioConverter.cpp (16818B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "AudioConverter.h"
      8 
      9 #include <speex/speex_resampler.h>
     10 #include <string.h>
     11 
     12 #include <cmath>
     13 
     14 /*
     15 *  Parts derived from MythTV AudioConvert Class
     16 *  Created by Jean-Yves Avenard.
     17 *
     18 *  Copyright (C) Bubblestuff Pty Ltd 2013
     19 *  Copyright (C) foobum@gmail.com 2010
     20 */
     21 
     22 namespace mozilla {
     23 
     24 AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut)
     25    : mIn(aIn), mOut(aOut), mResampler(nullptr) {
     26  MOZ_DIAGNOSTIC_ASSERT(CanConvert(aIn, aOut),
     27                        "The conversion is not supported");
     28  mIn.Layout().MappingTable(mOut.Layout(), &mChannelOrderMap);
     29  if (aIn.Rate() != aOut.Rate()) {
     30    RecreateResampler();
     31  }
     32 }
     33 
     34 AudioConverter::~AudioConverter() {
     35  if (mResampler) {
     36    speex_resampler_destroy(mResampler);
     37    mResampler = nullptr;
     38  }
     39 }
     40 
     41 bool AudioConverter::CanConvert(const AudioConfig& aIn,
     42                                const AudioConfig& aOut) {
     43  if (aIn.Format() != aOut.Format() ||
     44      aIn.Interleaved() != aOut.Interleaved()) {
     45    NS_WARNING("No format conversion is supported at this stage");
     46    return false;
     47  }
     48  if (aIn.Channels() != aOut.Channels() && aOut.Channels() > 2) {
     49    NS_WARNING(
     50        "Only down/upmixing to mono or stereo is supported at this stage");
     51    return false;
     52  }
     53  if (!aOut.Interleaved()) {
     54    NS_WARNING("planar audio format not supported");
     55    return false;
     56  }
     57  return true;
     58 }
     59 
     60 bool AudioConverter::CanWorkInPlace() const {
     61  bool needDownmix = mIn.Channels() > mOut.Channels();
     62  bool needUpmix = mIn.Channels() < mOut.Channels();
     63  bool canDownmixInPlace =
     64      mIn.Channels() * AudioConfig::SampleSize(mIn.Format()) >=
     65      mOut.Channels() * AudioConfig::SampleSize(mOut.Format());
     66  bool needResample = mIn.Rate() != mOut.Rate();
     67  bool canResampleInPlace = mIn.Rate() >= mOut.Rate();
     68  // We should be able to work in place if 1s of audio input takes less space
     69  // than 1s of audio output. However, as we downmix before resampling we can't
     70  // perform any upsampling in place (e.g. if incoming rate >= outgoing rate)
     71  return !needUpmix && (!needDownmix || canDownmixInPlace) &&
     72         (!needResample || canResampleInPlace);
     73 }
     74 
     75 size_t AudioConverter::ProcessInternal(void* aOut, const void* aIn,
     76                                       size_t aFrames) {
     77  if (!aFrames) {
     78    return 0;
     79  }
     80 
     81  if (mIn.Channels() > mOut.Channels()) {
     82    return DownmixAudio(aOut, aIn, aFrames);
     83  }
     84 
     85  if (mIn.Channels() < mOut.Channels()) {
     86    return UpmixAudio(aOut, aIn, aFrames);
     87  }
     88 
     89  if (mIn.Layout() != mOut.Layout() && CanReorderAudio()) {
     90    ReOrderInterleavedChannels(aOut, aIn, aFrames);
     91  } else if (aIn != aOut) {
     92    memmove(aOut, aIn, FramesOutToBytes(aFrames));
     93  }
     94  return aFrames;
     95 }
     96 
     97 // Reorder interleaved channels.
     98 // Can work in place (e.g aOut == aIn).
     99 template <class AudioDataType>
    100 void _ReOrderInterleavedChannels(AudioDataType* aOut, const AudioDataType* aIn,
    101                                 uint32_t aFrames, uint32_t aChannels,
    102                                 const uint8_t* aChannelOrderMap) {
    103  MOZ_DIAGNOSTIC_ASSERT(aChannels <= AudioConfig::ChannelLayout::MAX_CHANNELS);
    104  AudioDataType val[AudioConfig::ChannelLayout::MAX_CHANNELS];
    105  for (uint32_t i = 0; i < aFrames; i++) {
    106    for (uint32_t j = 0; j < aChannels; j++) {
    107      val[j] = aIn[aChannelOrderMap[j]];
    108    }
    109    for (uint32_t j = 0; j < aChannels; j++) {
    110      aOut[j] = val[j];
    111    }
    112    aOut += aChannels;
    113    aIn += aChannels;
    114  }
    115 }
    116 
    117 void AudioConverter::ReOrderInterleavedChannels(void* aOut, const void* aIn,
    118                                                size_t aFrames) const {
    119  MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() == mOut.Channels());
    120  MOZ_DIAGNOSTIC_ASSERT(CanReorderAudio());
    121 
    122  if (mChannelOrderMap.IsEmpty() || mOut.Channels() == 1 ||
    123      mOut.Layout() == mIn.Layout()) {
    124    // If channel count is 1, planar and non-planar formats are the same or
    125    // there's nothing to reorder, or if we don't know how to re-order.
    126    if (aOut != aIn) {
    127      memmove(aOut, aIn, FramesOutToBytes(aFrames));
    128    }
    129    return;
    130  }
    131 
    132  uint32_t bits = AudioConfig::FormatToBits(mOut.Format());
    133  switch (bits) {
    134    case 8:
    135      _ReOrderInterleavedChannels((uint8_t*)aOut, (const uint8_t*)aIn, aFrames,
    136                                  mIn.Channels(), mChannelOrderMap.Elements());
    137      break;
    138    case 16:
    139      _ReOrderInterleavedChannels((int16_t*)aOut, (const int16_t*)aIn, aFrames,
    140                                  mIn.Channels(), mChannelOrderMap.Elements());
    141      break;
    142    default:
    143      MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut.Format()) == 4);
    144      _ReOrderInterleavedChannels((int32_t*)aOut, (const int32_t*)aIn, aFrames,
    145                                  mIn.Channels(), mChannelOrderMap.Elements());
    146      break;
    147  }
    148 }
    149 
    150 static inline int16_t clipTo15(int32_t aX) {
    151  return aX < -32768 ? -32768 : aX <= 32767 ? aX : 32767;
    152 }
    153 
    154 template <typename TYPE>
    155 static void dumbUpDownMix(TYPE* aOut, int32_t aOutChannels, const TYPE* aIn,
    156                          int32_t aInChannels, int32_t aFrames) {
    157  if (aIn == aOut) {
    158    return;
    159  }
    160  int32_t commonChannels = std::min(aInChannels, aOutChannels);
    161 
    162  for (int32_t i = 0; i < aFrames; i++) {
    163    for (int32_t j = 0; j < commonChannels; j++) {
    164      aOut[i * aOutChannels + j] = aIn[i * aInChannels + j];
    165    }
    166    if (aOutChannels > aInChannels) {
    167      for (int32_t j = 0; j < aInChannels - aOutChannels; j++) {
    168        aOut[i * aOutChannels + j] = 0;
    169      }
    170    }
    171  }
    172 }
    173 
    174 size_t AudioConverter::DownmixAudio(void* aOut, const void* aIn,
    175                                    size_t aFrames) const {
    176  MOZ_DIAGNOSTIC_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
    177                        mIn.Format() == AudioConfig::FORMAT_FLT);
    178  MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() >= mOut.Channels());
    179  MOZ_DIAGNOSTIC_ASSERT(mOut.Layout() == AudioConfig::ChannelLayout(2) ||
    180                        mOut.Layout() == AudioConfig::ChannelLayout(1));
    181 
    182  uint32_t inChannels = mIn.Channels();
    183  uint32_t outChannels = mOut.Channels();
    184 
    185  if (inChannels == outChannels) {
    186    if (aOut != aIn) {
    187      memmove(aOut, aIn, FramesOutToBytes(aFrames));
    188    }
    189    return aFrames;
    190  }
    191 
    192  if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid()) {
    193    // Dumb copy dropping extra channels.
    194    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
    195      dumbUpDownMix(static_cast<float*>(aOut), outChannels,
    196                    static_cast<const float*>(aIn), inChannels, aFrames);
    197    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
    198      dumbUpDownMix(static_cast<int16_t*>(aOut), outChannels,
    199                    static_cast<const int16_t*>(aIn), inChannels, aFrames);
    200    } else {
    201      MOZ_DIAGNOSTIC_CRASH("Unsupported data type");
    202    }
    203    return aFrames;
    204  }
    205 
    206  MOZ_ASSERT(
    207      mIn.Layout() == AudioConfig::ChannelLayout::SMPTEDefault(mIn.Layout()),
    208      "Can only downmix input data in SMPTE layout");
    209  if (inChannels > 2) {
    210    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
    211      // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
    212      // 5-8.
    213      static const float dmatrix[6][8][2] = {
    214          /*3*/ {{0.5858f, 0}, {0, 0.5858f}, {0.4142f, 0.4142f}},
    215          /*4*/
    216          {{0.4226f, 0}, {0, 0.4226f}, {0.366f, 0.2114f}, {0.2114f, 0.366f}},
    217          /*5*/
    218          {{0.6510f, 0},
    219           {0, 0.6510f},
    220           {0.4600f, 0.4600f},
    221           {0.5636f, 0.3254f},
    222           {0.3254f, 0.5636f}},
    223          /*6*/
    224          {{0.5290f, 0},
    225           {0, 0.5290f},
    226           {0.3741f, 0.3741f},
    227           {0.3741f, 0.3741f},
    228           {0.4582f, 0.2645f},
    229           {0.2645f, 0.4582f}},
    230          /*7*/
    231          {{0.4553f, 0},
    232           {0, 0.4553f},
    233           {0.3220f, 0.3220f},
    234           {0.3220f, 0.3220f},
    235           {0.2788f, 0.2788f},
    236           {0.3943f, 0.2277f},
    237           {0.2277f, 0.3943f}},
    238          /*8*/
    239          {{0.3886f, 0},
    240           {0, 0.3886f},
    241           {0.2748f, 0.2748f},
    242           {0.2748f, 0.2748f},
    243           {0.3366f, 0.1943f},
    244           {0.1943f, 0.3366f},
    245           {0.3366f, 0.1943f},
    246           {0.1943f, 0.3366f}},
    247      };
    248      // Re-write the buffer with downmixed data
    249      const float* in = static_cast<const float*>(aIn);
    250      float* out = static_cast<float*>(aOut);
    251      for (uint32_t i = 0; i < aFrames; i++) {
    252        float sampL = 0.0;
    253        float sampR = 0.0;
    254        for (uint32_t j = 0; j < inChannels; j++) {
    255          sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0];
    256          sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1];
    257        }
    258        if (outChannels == 2) {
    259          *out++ = sampL;
    260          *out++ = sampR;
    261        } else {
    262          *out++ = (sampL + sampR) * 0.5;
    263        }
    264      }
    265    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
    266      // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
    267      // 5-8. Coefficients in Q14.
    268      static const int16_t dmatrix[6][8][2] = {
    269          /*3*/ {{9598, 0}, {0, 9598}, {6786, 6786}},
    270          /*4*/ {{6925, 0}, {0, 6925}, {5997, 3462}, {3462, 5997}},
    271          /*5*/
    272          {{10663, 0}, {0, 10663}, {7540, 7540}, {9234, 5331}, {5331, 9234}},
    273          /*6*/
    274          {{8668, 0},
    275           {0, 8668},
    276           {6129, 6129},
    277           {6129, 6129},
    278           {7507, 4335},
    279           {4335, 7507}},
    280          /*7*/
    281          {{7459, 0},
    282           {0, 7459},
    283           {5275, 5275},
    284           {5275, 5275},
    285           {4568, 4568},
    286           {6460, 3731},
    287           {3731, 6460}},
    288          /*8*/
    289          {{6368, 0},
    290           {0, 6368},
    291           {4502, 4502},
    292           {4502, 4502},
    293           {5514, 3184},
    294           {3184, 5514},
    295           {5514, 3184},
    296           {3184, 5514}}};
    297      // Re-write the buffer with downmixed data
    298      const int16_t* in = static_cast<const int16_t*>(aIn);
    299      int16_t* out = static_cast<int16_t*>(aOut);
    300      for (uint32_t i = 0; i < aFrames; i++) {
    301        int32_t sampL = 0;
    302        int32_t sampR = 0;
    303        for (uint32_t j = 0; j < inChannels; j++) {
    304          sampL += in[i * inChannels + j] * dmatrix[inChannels - 3][j][0];
    305          sampR += in[i * inChannels + j] * dmatrix[inChannels - 3][j][1];
    306        }
    307        sampL = clipTo15((sampL + 8192) >> 14);
    308        sampR = clipTo15((sampR + 8192) >> 14);
    309        if (outChannels == 2) {
    310          *out++ = sampL;
    311          *out++ = sampR;
    312        } else {
    313          *out++ = (sampL + sampR) * 0.5;
    314        }
    315      }
    316    } else {
    317      MOZ_DIAGNOSTIC_CRASH("Unsupported data type");
    318    }
    319    return aFrames;
    320  }
    321 
    322  MOZ_DIAGNOSTIC_ASSERT(inChannels == 2 && outChannels == 1);
    323  if (mIn.Format() == AudioConfig::FORMAT_FLT) {
    324    const float* in = static_cast<const float*>(aIn);
    325    float* out = static_cast<float*>(aOut);
    326    for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
    327      float sample = 0.0;
    328      // The sample of the buffer would be interleaved.
    329      sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5;
    330      *out++ = sample;
    331    }
    332  } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
    333    const int16_t* in = static_cast<const int16_t*>(aIn);
    334    int16_t* out = static_cast<int16_t*>(aOut);
    335    for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
    336      int32_t sample = 0.0;
    337      // The sample of the buffer would be interleaved.
    338      sample = (in[fIdx * inChannels] + in[fIdx * inChannels + 1]) * 0.5;
    339      *out++ = sample;
    340    }
    341  } else {
    342    MOZ_DIAGNOSTIC_CRASH("Unsupported data type");
    343  }
    344  return aFrames;
    345 }
    346 
    347 size_t AudioConverter::ResampleAudio(void* aOut, const void* aIn,
    348                                     size_t aFrames) {
    349  if (!mResampler) {
    350    return 0;
    351  }
    352  uint32_t outframes = ResampleRecipientFrames(aFrames);
    353  uint32_t inframes = aFrames;
    354 
    355  int error;
    356  if (mOut.Format() == AudioConfig::FORMAT_FLT) {
    357    const float* in = reinterpret_cast<const float*>(aIn);
    358    float* out = reinterpret_cast<float*>(aOut);
    359    error = speex_resampler_process_interleaved_float(mResampler, in, &inframes,
    360                                                      out, &outframes);
    361  } else if (mOut.Format() == AudioConfig::FORMAT_S16) {
    362    const int16_t* in = reinterpret_cast<const int16_t*>(aIn);
    363    int16_t* out = reinterpret_cast<int16_t*>(aOut);
    364    error = speex_resampler_process_interleaved_int(mResampler, in, &inframes,
    365                                                    out, &outframes);
    366  } else {
    367    MOZ_DIAGNOSTIC_CRASH("Unsupported data type");
    368    error = RESAMPLER_ERR_ALLOC_FAILED;
    369  }
    370  MOZ_ASSERT(error == RESAMPLER_ERR_SUCCESS);
    371  if (error != RESAMPLER_ERR_SUCCESS) {
    372    speex_resampler_destroy(mResampler);
    373    mResampler = nullptr;
    374    return 0;
    375  }
    376  MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped");
    377  return outframes;
    378 }
    379 
    380 void AudioConverter::RecreateResampler() {
    381  if (mResampler) {
    382    speex_resampler_destroy(mResampler);
    383  }
    384  int error;
    385  mResampler = speex_resampler_init(mOut.Channels(), mIn.Rate(), mOut.Rate(),
    386                                    SPEEX_RESAMPLER_QUALITY_DEFAULT, &error);
    387 
    388  if (error == RESAMPLER_ERR_SUCCESS) {
    389    speex_resampler_skip_zeros(mResampler);
    390  } else {
    391    NS_WARNING("Failed to initialize resampler.");
    392    mResampler = nullptr;
    393  }
    394 }
    395 
    396 size_t AudioConverter::DrainResampler(void* aOut) {
    397  if (!mResampler) {
    398    return 0;
    399  }
    400  int frames = speex_resampler_get_input_latency(mResampler);
    401  AlignedByteBuffer buffer(FramesOutToBytes(frames));
    402  if (!buffer) {
    403    // OOM
    404    return 0;
    405  }
    406  frames = ResampleAudio(aOut, buffer.Data(), frames);
    407  // Tore down the resampler as it's easier than handling follow-up.
    408  RecreateResampler();
    409  return frames;
    410 }
    411 
    412 size_t AudioConverter::UpmixAudio(void* aOut, const void* aIn,
    413                                  size_t aFrames) const {
    414  MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
    415             mIn.Format() == AudioConfig::FORMAT_FLT);
    416  MOZ_ASSERT(mIn.Channels() < mOut.Channels());
    417  MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
    418  MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");
    419 
    420  if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid() ||
    421      mOut.Channels() != 2) {
    422    // Dumb copy the channels and insert silence for the extra channels.
    423    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
    424      dumbUpDownMix(static_cast<float*>(aOut), mOut.Channels(),
    425                    static_cast<const float*>(aIn), mIn.Channels(), aFrames);
    426    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
    427      dumbUpDownMix(static_cast<int16_t*>(aOut), mOut.Channels(),
    428                    static_cast<const int16_t*>(aIn), mIn.Channels(), aFrames);
    429    } else {
    430      MOZ_DIAGNOSTIC_CRASH("Unsupported data type");
    431    }
    432    return aFrames;
    433  }
    434 
    435  // Upmix mono to stereo.
    436  // This is a very dumb mono to stereo upmixing, power levels are preserved
    437  // following the calculation: left = right = -3dB*mono.
    438  if (mIn.Format() == AudioConfig::FORMAT_FLT) {
    439    const float m3db = std::sqrt(0.5);  // -3dB = sqrt(1/2)
    440    const float* in = static_cast<const float*>(aIn);
    441    float* out = static_cast<float*>(aOut);
    442    for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
    443      float sample = in[fIdx] * m3db;
    444      // The samples of the buffer would be interleaved.
    445      *out++ = sample;
    446      *out++ = sample;
    447    }
    448  } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
    449    const int16_t* in = static_cast<const int16_t*>(aIn);
    450    int16_t* out = static_cast<int16_t*>(aOut);
    451    for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
    452      int16_t sample =
    453          ((int32_t)in[fIdx] * 11585) >> 14;  // close enough to i*sqrt(0.5)
    454      // The samples of the buffer would be interleaved.
    455      *out++ = sample;
    456      *out++ = sample;
    457    }
    458  } else {
    459    MOZ_DIAGNOSTIC_CRASH("Unsupported data type");
    460  }
    461 
    462  return aFrames;
    463 }
    464 
    465 size_t AudioConverter::ResampleRecipientFrames(size_t aFrames) const {
    466  if (!aFrames && mIn.Rate() != mOut.Rate()) {
    467    if (!mResampler) {
    468      return 0;
    469    }
    470    // We drain by pushing in get_input_latency() samples of 0
    471    aFrames = speex_resampler_get_input_latency(mResampler);
    472  }
    473  return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
    474 }
    475 
    476 size_t AudioConverter::FramesOutToSamples(size_t aFrames) const {
    477  return aFrames * mOut.Channels();
    478 }
    479 
    480 size_t AudioConverter::SamplesInToFrames(size_t aSamples) const {
    481  return aSamples / mIn.Channels();
    482 }
    483 
    484 size_t AudioConverter::FramesOutToBytes(size_t aFrames) const {
    485  return FramesOutToSamples(aFrames) * AudioConfig::SampleSize(mOut.Format());
    486 }
    487 }  // namespace mozilla