audio_buffer.cc (14649B)
1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/audio_buffer.h" 12 13 #include <algorithm> 14 #include <array> 15 #include <cstdint> 16 #include <cstring> 17 #include <memory> 18 19 #include "api/audio/audio_processing.h" 20 #include "common_audio/channel_buffer.h" 21 #include "common_audio/include/audio_util.h" 22 #include "common_audio/resampler/push_sinc_resampler.h" 23 #include "modules/audio_processing/splitting_filter.h" 24 #include "rtc_base/checks.h" 25 26 namespace webrtc { 27 namespace { 28 29 constexpr size_t kSamplesPer32kHzChannel = 320; 30 constexpr size_t kSamplesPer48kHzChannel = 480; 31 32 size_t NumBandsFromFramesPerChannel(size_t num_frames) { 33 if (num_frames == kSamplesPer32kHzChannel) { 34 return 2; 35 } 36 if (num_frames == kSamplesPer48kHzChannel) { 37 return 3; 38 } 39 return 1; 40 } 41 42 } // namespace 43 44 AudioBuffer::AudioBuffer(size_t input_rate, 45 size_t input_num_channels, 46 size_t buffer_rate, 47 size_t buffer_num_channels, 48 size_t output_rate, 49 size_t /* output_num_channels */) 50 : input_num_frames_(static_cast<int>(input_rate) / 100), 51 input_num_channels_(input_num_channels), 52 buffer_num_frames_(static_cast<int>(buffer_rate) / 100), 53 buffer_num_channels_(buffer_num_channels), 54 output_num_frames_(static_cast<int>(output_rate) / 100), 55 output_num_channels_(0), 56 num_channels_(buffer_num_channels), 57 num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)), 58 num_split_frames_(CheckedDivExact(buffer_num_frames_, num_bands_)), 59 data_( 60 new ChannelBuffer<float>(buffer_num_frames_, buffer_num_channels_)) { 61 RTC_DCHECK_GT(input_num_frames_, 0); 62 RTC_DCHECK_GT(buffer_num_frames_, 0); 63 RTC_DCHECK_GT(output_num_frames_, 0); 64 RTC_DCHECK_GT(input_num_channels_, 0); 65 RTC_DCHECK_GT(buffer_num_channels_, 0); 66 RTC_DCHECK_LE(buffer_num_channels_, input_num_channels_); 67 68 const bool input_resampling_needed = input_num_frames_ != buffer_num_frames_; 69 const bool output_resampling_needed = 70 output_num_frames_ != buffer_num_frames_; 71 if (input_resampling_needed) { 72 for (size_t i = 0; i < buffer_num_channels_; ++i) { 73 input_resamplers_.push_back(std::unique_ptr<PushSincResampler>( 74 new PushSincResampler(input_num_frames_, buffer_num_frames_))); 75 } 76 } 77 78 if (output_resampling_needed) { 79 for (size_t i = 0; i < buffer_num_channels_; ++i) { 80 output_resamplers_.push_back(std::unique_ptr<PushSincResampler>( 81 new PushSincResampler(buffer_num_frames_, output_num_frames_))); 82 } 83 } 84 85 if (num_bands_ > 1) { 86 split_data_.reset(new ChannelBuffer<float>( 87 buffer_num_frames_, buffer_num_channels_, num_bands_)); 88 splitting_filter_.reset(new SplittingFilter( 89 buffer_num_channels_, num_bands_, buffer_num_frames_)); 90 } 91 } 92 93 AudioBuffer::~AudioBuffer() {} 94 95 void AudioBuffer::set_downmixing_to_specific_channel(size_t channel) { 96 downmix_by_averaging_ = false; 97 RTC_DCHECK_GT(input_num_channels_, channel); 98 channel_for_downmixing_ = std::min(channel, input_num_channels_ - 1); 99 } 100 101 void AudioBuffer::set_downmixing_by_averaging() { 102 downmix_by_averaging_ = true; 103 } 104 105 void AudioBuffer::CopyFrom(const float* const* stacked_data, 106 const StreamConfig& stream_config) { 107 RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_); 108 RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_); 109 RestoreNumChannels(); 110 const bool downmix_needed = input_num_channels_ > 1 && num_channels_ == 1; 111 112 const bool resampling_needed = input_num_frames_ != buffer_num_frames_; 113 114 if (downmix_needed) { 115 RTC_DCHECK_GE(kMaxSamplesPerChannel10ms, input_num_frames_); 116 117 std::array<float, kMaxSamplesPerChannel10ms> downmix; 118 if (downmix_by_averaging_) { 119 const float kOneByNumChannels = 1.f / input_num_channels_; 120 for (size_t i = 0; i < input_num_frames_; ++i) { 121 float value = stacked_data[0][i]; 122 for (size_t j = 1; j < input_num_channels_; ++j) { 123 value += stacked_data[j][i]; 124 } 125 downmix[i] = value * kOneByNumChannels; 126 } 127 } 128 const float* downmixed_data = downmix_by_averaging_ 129 ? downmix.data() 130 : stacked_data[channel_for_downmixing_]; 131 132 if (resampling_needed) { 133 input_resamplers_[0]->Resample(downmixed_data, input_num_frames_, 134 data_->channels()[0], buffer_num_frames_); 135 } 136 const float* data_to_convert = 137 resampling_needed ? data_->channels()[0] : downmixed_data; 138 FloatToFloatS16(data_to_convert, buffer_num_frames_, data_->channels()[0]); 139 } else { 140 if (resampling_needed) { 141 for (size_t i = 0; i < num_channels_; ++i) { 142 input_resamplers_[i]->Resample(stacked_data[i], input_num_frames_, 143 data_->channels()[i], 144 buffer_num_frames_); 145 FloatToFloatS16(data_->channels()[i], buffer_num_frames_, 146 data_->channels()[i]); 147 } 148 } else { 149 for (size_t i = 0; i < num_channels_; ++i) { 150 FloatToFloatS16(stacked_data[i], buffer_num_frames_, 151 data_->channels()[i]); 152 } 153 } 154 } 155 } 156 157 void AudioBuffer::CopyTo(const StreamConfig& stream_config, 158 float* const* stacked_data) { 159 RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_); 160 161 const bool resampling_needed = output_num_frames_ != buffer_num_frames_; 162 if (resampling_needed) { 163 for (size_t i = 0; i < num_channels_; ++i) { 164 FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, 165 data_->channels()[i]); 166 output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_, 167 stacked_data[i], output_num_frames_); 168 } 169 } else { 170 for (size_t i = 0; i < num_channels_; ++i) { 171 FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, 172 stacked_data[i]); 173 } 174 } 175 176 for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) { 177 memcpy(stacked_data[i], stacked_data[0], 178 output_num_frames_ * sizeof(**stacked_data)); 179 } 180 } 181 182 void AudioBuffer::CopyTo(AudioBuffer* buffer) const { 183 RTC_DCHECK_EQ(buffer->num_frames(), output_num_frames_); 184 185 const bool resampling_needed = output_num_frames_ != buffer_num_frames_; 186 if (resampling_needed) { 187 for (size_t i = 0; i < num_channels_; ++i) { 188 output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_, 189 buffer->channels()[i], 190 buffer->num_frames()); 191 } 192 } else { 193 for (size_t i = 0; i < num_channels_; ++i) { 194 memcpy(buffer->channels()[i], data_->channels()[i], 195 buffer_num_frames_ * sizeof(**buffer->channels())); 196 } 197 } 198 199 for (size_t i = num_channels_; i < buffer->num_channels(); ++i) { 200 memcpy(buffer->channels()[i], buffer->channels()[0], 201 output_num_frames_ * sizeof(**buffer->channels())); 202 } 203 } 204 205 void AudioBuffer::RestoreNumChannels() { 206 num_channels_ = buffer_num_channels_; 207 data_->set_num_channels(buffer_num_channels_); 208 if (split_data_) { 209 split_data_->set_num_channels(buffer_num_channels_); 210 } 211 } 212 213 void AudioBuffer::set_num_channels(size_t num_channels) { 214 RTC_DCHECK_GE(buffer_num_channels_, num_channels); 215 num_channels_ = num_channels; 216 data_->set_num_channels(num_channels); 217 if (split_data_) { 218 split_data_->set_num_channels(num_channels); 219 } 220 } 221 222 // The resampler is only for supporting 48kHz to 16kHz in the reverse stream. 223 void AudioBuffer::CopyFrom(const int16_t* const interleaved_data, 224 const StreamConfig& stream_config) { 225 RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_); 226 RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_); 227 RestoreNumChannels(); 228 229 const bool resampling_required = input_num_frames_ != buffer_num_frames_; 230 231 const int16_t* interleaved = interleaved_data; 232 if (num_channels_ == 1) { 233 if (input_num_channels_ == 1) { 234 if (resampling_required) { 235 std::array<float, kMaxSamplesPerChannel10ms> float_buffer; 236 S16ToFloatS16(interleaved, input_num_frames_, float_buffer.data()); 237 input_resamplers_[0]->Resample(float_buffer.data(), input_num_frames_, 238 data_->channels()[0], 239 buffer_num_frames_); 240 } else { 241 S16ToFloatS16(interleaved, input_num_frames_, data_->channels()[0]); 242 } 243 } else { 244 std::array<float, kMaxSamplesPerChannel10ms> float_buffer; 245 float* downmixed_data = 246 resampling_required ? float_buffer.data() : data_->channels()[0]; 247 if (downmix_by_averaging_) { 248 for (size_t j = 0, k = 0; j < input_num_frames_; ++j) { 249 int32_t sum = 0; 250 for (size_t i = 0; i < input_num_channels_; ++i, ++k) { 251 sum += interleaved[k]; 252 } 253 downmixed_data[j] = sum / static_cast<int16_t>(input_num_channels_); 254 } 255 } else { 256 for (size_t j = 0, k = channel_for_downmixing_; j < input_num_frames_; 257 ++j, k += input_num_channels_) { 258 downmixed_data[j] = interleaved[k]; 259 } 260 } 261 262 if (resampling_required) { 263 input_resamplers_[0]->Resample(downmixed_data, input_num_frames_, 264 data_->channels()[0], 265 buffer_num_frames_); 266 } 267 } 268 } else { 269 auto deinterleave_channel = [](size_t channel, size_t num_channels, 270 size_t samples_per_channel, const int16_t* x, 271 float* y) { 272 for (size_t j = 0, k = channel; j < samples_per_channel; 273 ++j, k += num_channels) { 274 y[j] = x[k]; 275 } 276 }; 277 278 if (resampling_required) { 279 std::array<float, kMaxSamplesPerChannel10ms> float_buffer; 280 for (size_t i = 0; i < num_channels_; ++i) { 281 deinterleave_channel(i, num_channels_, input_num_frames_, interleaved, 282 float_buffer.data()); 283 input_resamplers_[i]->Resample(float_buffer.data(), input_num_frames_, 284 data_->channels()[i], 285 buffer_num_frames_); 286 } 287 } else { 288 for (size_t i = 0; i < num_channels_; ++i) { 289 deinterleave_channel(i, num_channels_, input_num_frames_, interleaved, 290 data_->channels()[i]); 291 } 292 } 293 } 294 } 295 296 void AudioBuffer::CopyTo(const StreamConfig& stream_config, 297 int16_t* const interleaved_data) { 298 const size_t config_num_channels = stream_config.num_channels(); 299 300 RTC_DCHECK(config_num_channels == num_channels_ || num_channels_ == 1); 301 RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_); 302 303 const bool resampling_required = buffer_num_frames_ != output_num_frames_; 304 305 int16_t* interleaved = interleaved_data; 306 if (num_channels_ == 1) { 307 std::array<float, kMaxSamplesPerChannel10ms> float_buffer; 308 309 if (resampling_required) { 310 output_resamplers_[0]->Resample(data_->channels()[0], buffer_num_frames_, 311 float_buffer.data(), output_num_frames_); 312 } 313 const float* deinterleaved = 314 resampling_required ? float_buffer.data() : data_->channels()[0]; 315 316 if (config_num_channels == 1) { 317 for (size_t j = 0; j < output_num_frames_; ++j) { 318 interleaved[j] = FloatS16ToS16(deinterleaved[j]); 319 } 320 } else { 321 for (size_t i = 0, k = 0; i < output_num_frames_; ++i) { 322 float tmp = FloatS16ToS16(deinterleaved[i]); 323 for (size_t j = 0; j < config_num_channels; ++j, ++k) { 324 interleaved[k] = tmp; 325 } 326 } 327 } 328 } else { 329 auto interleave_channel = [](size_t channel, size_t num_channels, 330 size_t samples_per_channel, const float* x, 331 int16_t* y) { 332 for (size_t k = 0, j = channel; k < samples_per_channel; 333 ++k, j += num_channels) { 334 y[j] = FloatS16ToS16(x[k]); 335 } 336 }; 337 338 if (resampling_required) { 339 for (size_t i = 0; i < num_channels_; ++i) { 340 std::array<float, kMaxSamplesPerChannel10ms> float_buffer; 341 output_resamplers_[i]->Resample(data_->channels()[i], 342 buffer_num_frames_, float_buffer.data(), 343 output_num_frames_); 344 interleave_channel(i, config_num_channels, output_num_frames_, 345 float_buffer.data(), interleaved); 346 } 347 } else { 348 for (size_t i = 0; i < num_channels_; ++i) { 349 interleave_channel(i, config_num_channels, output_num_frames_, 350 data_->channels()[i], interleaved); 351 } 352 } 353 354 for (size_t i = num_channels_; i < config_num_channels; ++i) { 355 for (size_t j = 0, k = i, n = num_channels_; j < output_num_frames_; 356 ++j, k += config_num_channels, n += config_num_channels) { 357 interleaved[k] = interleaved[n]; 358 } 359 } 360 } 361 } 362 363 void AudioBuffer::SplitIntoFrequencyBands() { 364 splitting_filter_->Analysis(data_.get(), split_data_.get()); 365 } 366 367 void AudioBuffer::MergeFrequencyBands() { 368 splitting_filter_->Synthesis(split_data_.get(), data_.get()); 369 } 370 371 void AudioBuffer::ExportSplitChannelData( 372 size_t channel, 373 int16_t* const* split_band_data) const { 374 for (size_t k = 0; k < num_bands(); ++k) { 375 const float* band_data = split_bands_const(channel)[k]; 376 377 RTC_DCHECK(split_band_data[k]); 378 RTC_DCHECK(band_data); 379 for (size_t i = 0; i < num_frames_per_band(); ++i) { 380 split_band_data[k][i] = FloatS16ToS16(band_data[i]); 381 } 382 } 383 } 384 385 void AudioBuffer::ImportSplitChannelData( 386 size_t channel, 387 const int16_t* const* split_band_data) { 388 for (size_t k = 0; k < num_bands(); ++k) { 389 float* band_data = split_bands(channel)[k]; 390 RTC_DCHECK(split_band_data[k]); 391 RTC_DCHECK(band_data); 392 for (size_t i = 0; i < num_frames_per_band(); ++i) { 393 band_data[i] = split_band_data[k][i]; 394 } 395 } 396 } 397 398 } // namespace webrtc