channel_mixer_unittest.cc (14418B)
1 /* 2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "audio/utility/channel_mixer.h" 12 13 #include <cstddef> 14 #include <cstdint> 15 16 #include "api/audio/audio_frame.h" 17 #include "api/audio/channel_layout.h" 18 #include "rtc_base/strings/string_builder.h" 19 #include "test/gtest.h" 20 21 namespace webrtc { 22 23 namespace { 24 25 constexpr uint32_t kTimestamp = 27; 26 constexpr int kSampleRateHz = 16000; 27 constexpr size_t kSamplesPerChannel = kSampleRateHz / 100; 28 29 class ChannelMixerTest : public ::testing::Test { 30 protected: 31 ChannelMixerTest() { 32 // Use 10ms audio frames by default. Don't set values yet. 33 frame_.samples_per_channel_ = kSamplesPerChannel; 34 frame_.sample_rate_hz_ = kSampleRateHz; 35 EXPECT_TRUE(frame_.muted()); 36 } 37 38 ~ChannelMixerTest() override {} 39 40 AudioFrame frame_; 41 }; 42 43 void SetFrameData(int16_t data, AudioFrame* frame) { 44 int16_t* frame_data = frame->mutable_data(); 45 for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels(); 46 i++) { 47 frame_data[i] = data; 48 } 49 } 50 51 void SetMonoData(int16_t center, AudioFrame* frame) { 52 frame->num_channels_ = 1; 53 int16_t* frame_data = frame->mutable_data(); 54 for (size_t i = 0; i < frame->samples_per_channel(); ++i) { 55 frame_data[i] = center; 56 } 57 EXPECT_FALSE(frame->muted()); 58 } 59 60 void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) { 61 ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples()); 62 frame->num_channels_ = 2; 63 int16_t* frame_data = frame->mutable_data(); 64 for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) { 65 frame_data[i] = left; 66 frame_data[i + 1] = right; 67 } 68 EXPECT_FALSE(frame->muted()); 69 } 70 71 void SetFiveOneData(int16_t front_left, 72 int16_t front_right, 73 int16_t center, 74 int16_t lfe, 75 int16_t side_left, 76 int16_t side_right, 77 AudioFrame* frame) { 78 ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples()); 79 frame->num_channels_ = 6; 80 int16_t* frame_data = frame->mutable_data(); 81 for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) { 82 frame_data[i] = front_left; 83 frame_data[i + 1] = front_right; 84 frame_data[i + 2] = center; 85 frame_data[i + 3] = lfe; 86 frame_data[i + 4] = side_left; 87 frame_data[i + 5] = side_right; 88 } 89 EXPECT_FALSE(frame->muted()); 90 } 91 92 void SetSevenOneData(int16_t front_left, 93 int16_t front_right, 94 int16_t center, 95 int16_t lfe, 96 int16_t side_left, 97 int16_t side_right, 98 int16_t back_left, 99 int16_t back_right, 100 AudioFrame* frame) { 101 ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples()); 102 frame->num_channels_ = 8; 103 int16_t* frame_data = frame->mutable_data(); 104 for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) { 105 frame_data[i] = front_left; 106 frame_data[i + 1] = front_right; 107 frame_data[i + 2] = center; 108 frame_data[i + 3] = lfe; 109 frame_data[i + 4] = side_left; 110 frame_data[i + 5] = side_right; 111 frame_data[i + 6] = back_left; 112 frame_data[i + 7] = back_right; 113 } 114 EXPECT_FALSE(frame->muted()); 115 } 116 117 bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) { 118 const int16_t* frame_data = frame->data(); 119 for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels(); 120 i++) { 121 if (frame_data[i] != sample) { 122 return false; 123 } 124 } 125 return true; 126 } 127 128 void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { 129 EXPECT_EQ(frame1.num_channels(), frame2.num_channels()); 130 EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel()); 131 const int16_t* frame1_data = frame1.data(); 132 const int16_t* frame2_data = frame2.data(); 133 for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels(); 134 i++) { 135 EXPECT_EQ(frame1_data[i], frame2_data[i]); 136 } 137 EXPECT_EQ(frame1.muted(), frame2.muted()); 138 } 139 140 } // namespace 141 142 // Test all possible layout conversions can be constructed and mixed. Don't 143 // care about the actual content, simply run through all mixing combinations 144 // and ensure that nothing fails. 145 TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) { 146 for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; 147 input_layout <= CHANNEL_LAYOUT_MAX; 148 input_layout = static_cast<ChannelLayout>(input_layout + 1)) { 149 for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; 150 output_layout <= CHANNEL_LAYOUT_MAX; 151 output_layout = static_cast<ChannelLayout>(output_layout + 1)) { 152 // DISCRETE, BITSTREAM can't be tested here based on the current approach. 153 // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable. 154 // Stereo down mix should never be the output layout. 155 if (input_layout == CHANNEL_LAYOUT_BITSTREAM || 156 input_layout == CHANNEL_LAYOUT_DISCRETE || 157 input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || 158 output_layout == CHANNEL_LAYOUT_BITSTREAM || 159 output_layout == CHANNEL_LAYOUT_DISCRETE || 160 output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || 161 output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) { 162 continue; 163 } 164 165 StringBuilder ss; 166 ss << "Input Layout: " << input_layout 167 << ", Output Layout: " << output_layout; 168 SCOPED_TRACE(ss.str()); 169 ChannelMixer mixer(input_layout, output_layout); 170 171 frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz, 172 AudioFrame::kNormalSpeech, AudioFrame::kVadActive, 173 ChannelLayoutToChannelCount(input_layout)); 174 EXPECT_TRUE(frame_.muted()); 175 mixer.Transform(&frame_); 176 } 177 } 178 } 179 180 // Ensure that the audio frame is untouched when input and output channel 181 // layouts are identical, i.e., the transformation should have no effect. 182 // Exclude invalid mixing combinations. 183 TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) { 184 for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; 185 input_layout <= CHANNEL_LAYOUT_MAX; 186 input_layout = static_cast<ChannelLayout>(input_layout + 1)) { 187 for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; 188 output_layout <= CHANNEL_LAYOUT_MAX; 189 output_layout = static_cast<ChannelLayout>(output_layout + 1)) { 190 if (input_layout != output_layout || 191 input_layout == CHANNEL_LAYOUT_BITSTREAM || 192 input_layout == CHANNEL_LAYOUT_DISCRETE || 193 input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || 194 output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) { 195 continue; 196 } 197 ChannelMixer mixer(input_layout, output_layout); 198 frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout); 199 SetFrameData(99, &frame_); 200 mixer.Transform(&frame_); 201 EXPECT_EQ(ChannelLayoutToChannelCount(input_layout), 202 static_cast<int>(frame_.num_channels())); 203 EXPECT_TRUE(AllSamplesEquals(99, &frame_)); 204 } 205 } 206 } 207 208 TEST_F(ChannelMixerTest, StereoToMono) { 209 ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO); 210 // 211 // Input: stereo 212 // LEFT RIGHT 213 // Output: mono CENTER 0.5 0.5 214 // 215 SetStereoData(7, 3, &frame_); 216 EXPECT_EQ(2u, frame_.num_channels()); 217 mixer.Transform(&frame_); 218 EXPECT_EQ(1u, frame_.num_channels()); 219 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); 220 221 AudioFrame mono_frame; 222 mono_frame.samples_per_channel_ = frame_.samples_per_channel(); 223 SetMonoData(5, &mono_frame); 224 VerifyFramesAreEqual(mono_frame, frame_); 225 226 SetStereoData(-32768, -32768, &frame_); 227 EXPECT_EQ(2u, frame_.num_channels()); 228 mixer.Transform(&frame_); 229 EXPECT_EQ(1u, frame_.num_channels()); 230 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); 231 SetMonoData(-32768, &mono_frame); 232 VerifyFramesAreEqual(mono_frame, frame_); 233 } 234 235 TEST_F(ChannelMixerTest, StereoToMonoMuted) { 236 ASSERT_TRUE(frame_.muted()); 237 ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO); 238 mixer.Transform(&frame_); 239 EXPECT_EQ(1u, frame_.num_channels()); 240 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); 241 EXPECT_TRUE(frame_.muted()); 242 } 243 244 TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) { 245 ASSERT_TRUE(frame_.muted()); 246 ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1); 247 mixer.Transform(&frame_); 248 EXPECT_EQ(8u, frame_.num_channels()); 249 EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout()); 250 EXPECT_TRUE(frame_.muted()); 251 } 252 253 TEST_F(ChannelMixerTest, FiveOneToMono) { 254 ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO); 255 // 256 // Input: 5.1 257 // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT 258 // Output: mono CENTER 0.707 0.707 1 0.707 0.707 0.707 259 // 260 // a = [10, 20, 15, 2, 5, 5] 261 // b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] => 262 // a * b (dot product) = 44.69848480983499, 263 // which is truncated into 44 using 16 bit representation. 264 // 265 SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_); 266 EXPECT_EQ(6u, frame_.num_channels()); 267 mixer.Transform(&frame_); 268 EXPECT_EQ(1u, frame_.num_channels()); 269 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); 270 271 AudioFrame mono_frame; 272 mono_frame.samples_per_channel_ = frame_.samples_per_channel(); 273 SetMonoData(44, &mono_frame); 274 VerifyFramesAreEqual(mono_frame, frame_); 275 276 SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_); 277 EXPECT_EQ(6u, frame_.num_channels()); 278 mixer.Transform(&frame_); 279 EXPECT_EQ(1u, frame_.num_channels()); 280 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); 281 SetMonoData(-32768, &mono_frame); 282 VerifyFramesAreEqual(mono_frame, frame_); 283 } 284 285 TEST_F(ChannelMixerTest, FiveOneToSevenOne) { 286 ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1); 287 // 288 // Input: 5.1 289 // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT 290 // Output: 7.1 LEFT 1 0 0 0 0 0 291 // RIGHT 0 1 0 0 0 0 292 // CENTER 0 0 1 0 0 0 293 // LFE 0 0 0 1 0 0 294 // SIDE_LEFT 0 0 0 0 1 0 295 // SIDE_RIGHT 0 0 0 0 0 1 296 // BACK_LEFT 0 0 0 0 0 0 297 // BACK_RIGHT 0 0 0 0 0 0 298 // 299 SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_); 300 EXPECT_EQ(6u, frame_.num_channels()); 301 mixer.Transform(&frame_); 302 EXPECT_EQ(8u, frame_.num_channels()); 303 EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout()); 304 305 AudioFrame seven_one_frame; 306 seven_one_frame.samples_per_channel_ = frame_.samples_per_channel(); 307 SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame); 308 VerifyFramesAreEqual(seven_one_frame, frame_); 309 310 SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_); 311 EXPECT_EQ(6u, frame_.num_channels()); 312 mixer.Transform(&frame_); 313 EXPECT_EQ(8u, frame_.num_channels()); 314 EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout()); 315 SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0, 316 &seven_one_frame); 317 VerifyFramesAreEqual(seven_one_frame, frame_); 318 } 319 320 TEST_F(ChannelMixerTest, FiveOneBackToStereo) { 321 ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO); 322 // 323 // Input: 5.1 324 // LEFT RIGHT CENTER LFE BACK_LEFT BACK_RIGHT 325 // Output: stereo LEFT 1 0 0.707 0.707 0.707 0 326 // RIGHT 0 1 0.707 0.707 0 0.707 327 // 328 SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_); 329 EXPECT_EQ(6u, frame_.num_channels()); 330 mixer.Transform(&frame_); 331 EXPECT_EQ(2u, frame_.num_channels()); 332 EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout()); 333 334 AudioFrame stereo_frame; 335 stereo_frame.samples_per_channel_ = frame_.samples_per_channel(); 336 SetStereoData(35, 45, &stereo_frame); 337 VerifyFramesAreEqual(stereo_frame, frame_); 338 339 SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_); 340 EXPECT_EQ(6u, frame_.num_channels()); 341 mixer.Transform(&frame_); 342 EXPECT_EQ(2u, frame_.num_channels()); 343 EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout()); 344 SetStereoData(-32768, -32768, &stereo_frame); 345 VerifyFramesAreEqual(stereo_frame, frame_); 346 } 347 348 TEST_F(ChannelMixerTest, MonoToStereo) { 349 ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO); 350 // 351 // Input: mono 352 // CENTER 353 // Output: stereo LEFT 1 354 // RIGHT 1 355 // 356 SetMonoData(44, &frame_); 357 EXPECT_EQ(1u, frame_.num_channels()); 358 mixer.Transform(&frame_); 359 EXPECT_EQ(2u, frame_.num_channels()); 360 EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout()); 361 362 AudioFrame stereo_frame; 363 stereo_frame.samples_per_channel_ = frame_.samples_per_channel(); 364 SetStereoData(44, 44, &stereo_frame); 365 VerifyFramesAreEqual(stereo_frame, frame_); 366 } 367 368 TEST_F(ChannelMixerTest, StereoToFiveOne) { 369 ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1); 370 // 371 // Input: Stereo 372 // LEFT RIGHT 373 // Output: 5.1 LEFT 1 0 374 // RIGHT 0 1 375 // CENTER 0 0 376 // LFE 0 0 377 // SIDE_LEFT 0 0 378 // SIDE_RIGHT 0 0 379 // 380 SetStereoData(50, 60, &frame_); 381 EXPECT_EQ(2u, frame_.num_channels()); 382 mixer.Transform(&frame_); 383 EXPECT_EQ(6u, frame_.num_channels()); 384 EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout()); 385 386 AudioFrame five_one_frame; 387 five_one_frame.samples_per_channel_ = frame_.samples_per_channel(); 388 SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame); 389 VerifyFramesAreEqual(five_one_frame, frame_); 390 } 391 392 } // namespace webrtc