audio_egress_unittest.cc (11672B)
1 /* 2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "audio/voip/audio_egress.h" 12 13 #include <cstddef> 14 #include <cstdint> 15 #include <memory> 16 #include <optional> 17 18 #include "api/array_view.h" 19 #include "api/audio/audio_frame.h" 20 #include "api/audio_codecs/audio_encoder_factory.h" 21 #include "api/audio_codecs/audio_format.h" 22 #include "api/audio_codecs/builtin_audio_encoder_factory.h" 23 #include "api/call/transport.h" 24 #include "api/environment/environment.h" 25 #include "api/environment/environment_factory.h" 26 #include "api/rtp_headers.h" 27 #include "api/scoped_refptr.h" 28 #include "api/units/time_delta.h" 29 #include "api/units/timestamp.h" 30 #include "modules/audio_mixer/sine_wave_generator.h" 31 #include "modules/rtp_rtcp/source/rtp_packet_received.h" 32 #include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h" 33 #include "modules/rtp_rtcp/source/rtp_rtcp_interface.h" 34 #include "rtc_base/event.h" 35 #include "test/gmock.h" 36 #include "test/gtest.h" 37 #include "test/mock_transport.h" 38 #include "test/time_controller/simulated_time_controller.h" 39 40 namespace webrtc { 41 namespace { 42 43 using ::testing::Invoke; 44 using ::testing::NiceMock; 45 using ::testing::Unused; 46 47 std::unique_ptr<ModuleRtpRtcpImpl2> CreateRtpStack(const Environment& env, 48 Transport* transport, 49 uint32_t remote_ssrc) { 50 RtpRtcpInterface::Configuration rtp_config; 51 rtp_config.audio = true; 52 rtp_config.rtcp_report_interval_ms = 5000; 53 rtp_config.outgoing_transport = transport; 54 rtp_config.local_media_ssrc = remote_ssrc; 55 auto rtp_rtcp = std::make_unique<ModuleRtpRtcpImpl2>(env, rtp_config); 56 rtp_rtcp->SetSendingMediaStatus(false); 57 rtp_rtcp->SetRTCPStatus(RtcpMode::kCompound); 58 return rtp_rtcp; 59 } 60 61 constexpr int16_t kAudioLevel = 3004; // Used for sine wave level. 62 63 // AudioEgressTest configures audio egress by using Rtp Stack, fake clock, 64 // and task queue factory. Encoder factory is needed to create codec and 65 // configure the RTP stack in audio egress. 66 class AudioEgressTest : public ::testing::Test { 67 public: 68 static constexpr uint16_t kSeqNum = 12345; 69 static constexpr uint64_t kStartTime = 123456789; 70 static constexpr uint32_t kRemoteSsrc = 0xDEADBEEF; 71 const SdpAudioFormat kPcmuFormat = {"pcmu", 8000, 1}; 72 73 AudioEgressTest() : wave_generator_(1000.0, kAudioLevel) { 74 encoder_factory_ = CreateBuiltinAudioEncoderFactory(); 75 } 76 77 // Prepare test on audio egress by using PCMu codec with specific 78 // sequence number and its status to be running. 79 void SetUp() override { 80 rtp_rtcp_ = CreateRtpStack(env_, &transport_, kRemoteSsrc); 81 egress_ = std::make_unique<AudioEgress>(env_, rtp_rtcp_.get()); 82 constexpr int kPcmuPayload = 0; 83 egress_->SetEncoder(kPcmuPayload, kPcmuFormat, 84 encoder_factory_->Create( 85 env_, kPcmuFormat, {.payload_type = kPcmuPayload})); 86 egress_->StartSend(); 87 rtp_rtcp_->SetSequenceNumber(kSeqNum); 88 rtp_rtcp_->SetSendingStatus(true); 89 } 90 91 // Make sure we have shut down rtp stack and reset egress for each test. 92 void TearDown() override { 93 egress_->StopSend(); 94 rtp_rtcp_->SetSendingStatus(false); 95 egress_.reset(); 96 rtp_rtcp_.reset(); 97 } 98 99 // Create an audio frame prepared for pcmu encoding. Timestamp is 100 // increased per RTP specification which is the number of samples it contains. 101 // Wave generator writes sine wave which has expected high level set 102 // by kAudioLevel. 103 std::unique_ptr<AudioFrame> GetAudioFrame(int order) { 104 auto frame = std::make_unique<AudioFrame>(); 105 frame->sample_rate_hz_ = kPcmuFormat.clockrate_hz; 106 frame->samples_per_channel_ = kPcmuFormat.clockrate_hz / 100; // 10 ms. 107 frame->num_channels_ = kPcmuFormat.num_channels; 108 frame->timestamp_ = frame->samples_per_channel_ * order; 109 wave_generator_.GenerateNextFrame(frame.get()); 110 return frame; 111 } 112 113 GlobalSimulatedTimeController time_controller_{Timestamp::Micros(kStartTime)}; 114 const Environment env_ = 115 CreateEnvironment(time_controller_.GetClock(), 116 time_controller_.GetTaskQueueFactory()); 117 NiceMock<MockTransport> transport_; 118 SineWaveGenerator wave_generator_; 119 std::unique_ptr<ModuleRtpRtcpImpl2> rtp_rtcp_; 120 scoped_refptr<AudioEncoderFactory> encoder_factory_; 121 std::unique_ptr<AudioEgress> egress_; 122 }; 123 124 TEST_F(AudioEgressTest, SendingStatusAfterStartAndStop) { 125 EXPECT_TRUE(egress_->IsSending()); 126 egress_->StopSend(); 127 EXPECT_FALSE(egress_->IsSending()); 128 } 129 130 TEST_F(AudioEgressTest, ProcessAudioWithMute) { 131 constexpr int kExpected = 10; 132 Event event; 133 int rtp_count = 0; 134 RtpPacketReceived rtp; 135 auto rtp_sent = [&](ArrayView<const uint8_t> packet, Unused) { 136 rtp.Parse(packet); 137 if (++rtp_count == kExpected) { 138 event.Set(); 139 } 140 return true; 141 }; 142 143 EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); 144 145 egress_->SetMute(true); 146 147 // Two 10 ms audio frames will result in rtp packet with ptime 20. 148 for (size_t i = 0; i < kExpected * 2; i++) { 149 egress_->SendAudioData(GetAudioFrame(i)); 150 time_controller_.AdvanceTime(TimeDelta::Millis(10)); 151 } 152 153 event.Wait(TimeDelta::Seconds(1)); 154 EXPECT_EQ(rtp_count, kExpected); 155 156 // we expect on pcmu payload to result in 255 for silenced payload 157 RTPHeader header; 158 rtp.GetHeader(&header); 159 size_t packet_length = rtp.size(); 160 size_t payload_length = packet_length - header.headerLength; 161 size_t payload_data_length = payload_length - header.paddingLength; 162 const uint8_t* payload = rtp.data() + header.headerLength; 163 for (size_t i = 0; i < payload_data_length; ++i) { 164 EXPECT_EQ(*payload++, 255); 165 } 166 } 167 168 TEST_F(AudioEgressTest, ProcessAudioWithSineWave) { 169 constexpr int kExpected = 10; 170 Event event; 171 int rtp_count = 0; 172 RtpPacketReceived rtp; 173 auto rtp_sent = [&](ArrayView<const uint8_t> packet, Unused) { 174 rtp.Parse(packet); 175 if (++rtp_count == kExpected) { 176 event.Set(); 177 } 178 return true; 179 }; 180 181 EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); 182 183 // Two 10 ms audio frames will result in rtp packet with ptime 20. 184 for (size_t i = 0; i < kExpected * 2; i++) { 185 egress_->SendAudioData(GetAudioFrame(i)); 186 time_controller_.AdvanceTime(TimeDelta::Millis(10)); 187 } 188 189 event.Wait(TimeDelta::Seconds(1)); 190 EXPECT_EQ(rtp_count, kExpected); 191 192 // we expect on pcmu to result in < 255 for payload with sine wave 193 RTPHeader header; 194 rtp.GetHeader(&header); 195 size_t packet_length = rtp.size(); 196 size_t payload_length = packet_length - header.headerLength; 197 size_t payload_data_length = payload_length - header.paddingLength; 198 const uint8_t* payload = rtp.data() + header.headerLength; 199 for (size_t i = 0; i < payload_data_length; ++i) { 200 EXPECT_NE(*payload++, 255); 201 } 202 } 203 204 TEST_F(AudioEgressTest, SkipAudioEncodingAfterStopSend) { 205 constexpr int kExpected = 10; 206 Event event; 207 int rtp_count = 0; 208 auto rtp_sent = [&](ArrayView<const uint8_t> /* packet */, Unused) { 209 if (++rtp_count == kExpected) { 210 event.Set(); 211 } 212 return true; 213 }; 214 215 EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); 216 217 // Two 10 ms audio frames will result in rtp packet with ptime 20. 218 for (size_t i = 0; i < kExpected * 2; i++) { 219 egress_->SendAudioData(GetAudioFrame(i)); 220 time_controller_.AdvanceTime(TimeDelta::Millis(10)); 221 } 222 223 event.Wait(TimeDelta::Seconds(1)); 224 EXPECT_EQ(rtp_count, kExpected); 225 226 // Now stop send and yet feed more data. 227 egress_->StopSend(); 228 229 // It should be safe to exit the test case while encoder_queue_ has 230 // outstanding data to process. We are making sure that this doesn't 231 // result in crashes or sanitizer errors due to remaining data. 232 for (size_t i = 0; i < kExpected * 2; i++) { 233 egress_->SendAudioData(GetAudioFrame(i)); 234 time_controller_.AdvanceTime(TimeDelta::Millis(10)); 235 } 236 } 237 238 TEST_F(AudioEgressTest, ChangeEncoderFromPcmuToOpus) { 239 std::optional<SdpAudioFormat> pcmu = egress_->GetEncoderFormat(); 240 EXPECT_TRUE(pcmu); 241 EXPECT_EQ(pcmu->clockrate_hz, kPcmuFormat.clockrate_hz); 242 EXPECT_EQ(pcmu->num_channels, kPcmuFormat.num_channels); 243 244 constexpr int kOpusPayload = 120; 245 const SdpAudioFormat kOpusFormat = {"opus", 48000, 2}; 246 247 egress_->SetEncoder(kOpusPayload, kOpusFormat, 248 encoder_factory_->Create(env_, kOpusFormat, 249 {.payload_type = kOpusPayload})); 250 251 std::optional<SdpAudioFormat> opus = egress_->GetEncoderFormat(); 252 EXPECT_TRUE(opus); 253 EXPECT_EQ(opus->clockrate_hz, kOpusFormat.clockrate_hz); 254 EXPECT_EQ(opus->num_channels, kOpusFormat.num_channels); 255 } 256 257 TEST_F(AudioEgressTest, SendDTMF) { 258 constexpr int kExpected = 7; 259 constexpr int kPayloadType = 100; 260 constexpr int kDurationMs = 100; 261 constexpr int kSampleRate = 8000; 262 constexpr int kEvent = 3; 263 264 egress_->RegisterTelephoneEventType(kPayloadType, kSampleRate); 265 // 100 ms duration will produce total 7 DTMF 266 // 1 @ 20 ms, 2 @ 40 ms, 3 @ 60 ms, 4 @ 80 ms 267 // 5, 6, 7 @ 100 ms (last one sends 3 dtmf) 268 egress_->SendTelephoneEvent(kEvent, kDurationMs); 269 270 Event event; 271 int dtmf_count = 0; 272 auto is_dtmf = [&](RtpPacketReceived& rtp) { 273 return (rtp.PayloadType() == kPayloadType && 274 rtp.SequenceNumber() == kSeqNum + dtmf_count && 275 rtp.padding_size() == 0 && rtp.Marker() == (dtmf_count == 0) && 276 rtp.Ssrc() == kRemoteSsrc); 277 }; 278 279 // It's possible that we may have actual audio RTP packets along with 280 // DTMF packtets. We are only interested in the exact number of DTMF 281 // packets rtp stack is emitting. 282 auto rtp_sent = [&](ArrayView<const uint8_t> packet, Unused) { 283 RtpPacketReceived rtp; 284 rtp.Parse(packet); 285 if (is_dtmf(rtp) && ++dtmf_count == kExpected) { 286 event.Set(); 287 } 288 return true; 289 }; 290 291 EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); 292 293 // Two 10 ms audio frames will result in rtp packet with ptime 20. 294 for (size_t i = 0; i < kExpected * 2; i++) { 295 egress_->SendAudioData(GetAudioFrame(i)); 296 time_controller_.AdvanceTime(TimeDelta::Millis(10)); 297 } 298 299 event.Wait(TimeDelta::Seconds(1)); 300 EXPECT_EQ(dtmf_count, kExpected); 301 } 302 303 TEST_F(AudioEgressTest, TestAudioInputLevelAndEnergyDuration) { 304 // Per audio_level's kUpdateFrequency, we need more than 10 audio samples to 305 // get audio level from input source. 306 constexpr int kExpected = 6; 307 Event event; 308 int rtp_count = 0; 309 auto rtp_sent = [&](ArrayView<const uint8_t> /* packet */, Unused) { 310 if (++rtp_count == kExpected) { 311 event.Set(); 312 } 313 return true; 314 }; 315 316 EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent)); 317 318 // Two 10 ms audio frames will result in rtp packet with ptime 20. 319 for (size_t i = 0; i < kExpected * 2; i++) { 320 egress_->SendAudioData(GetAudioFrame(i)); 321 time_controller_.AdvanceTime(TimeDelta::Millis(10)); 322 } 323 324 event.Wait(/*give_up_after=*/TimeDelta::Seconds(1)); 325 EXPECT_EQ(rtp_count, kExpected); 326 327 constexpr double kExpectedEnergy = 0.00016809565587789564; 328 constexpr double kExpectedDuration = 0.11999999999999998; 329 330 EXPECT_EQ(egress_->GetInputAudioLevel(), kAudioLevel); 331 EXPECT_DOUBLE_EQ(egress_->GetInputTotalEnergy(), kExpectedEnergy); 332 EXPECT_DOUBLE_EQ(egress_->GetInputTotalDuration(), kExpectedDuration); 333 } 334 335 } // namespace 336 } // namespace webrtc