rtp_sender_audio.cc (12355B)
1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/rtp_rtcp/source/rtp_sender_audio.h" 12 13 #include <cstdint> 14 #include <cstring> 15 #include <memory> 16 #include <optional> 17 #include <utility> 18 #include <vector> 19 20 #include "absl/strings/match.h" 21 #include "absl/strings/string_view.h" 22 #include "api/rtp_headers.h" 23 #include "modules/audio_coding/include/audio_coding_module_typedefs.h" 24 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" 25 #include "modules/rtp_rtcp/source/absolute_capture_time_sender.h" 26 #include "modules/rtp_rtcp/source/byte_io.h" 27 #include "modules/rtp_rtcp/source/dtmf_queue.h" 28 #include "modules/rtp_rtcp/source/rtp_header_extensions.h" 29 #include "modules/rtp_rtcp/source/rtp_packet.h" 30 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h" 31 #include "modules/rtp_rtcp/source/rtp_sender.h" 32 #include "rtc_base/checks.h" 33 #include "rtc_base/logging.h" 34 #include "rtc_base/numerics/safe_conversions.h" 35 #include "rtc_base/synchronization/mutex.h" 36 #include "system_wrappers/include/clock.h" 37 38 namespace webrtc { 39 40 RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtp_sender) 41 : clock_(clock), 42 rtp_sender_(rtp_sender), 43 absolute_capture_time_sender_(clock) { 44 RTC_DCHECK(clock_); 45 } 46 47 RTPSenderAudio::~RTPSenderAudio() {} 48 49 int32_t RTPSenderAudio::RegisterAudioPayload(absl::string_view payload_name, 50 const int8_t payload_type, 51 const uint32_t frequency, 52 const size_t /* channels */, 53 const uint32_t /* rate */) { 54 if (absl::EqualsIgnoreCase(payload_name, "cn")) { 55 MutexLock lock(&send_audio_mutex_); 56 // we can have multiple CNG payload types 57 switch (frequency) { 58 case 8000: 59 cngnb_payload_type_ = payload_type; 60 break; 61 case 16000: 62 cngwb_payload_type_ = payload_type; 63 break; 64 case 32000: 65 cngswb_payload_type_ = payload_type; 66 break; 67 case 48000: 68 cngfb_payload_type_ = payload_type; 69 break; 70 default: 71 return -1; 72 } 73 } else if (absl::EqualsIgnoreCase(payload_name, "telephone-event")) { 74 MutexLock lock(&send_audio_mutex_); 75 // Don't add it to the list 76 // we dont want to allow send with a DTMF payloadtype 77 dtmf_payload_type_ = payload_type; 78 dtmf_payload_freq_ = frequency; 79 return 0; 80 } else if (payload_name == "audio") { 81 MutexLock lock(&send_audio_mutex_); 82 encoder_rtp_timestamp_frequency_ = dchecked_cast<int>(frequency); 83 return 0; 84 } 85 return 0; 86 } 87 88 bool RTPSenderAudio::MarkerBit(AudioFrameType frame_type, int8_t payload_type) { 89 MutexLock lock(&send_audio_mutex_); 90 // for audio true for first packet in a speech burst 91 bool marker_bit = false; 92 if (last_payload_type_ != payload_type) { 93 if (payload_type != -1 && (cngnb_payload_type_ == payload_type || 94 cngwb_payload_type_ == payload_type || 95 cngswb_payload_type_ == payload_type || 96 cngfb_payload_type_ == payload_type)) { 97 // Only set a marker bit when we change payload type to a non CNG 98 return false; 99 } 100 101 // payload_type differ 102 if (last_payload_type_ == -1) { 103 if (frame_type != AudioFrameType::kAudioFrameCN) { 104 // first packet and NOT CNG 105 return true; 106 } else { 107 // first packet and CNG 108 inband_vad_active_ = true; 109 return false; 110 } 111 } 112 113 // not first packet AND 114 // not CNG AND 115 // payload_type changed 116 117 // set a marker bit when we change payload type 118 marker_bit = true; 119 } 120 121 // For G.723 G.729, AMR etc we can have inband VAD 122 if (frame_type == AudioFrameType::kAudioFrameCN) { 123 inband_vad_active_ = true; 124 } else if (inband_vad_active_) { 125 inband_vad_active_ = false; 126 marker_bit = true; 127 } 128 return marker_bit; 129 } 130 131 bool RTPSenderAudio::SendAudio(const RtpAudioFrame& frame) { 132 RTC_DCHECK_GE(frame.payload_id, 0); 133 RTC_DCHECK_LE(frame.payload_id, 127); 134 135 // From RFC 4733: 136 // A source has wide latitude as to how often it sends event updates. A 137 // natural interval is the spacing between non-event audio packets. [...] 138 // Alternatively, a source MAY decide to use a different spacing for event 139 // updates, with a value of 50 ms RECOMMENDED. 140 constexpr int kDtmfIntervalTimeMs = 50; 141 uint32_t dtmf_payload_freq = 0; 142 std::optional<AbsoluteCaptureTime> absolute_capture_time; 143 { 144 MutexLock lock(&send_audio_mutex_); 145 dtmf_payload_freq = dtmf_payload_freq_; 146 if (frame.capture_time.has_value()) { 147 // Send absolute capture time periodically in order to optimize and save 148 // network traffic. Missing absolute capture times can be interpolated on 149 // the receiving end if sending intervals are small enough. 150 absolute_capture_time = absolute_capture_time_sender_.OnSendPacket( 151 rtp_sender_->SSRC(), frame.rtp_timestamp, 152 // Replace missing value with 0 (invalid frequency), this will trigger 153 // absolute capture time sending. 154 encoder_rtp_timestamp_frequency_.value_or(0), 155 clock_->ConvertTimestampToNtpTime(*frame.capture_time), 156 /*estimated_capture_clock_offset=*/0); 157 } 158 } 159 160 // Check if we have pending DTMFs to send 161 if (!dtmf_event_is_on_ && dtmf_queue_.PendingDtmf()) { 162 if ((clock_->TimeInMilliseconds() - dtmf_time_last_sent_) > 163 kDtmfIntervalTimeMs) { 164 // New tone to play 165 dtmf_timestamp_ = frame.rtp_timestamp; 166 if (dtmf_queue_.NextDtmf(&dtmf_current_event_)) { 167 dtmf_event_first_packet_sent_ = false; 168 dtmf_length_samples_ = 169 dtmf_current_event_.duration_ms * (dtmf_payload_freq / 1000); 170 dtmf_event_is_on_ = true; 171 } 172 } 173 } 174 175 // A source MAY send events and coded audio packets for the same time 176 // but we don't support it 177 if (dtmf_event_is_on_) { 178 if (frame.type == AudioFrameType::kEmptyFrame) { 179 // kEmptyFrame is used to drive the DTMF when in CN mode 180 // it can be triggered more frequently than we want to send the 181 // DTMF packets. 182 const unsigned int dtmf_interval_time_rtp = 183 dtmf_payload_freq * kDtmfIntervalTimeMs / 1000; 184 if ((frame.rtp_timestamp - dtmf_timestamp_last_sent_) < 185 dtmf_interval_time_rtp) { 186 // not time to send yet 187 return true; 188 } 189 } 190 dtmf_timestamp_last_sent_ = frame.rtp_timestamp; 191 uint32_t dtmf_duration_samples = frame.rtp_timestamp - dtmf_timestamp_; 192 bool ended = false; 193 bool send = true; 194 195 if (dtmf_length_samples_ > dtmf_duration_samples) { 196 if (dtmf_duration_samples <= 0) { 197 // Skip send packet at start, since we shouldn't use duration 0 198 send = false; 199 } 200 } else { 201 ended = true; 202 dtmf_event_is_on_ = false; 203 dtmf_time_last_sent_ = clock_->TimeInMilliseconds(); 204 } 205 if (send) { 206 if (dtmf_duration_samples > 0xffff) { 207 // RFC 4733 2.5.2.3 Long-Duration Events 208 SendTelephoneEventPacket(ended, dtmf_timestamp_, 209 static_cast<uint16_t>(0xffff), false); 210 211 // set new timestap for this segment 212 dtmf_timestamp_ = frame.rtp_timestamp; 213 dtmf_duration_samples -= 0xffff; 214 dtmf_length_samples_ -= 0xffff; 215 216 return SendTelephoneEventPacket( 217 ended, dtmf_timestamp_, 218 static_cast<uint16_t>(dtmf_duration_samples), false); 219 } else { 220 if (!SendTelephoneEventPacket(ended, dtmf_timestamp_, 221 dtmf_duration_samples, 222 !dtmf_event_first_packet_sent_)) { 223 return false; 224 } 225 dtmf_event_first_packet_sent_ = true; 226 return true; 227 } 228 } 229 return true; 230 } 231 if (frame.payload.empty()) { 232 if (frame.type == AudioFrameType::kEmptyFrame) { 233 // we don't send empty audio RTP packets 234 // no error since we use it to either drive DTMF when we use VAD, or 235 // enter DTX. 236 return true; 237 } 238 return false; 239 } 240 241 std::unique_ptr<RtpPacketToSend> packet = 242 rtp_sender_->AllocatePacket(frame.csrcs); 243 packet->SetMarker(MarkerBit(frame.type, frame.payload_id)); 244 packet->SetPayloadType(frame.payload_id); 245 packet->SetTimestamp(frame.rtp_timestamp); 246 packet->set_capture_time(clock_->CurrentTime()); 247 // Set audio level extension, if included. 248 packet->SetExtension<AudioLevelExtension>( 249 AudioLevel(frame.type == AudioFrameType::kAudioFrameSpeech, 250 frame.audio_level_dbov.value_or(127))); 251 252 if (absolute_capture_time.has_value()) { 253 // It also checks that extension was registered during SDP negotiation. If 254 // not then setter won't do anything. 255 packet->SetExtension<AbsoluteCaptureTimeExtension>(*absolute_capture_time); 256 } 257 258 packet->SetPayload(frame.payload); 259 260 { 261 MutexLock lock(&send_audio_mutex_); 262 last_payload_type_ = frame.payload_id; 263 } 264 packet->set_packet_type(RtpPacketMediaType::kAudio); 265 packet->set_allow_retransmission(true); 266 std::vector<std::unique_ptr<RtpPacketToSend>> packets(1); 267 packets[0] = std::move(packet); 268 rtp_sender_->EnqueuePackets(std::move(packets)); 269 if (first_packet_sent_()) { 270 RTC_LOG(LS_INFO) << "First audio RTP packet sent to pacer"; 271 } 272 return true; 273 } 274 275 // Send a TelephoneEvent tone using RFC 2833 (4733) 276 int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key, 277 uint16_t time_ms, 278 uint8_t level) { 279 DtmfQueue::Event event; 280 { 281 MutexLock lock(&send_audio_mutex_); 282 if (dtmf_payload_type_ < 0) { 283 // TelephoneEvent payloadtype not configured 284 return -1; 285 } 286 event.payload_type = dtmf_payload_type_; 287 } 288 event.key = key; 289 event.duration_ms = time_ms; 290 event.level = level; 291 return dtmf_queue_.AddDtmf(event) ? 0 : -1; 292 } 293 294 bool RTPSenderAudio::SendTelephoneEventPacket(bool ended, 295 uint32_t dtmf_timestamp, 296 uint16_t duration, 297 bool marker_bit) { 298 size_t send_count = ended ? 3 : 1; 299 300 std::vector<std::unique_ptr<RtpPacketToSend>> packets; 301 packets.reserve(send_count); 302 for (size_t i = 0; i < send_count; ++i) { 303 // Send DTMF data. 304 constexpr RtpPacketToSend::ExtensionManager* kNoExtensions = nullptr; 305 constexpr size_t kDtmfSize = 4; 306 auto packet = std::make_unique<RtpPacketToSend>(kNoExtensions, 307 kRtpHeaderSize + kDtmfSize); 308 packet->SetPayloadType(dtmf_current_event_.payload_type); 309 packet->SetMarker(marker_bit); 310 packet->SetSsrc(rtp_sender_->SSRC()); 311 packet->SetTimestamp(dtmf_timestamp); 312 packet->set_capture_time(clock_->CurrentTime()); 313 314 // Create DTMF data. 315 uint8_t* dtmfbuffer = packet->AllocatePayload(kDtmfSize); 316 RTC_DCHECK(dtmfbuffer); 317 /* From RFC 2833: 318 0 1 2 3 319 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 320 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 321 | event |E|R| volume | duration | 322 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 323 */ 324 // R bit always cleared 325 uint8_t R = 0x00; 326 uint8_t volume = dtmf_current_event_.level; 327 328 // First packet un-ended 329 uint8_t E = ended ? 0x80 : 0x00; 330 331 // First byte is Event number, equals key number 332 dtmfbuffer[0] = dtmf_current_event_.key; 333 dtmfbuffer[1] = E | R | volume; 334 ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 2, duration); 335 336 packet->set_packet_type(RtpPacketMediaType::kAudio); 337 packet->set_allow_retransmission(true); 338 packets.push_back(std::move(packet)); 339 } 340 rtp_sender_->EnqueuePackets(std::move(packets)); 341 return true; 342 } 343 } // namespace webrtc