opus_interface.cc (22294B)
1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_coding/codecs/opus/opus_interface.h" 12 13 #include <cstdint> 14 #include <cstdlib> 15 16 #include "rtc_base/checks.h" 17 18 enum { 19 #if WEBRTC_OPUS_SUPPORT_120MS_PTIME 20 /* Maximum supported frame size in WebRTC is 120 ms. */ 21 kWebRtcOpusMaxEncodeFrameSizeMs = 120, 22 #else 23 /* Maximum supported frame size in WebRTC is 60 ms. */ 24 kWebRtcOpusMaxEncodeFrameSizeMs = 60, 25 #endif 26 27 /* The format allows up to 120 ms frames. Since we don't control the other 28 * side, we must allow for packets of that size. NetEq is currently limited 29 * to 60 ms on the receive side. */ 30 kWebRtcOpusMaxDecodeFrameSizeMs = 120, 31 32 // Duration of audio that each call to packet loss concealment covers. 33 kWebRtcOpusPlcFrameSizeMs = 10, 34 }; 35 36 static int FrameSizePerChannel(int frame_size_ms, int sample_rate_hz) { 37 RTC_DCHECK_GT(frame_size_ms, 0); 38 RTC_DCHECK_EQ(frame_size_ms % 10, 0); 39 RTC_DCHECK_GT(sample_rate_hz, 0); 40 RTC_DCHECK_EQ(sample_rate_hz % 1000, 0); 41 return frame_size_ms * (sample_rate_hz / 1000); 42 } 43 44 // Maximum sample count per channel. 45 static int MaxFrameSizePerChannel(int sample_rate_hz) { 46 return FrameSizePerChannel(kWebRtcOpusMaxDecodeFrameSizeMs, sample_rate_hz); 47 } 48 49 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, 50 size_t channels, 51 int32_t application, 52 int sample_rate_hz) { 53 int opus_app; 54 if (!inst) 55 return -1; 56 57 switch (application) { 58 case 0: 59 opus_app = OPUS_APPLICATION_VOIP; 60 break; 61 case 1: 62 opus_app = OPUS_APPLICATION_AUDIO; 63 break; 64 default: 65 return -1; 66 } 67 68 OpusEncInst* state = 69 reinterpret_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst))); 70 RTC_DCHECK(state); 71 72 int error; 73 state->encoder = opus_encoder_create( 74 sample_rate_hz, static_cast<int>(channels), opus_app, &error); 75 76 if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) { 77 WebRtcOpus_EncoderFree(state); 78 return -1; 79 } 80 81 state->in_dtx_mode = 0; 82 state->channels = channels; 83 state->sample_rate_hz = sample_rate_hz; 84 85 *inst = state; 86 return 0; 87 } 88 89 int16_t WebRtcOpus_MultistreamEncoderCreate( 90 OpusEncInst** inst, 91 size_t channels, 92 int32_t application, 93 size_t streams, 94 size_t coupled_streams, 95 const unsigned char* channel_mapping) { 96 int opus_app; 97 if (!inst) 98 return -1; 99 100 switch (application) { 101 case 0: 102 opus_app = OPUS_APPLICATION_VOIP; 103 break; 104 case 1: 105 opus_app = OPUS_APPLICATION_AUDIO; 106 break; 107 default: 108 return -1; 109 } 110 111 OpusEncInst* state = 112 reinterpret_cast<OpusEncInst*>(calloc(1, sizeof(OpusEncInst))); 113 RTC_DCHECK(state); 114 115 int error; 116 const int sample_rate_hz = 48000; 117 state->multistream_encoder = opus_multistream_encoder_create( 118 sample_rate_hz, channels, streams, coupled_streams, channel_mapping, 119 opus_app, &error); 120 121 if (error != OPUS_OK || (!state->encoder && !state->multistream_encoder)) { 122 WebRtcOpus_EncoderFree(state); 123 return -1; 124 } 125 126 state->in_dtx_mode = 0; 127 state->channels = channels; 128 state->sample_rate_hz = sample_rate_hz; 129 130 *inst = state; 131 return 0; 132 } 133 134 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) { 135 if (inst) { 136 if (inst->encoder) { 137 opus_encoder_destroy(inst->encoder); 138 } else { 139 opus_multistream_encoder_destroy(inst->multistream_encoder); 140 } 141 free(inst); 142 return 0; 143 } else { 144 return -1; 145 } 146 } 147 148 int WebRtcOpus_Encode(OpusEncInst* inst, 149 const int16_t* audio_in, 150 size_t samples, 151 size_t length_encoded_buffer, 152 uint8_t* encoded) { 153 int res; 154 155 if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) { 156 return -1; 157 } 158 159 if (inst->encoder) { 160 res = opus_encode(inst->encoder, (const opus_int16*)audio_in, 161 static_cast<int>(samples), encoded, 162 static_cast<opus_int32>(length_encoded_buffer)); 163 } else { 164 res = opus_multistream_encode( 165 inst->multistream_encoder, (const opus_int16*)audio_in, 166 static_cast<int>(samples), encoded, 167 static_cast<opus_int32>(length_encoded_buffer)); 168 } 169 170 if (res <= 0) { 171 return -1; 172 } 173 174 if (res <= 2) { 175 // Indicates DTX since the packet has nothing but a header. In principle, 176 // there is no need to send this packet. However, we do transmit the first 177 // occurrence to let the decoder know that the encoder enters DTX mode. 178 if (inst->in_dtx_mode) { 179 return 0; 180 } else { 181 inst->in_dtx_mode = 1; 182 return res; 183 } 184 } 185 186 inst->in_dtx_mode = 0; 187 return res; 188 } 189 190 #define ENCODER_CTL(inst, vargs) \ 191 (inst->encoder \ 192 ? opus_encoder_ctl(inst->encoder, vargs) \ 193 : opus_multistream_encoder_ctl(inst->multistream_encoder, vargs)) 194 195 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) { 196 if (inst) { 197 return ENCODER_CTL(inst, OPUS_SET_BITRATE(rate)); 198 } else { 199 return -1; 200 } 201 } 202 203 int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) { 204 if (inst) { 205 return ENCODER_CTL(inst, OPUS_SET_PACKET_LOSS_PERC(loss_rate)); 206 } else { 207 return -1; 208 } 209 } 210 211 int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) { 212 opus_int32 set_bandwidth; 213 214 if (!inst) 215 return -1; 216 217 if (frequency_hz <= 8000) { 218 set_bandwidth = OPUS_BANDWIDTH_NARROWBAND; 219 } else if (frequency_hz <= 12000) { 220 set_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; 221 } else if (frequency_hz <= 16000) { 222 set_bandwidth = OPUS_BANDWIDTH_WIDEBAND; 223 } else if (frequency_hz <= 24000) { 224 set_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; 225 } else { 226 set_bandwidth = OPUS_BANDWIDTH_FULLBAND; 227 } 228 return ENCODER_CTL(inst, OPUS_SET_MAX_BANDWIDTH(set_bandwidth)); 229 } 230 231 int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst, 232 int32_t* result_hz) { 233 if (inst->encoder) { 234 if (opus_encoder_ctl(inst->encoder, OPUS_GET_MAX_BANDWIDTH(result_hz)) == 235 OPUS_OK) { 236 return 0; 237 } 238 return -1; 239 } 240 241 opus_int32 max_bandwidth; 242 int s; 243 int ret; 244 245 max_bandwidth = 0; 246 ret = OPUS_OK; 247 s = 0; 248 while (ret == OPUS_OK) { 249 OpusEncoder* enc; 250 opus_int32 bandwidth; 251 252 ret = ENCODER_CTL(inst, OPUS_MULTISTREAM_GET_ENCODER_STATE(s, &enc)); 253 if (ret == OPUS_BAD_ARG) 254 break; 255 if (ret != OPUS_OK) 256 return -1; 257 if (opus_encoder_ctl(enc, OPUS_GET_MAX_BANDWIDTH(&bandwidth)) != OPUS_OK) 258 return -1; 259 260 if (max_bandwidth != 0 && max_bandwidth != bandwidth) 261 return -1; 262 263 max_bandwidth = bandwidth; 264 s++; 265 } 266 *result_hz = max_bandwidth; 267 return 0; 268 } 269 270 int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) { 271 if (inst) { 272 return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(1)); 273 } else { 274 return -1; 275 } 276 } 277 278 int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) { 279 if (inst) { 280 return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(0)); 281 } else { 282 return -1; 283 } 284 } 285 286 int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) { 287 if (inst) { 288 return ENCODER_CTL(inst, OPUS_SET_DTX(1)); 289 } else { 290 return -1; 291 } 292 } 293 294 int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) { 295 if (inst) { 296 return ENCODER_CTL(inst, OPUS_SET_DTX(0)); 297 } else { 298 return -1; 299 } 300 } 301 302 int16_t WebRtcOpus_GetUseDtx(OpusEncInst* inst) { 303 if (inst) { 304 opus_int32 use_dtx; 305 if (ENCODER_CTL(inst, OPUS_GET_DTX(&use_dtx)) == 0) { 306 return use_dtx; 307 } 308 } 309 return -1; 310 } 311 312 int16_t WebRtcOpus_GetInDtx(OpusEncInst* inst) { 313 if (inst) { 314 opus_int32 in_dtx; 315 if (ENCODER_CTL(inst, OPUS_GET_IN_DTX(&in_dtx)) == 0) { 316 return in_dtx; 317 } 318 } 319 return -1; 320 } 321 322 int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) { 323 if (inst) { 324 return ENCODER_CTL(inst, OPUS_SET_VBR(0)); 325 } else { 326 return -1; 327 } 328 } 329 330 int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) { 331 if (inst) { 332 return ENCODER_CTL(inst, OPUS_SET_VBR(1)); 333 } else { 334 return -1; 335 } 336 } 337 338 int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) { 339 if (inst) { 340 return ENCODER_CTL(inst, OPUS_SET_COMPLEXITY(complexity)); 341 } else { 342 return -1; 343 } 344 } 345 346 int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) { 347 if (!inst) { 348 return -1; 349 } 350 int32_t bandwidth; 351 if (ENCODER_CTL(inst, OPUS_GET_BANDWIDTH(&bandwidth)) == 0) { 352 return bandwidth; 353 } else { 354 return -1; 355 } 356 } 357 358 int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) { 359 if (inst) { 360 return ENCODER_CTL(inst, OPUS_SET_BANDWIDTH(bandwidth)); 361 } else { 362 return -1; 363 } 364 } 365 366 int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) { 367 if (!inst) 368 return -1; 369 if (num_channels == 0) { 370 return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(OPUS_AUTO)); 371 } else if (num_channels == 1 || num_channels == 2) { 372 return ENCODER_CTL(inst, OPUS_SET_FORCE_CHANNELS(num_channels)); 373 } else { 374 return -1; 375 } 376 } 377 378 int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, 379 size_t channels, 380 int sample_rate_hz) { 381 int error; 382 OpusDecInst* state; 383 384 if (inst != nullptr) { 385 // Create Opus decoder state. 386 state = reinterpret_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst))); 387 if (state == nullptr) { 388 return -1; 389 } 390 391 state->decoder = 392 opus_decoder_create(sample_rate_hz, static_cast<int>(channels), &error); 393 if (error == OPUS_OK && state->decoder) { 394 // Creation of memory all ok. 395 state->channels = channels; 396 state->sample_rate_hz = sample_rate_hz; 397 state->in_dtx_mode = 0; 398 state->last_packet_num_channels = channels; 399 *inst = state; 400 return 0; 401 } 402 403 // If memory allocation was unsuccessful, free the entire state. 404 if (state->decoder) { 405 opus_decoder_destroy(state->decoder); 406 } 407 free(state); 408 } 409 return -1; 410 } 411 412 int16_t WebRtcOpus_MultistreamDecoderCreate( 413 OpusDecInst** inst, 414 size_t channels, 415 size_t streams, 416 size_t coupled_streams, 417 const unsigned char* channel_mapping) { 418 int error; 419 OpusDecInst* state; 420 421 if (inst != nullptr) { 422 // Create Opus decoder state. 423 state = reinterpret_cast<OpusDecInst*>(calloc(1, sizeof(OpusDecInst))); 424 if (state == nullptr) { 425 return -1; 426 } 427 428 // Create new memory, always at 48000 Hz. 429 state->multistream_decoder = opus_multistream_decoder_create( 430 48000, channels, streams, coupled_streams, channel_mapping, &error); 431 432 if (error == OPUS_OK && state->multistream_decoder) { 433 // Creation of memory all ok. 434 state->channels = channels; 435 state->sample_rate_hz = 48000; 436 state->in_dtx_mode = 0; 437 *inst = state; 438 return 0; 439 } 440 441 // If memory allocation was unsuccessful, free the entire state. 442 opus_multistream_decoder_destroy(state->multistream_decoder); 443 free(state); 444 } 445 return -1; 446 } 447 448 int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) { 449 if (inst) { 450 if (inst->decoder) { 451 opus_decoder_destroy(inst->decoder); 452 } else if (inst->multistream_decoder) { 453 opus_multistream_decoder_destroy(inst->multistream_decoder); 454 } 455 free(inst); 456 return 0; 457 } else { 458 return -1; 459 } 460 } 461 462 size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) { 463 return inst->channels; 464 } 465 466 void WebRtcOpus_DecoderInit(OpusDecInst* inst) { 467 if (inst->decoder) { 468 opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE); 469 } else { 470 opus_multistream_decoder_ctl(inst->multistream_decoder, OPUS_RESET_STATE); 471 } 472 inst->in_dtx_mode = 0; 473 } 474 475 /* For decoder to determine if it is to output speech or comfort noise. */ 476 static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) { 477 // Audio type becomes comfort noise if `encoded_byte` is 1 and keeps 478 // to be so if the following `encoded_byte` are 0 or 1. 479 if (encoded_bytes == 0 && inst->in_dtx_mode) { 480 return 2; // Comfort noise. 481 } else if (encoded_bytes == 1 || encoded_bytes == 2) { 482 // TODO(henrik.lundin): There is a slight risk that a 2-byte payload is in 483 // fact a 1-byte TOC with a 1-byte payload. That will be erroneously 484 // interpreted as comfort noise output, but such a payload is probably 485 // faulty anyway. 486 487 // TODO(webrtc:10218): This is wrong for multistream opus. Then are several 488 // single-stream packets glued together with some packet size bytes in 489 // between. See https://tools.ietf.org/html/rfc6716#appendix-B 490 inst->in_dtx_mode = 1; 491 return 2; // Comfort noise. 492 } else { 493 inst->in_dtx_mode = 0; 494 return 0; // Speech. 495 } 496 } 497 498 /* `frame_size` is set to maximum Opus frame size in the normal case, and 499 * is set to the number of samples needed for PLC in case of losses. 500 * It is up to the caller to make sure the value is correct. */ 501 static int DecodeNative(OpusDecInst* inst, 502 const uint8_t* encoded, 503 size_t encoded_bytes, 504 int frame_size, 505 int16_t* decoded, 506 int16_t* audio_type, 507 int decode_fec) { 508 int decoded_samples_per_channel = -1; 509 if (inst->decoder) { 510 if (encoded_bytes > 0) { 511 // TODO: https://issues.webrtc.org/376493209 - When fixed, remove block 512 // below. 513 inst->last_packet_num_channels = opus_packet_get_nb_channels(encoded); 514 RTC_DCHECK(inst->last_packet_num_channels == 1 || 515 inst->last_packet_num_channels == 2); 516 } 517 decoded_samples_per_channel = opus_decode( 518 inst->decoder, encoded, static_cast<opus_int32>(encoded_bytes), 519 reinterpret_cast<opus_int16*>(decoded), frame_size, decode_fec); 520 } else { 521 decoded_samples_per_channel = opus_multistream_decode( 522 inst->multistream_decoder, encoded, 523 static_cast<opus_int32>(encoded_bytes), 524 reinterpret_cast<opus_int16*>(decoded), frame_size, decode_fec); 525 } 526 527 if (decoded_samples_per_channel <= 0) 528 return -1; 529 530 *audio_type = DetermineAudioType(inst, encoded_bytes); 531 532 if (inst->decoder) { 533 // TODO: https://issues.webrtc.org/376493209 - When fixed, remove block 534 // below. 535 // When stereo decoding is enabled and the last observed non-empty packet 536 // encoded mono audio, the Opus decoder may generate non-trivial stereo 537 // audio. As that is undesired, in that case make sure that `decoded` 538 // contains trivial stereo audio by copying the left channel into the right 539 // one. 540 if (inst->channels == 2 && inst->last_packet_num_channels == 1) { 541 int num_channels = inst->channels; 542 for (int i = 0; i < decoded_samples_per_channel * num_channels; 543 i += num_channels) { 544 decoded[i + 1] = decoded[i]; 545 } 546 } 547 } 548 549 return decoded_samples_per_channel; 550 } 551 552 static int DecodePlc(OpusDecInst* inst, int16_t* decoded) { 553 int16_t audio_type = 0; 554 int decoded_samples; 555 int plc_samples = 556 FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz); 557 558 decoded_samples = 559 DecodeNative(inst, nullptr, 0, plc_samples, decoded, &audio_type, 0); 560 if (decoded_samples < 0) { 561 return -1; 562 } 563 564 return decoded_samples; 565 } 566 567 int WebRtcOpus_Decode(OpusDecInst* inst, 568 const uint8_t* encoded, 569 size_t encoded_bytes, 570 int16_t* decoded, 571 int16_t* audio_type) { 572 int decoded_samples_per_channel; 573 if (encoded_bytes == 0) { 574 *audio_type = DetermineAudioType(inst, encoded_bytes); 575 decoded_samples_per_channel = DecodePlc(inst, decoded); 576 } else { 577 decoded_samples_per_channel = DecodeNative( 578 inst, encoded, encoded_bytes, 579 MaxFrameSizePerChannel(inst->sample_rate_hz), decoded, audio_type, 0); 580 } 581 if (decoded_samples_per_channel < 0) { 582 return -1; 583 } 584 585 return decoded_samples_per_channel; 586 } 587 588 int WebRtcOpus_DecodeFec(OpusDecInst* inst, 589 const uint8_t* encoded, 590 size_t encoded_bytes, 591 int16_t* decoded, 592 int16_t* audio_type) { 593 int decoded_samples; 594 int fec_samples; 595 596 if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) { 597 return 0; 598 } 599 600 fec_samples = 601 opus_packet_get_samples_per_frame(encoded, inst->sample_rate_hz); 602 603 decoded_samples = DecodeNative(inst, encoded, encoded_bytes, fec_samples, 604 decoded, audio_type, 1); 605 if (decoded_samples < 0) { 606 return -1; 607 } 608 609 return decoded_samples; 610 } 611 612 int WebRtcOpus_DurationEst(OpusDecInst* inst, 613 const uint8_t* payload, 614 size_t payload_length_bytes) { 615 if (payload_length_bytes == 0) { 616 // WebRtcOpus_Decode calls PLC when payload length is zero. So we return 617 // PLC duration correspondingly. 618 return WebRtcOpus_PlcDuration(inst); 619 } 620 621 int frames, samples; 622 frames = opus_packet_get_nb_frames( 623 payload, static_cast<opus_int32>(payload_length_bytes)); 624 if (frames < 0) { 625 /* Invalid payload data. */ 626 return 0; 627 } 628 samples = 629 frames * opus_packet_get_samples_per_frame(payload, inst->sample_rate_hz); 630 if (samples > 120 * inst->sample_rate_hz / 1000) { 631 // More than 120 ms' worth of samples. 632 return 0; 633 } 634 return samples; 635 } 636 637 int WebRtcOpus_PlcDuration(OpusDecInst* inst) { 638 return FrameSizePerChannel(kWebRtcOpusPlcFrameSizeMs, inst->sample_rate_hz); 639 } 640 641 int WebRtcOpus_FecDurationEst(const uint8_t* payload, 642 size_t payload_length_bytes, 643 int sample_rate_hz) { 644 if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) { 645 return 0; 646 } 647 const int samples = 648 opus_packet_get_samples_per_frame(payload, sample_rate_hz); 649 const int samples_per_ms = sample_rate_hz / 1000; 650 if (samples < 10 * samples_per_ms || samples > 120 * samples_per_ms) { 651 /* Invalid payload duration. */ 652 return 0; 653 } 654 return samples; 655 } 656 657 int WebRtcOpus_NumSilkFrames(const uint8_t* payload) { 658 // For computing the payload length in ms, the sample rate is not important 659 // since it cancels out. We use 48 kHz, but any valid sample rate would work. 660 int payload_length_ms = 661 opus_packet_get_samples_per_frame(payload, 48000) / 48; 662 if (payload_length_ms < 10) 663 payload_length_ms = 10; 664 665 int silk_frames; 666 switch (payload_length_ms) { 667 case 10: 668 case 20: 669 silk_frames = 1; 670 break; 671 case 40: 672 silk_frames = 2; 673 break; 674 case 60: 675 silk_frames = 3; 676 break; 677 default: 678 return 0; // It is actually even an invalid packet. 679 } 680 return silk_frames; 681 } 682 683 // This method is based on Definition of the Opus Audio Codec 684 // (https://tools.ietf.org/html/rfc6716). Basically, this method is based on 685 // parsing the LP layer of an Opus packet, particularly the LBRR flag. 686 int WebRtcOpus_PacketHasFec(const uint8_t* payload, 687 size_t payload_length_bytes) { 688 if (payload == nullptr || payload_length_bytes == 0) 689 return 0; 690 691 // In CELT_ONLY mode, packets should not have FEC. 692 if (payload[0] & 0x80) 693 return 0; 694 695 int silk_frames = WebRtcOpus_NumSilkFrames(payload); 696 if (silk_frames == 0) 697 return 0; // Not valid. 698 699 const int channels = opus_packet_get_nb_channels(payload); 700 RTC_DCHECK(channels == 1 || channels == 2); 701 702 // Max number of frames in an Opus packet is 48. 703 opus_int16 frame_sizes[48]; 704 const unsigned char* frame_data[48]; 705 706 // Parse packet to get the frames. But we only care about the first frame, 707 // since we can only decode the FEC from the first one. 708 if (opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes), 709 nullptr, frame_data, frame_sizes, nullptr) < 0) { 710 return 0; 711 } 712 713 if (frame_sizes[0] < 1) { 714 return 0; 715 } 716 717 // A frame starts with the LP layer. The LP layer begins with two to eight 718 // header bits.These consist of one VAD bit per SILK frame (up to 3), 719 // followed by a single flag indicating the presence of LBRR frames. 720 // For a stereo packet, these first flags correspond to the mid channel, and 721 // a second set of flags is included for the side channel. Because these are 722 // the first symbols decoded by the range coder and because they are coded 723 // as binary values with uniform probability, they can be extracted directly 724 // from the most significant bits of the first byte of compressed data. 725 for (int n = 0; n < channels; n++) { 726 // The LBRR bit for channel 1 is on the (`silk_frames` + 1)-th bit, and 727 // that of channel 2 is on the |(`silk_frames` + 1) * 2 + 1|-th bit. 728 if (frame_data[0][0] & (0x80 >> ((n + 1) * (silk_frames + 1) - 1))) 729 return 1; 730 } 731 732 return 0; 733 } 734 735 int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload, 736 size_t payload_length_bytes) { 737 if (payload == nullptr || payload_length_bytes == 0) 738 return 0; 739 740 // In CELT_ONLY mode we can not determine whether there is VAD. 741 if (payload[0] & 0x80) 742 return -1; 743 744 int silk_frames = WebRtcOpus_NumSilkFrames(payload); 745 if (silk_frames == 0) 746 return -1; 747 748 const int channels = opus_packet_get_nb_channels(payload); 749 RTC_DCHECK(channels == 1 || channels == 2); 750 751 // Max number of frames in an Opus packet is 48. 752 opus_int16 frame_sizes[48]; 753 const unsigned char* frame_data[48]; 754 755 // Parse packet to get the frames. 756 int frames = 757 opus_packet_parse(payload, static_cast<opus_int32>(payload_length_bytes), 758 nullptr, frame_data, frame_sizes, nullptr); 759 if (frames < 0) 760 return -1; 761 762 // Iterate over all Opus frames which may contain multiple SILK frames. 763 for (int frame = 0; frame < frames; frame++) { 764 if (frame_sizes[frame] < 1) { 765 continue; 766 } 767 if (frame_data[frame][0] >> (8 - silk_frames)) 768 return 1; 769 if (channels == 2 && 770 (frame_data[frame][0] << (silk_frames + 1)) >> (8 - silk_frames)) 771 return 1; 772 } 773 774 return 0; 775 }