audio_decoder_g722.cc (7038B)
1 /* 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_coding/codecs/g722/audio_decoder_g722.h" 12 13 #include <cstdint> 14 #include <cstring> 15 #include <utility> 16 #include <vector> 17 18 #include "api/audio_codecs/audio_decoder.h" 19 #include "modules/audio_coding/codecs/g722/g722_interface.h" 20 #include "modules/audio_coding/codecs/legacy_encoded_audio_frame.h" 21 #include "rtc_base/buffer.h" 22 #include "rtc_base/checks.h" 23 24 namespace webrtc { 25 26 AudioDecoderG722Impl::AudioDecoderG722Impl() { 27 WebRtcG722_CreateDecoder(&dec_state_); 28 WebRtcG722_DecoderInit(dec_state_); 29 } 30 31 AudioDecoderG722Impl::~AudioDecoderG722Impl() { 32 WebRtcG722_FreeDecoder(dec_state_); 33 } 34 35 bool AudioDecoderG722Impl::HasDecodePlc() const { 36 return false; 37 } 38 39 int AudioDecoderG722Impl::DecodeInternal(const uint8_t* encoded, 40 size_t encoded_len, 41 int sample_rate_hz, 42 int16_t* decoded, 43 SpeechType* speech_type) { 44 RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); 45 int16_t temp_type = 1; // Default is speech. 46 size_t ret = 47 WebRtcG722_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type); 48 *speech_type = ConvertSpeechType(temp_type); 49 return static_cast<int>(ret); 50 } 51 52 void AudioDecoderG722Impl::Reset() { 53 WebRtcG722_DecoderInit(dec_state_); 54 } 55 56 std::vector<AudioDecoder::ParseResult> AudioDecoderG722Impl::ParsePayload( 57 Buffer&& payload, 58 uint32_t timestamp) { 59 return LegacyEncodedAudioFrame::SplitBySamples(this, std::move(payload), 60 timestamp, 8, 16); 61 } 62 63 int AudioDecoderG722Impl::PacketDuration(const uint8_t* /* encoded */, 64 size_t encoded_len) const { 65 // 1/2 encoded byte per sample per channel. 66 return static_cast<int>(2 * encoded_len / Channels()); 67 } 68 69 int AudioDecoderG722Impl::PacketDurationRedundant(const uint8_t* encoded, 70 size_t encoded_len) const { 71 return PacketDuration(encoded, encoded_len); 72 } 73 74 int AudioDecoderG722Impl::SampleRateHz() const { 75 return 16000; 76 } 77 78 size_t AudioDecoderG722Impl::Channels() const { 79 return 1; 80 } 81 82 AudioDecoderG722StereoImpl::AudioDecoderG722StereoImpl() { 83 WebRtcG722_CreateDecoder(&dec_state_left_); 84 WebRtcG722_CreateDecoder(&dec_state_right_); 85 WebRtcG722_DecoderInit(dec_state_left_); 86 WebRtcG722_DecoderInit(dec_state_right_); 87 } 88 89 AudioDecoderG722StereoImpl::~AudioDecoderG722StereoImpl() { 90 WebRtcG722_FreeDecoder(dec_state_left_); 91 WebRtcG722_FreeDecoder(dec_state_right_); 92 } 93 94 int AudioDecoderG722StereoImpl::DecodeInternal(const uint8_t* encoded, 95 size_t encoded_len, 96 int sample_rate_hz, 97 int16_t* decoded, 98 SpeechType* speech_type) { 99 RTC_DCHECK_EQ(SampleRateHz(), sample_rate_hz); 100 // Adjust the encoded length down to ensure the same number of samples in each 101 // channel. 102 const size_t encoded_len_adjusted = PacketDuration(encoded, encoded_len) * 103 Channels() / 104 2; // 1/2 byte per sample per channel 105 int16_t temp_type = 1; // Default is speech. 106 // De-interleave the bit-stream into two separate payloads. 107 uint8_t* encoded_deinterleaved = new uint8_t[encoded_len_adjusted]; 108 SplitStereoPacket(encoded, encoded_len_adjusted, encoded_deinterleaved); 109 // Decode left and right. 110 size_t decoded_len = 111 WebRtcG722_Decode(dec_state_left_, encoded_deinterleaved, 112 encoded_len_adjusted / 2, decoded, &temp_type); 113 size_t ret = WebRtcG722_Decode( 114 dec_state_right_, &encoded_deinterleaved[encoded_len_adjusted / 2], 115 encoded_len_adjusted / 2, &decoded[decoded_len], &temp_type); 116 if (ret == decoded_len) { 117 ret += decoded_len; // Return total number of samples. 118 // Interleave output. 119 for (size_t k = ret / 2; k < ret; k++) { 120 int16_t temp = decoded[k]; 121 memmove(&decoded[2 * k - ret + 2], &decoded[2 * k - ret + 1], 122 (ret - k - 1) * sizeof(int16_t)); 123 decoded[2 * k - ret + 1] = temp; 124 } 125 } 126 *speech_type = ConvertSpeechType(temp_type); 127 delete[] encoded_deinterleaved; 128 return static_cast<int>(ret); 129 } 130 131 int AudioDecoderG722StereoImpl::PacketDuration(const uint8_t* /* encoded */, 132 size_t encoded_len) const { 133 // 1/2 encoded byte per sample per channel. Make sure the length represents 134 // an equal number of bytes per channel. Otherwise, we cannot de-interleave 135 // the encoded data later. 136 return static_cast<int>(2 * (encoded_len / Channels())); 137 } 138 139 int AudioDecoderG722StereoImpl::SampleRateHz() const { 140 return 16000; 141 } 142 143 size_t AudioDecoderG722StereoImpl::Channels() const { 144 return 2; 145 } 146 147 void AudioDecoderG722StereoImpl::Reset() { 148 WebRtcG722_DecoderInit(dec_state_left_); 149 WebRtcG722_DecoderInit(dec_state_right_); 150 } 151 152 std::vector<AudioDecoder::ParseResult> AudioDecoderG722StereoImpl::ParsePayload( 153 Buffer&& payload, 154 uint32_t timestamp) { 155 return LegacyEncodedAudioFrame::SplitBySamples(this, std::move(payload), 156 timestamp, 2 * 8, 16); 157 } 158 159 // Split the stereo packet and place left and right channel after each other 160 // in the output array. 161 void AudioDecoderG722StereoImpl::SplitStereoPacket( 162 const uint8_t* encoded, 163 size_t encoded_len, 164 uint8_t* encoded_deinterleaved) { 165 // Regroup the 4 bits/sample so |l1 l2| |r1 r2| |l3 l4| |r3 r4| ..., 166 // where "lx" is 4 bits representing left sample number x, and "rx" right 167 // sample. Two samples fit in one byte, represented with |...|. 168 for (size_t i = 0; i + 1 < encoded_len; i += 2) { 169 uint8_t right_byte = ((encoded[i] & 0x0F) << 4) + (encoded[i + 1] & 0x0F); 170 encoded_deinterleaved[i] = (encoded[i] & 0xF0) + (encoded[i + 1] >> 4); 171 encoded_deinterleaved[i + 1] = right_byte; 172 } 173 174 // Move one byte representing right channel each loop, and place it at the 175 // end of the bytestream vector. After looping the data is reordered to: 176 // |l1 l2| |l3 l4| ... |l(N-1) lN| |r1 r2| |r3 r4| ... |r(N-1) r(N)|, 177 // where N is the total number of samples. 178 for (size_t i = 0; i < encoded_len / 2; i++) { 179 uint8_t right_byte = encoded_deinterleaved[i + 1]; 180 memmove(&encoded_deinterleaved[i + 1], &encoded_deinterleaved[i + 2], 181 encoded_len - i - 2); 182 encoded_deinterleaved[encoded_len - 1] = right_byte; 183 } 184 } 185 186 } // namespace webrtc