1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"
12
13#include <assert.h>  // assert
14#include <math.h>   // pow()
15#include <string.h>  // memcpy()
16
17#include "webrtc/base/logging.h"
18#include "webrtc/base/trace_event.h"
19#include "webrtc/system_wrappers/include/critical_section_wrapper.h"
20
21namespace webrtc {
22RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
23    RtpData* data_callback,
24    RtpAudioFeedback* incoming_messages_callback) {
25  return new RTPReceiverAudio(data_callback, incoming_messages_callback);
26}
27
28RTPReceiverAudio::RTPReceiverAudio(RtpData* data_callback,
29                                   RtpAudioFeedback* incoming_messages_callback)
30    : RTPReceiverStrategy(data_callback),
31      TelephoneEventHandler(),
32      last_received_frequency_(8000),
33      telephone_event_forward_to_decoder_(false),
34      telephone_event_payload_type_(-1),
35      cng_nb_payload_type_(-1),
36      cng_wb_payload_type_(-1),
37      cng_swb_payload_type_(-1),
38      cng_fb_payload_type_(-1),
39      cng_payload_type_(-1),
40      g722_payload_type_(-1),
41      last_received_g722_(false),
42      num_energy_(0),
43      current_remote_energy_(),
44      cb_audio_feedback_(incoming_messages_callback) {
45  last_payload_.Audio.channels = 1;
46  memset(current_remote_energy_, 0, sizeof(current_remote_energy_));
47}
48
49// Outband TelephoneEvent(DTMF) detection
50void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
51    bool forward_to_decoder) {
52  CriticalSectionScoped lock(crit_sect_.get());
53  telephone_event_forward_to_decoder_ = forward_to_decoder;
54}
55
56// Is forwarding of outband telephone events turned on/off?
57bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const {
58  CriticalSectionScoped lock(crit_sect_.get());
59  return telephone_event_forward_to_decoder_;
60}
61
62bool RTPReceiverAudio::TelephoneEventPayloadType(
63    int8_t payload_type) const {
64  CriticalSectionScoped lock(crit_sect_.get());
65  return telephone_event_payload_type_ == payload_type;
66}
67
68bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type,
69                                      uint32_t* frequency,
70                                      bool* cng_payload_type_has_changed) {
71  CriticalSectionScoped lock(crit_sect_.get());
72  *cng_payload_type_has_changed = false;
73
74  //  We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz.
75  if (cng_nb_payload_type_ == payload_type) {
76    *frequency = 8000;
77    if (cng_payload_type_ != -1 && cng_payload_type_ != cng_nb_payload_type_)
78      *cng_payload_type_has_changed = true;
79
80    cng_payload_type_ = cng_nb_payload_type_;
81    return true;
82  } else if (cng_wb_payload_type_ == payload_type) {
83    // if last received codec is G.722 we must use frequency 8000
84    if (last_received_g722_) {
85      *frequency = 8000;
86    } else {
87      *frequency = 16000;
88    }
89    if (cng_payload_type_ != -1 && cng_payload_type_ != cng_wb_payload_type_)
90      *cng_payload_type_has_changed = true;
91    cng_payload_type_ = cng_wb_payload_type_;
92    return true;
93  } else if (cng_swb_payload_type_ == payload_type) {
94    *frequency = 32000;
95    if ((cng_payload_type_ != -1) &&
96        (cng_payload_type_ != cng_swb_payload_type_))
97      *cng_payload_type_has_changed = true;
98    cng_payload_type_ = cng_swb_payload_type_;
99    return true;
100  } else if (cng_fb_payload_type_ == payload_type) {
101    *frequency = 48000;
102    if (cng_payload_type_ != -1 && cng_payload_type_ != cng_fb_payload_type_)
103      *cng_payload_type_has_changed = true;
104    cng_payload_type_ = cng_fb_payload_type_;
105    return true;
106  } else {
107    //  not CNG
108    if (g722_payload_type_ == payload_type) {
109      last_received_g722_ = true;
110    } else {
111      last_received_g722_ = false;
112    }
113  }
114  return false;
115}
116
117bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const {
118  // Don't do this for DTMF packets, otherwise it's fine.
119  return !TelephoneEventPayloadType(payload_type);
120}
121
122// -   Sample based or frame based codecs based on RFC 3551
123// -
124// -   NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples.
125// -   The correct rate is 4 bits/sample.
126// -
127// -   name of                              sampling              default
128// -   encoding  sample/frame  bits/sample      rate  ms/frame  ms/packet
129// -
130// -   Sample based audio codecs
131// -   DVI4      sample        4                var.                   20
132// -   G722      sample        4              16,000                   20
133// -   G726-40   sample        5               8,000                   20
134// -   G726-32   sample        4               8,000                   20
135// -   G726-24   sample        3               8,000                   20
136// -   G726-16   sample        2               8,000                   20
137// -   L8        sample        8                var.                   20
138// -   L16       sample        16               var.                   20
139// -   PCMA      sample        8                var.                   20
140// -   PCMU      sample        8                var.                   20
141// -
142// -   Frame based audio codecs
143// -   G723      frame         N/A             8,000        30         30
144// -   G728      frame         N/A             8,000       2.5         20
145// -   G729      frame         N/A             8,000        10         20
146// -   G729D     frame         N/A             8,000        10         20
147// -   G729E     frame         N/A             8,000        10         20
148// -   GSM       frame         N/A             8,000        20         20
149// -   GSM-EFR   frame         N/A             8,000        20         20
150// -   LPC       frame         N/A             8,000        20         20
151// -   MPA       frame         N/A              var.      var.
152// -
153// -   G7221     frame         N/A
154int32_t RTPReceiverAudio::OnNewPayloadTypeCreated(
155    const char payload_name[RTP_PAYLOAD_NAME_SIZE],
156    int8_t payload_type,
157    uint32_t frequency) {
158  CriticalSectionScoped lock(crit_sect_.get());
159
160  if (RtpUtility::StringCompare(payload_name, "telephone-event", 15)) {
161    telephone_event_payload_type_ = payload_type;
162  }
163  if (RtpUtility::StringCompare(payload_name, "cn", 2)) {
164    //  we can have three CNG on 8000Hz, 16000Hz and 32000Hz
165    if (frequency == 8000) {
166      cng_nb_payload_type_ = payload_type;
167    } else if (frequency == 16000) {
168      cng_wb_payload_type_ = payload_type;
169    } else if (frequency == 32000) {
170      cng_swb_payload_type_ = payload_type;
171    } else if (frequency == 48000) {
172      cng_fb_payload_type_ = payload_type;
173    } else {
174      assert(false);
175      return -1;
176    }
177  }
178  return 0;
179}
180
181int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header,
182                                         const PayloadUnion& specific_payload,
183                                         bool is_red,
184                                         const uint8_t* payload,
185                                         size_t payload_length,
186                                         int64_t timestamp_ms,
187                                         bool is_first_packet) {
188  TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::ParseRtp",
189               "seqnum", rtp_header->header.sequenceNumber, "timestamp",
190               rtp_header->header.timestamp);
191  rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs;
192  num_energy_ = rtp_header->type.Audio.numEnergy;
193  if (rtp_header->type.Audio.numEnergy > 0 &&
194      rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) {
195    memcpy(current_remote_energy_,
196           rtp_header->type.Audio.arrOfEnergy,
197           rtp_header->type.Audio.numEnergy);
198  }
199
200  return ParseAudioCodecSpecific(rtp_header,
201                                 payload,
202                                 payload_length,
203                                 specific_payload.Audio,
204                                 is_red);
205}
206
207int RTPReceiverAudio::GetPayloadTypeFrequency() const {
208  CriticalSectionScoped lock(crit_sect_.get());
209  if (last_received_g722_) {
210    return 8000;
211  }
212  return last_received_frequency_;
213}
214
215RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
216    uint16_t last_payload_length) const {
217
218  // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check
219  // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG.
220  if (last_payload_length < 10) {  // our CNG is 9 bytes
221    return kRtpNoRtp;
222  } else {
223    return kRtpDead;
224  }
225}
226
227void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type,
228                                           PayloadUnion* specific_payload,
229                                           bool* should_discard_changes) {
230  *should_discard_changes = false;
231
232  if (TelephoneEventPayloadType(payload_type)) {
233    // Don't do callbacks for DTMF packets.
234    *should_discard_changes = true;
235    return;
236  }
237  // frequency is updated for CNG
238  bool cng_payload_type_has_changed = false;
239  bool is_cng_payload_type = CNGPayloadType(payload_type,
240                                            &specific_payload->Audio.frequency,
241                                            &cng_payload_type_has_changed);
242
243  if (is_cng_payload_type) {
244    // Don't do callbacks for DTMF packets.
245    *should_discard_changes = true;
246    return;
247  }
248}
249
250int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const {
251  CriticalSectionScoped cs(crit_sect_.get());
252
253  assert(num_energy_ <= kRtpCsrcSize);
254
255  if (num_energy_ > 0) {
256    memcpy(array_of_energy, current_remote_energy_,
257           sizeof(uint8_t) * num_energy_);
258  }
259  return num_energy_;
260}
261
262int32_t RTPReceiverAudio::InvokeOnInitializeDecoder(
263    RtpFeedback* callback,
264    int8_t payload_type,
265    const char payload_name[RTP_PAYLOAD_NAME_SIZE],
266    const PayloadUnion& specific_payload) const {
267  if (-1 ==
268      callback->OnInitializeDecoder(
269          payload_type, payload_name, specific_payload.Audio.frequency,
270          specific_payload.Audio.channels, specific_payload.Audio.rate)) {
271    LOG(LS_ERROR) << "Failed to create decoder for payload type: "
272                  << payload_name << "/" << static_cast<int>(payload_type);
273    return -1;
274  }
275  return 0;
276}
277
278// We are not allowed to have any critsects when calling data_callback.
279int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
280    WebRtcRTPHeader* rtp_header,
281    const uint8_t* payload_data,
282    size_t payload_length,
283    const AudioPayload& audio_specific,
284    bool is_red) {
285
286  if (payload_length == 0) {
287    return 0;
288  }
289
290  bool telephone_event_packet =
291      TelephoneEventPayloadType(rtp_header->header.payloadType);
292  if (telephone_event_packet) {
293    CriticalSectionScoped lock(crit_sect_.get());
294
295    // RFC 4733 2.3
296    // 0                   1                   2                   3
297    // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
298    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
299    // |     event     |E|R| volume    |          duration             |
300    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
301    //
302    if (payload_length % 4 != 0) {
303      return -1;
304    }
305    size_t number_of_events = payload_length / 4;
306
307    // sanity
308    if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
309      number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
310    }
311    for (size_t n = 0; n < number_of_events; ++n) {
312      bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
313
314      std::set<uint8_t>::iterator event =
315          telephone_event_reported_.find(payload_data[4 * n]);
316
317      if (event != telephone_event_reported_.end()) {
318        // we have already seen this event
319        if (end) {
320          telephone_event_reported_.erase(payload_data[4 * n]);
321        }
322      } else {
323        if (end) {
324          // don't add if it's a end of a tone
325        } else {
326          telephone_event_reported_.insert(payload_data[4 * n]);
327        }
328      }
329    }
330
331    // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
332    // should not be a problem since we don't care about the duration
333
334    // RFC 4733 See 2.5.1.5. & 2.5.2.4.  Multiple Events in a Packet
335  }
336
337  {
338    CriticalSectionScoped lock(crit_sect_.get());
339
340    if (!telephone_event_packet) {
341      last_received_frequency_ = audio_specific.frequency;
342    }
343
344    // Check if this is a CNG packet, receiver might want to know
345    uint32_t ignored;
346    bool also_ignored;
347    if (CNGPayloadType(rtp_header->header.payloadType,
348                       &ignored,
349                       &also_ignored)) {
350      rtp_header->type.Audio.isCNG = true;
351      rtp_header->frameType = kAudioFrameCN;
352    } else {
353      rtp_header->frameType = kAudioFrameSpeech;
354      rtp_header->type.Audio.isCNG = false;
355    }
356
357    // check if it's a DTMF event, hence something we can playout
358    if (telephone_event_packet) {
359      if (!telephone_event_forward_to_decoder_) {
360        // don't forward event to decoder
361        return 0;
362      }
363      std::set<uint8_t>::iterator first =
364          telephone_event_reported_.begin();
365      if (first != telephone_event_reported_.end() && *first > 15) {
366        // don't forward non DTMF events
367        return 0;
368      }
369    }
370  }
371  // TODO(holmer): Break this out to have RED parsing handled generically.
372  if (is_red && !(payload_data[0] & 0x80)) {
373    // we recive only one frame packed in a RED packet remove the RED wrapper
374    rtp_header->header.payloadType = payload_data[0];
375
376    // only one frame in the RED strip the one byte to help NetEq
377    return data_callback_->OnReceivedPayloadData(
378        payload_data + 1, payload_length - 1, rtp_header);
379  }
380
381  rtp_header->type.Audio.channel = audio_specific.channels;
382  return data_callback_->OnReceivedPayloadData(
383      payload_data, payload_length, rtp_header);
384}
385}  // namespace webrtc
386