1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"
12
13#include <assert.h>  // assert
14#include <math.h>   // pow()
15#include <string.h>  // memcpy()
16
17#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
18#include "webrtc/system_wrappers/interface/logging.h"
19#include "webrtc/system_wrappers/interface/trace_event.h"
20
21namespace webrtc {
22RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
23    int32_t id, RtpData* data_callback,
24    RtpAudioFeedback* incoming_messages_callback) {
25  return new RTPReceiverAudio(id, data_callback, incoming_messages_callback);
26}
27
28RTPReceiverAudio::RTPReceiverAudio(const int32_t id,
29                                   RtpData* data_callback,
30                                   RtpAudioFeedback* incoming_messages_callback)
31    : RTPReceiverStrategy(data_callback),
32      TelephoneEventHandler(),
33      id_(id),
34      last_received_frequency_(8000),
35      telephone_event_forward_to_decoder_(false),
36      telephone_event_payload_type_(-1),
37      cng_nb_payload_type_(-1),
38      cng_wb_payload_type_(-1),
39      cng_swb_payload_type_(-1),
40      cng_fb_payload_type_(-1),
41      cng_payload_type_(-1),
42      g722_payload_type_(-1),
43      last_received_g722_(false),
44      num_energy_(0),
45      current_remote_energy_(),
46      cb_audio_feedback_(incoming_messages_callback) {
47  last_payload_.Audio.channels = 1;
48  memset(current_remote_energy_, 0, sizeof(current_remote_energy_));
49}
50
51// Outband TelephoneEvent(DTMF) detection
52void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
53    bool forward_to_decoder) {
54  CriticalSectionScoped lock(crit_sect_.get());
55  telephone_event_forward_to_decoder_ = forward_to_decoder;
56}
57
58// Is forwarding of outband telephone events turned on/off?
59bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const {
60  CriticalSectionScoped lock(crit_sect_.get());
61  return telephone_event_forward_to_decoder_;
62}
63
64bool RTPReceiverAudio::TelephoneEventPayloadType(
65    int8_t payload_type) const {
66  CriticalSectionScoped lock(crit_sect_.get());
67  return (telephone_event_payload_type_ == payload_type) ? true : false;
68}
69
70bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type,
71                                      uint32_t* frequency,
72                                      bool* cng_payload_type_has_changed) {
73  CriticalSectionScoped lock(crit_sect_.get());
74  *cng_payload_type_has_changed = false;
75
76  //  We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz.
77  if (cng_nb_payload_type_ == payload_type) {
78    *frequency = 8000;
79    if (cng_payload_type_ != -1 && cng_payload_type_ != cng_nb_payload_type_)
80      *cng_payload_type_has_changed = true;
81
82    cng_payload_type_ = cng_nb_payload_type_;
83    return true;
84  } else if (cng_wb_payload_type_ == payload_type) {
85    // if last received codec is G.722 we must use frequency 8000
86    if (last_received_g722_) {
87      *frequency = 8000;
88    } else {
89      *frequency = 16000;
90    }
91    if (cng_payload_type_ != -1 && cng_payload_type_ != cng_wb_payload_type_)
92      *cng_payload_type_has_changed = true;
93    cng_payload_type_ = cng_wb_payload_type_;
94    return true;
95  } else if (cng_swb_payload_type_ == payload_type) {
96    *frequency = 32000;
97    if ((cng_payload_type_ != -1) &&
98        (cng_payload_type_ != cng_swb_payload_type_))
99      *cng_payload_type_has_changed = true;
100    cng_payload_type_ = cng_swb_payload_type_;
101    return true;
102  } else if (cng_fb_payload_type_ == payload_type) {
103    *frequency = 48000;
104    if (cng_payload_type_ != -1 && cng_payload_type_ != cng_fb_payload_type_)
105      *cng_payload_type_has_changed = true;
106    cng_payload_type_ = cng_fb_payload_type_;
107    return true;
108  } else {
109    //  not CNG
110    if (g722_payload_type_ == payload_type) {
111      last_received_g722_ = true;
112    } else {
113      last_received_g722_ = false;
114    }
115  }
116  return false;
117}
118
119bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const {
120  // Don't do this for DTMF packets, otherwise it's fine.
121  return !TelephoneEventPayloadType(payload_type);
122}
123
124// -   Sample based or frame based codecs based on RFC 3551
125// -
126// -   NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples.
127// -   The correct rate is 4 bits/sample.
128// -
129// -   name of                              sampling              default
130// -   encoding  sample/frame  bits/sample      rate  ms/frame  ms/packet
131// -
132// -   Sample based audio codecs
133// -   DVI4      sample        4                var.                   20
134// -   G722      sample        4              16,000                   20
135// -   G726-40   sample        5               8,000                   20
136// -   G726-32   sample        4               8,000                   20
137// -   G726-24   sample        3               8,000                   20
138// -   G726-16   sample        2               8,000                   20
139// -   L8        sample        8                var.                   20
140// -   L16       sample        16               var.                   20
141// -   PCMA      sample        8                var.                   20
142// -   PCMU      sample        8                var.                   20
143// -
144// -   Frame based audio codecs
145// -   G723      frame         N/A             8,000        30         30
146// -   G728      frame         N/A             8,000       2.5         20
147// -   G729      frame         N/A             8,000        10         20
148// -   G729D     frame         N/A             8,000        10         20
149// -   G729E     frame         N/A             8,000        10         20
150// -   GSM       frame         N/A             8,000        20         20
151// -   GSM-EFR   frame         N/A             8,000        20         20
152// -   LPC       frame         N/A             8,000        20         20
153// -   MPA       frame         N/A              var.      var.
154// -
155// -   G7221     frame         N/A
156int32_t RTPReceiverAudio::OnNewPayloadTypeCreated(
157    const char payload_name[RTP_PAYLOAD_NAME_SIZE],
158    int8_t payload_type,
159    uint32_t frequency) {
160  CriticalSectionScoped lock(crit_sect_.get());
161
162  if (RtpUtility::StringCompare(payload_name, "telephone-event", 15)) {
163    telephone_event_payload_type_ = payload_type;
164  }
165  if (RtpUtility::StringCompare(payload_name, "cn", 2)) {
166    //  we can have three CNG on 8000Hz, 16000Hz and 32000Hz
167    if (frequency == 8000) {
168      cng_nb_payload_type_ = payload_type;
169    } else if (frequency == 16000) {
170      cng_wb_payload_type_ = payload_type;
171    } else if (frequency == 32000) {
172      cng_swb_payload_type_ = payload_type;
173    } else if (frequency == 48000) {
174      cng_fb_payload_type_ = payload_type;
175    } else {
176      assert(false);
177      return -1;
178    }
179  }
180  return 0;
181}
182
183int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header,
184                                         const PayloadUnion& specific_payload,
185                                         bool is_red,
186                                         const uint8_t* payload,
187                                         uint16_t payload_length,
188                                         int64_t timestamp_ms,
189                                         bool is_first_packet) {
190  TRACE_EVENT2("webrtc_rtp", "Audio::ParseRtp",
191               "seqnum", rtp_header->header.sequenceNumber,
192               "timestamp", rtp_header->header.timestamp);
193  rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs;
194  num_energy_ = rtp_header->type.Audio.numEnergy;
195  if (rtp_header->type.Audio.numEnergy > 0 &&
196      rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) {
197    memcpy(current_remote_energy_,
198           rtp_header->type.Audio.arrOfEnergy,
199           rtp_header->type.Audio.numEnergy);
200  }
201
202  return ParseAudioCodecSpecific(rtp_header,
203                                 payload,
204                                 payload_length,
205                                 specific_payload.Audio,
206                                 is_red);
207}
208
209int RTPReceiverAudio::GetPayloadTypeFrequency() const {
210  CriticalSectionScoped lock(crit_sect_.get());
211  if (last_received_g722_) {
212    return 8000;
213  }
214  return last_received_frequency_;
215}
216
217RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
218    uint16_t last_payload_length) const {
219
220  // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check
221  // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG.
222  if (last_payload_length < 10) {  // our CNG is 9 bytes
223    return kRtpNoRtp;
224  } else {
225    return kRtpDead;
226  }
227}
228
229void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type,
230                                           PayloadUnion* specific_payload,
231                                           bool* should_reset_statistics,
232                                           bool* should_discard_changes) {
233  *should_discard_changes = false;
234  *should_reset_statistics = false;
235
236  if (TelephoneEventPayloadType(payload_type)) {
237    // Don't do callbacks for DTMF packets.
238    *should_discard_changes = true;
239    return;
240  }
241  // frequency is updated for CNG
242  bool cng_payload_type_has_changed = false;
243  bool is_cng_payload_type = CNGPayloadType(payload_type,
244                                            &specific_payload->Audio.frequency,
245                                            &cng_payload_type_has_changed);
246
247  *should_reset_statistics = cng_payload_type_has_changed;
248
249  if (is_cng_payload_type) {
250    // Don't do callbacks for DTMF packets.
251    *should_discard_changes = true;
252    return;
253  }
254}
255
256int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const {
257  CriticalSectionScoped cs(crit_sect_.get());
258
259  assert(num_energy_ <= kRtpCsrcSize);
260
261  if (num_energy_ > 0) {
262    memcpy(array_of_energy, current_remote_energy_,
263           sizeof(uint8_t) * num_energy_);
264  }
265  return num_energy_;
266}
267
268int32_t RTPReceiverAudio::InvokeOnInitializeDecoder(
269    RtpFeedback* callback,
270    int32_t id,
271    int8_t payload_type,
272    const char payload_name[RTP_PAYLOAD_NAME_SIZE],
273    const PayloadUnion& specific_payload) const {
274  if (-1 == callback->OnInitializeDecoder(id,
275                                          payload_type,
276                                          payload_name,
277                                          specific_payload.Audio.frequency,
278                                          specific_payload.Audio.channels,
279                                          specific_payload.Audio.rate)) {
280    LOG(LS_ERROR) << "Failed to create decoder for payload type: "
281                  << payload_name << "/" << payload_type;
282    return -1;
283  }
284  return 0;
285}
286
287// We are not allowed to have any critsects when calling data_callback.
288int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
289    WebRtcRTPHeader* rtp_header,
290    const uint8_t* payload_data,
291    uint16_t payload_length,
292    const AudioPayload& audio_specific,
293    bool is_red) {
294
295  if (payload_length == 0) {
296    return 0;
297  }
298
299  bool telephone_event_packet =
300      TelephoneEventPayloadType(rtp_header->header.payloadType);
301  if (telephone_event_packet) {
302    CriticalSectionScoped lock(crit_sect_.get());
303
304    // RFC 4733 2.3
305    // 0                   1                   2                   3
306    // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
307    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
308    // |     event     |E|R| volume    |          duration             |
309    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
310    //
311    if (payload_length % 4 != 0) {
312      return -1;
313    }
314    uint8_t number_of_events = payload_length / 4;
315
316    // sanity
317    if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
318      number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
319    }
320    for (int n = 0; n < number_of_events; ++n) {
321      bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
322
323      std::set<uint8_t>::iterator event =
324          telephone_event_reported_.find(payload_data[4 * n]);
325
326      if (event != telephone_event_reported_.end()) {
327        // we have already seen this event
328        if (end) {
329          telephone_event_reported_.erase(payload_data[4 * n]);
330        }
331      } else {
332        if (end) {
333          // don't add if it's a end of a tone
334        } else {
335          telephone_event_reported_.insert(payload_data[4 * n]);
336        }
337      }
338    }
339
340    // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
341    // should not be a problem since we don't care about the duration
342
343    // RFC 4733 See 2.5.1.5. & 2.5.2.4.  Multiple Events in a Packet
344  }
345
346  {
347    CriticalSectionScoped lock(crit_sect_.get());
348
349    if (!telephone_event_packet) {
350      last_received_frequency_ = audio_specific.frequency;
351    }
352
353    // Check if this is a CNG packet, receiver might want to know
354    uint32_t ignored;
355    bool also_ignored;
356    if (CNGPayloadType(rtp_header->header.payloadType,
357                       &ignored,
358                       &also_ignored)) {
359      rtp_header->type.Audio.isCNG = true;
360      rtp_header->frameType = kAudioFrameCN;
361    } else {
362      rtp_header->frameType = kAudioFrameSpeech;
363      rtp_header->type.Audio.isCNG = false;
364    }
365
366    // check if it's a DTMF event, hence something we can playout
367    if (telephone_event_packet) {
368      if (!telephone_event_forward_to_decoder_) {
369        // don't forward event to decoder
370        return 0;
371      }
372      std::set<uint8_t>::iterator first =
373          telephone_event_reported_.begin();
374      if (first != telephone_event_reported_.end() && *first > 15) {
375        // don't forward non DTMF events
376        return 0;
377      }
378    }
379  }
380  // TODO(holmer): Break this out to have RED parsing handled generically.
381  if (is_red && !(payload_data[0] & 0x80)) {
382    // we recive only one frame packed in a RED packet remove the RED wrapper
383    rtp_header->header.payloadType = payload_data[0];
384
385    // only one frame in the RED strip the one byte to help NetEq
386    return data_callback_->OnReceivedPayloadData(
387        payload_data + 1, payload_length - 1, rtp_header);
388  }
389
390  rtp_header->type.Audio.channel = audio_specific.channels;
391  return data_callback_->OnReceivedPayloadData(
392      payload_data, payload_length, rtp_header);
393}
394}  // namespace webrtc
395