1/*
2 * libjingle
3 * Copyright 2011 Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 *  1. Redistributions of source code must retain the above copyright notice,
9 *     this list of conditions and the following disclaimer.
10 *  2. Redistributions in binary form must reproduce the above copyright notice,
11 *     this list of conditions and the following disclaimer in the documentation
12 *     and/or other materials provided with the distribution.
13 *  3. The name of the author may not be used to endorse or promote products
14 *     derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "talk/session/media/currentspeakermonitor.h"
29
30#include "talk/media/base/streamparams.h"
31#include "talk/session/media/audiomonitor.h"
32#include "webrtc/base/logging.h"
33
34namespace cricket {
35
namespace {

// Audio level credited to the current speaker while they are briefly silent
// (treated as an inter-word pause) so that they are not displaced.
const int kMaxAudioLevel = 9;

// Default hold-off applied after a speaker switch, during which no further
// switch is allowed.  This keeps the current speaker from flapping.
const int kDefaultMinTimeBetweenSwitches = 1000;

}  // namespace
42
43CurrentSpeakerMonitor::CurrentSpeakerMonitor(
44    AudioSourceContext* audio_source_context)
45    : started_(false),
46      audio_source_context_(audio_source_context),
47      current_speaker_ssrc_(0),
48      earliest_permitted_switch_time_(0),
49      min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {}
50
51CurrentSpeakerMonitor::~CurrentSpeakerMonitor() {
52  Stop();
53}
54
55void CurrentSpeakerMonitor::Start() {
56  if (!started_) {
57    audio_source_context_->SignalAudioMonitor.connect(
58        this, &CurrentSpeakerMonitor::OnAudioMonitor);
59    audio_source_context_->SignalMediaStreamsUpdate.connect(
60        this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate);
61    audio_source_context_->SignalMediaStreamsReset.connect(
62        this, &CurrentSpeakerMonitor::OnMediaStreamsReset);
63
64    started_ = true;
65  }
66}
67
68void CurrentSpeakerMonitor::Stop() {
69  if (started_) {
70    audio_source_context_->SignalAudioMonitor.disconnect(this);
71    audio_source_context_->SignalMediaStreamsUpdate.disconnect(this);
72
73    started_ = false;
74    ssrc_to_speaking_state_map_.clear();
75    current_speaker_ssrc_ = 0;
76    earliest_permitted_switch_time_ = 0;
77  }
78}
79
80void CurrentSpeakerMonitor::set_min_time_between_switches(
81    uint32_t min_time_between_switches) {
82  min_time_between_switches_ = min_time_between_switches;
83}
84
85void CurrentSpeakerMonitor::OnAudioMonitor(
86    AudioSourceContext* audio_source_context, const AudioInfo& info) {
87  std::map<uint32_t, int> active_ssrc_to_level_map;
88  cricket::AudioInfo::StreamList::const_iterator stream_list_it;
89  for (stream_list_it = info.active_streams.begin();
90       stream_list_it != info.active_streams.end(); ++stream_list_it) {
91    uint32_t ssrc = stream_list_it->first;
92    active_ssrc_to_level_map[ssrc] = stream_list_it->second;
93
94    // It's possible we haven't yet added this source to our map.  If so,
95    // add it now with a "not speaking" state.
96    if (ssrc_to_speaking_state_map_.find(ssrc) ==
97        ssrc_to_speaking_state_map_.end()) {
98      ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING;
99    }
100  }
101
102  int max_level = 0;
103  uint32_t loudest_speaker_ssrc = 0;
104
105  // Update the speaking states of all participants based on the new audio
106  // level information.  Also retain loudest speaker.
107  std::map<uint32_t, SpeakingState>::iterator state_it;
108  for (state_it = ssrc_to_speaking_state_map_.begin();
109       state_it != ssrc_to_speaking_state_map_.end(); ++state_it) {
110    bool is_previous_speaker = current_speaker_ssrc_ == state_it->first;
111
112    // This uses a state machine in order to gradually identify
113    // members as having started or stopped speaking. Matches the
114    // algorithm used by the hangouts js code.
115
116    std::map<uint32_t, int>::const_iterator level_it =
117        active_ssrc_to_level_map.find(state_it->first);
118    // Note that the stream map only contains streams with non-zero audio
119    // levels.
120    int level = (level_it != active_ssrc_to_level_map.end()) ?
121        level_it->second : 0;
122    switch (state_it->second) {
123      case SS_NOT_SPEAKING:
124        if (level > 0) {
125          // Reset level because we don't think they're really speaking.
126          level = 0;
127          state_it->second = SS_MIGHT_BE_SPEAKING;
128        } else {
129          // State unchanged.
130        }
131        break;
132      case SS_MIGHT_BE_SPEAKING:
133        if (level > 0) {
134          state_it->second = SS_SPEAKING;
135        } else {
136          state_it->second = SS_NOT_SPEAKING;
137        }
138        break;
139      case SS_SPEAKING:
140        if (level > 0) {
141          // State unchanged.
142        } else {
143          state_it->second = SS_WAS_SPEAKING_RECENTLY1;
144          if (is_previous_speaker) {
145            // Assume this is an inter-word silence and assign him the highest
146            // volume.
147            level = kMaxAudioLevel;
148          }
149        }
150        break;
151      case SS_WAS_SPEAKING_RECENTLY1:
152        if (level > 0) {
153          state_it->second = SS_SPEAKING;
154        } else {
155          state_it->second = SS_WAS_SPEAKING_RECENTLY2;
156          if (is_previous_speaker) {
157            // Assume this is an inter-word silence and assign him the highest
158            // volume.
159            level = kMaxAudioLevel;
160          }
161        }
162        break;
163      case SS_WAS_SPEAKING_RECENTLY2:
164        if (level > 0) {
165          state_it->second = SS_SPEAKING;
166        } else {
167          state_it->second = SS_NOT_SPEAKING;
168        }
169        break;
170    }
171
172    if (level > max_level) {
173      loudest_speaker_ssrc = state_it->first;
174      max_level = level;
175    } else if (level > 0 && level == max_level && is_previous_speaker) {
176      // Favor continuity of loudest speakers if audio levels are equal.
177      loudest_speaker_ssrc = state_it->first;
178    }
179  }
180
181  // We avoid over-switching by disabling switching for a period of time after
182  // a switch is done.
183  uint32_t now = rtc::Time();
184  if (earliest_permitted_switch_time_ <= now &&
185      current_speaker_ssrc_ != loudest_speaker_ssrc) {
186    current_speaker_ssrc_ = loudest_speaker_ssrc;
187    LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_;
188    earliest_permitted_switch_time_ = now + min_time_between_switches_;
189    SignalUpdate(this, current_speaker_ssrc_);
190  }
191}
192
193void CurrentSpeakerMonitor::OnMediaStreamsUpdate(
194    AudioSourceContext* audio_source_context,
195    const MediaStreams& added,
196    const MediaStreams& removed) {
197  if (audio_source_context == audio_source_context_) {
198    // Update the speaking state map based on added and removed streams.
199    for (std::vector<cricket::StreamParams>::const_iterator
200           it = removed.audio().begin(); it != removed.audio().end(); ++it) {
201      ssrc_to_speaking_state_map_.erase(it->first_ssrc());
202    }
203
204    for (std::vector<cricket::StreamParams>::const_iterator
205           it = added.audio().begin(); it != added.audio().end(); ++it) {
206      ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING;
207    }
208  }
209}
210
211void CurrentSpeakerMonitor::OnMediaStreamsReset(
212    AudioSourceContext* audio_source_context) {
213  if (audio_source_context == audio_source_context_) {
214    ssrc_to_speaking_state_map_.clear();
215  }
216}
217
218}  // namespace cricket
219