currentspeakermonitor.cc revision dee76f3b89b9339699e0321a3afc643ee06afa09
1/* 2 * libjingle 3 * Copyright 2011 Google Inc. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28#include "talk/session/media/currentspeakermonitor.h" 29 30#include "talk/media/base/streamparams.h" 31#include "talk/session/media/audiomonitor.h" 32#include "webrtc/libjingle/media/mediamessages.h" 33#include "webrtc/base/logging.h" 34 35namespace cricket { 36 37namespace { 38const int kMaxAudioLevel = 9; 39// To avoid overswitching, we disable switching for a period of time after a 40// switch is done. 41const int kDefaultMinTimeBetweenSwitches = 1000; 42} 43 44CurrentSpeakerMonitor::CurrentSpeakerMonitor( 45 AudioSourceContext* audio_source_context, BaseSession* session) 46 : started_(false), 47 audio_source_context_(audio_source_context), 48 session_(session), 49 current_speaker_ssrc_(0), 50 earliest_permitted_switch_time_(0), 51 min_time_between_switches_(kDefaultMinTimeBetweenSwitches) { 52} 53 54CurrentSpeakerMonitor::~CurrentSpeakerMonitor() { 55 Stop(); 56} 57 58void CurrentSpeakerMonitor::Start() { 59 if (!started_) { 60 audio_source_context_->SignalAudioMonitor.connect( 61 this, &CurrentSpeakerMonitor::OnAudioMonitor); 62 audio_source_context_->SignalMediaStreamsUpdate.connect( 63 this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate); 64 audio_source_context_->SignalMediaStreamsReset.connect( 65 this, &CurrentSpeakerMonitor::OnMediaStreamsReset); 66 67 started_ = true; 68 } 69} 70 71void CurrentSpeakerMonitor::Stop() { 72 if (started_) { 73 audio_source_context_->SignalAudioMonitor.disconnect(this); 74 audio_source_context_->SignalMediaStreamsUpdate.disconnect(this); 75 76 started_ = false; 77 ssrc_to_speaking_state_map_.clear(); 78 current_speaker_ssrc_ = 0; 79 earliest_permitted_switch_time_ = 0; 80 } 81} 82 83void CurrentSpeakerMonitor::set_min_time_between_switches( 84 uint32 min_time_between_switches) { 85 min_time_between_switches_ = min_time_between_switches; 86} 87 88void CurrentSpeakerMonitor::OnAudioMonitor( 89 AudioSourceContext* audio_source_context, const AudioInfo& info) { 90 std::map<uint32, int> active_ssrc_to_level_map; 91 cricket::AudioInfo::StreamList::const_iterator stream_list_it; 92 for (stream_list_it = info.active_streams.begin(); 93 stream_list_it != info.active_streams.end(); ++stream_list_it) { 94 uint32 ssrc = stream_list_it->first; 95 active_ssrc_to_level_map[ssrc] = stream_list_it->second; 96 97 // It's possible we haven't yet added this source to our map. If so, 98 // add it now with a "not speaking" state. 99 if (ssrc_to_speaking_state_map_.find(ssrc) == 100 ssrc_to_speaking_state_map_.end()) { 101 ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING; 102 } 103 } 104 105 int max_level = 0; 106 uint32 loudest_speaker_ssrc = 0; 107 108 // Update the speaking states of all participants based on the new audio 109 // level information. Also retain loudest speaker. 110 std::map<uint32, SpeakingState>::iterator state_it; 111 for (state_it = ssrc_to_speaking_state_map_.begin(); 112 state_it != ssrc_to_speaking_state_map_.end(); ++state_it) { 113 bool is_previous_speaker = current_speaker_ssrc_ == state_it->first; 114 115 // This uses a state machine in order to gradually identify 116 // members as having started or stopped speaking. Matches the 117 // algorithm used by the hangouts js code. 118 119 std::map<uint32, int>::const_iterator level_it = 120 active_ssrc_to_level_map.find(state_it->first); 121 // Note that the stream map only contains streams with non-zero audio 122 // levels. 123 int level = (level_it != active_ssrc_to_level_map.end()) ? 124 level_it->second : 0; 125 switch (state_it->second) { 126 case SS_NOT_SPEAKING: 127 if (level > 0) { 128 // Reset level because we don't think they're really speaking. 129 level = 0; 130 state_it->second = SS_MIGHT_BE_SPEAKING; 131 } else { 132 // State unchanged. 133 } 134 break; 135 case SS_MIGHT_BE_SPEAKING: 136 if (level > 0) { 137 state_it->second = SS_SPEAKING; 138 } else { 139 state_it->second = SS_NOT_SPEAKING; 140 } 141 break; 142 case SS_SPEAKING: 143 if (level > 0) { 144 // State unchanged. 145 } else { 146 state_it->second = SS_WAS_SPEAKING_RECENTLY1; 147 if (is_previous_speaker) { 148 // Assume this is an inter-word silence and assign him the highest 149 // volume. 150 level = kMaxAudioLevel; 151 } 152 } 153 break; 154 case SS_WAS_SPEAKING_RECENTLY1: 155 if (level > 0) { 156 state_it->second = SS_SPEAKING; 157 } else { 158 state_it->second = SS_WAS_SPEAKING_RECENTLY2; 159 if (is_previous_speaker) { 160 // Assume this is an inter-word silence and assign him the highest 161 // volume. 162 level = kMaxAudioLevel; 163 } 164 } 165 break; 166 case SS_WAS_SPEAKING_RECENTLY2: 167 if (level > 0) { 168 state_it->second = SS_SPEAKING; 169 } else { 170 state_it->second = SS_NOT_SPEAKING; 171 } 172 break; 173 } 174 175 if (level > max_level) { 176 loudest_speaker_ssrc = state_it->first; 177 max_level = level; 178 } else if (level > 0 && level == max_level && is_previous_speaker) { 179 // Favor continuity of loudest speakers if audio levels are equal. 180 loudest_speaker_ssrc = state_it->first; 181 } 182 } 183 184 // We avoid over-switching by disabling switching for a period of time after 185 // a switch is done. 186 uint32 now = rtc::Time(); 187 if (earliest_permitted_switch_time_ <= now && 188 current_speaker_ssrc_ != loudest_speaker_ssrc) { 189 current_speaker_ssrc_ = loudest_speaker_ssrc; 190 LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_; 191 earliest_permitted_switch_time_ = now + min_time_between_switches_; 192 SignalUpdate(this, current_speaker_ssrc_); 193 } 194} 195 196void CurrentSpeakerMonitor::OnMediaStreamsUpdate( 197 AudioSourceContext* audio_source_context, BaseSession* session, 198 const MediaStreams& added, const MediaStreams& removed) { 199 200 if (audio_source_context == audio_source_context_ && session == session_) { 201 // Update the speaking state map based on added and removed streams. 202 for (std::vector<cricket::StreamParams>::const_iterator 203 it = removed.audio().begin(); it != removed.audio().end(); ++it) { 204 ssrc_to_speaking_state_map_.erase(it->first_ssrc()); 205 } 206 207 for (std::vector<cricket::StreamParams>::const_iterator 208 it = added.audio().begin(); it != added.audio().end(); ++it) { 209 ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING; 210 } 211 } 212} 213 214void CurrentSpeakerMonitor::OnMediaStreamsReset( 215 AudioSourceContext* audio_source_context, BaseSession* session) { 216 if (audio_source_context == audio_source_context_ && session == session_) { 217 ssrc_to_speaking_state_map_.clear(); 218 } 219} 220 221} // namespace cricket 222