1/*
2 * libjingle
3 * Copyright 2011 Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 *  1. Redistributions of source code must retain the above copyright notice,
9 *     this list of conditions and the following disclaimer.
10 *  2. Redistributions in binary form must reproduce the above copyright notice,
11 *     this list of conditions and the following disclaimer in the documentation
12 *     and/or other materials provided with the distribution.
13 *  3. The name of the author may not be used to endorse or promote products
14 *     derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28// CurrentSpeakerMonitor monitors the audio levels for a session and determines
29// which participant is currently speaking.
30
31#ifndef TALK_SESSION_MEDIA_CURRENTSPEAKERMONITOR_H_
32#define TALK_SESSION_MEDIA_CURRENTSPEAKERMONITOR_H_
33
34#include <map>
35
36#include "webrtc/base/basictypes.h"
37#include "webrtc/base/sigslot.h"
38
39namespace cricket {
40
// Forward declarations.  The declarations in this header only refer to
// BaseSession, AudioInfo and MediaStreams through pointers or references, so
// the full definitions are not needed here.  Session is not referenced by
// anything visible in this header; it is kept in case includers rely on it.
struct AudioInfo;
struct MediaStreams;

class BaseSession;
class Session;
45
46class AudioSourceContext {
47 public:
48  sigslot::signal2<AudioSourceContext*, const cricket::AudioInfo&>
49      SignalAudioMonitor;
50  sigslot::signal2<AudioSourceContext*, cricket::BaseSession*>
51      SignalMediaStreamsReset;
52  sigslot::signal4<AudioSourceContext*, cricket::BaseSession*,
53      const cricket::MediaStreams&, const cricket::MediaStreams&>
54          SignalMediaStreamsUpdate;
55};
56
// CurrentSpeakerMonitor can be used to monitor the audio levels from
// many audio sources and report on changes in the loudest audio source.
// It's a generic type and relies on an AudioSourceContext which is aware of
// the audio sources. AudioSourceContext needs to provide these signals:
// SignalAudioMonitor - provides audio info for all of the current speakers.
// SignalMediaStreamsUpdate - provides updates when a speaker leaves or joins.
// A third signal, SignalMediaStreamsReset, also has a matching handler here
// (OnMediaStreamsReset).
// Note that the AudioSourceContext's audio monitor must be started
// before this is started.
// It's recommended that the audio monitor be started with a 100 ms period.
66class CurrentSpeakerMonitor : public sigslot::has_slots<> {
67 public:
68  CurrentSpeakerMonitor(AudioSourceContext* audio_source_context,
69                        BaseSession* session);
70  ~CurrentSpeakerMonitor();
71
72  BaseSession* session() const { return session_; }
73
74  void Start();
75  void Stop();
76
77  // Used by tests.  Note that the actual minimum time between switches
78  // enforced by the monitor will be the given value plus or minus the
79  // resolution of the system clock.
80  void set_min_time_between_switches(uint32 min_time_between_switches);
81
82  // This is fired when the current speaker changes, and provides his audio
83  // SSRC.  This only fires after the audio monitor on the underlying
84  // AudioSourceContext has been started.
85  sigslot::signal2<CurrentSpeakerMonitor*, uint32> SignalUpdate;
86
87 private:
88  void OnAudioMonitor(AudioSourceContext* audio_source_context,
89                      const AudioInfo& info);
90  void OnMediaStreamsUpdate(AudioSourceContext* audio_source_context,
91                            BaseSession* session,
92                            const MediaStreams& added,
93                            const MediaStreams& removed);
94  void OnMediaStreamsReset(AudioSourceContext* audio_source_context,
95                           BaseSession* session);
96
97  // These are states that a participant will pass through so that we gradually
98  // recognize that they have started and stopped speaking.  This avoids
99  // "twitchiness".
100  enum SpeakingState {
101    SS_NOT_SPEAKING,
102    SS_MIGHT_BE_SPEAKING,
103    SS_SPEAKING,
104    SS_WAS_SPEAKING_RECENTLY1,
105    SS_WAS_SPEAKING_RECENTLY2
106  };
107
108  bool started_;
109  AudioSourceContext* audio_source_context_;
110  BaseSession* session_;
111  std::map<uint32, SpeakingState> ssrc_to_speaking_state_map_;
112  uint32 current_speaker_ssrc_;
113  // To prevent overswitching, switching is disabled for some time after a
114  // switch is made.  This gives us the earliest time a switch is permitted.
115  uint32 earliest_permitted_switch_time_;
116  uint32 min_time_between_switches_;
117};
118
119}
120
121#endif  // TALK_SESSION_MEDIA_CURRENTSPEAKERMONITOR_H_
122