1// libjingle
2// Copyright 2010 Google Inc.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are met:
6//
7//  1. Redistributions of source code must retain the above copyright notice,
8//     this list of conditions and the following disclaimer.
9//  2. Redistributions in binary form must reproduce the above copyright notice,
10//     this list of conditions and the following disclaimer in the documentation
11//     and/or other materials provided with the distribution.
12//  3. The name of the author may not be used to endorse or promote products
13//     derived from this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
16// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25//
26// Declaration of abstract class VideoCapturer
27
28#ifndef TALK_MEDIA_BASE_VIDEOCAPTURER_H_
29#define TALK_MEDIA_BASE_VIDEOCAPTURER_H_
30
31#include <string>
32#include <vector>
33
34#include "talk/media/base/mediachannel.h"
35#include "talk/media/base/videoadapter.h"
36#include "talk/media/base/videocommon.h"
37#include "talk/media/base/videoframefactory.h"
38#include "talk/media/devices/devicemanager.h"
39#include "webrtc/base/basictypes.h"
40#include "webrtc/base/criticalsection.h"
41#include "webrtc/base/messagehandler.h"
42#include "webrtc/base/rollingaccumulator.h"
43#include "webrtc/base/scoped_ptr.h"
44#include "webrtc/base/sigslot.h"
45#include "webrtc/base/thread.h"
46#include "webrtc/base/timing.h"
47
48
49namespace cricket {
50
51class VideoProcessor;
52
53// Current state of the capturer.
54// TODO(hellner): CS_NO_DEVICE is an error code not a capture state. Separate
55//                error codes and states.
56enum CaptureState {
57  CS_STOPPED,    // The capturer has been stopped or hasn't started yet.
58  CS_STARTING,   // The capturer is in the process of starting. Note, it may
59                 // still fail to start.
60  CS_RUNNING,    // The capturer has been started successfully and is now
61                 // capturing.
62  CS_PAUSED,     // The capturer has been paused.
63  CS_FAILED,     // The capturer failed to start.
64  CS_NO_DEVICE,  // The capturer has no device and consequently failed to start.
65};
66
67class VideoFrame;
68
69struct CapturedFrame {
70  static const uint32 kFrameHeaderSize = 40;  // Size from width to data_size.
71  static const uint32 kUnknownDataSize = 0xFFFFFFFF;
72
73  CapturedFrame();
74
  // Get the number of bytes of the frame data. If data_size is known, return
  // it directly. Otherwise, calculate the size based on width, height, and
  // fourcc. Return true on success.
78  bool GetDataSize(uint32* size) const;
79
  // The width and height of the captured frame could be different from those
  // of VideoFormat. Once the first frame is captured, the width, height,
  // fourcc, pixel_width, and pixel_height should remain the same across
  // frames.
83  int    width;         // in number of pixels
84  int    height;        // in number of pixels
85  uint32 fourcc;        // compression
86  uint32 pixel_width;   // width of a pixel, default is 1
87  uint32 pixel_height;  // height of a pixel, default is 1
88  int64  elapsed_time;  // elapsed time since the creation of the frame
89                        // source (that is, the camera), in nanoseconds.
90  int64  time_stamp;    // timestamp of when the frame was captured, in unix
91                        // time with nanosecond units.
92  uint32 data_size;     // number of bytes of the frame data
93  int    rotation;      // rotation in degrees of the frame (0, 90, 180, 270)
  void*  data;          // pointer to the frame data. This object allocates the
                        // memory or points to existing memory.
96
97 private:
98  DISALLOW_COPY_AND_ASSIGN(CapturedFrame);
99};
100
// VideoCapturer is an abstract class that defines the interfaces for video
// capturing. Subclasses implement the video capturer for various capturer
// types and platforms.
//
// The captured frames may need to be adapted (for example, cropped).
// Video adaptation is built in and enabled by default. After a frame has
// been captured from the device, it is sent to the video adapter, then to the
// video processors, and finally out to the encoder.
109//
// Programming model (see the example sketch at the end of this comment):
111//   Create an object of a subclass of VideoCapturer
112//   Initialize
113//   SignalStateChange.connect()
114//   SignalFrameCaptured.connect()
//   Find the capture format for Start() by either calling GetSupportedFormats()
//   and selecting one of the supported formats, or by calling
//   GetBestCaptureFormat().
117//   video_adapter()->OnOutputFormatRequest(desired_encoding_format)
118//   Start()
119//   GetCaptureFormat() optionally
120//   Stop()
121//
// Assumption:
//   The Start() and Stop() methods are called by a single thread (e.g., the
//   media engine thread). Hence, the VideoCapturer subclasses don't need to be
//   thread-safe.
126//
127class VideoCapturer
128    : public sigslot::has_slots<>,
129      public rtc::MessageHandler {
130 public:
131  typedef std::vector<VideoProcessor*> VideoProcessors;
132
133  // All signals are marshalled to |thread| or the creating thread if
134  // none is provided.
135  VideoCapturer();
136  explicit VideoCapturer(rtc::Thread* thread);
137  virtual ~VideoCapturer() {}
138
139  // Gets the id of the underlying device, which is available after the capturer
140  // is initialized. Can be used to determine if two capturers reference the
141  // same device.
142  const std::string& GetId() const { return id_; }
143
  // Get the capture formats supported by the video capturer. The supported
  // formats are non-empty after the device has been opened successfully.
146  const std::vector<VideoFormat>* GetSupportedFormats() const;
147
  // Get the best capture format for the desired format. The best format is the
  // same as one of the supported formats, except that the frame interval may
  // be different. If the application asks for 16x9 and the camera does not
  // support 16x9 HD, or if the application asks for 16x10, we find the closest
  // 4x3 and then crop; otherwise, we find what the application asks for. Note
  // that we assume that for HD, the desired format is always 16x9. Subclasses
  // can override the default implementation.
  // Parameters
  //   desired: the input desired format. If desired.fourcc is not kAnyFourcc,
  //            the best capture format has exactly the same fourcc. Otherwise,
  //            the best capture format uses a fourcc in GetPreferredFourccs().
  //   best_format: the output best capture format.
  // Return false if there is no such best format; that is, the desired format
  // is not supported.
162  virtual bool GetBestCaptureFormat(const VideoFormat& desired,
163                                    VideoFormat* best_format);
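
  // Example: the fourcc in |desired| controls fourcc matching. A sketch only;
  // FOURCC_ANY and FOURCC_I420 come from videocommon.h.
  //
  //   VideoFormat any(640, 480, VideoFormat::FpsToInterval(30), FOURCC_ANY);
  //   VideoFormat i420(640, 480, VideoFormat::FpsToInterval(30), FOURCC_I420);
  //   VideoFormat best;
  //   capturer->GetBestCaptureFormat(any, &best);   // Any preferred fourcc.
  //   capturer->GetBestCaptureFormat(i420, &best);  // Must match I420 exactly.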
164
165  // TODO(hellner): deprecate (make private) the Start API in favor of this one.
166  //                Also remove CS_STARTING as it is implied by the return
167  //                value of StartCapturing().
168  bool StartCapturing(const VideoFormat& capture_format);
169  // Start the video capturer with the specified capture format.
170  // Parameter
  //   capture_format: The caller got this parameter by either calling
  //                   GetSupportedFormats() and selecting one of the supported
  //                   formats, or by calling GetBestCaptureFormat().
  // Return
  //   CS_STARTING:  The capturer is trying to start. Success or failure will
  //                 be notified via the |SignalStateChange| callback.
  //   CS_RUNNING:   The capturer has started and is capturing.
  //   CS_PAUSED:    Will never be returned.
  //   CS_FAILED:    The capturer failed to start.
  //   CS_NO_DEVICE: The capturer has no device and failed to start.
181  virtual CaptureState Start(const VideoFormat& capture_format) = 0;
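
  // Example: handling the possible return values of Start(). A sketch;
  // HandleFailure is a hypothetical helper, not part of this API.
  //
  //   CaptureState state = capturer->Start(best_format);
  //   if (state == CS_STARTING) {
  //     // Asynchronous start; wait for SignalStateChange to report
  //     // CS_RUNNING or CS_FAILED.
  //   } else if (state == CS_RUNNING) {
  //     // Frames will begin arriving via SignalFrameCaptured.
  //   } else {
  //     HandleFailure(state);  // CS_FAILED or CS_NO_DEVICE.
  //   }
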
  // Sets the desired aspect ratio. If the capturer is capturing at another
  // aspect ratio, it will crop the width or the height so that the requested
  // aspect ratio is achieved. Note that ratio_w and ratio_h do not need to be
  // relatively prime.
186  void UpdateAspectRatio(int ratio_w, int ratio_h);
187  void ClearAspectRatio();
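
  // Example: request 16:9 output; frames captured at another ratio (e.g. 4:3)
  // will be cropped to match. A sketch of typical use only.
  //
  //   capturer->UpdateAspectRatio(16, 9);
  //   ...
  //   capturer->ClearAspectRatio();  // Return to the unmodified aspect ratio.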
188
189  // Get the current capture format, which is set by the Start() call.
190  // Note that the width and height of the captured frames may differ from the
191  // capture format. For example, the capture format is HD but the captured
192  // frames may be smaller than HD.
193  const VideoFormat* GetCaptureFormat() const {
194    return capture_format_.get();
195  }
196
197  // Pause the video capturer.
198  virtual bool Pause(bool paused);
199  // Stop the video capturer.
200  virtual void Stop() = 0;
201  // Check if the video capturer is running.
202  virtual bool IsRunning() = 0;
203  // Restart the video capturer with the new |capture_format|.
204  // Default implementation stops and starts the capturer.
205  virtual bool Restart(const VideoFormat& capture_format);
206  // TODO(thorcarpenter): This behavior of keeping the camera open just to emit
207  // black frames is a total hack and should be fixed.
208  // When muting, produce black frames then pause the camera.
209  // When unmuting, start the camera. Camera starts unmuted.
210  virtual bool MuteToBlackThenPause(bool muted);
211  virtual bool IsMuted() const {
212    return muted_;
213  }
214  CaptureState capture_state() const {
215    return capture_state_;
216  }
217
  // Adds a video processor that will be applied to VideoFrames returned by
  // |SignalVideoFrame|. Multiple video processors can be added. The video
  // processors will be applied in the order they were added.
  void AddVideoProcessor(VideoProcessor* video_processor);
  // Removes |video_processor| from the list of video processors. Returns false
  // if the processor was not found.
  bool RemoveVideoProcessor(VideoProcessor* video_processor);
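
  // Example: processors run in the order they were added. |effect| and
  // |overlay| are instances of hypothetical VideoProcessor subclasses defined
  // elsewhere, not part of this header.
  //
  //   capturer->AddVideoProcessor(&effect);   // Applied first.
  //   capturer->AddVideoProcessor(&overlay);  // Applied second.
  //   ...
  //   capturer->RemoveVideoProcessor(&overlay);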
225
226  // Returns true if the capturer is screencasting. This can be used to
227  // implement screencast specific behavior.
228  virtual bool IsScreencast() const = 0;
229
  // Caps the VideoCapturer's format according to max_format. It can, for
  // example, be used to prevent cameras from capturing at a resolution or
  // frame rate that the capturer is capable of but does not perform
  // satisfactorily at. The cap is an upper bound for each component of the
  // capture format. The fourcc component is ignored.
235  void ConstrainSupportedFormats(const VideoFormat& max_format);
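
  // Example: never deliver capture formats above 1280x720 at 30 fps, even if
  // the device advertises larger ones. A sketch; the fourcc passed here is
  // ignored.
  //
  //   capturer->ConstrainSupportedFormats(
  //       VideoFormat(1280, 720, VideoFormat::FpsToInterval(30), FOURCC_ANY));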
236
237  void set_enable_camera_list(bool enable_camera_list) {
238    enable_camera_list_ = enable_camera_list;
239  }
240  bool enable_camera_list() {
241    return enable_camera_list_;
242  }
243
244  // Enable scaling to ensure square pixels.
245  void set_square_pixel_aspect_ratio(bool square_pixel_aspect_ratio) {
246    square_pixel_aspect_ratio_ = square_pixel_aspect_ratio;
247  }
248  bool square_pixel_aspect_ratio() {
249    return square_pixel_aspect_ratio_;
250  }
251
252  // Signal all capture state changes that are not a direct result of calling
253  // Start().
254  sigslot::signal2<VideoCapturer*, CaptureState> SignalStateChange;
  // Frame callbacks are multithreaded to allow disconnect and connect to be
  // called concurrently. This also ensures that it is safe to call disconnect
  // at any time, which is needed since the signal may be called from an
  // unmarshalled thread owned by the VideoCapturer.
259  // Signal the captured frame to downstream.
260  sigslot::signal2<VideoCapturer*, const CapturedFrame*,
261                   sigslot::multi_threaded_local> SignalFrameCaptured;
262  // Signal the captured and possibly adapted frame to downstream consumers
263  // such as the encoder.
264  sigslot::signal2<VideoCapturer*, const VideoFrame*,
265                   sigslot::multi_threaded_local> SignalVideoFrame;
266
267  const VideoProcessors& video_processors() const { return video_processors_; }
268
  // If 'screencast_max_pixels' is set greater than zero, screencasts will be
  // scaled to be no larger than this value.
  // If set to zero, the max pixels will be limited to the Retina MacBook Pro
  // 15" resolution of 2880 x 1800.
  // For high fps, the maximum pixel limit is based on the common 24" monitor
  // resolution of 2048 x 1280.
275  int screencast_max_pixels() const { return screencast_max_pixels_; }
276  void set_screencast_max_pixels(int p) {
277    screencast_max_pixels_ = rtc::_max(0, p);
278  }
279
280  // If true, run video adaptation. By default, video adaptation is enabled
281  // and users must call video_adapter()->OnOutputFormatRequest()
282  // to receive frames.
283  bool enable_video_adapter() const { return enable_video_adapter_; }
284  void set_enable_video_adapter(bool enable_video_adapter) {
285    enable_video_adapter_ = enable_video_adapter;
286  }
287
288  CoordinatedVideoAdapter* video_adapter() { return &video_adapter_; }
289  const CoordinatedVideoAdapter* video_adapter() const {
290    return &video_adapter_;
291  }
292
293  // Takes ownership.
294  void set_frame_factory(VideoFrameFactory* frame_factory) {
295    frame_factory_.reset(frame_factory);
296  }
297
298  // Gets statistics for tracked variables recorded since the last call to
299  // GetStats.  Note that calling GetStats resets any gathered data so it
300  // should be called only periodically to log statistics.
301  void GetStats(VariableInfo<int>* adapt_drop_stats,
302                VariableInfo<int>* effect_drop_stats,
303                VariableInfo<double>* frame_time_stats,
304                VideoFormat* last_captured_frame_format);
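
  // Example: periodic stats polling. A sketch; LogCaptureStats is a
  // hypothetical sink. Remember that each GetStats() call resets the
  // accumulated data.
  //
  //   VariableInfo<int> adapt_drops;
  //   VariableInfo<int> effect_drops;
  //   VariableInfo<double> frame_times;
  //   VideoFormat last_format;
  //   capturer->GetStats(&adapt_drops, &effect_drops, &frame_times,
  //                      &last_format);
  //   LogCaptureStats(adapt_drops, effect_drops, frame_times, last_format);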
305
306 protected:
  // Callback attached to SignalFrameCaptured where SignalVideoFrame is called.
308  void OnFrameCaptured(VideoCapturer* video_capturer,
309                       const CapturedFrame* captured_frame);
310  void SetCaptureState(CaptureState state);
311
312  // Marshals SignalStateChange onto thread_.
313  void OnMessage(rtc::Message* message);
314
315  // subclasses override this virtual method to provide a vector of fourccs, in
316  // order of preference, that are expected by the media engine.
317  virtual bool GetPreferredFourccs(std::vector<uint32>* fourccs) = 0;
318
319  // mutators to set private attributes
320  void SetId(const std::string& id) {
321    id_ = id;
322  }
323
324  void SetCaptureFormat(const VideoFormat* format) {
325    capture_format_.reset(format ? new VideoFormat(*format) : NULL);
326    if (capture_format_) {
327      ASSERT(capture_format_->interval > 0 &&
328             "Capture format expected to have positive interval.");
329      // Video adapter really only cares about capture format interval.
330      video_adapter_.SetInputFormat(*capture_format_);
331    }
332  }
333
334  void SetSupportedFormats(const std::vector<VideoFormat>& formats);
335  VideoFrameFactory* frame_factory() { return frame_factory_.get(); }
336
337 private:
338  void Construct();
339  // Get the distance between the desired format and the supported format.
340  // Return the max distance if they mismatch. See the implementation for
341  // details.
342  int64 GetFormatDistance(const VideoFormat& desired,
343                          const VideoFormat& supported);
344
345  // Convert captured frame to readable string for LOG messages.
346  std::string ToString(const CapturedFrame* frame) const;
347
  // Applies all registered processors. If any of the processors signals that
  // the frame should be dropped, the return value will be false. Note that in
  // that case the frame should be dropped, as not all processors were applied.
351  bool ApplyProcessors(VideoFrame* video_frame);
352
353  // Updates filtered_supported_formats_ so that it contains the formats in
354  // supported_formats_ that fulfill all applied restrictions.
355  void UpdateFilteredSupportedFormats();
356  // Returns true if format doesn't fulfill all applied restrictions.
357  bool ShouldFilterFormat(const VideoFormat& format) const;
358
359  void UpdateStats(const CapturedFrame* captured_frame);
360
361  // Helper function to save statistics on the current data from a
362  // RollingAccumulator into stats.
363  template<class T>
364  static void GetVariableSnapshot(
365      const rtc::RollingAccumulator<T>& data,
366      VariableInfo<T>* stats);
367
368  rtc::Thread* thread_;
369  std::string id_;
370  CaptureState capture_state_;
371  rtc::scoped_ptr<VideoFrameFactory> frame_factory_;
372  rtc::scoped_ptr<VideoFormat> capture_format_;
373  std::vector<VideoFormat> supported_formats_;
374  rtc::scoped_ptr<VideoFormat> max_format_;
375  std::vector<VideoFormat> filtered_supported_formats_;
376
377  int ratio_w_;  // View resolution. e.g. 1280 x 720.
378  int ratio_h_;
379  bool enable_camera_list_;
380  bool square_pixel_aspect_ratio_;  // Enable scaling to square pixels.
381  int scaled_width_;  // Current output size from ComputeScale.
382  int scaled_height_;
383  int screencast_max_pixels_;  // Downscale screencasts further if requested.
384  bool muted_;
385  int black_frame_count_down_;
386
387  bool enable_video_adapter_;
388  CoordinatedVideoAdapter video_adapter_;
389
390  rtc::Timing frame_length_time_reporter_;
391  rtc::CriticalSection frame_stats_crit_;
392
393  int adapt_frame_drops_;
394  rtc::RollingAccumulator<int> adapt_frame_drops_data_;
395  int effect_frame_drops_;
396  rtc::RollingAccumulator<int> effect_frame_drops_data_;
397  double previous_frame_time_;
398  rtc::RollingAccumulator<double> frame_time_data_;
  // The captured frame format before potential adaptation.
400  VideoFormat last_captured_frame_format_;
401
402  rtc::CriticalSection crit_;
403  VideoProcessors video_processors_;
404
405  DISALLOW_COPY_AND_ASSIGN(VideoCapturer);
406};
407
408}  // namespace cricket
409
410#endif  // TALK_MEDIA_BASE_VIDEOCAPTURER_H_
411