audio_low_latency_output_win.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/audio/win/audio_low_latency_output_win.h"

#include <Functiondiscoverykeys_devpkey.h>

#include "base/command_line.h"
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/metrics/histogram.h"
#include "base/utf_string_conversions.h"
#include "media/audio/audio_util.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/base/limits.h"
#include "media/base/media_switches.h"

using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;
using base::win::ScopedCoMem;

namespace media {

typedef uint32 ChannelConfig;

// Retrieves the stream format that the audio engine uses for its internal
// processing/mixing of shared-mode streams.
static HRESULT GetMixFormat(ERole device_role, WAVEFORMATEX** device_format) {
  // Note that we are using the IAudioClient::GetMixFormat() API to get the
  // device format in this function. It is in fact possible to be "more
  // native", and ask the endpoint device directly for its properties. Given a
  // reference to the IMMDevice interface of an endpoint object, a client can
  // obtain a reference to the endpoint object's property store by calling the
  // IMMDevice::OpenPropertyStore() method. However, I have not been able to
  // access any valuable information using this method on my HP Z600 desktop,
  // hence it feels more appropriate to use the IAudioClient::GetMixFormat()
  // approach instead.

  // Calling this function only makes sense for shared-mode streams, since,
  // if the device is opened in exclusive mode, the application-specified
  // format is used instead. However, the result of this method can be useful
  // for testing purposes, so we don't DCHECK here.
  DLOG_IF(WARNING, WASAPIAudioOutputStream::GetShareMode() ==
          AUDCLNT_SHAREMODE_EXCLUSIVE) <<
      "The mixing sample rate will be ignored for exclusive-mode streams.";

  // It is assumed that this static method is called from a COM thread, i.e.,
  // CoInitializeEx() is not called here again to avoid STA/MTA conflicts.
  ScopedComPtr<IMMDeviceEnumerator> enumerator;
  HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),
                                NULL,
                                CLSCTX_INPROC_SERVER,
                                __uuidof(IMMDeviceEnumerator),
                                enumerator.ReceiveVoid());
  if (FAILED(hr))
    return hr;

  ScopedComPtr<IMMDevice> endpoint_device;
  hr = enumerator->GetDefaultAudioEndpoint(eRender,
                                           device_role,
                                           endpoint_device.Receive());
  if (FAILED(hr))
    return hr;

  ScopedComPtr<IAudioClient> audio_client;
  hr = endpoint_device->Activate(__uuidof(IAudioClient),
                                 CLSCTX_INPROC_SERVER,
                                 NULL,
                                 audio_client.ReceiveVoid());
  return SUCCEEDED(hr) ? audio_client->GetMixFormat(device_format) : hr;
}

// Retrieves an integer mask which corresponds to the channel layout the
// audio engine uses for its internal processing/mixing of shared-mode
// streams. This mask indicates which channels are present in the multi-
// channel stream. The least significant bit corresponds to the Front Left
// speaker, the next least significant bit corresponds to the Front Right
// speaker, and so on, continuing in the order defined in KsMedia.h.
// See http://msdn.microsoft.com/en-us/library/windows/hardware/ff537083(v=vs.85).aspx
// for more details.
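// For example, KSAUDIO_SPEAKER_STEREO is defined as
// (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT) = 0x1 | 0x2 = 0x3, and
// KSAUDIO_SPEAKER_5POINT1 adds Front Center, LFE, Back Left and Back Right
// for a mask of 0x3F.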
static ChannelConfig GetChannelConfig() {
  // Use a WAVEFORMATEXTENSIBLE structure since it can specify both the
  // number of channels and the mapping of channels to speakers for
  // multichannel devices.
  base::win::ScopedCoMem<WAVEFORMATPCMEX> format_ex;
  HRESULT hr = GetMixFormat(
      eConsole, reinterpret_cast<WAVEFORMATEX**>(&format_ex));
  if (FAILED(hr))
    return 0;

  // The dwChannelMask member specifies which channels are present in the
  // multichannel stream. The least significant bit corresponds to the
  // front left speaker, the next least significant bit corresponds to the
  // front right speaker, and so on.
  // See http://msdn.microsoft.com/en-us/library/windows/desktop/dd757714(v=vs.85).aspx
  // for more details on the channel mapping.
  DVLOG(2) << "dwChannelMask: 0x" << std::hex << format_ex->dwChannelMask;

#if !defined(NDEBUG)
  // See http://en.wikipedia.org/wiki/Surround_sound for more details on
  // how to name various speaker configurations. The list below is not
  // complete.
  const char* speaker_config = "Undefined";
  switch (format_ex->dwChannelMask) {
    case KSAUDIO_SPEAKER_MONO:
      speaker_config = "Mono";
      break;
    case KSAUDIO_SPEAKER_STEREO:
      speaker_config = "Stereo";
      break;
    case KSAUDIO_SPEAKER_5POINT1_SURROUND:
      speaker_config = "5.1 surround";
      break;
    case KSAUDIO_SPEAKER_5POINT1:
      speaker_config = "5.1";
      break;
    case KSAUDIO_SPEAKER_7POINT1_SURROUND:
      speaker_config = "7.1 surround";
      break;
    case KSAUDIO_SPEAKER_7POINT1:
      speaker_config = "7.1";
      break;
    default:
      break;
  }
  DVLOG(2) << "speaker configuration: " << speaker_config;
#endif

  return static_cast<ChannelConfig>(format_ex->dwChannelMask);
}

// Converts Microsoft's channel configuration to ChannelLayout.
// This mapping is not perfect but the best we can do given the current
// ChannelLayout enumerator and the Windows-specific speaker configurations
// defined in ksmedia.h. Don't assume that the channel ordering in
// ChannelLayout is exactly the same as the Windows-specific configuration.
// As an example: KSAUDIO_SPEAKER_7POINT1_SURROUND is mapped to
// CHANNEL_LAYOUT_7_1 but the positions of Back L, Back R and Side L, Side R
// speakers are different in these two definitions.
static ChannelLayout ChannelConfigToChannelLayout(ChannelConfig config) {
  switch (config) {
    case KSAUDIO_SPEAKER_DIRECTOUT:
      return CHANNEL_LAYOUT_NONE;
    case KSAUDIO_SPEAKER_MONO:
      return CHANNEL_LAYOUT_MONO;
    case KSAUDIO_SPEAKER_STEREO:
      return CHANNEL_LAYOUT_STEREO;
    case KSAUDIO_SPEAKER_QUAD:
      return CHANNEL_LAYOUT_QUAD;
    case KSAUDIO_SPEAKER_SURROUND:
      return CHANNEL_LAYOUT_4_0;
    case KSAUDIO_SPEAKER_5POINT1:
      return CHANNEL_LAYOUT_5_1_BACK;
    case KSAUDIO_SPEAKER_5POINT1_SURROUND:
      return CHANNEL_LAYOUT_5_1;
    case KSAUDIO_SPEAKER_7POINT1:
      return CHANNEL_LAYOUT_7_1_WIDE;
    case KSAUDIO_SPEAKER_7POINT1_SURROUND:
      return CHANNEL_LAYOUT_7_1;
    default:
      DVLOG(1) << "Unsupported channel layout: " << config;
      return CHANNEL_LAYOUT_UNSUPPORTED;
  }
}

// static
AUDCLNT_SHAREMODE WASAPIAudioOutputStream::GetShareMode() {
  const CommandLine* cmd_line = CommandLine::ForCurrentProcess();
  if (cmd_line->HasSwitch(switches::kEnableExclusiveAudio))
    return AUDCLNT_SHAREMODE_EXCLUSIVE;
  return AUDCLNT_SHAREMODE_SHARED;
}

WASAPIAudioOutputStream::WASAPIAudioOutputStream(AudioManagerWin* manager,
                                                 const AudioParameters& params,
                                                 ERole device_role)
    : creating_thread_id_(base::PlatformThread::CurrentId()),
      manager_(manager),
      opened_(false),
      restart_rendering_mode_(false),
      volume_(1.0),
      endpoint_buffer_size_frames_(0),
      device_role_(device_role),
      share_mode_(GetShareMode()),
      client_channel_count_(params.channels()),
      num_written_frames_(0),
      source_(NULL),
      audio_bus_(AudioBus::Create(params)) {
  DCHECK(manager_);

  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  bool avrt_init = avrt::Initialize();
  DCHECK(avrt_init) << "Failed to load avrt.dll";

  if (share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE) {
    VLOG(1) << ">> Note that EXCLUSIVE MODE is enabled <<";
  }

  // Set up the desired render format specified by the client. We use the
  // WAVE_FORMAT_EXTENSIBLE structure to ensure that multichannel ordering
  // and high-precision data can be supported.

  // Begin with the WAVEFORMATEX structure that specifies the basic format.
  WAVEFORMATEX* format = &format_.Format;
  format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
  format->nChannels = client_channel_count_;
  format->nSamplesPerSec = params.sample_rate();
  format->wBitsPerSample = params.bits_per_sample();
  format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
  format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
  format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
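  // As an example: for 16-bit stereo, nBlockAlign is (16 / 8) * 2 = 4 bytes
  // per audio frame, and at 48 kHz nAvgBytesPerSec becomes 48000 * 4 =
  // 192000 bytes per second.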

  // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
  format_.Samples.wValidBitsPerSample = params.bits_per_sample();
  format_.dwChannelMask = GetChannelConfig();
  format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;

  // Size in bytes of each audio frame.
  frame_size_ = format->nBlockAlign;

  // Store the size (in different units) of the audio packets which we expect
  // to get from the audio endpoint device in each render event.
  packet_size_frames_ = params.GetBytesPerBuffer() / format->nBlockAlign;
  packet_size_bytes_ = params.GetBytesPerBuffer();
  packet_size_ms_ = (1000.0 * packet_size_frames_) / params.sample_rate();
  DVLOG(1) << "Number of bytes per audio frame  : " << frame_size_;
  DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;
  DVLOG(1) << "Number of bytes per packet       : " << packet_size_bytes_;
  DVLOG(1) << "Number of milliseconds per packet: " << packet_size_ms_;
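  // A worked example, assuming a typical shared-mode format of 48 kHz,
  // 16-bit stereo: frame_size_ is 4 bytes, so a 480-frame packet
  // corresponds to 1920 bytes, or 10 ms of audio.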

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time
  // a buffer becomes ready to be processed by the client.
  audio_samples_render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(audio_samples_render_event_.IsValid());

  // Create the event which will be set in Stop() when rendering shall stop.
  stop_render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(stop_render_event_.IsValid());

  // Create the event which will be set when a stream switch shall take place.
  stream_switch_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(stream_switch_event_.IsValid());
}

WASAPIAudioOutputStream::~WASAPIAudioOutputStream() {}

bool WASAPIAudioOutputStream::Open() {
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (opened_)
    return true;

  // Channel mixing is not supported; it must be handled by ChannelMixer.
  if (format_.Format.nChannels != client_channel_count_) {
    LOG(ERROR) << "Channel down-mixing is not supported.";
    return false;
  }

  // Create an IMMDeviceEnumerator interface and obtain a reference to
  // the IMMDevice interface of the default rendering device with the
  // specified role.
  HRESULT hr = SetRenderDevice();
  if (FAILED(hr)) {
    return false;
  }

  // Obtain an IAudioClient interface which enables us to create and initialize
  // an audio stream between an audio application and the audio engine.
  hr = ActivateRenderDevice();
  if (FAILED(hr)) {
    return false;
  }

  // Verify that the selected audio endpoint supports the format specified
  // during construction.
  // In exclusive mode, the client can choose to open the stream in any audio
  // format that the endpoint device supports. In shared mode, the client must
  // open the stream in the mix format that is currently in use by the audio
  // engine (or a format that is similar to the mix format). The audio engine's
  // input streams and the output mix from the engine are all in this format.
  if (!DesiredFormatIsSupported()) {
    return false;
  }

  // Initialize the audio stream between the client and the device using
  // shared or exclusive mode and the lowest possible glitch-free latency.
  // We will enter different code paths depending on the specified share mode.
  hr = InitializeAudioEngine();
  if (FAILED(hr)) {
    return false;
  }

  opened_ = true;
  return true;
}

void WASAPIAudioOutputStream::Start(AudioSourceCallback* callback) {
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  CHECK(callback);
  CHECK(opened_);

  if (render_thread_.get()) {
    CHECK_EQ(callback, source_);
    return;
  }

  if (restart_rendering_mode_) {
    // The selected audio device has been removed or disabled and a new
    // default device has been enabled instead. The current implementation
    // does not support this sequence of events. Given that Open()
    // and Start() are usually called in one sequence, it should be a very
    // rare event.
    // TODO(henrika): it is possible to extend the functionality here.
    LOG(ERROR) << "Unable to start since the selected default device has "
                  "changed since Open() was called.";
    return;
  }

  source_ = callback;

  // Avoid start-up glitches by filling up the endpoint buffer with "silence"
  // before starting the stream.
  BYTE* data_ptr = NULL;
  HRESULT hr = audio_render_client_->GetBuffer(endpoint_buffer_size_frames_,
                                               &data_ptr);
  if (FAILED(hr)) {
    DLOG(ERROR) << "Failed to use rendering audio buffer: " << std::hex << hr;
    return;
  }

  // Using the AUDCLNT_BUFFERFLAGS_SILENT flag eliminates the need to
  // explicitly write silence data to the rendering buffer.
  audio_render_client_->ReleaseBuffer(endpoint_buffer_size_frames_,
                                      AUDCLNT_BUFFERFLAGS_SILENT);
  num_written_frames_ = endpoint_buffer_size_frames_;

  // Sanity check: verify that the endpoint buffer is filled with silence.
  UINT32 num_queued_frames = 0;
  audio_client_->GetCurrentPadding(&num_queued_frames);
  DCHECK(num_queued_frames == num_written_frames_);

  // Create and start the thread that will drive the rendering by waiting for
  // render events.
  render_thread_.reset(
      new base::DelegateSimpleThread(this, "wasapi_render_thread"));
  render_thread_->Start();

  // Start streaming data between the endpoint buffer and the audio engine.
  hr = audio_client_->Start();
  if (FAILED(hr)) {
    SetEvent(stop_render_event_.Get());
    render_thread_->Join();
    render_thread_.reset();
    HandleError(hr);
  }
}

void WASAPIAudioOutputStream::Stop() {
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
  if (!render_thread_.get())
    return;

  // Stop output audio streaming.
  HRESULT hr = audio_client_->Stop();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to stop output streaming: " << std::hex << hr;
  }

  // Wait until the thread completes and perform cleanup.
  SetEvent(stop_render_event_.Get());
  render_thread_->Join();
  render_thread_.reset();

  // Ensure that we don't quit the main thread loop immediately next
  // time Start() is called.
  ResetEvent(stop_render_event_.Get());

  // Clear the source callback; it'll be set again on the next Start() call.
  source_ = NULL;

  // Flush all pending data and reset the audio clock stream position to 0.
  hr = audio_client_->Reset();
  if (FAILED(hr)) {
    DLOG_IF(ERROR, hr != AUDCLNT_E_NOT_INITIALIZED)
        << "Failed to reset streaming: " << std::hex << hr;
  }

  // Extra safety check to ensure that the buffers are cleared.
  // If the buffers are not cleared correctly, the next call to Start()
  // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
  // This check is only needed for shared-mode streams.
  if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
    UINT32 num_queued_frames = 0;
    audio_client_->GetCurrentPadding(&num_queued_frames);
    DCHECK_EQ(0u, num_queued_frames);
  }
}

void WASAPIAudioOutputStream::Close() {
  DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);

  // It is valid to call Close() before calling Open() or Start().
  // It is also valid to call Close() after Start() has been called.
  Stop();

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseOutputStream(this);
}

void WASAPIAudioOutputStream::SetVolume(double volume) {
  DVLOG(1) << "SetVolume(volume=" << volume << ")";
  float volume_float = static_cast<float>(volume);
  if (volume_float < 0.0f || volume_float > 1.0f) {
    return;
  }
  volume_ = volume_float;
}

void WASAPIAudioOutputStream::GetVolume(double* volume) {
  DVLOG(1) << "GetVolume()";
  *volume = static_cast<double>(volume_);
}

// static
int WASAPIAudioOutputStream::HardwareChannelCount() {
  // Use a WAVEFORMATEXTENSIBLE structure since it can specify both the
  // number of channels and the mapping of channels to speakers for
  // multichannel devices.
  base::win::ScopedCoMem<WAVEFORMATPCMEX> format_ex;
  HRESULT hr = GetMixFormat(
      eConsole, reinterpret_cast<WAVEFORMATEX**>(&format_ex));
  if (FAILED(hr))
    return 0;

  // Number of channels in the stream. Corresponds to the number of bits
  // set in the dwChannelMask.
  DVLOG(1) << "endpoint channels (out): " << format_ex->Format.nChannels;

  return static_cast<int>(format_ex->Format.nChannels);
}

// static
ChannelLayout WASAPIAudioOutputStream::HardwareChannelLayout() {
  return ChannelConfigToChannelLayout(GetChannelConfig());
}

// static
int WASAPIAudioOutputStream::HardwareSampleRate(ERole device_role) {
  base::win::ScopedCoMem<WAVEFORMATEX> format;
  HRESULT hr = GetMixFormat(device_role, &format);
  if (FAILED(hr))
    return 0;

  DVLOG(2) << "nSamplesPerSec: " << format->nSamplesPerSec;
  return static_cast<int>(format->nSamplesPerSec);
}

void WASAPIAudioOutputStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Increase the thread priority.
  render_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  DWORD task_index = 0;
  HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
                                                      &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
  }

  bool playing = true;
  bool error = false;
  HANDLE wait_array[] = { stop_render_event_,
                          stream_switch_event_,
                          audio_samples_render_event_ };
  UINT64 device_frequency = 0;

  // The IAudioClock interface enables us to monitor a stream's data
  // rate and the current position in the stream. Allocate it before we
  // start spinning.
  ScopedComPtr<IAudioClock> audio_clock;
  HRESULT hr = audio_client_->GetService(__uuidof(IAudioClock),
                                         audio_clock.ReceiveVoid());
  if (SUCCEEDED(hr)) {
    // The device frequency is the frequency generated by the hardware clock in
    // the audio device. The GetFrequency() method reports a constant frequency.
    hr = audio_clock->GetFrequency(&device_frequency);
  }
  error = FAILED(hr);
  PLOG_IF(ERROR, error) << "Failed to acquire IAudioClock interface: "
                        << std::hex << hr;

  // Keep rendering audio until the stop event or the stream-switch event
  // is signaled. An error event can also break the main thread loop.
  while (playing && !error) {
    // Wait for a close-down event, stream-switch event or a new render event.
    DWORD wait_result = WaitForMultipleObjects(arraysize(wait_array),
                                               wait_array,
                                               FALSE,
                                               INFINITE);

    switch (wait_result) {
      case WAIT_OBJECT_0 + 0:
        // |stop_render_event_| has been set.
        playing = false;
        break;
      case WAIT_OBJECT_0 + 1:
        // |stream_switch_event_| has been set. Stop rendering and try to
        // re-start the session using a new endpoint device.
        if (!RestartRenderingUsingNewDefaultDevice()) {
          // Abort the thread since stream switching failed.
          playing = false;
          error = true;
        }
        break;
      case WAIT_OBJECT_0 + 2:
        {
          // |audio_samples_render_event_| has been set.
          UINT32 num_queued_frames = 0;
          uint8* audio_data = NULL;

          // Contains how much new data we can write to the buffer without
          // the risk of overwriting previously written data that the audio
          // engine has not yet read from the buffer.
          size_t num_available_frames = 0;

          if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
            // Get the padding value which represents the amount of rendering
            // data that is queued up to play in the endpoint buffer.
            hr = audio_client_->GetCurrentPadding(&num_queued_frames);
            num_available_frames =
                endpoint_buffer_size_frames_ - num_queued_frames;
          } else {
            // While the stream is running, the system alternately sends one
            // buffer or the other to the client. This form of double buffering
            // is referred to as "ping-ponging". Each time the client receives
            // a buffer from the system (which triggers this event) the client
            // must process the entire buffer. Calls to the GetCurrentPadding
            // method are unnecessary because the packet size must always equal
            // the buffer size. In contrast to the shared mode buffering
            // scheme, the latency for an event-driven, exclusive-mode stream
            // depends directly on the buffer size.
            num_available_frames = endpoint_buffer_size_frames_;
          }

          // Check if there is enough available space to fit the packet size
          // specified by the client.
          if (FAILED(hr) || (num_available_frames < packet_size_frames_))
            continue;

          // Derive the number of packets we need to get from the client to
          // fill up the available area in the endpoint buffer.
          // |num_packets| will always be one for exclusive-mode streams.
          size_t num_packets = (num_available_frames / packet_size_frames_);

          // Get data from the client/source.
          for (size_t n = 0; n < num_packets; ++n) {
            // Grab all available space in the rendering endpoint buffer
            // into which the client can write a data packet.
            hr = audio_render_client_->GetBuffer(packet_size_frames_,
                                                 &audio_data);
            if (FAILED(hr)) {
              DLOG(ERROR) << "Failed to use rendering audio buffer: "
                          << std::hex << hr;
              continue;
            }

            // Derive the audio delay which corresponds to the delay between
            // a render event and the time when the first audio sample in a
            // packet is played out through the speaker. This delay value
            // can typically be utilized by an acoustic echo-control (AEC)
            // unit at the render side.
            UINT64 position = 0;
            int audio_delay_bytes = 0;
            hr = audio_clock->GetPosition(&position, NULL);
            if (SUCCEEDED(hr)) {
              // Stream position of the sample that is currently playing
              // through the speaker.
              double pos_sample_playing_frames = format_.Format.nSamplesPerSec *
                  (static_cast<double>(position) / device_frequency);

              // Stream position of the last sample written to the endpoint
              // buffer. Note that the packet we are about to receive in
              // the upcoming callback is also included.
              size_t pos_last_sample_written_frames =
                  num_written_frames_ + packet_size_frames_;

              // Derive the actual delay value which will be fed to the
              // render client using the OnMoreData() callback.
              audio_delay_bytes = (pos_last_sample_written_frames -
                  pos_sample_playing_frames) * frame_size_;
            }
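
            // A hypothetical worked example: at 48 kHz, a clock position
            // equivalent to 0.5 s corresponds to 24000 played-out frames.
            // If 24480 frames have been written so far and the upcoming
            // packet holds another 480, the delay becomes
            // (24960 - 24000) * frame_size_ bytes, i.e. 960 frames or 20 ms.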

            // Read a data packet from the registered client source and
            // deliver a delay estimate in the same callback to the client.
            // A time stamp is also stored in the AudioBuffersState. This
            // time stamp can be used at the client side to compensate for
            // the delay between the usage of the delay value and the time
            // of generation.

            const int bytes_per_sample = format_.Format.wBitsPerSample >> 3;

            int frames_filled = source_->OnMoreData(
                audio_bus_.get(), AudioBuffersState(0, audio_delay_bytes));
            uint32 num_filled_bytes = frames_filled * frame_size_;
            DCHECK_LE(num_filled_bytes, packet_size_bytes_);
            // Note: If this ever changes to output raw float the data must be
            // clipped and sanitized since it may come from an untrusted
            // source such as NaCl.
            audio_bus_->ToInterleaved(
                frames_filled, bytes_per_sample, audio_data);

            // Perform in-place, software-volume adjustments.
            media::AdjustVolume(audio_data,
                                num_filled_bytes,
                                audio_bus_->channels(),
                                bytes_per_sample,
                                volume_);

            // Zero out the part of the packet which has not been filled by
            // the client. Using silence is the least bad option in this
            // situation.
            if (num_filled_bytes < packet_size_bytes_) {
              memset(&audio_data[num_filled_bytes], 0,
                     (packet_size_bytes_ - num_filled_bytes));
            }

            // Release the buffer space acquired in the GetBuffer() call.
            DWORD flags = 0;
            audio_render_client_->ReleaseBuffer(packet_size_frames_,
                                                flags);

            num_written_frames_ += packet_size_frames_;
          }
        }
        break;
      default:
        error = true;
        break;
    }
  }

  if (playing && error) {
    // Stop audio rendering since something has gone wrong in our main thread
    // loop. Note that we are still in a "started" state, hence a Stop() call
    // is required to join the thread properly.
    audio_client_->Stop();
    PLOG(ERROR) << "WASAPI rendering failed.";
  }

  // Disable MMCSS.
  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }
}

void WASAPIAudioOutputStream::HandleError(HRESULT err) {
  CHECK((started() && GetCurrentThreadId() == render_thread_->tid()) ||
        (!started() && GetCurrentThreadId() == creating_thread_id_));
  NOTREACHED() << "Error code: " << std::hex << err;
  if (source_)
    source_->OnError(this, static_cast<int>(err));
}

HRESULT WASAPIAudioOutputStream::SetRenderDevice() {
  ScopedComPtr<IMMDeviceEnumerator> device_enumerator;
  ScopedComPtr<IMMDevice> endpoint_device;

  // Create the IMMDeviceEnumerator interface.
  HRESULT hr = CoCreateInstance(__uuidof(MMDeviceEnumerator),
                                NULL,
                                CLSCTX_INPROC_SERVER,
                                __uuidof(IMMDeviceEnumerator),
                                device_enumerator.ReceiveVoid());
  if (SUCCEEDED(hr)) {
    // Retrieve the default render audio endpoint for the specified role.
    // Note that in Windows Vista, the MMDevice API supports device roles
    // but the system-supplied user interface programs do not.
    hr = device_enumerator->GetDefaultAudioEndpoint(
        eRender, device_role_, endpoint_device.Receive());
    if (FAILED(hr))
      return hr;

    // Verify that the audio endpoint device is active. That is, the audio
    // adapter that connects to the endpoint device is present and enabled.
    DWORD state = DEVICE_STATE_DISABLED;
    hr = endpoint_device->GetState(&state);
    if (SUCCEEDED(hr)) {
      if (!(state & DEVICE_STATE_ACTIVE)) {
        DLOG(ERROR) << "Selected render device is not active.";
        hr = E_ACCESSDENIED;
      }
    }
  }

  if (SUCCEEDED(hr)) {
    device_enumerator_ = device_enumerator;
    endpoint_device_ = endpoint_device;
  }

  return hr;
}

HRESULT WASAPIAudioOutputStream::ActivateRenderDevice() {
  ScopedComPtr<IAudioClient> audio_client;

  // Create and activate an IAudioClient COM object for the selected
  // render endpoint device.
  HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient),
                                          CLSCTX_INPROC_SERVER,
                                          NULL,
                                          audio_client.ReceiveVoid());
  if (SUCCEEDED(hr)) {
    // Retrieve the stream format that the audio engine uses for its internal
    // processing/mixing of shared-mode streams.
    audio_engine_mix_format_.Reset(NULL);
    hr = audio_client->GetMixFormat(
        reinterpret_cast<WAVEFORMATEX**>(&audio_engine_mix_format_));

    if (SUCCEEDED(hr)) {
      audio_client_ = audio_client;
    }
  }

  return hr;
}

bool WASAPIAudioOutputStream::DesiredFormatIsSupported() {
  // Determine, before calling IAudioClient::Initialize(), whether the audio
  // engine supports a particular stream format.
  // In shared mode, the audio engine always supports the mix format,
  // which is stored in the |audio_engine_mix_format_| member, and it is also
  // possible to receive a proposed (closest) format if the current format is
  // not supported.
  base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> closest_match;
  HRESULT hr = audio_client_->IsFormatSupported(
      share_mode_, reinterpret_cast<WAVEFORMATEX*>(&format_),
      reinterpret_cast<WAVEFORMATEX**>(&closest_match));

  // This log can only be triggered for shared mode.
  DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported "
                                << "but a closest match exists.";
  // This log can be triggered both for shared and exclusive modes.
  DLOG_IF(ERROR, hr == AUDCLNT_E_UNSUPPORTED_FORMAT) << "Unsupported format.";
  if (hr == S_FALSE) {
    DVLOG(1) << "wFormatTag    : " << closest_match->Format.wFormatTag;
    DVLOG(1) << "nChannels     : " << closest_match->Format.nChannels;
    DVLOG(1) << "nSamplesPerSec: " << closest_match->Format.nSamplesPerSec;
    DVLOG(1) << "wBitsPerSample: " << closest_match->Format.wBitsPerSample;
  }

  return (hr == S_OK);
}

HRESULT WASAPIAudioOutputStream::InitializeAudioEngine() {
#if !defined(NDEBUG)
  // The period between processing passes by the audio engine is fixed for a
  // particular audio endpoint device and represents the smallest processing
  // quantum for the audio engine. This period plus the stream latency between
  // the buffer and endpoint device represents the minimum possible latency
  // that an audio application can achieve in shared mode.
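  // Typical values, which may vary between devices and Windows versions:
  // the shared-mode (default) period is 10 ms on Windows Vista/7, and the
  // exclusive-mode (minimum) period is often around 3 ms.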
  {
    REFERENCE_TIME default_device_period = 0;
    REFERENCE_TIME minimum_device_period = 0;
    HRESULT hr_dbg = audio_client_->GetDevicePeriod(&default_device_period,
                                                    &minimum_device_period);
    if (SUCCEEDED(hr_dbg)) {
      // Shared mode device period.
      DVLOG(1) << "shared mode (default) device period: "
               << static_cast<double>(default_device_period / 10000.0)
               << " [ms]";
      // Exclusive mode device period.
      DVLOG(1) << "exclusive mode (minimum) device period: "
               << static_cast<double>(minimum_device_period / 10000.0)
               << " [ms]";
    }

    REFERENCE_TIME latency = 0;
    hr_dbg = audio_client_->GetStreamLatency(&latency);
    if (SUCCEEDED(hr_dbg)) {
      DVLOG(1) << "stream latency: " << static_cast<double>(latency / 10000.0)
               << " [ms]";
    }
  }
#endif

  // Perform different initialization depending on whether the device is
  // opened in shared mode or in exclusive mode.
  HRESULT hr = (share_mode_ == AUDCLNT_SHAREMODE_SHARED) ?
      SharedModeInitialization() : ExclusiveModeInitialization();
  if (FAILED(hr)) {
    LOG(WARNING) << "IAudioClient::Initialize() failed: " << std::hex << hr;
    return hr;
  }

  // Retrieve the length of the endpoint buffer. The buffer length represents
  // the maximum amount of rendering data that the client can write to
  // the endpoint buffer during a single processing pass.
  // A typical value is 960 audio frames <=> 20 ms @ 48 kHz sample rate.
  hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
  if (FAILED(hr))
    return hr;
  DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_
           << " [frames]";

  // The buffer scheme for exclusive mode streams is not designed for maximum
  // flexibility. We only allow a "perfect match" between the packet size set
  // by the user and the actual endpoint buffer size.
  if (share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE &&
      endpoint_buffer_size_frames_ != packet_size_frames_) {
    hr = AUDCLNT_E_INVALID_SIZE;
    DLOG(ERROR) << "AUDCLNT_E_INVALID_SIZE";
    return hr;
  }

  // Set the event handle that the audio engine will signal each time
  // a buffer becomes ready to be processed by the client.
  hr = audio_client_->SetEventHandle(audio_samples_render_event_.Get());
  if (FAILED(hr))
    return hr;

  // Get access to the IAudioRenderClient interface. This interface
  // enables us to write output data to a rendering endpoint buffer.
  // The methods in this interface manage the movement of data packets
  // that contain audio-rendering data.
  hr = audio_client_->GetService(__uuidof(IAudioRenderClient),
                                 audio_render_client_.ReceiveVoid());
  return hr;
}

HRESULT WASAPIAudioOutputStream::SharedModeInitialization() {
  DCHECK_EQ(share_mode_, AUDCLNT_SHAREMODE_SHARED);

  // TODO(henrika): this buffer scheme is still under development.
  // The exact details are yet to be determined based on tests with different
  // audio clients.
  int glitch_free_buffer_size_ms = static_cast<int>(packet_size_ms_ + 0.5);
  if (audio_engine_mix_format_->Format.nSamplesPerSec % 8000 == 0) {
    // Initial tests have shown that we have to add 10 ms extra to
    // ensure that we don't run empty for any packet size.
    glitch_free_buffer_size_ms += 10;
  } else if (audio_engine_mix_format_->Format.nSamplesPerSec % 11025 == 0) {
    // Initial tests have shown that we have to add 20 ms extra to
    // ensure that we don't run empty for any packet size.
    glitch_free_buffer_size_ms += 20;
  } else {
    DLOG(WARNING) << "Unsupported sample rate "
                  << audio_engine_mix_format_->Format.nSamplesPerSec
                  << " detected";
    glitch_free_buffer_size_ms += 20;
  }
  DVLOG(1) << "glitch_free_buffer_size_ms: " << glitch_free_buffer_size_ms;
  REFERENCE_TIME requested_buffer_duration =
      static_cast<REFERENCE_TIME>(glitch_free_buffer_size_ms * 10000);
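  // REFERENCE_TIME is expressed in units of 100 nanoseconds, so one
  // millisecond corresponds to 10000 units; a 30 ms buffer request, for
  // example, becomes 300000 units.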

  // Initialize the audio stream between the client and the device.
  // We connect indirectly through the audio engine by using shared mode,
  // and WASAPI is initialized in an event-driven mode.
  // Note that this API ensures that the buffer is never smaller than the
  // minimum buffer size needed to ensure glitch-free rendering.
  // If we request a buffer size that is smaller than the audio engine's
  // minimum required buffer size, the method sets the buffer size to this
  // minimum buffer size rather than to the buffer size requested.
  return audio_client_->Initialize(AUDCLNT_SHAREMODE_SHARED,
                                   AUDCLNT_STREAMFLAGS_EVENTCALLBACK |
                                   AUDCLNT_STREAMFLAGS_NOPERSIST,
                                   requested_buffer_duration,
                                   0,
                                   reinterpret_cast<WAVEFORMATEX*>(&format_),
                                   NULL);
}

HRESULT WASAPIAudioOutputStream::ExclusiveModeInitialization() {
  DCHECK_EQ(share_mode_, AUDCLNT_SHAREMODE_EXCLUSIVE);

  float f = (1000.0 * packet_size_frames_) / format_.Format.nSamplesPerSec;
  REFERENCE_TIME requested_buffer_duration =
      static_cast<REFERENCE_TIME>(f * 10000.0 + 0.5);
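  // For example, a 480-frame packet at 48 kHz equals 10 ms, which is
  // 100000 units of 100 ns each.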

  // Initialize the audio stream between the client and the device.
  // For an exclusive-mode stream that uses event-driven buffering, the
  // caller must specify nonzero values for hnsPeriodicity and
  // hnsBufferDuration, and the values of these two parameters must be equal.
  // The Initialize method allocates two buffers for the stream. Each buffer
  // is equal in duration to the value of the hnsBufferDuration parameter.
  // Following the Initialize call for a rendering stream, the caller should
  // fill the first of the two buffers before starting the stream.
  HRESULT hr =
      audio_client_->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE,
                                AUDCLNT_STREAMFLAGS_EVENTCALLBACK |
                                AUDCLNT_STREAMFLAGS_NOPERSIST,
                                requested_buffer_duration,
                                requested_buffer_duration,
                                reinterpret_cast<WAVEFORMATEX*>(&format_),
                                NULL);
  if (FAILED(hr)) {
    if (hr == AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED) {
      LOG(ERROR) << "AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED";

      UINT32 aligned_buffer_size = 0;
      audio_client_->GetBufferSize(&aligned_buffer_size);
      DVLOG(1) << "Use aligned buffer size instead: " << aligned_buffer_size;
      audio_client_.Release();

      // Calculate new aligned periodicity. Each unit of reference time
      // is 100 nanoseconds.
      REFERENCE_TIME aligned_buffer_duration = static_cast<REFERENCE_TIME>(
          (10000000.0 * aligned_buffer_size / format_.Format.nSamplesPerSec)
          + 0.5);
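
      // A hypothetical example of such a misalignment: at 44.1 kHz with
      // 16-bit stereo, a 441-frame (10 ms) buffer occupies 1764 bytes, which
      // is not a multiple of the 128-byte alignment that some HD Audio
      // hardware requires. The closest aligned size is 448 frames
      // (1792 bytes), i.e. roughly 10.16 ms.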

      // It is possible to re-activate and re-initialize the audio client
      // at this stage, but instead we bail out with an error code and log
      // the suggested aligned buffer size which should be used.
      DVLOG(1) << "aligned_buffer_duration: "
               << static_cast<double>(aligned_buffer_duration / 10000.0)
               << " [ms]";
    } else if (hr == AUDCLNT_E_INVALID_DEVICE_PERIOD) {
      // We will get this error if we try to use a smaller buffer size than
      // the minimum supported size (usually ~3 ms on Windows 7).
      LOG(ERROR) << "AUDCLNT_E_INVALID_DEVICE_PERIOD";
    }
  }

  return hr;
}

ULONG WASAPIAudioOutputStream::AddRef() {
  NOTREACHED() << "IMMNotificationClient should not use this method.";
  return 1;
}

ULONG WASAPIAudioOutputStream::Release() {
  NOTREACHED() << "IMMNotificationClient should not use this method.";
  return 1;
}

HRESULT WASAPIAudioOutputStream::QueryInterface(REFIID iid, void** object) {
  NOTREACHED() << "IMMNotificationClient should not use this method.";
  if (iid == IID_IUnknown || iid == __uuidof(IMMNotificationClient)) {
    *object = static_cast<IMMNotificationClient*>(this);
  } else {
    return E_NOINTERFACE;
  }
  return S_OK;
}

STDMETHODIMP WASAPIAudioOutputStream::OnDeviceStateChanged(LPCWSTR device_id,
                                                           DWORD new_state) {
#ifndef NDEBUG
  std::string device_name = GetDeviceName(device_id);
  std::string device_state;

  switch (new_state) {
    case DEVICE_STATE_ACTIVE:
      device_state = "ACTIVE";
      break;
    case DEVICE_STATE_DISABLED:
      device_state = "DISABLED";
      break;
    case DEVICE_STATE_NOTPRESENT:
      device_state = "NOTPRESENT";
      break;
    case DEVICE_STATE_UNPLUGGED:
      device_state = "UNPLUGGED";
      break;
    default:
      break;
  }

  DVLOG(1) << "-> State changed to " << device_state
           << " for device: " << device_name;
#endif
  return S_OK;
}

HRESULT WASAPIAudioOutputStream::OnDefaultDeviceChanged(
    EDataFlow flow, ERole role, LPCWSTR new_default_device_id) {
  if (new_default_device_id == NULL) {
    // The user has removed or disabled the default device for our
    // particular role, and no other device is available to take that role.
    DLOG(ERROR) << "All devices are disabled.";
    return E_FAIL;
  }

  if (flow == eRender && role == device_role_) {
    // Log the name of the new default device for our configured role.
    std::string new_default_device = GetDeviceName(new_default_device_id);
    DVLOG(1) << "-> New default device: " << new_default_device;

    // Initiate a stream switch if not already initiated by signaling the
    // stream-switch event to inform the render thread that it is OK to
    // re-initialize the active audio renderer. All the action takes place
    // on the WASAPI render thread.
    if (!restart_rendering_mode_) {
      restart_rendering_mode_ = true;
      SetEvent(stream_switch_event_.Get());
    }
  }

  return S_OK;
}

std::string WASAPIAudioOutputStream::GetDeviceName(LPCWSTR device_id) const {
  std::string name;
  ScopedComPtr<IMMDevice> audio_device;

  // Get the IMMDevice interface corresponding to the given endpoint ID string.
  HRESULT hr = device_enumerator_->GetDevice(device_id, audio_device.Receive());
  if (SUCCEEDED(hr)) {
    // Retrieve the user-friendly name of the endpoint device.
    // Example: "Speakers (Realtek High Definition Audio)".
    ScopedComPtr<IPropertyStore> properties;
    hr = audio_device->OpenPropertyStore(STGM_READ, properties.Receive());
    if (SUCCEEDED(hr)) {
      PROPVARIANT friendly_name;
      PropVariantInit(&friendly_name);
      hr = properties->GetValue(PKEY_Device_FriendlyName, &friendly_name);
      if (SUCCEEDED(hr) && friendly_name.vt == VT_LPWSTR) {
        if (friendly_name.pwszVal)
          name = WideToUTF8(friendly_name.pwszVal);
      }
      PropVariantClear(&friendly_name);
    }
  }
  return name;
}

bool WASAPIAudioOutputStream::RestartRenderingUsingNewDefaultDevice() {
  DCHECK(base::PlatformThread::CurrentId() == render_thread_->tid());
  DCHECK(restart_rendering_mode_);

  // The |stream_switch_event_| has been signaled, which means that we must
  // stop the current renderer and start a new render session using the new
  // default device with the configured role.

  // Stop the current rendering.
  HRESULT hr = audio_client_->Stop();
  if (FAILED(hr)) {
    restart_rendering_mode_ = false;
    return false;
  }

  // Release acquired interfaces (IAudioRenderClient, IAudioClient, IMMDevice).
  audio_render_client_.Release();
  audio_client_.Release();
  endpoint_device_.Release();

  // Retrieve the new default render audio endpoint (IMMDevice) for the
  // specified role.
  hr = device_enumerator_->GetDefaultAudioEndpoint(
      eRender, device_role_, endpoint_device_.Receive());
  if (FAILED(hr)) {
    restart_rendering_mode_ = false;
    return false;
  }

  // Re-create an IAudioClient interface.
  hr = ActivateRenderDevice();
  if (FAILED(hr)) {
    restart_rendering_mode_ = false;
    return false;
  }

  // Retrieve the new mix format and ensure that it is supported given
  // the predefined format set at construction.
  base::win::ScopedCoMem<WAVEFORMATEX> new_audio_engine_mix_format;
  hr = audio_client_->GetMixFormat(&new_audio_engine_mix_format);
  if (FAILED(hr) || !DesiredFormatIsSupported()) {
    restart_rendering_mode_ = false;
    return false;
  }

  // Re-initialize the audio engine using the new audio endpoint.
  // This method will create a new IAudioRenderClient interface.
  hr = InitializeAudioEngine();
  if (FAILED(hr)) {
    restart_rendering_mode_ = false;
    return false;
  }

  // All released interfaces (IAudioRenderClient, IAudioClient, IMMDevice)
  // have now been re-initiated and it is possible to re-start audio
  // rendering.

  // Start rendering again using the new default audio endpoint.
  hr = audio_client_->Start();

  restart_rendering_mode_ = false;
  return SUCCEEDED(hr);
}

}  // namespace media