audio_low_latency_input_win.cc revision 1320f92c476a1ad9d19dba2a48c72b75566198e9
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/audio/win/audio_low_latency_input_win.h"

#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/strings/utf_string_conversions.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"
#include "media/base/audio_bus.h"

using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;

namespace media {
namespace {

// Returns true if |device| represents the default communication capture
// device.
bool IsDefaultCommunicationDevice(IMMDeviceEnumerator* enumerator,
                                  IMMDevice* device) {
  ScopedComPtr<IMMDevice> communications;
  if (FAILED(enumerator->GetDefaultAudioEndpoint(eCapture, eCommunications,
                                                 communications.Receive()))) {
    return false;
  }

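  // Compare endpoint ID strings; in the MMDevice API two IMMDevice instances
  // refer to the same endpoint if and only if their ID strings are identical.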
  base::win::ScopedCoMem<WCHAR> communications_id, device_id;
  device->GetId(&device_id);
  communications->GetId(&communications_id);
  return lstrcmpW(communications_id, device_id) == 0;
}

}  // namespace

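// Typical usage sequence (a sketch; the authoritative contract is the
// AudioInputStream interface that this class implements):
//
//   WASAPIAudioInputStream* stream = ...;  // created by AudioManagerWin
//   if (stream->Open()) {
//     stream->Start(sink);  // OnData() callbacks start on the capture thread
//     ...
//     stream->Stop();
//   }
//   stream->Close();  // releases the stream; it must not be used afterwards
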
WASAPIAudioInputStream::WASAPIAudioInputStream(AudioManagerWin* manager,
                                               const AudioParameters& params,
                                               const std::string& device_id)
    : manager_(manager),
      capture_thread_(NULL),
      opened_(false),
      started_(false),
      frame_size_(0),
      packet_size_frames_(0),
      packet_size_bytes_(0),
      endpoint_buffer_size_frames_(0),
      effects_(params.effects()),
      device_id_(device_id),
      perf_count_to_100ns_units_(0.0),
      ms_to_frame_count_(0.0),
      sink_(NULL),
      audio_bus_(media::AudioBus::Create(params)) {
  DCHECK(manager_);

  // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  bool avrt_init = avrt::Initialize();
  DCHECK(avrt_init) << "Failed to load the Avrt.dll";

  // Set up the desired capture format specified by the client.
  format_.nSamplesPerSec = params.sample_rate();
  format_.wFormatTag = WAVE_FORMAT_PCM;
  format_.wBitsPerSample = params.bits_per_sample();
  format_.nChannels = params.channels();
  format_.nBlockAlign = (format_.wBitsPerSample / 8) * format_.nChannels;
  format_.nAvgBytesPerSec = format_.nSamplesPerSec * format_.nBlockAlign;
  format_.cbSize = 0;
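
  // Example arithmetic, assuming 48 kHz 16-bit stereo capture:
  //   nBlockAlign     = (16 / 8) * 2 = 4 bytes per audio frame
  //   nAvgBytesPerSec = 48000 * 4    = 192000 bytes per second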

  // Size in bytes of each audio frame.
  frame_size_ = format_.nBlockAlign;
  // Store the size of the audio packets we expect to receive from the audio
  // endpoint device in each capture event.
  packet_size_frames_ = params.GetBytesPerBuffer() / format_.nBlockAlign;
  packet_size_bytes_ = params.GetBytesPerBuffer();
  DVLOG(1) << "Number of bytes per audio frame  : " << frame_size_;
  DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;
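  // E.g., with the default 10 ms buffers from GetInputStreamParameters() at
  // 48 kHz stereo 16-bit: GetBytesPerBuffer() = 480 * 4 = 1920 bytes, so
  // |packet_size_frames_| = 1920 / 4 = 480 and |packet_size_bytes_| = 1920.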

  // All events are auto-reset events and non-signaled initially.

  // Create the event which the audio engine will signal each time
  // a buffer becomes ready to be processed by the client.
  audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(audio_samples_ready_event_.IsValid());

  // Create the event which will be set in Stop() when capturing shall stop.
  stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  DCHECK(stop_capture_event_.IsValid());

  ms_to_frame_count_ = static_cast<double>(params.sample_rate()) / 1000.0;

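  // Cache the factor that converts raw QueryPerformanceCounter() ticks into
  // 100-ns units (10,000,000 units per second). E.g., a 10 MHz counter
  // frequency yields a conversion factor of exactly 1.0.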
  LARGE_INTEGER performance_frequency;
  if (QueryPerformanceFrequency(&performance_frequency)) {
    perf_count_to_100ns_units_ =
        (10000000.0 / static_cast<double>(performance_frequency.QuadPart));
  } else {
    DLOG(ERROR) << "High-resolution performance counters are not supported.";
  }
}

WASAPIAudioInputStream::~WASAPIAudioInputStream() {
  DCHECK(CalledOnValidThread());
}

bool WASAPIAudioInputStream::Open() {
  DCHECK(CalledOnValidThread());
  // Verify that we are not already opened.
  if (opened_)
    return false;

  // Obtain a reference to the IMMDevice interface of the capturing
  // device with the specified unique identifier or role which was
  // set at construction.
  HRESULT hr = SetCaptureDevice();
  if (FAILED(hr))
    return false;

  // Obtain an IAudioClient interface which enables us to create and initialize
  // an audio stream between an audio application and the audio engine.
  hr = ActivateCaptureDevice();
  if (FAILED(hr))
    return false;

  // Retrieve the stream format which the audio engine uses for its internal
  // processing/mixing of shared-mode streams. This call serves diagnostic
  // purposes only and is compiled in debug builds only.
#ifndef NDEBUG
  hr = GetAudioEngineStreamFormat();
#endif

  // Verify that the selected audio endpoint supports the format specified
  // at construction.
  if (!DesiredFormatIsSupported())
    return false;

  // Initialize the audio stream between the client and the device using
  // shared mode and the lowest possible glitch-free latency.
  hr = InitializeAudioEngine();

  opened_ = SUCCEEDED(hr);
  return opened_;
}

void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {
  DCHECK(CalledOnValidThread());
  DCHECK(callback);
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return;

  if (started_)
    return;

  DCHECK(!sink_);
  sink_ = callback;

  // Starts periodic AGC microphone measurements if the AGC has been enabled
  // using SetAutomaticGainControl().
  StartAgc();

  // Create and start the thread that will drive the capturing by waiting for
  // capture events.
  capture_thread_ =
      new base::DelegateSimpleThread(this, "wasapi_capture_thread");
  capture_thread_->Start();

  // Start streaming data between the endpoint buffer and the audio engine.
  HRESULT hr = audio_client_->Start();
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to start input streaming.";

  if (SUCCEEDED(hr) && audio_render_client_for_loopback_)
    hr = audio_render_client_for_loopback_->Start();

  started_ = SUCCEEDED(hr);
}

void WASAPIAudioInputStream::Stop() {
  DCHECK(CalledOnValidThread());
  DVLOG(1) << "WASAPIAudioInputStream::Stop()";
  if (!started_)
    return;

  // Stops periodic AGC microphone measurements.
  StopAgc();

  // Shut down the capture thread.
  if (stop_capture_event_.IsValid()) {
    SetEvent(stop_capture_event_.Get());
  }

  // Stop the input audio streaming.
  HRESULT hr = audio_client_->Stop();
  if (FAILED(hr)) {
    LOG(ERROR) << "Failed to stop input streaming.";
  }

  // Wait until the thread completes and perform cleanup.
  if (capture_thread_) {
    SetEvent(stop_capture_event_.Get());
    capture_thread_->Join();
    capture_thread_ = NULL;
  }

  started_ = false;
  sink_ = NULL;
}

void WASAPIAudioInputStream::Close() {
  DVLOG(1) << "WASAPIAudioInputStream::Close()";
  // It is valid to call Close() before Open() or Start() have been called.
  // It is also valid to call Close() after Start() has been called.
  Stop();

  // Inform the audio manager that we have been closed. This will cause our
  // destruction.
  manager_->ReleaseInputStream(this);
}

double WASAPIAudioInputStream::GetMaxVolume() {
  // Verify that Open() has been called successfully, to ensure that an audio
  // session exists and that an ISimpleAudioVolume interface has been created.
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return 0.0;

  // The effective volume value is always in the range 0.0 to 1.0, hence
  // we can return a fixed value (=1.0) here.
  return 1.0;
}

void WASAPIAudioInputStream::SetVolume(double volume) {
  DVLOG(1) << "SetVolume(volume=" << volume << ")";
  DCHECK(CalledOnValidThread());
  DCHECK_GE(volume, 0.0);
  DCHECK_LE(volume, 1.0);

  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return;

  // Set a new master volume level. Valid volume levels are in the range
  // 0.0 to 1.0. Ignore volume-change events.
  HRESULT hr = simple_audio_volume_->SetMasterVolume(static_cast<float>(volume),
      NULL);
  DLOG_IF(WARNING, FAILED(hr)) << "Failed to set new input master volume.";

  // Update the AGC volume level based on the last setting above. Note that
  // the volume-level resolution is not infinite and it is therefore not
  // possible to assume that the volume provided as input parameter can be
  // used directly. Instead, a new query to the audio hardware is required.
  // This method does nothing if AGC is disabled.
  UpdateAgcVolume();
}

double WASAPIAudioInputStream::GetVolume() {
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return 0.0;

  // Retrieve the current volume level. The value is in the range 0.0 to 1.0.
  float level = 0.0f;
  HRESULT hr = simple_audio_volume_->GetMasterVolume(&level);
  DLOG_IF(WARNING, FAILED(hr)) << "Failed to get input master volume.";

  return static_cast<double>(level);
}

// static
AudioParameters WASAPIAudioInputStream::GetInputStreamParameters(
    const std::string& device_id) {
  int sample_rate = 48000;
  ChannelLayout channel_layout = CHANNEL_LAYOUT_STEREO;

  base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;
  int effects = AudioParameters::NO_EFFECTS;
  if (SUCCEEDED(GetMixFormat(device_id, &audio_engine_mix_format, &effects))) {
    sample_rate = static_cast<int>(audio_engine_mix_format->nSamplesPerSec);
    channel_layout = audio_engine_mix_format->nChannels == 1 ?
        CHANNEL_LAYOUT_MONO : CHANNEL_LAYOUT_STEREO;
  }

  // Use a 10 ms buffer size by default.
  int frames_per_buffer = sample_rate / 100;
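  // E.g., at 48 kHz this yields 48000 / 100 = 480 frames per buffer.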
  return AudioParameters(
      AudioParameters::AUDIO_PCM_LOW_LATENCY, channel_layout, sample_rate,
      16, frames_per_buffer, effects);
}

// static
HRESULT WASAPIAudioInputStream::GetMixFormat(const std::string& device_id,
                                             WAVEFORMATEX** device_format,
                                             int* effects) {
  DCHECK(effects);

  // It is assumed that this static method is called from a COM thread, i.e.,
  // CoInitializeEx() is not called here to avoid STA/MTA conflicts.
  ScopedComPtr<IMMDeviceEnumerator> enumerator;
  HRESULT hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator), NULL,
                                         CLSCTX_INPROC_SERVER);
  if (FAILED(hr))
    return hr;

  ScopedComPtr<IMMDevice> endpoint_device;
  if (device_id == AudioManagerBase::kDefaultDeviceId) {
    // Retrieve the default capture audio endpoint.
    hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
                                             endpoint_device.Receive());
  } else if (device_id == AudioManagerBase::kLoopbackInputDeviceId) {
    // Get the mix format of the default playback stream.
    hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
                                             endpoint_device.Receive());
  } else {
    // Retrieve a capture endpoint device that is specified by an endpoint
    // device-identification string.
    hr = enumerator->GetDevice(base::UTF8ToUTF16(device_id).c_str(),
                               endpoint_device.Receive());
  }

  if (FAILED(hr))
    return hr;

  *effects = IsDefaultCommunicationDevice(enumerator, endpoint_device) ?
      AudioParameters::DUCKING : AudioParameters::NO_EFFECTS;

  ScopedComPtr<IAudioClient> audio_client;
  hr = endpoint_device->Activate(__uuidof(IAudioClient),
                                 CLSCTX_INPROC_SERVER,
                                 NULL,
                                 audio_client.ReceiveVoid());
  return SUCCEEDED(hr) ? audio_client->GetMixFormat(device_format) : hr;
}

void WASAPIAudioInputStream::Run() {
  ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);

  // Increase the thread priority.
  capture_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);

  // Enable MMCSS to ensure that this thread receives prioritized access to
  // CPU resources.
  DWORD task_index = 0;
  HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
                                                      &task_index);
  bool mmcss_is_ok =
      (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
  if (!mmcss_is_ok) {
    // Failed to enable MMCSS on this thread. It is not fatal but can lead
    // to reduced QoS at high load.
    DWORD err = GetLastError();
    LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
  }

  // Allocate a buffer with a size that enables us to take care of cases like:
  // 1) The recorded buffer size is smaller than, or does not exactly match,
  //    the selected packet size used in each callback.
  // 2) The selected buffer size is larger than the recorded buffer size in
  //    each event.
  size_t buffer_frame_index = 0;
  size_t capture_buffer_size = std::max(
      2 * endpoint_buffer_size_frames_ * frame_size_,
      2 * packet_size_frames_ * frame_size_);
  scoped_ptr<uint8[]> capture_buffer(new uint8[capture_buffer_size]);
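  // E.g., with a 20 ms endpoint buffer (960 frames) and 10 ms packets
  // (480 frames) at 4 bytes per frame, this allocates
  // max(2 * 960, 2 * 480) * 4 = 7680 bytes.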

  LARGE_INTEGER now_count;
  bool recording = true;
  bool error = false;
  double volume = GetVolume();
  HANDLE wait_array[2] =
      { stop_capture_event_.Get(), audio_samples_ready_event_.Get() };

  while (recording && !error) {
    HRESULT hr = S_FALSE;

    // Wait for a close-down event or a new capture event.
    DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);
    switch (wait_result) {
      case WAIT_FAILED:
        error = true;
        break;
      case WAIT_OBJECT_0 + 0:
        // |stop_capture_event_| has been set.
        recording = false;
        break;
      case WAIT_OBJECT_0 + 1:
        {
          // |audio_samples_ready_event_| has been set.
          BYTE* data_ptr = NULL;
          UINT32 num_frames_to_read = 0;
          DWORD flags = 0;
          UINT64 device_position = 0;
          UINT64 first_audio_frame_timestamp = 0;

          // Retrieve the amount of data in the capture endpoint buffer,
          // replace it with silence if required, create callbacks for each
          // packet and store non-delivered data for the next event.
          hr = audio_capture_client_->GetBuffer(&data_ptr,
                                                &num_frames_to_read,
                                                &flags,
                                                &device_position,
                                                &first_audio_frame_timestamp);
          if (FAILED(hr)) {
            DLOG(ERROR) << "Failed to get data from the capture buffer";
            continue;
          }

          if (num_frames_to_read != 0) {
            size_t pos = buffer_frame_index * frame_size_;
            size_t num_bytes = num_frames_to_read * frame_size_;
            DCHECK_GE(capture_buffer_size, pos + num_bytes);

            if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
              // Clear out the local buffer since silence is reported.
              memset(&capture_buffer[pos], 0, num_bytes);
            } else {
              // Copy captured data from audio engine buffer to local buffer.
              memcpy(&capture_buffer[pos], data_ptr, num_bytes);
            }

            buffer_frame_index += num_frames_to_read;
          }

          hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
          DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";

          // Derive a delay estimate for the captured audio packet.
          // The value contains two parts (A+B), where A is the delay of the
          // first audio frame in the packet and B is the extra delay
          // contained in any stored data. The unit is audio frames.
          QueryPerformanceCounter(&now_count);
          double audio_delay_frames =
              ((perf_count_to_100ns_units_ * now_count.QuadPart -
                first_audio_frame_timestamp) / 10000.0) * ms_to_frame_count_ +
                buffer_frame_index - num_frames_to_read;
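          // Here A is the elapsed time since |first_audio_frame_timestamp|
          // (QPC ticks scaled to 100-ns units, then to ms, then to frames),
          // and B is |buffer_frame_index| - |num_frames_to_read|, i.e.,
          // previously stored frames that precede this packet.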

          // Get a cached AGC volume level which is updated once every second
          // on the audio manager thread. Note that |volume| is also updated
          // each time SetVolume() is called through IPC by the render-side AGC.
          GetAgcVolume(&volume);

          // Deliver captured data to the registered consumer using a packet
          // size which was specified at construction.
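          // The 0.5 below rounds the non-negative delay estimate to the
          // nearest whole frame.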
          uint32 delay_frames = static_cast<uint32>(audio_delay_frames + 0.5);
          while (buffer_frame_index >= packet_size_frames_) {
            // Copy data to audio bus to match the OnData interface.
            uint8* audio_data = reinterpret_cast<uint8*>(capture_buffer.get());
            audio_bus_->FromInterleaved(
                audio_data, audio_bus_->frames(), format_.wBitsPerSample / 8);

            // Deliver data packet, delay estimation and volume level to
            // the user.
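            // Note: the delay is passed to OnData() in bytes, hence the
            // multiplication by |frame_size_| below.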
            sink_->OnData(
                this, audio_bus_.get(), delay_frames * frame_size_, volume);

            // Store parts of the recorded data which can't be delivered
            // using the current packet size. The stored section will be used
            // either in the next while-loop iteration or in the next
            // capture event.
            memmove(&capture_buffer[0],
                    &capture_buffer[packet_size_bytes_],
                    (buffer_frame_index - packet_size_frames_) * frame_size_);

            buffer_frame_index -= packet_size_frames_;
            delay_frames -= packet_size_frames_;
          }
        }
        break;
      default:
        error = true;
        break;
    }
  }

  if (recording && error) {
    // TODO(henrika): perhaps it is worth improving the cleanup here by e.g.
    // stopping the audio client, joining the thread etc.?
    NOTREACHED() << "WASAPI capturing failed with error code "
                 << GetLastError();
  }

  // Disable MMCSS.
  if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
    PLOG(WARNING) << "Failed to disable MMCSS";
  }
}

void WASAPIAudioInputStream::HandleError(HRESULT err) {
  NOTREACHED() << "Error code: " << err;
  if (sink_)
    sink_->OnError(this);
}

HRESULT WASAPIAudioInputStream::SetCaptureDevice() {
  DCHECK(!endpoint_device_);

  ScopedComPtr<IMMDeviceEnumerator> enumerator;
  HRESULT hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator),
                                         NULL, CLSCTX_INPROC_SERVER);
  if (FAILED(hr))
    return hr;

  // Retrieve the IMMDevice by using the specified role or the specified
  // unique endpoint device-identification string.

  if (effects_ & AudioParameters::DUCKING) {
    // Ducking has been requested and it is only supported for the default
    // communication device.  So, let's open up the communication device and
    // see if the ID of that device matches the requested ID.
    // Both kDefaultDeviceId and an explicit device id that matches the
    // communications device are considered valid matches.
    hr = enumerator->GetDefaultAudioEndpoint(eCapture, eCommunications,
                                             endpoint_device_.Receive());
    if (endpoint_device_ && device_id_ != AudioManagerBase::kDefaultDeviceId) {
      base::win::ScopedCoMem<WCHAR> communications_id;
      endpoint_device_->GetId(&communications_id);
      if (device_id_ !=
          base::WideToUTF8(static_cast<WCHAR*>(communications_id))) {
        DLOG(WARNING) << "Ducking has been requested for a non-default device. "
                         "Not supported.";
        // We can't honor the requested effect flag, so turn it off and
        // continue.  We'll check this flag later to see if we've actually
        // opened up the communications device, so it's important that it
        // reflects the active state.
        effects_ &= ~AudioParameters::DUCKING;
        endpoint_device_.Release();  // Fall back on code below.
      }
    }
  }

  if (!endpoint_device_) {
    if (device_id_ == AudioManagerBase::kDefaultDeviceId) {
      // Retrieve the default capture audio endpoint for the specified role.
      // Note that, in Windows Vista, the MMDevice API supports device roles
      // but the system-supplied user interface programs do not.
      hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
                                               endpoint_device_.Receive());
    } else if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
      // Capture the default playback stream.
      hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
                                               endpoint_device_.Receive());
    } else {
      hr = enumerator->GetDevice(base::UTF8ToUTF16(device_id_).c_str(),
                                 endpoint_device_.Receive());
    }
  }

  if (FAILED(hr))
    return hr;

  // Verify that the audio endpoint device is active, i.e., the audio
  // adapter that connects to the endpoint device is present and enabled.
  DWORD state = DEVICE_STATE_DISABLED;
  hr = endpoint_device_->GetState(&state);
  if (FAILED(hr))
    return hr;

  if (!(state & DEVICE_STATE_ACTIVE)) {
    DLOG(ERROR) << "Selected capture device is not active.";
    hr = E_ACCESSDENIED;
  }

  return hr;
}

HRESULT WASAPIAudioInputStream::ActivateCaptureDevice() {
  // Creates and activates an IAudioClient COM object given the selected
  // capture endpoint device.
  HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient),
                                          CLSCTX_INPROC_SERVER,
                                          NULL,
                                          audio_client_.ReceiveVoid());
  return hr;
}

HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() {
  HRESULT hr = S_OK;
#ifndef NDEBUG
  // The GetMixFormat() method retrieves the stream format that the
  // audio engine uses for its internal processing of shared-mode streams.
  // The method always uses a WAVEFORMATEXTENSIBLE structure, instead
  // of a stand-alone WAVEFORMATEX structure, to specify the format.
  // A WAVEFORMATEXTENSIBLE structure can specify both the mapping of
  // channels to speakers and the number of bits of precision in each sample.
  base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> format_ex;
  hr = audio_client_->GetMixFormat(
      reinterpret_cast<WAVEFORMATEX**>(&format_ex));

  // See http://msdn.microsoft.com/en-us/windows/hardware/gg463006#EFH
  // for details on the WAVE file format.
  WAVEFORMATEX format = format_ex->Format;
  DVLOG(2) << "WAVEFORMATEX:";
  DVLOG(2) << "  wFormatTag     : 0x" << std::hex << format.wFormatTag;
  DVLOG(2) << "  nChannels      : " << format.nChannels;
  DVLOG(2) << "  nSamplesPerSec : " << format.nSamplesPerSec;
  DVLOG(2) << "  nAvgBytesPerSec: " << format.nAvgBytesPerSec;
  DVLOG(2) << "  nBlockAlign    : " << format.nBlockAlign;
  DVLOG(2) << "  wBitsPerSample : " << format.wBitsPerSample;
  DVLOG(2) << "  cbSize         : " << format.cbSize;

  DVLOG(2) << "WAVEFORMATEXTENSIBLE:";
  DVLOG(2) << " wValidBitsPerSample: " <<
      format_ex->Samples.wValidBitsPerSample;
  DVLOG(2) << " dwChannelMask      : 0x" << std::hex <<
      format_ex->dwChannelMask;
  if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
    DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_PCM";
  else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
    DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
  else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_WAVEFORMATEX)
    DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_WAVEFORMATEX";
#endif
  return hr;
}

bool WASAPIAudioInputStream::DesiredFormatIsSupported() {
  // An application that uses WASAPI to manage shared-mode streams can rely
  // on the audio engine to perform only limited format conversions. The audio
  // engine can convert between a standard PCM sample size used by the
  // application and the floating-point samples that the engine uses for its
  // internal processing. However, the format for an application stream
  // typically must have the same number of channels and the same sample
  // rate as the stream format used by the device.
  // Many audio devices support both PCM and non-PCM stream formats. However,
  // the audio engine can mix only PCM streams.
  base::win::ScopedCoMem<WAVEFORMATEX> closest_match;
  HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,
                                                &format_,
                                                &closest_match);
  DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported "
                                << "but a closest match exists.";
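  // S_OK means the format is supported as-is; S_FALSE means only a closest
  // match is supported, which we treat as an unsupported format here.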
  return (hr == S_OK);
}

HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {
  DWORD flags;
  // Use event-driven mode only for regular input devices. For loopback the
  // EVENTCALLBACK flag is specified when initializing
  // |audio_render_client_for_loopback_|.
  if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
    flags = AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
  } else {
    flags =
      AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
  }

  // Initialize the audio stream between the client and the device.
  // We connect indirectly through the audio engine by using shared mode.
  // Note that |hnsBufferDuration| is set to 0, which ensures that the
  // buffer is never smaller than the minimum buffer size needed to ensure
  // that glitches do not occur between the periodic processing passes.
  // This setting should lead to the lowest possible latency.
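  // Note: the fourth argument (hnsPeriodicity) below must be 0 for
  // shared-mode streams; a non-zero device period is only valid in
  // exclusive mode.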
  HRESULT hr = audio_client_->Initialize(
      AUDCLNT_SHAREMODE_SHARED,
      flags,
      0,  // hnsBufferDuration
      0,
      &format_,
      (effects_ & AudioParameters::DUCKING) ? &kCommunicationsSessionId : NULL);

  if (FAILED(hr))
    return hr;

  // Retrieve the length of the endpoint buffer shared between the client
  // and the audio engine. The buffer length determines the maximum amount
  // of capture data that the audio engine can read from the endpoint buffer
  // during a single processing pass.
  // A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
  hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
  if (FAILED(hr))
    return hr;

  DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_
           << " [frames]";

#ifndef NDEBUG
  // The period between processing passes by the audio engine is fixed for a
  // particular audio endpoint device and represents the smallest processing
  // quantum for the audio engine. This period plus the stream latency between
  // the buffer and endpoint device represents the minimum possible latency
  // that an audio application can achieve.
  // TODO(henrika): possibly remove this section when all parts are ready.
  REFERENCE_TIME device_period_shared_mode = 0;
  REFERENCE_TIME device_period_exclusive_mode = 0;
  HRESULT hr_dbg = audio_client_->GetDevicePeriod(
      &device_period_shared_mode, &device_period_exclusive_mode);
  if (SUCCEEDED(hr_dbg)) {
    DVLOG(1) << "device period: "
             << static_cast<double>(device_period_shared_mode / 10000.0)
             << " [ms]";
  }

  REFERENCE_TIME latency = 0;
  hr_dbg = audio_client_->GetStreamLatency(&latency);
  if (SUCCEEDED(hr_dbg)) {
    DVLOG(1) << "stream latency: " << static_cast<double>(latency / 10000.0)
             << " [ms]";
  }
#endif

  // Set the event handle that the audio engine will signal each time a buffer
  // becomes ready to be processed by the client.
  //
  // In the loopback case the capture device doesn't receive any events, so we
  // need to create a separate playback client to get notifications. According
  // to MSDN:
  //
  //   A pull-mode capture client does not receive any events when a stream is
  //   initialized with event-driven buffering and is loopback-enabled. To
  //   work around this, initialize a render stream in event-driven mode. Each
  //   time the client receives an event for the render stream, it must signal
  //   the capture client to run the capture thread that reads the next set of
  //   samples from the capture endpoint buffer.
  //
  // http://msdn.microsoft.com/en-us/library/windows/desktop/dd316551(v=vs.85).aspx
  if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
    hr = endpoint_device_->Activate(
        __uuidof(IAudioClient), CLSCTX_INPROC_SERVER, NULL,
        audio_render_client_for_loopback_.ReceiveVoid());
    if (FAILED(hr))
      return hr;

    hr = audio_render_client_for_loopback_->Initialize(
        AUDCLNT_SHAREMODE_SHARED,
        AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST,
        0, 0, &format_, NULL);
    if (FAILED(hr))
      return hr;

    hr = audio_render_client_for_loopback_->SetEventHandle(
        audio_samples_ready_event_.Get());
  } else {
    hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());
  }

  if (FAILED(hr))
    return hr;

  // Get access to the IAudioCaptureClient interface. This interface
  // enables us to read input data from the capture endpoint buffer.
  hr = audio_client_->GetService(__uuidof(IAudioCaptureClient),
                                 audio_capture_client_.ReceiveVoid());
  if (FAILED(hr))
    return hr;

  // Obtain a reference to the ISimpleAudioVolume interface which enables
  // us to control the master volume level of an audio session.
  hr = audio_client_->GetService(__uuidof(ISimpleAudioVolume),
                                 simple_audio_volume_.ReceiveVoid());
  return hr;
}

}  // namespace media
