browser/speech/speech_input_manager.cc

// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/speech/speech_input_manager.h"

#include "app/l10n_util.h"
#include "base/lock.h"
#include "base/ref_counted.h"
#include "base/singleton.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/browser_process.h"
#include "chrome/browser/browser_thread.h"
#include "chrome/browser/prefs/pref_service.h"
#include "chrome/browser/speech/speech_input_bubble_controller.h"
#include "chrome/browser/speech/speech_recognizer.h"
#include "chrome/browser/tab_contents/infobar_delegate.h"
#include "chrome/browser/tab_contents/tab_contents.h"
#include "chrome/browser/tab_contents/tab_util.h"
#include "chrome/common/pref_names.h"
#include "grit/generated_resources.h"
#include "media/audio/audio_manager.h"
#include <map>

#if defined(OS_WIN)
#include "chrome/installer/util/wmi.h"
#endif

namespace {

// Asynchronously fetches the PC and audio hardware/driver info on windows if
// the user has opted into UMA. This information is sent with speech input
// requests to the server for identifying and improving quality issues with
// specific device configurations.
class HardwareInfo : public base::RefCountedThreadSafe<HardwareInfo> {
 public:
  HardwareInfo() {}

#if defined(OS_WIN)
  // Kicks off an asynchronous refresh of the cached hardware string. Must be
  // called on the IO thread; the work hops IO -> UI -> FILE, and the result
  // becomes visible through value() once GetHardwareInfo() has run.
  void Refresh() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
    // UMA opt-in can be checked only from the UI thread, so switch to that.
    BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
        NewRunnableMethod(this, &HardwareInfo::CheckUMAAndGetHardwareInfo));
  }

  // UI-thread step: continues to the FILE thread only when the
  // kMetricsReportingEnabled local-state pref is set. Otherwise nothing is
  // posted and the cached value is left untouched.
  void CheckUMAAndGetHardwareInfo() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    if (g_browser_process->local_state()->GetBoolean(
        prefs::kMetricsReportingEnabled)) {
      // Access potentially slow OS calls from the FILE thread.
      BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
          NewRunnableMethod(this, &HardwareInfo::GetHardwareInfo));
    }
  }

  // FILE-thread step: stores "<computer model>|<audio input device model>",
  // converted to UTF-8, while holding |lock_|.
  void GetHardwareInfo() {
    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
    AutoLock lock(lock_);
    value_ = UTF16ToUTF8(
        installer::WMIComputerSystem::GetModel() + L"|" +
        AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
  }

  // Returns a copy of the cached string, which stays empty until
  // GetHardwareInfo() has stored a value. Takes |lock_| for the read.
  std::string value() {
    AutoLock lock(lock_);
    return value_;
  }

 private:
  Lock lock_;          // Guards |value_|.
  std::string value_;  // Cached hardware description; see GetHardwareInfo().

#else  // defined(OS_WIN)
  // No hardware info is collected on other platforms: Refresh() does nothing
  // and value() is always empty.
  void Refresh() {}
  std::string value() { return std::string(); }

 private:
#endif  // defined(OS_WIN)

  // Kept in the private section on every platform so that an accidental copy
  // is rejected at compile time.
  DISALLOW_COPY_AND_ASSIGN(HardwareInfo);
};

}

namespace speech_input {

// Concrete SpeechInputManager handed out by SpeechInputManager::Get(). It
// tracks one SpeechInputRequest per caller id, receives callbacks from the
// recognizers (SpeechRecognizerDelegate) and from the bubble UI
// (SpeechInputBubbleControllerDelegate), and forwards outcomes to the delegate
// that was supplied when the request was started.
class SpeechInputManagerImpl : public SpeechInputManager,
                               public SpeechInputBubbleControllerDelegate,
                               public SpeechRecognizerDelegate {
 public:
  // SpeechInputManager methods.
  virtual void StartRecognition(SpeechInputManagerDelegate* delegate,
                                int caller_id,
                                int render_process_id,
                                int render_view_id,
                                const gfx::Rect& element_rect,
                                const std::string& language,
                                const std::string& grammar);
  virtual void CancelRecognition(int caller_id);
  virtual void StopRecording(int caller_id);

  // SpeechRecognizerDelegate methods.
  virtual void SetRecognitionResult(int caller_id,
                                    bool error,
                                    const SpeechInputResultArray& result);
  virtual void DidCompleteRecording(int caller_id);
  virtual void DidCompleteRecognition(int caller_id);
  virtual void OnRecognizerError(int caller_id,
                                 SpeechRecognizer::ErrorCode error);
  virtual void DidCompleteEnvironmentEstimation(int caller_id);
  virtual void SetInputVolume(int caller_id, float volume);

  // SpeechInputBubbleControllerDelegate methods.
  virtual void InfoBubbleButtonClicked(int caller_id,
                                       SpeechInputBubble::Button button);
  virtual void InfoBubbleFocusChanged(int caller_id);

 private:
  // Book-keeping for a single caller's request.
  struct SpeechInputRequest {
    // Receives results and completion notifications for this request.
    SpeechInputManagerDelegate* delegate;
    // Recognizer created for this request in StartRecognition().
    scoped_refptr<SpeechRecognizer> recognizer;
    bool is_active;  // Set to true when recording or recognition is going on.
  };

  // Private constructor to enforce singleton.
  friend struct DefaultSingletonTraits<SpeechInputManagerImpl>;
  SpeechInputManagerImpl();
  virtual ~SpeechInputManagerImpl();

  // Returns true if |caller_id| has an entry in |requests_|.
  bool HasPendingRequest(int caller_id) const;
  // Returns the delegate stored for |caller_id|; the id is expected to have a
  // pending request.
  SpeechInputManagerDelegate* GetDelegate(int caller_id) const;

  // Cancels the request and then tells its delegate that both recording and
  // recognition have completed.
  void CancelRecognitionAndInformDelegate(int caller_id);

  // Starts/restarts recognition for an existing request.
  void StartRecognitionForRequest(int caller_id);

  // NOTE(review): never read by any method in this file; per-request delegates
  // live in SpeechInputRequest instead. Candidate for removal.
  SpeechInputManagerDelegate* delegate_;
  typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
  // Pending requests keyed by caller id.
  SpeechRecognizerMap requests_;
  // Caller id of the request that is currently recording audio, or 0 if none.
  int recording_caller_id_;
  // Shows, updates and closes the per-request info bubbles.
  scoped_refptr<SpeechInputBubbleController> bubble_controller_;
  // Created lazily by the first StartRecognition() call.
  scoped_refptr<HardwareInfo> hardware_info_;
};

// Returns the shared manager instance, which is the Singleton-held
// SpeechInputManagerImpl.
SpeechInputManager* SpeechInputManager::Get() {
  SpeechInputManagerImpl* const instance =
      Singleton<SpeechInputManagerImpl>::get();
  return instance;
}

// Starts with no request recording (recording id 0) and creates the bubble
// controller with this object as its delegate. |delegate_| is explicitly
// nulled so that the raw pointer member never holds an indeterminate value.
SpeechInputManagerImpl::SpeechInputManagerImpl()
    : delegate_(NULL),
      recording_caller_id_(0),
      bubble_controller_(new SpeechInputBubbleController(
          ALLOW_THIS_IN_INITIALIZER_LIST(this))) {
}

// Cancels every request that is still pending at destruction time.
SpeechInputManagerImpl::~SpeechInputManagerImpl() {
  // CancelRecognition() erases the entry it is given, so keep cancelling
  // whichever request is first until the map is empty.
  while (!requests_.empty()) {
    const int first_caller_id = requests_.begin()->first;
    CancelRecognition(first_caller_id);
  }
}

// Returns true when |caller_id| has an entry in |requests_|.
bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const {
  // A std::map holds at most one entry per key, so count() is 0 or 1.
  return requests_.count(caller_id) != 0;
}

// Returns the delegate stored for |caller_id|. Callers are expected to pass an
// id that has a pending request; an unknown id trips the DCHECK in debug
// builds and yields NULL otherwise, instead of dereferencing the map's end()
// iterator.
SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate(
    int caller_id) const {
  SpeechRecognizerMap::const_iterator request = requests_.find(caller_id);
  DCHECK(request != requests_.end());
  if (request == requests_.end())
    return NULL;
  return request->second.delegate;
}

// Registers a new request for |caller_id|, shows its bubble, creates a
// recognizer for it and starts recognition. |caller_id| must not already have
// a pending request.
void SpeechInputManagerImpl::StartRecognition(
    SpeechInputManagerDelegate* delegate,
    int caller_id,
    int render_process_id,
    int render_view_id,
    const gfx::Rect& element_rect,
    const std::string& language,
    const std::string& grammar) {
  DCHECK(!HasPendingRequest(caller_id));

  bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id,
                                   element_rect);

  if (hardware_info_.get() == NULL) {
    // Hardware info is optional in speech input requests, so the fetch is
    // started asynchronously and recording proceeds without waiting for it.
    // This first request sends an empty hardware string; later requests pick
    // up the real value if the fetch has finished by then. That keeps the
    // user from being stalled by a long wait and disk seeks right after
    // clicking a UI element to start speaking.
    hardware_info_ = new HardwareInfo();
    hardware_info_->Refresh();
  }

  // operator[] creates the map entry for this caller.
  SpeechInputRequest& new_request = requests_[caller_id];
  new_request.delegate = delegate;
  new_request.is_active = false;
  new_request.recognizer = new SpeechRecognizer(
      this, caller_id, language, grammar, hardware_info_->value());

  StartRecognitionForRequest(caller_id);
}

void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) {
  DCHECK(HasPendingRequest(caller_id));

  // If we are currently recording audio for another caller, abort that cleanly.
  if (recording_caller_id_)
    CancelRecognitionAndInformDelegate(recording_caller_id_);

  if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) {
    bubble_controller_->SetBubbleMessage(
        caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
  } else {
    recording_caller_id_ = caller_id;
    requests_[caller_id].is_active = true;
    requests_[caller_id].recognizer->StartRecording();
  }
}

void SpeechInputManagerImpl::CancelRecognition(int caller_id) {
  DCHECK(HasPendingRequest(caller_id));
  if (requests_[caller_id].is_active)
    requests_[caller_id].recognizer->CancelRecognition();
  requests_.erase(caller_id);
  if (recording_caller_id_ == caller_id)
    recording_caller_id_ = 0;
  bubble_controller_->CloseBubble(caller_id);
}

void SpeechInputManagerImpl::StopRecording(int caller_id) {
  DCHECK(HasPendingRequest(caller_id));
  requests_[caller_id].recognizer->StopRecording();
}

void SpeechInputManagerImpl::SetRecognitionResult(
    int caller_id, bool error, const SpeechInputResultArray& result) {
  DCHECK(HasPendingRequest(caller_id));
  GetDelegate(caller_id)->SetRecognitionResult(caller_id, result);
}

void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) {
  DCHECK(recording_caller_id_ == caller_id);
  DCHECK(HasPendingRequest(caller_id));
  recording_caller_id_ = 0;
  GetDelegate(caller_id)->DidCompleteRecording(caller_id);
  bubble_controller_->SetBubbleRecognizingMode(caller_id);
}

void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) {
  GetDelegate(caller_id)->DidCompleteRecognition(caller_id);
  requests_.erase(caller_id);
  bubble_controller_->CloseBubble(caller_id);
}

void SpeechInputManagerImpl::OnRecognizerError(
    int caller_id, SpeechRecognizer::ErrorCode error) {
  if (caller_id == recording_caller_id_)
    recording_caller_id_ = 0;

  requests_[caller_id].is_active = false;

  int message_id;
  switch (error) {
    case SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE:
      message_id = IDS_SPEECH_INPUT_ERROR;
      break;
    case SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH:
      message_id = IDS_SPEECH_INPUT_NO_SPEECH;
      break;
    case SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS:
      message_id = IDS_SPEECH_INPUT_NO_RESULTS;
      break;
    default:
      NOTREACHED() << "unknown error " << error;
      return;
  }
  bubble_controller_->SetBubbleMessage(caller_id,
                                       l10n_util::GetStringUTF16(message_id));
}

// Recognizer callback for the request that is currently recording: switches
// that caller's bubble to recording mode.
void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
  DCHECK(HasPendingRequest(caller_id));
  DCHECK(recording_caller_id_ == caller_id);

  // Speech recognizer has gathered enough background audio so we can ask the
  // user to start speaking.
  bubble_controller_->SetBubbleRecordingMode(caller_id);
}

// Recognizer callback for the request that is currently recording: forwards
// |volume| to that caller's bubble.
void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume) {
  DCHECK(HasPendingRequest(caller_id));
  DCHECK_EQ(recording_caller_id_, caller_id);

  bubble_controller_->SetBubbleInputVolume(caller_id, volume);
}

void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
  SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
  CancelRecognition(caller_id);
  cur_delegate->DidCompleteRecording(caller_id);
  cur_delegate->DidCompleteRecognition(caller_id);
}

// Bubble callback (IO thread): handles a click on one of the bubble buttons.
// Cancel aborts the request and informs its delegate; Try Again restarts
// recognition for the same request. Other buttons are ignored.
void SpeechInputManagerImpl::InfoBubbleButtonClicked(
    int caller_id, SpeechInputBubble::Button button) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  // The caller id may no longer be pending: the user might have clicked more
  // than once, or recognition could have been cancelled for another reason
  // before this click was processed. Nothing to do in that case.
  if (!HasPendingRequest(caller_id))
    return;

  switch (button) {
    case SpeechInputBubble::BUTTON_CANCEL:
      CancelRecognitionAndInformDelegate(caller_id);
      break;
    case SpeechInputBubble::BUTTON_TRY_AGAIN:
      StartRecognitionForRequest(caller_id);
      break;
    default:
      break;
  }
}

// Bubble callback (IO thread): focus moved away from the bubble belonging to
// |caller_id|.
void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

  // The caller id may no longer be pending: the user might have clicked more
  // than once, or recognition could have ended for another reason before this
  // event was processed.
  if (!HasPendingRequest(caller_id))
    return;

  // Abort when this request is still recording, or when it is inactive (an
  // error message was being shown), since the user has switched focus. A
  // request whose recognition is already under way is left running so the
  // user can start speaking to another element while this one gets its
  // results in parallel.
  const bool should_abort =
      recording_caller_id_ == caller_id || !requests_[caller_id].is_active;
  if (should_abort)
    CancelRecognitionAndInformDelegate(caller_id);
}

}  // namespace speech_input