// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
5bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen#include "chrome/browser/extensions/extension_tts_api.h"
6bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
7bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen#include <atlbase.h>
8bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen#include <atlcom.h>
9bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen#include <sapi.h>
10bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
11ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/memory/singleton.h"
12bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen#include "base/string_number_conversions.h"
13731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "base/utf_string_conversions.h"
14bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen#include "base/values.h"
15ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/win/scoped_comptr.h"
16bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
17bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsennamespace util = extension_tts_api_util;
18bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
19731df977c0511bca2206b5f333555b1205ff1f43Iain Merrickclass ExtensionTtsPlatformImplWin : public ExtensionTtsPlatformImpl {
20bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen public:
21731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  virtual bool Speak(
22731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      const std::string& utterance,
23731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      const std::string& language,
24731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      const std::string& gender,
25731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      double rate,
26731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      double pitch,
27731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      double volume);
28bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
29731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  virtual bool StopSpeaking();
30bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
31731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  virtual bool IsSpeaking();
32bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
33731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Get the single instance of this class.
34731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  static ExtensionTtsPlatformImplWin* GetInstance();
35bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
36bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen private:
37731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  ExtensionTtsPlatformImplWin();
38731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  virtual ~ExtensionTtsPlatformImplWin() {}
39731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
40ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
41bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen  bool paused_;
42731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
43731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  friend struct DefaultSingletonTraits<ExtensionTtsPlatformImplWin>;
44731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
45731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  DISALLOW_COPY_AND_ASSIGN(ExtensionTtsPlatformImplWin);
46bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen};
47bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
48731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// static
49731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickExtensionTtsPlatformImpl* ExtensionTtsPlatformImpl::GetInstance() {
50731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  return ExtensionTtsPlatformImplWin::GetInstance();
51731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
52731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
53731df977c0511bca2206b5f333555b1205ff1f43Iain Merrickbool ExtensionTtsPlatformImplWin::Speak(
54731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    const std::string& src_utterance,
55731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    const std::string& language,
56731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    const std::string& gender,
57731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    double rate,
58731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    double pitch,
59731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    double volume) {
60731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  std::wstring utterance = UTF8ToUTF16(src_utterance);
61731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
62731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (!speech_synthesizer_)
63731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    return false;
64731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
65731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // Speech API equivalents for kGenderKey and kLanguageNameKey do not
66731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  // exist and thus are not supported.
67731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
68731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (rate >= 0.0) {
69731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    // The TTS api allows a range of -10 to 10 for speech rate.
70731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    speech_synthesizer_->SetRate(static_cast<int32>(rate * 20 - 10));
71bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen  }
72bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
73731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (pitch >= 0.0) {
74731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    // The TTS api allows a range of -10 to 10 for speech pitch.
75731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    // TODO(dtseng): cleanup if we ever use any other properties that
76731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    // require xml.
77731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    std::wstring pitch_value =
78731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick        base::IntToString16(static_cast<int>(pitch * 20 - 10));
79731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    utterance = L"<pitch absmiddle=\"" + pitch_value + L"\">" +
80731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick        utterance + L"</pitch>";
81731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  }
82731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
83731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  if (volume >= 0.0) {
84731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    // The TTS api allows a range of 0 to 100 for speech volume.
85731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    speech_synthesizer_->SetVolume(static_cast<uint16>(volume * 100));
86731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  }
87731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
8872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  if (paused_) {
89731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    speech_synthesizer_->Resume();
9072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen    paused_ = false;
9172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  }
92731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  speech_synthesizer_->Speak(
93731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      utterance.c_str(), SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
94731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
95731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  return true;
96bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen}
97bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
98731df977c0511bca2206b5f333555b1205ff1f43Iain Merrickbool ExtensionTtsPlatformImplWin::StopSpeaking() {
9972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  if (speech_synthesizer_ && !paused_) {
100731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    speech_synthesizer_->Pause();
101731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    paused_ = true;
102bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen  }
103bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen  return true;
104bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen}
105bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen
106731df977c0511bca2206b5f333555b1205ff1f43Iain Merrickbool ExtensionTtsPlatformImplWin::IsSpeaking() {
10772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  if (speech_synthesizer_ && !paused_) {
10872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen    SPVOICESTATUS status;
10972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen    HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
11072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen    if (result == S_OK) {
11172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen      if (status.dwRunningState == 0 ||  // 0 == waiting to speak
11272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen          status.dwRunningState == SPRS_IS_SPEAKING) {
11372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen        return true;
11472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen      }
11572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen    }
11672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  }
117bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen  return false;
118bda42a81ee5f9b20d2bebedcf0bbef1e30e5b293Kristian Monsen}
119731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
120731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickExtensionTtsPlatformImplWin::ExtensionTtsPlatformImplWin()
121731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  : speech_synthesizer_(NULL),
122731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    paused_(false) {
123731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  CoCreateInstance(
124731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      CLSID_SpVoice,
125731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      NULL,
126731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      CLSCTX_SERVER,
127731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      IID_ISpVoice,
128731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick      reinterpret_cast<void**>(&speech_synthesizer_));
129731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
130731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
131731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick// static
132731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickExtensionTtsPlatformImplWin* ExtensionTtsPlatformImplWin::GetInstance() {
133731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  return Singleton<ExtensionTtsPlatformImplWin>::get();
134731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
135