1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <math.h>
6
7#include <map>
8
9#include "base/debug/leak_annotations.h"
10#include "base/memory/scoped_ptr.h"
11#include "base/memory/singleton.h"
12#include "base/synchronization/lock.h"
13#include "chrome/browser/speech/tts_platform.h"
14#include "content/public/browser/browser_thread.h"
15
16#include "library_loaders/libspeechd.h"
17
18using content::BrowserThread;
19
20namespace {
21
// Error message stored in |error_| by Speak() when PlatformImplAvailable()
// reports that the platform implementation cannot be used.
const char kNotSupportedError[] =
    "Native speech synthesis not supported on this platform.";
24
// Identifies one native voice: the voice name reported by Speech Dispatcher
// together with the output module that was active when it was listed.
struct SPDChromeVoice {
  // Voice name, copied from SPDVoice::name.
  std::string name;
  // Output module name, copied from the spd_list_modules() entry.
  std::string module;
};
29
30}  // namespace
31
// Linux implementation of TtsPlatformImpl. All speech operations are forwarded
// to libspeechd, which is loaded at runtime through |libspeechd_loader_| and
// addressed through the connection |conn_|.
class TtsPlatformImplLinux : public TtsPlatformImpl {
 public:
  // TtsPlatformImpl overrides.
  virtual bool PlatformImplAvailable() OVERRIDE;
  virtual bool Speak(
      int utterance_id,
      const std::string& utterance,
      const std::string& lang,
      const VoiceData& voice,
      const UtteranceContinuousParameters& params) OVERRIDE;
  virtual bool StopSpeaking() OVERRIDE;
  virtual void Pause() OVERRIDE;
  virtual void Resume() OVERRIDE;
  virtual bool IsSpeaking() OVERRIDE;
  virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;

  // Translates a Speech Dispatcher notification into the matching TTS event
  // and reports it to the TtsController for the current utterance. Posted to
  // the UI thread by the static callbacks below.
  void OnSpeechEvent(SPDNotificationType type);

  // Get the single instance of this class.
  static TtsPlatformImplLinux* GetInstance();

 private:
  TtsPlatformImplLinux();
  virtual ~TtsPlatformImplLinux();

  // Initiate the connection with the speech dispatcher.
  void Initialize();

  // Resets the connection with speech dispatcher.
  void Reset();

  // Callback installed on |conn_| for begin/end/cancel/pause/resume events.
  // Records |type| in |current_notification_| and forwards it to
  // OnSpeechEvent() on the UI thread.
  static void NotificationCallback(size_t msg_id,
                                   size_t client_id,
                                   SPDNotificationType type);

  // Callback installed on |conn_| for index mark events. |index_mark| is
  // currently unused; |state| is handled like in NotificationCallback().
  static void IndexMarkCallback(size_t msg_id,
                                size_t client_id,
                                SPDNotificationType state,
                                char* index_mark);

  // Most recent notification type received by one of the static callbacks.
  // IsSpeaking() compares it against SPD_EVENT_BEGIN.
  static SPDNotificationType current_notification_;

  // Held while the connection is created, reset or closed.
  // PlatformImplAvailable() only tries to take it and reports "unavailable"
  // when it is already held.
  base::Lock initialization_lock_;
  // Loader giving access to the libspeechd entry points.
  LibSpeechdLoader libspeechd_loader_;
  // Connection returned by spd_open(); closed in the destructor and replaced
  // in Reset().
  SPDConnection* conn_;

  // These apply to the current utterance only.
  std::string utterance_;
  int utterance_id_;

  // Maps a key of the form "<voice name> <module>" to the voice's name and
  // module. Used to uniquely identify a voice across all available modules.
  // Created lazily by GetVoices().
  scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_;

  friend struct DefaultSingletonTraits<TtsPlatformImplLinux>;

  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
};
89
// static
// Starts out as SPD_EVENT_END so that IsSpeaking() returns false until a
// SPD_EVENT_BEGIN notification has been received.
SPDNotificationType TtsPlatformImplLinux::current_notification_ =
    SPD_EVENT_END;
93
94TtsPlatformImplLinux::TtsPlatformImplLinux()
95    : utterance_id_(0) {
96  BrowserThread::PostTask(BrowserThread::FILE,
97                          FROM_HERE,
98                          base::Bind(&TtsPlatformImplLinux::Initialize,
99                                     base::Unretained(this)));
100}
101
102void TtsPlatformImplLinux::Initialize() {
103  base::AutoLock lock(initialization_lock_);
104
105  if (!libspeechd_loader_.Load("libspeechd.so.2"))
106    return;
107
108  {
109    // spd_open has memory leaks which are hard to suppress.
110    // http://crbug.com/317360
111    ANNOTATE_SCOPED_MEMORY_LEAK;
112    conn_ = libspeechd_loader_.spd_open(
113        "chrome", "extension_api", NULL, SPD_MODE_THREADED);
114  }
115  if (!conn_)
116    return;
117
118  // Register callbacks for all events.
119  conn_->callback_begin =
120    conn_->callback_end =
121    conn_->callback_cancel =
122    conn_->callback_pause =
123    conn_->callback_resume =
124    &NotificationCallback;
125
126  conn_->callback_im = &IndexMarkCallback;
127
128  libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
129  libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
130  libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
131  libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
132  libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
133}
134
135TtsPlatformImplLinux::~TtsPlatformImplLinux() {
136  base::AutoLock lock(initialization_lock_);
137  if (conn_) {
138    libspeechd_loader_.spd_close(conn_);
139    conn_ = NULL;
140  }
141}
142
143void TtsPlatformImplLinux::Reset() {
144  base::AutoLock lock(initialization_lock_);
145  if (conn_)
146    libspeechd_loader_.spd_close(conn_);
147  conn_ = libspeechd_loader_.spd_open(
148      "chrome", "extension_api", NULL, SPD_MODE_THREADED);
149}
150
151bool TtsPlatformImplLinux::PlatformImplAvailable() {
152  if (!initialization_lock_.Try())
153    return false;
154  bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
155  initialization_lock_.Release();
156  return result;
157}
158
159bool TtsPlatformImplLinux::Speak(
160    int utterance_id,
161    const std::string& utterance,
162    const std::string& lang,
163    const VoiceData& voice,
164    const UtteranceContinuousParameters& params) {
165  if (!PlatformImplAvailable()) {
166    error_ = kNotSupportedError;
167    return false;
168  }
169
170  // Speech dispatcher's speech params are around 3x at either limit.
171  float rate = params.rate > 3 ? 3 : params.rate;
172  rate = params.rate < 0.334 ? 0.334 : rate;
173  float pitch = params.pitch > 3 ? 3 : params.pitch;
174  pitch = params.pitch < 0.334 ? 0.334 : pitch;
175
176  std::map<std::string, SPDChromeVoice>::iterator it =
177      all_native_voices_->find(voice.name);
178  if (it != all_native_voices_->end()) {
179    libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
180    libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
181  }
182
183  // Map our multiplicative range to Speech Dispatcher's linear range.
184  // .334 = -100.
185  // 3 = 100.
186  libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
187  libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
188
189  utterance_ = utterance;
190  utterance_id_ = utterance_id;
191
192  if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
193    Reset();
194    return false;
195  }
196  return true;
197}
198
199bool TtsPlatformImplLinux::StopSpeaking() {
200  if (!PlatformImplAvailable())
201    return false;
202  if (libspeechd_loader_.spd_stop(conn_) == -1) {
203    Reset();
204    return false;
205  }
206  return true;
207}
208
209void TtsPlatformImplLinux::Pause() {
210  if (!PlatformImplAvailable())
211    return;
212  libspeechd_loader_.spd_pause(conn_);
213}
214
215void TtsPlatformImplLinux::Resume() {
216  if (!PlatformImplAvailable())
217    return;
218  libspeechd_loader_.spd_resume(conn_);
219}
220
221bool TtsPlatformImplLinux::IsSpeaking() {
222  return current_notification_ == SPD_EVENT_BEGIN;
223}
224
225void TtsPlatformImplLinux::GetVoices(
226    std::vector<VoiceData>* out_voices) {
227  if (!all_native_voices_.get()) {
228    all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
229    char** modules = libspeechd_loader_.spd_list_modules(conn_);
230    if (!modules)
231      return;
232    for (int i = 0; modules[i]; i++) {
233      char* module = modules[i];
234      libspeechd_loader_.spd_set_output_module(conn_, module);
235      SPDVoice** native_voices =
236          libspeechd_loader_.spd_list_synthesis_voices(conn_);
237      if (!native_voices) {
238        free(module);
239        continue;
240      }
241      for (int j = 0; native_voices[j]; j++) {
242        SPDVoice* native_voice = native_voices[j];
243        SPDChromeVoice native_data;
244        native_data.name = native_voice->name;
245        native_data.module = module;
246        std::string key;
247        key.append(native_data.name);
248        key.append(" ");
249        key.append(native_data.module);
250        all_native_voices_->insert(
251            std::pair<std::string, SPDChromeVoice>(key, native_data));
252        free(native_voices[j]);
253      }
254      free(modules[i]);
255    }
256  }
257
258  for (std::map<std::string, SPDChromeVoice>::iterator it =
259           all_native_voices_->begin();
260       it != all_native_voices_->end();
261       it++) {
262    out_voices->push_back(VoiceData());
263    VoiceData& voice = out_voices->back();
264    voice.native = true;
265    voice.name = it->first;
266    voice.events.insert(TTS_EVENT_START);
267    voice.events.insert(TTS_EVENT_END);
268    voice.events.insert(TTS_EVENT_CANCELLED);
269    voice.events.insert(TTS_EVENT_MARKER);
270    voice.events.insert(TTS_EVENT_PAUSE);
271    voice.events.insert(TTS_EVENT_RESUME);
272  }
273}
274
275void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
276  TtsController* controller = TtsController::GetInstance();
277  switch (type) {
278  case SPD_EVENT_BEGIN:
279    controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string());
280    break;
281  case SPD_EVENT_RESUME:
282    controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string());
283    break;
284  case SPD_EVENT_END:
285    controller->OnTtsEvent(
286        utterance_id_, TTS_EVENT_END, utterance_.size(), std::string());
287    break;
288  case SPD_EVENT_PAUSE:
289    controller->OnTtsEvent(
290        utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string());
291    break;
292  case SPD_EVENT_CANCEL:
293    controller->OnTtsEvent(
294        utterance_id_, TTS_EVENT_CANCELLED, 0, std::string());
295    break;
296  case SPD_EVENT_INDEX_MARK:
297    controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string());
298    break;
299  }
300}
301
302// static
303void TtsPlatformImplLinux::NotificationCallback(
304    size_t msg_id, size_t client_id, SPDNotificationType type) {
305  // We run Speech Dispatcher in threaded mode, so these callbacks should always
306  // be in a separate thread.
307  if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
308    current_notification_ = type;
309    BrowserThread::PostTask(
310        BrowserThread::UI,
311        FROM_HERE,
312        base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
313                   base::Unretained(TtsPlatformImplLinux::GetInstance()),
314                   type));
315  }
316}
317
318// static
319void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
320                                                      size_t client_id,
321                                                      SPDNotificationType state,
322                                                      char* index_mark) {
323  // TODO(dtseng): index_mark appears to specify an index type supplied by a
324  // client. Need to explore how this is used before hooking it up with existing
325  // word, sentence events.
326  // We run Speech Dispatcher in threaded mode, so these callbacks should always
327  // be in a separate thread.
328  if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
329    current_notification_ = state;
330    BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
331        base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
332        base::Unretained(TtsPlatformImplLinux::GetInstance()),
333        state));
334  }
335}
336
337// static
338TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
339  return Singleton<TtsPlatformImplLinux,
340                   LeakySingletonTraits<TtsPlatformImplLinux> >::get();
341}
342
343// static
344TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
345  return TtsPlatformImplLinux::GetInstance();
346}
347