1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/browser/speech/audio_buffer.h"
6#include "content/browser/speech/endpointer/endpointer.h"
7#include "testing/gtest/include/gtest/gtest.h"
8
9namespace {
10const int kFrameRate = 50;  // 20 ms long frames for AMR encoding.
11const int kSampleRate = 8000;  // 8 k samples per second for AMR encoding.
12
13// At 8 sample per second a 20 ms frame is 160 samples, which corrsponds
14// to the AMR codec.
15const int kFrameSize = kSampleRate / kFrameRate;  // 160 samples.
16COMPILE_ASSERT(kFrameSize == 160, invalid_frame_size);
17}
18
19namespace content {
20
21class FrameProcessor {
22 public:
23  // Process a single frame of test audio samples.
24  virtual EpStatus ProcessFrame(int64 time, int16* samples, int frame_size) = 0;
25};
26
27void RunEndpointerEventsTest(FrameProcessor* processor) {
28  int16 samples[kFrameSize];
29
30  // We will create a white noise signal of 150 frames. The frames from 50 to
31  // 100 will have more power, and the endpointer should fire on those frames.
32  const int kNumFrames = 150;
33
34  // Create a random sequence of samples.
35  srand(1);
36  float gain = 0.0;
37  int64 time = 0;
38  for (int frame_count = 0; frame_count < kNumFrames; ++frame_count) {
39    // The frames from 50 to 100 will have more power, and the endpointer
40    // should detect those frames as speech.
41    if ((frame_count >= 50) && (frame_count < 100)) {
42      gain = 2000.0;
43    } else {
44      gain = 1.0;
45    }
46    // Create random samples.
47    for (int i = 0; i < kFrameSize; ++i) {
48      float randNum = static_cast<float>(rand() - (RAND_MAX / 2)) /
49          static_cast<float>(RAND_MAX);
50      samples[i] = static_cast<int16>(gain * randNum);
51    }
52
53    EpStatus ep_status = processor->ProcessFrame(time, samples, kFrameSize);
54    time += static_cast<int64>(kFrameSize * (1e6 / kSampleRate));
55
56    // Log the status.
57    if (20 == frame_count)
58      EXPECT_EQ(EP_PRE_SPEECH, ep_status);
59    if (70 == frame_count)
60      EXPECT_EQ(EP_SPEECH_PRESENT, ep_status);
61    if (120 == frame_count)
62      EXPECT_EQ(EP_PRE_SPEECH, ep_status);
63  }
64}
65
// This test instantiates and initializes a stand-alone endpointer module.
// The test creates frames of random noise and sends them to the endpointer
// module. The energy of the first 50 frames is low, followed by 50 high
// energy frames, and another 50 low energy frames.
// We test that the correct start and end frames were detected.
71class EnergyEndpointerFrameProcessor : public FrameProcessor {
72 public:
73  explicit EnergyEndpointerFrameProcessor(EnergyEndpointer* endpointer)
74      : endpointer_(endpointer) {}
75
76  virtual EpStatus ProcessFrame(int64 time,
77                                int16* samples,
78                                int frame_size) OVERRIDE {
79    endpointer_->ProcessAudioFrame(time, samples, kFrameSize, NULL);
80    int64 ep_time;
81    return endpointer_->Status(&ep_time);
82  }
83
84 private:
85  EnergyEndpointer* endpointer_;
86};
87
88TEST(EndpointerTest, TestEnergyEndpointerEvents) {
89  // Initialize endpointer and configure it. We specify the parameters
90  // here for a 20ms window, and a 20ms step size, which corrsponds to
91  // the narrow band AMR codec.
92  EnergyEndpointerParams ep_config;
93  ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
94  ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
95  ep_config.set_endpoint_margin(0.2f);
96  ep_config.set_onset_window(0.15f);
97  ep_config.set_speech_on_window(0.4f);
98  ep_config.set_offset_window(0.15f);
99  ep_config.set_onset_detect_dur(0.09f);
100  ep_config.set_onset_confirm_dur(0.075f);
101  ep_config.set_on_maintain_dur(0.10f);
102  ep_config.set_offset_confirm_dur(0.12f);
103  ep_config.set_decision_threshold(100.0f);
104  EnergyEndpointer endpointer;
105  endpointer.Init(ep_config);
106
107  endpointer.StartSession();
108
109  EnergyEndpointerFrameProcessor frame_processor(&endpointer);
110  RunEndpointerEventsTest(&frame_processor);
111
112  endpointer.EndSession();
113};
114
115// Test endpointer wrapper class.
116class EndpointerFrameProcessor : public FrameProcessor {
117 public:
118  explicit EndpointerFrameProcessor(Endpointer* endpointer)
119      : endpointer_(endpointer) {}
120
121  virtual EpStatus ProcessFrame(int64 time,
122                                int16* samples,
123                                int frame_size) OVERRIDE {
124    scoped_refptr<AudioChunk> frame(
125        new AudioChunk(reinterpret_cast<uint8*>(samples), kFrameSize * 2, 2));
126    endpointer_->ProcessAudio(*frame.get(), NULL);
127    int64 ep_time;
128    return endpointer_->Status(&ep_time);
129  }
130
131 private:
132  Endpointer* endpointer_;
133};
134
135TEST(EndpointerTest, TestEmbeddedEndpointerEvents) {
136  const int kSampleRate = 8000;  // 8 k samples per second for AMR encoding.
137
138  Endpointer endpointer(kSampleRate);
139  const int64 kMillisecondsPerMicrosecond = 1000;
140  const int64 short_timeout = 300 * kMillisecondsPerMicrosecond;
141  endpointer.set_speech_input_possibly_complete_silence_length(short_timeout);
142  const int64 long_timeout = 500 * kMillisecondsPerMicrosecond;
143  endpointer.set_speech_input_complete_silence_length(long_timeout);
144  endpointer.StartSession();
145
146  EndpointerFrameProcessor frame_processor(&endpointer);
147  RunEndpointerEventsTest(&frame_processor);
148
149  endpointer.EndSession();
150}
151
152}  // namespace content
153