1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/browser/speech/audio_encoder.h"
6
7#include "base/basictypes.h"
8#include "base/logging.h"
9#include "base/memory/scoped_ptr.h"
10#include "base/stl_util.h"
11#include "base/strings/string_number_conversions.h"
12#include "content/browser/speech/audio_buffer.h"
13#include "third_party/flac/include/FLAC/stream_encoder.h"
14#include "third_party/speex/include/speex/speex.h"
15
16namespace content {
17namespace {
18
19//-------------------------------- FLACEncoder ---------------------------------
20
// MIME type prefix for FLAC output; the sampling rate is appended to it.
const char kContentTypeFLAC[] = "audio/x-flac; rate=";

// FLAC compression level. Level 0 favours encoding speed.
const int kFLACCompressionLevel = 0;
23
24class FLACEncoder : public AudioEncoder {
25 public:
26  FLACEncoder(int sampling_rate, int bits_per_sample);
27  virtual ~FLACEncoder();
28  virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
29  virtual void Flush() OVERRIDE;
30
31 private:
32  static FLAC__StreamEncoderWriteStatus WriteCallback(
33      const FLAC__StreamEncoder* encoder,
34      const FLAC__byte buffer[],
35      size_t bytes,
36      unsigned samples,
37      unsigned current_frame,
38      void* client_data);
39
40  FLAC__StreamEncoder* encoder_;
41  bool is_encoder_initialized_;
42
43  DISALLOW_COPY_AND_ASSIGN(FLACEncoder);
44};
45
46FLAC__StreamEncoderWriteStatus FLACEncoder::WriteCallback(
47    const FLAC__StreamEncoder* encoder,
48    const FLAC__byte buffer[],
49    size_t bytes,
50    unsigned samples,
51    unsigned current_frame,
52    void* client_data) {
53  FLACEncoder* me = static_cast<FLACEncoder*>(client_data);
54  DCHECK(me->encoder_ == encoder);
55  me->encoded_audio_buffer_.Enqueue(buffer, bytes);
56  return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
57}
58
59FLACEncoder::FLACEncoder(int sampling_rate, int bits_per_sample)
60    : AudioEncoder(std::string(kContentTypeFLAC) +
61                   base::IntToString(sampling_rate),
62                   bits_per_sample),
63      encoder_(FLAC__stream_encoder_new()),
64      is_encoder_initialized_(false) {
65  FLAC__stream_encoder_set_channels(encoder_, 1);
66  FLAC__stream_encoder_set_bits_per_sample(encoder_, bits_per_sample);
67  FLAC__stream_encoder_set_sample_rate(encoder_, sampling_rate);
68  FLAC__stream_encoder_set_compression_level(encoder_, kFLACCompressionLevel);
69
70  // Initializing the encoder will cause sync bytes to be written to
71  // its output stream, so we wait until the first call to this method
72  // before doing so.
73}
74
75FLACEncoder::~FLACEncoder() {
76  FLAC__stream_encoder_delete(encoder_);
77}
78
79void FLACEncoder::Encode(const AudioChunk& raw_audio) {
80  DCHECK_EQ(raw_audio.bytes_per_sample(), 2);
81  if (!is_encoder_initialized_) {
82    const FLAC__StreamEncoderInitStatus encoder_status =
83        FLAC__stream_encoder_init_stream(encoder_, WriteCallback, NULL, NULL,
84                                         NULL, this);
85    DCHECK_EQ(encoder_status, FLAC__STREAM_ENCODER_INIT_STATUS_OK);
86    is_encoder_initialized_ = true;
87  }
88
89  // FLAC encoder wants samples as int32s.
90  const int num_samples = raw_audio.NumSamples();
91  scoped_ptr<FLAC__int32[]> flac_samples(new FLAC__int32[num_samples]);
92  FLAC__int32* flac_samples_ptr = flac_samples.get();
93  for (int i = 0; i < num_samples; ++i)
94    flac_samples_ptr[i] = static_cast<FLAC__int32>(raw_audio.GetSample16(i));
95
96  FLAC__stream_encoder_process(encoder_, &flac_samples_ptr, num_samples);
97}
98
99void FLACEncoder::Flush() {
100  FLAC__stream_encoder_finish(encoder_);
101}
102
103//-------------------------------- SpeexEncoder --------------------------------
104
105const char* const kContentTypeSpeex = "audio/x-speex-with-header-byte; rate=";
106const int kSpeexEncodingQuality = 8;
107const int kMaxSpeexFrameLength = 110;  // (44kbps rate sampled at 32kHz).
108
109// Since the frame length gets written out as a byte in the encoded packet,
110// make sure it is within the byte range.
111COMPILE_ASSERT(kMaxSpeexFrameLength <= 0xFF, invalidLength);
112
113class SpeexEncoder : public AudioEncoder {
114 public:
115  explicit SpeexEncoder(int sampling_rate, int bits_per_sample);
116  virtual ~SpeexEncoder();
117  virtual void Encode(const AudioChunk& raw_audio) OVERRIDE;
118  virtual void Flush() OVERRIDE {}
119
120 private:
121  void* encoder_state_;
122  SpeexBits bits_;
123  int samples_per_frame_;
124  char encoded_frame_data_[kMaxSpeexFrameLength + 1];  // +1 for the frame size.
125  DISALLOW_COPY_AND_ASSIGN(SpeexEncoder);
126};
127
128SpeexEncoder::SpeexEncoder(int sampling_rate, int bits_per_sample)
129    : AudioEncoder(std::string(kContentTypeSpeex) +
130                   base::IntToString(sampling_rate),
131                   bits_per_sample) {
132   // speex_bits_init() does not initialize all of the |bits_| struct.
133   memset(&bits_, 0, sizeof(bits_));
134   speex_bits_init(&bits_);
135   encoder_state_ = speex_encoder_init(&speex_wb_mode);
136   DCHECK(encoder_state_);
137   speex_encoder_ctl(encoder_state_, SPEEX_GET_FRAME_SIZE, &samples_per_frame_);
138   DCHECK(samples_per_frame_ > 0);
139   int quality = kSpeexEncodingQuality;
140   speex_encoder_ctl(encoder_state_, SPEEX_SET_QUALITY, &quality);
141   int vbr = 1;
142   speex_encoder_ctl(encoder_state_, SPEEX_SET_VBR, &vbr);
143   memset(encoded_frame_data_, 0, sizeof(encoded_frame_data_));
144}
145
146SpeexEncoder::~SpeexEncoder() {
147  speex_bits_destroy(&bits_);
148  speex_encoder_destroy(encoder_state_);
149}
150
151void SpeexEncoder::Encode(const AudioChunk& raw_audio) {
152  spx_int16_t* src_buffer =
153      const_cast<spx_int16_t*>(raw_audio.SamplesData16());
154  int num_samples = raw_audio.NumSamples();
155  // Drop incomplete frames, typically those which come in when recording stops.
156  num_samples -= (num_samples % samples_per_frame_);
157  for (int i = 0; i < num_samples; i += samples_per_frame_) {
158    speex_bits_reset(&bits_);
159    speex_encode_int(encoder_state_, src_buffer + i, &bits_);
160
161    // Encode the frame and place the size of the frame as the first byte. This
162    // is the packet format for MIME type x-speex-with-header-byte.
163    int frame_length = speex_bits_write(&bits_, encoded_frame_data_ + 1,
164                                        kMaxSpeexFrameLength);
165    encoded_frame_data_[0] = static_cast<char>(frame_length);
166    encoded_audio_buffer_.Enqueue(
167        reinterpret_cast<uint8*>(&encoded_frame_data_[0]), frame_length + 1);
168  }
169}
170
171}  // namespace
172
173AudioEncoder* AudioEncoder::Create(Codec codec,
174                                   int sampling_rate,
175                                   int bits_per_sample) {
176  if (codec == CODEC_FLAC)
177    return new FLACEncoder(sampling_rate, bits_per_sample);
178  return new SpeexEncoder(sampling_rate, bits_per_sample);
179}
180
181AudioEncoder::AudioEncoder(const std::string& mime_type, int bits_per_sample)
182    : encoded_audio_buffer_(1), /* Byte granularity of encoded samples. */
183      mime_type_(mime_type),
184      bits_per_sample_(bits_per_sample) {
185}
186
187AudioEncoder::~AudioEncoder() {
188}
189
190scoped_refptr<AudioChunk> AudioEncoder::GetEncodedDataAndClear() {
191  return encoded_audio_buffer_.DequeueAll();
192}
193
194}  // namespace content
195