#!/usr/bin/env python
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This module provides abstraction of audio data."""
7348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
8348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiangimport contextlib
917a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiangimport copy
10443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiangimport numpy as np
11348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiangimport struct
12348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiangimport StringIO
13348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
14348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
# Table describing how to parse one sample out of raw data.
#
# Keys: The sample format name, as accepted by the aplay command.
# Values: A dict with the following entries:
#     message: Human-readable description of the sample format.
#     dtype_str: Byte-order and kind prefix of the numpy dtype; the sample
#                size in bytes is appended to it to form the full dtype
#                (e.g. '<i' + 4 gives '<i4').  See
#                https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html
#                for the supported data types.
#     size_bytes: Number of bytes occupied by one sample.
SAMPLE_FORMATS = {
        'S32_LE': {
                'message': 'Signed 32-bit integer, little-endian',
                'dtype_str': '<i',
                'size_bytes': 4,
        },
        'S16_LE': {
                'message': 'Signed 16-bit integer, little-endian',
                'dtype_str': '<i',
                'size_bytes': 2,
        },
}
34348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
35348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
def get_maximum_value_from_sample_format(sample_format):
    """Gets the maximum value from sample format.

    The result is 1 shifted left by (number of bits per sample - 1), e.g.
    32768 for a 2-byte sample.

    @param sample_format: A key in SAMPLE_FORMATS.

    @returns: The maximum value the sample can hold + 1.

    """
    bytes_per_sample = SAMPLE_FORMATS[sample_format]['size_bytes']
    # One bit of the sample width is reserved for the sign.
    highest_bit = bytes_per_sample * 8 - 1
    return 1 << highest_bit
468302dbfb793ff7b5686b720b27ec4207b68a761dCheng-Yi Chiang
478302dbfb793ff7b5686b720b27ec4207b68a761dCheng-Yi Chiang
class AudioRawDataError(Exception):
    """Exception type for errors in AudioRawData."""
5117a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang
5217a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang
class AudioRawData(object):
    """The abstraction of audio raw data.

    @property channel: The number of channels.
    @property channel_data: The samples of each channel, indexed by channel
                            first. E.g., The third sample in the second
                            channel is channel_data[1][2]. It is a list of
                            empty lists until binary data is read; after
                            read_binary it is a numpy array of shape
                            (channel, n_frames).
    @property sample_format: The sample format which should be one of the keys
                             in audio_data.SAMPLE_FORMATS.
    """
    def __init__(self, binary, channel, sample_format):
        """Initializes an AudioRawData.

        @param binary: A string containing binary data. If binary is not
                       empty, the samples in binary will be parsed and be
                       filled into channel_data. None or an empty string
                       leaves channel_data as one empty list per channel.
        @param channel: The number of channels.
        @param sample_format: One of the keys in audio_data.SAMPLE_FORMATS.
        """
        self.channel = channel
        # range (rather than xrange) exists on both Python 2 and Python 3.
        self.channel_data = [[] for _ in range(self.channel)]
        self.sample_format = sample_format
        if binary:
            self.read_binary(binary)


    def read_binary(self, binary):
        """Reads samples from binary and fills channel_data.

        Reads samples of fixed width from binary string into a numpy array
        and shapes them into each channel. The samples in binary are expected
        to be interleaved frame by frame, i.e. one sample per channel for the
        first frame, then one sample per channel for the second frame, etc.

        @param binary: A string containing binary data.

        @raises KeyError: If self.sample_format is not in SAMPLE_FORMATS.
        @raises ValueError: If the length of binary is not a multiple of the
                            sample size, or the number of samples is not a
                            multiple of the number of channels.
        """
        sample_format_dict = SAMPLE_FORMATS[self.sample_format]

        # The data type used to interpret the buffer in numpy. For example,
        # <i4 for 32-bit signed int.
        np_dtype = '%s%d' % (sample_format_dict['dtype_str'],
                             sample_format_dict['size_bytes'])

        # Reads data from the binary string into a 1-D array. frombuffer
        # replaces the deprecated binary mode of fromstring; the copy keeps
        # the array writable and independent of the input buffer, as it was
        # with fromstring.
        np_array = np.frombuffer(binary, dtype=np_dtype).copy()
        # Floor division keeps n_frames an integer on Python 3 as well.
        n_frames = len(np_array) // self.channel
        # Reshape np_array into an array of shape (n_frames, channel).
        np_array = np_array.reshape(n_frames, self.channel)
        # Transpose np_array so it becomes of shape (channel, n_frames).
        self.channel_data = np_array.transpose()
101