1348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang#!/usr/bin/env python
217a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang# Copyright 2014 The Chromium OS Authors. All rights reserved.
3348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang# Use of this source code is governed by a BSD-style license that can be
4348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang# found in the LICENSE file.
5348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
6348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang"""This module provides abstraction of audio data."""
7348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
8348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiangimport contextlib
917a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiangimport copy
10443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiangimport numpy as np
11348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiangimport struct
12348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiangimport StringIO
13348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
14348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
# Table describing how to parse a single sample out of raw audio data.
#
# Keys: The sample format name, spelled as in the aplay command.
# Values: A dict containing:
#     message: Human-readable sample format.
#     struct_format: Format used in struct.unpack.
#     size_bytes: Number of bytes for one sample.
SAMPLE_FORMATS = {
        'S32_LE': {
                'message': 'Signed 32-bit integer, little-endian',
                'struct_format': '<i',
                'size_bytes': 4,
        },
        'S16_LE': {
                'message': 'Signed 16-bit integer, little-endian',
                'struct_format': '<h',
                'size_bytes': 2,
        },
}
32348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
33348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
def get_maximum_value_from_sample_format(sample_format):
    """Gets the upper bound of the value range of a sample format.

    The sample width is looked up in SAMPLE_FORMATS and treated as a signed
    integer width, so the result is 2 to the power of (number of bits - 1).

    @param sample_format: A key in SAMPLE_FORMATS.

    @returns: The maximum value the sample can hold + 1. E.g., 32768 for a
              sample of 2 bytes.

    """
    bytes_per_sample = SAMPLE_FORMATS[sample_format]['size_bytes']
    # One bit of the sample is the sign bit; the remaining bits hold the
    # magnitude.
    magnitude_bits = bytes_per_sample * 8 - 1
    return 1 << magnitude_bits
448302dbfb793ff7b5686b720b27ec4207b68a761dCheng-Yi Chiang
458302dbfb793ff7b5686b720b27ec4207b68a761dCheng-Yi Chiang
class AudioRawDataError(Exception):
    """Raised when an operation on AudioRawData fails."""
4917a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang
5017a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang
51348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiangclass AudioRawData(object):
52348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang    """The abstraction of audio raw data.
53348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
54348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang    @property channel: The number of channels.
    @property channel_data: The samples in each channel, indexed by channel
                            and then by sample. E.g., The third sample in the
                            second channel is channel_data[1][2]. It is a list
                            of empty lists until binary data is read, and a
                            2-D numpy array of shape (channel, n_frames)
                            after read_binary.
58348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang    @property sample_format: The sample format which should be one of the keys
59348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang                             in audio_data.SAMPLE_FORMATS.
60348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang    """
61348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang    def __init__(self, binary, channel, sample_format):
62348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        """Initializes an AudioRawData.
63348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
6417a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang        @param binary: A string containing binary data. If binary is not None,
6517a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang                       The samples in binary will be parsed and be filled into
6617a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang                       channel_data.
67348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        @param channel: The number of channels.
68348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        @param sample_format: One of the keys in audio_data.SAMPLE_FORMATS.
69348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        """
70348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        self.channel = channel
71348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        self.channel_data = [[] for _ in xrange(self.channel)]
72348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        self.sample_format = sample_format
7317a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang        if binary:
7417a2527ea44ed461cec2cf62577079f59e00a9b3Cheng-Yi Chiang            self.read_binary(binary)
75348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
76348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
77348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang    def read_binary(self, binary):
78348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        """Reads samples from binary and fills channel_data.
79348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
80443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        Reads samples of fixed width from binary string into a numpy array
81443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        and shapes them into each channel.
82348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang
83348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        @param binary: A string containing binary data.
84348270a6c87f6a50bd02f5f7f36f33bb89ef39e3Cheng-Yi Chiang        """
85443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        sample_format_dict = SAMPLE_FORMATS[self.sample_format]
86443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang
87443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        # The data type used in numpy fromstring function. For example,
88443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        # <i4 for 32-bit signed int.
89443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        np_dtype = '%s%d' % (sample_format_dict['struct_format'],
90443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang                             sample_format_dict['size_bytes'])
91443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang
92443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        # Reads data from a string into 1-D array.
93443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        np_array = np.fromstring(binary, dtype=np_dtype)
94443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        n_frames = len(np_array) / self.channel
95443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        # Reshape np_array into an array of shape (n_frames, channel).
96443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        np_array = np_array.reshape(n_frames, self.channel)
97443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        # Transpose np_arrya so it becomes of shape (channel, n_frames).
98443d9083210bd0acd94e1e38153eb1d3a5312f90Cheng-Yi Chiang        self.channel_data = np_array.transpose()
99