#!/usr/bin/env python
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This module provides abstraction of audio data."""

import contextlib
import copy
import struct

try:
    import StringIO
except ImportError:
    # Python 3 has no StringIO module. Fall back to io so that this module
    # still imports; StringIO.StringIO then resolves to io.StringIO.
    import io as StringIO

import numpy as np


# The dict containing information on how to parse sample from raw data.
#
# Keys: The sample format as in aplay command.
# Values: A dict containing:
#     message: Human-readable sample format.
#     dtype_str: Byte order and kind prefix of the numpy dtype. It is joined
#                with size_bytes to build the full dtype string, e.g. '<i'
#                and 4 give '<i4'. Check
#                https://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html
#                for supported data type.
#     size_bytes: Number of bytes for one sample.
SAMPLE_FORMATS = dict(
    S32_LE=dict(
        message='Signed 32-bit integer, little-endian',
        dtype_str='<i',
        size_bytes=4),
    S16_LE=dict(
        message='Signed 16-bit integer, little-endian',
        dtype_str='<i',
        size_bytes=2))


def get_maximum_value_from_sample_format(sample_format):
    """Gets the maximum value from sample format.

    @param sample_format: A key in SAMPLE_FORMATS.

    @returns: The maximum value the sample can hold + 1, i.e.
              2 ** (bits per sample - 1).

    @raises KeyError: If sample_format is not a key in SAMPLE_FORMATS.

    """
    size_bits = SAMPLE_FORMATS[sample_format]['size_bytes'] * 8
    return 1 << (size_bits - 1)


class AudioRawDataError(Exception):
    """Error in AudioRawData."""
    pass


class AudioRawData(object):
    """The abstraction of audio raw data.

    @property channel: The number of channels.
    @property channel_data: The samples in each channel. E.g., The third
                            sample in the second channel is
                            channel_data[1][2]. It is a list of empty lists
                            until read_binary is called; afterwards it is a
                            numpy array of shape (channel, n_frames).
    @property sample_format: The sample format which should be one of the keys
                             in audio_data.SAMPLE_FORMATS.
    """
    def __init__(self, binary, channel, sample_format):
        """Initializes an AudioRawData.

        @param binary: A string containing binary data. If binary is not
                       empty (and not None), the samples in binary will be
                       parsed and be filled into channel_data.
        @param channel: The number of channels.
        @param sample_format: One of the keys in audio_data.SAMPLE_FORMATS.
        """
        self.channel = channel
        # range (not xrange) so the constructor works on Python 2 and 3.
        self.channel_data = [[] for _ in range(self.channel)]
        self.sample_format = sample_format
        if binary:
            self.read_binary(binary)

    def read_binary(self, binary):
        """Reads samples from binary and fills channel_data.

        Reads samples of fixed width from binary string into a numpy array
        and shapes them into each channel.

        @param binary: A string containing binary data. The samples of all
                       channels are interleaved frame by frame.

        @raises ValueError: If the length of binary is not a multiple of the
                            sample size, or the number of samples is not a
                            multiple of the number of channels.
        """
        sample_format_dict = SAMPLE_FORMATS[self.sample_format]

        # The numpy data type of one sample. For example,
        # <i4 for 32-bit signed int.
        np_dtype = '%s%d' % (sample_format_dict['dtype_str'],
                             sample_format_dict['size_bytes'])

        # Reads data from a string into 1-D array. np.fromstring is
        # deprecated for binary input; np.frombuffer returns a read-only
        # view, so copy it to keep the samples writable as before.
        np_array = np.frombuffer(binary, dtype=np_dtype).copy()
        # Floor division keeps n_frames an integer even under true division.
        n_frames = len(np_array) // self.channel
        # Reshape np_array into an array of shape (n_frames, channel).
        np_array = np_array.reshape(n_frames, self.channel)
        # Transpose np_array so it becomes of shape (channel, n_frames).
        self.channel_data = np_array.transpose()