sndhdr.py revision 0de65807e6bdc5254f5a7e99b2f39adeea6b883b
"""Routines to help recognize sound files.
2
3Function whathdr() recognizes various types of sound file headers.
4It understands almost all headers that SOX can decode.
5
6The return tuple contains the following items, in this order:
7- file type (as SOX understands it)
8- sampling rate (0 if unknown or hard to decode)
9- number of channels (0 if unknown or hard to decode)
10- number of frames in the file (-1 if unknown or hard to decode)
11- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW
12
13If the file doesn't have a recognizable type, it returns None.
14If the file can't be opened, IOError is raised.
15
16To compute the total time, divide the number of frames by the
17sampling rate (a frame contains a sample for each channel).
18
19Function what() calls whathdr().  (It used to also use some
20heuristics for raw data, but this doesn't work very well.)
21
22Finally, the function test() is a simple main program that calls
23what() for all files mentioned on the argument list.  For directory
24arguments it calls what() for all files in that directory.  Default
25argument is "." (testing all files in the current directory).  The
26option -r tells it to recurse down directories found inside
27explicitly given directories.
28"""
29
30# The file structure is top-down except that the test program and its
31# subroutine come last.
32
33__all__ = ["what","whathdr"]
34
def what(filename):
    """Guess the type of a sound file.

    Thin wrapper: the result is exactly what whathdr(filename) returns,
    i.e. a 5-item tuple describing the file, or None when no header
    test recognizes it.  Exceptions raised by whathdr() propagate.
    """
    return whathdr(filename)
39
40
def whathdr(filename):
    """Recognize sound headers.

    Read the first 512 bytes of *filename* and pass them, together with
    the open file object, to each function in ``tests`` in registration
    order.  Return the first true result, or None when no test
    recognizes the header.

    IOError raised while opening or reading the file propagates to the
    caller.  The file is always closed before returning.
    """
    # Sound headers are binary data.  Text mode may translate line
    # endings or stop at a Ctrl-Z byte on platforms that distinguish the
    # two modes (the VOC signature itself contains '\032').
    f = open(filename, 'rb')
    try:
        h = f.read(512)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
        return None
    finally:
        # Release the handle on every path, including the early return
        # and any exception raised by a test function.
        f.close()
50
51
52#-----------------------------------#
53# Subroutines per sound header type #
54#-----------------------------------#
55
56tests = []
57
58def test_aifc(h, f):
59    import aifc
60    if h[:4] != 'FORM':
61        return None
62    if h[8:12] == 'AIFC':
63        fmt = 'aifc'
64    elif h[8:12] == 'AIFF':
65        fmt = 'aiff'
66    else:
67        return None
68    f.seek(0)
69    try:
70        a = aifc.openfp(f, 'r')
71    except (EOFError, aifc.Error):
72        return None
73    return (fmt, a.getframerate(), a.getnchannels(), \
74            a.getnframes(), 8*a.getsampwidth())
75
76tests.append(test_aifc)
77
78
79def test_au(h, f):
80    if h[:4] == '.snd':
81        f = get_long_be
82    elif h[:4] in ('\0ds.', 'dns.'):
83        f = get_long_le
84    else:
85        return None
86    type = 'au'
87    hdr_size = f(h[4:8])
88    data_size = f(h[8:12])
89    encoding = f(h[12:16])
90    rate = f(h[16:20])
91    nchannels = f(h[20:24])
92    sample_size = 1 # default
93    if encoding == 1:
94        sample_bits = 'U'
95    elif encoding == 2:
96        sample_bits = 8
97    elif encoding == 3:
98        sample_bits = 16
99        sample_size = 2
100    else:
101        sample_bits = '?'
102    frame_size = sample_size * nchannels
103    return type, rate, nchannels, data_size/frame_size, sample_bits
104
105tests.append(test_au)
106
107
108def test_hcom(h, f):
109    if h[65:69] != 'FSSD' or h[128:132] != 'HCOM':
110        return None
111    divisor = get_long_be(h[128+16:128+20])
112    return 'hcom', 22050/divisor, 1, -1, 8
113
114tests.append(test_hcom)
115
116
117def test_voc(h, f):
118    if h[:20] != 'Creative Voice File\032':
119        return None
120    sbseek = get_short_le(h[20:22])
121    rate = 0
122    if 0 <= sbseek < 500 and h[sbseek] == '\1':
123        ratecode = ord(h[sbseek+4])
124        rate = int(1000000.0 / (256 - ratecode))
125    return 'voc', rate, 1, -1, 8
126
127tests.append(test_voc)
128
129
130def test_wav(h, f):
131    # 'RIFF' <len> 'WAVE' 'fmt ' <len>
132    if h[:4] != 'RIFF' or h[8:12] != 'WAVE' or h[12:16] != 'fmt ':
133        return None
134    style = get_short_le(h[20:22])
135    nchannels = get_short_le(h[22:24])
136    rate = get_long_le(h[24:28])
137    sample_bits = get_short_le(h[34:36])
138    return 'wav', rate, nchannels, -1, sample_bits
139
140tests.append(test_wav)
141
142
143def test_8svx(h, f):
144    if h[:4] != 'FORM' or h[8:12] != '8SVX':
145        return None
146    # Should decode it to get #channels -- assume always 1
147    return '8svx', 0, 1, 0, 8
148
149tests.append(test_8svx)
150
151
152def test_sndt(h, f):
153    if h[:5] == 'SOUND':
154        nsamples = get_long_le(h[8:12])
155        rate = get_short_le(h[20:22])
156        return 'sndt', rate, 1, nsamples, 8
157
158tests.append(test_sndt)
159
160
161def test_sndr(h, f):
162    if h[:2] == '\0\0':
163        rate = get_short_le(h[2:4])
164        if 4000 <= rate <= 25000:
165            return 'sndr', rate, 1, -1, 8
166
167tests.append(test_sndr)
168
169
170#---------------------------------------------#
171# Subroutines to extract numbers from strings #
172#---------------------------------------------#
173
def get_long_be(s):
    """Decode the first four characters of s as a big-endian integer.

    s[0] is the most significant byte.  Characters past the fourth are
    ignored; a shorter s raises IndexError.
    """
    value = 0
    for index in (0, 1, 2, 3):
        value = (value << 8) | ord(s[index])
    return value
176
def get_long_le(s):
    """Decode the first four characters of s as a little-endian integer.

    s[0] is the least significant byte.  Characters past the fourth are
    ignored; a shorter s raises IndexError.
    """
    value = 0
    # Walk from the most significant byte (s[3]) down to s[0].
    for index in (3, 2, 1, 0):
        value = (value << 8) | ord(s[index])
    return value
179
def get_short_be(s):
    """Decode the first two characters of s as a big-endian integer.

    s[0] is the high byte and s[1] the low byte.  Characters past the
    second are ignored; a shorter s raises IndexError.
    """
    high = ord(s[0])
    low = ord(s[1])
    return (high << 8) | low
182
def get_short_le(s):
    """Decode the first two characters of s as a little-endian integer.

    s[0] is the low byte and s[1] the high byte.  Characters past the
    second are ignored; a shorter s raises IndexError.
    """
    high = ord(s[1])
    low = ord(s[0])
    return (high << 8) | low
185
186
187#--------------------#
188# Small test program #
189#--------------------#
190
def test():
    """Command-line driver for the module.

    An optional leading '-r' argument is removed from sys.argv and
    enables recursion.  testall() is then run on the remaining
    arguments, or on ['.'] when there are none.  On KeyboardInterrupt a
    notice is written to stderr and the process exits with status 1.
    """
    import sys
    argv = sys.argv  # same list object, so the deletion below is visible globally
    recursive = 0
    if len(argv) > 1 and argv[1] == '-r':
        del argv[1:2]
        recursive = 1
    try:
        names = argv[1:]
        if not names:
            names = ['.']
        testall(names, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
205
206def testall(list, recursive, toplevel):
207    import sys
208    import os
209    for filename in list:
210        if os.path.isdir(filename):
211            print filename + '/:',
212            if recursive or toplevel:
213                print 'recursing down:'
214                import glob
215                names = glob.glob(os.path.join(filename, '*'))
216                testall(names, recursive, 0)
217            else:
218                print '*** directory (use -r) ***'
219        else:
220            print filename + ':',
221            sys.stdout.flush()
222            try:
223                print what(filename)
224            except IOError:
225                print '*** not found ***'
226
227if __name__ == '__main__':
228    test()
229