#  libFLAC - Free Lossless Audio Codec library
#  Copyright (C) 2004,2005,2006,2007  Josh Coalson
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#
#  - Redistributions of source code must retain the above copyright
#  notice, this list of conditions and the following disclaimer.
#
#  - Redistributions in binary form must reproduce the above copyright
#  notice, this list of conditions and the following disclaimer in the
#  documentation and/or other materials provided with the distribution.
#
#  - Neither the name of the Xiph.org Foundation nor the names of its
#  contributors may be used to endorse or promote products derived from
#  this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
#  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
#  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
#  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
#  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Code section, instruction alignment, and the two exported entry points.
# Syntax: GNU as, PowerPC with AltiVec; the @function symbol type implies ELF.
.text
	.align 2
.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16
.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16, @function

.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8
.type _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8, @function
# ---------------------------------------------------------------------------
# _FLAC__lpc_restore_signal_asm_ppc_altivec_16
#
# PowerPC/AltiVec assembly version of FLAC__lpc_restore_signal();
# see src/libFLAC/lpc.c.
#
# In:
#	r3: residual[]
#	r4: data_len
#	r5: qlp_coeff[]
#	r6: order
#	r7: lp_quantization
#	r8: data[]
#
# Uses: r0 (saved VRSAVE), r9-r11, r31 (saved and restored), cr0, ctr,
#	v0-v21.  VRSAVE is restored on exit.
#
# This version requires bps<=16 (or actual bps<=15 for mid-side coding, since
# that uses an extra bit): the products are formed with vmulosh, which only
# multiplies the low signed halfword of each word.
#
# It should be fast: the inner "loop" is fully unrolled, costing three vector
# instructions (all arithmetic) for each group of four coefficients beyond the
# first, and all of the coefficients and all relevant history stay in
# registers, so the outer loop has only one load from memory (the residual).
#
# I have not yet run this through simg4, so there may be some avoidable stalls,
# and there may be a somewhat more clever way to do the outer loop.
#
# The entry point into the unrolled chain is an absolute label address loaded
# with lis/la, so this code is not position independent; that may prevent
# dynamic loading, and there may be a more elegant method.
# ---------------------------------------------------------------------------
_FLAC__lpc_restore_signal_asm_ppc_altivec_16:

	# NOTE(review): r31 and the scratch word below are stored beneath r1
	# without allocating a stack frame; this relies on the target ABI
	# providing a red zone below the stack pointer - confirm for the target.
	stmw r31,-4(r1) # save r31, which is used as a scratch register

	addi r9,r1,-28
	li r31,0xf
	andc r9,r9,r31 # r9: quadword-aligned address below r1 for scratch stack data

	slwi r6,r6,2 # adjust for word size: r6 = order*4 bytes
	slwi r4,r4,2
	add r4,r4,r8 # r4 = data+data_len (end pointer)

	mfspr r0,256 # cache old vrsave
	addis r31,0,0xffff
	ori r31,r31,0xfc00 # 0xfffffc00: v0-v21
	mtspr 256,r31 # declare VRs in vrsave

	cmplw cr0,r8,r4 # i<data_len
	bc 4,0,L1400 # nothing to do if data is not below the end pointer

	# load coefficients into v0-v7 and initial history into v8-v15

	# Build the mask applied to the last coefficient vector: all ones shifted
	# right by (16-(data&15))&15 bytes.
	# NOTE(review): the mask comes from the alignment of data[], not from
	# order; presumably callers guarantee that data-order is quadword
	# aligned - confirm against the caller.
	li r31,0xf
	and r31,r8,r31 # r31: byte offset of data within its quadword
	li r11,16
	subf r31,r31,r11 # r31: 16 - that offset
	slwi r31,r31,3 # convert to bits for vsro
	li r10,-4
	stw r31,-4(r9) # pass the shift count to the vector unit through memory
	lvewx v0,r10,r9 # lands in the last word of v0 (r9-4 is 12 mod 16)
	vspltisb v18,-1
	vsro v18,v18,v0 # v18: mask vector

	# Build a permute control that reverses the order of the four words.
	li r31,0x8
	lvsl v0,0,r31
	vsldoi v0,v0,v0,12
	li r31,0xc
	lvsl v1,0,r31
	vspltisb v2,0
	vspltisb v3,-1
	vmrglw v2,v2,v3
	vsel v0,v1,v0,v2 # v0: reversal permutation vector

	add r10,r5,r6 # r10 = qlp_coeff+order (end of coefficients)
	lvsl v17,0,r5 # v17: coefficient alignment permutation vector
	vperm v17,v17,v17,v0 # v17: reversal coefficient alignment permutation vector

	mr r11,r8
	lvsl v16,0,r11 # v16: history alignment permutation vector

	# First group: coefficients 0-3 (word-reversed) into v0 and the four
	# samples just before data[] into v8.  r5 walks forward through the
	# coefficients; r11 walks backward through the history.
	lvx v0,0,r5
	addi r5,r5,16
	lvx v1,0,r5
	vperm v0,v0,v1,v17
	lvx v8,0,r11
	addi r11,r11,-16
	lvx v9,0,r11
	vperm v8,v9,v8,v16
	cmplw cr0,r5,r10
	bc 12,0,L1101 # more coefficients remain
	vand v0,v0,v18 # mask the last coefficient vector
	# lis/la, matching the other dispatch sites: @ha is the carry-adjusted
	# high half and must be paired with a sign-extending add of @l, not ori.
	lis r31,L1307@ha
	la r31,L1307@l(r31)
	b L1199

L1101:
	addi r5,r5,16
	lvx v2,0,r5
	vperm v1,v1,v2,v17
	addi r11,r11,-16
	lvx v10,0,r11
	vperm v9,v10,v9,v16
	cmplw cr0,r5,r10
	bc 12,0,L1102
	vand v1,v1,v18
	# lis/la for the same reason as above (@ha must not be paired with ori)
	lis r31,L1306@ha
	la r31,L1306@l(r31)
	b L1199

L1102:
	addi r5,r5,16
	lvx v3,0,r5
	vperm v2,v2,v3,v17
	addi r11,r11,-16
	lvx v11,0,r11
	vperm v10,v11,v10,v16
	cmplw cr0,r5,r10
	bc 12,0,L1103
	vand v2,v2,v18
	lis r31,L1305@ha
	la r31,L1305@l(r31)
	b L1199

L1103:
	addi r5,r5,16
	lvx v4,0,r5
	vperm v3,v3,v4,v17
	addi r11,r11,-16
	lvx v12,0,r11
	vperm v11,v12,v11,v16
	cmplw cr0,r5,r10
	bc 12,0,L1104
	vand v3,v3,v18
	lis r31,L1304@ha
	la r31,L1304@l(r31)
	b L1199

L1104:
	addi r5,r5,16
	lvx v5,0,r5
	vperm v4,v4,v5,v17
	addi r11,r11,-16
	lvx v13,0,r11
	vperm v12,v13,v12,v16
	cmplw cr0,r5,r10
	bc 12,0,L1105
	vand v4,v4,v18
	lis r31,L1303@ha
	la r31,L1303@l(r31)
	b L1199

L1105:
	addi r5,r5,16
	lvx v6,0,r5
	vperm v5,v5,v6,v17
	addi r11,r11,-16
	lvx v14,0,r11
	vperm v13,v14,v13,v16
	cmplw cr0,r5,r10
	bc 12,0,L1106
	vand v5,v5,v18
	lis r31,L1302@ha
	la r31,L1302@l(r31)
	b L1199

L1106:
	addi r5,r5,16
	lvx v7,0,r5
	vperm v6,v6,v7,v17
	addi r11,r11,-16
	lvx v15,0,r11
	vperm v14,v15,v14,v16
	cmplw cr0,r5,r10
	bc 12,0,L1107
	vand v6,v6,v18
	lis r31,L1301@ha
	la r31,L1301@l(r31)
	b L1199

L1107:
	# Eighth and last group: no further length check is made here.
	addi r5,r5,16
	lvx v19,0,r5 # v19 is only a temporary here
	vperm v7,v7,v19,v17
	addi r11,r11,-16
	lvx v19,0,r11
	vperm v15,v19,v15,v16
	vand v7,v7,v18
	lis r31,L1300@ha
	la r31,L1300@l(r31)

L1199:
	mtctr r31 # ctr: entry point into the unrolled chain, reused every sample

	# set up invariant vectors
	vspltish v16,0 # v16: zero vector

	li r10,-12
	lvsr v17,r10,r8 # v17: result shift vector
	lvsl v18,r10,r3 # v18: residual shift back vector

	li r10,-4
	stw r7,-4(r9)
	lvewx v19,r10,r9 # v19: lp_quantization vector

	# Outer loop: one output sample per iteration.
L1200:
	vmulosh v20,v0,v8 # v20: sum vector
	bcctr 20,0 # jump to the entry point selected above

	# Unrolled multiply-accumulate chain, oldest history group first.  Each
	# step also shifts its history vector left one word, pulling in the
	# first word of the next newer vector.
L1300:
	vmulosh v21,v7,v15
	vsldoi v15,v15,v14,4 # increment history
	vaddsws v20,v20,v21

L1301:
	vmulosh v21,v6,v14
	vsldoi v14,v14,v13,4
	vaddsws v20,v20,v21

L1302:
	vmulosh v21,v5,v13
	vsldoi v13,v13,v12,4
	vaddsws v20,v20,v21

L1303:
	vmulosh v21,v4,v12
	vsldoi v12,v12,v11,4
	vaddsws v20,v20,v21

L1304:
	vmulosh v21,v3,v11
	vsldoi v11,v11,v10,4
	vaddsws v20,v20,v21

L1305:
	vmulosh v21,v2,v10
	vsldoi v10,v10,v9,4
	vaddsws v20,v20,v21

L1306:
	vmulosh v21,v1,v9
	vsldoi v9,v9,v8,4
	vaddsws v20,v20,v21

L1307:
	vsumsws v20,v20,v16 # v20[3]: sum
	vsraw v20,v20,v19 # v20[3]: sum >> lp_quantization

	lvewx v21,0,r3 # v21[n]: *residual
	vperm v21,v21,v21,v18 # v21[3]: *residual
	vaddsws v20,v21,v20 # v20[3]: *residual + (sum >> lp_quantization)
	vsldoi v18,v18,v18,4 # increment shift vector

	vperm v21,v20,v20,v17 # v21[n]: shift for storage
	vsldoi v17,v17,v17,12 # increment shift vector
	stvewx v21,0,r8 # *data = result

	vsldoi v20,v20,v20,12 # rotate the result into word 0
	vsldoi v8,v8,v20,4 # insert value onto history

	addi r3,r3,4 # ++residual
	addi r8,r8,4 # ++data
	cmplw cr0,r8,r4 # i<data_len
	bc 12,0,L1200

L1400:
	mtspr 256,r0 # restore old vrsave
	lmw r31,-4(r1) # restore r31
	blr

.size _FLAC__lpc_restore_signal_asm_ppc_altivec_16, .-_FLAC__lpc_restore_signal_asm_ppc_altivec_16
# ---------------------------------------------------------------------------
# _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8
#
# In:
#	r3: residual[]
#	r4: data_len
#	r5: qlp_coeff[]
#	r6: order
#	r7: lp_quantization
#	r8: data[]
#
# Uses: r0 (saved VRSAVE), r9-r11, r31 (saved and restored), cr0, ctr,
#	v0-v9.  VRSAVE is restored on exit.
#
# Same algorithm as _FLAC__lpc_restore_signal_asm_ppc_altivec_16(), including
# the bps<=16 restriction that comes from multiplying with vmulosh.
# This version assumes order<=8; it uses fewer vector registers, which should
# save time in context switches, and has less code, which may improve
# instruction caching.
#
# The entry point into the unrolled chain is an absolute label address loaded
# with lis/la, so this code is not position independent.
# ---------------------------------------------------------------------------
_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8:

	# NOTE(review): r31 and the scratch word below are stored beneath r1
	# without allocating a stack frame; this relies on the target ABI
	# providing a red zone below the stack pointer - confirm for the target.
	stmw r31,-4(r1) # save r31, which is used as a scratch register

	addi r9,r1,-28
	li r31,0xf
	andc r9,r9,r31 # r9: quadword-aligned address below r1 for scratch stack data

	slwi r6,r6,2 # adjust for word size: r6 = order*4 bytes
	slwi r4,r4,2
	add r4,r4,r8 # r4 = data+data_len (end pointer)

	mfspr r0,256 # cache old vrsave
	addis r31,0,0xffc0
	ori r31,r31,0x0000 # 0xffc00000: v0-v9
	mtspr 256,r31 # declare VRs in vrsave

	cmplw cr0,r8,r4 # i<data_len
	bc 4,0,L2400 # nothing to do if data is not below the end pointer

	# load coefficients into v0-v1 and initial history into v2-v3

	# Build the mask applied to the last coefficient vector: all ones shifted
	# right by (16-(data&15))&15 bytes.
	# NOTE(review): the mask comes from the alignment of data[], not from
	# order; presumably callers guarantee that data-order is quadword
	# aligned - confirm against the caller.
	li r31,0xf
	and r31,r8,r31 # r31: byte offset of data within its quadword
	li r11,16
	subf r31,r31,r11 # r31: 16 - that offset
	slwi r31,r31,3 # convert to bits for vsro
	li r10,-4
	stw r31,-4(r9) # pass the shift count to the vector unit through memory
	lvewx v0,r10,r9 # lands in the last word of v0 (r9-4 is 12 mod 16)
	vspltisb v6,-1
	vsro v6,v6,v0 # v6: mask vector

	# Build a permute control that reverses the order of the four words.
	li r31,0x8
	lvsl v0,0,r31
	vsldoi v0,v0,v0,12
	li r31,0xc
	lvsl v1,0,r31
	vspltisb v2,0
	vspltisb v3,-1
	vmrglw v2,v2,v3
	vsel v0,v1,v0,v2 # v0: reversal permutation vector

	add r10,r5,r6 # r10 = qlp_coeff+order (end of coefficients)
	lvsl v5,0,r5 # v5: coefficient alignment permutation vector
	vperm v5,v5,v5,v0 # v5: reversal coefficient alignment permutation vector

	mr r11,r8
	lvsl v4,0,r11 # v4: history alignment permutation vector

	# First group: coefficients 0-3 (word-reversed) into v0 and the four
	# samples just before data[] into v2.
	lvx v0,0,r5
	addi r5,r5,16
	lvx v1,0,r5
	vperm v0,v0,v1,v5
	lvx v2,0,r11
	addi r11,r11,-16
	lvx v3,0,r11
	vperm v2,v3,v2,v4
	cmplw cr0,r5,r10
	bc 12,0,L2101 # more coefficients remain
	vand v0,v0,v6 # mask the last coefficient vector
	lis r31,L2301@ha
	la r31,L2301@l(r31)
	b L2199

L2101:
	# Second and last group: no further length check is made here.
	addi r5,r5,16
	lvx v7,0,r5 # v7 is only a temporary here
	vperm v1,v1,v7,v5
	addi r11,r11,-16
	lvx v7,0,r11
	vperm v3,v7,v3,v4
	vand v1,v1,v6
	lis r31,L2300@ha
	la r31,L2300@l(r31)

L2199:
	mtctr r31 # ctr: entry point into the unrolled chain, reused every sample

	# set up invariant vectors
	vspltish v4,0 # v4: zero vector

	li r10,-12
	lvsr v5,r10,r8 # v5: result shift vector
	lvsl v6,r10,r3 # v6: residual shift back vector

	li r10,-4
	stw r7,-4(r9)
	lvewx v7,r10,r9 # v7: lp_quantization vector

	# Outer loop: one output sample per iteration.
L2200:
	vmulosh v8,v0,v2 # v8: sum vector
	bcctr 20,0 # jump to the entry point selected above

L2300:
	vmulosh v9,v1,v3
	vsldoi v3,v3,v2,4 # increment history
	vaddsws v8,v8,v9

L2301:
	vsumsws v8,v8,v4 # v8[3]: sum
	vsraw v8,v8,v7 # v8[3]: sum >> lp_quantization

	lvewx v9,0,r3 # v9[n]: *residual
	vperm v9,v9,v9,v6 # v9[3]: *residual
	vaddsws v8,v9,v8 # v8[3]: *residual + (sum >> lp_quantization)
	vsldoi v6,v6,v6,4 # increment shift vector

	vperm v9,v8,v8,v5 # v9[n]: shift for storage
	vsldoi v5,v5,v5,12 # increment shift vector
	stvewx v9,0,r8 # *data = result

	vsldoi v8,v8,v8,12 # rotate the result into word 0
	vsldoi v2,v2,v8,4 # insert value onto history

	addi r3,r3,4 # ++residual
	addi r8,r8,4 # ++data
	cmplw cr0,r8,r4 # i<data_len
	bc 12,0,L2200

L2400:
	mtspr 256,r0 # restore old vrsave
	lmw r31,-4(r1) # restore r31
	blr

.size _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8, .-_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8