@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		PrePostMDCT_v7.s
@
@	Content:	PreMDCT and PostMDCT functions, ARMv7 (NEON) assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

24e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	.section .text
25e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	.global	PreMDCT
262857b47a2731579772c76d46285660972c0ba23dBen Cheng	.fnstart
27e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
28e2e838afcf03e603a41a0455846eaf9614537c16Mans RullgardPreMDCT:
29e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	stmdb     sp!, {r4 - r11, lr}
302857b47a2731579772c76d46285660972c0ba23dBen Cheng	.save	  {r4 - r11, lr}
312857b47a2731579772c76d46285660972c0ba23dBen Cheng	fstmfdd   sp!, {d8 - d15}
322857b47a2731579772c76d46285660972c0ba23dBen Cheng	.vsave	  {d8 - d15}
33b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
34e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add         r9, r0, r1, lsl #2
35e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub         r3, r9, #32
36e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
37e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	movs        r1, r1, asr #2
38b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	beq         PreMDCT_END
39b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
40e2e838afcf03e603a41a0455846eaf9614537c16Mans RullgardPreMDCT_LOOP:
41e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD4.I32			{d0, d2, d4, d6}, [r2]!				@ cosa = *csptr++@ sina = *csptr++@
42e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD4.I32			{d1, d3, d5, d7}, [r2]!				@ cosb = *csptr++@ sinb = *csptr++@
43e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD2.I32			{d8, d9, d10, d11}, [r0]			@ tr1 = *(buf0 + 0)@ ti2 = *(buf0 + 1)@
44e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD2.I32			{d13, d15}, [r3]!					@ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@
45e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD2.I32			{d12, d14}, [r3]!					@ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@
46b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
47b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VREV64.32			Q8, Q7
48e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VREV64.32			Q9, Q6
49e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
50b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
51e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q10, Q0, Q4								@ MULHIGH(cosa, tr1)
52e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q11, Q1, Q8								@ MULHIGH(sina, ti1)
53e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q12, Q0, Q8								@ MULHIGH(cosa, ti1)
54e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q13, Q1, Q4								@ MULHIGH(sina, tr1)
55b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
56e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VADD.S32			Q0, Q10, Q11						@ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@
57e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VSUB.S32			Q1, Q12, Q13						@ *buf0++ = MULHIGH(cosa, ti1) - MULHIGH(sina, tr1)@
58b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
59e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VST2.I32			{d0, d1, d2, d3}, [r0]!
60e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub						r3, r3, #32
61b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
62e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q10, Q2, Q9										@ MULHIGH(cosb, tr2)
63e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q11, Q3, Q5										@ MULHIGH(sinb, ti2)
64e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q12, Q2, Q5										@ MULHIGH(cosb, ti2)
65e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q13, Q3, Q9										@ MULHIGH(sinb, tr2)
66b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
67e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VADD.S32			Q0, Q10, Q11									@ MULHIGH(cosa, tr2) + MULHIGH(sina, ti2)@
68e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VSUB.S32			Q1, Q12, Q13									@ MULHIGH(cosa, ti2) - MULHIGH(sina, tr2)@
69b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
70e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VREV64.32			Q3, Q1
71e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VREV64.32			Q2, Q0
72b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
73b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VST2.I32		{d5, d7}, [r3]!
74b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VST2.I32		{d4, d6}, [r3]!
75b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
76e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	subs     		r1, r1, #4
77b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	sub		  		r3, r3, #64
78e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	bne       	PreMDCT_LOOP
79b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
80e2e838afcf03e603a41a0455846eaf9614537c16Mans RullgardPreMDCT_END:
812857b47a2731579772c76d46285660972c0ba23dBen Cheng	fldmfdd   sp!, {d8 - d15}
82e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldmia     sp!, {r4 - r11, pc}
83e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	@ENDP  @ |PreMDCT|
842857b47a2731579772c76d46285660972c0ba23dBen Cheng	.fnend
85e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
86e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	.section .text
87e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	.global	PostMDCT
882857b47a2731579772c76d46285660972c0ba23dBen Cheng	.fnstart
89e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
90e2e838afcf03e603a41a0455846eaf9614537c16Mans RullgardPostMDCT:
91e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	stmdb     sp!, {r4 - r11, lr}
922857b47a2731579772c76d46285660972c0ba23dBen Cheng	.save	  {r4 - r11, lr}
932857b47a2731579772c76d46285660972c0ba23dBen Cheng	fstmfdd   sp!, {d8 - d15}
942857b47a2731579772c76d46285660972c0ba23dBen Cheng	.vsave	  {d8 - d15}
95b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
96e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add         r9, r0, r1, lsl #2
97e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub         r3, r9, #32
98e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
99e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	movs        r1, r1, asr #2
100e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	beq         PostMDCT_END
101b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
102e2e838afcf03e603a41a0455846eaf9614537c16Mans RullgardPostMDCT_LOOP:
103e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD4.I32			{d0, d2, d4, d6}, [r2]!				@ cosa = *csptr++@ sina = *csptr++@
104e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD4.I32			{d1, d3, d5, d7}, [r2]!				@ cosb = *csptr++@ sinb = *csptr++@
105e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD2.I32			{d8, d9, d10, d11}, [r0]			@ tr1 = *(zbuf1 + 0)@ ti1 = *(zbuf1 + 1)@
106e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VLD2.I32			{d13, d15}, [r3]!							@ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@
107b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VLD2.I32			{d12, d14}, [r3]!							@ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@
108b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
109b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VREV64.32			Q8, Q6
110b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VREV64.32			Q9, Q7
111e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
112e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q10, Q0, Q4										@ MULHIGH(cosa, tr1)
113e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q11, Q1, Q5										@ MULHIGH(sina, ti1)
114e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q12, Q0, Q5										@ MULHIGH(cosa, ti1)
115e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q13, Q1, Q4										@ MULHIGH(sina, tr1)
116b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
117e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VADD.S32			Q0, Q10, Q11									@ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@
118e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VSUB.S32			Q5, Q13, Q12									@ *buf1-- = MULHIGH(sina, tr1) - MULHIGH(cosa, ti1)@
119b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
120e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q10, Q2, Q8										@ MULHIGH(cosb, tr2)
121e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q11, Q3, Q9										@ MULHIGH(sinb, ti2)
122e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q12, Q2, Q9										@ MULHIGH(cosb, ti2)
123e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VQDMULH.S32		Q13, Q3, Q8										@ MULHIGH(sinb, tr2)
124b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
125e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VADD.S32			Q4, Q10, Q11									@ *buf1-- = MULHIGH(cosa, tr2) + MULHIGH(sina, ti2)@
126b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VSUB.S32			Q1, Q13, Q12									@ *buf0++ = MULHIGH(sina, tr2) - MULHIGH(cosa, ti2)@
127b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
128e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VREV64.32			Q2, Q4
129b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VREV64.32			Q3, Q5
130b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
131b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	sub						r3, r3, #32
132e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	VST2.I32			{d0, d1, d2, d3}, [r0]!
133b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
134b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VST2.I32			{d5, d7}, [r3]!
135b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	VST2.I32			{d4, d6}, [r3]!
136b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
137e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	subs     			r1, r1, #4
138b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	sub		  			r3, r3, #64
139e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	bne       	PostMDCT_LOOP
140e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
141e2e838afcf03e603a41a0455846eaf9614537c16Mans RullgardPostMDCT_END:
1422857b47a2731579772c76d46285660972c0ba23dBen Cheng	fldmfdd   sp!, {d8 - d15}
143e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldmia     sp!, {r4 - r11, pc}
144e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
145e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	@ENDP  		@ |PostMDCT|
1462857b47a2731579772c76d46285660972c0ba23dBen Cheng	.fnend
147