1474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;
2474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;
4474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  Use of this source code is governed by a BSD-style license
5474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  that can be found in the LICENSE file in the root of the source
6474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  tree. An additional intellectual property rights grant can be found
7474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  in the file PATENTS.  All contributing project authors may
8474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  be found in the AUTHORS file in the root of the source tree.
9474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;
10474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
11474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
12474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl sixtap_predict_ppc
13474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl sixtap_predict8x4_ppc
14474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl sixtap_predict8x8_ppc
15474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl sixtap_predict16x16_ppc
16474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# load_c: fetch one vector from the table at LABEL.
;#   V     - vector register that receives the data
;#   LABEL - symbol to load from; its address is built with lis/la (@ha/@l)
;#   OFF   - index operand handed to lvx (callers in this file pass a GPR
;#           holding a byte offset, or the literal 0)
;#   R0    - GPR scratch, left holding the @ha half of the address
;#   R1    - GPR left holding the full address of LABEL
17474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro load_c V, LABEL, OFF, R0, R1
18474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lis     \R0, \LABEL@ha
19474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      \R1, \LABEL@l(\R0)
20474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \V, \OFF, \R1
21474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
22474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# load_hfilter: load two consecutive 16-byte vectors from the HFilter table.
;#   V0 - receives the vector at HFilter + r5
;#   V1 - receives the vector at HFilter + r5 + 16
;# Expects r5 to already hold the byte offset of the wanted table entry.
;# Side effects: r9 is used as scratch, r10 is left holding the address of
;# HFilter, and r5 is advanced by 16.
23474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro load_hfilter V0, V1
24474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c \V0, HFilter, r5, r9, r10
25474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
26474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r5,  r5, 16
27474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \V1, r5, r10
28474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
29474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
30474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Vertical filtering
;# Vprolog: set up the constants used by vinterp_no_store.
;#   Expects r6 to hold the byte offset of the wanted VFilter entry.
;#   On exit: v0..v5 each hold one filter byte (bytes 0..5 of the entry)
;#            replicated across all 16 lanes, and v6 holds the halfword
;#            rounding constant 0x0040.
;#   Clobbers r3 (scratch) and r10 (left holding the address of VFilter).
31474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Vprolog
32474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v0, VFilter, r6, r3, r10
33474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v5 is only a temporary here (8 << 3 = 64); it is overwritten with
    ;# filter byte 5 below.
34474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v5, 8
35474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v6, 3
36474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslh    v6, v5, v6      ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
37474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v0 is splatted last because it is the source of every other splat.
38474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v1, v0, 1
39474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v2, v0, 2
40474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v3, v0, 3
41474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v4, v0, 4
42474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v5, v0, 5
43474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v0, v0, 0
44474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
45474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# vpre_load: run Vprolog, then preload five 16-byte rows into v10..v14.
;#   Expects r9 to point at the first row; rows are read at r9+0, +16, +32,
;#   +48 and +64.
;#   On exit r9 has been advanced by 64 (it points at the row held in v14)
;#   and r10 holds 16.
46474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vpre_load
47474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Vprolog
48474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10,  16
49474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v10,   0, r9    ;# v10..v14 = first 5 rows
50474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v11, r10, r9
51474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9,   r9, 32
52474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v12,   0, r9
53474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v13, r10, r9
54474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9,   r9, 32
55474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v14,   0, r9
56474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
57474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Msum: multiply-accumulate unsigned bytes into two halfword accumulators.
;#   Re  - accumulator for the even-numbered byte lanes
;#   Ro  - accumulator for the odd-numbered byte lanes
;#   V   - first byte operand
;#   T   - second byte operand
;#   TMP - scratch vector register, overwritten
58474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Msum Re, Ro, V, T, TMP
59474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org                                ;# (Re,Ro) += (V*T)
60474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuleub \TMP, \V, \T        ;# trashes TMP
61474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vadduhm \Re, \Re, \TMP      ;# Re = evens, saturation unnecessary
62474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuloub \TMP, \V, \T
63474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vadduhm \Ro, \Ro, \TMP      ;# Ro = odds
64474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
65474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# vinterp_no_store: six-tap filter across six input vectors, 16 bytes wide.
;#   P0..P5 - the six input vectors; P0 is overwritten with the 8-bit result.
;#   Uses v0..v5 as the per-tap multipliers and v6 as the rounding constant
;#   (the values Vprolog leaves there), and v7 as the halfword shift count.
;#   NOTE(review): v7 is not set anywhere in the visible part of the file;
;#   the "divide by 128" note implies it must hold 7 - confirm at the caller.
;#   The P1 and P4 products are accumulated separately and subtracted, so
;#   v1 and v4 are expected to hold the magnitudes of the negative taps.
;#   Clobbers v8 and v16..v19.
66474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vinterp_no_store P0 P1 P2 P3 P4 P5
67474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuleub  v8, \P0, v0        ;# 64 + 4 positive taps
68474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vadduhm v16, v6, v8
69474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuloub  v8, \P0, v0
70474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vadduhm v17, v6, v8
71474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Msum v16, v17, \P2, v2, v8
72474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Msum v16, v17, \P3, v3, v8
73474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Msum v16, v17, \P5, v5, v8
74474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
75474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuleub v18, \P1, v1        ;# 2 negative taps
76474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuloub v19, \P1, v1
77474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Msum v18, v19, \P4, v4, v8
78474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# The subtraction saturates, so a negative difference is floored at 0.
79474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubuhs v16, v16, v18       ;# subtract neg from pos
80474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubuhs v17, v17, v19
81474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    v16, v16, v7        ;# divide by 128
82474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    v17, v17, v7        ;# v16 v17 = evens, odds
83474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrghh  v18, v16, v17       ;# v18 v19 = 16-bit result in order
84474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrglh  v19, v16, v17
85474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkuhus  \P0, v18, v19      ;# P0 = 8-bit result
86474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
87474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# vinterp_no_store_8x8: same arithmetic as vinterp_no_store, but with a
;# different register assignment so that v0..v8 stay free for pixel data.
;#   P0..P5 - the six input vectors; P0 is overwritten with the 8-bit result.
;#   Uses v13..v18 as the per-tap multipliers (taps 0..5 in that order),
;#   v20 as the rounding constant and v19 as the halfword shift count.
;#   Clobbers v21..v25.
88474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vinterp_no_store_8x8 P0 P1 P2 P3 P4 P5
89474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuleub v24, \P0, v13       ;# 64 + 4 positive taps
90474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vadduhm v21, v20, v24
91474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuloub v24, \P0, v13
92474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vadduhm v22, v20, v24
93474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Msum v21, v22, \P2, v15, v25
94474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Msum v21, v22, \P3, v16, v25
95474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Msum v21, v22, \P5, v18, v25
96474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v24 was scratch above; from here on it is the odd-lane accumulator
    ;# for the negative taps.
97474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuleub v23, \P1, v14       ;# 2 negative taps
98474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmuloub v24, \P1, v14
99474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Msum v23, v24, \P4, v17, v25
100474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
101474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubuhs v21, v21, v23       ;# subtract neg from pos
102474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubuhs v22, v22, v24
103474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    v21, v21, v19       ;# divide by 128
104474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    v22, v22, v19       ;# v21 v22 = evens, odds
105474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrghh  v23, v21, v22       ;# v23 v24 = 16-bit result in order
106474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrglh  v24, v21, v22
107474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkuhus \P0, v23, v24       ;# P0 = 8-bit result
108474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
109474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
110474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Vinterp: filter one 16-byte row with vinterp_no_store and store it.
;#   P0..P5 - the six input vectors; P0 is overwritten with the result.
;#   The result is stored with stvx at r7, then r7 is advanced by r8.
111474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Vinterp P0 P1 P2 P3 P4 P5
112474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store \P0, \P1, \P2, \P3, \P4, \P5
113474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    \P0, 0, r7
114474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r8      ;# 33 ops per 16 pels
115474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
116474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
117474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# luma_v: fetch the next input row and emit one filtered output row.
;#   P0..P4 - the five rows already held in registers
;#   P5     - register that receives the newly loaded row
;#   r9 is advanced by 16 before the load; output goes through Vinterp, so
;#   P0 is overwritten and r7 is advanced by r8.
118474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro luma_v P0, P1, P2, P3, P4, P5
119474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9,   r9, 16        ;# P5 = newest input row
120474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \P5,   0, r9
121474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Vinterp \P0, \P1, \P2, \P3, \P4, \P5
122474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
123474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# luma_vtwo: emit two output rows.  The six registers v10..v15 act as a
;# rotating window: the register holding the oldest row (consumed as P0 and
;# overwritten with the result) becomes the load target of the next step.
124474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro luma_vtwo
125474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_v v10, v11, v12, v13, v14, v15
126474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_v v11, v12, v13, v14, v15, v10
127474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
128474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# luma_vfour: emit four output rows - luma_vtwo followed by two more
;# luma_v steps that continue the v10..v15 register rotation.
129474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro luma_vfour
130474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_vtwo
131474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_v v12, v13, v14, v15, v10, v11
132474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_v v13, v14, v15, v10, v11, v12
133474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
134474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# luma_vsix: emit six output rows - luma_vfour followed by two more luma_v
;# steps.  After the sixth step the rotation is back at its starting
;# arrangement (the next step would again use v10 as P0).
135474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro luma_vsix
136474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_vfour
137474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_v v14, v15, v10, v11, v12, v13
138474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_v v15, v10, v11, v12, v13, v14
139474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
140474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Interp4: two chained multiply-sums producing four 32-bit results.
;#   R  - destination; R = msum(v13, I) + v15, then R = msum(v14, I4) + R
;#   I  - byte vector multiplied against v13
;#   I4 - byte vector multiplied against v14
;#   v13/v14 are the two vectors fetched by load_hfilter and v15 is the
;#   word rounding constant set up by the caller.
;#   R may be the same register as I: I is consumed by the first
;#   instruction before R is needed as an addend.
141474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Interp4 R I I4
142474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm \R, v13, \I, v15
143474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm \R, v14, \I4, \R
144474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
145474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Read8x8: load 16 bytes starting at a possibly unaligned address.
;#   VD - destination vector register
;#   RS - GPR holding the source address
;#   RP - GPR holding the amount added to RS when increment_counter is set
;#   increment_counter - assemble-time flag; non-zero emits RS += RP
;#   Expects r10 to hold the offset of the second vector (callers in this
;#   file set it to 16).  Clobbers v20 and v21.
146474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Read8x8 VD, RS, RP, increment_counter
147474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvsl    v21,  0, \RS        ;# permutate value for alignment
148474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
149474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# input to filter is 21 bytes wide, output is 16 bytes.
150474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  input can span three vectors if not aligned correctly.
    ;# NOTE(review): only two vectors are loaded here, so at most 16
    ;#  contiguous bytes starting at RS end up in VD.
151474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \VD,   0, \RS
152474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v20, r10, \RS
153474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
154474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.if \increment_counter
155474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     \RS, \RS, \RP
156474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endif
157474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v21 was derived from RS before the optional increment, so it still
    ;# describes the alignment of the data just loaded.
158474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   \VD, \VD, v20, v21
159474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
160474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# interp_8x8: horizontal filter of one row held in R, in place.
;#   R - input pixels on entry; on exit the eight 8-bit results, present in
;#       both halves of the register (the final pack uses R for both inputs).
;#   Uses v16/v17/v18 as permute-control vectors (the callers load them from
;#   B_0123, B_4567 and B_89AB), v19 as the halfword shift count, and
;#   v13/v14/v15 through Interp4.  Clobbers v20 and v21.
161474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro interp_8x8 R
162474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v20, \R, \R, v16    ;# v20 = 0123 1234 2345 3456
163474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v21, \R, \R, v17    ;# v21 = 4567 5678 6789 789A
164474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Interp4 v20, v20,  v21      ;# v20 = result 0 1 2 3
165474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   \R, \R, \R, v18     ;# R   = 89AB 9ABC ABCx BCxx
166474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Interp4 v21, v21, \R        ;# v21 = result 4 5 6 7
167474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
168474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkswus \R, v20, v21        ;#  R = 0 1 2 3 4 5 6 7
169474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    \R, \R, v19
170474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
171474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkuhus \R, \R, \R          ;# saturate and pack
172474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
173474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
174474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Read4x4: load from a possibly unaligned address using a single vector.
;#   VD - destination vector register
;#   RS - GPR holding the source address
;#   RP - GPR holding the amount added to RS when increment_counter is set
;#   increment_counter - assemble-time flag; non-zero emits RS += RP
;#   Clobbers v20 and v21.
175474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Read4x4 VD, RS, RP, increment_counter
176474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvsl    v21,  0, \RS        ;# permutate value for alignment
177474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
178474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# input to filter is 21 bytes wide, output is 16 bytes.
179474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  input can span three vectors if not aligned correctly.
    ;# NOTE(review): only one vector is loaded and it is permuted against
    ;#  itself, so bytes past the 16-byte boundary wrap around instead of
    ;#  coming from the following memory - confirm this is intended.
180474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v20,   0, \RS
181474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
182474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.if \increment_counter
183474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     \RS, \RS, \RP
184474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endif
185474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
186474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   \VD, v20, v20, v21
187474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
188474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .text
189474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
190474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# sixtap_predict_ppc
;#   Writes a 4x4 block (four rows of one 32-bit word each) to dst.
;#   When x_offset is zero the horizontal pass is skipped and only the
;#   vertical pass runs.  Otherwise the horizontal pass runs, and the
;#   vertical pass follows only when y_offset is non-zero.
;#   Filter data is read from the HFilter / VFilter tables and the permute
;#   constants B_0123, B_4567, B_89AB, b_hilo_4x4 and b_hilo, none of which
;#   are defined in the visible part of this file.
;#   Modifies r0, r3, r5, r6, r7, r9, r10, r11, r12 and CR0; VRSAVE is saved
;#   in r11 and restored on exit; 32 bytes of stack are used as scratch.
191474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char * src
192474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int src_pitch
193474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int x_offset
194474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 int y_offset
195474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 unsigned char * dst
196474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 int dst_pitch
197474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsixtap_predict_ppc:
    ;# OR 0xff87ffc0 into VRSAVE: marks v0-v8 and v13-v25 as in use while
    ;# keeping whatever bits were already set.
198474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
199474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xff87
200474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xffc0
201474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
202474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
203474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwu    r1,-32(r1)          ;# create space on the stack
204474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# x_offset * 32: load_hfilter reads two 16-byte vectors per entry.
    ;# The record form sets CR0 for the beq- just below.
205474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi.   r5, r5, 5           ;# index into horizontal filter array
206474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v19 = shift count 7 for the vsrh in interp_8x8 and
    ;# vinterp_no_store_8x8.
207474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v19, 7
208474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
209474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# If there isn't any filtering to be done for the horizontal, then
210474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  just skip to the second pass.
211474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq-    vertical_only_4x4
212474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
213474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# load up horizontal filter
214474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_hfilter v13, v14
215474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
216474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# rounding added in on the multiply
217474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisw v16, 8
218474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisw v15, 3
219474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslw    v15, v16, v15       ;# 0x00000040000000400000004000000040
220474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
221474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Load up permutation constants
222474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v16, B_0123, 0, r9, r10
223474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v17, B_4567, 0, r9, r10
224474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v18, B_89AB, 0, r9, r10
225474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
226474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Back off input buffer by 2 bytes.  Need 2 before and 3 after
227474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r3, r3, -2
228474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# r9 keeps the address of the first row; it is used further down to
    ;# reach the two rows above the block.  r10 = 16 is the second-vector
    ;# offset expected by Read8x8 (load_c left a table address in it).
229474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9, r3, 0
230474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 16
231474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v2, r3, r4, 1
232474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v3, r3, r4, 1
233474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v4, r3, r4, 1
234474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v5, r3, r4, 1
235474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# y_offset * 16: one 16-byte vector per VFilter entry.  The record form
    ;# sets CR0, which is tested by "beq- store_4x4" after the four
    ;# interp_8x8 expansions; nothing in between may modify CR0.
236474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi.   r6, r6, 4           ;# index into vertical filter array
237474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
238474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# filter a line
239474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v2
240474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v3
241474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v4
242474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v5
243474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
244474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Finished filtering main horizontal block.  If there is no
245474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  vertical filtering, jump to storing the data.  Otherwise
246474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  load up and filter the additional 5 lines that are needed
247474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  for the vertical filter.
248474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq-    store_4x4
249474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
250474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# only needed if there is a vertical filter present
251474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# if the second filter is not null then need to back off by 2*pitch
252474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r9, r9, r4
253474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r9, r9, r4
254474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v0, v1 = the two rows above the block (read through r9);
    ;# v6, v7, v8 = the three rows below it (r3 already points past the
    ;# four rows read earlier).
255474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v0, r9, r4, 1
256474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v1, r9, r4, 0
257474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v6, r3, r4, 1
258474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v7, r3, r4, 1
259474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v8, r3, r4, 0
260474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
261474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v0
262474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v1
263474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v6
264474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v7
265474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v8
266474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
267474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       second_pass_4x4
268474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
269474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgvertical_only_4x4:
270474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# only needed if there is a vertical filter present
271474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# if the second filter is not null then need to back off by 2*pitch
272474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
273474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
274474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 16
275474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Nine consecutive rows, v0..v8, read straight from src (no 2-byte
    ;# back-off on this path because no horizontal filter is applied).
276474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v0, r3, r4, 1
277474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v1, r3, r4, 1
278474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v2, r3, r4, 1
279474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v3, r3, r4, 1
280474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v4, r3, r4, 1
281474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v5, r3, r4, 1
282474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v6, r3, r4, 1
283474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v7, r3, r4, 1
284474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v8, r3, r4, 0
285474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
286474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi    r6, r6, 4           ;# index into vertical filter array
287474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
288474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_4x4:
289474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c   v20, b_hilo_4x4, 0, r9, r10
290474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c   v21, b_hilo, 0, r9, r10
291474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
292474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# reposition input so that it can go through the
293474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# filtering phase with one pass.
294474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v0, v0, v1, v20     ;# 0 1 x x
295474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v2, v2, v3, v20     ;# 2 3 x x
296474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v4, v4, v5, v20     ;# 4 5 x x
297474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v6, v6, v7, v20     ;# 6 7 x x
298474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
299474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v0, v0, v2, v21     ;# 0 1 2 3
300474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v4, v4, v6, v21     ;# 4 5 6 7
301474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Going by the row labels above (4 bytes per row), the shifts below
    ;# give v1 = rows 1-4, v2 = rows 2-5, v3 = rows 3-6 and v5 = rows 5-8,
    ;# so one filter pass over v0..v5 yields all four output rows.
302474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v1, v0, v4, 4
303474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v2, v0, v4, 8
304474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v3, v0, v4, 12
305474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
306474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v5, v4, v8, 4
307474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Same constant set-up as Vprolog, but placed in v13..v18 / v20 for
    ;# vinterp_no_store_8x8.  v15 is a temporary (8) until it receives
    ;# filter byte 2 below.
308474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c   v13, VFilter, r6, r9, r10
309474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
310474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v15, 8
311474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v20, 3
312474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslh    v20, v15, v20       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
313474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
314474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v14, v13, 1
315474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v15, v13, 2
316474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v16, v13, 3
317474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v17, v13, 4
318474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v18, v13, 5
319474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v13, v13, 0
320474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
321474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v0, v1, v2, v3, v4, v5
322474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Bounce the result through the stack scratch area so that each
    ;# 4-byte row can be moved to dst with integer loads/stores.
    ;# NOTE(review): this stvx writes 16 bytes at 0(r1), which overwrites
    ;# the word stored there by the stwu above; r1 is restored with addi
    ;# on exit, so the function itself does not depend on that word.
323474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v0, 0, r1
324474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
325474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 0(r1)
326474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(r7)
327474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r8
328474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
329474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 4(r1)
330474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(r7)
331474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r8
332474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
333474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 8(r1)
334474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(r7)
335474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r8
336474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
337474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 12(r1)
338474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(r7)
339474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
340474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       exit_4x4
341474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
342474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_4x4:
    ;# Horizontal-only result: v2..v5 each hold one filtered row; only the
    ;# first word of each is copied to dst, one register per output row.
343474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
344474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v2, 0, r1
345474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 0(r1)
346474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(r7)
347474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r8
348474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
349474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v3, 0, r1
350474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 0(r1)
351474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(r7)
352474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r8
353474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
354474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v4, 0, r1
355474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 0(r1)
356474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(r7)
357474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r8
358474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
359474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v5, 0, r1
360474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 0(r1)
361474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(r7)
362474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
363474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgexit_4x4:
364474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
365474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r1, r1, 32          ;# recover stack
366474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
367474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
368474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
369474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
370474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# w_8x8: write the first 8 bytes of vector V to the row at D, then step D.
;#   V - vector register holding the row to write
;#   D - GPR holding the destination address; advanced by P afterwards
;#   R - GPR scratch used to move the two words
;#   P - GPR holding the destination pitch
;# The vector is bounced through the stack scratch area at 0(r1) because
;# the row is written with two 32-bit integer stores.
;# The stores go through D (they used to hard-code r7, which only worked
;# when the caller happened to pass r7 as D).
371474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro w_8x8 V, D, R, P
372474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    \V, 0, r1
373474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     \R, 0(r1)
374474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     \R, 0(\D)
375474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     \R, 4(r1)
376474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     \R, 4(\D)
377474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     \D, \D, \P
378474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
379474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
380474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# sixtap_predict8x4_ppc: six-tap prediction of a block 8 pixels wide and
;#  4 rows high.  A horizontal pass runs when x_offset is non-zero and a
;#  vertical pass runs when y_offset is non-zero (or whenever the horizontal
;#  pass was skipped); the 4 result rows are written to dst.
381474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char * src
382474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int src_pitch
383474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int x_offset
384474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 int y_offset
385474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 unsigned char * dst
386474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 int dst_pitch
;# Scratch GPRs written here: r0, r9, r10, r11 (saved VRSAVE), r12.  The
;#  argument registers r3, r5, r6 and r7 are modified as well.  The
;#  Read8x8 / interp_8x8 / vinterp_no_store_8x8 / load_hfilter macros are
;#  defined elsewhere in the file and may use further registers.
387474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
388474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsixtap_predict8x4_ppc:
389474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
    ;# OR in 0xffffffc0: the top 26 VRSAVE bits (v0-v25, with the most
    ;#  significant bit standing for v0) are flagged as in use.
390474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff
391474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xffc0
392474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
393474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# 32 bytes of stack; w_8x8 uses 0(r1) as a bounce buffer for stores.
394474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwu    r1,-32(r1)          ;# create space on the stack
395474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# r5 = x_offset * 32.  The record form (trailing dot) sets cr0, which the
    ;#  beq- a few lines down tests; the vector splat in between leaves cr0
    ;#  untouched.
396474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi.   r5, r5, 5           ;# index into horizontal filter array
397474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v19 = 7 in every halfword.  Not referenced directly in this routine;
    ;#  presumably the shift count used inside the filter macros - TODO confirm.
398474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v19, 7
399474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
400474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# If there isn't any filtering to be done for the horizontal, then
401474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  just skip to the second pass.
402474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq-    second_pass_pre_copy_8x4
403474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# load_hfilter is defined near the top of the file; presumably it uses
    ;#  r5 as the table index - TODO confirm against the macro body.
404474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_hfilter v13, v14
405474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
406474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# rounding added in on the multiply
    ;# v15 = 8 << 3 = 64 in every word; v16 is only a temporary here and is
    ;#  reloaded with a permute constant just below.
407474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisw v16, 8
408474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisw v15, 3
409474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslw    v15, v16, v15       ;# 0x00000040000000400000004000000040
410474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
411474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Load up permutation constants
    ;# load_c overwrites both r9 and r10 (address scratch).
412474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v16, B_0123, 0, r9, r10
413474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v17, B_4567, 0, r9, r10
414474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v18, B_89AB, 0, r9, r10
415474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
416474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Back off input buffer by 2 bytes.  Need 2 before and 3 after
417474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r3, r3, -2
418474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Keep a copy of (src - 2) in r9 so the two rows above the block can be
    ;#  fetched later if a vertical pass turns out to be needed.
419474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9, r3, 0
    ;# r10 = 16; presumably the byte offset Read8x8 uses for its second
    ;#  vector load - TODO confirm against the macro.
420474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 16
    ;# Four block rows into v2-v5.  The trailing 1 presumably tells Read8x8
    ;#  to advance the row pointer by the pitch afterwards - TODO confirm.
421474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v2, r3, r4, 1
422474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v3, r3, r4, 1
423474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v4, r3, r4, 1
424474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v5, r3, r4, 1
425474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# r6 = y_offset * 16, and cr0 records whether it is zero.  That cr0 value
    ;#  is tested by "beq- store_8x4" after the four interp_8x8 expansions, so
    ;#  this relies on interp_8x8 not modifying cr0 (macro not shown here).
426474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi.   r6, r6, 4           ;# index into vertical filter array
427474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
428474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# filter a line
429474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v2
430474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v3
431474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v4
432474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v5
433474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
434474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Finished filtering main horizontal block.  If there is no
435474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  vertical filtering, jump to storing the data.  Otherwise
436474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  load up and filter the additional 5 lines that are needed
437474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  for the vertical filter.
438474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq-    store_8x4
439474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
440474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# only needed if there is a vertical filter present
441474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# if the second filter is not null then need to back off by 2*pitch
442474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r9, r9, r4
443474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r9, r9, r4
444474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v0, v1: the two rows above the block, read through r9.
    ;# v6, v7, v8: the three rows below the block, read through r3, which
    ;#  continues from where the four reads above left it.
445474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v0, r9, r4, 1
446474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v1, r9, r4, 0
447474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v6, r3, r4, 1
448474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v7, r3, r4, 1
449474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v8, r3, r4, 0
450474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
451474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v0
452474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v1
453474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v6
454474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v7
455474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v8
456474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
457474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       second_pass_8x4
458474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Reached when x_offset == 0: no horizontal pass, so all nine source rows
    ;#  (two above, four in the block, three below) are read unfiltered into
    ;#  v0-v8.  r3 is not backed off by 2 bytes on this path.
459474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_pre_copy_8x4:
460474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# only needed if there is a vertical filter present
461474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# if the second filter is not null then need to back off by 2*pitch
462474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
463474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
464474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 16
465474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
466474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v0,  r3, r4, 1
467474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v1,  r3, r4, 1
468474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v2,  r3, r4, 1
469474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v3,  r3, r4, 1
470474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v4,  r3, r4, 1
471474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v5,  r3, r4, 1
472474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v6,  r3, r4, 1
473474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v7,  r3, r4, 1
    ;# NOTE(review): the final read passes 1 here while the 8x8 routine
    ;#  passes 0 for its last row.  r3 is not used again in this routine, so
    ;#  this looks harmless if the flag only controls the pointer advance -
    ;#  confirm against Read8x8.
474474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v8,  r3, r4, 1
475474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Non-record form: no branch depends on y_offset on this path.
476474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi    r6, r6, 4           ;# index into vertical filter array
477474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Vertical pass over the nine rows held in v0-v8.
478474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_8x4:
    ;# v13 = 16 bytes loaded from VFilter + r6 (r9 and r10 are overwritten).
479474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v13, VFilter, r6, r9, r10
480474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v20 = 8 << 3 = 64 in every halfword; v15 is a temporary that the
    ;#  vspltb below overwrites.
481474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v15, 8
482474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v20, 3
483474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslh    v20, v15, v20       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
484474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Splat bytes 0-5 of the loaded filter vector across v13-v18, one byte
    ;#  per register.  v13 is written last because it is also the source.
485474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v14, v13, 1
486474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v15, v13, 2
487474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v16, v13, 3
488474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v17, v13, 4
489474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v18, v13, 5
490474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v13, v13, 0
491474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# One expansion per output row over a sliding window of six input rows.
    ;#  The stores below write v0-v3, so the macro presumably leaves each
    ;#  result in its first operand - TODO confirm against the macro.
492474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v0, v1, v2, v3,  v4,  v5
493474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v1, v2, v3, v4,  v5,  v6
494474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v2, v3, v4, v5,  v6,  v7
495474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v3, v4, v5, v6,  v7,  v8
496474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# dst_pitch == 8 means the 8-byte rows are contiguous in memory, so two
    ;#  rows can be packed into one 16-byte vector store.
497474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    cmpi    cr0, r8, 8
498474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq     cr0, store_aligned_8x4
499474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# General pitch: write each row as two 32-bit words via the stack.
500474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v0, r7, r0, r8
501474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v1, r7, r0, r8
502474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v2, r7, r0, r8
503474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v3, r7, r0, r8
504474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
505474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       exit_8x4
506474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Vertical-pass results (v0-v3), contiguous destination.
507474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_aligned_8x4:
508474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
509474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v10, b_hilo, 0, r9, r10
510474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Merge row pairs with the b_hilo permute (constant defined elsewhere;
    ;#  presumably it selects the 8 result bytes of each input - TODO confirm).
511474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v0, v0, v1, v10
512474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v2, v2, v3, v10
513474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# NOTE(review): stvx ignores the low four address bits, so this path
    ;#  assumes dst is 16-byte aligned whenever dst_pitch == 8 - confirm with
    ;#  the callers.
514474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v0, 0, r7
515474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
516474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v2, 0, r7
517474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
518474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       exit_8x4
519474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Reached when y_offset == 0: the horizontally filtered rows in v2-v5 are
    ;#  the final output.
520474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_8x4:
521474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    cmpi    cr0, r8, 8
522474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq     cr0, store_aligned2_8x4
523474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
524474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v2, r7, r0, r8
525474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v3, r7, r0, r8
526474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v4, r7, r0, r8
527474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v5, r7, r0, r8
528474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
529474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       exit_8x4
530474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Same packed store as store_aligned_8x4, but for rows v2-v5; falls
    ;#  through into exit_8x4.
531474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_aligned2_8x4:
532474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v10, b_hilo, 0, r9, r10
533474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
534474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v2, v2, v3, v10
535474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v4, v4, v5, v10
536474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
537474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v2, 0, r7
538474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
539474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v4, 0, r7
540474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Common epilogue: release the 32-byte frame and restore the caller's
    ;#  VRSAVE value saved in r11.
541474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgexit_8x4:
542474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
543474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r1, r1, 32          ;# recover stack
544474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
545474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
546474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
547474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
548474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
549474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
550474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# sixtap_predict8x8_ppc: six-tap prediction of an 8x8 block.  A horizontal
;#  pass runs when x_offset is non-zero and a vertical pass runs when
;#  y_offset is non-zero (or whenever the horizontal pass was skipped); the
;#  8 result rows are written to dst.
551474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char * src
552474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int src_pitch
553474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int x_offset
554474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 int y_offset
555474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 unsigned char * dst
556474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 int dst_pitch
;# Scratch GPRs written here: r0, r9, r10, r11 (saved VRSAVE), r12.  The
;#  argument registers r3, r5, r6 and r7 are modified as well.
557474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
558474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Because the width that needs to be filtered will fit in a single altivec
559474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  register there is no need to loop.  Everything can stay in registers.
560474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsixtap_predict8x8_ppc:
561474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
    ;# OR in 0xffffffc0: the top 26 VRSAVE bits (v0-v25, with the most
    ;#  significant bit standing for v0) are flagged as in use.
562474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff
563474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xffc0
564474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
565474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# 32 bytes of stack; w_8x8 uses 0(r1) as a bounce buffer for stores.
566474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwu    r1,-32(r1)          ;# create space on the stack
567474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# r5 = x_offset * 32.  The record form sets cr0, which the beq- a few
    ;#  lines down tests; the vector splat in between leaves cr0 untouched.
568474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi.   r5, r5, 5           ;# index into horizontal filter array
569474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v19 = 7 in every halfword.  Not referenced directly in this routine;
    ;#  presumably the shift count used inside the filter macros - TODO confirm.
570474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v19, 7
571474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
572474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# If there isn't any filtering to be done for the horizontal, then
573474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  just skip to the second pass.
574474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq-    second_pass_pre_copy_8x8
575474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# load_hfilter is defined near the top of the file; presumably it uses
    ;#  r5 as the table index - TODO confirm against the macro body.
576474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_hfilter v13, v14
577474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
578474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# rounding added in on the multiply
    ;# v15 = 8 << 3 = 64 in every word; v16 is only a temporary here and is
    ;#  reloaded with a permute constant just below.
579474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisw v16, 8
580474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisw v15, 3
581474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslw    v15, v16, v15       ;# 0x00000040000000400000004000000040
582474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
583474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Load up permutation constants
    ;# load_c overwrites both r9 and r10 (address scratch).
584474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v16, B_0123, 0, r9, r10
585474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v17, B_4567, 0, r9, r10
586474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v18, B_89AB, 0, r9, r10
587474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
588474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Back off input buffer by 2 bytes.  Need 2 before and 3 after
589474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r3, r3, -2
590474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Keep a copy of (src - 2) in r9 so the two rows above the block can be
    ;#  fetched later if a vertical pass turns out to be needed.
591474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9, r3, 0
    ;# r10 = 16; presumably the byte offset Read8x8 uses for its second
    ;#  vector load - TODO confirm against the macro.
592474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 16
    ;# Eight block rows into v2-v9.  The trailing 1 presumably tells Read8x8
    ;#  to advance the row pointer by the pitch afterwards - TODO confirm.
593474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v2, r3, r4, 1
594474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v3, r3, r4, 1
595474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v4, r3, r4, 1
596474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v5, r3, r4, 1
597474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v6, r3, r4, 1
598474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v7, r3, r4, 1
599474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v8, r3, r4, 1
600474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v9, r3, r4, 1
601474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# r6 = y_offset * 16, and cr0 records whether it is zero.  That cr0 value
    ;#  is tested by "beq- store_8x8" after the eight interp_8x8 expansions,
    ;#  so this relies on interp_8x8 not modifying cr0 (macro not shown here).
602474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi.   r6, r6, 4           ;# index into vertical filter array
603474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
604474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# filter a line
605474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v2
606474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v3
607474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v4
608474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v5
609474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v6
610474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v7
611474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v8
612474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v9
613474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
614474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Finished filtering main horizontal block.  If there is no
615474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  vertical filtering, jump to storing the data.  Otherwise
616474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  load up and filter the additional 5 lines that are needed
617474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  for the vertical filter.
618474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq-    store_8x8
619474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
620474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# only needed if there is a vertical filter present
621474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# if the second filter is not null then need to back off by 2*pitch
622474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r9, r9, r4
623474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r9, r9, r4
624474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v0, v1: the two rows above the block, read through r9.
    ;# v10, v11, v12: the three rows below the block, read through r3, which
    ;#  continues from where the eight reads above left it.
625474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v0,  r9, r4, 1
626474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v1,  r9, r4, 0
627474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v10, r3, r4, 1
628474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v11, r3, r4, 1
629474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v12, r3, r4, 0
630474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
631474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v0
632474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v1
633474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v10
634474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v11
635474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    interp_8x8 v12
636474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
637474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       second_pass_8x8
638474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Reached when x_offset == 0: no horizontal pass, so all thirteen source
    ;#  rows (two above, eight in the block, three below) are read unfiltered
    ;#  into v0-v12.  r3 is not backed off by 2 bytes on this path.
639474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_pre_copy_8x8:
640474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# only needed if there is a vertical filter present
641474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# if the second filter is not null then need to back off by 2*pitch
642474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
643474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
644474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 16
645474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
646474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v0,  r3, r4, 1
647474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v1,  r3, r4, 1
648474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v2,  r3, r4, 1
649474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v3,  r3, r4, 1
650474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v4,  r3, r4, 1
651474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v5,  r3, r4, 1
652474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v6,  r3, r4, 1
653474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v7,  r3, r4, 1
654474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v8,  r3, r4, 1
655474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v9,  r3, r4, 1
656474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v10, r3, r4, 1
657474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v11, r3, r4, 1
658474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Read8x8 v12, r3, r4, 0
659474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Non-record form: no branch depends on y_offset on this path.
660474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi    r6, r6, 4           ;# index into vertical filter array
661474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Vertical pass over the thirteen rows held in v0-v12.
662474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_8x8:
    ;# v13 = 16 bytes loaded from VFilter + r6 (r9 and r10 are overwritten).
663474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v13, VFilter, r6, r9, r10
664474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v20 = 8 << 3 = 64 in every halfword; v15 is a temporary that the
    ;#  vspltb below overwrites.
665474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v15, 8
666474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v20, 3
667474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslh    v20, v15, v20       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
668474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Splat bytes 0-5 of the loaded filter vector across v13-v18, one byte
    ;#  per register.  v13 is written last because it is also the source.
669474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v14, v13, 1
670474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v15, v13, 2
671474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v16, v13, 3
672474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v17, v13, 4
673474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v18, v13, 5
674474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltb  v13, v13, 0
675474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# One expansion per output row over a sliding window of six input rows.
    ;#  The stores below write v0-v7, so the macro presumably leaves each
    ;#  result in its first operand - TODO confirm against the macro.
676474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v0, v1, v2, v3,  v4,  v5
677474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v1, v2, v3, v4,  v5,  v6
678474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v2, v3, v4, v5,  v6,  v7
679474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v3, v4, v5, v6,  v7,  v8
680474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v4, v5, v6, v7,  v8,  v9
681474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v5, v6, v7, v8,  v9,  v10
682474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v6, v7, v8, v9,  v10, v11
683474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vinterp_no_store_8x8 v7, v8, v9, v10, v11, v12
684474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# dst_pitch == 8 means the 8-byte rows are contiguous in memory, so two
    ;#  rows can be packed into one 16-byte vector store.
685474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    cmpi    cr0, r8, 8
686474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq     cr0, store_aligned_8x8
687474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# General pitch: write each row as two 32-bit words via the stack.
688474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v0, r7, r0, r8
689474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v1, r7, r0, r8
690474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v2, r7, r0, r8
691474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v3, r7, r0, r8
692474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v4, r7, r0, r8
693474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v5, r7, r0, r8
694474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v6, r7, r0, r8
695474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v7, r7, r0, r8
696474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
697474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       exit_8x8
698474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Vertical-pass results (v0-v7), contiguous destination.
699474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_aligned_8x8:
700474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# v10 held a source row during the vertical pass; that pass is complete,
    ;#  so the register is reused for the permute constant.
701474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v10, b_hilo, 0, r9, r10
702474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Merge row pairs with the b_hilo permute (constant defined elsewhere;
    ;#  presumably it selects the 8 result bytes of each input - TODO confirm).
703474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v0, v0, v1, v10
704474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v2, v2, v3, v10
705474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v4, v4, v5, v10
706474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v6, v6, v7, v10
707474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# NOTE(review): stvx ignores the low four address bits, so this path
    ;#  assumes dst is 16-byte aligned whenever dst_pitch == 8 - confirm with
    ;#  the callers.
708474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v0, 0, r7
709474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
710474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v2, 0, r7
711474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
712474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v4, 0, r7
713474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
714474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v6, 0, r7
715474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
716474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       exit_8x8
717474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Reached when y_offset == 0: the horizontally filtered rows in v2-v9 are
    ;#  the final output.
718474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_8x8:
719474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    cmpi    cr0, r8, 8
720474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq     cr0, store_aligned2_8x8
721474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
722474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v2, r7, r0, r8
723474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v3, r7, r0, r8
724474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v4, r7, r0, r8
725474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v5, r7, r0, r8
726474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v6, r7, r0, r8
727474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v7, r7, r0, r8
728474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v8, r7, r0, r8
729474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v9, r7, r0, r8
730474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
731474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       exit_8x8
732474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Same packed store as store_aligned_8x8, but for rows v2-v9; falls
    ;#  through into exit_8x8.
733474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_aligned2_8x8:
734474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v10, b_hilo, 0, r9, r10
735474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
736474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v2, v2, v3, v10
737474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v4, v4, v5, v10
738474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v6, v6, v7, v10
739474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v8, v8, v9, v10
740474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
741474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v2, 0, r7
742474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
743474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v4, 0, r7
744474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
745474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v6, 0, r7
746474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
747474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v8, 0, r7
748474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Common epilogue: release the 32-byte frame and restore the caller's
    ;#  VRSAVE value saved in r11.
749474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgexit_8x8:
750474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
751474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r1, r1, 32          ;# recover stack
752474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
753474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
754474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
755474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
756474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
757474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
758474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char * src
759474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int src_pitch
760474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int x_offset
761474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 int y_offset
762474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 unsigned char * dst
763474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 int dst_pitch
764474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
765474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Two pass filtering.  First pass is Horizontal edges, second pass is vertical
766474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  edges.  One of the filters can be null, but both won't be.  Needs to use a
767474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  temporary buffer because the source buffer can't be modified and the buffer
768474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  for the destination is not large enough to hold the temporary data.
769474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsixtap_predict16x16_ppc:
770474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
771474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# set the upper 16 VRSAVE bits (v0-v15)
772474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xf000    ;# ...and the next 4 bits (v16-v19)
773474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
774474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
775474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwu    r1,-416(r1)         ;# create space on the stack (temp rows start at 48(r1))
776474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
777474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Three possibilities
778474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  1. First filter is null.  Copy the source rows to the temp buffer.
779474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  2. Second filter is null.  Don't use a temp buffer.
780474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  3. Neither are null, use temp buffer.
781474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
782474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# First Pass (horizontal edge)
783474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  setup pointers for src
784474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  if possibility (1) then copy the source rows to the temp buffer and go
785474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  to second pass.  this is based on if x_offset is 0.
786474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
787474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# load up horizontal filter
788474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi.   r5, r5, 5           ;# index into horizontal filter array; record form sets cr0
789474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
790474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_hfilter v4, v5         ;# macro body not in view; presumably loads the two HFilter rows -- confirm
791474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
792474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq-    copy_horizontal_16x21   ;# x_offset == 0 (tests cr0 from the slwi. above)
793474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
794474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Back off input buffer by 2 bytes.  Need 2 before and 3 after
795474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r3, r3, -2
796474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
797474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi.   r6, r6, 4           ;# index into vertical filter array; record form sets cr0
798474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
799474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# setup constants
800474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# v14 permutation value for alignment
801474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v14, b_hperm, 0, r9, r10     ;# r9 and r10 are scratch here
802474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
803474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# These statements assume that there won't be a second pass; if there
804474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  is one, they are overridden just before no_vertical_filter_bypass.
805474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r0, 16              ;# prepare for no vertical filter: 16 output rows
806474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
807474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Change the output pointer and pitch to be the actual
808474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  destination instead of a temporary buffer.
809474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9, r7, 0           ;# r9 = dst
810474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r5, r8, 0           ;# r5 = dst_pitch
811474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
812474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# no vertical filter, so write the output from the first pass
813474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  directly into the output buffer.
814474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq-    no_vertical_filter_bypass   ;# y_offset == 0 (tests cr0 from the slwi. above)
815474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
816474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# if the second filter is not null then need to back off by 2*pitch
817474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
818474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
819474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
820474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# setup counter for the number of lines that are going to be filtered
821474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r0, 21              ;# 16 rows plus the extra rows the vertical taps read
822474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
823474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# use the stack as temporary storage
824474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r9, 48(r1)          ;# r9 = start of the temp buffer
825474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r5, 16              ;# temp buffer pitch: one 16-byte vector per row
826474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
827474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgno_vertical_filter_bypass:
828474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
829474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtctr   r0                  ;# ctr = number of rows for the horizontal loop
830474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
831474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# rounding added in on the multiply
832474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisw v10, 8             ;# 8 in every word
833474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisw v12, 3             ;# shift count 3 in every word
834474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslw    v12, v10, v12       ;# 8 << 3 = 0x00000040000000400000004000000040
835474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
836474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# downshift by 7 ( divide by 128 ) at the end
837474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v13, 7
838474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
839474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# index to the next set of vectors in the row.
840474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 16             ;# byte offset of the 2nd source vector
841474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r12, 32             ;# byte offset of the 3rd source vector
842474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
843474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orghorizontal_loop_16x16:
844474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
845474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvsl    v15,  0, r3         ;# permute control for alignment
846474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
847474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# input to filter is 21 bytes wide, output is 16 bytes.
848474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  input can span three vectors if not aligned correctly.
849474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v1,   0, r3
850474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v2, r10, r3
851474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v3, r12, r3
852474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
853474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v8, v1, v2, v15
854474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v9, v2, v3, v15     ;# v8 v9 = 21 input pixels left-justified
855474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
856474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v11, v8, v9, 4      ;# input shifted by 4 bytes, multiplied by v5 below
857474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
858474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# set 0: one 32-bit sum per word, giving outputs 0 4 8 C
859474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v6, v4, v8, v12    ;# taps times elements, plus the rounding constant in v12
860474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v0, v5, v11, v6    ;# accumulate the v5 products onto v6
861474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
862474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# set 1: outputs 1 5 9 D
863474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v10, v8, v9, 1
864474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v11, v8, v9, 5
865474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
866474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v6, v4, v10, v12
867474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v1, v5, v11, v6    ;# v1 reused as a result; the loaded data already lives in v8/v9
868474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
869474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# set 2: outputs 2 6 A E
870474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v10, v8, v9, 2
871474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v11, v8, v9, 6
872474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
873474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v6, v4, v10, v12
874474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v2, v5, v11, v6
875474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
876474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# set 3: outputs 3 7 B F
877474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v10, v8, v9, 3
878474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v11, v8, v9, 7
879474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
880474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v6, v4, v10, v12
881474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v3, v5, v11, v6
882474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
883474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkswus v0, v0, v1          ;# v0 = 0 4 8 C 1 5 9 D (16-bit)
884474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkswus v1, v2, v3          ;# v1 = 2 6 A E 3 7 B F
885474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
886474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    v0, v0, v13         ;# divide v0, v1 by 128
887474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    v1, v1, v13
888474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
889474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkuhus v0, v0, v1          ;# v0 = scrambled 8-bit result
890474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v0, v0, v0, v14     ;# v0 = correctly-ordered result
891474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
892474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v0,  0, r9          ;# write one 16-pixel output row
893474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r9, r9, r5          ;# advance output by its pitch (dst_pitch or 16)
894474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
895474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r3, r3, r4          ;# advance src by src_pitch
896474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
897474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bdnz    horizontal_loop_16x16
898474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
899474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# check again to see if vertical filter needs to be done.
900474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    cmpi    cr0, r6, 0          ;# r6 holds y_offset << 4 at this point
901474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq     cr0, end_16x16      ;# no vertical filter: rows were written straight to dst
902474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
903474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# yes there is, so go to the second pass
904474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       second_pass_16x16
905474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
906474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgcopy_horizontal_16x21:
907474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 21             ;# 21 rows are copied to the temp buffer
908474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtctr   r10
909474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
910474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    li      r10, 16             ;# r10 now becomes the offset of the 2nd source vector
911474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
912474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4          ;# back src up by two rows (2 * src_pitch)
913474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4
914474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
915474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# this is done above if there is a horizontal filter,
916474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  if not it needs to be done down here.
917474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi    r6, r6, 4           ;# index into vertical filter array
918474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
919474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# always write to the stack when doing a horizontal copy
920474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r9, 48(r1)
921474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
922474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgcopy_horizontal_loop_16x21:
923474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvsl    v15,  0, r3         ;# permute control for alignment
924474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
925474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v1,   0, r3
926474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v2, r10, r3
927474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
928474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v8, v1, v2, v15     ;# v8 = 16 source pixels starting at r3
929474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
930474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v8,  0, r9
931474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9, r9, 16          ;# temp buffer rows are 16 bytes apart
932474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
933474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r3, r3, r4          ;# advance src by src_pitch
934474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
935474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    bdnz    copy_horizontal_loop_16x21  ;# falls through to second_pass_16x16 when done
936474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
937474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_16x16:
938474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
939474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# always read from the stack when doing a vertical filter
940474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r9, 48(r1)          ;# rewind r9 to the start of the temp buffer
941474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
942474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# downshift by 7 ( divide by 128 ) at the end
943474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v7, 7
944474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
945474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpre_load                   ;# macro defined outside this view -- TODO confirm what it loads
946474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
947474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_vsix                   ;# NOTE(review): presumably 6 output rows per call -- confirm in macro
948474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_vsix
949474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    luma_vfour                  ;# NOTE(review): presumably the last 4 rows (6 + 6 + 4 = 16) -- confirm
950474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
951474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgend_16x16:
952474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
953474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r1, r1, 416         ;# recover stack (matches the stwu of -416 at entry)
954474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
955474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
956474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
957474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
958474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
959474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .data
960474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
961474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
962474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgHFilter:     ;# 8 entries of 32 bytes (two rows each); sixtap_predict16x16_ppc indexes it with x_offset << 5
963474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  0,128,  0,  0,  0,128,  0,  0,  0,128,  0,  0,  0,128,  0    ;# x_offset 0: taps 0-3, repeated 4 times (16x16 takes the copy path for this offset)
964474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;#   taps 4-5 followed by two zero bytes, repeated 4 times
965474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0, -6,123, 12,  0, -6,123, 12,  0, -6,123, 12,  0, -6,123, 12    ;# x_offset 1
966474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    -1,  0,  0,  0, -1,  0,  0,  0, -1,  0,  0,  0, -1,  0,  0,  0
967474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     2,-11,108, 36,  2,-11,108, 36,  2,-11,108, 36,  2,-11,108, 36    ;# x_offset 2
968474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    -8,  1,  0,  0, -8,  1,  0,  0, -8,  1,  0,  0, -8,  1,  0,  0
969474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0, -9, 93, 50,  0, -9, 93, 50,  0, -9, 93, 50,  0, -9, 93, 50    ;# x_offset 3
970474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    -6,  0,  0,  0, -6,  0,  0,  0, -6,  0,  0,  0, -6,  0,  0,  0
971474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     3,-16, 77, 77,  3,-16, 77, 77,  3,-16, 77, 77,  3,-16, 77, 77    ;# x_offset 4
972474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   -16,  3,  0,  0,-16,  3,  0,  0,-16,  3,  0,  0,-16,  3,  0,  0
973474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0, -6, 50, 93,  0, -6, 50, 93,  0, -6, 50, 93,  0, -6, 50, 93    ;# x_offset 5
974474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    -9,  0,  0,  0, -9,  0,  0,  0, -9,  0,  0,  0, -9,  0,  0,  0
975474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     1, -8, 36,108,  1, -8, 36,108,  1, -8, 36,108,  1, -8, 36,108    ;# x_offset 6
976474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   -11,  2,  0,  0,-11,  2,  0,  0,-11,  2,  0,  0,-11,  2,  0,  0
977474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0, -1, 12,123,  0, -1, 12,123,  0, -1, 12,123,  0, -1, 12,123    ;# x_offset 7
978474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    -6,  0,  0,  0, -6,  0,  0,  0, -6,  0,  0,  0, -6,  0,  0,  0
979474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
980474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
981474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgVFilter:     ;# 8 entries of 16 bytes; the vertical index is y_offset << 4.  Holds tap magnitudes only (no signs, unlike HFilter)
982474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  0,128,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;# y_offset 0: six taps in bytes 0-5, rest zero
983474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  6,123, 12,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;# y_offset 1
984474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     2, 11,108, 36,  8,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;# y_offset 2
985474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  9, 93, 50,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;# y_offset 3
986474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     3, 16, 77, 77, 16,  3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;# y_offset 4
987474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  6, 50, 93,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;# y_offset 5
988474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     1,  8, 36,108, 11,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;# y_offset 6
989474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  1, 12,123,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0    ;# y_offset 7
990474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
991474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
992474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgb_hperm:     ;# vperm control used after the horizontal filter to put the 16 result bytes back in pixel order
993474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15
994474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
995474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
996474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgB_0123:      ;# four overlapping 4-byte index windows starting at bytes 0, 1, 2, 3 (presumably a vperm control -- users not in view)
997474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  1,  2,  3,  1,  2,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6
998474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
999474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
1000474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgB_4567:      ;# same window pattern, starting at bytes 4, 5, 6, 7
1001474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     4,  5,  6,  7,  5,  6,  7,  8,  6,  7,  8,  9,  7,  8,  9, 10
1002474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1003474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
1004474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgB_89AB:      ;# same window pattern, starting at bytes 8, 9, 10, 11
1005474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     8,  9, 10, 11,  9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14
1006474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1007474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
1008474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgb_hilo:      ;# vperm control: bytes 0-7 of the first source, then bytes 0-7 of the second (indices 16-23)
1009474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23
1010474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1011474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
1012474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgb_hilo_4x4:  ;# bytes 0-3 of the first source, then bytes 0-3 of the second (indices 16-19); remaining lanes are index 0
1013474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  1,  2,  3, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,  0,  0
1014