;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


;# Symbols exported from this file.
    .globl bilinear_predict4x4_ppc
    .globl bilinear_predict8x4_ppc
    .globl bilinear_predict8x8_ppc
    .globl bilinear_predict16x16_ppc

;# load_c V, LABEL, OFF, R0, R1
;#  Loads one 16-byte vector from the table at LABEL (+ OFF) into V.
;#   V      destination vector register
;#   LABEL  symbol naming the table
;#   OFF    index operand of lvx: literal 0, or a GPR holding a byte offset
;#   R0     scratch GPR; receives the high-adjusted half of LABEL's address
;#   R1     scratch GPR; receives the full address of LABEL
;#  Both R0 and R1 are overwritten.
;#  R0 must not be r0, and OFF (when a register) must not be r0: in the
;#  base-register slot of la/lvx, r0 reads as the constant zero.
;#  lvx ignores the low four bits of the effective address, so LABEL + OFF
;#  has to be 16-byte aligned to load the intended data.
.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha
    la      \R1, \LABEL@l(\R0)
    lvx     \V, \OFF, \R1
.endm

;# load_vfilter V0, V1
;#  Loads two consecutive 16-byte vectors from vfilter_b into V0 and V1.
;#  expects:
;#   r6  byte offset of the first vector inside vfilter_b
;#  clobbers:
;#   r6  (advanced by 16)
;#   r9  (scratch for the address computation)
;#   r10 (left holding the address of vfilter_b, so it is no longer 16)
.macro load_vfilter V0, V1
    load_c \V0, vfilter_b, r6, r9, r10

    addi    r6,  r6, 16         ;# step to the second vector of the pair
    lvx     \V1, r6, r10
.endm

;# HProlog jump_label
;#  Common first-pass setup.
;#  expects:
;#   r5  x_offset
;#   r6  y_offset
;#  always sets up (before the branch):
;#   r5  x_offset * 16
;#   r10 16
;#   r12 32
;#   v19 shift count 7 in every halfword
;#  If x_offset is zero, branches to jump_label with r6 still unscaled and
;#  nothing below loaded.  Otherwise falls through having set:
;#   v20 horizontal taps, loaded from hfilter_b + r5
;#   v28 vector loaded from b_hperm_b
;#   v18 rounding constant 0x40 in every word
;#   r6  y_offset * 32, with CR0 recording whether the result is zero
;#  clobbers r0, r9, v21.
;#  Callers branch on CR0 after the macro, so nothing placed between the
;#  macro and that branch may modify CR0.
.macro HProlog jump_label
    ;# load up horizontal filter
    slwi.   r5, r5, 4           ;# index into horizontal filter array

    ;# index to the next set of vectors in the row.
    li      r10, 16
    li      r12, 32

    ;# downshift by 7 ( divide by 128 ) at the end
    vspltish v19, 7

    ;# If there isn't any filtering to be done for the horizontal, then
    ;#  just skip to the second pass.  CR0 is still the result of the
    ;#  slwi. above; li and vspltish leave it alone.
    beq     \jump_label

    load_c v20, hfilter_b, r5, r9, r0

    ;# setup constants
    ;# v28 permutation value for the output
    load_c v28, b_hperm_b, 0, r9, r0

    ;# rounding added in on the multiply
    vspltisw v21, 8
    vspltisw v18, 3
    vslw    v18, v21, v18       ;# 0x00000040000000400000004000000040

    slwi.   r6, r6, 5           ;# index into vertical filter array
.endm

;# HFilter V
;#  Filters one horizontal line that is already loaded and aligned in v21.
;# expects:
;#  v10 permute control for the first four output pixels
;#  v11 permute control for the last four output pixels
;#  v18 rounding (added in by the multiply-sum)
;#  v19 shift
;#  v20 filter taps (signed bytes)
;#  v21 source pixels (unsigned bytes)
;# clobbers:
;#  v24 tmp
;#  v25 tmp
;# result:
;#  \V  eight filtered bytes, repeated in both halves of the vector
;#
.macro HFilter V
    vperm   v24, v21, v21, v10  ;# v24 = 0123 1234 2345 3456
    vperm   v25, v21, v21, v11  ;# v25 = 4567 5678 6789 789A

    vmsummbm v24, v20, v24, v18 ;# per word: sum(tap * pixel) + rounding
    vmsummbm v25, v20, v25, v18

    vpkswus v24, v24, v25       ;# v24 = eight 16-bit sums (v24 words, then v25 words)

    vsrh    v24, v24, v19       ;# divide by 128

    vpkuhus \V, v24, v24        ;# \V = 8-bit results, same 8 bytes in each half
.endm

;# hfilter_8 V, increment_counter
;#  Loads one unaligned source row from r3 and runs HFilter on it.
;#  expects:
;#   r3  src_ptr (any alignment)
;#   r4  pitch (only read when increment_counter is non-zero)
;#   r10 16
;#   plus everything HFilter expects (v10, v11, v18, v19, v20)
;#  clobbers v17, v21, v22, v24, v25; r3 advances by r4 when
;#  increment_counter is non-zero.
;#  The two lvx loads read the 32 aligned bytes that contain r3 .. r3+16.
.macro hfilter_8 V, increment_counter
    lvsl    v17,  0, r3         ;# permutate value for alignment

    ;# input to filter is 9 bytes wide, output is 8 bytes.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif
    vperm   v21, v21, v22, v17  ;# v21 = 16 bytes starting at the old r3

    HFilter \V
.endm


;# load_and_align_8 V, increment_counter
;#  Copies 16 unaligned source bytes starting at r3 into V, unfiltered.
;#  expects:
;#   r3  src_ptr (any alignment)
;#   r4  pitch (only read when increment_counter is non-zero)
;#   r10 16
;#  clobbers v17, v21, v22; r3 advances by r4 when increment_counter is
;#  non-zero.
.macro load_and_align_8 V, increment_counter
    lvsl    v17,  0, r3         ;# permutate value for alignment

    ;# the 16 bytes wanted can straddle two aligned vectors, so load both
    ;#  and let the permute pick out the unaligned span.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif

    vperm   \V, v21, v22, v17
.endm

;# write_aligned_8 V, increment_counter
;#  Stores the whole 16-byte vector V at r7.
;#  expects:
;#   r7  dst_ptr; stvx ignores the low four address bits, so r7 has to be
;#       16-byte aligned for the data to land where intended
;#   r8  dst_pitch (only read when increment_counter is non-zero)
;#  r7 advances by r8 when increment_counter is non-zero.
.macro write_aligned_8 V, increment_counter
    stvx    \V,  0, r7

.if \increment_counter
    add     r7, r7, r8
.endif
.endm

;# vfilter_16 P0, P1
;#  Vertical two-tap filter across 16 byte lanes:
;#      P0 = (P0 * v20 + P1 * v21 + rounding) >> shift
;#  expects:
;#   P0  upper row (unsigned bytes); overwritten with the result
;#   P1  lower row (unsigned bytes); left untouched
;#   v18 rounding in every halfword
;#   v19 shift count in every halfword
;#   v20 tap applied to P0 (unsigned bytes)
;#   v21 tap applied to P1 (unsigned bytes)
;#  clobbers v22, v23, v24, v25.
.macro vfilter_16 P0, P1
    vmuleub v22, \P0, v20       ;# even bytes of P0 * tap0 (16-bit products)
    vadduhm v22, v18, v22       ;#  + rounding
    vmuloub v23, \P0, v20       ;# odd bytes of P0 * tap0
    vadduhm v23, v18, v23       ;#  + rounding

    vmuleub v24, \P1, v21       ;# even bytes of P1 * tap1
    vadduhm v22, v22, v24       ;# Re = evens, saturation unnecessary
    vmuloub v25, \P1, v21       ;# odd bytes of P1 * tap1
    vadduhm v23, v23, v25       ;# Ro = odds

    vsrh    v22, v22, v19       ;# divide by 128
    vsrh    v23, v23, v19       ;# v22 v23 = evens, odds
    vmrghh  \P0, v22, v23       ;# P0 v23 = 16-bit result in order
    vmrglh  v23, v22, v23
    vpkuhus \P0, \P0, v23       ;# P0 = 8-bit result
.endm


;# w_8x8 V, D, R, P
;#  Writes the first 8 bytes of vector V to the (possibly unaligned)
;#  destination D, then advances D by P.
;#   V  vector holding the row to store
;#   D  GPR with the destination address; updated to D + P
;#   R  scratch GPR used to move the data one word at a time
;#   P  GPR with the destination pitch
;#  The vector is bounced through the 16 bytes at 0(r1), so r1 must be
;#  16-byte aligned and those bytes must be free for scratch use.
;#  NOTE(review): this overwrites the back-chain word at 0(r1) - confirm
;#  that no stack walker relies on it while the caller is running.
.macro w_8x8 V, D, R, P
    stvx    \V, 0, r1
    lwz     \R, 0(r1)
    stw     \R, 0(\D)           ;# store through D, the pointer that is advanced
    lwz     \R, 4(r1)
    stw     \R, 4(\D)
    add     \D, \D, \P
.endm


;# bilinear_predict4x4_ppc
;#  Bilinear prediction of a 4x4 block: an optional horizontal pass
;#  (skipped when x_offset is zero) followed by a vertical pass (skipped
;#  only when the horizontal pass ran and y_offset is zero).
;#
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
;#
;# Clobbers r0, r3, r5-r7, r9-r12, CR0, v0-v4, v10, v11, v17-v25, v28.
;# VRSAVE is widened on entry and restored on exit (old value kept in r11).
;# Each source row is fetched with two aligned 16-byte loads, so up to 32
;# bytes around every row pointer are read.
;# The 16 bytes at 0(r1) of the local frame are used as scratch to move
;# vector data into a GPR.
;# NOTE(review): that scratch store overwrites the back-chain word written
;# by stwu; the frame is released with addi, so the function itself does
;# not depend on it, but confirm no stack walker needs it meanwhile.
    .align 2
bilinear_predict4x4_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf830    ;# v0-v4, v10, v11
    ori     r12, r12, 0xfff8    ;# v16-v28
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_4x4_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r9, r12
    load_c v11, b_4567_b, 0, r9, r12

    hfilter_8 v0, 1
    hfilter_8 v1, 1
    hfilter_8 v2, 1
    hfilter_8 v3, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    ;# CR0 still holds the result of the slwi. on r6 at the end of
    ;#  HProlog; nothing since then has modified it.
    beq     store_out_4x4_b

    hfilter_8 v4, 0

    b   second_pass_4x4_b

second_pass_4x4_pre_copy_b:
    ;# No horizontal filtering: HProlog branched here before scaling r6.
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_8  v0, 1
    load_and_align_8  v1, 1
    load_and_align_8  v2, 1
    load_and_align_8  v3, 1
    load_and_align_8  v4, 0     ;# last row: r3 is not needed again

second_pass_4x4_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0,  v1
    vfilter_16 v1,  v2
    vfilter_16 v2,  v3
    vfilter_16 v3,  v4

store_out_4x4_b:

    ;# Only the first four bytes of each row vector are written out.
    stvx    v0, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v1, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v2, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v3, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)

exit_4x4:

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

;# bilinear_predict8x4_ppc
;#  Bilinear prediction of a block 8 pixels wide and 4 rows high: an
;#  optional horizontal pass (skipped when x_offset is zero) followed by a
;#  vertical pass (skipped only when the horizontal pass ran and y_offset
;#  is zero).
;#
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
;#
;# Clobbers r0, r3, r5-r7, r9-r12, CR0, v0-v4, v10, v11, v17-v25, v28.
;# VRSAVE is widened on entry and restored on exit (old value kept in r11).
;# Each source row is fetched with two aligned 16-byte loads, so up to 32
;# bytes around every row pointer are read.
;# When dst_pitch is not 8, rows are written through w_8x8, which uses the
;# 16 bytes at 0(r1) as scratch.
;# NOTE(review): that scratch store overwrites the back-chain word written
;# by stwu; confirm no stack walker needs it while this function runs.
    .align 2
bilinear_predict8x4_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf830    ;# v0-v4, v10, v11
    ori     r12, r12, 0xfff8    ;# v16-v28
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_8x4_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r9, r12
    load_c v11, b_4567_b, 0, r9, r12

    hfilter_8 v0, 1
    hfilter_8 v1, 1
    hfilter_8 v2, 1
    hfilter_8 v3, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    ;# CR0 still holds the result of the slwi. on r6 at the end of
    ;#  HProlog; nothing since then has modified it.
    beq     store_out_8x4_b

    hfilter_8 v4, 0

    b   second_pass_8x4_b

second_pass_8x4_pre_copy_b:
    ;# No horizontal filtering: HProlog branched here before scaling r6.
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_8  v0, 1
    load_and_align_8  v1, 1
    load_and_align_8  v2, 1
    load_and_align_8  v3, 1
    load_and_align_8  v4, 0     ;# last row: r3 is not needed again

second_pass_8x4_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0,  v1
    vfilter_16 v1,  v2
    vfilter_16 v2,  v3
    vfilter_16 v3,  v4

store_out_8x4_b:

    ;# A pitch of 8 means the four rows are contiguous in memory and can
    ;#  be written with two full vector stores.
    cmpwi   cr0, r8, 8
    beq     cr0, store_aligned_8x4_b

    w_8x8   v0, r7, r0, r8
    w_8x8   v1, r7, r0, r8
    w_8x8   v2, r7, r0, r8
    w_8x8   v3, r7, r0, r8

    b       exit_8x4

store_aligned_8x4_b:
    ;# NOTE(review): stvx ignores the low four address bits, so this path
    ;#  assumes dst is 16-byte aligned whenever dst_pitch is 8 - confirm
    ;#  against the callers.
    load_c v10, b_hilo_b, 0, r9, r10

    ;# merge two rows per vector; the byte selection comes from b_hilo_b
    vperm   v0, v0, v1, v10
    vperm   v2, v2, v3, v10

    stvx    v0, 0, r7
    addi    r7, r7, 16
    stvx    v2, 0, r7

exit_8x4:

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

331474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
332474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char * src
333474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int src_pitch
334474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int x_offset
335474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 int y_offset
336474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 unsigned char * dst
337474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 int dst_pitch
338474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgbilinear_predict8x8_ppc:
339474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
340474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xfff0
341474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xffff
342474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
343474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
344474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwu    r1,-32(r1)          ;# create space on the stack
345474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
346474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    HProlog second_pass_8x8_pre_copy_b
347474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
348474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Load up permutation constants
349474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v10, b_0123_b, 0, r9, r12
350474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v11, b_4567_b, 0, r9, r12
351474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
352474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v0, 1
353474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v1, 1
354474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v2, 1
355474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v3, 1
356474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v4, 1
357474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v5, 1
358474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v6, 1
359474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v7, 1
360474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
361474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Finished filtering main horizontal block.  If there is no
362474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  vertical filtering, jump to storing the data.  Otherwise
363474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  load up and filter the additional line that is needed
364474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  for the vertical filter.
365474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq     store_out_8x8_b
366474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
367474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_8 v8, 0
368474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
369474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b   second_pass_8x8_b
370474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
371474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_8x8_pre_copy_b:
372474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi    r6, r6, 5           ;# index into vertical filter array
373474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
374474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v0, 1
375474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v1, 1
376474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v2, 1
377474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v3, 1
378474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v4, 1
379474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v5, 1
380474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v6, 1
381474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v7, 1
382474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_8  v8, 0
383474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
384474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_8x8_b:
385474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v20, 8
386474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v18, 3
387474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
388474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
389474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_vfilter v20, v21
390474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
391474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v0,  v1
392474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v1,  v2
393474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v2,  v3
394474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v3,  v4
395474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v4,  v5
396474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v5,  v6
397474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v6,  v7
398474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v7,  v8
399474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
400474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_out_8x8_b:
401474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
402474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    cmpi    cr0, r8, 8
403474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq     cr0, store_aligned_8x8_b
404474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
405474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v0, r7, r0, r8
406474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v1, r7, r0, r8
407474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v2, r7, r0, r8
408474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v3, r7, r0, r8
409474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v4, r7, r0, r8
410474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v5, r7, r0, r8
411474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v6, r7, r0, r8
412474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    w_8x8   v7, r7, r0, r8
413474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
414474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b       exit_8x8
415474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
416474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_aligned_8x8_b:
417474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_c v10, b_hilo_b, 0, r9, r10
418474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
419474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v0, v0, v1, v10
420474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v2, v2, v3, v10
421474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v4, v4, v5, v10
422474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v6, v6, v7, v10
423474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
424474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v0, 0, r7
425474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
426474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v2, 0, r7
427474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
428474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v4, 0, r7
429474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r7, 16
430474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v6, 0, r7
431474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
432474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgexit_8x8:
433474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
434474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r1, r1, 32          ;# recover stack
435474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
436474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
437474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
438474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
439474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# hfilter_16 V, increment_counter
;#  Horizontally filters one 16-pixel row read from r3 and leaves the 16
;#  result bytes, in pixel order, in \V.
;#    V                  destination vector register
;#    increment_counter  non-zero: step r3 to the next row (r3 += r4)
;#  Clobbers v17 and v21-v27, plus r3 when increment_counter is non-zero.
440474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# expects:
441474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3  src_ptr (any alignment)
442474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4  pitch
443474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r10 16 (offset of the second aligned quadword)
444474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r12 32 (offset of the third aligned quadword)
445474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v17 perm input (rewritten here by lvsl)
446474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v18 rounding (word addend of every multiply-sum)
447474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v19 shift (right-shift count for each halfword)
448474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v20 filter taps (one group of four byte taps per word)
449474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v21 tmp
450474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v22 tmp
451474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v23 tmp
452474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v24 tmp
453474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v25 tmp
454474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v26 tmp
455474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v27 tmp
456474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v28 perm output
457474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
458474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro hfilter_16 V, increment_counter
459474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
460474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvsl    v17,  0, r3         ;# permute control for the alignment of r3
461474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
462474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# the four passes below read bytes 0..18 of the row; output is 16
463474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  bytes.  An unaligned row can span three aligned vectors.
464474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v21,   0, r3
465474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v22, r10, r3
466474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v23, r12, r3
467474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
468474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.if \increment_counter
469474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r3, r3, r4
470474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endif
471474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v21, v21, v22, v17
472474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   v22, v22, v23, v17  ;# v21 v22 = input pixels left-justified
473474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
474474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# set 0: pixels 0 4 8 C
475474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v24, v20, v21, v18 ;# per word: sum(tap * pixel) + rounding
476474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
477474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# set 1: pixels 1 5 9 D (row shifted left by one byte)
478474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v23, v21, v22, 1
479474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v25, v20, v23, v18
480474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
481474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# set 2: pixels 2 6 A E (row shifted left by two bytes)
482474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v23, v21, v22, 2
483474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v26, v20, v23, v18
484474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
485474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# set 3: pixels 3 7 B F (row shifted left by three bytes)
486474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsldoi  v23, v21, v22, 3
487474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmsummbm v27, v20, v23, v18
488474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
489474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkswus v24, v24, v25       ;# v24 = 0 4 8 C 1 5 9 D (16-bit)
490474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkswus v25, v26, v27       ;# v25 = 2 6 A E 3 7 B F
491474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
492474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    v24, v24, v19       ;# v24, v25 >>= v19 (divide by 128 when v19 = 7)
493474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrh    v25, v25, v19
494474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
495474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkuhus \V, v24, v25        ;# \V = scrambled 8-bit result
    ;# v0 is only a filler second source: a permute vector whose indices
    ;#  are all below 16 never selects from it.  Presumably v28 holds
    ;#  b_hperm_b -- TODO confirm where v28 is loaded.
496474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   \V, \V, v0, v28     ;# \V = correctly-ordered result
497474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
498474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# load_and_align_16 V, increment_counter
;#  Copies the 16 source bytes starting at r3 (any alignment) into \V
;#  without filtering them.
;#    V                  destination vector register
;#    increment_counter  non-zero: step r3 to the next row (r3 += r4)
;#  r10 must hold 16, the offset of the next aligned quadword.
;#  Clobbers v17, v21 and v22, plus r3 when increment_counter is non-zero.
499474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro load_and_align_16 V, increment_counter
500474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvsl    v17,  0, r3         ;# permute control for the alignment of r3
501474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
502474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# 16 bytes are wanted.  An unaligned row straddles two aligned
503474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  vectors, so both are loaded and merged below.
504474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v21,   0, r3
505474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v22, r10, r3
506474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
507474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.if \increment_counter
508474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r3, r3, r4
509474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endif
510474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
511474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   \V, v21, v22, v17   ;# \V = the 16 bytes that started at r3
512474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
513474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# write_16 V, increment_counter
;#  Stores the 16 bytes of \V to the destination row at r7.
;#    V                  vector register to store
;#    increment_counter  non-zero: step r7 to the next row (r7 += r8)
;#  stvx ignores the low four bits of the address, so r7 (and r8, when rows
;#  are chained) must be a multiple of 16 for the row to land exactly at r7.
514474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro write_16 V, increment_counter
515474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    \V,  0, r7
516474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
517474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.if \increment_counter
518474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r8
519474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endif
520474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
521474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
522474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# bilinear_predict16x16_ppc
;#  Bilinear prediction of a 16x16 block: 17 source rows are filtered
;#  horizontally (or copied unfiltered), run through the vertical pass,
;#  and the resulting 16 rows are stored to dst.
523474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char * src
524474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int src_pitch
525474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int x_offset
526474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 int y_offset
527474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 unsigned char * dst
528474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 int dst_pitch
;#  Rows are stored with stvx (see write_16), which ignores the low four
;#  address bits, so dst and dst_pitch are expected to be multiples of 16.
;#  r11 keeps the caller VRSAVE until it is restored just before blr.
529474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgbilinear_predict16x16_ppc:
530474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
531474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# r12 = old VRSAVE | 0xfffffff8,
532474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xfff8    ;#  i.e. additionally mark v0-v28 as live
533474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
534474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# HProlog is defined earlier in this file.  Presumably it loads the
    ;#  horizontal constants and branches to the label given here when
    ;#  x_offset is 0.  It must also leave cr0 describing y_offset, because
    ;#  the beq after the row filters tests cr0 -- TODO confirm in HProlog.
535474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    HProlog second_pass_16x16_pre_copy_b
536474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Horizontally filter 16 source rows into v0-v15, stepping r3 by the
    ;#  pitch after each row.
537474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v0,  1
538474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v1,  1
539474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v2,  1
540474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v3,  1
541474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v4,  1
542474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v5,  1
543474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v6,  1
544474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v7,  1
545474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v8,  1
546474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v9,  1
547474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v10, 1
548474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v11, 1
549474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v12, 1
550474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v13, 1
551474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v14, 1
552474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v15, 1
553474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
554474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Finished filtering main horizontal block.  If there is no
555474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  vertical filtering, jump to storing the data.  Otherwise
556474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  load up and filter the additional line that is needed
557474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#  for the vertical filter.
    ;# (No instruction in hfilter_16 writes cr0, so the condition tested
    ;#  here is the one that was set before the row filters ran.)
558474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    beq     store_out_16x16_b
559474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
560474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hfilter_16 v16, 0           ;# 17th row; r3 is not advanced
561474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
562474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    b   second_pass_16x16_b
563474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Entry used when the horizontal pass is skipped: copy 17 source rows
    ;#  unfiltered into v0-v16.  The vertical pass always runs on this path.
564474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_16x16_pre_copy_b:
565474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    slwi    r6, r6, 5           ;# index into vertical filter array (y_offset * 32)
566474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
567474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v0,  1
568474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v1,  1
569474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v2,  1
570474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v3,  1
571474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v4,  1
572474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v5,  1
573474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v6,  1
574474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v7,  1
575474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v8,  1
576474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v9,  1
577474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v10, 1
578474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v11, 1
579474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v12, 1
580474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v13, 1
581474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v14, 1
582474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v15, 1
583474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_and_align_16  v16, 0
584474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Vertical pass over v0-v16.  v18 becomes the rounding constant:
    ;#  8 << 3 = 64 in every halfword.
585474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgsecond_pass_16x16_b:
586474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v20, 8
587474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v18, 3
588474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
589474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# load_vfilter and vfilter_16 are macros defined earlier in this file.
    ;#  Presumably load_vfilter puts the two tap vectors selected by r6 in
    ;#  v20/v21 and vfilter_16 blends a row with the row below it, leaving
    ;#  the result in its first operand -- TODO confirm against the macros.
590474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_vfilter v20, v21
591474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
592474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v0,  v1
593474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v1,  v2
594474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v2,  v3
595474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v3,  v4
596474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v4,  v5
597474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v5,  v6
598474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v6,  v7
599474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v7,  v8
600474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v8,  v9
601474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v9,  v10
602474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v10, v11
603474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v11, v12
604474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v12, v13
605474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v13, v14
606474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v14, v15
607474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vfilter_16 v15, v16
608474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Store v0-v15 as 16 consecutive destination rows, r8 bytes apart.
609474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgstore_out_16x16_b:
610474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
611474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v0,  1
612474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v1,  1
613474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v2,  1
614474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v3,  1
615474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v4,  1
616474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v5,  1
617474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v6,  1
618474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v7,  1
619474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v8,  1
620474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v9,  1
621474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v10, 1
622474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v11, 1
623474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v12, 1
624474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v13, 1
625474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v14, 1
626474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    write_16 v15, 0
627474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
628474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
629474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
630474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
631474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
632474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .data
633474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Constant tables for the bilinear filters.  load_c fetches a table with
;#  lvx, which ignores the low four address bits, so every table has to
;#  start on a 16-byte boundary (.align 4 = 2^4 bytes here).

;# hfilter_b: eight 16-byte rows of horizontal taps.  Row k holds the group
;#  (128 - 16k, 16k, 0, 0) repeated four times, one group per 32-bit word.
;#  NOTE(review): 128 in row 0 does not fit a signed byte, and vmsummbm
;#  reads its tap operand as signed bytes; presumably row 0 is never used
;#  because x_offset 0 skips the horizontal pass -- confirm in HProlog.
634474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
635474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orghfilter_b:
636474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   128,  0,  0,  0,128,  0,  0,  0,128,  0,  0,  0,128,  0,  0,  0
637474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   112, 16,  0,  0,112, 16,  0,  0,112, 16,  0,  0,112, 16,  0,  0
638474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    96, 32,  0,  0, 96, 32,  0,  0, 96, 32,  0,  0, 96, 32,  0,  0
639474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    80, 48,  0,  0, 80, 48,  0,  0, 80, 48,  0,  0, 80, 48,  0,  0
640474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    64, 64,  0,  0, 64, 64,  0,  0, 64, 64,  0,  0, 64, 64,  0,  0
641474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    48, 80,  0,  0, 48, 80,  0,  0, 48, 80,  0,  0, 48, 80,  0,  0
642474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    32, 96,  0,  0, 32, 96,  0,  0, 32, 96,  0,  0, 32, 96,  0,  0
643474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    16,112,  0,  0, 16,112,  0,  0, 16,112,  0,  0, 16,112,  0,  0
644474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# vfilter_b: eight pairs of 16-byte rows (32 bytes per pair).  Pair k is
;#  the first tap (128 - 16k) splatted across a row, followed by the
;#  second tap (16k) splatted across the next row.
645474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
646474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgvfilter_b:
647474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
648474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
649474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
650474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
651474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
652474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
653474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
654474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
655474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
656474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
657474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
658474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
659474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
660474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
661474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
662474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
663474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# b_hperm_b: vperm control that turns bytes ordered 0 4 8 C 1 5 9 D
;#  2 6 A E 3 7 B F back into linear order 0..F.
664474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
665474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgb_hperm_b:
666474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15
667474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# b_0123_b: vperm control that gathers four overlapping 4-byte windows
;#  starting at source bytes 0, 1, 2 and 3.
668474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
669474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgb_0123_b:
670474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  1,  2,  3,  1,  2,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6
671474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# b_4567_b: same as b_0123_b for the windows starting at bytes 4, 5, 6, 7.
672474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
673474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgb_4567_b:
674474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     4,  5,  6,  7,  5,  6,  7,  8,  6,  7,  8,  9,  7,  8,  9, 10
675474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# b_hilo_b: vperm control that packs the first 8 bytes of the first source
;#  (indices 0-7) with the first 8 bytes of the second source (indices
;#  16-23).  store_aligned_8x8_b uses it to merge two 8-byte rows into one
;#  vector.  The explicit alignment keeps this table on a 16-byte boundary
;#  even if the tables above it change size; it adds no padding today.
    .align 4
676474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgb_hilo_b:
677474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte     0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23
678