1474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;
2474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;
4474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  Use of this source code is governed by a BSD-style license
5474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  that can be found in the LICENSE file in the root of the source
6474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  tree. An additional intellectual property rights grant can be found
7474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  in the file PATENTS.  All contributing project authors may
8474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;  be found in the AUTHORS file in the root of the source tree.
9474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;
10474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
11474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
12474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl mbloop_filter_horizontal_edge_y_ppc
13474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl loop_filter_horizontal_edge_y_ppc
14474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl mbloop_filter_vertical_edge_y_ppc
15474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl loop_filter_vertical_edge_y_ppc
16474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
17474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl mbloop_filter_horizontal_edge_uv_ppc
18474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl loop_filter_horizontal_edge_uv_ppc
19474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl mbloop_filter_vertical_edge_uv_ppc
20474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl loop_filter_vertical_edge_uv_ppc
21474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
22474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl loop_filter_simple_horizontal_edge_ppc
23474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .globl loop_filter_simple_vertical_edge_ppc
24474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
25474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .text
26474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# We often need to perform transposes (and other transpose-like operations)
27474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   on matrices of data.  This is simplified by the fact that we usually
28474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   operate on hunks of data whose dimensions are powers of 2, or at least
29474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   divisible by highish powers of 2.
30474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
31474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   These operations can be very confusing.  They become more straightforward
32474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   when we think of them as permutations of address bits: Concatenate a
33474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   group of vector registers and think of it as occupying a block of
34474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   memory beginning at address zero.  The low four bits 0...3 of the
35474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   address then correspond to position within a register, the higher-order
36474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   address bits select the register.
37474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
38474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   Although register selection, at the code level, is arbitrary, things
39474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   are simpler if we use contiguous ranges of register numbers, simpler
40474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   still if the low-order bits of the register number correspond to
41474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   conceptual address bits.  We do this whenever reasonable.
42474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
43474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   A 16x16 transpose can then be thought of as an operation on
44474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   a 256-element block of memory.  It takes 8 bits 0...7 to address this
45474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   memory and the effect of a transpose is to interchange address bit
46474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   0 with 4, 1 with 5, 2 with 6, and 3 with 7.  Bits 0...3 index the
47474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   column, which is interchanged with the row addressed by bits 4..7.
48474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
49474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   The altivec merge instructions provide a rapid means of effecting
50474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   many of these transforms.  They operate at three widths (8,16,32).
51474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   Writing V(x) for vector register #x, paired merges permute address
52474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   indices as follows.
53474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
54474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   0->1  1->2  2->3  3->(4+d)  (4+s)->0:
55474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
56474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#      vmrghb  V( x),          V( y), V( y + (1<<s))
57474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#      vmrglb  V( x + (1<<d)), V( y), V( y + (1<<s))
58474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
59474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
60474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   =0=   1->2  2->3  3->(4+d)  (4+s)->1:
61474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
62474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#      vmrghh  V( x),          V( y), V( y + (1<<s))
63474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#      vmrglh  V( x + (1<<d)), V( y), V( y + (1<<s))
64474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
65474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
66474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   =0=   =1=   2->3  3->(4+d)  (4+s)->2:
67474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
68474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#      vmrghw  V( x),          V( y), V( y + (1<<s))
69474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#      vmrglw  V( x + (1<<d)), V( y), V( y + (1<<s))
70474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
71474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
;#   Unfortunately, there is no doubleword merge instruction.
;#   The following sequence uses "vperm" as a substitute.
;#   Assuming that the selection masks b_hihi and b_lolo (defined in LFppc.c)
;#   are in registers Vhihi and Vlolo, we can also effect the permutation
76474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
77474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   =0=   =1=   =2=   3->(4+d)  (4+s)->3   by the sequence:
78474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
79474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#      vperm   V( x),          V( y), V( y + (1<<s)), Vhihi
80474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#      vperm   V( x + (1<<d)), V( y), V( y + (1<<s)), Vlolo
81474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
82474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
83474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   Except for bits s and d, the other relationships between register
84474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   number (= high-order part of address) bits are at the disposal of
85474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   the programmer.
86474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
87474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
88474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# To avoid excess transposes, we filter all 3 vertical luma subblock
89474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   edges together.  This requires a single 16x16 transpose, which, in
90474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   the above language, amounts to the following permutation of address
91474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   indices:  0<->4   1<->5  2<->6  3<->7, which we accomplish by
92474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   4 iterations of the cyclic transform 0->1->2->3->4->5->6->7->0.
93474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
94474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   Except for the fact that the destination registers get written
95474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   before we are done referencing the old contents, the cyclic transform
96474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   is effected by
97474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
;#      x = 0;  do {
;#          vmrghb V(2x),   V(x), V(x+8);
;#          vmrglb V(2x+1), V(x), V(x+8);
;#      } while( ++x < 8);
102474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
103474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   For clarity, and because we can afford it, we do this transpose
104474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   using all 32 registers, alternating the banks 0..15  and  16 .. 31,
105474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   leaving the final result in 16 .. 31, as the lower registers are
106474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   used in the filtering itself.
107474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
;# Tpair A, B, X, Y
;#   Byte-interleaves vectors X and Y: A receives the merge of their
;#   high halves (vmrghb), B the merge of their low halves (vmrglb).
;#   A is written before X and Y are read for B, so A must not be the
;#   same register as X or Y.
.macro Tpair A, B, X, Y
    vmrghb  \A, \X, \Y
    vmrglb  \B, \X, \Y
.endm
112474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
113474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Each step takes 8*2 = 16 instructions
114474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# t16_even: one cyclic step of the 16x16 transpose.
;#   Reads the low bank v0..v15 and writes the high bank v16..v31:
;#   for x = 0..7, (v(16+2x), v(16+2x+1)) = Tpair of (v(x), v(x+8)).
.macro t16_even
    Tpair v16,v17,  v0,v8
    Tpair v18,v19,  v1,v9
    Tpair v20,v21,  v2,v10
    Tpair v22,v23,  v3,v11
    Tpair v24,v25,  v4,v12
    Tpair v26,v27,  v5,v13
    Tpair v28,v29,  v6,v14
    Tpair v30,v31,  v7,v15
.endm
125474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# t16_odd: one cyclic step of the 16x16 transpose.
;#   Reads the high bank v16..v31 and writes the low bank v0..v15:
;#   for x = 0..7, (v(2x), v(2x+1)) = Tpair of (v(16+x), v(24+x)).
.macro t16_odd
    Tpair v0,v1,   v16,v24
    Tpair v2,v3,   v17,v25
    Tpair v4,v5,   v18,v26
    Tpair v6,v7,   v19,v27
    Tpair v8,v9,   v20,v28
    Tpair v10,v11, v21,v29
    Tpair v12,v13, v22,v30
    Tpair v14,v15, v23,v31
.endm
136474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
137474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Whole transpose takes 4*16 = 64 instructions
138474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# t16_full: complete 16x16 byte transpose as four cyclic steps.
;#   The first step is t16_odd, so the input is taken from v16..v31;
;#   the last step is t16_even, so the result is left in v16..v31.
;#   v0..v15 are overwritten with intermediate data.
.macro t16_full
    t16_odd
    t16_even
    t16_odd
    t16_even
.endm
145474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
146474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Vertical edge filtering requires transposes.  For the simple filter,
147474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   we need to convert 16 rows of 4 pels each into 4 registers of 16 pels
148474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   each.  Writing 0 ... 63 for the pixel indices, the desired result is:
149474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
;#  v0 =  0  1 ... 14 15
;#  v1 = 16 17 ... 30 31
;#  v2 = 32 33 ... 46 47
;#  v3 = 48 49 ... 62 63
154474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
155474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  In frame-buffer memory, the layout is:
156474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
157474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#     0  16  32  48
158474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#     1  17  33  49
159474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#     ...
160474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#    15  31  47  63.
161474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
162474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  We begin by reading the data 32 bits at a time (using scalar operations)
163474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  into a temporary array, reading the rows of the array into vector registers,
164474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  with the following layout:
165474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
166474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v0 =  0 16 32 48  4 20 36 52  8 24 40 56  12 28 44 60
167474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v1 =  1 17 33 49  5 21 ...                      45 61
168474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v2 =  2 18 ...                                  46 62
169474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v3 =  3 19 ...                                  47 63
170474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
171474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  From the "address-bit" perspective discussed above, we simply need to
172474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  interchange bits 0 <-> 4 and 1 <-> 5, leaving bits 2 and 3 alone.
173474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  In other words, we transpose each of the four 4x4 submatrices.
174474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
175474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  This transformation is its own inverse, and we need to perform it
176474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  again before writing the pixels back into the frame buffer.
177474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
;#  It acts in place on registers v0...v3, uses v4...v7 as temporaries,
;#  and assumes that the registers passed as the macro arguments Vlo/Vhi
;#  (e.g. v14/v15) contain the b_hihi/b_lolo selectors defined above.
;#  We think of both groups of 4 registers as having
;#  "addresses" {0,1,2,3} * 16.
182474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
;# Transpose4times4x4 Vlo, Vhi
;#   Permutes v0..v3 in place through four merge stages; the net effect on
;#   address bits is 0 <-> 4 and 1 <-> 5, with bits 2 and 3 unchanged.
;#   Clobbers v4..v7, which hold the results of stages 1 and 3.
;#   Vlo / Vhi are the vperm selector registers for the final stage:
;#   Vlo builds the lower-numbered register of each output pair (v0, v2),
;#   Vhi the higher-numbered one (v1, v3).
;#   NOTE(review): by the doubleword-merge rule stated earlier in this file
;#   that makes Vlo the b_hihi mask and Vhi the b_lolo mask -- confirm
;#   against the callers.
.macro Transpose4times4x4 Vlo, Vhi

    ;# Stage 1, byte merge       (d=s=0):    0->1  1->2  2->3  3->4  4->0  =5=

    vmrghb  v4, v0, v1
    vmrglb  v5, v0, v1
    vmrghb  v6, v2, v3
    vmrglb  v7, v2, v3

    ;# Stage 2, halfword merge   (d=0 s=1):  =0=   1->2  2->3  3->4  4->5  5->1

    vmrghh  v0, v4, v6
    vmrglh  v1, v4, v6
    vmrghh  v2, v5, v7
    vmrglh  v3, v5, v7

    ;# Stage 3, word merge       (d=s=0):    =0=   =1=   2->3  3->4  4->2  =5=

    vmrghw  v4, v0, v1
    vmrglw  v5, v0, v1
    vmrghw  v6, v2, v3
    vmrglw  v7, v2, v3

    ;# Stage 4, doubleword merge (d=0 s=1):  =0=   =1=   =2=   3->4  4->5  5->3
    ;# (done with vperm, since there is no doubleword merge instruction)

    vperm   v0, v4, v6, \Vlo
    vperm   v1, v4, v6, \Vhi
    vperm   v2, v5, v7, \Vlo
    vperm   v3, v5, v7, \Vhi
.endm
;# end Transpose4times4x4
214474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
215474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
216474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Normal mb vertical edge filter transpose.
217474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
218474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   We read 8 columns of data, initially in the following pattern:
219474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
220474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  (0,0)  (1,0) ... (7,0)  (0,1)  (1,1) ... (7,1)
221474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  (0,2)  (1,2) ... (7,2)  (0,3)  (1,3) ... (7,3)
222474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  ...
223474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  (0,14) (1,14) .. (7,14) (0,15) (1,15) .. (7,15)
224474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
225474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#   and wish to convert to:
226474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
227474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  (0,0) ... (0,15)
228474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  (1,0) ... (1,15)
229474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  ...
230474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  (7,0) ... (7,15).
231474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
232474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  In "address bit" language, we wish to map
233474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
234474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  0->4  1->5  2->6  3->0  4->1  5->2  6->3, i.e., I -> (I+4) mod 7.
235474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
236474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  This can be accomplished by 4 iterations of the cyclic transform
237474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
238474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  I -> (I+1) mod 7;
239474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
240474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  each iteration can be realized by (d=0, s=2):
241474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
242474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  x = 0;  do  Tpair( V(2x),V(2x+1),  V(x),V(x+4))  while( ++x < 4);
243474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
244474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  The input/output is in registers v0...v7.  We use v10...v17 as mirrors;
245474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  preserving v8 = sign converter.
246474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
247474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  Inverse transpose is similar, except here I -> (I+3) mod 7 and the
248474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  result lands in the "mirror" registers v10...v17
249474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#
;# t8x16_odd: one cyclic step of the 8x16 transpose.
;#   Reads v0..v7 and writes the mirror bank v10..v17:
;#   for x = 0..3, (v(10+2x), v(10+2x+1)) = Tpair of (v(x), v(x+4)).
;#   v8 and v9 are not touched.
.macro t8x16_odd
    Tpair v10, v11,  v0, v4
    Tpair v12, v13,  v1, v5
    Tpair v14, v15,  v2, v6
    Tpair v16, v17,  v3, v7
.endm
256474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# t8x16_even: one cyclic step of the 8x16 transpose.
;#   Reads the mirror bank v10..v17 and writes v0..v7:
;#   for x = 0..3, (v(2x), v(2x+1)) = Tpair of (v(10+x), v(14+x)).
;#   v8 and v9 are not touched.
.macro t8x16_even
    Tpair v0, v1,  v10, v14
    Tpair v2, v3,  v11, v15
    Tpair v4, v5,  v12, v16
    Tpair v6, v7,  v13, v17
.endm
263474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# transpose8x16_fwd: forward 8x16 transpose, four cyclic steps.
;#   Input is taken from v0..v7; because the number of steps is even,
;#   the result is left in v0..v7.  v10..v17 are overwritten.
.macro transpose8x16_fwd
    t8x16_odd
    t8x16_even
    t8x16_odd
    t8x16_even
.endm
270474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# transpose8x16_inv: inverse 8x16 transpose, three cyclic steps.
;#   Input is taken from v0..v7; because the number of steps is odd,
;#   the result is left in the mirror registers v10..v17.
;#   v0..v7 are overwritten by the middle step.
.macro transpose8x16_inv
    t8x16_odd
    t8x16_even
    t8x16_odd
.endm
276474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Transpose16x16: 16x16 byte transpose of v16..v31, result in v16..v31.
;#   v0..v15 are overwritten with intermediate data.
;#   This is the same 64-instruction sequence that t16_full generates
;#   (t16_odd, t16_even, t16_odd, t16_even), so it is expressed through
;#   that macro instead of a hand-expanded copy that could drift out of
;#   sync with it.
.macro Transpose16x16
    t16_full
.endm
343474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# load_g Vreg, Gptr
;#   Loads a global vector into vector register Vreg.
;#   Gptr is a memory operand (a local variable) that holds the ADDRESS of
;#   the vector: the address is fetched into r0 with lwz, then the vector
;#   itself is read with lvx.
;#   Trashes r0.
.macro load_g Vreg, Gptr
    lwz     r0, \Gptr
    lvx     \Vreg, 0, r0
.endm
350474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Abs RES, TMP, A, B
;#   RES = abs(A - B) on unsigned bytes; trashes TMP.
;#   This exploits saturation: an unsigned saturating subtract clamps a
;#   negative answer to 0, so of (A - B) and (B - A) one is the absolute
;#   difference and the other is 0.  ORing the two gives abs(A - B).
;#   RES is written before A and B are read for the second subtract, so
;#   RES must not be the same register as A or B.
.macro Abs RES, TMP, A, B
    vsububs \RES, \A, \B
    vsububs \TMP, \B, \A
    vor     \RES, \RES, \TMP
.endm
360474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
361474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# RES = Max( RES, abs( A-B)), trashes TMP
;# Works on 16 unsigned bytes.  |A-B| is never formed explicitly: each
;# one-sided saturating difference is folded into the running maximum.
;# RES and TMP are both written before A and B are read again, so neither
;# may be the same register as A or B.
362474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro max_abs RES, TMP, A, B
363474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsububs \TMP, \A, \B        ;# TMP = A-B where A > B, otherwise 0
364474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmaxub  \RES, \RES, \TMP    ;# RES = max( RES, TMP)
365474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsububs \TMP, \B, \A        ;# TMP = B-A where B > A, otherwise 0
366474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmaxub  \RES, \RES, \TMP    ;# RES now covers both signs, i.e. |A-B|
367474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
368474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
369474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Masks
    ;# Register contract (pixel names follow the operand comments below):
    ;#   in:  v0..v7 = P3, P2, P1, P0, Q0, Q1, Q2, Q3 (unsigned bytes)
    ;#        v8 = flimit, v9 = limit, v10 = thresh
    ;#   out: v10 = HEV mask, v9 = "limit exceeded" mask,
    ;#        v8  = "flimit or limit exceeded" mask
    ;#   trashes v13, v14
    ;# The compares are unsigned byte compares whose result bytes are all ones
    ;# where the condition holds and zero elsewhere.
370474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# build masks
371474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# input is all 8 bit unsigned (0-255).  need to
372474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# do abs(vala-valb) > limit.  but no need to compare each
373474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# value to the limit.  find the max of the absolute differences
374474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# and compare that to the limit.
375474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# First hev
376474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Abs     v14, v13, v2, v3    ;# |P1 - P0|
377474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    max_abs  v14, v13, v5, v4    ;# |Q1 - Q0|
378474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
379474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vcmpgtub v10, v14, v10      ;# HEV = true if thresh exceeded
380474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
381474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Next limit (v14 still holds the two HEV differences, so they count here too)
382474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    max_abs  v14, v13, v0, v1    ;# |P3 - P2|
383474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    max_abs  v14, v13, v1, v2    ;# |P2 - P1|
384474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    max_abs  v14, v13, v6, v5    ;# |Q2 - Q1|
385474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    max_abs  v14, v13, v7, v6    ;# |Q3 - Q2|
386474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
387474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vcmpgtub v9, v14, v9        ;# R = true if limit exceeded
388474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
389474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# flimit
390474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Abs     v14, v13, v3, v4    ;# |P0 - Q0|
391474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
392474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vcmpgtub v8, v14, v8        ;# X = true if flimit exceeded
393474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
394474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vor     v8, v8, v9          ;# v8 = X | R: true if flimit or limit exceeded
395474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# done building masks
396474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
397474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
398474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro build_constants RFL, RLI, RTH, FL, LI, TH
    ;# RFL, RLI, RTH: general registers holding the addresses of the flimit,
    ;#   limit and thresh vectors.
    ;# FL, LI, TH:    vector registers that receive those three vectors.
    ;# Side effects:  v11 = 0x80 in every byte (the sign-flip constant the
    ;#   filter macros xor with); v12 is trashed.
399474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# build constants
400474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \FL, 0, \RFL        ;# flimit
401474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \LI, 0, \RLI        ;# limit
402474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \TH, 0, \RTH        ;# thresh
403474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# the splat immediate is a small signed value, so 0x80 is built as 8 << 4
404474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v11, 8             ;# v11 = 8 in every byte
405474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v12, 4             ;# v12 = per-byte shift count of 4
406474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslb    v11, v11, v12       ;# 0x80808080808080808080808080808080
407474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
408474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
409474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro load_data_y
    ;# Loads the eight rows surrounding a horizontal edge into v0..v7.
    ;#   reads:  r3 = address of the Q0 row, r4 = row stride
    ;#   writes: v0..v7 = P3, P2, P1, P0, Q0, Q1, Q2, Q3
    ;#   overwrites r0, r5, r6, r7.  On exit r5 = 2*stride, r6 = r3 - 2*stride
    ;#   and r7 = -stride; the callers in this file reuse them as store offsets.
    ;# Anything the caller still needs from r5..r7 must be consumed first.
410474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# setup strides/pointers to be able to access
411474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# all of the data
412474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r5, r4, r4          ;# r5 = 2 * stride
413474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r6, r3, r5          ;# r6 -> 2 rows back
414474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    neg     r7, r4              ;# r7 = -stride
415474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
416474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# load 16 pixels worth of data to work on
417474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r0, r6, r5          ;# r0 -> 4 rows back (temp)
418474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v0,  0, r0          ;# P3  (read only)
419474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v1, r7, r6          ;# P2  (3 rows back)
420474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v2,  0, r6          ;# P1  (2 rows back)
421474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v3, r7, r3          ;# P0  (1 row back)
422474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v4,  0, r3          ;# Q0  (row at r3)
423474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v5, r4, r3          ;# Q1  (1 row fwd)
424474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v6, r5, r3          ;# Q2  (2 rows fwd)
425474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r0, r3, r5          ;# r0 -> 2 rows fwd (temp)
426474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v7, r4, r0          ;# Q3  (read only)
427474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
428474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
429474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Expects
430474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v10 == HEV
431474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v13 == tmp
432474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  v14 == tmp
;# Also reads
;#  v8  == mask whose set bytes mark pixels that must NOT be filtered
;#         (it is applied with vandc)
;#  v11 == 0x80 in every byte
;# and overwrites v8, v9 and v15 in addition to v13 and v14.
;# P0, Q0, P1, Q1 are vector registers.  All four are left in sign-flipped
;#  (xor 0x80) form; only P0 and Q0 are adjusted.  On exit v13 = f1 >> 3.
;# HEV_PRESENT: when non-zero the (P1 - Q1) term is kept only where HEV is set.
;# In the comments below c() means signed saturation to a byte.
433474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro common_adjust P0, Q0, P1, Q1, HEV_PRESENT
434474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    \P1, \P1, v11       ;# SP1
435474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    \P0, \P0, v11       ;# SP0
436474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    \Q0, \Q0, v11       ;# SQ0
437474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    \Q1, \Q1, v11       ;# SQ1
438474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
439474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v13, \P1, \Q1       ;# f  = c (P1 - Q1)
440474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.if \HEV_PRESENT
441474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vand    v13, v13, v10       ;# f &= hev
442474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endif
443474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v14, \Q0, \P0       ;# -126 <=  X = Q0-P0  <= +126
    ;# 3*X is formed by three saturating adds rather than a multiply
444474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v13, v13, v14
445474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v13, v13, v14
446474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v13, v13, v14       ;# A = c( c(P1-Q1) + 3*(Q0-P0))
447474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
448474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vandc   v13, v13, v8        ;# f &= mask (clears f where v8 is set)
449474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# the mask in v8 is no longer needed; v8/v9 become the constants 3 and 4
450474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v8, 3
451474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v9, 4
452474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
453474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v14, v13, v9        ;# f1 = c (f+4)
454474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v15, v13, v8        ;# f2 = c (f+3)
455474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
456474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrab   v13, v14, v8        ;# v13 = f1 >> 3 (arithmetic)
457474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrab   v15, v15, v8        ;# f2 >>= 3
458474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
459474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs \Q0, \Q0, v13       ;# u1 = c (SQ0 - f1)
460474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs \P0, \P0, v15       ;# u2 = c (SP0 + f2)
461474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
462474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
463474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vp8_mbfilter
    ;# Macroblock-edge filter applied to 16 pixel positions at once.
    ;#   in:  v0..v7 = P3..Q3 (unsigned bytes), v8/v9/v10 = flimit/limit/thresh,
    ;#        v11 = 0x80 in every byte
    ;#   out: v1..v6 = filtered P2, P1, P0, Q0, Q1, Q2, back in unsigned form
    ;#   trashes v8, v9, v10, v12, v13, v14, v15 (v0 and v7 are only read)
    ;# c() in the comments means signed saturation to a byte.
464474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Masks
465474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
466474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# start the filtering here
    ;# xor with 0x80 maps unsigned 0..255 onto signed -128..127
467474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v1, v1, v11         ;# SP2
468474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v2, v2, v11         ;# SP1
469474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v3, v3, v11         ;# SP0
470474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v4, v4, v11         ;# SQ0
471474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v5, v5, v11         ;# SQ1
472474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v6, v6, v11         ;# SQ2
473474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
474474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# add outer taps if we have high edge variance
475474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v13, v2, v5         ;# f  = c (SP1-SQ1)
476474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
477474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v14, v4, v3         ;# SQ0-SP0
478474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v13, v13, v14
479474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v13, v13, v14
480474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v13, v13, v14       ;# f  = c( c(SP1-SQ1) + 3*(SQ0-SP0))
481474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
482474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vandc   v13, v13, v8        ;# f &= mask (clears f where v8 is set)
483474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vand    v15, v13, v10       ;# f2 = f & hev
484474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
485474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# save bottom 3 bits so that we round one side +4 and the other +3
486474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v8, 3              ;# mask is done with; v8 = 3
487474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v9, 4              ;# v9 = 4
488474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
489474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v14, v15, v9        ;# f1 = c (f2+4)
490474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v15, v15, v8        ;# f2 = c (f2+3)
491474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
492474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrab   v14, v14, v8        ;# f1 >>= 3
493474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrab   v15, v15, v8        ;# f2 >>= 3
494474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
495474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v4, v4, v14         ;# u1 = c (SQ0 - f1)
496474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v3, v3, v15         ;# u2 = c (SP0 + f2)
497474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
498474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# only apply wider filter if not high edge variance
499474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vandc   v13, v13, v10       ;# f &= ~hev
500474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# build the halfword rounding constant 63 and the byte multiplier 9
501474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v9, 2
502474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vnor    v8, v8, v8          ;# ~3 = 0xfc in every byte
503474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrb    v9, v8, v9          ;# 0x3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f
504474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vupkhsb v9, v9              ;# 0x003f003f003f003f003f003f003f003f
505474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v8, 9
506474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
507474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# roughly 1/7th difference across boundary
508474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v10, 7             ;# halfword shift count (hev in v10 is done with)
509474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmulosb v14, v8, v13        ;# odd bytes:  9 * f, as signed halfwords
510474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmulesb v15, v8, v13        ;# even bytes: 9 * f, as signed halfwords
511474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddshs v14, v14, v9        ;# +=  63
512474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddshs v15, v15, v9
513474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrah   v14, v14, v10       ;# >>= 7
514474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrah   v15, v15, v10
    ;# interleave the even/odd results back into pixel order
515474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrglh  v10, v15, v14       ;# halfwords for bytes 8..15
516474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrghh  v15, v15, v14       ;# halfwords for bytes 0..7
517474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
518474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkshss v10, v15, v10       ;# X = saturated down to bytes
519474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
520474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v6, v6, v10         ;# SQ2 = c (SQ2 - X)
521474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v1, v1, v10         ;# SP2 = c (SP2 + X)
522474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
523474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v6, v6, v11         ;# Q2 back to unsigned
524474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v1, v1, v11         ;# P2 back to unsigned
525474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
526474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# roughly 2/7th difference across boundary
527474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v10, 7             ;# v10 was overwritten above; reload shift count
528474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddubm v12, v8, v8         ;# v12 = 18
529474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmulosb v14, v12, v13       ;# odd bytes:  18 * f
530474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmulesb v15, v12, v13       ;# even bytes: 18 * f
531474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddshs v14, v14, v9        ;# +=  63
532474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddshs v15, v15, v9
533474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrah   v14, v14, v10       ;# >>= 7
534474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrah   v15, v15, v10
535474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrglh  v10, v15, v14       ;# halfwords for bytes 8..15
536474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrghh  v15, v15, v14       ;# halfwords for bytes 0..7
537474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
538474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkshss v10, v15, v10       ;# X = saturated down to bytes
539474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
540474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v5, v5, v10         ;# SQ1 = c (SQ1 - X)
541474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v2, v2, v10         ;# SP1 = c (SP1 + X)
542474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
543474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v5, v5, v11         ;# Q1 back to unsigned
544474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v2, v2, v11         ;# P1 back to unsigned
545474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
546474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# roughly 3/7th difference across boundary
547474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltish v10, 7             ;# reload shift count
548474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddubm v12, v12, v8        ;# v12 = 27
549474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmulosb v14, v12, v13       ;# odd bytes:  27 * f
550474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmulesb v15, v12, v13       ;# even bytes: 27 * f
551474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddshs v14, v14, v9        ;# +=  63
552474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddshs v15, v15, v9
553474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrah   v14, v14, v10       ;# >>= 7
554474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrah   v15, v15, v10
555474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrglh  v10, v15, v14       ;# halfwords for bytes 8..15
556474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmrghh  v15, v15, v14       ;# halfwords for bytes 0..7
557474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
558474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vpkshss v10, v15, v10       ;# X = saturated down to bytes
559474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
560474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v4, v4, v10         ;# SQ0 = c (SQ0 - X)
561474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v3, v3, v10         ;# SP0 = c (SP0 + X)
562474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
563474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v4, v4, v11         ;# Q0 back to unsigned
564474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v3, v3, v11         ;# P0 back to unsigned
565474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
566474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
567474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro SBFilter
    ;# Edge filter that adjusts only P1, P0, Q0 and Q1.
    ;#   in:  v0..v7 = P3..Q3 (unsigned bytes), v8/v9/v10 = flimit/limit/thresh,
    ;#        v11 = 0x80 in every byte
    ;#   out: v2..v5 = filtered P1, P0, Q0, Q1, back in unsigned form
    ;#   trashes v8, v9, v13, v14, v15; v10 is left holding the HEV mask
568474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Masks
569474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# adjusts P0/Q0 and leaves v13 = f1 >> 3 for the outer taps below
570474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    common_adjust v3, v4, v2, v5, 1
571474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
572474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# outer tap adjustments
573474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v8, 1
574474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
575474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddubm v13, v13, v8        ;# f  += 1
576474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrab   v13, v13, v8        ;# f >>= 1
577474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
578474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vandc   v13, v13, v10       ;# f &= ~hev
579474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
580474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs v5, v5, v13         ;# u1 = c (SQ1 - f)
581474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs v2, v2, v13         ;# u2 = c (SP1 + f)
582474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# undo the sign flip applied inside common_adjust
583474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v2, v2, v11         ;# P1
584474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v3, v3, v11         ;# P0
585474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v4, v4, v11         ;# Q0
586474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    v5, v5, v11         ;# Q1
587474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
588474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
589474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# Macroblock-edge filter for one 16-pixel-wide horizontal edge.
;# Register usage, as read by the macros expanded below:
;#  r3 address of the Q0 row (first row below the edge)
;#  r4 row stride
;#  r5 address of the flimit vector
;#  r6 address of the limit vector
;#  r7 address of the thresh vector
;# Overwrites r0, r5, r6, r7, r11, r12 and v0..v15.
590474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgmbloop_filter_horizontal_edge_y_ppc:
591474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
592474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# mark v0..v15 as in use
593474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
594474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# must come before load_data_y, which overwrites r5..r7
595474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    build_constants r5, r6, r7, v8, v9, v10
596474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
597474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_data_y
598474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
599474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vp8_mbfilter
600474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# store the six modified rows using the offsets load_data_y left in r5..r7
601474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v1, r7, r6         ;# P2
602474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v2,  0, r6         ;# P1
603474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v3, r7, r3         ;# P0
604474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v4,  0, r3         ;# Q0
605474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v5, r4, r3         ;# Q1
606474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v6, r5, r3         ;# Q2
607474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
608474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
609474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
610474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
611474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
612474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# Filter for one 16-pixel-wide horizontal edge that modifies only the two
;# rows on each side of the edge (P1, P0, Q0, Q1).
613474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *s
614474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 int p
615474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 const signed char *flimit
616474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r6 const signed char *limit
617474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r7 const signed char *thresh
;# Overwrites r0, r5, r6, r7, r11, r12 and v0..v15.
618474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_horizontal_edge_y_ppc:
619474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
620474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# mark v0..v15 as in use
621474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
622474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# must come before load_data_y, which overwrites r5..r7
623474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    build_constants r5, r6, r7, v8, v9, v10
624474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
625474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_data_y
626474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
627474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    SBFilter
628474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# store the four modified rows using the offsets load_data_y left in r6/r7
629474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v2,  0, r6         ;# P1
630474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v3, r7, r3         ;# P0
631474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v4,  0, r3         ;# Q0
632474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx     v5, r4, r3         ;# Q1
633474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
634474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
635474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
636474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
637474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
638474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Filtering a vertical mb.  Each mb is aligned on a 16 byte boundary.
639474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  So we can read in an entire mb aligned.  However if we want to filter the mb
640474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  edge we run into problems.  For the loop filter we require 4 bytes before the mb
641474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  and 4 after for a total of 8 bytes.  Reading 16 bytes in order to get 4 is a bit
642474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  of a waste.  So this is an even uglier way to get around that.
643474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Using the regular register file, words are read in and then saved back out to
644474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  memory to align and order them up.  Then they are read in using the
645474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  vector register file.
646474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro RLVmb V, R
    ;# Gathers the 8 bytes straddling r3 (4 before, 4 after) from each of the
    ;# next two rows into the 16-byte scratch area at \R, then loads it into \V.
    ;#   r3 is advanced by r4 (the stride) twice; r0 is trashed.
    ;#   Scratch layout: bytes 0-7 = first row, bytes 8-15 = second row.
    ;# NOTE(review): lvx ignores the low four address bits, so \R is presumably
    ;#   16-byte aligned -- confirm at the call sites.
647474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwzux   r0, r3, r4          ;# r3 += stride; r0 = 4 bytes at r3
648474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 4(\R)
649474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0,-4(r3)           ;# the 4 bytes just before r3
650474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(\R)
651474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwzux   r0, r3, r4          ;# same again for the following row
652474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0,12(\R)
653474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0,-4(r3)
654474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 8(\R)
655474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \V, 0, \R           ;# V = both rows, 8 bytes each
656474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
657474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
658474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro WLVmb V, R
    ;# Inverse of RLVmb: spills \V to the scratch area at \R and scatters it
    ;# back to two rows, 4 bytes on each side of r3.
    ;#   The second row of the pair (scratch bytes 8-15) is written first, so
    ;#   the caller is expected to step through rows in the opposite direction
    ;#   to the read pass (r4 negated).  r3 is advanced by r4 twice; r0 trashed.
659474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    \V, 0, \R
660474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0,12(\R)
661474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwux   r0, r3, r4          ;# r3 += r4; store the 4 bytes at r3
662474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 8(\R)
663474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0,-4(r3)           ;# store the 4 bytes just before r3
664474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 4(\R)
665474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwux   r0, r3, r4          ;# same again for the other row of the pair
666474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 0(\R)
667474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0,-4(r3)
668474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
669474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
670474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# Macroblock-edge filter for a vertical edge spanning 16 rows.  The 8 bytes
;# around the edge in each row are gathered through a scratch area, transposed,
;# filtered with vp8_mbfilter, transposed back and scattered to memory.
671474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *s
672474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 int p
673474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 const signed char *flimit
674474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r6 const signed char *limit
675474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r7 const signed char *thresh
;# Overwrites r0, r3, r4, r9, r11, r12 and the vector registers marked in VRSAVE.
676474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgmbloop_filter_vertical_edge_y_ppc:
677474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
678474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# mark v0..v15 as in use
679474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xc000    ;# ...plus v16 and v17
680474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
681474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# r1 is not adjusted, so the scratch area lies below the stack pointer.
    ;# NOTE(review): lvx/stvx ignore the low four address bits; this assumes
    ;#   r1 - 48 is 16-byte aligned -- confirm against the ABI in use.
682474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r9, -48(r1)         ;# temporary space for reading in vectors
683474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub     r3, r3, r4          ;# back up one row; RLVmb pre-increments r3
684474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# each RLVmb consumes two rows: 8 loads cover 16 rows
685474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLVmb v0, r9
686474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLVmb v1, r9
687474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLVmb v2, r9
688474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLVmb v3, r9
689474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLVmb v4, r9
690474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLVmb v5, r9
691474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLVmb v6, r9
692474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLVmb v7, r9
693474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# transpose8x16_fwd is defined outside this section; presumably it leaves
    ;# v0..v7 = P3..Q3 as vp8_mbfilter expects -- verify against its definition
694474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    transpose8x16_fwd
695474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
696474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    build_constants r5, r6, r7, v8, v9, v10
697474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
698474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vp8_mbfilter
699474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# transpose8x16_inv is defined outside this section; presumably it leaves
    ;# the row pairs in v10..v17, which is what gets written below
700474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    transpose8x16_inv
701474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# r3 is at the last row read; step one past it and negate the stride so
    ;# that WLVmb (which pre-increments) walks the rows from last to first
702474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add r3, r3, r4
703474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    neg r4, r4
704474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
705474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLVmb v17, r9
706474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLVmb v16, r9
707474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLVmb v15, r9
708474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLVmb v14, r9
709474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLVmb v13, r9
710474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLVmb v12, r9
711474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLVmb v11, r9
712474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLVmb v10, r9
713474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
714474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
715474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
716474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
717474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# RL: load vector \V with lvx from the address held in \R, then step \R
;# forward by \P.  The caller in this file passes the row stride as \P, so
;# consecutive invocations read consecutive rows.
718474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro RL V, R, P
719474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \V, 0,  \R
720474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     \R, \R, \P
721474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
722474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# WL: store vector \V with stvx to the address held in \R, then step \R
;# forward by \P.  Write-side counterpart of RL.
723474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro WL V, R, P
724474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    \V, 0,  \R
725474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     \R, \R, \P
726474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
727474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Fil: filter one edge given the four vectors on each side of it
;# (\P3..\P0 before the edge, \Q0..\Q3 after it).
;#
;# Register conventions visible in this macro:
;#   v0, v2, v3  compared against the hev, interior and edge measures
;#               respectively (presumably thresh, limit and flimit as
;#               set up by build_constants -- confirm against that macro)
;#   v1          used as both addend and shift count; the caller in this
;#               file splats 1 into it
;#   v4          K: on entry |P0-P1|; on exit |Q2-Q3|, which is |P0-P1|
;#               for the next edge when the Q vectors become the P vectors
;#   v8          mask: true where the edge or interior limit was exceeded
;#               (presumably consumed by common_adjust -- TODO confirm)
;#   v10         hev mask
;#   v11         XORed into P1,P0,Q0,Q1 at the end to convert back to pels
;#   v5, v9, v13, v14  scratch
;# \P1, \P0, \Q0 and \Q1 are modified in place.
728474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Fil P3, P2, P1, P0, Q0, Q1, Q2, Q3
729474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org                                ;# K (v4) = |P0-P1| already
730474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Abs     v14, v13, \Q0, \Q1  ;# M = |Q0-Q1|
731474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmaxub  v14, v14, v4        ;# M = max( |P0-P1|, |Q0-Q1|)
732474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vcmpgtub v10, v14, v0       ;# v10 = hev mask (M > v0), used by vandc below
733474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
734474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Abs     v4, v5, \Q2, \Q3    ;# K = |Q2-Q3| = next |P0-P1|
735474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
736474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    max_abs  v14, v13, \Q1, \Q2  ;# M = max( M, |Q1-Q2|)
737474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    max_abs  v14, v13, \P1, \P2  ;# M = max( M, |P1-P2|)
738474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    max_abs  v14, v13, \P2, \P3  ;# M = max( M, |P2-P3|)
739474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
740474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vmaxub   v14, v14, v4       ;# M = max interior abs diff (adds |Q2-Q3|)
741474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vcmpgtub v9, v14, v2        ;# v9 = true if int_l exceeded
742474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
743474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Abs     v14, v13, \P0, \Q0  ;# X = Abs( P0-Q0)
744474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vcmpgtub v8, v14, v3        ;# v8 = true if edge_l exceeded
745474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vor     v8, v8, v9          ;# v8 = true if edge_l or int_l exceeded
746474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
747474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# replace P1,Q1 w/signed versions
748474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    common_adjust \P0, \Q0, \P1, \Q1, 1
749474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
750474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddubm v13, v13, v1        ;# -16 <= M <= 15, saturation irrelevant
751474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsrab   v13, v13, v1
752474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vandc   v13, v13, v10       ;# adjust P1,Q1 by (M+1)>>1  if ! hev
753474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vsubsbs \Q1, \Q1, v13       ;# Q1 -= adjustment (signed saturating)
754474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vaddsbs \P1, \P1, v13       ;# P1 += adjustment (signed saturating)
755474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
756474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    \P1, \P1, v11       ;# P1
757474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    \P0, \P0, v11       ;# P0
758474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    \Q0, \Q0, v11       ;# Q0
759474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor    \Q1, \Q1, v11       ;# Q1
760474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
761474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
762474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# loop_filter_vertical_edge_y_ppc
;#
;# Reads 16 rows of 16 bytes starting at s (rows p bytes apart) into
;# v16..v31, runs Transpose16x16, applies Fil three times, runs
;# Transpose16x16 again and writes the 16 rows back to the same addresses.
;# The three Fil calls put the edge between v19|v20, v23|v24 and v27|v28;
;# presumably these are byte columns 3|4, 7|8 and 11|12 after the
;# transpose -- confirm against Transpose16x16.
;# Overwrites r9, r11, r12 and vector registers; r3 and r4 are left unchanged
;# by the code shown here.
763474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
764474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *s
765474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 int p
766474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 const signed char *flimit
767474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r6 const signed char *limit
768474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r7 const signed char *thresh
769474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_vertical_edge_y_ppc:
770474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
771474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# all 32 VRSAVE bits set: v0..v31 in use
772474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xffff
773474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
774474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
775474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9, r3, 0           ;# r9 = walking row pointer; r3 kept for write-back
776474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v16, r9, r4
777474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v17, r9, r4
778474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v18, r9, r4
779474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v19, r9, r4
780474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v20, r9, r4
781474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v21, r9, r4
782474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v22, r9, r4
783474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v23, r9, r4
784474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v24, r9, r4
785474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v25, r9, r4
786474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v26, r9, r4
787474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v27, r9, r4
788474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v28, r9, r4
789474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v29, r9, r4
790474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RL      v30, r9, r4
791474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v31, 0, r9          ;# last row: no pointer advance needed
792474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
793474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Transpose16x16
794474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
795474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v1, 1              ;# v1 = 1 in every byte, read by Fil
796474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
797474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    build_constants r5, r6, r7, v3, v2, v0
798474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
799474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Abs v4, v5, v19, v18                            ;# K(v4) = first |P0-P1|
800474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Each call's Q0..Q3 are the next call's P3..P0; Fil leaves the next
    ;# |P0-P1| in v4 so it is not recomputed.
801474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Fil v16, v17, v18, v19,  v20, v21, v22, v23
802474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Fil v20, v21, v22, v23,  v24, v25, v26, v27
803474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Fil v24, v25, v26, v27,  v28, v29, v30, v31
804474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
805474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Transpose16x16
806474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
807474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r9, r3, 0           ;# restart at the first row for write-back
808474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v16, r9, r4
809474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v17, r9, r4
810474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v18, r9, r4
811474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v19, r9, r4
812474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v20, r9, r4
813474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v21, r9, r4
814474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v22, r9, r4
815474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v23, r9, r4
816474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v24, r9, r4
817474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v25, r9, r4
818474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v26, r9, r4
819474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v27, r9, r4
820474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v28, r9, r4
821474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v29, r9, r4
822474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WL      v30, r9, r4
823474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v31, 0, r9
824474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
825474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
826474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
827474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
828474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
829474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- UV FILTERING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
;# active_chroma_sel: load into \V the permute mask that hread_uv uses to
;# combine the active U and V pels.
;# Bit 3 of the U pointer (r3) selects a byte offset of 0 or 16 into
;# _chromaSelectors; the word found there is used as the address of the mask.
;# On exit r7 holds the address of the selected _chromaSelectors entry
;# (uresult_sel / vresult_sel read 4(r7) and 8(r7)).
;# Overwrites r0, r7, r12 and CR0 (andi. is a record-form instruction).
830474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro active_chroma_sel V
831474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    andi.   r7, r3, 8       ;# row origin modulo 16
832474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r7, r7, r7      ;# selects selectors (r7 = 0 or 16)
833474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lis     r12, _chromaSelectors@ha
834474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r0,  _chromaSelectors@l(r12)
835474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwzux   r0, r7, r0      ;# leave selector addr in r7
836474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
837474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \V, 0, r0       ;# mask to concatenate active U,V pels
838474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
839474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# hread_uv: read one chroma row from both planes and merge it.
;#   \Dest   receives the permuted combination of the two loaded vectors
;#   \U, \V  receive the raw vectors loaded from r3+\Offs and r4+\Offs;
;#           callers keep them when the row will be written back
;#   \Offs   index register holding the byte offset, or literal 0 for none
;#   \VMask  permute mask (loaded by active_chroma_sel)
840474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro hread_uv Dest, U, V, Offs, VMask
841474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \U, \Offs, r3
842474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \V, \Offs, r4
843474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   \Dest, \U, \V, \VMask   ;# Dest = active part of U then V
844474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
845474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# hwrite_uv: write one filtered chroma row back to both planes.
;#   \New            filtered row holding both U and V pels
;#   \U, \V          vectors saved by hread_uv for this row; overwritten
;#                   here with the merged result before being stored
;#   \Offs           index register holding the byte offset, or literal 0
;#   \Umask, \Vmask  permute masks selecting between \New and the saved row
846474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro hwrite_uv New, U, V, Offs, Umask, Vmask
847474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   \U, \New, \U, \Umask    ;# Combine new pels with siblings
848474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vperm   \V, \New, \V, \Vmask
849474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    \U, \Offs, r3           ;# Write to frame buffer
850474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    \V, \Offs, r4
851474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
852474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
853474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Process U,V in parallel.
;# load_chroma_h: read the eight chroma rows around a horizontal edge.
;# Inputs: r3 = u, r4 = v, r5 = stride.
;# Results: v0..v7 hold the merged rows at -4..+3 strides (P3,P2,P1,P0,
;# Q0,Q1,Q2,Q3).  For the six read/write rows the raw U/V vectors are kept
;# in v16..v27 for hwrite_uv.  v12 holds the read mask.
;# Offsets left in registers for the store macros:
;#   r6 = -3*stride, r8 = -2*stride, r9 = -stride, r10 = 2*stride.
;# Also overwrites r0, r7, r12 (via active_chroma_sel) and v14, v15.
;# Because r6, r7 and r8 are overwritten, anything the caller still needs
;# from them must be consumed before this macro runs.
854474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro load_chroma_h
855474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    neg     r9, r5          ;# r9 = -1 * stride
856474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r8, r9, r9      ;# r8 = -2 * stride
857474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r10, r5, r5     ;# r10 = 2 * stride
858474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
859474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    active_chroma_sel v12
860474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
861474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# P3, Q3 are read-only; need not save addresses or sibling pels
862474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r6, r8, r8      ;# r6 = -4 * stride
863474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hread_uv v0, v14, v15, r6, v12
864474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r6, r10, r5     ;# r6 =  3 * stride
865474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hread_uv v7, v14, v15, r6, v12
866474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
867474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Others are read/write; save addresses and sibling pels
868474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
869474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r6, r8, r9      ;# r6 = -3 * stride
870474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hread_uv v1, v16, v17, r6,  v12
871474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hread_uv v2, v18, v19, r8,  v12
872474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hread_uv v3, v20, v21, r9,  v12
873474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hread_uv v4, v22, v23, 0,   v12
874474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hread_uv v5, v24, v25, r5,  v12
875474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hread_uv v6, v26, v27, r10, v12
876474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
877474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# uresult_sel: load into \V the mask hwrite_uv uses for the U plane.
;# Passes 4(r7) to load_g; r7 is the selector-entry address left by
;# active_chroma_sel.  load_g is defined outside this section; presumably
;# it loads the vector whose address is stored at that location -- confirm.
878474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro uresult_sel V
879474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_g   \V, 4(r7)
880474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
881474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# vresult_sel: load into \V the mask hwrite_uv uses for the V plane.
;# Passes 8(r7) to load_g; r7 is the selector-entry address left by
;# active_chroma_sel.  load_g is defined outside this section; presumably
;# it loads the vector whose address is stored at that location -- confirm.
882474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vresult_sel V
883474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_g   \V, 8(r7)
884474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
885474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
886474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# always write P1,P0,Q0,Q1
;# store_chroma_h: write rows v2..v5 back to both chroma planes at offsets
;# r8, r9, 0 and r5, merging them with the saved vectors v18..v25.
;# Expects r7, r8, r9, v18..v25 as left by load_chroma_h.
;# Leaves the U and V write masks in v11 and v12 so a caller can issue
;# further hwrite_uv calls afterwards.
887474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro store_chroma_h
888474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    uresult_sel v11
889474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vresult_sel v12
890474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hwrite_uv v2, v18, v19, r8, v11, v12
891474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hwrite_uv v3, v20, v21, r9, v11, v12
892474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hwrite_uv v4, v22, v23, 0,  v11, v12
893474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hwrite_uv v5, v24, v25, r5, v11, v12
894474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
895474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# mbloop_filter_horizontal_edge_uv_ppc
;#
;# Filters a horizontal edge in the U and V planes together using
;# vp8_mbfilter.  Six rows are written back per plane: P1,P0,Q0,Q1 via
;# store_chroma_h, then P2 and Q2 explicitly.
896474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
897474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *u
898474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 unsigned char *v
899474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 int p
900474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r6 const signed char *flimit
901474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r7 const signed char *limit
902474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r8 const signed char *thresh
903474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgmbloop_filter_horizontal_edge_uv_ppc:
904474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
905474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# all 32 VRSAVE bits set: v0..v31 in use
906474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xffff
907474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
908474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Must come before load_chroma_h, which overwrites r6, r7 and r8.
909474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    build_constants r6, r7, r8, v8, v9, v10
910474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
911474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_chroma_h
912474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
913474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vp8_mbfilter
914474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
915474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    store_chroma_h
916474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# r6 = -3*stride and r10 = 2*stride were left by load_chroma_h;
    ;# v11/v12 are the write masks left by store_chroma_h.
917474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hwrite_uv v1, v16, v17, r6,  v11, v12    ;# v1 == P2
918474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    hwrite_uv v6, v26, v27, r10, v11, v12    ;# v6 == Q2
919474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
920474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
921474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
922474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
923474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# loop_filter_horizontal_edge_uv_ppc
;#
;# Filters a horizontal edge in the U and V planes together using SBFilter.
;# Only P1,P0,Q0,Q1 are written back (via store_chroma_h).
924474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
925474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *u
926474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 unsigned char *v
927474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 int p
928474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r6 const signed char *flimit
929474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r7 const signed char *limit
930474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r8 const signed char *thresh
931474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_horizontal_edge_uv_ppc:
932474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
933474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# all 32 VRSAVE bits set: v0..v31 in use
934474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xffff
935474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
936474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Must come before load_chroma_h, which overwrites r6, r7 and r8.
937474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    build_constants r6, r7, r8, v8, v9, v10
938474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
939474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    load_chroma_h
940474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
941474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    SBFilter
942474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
943474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    store_chroma_h
944474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
945474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
946474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
947474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
948474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# R: advance r3 and r4 by r5 (one row), gather the 8 bytes straddling each
;# pointer (4 before, 4 from the pointer on) into the 16-byte scratch
;# buffer at \R, and load that buffer into vector \V.
;# Buffer layout: bytes 0-7 come from the r3 plane, bytes 8-15 from r4.
;# Overwrites r0; r3 and r4 are updated by the lwzux instructions.
949474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro R V, R
950474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwzux   r0, r3, r5      ;# r3 += r5; r0 = word at new r3
951474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 4(\R)
952474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0,-4(r3)       ;# word just before r3
953474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 0(\R)
954474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwzux   r0, r4, r5      ;# r4 += r5; r0 = word at new r4
955474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0,12(\R)
956474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0,-4(r4)       ;# word just before r4
957474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, 8(\R)
958474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     \V, 0, \R
959474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
960474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
961474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# W: inverse of R.  Store vector \V to the 16-byte scratch buffer at \R,
;# advance r4 and r3 by r5, and scatter the buffer back as 8 bytes
;# straddling each pointer (bytes 8-15 to the r4 plane, bytes 0-7 to r3).
;# Overwrites r0; r3 and r4 are updated by the stwux instructions.
962474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro W V, R
963474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    \V, 0, \R
964474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0,12(\R)
965474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwux   r0, r4, r5      ;# r4 += r5; store word at new r4
966474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 8(\R)
967474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0,-4(r4)
968474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 4(\R)
969474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwux   r0, r3, r5      ;# r3 += r5; store word at new r3
970474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, 0(\R)
971474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0,-4(r3)
972474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
973474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# chroma_vread: read eight rows around a vertical chroma edge into v0..v7
;# using the scratch buffer at \R, then run transpose8x16_fwd.
;# R pre-increments the pointers, so r3/r4 are first moved back one row;
;# on exit they point at the eighth (last) row read.
974474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro chroma_vread R
975474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub r3, r3, r5          ;# back up one line for simplicity
976474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    sub r4, r4, r5
977474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
978474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    R v0, \R
979474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    R v1, \R
980474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    R v2, \R
981474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    R v3, \R
982474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    R v4, \R
983474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    R v5, \R
984474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    R v6, \R
985474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    R v7, \R
986474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
987474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    transpose8x16_fwd
988474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
989474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# chroma_vwrite: run transpose8x16_inv, then write v17..v10 back through
;# the scratch buffer at \R, last row first.
;# Expects r3/r4 as left by chroma_vread (pointing at the last row).  They
;# are moved one row further and r5 is negated, so each pre-incrementing W
;# steps back one row.  r5 is left negated on exit.
990474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro chroma_vwrite R
991474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
992474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    transpose8x16_inv
993474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
994474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r3, r3, r5
995474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r4, r4, r5
996474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    neg     r5, r5          ;# Write rows back in reverse order
997474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
998474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    W v17, \R
999474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    W v16, \R
1000474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    W v15, \R
1001474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    W v14, \R
1002474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    W v13, \R
1003474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    W v12, \R
1004474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    W v11, \R
1005474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    W v10, \R
1006474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
1007474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# mbloop_filter_vertical_edge_uv_ppc
;#
;# Filters a vertical edge in the U and V planes together using
;# vp8_mbfilter.  Eight rows per plane are gathered through a 16-byte
;# scratch buffer, transposed, filtered, transposed back and scattered.
;# r3, r4 and r5 are modified by chroma_vread / chroma_vwrite.
1008474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
1009474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *u
1010474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 unsigned char *v
1011474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 int p
1012474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r6 const signed char *flimit
1013474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r7 const signed char *limit
1014474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r8 const signed char *thresh
1015474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgmbloop_filter_vertical_edge_uv_ppc:
1016474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
1017474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# upper 16 VRSAVE bits: v0..v15
1018474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xc000    ;# plus v16 and v17
1019474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
1020474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# NOTE(review): the scratch area lies below the stack pointer without
    ;# a frame being allocated -- confirm the target ABI permits this.
1021474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r9, -48(r1)         ;# temporary space for reading in vectors
1022474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1023474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    chroma_vread r9
1024474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1025474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    build_constants r6, r7, r8, v8, v9, v10
1026474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1027474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vp8_mbfilter
1028474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1029474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    chroma_vwrite r9
1030474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1031474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
1032474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1033474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
1034474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# loop_filter_vertical_edge_uv_ppc
;#
;# Filters a vertical edge in the U and V planes together using SBFilter.
;# Eight rows per plane are gathered through a 16-byte scratch buffer,
;# transposed, filtered, transposed back and scattered.
;# r3, r4 and r5 are modified by chroma_vread / chroma_vwrite.
1035474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
1036474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *u
1037474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 unsigned char *v
1038474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 int p
1039474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r6 const signed char *flimit
1040474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r7 const signed char *limit
1041474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r8 const signed char *thresh
1042474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_vertical_edge_uv_ppc:
1043474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
1044474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# upper 16 VRSAVE bits: v0..v15
1045474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xc000    ;# plus v16 and v17
1046474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
1047474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# NOTE(review): the scratch area lies below the stack pointer without
    ;# a frame being allocated -- confirm the target ABI permits this.
1048474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r9, -48(r1)         ;# temporary space for reading in vectors
1049474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1050474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    chroma_vread r9
1051474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1052474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    build_constants r6, r7, r8, v8, v9, v10
1053474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1054474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    SBFilter
1055474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1056474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    chroma_vwrite r9
1057474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1058474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
1059474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1060474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
1061474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1062474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# -=-=-=-=-=-=-=-=-=-=-=-=-=-= SIMPLE LOOP FILTER =-=-=-=-=-=-=-=-=-=-=-=-=-=-
1063474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# vp8_simple_filter: simple loop filter core.
;# Inputs: v1 = P0, v2 = Q0, v0 and v3 = the outer pels passed to
;# common_adjust, v8 = limit to compare |P0-Q0| against.
;# v8 is overwritten with the "over limit" mask (presumably read by
;# common_adjust -- TODO confirm); v13, v14 are scratch.
;# On exit v1 and v2 have been XORed with v11 to convert them back to pels.
1064474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vp8_simple_filter
1065474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Abs v14, v13, v1, v2    ;# M = abs( P0 - Q0)
1066474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vcmpgtub v8, v14, v8    ;# v8 = true if _over_ limit
1067474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1068474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# preserve unsigned v0 and v3
1069474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    common_adjust v1, v2, v0, v3, 0
1070474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1071474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor v1, v1, v11
1072474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor v2, v2, v11        ;# cvt P0, Q0 back to pels
1073474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
1074474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# simple_vertical: apply vp8_simple_filter to the 64 bytes at r5.
;# Loads four vectors from r5+0, +16, +32 and +48 into v0..v3, runs
;# Transpose4times4x4 with the _B_hihi / _B_lolo constants, filters,
;# runs Transpose4times4x4 again and stores the four vectors back to the
;# same addresses.
;# Overwrites r0, r7, r8, r12, v16, v17 and whatever the invoked macros use.
1075474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro simple_vertical
1076474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r8,  0, 16      ;# r8 = 16 (index for the odd vectors)
1077474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    addi    r7, r5, 32      ;# r7 = base of the second pair of vectors
1078474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1079474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v0,  0, r5
1080474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v1, r8, r5
1081474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v2,  0, r7
1082474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v3, r8, r7
1083474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1084474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lis     r12, _B_hihi@ha
1085474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r0,  _B_hihi@l(r12)
1086474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v16, 0, r0
1087474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1088474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lis     r12, _B_lolo@ha
1089474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la      r0,  _B_lolo@l(r12)
1090474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v17, 0, r0
1091474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1092474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Transpose4times4x4 v16, v17
1093474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vp8_simple_filter
1094474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1095474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor v0, v0, v11
1096474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vxor v3, v3, v11        ;# cvt v0, v3 back to pels (presumably P1, Q1 -- TODO confirm)
1097474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1098474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    Transpose4times4x4 v16, v17
1099474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1100474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v0,  0, r5
1101474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v1, r8, r5
1102474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v2,  0, r7
1103474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v3, r8, r7
1104474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
1105474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1106474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# loop_filter_simple_horizontal_edge_ppc
;#   Loads the two 16-pel rows above and the two rows below a horizontal
;#   edge, runs vp8_simple_filter on them, and stores the two rows adjacent
;#   to the edge (P0, Q0) back. P1 and Q1 are read but never written.
;#   Clobbers r5, r6, r11, r12 and the vector registers used by the filter.
;#   VRSAVE is saved in r11 and restored before returning.
;#   NOTE(review): lvx/stvx ignore the low four address bits, so s, the
;#   stride p and flimit are assumed to be 16-byte aligned - confirm with
;#   callers.
1107474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *s
1108474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 int p
1109474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 const signed char *flimit
1110474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_simple_horizontal_edge_ppc:
1111474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
1112474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# set the upper 16 VRSAVE bits (v0-v15 in use)
1113474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
1114474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1115474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# build constants
1116474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v8, 0, r5           ;# flimit
1117474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1118474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v11, 8             ;# every byte = 8
1119474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v12, 4             ;# every byte = 4 (shift count)
1120474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslb    v11, v11, v12       ;# 0x80808080808080808080808080808080
1121474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1122474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    neg     r5, r4              ;# r5 = -1 * stride
1123474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    add     r6, r5, r5          ;# r6 = -2 * stride
1124474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1125474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v0, r6, r3          ;# v0 = P1 = 16 pels two rows above edge
1126474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v1, r5, r3          ;# v1 = P0 = 16 pels one row  above edge
1127474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v2,  0, r3          ;# v2 = Q0 = 16 pels one row  below edge
1128474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v3, r4, r3          ;# v3 = Q1 = 16 pels two rows below edge
1129474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1130474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vp8_simple_filter           ;# updates v1 (P0) and v2 (Q0)
1131474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1132474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v1, r5, r3          ;# store P0
1133474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stvx    v2,  0, r3          ;# store Q0
1134474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1135474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
1136474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1137474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
1138474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# RLV Offs: one step of the frame-buffer -> temp-array gather.
;#   Stores the word currently in r0 at word index Offs of the array at r5,
;#   then loads the next word from r7 + r4 and leaves r7 pointing at it.
;#   Expects r0 = previously loaded word, r4 = stride, r7 = current row.
1139474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro RLV Offs
1140474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, (\Offs*4)(r5)   ;# temp[Offs] = word read on the previous step
1141474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwzux   r0, r7, r4          ;# r0 = word at r7 + r4; r7 += r4
1142474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
1143474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# WLV Offs: one step of the temp-array -> frame-buffer scatter.
;#   Loads word index Offs from the array at r5 and stores it at r7 + r4,
;#   leaving r7 pointing at the row just written.
;#   Expects r4 = stride, r7 = previously written row. Clobbers r0.
1144474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro WLV Offs
1145474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwz     r0, (\Offs*4)(r5)   ;# r0 = temp[Offs]
1146474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwux   r0, r7, r4          ;# store r0 at r7 + r4; r7 += r4
1147474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm
1148474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1149474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 2
;# loop_filter_simple_vertical_edge_ppc
;#   For each of 16 rows, reads the 4 pels straddling a vertical edge
;#   (s-2 .. s+1) into a 64-byte scratch buffer, runs simple_vertical on
;#   that buffer, then writes the 4 pels of every row back to the frame.
;#   Row k is kept at word index (k % 4) * 4 + k / 4 of the buffer, so each
;#   16-byte vector holds rows j, j+4, j+8, j+12.
;#   Clobbers r0, r5, r7, r8, r11, r12 and the vector registers used by
;#   simple_vertical. VRSAVE is saved in r11 and restored before returning.
1150474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r3 unsigned char *s
1151474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r4 int p
1152474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;#  r5 const signed char *flimit
1153474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_simple_vertical_edge_ppc:
1154474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mfspr   r11, 256            ;# get old VRSAVE
1155474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    oris    r12, r11, 0xffff    ;# set the upper 16 VRSAVE bits (v0-v15 in use)
1156474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ori     r12, r12, 0xc000    ;# plus v16 and v17 (selectors in simple_vertical)
1157474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r12            ;# set VRSAVE
1158474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1159474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# build constants
1160474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lvx     v8, 0, r5           ;# flimit
1161474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1162474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v11, 8             ;# every byte = 8
1163474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vspltisb v12, 4             ;# every byte = 4 (shift count)
1164474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    vslb    v11, v11, v12       ;# 0x80808080808080808080808080808080
1165474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# NOTE(review): the buffer lives below the stack pointer and no frame is
    ;#   allocated; this relies on the target ABI guaranteeing at least 96
    ;#   bytes of usable space below r1, and on r1 being 16-byte aligned for
    ;#   the lvx/stvx in simple_vertical - confirm for every supported target.
1166474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    la r5, -96(r1)              ;# temporary space for reading in vectors
1167474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1168474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Store 4 pels at word "Offs" in temp array, then advance r7
1169474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;#   to next row and read another 4 pels from the frame buffer.
1170474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1171474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    subi    r7, r3,  2          ;# r7 -> 2 pels before start
1172474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwzx    r0,  0, r7          ;# read first 4 pels
1173474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1174474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# 16 unaligned word accesses
1175474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 0                       ;# row  0 -> word  0, fetch row  1
1176474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 4                       ;# row  1 -> word  4, fetch row  2
1177474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 8                       ;# row  2 -> word  8, fetch row  3
1178474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 12                      ;# row  3 -> word 12, fetch row  4
1179474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 1                       ;# row  4 -> word  1, fetch row  5
1180474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 5                       ;# row  5 -> word  5, fetch row  6
1181474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 9                       ;# row  6 -> word  9, fetch row  7
1182474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 13                      ;# row  7 -> word 13, fetch row  8
1183474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 2                       ;# row  8 -> word  2, fetch row  9
1184474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 6                       ;# row  9 -> word  6, fetch row 10
1185474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 10                      ;# row 10 -> word 10, fetch row 11
1186474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 14                      ;# row 11 -> word 14, fetch row 12
1187474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 3                       ;# row 12 -> word  3, fetch row 13
1188474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 7                       ;# row 13 -> word  7, fetch row 14
1189474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    RLV 11                      ;# row 14 -> word 11, fetch row 15
1190474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1191474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stw     r0, (15*4)(r5)      ;# write last 4 pels
1192474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1193474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    simple_vertical             ;# filter the 64-byte buffer in place
1194474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1195474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    ;# Read temp array, write frame buffer.
1196474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    subi    r7, r3,  2          ;# r7 -> 2 pels before start
1197474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    lwzx    r0,  0, r5          ;# read/write first 4 pels
1198474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    stwx    r0,  0, r7          ;# word 0 -> row 0
1199474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
    ;# Same word order as the gather above, so each row gets its own pels back.
1200474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 4                       ;# word  4 -> row  1
1201474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 8                       ;# word  8 -> row  2
1202474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 12                      ;# word 12 -> row  3
1203474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 1                       ;# word  1 -> row  4
1204474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 5                       ;# word  5 -> row  5
1205474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 9                       ;# word  9 -> row  6
1206474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 13                      ;# word 13 -> row  7
1207474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 2                       ;# word  2 -> row  8
1208474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 6                       ;# word  6 -> row  9
1209474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 10                      ;# word 10 -> row 10
1210474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 14                      ;# word 14 -> row 11
1211474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 3                       ;# word  3 -> row 12
1212474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 7                       ;# word  7 -> row 13
1213474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 11                      ;# word 11 -> row 14
1214474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    WLV 15                      ;# word 15 -> row 15
1215474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1216474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    mtspr   256, r11            ;# reset old VRSAVE
1217474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1218474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    blr
1219474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1220474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .data
1221474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
;# Selector tables. Each _B_* label below is one 16-byte vector of byte
;#   indices in the range 0..31. _B_hihi and _B_lolo are loaded with lvx by
;#   simple_vertical; when a vector like this is used as a vperm control,
;#   indices 0..15 pick bytes of the first source and 16..31 bytes of the
;#   second. "first"/"last" below mean bytes 0..7 / 8..15 of a source.
;#   Each ".align 4" presumably requests 2^4 = 16-byte alignment, which lvx
;#   needs - confirm for the assembler in use.
;#
;# _chromaSelectors: two groups of four words, each holding three selector
;#   addresses followed by a zero word (presumably padding to 16 bytes).
;#   Not referenced in this part of the file; presumably read by the uv
;#   routines.
1222474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org_chromaSelectors:
1223474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .long   _B_hihi
1224474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .long   _B_Ures0
1225474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .long   _B_Vres0
1226474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .long   0
1227474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .long   _B_lolo
1228474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .long   _B_Ures8
1229474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .long   _B_Vres8
1230474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .long   0
1231474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1232474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
;# first 8 bytes of the second source, then last 8 bytes of the first
1233474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org_B_Vres8:
1234474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15
1235474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1236474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
;# first 8 bytes of the second source, then first 8 bytes of the first
1237474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org_B_Ures8:
1238474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte   16, 17, 18, 19, 20, 21, 22, 23,  0,  1,  2,  3,  4,  5,  6,  7
1239474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1240474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
;# last 8 bytes of the first source, then last 8 bytes of the second
1241474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org_B_lolo:
1242474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31
1243474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1244474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
;# same byte pattern as _B_lolo, kept under its own label
1245474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org_B_Vres0:
1246474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31
1247474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
;# first 8 bytes of the first source, then last 8 bytes of the second
1248474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org_B_Ures0:
1249474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31
1250474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
1251474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .align 4
;# first 8 bytes of the first source, then first 8 bytes of the second
1252474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org_B_hihi:
1253474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org    .byte    0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23
1254