;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


;# Symbols made global so that code outside this file can reference them.
    .globl recon4b_ppc
    .globl recon2b_ppc
    .globl recon_b_ppc

;# row_of16 Diff Pred Dst Stride
;#
;# Reconstructs one row of 16 pixels: 16 predictor bytes are widened to
;# 16 bits, added to 16 signed 16-bit diffs with saturation, packed back
;# to unsigned bytes with saturation and stored as one vector at Dst.
;#
;#   Diff   - GPR pointing at the 16 diffs; advanced by 32 bytes
;#   Pred   - GPR pointing at the 16 predictor bytes; advanced by 16 bytes
;#   Dst    - GPR pointing at the output row; advanced by Stride
;#   Stride - GPR holding the byte distance between output rows
;#
;# Expects v0 = 0 and r8 = 16 on entry (the expansion site in this file
;# sets both). Overwrites v1, v2 and v3.
;# NOTE(review): lvx and stvx are aligned vector accesses, so Diff, Pred
;# and Dst are presumably 16-byte aligned - confirm against the callers.
.macro row_of16 Diff Pred Dst Stride
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    addi    \Pred, \Pred, 16        ;# next pred
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7 (zero bytes come from v0)
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7, signed saturating add
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15 (r8 = offset to second half)
    addi    \Diff, \Diff, 32        ;# next diff
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15, unsigned saturating pack
    stvx    v2,  0, \Dst            ;# to dst
    add     \Dst, \Dst, \Stride     ;# next dst
.endm

;# ---------------------------------------------------------------------
;# recon4b_ppc
;#
;# Reconstructs four rows of 16 pixels by expanding row_of16 four times.
;#
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;#
;# Scratch: r0, r8, r12 and v0..v3. r3, r4 and r5 are advanced as the
;# rows are processed. The old VRSAVE value is kept in the word at
;# -8(r1) while the routine runs and is written back before returning.
;# NOTE(review): that word lies below the stack pointer and no frame is
;# allocated - presumably the target ABI guarantees it is preserved;
;# confirm for every platform this file is built for.
;# ---------------------------------------------------------------------
    .text
    .align 2
recon4b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# set the four top bits (v0..v3 in use)
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, zero source for the byte merges
    li      r8, 16                      ;# r8 = offset of d8..d15 within a diff row

    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr

;# two_rows_of8 Diff Pred Dst Stride write_first_four_pels
;#
;# Reconstructs two rows of 8 pixels. One 16-byte predictor vector is
;# combined with 16 diffs (same arithmetic as a 16-pixel row), the packed
;# result is stored to the scratch buffer at r10, and the buffer is then
;# copied to Dst as four 32-bit words: two words per output row.
;#
;#   Diff, Pred - GPRs with the input pointers; neither is advanced here
;#   Dst        - GPR with the output pointer; updated by the stwux stores
;#   Stride     - GPR with the byte distance between output rows
;#   write_first_four_pels
;#              - nonzero: the first word is stored at 0(Dst) and Dst is
;#                advanced by Stride once, before the second row
;#              - zero: Dst is advanced by Stride before the first word
;#                is stored, and again before the second row
;#
;# Expects v0 = 0, r8 = 16 and r10 = address of a 16-byte scratch buffer.
;# Overwrites r0, v1, v2 and v3. On exit Dst points at the second row
;# written by this expansion.
.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
    lvx     v1,  0, \Pred       ;# v1 = pred = p0..p15
    vmrghb  v2, v0, v1          ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff       ;# v3 = d0..d7
    vaddshs v2, v2, v3          ;# v2 = r0..r7
    vmrglb  v1, v0, v1          ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff       ;# v3 = d8..d15
    vaddshs v3, v3, v1          ;# v3 = r8..r15
    vpkshus v2, v2, v3          ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10         ;# both rows to buf, copied to dst below
    lwz     r0, 0(r10)          ;# first row, pels 0..3
.if \write_first_four_pels
    stw     r0, 0(\Dst)         ;# store in place, dst not moved
.else
    stwux   r0, \Dst, \Stride   ;# advance dst to next row, then store
.endif
    lwz     r0, 4(r10)          ;# first row, pels 4..7
    stw     r0, 4(\Dst)
    lwz     r0, 8(r10)          ;# second row, pels 0..3
    stwux   r0, \Dst, \Stride       ;# advance dst to next row
    lwz     r0, 12(r10)         ;# second row, pels 4..7
    stw     r0, 4(\Dst)
.endm

;# ---------------------------------------------------------------------
;# recon2b_ppc
;#
;# Reconstructs four rows of 8 pixels by expanding two_rows_of8 twice.
;#
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;#
;# Scratch: r0, r8, r10, r12 and v0..v3; r3, r4 and r5 are modified.
;# Stack use below r1: the old VRSAVE in the word at -8(r1) and a
;# 16-byte staging buffer starting at -48(r1).
;# NOTE(review): no frame is allocated for that storage - presumably
;# the target ABI guarantees it is preserved; confirm per platform.
;# ---------------------------------------------------------------------
    .align 2
recon2b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# set the four top bits (v0..v3 in use)
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, zero source for the byte merges
    li      r8, 16                      ;# r8 = offset of d8..d15 within the diffs

    la      r10, -48(r1)                ;# buf

    two_rows_of8 r3, r4, r5, r6, 1      ;# rows 0 and 1, first word stored at 0(r5)

    addi    r4, r4, 16                  ;# next pred
    addi    r3, r3, 32                  ;# next diff

    two_rows_of8 r3, r4, r5, r6, 0      ;# rows 2 and 3, r5 advanced before storing

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr

;# get_two_diff_rows
;#
;# Gathers two 8-byte pieces of diff data that lie 32 bytes apart into
;# the 16-byte scratch buffer at r10, then loads that buffer into v3.
;#
;# On entry r0 must already hold the word at 0(r3); the expansion sites
;# in this file load it just before invoking the macro. On exit r3 has
;# been advanced by 32 bytes and r0 holds the last word copied.
;# NOTE(review): lvx is an aligned vector load, so r10 is presumably
;# 16-byte aligned - confirm where r10 is set up.
.macro get_two_diff_rows
    stw     r0, 0(r10)                  ;# buf bytes 0..3   = word at 0(r3), preloaded
    lwz     r0, 4(r3)
    stw     r0, 4(r10)                  ;# buf bytes 4..7   = word at 4(r3)
    lwzu    r0, 32(r3)                  ;# r3 += 32, then fetch the word it points at
    stw     r0, 8(r10)                  ;# buf bytes 8..11
    lwz     r0, 4(r3)
    stw     r0, 12(r10)                 ;# buf bytes 12..15
    lvx     v3, 0, r10                  ;# v3 = the eight 16-bit diffs just gathered
.endm

;# ---------------------------------------------------------------------
;# recon_b_ppc
;#
;# Reconstructs four rows of 4 pixels. The four 4-byte predictor rows
;# (read 16 bytes apart) and the four 8-byte diff rows (read 32 bytes
;# apart) are first gathered into a 16-byte staging buffer so that each
;# can be loaded as a single vector; the packed result goes back through
;# the same buffer and is copied to dst one 32-bit word per row.
;#
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;#
;# Scratch: r0, r10, r12 and v0..v3; r3 and r5 are modified.
;# Stack use below r1: the old VRSAVE in the word at -8(r1) and the
;# staging buffer starting at -48(r1).
;# NOTE(review): no frame is allocated for that storage - presumably
;# the target ABI guarantees it is preserved; confirm per platform.
;# ---------------------------------------------------------------------
    .align 2
recon_b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# set the four top bits (v0..v3 in use)
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, zero source for the byte merges

    la      r10, -48(r1)                ;# buf

    lwz     r0, 0(r4)                   ;# gather four pred rows, 4 bytes each
    stw     r0, 0(r10)
    lwz     r0, 16(r4)
    stw     r0, 4(r10)
    lwz     r0, 32(r4)
    stw     r0, 8(r10)
    lwz     r0, 48(r4)
    stw     r0, 12(r10)

    lvx     v1,  0, r10                 ;# v1 = pred = p0..p15

    lwz     r0, 0(r3)                   ;# preload first diff word for the macro

    get_two_diff_rows                   ;# v3 = d0..d7

    vmrghb  v2, v0, v1                  ;# v2 = 16-bit p0..p7
    vaddshs v2, v2, v3                  ;# v2 = r0..r7

    lwzu    r0, 32(r3)                  ;# step r3 to the next diff row and preload it

    get_two_diff_rows                   ;# v3 = d8..d15

    vmrglb  v1, v0, v1                  ;# v1 = 16-bit p8..p15
    vaddshs v3, v3, v1                  ;# v3 = r8..r15

    vpkshus v2, v2, v3                  ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10                 ;# 16 pels to buf, copied to dst below

    lwz     r0, 0(r10)
    stw     r0, 0(r5)                   ;# row 0
    lwz     r0, 4(r10)
    stwux   r0, r5, r6                  ;# row 1, r5 += stride
    lwz     r0, 8(r10)
    stwux   r0, r5, r6                  ;# row 2, r5 += stride
    lwz     r0, 12(r10)
    stwx    r0, r5, r6                  ;# row 3 at r5 + stride, r5 left unchanged

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr
