1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
;# Entry points exported from this file; each label is defined further down.
12    .globl recon4b_ppc
13    .globl recon2b_ppc
14    .globl recon_b_ppc
15
;# row_of16 Diff Pred Dst Stride
;#   Reconstructs one row of 16 pels:
;#       dst[i] = pred[i] + diff[i], saturated to 0..255,  i = 0..15
;#   \Diff   = register pointing at 16 signed 16-bit differences (32 bytes)
;#   \Pred   = register pointing at 16 unsigned 8-bit predictor pels
;#   \Dst    = register pointing at the 16 output pels
;#   \Stride = register holding the byte distance between dst rows
;#   On exit: \Pred += 16, \Diff += 32, \Dst += \Stride.
;#   Implicit inputs (set up by the invoking routine): v0 = all zeros,
;#   r8 = 16.  Clobbers v1, v2, v3.
;#   NOTE: lvx/stvx ignore the low four bits of the effective address, so
;#   all three pointers are expected to be 16-byte aligned.
16.macro row_of16 Diff Pred Dst Stride
17    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
18    addi    \Pred, \Pred, 16        ;# next pred
19    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7 (zero bytes from v0 interleaved)
20    lvx     v3,  0, \Diff           ;# v3 = d0..d7
21    vaddshs v2, v2, v3              ;# v2 = r0..r7 (signed saturating 16-bit add)
22    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
23    lvx     v3, r8, \Diff           ;# v3 = d8..d15 (r8 = 16 byte offset)
24    addi    \Diff, \Diff, 32        ;# next diff
25    vaddshs v3, v3, v1              ;# v3 = r8..r15
26    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15 (packed with unsigned saturation)
27    stvx    v2,  0, \Dst            ;# to dst
28    add     \Dst, \Dst, \Stride     ;# next dst
29.endm
30
31    .text
32    .align 2
;# recon4b_ppc
;#   Reconstructs four rows of 16 pels each by invoking row_of16 four times:
;#   each output pel is pred + diff, saturated to an unsigned byte.
;#   Per row the predictor pointer advances 16 bytes, the diff pointer
;#   32 bytes and the destination pointer by stride.
;#   Uses v0..v3, r0, r8 and r12; r3, r4 and r5 are modified.
;#   VRSAVE is kept at -8(r1) without moving the stack pointer.
;#   NOTE(review): this relies on the target ABI allowing a leaf function
;#   to use the area below r1 -- confirm for the platform in use.
33;#  r3 = short *diff_ptr,
34;#  r4 = unsigned char *pred_ptr,
35;#  r5 = unsigned char *dst_ptr,
36;#  r6 = int stride
37recon4b_ppc:
38    mfspr   r0, 256                     ;# get old VRSAVE
39    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
40    oris    r0, r0, 0xf000              ;# set the top four bits: v0..v3 in use
41    mtspr   256,r0                      ;# set VRSAVE
42
43    vxor    v0, v0, v0                  ;# v0 = 0, zero source for the byte merges
44    li      r8, 16                      ;# r8 = offset of second half of a diff row
45
46    row_of16 r3, r4, r5, r6             ;# row 0
47    row_of16 r3, r4, r5, r6             ;# row 1
48    row_of16 r3, r4, r5, r6             ;# row 2
49    row_of16 r3, r4, r5, r6             ;# row 3
50
51    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
52    mtspr   256, r12                    ;# reset old VRSAVE
53
54    blr
55
;# two_rows_of8 Diff Pred Dst Stride write_first_four_pels
;#   Reconstructs two rows of 8 pels (16 pels in total) and writes them to
;#   two consecutive destination rows, one 32-bit word at a time.
;#   \Diff   = register pointing at 16 signed 16-bit differences (32 bytes)
;#   \Pred   = register pointing at 16 unsigned 8-bit predictor pels
;#   \Dst    = register pointing into the destination (see flag below)
;#   \Stride = register holding the byte distance between dst rows
;#   \write_first_four_pels:
;#       non-zero: the first row is written at \Dst itself;
;#       zero:     \Dst is first advanced by \Stride (stwux), so the first
;#                 row lands on the row after the one \Dst pointed at.
;#   In both cases \Dst ends up pointing at the second row written.
;#   \Pred and \Diff are NOT advanced; the invoking code must do that.
;#   Implicit inputs: v0 = all zeros, r8 = 16, r10 = 16-byte scratch buffer
;#   used to move the result from the vector unit to integer registers.
;#   Clobbers r0, v1, v2, v3 and the 16 bytes at r10.
56.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
57    lvx     v1,  0, \Pred       ;# v1 = pred = p0..p15
58    vmrghb  v2, v0, v1          ;# v2 = 16-bit p0..p7
59    lvx     v3,  0, \Diff       ;# v3 = d0..d7
60    vaddshs v2, v2, v3          ;# v2 = r0..r7
61    vmrglb  v1, v0, v1          ;# v1 = 16-bit p8..p15
62    lvx     v3, r8, \Diff       ;# v3 = d8..d15
63    vaddshs v3, v3, v1          ;# v3 = r8..r15
64    vpkshus v2, v2, v3          ;# v2 = 8-bit r0..r15
65    stvx    v2,  0, r10         ;# 2 rows (16 pels) to buf; copied to dst below
66    lwz     r0, 0(r10)          ;# first row, pels 0..3
67.if \write_first_four_pels
68    stw     r0, 0(\Dst)         ;# write at current dst row
69    .else
70    stwux   r0, \Dst, \Stride   ;# advance dst to next row, then write
71.endif
72    lwz     r0, 4(r10)          ;# first row, pels 4..7
73    stw     r0, 4(\Dst)
74    lwz     r0, 8(r10)          ;# second row, pels 0..3
75    stwux   r0, \Dst, \Stride       ;# advance dst to next row
76    lwz     r0, 12(r10)         ;# second row, pels 4..7
77    stw     r0, 4(\Dst)
78.endm
79
80    .align 2
;# recon2b_ppc
;#   Reconstructs four rows of 8 pels each using two invocations of
;#   two_rows_of8 (two rows per invocation).  Each output pel is
;#   pred + diff, saturated to an unsigned byte.
;#   The predictor is read as 16 contiguous bytes per pair of rows and the
;#   diffs as 32 contiguous bytes per pair of rows.
;#   Uses v0..v3, r0, r8, r10 and r12; r3, r4 and r5 are modified.
;#   A 16-byte scratch buffer at -48(r1) and the VRSAVE slot at -8(r1) are
;#   used without moving the stack pointer.
;#   NOTE(review): this relies on the target ABI allowing a leaf function
;#   to use the area below r1, and on r1 being 16-byte aligned so that the
;#   buffer is valid for stvx -- confirm for the platform in use.
81;#  r3 = short *diff_ptr,
82;#  r4 = unsigned char *pred_ptr,
83;#  r5 = unsigned char *dst_ptr,
84;#  r6 = int stride
85
86recon2b_ppc:
87    mfspr   r0, 256                     ;# get old VRSAVE
88    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
89    oris    r0, r0, 0xf000              ;# set the top four bits: v0..v3 in use
90    mtspr   256,r0                      ;# set VRSAVE
91
92    vxor    v0, v0, v0                  ;# v0 = 0, zero source for the byte merges
93    li      r8, 16                      ;# r8 = offset of second half of the diffs
94
95    la      r10, -48(r1)                ;# buf
96
97    two_rows_of8 r3, r4, r5, r6, 1      ;# rows 0 and 1; row 0 written at dst
98
99    addi    r4, r4, 16;                 ;# next pred
100    addi    r3, r3, 32;                 ;# next diff
101
102    two_rows_of8 r3, r4, r5, r6, 0      ;# rows 2 and 3; dst advanced before row 2
103
104    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
105    mtspr   256, r12                    ;# reset old VRSAVE
106
107    blr
108
;# get_two_diff_rows
;#   Gathers 8 bytes (four 16-bit diffs) from each of two diff rows that
;#   are 32 bytes apart into the scratch buffer, then loads them into v3.
;#   On entry: r3  = address of the first of the two rows,
;#             r0  = word at 0(r3), already loaded by the invoking code,
;#             r10 = 16-byte scratch buffer.
;#   On exit:  v3 = first row d0..d3 followed by second row d0..d3,
;#             r3 = address of the second row (advanced by 32 via lwzu).
;#   Clobbers r0 and the 16 bytes at r10.
109.macro get_two_diff_rows
110    stw     r0, 0(r10)          ;# first row, d0..d1 (preloaded in r0)
111    lwz     r0, 4(r3)           ;# first row, d2..d3
112    stw     r0, 4(r10)
113    lwzu    r0, 32(r3)          ;# second row, d0..d1; r3 += 32
114    stw     r0, 8(r10)
115    lwz     r0, 4(r3)           ;# second row, d2..d3
116    stw     r0, 12(r10)
117    lvx     v3, 0, r10          ;# v3 = the eight gathered diffs
118.endm
119
120    .align 2
;# recon_b_ppc
;#   Reconstructs four rows of 4 pels each.  Each output pel is
;#   pred + diff, saturated to an unsigned byte.
;#   Predictor rows are read 16 bytes apart, diff rows 32 bytes apart,
;#   and destination rows stride bytes apart.
;#   The 16-byte scratch buffer at -48(r1) is reused three times: to gather
;#   the predictor pels, to gather the diffs (inside get_two_diff_rows) and
;#   to scatter the result.  Each vector load from the buffer must
;#   therefore stay ahead of the next scalar stores into it.
;#   Uses v0..v3, r0, r10 and r12; r3 and r5 are modified.
;#   NOTE(review): the buffer and the VRSAVE slot at -8(r1) sit below the
;#   stack pointer, and the buffer must be 16-byte aligned for lvx/stvx --
;#   confirm both against the target ABI.
121;#  r3 = short *diff_ptr,
122;#  r4 = unsigned char *pred_ptr,
123;#  r5 = unsigned char *dst_ptr,
124;#  r6 = int stride
125recon_b_ppc:
126    mfspr   r0, 256                     ;# get old VRSAVE
127    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
128    oris    r0, r0, 0xf000              ;# set the top four bits: v0..v3 in use
129    mtspr   256,r0                      ;# set VRSAVE
130
131    vxor    v0, v0, v0                  ;# v0 = 0, zero source for the byte merges
132
133    la      r10, -48(r1)    ;# buf
134
135    lwz     r0, 0(r4)       ;# pred row 0, 4 pels
136    stw     r0, 0(r10)
137    lwz     r0, 16(r4)      ;# pred row 1
138    stw     r0, 4(r10)
139    lwz     r0, 32(r4)      ;# pred row 2
140    stw     r0, 8(r10)
141    lwz     r0, 48(r4)      ;# pred row 3
142    stw     r0, 12(r10)
143
144    lvx     v1,  0, r10;    ;# v1 = pred = p0..p15
145
146    lwz r0, 0(r3)           ;# preload first diff word of row 0 for the macro
147
148    get_two_diff_rows       ;# v3 = d0..d7 (diff rows 0 and 1); r3 -> row 1
149
150    vmrghb  v2, v0, v1;     ;# v2 = 16-bit p0..p7
151    vaddshs v2, v2, v3;     ;# v2 = r0..r7
152
153    lwzu r0, 32(r3)         ;# r3 -> row 2; preload its first diff word
154
155    get_two_diff_rows       ;# v3 = d8..d15 (diff rows 2 and 3); r3 -> row 3
156
157    vmrglb  v1, v0, v1;     ;# v1 = 16-bit p8..p15
158    vaddshs v3, v3, v1;     ;# v3 = r8..r15
159
160    vpkshus v2, v2, v3;     ;# v2 = 8-bit r0..r15
161    stvx    v2,  0, r10;    ;# 16 pels to buf; copied to dst below
162
163    lwz     r0, 0(r10)
164    stw     r0, 0(r5)       ;# row 0 at dst
165    lwz     r0, 4(r10)
166    stwux   r0, r5, r6      ;# row 1; r5 += stride
167    lwz     r0, 8(r10)
168    stwux   r0, r5, r6      ;# row 2; r5 += stride
169    lwz     r0, 12(r10)
170    stwx    r0, r5, r6      ;# row 3 at r5 + stride; r5 left unchanged
171
172    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
173    mtspr   256, r12                    ;# reset old VRSAVE
174
175    blr
176