;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_subtract_mbuv_ppc
    .globl vp8_subtract_mby_ppc

;# r3 short *diff
;# r4 unsigned char *usrc
;# r5 unsigned char *vsrc
;# r6 unsigned char *pred
;# r7 int stride
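;#
;# Computes diff = src - pred for the two 8x8 chroma blocks of a
;# macroblock.  Roughly equivalent to the C sketch below (illustrative
;# only; the name and offsets mirror this file's pointer math and are
;# not a verbatim copy of the project's C reference code):
;#
;#   void subtract_mbuv(short *diff, unsigned char *usrc,
;#                      unsigned char *vsrc, unsigned char *pred,
;#                      int stride)
;#   {
;#       short         *udiff = diff + 256;  /* skip 256 Y diff shorts */
;#       unsigned char *upred = pred + 256;  /* skip 16x16 Y predictor */
;#       int r, c;
;#
;#       for (r = 0; r < 8; r++, usrc += stride)    /* U plane */
;#           for (c = 0; c < 8; c++)
;#               *udiff++ = usrc[c] - *upred++;
;#       for (r = 0; r < 8; r++, vsrc += stride)    /* V plane */
;#           for (c = 0; c < 8; c++)
;#               *udiff++ = vsrc[c] - *upred++;
;#   }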
vp8_subtract_mbuv_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfc00    ;# mark v0-v5 as in use
    mtspr   256, r12            ;# set VRSAVE

    li      r9, 256
    add     r3, r3, r9          ;# diff += 2*256 bytes,
    add     r3, r3, r9          ;#   skipping the 256 Y diff shorts
    add     r6, r6, r9          ;# pred += 256, skipping the 16x16 Y predictor

    li      r10, 16             ;# offset for the second vector store
    li      r9,  4
    mtctr   r9                  ;# 4 iterations, 2 chroma rows each

    vspltisw v0, 0              ;# zero vector for byte-to-short unpacking

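;# Each pass of mbu_loop handles two U rows: the source is reloaded and
;# realigned per row with lvsl/vperm (lvx ignores the low four address
;# bits), while a single 16-byte pred load supplies both rows (high
;# half first, then low half).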
mbu_loop:
    lvsl    v5, 0, r4           ;# permute value for alignment
    lvx     v1, 0, r4           ;# src
    lvx     v2, 0, r6           ;# pred

    add     r4, r4, r7          ;# src += stride
    addi    r6, r6, 16

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src  to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    lvsl    v5, 0, r4           ;# permute value for alignment
    lvx     v1, 0, r4           ;# src

    add     r4, r4, r7          ;# src += stride

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src  to short
    vmrglb  v4, v0, v2          ;# unpack low  pred to short

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mbu_loop

    mtctr   r9                  ;# 4 more iterations for the V block

mbv_loop:
    lvsl    v5, 0, r5           ;# permute value for alignment
    lvx     v1, 0, r5           ;# src
    lvx     v2, 0, r6           ;# pred

    add     r5, r5, r7          ;# src += stride
    addi    r6, r6, 16

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src  to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    lvsl    v5, 0, r5           ;# permute value for alignment
    lvx     v1, 0, r5           ;# src

    add     r5, r5, r7          ;# src += stride

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vperm   v1, v1, v0, v5

    vmrghb  v3, v0, v1          ;# unpack high src  to short
    vmrglb  v4, v0, v2          ;# unpack low  pred to short

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mbv_loop

    mtspr   256, r11            ;# reset old VRSAVE

    blr

;# r3 short *diff
;# r4 unsigned char *src
;# r5 unsigned char *pred
;# r6 int stride
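;#
;# Computes diff = src - pred for the 16x16 luma block.  Roughly
;# equivalent to the C sketch below (illustrative only, not the
;# project's verbatim C reference code):
;#
;#   void subtract_mby(short *diff, unsigned char *src,
;#                     unsigned char *pred, int stride)
;#   {
;#       int r, c;
;#
;#       for (r = 0; r < 16; r++, src += stride, pred += 16, diff += 16)
;#           for (c = 0; c < 16; c++)
;#               diff[c] = src[c] - pred[c];
;#   }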
vp8_subtract_mby_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf800    ;# mark v0-v4 as in use
    mtspr   256, r12            ;# set VRSAVE

    li      r10, 16             ;# store offset; also the row count
    mtctr   r10                 ;# 16 iterations, one luma row each

    vspltisw v0, 0              ;# zero vector for byte-to-short unpacking

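;# Each pass of mby_loop subtracts one 16-pixel luma row, unpacking the
;# high and low halves of the row to shorts and storing them as two
;# vectors.  Unlike the chroma path there is no lvsl/vperm fixup, so
;# src is assumed to be 16-byte aligned (lvx ignores the low four
;# address bits).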
mby_loop:
    lvx     v1, 0, r4           ;# src
    lvx     v2, 0, r5           ;# pred

    add     r4, r4, r6          ;# src += stride
    addi    r5, r5, 16

    vmrghb  v3, v0, v1          ;# unpack high src  to short
    vmrghb  v4, v0, v2          ;# unpack high pred to short

    vsubshs v3, v3, v4

    stvx    v3, 0, r3           ;# store out diff

    vmrglb  v3, v0, v1          ;# unpack low src  to short
    vmrglb  v4, v0, v2          ;# unpack low pred to short

    vsubshs v3, v3, v4

    stvx    v3, r10, r3         ;# store out diff

    addi    r3, r3, 32

    bdnz    mby_loop

    mtspr   256, r11            ;# reset old VRSAVE

    blr