190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# Export the three reconstruction entry points defined below.
1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl recon4b_ppc
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl recon2b_ppc
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl recon_b_ppc
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# row_of16: reconstruct one row of 16 pels.
;#   Loads 16 prediction bytes from \Pred and sixteen 16-bit diffs from
;#   \Diff (two vector loads, the second at offset r8), adds them with
;#   signed saturation, packs the sums to unsigned bytes with saturation
;#   and stores the 16 result bytes at \Dst.
;#   Implicit inputs: v0 must be all zero and r8 must hold 16; the macro
;#   sets neither, so the code expanding it has to.
;#   On exit: \Pred += 16, \Diff += 32, \Dst += \Stride.
;#   Clobbers: v1, v2, v3.
;#   NOTE(review): lvx/stvx are aligned vector accesses; presumably all
;#   three pointers are 16-byte aligned -- confirm against callers.
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro row_of16 Diff Pred Dst Stride
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    \Pred, \Pred, 16        ;# next pred
1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7 (zero bytes from v0 interleaved)
2090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v3,  0, \Diff           ;# v3 = d0..d7
2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vaddshs v2, v2, v3              ;# v2 = r0..r7 (signed saturating add)
2290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
2390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v3, r8, \Diff           ;# v3 = d8..d15 (r8 = offset of second diff vector)
2490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    \Diff, \Diff, 32        ;# next diff
2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vaddshs v3, v3, v1              ;# v3 = r8..r15
2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15 (saturated to unsigned bytes)
2790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v2,  0, \Dst            ;# to dst
2890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add     \Dst, \Dst, \Stride     ;# next dst
2990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
3090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .text
3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .align 2
;# recon4b_ppc: reconstruct 4 rows of 16 pels by expanding row_of16 four
;# times. Each output pel is pred + diff, clamped to an unsigned byte by
;# the saturating add/pack inside the macro.
;# Modifies r0, r3, r4, r5, r8, r12 and v0-v3. The incoming VRSAVE value
;# is kept in the word at -8(r1) (no stack frame is allocated) and is
;# restored before returning.
3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r3 = short *diff_ptr,
3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r4 = unsigned char *pred_ptr,
3590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r5 = unsigned char *dst_ptr,
3690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r6 = int stride
3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberrecon4b_ppc:
3890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mfspr   r0, 256                     ;# get old VRSAVE
3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    oris    r0, r0, 0xf000              ;# set the four high-order VRSAVE bits (v0..v3 are used below)
4190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256,r0                      ;# set VRSAVE
4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vxor    v0, v0, v0                  ;# v0 = 0, required by row_of16
4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    li      r8, 16                      ;# r8 = 16, required by row_of16
4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    row_of16 r3, r4, r5, r6             ;# row 0 (macro advances r3, r4, r5)
4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    row_of16 r3, r4, r5, r6             ;# row 1
4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    row_of16 r3, r4, r5, r6             ;# row 2
4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    row_of16 r3, r4, r5, r6             ;# row 3
5090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r12                    ;# reset old VRSAVE
5390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    blr
5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# two_rows_of8: reconstruct two rows of 8 pels.
;#   One 16-byte load from \Pred supplies the prediction for both rows and
;#   two vector loads from \Diff (offsets 0 and r8) supply sixteen 16-bit
;#   diffs. The packed 16 result bytes are written to the scratch buffer
;#   at r10 and then copied to \Dst one word at a time, 8 bytes per row.
;#   write_first_four_pels:
;#     non-zero -> the first word is stored at 0(\Dst); \Dst is not moved.
;#     zero     -> the first word is stored with stwux, i.e. \Dst is first
;#                 advanced by \Stride and the word is stored there.
;#   Implicit inputs: v0 = 0, r8 = 16, r10 = 16-byte scratch buffer.
;#   On exit: \Dst points at the second row written; \Pred and \Diff are
;#   unchanged (the code expanding the macro advances them).
;#   Clobbers: r0, v1, v2, v3.
5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v1,  0, \Pred       ;# v1 = pred = p0..p15
5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmrghb  v2, v0, v1          ;# v2 = 16-bit p0..p7
5990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v3,  0, \Diff       ;# v3 = d0..d7
6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vaddshs v2, v2, v3          ;# v2 = r0..r7
6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmrglb  v1, v0, v1          ;# v1 = 16-bit p8..p15
6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v3, r8, \Diff       ;# v3 = d8..d15
6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vaddshs v3, v3, v1          ;# v3 = r8..r15
6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vpkshus v2, v2, v3          ;# v2 = 8-bit r0..r15
6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v2,  0, r10         ;# stage both rows (16 pels) in buf
6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 0(r10)          ;# first row, pels 0..3
6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.if \write_first_four_pels
6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 0(\Dst)         ;# store at current dst row
6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .else
7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stwux   r0, \Dst, \Stride   ;# advance dst by stride, then store there
7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endif
7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 4(r10)          ;# first row, pels 4..7
7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 4(\Dst)
7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 8(r10)          ;# second row, pels 0..3
7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stwux   r0, \Dst, \Stride       ;# advance dst to next row
7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 12(r10)         ;# second row, pels 4..7
7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 4(\Dst)
7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .align 2
;# recon2b_ppc: reconstruct 4 rows of 8 pels by expanding two_rows_of8
;# twice. Between the two expansions pred is advanced by 16 bytes and diff
;# by 32 bytes. The 16-byte area at r1-48 is used as a scratch buffer and
;# the incoming VRSAVE is kept at -8(r1); no stack frame is allocated.
;# Modifies r0, r3, r4, r5, r8, r10, r12 and v0-v3.
8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r3 = short *diff_ptr,
8290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r4 = unsigned char *pred_ptr,
8390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r5 = unsigned char *dst_ptr,
8490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r6 = int stride
8590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberrecon2b_ppc:
8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mfspr   r0, 256                     ;# get old VRSAVE
8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    oris    r0, r0, 0xf000              ;# set the four high-order VRSAVE bits (v0..v3 are used below)
9090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256,r0                      ;# set VRSAVE
9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vxor    v0, v0, v0                  ;# v0 = 0, required by two_rows_of8
9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    li      r8, 16                      ;# r8 = 16, required by two_rows_of8
9490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    la      r10, -48(r1)                ;# buf = 16-byte scratch area at r1-48
9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    two_rows_of8 r3, r4, r5, r6, 1      ;# rows 0 and 1; first word goes to 0(r5)
9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r4, r4, 16;                 ;# next pred
10090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r3, r3, 32;                 ;# next diff
10190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    two_rows_of8 r3, r4, r5, r6, 0      ;# rows 2 and 3; r5 is stepped to row 2 first
10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r12                    ;# reset old VRSAVE
10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    blr
10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# get_two_diff_rows: gather 8 bytes of diff from each of two rows into v3.
;#   On entry: r0 must already hold the word at 0(r3); r10 = 16-byte
;#   scratch buffer. The macro copies 8 bytes starting at r3 into
;#   buf[0..7], advances r3 by 32 bytes, copies 8 bytes from the new r3
;#   into buf[8..15], then loads the whole buffer into v3.
;#   On exit: r3 has been advanced by 32.
;#   Clobbers: r0, v3, and the 16 bytes at r10.
10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro get_two_diff_rows
11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 0(r10)      ;# first row, bytes 0..3 (preloaded into r0)
11190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 4(r3)
11290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 4(r10)      ;# first row, bytes 4..7
11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwzu    r0, 32(r3)      ;# r3 += 32, r0 = word at new r3
11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 8(r10)      ;# second row, bytes 0..3
11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 4(r3)
11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 12(r10)     ;# second row, bytes 4..7
11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v3, 0, r10      ;# v3 = the 16 gathered diff bytes
11890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
11990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .align 2
;# recon_b_ppc: reconstruct a 4x4 block of pels.
;# Four prediction bytes are read from each of four rows 16 bytes apart
;# (offsets 0, 16, 32, 48 from r4) and packed into one vector through the
;# scratch buffer at r1-48. The diffs are gathered with get_two_diff_rows,
;# 8 bytes from each of four rows 32 bytes apart. Pred and diff are added
;# with saturation, packed to bytes, written back to the scratch buffer
;# and copied to dst as four 4-byte rows r6 bytes apart.
;# Modifies r0, r3, r5, r10, r12 and v0-v3 (r3 ends 96 bytes past its
;# entry value, r5 two strides past). VRSAVE is kept at -8(r1) and
;# restored before returning.
12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r3 = short *diff_ptr,
12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r4 = unsigned char *pred_ptr,
12390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r5 = unsigned char *dst_ptr,
12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r6 = int stride
12590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberrecon_b_ppc:
12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mfspr   r0, 256                     ;# get old VRSAVE
12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    oris    r0, r0, 0xf000              ;# set the four high-order VRSAVE bits (v0..v3 are used below)
12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256,r0                      ;# set VRSAVE
13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vxor    v0, v0, v0                  ;# v0 = 0, interleaved with pred bytes below
13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    la      r10, -48(r1)    ;# buf = 16-byte scratch area at r1-48
13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 0(r4)       ;# pred row 0, 4 pels
13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 0(r10)
13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 16(r4)      ;# pred row 1
13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 4(r10)
13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 32(r4)      ;# pred row 2
14090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 8(r10)
14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 48(r4)      ;# pred row 3
14290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 12(r10)
14390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v1,  0, r10;    ;# v1 = pred = p0..p15
14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz r0, 0(r3)           ;# preload first diff word; the macro below builds v3 = d0..d7
14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    get_two_diff_rows
14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmrghb  v2, v0, v1;     ;# v2 = 16-bit p0..p7
15190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vaddshs v2, v2, v3;     ;# v2 = r0..r7
15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwzu r0, 32(r3)         ;# r3 += 32 and preload; the macro below builds v3 = d8..d15
15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    get_two_diff_rows
15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmrglb  v1, v0, v1;     ;# v1 = 16-bit p8..p15
15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vaddshs v3, v3, v1;     ;# v3 = r8..r15
15990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
16090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vpkshus v2, v2, v3;     ;# v2 = 8-bit r0..r15
16190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v2,  0, r10;    ;# stage the 16 pels in buf for word-wise copy to dst
16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 0(r10)
16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 0(r5)       ;# dst row 0
16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 4(r10)
16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stwux   r0, r5, r6      ;# dst row 1 (r5 += stride)
16790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 8(r10)
16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stwux   r0, r5, r6      ;# dst row 2 (r5 += stride)
16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 12(r10)
17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stwx    r0, r5, r6      ;# dst row 3 (r5 left unchanged)
17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r12                    ;# reset old VRSAVE
17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    blr
176