;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

;# AltiVec reconstruction kernels: dst = clamp_to_u8(pred + diff).
;#
;# Conventions shared by every routine in this file:
;#   - v0 is kept at zero and is merged with pred bytes to widen them to
;#     16 bits; v1..v3 are scratch.
;#   - VRSAVE (SPR 256) is spilled to -8(r1) and restored before returning.
;#   - recon2b_ppc / recon_b_ppc use a 16-byte scratch buffer at -48(r1).
;#   - No stack frame is allocated, so both slots live below the stack
;#     pointer.
;#     NOTE(review): this relies on the target ABI allowing use of memory
;#     below r1, and on r1 being 16-byte aligned so that the lvx/stvx
;#     accesses to the scratch buffer hit the intended quadword -- confirm.

    .globl  recon4b_ppc
    .globl  recon2b_ppc
    .globl  recon_b_ppc

;# row_of16 Diff Pred Dst Stride
;#   Reconstruct one row of 16 pels.
;#   In:   \Diff   -> 16 signed 16-bit differences (32 bytes)
;#         \Pred   -> 16 unsigned 8-bit predictions
;#         \Dst    -> 16 output bytes
;#         \Stride =  byte distance to the next dst row
;#         v0 = 0, r8 = 16 (set up by the caller)
;#   Out:  \Pred += 16, \Diff += 32, \Dst += \Stride
;#   Uses: v1, v2, v3
;#   All three pointers are accessed with lvx/stvx, which operate on the
;#   16-byte-aligned quadword containing the address.
.macro row_of16 Diff Pred Dst Stride
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    addi    \Pred, \Pred, 16        ;# next pred
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7   (signed saturating add)
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15  (Diff + 16 bytes)
    addi    \Diff, \Diff, 32        ;# next diff
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15 (unsigned saturating pack)
    stvx    v2,  0, \Dst            ;# to dst
    add     \Dst, \Dst, \Stride     ;# next dst
.endm

    .text
    .align 2
;# recon4b_ppc: reconstruct 4 rows of 16 pels each.
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;# r3, r4 and r5 are advanced past the processed data; r0, r8, r12 and
;# v0..v3 are overwritten.
recon4b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# mark v0..v3 as in use
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, used to zero-extend pred
    li      r8, 16                      ;# byte offset of the second diff vector

    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr

;# two_rows_of8 Diff Pred Dst Stride write_first_four_pels
;#   Reconstruct 16 pels and write them out as two dst rows of 8 pels.
;#   In:   \Diff   -> 16 signed 16-bit differences (32 bytes)
;#         \Pred   -> 16 unsigned 8-bit predictions (row n, then row n+1)
;#         \Dst    -> current dst row
;#         \Stride =  byte distance between dst rows
;#         write_first_four_pels:
;#            1 = the first row is written at \Dst as it stands
;#            0 = \Dst is advanced by \Stride before the first row is written
;#         v0 = 0, r8 = 16, r10 -> 16-byte scratch buffer
;#   Out:  \Dst points at the second row written.
;#         \Diff and \Pred are NOT advanced; the caller does that.
;#   Uses: r0, v1, v2, v3
;#   The result is bounced through the scratch buffer and copied with word
;#   stores, so only 8 bytes per dst row are written.
.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
    lvx     v1,  0, \Pred       ;# v1 = pred = p0..p15
    vmrghb  v2, v0, v1          ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff       ;# v3 = d0..d7
    vaddshs v2, v2, v3          ;# v2 = r0..r7
    vmrglb  v1, v0, v1          ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff       ;# v3 = d8..d15
    vaddshs v3, v3, v1          ;# v3 = r8..r15
    vpkshus v2, v2, v3          ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10         ;# 2 rows to dst from buf
    lwz     r0, 0(r10)
.if \write_first_four_pels
    stw     r0, 0(\Dst)             ;# first row: dst already positioned
.else
    stwux   r0, \Dst, \Stride       ;# step dst to the next row, then store
.endif
    lwz     r0, 4(r10)
    stw     r0, 4(\Dst)
    lwz     r0, 8(r10)
    stwux   r0, \Dst, \Stride       ;# advance dst to next row
    lwz     r0, 12(r10)
    stw     r0, 4(\Dst)
.endm

    .align 2
;# recon2b_ppc: reconstruct 4 rows of 8 pels each (two two_rows_of8 passes).
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;# r3, r4 and r5 are modified; r0, r8, r10, r12 and v0..v3 are overwritten.

recon2b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# mark v0..v3 as in use
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, used to zero-extend pred
    li      r8, 16                      ;# byte offset of the second diff vector

    la      r10, -48(r1)                ;# buf

    two_rows_of8 r3, r4, r5, r6, 1      ;# rows 0 and 1

    addi    r4, r4, 16                  ;# next pred
    addi    r3, r3, 32                  ;# next diff

    two_rows_of8 r3, r4, r5, r6, 0      ;# rows 2 and 3

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr

;# get_two_diff_rows
;#   Gather four differences (8 bytes) from each of two diff rows that are
;#   32 bytes apart into the scratch buffer, then load them as one vector.
;#   In:   r0  = word already loaded from 0(r3) (first two diffs of the row)
;#         r3  -> first of the two diff rows
;#         r10 -> 16-byte scratch buffer
;#   Out:  v3  = the eight gathered 16-bit differences
;#         r3 += 32 (left pointing at the second row)
;#   Uses: r0; overwrites the scratch buffer.
.macro get_two_diff_rows
    stw     r0, 0(r10)
    lwz     r0, 4(r3)
    stw     r0, 4(r10)
    lwzu    r0, 32(r3)          ;# step r3 to the next diff row
    stw     r0, 8(r10)
    lwz     r0, 4(r3)
    stw     r0, 12(r10)
    lvx     v3, 0, r10
.endm

    .align 2
;# recon_b_ppc: reconstruct one block of 4 rows of 4 pels.
;# Pred rows are read 16 bytes apart and diff rows 32 bytes apart; the
;# result is written as one 4-byte word per dst row.
;#  r3 = short *diff_ptr,
;#  r4 = unsigned char *pred_ptr,
;#  r5 = unsigned char *dst_ptr,
;#  r6 = int stride
;# r3 and r5 are modified; r0, r10, r12 and v0..v3 are overwritten.
recon_b_ppc:
    mfspr   r0, 256                     ;# get old VRSAVE
    stw     r0, -8(r1)                  ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000              ;# mark v0..v3 as in use
    mtspr   256,r0                      ;# set VRSAVE

    vxor    v0, v0, v0                  ;# v0 = 0, used to zero-extend pred

    la      r10, -48(r1)    ;# buf

    ;# Gather the four 4-pel pred rows into one contiguous 16-byte vector.
    lwz     r0, 0(r4)
    stw     r0, 0(r10)
    lwz     r0, 16(r4)
    stw     r0, 4(r10)
    lwz     r0, 32(r4)
    stw     r0, 8(r10)
    lwz     r0, 48(r4)
    stw     r0, 12(r10)

    lvx     v1,  0, r10     ;# v1 = pred = p0..p15

    lwz     r0, 0(r3)       ;# prime r0 with the first diff word of row 0

    get_two_diff_rows       ;# v3 = d0..d7 (diff rows 0 and 1)

    vmrghb  v2, v0, v1      ;# v2 = 16-bit p0..p7
    vaddshs v2, v2, v3      ;# v2 = r0..r7

    lwzu    r0, 32(r3)      ;# step to diff row 2 and prime r0

    get_two_diff_rows       ;# v3 = d8..d15 (diff rows 2 and 3)

    vmrglb  v1, v0, v1      ;# v1 = 16-bit p8..p15
    vaddshs v3, v3, v1      ;# v3 = r8..r15

    vpkshus v2, v2, v3      ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10     ;# 16 pels to dst from buf

    ;# Scatter the four result words to four dst rows.
    lwz     r0, 0(r10)
    stw     r0, 0(r5)
    lwz     r0, 4(r10)
    stwux   r0, r5, r6      ;# row 1, r5 += stride
    lwz     r0, 8(r10)
    stwux   r0, r5, r6      ;# row 2, r5 += stride
    lwz     r0, 12(r10)
    stwx    r0, r5, r6      ;# row 3, r5 left unchanged

    lwz     r12, -8(r1)                 ;# restore old VRSAVE from stack
    mtspr   256, r12                    ;# reset old VRSAVE

    blr