;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

;# AltiVec reconstruction kernels: dst = saturate_to_u8(pred + diff).
;#
;# All three entry points share these conventions:
;#   - diff holds signed 16-bit values, pred and dst hold unsigned bytes.
;#   - v0 is zeroed and used to widen pred bytes to 16 bits via byte merges.
;#   - VRSAVE (SPR 256) is saved at -8(r1), has the bits for v0..v3 set
;#     while the routine runs, and is restored before returning.
;#   - No stack frame is created; scratch data lives below r1.
;#     NOTE(review): this relies on the ABI leaving that area untouched
;#     (red zone) -- confirm for the target ABI.

    .globl recon4b_ppc
    .globl recon2b_ppc
    .globl recon_b_ppc

;# row_of16: reconstruct one row of 16 pels.
;#   Diff   - register pointing at 16 shorts (32 bytes); advanced by 32.
;#   Pred   - register pointing at 16 bytes; advanced by 16.
;#   Dst    - register pointing at 16 output bytes; advanced by Stride.
;#   Stride - register holding the dst row pitch in bytes.
;# Requires v0 = 0 and r8 = 16. Clobbers v1, v2, v3.
;# All three pointers are accessed with lvx/stvx, which operate on
;# 16-byte-aligned quadwords.
.macro row_of16 Diff Pred Dst Stride
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    addi    \Pred, \Pred, 16        ;# next pred
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15
    addi    \Diff, \Diff, 32        ;# next diff
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15
    stvx    v2,  0, \Dst            ;# to dst
    add     \Dst, \Dst, \Stride     ;# next dst
.endm

    .text
    .align 2
;# recon4b_ppc: reconstruct 4 rows of 16 pels.
;# Reads 4 x 16 shorts from diff_ptr and 4 x 16 bytes from pred_ptr
;# (both contiguous) and writes 4 rows of 16 bytes to dst_ptr, rows
;# being stride bytes apart.
;#
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon4b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000          ;# mark v0..v3 as in use
    mtspr   256,r0                  ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, high bytes for the merges
    li      r8, 16                  ;# byte offset of d8..d15 within a row

    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6
    row_of16 r3, r4, r5, r6

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr

;# two_rows_of8: reconstruct 16 pels and store them as two rows of 8.
;#   Diff   - register pointing at 16 shorts; NOT advanced by the macro.
;#   Pred   - register pointing at 16 bytes; NOT advanced by the macro.
;#   Dst    - register pointing at the output; see below for how it moves.
;#   Stride - register holding the dst row pitch in bytes.
;#   write_first_four_pels
;#          - 1: the first row is written at Dst as it stands.
;#            0: Dst is first advanced by Stride (stwux), then written.
;# In both cases Dst is advanced by Stride once more before the second
;# row, so on exit Dst points at the last row written.
;# Requires v0 = 0, r8 = 16 and r10 = address of a 16-byte scratch buffer
;# used to move the packed vector result into GPRs. Clobbers r0, v1..v3.
.macro two_rows_of8 Diff Pred Dst Stride write_first_four_pels
    lvx     v1,  0, \Pred           ;# v1 = pred = p0..p15
    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7
    lvx     v3,  0, \Diff           ;# v3 = d0..d7
    vaddshs v2, v2, v3              ;# v2 = r0..r7
    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    lvx     v3, r8, \Diff           ;# v3 = d8..d15
    vaddshs v3, v3, v1              ;# v3 = r8..r15
    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10             ;# 2 rows to dst from buf
    lwz     r0, 0(r10)
.if \write_first_four_pels
    stw     r0, 0(\Dst)
    .else
    stwux   r0, \Dst, \Stride
.endif
    lwz     r0, 4(r10)
    stw     r0, 4(\Dst)
    lwz     r0, 8(r10)
    stwux   r0, \Dst, \Stride       ;# advance dst to next row
    lwz     r0, 12(r10)
    stw     r0, 4(\Dst)
.endm

    .align 2
;# recon2b_ppc: reconstruct 4 rows of 8 pels.
;# Reads 32 contiguous shorts from diff_ptr and 32 contiguous bytes from
;# pred_ptr, and writes 4 rows of 8 bytes to dst_ptr, rows being stride
;# bytes apart.
;#
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride

recon2b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000          ;# mark v0..v3 as in use
    mtspr   256,r0                  ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, high bytes for the merges
    li      r8, 16                  ;# byte offset of d8..d15

    ;# 16-byte scratch below the stack pointer, clear of the VRSAVE slot.
    ;# NOTE(review): stvx/lwz agree on the address only if r1 is 16-byte
    ;# aligned -- confirm the ABI guarantees this.
    la      r10, -48(r1)            ;# buf

    two_rows_of8 r3, r4, r5, r6, 1

    addi    r4, r4, 16              ;# next pred
    addi    r3, r3, 32              ;# next diff

    two_rows_of8 r3, r4, r5, r6, 0

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr

;# get_two_diff_rows: gather 4 shorts from each of two diff rows into v3.
;# On entry r0 must already hold the first word (2 shorts) of the first
;# row and r3 must point at that row. Rows are 32 bytes apart; r3 is left
;# pointing at the second row. Uses the scratch buffer at r10.
;# Clobbers r0.
.macro get_two_diff_rows
    stw     r0, 0(r10)
    lwz     r0, 4(r3)
    stw     r0, 4(r10)
    lwzu    r0, 32(r3)              ;# step to the next diff row
    stw     r0, 8(r10)
    lwz     r0, 4(r3)
    stw     r0, 12(r10)
    lvx     v3, 0, r10
.endm

    .align 2
;# recon_b_ppc: reconstruct one 4x4 block.
;# Reads 4 pels from each of 4 pred rows (16 bytes apart) and 4 shorts
;# from each of 4 diff rows (32 bytes apart), and writes 4 rows of 4 bytes
;# to dst_ptr, rows being stride bytes apart.
;#
;# r3 = short *diff_ptr,
;# r4 = unsigned char *pred_ptr,
;# r5 = unsigned char *dst_ptr,
;# r6 = int stride
recon_b_ppc:
    mfspr   r0, 256                 ;# get old VRSAVE
    stw     r0, -8(r1)              ;# save old VRSAVE to stack
    oris    r0, r0, 0xf000          ;# mark v0..v3 as in use
    mtspr   256,r0                  ;# set VRSAVE

    vxor    v0, v0, v0              ;# v0 = 0, high bytes for the merges

    ;# 16-byte scratch below the stack pointer, clear of the VRSAVE slot.
    ;# NOTE(review): lvx/stvx and lwz/stw agree on the address only if r1
    ;# is 16-byte aligned -- confirm the ABI guarantees this.
    la      r10, -48(r1)            ;# buf

    ;# Pack the first 4 pels of each of the 4 pred rows into buf.
    lwz     r0, 0(r4)
    stw     r0, 0(r10)
    lwz     r0, 16(r4)
    stw     r0, 4(r10)
    lwz     r0, 32(r4)
    stw     r0, 8(r10)
    lwz     r0, 48(r4)
    stw     r0, 12(r10)

    lvx     v1,  0, r10             ;# v1 = pred = p0..p15

    lwz     r0, 0(r3)               ;# first word of diff row 0

    get_two_diff_rows               ;# v3 = d0..d7 (diff rows 0 and 1)

    vmrghb  v2, v0, v1              ;# v2 = 16-bit p0..p7
    vaddshs v2, v2, v3              ;# v2 = r0..r7

    lwzu    r0, 32(r3)              ;# first word of diff row 2

    get_two_diff_rows               ;# v3 = d8..d15 (diff rows 2 and 3)

    vmrglb  v1, v0, v1              ;# v1 = 16-bit p8..p15
    vaddshs v3, v3, v1              ;# v3 = r8..r15

    vpkshus v2, v2, v3              ;# v2 = 8-bit r0..r15
    stvx    v2,  0, r10             ;# 16 pels to dst from buf

    ;# Scatter the four packed rows to dst, one word per row.
    lwz     r0, 0(r10)
    stw     r0, 0(r5)
    lwz     r0, 4(r10)
    stwux   r0, r5, r6
    lwz     r0, 8(r10)
    stwux   r0, r5, r6
    lwz     r0, 12(r10)
    stwx    r0, r5, r6

    lwz     r12, -8(r1)             ;# restore old VRSAVE from stack
    mtspr   256, r12                ;# reset old VRSAVE

    blr