;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    ;# Exported entry points: luma (y) edge filters.
    .globl mbloop_filter_horizontal_edge_y_ppc
    .globl loop_filter_horizontal_edge_y_ppc
    .globl mbloop_filter_vertical_edge_y_ppc
    .globl loop_filter_vertical_edge_y_ppc

    ;# Exported entry points: chroma (uv) edge filters.
    .globl mbloop_filter_horizontal_edge_uv_ppc
    .globl loop_filter_horizontal_edge_uv_ppc
    .globl mbloop_filter_vertical_edge_uv_ppc
    .globl loop_filter_vertical_edge_uv_ppc

    ;# Exported entry points: simple filter.
    .globl loop_filter_simple_horizontal_edge_ppc
    .globl loop_filter_simple_vertical_edge_ppc

    .text
;# We often need to perform transposes (and other transpose-like operations)
;# on matrices of data. This is simplified by the fact that we usually
;# operate on hunks of data whose dimensions are powers of 2, or at least
;# divisible by highish powers of 2.
;#
;# These operations can be very confusing. They become more straightforward
;# when we think of them as permutations of address bits: Concatenate a
;# group of vector registers and think of it as occupying a block of
;# memory beginning at address zero. The low four bits 0...3 of the
;# address then correspond to position within a register, the higher-order
;# address bits select the register.
;#
;# Although register selection, at the code level, is arbitrary, things
;# are simpler if we use contiguous ranges of register numbers, simpler
;# still if the low-order bits of the register number correspond to
;# conceptual address bits. We do this whenever reasonable.
;#
;# A 16x16 transpose can then be thought of as an operation on
;# a 256-element block of memory. It takes 8 bits 0...7 to address this
;# memory and the effect of a transpose is to interchange address bit
;# 0 with 4, 1 with 5, 2 with 6, and 3 with 7. Bits 0...3 index the
;# column, which is interchanged with the row addressed by bits 4..7.
;#
;# The altivec merge instructions provide a rapid means of effecting
;# many of these transforms. They operate at three widths (8,16,32).
;# Writing V(x) for vector register #x, paired merges permute address
;# indices as follows.
;#
;# 0->1 1->2 2->3 3->(4+d) (4+s)->0:
;#
;#   vmrghb V( x), V( y), V( y + (1<<s))
;#   vmrglb V( x + (1<<d)), V( y), V( y + (1<<s))
;#
;#
;# =0= 1->2 2->3 3->(4+d) (4+s)->1:
;#
;#   vmrghh V( x), V( y), V( y + (1<<s))
;#   vmrglh V( x + (1<<d)), V( y), V( y + (1<<s))
;#
;#
;# =0= =1= 2->3 3->(4+d) (4+s)->2:
;#
;#   vmrghw V( x), V( y), V( y + (1<<s))
;#   vmrglw V( x + (1<<d)), V( y), V( y + (1<<s))
;#
;#
;# Unfortunately, there is no doubleword merge instruction.
;# The following sequence uses "vperm" as a substitute.
;# Assuming that the selection masks b_hihi and b_lolo (defined in LFppc.c)
;# are in registers Vhihi and Vlolo, we can also effect the permutation
;#
;# =0= =1= =2= 3->(4+d) (4+s)->3 by the sequence:
;#
;#   vperm V( x), V( y), V( y + (1<<s)), Vhihi
;#   vperm V( x + (1<<d)), V( y), V( y + (1<<s)), Vlolo
;#
;#
;# Except for bits s and d, the other relationships between register
;# number (= high-order part of address) bits are at the disposal of
;# the programmer.
;#

;# To avoid excess transposes, we filter all 3 vertical luma subblock
;# edges together. This requires a single 16x16 transpose, which, in
;# the above language, amounts to the following permutation of address
;# indices: 0<->4 1<->5 2<->6 3<->7, which we accomplish by
;# 4 iterations of the cyclic transform 0->1->2->3->4->5->6->7->0.
;#
;# Except for the fact that the destination registers get written
;# before we are done referencing the old contents, the cyclic transform
;# is effected by
;#
;#   x = 0; do {
;#     vmrghb V(2x), V(x), V(x+8);
;#     vmrglb V(2x+1), V(x), V(x+8);
;#   } while( ++x < 8);
;#
;# For clarity, and because we can afford it, we do this transpose
;# using all 32 registers, alternating the banks 0..15 and 16 .. 31,
;# leaving the final result in 16 .. 31, as the lower registers are
;# used in the filtering itself.
;#

;# Tpair A, B, X, Y
;#   A = vmrghb(X, Y)   byte-interleave of the high halves of X and Y
;#   B = vmrglb(X, Y)   byte-interleave of the low halves of X and Y
;# A is written before X and Y are read for B, so A must not be the
;# same register as X or Y.
.macro Tpair A, B, X, Y
    vmrghb  \A, \X, \Y
    vmrglb  \B, \X, \Y
.endm

;# Each step takes 8*2 = 16 instructions

;# t16_even: one cyclic step, reading bank v0..v15 and writing bank v16..v31.
;# Pair x merges V(x) with V(x+8) into V(16+2x), V(16+2x+1).
.macro t16_even
    Tpair v16,v17, v0,v8
    Tpair v18,v19, v1,v9
    Tpair v20,v21, v2,v10
    Tpair v22,v23, v3,v11
    Tpair v24,v25, v4,v12
    Tpair v26,v27, v5,v13
    Tpair v28,v29, v6,v14
    Tpair v30,v31, v7,v15
.endm

;# t16_odd: one cyclic step, reading bank v16..v31 and writing bank v0..v15.
;# Pair x merges V(16+x) with V(16+x+8) into V(2x), V(2x+1).
.macro t16_odd
    Tpair v0,v1, v16,v24
    Tpair v2,v3, v17,v25
    Tpair v4,v5, v18,v26
    Tpair v6,v7, v19,v27
    Tpair v8,v9, v20,v28
    Tpair v10,v11, v21,v29
    Tpair v12,v13, v22,v30
    Tpair v14,v15, v23,v31
.endm

;# Whole transpose takes 4*16 = 64 instructions

;# t16_full: four cyclic steps (odd, even, odd, even).  The first step
;# reads v16..v31 and the last step writes v16..v31, so input and output
;# both live in v16..v31; v0..v15 are overwritten as scratch.
.macro t16_full
    t16_odd
    t16_even
    t16_odd
    t16_even
.endm

;# Vertical edge filtering requires transposes. For the simple filter,
;# we need to convert 16 rows of 4 pels each into 4 registers of 16 pels
;# each. Writing 0 ... 63 for the pixel indices, the desired result is:
;#
;#   v0 = 0 1 ... 14 15
;#   v1 = 16 17 ... 30 31
;#   v2 = 32 33 ... 46 47
;#   v3 = 48 49 ... 62 63
;#
;# In frame-buffer memory, the layout is:
;#
;#   0 16 32 48
;#   1 17 33 49
;#   ...
;#   15 31 47 63.
;#
;# We begin by reading the data 32 bits at a time (using scalar operations)
;# into a temporary array, reading the rows of the array into vector registers,
;# with the following layout:
;#
;#   v0 = 0 16 32 48 4 20 36 52 8 24 40 56 12 28 44 60
;#   v1 = 1 17 33 49 5 21 ... 45 61
;#   v2 = 2 18 ... 46 62
;#   v3 = 3 19 ... 47 63
;#
;# From the "address-bit" perspective discussed above, we simply need to
;# interchange bits 0 <-> 4 and 1 <-> 5, leaving bits 2 and 3 alone.
;# In other words, we transpose each of the four 4x4 submatrices.
;#
;# This transformation is its own inverse, and we need to perform it
;# again before writing the pixels back into the frame buffer.
;#
;# It acts in place on registers v0...v3, uses v4...v7 as temporaries,
;# and assumes that v14/v15 contain the b_hihi/b_lolo selectors
;# defined above. We think of both groups of 4 registers as having
;# "addresses" {0,1,2,3} * 16.
;#
;# Parameters:
;#   Vlo  permute-control register used for v0 and v2 in the final step
;#   Vhi  permute-control register used for v1 and v3 in the final step
;# NOTE(review): the comment above names v14/v15 as b_hihi/b_lolo, and the
;# generic vperm recipe uses the "hihi" selector for the first register of
;# each pair, which here is \Vlo -- confirm the argument order at call sites.
.macro Transpose4times4x4 Vlo, Vhi

    ;# d=s=0 0->1 1->2 2->3 3->4 4->0 =5=

    vmrghb  v4, v0, v1
    vmrglb  v5, v0, v1
    vmrghb  v6, v2, v3
    vmrglb  v7, v2, v3

    ;# d=0 s=1 =0= 1->2 2->3 3->4 4->5 5->1

    vmrghh  v0, v4, v6
    vmrglh  v1, v4, v6
    vmrghh  v2, v5, v7
    vmrglh  v3, v5, v7

    ;# d=s=0 =0= =1= 2->3 3->4 4->2 =5=

    vmrghw  v4, v0, v1
    vmrglw  v5, v0, v1
    vmrghw  v6, v2, v3
    vmrglw  v7, v2, v3

    ;# d=0 s=1 =0= =1= =2= 3->4 4->5 5->3

    vperm   v0, v4, v6, \Vlo
    vperm   v1, v4, v6, \Vhi
    vperm   v2, v5, v7, \Vlo
    vperm   v3, v5, v7, \Vhi
.endm
;# end Transpose4times4x4


;# Normal mb vertical edge filter transpose.
;#
;# We read 8 columns of data, initially in the following pattern:
;#
;#   (0,0) (1,0) ... (7,0) (0,1) (1,1) ... (7,1)
;#   (0,2) (1,2) ... (7,2) (0,3) (1,3) ... (7,3)
;#   ...
;#   (0,14) (1,14) .. (7,14) (0,15) (1,15) .. (7,15)
;#
;# and wish to convert to:
;#
;#   (0,0) ... (0,15)
;#   (1,0) ... (1,15)
;#   ...
;#   (7,0) ... (7,15).
;#
;# In "address bit" language, we wish to map
;#
;#   0->4 1->5 2->6 3->0 4->1 5->2 6->3, i.e., I -> (I+4) mod 7.
;#
;# This can be accomplished by 4 iterations of the cyclic transform
;#
;#   I -> (I+1) mod 7;
;#
;# each iteration can be realized by (d=0, s=2):
;#
;#   x = 0; do Tpair( V(2x),V(2x+1), V(x),V(x+4)) while( ++x < 4);
;#
;# The input/output is in registers v0...v7. We use v10...v17 as mirrors;
;# preserving v8 = sign converter.
;#
;# Inverse transpose is similar, except here I -> (I+3) mod 7 and the
;# result lands in the "mirror" registers v10...v17
;#

;# t8x16_odd: one cyclic step, reading v0..v7 and writing mirrors v10..v17.
.macro t8x16_odd
    Tpair v10, v11, v0, v4
    Tpair v12, v13, v1, v5
    Tpair v14, v15, v2, v6
    Tpair v16, v17, v3, v7
.endm

;# t8x16_even: one cyclic step, reading mirrors v10..v17 and writing v0..v7.
.macro t8x16_even
    Tpair v0, v1, v10, v14
    Tpair v2, v3, v11, v15
    Tpair v4, v5, v12, v16
    Tpair v6, v7, v13, v17
.endm

;# transpose8x16_fwd: four cyclic steps; input and result are both in
;# v0..v7, with v10..v17 overwritten as scratch.
.macro transpose8x16_fwd
    t8x16_odd
    t8x16_even
    t8x16_odd
    t8x16_even
.endm

;# transpose8x16_inv: three cyclic steps; input is v0..v7 and the result
;# is left in v10..v17 (the last step is t8x16_odd).
.macro transpose8x16_inv
    t8x16_odd
    t8x16_even
    t8x16_odd
.endm

;# Transpose16x16: full 16x16 byte transpose of v16..v31, in place, using
;# v0..v15 as scratch.  The instruction sequence is exactly the one that
;# t16_full expands to (t16_odd, t16_even, t16_odd, t16_even = 64 merges),
;# so invoke that macro instead of repeating the sequence by hand.
.macro Transpose16x16
    t16_full
.endm

;# load_g loads a global vector (whose address is in the local variable Gptr)
;# into vector register Vreg. Trashes r0
.macro load_g Vreg, Gptr
    lwz     r0, \Gptr       ;# r0 = address held in the local variable
    lvx     \Vreg, 0, r0    ;# Vreg = 16 bytes at that address
.endm

;# exploit the saturation here. if the answer is negative
;# it will be clamped to 0.
;# ORing 0 with a positive
;# number will be the positive number (abs)
;# RES = abs( A-B), trashes TMP
;# One of the two saturating differences is always 0 per byte, so the OR
;# of the two yields the absolute difference.  RES is written before B-A
;# is computed, so RES must not be the same register as A or B.
.macro Abs RES, TMP, A, B
    vsububs \RES, \A, \B    ;# max(A-B, 0)
    vsububs \TMP, \B, \A    ;# max(B-A, 0)
    vor     \RES, \RES, \TMP
.endm

;# RES = Max( RES, abs( A-B)), trashes TMP
;# Folds both saturating differences into the running maximum, which
;# avoids needing a third register for abs(A-B).
.macro max_abs RES, TMP, A, B
    vsububs \TMP, \A, \B
    vmaxub  \RES, \RES, \TMP
    vsububs \TMP, \B, \A
    vmaxub  \RES, \RES, \TMP
.endm

;# Masks: build the per-pixel filter decision masks.
;# Inputs (per the operand comments below):
;#   v0..v7 = P3 P2 P1 P0 Q0 Q1 Q2 Q3
;#   v8 = flimit, v9 = limit, v10 = thresh
;# Outputs:
;#   v10 = HEV mask (max(|P1-P0|, |Q1-Q0|) > thresh)
;#   v8  = mask that is true where flimit or limit is exceeded
;# Trashes v9, v13, v14.
.macro Masks
    ;# build masks
    ;# input is all 8 bit unsigned (0-255). need to
    ;# do abs(vala-valb) > limit. but no need to compare each
    ;# value to the limit. find the max of the absolute differences
    ;# and compare that to the limit.
    ;# First hev
    Abs     v14, v13, v2, v3    ;# |P1 - P0|
    max_abs v14, v13, v5, v4    ;# |Q1 - Q0|

    vcmpgtub v10, v14, v10      ;# HEV = true if thresh exceeded

    ;# Next limit
    ;# v14 still holds max(|P1-P0|, |Q1-Q0|), so those two differences
    ;# take part in the limit test as well.
    max_abs v14, v13, v0, v1    ;# |P3 - P2|
    max_abs v14, v13, v1, v2    ;# |P2 - P1|
    max_abs v14, v13, v6, v5    ;# |Q2 - Q1|
    max_abs v14, v13, v7, v6    ;# |Q3 - Q2|

    vcmpgtub v9, v14, v9        ;# R = true if limit exceeded

    ;# flimit
    Abs     v14, v13, v3, v4    ;# |P0 - Q0|

    vcmpgtub v8, v14, v8        ;# X = true if flimit exceeded

    vor     v8, v8, v9          ;# R = true if flimit or limit exceeded
    ;# done building masks
.endm

398474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro build_constants RFL, RLI, RTH, FL, LI, TH 399474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# build constants: load the flimit/limit/thresh vectors addressed by RFL/RLI/RTH into FL/LI/TH and leave 0x80 in every byte of v11 (trashes v12) 400474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \FL, 0, \RFL ;# flimit 401474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \LI, 0, \RLI ;# limit 402474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \TH, 0, \RTH ;# thresh 403474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 404474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v11, 8 ;# 8 in every byte 405474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v12, 4 ;# per-byte shift count 406474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vslb v11, v11, v12 ;# 8 << 4 in each byte: 0x80808080808080808080808080808080 407474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 408474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 409474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro load_data_y 410474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# setup strides/pointers to be able to access 411474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# all of the data (r3 -> Q0 row, r4 = stride; overwrites r0, r5, r6 and r7) 412474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r5, r4, r4 ;# r5 = 2 * stride 413474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org sub r6, r3, r5 ;# r6 -> 2 rows back 414474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org neg r7, r4 ;# r7 = -stride 415474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 416474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# load 16 pixels worth of data to work on 417474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org sub r0, r6, r5 ;# r0 -> 4 rows back (temp) 418474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v0, 0, r0 ;# P3 (read only) 419474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v1, r7, r6 ;# P2 420474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v2, 0,
r6 ;# P1 421474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v3, r7, r3 ;# P0 422474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v4, 0, r3 ;# Q0 423474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v5, r4, r3 ;# Q1 424474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v6, r5, r3 ;# Q2 425474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r0, r3, r5 ;# r0 -> 2 rows fwd (temp) 426474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v7, r4, r0 ;# Q3 (read only) 427474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 428474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 429474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Expects 430474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# v10 == HEV 431474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# v13 == tmp 432474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# v14 == tmp ;# also reads v8 (mask: bytes that are set are left unfiltered) and v11 (0x80 per byte); trashes v8, v9 and v15; leaves P0/Q0/P1/Q1 in signed form and the shifted f1 in v13 433474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro common_adjust P0, Q0, P1, Q1, HEV_PRESENT 434474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor \P1, \P1, v11 ;# SP1 435474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor \P0, \P0, v11 ;# SP0 436474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor \Q0, \Q0, v11 ;# SQ0 437474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor \Q1, \Q1, v11 ;# SQ1 438474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 439474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs v13, \P1, \Q1 ;# f = c (P1 - Q1) 440474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.if \HEV_PRESENT 441474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vand v13, v13, v10 ;# f &= hev 442474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endif 443474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs v14, \Q0, \P0 ;# -126 <= X = Q0-P0 <= +126
444474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v13, v13, v14 445474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v13, v13, v14 446474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v13, v13, v14 ;# A = c( c(P1-Q1) + 3*(Q0-P0)) 447474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 448474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vandc v13, v13, v8 ;# f &= ~v8, i.e. f is zeroed where flimit/limit was exceeded 449474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 450474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v8, 3 ;# the mask in v8 has been consumed; v8 now holds the constant 3 451474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v9, 4 ;# constant 4 452474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 453474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v14, v13, v9 ;# f1 = c (f+4) 454474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v15, v13, v8 ;# f2 = c (f+3) 455474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 456474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrab v13, v14, v8 ;# f1 >>= 3 (result stays in v13 for the caller's outer-tap step) 457474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrab v15, v15, v8 ;# f2 >>= 3 458474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 459474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs \Q0, \Q0, v13 ;# u1 = c (SQ0 - f1) 460474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs \P0, \P0, v15 ;# u2 = c (SP0 + f2) 461474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 462474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 463474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vp8_mbfilter 464474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Masks 465474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 466474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# start the filtering here (P2..Q2 in v1..v6 are modified; v8-v10 and v12-v15 are trashed) 467474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v1, v1, v11 ;# SP2
468474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v2, v2, v11 ;# SP1 469474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v3, v3, v11 ;# SP0 470474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v4, v4, v11 ;# SQ0 471474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v5, v5, v11 ;# SQ1 472474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v6, v6, v11 ;# SQ2 473474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 474474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# add outer taps if we have high edge variance 475474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs v13, v2, v5 ;# f = c (SP1-SQ1) 476474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 477474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs v14, v4, v3 ;# SQ0-SP0 478474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v13, v13, v14 479474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v13, v13, v14 480474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v13, v13, v14 ;# f = c( c(SP1-SQ1) + 3*(SQ0-SP0)) 481474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 482474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vandc v13, v13, v8 ;# f &= ~v8 (zero f where flimit/limit was exceeded) 483474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vand v15, v13, v10 ;# f2 = f & hev 484474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 485474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# save bottom 3 bits so that we round one side +4 and the other +3 486474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v8, 3 487474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v9, 4 488474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 489474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v14, v15, v9 ;# f1 = c (f+4) 490474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v15, v15, v8 ;# f2 = c
(f+3) 491474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 492474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrab v14, v14, v8 ;# f1 >>= 3 493474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrab v15, v15, v8 ;# f2 >>= 3 494474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 495474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs v4, v4, v14 ;# u1 = c (SQ0 - f1) 496474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v3, v3, v15 ;# u2 = c (SP0 + f2) 497474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 498474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# only apply wider filter if not high edge variance 499474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vandc v13, v13, v10 ;# f &= ~hev 500474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 501474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v9, 2 ;# shift count 2 502474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vnor v8, v8, v8 ;# ~3 = 0xfc in every byte 503474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrb v9, v8, v9 ;# 0x3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f3f 504474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vupkhsb v9, v9 ;# 0x003f003f003f003f003f003f003f003f 505474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v8, 9 ;# base multiplier 9 in every byte 506474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 507474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# roughly 1/7th difference across boundary 508474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltish v10, 7 ;# halfword shift count 7; the HEV mask in v10 is not used again 509474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmulosb v14, v8, v13 ;# 9 * f for the odd-numbered bytes (16-bit products) 510474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmulesb v15, v8, v13 ;# 9 * f for the even-numbered bytes 511474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddshs v14, v14, v9 ;# += 63 512474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddshs v15, v15, v9 ;# += 63 513474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
vsrah v14, v14, v10 ;# >>= 7 514474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrah v15, v15, v10 ;# >>= 7 515474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmrglh v10, v15, v14 ;# re-interleave even/odd results, low half 516474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmrghh v15, v15, v14 ;# re-interleave even/odd results, high half 517474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 518474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vpkshss v10, v15, v10 ;# X = saturated down to bytes 519474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 520474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs v6, v6, v10 ;# SQ2 = c (SQ2 - X) 521474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v1, v1, v10 ;# SP2 = c (SP2 + X) 522474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 523474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v6, v6, v11 ;# Q2 back to unsigned 524474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v1, v1, v11 ;# P2 back to unsigned 525474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 526474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# roughly 2/7th difference across boundary 527474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltish v10, 7 ;# v10 was reused for X above; reload the shift count 528474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddubm v12, v8, v8 ;# multiplier 18 529474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmulosb v14, v12, v13 ;# 18 * f for the odd-numbered bytes 530474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmulesb v15, v12, v13 ;# 18 * f for the even-numbered bytes 531474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddshs v14, v14, v9 ;# += 63 532474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddshs v15, v15, v9 ;# += 63 533474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrah v14, v14, v10 ;# >>= 7 534474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrah v15, v15, v10 ;# >>= 7 535474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmrglh v10, v15, v14 536474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmrghh v15, v15, v14
537474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 538474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vpkshss v10, v15, v10 ;# X = saturated down to bytes 539474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 540474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs v5, v5, v10 ;# SQ1 = c (SQ1 - X) 541474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v2, v2, v10 ;# SP1 = c (SP1 + X) 542474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 543474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v5, v5, v11 ;# Q1 back to unsigned 544474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v2, v2, v11 ;# P1 back to unsigned 545474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 546474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# roughly 3/7th difference across boundary 547474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltish v10, 7 ;# reload the shift count 548474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddubm v12, v12, v8 ;# multiplier 27 (18 + 9) 549474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmulosb v14, v12, v13 ;# 27 * f for the odd-numbered bytes 550474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmulesb v15, v12, v13 ;# 27 * f for the even-numbered bytes 551474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddshs v14, v14, v9 ;# += 63 552474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddshs v15, v15, v9 ;# += 63 553474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrah v14, v14, v10 ;# >>= 7 554474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrah v15, v15, v10 ;# >>= 7 555474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmrglh v10, v15, v14 556474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmrghh v15, v15, v14 557474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 558474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vpkshss v10, v15, v10 ;# X = saturated down to bytes 559474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 560474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs
v4, v4, v10 ;# SQ0 = c (SQ0 - X) 561474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v3, v3, v10 ;# SP0 = c (SP0 + X) 562474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 563474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v4, v4, v11 ;# Q0 back to unsigned 564474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v3, v3, v11 ;# P0 back to unsigned 565474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 566474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 567474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro SBFilter 568474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Masks 569474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 570474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org common_adjust v3, v4, v2, v5, 1 ;# P0=v3, Q0=v4, P1=v2, Q1=v5, with the hev gate enabled 571474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 572474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# outer tap adjustments 573474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v8, 1 574474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 575474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddubm v13, v13, v8 ;# f += 1 (f is the shifted f1 that common_adjust left in v13) 576474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrab v13, v13, v8 ;# f >>= 1 577474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 578474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vandc v13, v13, v10 ;# f &= ~hev 579474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 580474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs v5, v5, v13 ;# u1 = c (SQ1 - f) 581474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs v2, v2, v13 ;# u2 = c (SP1 + f) 582474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 583474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v2, v2, v11 ;# P1 back to unsigned 584474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v3, v3, v11 ;# P0 back to unsigned 585474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v4, v4, v11
586474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v5, v5, v11 587474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 588474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 589474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 590474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgmbloop_filter_horizontal_edge_y_ppc: 591474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 592474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff ;# set the upper 16 VRSAVE bits (v0-v15 in use) 593474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 594474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 595474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org build_constants r5, r6, r7, v8, v9, v10 ;# r5/r6/r7 are used as the flimit/limit/thresh addresses; must precede load_data_y, which overwrites r5-r7 596474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 597474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org load_data_y ;# r3 -> Q0 row, r4 = stride; P3..Q3 end up in v0..v7 598474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 599474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vp8_mbfilter ;# updates P2..Q2 (v1..v6) 600474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 601474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v1, r7, r6 ;# P2 602474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v2, 0, r6 ;# P1 603474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v3, r7, r3 ;# P0 604474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v4, 0, r3 ;# Q0 605474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v5, r4, r3 ;# Q1 606474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v6, r5, r3 ;# Q2 607474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 608474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 609474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 610474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr 611474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
612474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 613474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char *s 614474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int p 615474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 const signed char *flimit 616474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 const signed char *limit 617474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 const signed char *thresh 618474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_horizontal_edge_y_ppc: 619474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 620474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff ;# set the upper 16 VRSAVE bits (v0-v15 in use) 621474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 622474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 623474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org build_constants r5, r6, r7, v8, v9, v10 ;# must precede load_data_y, which overwrites r5-r7 with stride offsets 624474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 625474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org load_data_y ;# P3..Q3 -> v0..v7 626474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 627474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org SBFilter ;# only P1..Q1 (v2..v5) are modified, hence four stores below 628474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 629474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v2, 0, r6 ;# P1 630474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v3, r7, r3 ;# P0 631474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v4, 0, r3 ;# Q0 632474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v5, r4, r3 ;# Q1 633474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 634474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 635474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 636474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr
637474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 638474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Filtering a vertical mb. Each mb is aligned on a 16 byte boundary. 639474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# So we can read in an entire mb aligned. However, if we want to filter the mb 640474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# edge we run into problems. For the loopfilter we require 4 bytes before the mb 641474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# and 4 after for a total of 8 bytes. Reading 16 bytes in order to get 4 is a bit 642474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# of a waste. So this is an even uglier way to get around that. 643474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Using the regular register file, words are read in and then saved back out to 644474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# memory to align and order them up. Then they are read in using the 645474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# vector register file. ;# RLVmb V, R: steps r3 forward two rows (stride r4), copies the 8 bytes at r3-4..r3+3 of each row into the 16-byte buffer at R, then loads that buffer into V. ;# WLVmb V, R: stores V to the buffer at R and writes the two 8-byte groups back in the opposite order, so it is meant to be run with r4 negated. ;# Both macros trash r0 and advance r3.
646474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro RLVmb V, R 647474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwzux r0, r3, r4 648474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0, 4(\R) 649474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0,-4(r3) 650474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0, 0(\R) 651474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwzux r0, r3, r4 652474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0,12(\R) 653474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0,-4(r3) 654474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0, 8(\R) 655474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \V, 0, \R 656474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 657474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 658474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro WLVmb V, R 659474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx \V, 0, \R 660474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0,12(\R) 661474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stwux r0, r3, r4 662474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0, 8(\R) 663474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0,-4(r3) 664474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0, 4(\R) 665474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stwux r0, r3, r4 666474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0, 0(\R) 667474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0,-4(r3) 668474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 669474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 670474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 671474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char *s
672474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int p 673474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 const signed char *flimit 674474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 const signed char *limit 675474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 const signed char *thresh ;# Flow: gather 8 bytes around the edge from each of 16 rows (eight RLVmb calls, two rows per vector), run transpose8x16_fwd, filter with vp8_mbfilter, run transpose8x16_inv, then step r3 one row further, negate r4 and scatter the rows back bottom-up with eight WLVmb calls. 676474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgmbloop_filter_vertical_edge_y_ppc: 677474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 678474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff ;# upper 16 VRSAVE bits: v0-v15 679474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ori r12, r12, 0xc000 ;# plus the next two bits: v16 and v17 680474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 681474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 682474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org la r9, -48(r1) ;# temporary space for reading in vectors (below the stack pointer; NOTE(review): relies on r1 being 16-byte aligned and on that area being safe to use - confirm for the target ABI) 683474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org sub r3, r3, r4 ;# start one row early: RLVmb advances r3 by the stride before each read 684474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 685474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RLVmb v0, r9 ;# rows 0-1 686474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RLVmb v1, r9 ;# rows 2-3 687474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RLVmb v2, r9 ;# rows 4-5 688474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RLVmb v3, r9 ;# rows 6-7 689474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RLVmb v4, r9 ;# rows 8-9 690474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RLVmb v5, r9 ;# rows 10-11 691474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RLVmb v6, r9 ;# rows 12-13 692474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RLVmb v7, r9 ;# rows 14-15 693474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 694474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org transpose8x16_fwd ;# NOTE(review): macro defined outside this section; presumably leaves P3..Q3 in v0..v7 for vp8_mbfilter - confirm 695474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 696474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org
build_constants r5, r6, r7, v8, v9, v10 697474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 698474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vp8_mbfilter 699474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 700474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org transpose8x16_inv 701474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 702474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r3, r3, r4 703474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org neg r4, r4 704474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 705474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WLVmb v17, r9 706474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WLVmb v16, r9 707474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WLVmb v15, r9 708474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WLVmb v14, r9 709474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WLVmb v13, r9 710474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WLVmb v12, r9 711474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WLVmb v11, r9 712474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WLVmb v10, r9 713474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 714474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 715474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 716474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr 717474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# RL V, R, P: load vector V from address R, then advance R by P 718474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro RL V, R, P 719474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \V, 0, \R ;# V = vector at R 720474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add \R, \R, \P ;# R += P 721474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 722474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# WL V, R, P: store vector V at address R, then advance R by P 723474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro WL V, R, P
724474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx \V, 0, \R 725474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add \R, \R, \P 726474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 727474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 728474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro Fil P3, P2, P1, P0, Q0, Q1, Q2, Q3 729474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# K = |P0-P1| already (in v4). Also reads v0 (thresh), v1 (used as the constant 1 for +1 and >>1), v2 (int_l), v3 (edge_l) and v11 (0x80 per byte); trashes v5, v8-v10 and v13-v15 and leaves the next K in v4 730474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Abs v14, v13, \Q0, \Q1 ;# M = |Q0-Q1| 731474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmaxub v14, v14, v4 ;# M = max( |P0-P1|, |Q0-Q1|) 732474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vcmpgtub v10, v14, v0 ;# HEV = true if thresh (v0) exceeded 733474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 734474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Abs v4, v5, \Q2, \Q3 ;# K = |Q2-Q3| = next |P0-P1| 735474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 736474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org max_abs v14, v13, \Q1, \Q2 ;# M = max( M, |Q1-Q2|) 737474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org max_abs v14, v13, \P1, \P2 ;# M = max( M, |P1-P2|) 738474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org max_abs v14, v13, \P2, \P3 ;# M = max( M, |P2-P3|) 739474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 740474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vmaxub v14, v14, v4 ;# M = max interior abs diff 741474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vcmpgtub v9, v14, v2 ;# M = true if int_l exceeded 742474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 743474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Abs v14, v13, \P0, \Q0 ;# X = Abs( P0-Q0) 744474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vcmpgtub v8, v14, v3 ;# X = true if edge_l exceeded 745474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vor v8, v8, v9 ;# M = true
if edge_l or int_l exceeded 746474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 747474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# replace P1,Q1 w/signed versions 748474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org common_adjust \P0, \Q0, \P1, \Q1, 1 ;# filters P0/Q0 and leaves the shifted f1 in v13 749474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 750474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddubm v13, v13, v1 ;# -16 <= M <= 15, saturation irrelevant 751474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsrab v13, v13, v1 ;# (M + 1) >> 1 752474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vandc v13, v13, v10 ;# adjust P1,Q1 by (M+1)>>1 if ! hev 753474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vsubsbs \Q1, \Q1, v13 ;# SQ1 = c (SQ1 - adj) 754474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vaddsbs \P1, \P1, v13 ;# SP1 = c (SP1 + adj) 755474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 756474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor \P1, \P1, v11 ;# P1 757474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor \P0, \P0, v11 ;# P0 758474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor \Q0, \Q0, v11 ;# Q0 759474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor \Q1, \Q1, v11 ;# Q1 760474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 761474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 762474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 763474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 764474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char *s 765474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int p 766474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 const signed char *flimit 767474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 const signed char *limit 768474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 const signed char *thresh
769474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_vertical_edge_y_ppc: 770474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 771474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff 772474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ori r12, r12, 0xffff 773474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 774474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 775474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org addi r9, r3, 0 776474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v16, r9, r4 777474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v17, r9, r4 778474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v18, r9, r4 779474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v19, r9, r4 780474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v20, r9, r4 781474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v21, r9, r4 782474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v22, r9, r4 783474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v23, r9, r4 784474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v24, r9, r4 785474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v25, r9, r4 786474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v26, r9, r4 787474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v27, r9, r4 788474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v28, r9, r4 789474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v29, r9, r4 790474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org RL v30, r9, r4 791474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v31, 0, r9 792474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 793474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Transpose16x16 
794474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 795474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v1, 1 796474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 797474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org build_constants r5, r6, r7, v3, v2, v0 798474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 799474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Abs v4, v5, v19, v18 ;# K(v14) = first |P0-P1| 800474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 801474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Fil v16, v17, v18, v19, v20, v21, v22, v23 802474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Fil v20, v21, v22, v23, v24, v25, v26, v27 803474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Fil v24, v25, v26, v27, v28, v29, v30, v31 804474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 805474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Transpose16x16 806474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 807474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org addi r9, r3, 0 808474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v16, r9, r4 809474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v17, r9, r4 810474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v18, r9, r4 811474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v19, r9, r4 812474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v20, r9, r4 813474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v21, r9, r4 814474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v22, r9, r4 815474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v23, r9, r4 816474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v24, r9, r4 817474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v25, r9, r4 818474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v26, r9, r4 
819474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v27, r9, r4 820474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v28, r9, r4 821474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v29, r9, r4 822474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org WL v30, r9, r4 823474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v31, 0, r9 824474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 825474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 826474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 827474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr 828474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 829474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- UV FILTERING -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- 830474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro active_chroma_sel V 831474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org andi. 
r7, r3, 8 ;# row origin modulo 16 832474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r7, r7, r7 ;# selects selectors 833474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lis r12, _chromaSelectors@ha 834474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org la r0, _chromaSelectors@l(r12) 835474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwzux r0, r7, r0 ;# leave selector addr in r7 836474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 837474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \V, 0, r0 ;# mask to concatenate active U,V pels 838474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 839474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 840474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro hread_uv Dest, U, V, Offs, VMask 841474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \U, \Offs, r3 842474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \V, \Offs, r4 843474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vperm \Dest, \U, \V, \VMask ;# Dest = active part of U then V 844474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 845474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 846474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro hwrite_uv New, U, V, Offs, Umask, Vmask 847474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vperm \U, \New, \U, \Umask ;# Combine new pels with siblings 848474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vperm \V, \New, \V, \Vmask 849474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx \U, \Offs, r3 ;# Write to frame buffer 850474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx \V, \Offs, r4 851474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 852474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 853474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# Process U,V in parallel. 
854474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro load_chroma_h 855474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org neg r9, r5 ;# r9 = -1 * stride 856474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r8, r9, r9 ;# r8 = -2 * stride 857474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r10, r5, r5 ;# r10 = 2 * stride 858474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 859474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org active_chroma_sel v12 860474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 861474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# P3, Q3 are read-only; need not save addresses or sibling pels 862474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r6, r8, r8 ;# r6 = -4 * stride 863474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hread_uv v0, v14, v15, r6, v12 864474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r6, r10, r5 ;# r6 = 3 * stride 865474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hread_uv v7, v14, v15, r6, v12 866474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 867474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# Others are read/write; save addresses and sibling pels 868474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 869474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r6, r8, r9 ;# r6 = -3 * stride 870474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hread_uv v1, v16, v17, r6, v12 871474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hread_uv v2, v18, v19, r8, v12 872474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hread_uv v3, v20, v21, r9, v12 873474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hread_uv v4, v22, v23, 0, v12 874474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hread_uv v5, v24, v25, r5, v12 875474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hread_uv v6, v26, v27, r10, v12 
876474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 877474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 878474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro uresult_sel V 879474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org load_g \V, 4(r7) 880474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 881474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 882474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vresult_sel V 883474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org load_g \V, 8(r7) 884474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 885474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 886474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# always write P1,P0,Q0,Q1 887474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro store_chroma_h 888474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org uresult_sel v11 889474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vresult_sel v12 890474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hwrite_uv v2, v18, v19, r8, v11, v12 891474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hwrite_uv v3, v20, v21, r9, v11, v12 892474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hwrite_uv v4, v22, v23, 0, v11, v12 893474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hwrite_uv v5, v24, v25, r5, v11, v12 894474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 895474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 896474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 897474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char *u 898474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 unsigned char *v 899474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int p 900474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 const signed char *flimit 
901474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 const signed char *limit 902474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 const signed char *thresh 903474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgmbloop_filter_horizontal_edge_uv_ppc: 904474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 905474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff 906474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ori r12, r12, 0xffff 907474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 908474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 909474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org build_constants r6, r7, r8, v8, v9, v10 910474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 911474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org load_chroma_h 912474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 913474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vp8_mbfilter 914474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 915474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org store_chroma_h 916474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 917474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hwrite_uv v1, v16, v17, r6, v11, v12 ;# v1 == P2 918474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org hwrite_uv v6, v26, v27, r10, v11, v12 ;# v6 == Q2 919474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 920474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 921474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 922474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr 923474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 924474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 925474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned 
char *u 926474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 unsigned char *v 927474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int p 928474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 const signed char *flimit 929474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 const signed char *limit 930474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 const signed char *thresh 931474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_horizontal_edge_uv_ppc: 932474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 933474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff 934474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ori r12, r12, 0xffff 935474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 936474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 937474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org build_constants r6, r7, r8, v8, v9, v10 938474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 939474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org load_chroma_h 940474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 941474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org SBFilter 942474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 943474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org store_chroma_h 944474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 945474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 946474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 947474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr 948474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 949474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro R V, R 950474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwzux r0, r3, r5 
951474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0, 4(\R) 952474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0,-4(r3) 953474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0, 0(\R) 954474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwzux r0, r4, r5 955474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0,12(\R) 956474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0,-4(r4) 957474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0, 8(\R) 958474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx \V, 0, \R 959474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 960474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 961474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 962474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro W V, R 963474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx \V, 0, \R 964474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0,12(\R) 965474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stwux r0, r4, r5 966474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0, 8(\R) 967474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0,-4(r4) 968474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0, 4(\R) 969474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stwux r0, r3, r5 970474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0, 0(\R) 971474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0,-4(r3) 972474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 973474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 974474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro chroma_vread R 975474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org sub r3, r3, r5 ;# back up one line for simplicity 976474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org sub r4, r4, r5 
977474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 978474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org R v0, \R 979474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org R v1, \R 980474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org R v2, \R 981474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org R v3, \R 982474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org R v4, \R 983474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org R v5, \R 984474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org R v6, \R 985474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org R v7, \R 986474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 987474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org transpose8x16_fwd 988474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 989474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 990474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro chroma_vwrite R 991474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 992474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org transpose8x16_inv 993474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 994474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r3, r3, r5 995474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r4, r4, r5 996474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org neg r5, r5 ;# Write rows back in reverse order 997474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 998474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org W v17, \R 999474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org W v16, \R 1000474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org W v15, \R 1001474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org W v14, \R 1002474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org W v13, \R 1003474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org W v12, \R 
1004474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org W v11, \R 1005474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org W v10, \R 1006474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 1007474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1008474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 1009474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char *u 1010474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 unsigned char *v 1011474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int p 1012474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 const signed char *flimit 1013474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 const signed char *limit 1014474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 const signed char *thresh 1015474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgmbloop_filter_vertical_edge_uv_ppc: 1016474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 1017474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff 1018474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ori r12, r12, 0xc000 1019474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 1020474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1021474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org la r9, -48(r1) ;# temporary space for reading in vectors 1022474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1023474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org chroma_vread r9 1024474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1025474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org build_constants r6, r7, r8, v8, v9, v10 1026474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1027474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vp8_mbfilter 
1028474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1029474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org chroma_vwrite r9 1030474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1031474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 1032474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1033474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr 1034474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1035474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 1036474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char *u 1037474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 unsigned char *v 1038474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 int p 1039474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r6 const signed char *flimit 1040474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r7 const signed char *limit 1041474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r8 const signed char *thresh 1042474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_vertical_edge_uv_ppc: 1043474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 1044474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff 1045474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ori r12, r12, 0xc000 1046474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 1047474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1048474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org la r9, -48(r1) ;# temporary space for reading in vectors 1049474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1050474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org chroma_vread r9 1051474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1052474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 
build_constants r6, r7, r8, v8, v9, v10 1053474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1054474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org SBFilter 1055474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1056474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org chroma_vwrite r9 1057474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1058474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 1059474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1060474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr 1061474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1062474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# -=-=-=-=-=-=-=-=-=-=-=-=-=-= SIMPLE LOOP FILTER =-=-=-=-=-=-=-=-=-=-=-=-=-=- 1063474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1064474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro vp8_simple_filter 1065474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Abs v14, v13, v1, v2 ;# M = abs( P0 - Q0) 1066474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vcmpgtub v8, v14, v8 ;# v5 = true if _over_ limit 1067474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1068474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# preserve unsigned v0 and v3 1069474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org common_adjust v1, v2, v0, v3, 0 1070474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1071474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v1, v1, v11 1072474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v2, v2, v11 ;# cvt Q0, P0 back to pels 1073474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 1074474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1075474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro simple_vertical 1076474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org addi r8, 0, 16 
1077474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org addi r7, r5, 32 1078474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1079474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v0, 0, r5 1080474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v1, r8, r5 1081474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v2, 0, r7 1082474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v3, r8, r7 1083474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1084474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lis r12, _B_hihi@ha 1085474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org la r0, _B_hihi@l(r12) 1086474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v16, 0, r0 1087474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1088474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lis r12, _B_lolo@ha 1089474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org la r0, _B_lolo@l(r12) 1090474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v17, 0, r0 1091474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1092474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Transpose4times4x4 v16, v17 1093474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vp8_simple_filter 1094474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1095474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v0, v0, v11 1096474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vxor v3, v3, v11 ;# cvt Q0, P0 back to pels 1097474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1098474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org Transpose4times4x4 v16, v17 1099474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1100474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v0, 0, r5 1101474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v1, r8, r5 1102474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx 
v2, 0, r7 1103474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v3, r8, r7 1104474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 1105474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1106474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 1107474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char *s 1108474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r4 int p 1109474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r5 const signed char *flimit 1110474eb7536515fb785e925cc9375d22817c416851hclam@chromium.orgloop_filter_simple_horizontal_edge_ppc: 1111474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mfspr r11, 256 ;# get old VRSAVE 1112474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org oris r12, r11, 0xffff 1113474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r12 ;# set VRSAVE 1114474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1115474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org ;# build constants 1116474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v8, 0, r5 ;# flimit 1117474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1118474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v11, 8 1119474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vspltisb v12, 4 1120474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vslb v11, v11, v12 ;# 0x80808080808080808080808080808080 1121474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1122474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org neg r5, r4 ;# r5 = -1 * stride 1123474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org add r6, r5, r5 ;# r6 = -2 * stride 1124474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1125474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v0, r6, r3 ;# v0 = P1 = 16 pels two rows above edge 1126474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx 
v1, r5, r3 ;# v1 = P0 = 16 pels one row above edge 1127474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v2, 0, r3 ;# v2 = Q0 = 16 pels one row below edge 1128474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lvx v3, r4, r3 ;# v3 = Q1 = 16 pels two rows below edge 1129474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1130474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org vp8_simple_filter 1131474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1132474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v1, r5, r3 ;# store P0 1133474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stvx v2, 0, r3 ;# store Q0 1134474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1135474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org mtspr 256, r11 ;# reset old VRSAVE 1136474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1137474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org blr 1138474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1139474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro RLV Offs 1140474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stw r0, (\Offs*4)(r5) 1141474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwzux r0, r7, r4 1142474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 1143474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1144474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.macro WLV Offs 1145474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org lwz r0, (\Offs*4)(r5) 1146474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org stwux r0, r7, r4 1147474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org.endm 1148474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org 1149474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org .align 2 1150474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# r3 unsigned char *s 1151474eb7536515fb785e925cc9375d22817c416851hclam@chromium.org;# 
;# r4 int p
;# r5 const signed char *flimit
;#
;# Gathers a 4-pel-wide, 16-row strip that starts 2 pels before s (so it
;# straddles the vertical edge) into a 64-byte temp array, invokes the
;# simple_vertical macro, then scatters the temp array back to the same
;# frame-buffer locations.  Rows are r4 (p) bytes apart.
;#
;# Temp array layout: rows are stored at word offsets
;#   0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15
;# i.e. row i lives at word 4*(i%4) + i/4, so each 16-byte quarter k of
;# the array holds rows k, k+4, k+8 and k+12.
;#
;# NOTE(review): simple_vertical is defined outside this chunk; presumably
;#   it reads the temp array at r5 and uses v8 (flimit) and v11 (0x80
;#   bytes) -- confirm against the macro.
;# NOTE(review): the temp array lives at -96(r1), below the stack pointer,
;#   and no frame is allocated here -- confirm this is safe (and suitably
;#   aligned for any vector loads) under the target ABI.
loop_filter_simple_vertical_edge_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffff
    ori     r12, r12, 0xc000
    mtspr   256, r12            ;# set VRSAVE

    ;# build constants
    lvx     v8, 0, r5           ;# flimit

    vspltisb v11, 8
    vspltisb v12, 4
    vslb    v11, v11, v12       ;# 0x80808080808080808080808080808080
                                ;# (each byte: 8 << 4)

    la      r5, -96(r1)         ;# temporary space for reading in vectors
                                ;# (r5 is reused: flimit is already in v8)

    ;# Store 4 pels at word "Offs" in temp array, then advance r7
    ;#   to next row and read another 4 pels from the frame buffer.

    subi    r7, r3,  2          ;# r7 -> 2 pels before start
    lwzx    r0, 0, r7           ;# read first 4 pels

    ;# 16 unaligned word accesses
    ;# (the load above, plus one load inside each of the 15 RLVs)
    RLV 0
    RLV 4
    RLV 8
    RLV 12
    RLV 1
    RLV 5
    RLV 9
    RLV 13
    RLV 2
    RLV 6
    RLV 10
    RLV 14
    RLV 3
    RLV 7
    RLV 11

    stw     r0, (15*4)(r5)      ;# write last 4 pels

    simple_vertical

    ;# Read temp array, write frame buffer.
    ;# Same word order as the gather above, so every row returns to the
    ;# address it was read from.
    subi    r7, r3,  2          ;# r7 -> 2 pels before start
    lwzx    r0, 0, r5           ;# read/write first 4 pels
    stwx    r0, 0, r7

    WLV 4
    WLV 8
    WLV 12
    WLV 1
    WLV 5
    WLV 9
    WLV 13
    WLV 2
    WLV 6
    WLV 10
    WLV 14
    WLV 3
    WLV 7
    WLV 11
    WLV 15

    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .data

_chromaSelectors:
;# Selector table body: two groups of three vector addresses, each group
;# followed by a zero word.
;# NOTE(review): the code that indexes this table is outside this chunk;
;#   presumably the first group serves one half of the chroma data and the
;#   second group the other half -- confirm against the users.
    .long   _B_hihi
    .long   _B_Ures0
    .long   _B_Vres0
    .long   0
    .long   _B_lolo
    .long   _B_Ures8
    .long   _B_Vres8
    .long   0

;# 16-byte index vectors.  Every entry is in the range 0..31, which is
;# consistent with vperm control vectors (0..15 pick a byte of the first
;# source register, 16..31 a byte of the second).
;# NOTE(review): presumably used with vperm -- confirm against the users.
;# Each vector is preceded by ".align 4"; presumably that gives the
;# 16-byte alignment lvx needs (power-of-two .align) -- confirm for the
;# assembler in use.

    .align 4
_B_Vres8:
    .byte   16, 17, 18, 19, 20, 21, 22, 23,  8,  9, 10, 11, 12, 13, 14, 15

    .align 4
_B_Ures8:
    .byte   16, 17, 18, 19, 20, 21, 22, 23,  0,  1,  2,  3,  4,  5,  6,  7

    .align 4
_B_lolo:
    .byte    8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31

;# Note: _B_Vres0 has the same contents as _B_lolo.
    .align 4
_B_Vres0:
    .byte    8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31

    .align 4
_B_Ures0:
    .byte    0,  1,  2,  3,  4,  5,  6,  7, 24, 25, 26, 27, 28, 29, 30, 31

    .align 4
_B_hihi:
    .byte    0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23