;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl bilinear_predict4x4_ppc
    .globl bilinear_predict8x4_ppc
    .globl bilinear_predict8x8_ppc
    .globl bilinear_predict16x16_ppc

;# load_c V, LABEL, OFF, R0, R1
;#  Loads the vector at LABEL + OFF into V.
;#   V      destination vector register
;#   LABEL  symbol of the constant table
;#   OFF    index operand of lvx: a GPR, or a literal 0 for "no offset"
;#   R0     scratch GPR, receives the @ha half of LABEL's address
;#   R1     scratch GPR, receives the full address of LABEL
;#  Clobbers R0 and R1.  R1 is left pointing at LABEL, which load_vfilter
;#  relies on.
.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha
    la      \R1, \LABEL@l(\R0)
    lvx     \V, \OFF, \R1
.endm

;# load_vfilter V0, V1
;#  Loads two consecutive vectors of vertical filter taps.
;#   expects: r6 = byte offset into vfilter_b
;#   V0 <- vfilter_b + r6
;#   V1 <- vfilter_b + r6 + 16
;#  Clobbers r9 and r10 (r10 ends up holding the address of vfilter_b, so it
;#  no longer holds the constant 16 set by HProlog) and advances r6 by 16.
.macro load_vfilter V0, V1
    load_c \V0, vfilter_b, r6, r9, r10

    addi    r6,  r6, 16                 ;# step to the second tap vector
    lvx     \V1, r6, r10                ;# r10 = &vfilter_b, left by load_c
.endm

;# HProlog jump_label
;#  Common setup for the horizontal pass.
;#   expects: r5 = x_offset, r6 = y_offset
;#   sets:    r5  = x_offset * 16   (byte offset into hfilter_b)
;#            r10 = 16, r12 = 32    (offsets to the next vectors in a row)
;#            v19 = halfword shift count of 7
;#  If x_offset is zero, branches to \jump_label right away; in that case
;#  r6 has NOT been scaled and v18/v20/v28 have not been loaded, so the
;#  jump target must do its own setup.
;#  Otherwise additionally sets:
;#            v20 = horizontal taps (hfilter_b + r5)
;#            v28 = b_hperm_b
;#            v18 = 0x00000040 in every word (rounding)
;#            r6  = y_offset * 32   (byte offset into vfilter_b)
;#            cr0 = result of that last shift, so a following beq is taken
;#                  when y_offset is zero.
;#  Clobbers r0, r9, v21.
.macro HProlog jump_label
    ;# load up horizontal filter
    slwi.   r5, r5, 4           ;# index into horizontal filter array

    ;# index to the next set of vectors in the row.
    li      r10, 16
    li      r12, 32

    ;# downshift by 7 ( divide by 128 ) at the end
    vspltish v19, 7

    ;# If there isn't any filtering to be done for the horizontal, then
    ;#  just skip to the second pass.
    beq     \jump_label

    load_c v20, hfilter_b, r5, r9, r0

    ;# setup constants
    ;# v28 is not read by HFilter/hfilter_8 below
    ;# NOTE(review): presumably consumed by the 16-wide filter - confirm.
    load_c v28, b_hperm_b, 0, r9, r0

    ;# rounding added in on the multiply: 8 << 3 = 64
    vspltisw v21, 8
    vspltisw v18, 3
    vslw    v18, v21, v18       ;# 0x00000040000000400000004000000040

    slwi.   r6, r6, 5           ;# index into vertical filter array
.endm

;# Filters a horizontal line
;# expects:
;#  v10 permute control (callers load b_0123_b)
;#  v11 permute control (callers load b_4567_b)
;#  v18 rounding (per word)
;#  v19 shift
;#  v20 filter taps
;#  v21 left-justified input pixels
;# produces:
;#  \V  8-bit result, the same eight bytes in both halves of the vector
;# clobbers:
;#  v24, v25
;#
;# NOTE(review): the byte order of the result depends on the contents of
;#  b_0123_b / b_4567_b, which are defined outside this part of the file.
.macro HFilter V
    vperm   v24, v21, v21, v10  ;# v24 = input bytes selected by v10
    vperm   v25, v21, v21, v11  ;# v25 = input bytes selected by v11

    vmsummbm v24, v20, v24, v18 ;# per word: sum(taps * pixels) + rounding
    vmsummbm v25, v20, v25, v18

    vpkswus v24, v24, v25       ;# v24 = 4 sums of v24, 4 sums of v25 (16-bit)

    vsrh    v24, v24, v19       ;# divide by 128

    vpkuhus \V, v24, v24        ;# \V = 8-bit result (duplicated)
.endm

;# hfilter_8 V, increment_counter
;#  Loads one unaligned row from r3, left-justifies it and runs HFilter.
;#   expects: r3 = src_ptr, r4 = pitch, r10 = 16, plus HFilter's inputs
;#   increment_counter: non-zero to advance r3 by r4 after the loads
;#  Clobbers v17, v21, v22, v24, v25.
.macro hfilter_8 V, increment_counter
    lvsl    v17,  0, r3         ;# permute value for alignment

    ;# input to filter is 9 bytes wide, output is 8 bytes.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif
    vperm   v21, v21, v22, v17  ;# v21 = 16 bytes starting at the old r3

    HFilter \V
.endm


;# load_and_align_8 V, increment_counter
;#  Loads 16 bytes starting at the (possibly unaligned) address in r3
;#  into V, without filtering.
;#   expects: r3 = src_ptr, r4 = pitch, r10 = 16
;#   increment_counter: non-zero to advance r3 by r4 after the loads
;#  Clobbers v17, v21, v22.
.macro load_and_align_8 V, increment_counter
    lvsl    v17,  0, r3         ;# permute value for alignment

    ;# the 16 bytes can span two vectors if r3 is not aligned.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif

    vperm   \V, v21, v22, v17
.endm

;# write_aligned_8 V, increment_counter
;#  Stores V with stvx at r7 and optionally advances r7 by r8.
;#  Not invoked by the 4x4 / 8x4 / 8x8 routines below.
.macro write_aligned_8 V, increment_counter
    stvx    \V,  0, r7

.if \increment_counter
    add     r7, r7, r8
.endif
.endm

;# vfilter_16 P0 P1
;#  Two-tap vertical filter over 16 bytes:
;#   P0 <- (P0 * v20 + P1 * v21 + v18) >> v19, packed back to bytes.
;#   expects: v18 = rounding (per halfword), v19 = shift,
;#            v20 = taps for P0, v21 = taps for P1
;#  P1 is left unchanged.  Clobbers v22, v23, v24, v25.
.macro vfilter_16 P0 P1
    vmuleub v22, \P0, v20       ;# even bytes of P0 times first tap
    vadduhm v22, v18, v22       ;#  plus rounding
    vmuloub v23, \P0, v20       ;# odd bytes of P0 times first tap
    vadduhm v23, v18, v23

    vmuleub v24, \P1, v21
    vadduhm v22, v22, v24       ;# Re = evens, saturation unnecessary
    vmuloub v25, \P1, v21
    vadduhm v23, v23, v25       ;# Ro = odds

    vsrh    v22, v22, v19       ;# divide by 128
    vsrh    v23, v23, v19       ;# v22 v23 = evens, odds
    vmrghh  \P0, v22, v23       ;# P0 v23 = 16-bit result in order
    vmrglh  v23, v22, v23
    vpkuhus \P0, \P0, v23       ;# P0 = 8-bit result
.endm


;# w_8x8 V, D, R, P
;#  Writes the first 8 bytes of V to the address in D, then D += P.
;#   V  vector holding the row
;#   D  destination pointer GPR
;#   R  scratch GPR
;#   P  GPR holding the destination pitch
;#  The vector is bounced through the stack at r1 because the destination
;#  is only written with word stores.
;#  NOTE(review): stvx at 0(r1) overwrites the first 16 bytes of the frame
;#   created by stwu, including the word stwu stored there.  The callers
;#   recover r1 with addi, so they do not read it back - confirm nothing
;#   else walks the stack while these routines run.
.macro w_8x8 V, D, R, P
    stvx    \V, 0, r1
    lwz     \R, 0(r1)
    stw     \R, 0(\D)
    lwz     \R, 4(r1)
    stw     \R, 4(\D)
    add     \D, \D, \P
.endm


    .align 2
;# Bilinear prediction of a 4x4 block.
;#  Horizontal pass is skipped when x_offset is 0; the vertical pass is
;#  skipped only when it follows a horizontal pass and y_offset is 0.
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
bilinear_predict4x4_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf830    ;# mark v0-v4, v10, v11
    ori     r12, r12, 0xfff8    ;#  and v16-v28 as in use
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_4x4_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r9, r12
    load_c v11, b_4567_b, 0, r9, r12

    hfilter_8 v0, 1
    hfilter_8 v1, 1
    hfilter_8 v2, 1
    hfilter_8 v3, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    ;# cr0 still holds the y_offset test from HProlog: nothing in
    ;#  hfilter_8 or load_c writes the condition register.
    beq     store_out_4x4_b

    hfilter_8 v4, 0

    b   second_pass_4x4_b

second_pass_4x4_pre_copy_b:
    ;# No horizontal filtering: HProlog branched out before scaling r6.
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_8  v0, 1
    load_and_align_8  v1, 1
    load_and_align_8  v2, 1
    load_and_align_8  v3, 1
    load_and_align_8  v4, 0     ;# last row: no need to advance r3

second_pass_4x4_b:
    ;# per-halfword rounding for the vertical pass: 8 << 3 = 64
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0,  v1
    vfilter_16 v1,  v2
    vfilter_16 v2,  v3
    vfilter_16 v3,  v4

store_out_4x4_b:

    ;# Each row: bounce the vector through the stack and copy its
    ;#  first word (4 pixels) to dst.
    stvx    v0, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v1, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v2, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)
    add     r7, r7, r8

    stvx    v3, 0, r1
    lwz     r0, 0(r1)
    stw     r0, 0(r7)

exit_4x4:

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .align 2
;# Bilinear prediction of an 8-wide, 4-high block.
;#  Same structure as the 4x4 routine; rows are written as two words
;#  each, or as two full vectors when dst_pitch is 8.
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
bilinear_predict8x4_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xf830    ;# mark v0-v4, v10, v11
    ori     r12, r12, 0xfff8    ;#  and v16-v28 as in use
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_8x4_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r9, r12
    load_c v11, b_4567_b, 0, r9, r12

    hfilter_8 v0, 1
    hfilter_8 v1, 1
    hfilter_8 v2, 1
    hfilter_8 v3, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    beq     store_out_8x4_b

    hfilter_8 v4, 0

    b   second_pass_8x4_b

second_pass_8x4_pre_copy_b:
    ;# No horizontal filtering: HProlog branched out before scaling r6.
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_8  v0, 1
    load_and_align_8  v1, 1
    load_and_align_8  v2, 1
    load_and_align_8  v3, 1
    load_and_align_8  v4, 0     ;# last row: no need to advance r3

second_pass_8x4_b:
    ;# per-halfword rounding for the vertical pass: 8 << 3 = 64
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0,  v1
    vfilter_16 v1,  v2
    vfilter_16 v2,  v3
    vfilter_16 v3,  v4

store_out_8x4_b:

    ;# With a pitch of 8 the rows are contiguous, so two rows can be
    ;#  merged into one vector and stored with stvx.
    ;# NOTE(review): that path presumably requires dst to be 16-byte
    ;#  aligned - verify against the callers.
    cmpi    cr0, r8, 8
    beq     cr0, store_aligned_8x4_b

    w_8x8   v0, r7, r0, r8
    w_8x8   v1, r7, r0, r8
    w_8x8   v2, r7, r0, r8
    w_8x8   v3, r7, r0, r8

    b       exit_8x4

store_aligned_8x4_b:
    load_c v10, b_hilo_b, 0, r9, r10

    vperm   v0, v0, v1, v10     ;# rows 0 and 1 combined per b_hilo_b
    vperm   v2, v2, v3, v10     ;# rows 2 and 3 combined per b_hilo_b

    stvx    v0, 0, r7
    addi    r7, r7, 16
    stvx    v2, 0, r7

exit_8x4:

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .align 2
;# Bilinear prediction of an 8x8 block.
;#  Same structure as the 8x4 routine with eight output rows (nine
;#  input rows for the vertical pass).
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
bilinear_predict8x8_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfff0    ;# mark v0-v11
    ori     r12, r12, 0xffff    ;#  and v16-v31 as in use
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_8x8_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r9, r12
    load_c v11, b_4567_b, 0, r9, r12

    hfilter_8 v0, 1
    hfilter_8 v1, 1
    hfilter_8 v2, 1
    hfilter_8 v3, 1
    hfilter_8 v4, 1
    hfilter_8 v5, 1
    hfilter_8 v6, 1
    hfilter_8 v7, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    beq     store_out_8x8_b

    hfilter_8 v8, 0

    b   second_pass_8x8_b

second_pass_8x8_pre_copy_b:
    ;# No horizontal filtering: HProlog branched out before scaling r6.
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_8  v0, 1
    load_and_align_8  v1, 1
    load_and_align_8  v2, 1
    load_and_align_8  v3, 1
    load_and_align_8  v4, 1
    load_and_align_8  v5, 1
    load_and_align_8  v6, 1
    load_and_align_8  v7, 1
    load_and_align_8  v8, 0

second_pass_8x8_b:
    ;# per-halfword rounding for the vertical pass: 8 << 3 = 64
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0,  v1
    vfilter_16 v1,  v2
    vfilter_16 v2,  v3
    vfilter_16 v3,  v4
    vfilter_16 v4,  v5
    vfilter_16 v5,  v6
    vfilter_16 v6,  v7
    vfilter_16 v7,  v8

store_out_8x8_b:

    ;# With a pitch of 8 the rows are contiguous, so two rows can be
    ;#  merged into one vector and stored with stvx.
    ;# NOTE(review): that path presumably requires dst to be 16-byte
    ;#  aligned - verify against the callers.
    cmpi    cr0, r8, 8
    beq     cr0, store_aligned_8x8_b

    w_8x8   v0, r7, r0, r8
    w_8x8   v1, r7, r0, r8
    w_8x8   v2, r7, r0, r8
    w_8x8   v3, r7, r0, r8
    w_8x8   v4, r7, r0, r8
    w_8x8   v5, r7, r0, r8
    w_8x8   v6, r7, r0, r8
    w_8x8   v7, r7, r0, r8

    b       exit_8x8

store_aligned_8x8_b:
    load_c v10, b_hilo_b, 0, r9, r10

    vperm   v0, v0, v1, v10     ;# row pairs combined per b_hilo_b
    vperm   v2, v2, v3, v10
    vperm   v4, v4, v5, v10
    vperm   v6, v6, v7, v10

    stvx    v0, 0, r7
    addi    r7, r7, 16
    stvx    v2, 0, r7
    addi    r7, r7, 16
    stvx    v4, 0, r7
    addi    r7, r7, 16
    stvx    v6, 0, r7

exit_8x8:

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

;# Filters a horizontal line
;# expects:
;#  r3  src_ptr
;#  r4  pitch
;#  r10 16
;#  r12 32
;#  v17 perm intput
44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v18 rounding 44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v19 shift 44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v20 filter taps 44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v21 tmp 45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v22 tmp 45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v23 tmp 45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v24 tmp 45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v25 tmp 45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v26 tmp 45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v27 tmp 45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v28 perm output 45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# 45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro hfilter_16 V, increment_counter 45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvsl v17, 0, r3 ;# permutate value for alignment 46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# input to filter is 21 bytes wide, output is 16 bytes. 46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# input will can span three vectors if not aligned correctly. 
46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvx v21, 0, r3 46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvx v22, r10, r3 46690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvx v23, r12, r3 46790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.if \increment_counter 46990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add r3, r3, r4 47090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endif 47190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vperm v21, v21, v22, v17 47290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vperm v22, v22, v23, v17 ;# v8 v9 = 21 input pixels left-justified 47390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# set 0 47590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmsummbm v24, v20, v21, v18 ;# taps times elements 47690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 47790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# set 1 47890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsldoi v23, v21, v22, 1 47990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmsummbm v25, v20, v23, v18 48090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# set 2 48290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsldoi v23, v21, v22, 2 48390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmsummbm v26, v20, v23, v18 48490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# set 3 48690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsldoi v23, v21, v22, 3 48790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmsummbm v27, v20, v23, v18 48890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) 
49090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F 49190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsrh v24, v24, v19 ;# divide v0, v1 by 128 49390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsrh v25, v25, v19 49490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result 49690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vperm \V, \V, v0, v28 ;# \V = correctly-ordered result 49790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm 49890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro load_and_align_16 V, increment_counter 50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvsl v17, 0, r3 ;# permutate value for alignment 50190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 50290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# input to filter is 21 bytes wide, output is 16 bytes. 50390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# input will can span three vectors if not aligned correctly. 
50490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvx v21, 0, r3 50590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvx v22, r10, r3 50690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 50790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.if \increment_counter 50890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add r3, r3, r4 50990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endif 51090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vperm \V, v21, v22, v17 51290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm 51390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro write_16 V, increment_counter 51590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber stvx \V, 0, r7 51690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.if \increment_counter 51890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add r7, r7, r8 51990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endif 52090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm 52190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 52290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 2 52390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 unsigned char * src 52490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r4 int src_pitch 52590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r5 int x_offset 52690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r6 int y_offset 52790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r7 unsigned char * dst 52890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r8 int dst_pitch 52990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberbilinear_predict16x16_ppc: 53090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mfspr r11, 256 ;# get old VRSAVE 53190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber oris 
r12, r11, 0xffff 53290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ori r12, r12, 0xfff8 53390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mtspr 256, r12 ;# set VRSAVE 53490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 53590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber HProlog second_pass_16x16_pre_copy_b 53690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 53790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v0, 1 53890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v1, 1 53990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v2, 1 54090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v3, 1 54190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v4, 1 54290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v5, 1 54390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v6, 1 54490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v7, 1 54590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v8, 1 54690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v9, 1 54790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v10, 1 54890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v11, 1 54990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v12, 1 55090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v13, 1 55190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v14, 1 55290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v15, 1 55390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 55490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# Finished filtering main horizontal block. If there is no 55590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# vertical filtering, jump to storing the data. 
Otherwise 55690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# load up and filter the additional line that is needed 55790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# for the vertical filter. 55890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq store_out_16x16_b 55990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v16, 0 56190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber b second_pass_16x16_b 56390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_16x16_pre_copy_b: 56590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber slwi r6, r6, 5 ;# index into vertical filter array 56690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v0, 1 56890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v1, 1 56990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v2, 1 57090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v3, 1 57190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v4, 1 57290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v5, 1 57390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v6, 1 57490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v7, 1 57590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v8, 1 57690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v9, 1 57790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v10, 1 57890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v11, 1 57990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v12, 1 58090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v13, 
1 58190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v14, 1 58290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v15, 1 58390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v16, 0 58490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_16x16_b: 58690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v20, 8 58790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v18, 3 58890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 58990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_vfilter v20, v21 59190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v0, v1 59390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v1, v2 59490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v2, v3 59590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v3, v4 59690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v4, v5 59790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v5, v6 59890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v6, v7 59990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v7, v8 60090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v8, v9 60190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v9, v10 60290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v10, v11 60390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v11, v12 60490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v12, v13 60590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v13, v14 60690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v14, v15 
60790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v15, v16 60890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstore_out_16x16_b: 61090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 61190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v0, 1 61290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v1, 1 61390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v2, 1 61490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v3, 1 61590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v4, 1 61690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v5, 1 61790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v6, 1 61890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v7, 1 61990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v8, 1 62090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v9, 1 62190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v10, 1 62290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v11, 1 62390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v12, 1 62490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v13, 1 62590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v14, 1 62690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber write_16 v15, 0 62790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 62890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mtspr 256, r11 ;# reset old VRSAVE 62990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber blr 63190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .data 63390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 4 63590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huberhfilter_b: 63690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0 63790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0 63890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0 63990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0 64090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0 64190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0 64290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0 64390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0 64490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 64590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 4 64690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervfilter_b: 64790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128 64890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 64990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 65090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 65190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 65290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 65390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 
80, 80, 80, 80, 80 65490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 65590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 65690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 65790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 65890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 65990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 66090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96 66190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 66290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112 66390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 66490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 4 66590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberb_hperm_b: 66690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 66790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 66890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 4 66990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberb_0123_b: 67090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 67190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 67290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 4 67390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberb_4567_b: 67490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber .byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 67590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 67690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberb_hilo_b: 67790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 678