;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_sub_pixel_variance4x4_ppc
    .globl vp8_sub_pixel_variance8x8_ppc
    .globl vp8_sub_pixel_variance8x16_ppc
    .globl vp8_sub_pixel_variance16x8_ppc
    .globl vp8_sub_pixel_variance16x16_ppc

;# load_c: load one 16-byte vector from a labelled constant table.
;#  V      destination vector register
;#  LABEL  table symbol
;#  OFF    index operand for lvx (a register, or literal 0)
;#  R0     scratch GPR, receives the high half of the address
;#  R1     scratch GPR, receives the full table address
;# Both R0 and R1 are overwritten.
.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha
    la      \R1, \LABEL@l(\R0)
    lvx     \V, \OFF, \R1
.endm

;# load_vfilter: load the two vertical tap vectors for this yoffset.
;# expects:
;#  r6   byte offset into vfilter_b (yoffset already shifted left by 5)
;# results:
;#  V0   vector at vfilter_b + r6
;#  V1   vector at vfilter_b + r6 + 16
;# clobbers r6 (advanced by 16), r10 (table address, no longer 16), r12.
.macro load_vfilter V0, V1
    load_c \V0, vfilter_b, r6, r12, r10

    addi    r6,  r6, 16
    lvx     \V1, r6, r10
.endm

;# HProlog: common setup for the horizontal (first) pass.
;# expects:
;#  r5   xoffset
;#  r6   yoffset
;# Branches to \jump_label when xoffset is 0, with only r5, r10 and v19
;# set up; r6 is NOT scaled on that path, so the target must do it.
;# On fall-through:
;#  r5   xoffset << 4 (byte index into hfilter_b)
;#  r6   yoffset << 5 (byte index into vfilter_b)
;#  r10  16
;#  r12  32
;#  v18  rounding, 0x40 in every word
;#  v19  shift count, 7 in every halfword
;#  v20  horizontal filter taps
;#  v28  vector loaded from b_hperm_b
;#  cr0  reflects yoffset << 5, so a following beq means "no vertical
;#       filtering"; callers must not disturb cr0 before testing it.
;# clobbers r0, r12, v21.
.macro HProlog jump_label
    ;# load up horizontal filter
    slwi.   r5, r5, 4           ;# index into horizontal filter array

    ;# index to the next set of vectors in the row.
    li      r10, 16

    ;# downshift by 7 ( divide by 128 ) at the end
    vspltish v19, 7

    ;# If there isn't any filtering to be done for the horizontal, then
    ;#  just skip to the second pass.
    beq     \jump_label

    load_c v20, hfilter_b, r5, r12, r0

    ;# setup constants
    ;# v28 permutation vector from b_hperm_b
    load_c v28, b_hperm_b, 0, r12, r0

    ;# index to the vector two ahead in the row.
    li      r12, 32

    ;# rounding added in on the multiply
    vspltisw v21, 8
    vspltisw v18, 3
    vslw    v18, v21, v18       ;# 0x00000040000000400000004000000040

    slwi.   r6, r6, 5           ;# index into vertical filter array
.endm

;# Filters a horizontal line
;# expects:
;#  r3   src_ptr
;#  r4   pitch
;#  r10  16
;#  v18  rounding (per word)
;#  v19  shift
;#  v20  filter taps
;#  \hp  permute vector gathering the taps' inputs for pixels 0-3
;#  \lp  permute vector gathering the taps' inputs for pixels 4-7
;# uses as temporaries:
;#  v17  alignment permute
;#  v21  tmp
;#  v22  tmp
;#  v24  tmp
;#  v25  tmp
;# result:
;#  \V   the 8 filtered pixels as bytes, repeated in both halves
;#  r3   advanced by r4 when \increment_counter is non-zero
;#
.macro hfilter_8 V, hp, lp, increment_counter
    lvsl    v17,  0, r3         ;# permute value for alignment

    ;# input to filter is 9 bytes wide, output is 8 bytes.
    ;# Fetch the two aligned vectors that can hold them.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif
    vperm   v21, v21, v22, v17  ;# v21 = 16 bytes starting at src_ptr

    vperm   v24, v21, v21, \hp  ;# v24 = 0123 1234 2345 3456
    vperm   v25, v21, v21, \lp  ;# v25 = 4567 5678 6789 789A

    ;# taps * pixels, summed per word, plus rounding
    vmsummbm v24, v20, v24, v18
    vmsummbm v25, v20, v25, v18

    vpkswus v24, v24, v25       ;# v24 = pixels 0-7 as 16-bit values

    vsrh    v24, v24, v19       ;# divide by 128

    vpkuhus \V, v24, v24        ;# \V = 8-bit result, stored twice
.endm

;# Filters two rows vertically.
;# expects:
;#  \P0  upper row (16 bytes)
;#  \P1  lower row (16 bytes)
;#  v18  rounding (per halfword)
;#  v19  shift
;#  v20  tap applied to \P0
;#  v21  tap applied to \P1
;# uses v22-v25 as temporaries.
;# result:
;#  \P0  filtered row, 16 bytes in pixel order; \P1 is left untouched.
.macro vfilter_16 P0 P1
    vmuleub v22, \P0, v20       ;# even pixels of upper row * tap 0
    vadduhm v22, v18, v22       ;#  + rounding
    vmuloub v23, \P0, v20       ;# odd pixels of upper row * tap 0
    vadduhm v23, v18, v23       ;#  + rounding

    vmuleub v24, \P1, v21       ;# even pixels of lower row * tap 1
    vadduhm v22, v22, v24       ;# Re = evens, saturation unnecessary
    vmuloub v25, \P1, v21       ;# odd pixels of lower row * tap 1
    vadduhm v23, v23, v25       ;# Ro = odds

    vsrh    v22, v22, v19       ;# divide by 128
    vsrh    v23, v23, v19       ;# v22 v23 = evens, odds
    vmrghh  \P0, v22, v23       ;# \P0 v23 = 16-bit result in order
    vmrglh  v23, v22, v23
    vpkuhus \P0, \P0, v23       ;# P0 = 8-bit result
.endm

;# Accumulates the sum of differences and the sum of squared
;# differences of 16 byte pairs.
;#  src  16 source bytes
;#  ref  16 reference bytes
;#  sum  running sum of (src - ref), four word lanes
;#  sse  running sum of (src - ref)^2, four word lanes
;#  t1   tmp
;#  t2   tmp
;#  z0   vector of zeros (used to widen bytes to halfwords)
.macro compute_sum_sse src, ref, sum, sse, t1, t2, z0
    ;# Compute sum first.  Unpack to halfwords so a signed subtract
    ;#  can be used.  Only have a halfword signed
    ;#  subtract.  Do high, then low.
    vmrghb  \t1, \z0, \src
    vmrghb  \t2, \z0, \ref
    vsubshs \t1, \t1, \t2
    vsum4shs \sum, \t1, \sum

    vmrglb  \t1, \z0, \src
    vmrglb  \t2, \z0, \ref
    vsubshs \t1, \t1, \t2
    vsum4shs \sum, \t1, \sum

    ;# Now compute sse.  The two saturating subtracts OR'ed together
    ;#  give |src - ref| per byte.
    vsububs \t1, \src, \ref
    vsububs \t2, \ref, \src
    vor     \t1, \t1, \t2

    vmsumubm \sse, \t1, \t1, \sse
.endm

;# Reduces the accumulators and produces the final result.
;#  sum  per-lane sums from compute_sum_sse
;#  sse  per-lane squared sums from compute_sum_sse
;#  z0   vector of zeros
;#  DS   right shift applied to sum*sum (4 for 4x4, 6 for 8x8 below)
;# expects:
;#  r1   16-byte scratch at 0(r1); the word at 12(r1) is read back
;#  r9   unsigned int *sse
;# results:
;#  *r9  total sse
;#  r3   sse - ((sum*sum) >> DS)
;# clobbers r4.
;# NOTE(review): the stvx writes all 16 bytes at 0(r1), which includes
;#  the first word of the frame the callers just created with stwu.
;#  Nothing visible here reads that word back; confirm nothing else
;#  relies on it.
.macro variance_final sum, sse, z0, DS
    vsumsws \sum, \sum, \z0     ;# total lands in the last word
    vsumsws \sse, \sse, \z0

    stvx    \sum, 0, r1
    lwz     r3, 12(r1)

    stvx    \sse, 0, r1
    lwz     r4, 12(r1)

    stw     r4, 0(r9)           ;# sse

    mullw   r3, r3, r3          ;# sum*sum
    srlwi   r3, r3, \DS         ;# (sum*sum) >> DS
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> DS)
.endm

;# Loads the next 16 reference bytes through r7/r8 into v16 and folds
;# them, together with source vector \V, into sum (v18) and sse (v19).
;# Uses v20, v21 as temporaries and expects v23 to be zero.
.macro compute_sum_sse_16 V, increment_counter
    load_and_align_16  v16, r7, r8, \increment_counter
    compute_sum_sse \V, v16, v18, v19, v20, v21, v23
.endm

;# Loads 16 bytes from a possibly unaligned address.
;#  V    destination vector
;#  R    GPR holding the address
;#  P    GPR holding the pitch
;#  increment_counter  when non-zero, R is advanced by P
;# expects r10 = 16.  Uses v17, v21, v22 as temporaries.
.macro load_and_align_16 V, R, P, increment_counter
    lvsl    v17,  0, \R         ;# permute value for alignment

    ;# The 16 bytes can span two aligned vectors; fetch both and
    ;#  let the permute pick out the wanted bytes.
    lvx     v21,   0, \R
    lvx     v22, r10, \R

.if \increment_counter
    add     \R, \R, \P
.endif

    vperm   \V, v21, v22, v17
.endm

    .align 2
;# r3 unsigned char  *src_ptr
;# r4 int  src_pixels_per_line
;# r5 int  xoffset
;# r6 int  yoffset
;# r7 unsigned char *dst_ptr
;# r8 int dst_pixels_per_line
;# r9 unsigned int *sse
;#
;# r3 return value
vp8_sub_pixel_variance4x4_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    ;# Mark v0-v7, v10 and v11 as live.  v5-v7 receive the reference
    ;#  rows further down, so they must be part of the mask.
    oris    r12, r11, 0xff30
    ori     r12, r12, 0xfff8    ;# v16-v28
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_4x4_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r12, r0
    load_c v11, b_4567_b, 0, r12, r0

    hfilter_8 v0, v10, v11, 1
    hfilter_8 v1, v10, v11, 1
    hfilter_8 v2, v10, v11, 1
    hfilter_8 v3, v10, v11, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to computing the variance.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    beq     compute_sum_sse_4x4_b   ;# cr0 still holds yoffset test

    hfilter_8 v4, v10, v11, 0

    b   second_pass_4x4_b

second_pass_4x4_pre_copy_b:
    ;# No horizontal filtering: copy the rows in unfiltered.  Unlike
    ;#  the 8x8 routine, this path always runs the vertical pass, even
    ;#  when yoffset is 0 (slwi without the record bit, no beq).
    slwi    r6, r6, 5           ;# index into vertical filter array

    load_and_align_16 v0, r3, r4, 1
    load_and_align_16 v1, r3, r4, 1
    load_and_align_16 v2, r3, r4, 1
    load_and_align_16 v3, r3, r4, 1
    load_and_align_16 v4, r3, r4, 0

second_pass_4x4_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18       ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0,  v1
    vfilter_16 v1,  v2
    vfilter_16 v2,  v3
    vfilter_16 v3,  v4

compute_sum_sse_4x4_b:
    vspltish v18, 0             ;# sum
    vspltish v19, 0             ;# sse
    vspltish v23, 0             ;# unpack
    li      r10, 16             ;# load_vfilter may have overwritten r10

    ;# reference rows
    load_and_align_16 v4, r7, r8, 1
    load_and_align_16 v5, r7, r8, 1
    load_and_align_16 v6, r7, r8, 1
    load_and_align_16 v7, r7, r8, 1

    ;# interleave pairs of predicted rows
    vmrghb  v0, v0, v1
    vmrghb  v1, v2, v3

    ;# interleave pairs of reference rows the same way
    vmrghb  v2, v4, v5
    vmrghb  v3, v6, v7

    load_c v10, b_hilo_b, 0, r12, r0

    ;# Gather all four rows into one vector each for prediction and
    ;#  reference, using the same selector for both.
    vperm   v0, v0, v1, v10
    vperm   v1, v2, v3, v10

    compute_sum_sse v0, v1, v18, v19, v20, v21, v23

    variance_final v18, v19, v23, 4

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE

    blr

    .align 2
;# r3 unsigned char  *src_ptr
;# r4 int  src_pixels_per_line
;# r5 int  xoffset
;# r6 int  yoffset
;# r7 unsigned char *dst_ptr
;# r8 int dst_pixels_per_line
;# r9 unsigned int *sse
;#
;# r3 return value
vp8_sub_pixel_variance8x8_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfff0    ;# v0-v11
    ori     r12, r12, 0xffff    ;# v16-v31
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1,-32(r1)          ;# create space on the stack

    HProlog second_pass_8x8_pre_copy_b

    ;# Load up permutation constants
    load_c v10, b_0123_b, 0, r12, r0
    load_c v11, b_4567_b, 0, r12, r0

    hfilter_8 v0, v10, v11, 1
    hfilter_8 v1, v10, v11, 1
    hfilter_8 v2, v10, v11, 1
    hfilter_8 v3, v10, v11, 1
    hfilter_8 v4, v10, v11, 1
    hfilter_8 v5, v10, v11, 1
    hfilter_8 v6, v10, v11, 1
    hfilter_8 v7, v10, v11, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to computing the variance.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    beq     compute_sum_sse_8x8_b   ;# cr0 still holds yoffset test

    hfilter_8 v8, v10, v11, 0

    b   second_pass_8x8_b

second_pass_8x8_pre_copy_b:
    ;# No horizontal filtering: copy the rows in unfiltered.
    slwi.   r6, r6, 5           ;# index into vertical filter array

    load_and_align_16 v0, r3, r4, 1
    load_and_align_16 v1, r3, r4, 1
    load_and_align_16 v2, r3, r4, 1
    load_and_align_16 v3, r3, r4, 1
    load_and_align_16 v4, r3, r4, 1
    load_and_align_16 v5, r3, r4, 1
    load_and_align_16 v6, r3, r4, 1
    load_and_align_16 v7, r3, r4, 1
    load_and_align_16 v8, r3, r4, 0

    ;# yoffset is 0 as well: nothing to filter at all.
    beq     compute_sum_sse_8x8_b

second_pass_8x8_b:
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    load_vfilter v20, v21

    vfilter_16 v0, v1
    vfilter_16 v1, v2
    vfilter_16 v2, v3
    vfilter_16 v3, v4
    vfilter_16 v4, v5
    vfilter_16 v5, v6
    vfilter_16 v6, v7
    vfilter_16 v7, v8

compute_sum_sse_8x8_b:
    vspltish v18, 0             ;# sum
    vspltish v19, 0             ;# sse
    vspltish v23, 0             ;# unpack
    li      r10, 16             ;# load_vfilter may have overwritten r10

    ;# pack two predicted rows (8 bytes each) into every vector
    vmrghb  v0, v0, v1
    vmrghb  v1, v2, v3
    vmrghb  v2, v4, v5
    vmrghb  v3, v6, v7

    ;# reference rows
    load_and_align_16 v4,  r7, r8, 1
    load_and_align_16 v5,  r7, r8, 1
    load_and_align_16 v6,  r7, r8, 1
    load_and_align_16 v7,  r7, r8, 1
    load_and_align_16 v8,  r7, r8, 1
    load_and_align_16 v9,  r7, r8, 1
    load_and_align_16 v10, r7, r8, 1
    load_and_align_16 v11, r7, r8, 0

    ;# pack the reference rows the same way as the predicted rows
    vmrghb  v4, v4,  v5
    vmrghb  v5, v6,  v7
    vmrghb  v6, v8,  v9
    vmrghb  v7, v10, v11

    compute_sum_sse v0, v4, v18, v19, v20, v21, v23
    compute_sum_sse v1, v5, v18, v19, v20, v21, v23
    compute_sum_sse v2, v6, v18, v19, v20, v21, v23
    compute_sum_sse v3, v7, v18, v19, v20, v21, v23

    variance_final v18, v19, v23, 6

    addi    r1, r1, 32          ;# recover stack
    mtspr   256, r11            ;# reset old VRSAVE
    blr

    .align 2
;# r3 unsigned char  *src_ptr
;# r4 int  src_pixels_per_line
;# r5 int  xoffset
;# r6 int  yoffset
;# r7 unsigned char *dst_ptr
;# r8 int dst_pixels_per_line
;# r9 unsigned int *sse
;#
;# r3 return value
39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervp8_sub_pixel_variance8x16_ppc: 39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mfspr r11, 256 ;# get old VRSAVE 39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber oris r12, r11, 0xffff 39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ori r12, r12, 0xfffc 39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mtspr 256, r12 ;# set VRSAVE 40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber stwu r1,-32(r1) ;# create space on the stack 40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber HProlog second_pass_8x16_pre_copy_b 40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# Load up permutation constants 40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_c v29, b_0123_b, 0, r12, r0 40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_c v30, b_4567_b, 0, r12, r0 40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v0, v29, v30, 1 41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v1, v29, v30, 1 41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v2, v29, v30, 1 41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v3, v29, v30, 1 41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v4, v29, v30, 1 41490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v5, v29, v30, 1 41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v6, v29, v30, 1 41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v7, v29, v30, 1 41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v8, v29, v30, 1 41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v9, v29, v30, 1 41990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
Huber hfilter_8 v10, v29, v30, 1 42090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v11, v29, v30, 1 42190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v12, v29, v30, 1 42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v13, v29, v30, 1 42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v14, v29, v30, 1 42490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v15, v29, v30, 1 42590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 42690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# Finished filtering main horizontal block. If there is no 42790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# vertical filtering, jump to storing the data. Otherwise 42890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# load up and filter the additional line that is needed 42990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# for the vertical filter. 43090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq compute_sum_sse_8x16_b 43190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 43290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_8 v16, v29, v30, 0 43390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 43490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber b second_pass_8x16_b 43590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 43690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_8x16_pre_copy_b: 43790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber slwi. 
r6, r6, 5 ;# index into vertical filter array 43890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 43990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v0, r3, r4, 1 44090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v1, r3, r4, 1 44190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v2, r3, r4, 1 44290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v3, r3, r4, 1 44390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v4, r3, r4, 1 44490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v5, r3, r4, 1 44590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v6, r3, r4, 1 44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v7, r3, r4, 1 44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v8, r3, r4, 1 44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v9, r3, r4, 1 44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v10, r3, r4, 1 45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v11, r3, r4, 1 45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v12, r3, r4, 1 45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v13, r3, r4, 1 45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v14, r3, r4, 1 45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v15, r3, r4, 1 45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v16, r3, r4, 0 45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq compute_sum_sse_8x16_b 45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_8x16_b: 46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v20, 8 
46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v18, 3 46290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 46390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_vfilter v20, v21 46590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 46690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v0, v1 46790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v1, v2 46890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v2, v3 46990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v3, v4 47090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v4, v5 47190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v5, v6 47290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v6, v7 47390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v7, v8 47490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v8, v9 47590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v9, v10 47690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v10, v11 47790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v11, v12 47890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v12, v13 47990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v13, v14 48090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v14, v15 48190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v15, v16 48290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubercompute_sum_sse_8x16_b: 48490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v18, 0 ;# sum 48590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v19, 0 ;# sse 48690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v23, 0 ;# unpack 
48790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber li r10, 16 48890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 48990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v0, v0, v1 49090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v1, v2, v3 49190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v2, v4, v5 49290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v3, v6, v7 49390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v4, v8, v9 49490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v5, v10, v11 49590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v6, v12, v13 49690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v7, v14, v15 49790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 49890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v8, r7, r8, 1 49990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v9, r7, r8, 1 50090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v10, r7, r8, 1 50190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v11, r7, r8, 1 50290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v12, r7, r8, 1 50390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v13, r7, r8, 1 50490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v14, r7, r8, 1 50590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v15, r7, r8, 1 50690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 50790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v8, v8, v9 50890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v9, v10, v11 50990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v10, v12, v13 51090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v11, v14, v15 51190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
compute_sum_sse v0, v8, v18, v19, v20, v21, v23 51390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse v1, v9, v18, v19, v20, v21, v23 51490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse v2, v10, v18, v19, v20, v21, v23 51590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse v3, v11, v18, v19, v20, v21, v23 51690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 51790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v8, r7, r8, 1 51890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v9, r7, r8, 1 51990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v10, r7, r8, 1 52090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v11, r7, r8, 1 52190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v12, r7, r8, 1 52290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v13, r7, r8, 1 52390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v14, r7, r8, 1 52490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v15, r7, r8, 0 52590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 52690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v8, v8, v9 52790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v9, v10, v11 52890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v10, v12, v13 52990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmrghb v11, v14, v15 53090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 53190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse v4, v8, v18, v19, v20, v21, v23 53290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse v5, v9, v18, v19, v20, v21, v23 53390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse v6, v10, v18, v19, v20, v21, v23 53490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse v7, v11, v18, v19, v20, v21, v23 
53590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 53690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber variance_final v18, v19, v23, 7 53790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 53890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber addi r1, r1, 32 ;# recover stack 53990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mtspr 256, r11 ;# reset old VRSAVE 54090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber blr 54190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 54290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# Filters a horizontal line 54390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# expects: 54490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 src_ptr 54590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r4 pitch 54690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r10 16 54790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r12 32 54890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v17 perm intput 54990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v18 rounding 55090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v19 shift 55190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v20 filter taps 55290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v21 tmp 55390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v22 tmp 55490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v23 tmp 55590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v24 tmp 55690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v25 tmp 55790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v26 tmp 55890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v27 tmp 55990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# v28 perm output 56090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# 56190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro hfilter_16 V, increment_counter 56290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
56390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvsl v17, 0, r3 ;# permutate value for alignment 56490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 56590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# input to filter is 21 bytes wide, output is 16 bytes. 56690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# input will can span three vectors if not aligned correctly. 56790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvx v21, 0, r3 56890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvx v22, r10, r3 56990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber lvx v23, r12, r3 57090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.if \increment_counter 57290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber add r3, r3, r4 57390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endif 57490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vperm v21, v21, v22, v17 57590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vperm v22, v22, v23, v17 ;# v8 v9 = 21 input pixels left-justified 57690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 57790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# set 0 57890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmsummbm v24, v20, v21, v18 ;# taps times elements 57990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# set 1 58190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsldoi v23, v21, v22, 1 58290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmsummbm v25, v20, v23, v18 58390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 58490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# set 2 58590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsldoi v23, v21, v22, 2 58690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmsummbm v26, v20, v23, v18 58790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 
58890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# set 3 58990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsldoi v23, v21, v22, 3 59090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vmsummbm v27, v20, v23, v18 59190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vpkswus v24, v24, v25 ;# v24 = 0 4 8 C 1 5 9 D (16-bit) 59390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vpkswus v25, v26, v27 ;# v25 = 2 6 A E 3 7 B F 59490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsrh v24, v24, v19 ;# divide v0, v1 by 128 59690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vsrh v25, v25, v19 59790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 59890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vpkuhus \V, v24, v25 ;# \V = scrambled 8-bit result 59990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vperm \V, \V, v0, v28 ;# \V = correctly-ordered result 60090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm 60190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 60290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 2 60390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 unsigned char *src_ptr 60490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r4 int src_pixels_per_line 60590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r5 int xoffset 60690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r6 int yoffset 60790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r7 unsigned char *dst_ptr 60890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r8 int dst_pixels_per_line 60990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r9 unsigned int *sse 61090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# 61190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 return value 61290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervp8_sub_pixel_variance16x8_ppc: 
61390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mfspr r11, 256 ;# get old VRSAVE 61490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber oris r12, r11, 0xffff 61590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ori r12, r12, 0xfff8 61690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mtspr 256, r12 ;# set VRSAVE 61790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 61890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber stwu r1, -32(r1) ;# create space on the stack 61990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 62090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber HProlog second_pass_16x8_pre_copy_b 62190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 62290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v0, 1 62390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v1, 1 62490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v2, 1 62590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v3, 1 62690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v4, 1 62790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v5, 1 62890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v6, 1 62990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v7, 1 63090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# Finished filtering main horizontal block. If there is no 63290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# vertical filtering, jump to storing the data. Otherwise 63390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# load up and filter the additional line that is needed 63490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# for the vertical filter. 
63590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq compute_sum_sse_16x8_b 63690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v8, 0 63890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 63990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber b second_pass_16x8_b 64090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 64190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_16x8_pre_copy_b: 64290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber slwi. r6, r6, 5 ;# index into vertical filter array 64390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 64490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v0, r3, r4, 1 64590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v1, r3, r4, 1 64690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v2, r3, r4, 1 64790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v3, r3, r4, 1 64890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v4, r3, r4, 1 64990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v5, r3, r4, 1 65090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v6, r3, r4, 1 65190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v7, r3, r4, 1 65290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v8, r3, r4, 1 65390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 65490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq compute_sum_sse_16x8_b 65590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 65690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_16x8_b: 65790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v20, 8 65890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v18, 3 65990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 
66090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 66190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_vfilter v20, v21 66290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 66390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v0, v1 66490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v1, v2 66590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v2, v3 66690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v3, v4 66790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v4, v5 66890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v5, v6 66990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v6, v7 67090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v7, v8 67190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 67290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubercompute_sum_sse_16x8_b: 67390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v18, 0 ;# sum 67490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v19, 0 ;# sse 67590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v23, 0 ;# unpack 67690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber li r10, 16 67790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 67890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v0, 1 67990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v1, 1 68090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v2, 1 68190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v3, 1 68290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v4, 1 68390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v5, 1 68490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v6, 1 68590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v7, 0 
68690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 68790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber variance_final v18, v19, v23, 7 68890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 68990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber addi r1, r1, 32 ;# recover stack 69090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 69190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mtspr 256, r11 ;# reset old VRSAVE 69290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 69390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber blr 69490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 69590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 2 69690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 unsigned char *src_ptr 69790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r4 int src_pixels_per_line 69890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r5 int xoffset 69990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r6 int yoffset 70090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r7 unsigned char *dst_ptr 70190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r8 int dst_pixels_per_line 70290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r9 unsigned int *sse 70390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# 70490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 return value 70590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubervp8_sub_pixel_variance16x16_ppc: 70690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mfspr r11, 256 ;# get old VRSAVE 70790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber oris r12, r11, 0xffff 70890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ori r12, r12, 0xfff8 70990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mtspr 256, r12 ;# set VRSAVE 71090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 71190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber stwu r1, -32(r1) ;# create space on the stack 
71290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 71390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber HProlog second_pass_16x16_pre_copy_b 71490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 71590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v0, 1 71690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v1, 1 71790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v2, 1 71890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v3, 1 71990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v4, 1 72090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v5, 1 72190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v6, 1 72290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v7, 1 72390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v8, 1 72490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v9, 1 72590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v10, 1 72690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v11, 1 72790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v12, 1 72890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v13, 1 72990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v14, 1 73090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v15, 1 73190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 73290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# Finished filtering main horizontal block. If there is no 73390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# vertical filtering, jump to storing the data. Otherwise 73490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# load up and filter the additional line that is needed 73590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ;# for the vertical filter. 
73690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq compute_sum_sse_16x16_b 73790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 73890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber hfilter_16 v16, 0 73990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 74090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber b second_pass_16x16_b 74190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 74290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_16x16_pre_copy_b: 74390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber slwi. r6, r6, 5 ;# index into vertical filter array 74490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 74590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v0, r3, r4, 1 74690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v1, r3, r4, 1 74790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v2, r3, r4, 1 74890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v3, r3, r4, 1 74990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v4, r3, r4, 1 75090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v5, r3, r4, 1 75190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v6, r3, r4, 1 75290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v7, r3, r4, 1 75390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v8, r3, r4, 1 75490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v9, r3, r4, 1 75590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v10, r3, r4, 1 75690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v11, r3, r4, 1 75790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v12, r3, r4, 1 75890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v13, r3, r4, 1 75990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v14, r3, r4, 
1 76090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v15, r3, r4, 1 76190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_and_align_16 v16, r3, r4, 0 76290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 76390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber beq compute_sum_sse_16x16_b 76490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 76590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_16x16_b: 76690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v20, 8 76790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v18, 3 76890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vslh v18, v20, v18 ;# 0x0040 0040 0040 0040 0040 0040 0040 0040 76990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 77090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber load_vfilter v20, v21 77190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 77290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v0, v1 77390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v1, v2 77490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v2, v3 77590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v3, v4 77690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v4, v5 77790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v5, v6 77890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v6, v7 77990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v7, v8 78090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v8, v9 78190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v9, v10 78290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v10, v11 78390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v11, v12 78490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v12, v13 78590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v13, v14 
78690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v14, v15 78790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vfilter_16 v15, v16 78890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 78990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubercompute_sum_sse_16x16_b: 79090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v18, 0 ;# sum 79190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v19, 0 ;# sse 79290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber vspltish v23, 0 ;# unpack 79390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber li r10, 16 79490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 79590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v0, 1 79690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v1, 1 79790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v2, 1 79890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v3, 1 79990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v4, 1 80090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v5, 1 80190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v6, 1 80290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v7, 1 80390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v8, 1 80490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v9, 1 80590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v10, 1 80690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v11, 1 80790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v12, 1 80890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v13, 1 80990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v14, 1 81090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber compute_sum_sse_16 v15, 0 
81190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 81290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber variance_final v18, v19, v23, 8 81390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 81490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber addi r1, r1, 32 ;# recover stack 81590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 81690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber mtspr 256, r11 ;# reset old VRSAVE 81790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 81890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber blr 81990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 82090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .data 82190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 82290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 4 82390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberhfilter_b: 82490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0,128, 0, 0, 0 82590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0,112, 16, 0, 0 82690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0, 96, 32, 0, 0 82790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0, 80, 48, 0, 0 82890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0, 64, 64, 0, 0 82990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0, 48, 80, 0, 0 83090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0, 32, 96, 0, 0 83190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .byte 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0, 16,112, 0, 0 83290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 83390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber .align 4 83490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas 
vfilter_b:
;# Vertical filter taps: sixteen 16-byte rows forming eight (tap0, tap1)
;# pairs, each tap splatted across all 16 bytes.  load_vfilter reads two
;# consecutive rows from this table, so one pair is consumed per filter
;# selection.  Every pair adds up to 128.
    .byte   128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
    .byte     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    .byte   112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
    .byte    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
    .byte    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
    .byte    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    .byte    80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
    .byte    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
    .byte    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    .byte    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    .byte    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
    .byte    80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
    .byte    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    .byte    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
    .byte    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
    .byte   112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112

;# The four tables below are 16-entry byte-index vectors -- presumably vperm
;# control words; confirm at the use sites, which are outside this excerpt.

;# Index pattern 0,4,8,12 / 1,5,9,13 / ... : gathers every fourth byte,
;# i.e. a 4x4 byte transpose of the source vector.
    .align 4
b_hperm_b:
    .byte     0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15

;# Four overlapping 4-byte windows starting at bytes 0, 1, 2 and 3.
    .align 4
b_0123_b:
    .byte     0,  1,  2,  3,  1,  2,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6

;# Four overlapping 4-byte windows starting at bytes 4, 5, 6 and 7.
    .align 4
b_4567_b:
    .byte     4,  5,  6,  7,  5,  6,  7,  8,  6,  7,  8,  9,  7,  8,  9, 10

;# Indices 0..7 followed by 16..23.
;# Explicit alignment added for consistency with the sibling tables: this one
;# previously sat on a 16-byte boundary only because b_4567_b is exactly
;# 16 bytes long.  No padding is emitted today, so the layout is unchanged.
    .align 4
b_hilo_b:
    .byte     0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23