;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl vp8_get8x8var_ppc
    .globl vp8_get16x16var_ppc
    .globl vp8_mse16x16_ppc
    .globl vp8_variance16x16_ppc
    .globl vp8_variance16x8_ppc
    .globl vp8_variance8x16_ppc
    .globl vp8_variance8x8_ppc
    .globl vp8_variance4x4_ppc

;# load_aligned_16 V R O
;#   Load the 16 bytes starting at the (possibly unaligned) address in
;#   register R into vector V.
;#   O must be a register holding 16; it selects the following quadword.
;#   Clobbers v1, v2, v3.
.macro load_aligned_16 V R O
    lvsl    v3,  0, \R          ;# permute control for the misalignment of R

    lvx     v1,  0, \R          ;# quadword containing R
    lvx     v2, \O, \R          ;# the next quadword

    vperm   \V, v1, v2, v3      ;# pick the 16 bytes that start at R
.endm

;# prologue
;#   Saves VRSAVE in r11 (it must stay live until epilogue), marks v0-v9
;#   as in use, opens a 32 byte stack frame, sets r10 = 16 and clears
;#   v7 (constant zero), v8 (sum accumulator) and v9 (sse accumulator).
;#   Clobbers r10, r11, r12.
;#
;#   NOTE: the code below uses 0(r1)..15(r1) as vector <-> GPR scratch, which
;#   overwrites the back-chain word written by stwu.  Nothing in this file
;#   reads it back; epilogue releases the frame with addi.
.macro prologue
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffc0
    mtspr   256, r12            ;# set VRSAVE

    stwu    r1, -32(r1)         ;# create space on the stack

    li      r10, 16             ;# load offset and loop counter

    vspltisw v7, 0              ;# zero for merging
    vspltisw v8, 0              ;# zero out total to start
    vspltisw v9, 0              ;# zero out total for dif^2
.endm

;# epilogue
;#   Releases the frame created by prologue and restores VRSAVE from r11.
.macro epilogue
    addi    r1, r1, 32          ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE
.endm

;# compute_sum_sse
;#   In:  v4 = 16 source bytes, v5 = 16 reference bytes, v7 = 0.
;#   Out: v8 += per-word sums of (src - ref),
;#        v9 += per-word sums of (src - ref)^2.
;#   Clobbers v2, v3.
.macro compute_sum_sse
    ;# Compute sum first.  Unpack the bytes to halfwords so that a signed
    ;# subtract can be used; only a halfword signed subtract is available.
    ;# Do high, then low.
    vmrghb  v2, v7, v4
    vmrghb  v3, v7, v5
    vsubshs v2, v2, v3
    vsum4shs v8, v2, v8

    vmrglb  v2, v7, v4
    vmrglb  v3, v7, v5
    vsubshs v2, v2, v3
    vsum4shs v8, v2, v8

    ;# Now compute sse.  Saturating subtracts in both directions, OR-ed
    ;# together, give |src - ref| per byte.
    vsububs v2, v4, v5
    vsububs v3, v5, v4
    vor     v2, v2, v3

    vmsumubm v9, v2, v2, v9
.endm

;# finish_variance DS store_sum
;#   Reduces the v8/v9 accumulators to scalars and produces the result.
;#   In:  v7 = 0, v8 = partial sums, v9 = partial sse,
;#        r7 = unsigned int *sse, r8 = int *sum (used only if store_sum).
;#   Out: *r7 = sse, *r8 = sum (if store_sum),
;#        r3 = sse - ((sum*sum) >> DS).
;#   Clobbers r4, v8, v9 and 0(r1)..15(r1).
.macro finish_variance DS store_sum
    vsumsws v8, v8, v7          ;# total sum ends up in word 3
    vsumsws v9, v9, v7          ;# total sse ends up in word 3

    stvx    v8, 0, r1
    lwz     r3, 12(r1)          ;# r3 = sum

    stvx    v9, 0, r1
    lwz     r4, 12(r1)          ;# r4 = sse

.if \store_sum
    stw     r3, 0(r8)           ;# sum
.endif
    stw     r4, 0(r7)           ;# sse

    ;# sum*sum is never negative, but for a 16x16 block it can exceed 2^31
    ;# (|sum| <= 256*255) while still fitting in 32 bits.  Shift it as an
    ;# unsigned value so such products are not sign-extended.
    mullw   r3, r3, r3          ;# sum*sum
    srwi    r3, r3, \DS         ;# (sum*sum) >> DS
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> DS)
.endm

;# variance_16 DS loop_label store_sum
;#   Processes one 16 pixel wide row per iteration; CTR holds the number
;#   of rows.  Expects r3/r4 = src pointer/stride, r5/r6 = ref
;#   pointer/stride, r10 = 16.  See finish_variance for the outputs.
.macro variance_16 DS loop_label store_sum
\loop_label:
    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    compute_sum_sse

    bdnz    \loop_label

    finish_variance \DS, \store_sum
.endm

;# variance_8 DS loop_label store_sum
;#   Processes two 8 pixel wide rows per iteration; CTR holds the number
;#   of row pairs.  Expects r3/r4 = src pointer/stride, r5/r6 = ref
;#   pointer/stride, r10 = 16.  See finish_variance for the outputs.
;#   Clobbers v0 and v6 in addition to the registers used by the helpers.
.macro variance_8 DS loop_label store_sum
\loop_label:
    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v6, r3, r10
    load_aligned_16 v0, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# interleave the first 8 bytes of both rows into a single vector;
    ;# src and ref are interleaved the same way so the bytes still pair up.
    vmrghb  v4, v4, v6
    vmrghb  v5, v5, v0

    compute_sum_sse

    bdnz    \loop_label

    finish_variance \DS, \store_sum
.endm

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *SSE
;# r8 int *Sum
;#
;# r3 return value
vp8_get8x8var_ppc:

    prologue

    li      r9, 4               ;# 4 iterations of 2 rows
    mtctr   r9

    variance_8 6, get8x8var_loop, 1

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *SSE
;# r8 int *Sum
;#
;# r3 return value
vp8_get16x16var_ppc:

    prologue

    mtctr   r10                 ;# 16 rows

    variance_16 8, get16x16var_loop, 1

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_mse16x16_ppc:
    prologue

    mtctr   r10                 ;# 16 rows

mse16x16_loop:
    ;# only one of the inputs should need to be aligned.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# Now compute sse.
    vsububs v2, v4, v5
    vsububs v3, v5, v4
    vor     v2, v2, v3

    vmsumubm v9, v2, v2, v9

    bdnz    mse16x16_loop

    vsumsws v9, v9, v7          ;# total sse ends up in word 3

    stvx    v9, 0, r1
    lwz     r3, 12(r1)          ;# r3 = sse (also the return value)

    stw     r3, 0(r7)           ;# sse

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance16x16_ppc:

    prologue

    mtctr   r10                 ;# 16 rows

    variance_16 8, variance16x16_loop, 0

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance16x8_ppc:

    prologue

    li      r9, 8               ;# 8 rows
    mtctr   r9

    variance_16 7, variance16x8_loop, 0

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance8x16_ppc:

    prologue

    li      r9, 8               ;# 8 iterations of 2 rows
    mtctr   r9

    variance_8 7, variance8x16_loop, 0

    epilogue

    blr

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance8x8_ppc:

    prologue

    li      r9, 4               ;# 4 iterations of 2 rows
    mtctr   r9

    variance_8 6, variance8x8_loop, 0

    epilogue

    blr

;# transfer_4x4 I P
;#   Copies four 4 byte rows, starting at pointer register I with pitch
;#   register P, into the 16 bytes at 0(r1) so that they can be loaded as
;#   one vector.  I is advanced by 3*P.  Clobbers r0, r8, r9, r10.
.macro transfer_4x4 I P
    lwz     r0, 0(\I)
    add     \I, \I, \P

    lwz     r10,0(\I)
    add     \I, \I, \P

    lwz     r8, 0(\I)
    add     \I, \I, \P

    lwz     r9, 0(\I)

    stw     r0,  0(r1)
    stw     r10, 4(r1)
    stw     r8,  8(r1)
    stw     r9, 12(r1)
.endm

    .align 2
;# r3 unsigned char *src_ptr
;# r4 int  source_stride
;# r5 unsigned char *ref_ptr
;# r6 int  recon_stride
;# r7 unsigned int *sse
;#
;# r3 return value
vp8_variance4x4_ppc:

    prologue

    transfer_4x4 r3, r4
    lvx     v4, 0, r1           ;# v4 = the 4x4 source block

    transfer_4x4 r5, r6
    lvx     v5, 0, r1           ;# v5 = the 4x4 reference block

    compute_sum_sse

    finish_variance 4, 0

    epilogue

    blr