;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    .globl short_idct4x4llm_ppc

;# load_c: load one 16-byte constant table into a vector register.
;#   V      destination vector register
;#   LABEL  symbol naming the table
;#   OFF    index operand handed to lvx (0 for "no index")
;#   R0     scratch GPR, receives the high-adjusted half of the address
;#   R1     scratch GPR, receives the complete address
.macro load_c V, LABEL, OFF, R0, R1
    lis     \R0, \LABEL@ha
    la      \R1, \LABEL@l(\R0)
    lvx     \V, \OFF, \R1
.endm

;# short_idct4x4llm_ppc: two-pass 4x4 inverse transform on 16-bit coefficients.
;#
;# Reads 16 shorts (two vector loads) from r3, runs the butterfly once,
;# transposes, runs it again, rounds with (x + 4) >> 3 and writes four rows
;# of four shorts to r4, advancing r4 by r5 bytes between rows.
;#
;# r3 short *input
;# r4 short *output
;# r5 int pitch
;#
;# Clobbers r6, r7, r9, r10, r11, r12 and v0-v12. VRSAVE is saved on entry
;# and restored before returning.
;# NOTE(review): lvx/stvx are used directly on r3 and r1, so both are
;# presumably expected to be 16-byte aligned -- confirm against callers/ABI.
    .align 2
short_idct4x4llm_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfff8    ;# flag v0-v12 as live
    mtspr   256, r12            ;# set VRSAVE

    load_c v8, sinpi8sqrt2, 0, r9, r10
    load_c v9, cospi8sqrt2minus1, 0, r9, r10
    load_c v10, hi_hi, 0, r9, r10
    load_c v11, lo_lo, 0, r9, r10
    load_c v12, shift_16, 0, r9, r10

    li      r10,  16
    lvx     v0,   0, r3         ;# input ip[0], ip[ 4]
    lvx     v1, r10, r3         ;# input ip[8], ip[12]

    ;# first pass
    vupkhsh v2, v0              ;# widen ip[0..3] to 32 bits
    vupkhsh v3, v1              ;# widen ip[8..11] to 32 bits
    vaddsws v6, v2, v3          ;# a1 = ip[0]+ip[8]
    vsubsws v7, v2, v3          ;# b1 = ip[0]-ip[8]

    ;# The 16-bit constants are multiplied against the low halfword of each
    ;# word, shifted right by 16, and the operand is added back in.
    vupklsh v0, v0              ;# widen ip[4..7] to 32 bits
    vmulosh v4, v0, v8
    vsraw   v4, v4, v12
    vaddsws v4, v4, v0          ;# ip[ 4] * sin(pi/8) * sqrt(2)

    vupklsh v1, v1              ;# widen ip[12..15] to 32 bits
    vmulosh v5, v1, v9
    vsraw   v5, v5, v12
    vaddsws v5, v5, v1          ;# ip[12] * cos(pi/8) * sqrt(2)

    vsubsws v4, v4, v5          ;# c1

    vmulosh v3, v1, v8
    vsraw   v3, v3, v12
    vaddsws v3, v3, v1          ;# ip[12] * sin(pi/8) * sqrt(2)

    vmulosh v5, v0, v9
    vsraw   v5, v5, v12
    vaddsws v5, v5, v0          ;# ip[ 4] * cos(pi/8) * sqrt(2)

    vaddsws v3, v3, v5          ;# d1

    vaddsws v0, v6, v3          ;# a1 + d1
    vsubsws v3, v6, v3          ;# a1 - d1

    vaddsws v1, v7, v4          ;# b1 + c1
    vsubsws v2, v7, v4          ;# b1 - c1

    ;# transpose input
    vmrghw  v4, v0, v1          ;# a0 b0 a1 b1
    vmrghw  v5, v2, v3          ;# c0 d0 c1 d1

    vmrglw  v6, v0, v1          ;# a2 b2 a3 b3
    vmrglw  v7, v2, v3          ;# c2 d2 c3 d3

    vperm   v0, v4, v5, v10     ;# a0 b0 c0 d0
    vperm   v1, v4, v5, v11     ;# a1 b1 c1 d1

    vperm   v2, v6, v7, v10     ;# a2 b2 c2 d2
    vperm   v3, v6, v7, v11     ;# a3 b3 c3 d3

    ;# second pass (same butterfly, applied to the transposed rows)
    vaddsws v6, v0, v2          ;# a1 = ip[0]+ip[8]
    vsubsws v7, v0, v2          ;# b1 = ip[0]-ip[8]

    vmulosh v4, v1, v8
    vsraw   v4, v4, v12
    vaddsws v4, v4, v1          ;# ip[ 4] * sin(pi/8) * sqrt(2)

    vmulosh v5, v3, v9
    vsraw   v5, v5, v12
    vaddsws v5, v5, v3          ;# ip[12] * cos(pi/8) * sqrt(2)

    vsubsws v4, v4, v5          ;# c1

    vmulosh v2, v3, v8
    vsraw   v2, v2, v12
    vaddsws v2, v2, v3          ;# ip[12] * sin(pi/8) * sqrt(2)

    vmulosh v5, v1, v9
    vsraw   v5, v5, v12
    vaddsws v5, v5, v1          ;# ip[ 4] * cos(pi/8) * sqrt(2)

    vaddsws v3, v2, v5          ;# d1

    vaddsws v0, v6, v3          ;# a1 + d1
    vsubsws v3, v6, v3          ;# a1 - d1

    vaddsws v1, v7, v4          ;# b1 + c1
    vsubsws v2, v7, v4          ;# b1 - c1

    vspltish v6, 4              ;# rounding term
    vspltish v7, 3              ;# final shift count

    vpkswss v0, v0, v1          ;# saturating pack back to 16 bits
    vpkswss v1, v2, v3

    vaddshs v0, v0, v6          ;# + 4
    vaddshs v1, v1, v6

    vsrah   v0, v0, v7          ;# >> 3
    vsrah   v1, v1, v7

    ;# transpose output
    vmrghh  v2, v0, v1          ;# a0 c0 a1 c1 a2 c2 a3 c3
    vmrglh  v3, v0, v1          ;# b0 d0 b1 d1 b2 d2 b3 d3

    vmrghh  v0, v2, v3          ;# a0 b0 c0 d0 a1 b1 c1 d1
    vmrglh  v1, v2, v3          ;# a2 b2 c2 d2 a3 b3 c3 d3

    stwu    r1,-416(r1)         ;# create space on the stack

    ;# Bounce each result vector through a 16-byte scratch slot so it can be
    ;# written out with word stores. The slot sits at 32(r1) rather than
    ;# 0(r1) so the back-chain word stwu just stored is left intact.
    li      r7, 32

    stvx    v0, r7, r1
    lwz     r6, 32(r1)
    stw     r6, 0(r4)
    lwz     r6, 36(r1)
    stw     r6, 4(r4)

    add     r4, r4, r5          ;# next output row

    lwz     r6, 40(r1)
    stw     r6, 0(r4)
    lwz     r6, 44(r1)
    stw     r6, 4(r4)

    add     r4, r4, r5          ;# next output row

    stvx    v1, r7, r1
    lwz     r6, 32(r1)
    stw     r6, 0(r4)
    lwz     r6, 36(r1)
    stw     r6, 4(r4)

    add     r4, r4, r5          ;# next output row

    lwz     r6, 40(r1)
    stw     r6, 0(r4)
    lwz     r6, 44(r1)
    stw     r6, 4(r4)

    addi    r1, r1, 416         ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE

    blr

;# Multiplier tables. 35468 does not fit a signed halfword, so the signed
;# multiply sees it as 35468 - 65536; the add-back of the operand after the
;# shift compensates for that.
    .align 4
sinpi8sqrt2:
    .short  35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468

    .align 4
cospi8sqrt2minus1:
    .short  20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091

;# Per-word shift count used with vsraw.
    .align 4
shift_16:
    .long      16,    16,    16,    16

;# vperm selector: first 8 bytes of operand A followed by first 8 bytes of B.
    .align 4
hi_hi:
    .byte     0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23

;# vperm selector: last 8 bytes of operand A followed by last 8 bytes of B.
    .align 4
lo_lo:
    .byte     8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31