;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl short_idct4x4llm_ppc
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro load_c V, LABEL, OFF, R0, R1
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lis     \R0, \LABEL@ha
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    la      \R1, \LABEL@l(\R0)
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     \V, \OFF, \R1
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# short_idct4x4llm_ppc
;#
;# 4x4 inverse DCT on 16-bit coefficients using AltiVec.
;#
;# r3 short *input    16 coefficients, read with two lvx (16-byte aligned)
;# r4 short *output   four rows of four shorts are written here
;# r5 int pitch       byte distance between consecutive output rows
;#
;# Outline:
;#   1. first pass down the columns, widened to 32-bit lanes
;#   2. 4x4 word transpose
;#   3. second pass on the transposed data
;#   4. pack to 16 bit, round with (x + 4) >> 3
;#   5. halfword transpose, then store 8 bytes per row through the stack
;#
;# Scratch: r6, r9, r10, r11, r12, v0-v12. VRSAVE is widened to cover
;# v0-v12 for the duration of the call and restored before returning.
    .align 2
short_idct4x4llm_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xfff8    ;# additionally mark v0-v12 as in use
    mtspr   256, r12            ;# set VRSAVE

    load_c v8, sinpi8sqrt2, 0, r9, r10
    load_c v9, cospi8sqrt2minus1, 0, r9, r10
    load_c v10, hi_hi, 0, r9, r10
    load_c v11, lo_lo, 0, r9, r10
    load_c v12, shift_16, 0, r9, r10

    li      r10,  16            ;# r10 = 16, reused for the stack spill below
    lvx     v0,   0, r3         ;# input ip[0], ip[ 4]
    lvx     v1, r10, r3         ;# input ip[8], ip[12]

    ;# first pass
    ;# Each vector carries four columns; halfwords are sign-extended to
    ;# words first so the butterflies run on 32-bit lanes.
    vupkhsh v2, v0              ;# ip[0] row as words
    vupkhsh v3, v1              ;# ip[8] row as words
    vaddsws v6, v2, v3          ;# a1 = ip[0]+ip[8]
    vsubsws v7, v2, v3          ;# b1 = ip[0]-ip[8]

    ;# vmulosh multiplies the low (odd) halfword of every word lane. The
    ;# sin constant 35468 does not fit a signed halfword and is seen as
    ;# 35468-65536, so adding x back after the >>16 yields (x*35468)>>16.
    vupklsh v0, v0              ;# ip[ 4] row as words
    vmulosh v4, v0, v8
    vsraw   v4, v4, v12
    vaddsws v4, v4, v0          ;# ip[ 4] * sin(pi/8) * sqrt(2)

    ;# The cos constant is stored minus one, so adding x back gives
    ;# x * cos(pi/8) * sqrt(2).
    vupklsh v1, v1              ;# ip[12] row as words
    vmulosh v5, v1, v9
    vsraw   v5, v5, v12
    vaddsws v5, v5, v1          ;# ip[12] * cos(pi/8) * sqrt(2)

    vsubsws v4, v4, v5          ;# c1

    vmulosh v3, v1, v8
    vsraw   v3, v3, v12
    vaddsws v3, v3, v1          ;# ip[12] * sin(pi/8) * sqrt(2)

    vmulosh v5, v0, v9
    vsraw   v5, v5, v12
    vaddsws v5, v5, v0          ;# ip[ 4] * cos(pi/8) * sqrt(2)

    vaddsws v3, v3, v5          ;# d1

    vaddsws v0, v6, v3          ;# a1 + d1
    vsubsws v3, v6, v3          ;# a1 - d1

    vaddsws v1, v7, v4          ;# b1 + c1
    vsubsws v2, v7, v4          ;# b1 - c1

    ;# transpose input
    vmrghw  v4, v0, v1          ;# a0 b0 a1 b1
    vmrghw  v5, v2, v3          ;# c0 d0 c1 d1

    vmrglw  v6, v0, v1          ;# a2 b2 a3 b3
    vmrglw  v7, v2, v3          ;# c2 d2 c3 d3

    vperm   v0, v4, v5, v10     ;# a0 b0 c0 d0
    vperm   v1, v4, v5, v11     ;# a1 b1 c1 d1

    vperm   v2, v6, v7, v10     ;# a2 b2 c2 d2
    vperm   v3, v6, v7, v11     ;# a3 b3 c3 d3

    ;# second pass
    ;# Same butterfly as above, now on the transposed vectors v0..v3.
    vaddsws v6, v0, v2          ;# a1 = v0 + v2
    vsubsws v7, v0, v2          ;# b1 = v0 - v2

    vmulosh v4, v1, v8
    vsraw   v4, v4, v12
    vaddsws v4, v4, v1          ;# v1 * sin(pi/8) * sqrt(2)

    vmulosh v5, v3, v9
    vsraw   v5, v5, v12
    vaddsws v5, v5, v3          ;# v3 * cos(pi/8) * sqrt(2)

    vsubsws v4, v4, v5          ;# c1

    vmulosh v2, v3, v8
    vsraw   v2, v2, v12
    vaddsws v2, v2, v3          ;# v3 * sin(pi/8) * sqrt(2)

    vmulosh v5, v1, v9
    vsraw   v5, v5, v12
    vaddsws v5, v5, v1          ;# v1 * cos(pi/8) * sqrt(2)

    vaddsws v3, v2, v5          ;# d1

    vaddsws v0, v6, v3          ;# a1 + d1
    vsubsws v3, v6, v3          ;# a1 - d1

    vaddsws v1, v7, v4          ;# b1 + c1
    vsubsws v2, v7, v4          ;# b1 - c1

    vspltish v6, 4              ;# rounding bias
    vspltish v7, 3              ;# final shift count

    vpkswss v0, v0, v1          ;# back to halfwords, signed saturation
    vpkswss v1, v2, v3

    vaddshs v0, v0, v6          ;# (x + 4) ...
    vaddshs v1, v1, v6

    vsrah   v0, v0, v7          ;# ... >> 3
    vsrah   v1, v1, v7

    ;# transpose output
    vmrghh  v2, v0, v1          ;# a0 c0 a1 c1 a2 c2 a3 c3
    vmrglh  v3, v0, v1          ;# b0 d0 b1 d1 b2 d2 b3 d3

    vmrghh  v0, v2, v3          ;# a0 b0 c0 d0 a1 b1 c1 d1
    vmrglh  v1, v2, v3          ;# a2 b2 c2 d2 a3 b3 c3 d3

    stwu    r1,-416(r1)         ;# create space on the stack

    ;# Each vector holds two output rows (8 bytes each). They are bounced
    ;# through the stack and copied out as 32-bit words. The spill slot is
    ;# 16(r1) rather than 0(r1) so the back chain word that stwu has just
    ;# written at 0(r1) is left intact. r10 still holds 16 from above.
    stvx    v0, r10, r1
    lwz     r6, 16(r1)
    stw     r6, 0(r4)
    lwz     r6, 20(r1)
    stw     r6, 4(r4)

    add     r4, r4, r5          ;# next output row

    lwz     r6, 24(r1)
    stw     r6,  0(r4)
    lwz     r6, 28(r1)
    stw     r6,  4(r4)

    add     r4, r4, r5          ;# next output row

    stvx    v1, r10, r1
    lwz     r6, 16(r1)
    stw     r6, 0(r4)
    lwz     r6, 20(r1)
    stw     r6, 4(r4)

    add     r4, r4, r5          ;# next output row

    lwz     r6, 24(r1)
    stw     r6,  0(r4)
    lwz     r6, 28(r1)
    stw     r6,  4(r4)

    addi    r1, r1, 416         ;# recover stack

    mtspr   256, r11            ;# reset old VRSAVE

    blr

;# Constant vectors for short_idct4x4llm_ppc. Every table is exactly one
;# 16-byte vector and is aligned to 16 bytes because it is fetched with lvx.

;# sin(pi/8) * sqrt(2) in 16.16 fixed point, one copy per halfword.
;# 35468 exceeds the signed halfword range, so signed multiplies see
;# 35468 - 65536.
    .align 4
sinpi8sqrt2:
    .short  35468, 35468, 35468, 35468, 35468, 35468, 35468, 35468

;# cos(pi/8) * sqrt(2) - 1 in 16.16 fixed point, one copy per halfword.
    .align 4
cospi8sqrt2minus1:
    .short  20091, 20091, 20091, 20091, 20091, 20091, 20091, 20091

;# Per-word shift count of 16 for vsraw.
    .align 4
shift_16:
    .long      16,    16,    16,    16

;# vperm control: bytes 0-7 of the first source, then bytes 0-7 of the second.
    .align 4
hi_hi:
    .byte     0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23

;# vperm control: bytes 8-15 of the first source, then bytes 8-15 of the second.
    .align 4
lo_lo:
    .byte     8,  9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31