190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# Entry points exported by this file.
1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl bilinear_predict4x4_ppc
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl bilinear_predict8x4_ppc
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl bilinear_predict8x8_ppc
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl bilinear_predict16x16_ppc
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# load_c V, LABEL, OFF, R0, R1
;#  Loads one 16-byte vector from the table at LABEL into \V.
;#   V      destination vector register
;#   LABEL  symbol of the table; its address is built with @ha/@l
;#   OFF    goes in the RA slot of lvx: a literal 0 for no offset, or a
;#          GPR holding a byte offset into the table
;#   R0     scratch GPR, receives the high-adjusted half of the address;
;#          it is the base of the la below (the visible callers pass r9)
;#   R1     scratch GPR, receives the full address of LABEL
;#  Clobbers \R0 and \R1.
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro load_c V, LABEL, OFF, R0, R1
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lis     \R0, \LABEL@ha      ;# upper half, adjusted for the signed low half
1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    la      \R1, \LABEL@l(\R0)  ;# \R1 = address of LABEL
2090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     \V, \OFF, \R1       ;# lvx ignores the low 4 bits of the address
2190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
2290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# load_vfilter V0, V1
;#  Loads two consecutive 16-byte vectors from vfilter_b:
;#   \V0 from byte offset r6, \V1 from byte offset r6 + 16.
;#  The visible callers scale y_offset by 32 (slwi ..., 5) before this
;#  macro runs, i.e. one pair of vectors per y_offset value.
;#  Clobbers r9 and r10 (r10 ends up holding the address of vfilter_b,
;#  so it is no longer 16 afterwards) and leaves r6 advanced by 16.
2390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro load_vfilter V0, V1
2490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c \V0, vfilter_b, r6, r9, r10
2590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
2690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r6,  r6, 16         ;# step to the second vector of the pair
2790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     \V1, r6, r10        ;# r10 still holds the table address from load_c
2890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
2990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# HProlog jump_label
;#  Common first-pass setup shared by the bilinear predictors.
;#  On entry: r5 = x_offset, r6 = y_offset.
;#  Always:   r5 <<= 4 (cr0 set from the result), r10 = 16, r12 = 32,
;#            v19 = halfword splat of 7.
;#  r5 == 0:  branches to \jump_label; r6 is still unscaled at that point.
;#  r5 != 0:  v20 = vector at hfilter_b + r5, v28 = vector at b_hperm_b,
;#            v18 = word splat of 64, r6 <<= 5 with cr0 set from the
;#            result.  The visible callers test that cr0 with a later beq.
;#  Clobbers r0, r9 and v21 on the r5 != 0 path.
3090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro HProlog jump_label
3190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# load up horizontal filter
3290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    slwi.   r5, r5, 4           ;# byte index into hfilter_b (16 per entry); sets cr0
3390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# index to the next set of vectors in the row.
3590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    li      r10, 16
3690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    li      r12, 32
3790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
3890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# downshift by 7 ( divide by 128 ) at the end
3990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltish v19, 7
4090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# If there isn't any filtering to be done for the horizontal, then
4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  just skip to the second pass.  cr0 still comes from the slwi. on r5.
4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq     \jump_label
4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v20, hfilter_b, r5, r9, r0
4690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# setup constants
4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# v28 = permute vector loaded from b_hperm_b
4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v28, b_hperm_b, 0, r9, r0
5090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# rounding added in on the multiply (v21 is only a temporary here)
5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltisw v21, 8
5390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltisw v18, 3
5490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vslw    v18, v21, v18       ;# 8 << 3 = 0x00000040000000400000004000000040
5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    slwi.   r6, r6, 5           ;# byte index into vfilter_b; cr0.eq <=> y_offset == 0
5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# Filters a horizontal line
6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# expects:
6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r3  src_ptr       (read by the hfilter_8 wrapper, not by HFilter)
6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r4  pitch         (read by the hfilter_8 wrapper, not by HFilter)
6390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r10 16            (read by the hfilter_8 wrapper, not by HFilter)
6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  r12 32            (not referenced by HFilter or hfilter_8)
;#  v10 perm gathering the input bytes for outputs 0-3
;#  v11 perm gathering the input bytes for outputs 4-7
6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v17 perm input    (alignment perm, set and used by hfilter_8)
6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v18 rounding      (words)
6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v19 shift         (halfwords)
6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v20 filter taps   (used as the signed-byte operand of vmsummbm)
6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v21 input pixels  (aligned source bytes; unsigned operand)
7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v22 tmp           (hfilter_8 only)
7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v23 tmp           (not written by HFilter)
7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v24 tmp           (clobbered)
7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v25 tmp           (clobbered)
7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v26 tmp           (not written by HFilter)
7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v27 tmp           (not written by HFilter)
7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;#  v28 perm output   (loaded by HProlog; not referenced by HFilter)
7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# result: \V holds the 8 filtered bytes, duplicated in both halves.
7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro HFilter V
7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v24, v21, v21, v10  ;# v24 = 0123 1234 2345 3456 (presumably, given v10 = b_0123_b)
8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v25, v21, v21, v11  ;# v25 = 4567 5678 6789 789A (presumably, given v11 = b_4567_b)
8190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmsummbm v24, v20, v24, v18 ;# per word: sum of 4 (tap * pixel) + rounding
8390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmsummbm v25, v20, v25, v18
8490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vpkswus v24, v24, v25       ;# 8 word sums -> 8 unsigned halfwords (saturating)
8690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vsrh    v24, v24, v19       ;# shift right by v19 (7 from HProlog: divide by 128)
8890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
8990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vpkuhus \V, v24, v24        ;# \V = 8-bit results, same 8 bytes in both halves
9090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
9190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# hfilter_8 V, increment_counter
;#  Loads the 16 bytes starting at r3 (any alignment) into v21 and runs
;#  HFilter on them, leaving the filtered row in \V.
;#   V                  destination vector register
;#   increment_counter  non-zero: advance r3 by the pitch in r4
;#  expects r10 = 16 (set by HProlog) plus everything HFilter expects.
;#  Clobbers v17, v21, v22, v24, v25; does not touch the condition register.
9290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro hfilter_8 V, increment_counter
9390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvsl    v17,  0, r3         ;# permute control for the misalignment of r3
9490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# input to filter is 9 bytes wide, output is 8 bytes.
9690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v21,   0, r3        ;# aligned vector containing r3
9790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v22, r10, r3        ;# the following aligned vector
9890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
9990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.if \increment_counter
10090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add     r3, r3, r4          ;# next source row (v17 was taken before this)
10190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endif
10290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v21, v21, v22, v17  ;# v21 = the 16 source bytes starting at the old r3
10390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    HFilter \V
10590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
10690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
10790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# load_and_align_8 V, increment_counter
;#  Copies the 16 bytes starting at r3 (any alignment) into \V without
;#  filtering; used when there is no horizontal pass.
;#   V                  destination vector register
;#   increment_counter  non-zero: advance r3 by the pitch in r4
;#  expects r10 = 16 (set by HProlog).
;#  Clobbers v17, v21, v22.
10890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro load_and_align_8 V, increment_counter
10990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvsl    v17,  0, r3         ;# permute control for the misalignment of r3
11090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# the unaligned 16 bytes at r3 can span two aligned vectors,
11290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  so load both and merge them below.
11390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v21,   0, r3
11490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lvx     v22, r10, r3
11590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
11690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.if \increment_counter
11790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add     r3, r3, r4          ;# next source row
11890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endif
11990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   \V, v21, v22, v17   ;# \V = the 16 source bytes starting at the old r3
12190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
12290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# write_aligned_8 V, increment_counter
;#  Stores all 16 bytes of \V at r7 with stvx (which ignores the low 4
;#  address bits) and, if increment_counter is non-zero, advances r7 by
;#  the pitch in r8.  Not invoked in the part of the file visible here.
12390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro write_aligned_8 V, increment_counter
12490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    \V,  0, r7
12590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
12690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.if \increment_counter
12790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add     r7, r7, r8          ;# next destination row
12890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endif
12990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
13090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vfilter_16 P0 P1
;#  Two-tap vertical filter over 16 unsigned bytes:
;#    \P0 = (\P0 * v20 + \P1 * v21 + v18) >> v19, packed back to bytes.
;#  expects:
;#   v18 rounding         (halfwords)
;#   v19 shift            (halfwords)
;#   v20 taps for \P0     (bytes)
;#   v21 taps for \P1     (bytes)
;#  Clobbers v22-v25.  \P1 is left unchanged.
13190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro vfilter_16 P0 P1
13290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmuleub v22, \P0, v20       ;# even bytes of P0 * tap -> halfwords
13390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vadduhm v22, v18, v22       ;#  + rounding
13490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmuloub v23, \P0, v20       ;# odd bytes of P0 * tap -> halfwords
13590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vadduhm v23, v18, v23       ;#  + rounding
13690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
13790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmuleub v24, \P1, v21       ;# even bytes of P1 * tap
13890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vadduhm v22, v22, v24       ;# Re = evens; modular add (assumes taps sum to 128 - TODO confirm)
13990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmuloub v25, \P1, v21       ;# odd bytes of P1 * tap
14090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vadduhm v23, v23, v25       ;# Ro = odds
14190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vsrh    v22, v22, v19       ;# shift right by v19 (7 from HProlog: divide by 128)
14390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vsrh    v23, v23, v19       ;# v22 v23 = evens, odds
14490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmrghh  \P0, v22, v23       ;# interleave: 16-bit results 0-7 in order
14590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vmrglh  v23, v22, v23       ;# interleave: 16-bit results 8-15 in order
14690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vpkuhus \P0, \P0, v23       ;# P0 = 8-bit result
14790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
14990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# w_8x8 V, D, R, P
;#  Writes the first 8 bytes of \V to the (possibly unaligned) address in
;#  \D, then advances \D by \P.
;#   V  vector register holding the row
;#   D  GPR with the destination pointer (must not be r0: as a d(RA) base
;#      r0 reads as zero); updated to \D + \P
;#   R  scratch GPR used to move the two words
;#   P  GPR with the destination pitch
;#  The vector is staged through 16 bytes of stack at 0(r1), so r1 must be
;#  16-byte aligned.  The stores go through \D rather than a hard-coded r7,
;#  so the register that is written is always the one that is advanced.
;#  NOTE(review): in the visible callers 0(r1) is where stwu stored the
;#  back chain word, so staging here overwrites it - confirm nothing
;#  walks the stack while these leaf routines run.
15090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.macro w_8x8 V, D, R, P
15190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    \V, 0, r1
15290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     \R, 0(r1)
15390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     \R, 0(\D)
15490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     \R, 4(r1)
15590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     \R, 4(\D)
15690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add     \D, \D, \P
15790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber.endm
15890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
15990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
16090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .align 2
;# bilinear_predict4x4_ppc
;#  Bilinear prediction of a 4x4 block.  Rows are processed as whole
;#  vectors; only the first 4 bytes of each of the 4 result rows are
;#  stored to dst.
;#   x_offset != 0: horizontal pass over 4 rows, plus a 5th row and the
;#                  vertical pass when y_offset != 0.
;#   x_offset == 0: 5 rows are copied unfiltered and the vertical pass
;#                  always runs (with whatever vfilter_b holds for y_offset).
;#  Leaf routine.  Saves VRSAVE in r11 and restores it on exit.
;#  Clobbers r0, r3, r5, r6, r7, r9, r10, r11, r12, cr0 and the vector
;#  registers named in the VRSAVE mask below.
16190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 unsigned char * src
16290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r4 int src_pitch
16390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r5 int x_offset
16490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r6 int y_offset
16590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r7 unsigned char * dst
16690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r8 int dst_pitch
16790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberbilinear_predict4x4_ppc:
16890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mfspr   r11, 256            ;# get old VRSAVE
16990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    oris    r12, r11, 0xf830    ;# mark v0-v4, v10, v11
17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ori     r12, r12, 0xfff8    ;# mark v16-v28
17190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r12            ;# set VRSAVE
17290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stwu    r1,-32(r1)          ;# 32-byte frame: back chain at 0(r1), scratch at 16(r1)
17490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    HProlog second_pass_4x4_pre_copy_b
17690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
17790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# Load up permutation constants
17890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v10, b_0123_b, 0, r9, r12
17990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v11, b_4567_b, 0, r9, r12
18090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v0, 1
18290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v1, 1
18390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v2, 1
18490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v3, 1
18590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
18690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# Finished filtering main horizontal block.  If there is no
18790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  vertical filtering, jump to storing the data.  Otherwise
18890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  load up and filter the additional line that is needed
18990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  for the vertical filter.
    ;# cr0 here still comes from the slwi. on r6 at the end of HProlog.
19090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq     store_out_4x4_b
19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
19290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v4, 0
19390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
19490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    b   second_pass_4x4_b
19590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
19690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_4x4_pre_copy_b:
19790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    slwi    r6, r6, 5           ;# index into vertical filter array
19890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
19990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v0, 1
20090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v1, 1
20190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v2, 1
20290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v3, 1
20390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v4, 0     ;# last row: r3 is not read again, so no advance
20490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
20590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_4x4_b:
    ;# rebuild the rounding constant as halfwords for vfilter_16
20690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltish v20, 8
20790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltish v18, 3
20890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
20990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_vfilter v20, v21
21190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v0,  v1
21390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v1,  v2
21490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v2,  v3
21590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v3,  v4
21690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
21790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstore_out_4x4_b:
21890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
    ;# Each row is staged through the stack so its first word can be
    ;#  written to an arbitrarily aligned dst.  The staging slot is
    ;#  16(r1): stvx at 0(r1) would overwrite the back chain word that
    ;#  stwu stored there.  r9 is free again at this point.
    li      r9, 16
21990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v0, r9, r1
22090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 16(r1)
22190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 0(r7)
22290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add     r7, r7, r8
22390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
22490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v1, r9, r1
22590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 16(r1)
22690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 0(r7)
22790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add     r7, r7, r8
22890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
22990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v2, r9, r1
23090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 16(r1)
23190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 0(r7)
23290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    add     r7, r7, r8
23390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v3, r9, r1
23590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    lwz     r0, 16(r1)
23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stw     r0, 0(r7)
23790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
23890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberexit_4x4:
23990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r1, r1, 32          ;# recover stack
24190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r11            ;# reset old VRSAVE
24290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    blr
24490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .align 2
;# bilinear_predict8x4_ppc
;#  Bilinear prediction of a block 8 bytes wide and 4 rows high.
;#   x_offset != 0: horizontal pass over 4 rows, plus a 5th row and the
;#                  vertical pass when y_offset != 0.
;#   x_offset == 0: 5 rows are copied unfiltered and the vertical pass
;#                  always runs (with whatever vfilter_b holds for y_offset).
;#  Output: dst_pitch == 8 takes the packed path (two stvx stores of two
;#  rows each); any other pitch writes 8 bytes per row through w_8x8.
;#  Leaf routine.  Saves VRSAVE in r11 and restores it on exit.
24690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 unsigned char * src
24790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r4 int src_pitch
24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r5 int x_offset
24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r6 int y_offset
25090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r7 unsigned char * dst
25190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r8 int dst_pitch
25290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberbilinear_predict8x4_ppc:
25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mfspr   r11, 256            ;# get old VRSAVE
25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    oris    r12, r11, 0xf830    ;# mark v0-v4, v10, v11
25590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ori     r12, r12, 0xfff8    ;# mark v16-v28
25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r12            ;# set VRSAVE
25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stwu    r1,-32(r1)          ;# create space on the stack
25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    HProlog second_pass_8x4_pre_copy_b
26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# Load up permutation constants
26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v10, b_0123_b, 0, r9, r12
26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v11, b_4567_b, 0, r9, r12
26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v0, 1
26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v1, 1
26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v2, 1
26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v3, 1
27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# Finished filtering main horizontal block.  If there is no
27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  vertical filtering, jump to storing the data.  Otherwise
27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  load up and filter the additional line that is needed
27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  for the vertical filter.
    ;# cr0 here still comes from the slwi. on r6 at the end of HProlog.
27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq     store_out_8x4_b
27690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v4, 0
27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    b   second_pass_8x4_b
28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_8x4_pre_copy_b:
28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    slwi    r6, r6, 5           ;# index into vertical filter array
28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v0, 1
28590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v1, 1
28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v2, 1
28790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v3, 1
    ;# NOTE(review): r3 is not read after this last load, so the advance
    ;#  requested by the 1 below looks unnecessary (the 8x8 routine passes 0).
28890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v4, 1
28990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_8x4_b:
    ;# rebuild the rounding constant as halfwords for vfilter_16
29190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltish v20, 8
29290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltish v18, 3
29390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
29490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_vfilter v20, v21
29690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v0,  v1
29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v1,  v2
29990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v2,  v3
30090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v3,  v4
30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstore_out_8x4_b:
30390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
    ;# dst_pitch == 8 means the rows are contiguous in memory, so two rows
    ;#  can be written with one vector store.
30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    cmpi    cr0, r8, 8
30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq     cr0, store_aligned_8x4_b
30690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
    ;# NOTE(review): w_8x8 stages each row at 0(r1), which is where the
    ;#  stwu above stored the back chain word - confirm nothing walks the
    ;#  stack while this leaf routine runs.
30790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v0, r7, r0, r8
30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v1, r7, r0, r8
30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v2, r7, r0, r8
31090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v3, r7, r0, r8
31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    b       exit_8x4
31390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstore_aligned_8x4_b:
    ;# presumably b_hilo_b selects the first 8 bytes of each vperm operand,
    ;#  packing two rows per vector - verify against the table definition.
    ;# assumes dst is 16-byte aligned: stvx ignores the low 4 address bits.
31590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v10, b_hilo_b, 0, r9, r10
31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v0, v0, v1, v10     ;# rows 0 and 1
31890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v2, v2, v3, v10     ;# rows 2 and 3
31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v0, 0, r7
32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r7, r7, 16
32290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v2, 0, r7
32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberexit_8x4:
32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r1, r1, 32          ;# recover stack
32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r11            ;# reset old VRSAVE
32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    blr
33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .align 2
33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r3 unsigned char * src
33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r4 int src_pitch
33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r5 int x_offset
33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r6 int y_offset
33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r7 unsigned char * dst
33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;# r8 int dst_pitch
33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberbilinear_predict8x8_ppc:
33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mfspr   r11, 256            ;# get old VRSAVE
34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    oris    r12, r11, 0xfff0
34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ori     r12, r12, 0xffff
34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r12            ;# set VRSAVE
34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stwu    r1,-32(r1)          ;# create space on the stack
34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    HProlog second_pass_8x8_pre_copy_b
34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# Load up permutation constants
34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v10, b_0123_b, 0, r9, r12
35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v11, b_4567_b, 0, r9, r12
35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v0, 1
35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v1, 1
35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v2, 1
35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v3, 1
35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v4, 1
35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v5, 1
35890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v6, 1
35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v7, 1
36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;# Finished filtering main horizontal block.  If there is no
36290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  vertical filtering, jump to storing the data.  Otherwise
36390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  load up and filter the additional line that is needed
36490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ;#  for the vertical filter.
36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq     store_out_8x8_b
36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    hfilter_8 v8, 0
36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    b   second_pass_8x8_b
37090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_8x8_pre_copy_b:
37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    slwi    r6, r6, 5           ;# index into vertical filter array
37390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v0, 1
37590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v1, 1
37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v2, 1
37790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v3, 1
37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v4, 1
37990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v5, 1
38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v6, 1
38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v7, 1
38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_and_align_8  v8, 0
38390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Hubersecond_pass_8x8_b:
38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltish v20, 8
38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vspltish v18, 3
38790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040
38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_vfilter v20, v21
39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v0,  v1
39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v1,  v2
39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v2,  v3
39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v3,  v4
39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v4,  v5
39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v5,  v6
39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v6,  v7
39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vfilter_16 v7,  v8
39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstore_out_8x8_b:
40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    cmpi    cr0, r8, 8
40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    beq     cr0, store_aligned_8x8_b
40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v0, r7, r0, r8
40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v1, r7, r0, r8
40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v2, r7, r0, r8
40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v3, r7, r0, r8
40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v4, r7, r0, r8
41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v5, r7, r0, r8
41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v6, r7, r0, r8
41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    w_8x8   v7, r7, r0, r8
41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    b       exit_8x8
41590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstore_aligned_8x8_b:
41790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    load_c v10, b_hilo_b, 0, r9, r10
41890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v0, v0, v1, v10
42090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v2, v2, v3, v10
42190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v4, v4, v5, v10
42290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    vperm   v6, v6, v7, v10
42390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
42490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v0, 0, r7
42590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r7, r7, 16
42690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v2, 0, r7
42790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r7, r7, 16
42890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v4, 0, r7
42990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r7, r7, 16
43090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    stvx    v6, 0, r7
43190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberexit_8x8:
43390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    addi    r1, r1, 32          ;# recover stack
43590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mtspr   256, r11            ;# reset old VRSAVE
43690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
43790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    blr
43890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# Filters one horizontal line and leaves 16 output pixels in \V.
;#
;# arguments:
;#  \V                  vector register that receives the 16 result bytes
;#  \increment_counter  non-zero: advance r3 by r4 once the line is loaded
;#
;# expects:
;#  r3  src_ptr
;#  r4  pitch
;#  r10 16
;#  r12 32
;#  v17 perm input
;#  v18 rounding
;#  v19 shift
;#  v20 filter taps
;#  v21 tmp
;#  v22 tmp
;#  v23 tmp
;#  v24 tmp
;#  v25 tmp
;#  v26 tmp
;#  v27 tmp
;#  v28 perm output
;#
;# No record-form instruction is used, so the condition register is
;#  left untouched by this macro.
;#
.macro hfilter_16 V, increment_counter

    lvsl    v17,  0, r3         ;# permute control for src alignment

    ;# Every vmsummbm below forms a 4-byte dot product per 32-bit lane and
    ;#  the source is shifted by 0..3 bytes, so bytes 0..18 counted from
    ;#  src_ptr are consumed.  If src_ptr is not aligned that range can
    ;#  span three vectors.
    lvx     v21,   0, r3
    lvx     v22, r10, r3
    lvx     v23, r12, r3

.if \increment_counter
    add     r3, r3, r4
.endif
    vperm   v21, v21, v22, v17
    vperm   v22, v22, v23, v17  ;# v21 v22 = 32 input bytes left-justified

    ;# set 0: outputs 0 4 8 C
    vmsummbm v24, v20, v21, v18 ;# taps times elements, plus rounding (v18)

    ;# set 1: outputs 1 5 9 D
    vsldoi  v23, v21, v22, 1
    vmsummbm v25, v20, v23, v18

    ;# set 2: outputs 2 6 A E
    vsldoi  v23, v21, v22, 2
    vmsummbm v26, v20, v23, v18

    ;# set 3: outputs 3 7 B F
    vsldoi  v23, v21, v22, 3
    vmsummbm v27, v20, v23, v18

    vpkswus v24, v24, v25       ;# v24 = 0 4 8 C 1 5 9 D (16-bit)
    vpkswus v25, v26, v27       ;# v25 = 2 6 A E 3 7 B F

    vsrh    v24, v24, v19       ;# scale v24, v25 down by the shift in v19
    vsrh    v25, v25, v19       ;#  (a divide by 128 when v19 holds 7)

    vpkuhus \V, v24, v25        ;# \V = scrambled 8-bit result

    ;# v0 is only a placeholder second source here.
    ;#  NOTE(review): assumes v28 holds indices 0..15 only (b_hperm_b),
    ;#  so v0 is never selected - confirm where v28 is loaded.
    vperm   \V, \V, v0, v28     ;# \V = correctly-ordered result
.endm
49890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# Loads 16 pixels from a possibly unaligned src_ptr into \V, unfiltered.
;#
;# arguments:
;#  \V                  vector register that receives the 16 source bytes
;#  \increment_counter  non-zero: advance r3 by r4 once the line is loaded
;#
;# expects:
;#  r3  src_ptr
;#  r4  pitch
;#  r10 16
;# clobbers:
;#  v17, v21, v22
;#
.macro load_and_align_16 V, increment_counter
    lvsl    v17,  0, r3         ;# permute control for src alignment

    ;# Only 16 bytes are wanted, so the two consecutive vectors that
    ;#  straddle src_ptr are enough for any alignment.
    lvx     v21,   0, r3
    lvx     v22, r10, r3

.if \increment_counter
    add     r3, r3, r4
.endif

    vperm   \V, v21, v22, v17   ;# \V = 16 bytes starting at src_ptr
.endm
51390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# Stores the 16 bytes of \V to the destination row with one stvx.
;#
;# arguments:
;#  \V                  vector register holding the row to store
;#  \increment_counter  non-zero: advance r7 by r8 after the store
;#
;# expects:
;#  r7  dst pointer
;#  r8  dst pitch
;#
;# stvx ignores the low four bits of the effective address, so the row is
;#  only written where intended when r7 is 16-byte aligned.
;#
.macro write_16 V, increment_counter
    stvx    \V,  0, r7

.if \increment_counter
    add     r7, r7, r8
.endif
.endm
52190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# 16x16 bilinear prediction.
;#
;# Flow:
;#  1. horizontal pass: rows 0..15 are filtered into v0..v15, and a 17th
;#     row into v16 when a vertical pass follows; when the horizontal pass
;#     is skipped the 17 rows are copied into v0..v16 unfiltered.
;#  2. vertical pass: each of v0..v15 is combined with the row below it.
;#  3. the 16 rows are stored through r7 with stvx (see write_16), so dst
;#     has to be 16-byte aligned for every row.
;#
;# NOTE(review): HProlog and vfilter_16 are defined outside this part of
;#  the file.  The code below relies on HProlog (a) branching to the label
;#  it is given when no horizontal filtering is needed, (b) setting up
;#  r10/r12 and v18-v20/v28 as hfilter_16 expects, and (c) leaving cr0
;#  describing y_offset for the beq further down - confirm against the
;#  macro definition.
;#
    .align 2
;# r3 unsigned char * src
;# r4 int src_pitch
;# r5 int x_offset
;# r6 int y_offset
;# r7 unsigned char * dst
;# r8 int dst_pitch
bilinear_predict16x16_ppc:
    mfspr   r11, 256            ;# get old VRSAVE
    oris    r12, r11, 0xffff    ;# flag v0-v15 ...
    ori     r12, r12, 0xfff8    ;#  ... and v16-v28 as in use
    mtspr   256, r12            ;# set VRSAVE

    HProlog second_pass_16x16_pre_copy_b

    ;# Horizontal pass over the 16 rows of the block.
    hfilter_16 v0,  1
    hfilter_16 v1,  1
    hfilter_16 v2,  1
    hfilter_16 v3,  1
    hfilter_16 v4,  1
    hfilter_16 v5,  1
    hfilter_16 v6,  1
    hfilter_16 v7,  1
    hfilter_16 v8,  1
    hfilter_16 v9,  1
    hfilter_16 v10, 1
    hfilter_16 v11, 1
    hfilter_16 v12, 1
    hfilter_16 v13, 1
    hfilter_16 v14, 1
    hfilter_16 v15, 1

    ;# Finished filtering main horizontal block.  If there is no
    ;#  vertical filtering, jump to storing the data.  Otherwise
    ;#  load up and filter the additional line that is needed
    ;#  for the vertical filter.
    ;# The branch tests cr0 as it was left before the hfilter_16 calls;
    ;#  hfilter_16 contains no record-form instruction, so it is intact.
    beq     store_out_16x16_b

    hfilter_16 v16, 0

    b   second_pass_16x16_b

second_pass_16x16_pre_copy_b:
    ;# Each vfilter_b entry is two 16-byte rows, hence the multiply by 32.
    slwi    r6, r6, 5           ;# index into vertical filter array

    ;# No horizontal filtering: fetch the 17 source rows as they are.
    load_and_align_16  v0,  1
    load_and_align_16  v1,  1
    load_and_align_16  v2,  1
    load_and_align_16  v3,  1
    load_and_align_16  v4,  1
    load_and_align_16  v5,  1
    load_and_align_16  v6,  1
    load_and_align_16  v7,  1
    load_and_align_16  v8,  1
    load_and_align_16  v9,  1
    load_and_align_16  v10, 1
    load_and_align_16  v11, 1
    load_and_align_16  v12, 1
    load_and_align_16  v13, 1
    load_and_align_16  v14, 1
    load_and_align_16  v15, 1
    load_and_align_16  v16, 0

second_pass_16x16_b:
    ;# v18 = 8 << 3 = 64 in every halfword: the rounding term.
    vspltish v20, 8
    vspltish v18, 3
    vslh    v18, v20, v18   ;# 0x0040 0040 0040 0040 0040 0040 0040 0040

    ;# Loads the vfilter_b entry selected by r6; r9 and r10 are used as
    ;#  scratch by the load, so r10 no longer holds 16 afterwards.
    load_vfilter v20, v21

    ;# Vertical pass: row N is combined with row N+1.
    vfilter_16 v0,  v1
    vfilter_16 v1,  v2
    vfilter_16 v2,  v3
    vfilter_16 v3,  v4
    vfilter_16 v4,  v5
    vfilter_16 v5,  v6
    vfilter_16 v6,  v7
    vfilter_16 v7,  v8
    vfilter_16 v8,  v9
    vfilter_16 v9,  v10
    vfilter_16 v10, v11
    vfilter_16 v11, v12
    vfilter_16 v12, v13
    vfilter_16 v13, v14
    vfilter_16 v14, v15
    vfilter_16 v15, v16

store_out_16x16_b:

    ;# One aligned 16-byte store per row; r7 steps by dst_pitch (r8).
    write_16 v0,  1
    write_16 v1,  1
    write_16 v2,  1
    write_16 v3,  1
    write_16 v4,  1
    write_16 v5,  1
    write_16 v6,  1
    write_16 v7,  1
    write_16 v8,  1
    write_16 v9,  1
    write_16 v10, 1
    write_16 v11, 1
    write_16 v12, 1
    write_16 v13, 1
    write_16 v14, 1
    write_16 v15, 0

    mtspr   256, r11            ;# reset old VRSAVE

    blr
63190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# Constant tables used by the bilinear predictors.
    .data

;# Horizontal filter taps: eight 16-byte rows.  Row n repeats the 4-byte
;#  group (128 - 16*n, 16*n, 0, 0) four times, one group per 32-bit lane
;#  of vmsummbm; the two non-zero taps always sum to 128.
;# vmsummbm reads its tap operand as signed bytes, so the 128 in row 0
;#  would act as -128 there.
;#  NOTE(review): row 0 is presumably never used for filtering because
;#  the horizontal pass is skipped in that case - confirm in HProlog.
    .align 4
hfilter_b:
    .byte   128,  0,  0,  0,128,  0,  0,  0,128,  0,  0,  0,128,  0,  0,  0
    .byte   112, 16,  0,  0,112, 16,  0,  0,112, 16,  0,  0,112, 16,  0,  0
    .byte    96, 32,  0,  0, 96, 32,  0,  0, 96, 32,  0,  0, 96, 32,  0,  0
    .byte    80, 48,  0,  0, 80, 48,  0,  0, 80, 48,  0,  0, 80, 48,  0,  0
    .byte    64, 64,  0,  0, 64, 64,  0,  0, 64, 64,  0,  0, 64, 64,  0,  0
    .byte    48, 80,  0,  0, 48, 80,  0,  0, 48, 80,  0,  0, 48, 80,  0,  0
    .byte    32, 96,  0,  0, 32, 96,  0,  0, 32, 96,  0,  0, 32, 96,  0,  0
    .byte    16,112,  0,  0, 16,112,  0,  0, 16,112,  0,  0, 16,112,  0,  0
64490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# Vertical filter taps: eight entries of 32 bytes each (the index is
;#  formed with a shift left by 5).  An entry is two 16-byte rows: the
;#  first tap replicated into every byte, then the second tap replicated
;#  into every byte.  The two taps of an entry always sum to 128.
    .align 4
vfilter_b:
    ;# entry 0: 128 / 0
    .byte   128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
    .byte     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
    ;# entry 1: 112 / 16
    .byte   112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
    .byte    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
    ;# entry 2: 96 / 32
    .byte    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
    .byte    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    ;# entry 3: 80 / 48
    .byte    80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
    .byte    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
    ;# entry 4: 64 / 64
    .byte    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    .byte    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
    ;# entry 5: 48 / 80
    .byte    48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48
    .byte    80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80
    ;# entry 6: 32 / 96
    .byte    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
    .byte    96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96
    ;# entry 7: 16 / 112
    .byte    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
    .byte   112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
66390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vperm control that gathers bytes 0 4 8 C, 1 5 9 D, 2 6 A E, 3 7 B F.
;#  Applied to the packed output of the four vmsummbm sets in hfilter_16
;#  (stored in the order 0 4 8 C 1 5 9 D 2 6 A E 3 7 B F) it restores
;#  pixel order 0..F.
;#  NOTE(review): presumably the value loaded into v28 - confirm in HProlog.
    .align 4
b_hperm_b:
    .byte     0,  4,  8, 12,  1,  5,  9, 13,  2,  6, 10, 14,  3,  7, 11, 15
66790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vperm control: four overlapping 4-byte windows of the first source,
;#  starting at bytes 0, 1, 2 and 3.
;#  NOTE(review): its users are outside the part of the file reviewed
;#  here - confirm the intended use there.
    .align 4
b_0123_b:
    .byte     0,  1,  2,  3,  1,  2,  3,  4,  2,  3,  4,  5,  3,  4,  5,  6
67190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vperm control: four overlapping 4-byte windows of the first source,
;#  starting at bytes 4, 5, 6 and 7.
;#  NOTE(review): its users are outside the part of the file reviewed
;#  here - confirm the intended use there.
    .align 4
b_4567_b:
    .byte     4,  5,  6,  7,  5,  6,  7,  8,  6,  7,  8,  9,  7,  8,  9, 10
67590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vperm control that joins bytes 0..7 of the first source with bytes 0..7
;#  of the second source (indices 16..23).  store_aligned_8x8_b uses it to
;#  pack two 8-pixel rows into one 16-byte vector.
;# Aligned explicitly like the other vector constants: lvx drops the low
;#  four address bits, so a misaligned table would be loaded from the
;#  wrong place without any error.
    .align 4
b_hilo_b:
    .byte     0,  1,  2,  3,  4,  5,  6,  7, 16, 17, 18, 19, 20, 21, 22, 23
678