190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
2f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
4f71323e297a928af368937089d3ed71239786f86Andreas Huber;  Use of this source code is governed by a BSD-style license
5f71323e297a928af368937089d3ed71239786f86Andreas Huber;  that can be found in the LICENSE file in the root of the source
6f71323e297a928af368937089d3ed71239786f86Andreas Huber;  tree. An additional intellectual property rights grant can be found
7f71323e297a928af368937089d3ed71239786f86Andreas Huber;  in the file PATENTS.  All contributing project authors may
8f71323e297a928af368937089d3ed71239786f86Andreas Huber;  be found in the AUTHORS file in the root of the source tree.
990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl vp8_get8x8var_ppc
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl vp8_get16x16var_ppc
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl vp8_mse16x16_ppc
1590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl vp8_variance16x16_ppc
1690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl vp8_variance16x8_ppc
1790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl vp8_variance8x16_ppc
1890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl vp8_variance8x8_ppc
1990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    .globl vp8_variance4x4_ppc
2090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
.macro load_aligned_16 V R O
    ;# Unaligned 16-byte load: \V receives the 16 bytes starting at address \R.
    ;#   \V  destination vector register
    ;#   \R  GPR holding the (possibly unaligned) byte address
    ;#   \O  GPR holding the byte offset of the second quadword to fetch
    ;#       (every caller in this file passes r10, which prologue sets to 16)
    ;# Clobbers v1, v2 and v3.
    lvx     v1,  0, \R          ;# aligned quadword that holds the first byte
    lvx     v2, \O, \R          ;# aligned quadword at \R + \O
    lvsl    v3,  0, \R          ;# permute control built from \R's low bits

    vperm   \V, v1, v2, v3      ;# pick the 16 wanted bytes out of v1:v2
.endm
2990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
.macro prologue
    ;# Common entry sequence for the routines in this file.
    ;# On exit:
    ;#   r11 = VRSAVE value found on entry (consumed by epilogue)
    ;#   r12 = VRSAVE value now in effect (scratch afterwards)
    ;#   r10 = 16
    ;#   v7 = v8 = v9 = 0
    ;#   r1 lowered by 32 bytes
    li      r10, 16             ;# quadword offset; some callers reuse it as row count

    mfspr   r11, 256            ;# SPR 256 is VRSAVE: remember the old value
    oris    r12, r11, 0xffc0    ;# add the ten high-order bits (v0-v9)
    mtspr   256, r12            ;# install the mask before any vector write

    stwu    r1, -32(r1)         ;# 32-byte frame, used as scratch memory

    vspltisw v9, 0              ;# accumulator for the squared differences
    vspltisw v8, 0              ;# accumulator for the differences
    vspltisw v7, 0              ;# constant zero for merges and final sums
.endm
4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
.macro epilogue
    ;# Counterpart of prologue: restores VRSAVE from r11 and releases the
    ;# 32-byte scratch frame.  r3 (the return value) is left alone.
    mtspr   256, r11            ;# VRSAVE back to the value saved on entry

    addi    r1, r1, 32          ;# pop the frame created by stwu
.endm
4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
.macro compute_sum_sse
    ;# Fold 16 source bytes (v4) and 16 reference bytes (v5) into the totals:
    ;#   v8 += per-word partial sums of (v4[i] - v5[i])
    ;#   v9 += per-word partial sums of (v4[i] - v5[i])^2
    ;# Requires v7 == 0.  Clobbers v2 and v3.

    ;# Sum of differences.  Bytes are widened to halfwords by merging with
    ;# the zero vector so that a signed halfword subtract can be used.
    ;# High eight bytes first ...
    vmrghb  v3, v7, v5
    vmrghb  v2, v7, v4
    vsubshs v2, v2, v3
    vsum4shs v8, v2, v8

    ;# ... then the low eight bytes.
    vmrglb  v3, v7, v5
    vmrglb  v2, v7, v4
    vsubshs v2, v2, v3
    vsum4shs v8, v2, v8

    ;# Sum of squares.  One of the two saturating subtracts is zero for each
    ;# byte, so OR-ing them yields |v4[i] - v5[i]|.
    vsububs v3, v5, v4
    vsububs v2, v4, v5
    vor     v2, v3, v2

    vmsumubm v9, v2, v2, v9     ;# v9 += |diff| * |diff|, four bytes per word
.endm
7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
.macro variance_16 DS loop_label store_sum
    ;# Variance of a 16-pixel-wide block, one row per loop iteration.
    ;#   \DS          shift applied to sum*sum (log2 of the pixel count)
    ;#   \loop_label  label to emit for the row loop (must be unique)
    ;#   \store_sum   non-zero: also store the sum through r8
    ;# Expects: r3/r4 = source pointer/stride, r5/r6 = reference
    ;#          pointer/stride, r7 = SSE output pointer, r8 = Sum output
    ;#          pointer (only read when \store_sum), CTR = number of rows,
    ;#          r10 = 16 and v7 = v8 = v9 = 0 as left by prologue.
    ;# Result:  r3 = sse - ((sum*sum) >> \DS), r4 = sse, *r7 = sse.
    ;# r3 and r5 are advanced past the block; v1-v5, v8, v9 are clobbered.
\loop_label:
    ;# Both rows go through the unaligned-load path.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    compute_sum_sse

    bdnz    \loop_label

    ;# Collapse the four word lanes; the total ends up in the last word.
    vsumsws v8, v8, v7
    vsumsws v9, v9, v7

    ;# Move the totals to GPRs through the scratch frame.
    ;# NOTE(review): this spill covers 0(r1)..15(r1), including the word
    ;# written by stwu in prologue - confirm nothing walks the stack here.
    stvx    v8, 0, r1
    lwz     r3, 12(r1)          ;# r3 = sum

    stvx    v9, 0, r1
    lwz     r4, 12(r1)          ;# r4 = sse

.if \store_sum
    stw     r3, 0(r8)           ;# sum
.endif
    stw     r4, 0(r7)           ;# sse

    mullw   r3, r3, r3          ;# sum*sum
    ;# sum*sum is never negative, and for a 256-pixel block |sum| can reach
    ;# 256*255 = 65280, whose square needs all 32 bits.  Use a logical
    ;# shift so that a set top bit is not treated as a sign.
    srwi    r3, r3, \DS         ;# (sum*sum) >> DS, unsigned
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> DS)
.endm
10490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
.macro variance_8 DS loop_label store_sum
    ;# Variance of an 8-pixel-wide block, two rows per loop iteration.
    ;#   \DS          shift applied to sum*sum (log2 of the pixel count)
    ;#   \loop_label  label to emit for the row loop (must be unique)
    ;#   \store_sum   non-zero: also store the sum through r8
    ;# Expects: r3/r4 = source pointer/stride, r5/r6 = reference
    ;#          pointer/stride, r7 = SSE output pointer, r8 = Sum output
    ;#          pointer (only read when \store_sum), CTR = rows / 2,
    ;#          r10 = 16 and v7 = v8 = v9 = 0 as left by prologue.
    ;# Result:  r3 = sse - ((sum*sum) >> \DS), r4 = sse, *r7 = sse.
    ;# r3 and r5 are advanced past the block; v0-v6, v8, v9 are clobbered.
\loop_label:
    ;# First row of the pair (both loads use the unaligned path).
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# Second row of the pair.
    load_aligned_16 v6, r3, r10
    load_aligned_16 v0, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# Interleave the first 8 bytes of both rows so one 16-byte vector
    ;# carries the 16 pixels of interest; source and reference are
    ;# interleaved the same way, so the pairing of pixels is preserved.
    vmrghb  v4, v4, v6
    vmrghb  v5, v5, v0

    compute_sum_sse

    bdnz    \loop_label

    ;# Collapse the four word lanes; the total ends up in the last word.
    vsumsws v8, v8, v7
    vsumsws v9, v9, v7

    ;# Move the totals to GPRs through the scratch frame.
    stvx    v8, 0, r1
    lwz     r3, 12(r1)          ;# r3 = sum

    stvx    v9, 0, r1
    lwz     r4, 12(r1)          ;# r4 = sse

.if \store_sum
    stw     r3, 0(r8)           ;# sum
.endif
    stw     r4, 0(r7)           ;# sse

    mullw   r3, r3, r3          ;# sum*sum
    ;# sum*sum is never negative, so shift it as an unsigned value.
    srwi    r3, r3, \DS         ;# (sum*sum) >> DS, unsigned
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> DS)
.endm
14890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vp8_get8x8var_ppc: variance of an 8x8 block; also reports Sum and SSE.
;#   r3  unsigned char *src_ptr
;#   r4  int            source_stride
;#   r5  unsigned char *ref_ptr
;#   r6  int            recon_stride
;#   r7  unsigned int  *SSE   (written)
;#   r8  int           *Sum   (written)
;# Returns in r3: SSE - ((Sum * Sum) >> 6)
    .align 2
vp8_get8x8var_ppc:
    li      r9, 4               ;# 4 passes of 2 rows each = 8 rows
    prologue                    ;# leaves r9 untouched
    mtctr   r9

    variance_8 6, get8x8var_loop, 1

    epilogue
    blr
17090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vp8_get16x16var_ppc: variance of a 16x16 block; also reports Sum and SSE.
;#   r3  unsigned char *src_ptr
;#   r4  int            source_stride
;#   r5  unsigned char *ref_ptr
;#   r6  int            recon_stride
;#   r7  unsigned int  *SSE   (written)
;#   r8  int           *Sum   (written)
;# Returns in r3: SSE - ((Sum * Sum) >> 8)
    .align 2
vp8_get16x16var_ppc:
    prologue
    mtctr   r10                 ;# prologue left r10 == 16: one pass per row

    variance_16 8, get16x16var_loop, 1

    epilogue
    blr
19190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vp8_mse16x16_ppc: sum of squared differences over a 16x16 block.
;#   r3  unsigned char *src_ptr
;#   r4  int            source_stride
;#   r5  unsigned char *ref_ptr
;#   r6  int            recon_stride
;#   r7  unsigned int  *sse   (written)
;# Returns in r3: the same value that is stored through r7.
    .align 2
vp8_mse16x16_ppc:
    prologue

    mtctr   r10                 ;# prologue left r10 == 16: one pass per row

mse16x16_loop:
    ;# Both rows go through the unaligned-load path.
    load_aligned_16 v4, r3, r10
    load_aligned_16 v5, r5, r10

    ;# move onto the next line
    add     r3, r3, r4
    add     r5, r5, r6

    ;# |src - ref| per byte: one of the two saturating subtracts is zero.
    vsububs v2, v4, v5
    vsububs v3, v5, v4
    vor     v2, v2, v3

    vmsumubm v9, v2, v2, v9     ;# v9 += |diff| * |diff|, four bytes per word

    bdnz    mse16x16_loop

    vsumsws v9, v9, v7          ;# total lands in the last word of v9

    ;# Move the total to r3 through the scratch frame (done once; the
    ;# second, identical spill/reload pair was redundant).
    stvx    v9, 0, r1
    lwz     r3, 12(r1)

    stw     r3, 0(r7)           ;# sse

    epilogue

    blr
23690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vp8_variance16x16_ppc: variance of a 16x16 block.
;#   r3  unsigned char *src_ptr
;#   r4  int            source_stride
;#   r5  unsigned char *ref_ptr
;#   r6  int            recon_stride
;#   r7  unsigned int  *sse   (written)
;# Returns in r3: sse - ((sum * sum) >> 8)
    .align 2
vp8_variance16x16_ppc:
    prologue
    mtctr   r10                 ;# prologue left r10 == 16: one pass per row

    variance_16 8, variance16x16_loop, 0

    epilogue
    blr
25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vp8_variance16x8_ppc: variance of a block 16 pixels wide and 8 rows high.
;#   r3  unsigned char *src_ptr
;#   r4  int            source_stride
;#   r5  unsigned char *ref_ptr
;#   r6  int            recon_stride
;#   r7  unsigned int  *sse   (written)
;# Returns in r3: sse - ((sum * sum) >> 7)
    .align 2
vp8_variance16x8_ppc:
    li      r9, 8               ;# 8 rows, one per pass
    prologue                    ;# leaves r9 untouched
    mtctr   r9

    variance_16 7, variance16x8_loop, 0

    epilogue
    blr
27790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vp8_variance8x16_ppc: variance of a block 8 pixels wide and 16 rows high.
;#   r3  unsigned char *src_ptr
;#   r4  int            source_stride
;#   r5  unsigned char *ref_ptr
;#   r6  int            recon_stride
;#   r7  unsigned int  *sse   (written)
;# Returns in r3: sse - ((sum * sum) >> 7)
    .align 2
vp8_variance8x16_ppc:
    li      r9, 8               ;# 8 passes of 2 rows each = 16 rows
    prologue                    ;# leaves r9 untouched
    mtctr   r9

    variance_8 7, variance8x16_loop, 0

    epilogue
    blr
29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vp8_variance8x8_ppc: variance of an 8x8 block.
;#   r3  unsigned char *src_ptr
;#   r4  int            source_stride
;#   r5  unsigned char *ref_ptr
;#   r6  int            recon_stride
;#   r7  unsigned int  *sse   (written)
;# Returns in r3: sse - ((sum * sum) >> 6)
    .align 2
vp8_variance8x8_ppc:
    li      r9, 4               ;# 4 passes of 2 rows each = 8 rows
    prologue                    ;# leaves r9 untouched
    mtctr   r9

    variance_8 6, variance8x8_loop, 0

    epilogue
    blr
31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
.macro transfer_4x4 I P
    ;# Gather a 4x4 byte block into 16 contiguous bytes at 0(r1)..15(r1).
    ;#   \I  GPR: address of the first row; left pointing at the fourth row
    ;#   \P  GPR: row stride in bytes
    ;# Clobbers r0, r8, r9 and r10.
    lwz     r0, 0(\I)           ;# row 0
    add     \I, \I, \P

    lwz     r10,0(\I)           ;# row 1
    add     \I, \I, \P

    lwz     r8, 0(\I)           ;# row 2
    add     \I, \I, \P

    lwz     r9, 0(\I)           ;# row 3

    ;# Pack the four rows back to back in the scratch frame.
    stw     r9, 12(r1)
    stw     r8,  8(r1)
    stw     r10, 4(r1)
    stw     r0,  0(r1)
.endm
33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;# vp8_variance4x4_ppc: variance of a 4x4 block.
;#   r3  unsigned char *src_ptr
;#   r4  int            source_stride
;#   r5  unsigned char *ref_ptr
;#   r6  int            recon_stride
;#   r7  unsigned int  *sse   (written)
;# Returns in r3: sse - ((sum * sum) >> 4)
    .align 2
vp8_variance4x4_ppc:

    prologue

    ;# Pack each 4x4 block into the scratch frame and pull it into a vector.
    ;# The two transfers share the same 16 bytes, so their order matters.
    transfer_4x4 r3, r4
    lvx     v4, 0, r1           ;# v4 = 16 source pixels

    transfer_4x4 r5, r6
    lvx     v5, 0, r1           ;# v5 = 16 reference pixels

    compute_sum_sse

    ;# Collapse the four word lanes; the total ends up in the last word.
    vsumsws v9, v9, v7
    vsumsws v8, v8, v7

    ;# Move the totals to GPRs through the scratch frame.
    stvx    v8, 0, r1
    lwz     r3, 12(r1)          ;# r3 = sum

    stvx    v9, 0, r1
    lwz     r4, 12(r1)          ;# r4 = sse

    stw     r4, 0(r7)           ;# sse

    mullw   r3, r3, r3          ;# sum*sum (at most (16*255)^2, far below 2^31)
    srawi   r3, r3, 4           ;# (sum*sum) >> 4
    subf    r3, r3, r4          ;# sse - ((sum*sum) >> 4)

    epilogue

    blr
376