;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
1090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber%include "vpx_ports/x86_abi_support.asm"
1390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
1490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
;
; Element-wise 16-bit multiply of one 16-coefficient block:
;     dq[i] = (short)(sq[i] * q[i])      for i = 0..15
; pmullw keeps only the low 16 bits of each product.
;
;   arg(0) sq : source coefficients, 16 shorts read
;   arg(1) dq : destination, 16 shorts written
;   arg(2) q  : per-coefficient multipliers, 16 shorts read
;
; Registers: rsi/rdi are saved and restored; rax and mm1 are clobbered.
; No emms is executed here -- presumably the caller clears MMX state;
; confirm against the call sites.
global sym(vp8_dequantize_b_impl_mmx)
sym(vp8_dequantize_b_impl_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
    push        rsi
    push        rdi
    ; end prolog

        mov       rsi, arg(0) ;sq
        mov       rdi, arg(1) ;dq
        mov       rax, arg(2) ;q

        ; coefficients 0..3
        movq      mm1, [rsi]
        pmullw    mm1, [rax+0]            ; mm1 = sq[0..3] * q[0..3]
        movq      [rdi], mm1

        ; coefficients 4..7
        movq      mm1, [rsi+8]
        pmullw    mm1, [rax+8]            ; mm1 = sq[4..7] * q[4..7]
        movq      [rdi+8], mm1

        ; coefficients 8..11
        movq      mm1, [rsi+16]
        pmullw    mm1, [rax+16]           ; mm1 = sq[8..11] * q[8..11]
        movq      [rdi+16], mm1

        ; coefficients 12..15
        movq      mm1, [rsi+24]
        pmullw    mm1, [rax+24]           ; mm1 = sq[12..15] * q[12..15]
        movq      [rdi+24], mm1

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
;void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
;
; Dequantizes a 4x4 block of 16-bit coefficients, runs a two-pass
; butterfly transform on it (the inverse DCT, per the function name),
; adds the result to a 4x4 block of predictor bytes and stores the
; saturated sum to dest.
;
;   arg(0) input  : 16 shorts; read, then overwritten with zeros
;   arg(1) dq     : 16 shorts, multiplied element-wise with input
;   arg(2) pred   : predictor bytes, 4 rows of 4, rows `pitch` bytes apart
;   arg(3) dest   : output bytes,    4 rows of 4, rows `stride` bytes apart
;   arg(4) pitch  : int, sign-extended to pointer width
;   arg(5) stride : int, sign-extended to pointer width
;
; Registers: rsi/rdi are saved and restored; rax, rdx and mm0-mm7 are
; clobbered. GET_GOT/RESTORE_GOT bracket the body for the GLOBAL()
; constant references. No emms is executed here -- presumably the
; caller clears MMX state; confirm against the call sites.
global sym(vp8_dequant_idct_add_mmx)
sym(vp8_dequant_idct_add_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

        mov         rax,    arg(0) ;input
        mov         rdx,    arg(1) ;dq

        ; Dequantize: mm0..mm3 = rows 0..3 of input, each multiplied
        ; word-by-word with the matching row of dq (low 16 bits kept).
        movq        mm0,    [rax   ]
        pmullw      mm0,    [rdx]

        movq        mm1,    [rax +8]
        pmullw      mm1,    [rdx +8]

        movq        mm2,    [rax+16]
        pmullw      mm2,    [rdx+16]

        movq        mm3,    [rax+24]
        pmullw      mm3,    [rdx+24]

        ; dq is no longer needed, so rdx is reused for dest.
        mov         rdx,    arg(3) ;dest
        mov         rsi,    arg(2) ;pred
        pxor        mm7,    mm7

        ; Zero all 16 input coefficients (32 bytes) now that they are
        ; held in registers.
        movq        [rax],   mm7
        movq        [rax+8], mm7

        movq        [rax+16],mm7
        movq        [rax+24],mm7

        ; input is no longer needed, so rax is reused for pitch.
        movsxd      rax,            dword ptr arg(4) ;pitch
        movsxd      rdi,            dword ptr arg(5) ;stride

        ; ---- first pass: butterfly across rows mm0..mm3 ----
        psubw       mm0,            mm2             ; b1= 0-2
        paddw       mm2,            mm2             ; 2 * row2

        movq        mm5,            mm1
        paddw       mm2,            mm0             ; a1 =0+2

        ; pmulhw keeps the high 16 bits of the signed product; the
        ; operand is added back afterwards. Presumably this compensates
        ; for constants stored offset by 1.0 in 16.16 fixed point --
        ; confirm against the constant definitions.
        pmulhw      mm5,            [GLOBAL(x_s1sqr2)]
        paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)

        movq        mm7,            mm3
        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)]

        paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
        psubw       mm7,            mm5             ; c1

        movq        mm5,            mm1
        movq        mm4,            mm3

        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
        paddw       mm5,            mm1             ; ip1 * cos(pi/8) * sqrt(2)

        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
        paddw       mm3,            mm4             ; ip3 * sin(pi/8) * sqrt(2)

        paddw       mm3,            mm5             ; d1
        movq        mm6,            mm2             ; a1

        movq        mm4,            mm0             ; b1
        paddw       mm2,            mm3             ;0

        ; NOTE(review): the ;1/;2 tags below and the lane comments in
        ; the transpose that follows (which treat mm0 as row 1 and mm4
        ; as row 2) disagree; confirm against the C reference.
        paddw       mm4,            mm7             ;1
        psubw       mm0,            mm7             ;2

        psubw       mm6,            mm3             ;3

        ; ---- transpose the 4x4 word matrix held in mm2/mm0/mm4/mm6 ----
        movq        mm1,            mm2             ; 03 02 01 00
        movq        mm3,            mm4             ; 23 22 21 20

        punpcklwd   mm1,            mm0             ; 11 01 10 00
        punpckhwd   mm2,            mm0             ; 13 03 12 02

        punpcklwd   mm3,            mm6             ; 31 21 30 20
        punpckhwd   mm4,            mm6             ; 33 23 32 22

        movq        mm0,            mm1             ; 11 01 10 00
        movq        mm5,            mm2             ; 13 03 12 02

        punpckldq   mm0,            mm3             ; 30 20 10 00
        punpckhdq   mm1,            mm3             ; 31 21 11 01

        punpckldq   mm2,            mm4             ; 32 22 12 02
        punpckhdq   mm5,            mm4             ; 33 23 13 03

        ; Put the fourth vector in mm3 so the second pass can use the
        ; same register roles as the first.
        movq        mm3,            mm5             ; 33 23 13 03

        ; ---- second pass: same butterfly, plus rounding and >>3 ----
        psubw       mm0,            mm2             ; b1= 0-2
        paddw       mm2,            mm2             ; 2 * row2

        movq        mm5,            mm1
        paddw       mm2,            mm0             ; a1 =0+2

        pmulhw      mm5,            [GLOBAL(x_s1sqr2)]
        paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)

        movq        mm7,            mm3
        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)]

        paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
        psubw       mm7,            mm5             ; c1

        movq        mm5,            mm1
        movq        mm4,            mm3

        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
        paddw       mm5,            mm1             ; ip1 * cos(pi/8) * sqrt(2)

        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
        paddw       mm3,            mm4             ; ip3 * sin(pi/8) * sqrt(2)

        paddw       mm3,            mm5             ; d1

        ; The rounding term goes into a1 and b1 before they are
        ; combined, so every output word receives it exactly once
        ; ahead of the arithmetic shift by 3. `fours` is presumably
        ; four words of 4 -- confirm against its definition.
        paddw       mm0,            [GLOBAL(fours)]

        paddw       mm2,            [GLOBAL(fours)]
        movq        mm6,            mm2             ; a1

        movq        mm4,            mm0             ; b1
        paddw       mm2,            mm3             ;0

        paddw       mm4,            mm7             ;1
        psubw       mm0,            mm7             ;2

        psubw       mm6,            mm3             ;3
        psraw       mm2,            3

        psraw       mm0,            3
        psraw       mm4,            3

        psraw       mm6,            3

        ; ---- transpose back ----
        movq        mm1,            mm2             ; 03 02 01 00
        movq        mm3,            mm4             ; 23 22 21 20

        punpcklwd   mm1,            mm0             ; 11 01 10 00
        punpckhwd   mm2,            mm0             ; 13 03 12 02

        punpcklwd   mm3,            mm6             ; 31 21 30 20
        punpckhwd   mm4,            mm6             ; 33 23 32 22

        movq        mm0,            mm1             ; 11 01 10 00
        movq        mm5,            mm2             ; 13 03 12 02

        punpckldq   mm0,            mm3             ; 30 20 10 00
        punpckhdq   mm1,            mm3             ; 31 21 11 01

        punpckldq   mm2,            mm4             ; 32 22 12 02
        punpckhdq   mm5,            mm4             ; 33 23 13 03

        ; ---- add predictor and store ----
        ; mm0, mm1, mm2, mm5 go to dest rows 0..3. For each row: load
        ; 4 predictor bytes, widen to words against zero (mm7), add
        ; with signed saturation, pack to unsigned bytes with
        ; saturation, and store 4 bytes.
        pxor        mm7,            mm7

        movd        mm4,            [rsi]           ; pred row 0
        punpcklbw   mm4,            mm7
        paddsw      mm0,            mm4
        packuswb    mm0,            mm7
        movd        [rdx],          mm0             ; dest row 0

        movd        mm4,            [rsi+rax]       ; pred row 1
        punpcklbw   mm4,            mm7
        paddsw      mm1,            mm4
        packuswb    mm1,            mm7
        movd        [rdx+rdi],      mm1             ; dest row 1

        movd        mm4,            [rsi+2*rax]     ; pred row 2
        punpcklbw   mm4,            mm7
        paddsw      mm2,            mm4
        packuswb    mm2,            mm7
        movd        [rdx+rdi*2],    mm2             ; dest row 2

        ; Advance both pointers by one row so the base+2*step
        ; addressing form reaches row 3.
        add         rdx,            rdi
        add         rsi,            rax

        movd        mm4,            [rsi+2*rax]     ; pred row 3
        punpcklbw   mm4,            mm7
        paddsw      mm5,            mm4
        packuswb    mm5,            mm7
        movd        [rdx+rdi*2],    mm5             ; dest row 3

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
24890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
24990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
250f71323e297a928af368937089d3ed71239786f86Andreas Huber;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
251f71323e297a928af368937089d3ed71239786f86Andreas Huberglobal sym(vp8_dequant_dc_idct_add_mmx)
252f71323e297a928af368937089d3ed71239786f86Andreas Hubersym(vp8_dequant_dc_idct_add_mmx):
25390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push        rbp
25490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    mov         rbp, rsp
255f71323e297a928af368937089d3ed71239786f86Andreas Huber    SHADOW_ARGS_TO_STACK 7
25690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    GET_GOT     rbx
25790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push        rsi
25890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    push        rdi
25990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ; end prolog
26090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov         rax,    arg(0) ;input
26290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        mov         rdx,    arg(1) ;dq
26390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm0,    [rax   ]
26590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmullw      mm0,    [rdx]
26690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
26790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm1,    [rax +8]
26890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmullw      mm1,    [rdx +8]
26990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm2,    [rax+16]
27190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmullw      mm2,    [rdx+16]
27290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
27390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm3,    [rax+24]
27490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pmullw      mm3,    [rdx+24]
27590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
276f71323e297a928af368937089d3ed71239786f86Andreas Huber        mov         rdx,    arg(3) ;dest
277f71323e297a928af368937089d3ed71239786f86Andreas Huber        mov         rsi,    arg(2) ;pred
27890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        pxor        mm7,    mm7
27990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        [rax],   mm7
28290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        [rax+8], mm7
28390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
28490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        [rax+16],mm7
28590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        [rax+24],mm7
28690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
287f71323e297a928af368937089d3ed71239786f86Andreas Huber        ; move lower word of Dc to lower word of mm0
288f71323e297a928af368937089d3ed71239786f86Andreas Huber        psrlq       mm0,    16
289f71323e297a928af368937089d3ed71239786f86Andreas Huber        movzx       rcx,    word ptr arg(6) ;Dc
290f71323e297a928af368937089d3ed71239786f86Andreas Huber        psllq       mm0,    16
291538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        movq        mm7,    rcx
292f71323e297a928af368937089d3ed71239786f86Andreas Huber        por         mm0,    mm7
293f71323e297a928af368937089d3ed71239786f86Andreas Huber
294f71323e297a928af368937089d3ed71239786f86Andreas Huber        movsxd      rax,            dword ptr arg(4) ;pitch
295f71323e297a928af368937089d3ed71239786f86Andreas Huber        movsxd      rdi,            dword ptr arg(5) ;stride
29690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
29790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw       mm0,            mm2             ; b1= 0-2
29890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm2,            mm2             ;
29990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm5,            mm1
30190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm2,            mm0             ; a1 =0+2
30290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
303538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
30490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
30590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm7,            mm3             ;
307538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];
30890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
30990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
31090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw       mm7,            mm5             ; c1
31190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
31290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm5,            mm1
31390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm4,            mm3
31490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
315538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
31690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm5,            mm1
31790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
318538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
31990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm3,            mm4
32090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm3,            mm5             ; d1
32290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm6,            mm2             ; a1
32390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm4,            mm0             ; b1
32590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm2,            mm3             ;0
32690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
32790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm4,            mm7             ;1
32890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw       mm0,            mm7             ;2
32990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw       mm6,            mm3             ;3
33190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm1,            mm2             ; 03 02 01 00
33390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm3,            mm4             ; 23 22 21 20
33490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklwd   mm1,            mm0             ; 11 01 10 00
33690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhwd   mm2,            mm0             ; 13 03 12 02
33790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
33890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklwd   mm3,            mm6             ; 31 21 30 20
33990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhwd   mm4,            mm6             ; 33 23 32 22
34090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm0,            mm1             ; 11 01 10 00
34290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm5,            mm2             ; 13 03 12 02
34390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckldq   mm0,            mm3             ; 30 20 10 00
34590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhdq   mm1,            mm3             ; 31 21 11 01
34690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
34790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckldq   mm2,            mm4             ; 32 22 12 02
34890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhdq   mm5,            mm4             ; 33 23 13 03
34990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm3,            mm5             ; 33 23 13 03
35190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw       mm0,            mm2             ; b1= 0-2
35390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm2,            mm2             ;
35490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
35590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm5,            mm1
35690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm2,            mm0             ; a1 =0+2
35790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
358538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
35990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
36090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm7,            mm3             ;
362538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];
36390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
36590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw       mm7,            mm5             ; c1
36690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
36790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm5,            mm1
36890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm4,            mm3
36990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
370538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
37190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm5,            mm1
37290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
373538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
37490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm3,            mm4
37590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
37690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm3,            mm5             ; d1
377538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        paddw       mm0,            [GLOBAL(fours)]
37890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
379538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber        paddw       mm2,            [GLOBAL(fours)]
38090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm6,            mm2             ; a1
38190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm4,            mm0             ; b1
38390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm2,            mm3             ;0
38490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        paddw       mm4,            mm7             ;1
38690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw       mm0,            mm7             ;2
38790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
38890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psubw       mm6,            mm3             ;3
38990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psraw       mm2,            3
39090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psraw       mm0,            3
39290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psraw       mm4,            3
39390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        psraw       mm6,            3
39590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm1,            mm2             ; 03 02 01 00
39790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm3,            mm4             ; 23 22 21 20
39890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
39990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklwd   mm1,            mm0             ; 11 01 10 00
40090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhwd   mm2,            mm0             ; 13 03 12 02
40190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpcklwd   mm3,            mm6             ; 31 21 30 20
40390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhwd   mm4,            mm6             ; 33 23 32 22
40490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm0,            mm1             ; 11 01 10 00
40690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        movq        mm5,            mm2             ; 13 03 12 02
40790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
40890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckldq   mm0,            mm3             ; 30 20 10 00
40990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhdq   mm1,            mm3             ; 31 21 11 01
41090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
41190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckldq   mm2,            mm4             ; 32 22 12 02
41290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber        punpckhdq   mm5,            mm4             ; 33 23 13 03
41390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
414f71323e297a928af368937089d3ed71239786f86Andreas Huber        pxor        mm7,            mm7
415f71323e297a928af368937089d3ed71239786f86Andreas Huber
416f71323e297a928af368937089d3ed71239786f86Andreas Huber        movd        mm4,            [rsi]
417f71323e297a928af368937089d3ed71239786f86Andreas Huber        punpcklbw   mm4,            mm7
418f71323e297a928af368937089d3ed71239786f86Andreas Huber        paddsw      mm0,            mm4
419f71323e297a928af368937089d3ed71239786f86Andreas Huber        packuswb    mm0,            mm7
420f71323e297a928af368937089d3ed71239786f86Andreas Huber        movd        [rdx],          mm0
421f71323e297a928af368937089d3ed71239786f86Andreas Huber
422f71323e297a928af368937089d3ed71239786f86Andreas Huber        movd        mm4,            [rsi+rax]
423f71323e297a928af368937089d3ed71239786f86Andreas Huber        punpcklbw   mm4,            mm7
424f71323e297a928af368937089d3ed71239786f86Andreas Huber        paddsw      mm1,            mm4
425f71323e297a928af368937089d3ed71239786f86Andreas Huber        packuswb    mm1,            mm7
426f71323e297a928af368937089d3ed71239786f86Andreas Huber        movd        [rdx+rdi],      mm1
427f71323e297a928af368937089d3ed71239786f86Andreas Huber
428f71323e297a928af368937089d3ed71239786f86Andreas Huber        movd        mm4,            [rsi+2*rax]
429f71323e297a928af368937089d3ed71239786f86Andreas Huber        punpcklbw   mm4,            mm7
430f71323e297a928af368937089d3ed71239786f86Andreas Huber        paddsw      mm2,            mm4
431f71323e297a928af368937089d3ed71239786f86Andreas Huber        packuswb    mm2,            mm7
432f71323e297a928af368937089d3ed71239786f86Andreas Huber        movd        [rdx+rdi*2],    mm2
433f71323e297a928af368937089d3ed71239786f86Andreas Huber
434f71323e297a928af368937089d3ed71239786f86Andreas Huber        add         rdx,            rdi
435f71323e297a928af368937089d3ed71239786f86Andreas Huber        add         rsi,            rax
436f71323e297a928af368937089d3ed71239786f86Andreas Huber
437f71323e297a928af368937089d3ed71239786f86Andreas Huber        movd        mm4,            [rsi+2*rax]
438f71323e297a928af368937089d3ed71239786f86Andreas Huber        punpcklbw   mm4,            mm7
439f71323e297a928af368937089d3ed71239786f86Andreas Huber        paddsw      mm5,            mm4
440f71323e297a928af368937089d3ed71239786f86Andreas Huber        packuswb    mm5,            mm7
441f71323e297a928af368937089d3ed71239786f86Andreas Huber        movd        [rdx+rdi*2],    mm5
44290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
44390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ; begin epilog
44490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop rdi
44590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop rsi
44690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    RESTORE_GOT
44790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    UNSHADOW_ARGS
44890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    pop         rbp
44990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    ret
45090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber
45290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas HuberSECTION_RODATA
; Read-only constants for the IDCT code above. Each table holds four
; identical 16-bit words, so one 64-bit MMX memory operand applies the same
; constant to all four lanes of a register.
45390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberalign 16
; x_s1sqr2: 0x8A8C = 35468, approximately sin(pi/8) * sqrt(2) * 65536.
; Read as a signed word (which is how pmulhw treats it) the value is
; negative, i.e. 35468 - 65536, so pmulhw alone yields roughly
; x * sin(pi/8) * sqrt(2) - x. Every use above therefore follows the
; pmulhw with a paddw of the original operand to add x back.
45490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberx_s1sqr2:
45590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    times 4 dw 0x8A8C
45690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberalign 16
; x_c1sqr2less1: 0x4E7B = 20091, approximately
; (cos(pi/8) * sqrt(2) - 1) * 65536. The full factor is greater than 1 and
; so cannot be expressed as a pmulhw multiplier; only its fractional part
; is stored here and the code adds the operand back with paddw after the
; multiply to get x * cos(pi/8) * sqrt(2).
45790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberx_c1sqr2less1:
45890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    times 4 dw 0x4E7B
45990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberalign 16
; fours: bias added to the a1 and b1 terms of the second pass before the
; final "psraw ..., 3". Because every output is a1 +/- d1 or b1 +/- c1, this
; makes the arithmetic shift compute (x + 4) >> 3, i.e. it rounds rather
; than truncating.
46090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberfours:
46190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber    times 4 dw 0x0004
462