;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;-----------------------------------------------------------------------
; VP8_FASTQ_QUANTIZE_4 byte_offset
;
; Quantizes and dequantizes the four 16-bit coefficients found at
; byte_offset inside each of the tables.
;
; In:    rsi = coeff_ptr, rdi = qcoeff_ptr, rdx = quant_ptr,
;        rcx = round_ptr; zbin_ptr, dequant_ptr and dqcoeff_ptr are
;        fetched through arg(1), arg(3) and arg(7).
; Out:   [qcoeff_ptr + byte_offset], [dqcoeff_ptr + byte_offset]
; Clobb: rax, mm4, mm5, mm7
;-----------------------------------------------------------------------
%macro VP8_FASTQ_QUANTIZE_4 1
        movq            mm4,        [rsi + %1]      ; x = coeff
        mov             rax,        arg(1)          ;zbin_ptr
        movq            mm5,        [rax + %1]

        movq            mm7,        mm4
        psraw           mm4,        15              ; sign = x >> 15 (0 or -1)
        pxor            mm7,        mm4
        psubw           mm7,        mm4             ; abs = (x ^ sign) - sign

        pcmpgtw         mm5,        mm7             ; all-ones where zbin > abs
        pandn           mm5,        mm7             ; abs where abs >= zbin, else 0

        paddw           mm5,        [rcx + %1]      ; + round
        pmulhuw         mm5,        [rdx + %1]      ; (.. * quant) >> 16, unsigned

        pxor            mm5,        mm4
        psubw           mm5,        mm4             ;gain the sign back
        movq            [rdi + %1], mm5             ; qcoeff

        mov             rax,        arg(3)          ;dequant_ptr
        pmullw          mm5,        [rax + %1]      ; low 16 bits of q * dequant
        mov             rax,        arg(7)          ;dqcoeff_ptr
        movq            [rax + %1], mm5             ; dqcoeff
%endmacro

;-----------------------------------------------------------------------
; VP8_FASTQ_ACCUM_NONZERO_4 byte_offset
;
; For the four quantized words at byte_offset, adds the matching
; scan_mask word into the running dword sums for every word that is
; non-zero.
;
; In:    rdi = qcoeff_ptr, rsi = scan_mask, mm6 = all ones, mm7 = 0
; I/O:   mm5 = two dword partial sums
; Clobb: mm0
;-----------------------------------------------------------------------
%macro VP8_FASTQ_ACCUM_NONZERO_4 1
        movq            mm0,        [rdi + %1]
        pcmpeqw         mm0,        mm7             ; 0xffff where q == 0
        pxor            mm0,        mm6             ; 0xffff where q != 0
        psrlw           mm0,        15              ; 1 where q != 0
        pmaddwd         mm0,        [rsi + %1]      ; pairwise sum of flag * mask
        paddd           mm5,        mm0
%endmacro

;-----------------------------------------------------------------------
;int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
;                           short *qcoeff_ptr,short *dequant_ptr,
;                           short *scan_mask, short *round_ptr,
;                           short *quant_ptr, short *dqcoeff_ptr);
;
; Processes 16 coefficients (four groups of four 16-bit words at byte
; offsets 0, 8, 16 and 24), writing qcoeff_ptr and dqcoeff_ptr, then
; derives the return value from the non-zero pattern of qcoeff_ptr
; weighted by scan_mask.
;
; Out:   rax = (index of the highest set bit in the low 16 bits of the
;        mask sum) + 1, or 0 when that sum is zero.
; Clobb: rax, rcx, rdx, mm0, mm4-mm7, flags (rsi/rdi are saved/restored)
; Note:  uses pmulhuw, which is an SSE-era extension to MMX. No emms is
;        executed here, so the MMX state is left live on return.
; NOTE(review): presumably each scan_mask word is a single bit naming
;        the coefficient's position in scan order - confirm with caller.
;-----------------------------------------------------------------------
global sym(vp8_fast_quantize_b_impl_mmx)
sym(vp8_fast_quantize_b_impl_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    push rsi
    push rdi
    ; end prolog

        ; Table pointers that stay live across all four groups.
        mov             rsi,        arg(0) ;coeff_ptr
        mov             rdi,        arg(2) ;qcoeff_ptr
        mov             rdx,        arg(6) ;quant_ptr
        mov             rcx,        arg(5) ;round_ptr

        VP8_FASTQ_QUANTIZE_4 0
        VP8_FASTQ_QUANTIZE_4 8
        VP8_FASTQ_QUANTIZE_4 16
        VP8_FASTQ_QUANTIZE_4 24

        ; Build the non-zero mask from the qcoeff values just written.
        mov             rsi,        arg(4) ;scan_mask

        pxor            mm5,        mm5             ; running sums = 0
        pxor            mm7,        mm7             ; zero for the compare
        pcmpeqw         mm6,        mm6             ; all ones for the invert

        VP8_FASTQ_ACCUM_NONZERO_4 0
        VP8_FASTQ_ACCUM_NONZERO_4 8
        VP8_FASTQ_ACCUM_NONZERO_4 16
        VP8_FASTQ_ACCUM_NONZERO_4 24

        ; Fold the high dword sum into the low dword.
        movq            mm0,        mm5
        psrlq           mm5,        32
        paddd           mm0,        mm5

        ; eob adjustment begins here
        ; Only the low 16 bits are kept below, so a 32-bit movd is
        ; sufficient. Unlike "movq rcx, mm0" it is also encodable when
        ; rcx is an alias for ecx (x86_abi_support.asm is expected to
        ; alias the r-names that way on 32-bit targets - confirm).
        movd            ecx,        mm0
        and             rcx,        0xffff

        xor             rdx,        rdx
        sub             rdx,        rcx ; rdx=-rcx

        bsr             rax,        rcx             ; undefined if rcx == 0; masked below
        inc             rax

        sar             rdx,        31              ; -1 if rcx != 0, else 0
        and             rax,        rdx             ; force 0 when no bit was set
        ; Substitute the sse assembly for the old mmx mixed assembly/C. The
        ; following is kept as reference
        ;    movq            rcx,        mm0
        ;    bsr             rax,        rcx
        ;
        ;    mov             eob,        rax
        ;    mov             eee,        rcx
        ;
        ;if(eee==0)
        ;{
        ;    eob=-1;
        ;}
        ;else if(eee<0)
        ;{
        ;    eob=15;
        ;}
        ;d->eob = eob+1;

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop         rbp
    ret