;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
;
; Forward 4x4 DCT on 16-bit samples, done as two 1-D passes in SSE2 registers.
;
;   input  - first of four rows, four 16-bit words each; only 8 bytes per row
;            are read (movq), so no alignment is required on this side.
;   output - 16 words written as two 16-byte movdqa stores, so the pointer
;            must be 16-byte aligned.
;   pitch  - distance between consecutive input rows IN BYTES (it is added
;            to the byte address unscaled).
;
; Per-lane arithmetic, as spelled out by the inline comments below:
;
;   pass 1 (within each row; a1/b1/c1/d1 are pre-scaled by << 3)
;       a1 = ip[0] + ip[3]      b1 = ip[1] + ip[2]
;       c1 = ip[1] - ip[2]      d1 = ip[0] - ip[3]
;       op[0] = a1 + b1
;       op[2] = a1 - b1
;       op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12
;       op[3] = (d1 * 2217 - c1 * 5352 +  7500) >> 12
;
;   pass 2 (down each column of the pass-1 result)
;       a1 = ip[0] + ip[12]     b1 = ip[4] + ip[8]
;       c1 = ip[4] - ip[8]      d1 = ip[0] - ip[12]
;       op[0]  = (a1 + b1 + 7) >> 4
;       op[8]  = (a1 - b1 + 7) >> 4
;       op[4]  = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0)
;       op[12] =  (d1 * 2217 - c1 * 5352 + 51000) >> 16
;
; Lane comments list 16-bit words from the most significant lane down to the
; least significant lane.  Only xmm0-xmm5 are used.
global sym(vp8_short_fdct4x4_sse2)
sym(vp8_short_fdct4x4_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 3
;;  SAVE_XMM                            ; left disabled, as in the original
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                     ; input
    movsxd      rax, DWORD PTR arg(2)           ; pitch, sign-extended
    lea         rdi, [rsi + rax*2]              ; address of row 2

    movq        xmm0, MMWORD PTR[rsi]           ;03 02 01 00
    movq        xmm2, MMWORD PTR[rsi + rax]     ;13 12 11 10
    movq        xmm1, MMWORD PTR[rsi + rax*2]   ;23 22 21 20
    movq        xmm3, MMWORD PTR[rdi + rax]     ;33 32 31 30

    punpcklqdq  xmm0, xmm2                      ;13 12 11 10 03 02 01 00
    punpcklqdq  xmm1, xmm3                      ;33 32 31 30 23 22 21 20

    mov         rdi, arg(1)                     ; output (rdi is free again)

    ; Regroup so that one register holds the (x0, x1) pair of every row and
    ; the other holds the matching (x3, x2) pair; a single paddw/psubw then
    ; produces a1/b1 and d1/c1 for all four rows at once.
    movdqa      xmm2, xmm0
    punpckldq   xmm0, xmm1                      ;23 22 03 02 21 20 01 00
    punpckhdq   xmm2, xmm1                      ;33 32 13 12 31 30 11 10
    movdqa      xmm1, xmm0
    punpckldq   xmm0, xmm2                      ;31 30 21 20 11 10 01 00
    pshufhw     xmm1, xmm1, 0b1h                ;22 23 02 03 xx xx xx xx
    pshufhw     xmm2, xmm2, 0b1h                ;32 33 12 13 xx xx xx xx

    punpckhdq   xmm1, xmm2                      ;32 33 22 23 12 13 02 03
    movdqa      xmm3, xmm0
    paddw       xmm0, xmm1                      ;b1 a1 b1 a1 b1 a1 b1 a1
    psubw       xmm3, xmm1                      ;c1 d1 c1 d1 c1 d1 c1 d1
    psllw       xmm0, 3                         ;b1 <<= 3 a1 <<= 3
    psllw       xmm3, 3                         ;c1 <<= 3 d1 <<= 3
    movdqa      xmm1, xmm0
    pmaddwd     xmm0, XMMWORD PTR[GLOBAL(_mult_add)]        ;a1 + b1
    pmaddwd     xmm1, XMMWORD PTR[GLOBAL(_mult_sub)]        ;a1 - b1
    movdqa      xmm4, xmm3
    pmaddwd     xmm3, XMMWORD PTR[GLOBAL(_5352_2217)]       ;c1*2217 + d1*5352
    pmaddwd     xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)]    ;d1*2217 - c1*5352

    paddd       xmm3, XMMWORD PTR[GLOBAL(_14500)]
    paddd       xmm4, XMMWORD PTR[GLOBAL(_7500)]
    psrad       xmm3, 12            ;(c1 * 2217 + d1 * 5352 +  14500)>>12
    psrad       xmm4, 12            ;(d1 * 2217 - c1 * 5352 +   7500)>>12

    packssdw    xmm0, xmm1                      ;op[2] op[0]
    packssdw    xmm3, xmm4                      ;op[3] op[1]
    ; 23 22 21 20 03 02 01 00
    ;
    ; 33 32 31 30 13 12 11 10
    ;
    ; (in the two rows above the first digit is the coefficient index and the
    ;  second digit is the row it was computed from)
    movdqa      xmm2, xmm0
    punpcklqdq  xmm0, xmm3                      ;13 12 11 10 03 02 01 00
    punpckhqdq  xmm2, xmm3                      ;33 32 31 30 23 22 21 20

    ; Word-interleave twice to get back to row-major order; from here on the
    ; digits read row, coefficient.
    movdqa      xmm3, xmm0
    punpcklwd   xmm0, xmm2                      ;32 30 22 20 12 10 02 00
    punpckhwd   xmm3, xmm2                      ;33 31 23 21 13 11 03 01
    movdqa      xmm2, xmm0
    punpcklwd   xmm0, xmm3                      ;13 12 11 10 03 02 01 00
    punpckhwd   xmm2, xmm3                      ;33 32 31 30 23 22 21 20

    movdqa      xmm5, XMMWORD PTR[GLOBAL(_7)]
    pshufd      xmm2, xmm2, 04eh                ; swap 64-bit halves: row 3 now faces row 0, row 2 faces row 1
    movdqa      xmm3, xmm0
    paddw       xmm0, xmm2                      ;b1 b1 b1 b1 a1 a1 a1 a1
    psubw       xmm3, xmm2                      ;c1 c1 c1 c1 d1 d1 d1 d1

    pshufd      xmm0, xmm0, 0d8h                ;b1 b1 a1 a1 b1 b1 a1 a1
    movdqa      xmm2, xmm3                      ;save d1 for compare
    pshufd      xmm3, xmm3, 0d8h                ;c1 c1 d1 d1 c1 c1 d1 d1
    pshuflw     xmm0, xmm0, 0d8h                ;b1 b1 a1 a1 b1 a1 b1 a1
    pshuflw     xmm3, xmm3, 0d8h                ;c1 c1 d1 d1 c1 d1 c1 d1
    pshufhw     xmm0, xmm0, 0d8h                ;b1 a1 b1 a1 b1 a1 b1 a1
    pshufhw     xmm3, xmm3, 0d8h                ;c1 d1 c1 d1 c1 d1 c1 d1
    movdqa      xmm1, xmm0
    pmaddwd     xmm0, XMMWORD PTR[GLOBAL(_mult_add)]        ;a1 + b1
    pmaddwd     xmm1, XMMWORD PTR[GLOBAL(_mult_sub)]        ;a1 - b1

    pxor        xmm4, xmm4                      ;zero out for compare
    paddd       xmm0, xmm5
    paddd       xmm1, xmm5
    pcmpeqw     xmm2, xmm4                      ; 0xffff in every word that is zero
    psrad       xmm0, 4                         ;(a1 + b1 + 7)>>4
    psrad       xmm1, 4                         ;(a1 - b1 + 7)>>4
    pandn       xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)]        ;clear upper,
                                                            ;and keep bit 0 of lower
                                                            ; => low four words = (d1 != 0)

    movdqa      xmm4, xmm3
    pmaddwd     xmm3, XMMWORD PTR[GLOBAL(_5352_2217)]       ;c1*2217 + d1*5352
    pmaddwd     xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)]    ;d1*2217 - c1*5352
    paddd       xmm3, XMMWORD PTR[GLOBAL(_12000)]
    paddd       xmm4, XMMWORD PTR[GLOBAL(_51000)]
    packssdw    xmm0, xmm1                      ;op[8] op[0]
    psrad       xmm3, 16            ;(c1 * 2217 + d1 * 5352 +  12000)>>16
    psrad       xmm4, 16            ;(d1 * 2217 - c1 * 5352 +  51000)>>16

    packssdw    xmm3, xmm4                      ;op[12] op[4]
    movdqa      xmm1, xmm0
    paddw       xmm3, xmm2                      ;op[4] += (d1!=0)
    punpcklqdq  xmm0, xmm3                      ;op[4] op[0]
    punpckhqdq  xmm1, xmm3                      ;op[12] op[8]

    movdqa      XMMWORD PTR[rdi + 0],  xmm0     ; output words 0..7
    movdqa      XMMWORD PTR[rdi + 16], xmm1     ; output words 8..15

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
;;  RESTORE_XMM                         ; disabled to match the prolog
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
; pmaddwd multiplier pairs; the low word of each pair is listed first.
align 16
_5352_2217:
    dw 5352
    dw 2217
    dw 5352
    dw 2217
    dw 5352
    dw 2217
    dw 5352
    dw 2217
align 16
_2217_neg5352:
    dw 2217
    dw -5352
    dw 2217
    dw -5352
    dw 2217
    dw -5352
    dw 2217
    dw -5352
; (1, 1) pairs: pmaddwd yields the sum of each word pair.
align 16
_mult_add:
    times 8 dw 1
; Selects bit 0 of the four low words and clears the four high words.
align 16
_cmp_mask:
    times 4 dw 1
    times 4 dw 0

; (1, -1) pairs: pmaddwd yields low word minus high word of each pair.
align 16
_mult_sub:
    dw 1
    dw -1
    dw 1
    dw -1
    dw 1
    dw -1
    dw 1
    dw -1
; 32-bit rounding terms added before the arithmetic right shifts.
align 16
_7:
    times 4 dd 7
align 16
_14500:
    times 4 dd 14500
align 16
_7500:
    times 4 dd 7500
align 16
_12000:
    times 4 dd 12000
align 16
_51000:
    times 4 dd 51000