;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

; /****************************************************************************
; * Notes:
; *
; * This implementation makes use of 16-bit fixed-point versions of two
; * multiply constants:
; *         1. sqrt(2) * cos (pi/8)
; *         2.
; *            sqrt(2) * sin (pi/8)
; * Because the first constant is bigger than 1, to maintain the same 16 bit
; * fixed point precision as the second one, we use a trick of
; *         x * a = x + x*(a-1)
; * so
; *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
; *
; * For the second constant, the 16 bit version is 35468, which is bigger
; * than 32768, so in a signed 16 bit multiply it becomes a negative
; * number.
; *         (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x
; *
; **************************************************************************/


;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred,
;int pitch, unsigned char *dest,int stride)
;
; Applies the butterfly below twice to the sixteen 16-bit words read from
; `input`, re-arranging the words with punpck* after each pass, then adds the
; result to four 4-byte rows read from `pred` and stores four 4-byte rows to
; `dest`.
;
;   arg(0) input   - 32 bytes are read as four quadwords (4 words each)
;   arg(1) pred    - 4 bytes are read from each of 4 rows, `pitch` bytes apart
;   arg(2) pitch   - sign-extended from 32 bits
;   arg(3) dest    - 4 bytes are written to each of 4 rows, `stride` bytes apart
;   arg(4) stride  - sign-extended from 32 bits
;
; The second pass adds 4 to a1 and b1 and shifts every result right by 3
; (arithmetic), i.e. the final values are rounded before the prediction is
; added. The prediction add uses signed word saturation (paddsw) and the
; store uses unsigned byte saturation (packuswb).
;
; rsi and rdi are pushed/popped here; rax, rdx and mm0-mm7 are overwritten.
; NOTE(review): GET_GOT/RESTORE_GOT and the SHADOW/UNSHADOW macros come from
; x86_abi_support.asm; their exact expansion is not visible in this file.
; NOTE(review): no emms is issued before ret; presumably callers reset the
; MMX state themselves -- confirm.
global sym(vp8_short_idct4x4llm_mmx) PRIVATE
sym(vp8_short_idct4x4llm_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rax,    arg(0)              ;input
    mov         rsi,    arg(1)              ;pred

    ; load the four input quadwords (ip0..ip3 in the comments below)
    movq        mm0,    [rax   ]
    movq        mm1,    [rax+ 8]
    movq        mm2,    [rax+16]
    movq        mm3,    [rax+24]

    ; disabled code: would zero the 32 input bytes after they are loaded
%if 0
    pxor        mm7,    mm7
    movq        [rax],   mm7
    movq        [rax+8], mm7
    movq        [rax+16],mm7
    movq        [rax+24],mm7
%endif
    ; rax is reused from here on: it now holds pitch, not input
    movsxd      rax,    dword ptr arg(2)    ;pitch
    mov         rdx,    arg(3)              ;dest
    movsxd      rdi,    dword ptr arg(4)    ;stride


    ; ---- first pass ----
    psubw       mm0,            mm2             ; b1= 0-2
    paddw       mm2,            mm2             ; 2 * ip2

    movq        mm5,            mm1
    paddw       mm2,            mm0             ; a1 =0+2  (2*ip2 + ip0 - ip2)

    ; pmulhw by the (negative when signed) constant, then add x back:
    ; see the Notes block above
    pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
    paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)

    movq        mm7,            mm3             ;
    pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];

    paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
    psubw       mm7,            mm5             ; c1

    movq        mm5,            mm1
    movq        mm4,            mm3

    pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
    paddw       mm5,            mm1             ; ip1 * cos(pi/8) * sqrt(2)

    pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
    paddw       mm3,            mm4             ; ip3 * sin(pi/8) * sqrt(2)

    paddw       mm3,            mm5             ; d1
    movq        mm6,            mm2             ; a1

    movq        mm4,            mm0             ; b1
    paddw       mm2,            mm3             ;0  (a1 + d1)

    paddw       mm4,            mm7             ;1  (b1 + c1)
    psubw       mm0,            mm7             ;2  (b1 - c1)

    psubw       mm6,            mm3             ;3  (a1 - d1)

    ; ---- re-arrange words between the two passes ----
    movq        mm1,            mm2             ; 03 02 01 00
    movq        mm3,            mm4             ; 23 22 21 20

    punpcklwd   mm1,            mm0             ; 11 01 10 00
    punpckhwd   mm2,            mm0             ; 13 03 12 02

    punpcklwd   mm3,            mm6             ; 31 21 30 20
    punpckhwd   mm4,            mm6             ; 33 23 32 22

    movq        mm0,            mm1             ; 11 01 10 00
    movq        mm5,            mm2             ; 13 03 12 02

    punpckldq   mm0,            mm3             ; 30 20 10 00
    punpckhdq   mm1,            mm3             ; 31 21 11 01

    punpckldq   mm2,            mm4             ; 32 22 12 02
    punpckhdq   mm5,            mm4             ; 33 23 13 03

    movq        mm3,            mm5             ; 33 23 13 03

    ; ---- second pass (same butterfly, plus rounding) ----
    psubw       mm0,            mm2             ; b1= 0-2
    paddw       mm2,            mm2             ; 2 * ip2

    movq        mm5,            mm1
    paddw       mm2,            mm0             ; a1 =0+2

    pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
    paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)

    movq        mm7,            mm3             ;
    pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];

    paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
    psubw       mm7,            mm5             ; c1

    movq        mm5,            mm1
    movq        mm4,            mm3

    pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
    paddw       mm5,            mm1             ; ip1 * cos(pi/8) * sqrt(2)

    pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
    paddw       mm3,            mm4             ; ip3 * sin(pi/8) * sqrt(2)

    paddw       mm3,            mm5             ; d1
    ; bias a1 and b1 by 4 so that every output below carries the +4 that
    ; turns the following >>3 into a rounding shift
    paddw       mm0,            [GLOBAL(fours)]

    paddw       mm2,            [GLOBAL(fours)]
    movq        mm6,            mm2             ; a1

    movq        mm4,            mm0             ; b1
    paddw       mm2,            mm3             ;0  (a1 + d1)

    paddw       mm4,            mm7             ;1  (b1 + c1)
    psubw       mm0,            mm7             ;2  (b1 - c1)

    psubw       mm6,            mm3             ;3  (a1 - d1)
    psraw       mm2,            3

    psraw       mm0,            3
    psraw       mm4,            3

    psraw       mm6,            3

    ; ---- re-arrange words again before the store ----
    movq        mm1,            mm2             ; 03 02 01 00
    movq        mm3,            mm4             ; 23 22 21 20

    punpcklwd   mm1,            mm0             ; 11 01 10 00
    punpckhwd   mm2,            mm0             ; 13 03 12 02

    punpcklwd   mm3,            mm6             ; 31 21 30 20
    punpckhwd   mm4,            mm6             ; 33 23 32 22

    movq        mm0,            mm1             ; 11 01 10 00
    movq        mm5,            mm2             ; 13 03 12 02

    punpckldq   mm0,            mm3             ; 30 20 10 00
    punpckhdq   mm1,            mm3             ; 31 21 11 01

    punpckldq   mm2,            mm4             ; 32 22 12 02
    punpckhdq   mm5,            mm4             ; 33 23 13 03

    ; ---- add prediction and store ----
    ; For each row: load 4 pred bytes, widen to words against zero (mm7),
    ; saturating add, pack back to bytes with unsigned saturation, store
    ; the low 4 bytes.
    pxor        mm7,            mm7

    movd        mm4,            [rsi]           ; pred row 0
    punpcklbw   mm4,            mm7
    paddsw      mm0,            mm4
    packuswb    mm0,            mm7
    movd        [rdx],          mm0             ; dest row 0

    movd        mm4,            [rsi+rax]       ; pred row 1
    punpcklbw   mm4,            mm7
    paddsw      mm1,            mm4
    packuswb    mm1,            mm7
    movd        [rdx+rdi],      mm1             ; dest row 1

    movd        mm4,            [rsi+2*rax]     ; pred row 2
    punpcklbw   mm4,            mm7
    paddsw      mm2,            mm4
    packuswb    mm2,            mm7
    movd        [rdx+rdi*2],    mm2             ; dest row 2

    ; advance both pointers by one row so base+2*step addresses row 3
    add         rdx,            rdi
    add         rsi,            rax

    movd        mm4,            [rsi+2*rax]     ; pred row 3
    punpcklbw   mm4,            mm7
    paddsw      mm5,            mm4
    packuswb    mm5,            mm7
    movd        [rdx+rdi*2],    mm5             ; dest row 3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_dc_only_idct_add_mmx(
;short input_dc,
;unsigned char *pred_ptr,
;int pred_stride,
;unsigned char *dst_ptr,
;int stride)
; vp8_dc_only_idct_add_mmx
;
; Computes (input_dc + 4) >> 3 (arithmetic shift) in a 16-bit word,
; replicates it to all four words of mm5, adds it to four 4-byte rows read
; from the prediction pointer, and stores four 4-byte rows to the
; destination pointer.
;
;   arg(0) input_dc     - only the low 16 bits end up being used
;   arg(1) pred_ptr     - 4 bytes read from each of 4 rows, pred_stride apart
;   arg(2) pred_stride  - sign-extended from 32 bits
;   arg(3) dst_ptr      - 4 bytes written to each of 4 rows, stride apart
;   arg(4) stride       - sign-extended from 32 bits
;
; The add uses signed word saturation (paddsw); the result is packed to
; bytes with unsigned saturation (packuswb).
;
; rax, rcx, rdx and mm0-mm5 are overwritten.
; NOTE(review): GET_GOT/RESTORE_GOT and the SHADOW/UNSHADOW macros come from
; x86_abi_support.asm; their exact expansion is not visible in this file.
; NOTE(review): no emms is issued before ret; presumably callers reset the
; MMX state themselves -- confirm.
global sym(vp8_dc_only_idct_add_mmx) PRIVATE
sym(vp8_dc_only_idct_add_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    ; end prolog

        movd        mm5,            arg(0) ;input_dc
        mov         rax,            arg(1) ;pred_ptr
        movsxd      rdx,            dword ptr arg(2) ;pred_stride

        pxor        mm0,            mm0             ; zero, used to widen/pack

        paddw       mm5,            [GLOBAL(fours)] ; + 4 (rounding bias)
        lea         rcx,            [rdx + rdx*2]   ; 3 * pred_stride

        psraw       mm5,            3               ; >> 3

        ; broadcast the low word of mm5 to all four word lanes
        punpcklwd   mm5,            mm5

        punpckldq   mm5,            mm5

        ; load the four prediction rows (4 bytes each)
        movd        mm1,            [rax]
        movd        mm2,            [rax+rdx]
        movd        mm3,            [rax+2*rdx]
        movd        mm4,            [rax+rcx]

        ; rax/rdx are reused from here on for the destination
        mov         rax,            arg(3) ;d -- destination
        movsxd      rdx,            dword ptr arg(4) ;dst_stride

        punpcklbw   mm1,            mm0             ; bytes -> words
        paddsw      mm1,            mm5
        packuswb    mm1,            mm0             ; words -> bytes, unsigned saturation
        lea         rcx,            [rdx + rdx*2]   ; 3 * dst_stride

        punpcklbw   mm2,            mm0             ; bytes -> words
        paddsw      mm2,            mm5
        packuswb    mm2,            mm0             ; words -> bytes, unsigned saturation

        punpcklbw   mm3,            mm0             ; bytes -> words
        paddsw      mm3,            mm5
        packuswb    mm3,            mm0             ; words -> bytes, unsigned saturation

        punpcklbw   mm4,            mm0             ; bytes -> words
        paddsw      mm4,            mm5
        packuswb    mm4,            mm0             ; words -> bytes, unsigned saturation

        ; store the four destination rows (4 bytes each)
        movd        [rax],          mm1
        movd        [rax+rdx],      mm2
        movd        [rax+2*rdx],    mm3
        movd        [rax+rcx],      mm4

    ; begin epilog
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

; Read-only constants used by the routines in this file. Each label holds
; the same 16-bit value replicated four times (one per word lane of an MMX
; register) and is aligned to 16 bytes.
SECTION_RODATA
align 16
; 0x8A8C = 35468; used with pmulhw followed by adding x back (the value is
; negative when interpreted as a signed word) for the sqrt(2)*sin(pi/8)
; multiply.
x_s1sqr2:
    times 4 dw 0x8A8C
align 16
; 0x4E7B = 20091; used with pmulhw followed by adding x back for the
; sqrt(2)*cos(pi/8) multiply, i.e. this is the (sqrt(2)*cos(pi/8) - 1) part.
x_c1sqr2less1:
    times 4 dw 0x4E7B
align 16
; 4 in every word lane: rounding bias added before the arithmetic >> 3.
fours:
    times 4 dw 0x0004