/* Intel SIMD SSE implementation of Viterbi ACS butterflies
   for 256-state (k=9) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   void update_viterbi29_blk_sse(struct v29 *vp,unsigned char syms[],int nbits);

   Stack arguments (32-bit, frame pointer in %ebp):
     8(%ebp)  = vp     pointer to the decoder state
     12(%ebp) = syms   input symbols, two bytes consumed per decoded bit
     16(%ebp) = nbits  number of bits (trellis steps) to process
   The syms and nbits stack slots are used as loop variables and are
   modified in place.

   %eax on return: 0 on success, -1 if vp is NULL.
   NOTE(review): the prototype above says void although %eax is set;
   confirm the declared return type against the C header.

   For every bit the routine writes 256 new path metrics (one byte each)
   and 256 decision bits (32 bytes) and then swaps the old/new metric
   buffers. On exit the updated pointers are stored back into *vp.
*/

	# SSE (64-bit integer SIMD) version
	# Requires Pentium III or better

	# These are offsets into struct v29, defined in viterbi29.h
	.set DP,512
	.set OLDMETRICS,516
	.set NEWMETRICS,520

	# ------------------------------------------------------------------
	# butterfly GROUP
	#
	# Each invocation of this macro does 8 butterflies in parallel.
	# Expects:  %esi -> old metrics, %edi -> new metrics,
	#           %mm6 = first symbol in every byte,
	#           %mm5 = second symbol in every byte,
	#           %mm7 = 31 in every byte.
	# Produces: 16 new metrics stored at (16*GROUP)(%edi) and
	#           16 decision bits ORed into %ebx.
	# Clobbers: %eax, %mm0-%mm4.
	# ------------------------------------------------------------------
	.macro butterfly GROUP
	# compute branch metrics
	movq Branchtab29_sse+(8*\GROUP),%mm4
	movq Branchtab29_sse+128+(8*\GROUP),%mm3
	pxor %mm6,%mm4
	pxor %mm5,%mm3
	pavgb %mm3,%mm4			# mm4 = rounded average of the two per-symbol terms
	# psrlw shifts whole 16-bit words, so bits of the upper byte leak
	# into the top of the lower byte; the AND with 31 removes them and
	# leaves a 5-bit branch metric in every byte
	psrlw $3,%mm4
	pand %mm7,%mm4

	movq (8*\GROUP)(%esi),%mm0	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+128)(%esi),%mm3	# Incoming path metric, high bit = 1
	movq %mm0,%mm2
	movq %mm3,%mm1
	paddusb %mm4,%mm0
	paddusb %mm4,%mm3

	# invert branch metrics (31 - metric). This works only because
	# they're 5 bits
	pxor %mm7,%mm4

	paddusb %mm4,%mm1
	paddusb %mm4,%mm2

	# Find survivors, leave in mm0,2
	pminub %mm1,%mm0
	pminub %mm3,%mm2
	# get decisions, leave in mm1,3
	# (byte = 0xff where the candidate coming from the "high bit = 1"
	# state equals the survivor, i.e. was selected or tied)
	pcmpeqb %mm0,%mm1
	pcmpeqb %mm2,%mm3

	# interleave the surviving path metrics from mm0,2 and store them
	movq %mm0,%mm4
	punpckhbw %mm2,%mm0		# interleave second 8 new metrics
	punpcklbw %mm2,%mm4		# interleave first 8 new metrics
	movq %mm0,(16*\GROUP+8)(%edi)
	movq %mm4,(16*\GROUP)(%edi)

	# interleave decisions, accumulate into %ebx
	movq %mm1,%mm4
	punpckhbw %mm3,%mm1
	punpcklbw %mm3,%mm4
	# Due to an error in the Intel instruction set ref (the register
	# fields are swapped), gas assembles pmovmskb incorrectly
	# See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html
	# The instruction is therefore emitted as raw bytes.
	.byte 0x0f,0xd7,0xc1		# pmovmskb %mm1,%eax
	shll $((16*\GROUP+8)&31),%eax	# bit position inside the current 32-bit word
	orl %eax,%ebx
	.byte 0x0f,0xd7,0xc4		# pmovmskb %mm4,%eax
	shll $((16*\GROUP)&31),%eax
	orl %eax,%ebx
	.endm

	# ------------------------------------------------------------------
	# PSUBUSBM REG,MEM
	#
	# Saturating unsigned byte subtract of REG from the 8 bytes at MEM,
	# result written back to MEM.
	# Trashes %mm7
	# ------------------------------------------------------------------
	.macro PSUBUSBM REG,MEM
	movq \MEM,%mm7
	psubusb \REG,%mm7
	movq %mm7,\MEM
	.endm

	.text
	.global update_viterbi29_blk_sse,Branchtab29_sse
	.type update_viterbi29_blk_sse,@function
	.align 16

update_viterbi29_blk_sse:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	# vp == NULL: return -1. The '$' is essential; without it the
	# operand is a memory reference to address 0xffffffff.
	movl $-1,%eax
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

	# ---- main loop: one iteration per decoded bit ----
1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl   2f			# passed zero, we're done
	movl %eax,16(%ebp)

	xorl %eax,%eax
	movl 12(%ebp),%ebx	# ebx = syms
	movb (%ebx),%al
	movd %eax,%mm6		# mm6[0] = first symbol
	movb 1(%ebx),%al
	movd %eax,%mm5		# mm5[0] = second symbol
	addl $2,%ebx
	movl %ebx,12(%ebp)	# advance syms past the two symbols just read

	punpcklbw %mm6,%mm6	# mm6[1] = mm6[0]
	punpcklbw %mm5,%mm5

	# mm7 must be reloaded every iteration because the normalization
	# code below (PSUBUSBM) uses it as scratch
	movq thirtyones,%mm7
	pshufw $0,%mm6,%mm6	# copy low word to upper 3
	pshufw $0,%mm5,%mm5
	# mm6 now contains first symbol in each byte, mm5 the second

	# invoke macro 16 times for a total of 128 butterflies;
	# every pair of invocations fills one 32-bit decision word
	xorl %ebx,%ebx		# clear decisions
	butterfly GROUP=0
	butterfly GROUP=1
	movl %ebx,(%edx)	# stash decisions 0-31
	xorl %ebx,%ebx
	butterfly GROUP=2
	butterfly GROUP=3
	movl %ebx,4(%edx)	# stash decisions 32-63
	xorl %ebx,%ebx
	butterfly GROUP=4
	butterfly GROUP=5
	movl %ebx,8(%edx)	# stash decisions 64-95
	xorl %ebx,%ebx
	butterfly GROUP=6
	butterfly GROUP=7
	movl %ebx,12(%edx)	# stash decisions 96-127
	xorl %ebx,%ebx
	butterfly GROUP=8
	butterfly GROUP=9
	movl %ebx,16(%edx)	# stash decisions 128-159
	xorl %ebx,%ebx
	butterfly GROUP=10
	butterfly GROUP=11
	movl %ebx,20(%edx)	# stash decisions 160-191
	xorl %ebx,%ebx
	butterfly GROUP=12
	butterfly GROUP=13
	movl %ebx,24(%edx)	# stash decisions 192-223
	xorl %ebx,%ebx
	butterfly GROUP=14
	butterfly GROUP=15
	movl %ebx,28(%edx)	# stash decisions 224-255

	addl $32,%edx		# bump decision pointer

	# see if we have to normalize
	# (only the first new metric is tested against the threshold)
	movl (%edi),%eax	# extract first output metric
	andl $255,%eax
	cmp $50,%eax		# is it greater than 50?
	jle done		# No, no need to normalize

	# Normalize by finding smallest metric and subtracting it
	# from all metrics
	movq (%edi),%mm0
	pminub 8(%edi),%mm0
	pminub 16(%edi),%mm0
	pminub 24(%edi),%mm0
	pminub 32(%edi),%mm0
	pminub 40(%edi),%mm0
	pminub 48(%edi),%mm0
	pminub 56(%edi),%mm0
	pminub 64(%edi),%mm0
	pminub 72(%edi),%mm0
	pminub 80(%edi),%mm0
	pminub 88(%edi),%mm0
	pminub 96(%edi),%mm0
	pminub 104(%edi),%mm0
	pminub 112(%edi),%mm0
	pminub 120(%edi),%mm0
	pminub 128(%edi),%mm0
	pminub 136(%edi),%mm0
	pminub 144(%edi),%mm0
	pminub 152(%edi),%mm0
	pminub 160(%edi),%mm0
	pminub 168(%edi),%mm0
	pminub 176(%edi),%mm0
	pminub 184(%edi),%mm0
	pminub 192(%edi),%mm0
	pminub 200(%edi),%mm0
	pminub 208(%edi),%mm0
	pminub 216(%edi),%mm0
	pminub 224(%edi),%mm0
	pminub 232(%edi),%mm0
	pminub 240(%edi),%mm0
	pminub 248(%edi),%mm0
	# mm0 contains 8 smallest metrics
	# crunch down to single lowest metric (ends up in the low byte)
	movq %mm0,%mm1
	psrlq $32,%mm0
	pminub %mm1,%mm0
	movq %mm0,%mm1
	psrlq $16,%mm0
	pminub %mm1,%mm0
	movq %mm0,%mm1
	psrlq $8,%mm0
	pminub %mm1,%mm0
	punpcklbw %mm0,%mm0	# expand to all 8 bytes
	pshufw $0,%mm0,%mm0

	# mm0 now contains lowest metric in all 8 bytes
	# subtract it from every output metric
	# Trashes %mm7
	PSUBUSBM %mm0,(%edi)
	PSUBUSBM %mm0,8(%edi)
	PSUBUSBM %mm0,16(%edi)
	PSUBUSBM %mm0,24(%edi)
	PSUBUSBM %mm0,32(%edi)
	PSUBUSBM %mm0,40(%edi)
	PSUBUSBM %mm0,48(%edi)
	PSUBUSBM %mm0,56(%edi)
	PSUBUSBM %mm0,64(%edi)
	PSUBUSBM %mm0,72(%edi)
	PSUBUSBM %mm0,80(%edi)
	PSUBUSBM %mm0,88(%edi)
	PSUBUSBM %mm0,96(%edi)
	PSUBUSBM %mm0,104(%edi)
	PSUBUSBM %mm0,112(%edi)
	PSUBUSBM %mm0,120(%edi)
	PSUBUSBM %mm0,128(%edi)
	PSUBUSBM %mm0,136(%edi)
	PSUBUSBM %mm0,144(%edi)
	PSUBUSBM %mm0,152(%edi)
	PSUBUSBM %mm0,160(%edi)
	PSUBUSBM %mm0,168(%edi)
	PSUBUSBM %mm0,176(%edi)
	PSUBUSBM %mm0,184(%edi)
	PSUBUSBM %mm0,192(%edi)
	PSUBUSBM %mm0,200(%edi)
	PSUBUSBM %mm0,208(%edi)
	PSUBUSBM %mm0,216(%edi)
	PSUBUSBM %mm0,224(%edi)
	PSUBUSBM %mm0,232(%edi)
	PSUBUSBM %mm0,240(%edi)
	PSUBUSBM %mm0,248(%edi)

done:
	# swap metrics: this step's output becomes the next step's input
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

2:	emms			# leave MMX state so x87 code can run again
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret

	.data
	.align 8
thirtyones:
	.byte 31,31,31,31,31,31,31,31