19682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* mmx.h 29682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 39682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MultiMedia eXtensions GCC interface library for IA32. 49682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 59682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall To use this library, simply include this header file 69682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall and compile with GCC. You MUST have inlining enabled 79682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall in order for mmx_ok() to work; this can be done by 89682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall simply using -O on the GCC command line. 99682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Compiling with -DMMX_TRACE will cause detailed trace 119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall output to be sent to stderr for each mmx operation. 129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall This adds lots of code, and obviously slows execution to 139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall a crawl, but can be very useful for debugging. 149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY 169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT 179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY 189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall AND FITNESS FOR ANY PARTICULAR PURPOSE. 199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1997-99 by H. Dietz and R. 
Fisher 219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Notes: 239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall It appears that the latest gas has the pand problem fixed, therefore 249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall I'll undefine BROKEN_PAND by default. 259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#ifndef _MMX_H 289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define _MMX_H 299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* Warning: at this writing, the version of GAS packaged 329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall with most Linux distributions does not handle the 339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall parallel AND operation mnemonic correctly. If the 349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall symbol BROKEN_PAND is defined, a slower alternative 359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall coding will be used. If execution of mmxtest results 369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall in an illegal instruction fault, define this symbol. 
379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef BROKEN_PAND 399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* The type of an value that fits in an MMX register 429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (note that long long constant values MUST be suffixed 439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall by LL and unsigned long long values by ULL, lest 449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall they be truncated by the compiler) 459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Halltypedef union { 479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall long long q; /* Quadword (64-bit) value */ 489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned long long uq; /* Unsigned Quadword */ 499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int d[2]; /* 2 Doubleword (32-bit) values */ 509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned int ud[2]; /* 2 Unsigned Doubleword */ 519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall short w[4]; /* 4 Word (16-bit) values */ 529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned short uw[4]; /* 4 Unsigned Word */ 539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall char b[8]; /* 8 Byte (8-bit) values */ 549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned char ub[8]; /* 8 Unsigned Byte */ 559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall float s[2]; /* Single-precision (32-bit) value */ 569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} __attribute__ ((aligned (8))) mmx_t; /* On an 8-byte (64-bit) boundary */ 579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if 0 609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* Function to test 
#if 0
/*	NOTE: everything up to the matching #endif is compiled out.
	The code relies on 32-bit-only instructions (popl %eax) and on
	global assembler labels, which would be emitted twice if the
	function body were ever instantiated more than once.
*/

/*	Function to test if multimedia instructions are supported...
*/
inline extern int
mm_support(void)
{
	/* Returns 1 if MMX instructions are supported,
	   3 if Cyrix MMX and Extended MMX instructions are supported
	   5 if AMD MMX and 3DNow! instructions are supported
	   0 if hardware does not support any of these
	*/
	register int rval = 0;

	__asm__ __volatile__ (
		/* See if CPUID instruction is supported ... */
		/* ... Get copies of EFLAGS into eax and ecx */
		"pushf\n\t"
		"popl %%eax\n\t"
		"movl %%eax, %%ecx\n\t"

		/* ... Toggle the ID bit in one copy and store */
		/*     to the EFLAGS reg */
		"xorl $0x200000, %%eax\n\t"
		"push %%eax\n\t"
		"popf\n\t"

		/* ... Get the (hopefully modified) EFLAGS */
		"pushf\n\t"
		"popl %%eax\n\t"

		/* ... Compare and test result */
		"xorl %%eax, %%ecx\n\t"
		"testl $0x200000, %%ecx\n\t"
		"jz NotSupported1\n\t"		/* CPUID not supported */


		/* Get standard CPUID information, and
		       go to a specific vendor section */
		"movl $0, %%eax\n\t"
		"cpuid\n\t"

		/* Check for Intel */
		"cmpl $0x756e6547, %%ebx\n\t"
		"jne TryAMD\n\t"
		"cmpl $0x49656e69, %%edx\n\t"
		"jne TryAMD\n\t"
		"cmpl $0x6c65746e, %%ecx\n"
		"jne TryAMD\n\t"
		"jmp Intel\n\t"

		/* Check for AMD */
		"\nTryAMD:\n\t"
		"cmpl $0x68747541, %%ebx\n\t"
		"jne TryCyrix\n\t"
		"cmpl $0x69746e65, %%edx\n\t"
		"jne TryCyrix\n\t"
		"cmpl $0x444d4163, %%ecx\n"
		"jne TryCyrix\n\t"
		"jmp AMD\n\t"

		/* Check for Cyrix */
		"\nTryCyrix:\n\t"
		"cmpl $0x69727943, %%ebx\n\t"
		"jne NotSupported2\n\t"
		"cmpl $0x736e4978, %%edx\n\t"
		"jne NotSupported3\n\t"
		"cmpl $0x64616574, %%ecx\n\t"
		"jne NotSupported4\n\t"
		/* Drop through to Cyrix... */


		/* Cyrix Section */
		/* See if extended CPUID level 80000001 is supported */
		/* The value of CPUID/80000001 for the 6x86MX is undefined
		   according to the Cyrix CPU Detection Guide (Preliminary
		   Rev. 1.01 table 1), so we'll check the value of eax for
		   CPUID/0 to see if standard CPUID level 2 is supported.
		   According to the table, the only CPU which supports level
		   2 is also the only one which supports extended CPUID levels.
		*/
		"cmpl $0x2, %%eax\n\t"
		"jne MMXtest\n\t"	/* Use standard CPUID instead */

		/* Extended CPUID supported (in theory), so get extended
		   features */
		"movl $0x80000001, %%eax\n\t"
		"cpuid\n\t"
		"testl $0x00800000, %%eax\n\t"	/* Test for MMX */
		"jz NotSupported5\n\t"		/* MMX not supported */
		"testl $0x01000000, %%eax\n\t"	/* Test for Ext'd MMX */
		"jnz EMMXSupported\n\t"
		/* (a stray ':' used to follow %0 in the result stores) */
		"movl $1, %0\n\n\t"		/* MMX Supported */
		"jmp Return\n\n"
		"EMMXSupported:\n\t"
		"movl $3, %0\n\n\t"		/* EMMX and MMX Supported */
		"jmp Return\n\t"


		/* AMD Section */
		"AMD:\n\t"

		/* See if extended CPUID is supported */
		"movl $0x80000000, %%eax\n\t"
		"cpuid\n\t"
		"cmpl $0x80000000, %%eax\n\t"
		/* Unsigned compare: 0x80000000 is INT_MIN, so the signed
		   "jl" used previously could never be taken. */
		"jb MMXtest\n\t"	/* Use standard CPUID instead */

		/* Extended CPUID supported, so get extended features */
		"movl $0x80000001, %%eax\n\t"
		"cpuid\n\t"
		"testl $0x00800000, %%edx\n\t"	/* Test for MMX */
		"jz NotSupported6\n\t"		/* MMX not supported */
		"testl $0x80000000, %%edx\n\t"	/* Test for 3DNow! */
		"jnz ThreeDNowSupported\n\t"
		"movl $1, %0\n\n\t"		/* MMX Supported */
		"jmp Return\n\n"
		"ThreeDNowSupported:\n\t"
		"movl $5, %0\n\n\t"		/* 3DNow! and MMX Supported */
		"jmp Return\n\t"


		/* Intel Section */
		"Intel:\n\t"

		/* Check for MMX */
		"MMXtest:\n\t"
		"movl $1, %%eax\n\t"
		"cpuid\n\t"
		"testl $0x00800000, %%edx\n\t"	/* Test for MMX */
		"jz NotSupported7\n\t"		/* MMX Not supported */
		"movl $1, %0\n\n\t"		/* MMX Supported */
		"jmp Return\n\t"

		/* Nothing supported */
		/* (lines starting with '#' are assembler comments kept
		    as per-path debugging aids) */
		"\nNotSupported1:\n\t"
		"#movl $101, %0:\n\n\t"
		"\nNotSupported2:\n\t"
		"#movl $102, %0:\n\n\t"
		"\nNotSupported3:\n\t"
		"#movl $103, %0:\n\n\t"
		"\nNotSupported4:\n\t"
		"#movl $104, %0:\n\n\t"
		"\nNotSupported5:\n\t"
		"#movl $105, %0:\n\n\t"
		"\nNotSupported6:\n\t"
		"#movl $106, %0:\n\n\t"
		"\nNotSupported7:\n\t"
		"#movl $107, %0:\n\n\t"
		"movl $0, %0\n\n\t"

		"Return:\n\t"
		: "=a" (rval)
		: /* no input */
		/* eax is the "=a" output operand, so it must not also be
		   named as a clobber */
		: "ebx", "ecx", "edx"
	);

	/* Return */
	return(rval);
}

/*	Function to test if mmx instructions are supported...
*/
inline extern int
mmx_ok(void)
{
	/* Returns 1 if MMX instructions are supported, 0 otherwise */
	/* Bit 0 is set in every "supported" code (1, 3, 5) that
	   mm_support() can return. */
	return ( mm_support() & 0x1 );
}
#endif
/*	Helper functions for the instruction macros that follow...
	(note that memory-to-register, m2r, instructions are nearly
	 as efficient as register-to-register, r2r, instructions;
	 however, memory-to-memory instructions are really simulated
	 as a convenience, and are only 1/3 as efficient)

	Argument conventions shared by both variants below:
	  op	instruction mnemonic, pasted into the asm text
	  reg	MMX register name without the '%' (e.g. mm0)
	  imm	immediate / value expression
	  mem	lvalue used as the memory operand
	Every macro expands to a single statement; end it with ';'.
*/
#ifdef	MMX_TRACE

/*	Include the stuff for printing a trace to stderr...
*/
#include <stdio.h>

/*	Trace: print imm and reg, apply "op imm, reg", print reg again. */
#define	mmx_i2r(op, imm, reg) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace.uq = (imm); \
		fprintf(stderr, #op "_i2r(" #imm "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #reg "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %0, %%" #reg \
				      : /* nothing */ \
				      : "y" (imm)); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #reg "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)

/*	Trace: print mem and reg, apply "op mem, reg", print reg again. */
#define	mmx_m2r(op, mem, reg) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace = (mem); \
		fprintf(stderr, #op "_m2r(" #mem "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #reg "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %0, %%" #reg \
				      : /* nothing */ \
				      : "y" (mem)); \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #reg "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)

/*	Trace: print reg and mem, apply "op reg, mem", print mem again. */
#define	mmx_r2m(op, reg, mem) \
	do { \
		mmx_t mmx_trace; \
		__asm__ __volatile__ ("movq %%" #reg ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #op "_r2m(" #reg "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		mmx_trace = (mem); \
		fprintf(stderr, #mem "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %%" #reg ", %0" \
				      : "=y" (mem) \
				      : /* nothing */ ); \
		mmx_trace = (mem); \
		fprintf(stderr, #mem "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)

/*	Trace: print both registers, apply "op regs, regd", print regd.
	The op itself is a basic asm (no operands), hence the single '%'. */
#define	mmx_r2r(op, regs, regd) \
	do { \
		mmx_t mmx_trace; \
		__asm__ __volatile__ ("movq %%" #regs ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #op "_r2r(" #regs "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %%" #regd ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #regd "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
		__asm__ __volatile__ ("movq %%" #regd ", %0" \
				      : "=y" (mmx_trace) \
				      : /* nothing */ ); \
		fprintf(stderr, #regd "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)

/*	Trace: print mems and memd, compute memd = memd <op> mems through
	the scratch register mm0, print memd again.
	memd is read before it is written, so it is a "+m" (read-write)
	operand, and mm0 is declared as clobbered. */
#define	mmx_m2m(op, mems, memd) \
	do { \
		mmx_t mmx_trace; \
		mmx_trace = (mems); \
		fprintf(stderr, #op "_m2m(" #mems "=0x%08x%08x, ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		mmx_trace = (memd); \
		fprintf(stderr, #memd "=0x%08x%08x) => ", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
				      #op " %1, %%mm0\n\t" \
				      "movq %%mm0, %0" \
				      : "+m" (memd) \
				      : "m" (mems) \
				      : "mm0"); \
		mmx_trace = (memd); \
		fprintf(stderr, #memd "=0x%08x%08x\n", \
			mmx_trace.ud[1], mmx_trace.ud[0]); \
	} while (0)

#else

/*	These macros are a lot simpler without the tracing...
*/

/*	"op imm, reg" with imm supplied through an MMX-register operand. */
#define	mmx_i2r(op, imm, reg) \
	__asm__ __volatile__ (#op " %0, %%" #reg \
			      : /* nothing */ \
			      : "y" (imm) )

/*	"op mem, reg" with mem as a memory input operand. */
#define	mmx_m2r(op, mem, reg) \
	__asm__ __volatile__ (#op " %0, %%" #reg \
			      : /* nothing */ \
			      : "m" (mem))

/*	"op reg, mem" with mem as a memory output operand. */
#define	mmx_r2m(op, reg, mem) \
	__asm__ __volatile__ (#op " %%" #reg ", %0" \
			      : "=m" (mem) \
			      : /* nothing */ )

/*	"op regs, regd"; basic asm (no operands), hence the single '%'. */
#define	mmx_r2r(op, regs, regd) \
	__asm__ __volatile__ (#op " %" #regs ", %" #regd)

/*	Simulated memory-to-memory form: memd = memd <op> mems.
	Loads memd into the scratch register mm0, applies op with mems and
	stores mm0 back.  memd is read before it is written, so it must be
	a "+m" (read-write) operand; mm0 is declared as clobbered. */
#define	mmx_m2m(op, mems, memd) \
	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
			      #op " %1, %%mm0\n\t" \
			      "movq %%mm0, %0" \
			      : "+m" (memd) \
			      : "m" (mems) \
			      : "mm0")

#endif


/*	1x64 MOVe Quadword
	(this is both a load and a store...
	 in fact, it is the only way to store)

	movq(vars, vard) copies through the scratch register mm0, which
	is therefore declared as clobbered.
*/
#define	movq_m2r(var, reg)	mmx_m2r(movq, var, reg)
#define	movq_r2m(reg, var)	mmx_r2m(movq, reg, var)
#define	movq_r2r(regs, regd)	mmx_r2r(movq, regs, regd)
#define	movq(vars, vard) \
	__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
			      "movq %%mm0, %0" \
			      : "=y" (vard) \
			      : "y" (vars) \
			      : "mm0")


/*	1x32 MOVe Doubleword
	(like movq, this is both load and store...
	 but is most useful for moving things between
	 mmx registers and ordinary registers)

	movd(vars, vard) uses memory operands: movd has no
	MMX-register-to-MMX-register form, so "y" operands here would
	produce an instruction that cannot be assembled.  The copy goes
	through the scratch register mm0, declared as clobbered.
*/
#define	movd_m2r(var, reg)	mmx_m2r(movd, var, reg)
#define	movd_r2m(reg, var)	mmx_r2m(movd, reg, var)
#define	movd_r2r(regs, regd)	mmx_r2r(movd, regs, regd)
#define	movd(vars, vard) \
	__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
			      "movd %%mm0, %0" \
			      : "=m" (vard) \
			      : "m" (vars) \
			      : "mm0")


/*	2x32, 4x16, and 8x8 Parallel ADDs
	(_m2r: variable to register, _r2r: register to register,
	 plain name: variable to variable via mmx_m2m)
*/
#define	paddd_m2r(var, reg)	mmx_m2r(paddd, var, reg)
#define	paddd_r2r(regs, regd)	mmx_r2r(paddd, regs, regd)
#define	paddd(vars, vard)	mmx_m2m(paddd, vars, vard)

#define	paddw_m2r(var, reg)	mmx_m2r(paddw, var, reg)
#define	paddw_r2r(regs, regd)	mmx_r2r(paddw, regs, regd)
#define	paddw(vars, vard)	mmx_m2m(paddw, vars, vard)

#define paddb_m2r(src_var, dst_reg) \
	mmx_m2r(paddb, src_var, dst_reg)
#define paddb_r2r(src_reg, dst_reg) \
	mmx_r2r(paddb, src_reg, dst_reg)
#define paddb(src_var, dst_var) \
	mmx_m2m(paddb, src_var, dst_var)


/*
 * Saturating parallel adds, 4x16 (paddsw) and 8x8 (paddsb).
 * Each opcode is exposed in _m2r, _r2r and plain forms, which hand
 * the opcode name to mmx_m2r, mmx_r2r and mmx_m2m respectively.
 */
#define paddsw_m2r(src_var, dst_reg) \
	mmx_m2r(paddsw, src_var, dst_reg)
#define paddsw_r2r(src_reg, dst_reg) \
	mmx_r2r(paddsw, src_reg, dst_reg)
#define paddsw(src_var, dst_var) \
	mmx_m2m(paddsw, src_var, dst_var)

#define paddsb_m2r(src_var, dst_reg) \
	mmx_m2r(paddsb, src_var, dst_reg)
#define paddsb_r2r(src_reg, dst_reg) \
	mmx_r2r(paddsb, src_reg, dst_reg)
#define paddsb(src_var, dst_var) \
	mmx_m2m(paddsb, src_var, dst_var)


/*
 * Unsigned-saturating parallel adds, 4x16 (paddusw) and 8x8 (paddusb).
 */
#define paddusw_m2r(src_var, dst_reg) \
	mmx_m2r(paddusw, src_var, dst_reg)
#define paddusw_r2r(src_reg, dst_reg) \
	mmx_r2r(paddusw, src_reg, dst_reg)
#define paddusw(src_var, dst_var) \
	mmx_m2m(paddusw, src_var, dst_var)

#define paddusb_m2r(var, reg)   mmx_m2r(paddusb, var, reg)
#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd)
#define paddusb(vars, vard)     mmx_m2m(paddusb, vars, vard)


/*	2x32, 4x16, and 8x8 Parallel SUBs
	(each opcode gets _m2r, _r2r and plain forms, forwarded to
	 mmx_m2r, mmx_r2r and mmx_m2m respectively)
*/
#define psubd_m2r(var, reg)     mmx_m2r(psubd, var, reg)
#define psubd_r2r(regs, regd)   mmx_r2r(psubd, regs, regd)
#define psubd(vars, vard)       mmx_m2m(psubd, vars, vard)

#define psubw_m2r(var, reg)     mmx_m2r(psubw, var, reg)
#define psubw_r2r(regs, regd)   mmx_r2r(psubw, regs, regd)
#define psubw(vars, vard)       mmx_m2m(psubw, vars, vard)

#define psubb_m2r(var, reg)     mmx_m2r(psubb, var, reg)
#define psubb_r2r(regs, regd)   mmx_r2r(psubb, regs, regd)
#define psubb(vars, vard)       mmx_m2m(psubb, vars, vard)


/*	4x16 and 8x8 Parallel SUBs using Saturation arithmetic
*/
#define psubsw_m2r(var, reg)    mmx_m2r(psubsw, var, reg)
#define psubsw_r2r(regs, regd)  mmx_r2r(psubsw, regs, regd)
#define psubsw(vars, vard)      mmx_m2m(psubsw, vars, vard)

#define psubsb_m2r(var, reg)    mmx_m2r(psubsb, var, reg)
#define psubsb_r2r(regs, regd)  mmx_r2r(psubsb, regs, regd)
#define psubsb(vars, vard)      mmx_m2m(psubsb, vars, vard)


/*	4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
*/
#define psubusw_m2r(var, reg)   mmx_m2r(psubusw, var, reg)
#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd)
#define psubusw(vars, vard)     mmx_m2m(psubusw, vars, vard)

#define psubusb_m2r(var, reg)   mmx_m2r(psubusb, var, reg)
#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd)
#define psubusb(vars, vard)     mmx_m2m(psubusb, vars, vard)


/*	4x16 Parallel MULs giving Low 4x16 portions of results
*/
#define pmullw_m2r(var, reg)    mmx_m2r(pmullw, var, reg)
#define pmullw_r2r(regs, regd)  mmx_r2r(pmullw, regs, regd)
#define pmullw(vars, vard)      mmx_m2m(pmullw, vars, vard)


/*	4x16 Parallel MULs giving High 4x16 portions of results
*/
#define pmulhw_m2r(var, reg)    mmx_m2r(pmulhw, var, reg)
#define pmulhw_r2r(regs, regd)  mmx_r2r(pmulhw, regs, regd)
#define pmulhw(vars, vard)      mmx_m2m(pmulhw, vars, vard)


/*	4x16->2x32 Parallel Mul-ADD
	(muls like pmullw, then adds adjacent 16-bit fields
	 in the multiply result to make the final 2x32 result)
*/
#define pmaddwd_m2r(var, reg)   mmx_m2r(pmaddwd, var, reg)
#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd)
#define pmaddwd(vars, vard)     mmx_m2m(pmaddwd, vars, vard)


/*	1x64 bitwise AND

	When BROKEN_PAND is defined, pand is synthesised from two pandn
	operations (pandn being "AND with Not the destination"): the
	first, against an all-ones constant, inverts the destination; the
	second then inverts it back while ANDing in the real source.

	The multi-statement forms are wrapped in do { } while (0) so that
	"pand_xxx(...);" behaves as one statement, including as the body
	of an if/else.  Every inner statement carries its own ';' because
	the helper macros are defined elsewhere and may expand to a bare
	expression statement.
*/
#ifdef BROKEN_PAND
#define pand_m2r(var, reg) \
	do { \
		mmx_m2r(pandn, (mmx_t) -1LL, reg); \
		mmx_m2r(pandn, var, reg); \
	} while (0)
#define pand_r2r(regs, regd) \
	do { \
		mmx_m2r(pandn, (mmx_t) -1LL, regd); \
		mmx_r2r(pandn, regs, regd); \
	} while (0)
/* The plain form loads vard into mm0, ANDs there, and stores it back. */
#define pand(vars, vard) \
	do { \
		movq_m2r(vard, mm0); \
		mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
		mmx_m2r(pandn, vars, mm0); \
		movq_r2m(mm0, vard); \
	} while (0)
#else
#define pand_m2r(var, reg)      mmx_m2r(pand, var, reg)
#define pand_r2r(regs, regd)    mmx_r2r(pand, regs, regd)
#define pand(vars, vard)        mmx_m2m(pand, vars, vard)
#endif


/*	1x64 bitwise AND with Not the destination
*/
#define pandn_m2r(src_var, dst_reg) \
	mmx_m2r(pandn, src_var, dst_reg)
#define pandn_r2r(src_reg, dst_reg) \
	mmx_r2r(pandn, src_reg, dst_reg)
#define pandn(src_var, dst_var) \
	mmx_m2m(pandn, src_var, dst_var)


/*
 * Bitwise OR over the full 1x64 value.
 */
#define por_m2r(src_var, dst_reg) \
	mmx_m2r(por, src_var, dst_reg)
#define por_r2r(src_reg, dst_reg) \
	mmx_r2r(por, src_reg, dst_reg)
#define por(src_var, dst_var) \
	mmx_m2m(por, src_var, dst_var)


/*
 * Bitwise exclusive OR over the full 1x64 value.
 */
#define pxor_m2r(src_var, dst_reg) \
	mmx_m2r(pxor, src_var, dst_reg)
#define pxor_r2r(src_reg, dst_reg) \
	mmx_r2r(pxor, src_reg, dst_reg)
#define pxor(src_var, dst_var) \
	mmx_m2m(pxor, src_var, dst_var)


/*
 * Parallel compare for equality on 2x32 (pcmpeqd), 4x16 (pcmpeqw)
 * and 8x8 (pcmpeqb) fields; each result field is either 0 or -1.
 */
#define pcmpeqd_m2r(src_var, dst_reg) \
	mmx_m2r(pcmpeqd, src_var, dst_reg)
#define pcmpeqd_r2r(src_reg, dst_reg) \
	mmx_r2r(pcmpeqd, src_reg, dst_reg)
#define pcmpeqd(src_var, dst_var) \
	mmx_m2m(pcmpeqd, src_var, dst_var)

#define pcmpeqw_m2r(src_var, dst_reg) \
	mmx_m2r(pcmpeqw, src_var, dst_reg)
#define pcmpeqw_r2r(src_reg, dst_reg) \
	mmx_r2r(pcmpeqw, src_reg, dst_reg)
#define pcmpeqw(src_var, dst_var) \
	mmx_m2m(pcmpeqw, src_var, dst_var)

#define pcmpeqb_m2r(src_var, dst_reg) \
	mmx_m2r(pcmpeqb, src_var, dst_reg)
#define pcmpeqb_r2r(src_reg, dst_reg) \
	mmx_r2r(pcmpeqb, src_reg, dst_reg)
#define pcmpeqb(src_var, dst_var) \
	mmx_m2m(pcmpeqb, src_var, dst_var)


/*
 * Parallel compare for greater-than on 2x32 (pcmpgtd), 4x16 (pcmpgtw)
 * and 8x8 (pcmpgtb) fields; each result field is either 0 or -1.
 */
#define pcmpgtd_m2r(src_var, dst_reg) \
	mmx_m2r(pcmpgtd, src_var, dst_reg)
#define pcmpgtd_r2r(src_reg, dst_reg) \
	mmx_r2r(pcmpgtd, src_reg, dst_reg)
#define pcmpgtd(src_var, dst_var) \
	mmx_m2m(pcmpgtd, src_var, dst_var)

#define pcmpgtw_m2r(src_var, dst_reg) \
	mmx_m2r(pcmpgtw, src_var, dst_reg)
#define pcmpgtw_r2r(src_reg, dst_reg) \
	mmx_r2r(pcmpgtw, src_reg, dst_reg)
#define pcmpgtw(src_var, dst_var) \
	mmx_m2m(pcmpgtw, src_var, dst_var)

#define pcmpgtb_m2r(src_var, dst_reg) \
	mmx_m2r(pcmpgtb, src_var, dst_reg)
#define pcmpgtb_r2r(src_reg, dst_reg) \
	mmx_r2r(pcmpgtb, src_reg, dst_reg)
#define pcmpgtb(src_var, dst_var) \
	mmx_m2m(pcmpgtb, src_var, dst_var)


/*
 * Parallel logical shift left on 1x64 (psllq), 2x32 (pslld) and
 * 4x16 (psllw) fields.  In addition to the _m2r / _r2r / plain forms
 * there is an _i2r form, forwarded to mmx_i2r.
 */
#define psllq_i2r(immed, dst_reg) \
	mmx_i2r(psllq, immed, dst_reg)
#define psllq_m2r(src_var, dst_reg) \
	mmx_m2r(psllq, src_var, dst_reg)
#define psllq_r2r(src_reg, dst_reg) \
	mmx_r2r(psllq, src_reg, dst_reg)
#define psllq(src_var, dst_var) \
	mmx_m2m(psllq, src_var, dst_var)

#define pslld_i2r(immed, dst_reg) \
	mmx_i2r(pslld, immed, dst_reg)
#define pslld_m2r(src_var, dst_reg) \
	mmx_m2r(pslld, src_var, dst_reg)
#define pslld_r2r(src_reg, dst_reg) \
	mmx_r2r(pslld, src_reg, dst_reg)
#define pslld(src_var, dst_var) \
	mmx_m2m(pslld, src_var, dst_var)

#define psllw_i2r(immed, dst_reg) \
	mmx_i2r(psllw, immed, dst_reg)
#define psllw_m2r(src_var, dst_reg) \
	mmx_m2r(psllw, src_var, dst_reg)
#define psllw_r2r(src_reg, dst_reg) \
	mmx_r2r(psllw, src_reg, dst_reg)
#define psllw(src_var, dst_var) \
	mmx_m2m(psllw, src_var, dst_var)


/*
 * Parallel logical shift right on 1x64 (psrlq), 2x32 (psrld) and
 * 4x16 (psrlw) fields.
 */
#define psrlq_i2r(immed, dst_reg) \
	mmx_i2r(psrlq, immed, dst_reg)
#define psrlq_m2r(src_var, dst_reg) \
	mmx_m2r(psrlq, src_var, dst_reg)
#define psrlq_r2r(src_reg, dst_reg) \
	mmx_r2r(psrlq, src_reg, dst_reg)
#define psrlq(src_var, dst_var) \
	mmx_m2m(psrlq, src_var, dst_var)

#define psrld_i2r(immed, dst_reg) \
	mmx_i2r(psrld, immed, dst_reg)
#define psrld_m2r(src_var, dst_reg) \
	mmx_m2r(psrld, src_var, dst_reg)
#define psrld_r2r(src_reg, dst_reg) \
	mmx_r2r(psrld, src_reg, dst_reg)
#define psrld(src_var, dst_var) \
	mmx_m2m(psrld, src_var, dst_var)

#define psrlw_i2r(immed, dst_reg) \
	mmx_i2r(psrlw, immed, dst_reg)
#define psrlw_m2r(src_var, dst_reg) \
	mmx_m2r(psrlw, src_var, dst_reg)
#define psrlw_r2r(src_reg, dst_reg) \
	mmx_r2r(psrlw, src_reg, dst_reg)
#define psrlw(src_var, dst_var) \
	mmx_m2m(psrlw, src_var, dst_var)


/*	2x32 and 4x16 Parallel Shift Right Arithmetic
*/
#define psrad_i2r(immed, dst_reg) \
	mmx_i2r(psrad, immed, dst_reg)
#define psrad_m2r(src_var, dst_reg) \
	mmx_m2r(psrad, src_var, dst_reg)
#define psrad_r2r(src_reg, dst_reg) \
	mmx_r2r(psrad, src_reg, dst_reg)
#define psrad(src_var, dst_var) \
	mmx_m2m(psrad, src_var, dst_var)

#define psraw_i2r(immed, dst_reg) \
	mmx_i2r(psraw, immed, dst_reg)
#define psraw_m2r(src_var, dst_reg) \
	mmx_m2r(psraw, src_var, dst_reg)
#define psraw_r2r(src_reg, dst_reg) \
	mmx_r2r(psraw, src_reg, dst_reg)
#define psraw(src_var, dst_var) \
	mmx_m2m(psraw, src_var, dst_var)


/*
 * Pack with signed saturation: 2x32->4x16 (packssdw) and
 * 4x16->8x8 (packsswb).  Source and dest fields are packed into
 * dest, in that order.
 */
#define packssdw_m2r(src_var, dst_reg) \
	mmx_m2r(packssdw, src_var, dst_reg)
#define packssdw_r2r(src_reg, dst_reg) \
	mmx_r2r(packssdw, src_reg, dst_reg)
#define packssdw(src_var, dst_var) \
	mmx_m2m(packssdw, src_var, dst_var)

#define packsswb_m2r(src_var, dst_reg) \
	mmx_m2r(packsswb, src_var, dst_reg)
#define packsswb_r2r(src_reg, dst_reg) \
	mmx_r2r(packsswb, src_reg, dst_reg)
#define packsswb(src_var, dst_var) \
	mmx_m2m(packsswb, src_var, dst_var)


/*
 * Pack with unsigned saturation: 4x16->8x8 (packuswb).  Source and
 * dest fields are packed into dest, in that order.
 */
#define packuswb_m2r(src_var, dst_reg) \
	mmx_m2r(packuswb, src_var, dst_reg)
#define packuswb_r2r(src_reg, dst_reg) \
	mmx_r2r(packuswb, src_reg, dst_reg)
#define packuswb(src_var, dst_var) \
	mmx_m2m(packuswb, src_var, dst_var)


/*
 * Unpack low: 2x32->1x64 (punpckldq), 4x16->2x32 (punpcklwd) and
 * 8x8->4x16 (punpcklbw).  Interleaves the low half of dest with the
 * low half of source as padding in each result field.
 */
#define punpckldq_m2r(src_var, dst_reg) \
	mmx_m2r(punpckldq, src_var, dst_reg)
#define punpckldq_r2r(src_reg, dst_reg) \
	mmx_r2r(punpckldq, src_reg, dst_reg)
#define punpckldq(src_var, dst_var) \
	mmx_m2m(punpckldq, src_var, dst_var)

#define punpcklwd_m2r(src_var, dst_reg) \
	mmx_m2r(punpcklwd, src_var, dst_reg)
#define punpcklwd_r2r(src_reg, dst_reg) \
	mmx_r2r(punpcklwd, src_reg, dst_reg)
#define punpcklwd(src_var, dst_var) \
	mmx_m2m(punpcklwd, src_var, dst_var)

#define punpcklbw_m2r(src_var, dst_reg) \
	mmx_m2r(punpcklbw, src_var, dst_reg)
#define punpcklbw_r2r(src_reg, dst_reg) \
	mmx_r2r(punpcklbw, src_reg, dst_reg)
#define punpcklbw(src_var, dst_var) \
	mmx_m2m(punpcklbw, src_var, dst_var)


/*
 * Unpack high: 2x32->1x64 (punpckhdq), 4x16->2x32 (punpckhwd) and
 * 8x8->4x16 (punpckhbw).  Interleaves the high half of dest with the
 * high half of source as padding in each result field.
 */
#define punpckhdq_m2r(src_var, dst_reg) \
	mmx_m2r(punpckhdq, src_var, dst_reg)
#define punpckhdq_r2r(src_reg, dst_reg) \
	mmx_r2r(punpckhdq, src_reg, dst_reg)
#define punpckhdq(src_var, dst_var) \
	mmx_m2m(punpckhdq, src_var, dst_var)

#define punpckhwd_m2r(src_var, dst_reg) \
	mmx_m2r(punpckhwd, src_var, dst_reg)
#define punpckhwd_r2r(src_reg, dst_reg) \
	mmx_r2r(punpckhwd, src_reg, dst_reg)
#define punpckhwd(src_var, dst_var) \
	mmx_m2m(punpckhwd, src_var, dst_var)

#define punpckhbw_m2r(src_var, dst_reg) \
	mmx_m2r(punpckhbw, src_var, dst_reg)
#define punpckhbw_r2r(src_reg, dst_reg) \
	mmx_r2r(punpckhbw, src_reg, dst_reg)
#define punpckhbw(src_var, dst_var) \
	mmx_m2m(punpckhbw, src_var, dst_var)
6819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* Empty MMx State 6849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (used to clean-up when going from mmx to float use 6859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall of the registers that are shared by both; note that 6869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall there is no float-to-mmx operation needed, because 6879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall only the float tag word info is corruptible) 6889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 6899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#ifdef MMX_TRACE 6909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define emms() \ 6929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { \ 6939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall printf("emms()\n"); \ 6949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __asm__ __volatile__ ("emms"); \ 6959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 6969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#else 6989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define emms() __asm__ __volatile__ ("emms") 7009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 7019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 7029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 7039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 7049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 705