/* A program to test that SSE/SSE2 insns do not read memory they
   should not.  Covers insns of the form OP %xmm, %xmm and OP memory,
   %xmm only. */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "tests/malloc.h"
#include <string.h>

/* Basic type abbreviations used throughout this test. */
typedef  unsigned char  V128[16];   /* one 128-bit vector, held as 16 raw bytes */
typedef  unsigned int   UInt;
typedef  signed int     Int;
typedef  unsigned char  UChar;

179ab5563a3196760eb381d102cbb2bc0f7abc6a50Ben Murdochtypedef
182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)   struct {
191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      V128 arg1;
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      V128 arg2;
219ab5563a3196760eb381d102cbb2bc0f7abc6a50Ben Murdoch      V128 res;
22eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   }
237dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch   RRArgs;
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)typedef
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   struct {
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      V128 arg1;
28868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      V128 res;
29eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   }
30eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   RMArgs;
31eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static UChar randUChar ( void )
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
347d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   static UInt seed = 80021;
35868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)   seed = 1103515245 * seed + 12345;
36eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   return (seed >> 17) & 0xFF;
37eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch}
38eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void randomise ( UChar* p, Int n )
407d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles){
417d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   Int i;
427d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   for (i = 0; i < n; i++)
437d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)      p[i] = randUChar();
447d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)}
457d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
467d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)static void randV128 ( V128* v )
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)   Int i;
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   for (i = 0; i < 16; i++)
50868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      (*v)[i] = randUChar();
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
52868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)
537d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)static void randRRArgs ( RRArgs* rra )
547d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles){
557d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   randV128(&rra->arg1);
567d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   randV128(&rra->arg2);
577d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   randV128(&rra->res);
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
607d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)static void randRMArgs ( RMArgs* rra )
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   randV128(&rra->arg1);
63868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)   randV128(&rra->res);
64868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)}
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static void showV128 ( V128* v )
67eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch{
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   Int i;
69868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)   for (i = 0; i < 16; i++)
70868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      printf("%02x", (Int)(*v)[i]);
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void showMaskedV128 ( V128* v, V128* mask )
742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles){
75868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)   Int i;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   for (i = 0; i < 16; i++)
77868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)      printf("%02x", (Int)( ((*v)[i]) & ((*mask)[i]) ));
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void showRR ( char* op, RRArgs* rra, V128* rmask )
817d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles){
82eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   printf("r %10s ", op);
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   showV128(&rra->arg1);
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   printf(" ");
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   showV128(&rra->arg2);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   printf(" ");
87868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)   showMaskedV128(&rra->res, rmask);
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   printf("\n");
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
907d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
917d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)static void showRM ( char* op, RMArgs* rra, UChar* mem, Int nMem, V128* rmask )
92eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch{
937d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   Int i;
947d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   assert(nMem == 4 || nMem == 8 || nMem == 16 || nMem==0);
957d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   printf("m %10s ", op);
967d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)   for (i = 0; i < nMem; i++)
97eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch      printf("%02x", (Int)mem[i]);
98eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   printf(" ");
99eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   showV128(&rra->arg1);
100eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   printf(" ");
101eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   showMaskedV128(&rra->res, rmask );
102eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch   printf("\n");
1037d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)}
1047d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
/* Define r_r_<OP>(RRArgs* p), which executes "OP %xmm6, %xmm7".
   In AT&T operand order %xmm6 is the source and %xmm7 the
   destination:

      %xmm6 <- p->arg1   (byte offset 0)
      %xmm7 <- p->arg2   (byte offset 16)
      OP %xmm6, %xmm7
      p->res <- %xmm7    (byte offset 32)

   Unaligned moves (movups) are used for the loads and the store, so
   *p need not be 16-aligned.  Both registers, memory and the
   condition codes are declared clobbered. */
#define Wrapper_RegReg(OP)                 \
   void r_r_##OP ( RRArgs* p )             \
   {                                       \
      __asm__ __volatile__("\n"            \
         "\tmovups 0(%0), %%xmm6\n"        \
         "\tmovups 16(%0), %%xmm7\n"       \
         "\t" #OP " %%xmm6, %%xmm7\n"      \
         "\tmovups %%xmm7, 32(%0)\n"       \
         :                                 \
         : "r" (p)                         \
         : "memory", "xmm6", "xmm7", "cc"  \
      );                                   \
   }

/* Define r_m_<OP>(RMArgs* p, void* mem), which executes
   "OP 0(mem), %xmm7", i.e. with the source operand taken directly
   from memory:

      %xmm7 <- p->arg1   (byte offset 0)
      OP 0(mem), %xmm7
      p->res <- %xmm7    (byte offset 16)

   How many bytes are read at mem is decided by the insn itself;
   that is the behaviour this program exists to exercise. */
#define Wrapper_RegMem(OP)                 \
   void r_m_##OP ( RMArgs* p, void* mem )  \
   {                                       \
      __asm__ __volatile__("\n"            \
         "\tmovups 0(%0), %%xmm7\n"        \
         "\t" #OP " 0(%1), %%xmm7\n"       \
         "\tmovups %%xmm7, 16(%0)\n"       \
         :                                 \
         : "r" (p), "r" (mem)              \
         : "memory", "xmm7", "cc"          \
      );                                   \
   }


/* Instantiate the complete test for one insn.  Expands to
   r_r_<insn>, r_m_<insn> and a driver do_<insn>().

   res_mask - V128* naming the result bits that are printed
   mem_size - number of bytes supplied as the memory operand
   insn     - the mnemonic, as a bare token

   do_<insn>() runs five register-register trials followed by five
   register-memory trials, printing one line per trial.  For each
   memory trial the operand lives in a freshly allocated heap block
   of exactly mem_size bytes obtained from memalign16 (declared in
   tests/malloc.h; presumably 16-byte aligned -- confirm there), so
   that an insn reading beyond mem_size bytes strays outside the
   allocation. */
#define TEST_INSN(res_mask,mem_size,insn)  \
                                           \
Wrapper_RegReg(insn)                       \
Wrapper_RegMem(insn)                       \
                                           \
void do_##insn ( void )                    \
{                                          \
   Int    i;                               \
   UChar* buf;                             \
   RRArgs rargs __attribute__((aligned(16))); \
   RMArgs margs __attribute__((aligned(16))); \
   for (i = 0; i < 5; i++) {               \
      randRRArgs(&rargs);                  \
      r_r_##insn(&rargs);                  \
      showRR(#insn, &rargs, res_mask);     \
   }                                       \
   for (i = 0; i < 5; i++) {               \
      randRMArgs(&margs);                  \
      buf = memalign16(mem_size);          \
      randomise(buf,mem_size);             \
      r_m_##insn(&margs,buf);              \
      showRM(#insn, &margs, buf, mem_size, res_mask);\
      free(buf);                           \
   }                                       \
}

159/* Note: these are little endian.  Hence first byte is the least
160   significant byte of lane zero. */
161
162/* Mask for insns where all result bits are non-approximated. */
163static V128 AllMask  = { 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
164                         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };
165
166/* Mark for insns which produce approximated vector short results. */
167static V128 ApproxPS = { 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF,
168                         0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF };
169
170/* Mark for insns which produce approximated scalar short results. */
171static V128 ApproxSS = { 0x00,0x00,0x80,0xFF, 0xFF,0xFF,0xFF,0xFF,
172                         0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };
173
/* Memory operand sizes, in bytes, passed as the mem_size argument of
   TEST_INSN: packed (P) forms use 16 bytes, scalar double (SD) 8 and
   scalar single (SS) 4. */
#define PD 16
#define SD 8
#define PS 16
#define SS 4

179/* ------------------------ SSE1 ------------------------ */
180TEST_INSN( &AllMask, PS,addps)
181TEST_INSN( &AllMask, SS,addss)
182TEST_INSN( &AllMask, PS,andnps)
183TEST_INSN( &AllMask, PS,andps)
184TEST_INSN( &AllMask, PS,cmpeqps)
185TEST_INSN( &AllMask, SS,cmpeqss)
186TEST_INSN( &AllMask, PS,cmpleps)
187TEST_INSN( &AllMask, SS,cmpless)
188TEST_INSN( &AllMask, PS,cmpltps)
189TEST_INSN( &AllMask, SS,cmpltss)
190TEST_INSN( &AllMask, PS,cmpneqps)
191TEST_INSN( &AllMask, SS,cmpneqss)
192TEST_INSN( &AllMask, PS,cmpnleps)
193TEST_INSN( &AllMask, SS,cmpnless)
194TEST_INSN( &AllMask, PS,cmpnltps)
195TEST_INSN( &AllMask, SS,cmpnltss)
196TEST_INSN( &AllMask, PS,cmpordps)
197TEST_INSN( &AllMask, SS,cmpordss)
198TEST_INSN( &AllMask, PS,cmpunordps)
199TEST_INSN( &AllMask, SS,cmpunordss)
200TEST_INSN( &AllMask, SS,comiss)
201//TEST_INSN( &AllMask, 0,cvtpi2ps)
202//TEST_INSN( &AllMask, 0,cvtps2pi)
203//TEST_INSN( &AllMask, 0,cvtsi2ss)
204//TEST_INSN( &AllMask, 0,cvtss2si)
205//TEST_INSN( &AllMask, 0,cvttps2pi)
206//TEST_INSN( &AllMask, 0,cvttss2si)
207TEST_INSN( &AllMask, PS,divps)
208TEST_INSN( &AllMask, SS,divss)
209TEST_INSN( &AllMask, PS,maxps)
210TEST_INSN( &AllMask, SS,maxss)
211TEST_INSN( &AllMask, PS,minps)
212TEST_INSN( &AllMask, SS,minss)
213TEST_INSN( &AllMask, 16,movaps)
214//TEST_INSN( &AllMask, 0,movhlps)
215//TEST_INSN( &AllMask, 0,movhps)
216//TEST_INSN( &AllMask, 0,movlhps)
217//TEST_INSN( &AllMask, 0,movlps)
218//TEST_INSN( &AllMask, 0,movmskps)
219//TEST_INSN( &AllMask, 0,movntps)
220//TEST_INSN( &AllMask, 0,movntq)
221TEST_INSN( &AllMask, 4,movss)
222TEST_INSN( &AllMask, 16,movups)
223TEST_INSN( &AllMask, PS,mulps)
224TEST_INSN( &AllMask, SS,mulss)
225TEST_INSN( &AllMask, PS,orps)
226//TEST_INSN( &AllMask, 0,pavgb) -- dup with sse2?
227//TEST_INSN( &AllMask, 0,pavgw) -- dup with sse2?
228//TEST_INSN( &AllMask, 0,pextrw)
229//TEST_INSN( &AllMask, 0,pinsrw)
230//TEST_INSN( &AllMask, 0,pmaxsw) -- dup with sse2?
231//TEST_INSN( &AllMask, 0,pmaxub) -- dup with sse2?
232//TEST_INSN( &AllMask, 0,pminsw) -- dup with sse2?
233//TEST_INSN( &AllMask, 0,pminub) -- dup with sse2?
234//TEST_INSN( &AllMask, 0,pmovmskb)
235//TEST_INSN( &AllMask, 0,pmulhuw) -- dup with sse2?
236TEST_INSN( &AllMask, 16,psadbw) // -- XXXXXXXXXXXXXXXX sse2 (xmm variant) not implemented!
237//TEST_INSN( &AllMask, 0,pshufw)
238TEST_INSN(&ApproxPS, PS,rcpps)
239TEST_INSN(&ApproxSS, SS,rcpss)
240TEST_INSN(&ApproxPS, PS,rsqrtps)
241TEST_INSN(&ApproxSS, SS,rsqrtss)
242//TEST_INSN( &AllMask, PS,shufps)
243TEST_INSN( &AllMask, PS,sqrtps)
244TEST_INSN( &AllMask, SS,sqrtss)
245TEST_INSN( &AllMask, PS,subps)
246TEST_INSN( &AllMask, SS,subss)
247TEST_INSN( &AllMask, SS,ucomiss)
248TEST_INSN( &AllMask, PS,unpckhps)
249TEST_INSN( &AllMask, PS,unpcklps)
250TEST_INSN( &AllMask, PS,xorps)
251
252
253/* ------------------------ SSE2 ------------------------ */
254TEST_INSN( &AllMask, PD,addpd)
255TEST_INSN( &AllMask, SD,addsd)
256TEST_INSN( &AllMask, PD,andnpd)
257TEST_INSN( &AllMask, PD,andpd)
258TEST_INSN( &AllMask, PD,cmpeqpd)
259TEST_INSN( &AllMask, SD,cmpeqsd)
260TEST_INSN( &AllMask, PD,cmplepd)
261TEST_INSN( &AllMask, SD,cmplesd)
262TEST_INSN( &AllMask, PD,cmpltpd)
263TEST_INSN( &AllMask, SD,cmpltsd)
264TEST_INSN( &AllMask, PD,cmpneqpd)
265TEST_INSN( &AllMask, SD,cmpneqsd)
266TEST_INSN( &AllMask, PD,cmpnlepd)
267TEST_INSN( &AllMask, SD,cmpnlesd)
268TEST_INSN( &AllMask, PD,cmpnltpd)
269TEST_INSN( &AllMask, SD,cmpnltsd)
270TEST_INSN( &AllMask, PD,cmpordpd)
271TEST_INSN( &AllMask, SD,cmpordsd)
272TEST_INSN( &AllMask, PD,cmpunordpd)
273TEST_INSN( &AllMask, SD,cmpunordsd)
274TEST_INSN( &AllMask, SD,comisd)
275TEST_INSN( &AllMask, 8,cvtdq2pd)
276TEST_INSN( &AllMask, 16,cvtdq2ps)
277TEST_INSN( &AllMask, 16,cvtpd2dq)
278//TEST_INSN( &AllMask, 0,cvtpd2pi)
279TEST_INSN( &AllMask, 16,cvtpd2ps)   /* reads 16 */
280//TEST_INSN( &AllMask, 0,cvtpi2pd)
281TEST_INSN( &AllMask, 16,cvtps2dq)  /* reads 16 */
282TEST_INSN( &AllMask, 8,cvtps2pd)   /* reads 8 */
283//TEST_INSN( &AllMask, 0,cvtsd2si)
284TEST_INSN( &AllMask, SD,cvtsd2ss)   /* reads SD */
285//TEST_INSN( &AllMask, 0,cvtsi2sd)
286TEST_INSN( &AllMask, SS,cvtss2sd)   /* reads SS */
287TEST_INSN( &AllMask, 16,cvttpd2dq)
288//TEST_INSN( &AllMask, 0,cvttpd2pi)
289TEST_INSN( &AllMask, 16,cvttps2dq)
290//TEST_INSN( &AllMask, 0,cvttsd2si)
291TEST_INSN( &AllMask, PD,divpd)
292TEST_INSN( &AllMask, SD,divsd)
293TEST_INSN( &AllMask, PD,maxpd)
294TEST_INSN( &AllMask, SD,maxsd)
295TEST_INSN( &AllMask, PD,minpd)
296TEST_INSN( &AllMask, SD,minsd)
297TEST_INSN( &AllMask, PD,movapd)
298//TEST_INSN( &AllMask, 8,movd)
299//TEST_INSN( &AllMask, 0,movdq2q)
300TEST_INSN( &AllMask, 16,movdqa)
301TEST_INSN( &AllMask, 16,movdqu)
302//TEST_INSN( &AllMask, 16,movhpd)
303//TEST_INSN( &AllMask, 16,movlpd)
304//TEST_INSN( &AllMask, 0,movmskpd)
305//TEST_INSN( &AllMask, 0,movntdq)
306//TEST_INSN( &AllMask, 0,movnti)
307//TEST_INSN( &AllMask, 0,movntpd)
308TEST_INSN( &AllMask, 8,movq)
309//TEST_INSN( &AllMask, 0,movq2dq)
310TEST_INSN( &AllMask, 8,movsd)
311TEST_INSN( &AllMask, 16,movupd)
312TEST_INSN( &AllMask, PD,mulpd)
313TEST_INSN( &AllMask, SD,mulsd)
314TEST_INSN( &AllMask, PD,orpd)
315TEST_INSN( &AllMask, 16,packssdw)
316TEST_INSN( &AllMask, 16,packsswb)
317TEST_INSN( &AllMask, 16,packuswb)
318TEST_INSN( &AllMask, 16,paddb)
319TEST_INSN( &AllMask, 16,paddd)
320TEST_INSN( &AllMask, 16,paddq)
321TEST_INSN( &AllMask, 16,paddsb)
322TEST_INSN( &AllMask, 16,paddsw)
323TEST_INSN( &AllMask, 16,paddusb)
324TEST_INSN( &AllMask, 16,paddusw)
325TEST_INSN( &AllMask, 16,paddw)
326TEST_INSN( &AllMask, 16,pand)
327TEST_INSN( &AllMask, 16,pandn)
328TEST_INSN( &AllMask, 16,pavgb)
329TEST_INSN( &AllMask, 16,pavgw)
330TEST_INSN( &AllMask, 16,pcmpeqb)
331TEST_INSN( &AllMask, 16,pcmpeqd)
332TEST_INSN( &AllMask, 16,pcmpeqw)
333TEST_INSN( &AllMask, 16,pcmpgtb)
334TEST_INSN( &AllMask, 16,pcmpgtd)
335TEST_INSN( &AllMask, 16,pcmpgtw)
336//TEST_INSN( &AllMask, 16,pextrw)
337//TEST_INSN( &AllMask, 16,pinsrw)
338TEST_INSN( &AllMask, 16,pmaxsw)
339TEST_INSN( &AllMask, 16,pmaxub)
340TEST_INSN( &AllMask, 16,pminsw)
341TEST_INSN( &AllMask, 16,pminub)
342//TEST_INSN( &AllMask, 0,pmovmskb)
343TEST_INSN( &AllMask, 16,pmulhuw)
344TEST_INSN( &AllMask, 16,pmulhw)
345TEST_INSN( &AllMask, 16,pmullw)
346TEST_INSN( &AllMask, 16,pmuludq)
347TEST_INSN( &AllMask, 16,por)
348//TEST_INSN( &AllMask, 16,pshufd)
349//TEST_INSN( &AllMask, 16,pshufhw)
350//TEST_INSN( &AllMask, 16,pshuflw)
351TEST_INSN( &AllMask, 16,pslld)
352//TEST_INSN( &AllMask, 16,pslldq)
353TEST_INSN( &AllMask, 16,psllq)
354TEST_INSN( &AllMask, 16,psllw)
355TEST_INSN( &AllMask, 16,psrad)
356TEST_INSN( &AllMask, 16,psraw)
357TEST_INSN( &AllMask, 16,psrld)
358//TEST_INSN( &AllMask, 16,psrldq)
359TEST_INSN( &AllMask, 16,psrlq)
360TEST_INSN( &AllMask, 16,psrlw)
361TEST_INSN( &AllMask, 16,psubb)
362TEST_INSN( &AllMask, 16,psubd)
363TEST_INSN( &AllMask, 16,psubq)
364TEST_INSN( &AllMask, 16,psubsb)
365TEST_INSN( &AllMask, 16,psubsw)
366TEST_INSN( &AllMask, 16,psubusb)
367TEST_INSN( &AllMask, 16,psubusw)
368TEST_INSN( &AllMask, 16,psubw)
369TEST_INSN( &AllMask, 16,punpckhbw)
370TEST_INSN( &AllMask, 16,punpckhdq)
371TEST_INSN( &AllMask, 16,punpckhqdq)
372TEST_INSN( &AllMask, 16,punpckhwd)
373TEST_INSN( &AllMask, 16,punpcklbw)
374TEST_INSN( &AllMask, 16,punpckldq)
375TEST_INSN( &AllMask, 16,punpcklqdq)
376TEST_INSN( &AllMask, 16,punpcklwd)
377TEST_INSN( &AllMask, 16,pxor)
378//TEST_INSN( &AllMask, PD,shufpd)
379TEST_INSN( &AllMask, PD,sqrtpd)
380TEST_INSN( &AllMask, SD,sqrtsd)
381TEST_INSN( &AllMask, PD,subpd)
382TEST_INSN( &AllMask, SD,subsd)
383TEST_INSN( &AllMask, SD,ucomisd)
384TEST_INSN( &AllMask, PD,unpckhpd)
385TEST_INSN( &AllMask, PD,unpcklpd)
386TEST_INSN( &AllMask, PD,xorpd)
387
388
389int main ( int argc, char** argv )
390{
391   Int sse1 = 0, sse2 = 0;
392
393   if (argc == 2 && 0==strcmp(argv[1], "sse1")) {
394      sse1 = 1;
395   }
396   else
397   if (argc == 2 && 0==strcmp(argv[1], "sse2")) {
398      sse2 = 1;
399   }
400   else
401   if (argc == 2 && 0==strcmp(argv[1], "all")) {
402      sse1 = sse2 = 1;
403   }
404   else {
405      fprintf(stderr, "usage: sse_memory [sse1|sse2|all]\n");
406      return 0;
407   }
408
409   /* ------------------------ SSE1 ------------------------ */
410   if (sse1) {
411      do_addps();
412      do_addss();
413      do_andnps();
414      do_andps();
415      do_cmpeqps();
416      do_cmpeqss();
417      do_cmpleps();
418      do_cmpless();
419      do_cmpltps();
420      do_cmpltss();
421      do_cmpneqps();
422      do_cmpneqss();
423      do_cmpnleps();
424      do_cmpnless();
425      do_cmpnltps();
426      do_cmpnltss();
427      do_cmpordps();
428      do_cmpordss();
429      do_cmpunordps();
430      do_cmpunordss();
431      do_comiss();
432      //TEST_INSN( &AllMask, 0,cvtpi2ps)
433      //TEST_INSN( &AllMask, 0,cvtps2pi)
434      //TEST_INSN( &AllMask, 0,cvtsi2ss)
435      //TEST_INSN( &AllMask, 0,cvtss2si)
436      //TEST_INSN( &AllMask, 0,cvttps2pi)
437      //TEST_INSN( &AllMask, 0,cvttss2si)
438      do_divps();
439      do_divss();
440      do_maxps();
441      do_maxss();
442      do_minps();
443      do_minss();
444      do_movaps();
445      //TEST_INSN( &AllMask, 0,movhlps)
446      //TEST_INSN( &AllMask, 0,movhps)
447      //TEST_INSN( &AllMask, 0,movlhps)
448      //TEST_INSN( &AllMask, 0,movlps)
449      //TEST_INSN( &AllMask, 0,movmskps)
450      //TEST_INSN( &AllMask, 0,movntps)
451      //TEST_INSN( &AllMask, 0,movntq)
452      do_movss();
453      do_movups();
454      do_mulps();
455      do_mulss();
456      do_orps();
457      //TEST_INSN( &AllMask, 0,pavgb) -- dup with sse2?
458      //TEST_INSN( &AllMask, 0,pavgw) -- dup with sse2?
459      //TEST_INSN( &AllMask, 0,pextrw)
460      //TEST_INSN( &AllMask, 0,pinsrw)
461      //TEST_INSN( &AllMask, 0,pmaxsw) -- dup with sse2?
462      //TEST_INSN( &AllMask, 0,pmaxub) -- dup with sse2?
463      //TEST_INSN( &AllMask, 0,pminsw) -- dup with sse2?
464      //TEST_INSN( &AllMask, 0,pminub) -- dup with sse2?
465      //TEST_INSN( &AllMask, 0,pmovmskb)
466      //TEST_INSN( &AllMask, 0,pmulhuw) -- dup with sse2?
467      //do_psadbw();  -- XXXXXXXXXXXXXXXX sse2 (xmm variant) not implemented!
468      //TEST_INSN( &AllMask, 0,pshufw)
469      do_rcpps();
470      do_rcpss();
471      do_rsqrtps();
472      do_rsqrtss();
473      //TEST_INSN( &AllMask, PS,shufps)
474      do_sqrtps();
475      do_sqrtss();
476      do_subps();
477      do_subss();
478      do_ucomiss();
479      do_unpckhps();
480      do_unpcklps();
481      do_xorps();
482   }
483
484   /* ------------------------ SSE2 ------------------------ */
485   if (sse2) {
486      do_addpd();
487      do_addsd();
488      do_andnpd();
489      do_andpd();
490      do_cmpeqpd();
491      do_cmpeqsd();
492      do_cmplepd();
493      do_cmplesd();
494      do_cmpltpd();
495      do_cmpltsd();
496      do_cmpneqpd();
497      do_cmpneqsd();
498      do_cmpnlepd();
499      do_cmpnlesd();
500      do_cmpnltpd();
501      do_cmpnltsd();
502      do_cmpordpd();
503      do_cmpordsd();
504      do_cmpunordpd();
505      do_cmpunordsd();
506      do_comisd();
507      do_cvtdq2pd();
508      do_cvtdq2ps();
509      do_cvtpd2dq();
510      //TEST_INSN( &AllMask, 0,cvtpd2pi)
511      do_cvtpd2ps();
512      //TEST_INSN( &AllMask, 0,cvtpi2pd)
513      do_cvtps2dq();
514      do_cvtps2pd();
515      //TEST_INSN( &AllMask, 0,cvtsd2si)
516      do_cvtsd2ss();
517      //TEST_INSN( &AllMask, 0,cvtsi2sd)
518      do_cvtss2sd();
519      do_cvttpd2dq();
520      //TEST_INSN( &AllMask, 0,cvttpd2pi)
521      do_cvttps2dq();
522      //TEST_INSN( &AllMask, 0,cvttsd2si)
523      do_divpd();
524      do_divsd();
525      do_maxpd();
526      do_maxsd();
527      do_minpd();
528      do_minsd();
529      do_movapd();
530      //TEST_INSN( &AllMask, 8,movd)
531      //TEST_INSN( &AllMask, 0,movdq2q)
532      do_movdqa();
533      do_movdqu();
534      //TEST_INSN( &AllMask, 16,movhpd)
535      //TEST_INSN( &AllMask, 16,movlpd)
536      //TEST_INSN( &AllMask, 0,movmskpd)
537      //TEST_INSN( &AllMask, 0,movntdq)
538      //TEST_INSN( &AllMask, 0,movnti)
539      //TEST_INSN( &AllMask, 0,movntpd)
540      do_movq();
541      //TEST_INSN( &AllMask, 0,movq2dq)
542      do_movsd();
543      do_movupd();
544      do_mulpd();
545      do_mulsd();
546      do_orpd();
547      do_packssdw();
548      do_packsswb();
549      do_packuswb();
550      do_paddb();
551      do_paddd();
552      do_paddq();
553      do_paddsb();
554      do_paddsw();
555      do_paddusb();
556      do_paddusw();
557      do_paddw();
558      do_pand();
559      do_pandn();
560      do_pavgb();
561      do_pavgw();
562      do_pcmpeqb();
563      do_pcmpeqd();
564      do_pcmpeqw();
565      do_pcmpgtb();
566      do_pcmpgtd();
567      do_pcmpgtw();
568      //TEST_INSN( &AllMask, 16,pextrw)
569      //TEST_INSN( &AllMask, 16,pinsrw)
570      do_pmaxsw();
571      do_pmaxub();
572      do_pminsw();
573      do_pminub();
574      //TEST_INSN( &AllMask, 0,pmovmskb)
575      do_pmulhuw();
576      do_pmulhw();
577      do_pmullw();
578      do_pmuludq();
579      do_por();
580      //TEST_INSN( &AllMask, 16,pshufd)
581      //TEST_INSN( &AllMask, 16,pshufhw)
582      //TEST_INSN( &AllMask, 16,pshuflw)
583      do_pslld();
584      //TEST_INSN( &AllMask, 16,pslldq)
585      do_psllq();
586      do_psllw();
587      do_psrad();
588      do_psraw();
589      do_psrld();
590      //TEST_INSN( &AllMask, 16,psrldq)
591      do_psrlq();
592      do_psrlw();
593      do_psubb();
594      do_psubd();
595      do_psubq();
596      do_psubsb();
597      do_psubsw();
598      do_psubusb();
599      do_psubusw();
600      do_psubw();
601      do_punpckhbw();
602      do_punpckhdq();
603      do_punpckhqdq();
604      do_punpckhwd();
605      do_punpcklbw();
606      do_punpckldq();
607      do_punpcklqdq();
608      do_punpcklwd();
609      do_pxor();
610      //TEST_INSN( &AllMask, PD,shufpd)
611      do_sqrtpd();
612      do_sqrtsd();
613      do_subpd();
614      do_subsd();
615      do_ucomisd();
616      do_unpckhpd();
617      do_unpcklpd();
618      do_xorpd();
619   }
620
621   return 0;
622}
623
624