#include <stdlib.h>
#include <stdio.h>

/* Fixed-purpose integer aliases used throughout this test:
   UInt  holds one 32-bit half of the cmpxchg8b memory operand,
   ULong holds a full 64-bit register or memory value. */
typedef unsigned int UInt;
typedef unsigned long long int ULong;
/* Execute one `lock cmpxchg8b` against a private 8-byte memory operand
   and report the machine state afterwards.

   IN:  rdxIn, raxIn, rcxIn, rbxIn are loaded into the full 64-bit
        registers before the instruction runs.  memHiIn / memLoIn
        seed the memory operand; only their low 32 bits are kept,
        because the operand is two 32-bit words.

   OUT: *rdxOut, *raxOut   RDX and RAX as left by the instruction
        *memHiOut, *memLoOut   the two operand words, zero-extended
        *zOut   the Z flag after the instruction, as 0 or 1 */
void do_cmpxchg8b ( /*OUT*/
                    ULong* rdxOut,   ULong* raxOut,
                    ULong* memHiOut, ULong* memLoOut,
                    ULong* zOut,
                    /*IN*/
                    ULong rdxIn,   ULong raxIn,
                    ULong memHiIn, ULong memLoIn,
                    ULong rcxIn,   ULong rbxIn )
{
   /* Keep the 8-byte operand naturally aligned so the locked access
      can never straddle a cache line (a "split lock"). */
   UInt mem[2] __attribute__((aligned(8)));

   /* Parameter block handed to the asm through a single pointer:
        [0] rdx   [1] rax   [2] rcx   [3] rbx   (in; [0],[1] also out)
        [4] address of the memory operand
        [5] Z flag result */
   ULong block[6];

   mem[0] = (UInt)memLoIn;
   mem[1] = (UInt)memHiIn;
   block[0] = rdxIn;
   block[1] = raxIn;
   block[2] = rcxIn;
   block[3] = rbxIn;
   block[4] = (ULong)&mem[0];
   /* All-ones marker: it survives only if the asm never stores Z. */
   block[5] = ~(0ULL);
   __asm__ __volatile__(
          "movq %0,%%r11\n"
        "\tmovq  0(%%r11),%%rdx\n"
        "\tmovq  8(%%r11),%%rax\n"
        "\tmovq 16(%%r11),%%rcx\n"
        "\tmovq 24(%%r11),%%rbx\n"
        "\tmovq 32(%%r11),%%r10\n"
        "\tlock cmpxchg8b (%%r10)\n"
        /* movabsq and the stores below leave the flags alone, so setz
           still sees the Z flag produced by cmpxchg8b. */
        "\tmovabsq $0,%%r10\n"
        "\tsetz %%r10b\n"
        "\tmovq %%r10,40(%%r11)\n"
        "\tmovq %%rdx,0(%%r11)\n"
        "\tmovq %%rax,8(%%r11)\n"
          : /*out*/
          : /*in*/ "r"(&block[0])
          : /*trash*/ "%r11", "%r10", "%rax", "%rbx", "%rcx", "%rdx",
                      "cc", "memory" );
   *rdxOut = block[0];
   *raxOut = block[1];
   *memLoOut = (ULong)mem[0];
   *memHiOut = (ULong)mem[1];
   *zOut = block[5];
}
/* Run one cmpxchg8b experiment and print a single " Q" line showing the
   inputs (d:a, mem, c:b) followed by the resulting Z flag, d:a and mem. */
void try8b ( ULong d, ULong a, ULong mHi, ULong mLo, ULong c, ULong b )
{
   ULong dd, aa, mmHi, mmLo, zz;
   do_cmpxchg8b( &dd, &aa, &mmHi, &mmLo, &zz,
                 d,a,mHi,mLo,c,b);
   /* %lld expects a signed long long, so convert zz explicitly rather
      than passing an unsigned value to a signed conversion. */
   printf(" Q d:a=%llx:%llx mem=%llx:%llx c:b=%llx:%llx "
          "-> z=%lld d:a=%llx:%llx mem=%llx:%llx\n",
          d,a, mHi,mLo, c,b, (long long)zz, dd,aa, mmHi,mmLo );
}
/* Execute one `lock cmpxchg16b` against a private 16-byte memory operand
   and report the machine state afterwards.

   IN:  rdxIn, raxIn, rcxIn, rbxIn are loaded into the registers before
        the instruction runs.  memHiIn / memLoIn seed the upper and
        lower 64-bit halves of the memory operand.

   OUT: *rdxOut, *raxOut   RDX and RAX as left by the instruction
        *memHiOut, *memLoOut   the two halves of the memory operand
        *zOut   the Z flag after the instruction, as 0 or 1 */
void do_cmpxchg16b ( /*OUT*/
                     ULong* rdxOut,   ULong* raxOut,
                     ULong* memHiOut, ULong* memLoOut,
                     ULong* zOut,
                     /*IN*/
                     ULong rdxIn,   ULong raxIn,
                     ULong memHiIn, ULong memLoIn,
                     ULong rcxIn,   ULong rbxIn )
{
   /* cmpxchg16b requires a 16-byte aligned memory operand. */
   ULong mem[2] __attribute__((aligned(16)));

   /* Parameter block handed to the asm through a single pointer:
        [0] rdx   [1] rax   [2] rcx   [3] rbx   (in; [0],[1] also out)
        [4] address of the memory operand
        [5] Z flag result */
   ULong block[6];

   mem[0] = memLoIn;
   mem[1] = memHiIn;
   block[0] = rdxIn;
   block[1] = raxIn;
   block[2] = rcxIn;
   block[3] = rbxIn;
   block[4] = (ULong)&mem[0];
   /* All-ones marker: it survives only if the asm never stores Z. */
   block[5] = ~(0ULL);
   __asm__ __volatile__(
          "movq %0,%%r11\n"
        "\tmovq  0(%%r11),%%rdx\n"
        "\tmovq  8(%%r11),%%rax\n"
        "\tmovq 16(%%r11),%%rcx\n"
        "\tmovq 24(%%r11),%%rbx\n"
        "\tmovq 32(%%r11),%%r10\n"
        /* Emitted as raw bytes rather than by mnemonic: lock prefix,
           REX.W+B, 0F C7 /1 with r10 as the base register. */
        "\t.byte 0xf0, 0x49, 0x0f, 0xc7, 0x0a\n" /* lock cmpxchg16b (%%r10) */
        /* movabsq and the stores below leave the flags alone, so setz
           still sees the Z flag produced by cmpxchg16b. */
        "\tmovabsq $0,%%r10\n"
        "\tsetz %%r10b\n"
        "\tmovq %%r10,40(%%r11)\n"
        "\tmovq %%rdx,0(%%r11)\n"
        "\tmovq %%rax,8(%%r11)\n"
          : /*out*/
          : /*in*/ "r"(&block[0])
          : /*trash*/ "%r11", "%r10", "%rax", "%rbx", "%rcx", "%rdx",
                      "cc", "memory" );
   *rdxOut = block[0];
   *raxOut = block[1];
   *memLoOut = mem[0];
   *memHiOut = mem[1];
   *zOut = block[5];
}
/* Run one cmpxchg16b experiment and print a single "QQ" line showing the
   inputs (d:a, mem, c:b) followed by the resulting Z flag, d:a and mem. */
void try16b ( ULong d, ULong a, ULong mHi, ULong mLo, ULong c, ULong b )
{
   ULong dd, aa, mmHi, mmLo, zz;
   do_cmpxchg16b( &dd, &aa, &mmHi, &mmLo, &zz,
                  d,a,mHi,mLo,c,b);
   /* %lld expects a signed long long, so convert zz explicitly rather
      than passing an unsigned value to a signed conversion. */
   printf("QQ d:a=%llx:%llx mem=%llx:%llx c:b=%llx:%llx "
          "-> z=%lld d:a=%llx:%llx mem=%llx:%llx\n",
          d,a, mHi,mLo, c,b, (long long)zz, dd,aa, mmHi,mmLo );
}
/* Drive both instructions over the same set of operand patterns.

   Each experiment uses d:a = 0:1 and c:b = 3:2 and varies only the
   memory operand.  The whole set is run twice per instruction: once
   with the raw small values and once with 0xDEADBEEF00000000 added to
   every operand so that the upper 32 bits are populated as well.
   All cmpxchg8b experiments run first, then all cmpxchg16b ones. */
int main(void)
{
   const ULong z = 0xDEADBEEF00000000ULL;
   const ULong bias[2] = { 0, z };

   /* { memHi, memLo } for each experiment; only 0:1 equals d:a. */
   static const ULong mems[6][2] = {
      { 5, 4 }, { 0, 1 },
      { 0, 4 }, { 0, 0 },
      { 5, 0 }, { 1, 1 },
   };
   const int n_mems = (int)(sizeof mems / sizeof mems[0]);
   int pass, i;

   for (pass = 0; pass < 2; pass++) {
      const ULong k = bias[pass];
      for (i = 0; i < n_mems; i++) {
         try8b( 0+k,1+k, mems[i][0]+k,mems[i][1]+k, 3+k,2+k );
      }
   }

   for (pass = 0; pass < 2; pass++) {
      const ULong k = bias[pass];
      for (i = 0; i < n_mems; i++) {
         try16b( 0+k,1+k, mems[i][0]+k,mems[i][1]+k, 3+k,2+k );
      }
   }

   return 0;
}