16b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
/* Program which uses a happens-before edge to coordinate an access to
   variable 'shared_var' between two threads.  The h-b edge is created
   by a custom (kludgesome!) mechanism and hence we need to use
   ANNOTATE_HAPPENS_{BEFORE,AFTER} to explain to Helgrind what's going
   on (else it reports a race). */
76b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
86b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj#include <pthread.h>
96b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj#include <stdio.h>
106b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj#include <assert.h>
116b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
126b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj#include "../../helgrind/helgrind.h"
136b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
/* Todo: move all this do_acasW guff into a support library.  It's
   useful for multiple tests, not just this one.

   XXX: all the do_acasW routines assume the supplied address
   is UWord (naturally) aligned. */
196b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
206b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
/* Unsigned word type used for the shared counter and for every
   do_acasW operand.  Pointers are cast to it when they are handed to
   the inline assembly via parameter blocks. */
typedef  unsigned long int  UWord;
226b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
23cae0cc22b83ffb260ee8379e92099c5a701944cbcarll#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
246b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
256b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj// ppc64
266b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj/* return 1 if success, 0 if failure */
276b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjUWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
286b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
296b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  UWord old, success;
306b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
316b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  /* Fetch the old value, and set the reservation */
326b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  __asm__ __volatile__ (
336b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj     "ldarx  %0, 0,%1"     "\n"  // rD,rA,rB
346b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*out*/   "=b"(old)
356b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*in*/    "b"(addr)
366b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*trash*/ "memory","cc"
376b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   );
386b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
396b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   /* If the old value isn't as expected, we've had it */
406b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   if (old != expected) return 0;
416b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
426b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   /* otherwise try to stuff the new value in */
436b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   __asm__ __volatile__(
446b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "stdcx. %2, 0,%1"   "\n"      // rS,rA,rB
456b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "mfcr   %0"         "\n\t"
466b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "srdi   %0,%0,29"   "\n\t"
476b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "andi.  %0,%0,1"    "\n"
486b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*out*/ "=b"(success)
496b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*in*/ "b"(addr), "b"(nyu)
506b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   );
516b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
526b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   assert(success == 0 || success == 1);
536b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   return success;
546b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
556b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
566b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj#elif defined(VGA_ppc32)
576b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
586b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj// ppc32
596b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj/* return 1 if success, 0 if failure */
606b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjUWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
616b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
626b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  UWord old, success;
636b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
646b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  /* Fetch the old value, and set the reservation */
656b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  __asm__ __volatile__ (
666b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj     "lwarx  %0, 0,%1"     "\n"  // rD,rA,rB
676b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*out*/   "=b"(old)
686b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*in*/    "b"(addr)
696b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*trash*/ "memory","cc"
706b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   );
716b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
726b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   /* If the old value isn't as expected, we've had it */
736b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   if (old != expected) return 0;
746b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
756b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   /* otherwise try to stuff the new value in */
766b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   __asm__ __volatile__(
776b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "stwcx. %2, 0,%1"   "\n"      // rS,rA,rB
786b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "mfcr   %0"         "\n\t"
796b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "srwi   %0,%0,29"   "\n\t"
806b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "andi.  %0,%0,1"    "\n"
816b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*out*/ "=b"(success)
826b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*in*/ "b"(addr), "b"(nyu)
836b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   );
846b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
856b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   assert(success == 0 || success == 1);
866b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   return success;
876b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
886b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
896b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj#elif defined(VGA_amd64)
906b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
916b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj// amd64
926b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj/* return 1 if success, 0 if failure */
936b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjUWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
946b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
956b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   UWord block[4] = { (UWord)addr, expected, nyu, 2 };
966b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   __asm__ __volatile__(
976b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "movq 0(%%rsi),  %%rdi"         "\n\t" // addr
986b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "movq 8(%%rsi),  %%rax"         "\n\t" // expected
996b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "movq 16(%%rsi), %%rbx"         "\n\t" // nyu
1006b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "xorq %%rcx,%%rcx"              "\n\t"
1016b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "lock; cmpxchgq %%rbx,(%%rdi)"  "\n\t"
1026b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "setz %%cl"                     "\n\t"
1036b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "movq %%rcx, 24(%%rsi)"         "\n"
1046b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*out*/
1056b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*in*/ "S"(&block[0])
1066b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*trash*/"memory","cc","rdi","rax","rbx","rcx"
1076b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   );
1086b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   assert(block[3] == 0 || block[3] == 1);
1096b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   return block[3] & 1;
1106b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
1116b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
1126b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj#elif defined(VGA_x86)
1136b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
1146b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj// x86
1156b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj/* return 1 if success, 0 if failure */
1166b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjUWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
1176b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
1186b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   UWord block[4] = { (UWord)addr, expected, nyu, 2 };
1196b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   __asm__ __volatile__(
1205d48a3cddd65df5663dd21b978f54ae2ea3a8ef2sewardj      "pushl %%ebx"                   "\n\t"
1216b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "movl 0(%%esi),  %%edi"         "\n\t" // addr
1226b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "movl 4(%%esi),  %%eax"         "\n\t" // expected
1236b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "movl 8(%%esi),  %%ebx"         "\n\t" // nyu
1246b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "xorl %%ecx,%%ecx"              "\n\t"
1256b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "lock; cmpxchgl %%ebx,(%%edi)"  "\n\t"
1266b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      "setz %%cl"                     "\n\t"
1275d48a3cddd65df5663dd21b978f54ae2ea3a8ef2sewardj      "movl %%ecx, 12(%%esi)"         "\n\t"
1285d48a3cddd65df5663dd21b978f54ae2ea3a8ef2sewardj      "popl %%ebx"                    "\n"
1296b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*out*/
1306b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      : /*in*/ "S"(&block[0])
1315d48a3cddd65df5663dd21b978f54ae2ea3a8ef2sewardj      : /*trash*/"memory","cc","edi","eax","ecx"
1326b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   );
1336b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   assert(block[3] == 0 || block[3] == 1);
1346b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   return block[3] & 1;
1356b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
1366b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
1378777f7308785b4eb67898670e29ecdde42dec9dfsewardj#elif defined(VGA_arm)
1388777f7308785b4eb67898670e29ecdde42dec9dfsewardj
1398777f7308785b4eb67898670e29ecdde42dec9dfsewardj// arm
1408777f7308785b4eb67898670e29ecdde42dec9dfsewardj/* return 1 if success, 0 if failure */
1418777f7308785b4eb67898670e29ecdde42dec9dfsewardjUWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
1428777f7308785b4eb67898670e29ecdde42dec9dfsewardj{
1438777f7308785b4eb67898670e29ecdde42dec9dfsewardj  UWord old, success;
1448777f7308785b4eb67898670e29ecdde42dec9dfsewardj  UWord block[2] = { (UWord)addr, nyu };
1458777f7308785b4eb67898670e29ecdde42dec9dfsewardj
1468777f7308785b4eb67898670e29ecdde42dec9dfsewardj  /* Fetch the old value, and set the reservation */
1478777f7308785b4eb67898670e29ecdde42dec9dfsewardj  __asm__ __volatile__ (
1488777f7308785b4eb67898670e29ecdde42dec9dfsewardj     "ldrex  %0, [%1]"    "\n"
1498777f7308785b4eb67898670e29ecdde42dec9dfsewardj      : /*out*/   "=r"(old)
1508777f7308785b4eb67898670e29ecdde42dec9dfsewardj      : /*in*/    "r"(addr)
1518777f7308785b4eb67898670e29ecdde42dec9dfsewardj   );
1528777f7308785b4eb67898670e29ecdde42dec9dfsewardj
1538777f7308785b4eb67898670e29ecdde42dec9dfsewardj   /* If the old value isn't as expected, we've had it */
1548777f7308785b4eb67898670e29ecdde42dec9dfsewardj   if (old != expected) return 0;
1558777f7308785b4eb67898670e29ecdde42dec9dfsewardj
1568777f7308785b4eb67898670e29ecdde42dec9dfsewardj   /* otherwise try to stuff the new value in */
1578777f7308785b4eb67898670e29ecdde42dec9dfsewardj   __asm__ __volatile__(
1588777f7308785b4eb67898670e29ecdde42dec9dfsewardj      "ldr    r4, [%1, #0]"      "\n\t"
1598777f7308785b4eb67898670e29ecdde42dec9dfsewardj      "ldr    r5, [%1, #4]"      "\n\t"
1608777f7308785b4eb67898670e29ecdde42dec9dfsewardj      "strex  r6, r5, [r4, #0]"  "\n\t"
1618777f7308785b4eb67898670e29ecdde42dec9dfsewardj      "eor    %0, r6, #1"        "\n\t"
1628777f7308785b4eb67898670e29ecdde42dec9dfsewardj      : /*out*/ "=r"(success)
1638777f7308785b4eb67898670e29ecdde42dec9dfsewardj      : /*in*/ "r"(&block[0])
1648777f7308785b4eb67898670e29ecdde42dec9dfsewardj      : /*trash*/ "r4","r5","r6","memory"
1658777f7308785b4eb67898670e29ecdde42dec9dfsewardj   );
1668777f7308785b4eb67898670e29ecdde42dec9dfsewardj   assert(success == 0 || success == 1);
1678777f7308785b4eb67898670e29ecdde42dec9dfsewardj   return success;
1688777f7308785b4eb67898670e29ecdde42dec9dfsewardj}
1698777f7308785b4eb67898670e29ecdde42dec9dfsewardj
17023ed6302e96eb905ef25d6c39db600e17e5f341fsewardj#elif defined(VGA_arm64)
17123ed6302e96eb905ef25d6c39db600e17e5f341fsewardj
17223ed6302e96eb905ef25d6c39db600e17e5f341fsewardj// arm64
17323ed6302e96eb905ef25d6c39db600e17e5f341fsewardj/* return 1 if success, 0 if failure */
17423ed6302e96eb905ef25d6c39db600e17e5f341fsewardjUWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
17523ed6302e96eb905ef25d6c39db600e17e5f341fsewardj{
17623ed6302e96eb905ef25d6c39db600e17e5f341fsewardj  UWord old, success;
17723ed6302e96eb905ef25d6c39db600e17e5f341fsewardj  UWord block[2] = { (UWord)addr, nyu };
17823ed6302e96eb905ef25d6c39db600e17e5f341fsewardj
17923ed6302e96eb905ef25d6c39db600e17e5f341fsewardj  /* Fetch the old value, and set the reservation */
18023ed6302e96eb905ef25d6c39db600e17e5f341fsewardj  __asm__ __volatile__ (
18123ed6302e96eb905ef25d6c39db600e17e5f341fsewardj     "ldxr  %0, [%1]"    "\n"
18223ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      : /*out*/   "=r"(old)
18323ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      : /*in*/    "r"(addr)
18423ed6302e96eb905ef25d6c39db600e17e5f341fsewardj   );
18523ed6302e96eb905ef25d6c39db600e17e5f341fsewardj
18623ed6302e96eb905ef25d6c39db600e17e5f341fsewardj   /* If the old value isn't as expected, we've had it */
18723ed6302e96eb905ef25d6c39db600e17e5f341fsewardj   if (old != expected) return 0;
18823ed6302e96eb905ef25d6c39db600e17e5f341fsewardj
18923ed6302e96eb905ef25d6c39db600e17e5f341fsewardj   /* otherwise try to stuff the new value in */
19023ed6302e96eb905ef25d6c39db600e17e5f341fsewardj   __asm__ __volatile__(
19123ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      "ldr    x4, [%1, #0]"      "\n\t"
19223ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      "ldr    x5, [%1, #8]"      "\n\t"
19323ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      "stxr   w6, x5, [x4, #0]"  "\n\t"
19423ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      "eor    %0, x6, #1"        "\n\t"
19523ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      : /*out*/ "=r"(success)
19623ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      : /*in*/ "r"(&block[0])
19723ed6302e96eb905ef25d6c39db600e17e5f341fsewardj      : /*trash*/ "x4","x5","x6","memory"
19823ed6302e96eb905ef25d6c39db600e17e5f341fsewardj   );
19923ed6302e96eb905ef25d6c39db600e17e5f341fsewardj   assert(success == 0 || success == 1);
20023ed6302e96eb905ef25d6c39db600e17e5f341fsewardj   return success;
20123ed6302e96eb905ef25d6c39db600e17e5f341fsewardj}
20223ed6302e96eb905ef25d6c39db600e17e5f341fsewardj
203b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj#elif defined(VGA_s390x)
204b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj
205b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj// s390x
206b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj/* return 1 if success, 0 if failure */
207b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardjUWord do_acasW(UWord* addr, UWord expected, UWord nyu )
208b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj{
209b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj   int cc;
210b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj
211b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj   __asm__ __volatile__ (
212b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj     "csg %2,%3,%1\n\t"
213b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj     "ipm %0\n\t"
214b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj     "srl %0,28\n\t"
215b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj     : /* out */  "=r" (cc)
216b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj     : /* in */ "Q" (*addr), "d" (expected), "d" (nyu)
217b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj     : "memory", "cc"
218b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj   );
219b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj   return cc == 0;
220b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj}
221b5b87408c0c99f9f6938d8cd921e2a5f420577c4sewardj
2225db15403e889d4db339b342bc2a824ef0bfaa654sewardj#elif defined(VGA_mips32)
2235db15403e889d4db339b342bc2a824ef0bfaa654sewardj
224dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj// mips32
2255db15403e889d4db339b342bc2a824ef0bfaa654sewardj/* return 1 if success, 0 if failure */
2265db15403e889d4db339b342bc2a824ef0bfaa654sewardjUWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
2275db15403e889d4db339b342bc2a824ef0bfaa654sewardj{
228751c1e6e1657cbafe30baaf36ad9bd011c3a179bdejanj  UWord success;
2295db15403e889d4db339b342bc2a824ef0bfaa654sewardj  UWord block[3] = { (UWord)addr, nyu, expected};
2305db15403e889d4db339b342bc2a824ef0bfaa654sewardj
2315db15403e889d4db339b342bc2a824ef0bfaa654sewardj   __asm__ __volatile__(
2325db15403e889d4db339b342bc2a824ef0bfaa654sewardj      ".set noreorder"           "\n\t"
2335db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "lw     $t0, 0(%1)"        "\n\t"
2345db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "lw     $t2, 8(%1)"        "\n\t"
2355db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "lw     $t3, 4(%1)"        "\n\t"
2365db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "ll     $t1, 0($t0)"       "\n\t"
2375db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "bne    $t1, $t2, exit_0"  "\n\t"
238dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "nop"                      "\n\t"
239dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "sc     $t3, 0($t0)"       "\n\t"
240dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "move   %0, $t3"           "\n\t"
241dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "b exit"                   "\n\t"
242dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "nop"                      "\n\t"
243dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "exit_0:"                  "\n\t"
244dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "move   %0, $zero"         "\n\t"
245dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "exit:"                    "\n\t"
246dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      : /*out*/ "=r"(success)
247dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      : /*in*/ "r"(&block[0])
248dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      : /*trash*/ "t0", "t1", "t2", "t3", "memory"
249dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj   );
250dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj
251dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj   assert(success == 0 || success == 1);
252dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj   return success;
253dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj}
254dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj
255dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj#elif defined(VGA_mips64)
256dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj
257dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj// mips64
258dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj/* return 1 if success, 0 if failure */
259dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarjUWord do_acasW ( UWord* addr, UWord expected, UWord nyu )
260dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj{
261751c1e6e1657cbafe30baaf36ad9bd011c3a179bdejanj  UWord success;
262dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj  UWord block[3] = { (UWord)addr, nyu, expected};
263dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj
264dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj   __asm__ __volatile__(
265dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      ".set noreorder"           "\n\t"
266dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "ld     $t0, 0(%1)"        "\n\t"
267dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "ld     $t2, 16(%1)"       "\n\t"
268dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "ld     $t3, 8(%1)"        "\n\t"
269dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "ll     $t1, 0($t0)"       "\n\t"
270dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "bne    $t1, $t2, exit_0"  "\n\t"
271dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "nop"                      "\n\t"
2725db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "sc     $t3, 0($t0)"       "\n\t"
2735db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "move   %0, $t3"           "\n\t"
2745db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "b exit"                   "\n\t"
2755db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "nop"                      "\n\t"
2765db15403e889d4db339b342bc2a824ef0bfaa654sewardj      "exit_0:"                  "\n\t"
277dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "move   %0, $zero"         "\n\t"
278dd6bf60fc451dced6dfa5f5d305061d66f0b13a3petarj      "exit:"                    "\n\t"
2795db15403e889d4db339b342bc2a824ef0bfaa654sewardj      : /*out*/ "=r"(success)
2805db15403e889d4db339b342bc2a824ef0bfaa654sewardj      : /*in*/ "r"(&block[0])
2815db15403e889d4db339b342bc2a824ef0bfaa654sewardj      : /*trash*/ "t0", "t1", "t2", "t3", "memory"
2825db15403e889d4db339b342bc2a824ef0bfaa654sewardj   );
2835db15403e889d4db339b342bc2a824ef0bfaa654sewardj
2845db15403e889d4db339b342bc2a824ef0bfaa654sewardj   assert(success == 0 || success == 1);
2855db15403e889d4db339b342bc2a824ef0bfaa654sewardj   return success;
2865db15403e889d4db339b342bc2a824ef0bfaa654sewardj}
2875db15403e889d4db339b342bc2a824ef0bfaa654sewardj
288112711afefcfcd43680c7c4aa8d38ef180e8811esewardj#elif defined(VGA_tilegx)
289112711afefcfcd43680c7c4aa8d38ef180e8811esewardj
290112711afefcfcd43680c7c4aa8d38ef180e8811esewardj/* return 1 if success, 0 if failure */
291112711afefcfcd43680c7c4aa8d38ef180e8811esewardjUWord do_acasW(UWord* addr, UWord expected, UWord nyu )
292112711afefcfcd43680c7c4aa8d38ef180e8811esewardj{
293112711afefcfcd43680c7c4aa8d38ef180e8811esewardj  /* Load the compare value into special register 0x2780 */
294112711afefcfcd43680c7c4aa8d38ef180e8811esewardj  __insn_mtspr(0x2780, expected);
295112711afefcfcd43680c7c4aa8d38ef180e8811esewardj  return __insn_cmpexch(addr, nyu);
296112711afefcfcd43680c7c4aa8d38ef180e8811esewardj}
297112711afefcfcd43680c7c4aa8d38ef180e8811esewardj
2986b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj#endif
2996b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3006b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjvoid atomic_incW ( UWord* w )
3016b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
3026b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   while (1) {
3036b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      UWord old = *w;
3046b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      UWord nyu = old + 1;
3056b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      UWord ok = do_acasW( w, old, nyu );
3066b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj      if (ok) break;
3076b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   };
3086b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
3096b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
#if 0

/* Disabled stand-alone check of atomic_incW: two threads each bump a
   shared counter NNN times and the final value is printed. */

#define NNN 1000000

/* Thread body: increments the UWord that 'arg' points at NNN times. */
void* thread_fn ( void* arg )
{
   UWord* counter = (UWord*)arg;
   int    done;

   for (done = 0; done != NNN; done++) {
      atomic_incW( counter );
   }
   return NULL;
}


int main ( void )
{
   int       r;
   UWord     w = 0;
   pthread_t t1, t2;

   r = pthread_create( &t1, NULL, &thread_fn, (void*)&w );
   assert(!r);
   r = pthread_create( &t2, NULL, &thread_fn, (void*)&w );
   assert(!r);

   r = pthread_join( t1, NULL );
   assert(!r);
   r = pthread_join( t2, NULL );
   assert(!r);

   printf("result = %lu\n", w );
   return 0;
}

#endif
3436b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
/* Written once by each of the two threads.  The accesses are ordered
   by the annotated happens-before edge, so it is not raced upon. */
int shared_var = 0;
3456b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3466b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
/* Sleep for roughly 'i' milliseconds by issuing 'i' separate
   one-millisecond nanosleep calls.  A non-positive 'i' returns
   immediately. */
void delayXms ( int i )
{
   struct timespec one_ms = { .tv_sec = 0, .tv_nsec = 1 * 1000 * 1000 };

   // The delay is deliberately chopped into small pieces: each
   // nanosleep is a scheduling event, which gives a fair switch
   // between threads even without --fair-sched=yes. This is a.o.
   // needed for running this test under an outer helgrind or an
   // outer sgcheck.
   for (; i > 0; i--) {
      nanosleep(&one_ms, NULL);
   }
}
3606b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3616b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjvoid do_wait ( UWord* w )
3626b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
3636b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  UWord w0 = *w;
3646b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  UWord volatile * wV = w;
3659eff0d47c57ce4c91db43cd693cc3d86e7c35f8dflorian  while (*wV == w0)
3669c85587701933e5ff96335e043ce3b6eefeee9c8philippe    delayXms(1); // small sleeps, ensuring context switches
3676b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  ANNOTATE_HAPPENS_AFTER(w);
3686b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
3696b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3706b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjvoid do_signal ( UWord* w )
3716b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
3726b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  ANNOTATE_HAPPENS_BEFORE(w);
3736b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  atomic_incW(w);
3746b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
3756b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3766b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3776b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3786b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjvoid* thread_fn1 ( void* arg )
3796b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
3806b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  UWord* w = (UWord*)arg;
3819c85587701933e5ff96335e043ce3b6eefeee9c8philippe  delayXms(500);    // ensure t2 gets to its wait first
3826b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  shared_var = 1;  // first access
3836b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  do_signal(w);    // cause h-b edge to second thread
3846b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3859c85587701933e5ff96335e043ce3b6eefeee9c8philippe  delayXms(500);
3866b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  return NULL;
3876b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
3886b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3896b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjvoid* thread_fn2 ( void* arg )
3906b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
3916b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  UWord* w = (UWord*)arg;
3926b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  do_wait(w);      // wait for h-b edge from first thread
3936b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  shared_var = 2;  // second access
3946b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3959c85587701933e5ff96335e043ce3b6eefeee9c8philippe  delayXms(500);
3966b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj  return NULL;
3976b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
3986b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
3996b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
4006b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
4016b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
4026b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
4036b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
4046b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardjint main ( void )
4056b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj{
4066b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   int r;
4076b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   UWord w = 0;
4086b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   pthread_t t1, t2;
4096b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
4106b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   r= pthread_create( &t1, NULL, &thread_fn1, (void*)&w );   assert(!r);
4116b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   r= pthread_create( &t2, NULL, &thread_fn2, (void*)&w );   assert(!r);
4126b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj
4136b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   r= pthread_join( t1, NULL );   assert(!r);
4146b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   r= pthread_join( t2, NULL );   assert(!r);
4156b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj   return 0;
4166b45cf2969d53575947a21fd7f64b5f37ef1ffbasewardj}
417