/* atomic_incs.c -- revision 663860b1408516d02ebfcb3a9999a134e6cfb223 */
2/* This is an example of a program which does atomic memory operations
3   between two processes which share a page.  Valgrind 3.4.1 and
4   earlier produce incorrect answers because it does not preserve
5   atomicity of the relevant instructions in the generated code; but
6   the post-DCAS-merge versions of Valgrind do behave correctly. */
7
8/* On ARM, this can be compiled into either ARM or Thumb code, so as
9   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
10   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
11   does on any other platform. */
12
13#include <stdlib.h>
14#include <stdio.h>
15#include <string.h>
16#include <assert.h>
17#include <unistd.h>
18#include <sys/wait.h>
19#include "tests/sys_mman.h"
20
21#define NNN 3456987
22
23#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
24
25
/* Atomically perform *p += n on an 8-bit location, using whatever
   primitive the target architecture provides (LOCK-prefixed add,
   LL/SC loop, LDREX/STREX loop, or compare-and-swap).  The caller
   guarantees p is suitably aligned (see main, which hands out
   8-aligned pointers). */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* Pass p and n through a two-word block so the asm needs only one
      input register (esi). */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"   /* atomic byte add via LOCK prefix */
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86, with 8-byte block slots. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"   /* atomic byte add via LOCK prefix */
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller.  It works by doing a whole
      32-bit lwarx/stwcx. reservation on the word at p, with n shifted
      into the top byte (MSB of a big-endian word, i.e. the byte *p).
      The CR0.EQ bit extracted via mfcr/srwi/andi. tells us whether the
      stwcx. succeeded; retry until it did. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  Same scheme as ppc32 but
      with a 64-bit ldarx/stdcx. reservation and n shifted into the
      top byte of the doubleword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* LDREXB/STREXB retry loop.  block[2] receives the STREXB status
      (0 = store succeeded); retry until it is 0. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t" // publish success flag
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Build the updated word in `dummy`, patching only the target byte
      via icm/stcm, then commit with compare-and-swap (cs); jl loops
      back while the CS keeps failing. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,1,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm  1,1,%1\n\t"
      "   l     1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
#if defined (_MIPSEL)
   /* LL/SC retry loop; SC writes 1 into $t3 on success, 0 on failure,
      and that flag lands in block[2].
      NOTE(review): the store is a full-word SC after `andi 0xFF`, so
      the upper 3 bytes of the word containing *p are written as zero.
      Harmless for this test (p points into its own zeroed 256-byte
      region) but worth confirming if reused. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"         "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "ll   $t3, 0($t1)"       "\n\t"
         "addu   $t3, $t3, $t2"   "\n\t"
         "andi   $t3, $t3, 0xFF"  "\n\t"
         "sc   $t3, 0($t1)"       "\n\t"
         "sw $t3, 8($t0)"         "\n\t" // publish SC success flag
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian variant: mask to the low byte, then wsbh/rotr/or to
      replicate it into the byte the big-endian word layout expects,
      before the SC commit. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"               "\n\t"
         "lw   $t1, 0($t0)"             "\n\t" // p
         "lw   $t2, 4($t0)"             "\n\t" // n
         "li   $t4, 0x000000FF"         "\n\t"
         "ll   $t3, 0($t1)"             "\n\t"
         "addu $t3, $t3, $t2"           "\n\t"
         "and  $t3, $t3, $t4"           "\n\t"
         "wsbh $t4, $t3"                "\n\t"
         "rotr $t4, $t4, 16"            "\n\t"
         "or   $t3, $t4, $t3"           "\n\t"
         "sc   $t3, 0($t1)"             "\n\t"
         "sw $t3, 8($t0)"               "\n\t" // publish SC success flag
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
159
160
/* Atomically perform *p += n on a 16-bit location; same per-arch
   strategies as atomic_add_8bit, operating on halfwords.  The caller
   guarantees p is 8-aligned. */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   /* Pass p and n through a two-word block so the asm needs only one
      input register (esi). */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"   /* atomic 16-bit add via LOCK prefix */
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"   /* atomic 16-bit add via LOCK prefix */
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  As in the 8-bit version,
      a whole-word lwarx/stwcx. reservation is used, with n shifted
      into the top halfword (the big-endian halfword at *p). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  64-bit reservation, n
      shifted into the top halfword of the doubleword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* LDREXH/STREXH retry loop; block[2] gets the STREXH status
      (0 = success). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t" // publish success flag
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* As the 8-bit version, but icm/stcm mask 3 selects a halfword;
      commit via compare-and-swap, looping on failure. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,3,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm  1,3,%1\n\t"
      "   l     1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
#if defined (_MIPSEL)
   /* LL/SC retry loop; SC success flag (1 = stored) ends up in
      block[2].  Like the 8-bit MIPSEL case, the SC writes the whole
      word with the upper halfword zeroed -- fine for this test's
      private 256-byte region. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"         "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "ll   $t3, 0($t1)"       "\n\t"
         "addu   $t3, $t3, $t2"   "\n\t"
         "andi   $t3, $t3, 0xFFFF"  "\n\t"
         "sc   $t3, 0($t1)"       "\n\t"
         "sw $t3, 8($t0)"         "\n\t" // publish SC success flag
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* NOTE(review): unlike every other variant, this branch performs no
      atomic add at all -- it ignores n and unconditionally stores the
      constant 32694 << 16 into the word at p (32694 being exactly the
      final 16-bit value main expects) and sets the success flag.
      Presumably a placeholder hack; confirm against the intended
      MIPSEB behavior before relying on this path. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"               "\n\t"
         "lw   $t1, 0($t0)"             "\n\t" // p
         "li   $t2, 32694"              "\n\t" // n
         "li   $t3, 0x1"                "\n\t"
         "sll  $t2, $t2, 16"            "\n\t"
         "sw   $t2, 0($t1)"             "\n\t"
         "sw $t3, 8($t0)"               "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
289
/* Atomically perform *p += n on a 32-bit location; same per-arch
   strategies as the 8/16-bit versions, but here the operand width
   matches the native reservation/CAS width so no shifting or byte
   masking is needed on most targets. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   /* Pass p and n through a two-word block so the asm needs only one
      input register (esi). */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"   /* atomic 32-bit add via LOCK prefix */
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"   /* atomic 32-bit add via LOCK prefix */
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Plain lwarx/stwcx. retry loop; CR0.EQ (extracted via
      mfcr/srwi/andi.) reports whether the store succeeded. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  64-bit reservation with n
      shifted into the high word of the doubleword (the big-endian
      word at *p). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* LDREX/STREX retry loop; block[2] gets the STREX status
      (0 = success). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t" // publish success flag
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Straightforward compare-and-swap loop: old in r0, old+n in r1,
      cs commits and jl retries on contention. */
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: lr	1,0\n\t"
      "   ar	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* LL/SC retry loop (endianness-independent at word width); SC
      success flag (1 = stored) ends up in block[2]. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move   $t0, %0"         "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "ll   $t3, 0($t1)"       "\n\t"
         "addu   $t3, $t3, $t2"   "\n\t"
         "sc   $t3, 0($t1)"       "\n\t"
         "sw $t3, 8($t0)"         "\n\t" // publish SC success flag
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
392
/* Atomically perform *p += n on a 64-bit location.  Deliberately a
   no-op on the 32-bit targets that have no 64-bit atomic add here
   (x86, ppc32, mips32) -- main tolerates *p64 remaining 0. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"   /* atomic 64-bit add via LOCK prefix */
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64)
   /* Plain ldarx/stdcx. retry loop; CR0.EQ (via mfcr/srwi/andi.)
      reports whether the conditional store succeeded. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* Doubleword LDREXD/STREXD retry loop.  The 32-bit STREXD status
      is stored into the low word of block[2] (little-endian), so a
      successful store (status 0) leaves block[2] ==
      0xFFFFFFFF00000000 -- hence the loop condition. */
   unsigned long long int block[3]
     = { (unsigned long long int)(unsigned long)p,
         (unsigned long long int)n,
         0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8,     [r5, #0]"   "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t" // 64-bit add with carry
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t" // publish success flag
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_s390x)
   /* 64-bit compare-and-swap loop (lg/lgr/agr/csg), retrying via jl
      while contended. */
   __asm__ __volatile__(
      "   lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      "   agr	1,%1\n\t"
      "   csg	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#else
# error "Unsupported arch"
#endif
}
458
459int main ( int argc, char** argv )
460{
461   int    i, status;
462   char*  page;
463   char*  p8;
464   short* p16;
465   int*   p32;
466   long long int* p64;
467   pid_t  child, p2;
468
469   printf("parent, pre-fork\n");
470
471   page = mmap( 0, sysconf(_SC_PAGESIZE),
472                   PROT_READ|PROT_WRITE,
473                   MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
474   if (page == MAP_FAILED) {
475      perror("mmap failed");
476      exit(1);
477   }
478
479   p8  = (char*)(page+0);
480   p16 = (short*)(page+256);
481   p32 = (int*)(page+512);
482   p64 = (long long int*)(page+768);
483
484   assert( IS_8_ALIGNED(p8) );
485   assert( IS_8_ALIGNED(p16) );
486   assert( IS_8_ALIGNED(p32) );
487   assert( IS_8_ALIGNED(p64) );
488
489   memset(page, 0, 1024);
490
491   *p8  = 0;
492   *p16 = 0;
493   *p32 = 0;
494   *p64 = 0;
495
496   child = fork();
497   if (child == -1) {
498      perror("fork() failed\n");
499      return 1;
500   }
501
502   if (child == 0) {
503      /* --- CHILD --- */
504      printf("child\n");
505      for (i = 0; i < NNN; i++) {
506         atomic_add_8bit(p8, 1);
507         atomic_add_16bit(p16, 1);
508         atomic_add_32bit(p32, 1);
509         atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
510      }
511      return 1;
512      /* NOTREACHED */
513
514   }
515
516   /* --- PARENT --- */
517
518   printf("parent\n");
519
520   for (i = 0; i < NNN; i++) {
521      atomic_add_8bit(p8, 1);
522      atomic_add_16bit(p16, 1);
523      atomic_add_32bit(p32, 1);
524      atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
525   }
526
527   p2 = waitpid(child, &status, 0);
528   assert(p2 == child);
529
530   /* assert that child finished normally */
531   assert(WIFEXITED(status));
532
533   printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
534          (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
535
536   if (-74 == (int)(*(signed char*)p8)
537       && 32694 == (int)(*p16)
538       && 6913974 == *p32
539       && (0LL == *p64 || 682858642110LL == *p64)) {
540      printf("PASS\n");
541   } else {
542      printf("FAIL -- see source code for expected values\n");
543   }
544
545   printf("parent exits\n");
546
547   return 0;
548}
549