1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This is an example of a program which does atomic memory operations
3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   between two processes which share a page.  Valgrind 3.4.1 and
4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   earlier produce incorrect answers because they do not preserve
5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   atomicity of the relevant instructions in the generated code; but
6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   the post-DCAS-merge versions of Valgrind do behave correctly. */
7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
8b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov/* On ARM, this can be compiled into either ARM or Thumb code, so as
9b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
10b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
11b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   does on any other platform. */
12b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <stdlib.h>
14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <stdio.h>
15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <string.h>
16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <assert.h>
17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <unistd.h>
18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <sys/wait.h>
19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "tests/sys_mman.h"
20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* NNN: integer constant 3456987.  Its users lie outside this part of
   the file; presumably a loop/iteration count -- TODO confirm. */
21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define NNN 3456987
22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* IS_8_ALIGNED(_ptr): nonzero iff the low three bits of _ptr, converted
   to unsigned long, are all zero (i.e. the address is a multiple of 8). */
23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* atomic_add_8bit: add n to the byte at p using a hand-written atomic
   instruction sequence.  Exactly one implementation is compiled in,
   selected by which VGA_<arch> macro is defined; if none of the listed
   macros is defined the #error at the bottom stops the build.

   p  address of the byte to update.  Several variants operate on the
      whole word/doubleword starting at p rather than on a single byte,
      so p must meet the alignment stated in the per-arch comments.
   n  amount to add; only its low 8 bits end up affecting the byte.

   Returns nothing; the only result is the modified memory at p.
   The noinline attribute keeps this as an out-of-line function. */
26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#if defined(VGA_x86)
   /* x86: p and n are handed to the asm through block[], addressed via
      esi.  The update itself is the single lock-prefixed addb. */
29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   unsigned long block[2];
30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   block[0] = (unsigned long)p;
31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   block[1] = n;
32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   __asm__ __volatile__(
33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "movl 0(%%esi),%%eax"      "\n\t"
34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "movl 4(%%esi),%%ebx"      "\n\t"
35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "lock; addb %%bl,(%%eax)"  "\n"
36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   );
38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_amd64)
   /* amd64: same scheme as x86, with 8-byte block[] slots addressed via
      rsi and the lock-prefixed addb applied through rax. */
39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   unsigned long block[2];
40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   block[0] = (unsigned long)p;
41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   block[1] = n;
42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   __asm__ __volatile__(
43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "movq 0(%%rsi),%%rax"      "\n\t"
44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "movq 8(%%rsi),%%rbx"      "\n\t"
45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "lock; addb %%bl,(%%rax)"  "\n"
46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   );
48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_ppc32)
49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      is 4-aligned -- guaranteed by caller. */
   /* n is shifted left by 24 before being added to the value loaded by
      lwarx.  The mfcr/srwi/andi. tail leaves a single condition-register
      bit in success, and the whole sequence repeats until it is 1. */
51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   unsigned long success;
52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   do {
53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      __asm__ __volatile__(
54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "lwarx  15,0,%1"    "\n\t"
55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "add    15,15,%2"   "\n\t"
56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "stwcx. 15,0,%1"    "\n\t"
57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "mfcr   %0"         "\n\t"
58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "srwi   %0,%0,29"   "\n\t"
59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "andi.  %0,%0,1"    "\n"
60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*out*/"=b"(success)
61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*trash*/ "memory", "cc", "r15"
63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      );
64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } while (success != 1);
65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_ppc64)
66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      is 8-aligned -- guaranteed by caller. */
   /* Same structure as the ppc32 case, but using ldarx/stdcx. and with
      n shifted left by 56.  Repeats until success is 1. */
68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   unsigned long success;
69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   do {
70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      __asm__ __volatile__(
71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "ldarx  15,0,%1"    "\n\t"
72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "add    15,15,%2"   "\n\t"
73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "stdcx. 15,0,%1"    "\n\t"
74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "mfcr   %0"         "\n\t"
75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "srwi   %0,%0,29"   "\n\t"
76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "andi.  %0,%0,1"    "\n"
77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*out*/"=b"(success)
78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*trash*/ "memory", "cc", "r15"
80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      );
81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } while (success != 1);
82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_arm)
   /* block[0] = p, block[1] = n, block[2] receives the strexb status
      (r4).  The ldrexb/add/strexb sequence is repeated until the stored
      status is 0. */
83b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   unsigned int block[3]
84b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
85b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   do {
86b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      __asm__ __volatile__(
87b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "mov    r5, %0"         "\n\t"
88b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "ldr    r9, [r5, #0]"   "\n\t" // p
89b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "ldr    r10, [r5, #4]"  "\n\t" // n
90b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "ldrexb r8, [r9]"       "\n\t"
91b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "add    r8, r8, r10"    "\n\t"
92b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "strexb r4, r8, [r9]"   "\n\t"
93b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "str    r4, [r5, #8]"   "\n\t"
94b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         : /*out*/
95b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         : /*in*/ "r"(&block[0])
96b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
97b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      );
98b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   } while (block[2] != 0);
99436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#elif defined(VGA_arm64)
   /* Same layout as the arm case with 8-byte block[] slots: the
      ldxrb/add/stxrb sequence stores its status (w4, written out as x4)
      into block[2] and is repeated until that value is 0. */
100436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   unsigned long long int block[3]
101436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned long long int)p, (unsigned long long int)n,
102436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov          0xFFFFFFFFFFFFFFFFULL};
103436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   do {
104436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm__ __volatile__(
105436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "mov   x5, %0"         "\n\t"
106436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ldr   x9, [x5, #0]"   "\n\t" // p
107436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ldr   x10, [x5, #8]"  "\n\t" // n
108436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ldxrb w8, [x9]"       "\n\t"
109436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "add   x8, x8, x10"    "\n\t"
110436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "stxrb w4, w8, [x9]"    "\n\t"
111436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "str   x4, [x5, #16]"   "\n\t"
112436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*out*/
113436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*in*/ "r"(&block[0])
114436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
115436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      );
116436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   } while (block[2] != 0);
117b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov#elif defined(VGA_s390x)
   /* Loop built around cs: the word at *p is loaded into register 0 and
      copied to dummy, one byte of that copy is updated with
      icm/ar/stcm, the result is reloaded into register 1, and cs is
      applied to *p with registers 0 and 1.  jl then conditionally
      branches back to label 0.
      NOTE(review): presumably jl is taken when cs did not store, i.e.
      this is a retry loop -- confirm against the architecture manual. */
118b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   int dummy;
119b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   __asm__ __volatile__(
120b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   l	0,%0\n\t"
121b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "0: st	0,%1\n\t"
122b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   icm	1,1,%1\n\t"
123b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   ar	1,%2\n\t"
124b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   stcm  1,1,%1\n\t"
125b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   l     1,%1\n\t"
126b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   cs	0,1,%0\n\t"
127b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   jl    0b\n\t"
128b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      : "+m" (*p), "+m" (dummy)
129b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      : "d" (n)
130b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      : "cc", "memory", "0", "1");
131663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#elif defined(VGA_mips32)
132436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
133436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      exception that can cause this function to fail. */
   /* NOTE(review): the endianness test below has no #else, so if neither
      _MIPSEL nor _MIPSEB is defined this branch compiles to nothing. */
134663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#if defined (_MIPSEL)
   /* _MIPSEL variant: updates the low 8 bits of the word read by ll and
      keeps the upper 24 bits unchanged.  block[2] receives the value sc
      leaves in $t3; the sequence repeats until that value is 1. */
135663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   unsigned int block[3]
136436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned int)p, (unsigned int)n, 0x0 };
137663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   do {
138663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      __asm__ __volatile__(
139436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "move $t0, %0"           "\n\t"
140436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lw   $t1, 0($t0)"       "\n\t"  // p
141436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lw   $t2, 4($t0)"       "\n\t"  // n
142436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
143436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "li   $t4, 0xFF"         "\n\t"
144436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFFFF00
145436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
146436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFFFF00
147436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
148436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
149436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
150663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         "sc   $t3, 0($t1)"       "\n\t"
151436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sw   $t3, 8($t0)"       "\n\t"  // save result
152663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         : /*out*/
153663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         : /*in*/ "r"(&block[0])
154436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
155663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
156663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   } while (block[2] != 1);
157663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#elif defined (_MIPSEB)
   /* _MIPSEB variant: n is pre-shifted left by 24 in block[1], so a
      plain addu on the word read by ll changes only its top 8 bits.
      block[2] receives the value sc leaves in $t3; repeat until 1. */
158663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   unsigned int block[3]
159436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
160663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   do {
161663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      __asm__ __volatile__(
162436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "move $t0, %0"          "\n\t"
163436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lw   $t1, 0($t0)"      "\n\t"  // p
164436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lw   $t2, 4($t0)"      "\n\t"  // n
165436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ll   $t3, 0($t1)"      "\n\t"
166436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "addu $t3, $t3, $t2"    "\n\t"
167436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sc   $t3, 0($t1)"      "\n\t"
168436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sw   $t3, 8($t0)"      "\n\t"
169663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         : /*out*/
170663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         : /*in*/ "r"(&block[0])
171436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
172436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      );
173436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   } while (block[2] != 1);
174436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#endif
175436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#elif defined(VGA_mips64)
176436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
177436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      exception that can cause this function to fail. */
   /* NOTE(review): as in the mips32 branch, there is no #else for the
      endianness test, so with neither _MIPSEL nor _MIPSEB defined this
      branch compiles to nothing. */
178436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#if defined (_MIPSEL)
   /* _MIPSEL variant: same byte-masking scheme as the mips32 _MIPSEL
      code, with 8-byte block[] slots (ld at offsets 0 and 8) and $s0 as
      the mask register.  The sc result is written to block[2] with sw;
      the sequence repeats until block[2] is 1. */
179436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   unsigned long block[3]
180436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
181436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   do {
182436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm__ __volatile__(
183436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "move $t0, %0"           "\n\t"
184436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ld   $t1, 0($t0)"       "\n\t"  // p
185436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ld   $t2, 8($t0)"       "\n\t"  // n
186436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
187436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "li   $s0, 0xFF"         "\n\t"
188436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFFFF00
189436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
190436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFFFF00
191436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
192436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
193436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
194436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sc   $t3, 0($t1)"       "\n\t"
195436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sw   $t3, 16($t0)"      "\n\t"  // save result
196436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*out*/
197436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*in*/ "r"(&block[0])
198436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
199436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      );
200436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   } while (block[2] != 1);
201436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#elif defined (_MIPSEB)
   /* _MIPSEB variant: n is pre-shifted left by 56 in block[1] and added
      with daddu to the value read by lld; the scd result is written to
      block[2] with sd, and the sequence repeats until block[2] is 1.
      NOTE(review): this path uses lld/scd rather than ll/sc, so the
      4-aligned remark above may need to read 8-aligned here -- confirm. */
202436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   unsigned long block[3]
203436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
204436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   do {
205436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm__ __volatile__(
206436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "move  $t0, %0"          "\n\t"
207436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ld    $t1, 0($t0)"      "\n\t"  // p
208436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ld    $t2, 8($t0)"      "\n\t"  // n
209436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lld   $t3, 0($t1)"      "\n\t"
210436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "daddu $t3, $t3, $t2"    "\n\t"
211436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "scd   $t3, 0($t1)"      "\n\t"
212436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sd    $t3, 16($t0)"     "\n\t"
213436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*out*/
214436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*in*/ "r"(&block[0])
215436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
216663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
217663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   } while (block[2] != 1);
218663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#endif
219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#else
220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown# error "Unsupported arch"
221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#endif
222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#if defined(VGA_x86)
228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   unsigned long block[2];
229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   block[0] = (unsigned long)p;
230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   block[1] = n;
231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   __asm__ __volatile__(
232ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "movl 0(%%esi),%%eax"      "\n\t"
233ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "movl 4(%%esi),%%ebx"      "\n\t"
234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "lock; addw %%bx,(%%eax)"  "\n"
235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   );
237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_amd64)
238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   unsigned long block[2];
239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   block[0] = (unsigned long)p;
240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   block[1] = n;
241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   __asm__ __volatile__(
242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "movq 0(%%rsi),%%rax"      "\n\t"
243ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "movq 8(%%rsi),%%rbx"      "\n\t"
244ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      "lock; addw %%bx,(%%rax)"  "\n"
245ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
246ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   );
247ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_ppc32)
248ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
249ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      is 8-aligned -- guaranteed by caller. */
250ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   unsigned long success;
251ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   do {
252ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      __asm__ __volatile__(
253ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "lwarx  15,0,%1"    "\n\t"
254ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "add    15,15,%2"   "\n\t"
255ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "stwcx. 15,0,%1"    "\n\t"
256ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "mfcr   %0"         "\n\t"
257ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "srwi   %0,%0,29"   "\n\t"
258ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "andi.  %0,%0,1"    "\n"
259ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*out*/"=b"(success)
260ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
261ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*trash*/ "memory", "cc", "r15"
262ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      );
263ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } while (success != 1);
264ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_ppc64)
265ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
266ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      is 8-aligned -- guaranteed by caller. */
267ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   unsigned long success;
268ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   do {
269ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      __asm__ __volatile__(
270ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "ldarx  15,0,%1"    "\n\t"
271ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "add    15,15,%2"   "\n\t"
272ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "stdcx. 15,0,%1"    "\n\t"
273ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "mfcr   %0"         "\n\t"
274ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "srwi   %0,%0,29"   "\n\t"
275ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "andi.  %0,%0,1"    "\n"
276ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*out*/"=b"(success)
277ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
278ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         : /*trash*/ "memory", "cc", "r15"
279ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      );
280ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } while (success != 1);
281ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#elif defined(VGA_arm)
282b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   unsigned int block[3]
283b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
284b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   do {
285b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      __asm__ __volatile__(
286b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "mov    r5, %0"         "\n\t"
287b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "ldr    r9, [r5, #0]"   "\n\t" // p
288b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "ldr    r10, [r5, #4]"  "\n\t" // n
289b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "ldrexh r8, [r9]"       "\n\t"
290b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "add    r8, r8, r10"    "\n\t"
291b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "strexh r4, r8, [r9]"   "\n\t"
292b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         "str    r4, [r5, #8]"   "\n\t"
293b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         : /*out*/
294b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         : /*in*/ "r"(&block[0])
295b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
296b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      );
297b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   } while (block[2] != 0);
298436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#elif defined(VGA_arm64)
299436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   unsigned long long int block[3]
300436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned long long int)p, (unsigned long long int)n,
301436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov          0xFFFFFFFFFFFFFFFFULL};
302436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   do {
303436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm__ __volatile__(
304436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "mov   x5, %0"         "\n\t"
305436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ldr   x9, [x5, #0]"   "\n\t" // p
306436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ldr   x10, [x5, #8]"  "\n\t" // n
307436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ldxrh w8, [x9]"       "\n\t"
308436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "add   x8, x8, x10"    "\n\t"
309436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "stxrh w4, w8, [x9]"    "\n\t"
310436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "str   x4, [x5, #16]"   "\n\t"
311436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*out*/
312436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*in*/ "r"(&block[0])
313436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
314436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      );
315436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   } while (block[2] != 0);
316b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov#elif defined(VGA_s390x)
317b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   int dummy;
318b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   __asm__ __volatile__(
319b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   l	0,%0\n\t"
320b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "0: st	0,%1\n\t"
321b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   icm	1,3,%1\n\t"
322b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   ar	1,%2\n\t"
323b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   stcm  1,3,%1\n\t"
324b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   l     1,%1\n\t"
325b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   cs	0,1,%0\n\t"
326b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      "   jl    0b\n\t"
327b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      : "+m" (*p), "+m" (dummy)
328b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      : "d" (n)
329b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      : "cc", "memory", "0", "1");
330663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#elif defined(VGA_mips32)
331436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
332436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      exception that can cause this function to fail. */
333663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#if defined (_MIPSEL)
334663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   unsigned int block[3]
335436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned int)p, (unsigned int)n, 0x0 };
336663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   do {
337663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      __asm__ __volatile__(
338436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "move $t0, %0"           "\n\t"
339436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lw   $t1, 0($t0)"       "\n\t"  // p
340436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lw   $t2, 4($t0)"       "\n\t"  // n
341436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
342436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "li   $t4, 0xFFFF"       "\n\t"
343436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFF0000
344436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
345436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFF0000
346436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
347436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
348436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
349663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         "sc   $t3, 0($t1)"       "\n\t"
350436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sw   $t3, 8($t0)"       "\n\t"  // save result
351663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         : /*out*/
352663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         : /*in*/ "r"(&block[0])
353436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
354663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
355663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   } while (block[2] != 1);
356663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#elif defined (_MIPSEB)
357663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   unsigned int block[3]
358436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
359436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   do {
360436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm__ __volatile__(
361436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "move $t0, %0"          "\n\t"
362436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lw   $t1, 0($t0)"      "\n\t"  // p
363436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lw   $t2, 4($t0)"      "\n\t"  // n
364436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ll   $t3, 0($t1)"      "\n\t"
365436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "addu $t3, $t3, $t2"    "\n\t"
366436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sc   $t3, 0($t1)"      "\n\t"
367436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sw   $t3, 8($t0)"      "\n\t"
368436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*out*/
369436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*in*/ "r"(&block[0])
370436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
371436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      );
372436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   } while (block[2] != 1);
373436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#endif
374436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#elif defined(VGA_mips64)
375436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
376436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      exception that can cause this function to fail. */
377436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#if defined (_MIPSEL)
378436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   unsigned long block[3]
379436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
380436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   do {
381436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      __asm__ __volatile__(
382436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "move $t0, %0"           "\n\t"
383436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ld   $t1, 0($t0)"       "\n\t"  // p
384436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ld   $t2, 8($t0)"       "\n\t"  // n
385436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
386436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "li   $s0, 0xFFFF"       "\n\t"
387436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "nor  $s0, $s0, $zero"   "\n\t"  // $s0= 0xFFFF0000
388436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
389436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFF0000
390436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
391436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
392436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
393436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sc   $t3, 0($t1)"       "\n\t"
394436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sw   $t3, 16($t0)"      "\n\t"  // save result
395436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*out*/
396436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*in*/ "r"(&block[0])
397436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
398436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      );
399436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   } while (block[2] != 1);
400436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov#elif defined (_MIPSEB)
401436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   unsigned long block[3]
402436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
403663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   do {
404663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      __asm__ __volatile__(
405436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "move  $t0, %0"          "\n\t"
406436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ld    $t1, 0($t0)"      "\n\t"  // p
407436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "ld    $t2, 8($t0)"      "\n\t"  // n
408436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "lld   $t3, 0($t1)"      "\n\t"
409436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "daddu $t3, $t3, $t2"    "\n\t"
410436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "scd   $t3, 0($t1)"      "\n\t"
411436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         "sd    $t3, 16($t0)"     "\n\t"
412663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         : /*out*/
413663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         : /*in*/ "r"(&block[0])
414436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
415663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
416663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   } while (block[2] != 1);
417663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng#endif
418ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#else
419ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown# error "Unsupported arch"
420ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#endif
421ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
422ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* Atomically perform *p += n on the 32-bit location at p.

   The implementation is selected at compile time by the VGA_* macro
   and is written directly in inline assembly, so that the exact
   atomic (or load-linked / store-conditional) instructions of the
   target are the ones that get executed.  On targets whose
   store-conditional can fail, the sequence is retried until the store
   succeeds.

   p  address of the 32-bit counter; main() asserts it is 8-aligned,
      which the ppc64 variant below depends on.
   n  value to add. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   /* block[0] = address, block[1] = addend; both are fetched through
      %esi inside the asm. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86; the addend is loaded as 64 bits but only its
      low 32 bits (%ebx) take part in the locked add. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* lwarx/stwcx. retry loop.  After stwcx. the condition register is
      copied out, shifted right by 29 and masked with 1; the loop
      repeats until that extracted bit is 1. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  The whole doubleword at p
      is updated with ldarx/stdcx., and the addend is moved into the
      upper 32 bits of the register (n << 32).
      NOTE(review): the upper register half corresponds to the word at
      address p only for a big-endian layout -- confirm before using
      this on a little-endian ppc64 target. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block[0] = address, block[1] = addend, block[2] = strex status.
      The status slot starts as all-ones and is overwritten by the asm
      with the strex result; the loop ends once it reads 0. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* Same layout as the arm case with 64-bit slots.  The exclusive
      load and store use the 32-bit view (w8) of the working register,
      so only 32 bits of memory are accessed. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxr  w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxr  w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop: r0 holds the value last seen in *p, r1 the
      proposed new value; "jl 0b" branches back to retry. */
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: lr	1,0\n\t"
      "   ar	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* block[0] = address, block[1] = addend, block[2] = sc status.
      After sc, $t3 is stored into the status slot; the loop ends once
      that slot reads 1. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   /* As for mips32, but the parameter block uses 64-bit slots while
      the counter itself is still accessed with the 32-bit ll/sc. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "ll    $t3, 0($t1)"    "\n\t"
         "addu  $t3, $t3, $t2"  "\n\t"
         "sc    $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
560ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* Atomically perform *p += n on the 64-bit location at p, where the
   target has a suitable instruction sequence.

   On VGA_x86, VGA_ppc32 and VGA_mips32 this function deliberately
   does nothing, so *p keeps whatever value it had; main() accepts a
   final value of 0 for exactly that reason.

   p  address of the 64-bit counter (main() asserts it is 8-aligned).
   n  value to add; it is widened to 64 bits before the addition. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   // block[0] = address, block[1] = addend, both read through %rsi.
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64)
   /* ldarx/stdcx. retry loop; the loop repeats until the bit extracted
      from the condition register after stdcx. is 1.  The addend is
      widened to long explicitly because "add" operates on the full
      64-bit register, so the upper half of the operand must be a
      defined sign extension of n. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"((long)n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block[0] = address, block[1] = 64-bit addend, block[2] = status.
      The asm stores the 32-bit strexd status with a word store at
      byte offset 16, so only one half of block[2] is overwritten; the
      loop ends when block[2] equals 0xFFFFFFFF00000000, i.e. when the
      low half has become 0 and the other half still holds its
      initial all-ones pattern. */
   unsigned long long int block[3]
     = { (unsigned long long int)(unsigned long)p,
         (unsigned long long int)n,
         0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8,     [r5, #0]"   "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   /* block[0] = address, block[1] = addend, block[2] = stxr status;
      the loop ends once the status slot reads 0. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxr  x8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxr  w4, x8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* 64-bit compare-and-swap loop: r0 holds the value last seen in
      *p, r1 the proposed new value; "jl 0b" branches back to retry.
      "agr" is a 64-bit add, so the addend is widened to long
      explicitly rather than handing the asm a 32-bit int operand. */
   __asm__ __volatile__(
      "   lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      "   agr	1,%1\n\t"
      "   csg	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" ((long)n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   /* block[0] = address, block[1] = addend, block[2] = scd status.
      After scd, $t3 is stored into the status slot; the loop ends
      once that slot reads 1. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
661ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
662ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownint main ( int argc, char** argv )
663ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
664ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   int    i, status;
665ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   char*  page;
666ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   char*  p8;
667ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   short* p16;
668ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   int*   p32;
669ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   long long int* p64;
670ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   pid_t  child, p2;
671ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
672ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   printf("parent, pre-fork\n");
673ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
674ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   page = mmap( 0, sysconf(_SC_PAGESIZE),
675ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                   PROT_READ|PROT_WRITE,
676ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                   MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
677ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (page == MAP_FAILED) {
678ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      perror("mmap failed");
679ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      exit(1);
680ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
681ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
682ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   p8  = (char*)(page+0);
683ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   p16 = (short*)(page+256);
684ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   p32 = (int*)(page+512);
685ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   p64 = (long long int*)(page+768);
686ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
687ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   assert( IS_8_ALIGNED(p8) );
688ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   assert( IS_8_ALIGNED(p16) );
689ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   assert( IS_8_ALIGNED(p32) );
690ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   assert( IS_8_ALIGNED(p64) );
691ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
692ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   memset(page, 0, 1024);
693ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
694ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   *p8  = 0;
695ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   *p16 = 0;
696ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   *p32 = 0;
697ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   *p64 = 0;
698ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
699ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   child = fork();
700ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (child == -1) {
701ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      perror("fork() failed\n");
702ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return 1;
703ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
704ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
705ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (child == 0) {
706ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* --- CHILD --- */
707ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      printf("child\n");
708ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (i = 0; i < NNN; i++) {
709ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         atomic_add_8bit(p8, 1);
710ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         atomic_add_16bit(p16, 1);
711ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         atomic_add_32bit(p32, 1);
712ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
713ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
714ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return 1;
715ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* NOTREACHED */
716ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
717ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
718ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
719ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* --- PARENT --- */
720ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
721ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   printf("parent\n");
722ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
723ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   for (i = 0; i < NNN; i++) {
724ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      atomic_add_8bit(p8, 1);
725ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      atomic_add_16bit(p16, 1);
726ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      atomic_add_32bit(p32, 1);
727ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
728ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
729ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
730ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   p2 = waitpid(child, &status, 0);
731ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   assert(p2 == child);
732ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
733ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* assert that child finished normally */
734ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   assert(WIFEXITED(status));
735ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
736ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
737ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
738ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
739ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (-74 == (int)(*(signed char*)p8)
740ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       && 32694 == (int)(*p16)
741ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       && 6913974 == *p32
742ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       && (0LL == *p64 || 682858642110LL == *p64)) {
743ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      printf("PASS\n");
744ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } else {
745ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      printf("FAIL -- see source code for expected values\n");
746ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
747ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
748ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   printf("parent exits\n");
749ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
750ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return 0;
751ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
752