1/* This is an example of a program which does cavium atomic memory operations
2   between two processes which share a page. This test is based on :
3   memcheck/tests/atomic_incs.c */
4
5#include <stdlib.h>
6#include <stdio.h>
7#include <string.h>
8#include <assert.h>
9#include <unistd.h>
10#include <sys/wait.h>
11#include "tests/sys_mman.h"
12
13#define N 19
14#define NNN 3456987  // Number of repetition.
15
16/* Expected values */
17long long int p1_expd[N] = { 2156643710, 2156643710, 3456986, 6913974,
18                             4288053322, 0, 4294967295,
19                             6913974, 21777111,
20                             3456986, 2153186724,
21                             6913974, 21777111,
22                             4294967295, 4288053323,  // Test 14
23                             4288053322, 4273190185,  // Test 16
24                             0, 0 };                  // Test 18
25
26long long int p2_expd[N] = { 12633614303292, 12633614303292, 3555751, 6913974,
27                              -6913974, 0, -1,
28                             6913974, 23901514779351,
29                             3456986, 11950752204196,
30                             6913974, 23901514779351,
31                             -1, -6913973,               // Test 15
32                             -6913974, -23901514779351,  // Test 17
33                             0, 0 };                     // Test 19
34
35#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
36
37__attribute__((noinline)) void atomic_saa ( long long int* p, int n )
38{
39#if (_MIPS_ARCH_OCTEON2)
40   unsigned long block[2] = { (unsigned long)p, (unsigned long)n };
41   __asm__ __volatile__(
42      "move $t0, %0"      "\n\t"
43      "ld   $t1, 0($t0)"  "\n\t"  // p
44      "ld   $t2, 8($t0)"  "\n\t"  // n
45      "saa  $t2, ($t1)"   "\n\t"
46      : /*out*/
47      : /*in*/ "r"(&block[0])
48      : /*trash*/ "memory", "t0", "t1", "t2"
49   );
50#endif
51}
52
53__attribute__((noinline)) void atomic_saad ( long long int* p, int n )
54{
55#if (_MIPS_ARCH_OCTEON2)
56   unsigned long block[2] = { (unsigned long)p, (unsigned long)n };
57   __asm__ __volatile__(
58      "move $t0, %0"      "\n\t"
59      "ld   $t1, 0($t0)"  "\n\t"  // p
60      "ld   $t2, 8($t0)"  "\n\t"  // n
61      "saad $t2, ($t1)"   "\n\t"
62      : /*out*/
63      : /*in*/ "r"(&block[0])
64      : /*trash*/ "memory", "t0", "t1", "t2"
65   );
66#endif
67}
68
69__attribute__((noinline)) void atomic_laa ( long long int* p, int n )
70{
71#if (_MIPS_ARCH_OCTEON2)
72   unsigned long block[2] = { (unsigned long)p, (unsigned long)n };
73   __asm__ __volatile__(
74      "move $t0, %0"          "\n\t"
75      "ld   $t1, 0($t0)"      "\n\t"  // p
76      "ld   $t2, 8($t0)"      "\n\t"  // n
77      "laa  $t3, ($t1), $t2"  "\n\t"
78      : /*out*/
79      : /*in*/ "r"(&block[0])
80      : /*trash*/ "memory", "t0", "t1", "t2"
81   );
82#endif
83}
84
85__attribute__((noinline)) void atomic_laad ( long long int* p, int n )
86{
87#if (_MIPS_ARCH_OCTEON2)
88   unsigned long block[2] = { (unsigned long)p, (unsigned long)n };
89   __asm__ __volatile__(
90      "move $t0, %0"           "\n\t"
91      "ld   $t1, 0($t0)"       "\n\t"  // p
92      "ld   $t2, 8($t0)"       "\n\t"  // n
93      "laad $t3, ($t1), $t2"   "\n\t"
94      : /*out*/
95      : /*in*/ "r"(&block[0])
96      : /*trash*/ "memory", "t0", "t1", "t2", "t3"
97   );
98#endif
99}
100
101__attribute__((noinline)) void atomic_law ( long long int* p, int n )
102{
103#if (_MIPS_ARCH_OCTEON2)
104   unsigned long block[2] = { (unsigned long)p, (unsigned long)n };
105   __asm__ __volatile__(
106      "move $t0, %0"           "\n\t"
107      "ld   $t1, 0($t0)"       "\n\t"  // p
108      "ld   $t2, 8($t0)"       "\n\t"  // n
109      "law  $t3, ($t1), $t2"  "\n\t"
110      : /*out*/
111      : /*in*/ "r"(&block[0])
112      : /*trash*/ "memory", "t0", "t1", "t2"
113   );
114#endif
115}
116
117__attribute__((noinline)) void atomic_lawd ( long long int* p, int n )
118{
119#if (_MIPS_ARCH_OCTEON2)
120   unsigned long block[2] = { (unsigned long)p, (unsigned long)n };
121   __asm__ __volatile__(
122      "move $t0, %0"          "\n\t"
123      "ld   $t1, 0($t0)"      "\n\t"  // p
124      "ld   $t2, 8($t0)"      "\n\t"  // n
125      "lawd $t3, ($t1), $t2"  "\n\t"
126      : /*out*/
127      : /*in*/ "r"(&block[0])
128      : /*trash*/ "memory", "t0", "t1", "t2", "t3"
129   );
130#endif
131}
132
133__attribute__((noinline)) void atomic_lai ( long long int* p )
134{
135#if (_MIPS_ARCH_OCTEON2)
136   unsigned long block[2] = { (unsigned long)p };
137   __asm__ __volatile__(
138      "move $t0, %0"      "\n\t"
139      "ld   $t1, 0($t0)"  "\n\t"  // p
140      "ld   $t2, 8($t0)"  "\n\t"  // n
141      "lai  $t2, ($t1)"   "\n\t"
142      : /*out*/
143      : /*in*/ "r"(&block[0])
144      : /*trash*/ "memory", "t0", "t1", "t2"
145   );
146#endif
147}
148
149__attribute__((noinline)) void atomic_laid ( long long int* p )
150{
151#if (_MIPS_ARCH_OCTEON2)
152   unsigned long block[2] = { (unsigned long)p };
153   __asm__ __volatile__(
154      "move $t0, %0"      "\n\t"
155      "ld   $t1, 0($t0)"  "\n\t"  // p
156      "ld   $t2, 8($t0)"  "\n\t"  // n
157      "laid $t2, ($t1)"   "\n\t"
158      : /*out*/
159      : /*in*/ "r"(&block[0])
160      : /*trash*/ "memory", "t0", "t1", "t2"
161   );
162#endif
163}
164
165__attribute__((noinline)) void atomic_lad ( long long int* p )
166{
167#if (_MIPS_ARCH_OCTEON2)
168   unsigned long block[2] = { (unsigned long)p };
169   __asm__ __volatile__(
170      "move $t0, %0"      "\n\t"
171      "ld   $t1, 0($t0)"  "\n\t"  // p
172      "ld   $t2, 8($t0)"  "\n\t"  // n
173      "lad  $t2, ($t1)"   "\n\t"
174      : /*out*/
175      : /*in*/ "r"(&block[0])
176      : /*trash*/ "memory", "t0", "t1", "t2"
177   );
178#endif
179}
180
181__attribute__((noinline)) void atomic_ladd ( long long int* p )
182{
183#if (_MIPS_ARCH_OCTEON2)
184   unsigned long block[2] = { (unsigned long)p };
185   __asm__ __volatile__(
186      "move $t0, %0"      "\n\t"
187      "ld   $t1, 0($t0)"  "\n\t"  // p
188      "ld   $t2, 8($t0)"  "\n\t"  // n
189      "ladd $t2, ($t1)"   "\n\t"
190      : /*out*/
191      : /*in*/ "r"(&block[0])
192      : /*trash*/ "memory", "t0", "t1", "t2"
193   );
194#endif
195}
196
197__attribute__((noinline)) void atomic_lac ( long long int* p )
198{
199#if (_MIPS_ARCH_OCTEON2)
200   unsigned long block[2] = { (unsigned long)p };
201   __asm__ __volatile__(
202      "move $t0, %0"      "\n\t"
203      "ld   $t1, 0($t0)"  "\n\t"  // p
204      "ld   $t2, 8($t0)"  "\n\t"  // n
205      "lac  $t2, ($t1)"   "\n\t"
206      : /*out*/
207      : /*in*/ "r"(&block[0])
208      : /*trash*/ "memory", "t0", "t1", "t2"
209   );
210#endif
211}
212
213__attribute__((noinline)) void atomic_lacd ( long long int* p )
214{
215#if (_MIPS_ARCH_OCTEON2)
216   unsigned long block[2] = { (unsigned long)p };
217   __asm__ __volatile__(
218      "move $t0, %0"      "\n\t"
219      "ld   $t1, 0($t0)"  "\n\t"  // p
220      "ld   $t2, 8($t0)"  "\n\t"  // n
221      "lacd $t2, ($t1)"   "\n\t"
222      : /*out*/
223      : /*in*/ "r"(&block[0])
224      : /*trash*/ "memory", "t0", "t1", "t2"
225   );
226#endif
227}
228
229__attribute__((noinline)) void atomic_las ( long long int* p )
230{
231#if (_MIPS_ARCH_OCTEON2)
232   unsigned long block[2] = { (unsigned long)p };
233   __asm__ __volatile__(
234      "move $t0, %0"      "\n\t"
235      "ld   $t1, 0($t0)"  "\n\t"  // p
236      "ld   $t2, 8($t0)"  "\n\t"  // n
237      "las  $t2, ($t1)"   "\n\t"
238      : /*out*/
239      : /*in*/ "r"(&block[0])
240      : /*trash*/ "memory", "t0", "t1", "t2"
241   );
242#endif
243}
244
245__attribute__((noinline)) void atomic_lasd ( long long int* p )
246{
247#if (_MIPS_ARCH_OCTEON2)
248   unsigned long block[2] = { (unsigned long)p };
249   __asm__ __volatile__(
250      "move $t0, %0"      "\n\t"
251      "ld   $t1, 0($t0)"  "\n\t"  // p
252      "ld   $t2, 8($t0)"  "\n\t"  // n
253      "lasd $t2, ($t1)"   "\n\t"
254      : /*out*/
255      : /*in*/ "r"(&block[0])
256      : /*trash*/ "memory", "t0", "t1", "t2"
257   );
258#endif
259}
260
261#define TRIOP_AND_SAA(instruction, base1, base2, n)  \
262{                                                    \
263   __asm__ __volatile__(                             \
264      instruction"  $t0, (%0), %2"  "\n\t"           \
265      "saa          $t0, (%1)"       "\n\t"          \
266      : /*out*/                                      \
267      : /*in*/ "r"(base1), "r"(base2), "r"(n)        \
268      : /*trash*/ "memory", "t0"                     \
269   );                                                \
270}
271
272#define TRIOP_AND_SAAD(instruction, base1, base2, n)  \
273{                                                     \
274   __asm__ __volatile__(                              \
275      instruction"  $t0, (%0), %2"  "\n\t"            \
276      "saad         $t0, (%1)"       "\n\t"           \
277      : /*out*/                                       \
278      : /*in*/ "r"(base1), "r"(base2), "r"(n)         \
279      : /*trash*/ "memory", "t0"                      \
280   );                                                 \
281}
282
283#define BINOP_AND_SAA(instruction, base1, base2)  \
284{                                                 \
285   __asm__ __volatile__(                          \
286      instruction"  $t0, (%0)"  "\n\t"            \
287      "saa          $t0, (%1)"  "\n\t"            \
288      : /*out*/                                   \
289      : /*in*/ "r"(base1), "r"(base2)             \
290      : /*trash*/ "memory", "t0"                  \
291   );                                             \
292}
293
294#define BINOP_AND_SAAD(instruction, base1, base2)  \
295{                                                  \
296   __asm__ __volatile__(                           \
297      instruction"  $t0, (%0)"  "\n\t"             \
298      "saad         $t0, (%1)"  "\n\t"             \
299      : /*out*/                                    \
300      : /*in*/ "r"(base1), "r"(base2)              \
301      : /*trash*/ "memory", "t0"                   \
302   );                                              \
303}
304
305int main ( int argc, char** argv )
306{
307#if (_MIPS_ARCH_OCTEON2)
308   int    i, status;
309   char*  page[N];
310   long long int* p1[N];
311   long long int* p2[N];
312   pid_t  child, pc2;
313
314   printf("parent, pre-fork\n");
315
316   for (i = 0; i < N; i++) {
317      page[i] = mmap( 0, sysconf(_SC_PAGESIZE),
318                      PROT_READ|PROT_WRITE,
319                      MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
320      if (page[i] == MAP_FAILED) {
321         perror("mmap failed");
322         exit(1);
323      }
324      p1[i] = (long long int*)(page[i]+0);
325      p2[i] = (long long int*)(page[i]+256);
326
327      assert( IS_8_ALIGNED(p1[i]) );
328      assert( IS_8_ALIGNED(p2[i]) );
329
330      memset(page[i], 0, 1024);
331      memset(page[i], 0, 1024);
332
333      *p1[i] = 0;
334      *p2[i] = 0;
335   }
336
337   child = fork();
338   if (child == -1) {
339      perror("fork() failed\n");
340      return 1;
341   }
342
343   if (child == 0) {
344      /* --- CHILD --- */
345      printf("child\n");
346      for (i = 0; i < NNN; i++) {
347         atomic_saa(p1[0], i);
348         atomic_saad(p2[0], i+98765 ); /* ensure we hit the upper 32 bits */
349         atomic_laa(p1[1], i);
350         atomic_laad(p2[1], i+98765 ); /* ensure we hit the upper 32 bits */
351         atomic_law(p1[2], i);
352         atomic_lawd(p2[2], i+98765 ); /* ensure we hit the upper 32 bits */
353         atomic_lai(p1[3]);
354         atomic_laid(p2[3]);
355         atomic_lad(p1[4]);
356         atomic_ladd(p2[4]);
357         atomic_lac(p1[5]);
358         atomic_lacd(p2[5]);
359         atomic_las(p1[6]);
360         atomic_lasd(p2[6]);
361         TRIOP_AND_SAA("laa ", p1[7], p1[8], 1)
362         TRIOP_AND_SAAD("laad ", p2[7], p2[8], 1)
363         TRIOP_AND_SAA("law ", p1[9], p1[10], i)
364         TRIOP_AND_SAAD("lawd ", p2[9], p2[10], i)
365         BINOP_AND_SAA("lai ", p1[11], p1[12])
366         BINOP_AND_SAAD("laid ", p2[11], p2[12])
367         BINOP_AND_SAA("las ", p1[13], p1[14])
368         BINOP_AND_SAAD("lasd ", p2[13], p2[14])
369         BINOP_AND_SAA("lad ", p1[15], p1[16])
370         BINOP_AND_SAAD("ladd ", p2[15], p2[16])
371         BINOP_AND_SAA("lac ", p1[17], p1[18])
372         BINOP_AND_SAAD("lacd ", p2[17], p2[18])
373      }
374      return 1;
375      /* NOTREACHED */
376
377   }
378
379   /* --- PARENT --- */
380   printf("parent\n");
381
382   for (i = 0; i < NNN; i++) {
383      atomic_saa(p1[0], i);
384      atomic_saad(p2[0], i+98765); /* ensure we hit the upper 32 bits */
385      atomic_laa(p1[1], i);
386      atomic_laad(p2[1], i+98765); /* ensure we hit the upper 32 bits */
387      atomic_law(p1[2], i);
388      atomic_lawd(p2[2], i+98765 ); /* ensure we hit the upper 32 bits */
389      atomic_lai(p1[3]);
390      atomic_laid(p2[3]);
391      atomic_lad(p1[4]);
392      atomic_ladd(p2[4]);
393      atomic_lac(p1[5]);
394      atomic_lacd(p2[5]);
395      atomic_las(p1[6]);
396      atomic_lasd(p2[6]);
397      TRIOP_AND_SAA("laa ", p1[7], p1[8], 1)
398      TRIOP_AND_SAAD("laad ", p2[7], p2[8], 1)
399      TRIOP_AND_SAA("law ", p1[9], p1[10], i)
400      TRIOP_AND_SAAD("lawd ", p2[9], p2[10], i)
401      BINOP_AND_SAA("lai ", p1[11], p1[12])
402      BINOP_AND_SAAD("laid ", p2[11], p2[12])
403      BINOP_AND_SAA("las ", p1[13], p1[14])
404      BINOP_AND_SAAD("lasd ", p2[13], p2[14])
405      BINOP_AND_SAA("lad ", p1[15], p1[16])
406      BINOP_AND_SAAD("ladd ", p2[15], p2[16])
407      BINOP_AND_SAA("lac ", p1[17], p1[18])
408      BINOP_AND_SAAD("lacd ", p2[17], p2[18])
409   }
410
411   pc2 = waitpid(child, &status, 0);
412   assert(pc2 == child);
413
414   /* assert that child finished normally */
415   assert(WIFEXITED(status));
416
417   printf("Store Atomic Add: 32 bit %lld, 64 bit %lld\n",      *p1[0], *p2[0]);
418   printf("Load Atomic Add: 32 bit %lld, 64 bit %lld\n",       *p1[1], *p2[1]);
419   printf("Load Atomic Swap: 32 bit %lld, 64 bit %lld\n",      *p1[2], *p2[2]);
420   printf("Load Atomic Increment: 32 bit %lld, 64 bit %lld\n", *p1[3], *p2[3]);
421   printf("Load Atomic Decrement: 32 bit %lld, 64 bit %lld\n", *p1[4], *p2[4]);
422   printf("Load Atomic Clear: 32 bit %lld, 64 bit %lld\n",     *p1[5], *p2[5]);
423   printf("Load Atomic Set: 32 bit %lld, 64 bit %lld\n",       *p1[6], *p2[6]);
424   printf("laa and saa: base1: %lld, base2: %lld\n",           *p1[7], *p1[8]);
425   printf("laad and saad: base1: %lld, base2: %lld\n",         *p2[7], *p2[8]);
426   printf("law and saa: base1: %lld, base2: %lld\n",           *p1[9], *p1[10]);
427   printf("lawd and saad: base1: %lld, base2: %lld\n",         *p2[9], *p2[10]);
428   printf("lai and saa: base1: %lld, base2: %lld\n",          *p1[11], *p1[12]);
429   printf("laid and saad: base1: %lld, base2: %lld\n",        *p2[11], *p2[12]);
430   printf("las and saa: base1: %lld, base2: %lld\n",          *p1[13], *p1[14]);
431   printf("lasd and saad: base1: %lld, base2: %lld\n",        *p2[13], *p2[14]);
432   printf("lad and saa: base1: %lld, base2: %lld\n",          *p1[15], *p1[16]);
433   printf("ladd and saad: base1: %lld, base2: %lld\n",        *p2[15], *p2[16]);
434   printf("lac and saa: base1: %lld, base2: %lld\n",          *p1[17], *p1[18]);
435   printf("lacd and saad: base1: %lld, base2: %lld\n",        *p2[17], *p2[18]);
436
437   for (i = 0; i < N; i++) {
438      if (p1_expd[i] == *p1[i] && p2_expd[i] == *p2[i]) {
439         printf("PASS %d\n", i+1);
440      } else {
441         printf("FAIL %d -- see source code for expected values\n", i+1);
442      }
443   }
444
445   printf("parent exits\n");
446#endif
447   return 0;
448}
449