1/* This is an example of a program which does cavium atomic memory operations 2 between two processes which share a page. This test is based on : 3 memcheck/tests/atomic_incs.c */ 4 5#include <stdlib.h> 6#include <stdio.h> 7#include <string.h> 8#include <assert.h> 9#include <unistd.h> 10#include <sys/wait.h> 11#include "tests/sys_mman.h" 12 13#define N 19 14#define NNN 3456987 // Number of repetition. 15 16/* Expected values */ 17long long int p1_expd[N] = { 2156643710, 2156643710, 3456986, 6913974, 18 4288053322, 0, 4294967295, 19 6913974, 21777111, 20 3456986, 2153186724, 21 6913974, 21777111, 22 4294967295, 4288053323, // Test 14 23 4288053322, 4273190185, // Test 16 24 0, 0 }; // Test 18 25 26long long int p2_expd[N] = { 12633614303292, 12633614303292, 3555751, 6913974, 27 -6913974, 0, -1, 28 6913974, 23901514779351, 29 3456986, 11950752204196, 30 6913974, 23901514779351, 31 -1, -6913973, // Test 15 32 -6913974, -23901514779351, // Test 17 33 0, 0 }; // Test 19 34 35#define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7)) 36 37__attribute__((noinline)) void atomic_saa ( long long int* p, int n ) 38{ 39#if (_MIPS_ARCH_OCTEON2) 40 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 41 __asm__ __volatile__( 42 "move $t0, %0" "\n\t" 43 "ld $t1, 0($t0)" "\n\t" // p 44 "ld $t2, 8($t0)" "\n\t" // n 45 "saa $t2, ($t1)" "\n\t" 46 : /*out*/ 47 : /*in*/ "r"(&block[0]) 48 : /*trash*/ "memory", "t0", "t1", "t2" 49 ); 50#endif 51} 52 53__attribute__((noinline)) void atomic_saad ( long long int* p, int n ) 54{ 55#if (_MIPS_ARCH_OCTEON2) 56 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 57 __asm__ __volatile__( 58 "move $t0, %0" "\n\t" 59 "ld $t1, 0($t0)" "\n\t" // p 60 "ld $t2, 8($t0)" "\n\t" // n 61 "saad $t2, ($t1)" "\n\t" 62 : /*out*/ 63 : /*in*/ "r"(&block[0]) 64 : /*trash*/ "memory", "t0", "t1", "t2" 65 ); 66#endif 67} 68 69__attribute__((noinline)) void atomic_laa ( long long int* p, int n ) 70{ 71#if (_MIPS_ARCH_OCTEON2) 72 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 73 __asm__ __volatile__( 74 "move $t0, %0" "\n\t" 75 "ld $t1, 0($t0)" "\n\t" // p 76 "ld $t2, 8($t0)" "\n\t" // n 77 "laa $t3, ($t1), $t2" "\n\t" 78 : /*out*/ 79 : /*in*/ "r"(&block[0]) 80 : /*trash*/ "memory", "t0", "t1", "t2" 81 ); 82#endif 83} 84 85__attribute__((noinline)) void atomic_laad ( long long int* p, int n ) 86{ 87#if (_MIPS_ARCH_OCTEON2) 88 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 89 __asm__ __volatile__( 90 "move $t0, %0" "\n\t" 91 "ld $t1, 0($t0)" "\n\t" // p 92 "ld $t2, 8($t0)" "\n\t" // n 93 "laad $t3, ($t1), $t2" "\n\t" 94 : /*out*/ 95 : /*in*/ "r"(&block[0]) 96 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 97 ); 98#endif 99} 100 101__attribute__((noinline)) void atomic_law ( long long int* p, int n ) 102{ 103#if (_MIPS_ARCH_OCTEON2) 104 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 105 __asm__ __volatile__( 106 "move $t0, %0" "\n\t" 107 "ld $t1, 0($t0)" "\n\t" // p 108 "ld $t2, 8($t0)" "\n\t" // n 109 "law $t3, ($t1), $t2" "\n\t" 110 : /*out*/ 111 : /*in*/ "r"(&block[0]) 112 : /*trash*/ "memory", "t0", "t1", "t2" 113 ); 114#endif 115} 116 117__attribute__((noinline)) void atomic_lawd ( long long int* p, int n ) 118{ 119#if (_MIPS_ARCH_OCTEON2) 120 unsigned long block[2] = { (unsigned long)p, (unsigned long)n }; 121 __asm__ __volatile__( 122 "move $t0, %0" "\n\t" 123 "ld $t1, 0($t0)" "\n\t" // p 124 "ld $t2, 8($t0)" "\n\t" // n 125 "lawd $t3, ($t1), $t2" "\n\t" 126 : /*out*/ 127 : /*in*/ "r"(&block[0]) 128 : /*trash*/ "memory", "t0", "t1", "t2", "t3" 129 ); 130#endif 131} 132 133__attribute__((noinline)) void atomic_lai ( long long int* p ) 134{ 135#if (_MIPS_ARCH_OCTEON2) 136 unsigned long block[2] = { (unsigned long)p }; 137 __asm__ __volatile__( 138 "move $t0, %0" "\n\t" 139 "ld $t1, 0($t0)" "\n\t" // p 140 "ld $t2, 8($t0)" "\n\t" // n 141 "lai $t2, ($t1)" "\n\t" 142 : /*out*/ 143 : /*in*/ "r"(&block[0]) 144 : /*trash*/ "memory", "t0", "t1", "t2" 145 ); 146#endif 147} 148 149__attribute__((noinline)) void atomic_laid ( long long int* p ) 150{ 151#if (_MIPS_ARCH_OCTEON2) 152 unsigned long block[2] = { (unsigned long)p }; 153 __asm__ __volatile__( 154 "move $t0, %0" "\n\t" 155 "ld $t1, 0($t0)" "\n\t" // p 156 "ld $t2, 8($t0)" "\n\t" // n 157 "laid $t2, ($t1)" "\n\t" 158 : /*out*/ 159 : /*in*/ "r"(&block[0]) 160 : /*trash*/ "memory", "t0", "t1", "t2" 161 ); 162#endif 163} 164 165__attribute__((noinline)) void atomic_lad ( long long int* p ) 166{ 167#if (_MIPS_ARCH_OCTEON2) 168 unsigned long block[2] = { (unsigned long)p }; 169 __asm__ __volatile__( 170 "move $t0, %0" "\n\t" 171 "ld $t1, 0($t0)" "\n\t" // p 172 "ld $t2, 8($t0)" "\n\t" // n 173 "lad $t2, ($t1)" "\n\t" 174 : /*out*/ 175 : /*in*/ "r"(&block[0]) 176 : /*trash*/ "memory", "t0", "t1", "t2" 177 ); 178#endif 179} 180 181__attribute__((noinline)) void atomic_ladd ( long long int* p ) 182{ 183#if (_MIPS_ARCH_OCTEON2) 184 unsigned long block[2] = { (unsigned long)p }; 185 __asm__ __volatile__( 186 "move $t0, %0" "\n\t" 187 "ld $t1, 0($t0)" "\n\t" // p 188 "ld $t2, 8($t0)" "\n\t" // n 189 "ladd $t2, ($t1)" "\n\t" 190 : /*out*/ 191 : /*in*/ "r"(&block[0]) 192 : /*trash*/ "memory", "t0", "t1", "t2" 193 ); 194#endif 195} 196 197__attribute__((noinline)) void atomic_lac ( long long int* p ) 198{ 199#if (_MIPS_ARCH_OCTEON2) 200 unsigned long block[2] = { (unsigned long)p }; 201 __asm__ __volatile__( 202 "move $t0, %0" "\n\t" 203 "ld $t1, 0($t0)" "\n\t" // p 204 "ld $t2, 8($t0)" "\n\t" // n 205 "lac $t2, ($t1)" "\n\t" 206 : /*out*/ 207 : /*in*/ "r"(&block[0]) 208 : /*trash*/ "memory", "t0", "t1", "t2" 209 ); 210#endif 211} 212 213__attribute__((noinline)) void atomic_lacd ( long long int* p ) 214{ 215#if (_MIPS_ARCH_OCTEON2) 216 unsigned long block[2] = { (unsigned long)p }; 217 __asm__ __volatile__( 218 "move $t0, %0" "\n\t" 219 "ld $t1, 0($t0)" "\n\t" // p 220 "ld $t2, 8($t0)" "\n\t" // n 221 "lacd $t2, ($t1)" "\n\t" 222 : /*out*/ 223 : /*in*/ "r"(&block[0]) 224 : /*trash*/ "memory", "t0", "t1", "t2" 225 ); 226#endif 227} 228 229__attribute__((noinline)) void atomic_las ( long long int* p ) 230{ 231#if (_MIPS_ARCH_OCTEON2) 232 unsigned long block[2] = { (unsigned long)p }; 233 __asm__ __volatile__( 234 "move $t0, %0" "\n\t" 235 "ld $t1, 0($t0)" "\n\t" // p 236 "ld $t2, 8($t0)" "\n\t" // n 237 "las $t2, ($t1)" "\n\t" 238 : /*out*/ 239 : /*in*/ "r"(&block[0]) 240 : /*trash*/ "memory", "t0", "t1", "t2" 241 ); 242#endif 243} 244 245__attribute__((noinline)) void atomic_lasd ( long long int* p ) 246{ 247#if (_MIPS_ARCH_OCTEON2) 248 unsigned long block[2] = { (unsigned long)p }; 249 __asm__ __volatile__( 250 "move $t0, %0" "\n\t" 251 "ld $t1, 0($t0)" "\n\t" // p 252 "ld $t2, 8($t0)" "\n\t" // n 253 "lasd $t2, ($t1)" "\n\t" 254 : /*out*/ 255 : /*in*/ "r"(&block[0]) 256 : /*trash*/ "memory", "t0", "t1", "t2" 257 ); 258#endif 259} 260 261#define TRIOP_AND_SAA(instruction, base1, base2, n) \ 262{ \ 263 __asm__ __volatile__( \ 264 instruction" $t0, (%0), %2" "\n\t" \ 265 "saa $t0, (%1)" "\n\t" \ 266 : /*out*/ \ 267 : /*in*/ "r"(base1), "r"(base2), "r"(n) \ 268 : /*trash*/ "memory", "t0" \ 269 ); \ 270} 271 272#define TRIOP_AND_SAAD(instruction, base1, base2, n) \ 273{ \ 274 __asm__ __volatile__( \ 275 instruction" $t0, (%0), %2" "\n\t" \ 276 "saad $t0, (%1)" "\n\t" \ 277 : /*out*/ \ 278 : /*in*/ "r"(base1), "r"(base2), "r"(n) \ 279 : /*trash*/ "memory", "t0" \ 280 ); \ 281} 282 283#define BINOP_AND_SAA(instruction, base1, base2) \ 284{ \ 285 __asm__ __volatile__( \ 286 instruction" $t0, (%0)" "\n\t" \ 287 "saa $t0, (%1)" "\n\t" \ 288 : /*out*/ \ 289 : /*in*/ "r"(base1), "r"(base2) \ 290 : /*trash*/ "memory", "t0" \ 291 ); \ 292} 293 294#define BINOP_AND_SAAD(instruction, base1, base2) \ 295{ \ 296 __asm__ __volatile__( \ 297 instruction" $t0, (%0)" "\n\t" \ 298 "saad $t0, (%1)" "\n\t" \ 299 : /*out*/ \ 300 : /*in*/ "r"(base1), "r"(base2) \ 301 : /*trash*/ "memory", "t0" \ 302 ); \ 303} 304 305int main ( int argc, char** argv ) 306{ 307#if (_MIPS_ARCH_OCTEON2) 308 int i, status; 309 char* page[N]; 310 long long int* p1[N]; 311 long long int* p2[N]; 312 pid_t child, pc2; 313 314 printf("parent, pre-fork\n"); 315 316 for (i = 0; i < N; i++) { 317 page[i] = mmap( 0, sysconf(_SC_PAGESIZE), 318 PROT_READ|PROT_WRITE, 319 MAP_ANONYMOUS|MAP_SHARED, -1, 0 ); 320 if (page[i] == MAP_FAILED) { 321 perror("mmap failed"); 322 exit(1); 323 } 324 p1[i] = (long long int*)(page[i]+0); 325 p2[i] = (long long int*)(page[i]+256); 326 327 assert( IS_8_ALIGNED(p1[i]) ); 328 assert( IS_8_ALIGNED(p2[i]) ); 329 330 memset(page[i], 0, 1024); 331 memset(page[i], 0, 1024); 332 333 *p1[i] = 0; 334 *p2[i] = 0; 335 } 336 337 child = fork(); 338 if (child == -1) { 339 perror("fork() failed\n"); 340 return 1; 341 } 342 343 if (child == 0) { 344 /* --- CHILD --- */ 345 printf("child\n"); 346 for (i = 0; i < NNN; i++) { 347 atomic_saa(p1[0], i); 348 atomic_saad(p2[0], i+98765 ); /* ensure we hit the upper 32 bits */ 349 atomic_laa(p1[1], i); 350 atomic_laad(p2[1], i+98765 ); /* ensure we hit the upper 32 bits */ 351 atomic_law(p1[2], i); 352 atomic_lawd(p2[2], i+98765 ); /* ensure we hit the upper 32 bits */ 353 atomic_lai(p1[3]); 354 atomic_laid(p2[3]); 355 atomic_lad(p1[4]); 356 atomic_ladd(p2[4]); 357 atomic_lac(p1[5]); 358 atomic_lacd(p2[5]); 359 atomic_las(p1[6]); 360 atomic_lasd(p2[6]); 361 TRIOP_AND_SAA("laa ", p1[7], p1[8], 1) 362 TRIOP_AND_SAAD("laad ", p2[7], p2[8], 1) 363 TRIOP_AND_SAA("law ", p1[9], p1[10], i) 364 TRIOP_AND_SAAD("lawd ", p2[9], p2[10], i) 365 BINOP_AND_SAA("lai ", p1[11], p1[12]) 366 BINOP_AND_SAAD("laid ", p2[11], p2[12]) 367 BINOP_AND_SAA("las ", p1[13], p1[14]) 368 BINOP_AND_SAAD("lasd ", p2[13], p2[14]) 369 BINOP_AND_SAA("lad ", p1[15], p1[16]) 370 BINOP_AND_SAAD("ladd ", p2[15], p2[16]) 371 BINOP_AND_SAA("lac ", p1[17], p1[18]) 372 BINOP_AND_SAAD("lacd ", p2[17], p2[18]) 373 } 374 return 1; 375 /* NOTREACHED */ 376 377 } 378 379 /* --- PARENT --- */ 380 printf("parent\n"); 381 382 for (i = 0; i < NNN; i++) { 383 atomic_saa(p1[0], i); 384 atomic_saad(p2[0], i+98765); /* ensure we hit the upper 32 bits */ 385 atomic_laa(p1[1], i); 386 atomic_laad(p2[1], i+98765); /* ensure we hit the upper 32 bits */ 387 atomic_law(p1[2], i); 388 atomic_lawd(p2[2], i+98765 ); /* ensure we hit the upper 32 bits */ 389 atomic_lai(p1[3]); 390 atomic_laid(p2[3]); 391 atomic_lad(p1[4]); 392 atomic_ladd(p2[4]); 393 atomic_lac(p1[5]); 394 atomic_lacd(p2[5]); 395 atomic_las(p1[6]); 396 atomic_lasd(p2[6]); 397 TRIOP_AND_SAA("laa ", p1[7], p1[8], 1) 398 TRIOP_AND_SAAD("laad ", p2[7], p2[8], 1) 399 TRIOP_AND_SAA("law ", p1[9], p1[10], i) 400 TRIOP_AND_SAAD("lawd ", p2[9], p2[10], i) 401 BINOP_AND_SAA("lai ", p1[11], p1[12]) 402 BINOP_AND_SAAD("laid ", p2[11], p2[12]) 403 BINOP_AND_SAA("las ", p1[13], p1[14]) 404 BINOP_AND_SAAD("lasd ", p2[13], p2[14]) 405 BINOP_AND_SAA("lad ", p1[15], p1[16]) 406 BINOP_AND_SAAD("ladd ", p2[15], p2[16]) 407 BINOP_AND_SAA("lac ", p1[17], p1[18]) 408 BINOP_AND_SAAD("lacd ", p2[17], p2[18]) 409 } 410 411 pc2 = waitpid(child, &status, 0); 412 assert(pc2 == child); 413 414 /* assert that child finished normally */ 415 assert(WIFEXITED(status)); 416 417 printf("Store Atomic Add: 32 bit %lld, 64 bit %lld\n", *p1[0], *p2[0]); 418 printf("Load Atomic Add: 32 bit %lld, 64 bit %lld\n", *p1[1], *p2[1]); 419 printf("Load Atomic Swap: 32 bit %lld, 64 bit %lld\n", *p1[2], *p2[2]); 420 printf("Load Atomic Increment: 32 bit %lld, 64 bit %lld\n", *p1[3], *p2[3]); 421 printf("Load Atomic Decrement: 32 bit %lld, 64 bit %lld\n", *p1[4], *p2[4]); 422 printf("Load Atomic Clear: 32 bit %lld, 64 bit %lld\n", *p1[5], *p2[5]); 423 printf("Load Atomic Set: 32 bit %lld, 64 bit %lld\n", *p1[6], *p2[6]); 424 printf("laa and saa: base1: %lld, base2: %lld\n", *p1[7], *p1[8]); 425 printf("laad and saad: base1: %lld, base2: %lld\n", *p2[7], *p2[8]); 426 printf("law and saa: base1: %lld, base2: %lld\n", *p1[9], *p1[10]); 427 printf("lawd and saad: base1: %lld, base2: %lld\n", *p2[9], *p2[10]); 428 printf("lai and saa: base1: %lld, base2: %lld\n", *p1[11], *p1[12]); 429 printf("laid and saad: base1: %lld, base2: %lld\n", *p2[11], *p2[12]); 430 printf("las and saa: base1: %lld, base2: %lld\n", *p1[13], *p1[14]); 431 printf("lasd and saad: base1: %lld, base2: %lld\n", *p2[13], *p2[14]); 432 printf("lad and saa: base1: %lld, base2: %lld\n", *p1[15], *p1[16]); 433 printf("ladd and saad: base1: %lld, base2: %lld\n", *p2[15], *p2[16]); 434 printf("lac and saa: base1: %lld, base2: %lld\n", *p1[17], *p1[18]); 435 printf("lacd and saad: base1: %lld, base2: %lld\n", *p2[17], *p2[18]); 436 437 for (i = 0; i < N; i++) { 438 if (p1_expd[i] == *p1[i] && p2_expd[i] == *p2[i]) { 439 printf("PASS %d\n", i+1); 440 } else { 441 printf("FAIL %d -- see source code for expected values\n", i+1); 442 } 443 } 444 445 printf("parent exits\n"); 446#endif 447 return 0; 448} 449