1#include <stdio.h>
2
#define N 256

/* Table of 64-bit operand values used by the tests in this file; it holds N
   entries and is filled by init_reg_val_double(). */
unsigned long long reg_val_double[N];

/* Fill reg_val_double with a fixed, reproducible sequence.

   Element 0 is the seed 19650218 masked to its low 32 bits. Every following
   element is derived from its predecessor as
      1812433253 * (prev ^ (prev >> 30)) + index
   with the arithmetic carried out in unsigned long long. */
void init_reg_val_double()
{
   const unsigned long seed = 19650218UL;
   unsigned long long prev = seed & 0xffffffffUL;
   int idx = 0;

   reg_val_double[idx] = prev;
   while (++idx < N) {
      prev = 1812433253UL * (prev ^ (prev >> 30)) + idx;
      reg_val_double[idx] = prev;
   }
}
17
18
19/* Make a copy of original array to prevent the unexpected changes by Atomic Add
20   Instructions */
21unsigned long long reg_val_double_copy[N];
22
23void copy_reg_val_double()
24{
25   int i;
26   for (i = 0; i < N; i++) {
27      reg_val_double_copy[i] = reg_val_double[i];
28   }
29}
30
/* TEST1_32/64 macros are used for the load atomic increment/decrement/set/clear
   instructions. After executing each instruction both the register result and
   the memory location are printed.

   Arguments:
   instruction - mnemonic as a string literal; it is pasted into the asm
                 template and also printed
   offset      - byte offset added to the base address (printed with %x)
   mem         - base address of the buffer the instruction operates on

   Steps:
   1: Move arguments (base address and offset) to registers $t0 and $t1
   2: Add offset and base address to make the absolute address in $t0
   3: Execute instruction with $t3 as its destination register
   4: Move result from register ($t3) to 'out'
   5: Load memory data into 'res_mem' ('lw' for 32bit instruction and 'ld'
      for 64bit instruction)

   Clobbers are written as register numbers. The TEST1_32 template writes
   $t0, $t1 and $t3, so all three have to be listed; $t0..$t3 are taken to be
   $12..$15, the same mapping TEST3 relies on when it lists "$14" for $t2.
   Leaving "$15" out would let the compiler keep a live value in $t3 across
   the asm statement.
*/
#define TEST1_32(instruction, offset,mem)                    \
{                                                            \
   unsigned long out = 0;                                    \
   unsigned long res_mem = 0;                                \
   __asm__ volatile(                                         \
     "move         $t0, %2"        "\n\t"                    \
     "move         $t1, %3"        "\n\t"                    \
     "daddu        $t0, $t1, $t0"  "\n\t"                    \
     instruction " $t3, ($t0)"     "\n\t"                    \
     "move         %0,  $t3"       "\n\t"                    \
     "lw           %1,  0($t0)"    "\n\t"                    \
     : "=&r" (out), "=&r"(res_mem)                           \
     : "r" (mem) , "r" (offset)                              \
     : "$12", "$13", "$15", "cc", "memory"                   \
     );                                                      \
   printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",  \
          instruction, offset, out, res_mem);                \
}
59
/* 64-bit counterpart of TEST1_32: same arguments and same sequence, but the
   memory location is re-read with 'ld' instead of 'lw'.

   The template writes $t0, $t1 and $t3. They are listed as clobbers by number
   ($12, $13, $15), using the same $t-to-number mapping TEST3 relies on when it
   lists "$14" for $t2, so the compiler does not keep live values in them
   across the asm statement.
*/
#define TEST1_64(instruction, offset,mem)                     \
{                                                             \
   unsigned long out = 0;                                     \
   unsigned long res_mem = 0;                                 \
   __asm__ volatile(                                          \
     "move         $t0, %2"        "\n\t"                     \
     "move         $t1, %3"        "\n\t"                     \
     "daddu        $t0, $t1, $t0"  "\n\t"                     \
     instruction " $t3, ($t0)"     "\n\t"                     \
     "move         %0,  $t3"       "\n\t"                     \
     "ld           %1,  0($t0)"    "\n\t"                     \
     : "=&r" (out), "=&r"(res_mem)                            \
     : "r" (mem) , "r" (offset)                               \
     : "$12", "$13", "$15", "cc", "memory"                    \
     );                                                       \
   printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",  \
          instruction, offset, out, res_mem);                 \
}
78
/* TEST2 macro is used for pop/dpop/baddu instructions.

   Arguments:
   instruction - complete instruction text as a string literal, operands
                 included; it is expected to read $t1 (and $t2 when it has a
                 second source) and to write $t3
   RSVal       - value moved to $t1 before the instruction runs
   RTVal       - value moved to $t2 before the instruction runs

   Steps:
   1: Move arguments to registers $t1 and $t2
   2: Execute instruction
   3: Move result from register ($t3) to 'out' and print it with both inputs

   Clobbers are written as register numbers. The template writes $t1, $t2 and
   $t3, taken to be $13, $14 and $15 (the mapping TEST3 relies on when it
   lists "$14" for $t2). "$12" is kept from the earlier list so an instruction
   string that happens to use $t0 stays covered.
*/
#define TEST2(instruction, RSVal, RTVal)                            \
{                                                                   \
   unsigned long out = 0;                                           \
   __asm__ volatile(                                                \
      "move $t1, %1"  "\n\t"                                        \
      "move $t2, %2"  "\n\t"                                        \
      instruction     "\n\t"                                        \
      "move %0, $t3"  "\n\t"                                        \
      : "=&r" (out)                                                 \
      : "r" (RSVal), "r" (RTVal)                                    \
      : "$12", "$13", "$14", "$15", "cc", "memory"                  \
        );                                                          \
   printf("%s :: rd 0x%lx, rs 0x%llx, rt 0x%llx\n",                 \
          instruction, out, (unsigned long long) (RSVal),           \
          (unsigned long long) (RTVal));                            \
}
101
/* TEST3 macro is used for the store atomic add and store atomic add
   doubleword instructions.

   Arguments:
   instruction - mnemonic as a string literal; pasted into the asm template
                 and printed
   offset      - byte offset added to the base address
   mem         - base address of the buffer the instruction operates on
   value       - operand handed to the instruction in $t2 (printed with %llx)

   Steps:
   1: Move base address and offset to $t0/$t1 and add them, leaving the
      absolute address in $t0
   2: Load the doubleword at that address as the "before" value
   3: Move value to $t2 and execute the instruction on ($t0)
   4: Load the doubleword again as the "after" value and print both
*/
#define TEST3(instruction, offset, mem, value)                       \
{                                                                    \
   unsigned long mem_after = 0;                                      \
   unsigned long mem_before = 0;                                     \
   __asm__ volatile(                                                 \
      "move   $t0, %[base]\n\t"                                      \
      "move   $t1, %[off]\n\t"                                       \
      "daddu  $t0, $t1, $t0\n\t"                                     \
      "ld     %[before], 0($t0)\n\t"                                 \
      "move   $t2, %[val]\n\t"                                       \
      instruction " $t2, ($t0)\n\t"                                  \
      "ld     %[after], 0($t0)\n\t"                                  \
      : [after] "=&r" (mem_after), [before] "=&r" (mem_before)       \
      : [base] "r" (mem), [off] "r" (offset), [val] "r" (value)      \
      : "$12", "$13", "$14", "cc", "memory"                          \
   );                                                                \
   printf("%s :: value: 0x%llx, memPre: 0x%lx, mem: 0x%lx\n",        \
          instruction, value, mem_before, mem_after);                \
}
129
/* TEST4_32/64 is used for load atomic add/swap instructions.

   Arguments:
   instruction - mnemonic as a string literal; pasted into the asm template
                 and printed
   offset      - byte offset added to the base address; the same register
                 ($t1) is also passed to the instruction as its last operand
   mem         - base address of the buffer the instruction operates on

   Steps:
   1: Move arguments (base address and offset) to registers $t0 and $t1.
   2: Add offset and base address to make the absolute address in $t0.
   3: Execute instruction with $t3 as destination and $t1 as source operand.
   4: Move result from register ($t3) to 'out'.
   5: Load memory data into 'res_mem' ('lw' for 32bit instruction and 'ld'
      for 64bit).

   Clobbers are written as register numbers. The TEST4_32 template writes
   $t0, $t1 and $t3, so all three have to be listed; $t0..$t3 are taken to be
   $12..$15, the same mapping TEST3 relies on when it lists "$14" for $t2.
*/
#define TEST4_32(instruction, offset, mem)                   \
{                                                            \
   unsigned long out = 0;                                    \
   unsigned long res_mem = 0;                                \
   __asm__ volatile(                                         \
      "move         $t0, %2"          "\n\t"                 \
      "move         $t1, %3"          "\n\t"                 \
      "daddu        $t0, $t0, $t1"    "\n\t"                 \
      instruction " $t3, ($t0), $t1"  "\n\t"                 \
      "move         %0,  $t3"         "\n\t"                 \
      "lw           %1,  0($t0)"      "\n\t"                 \
      : "=&r" (out), "=&r"(res_mem)                          \
      : "r" (mem) , "r" (offset)                             \
      : "$12", "$13", "$15", "cc", "memory"                  \
     );                                                      \
   printf("%s :: offset: 0x%x, out: 0x%lx, result:0x%lx\n",  \
          instruction, offset, out, res_mem);                \
}
157
/* 64-bit counterpart of TEST4_32: same arguments and same sequence, but the
   memory location is re-read with 'ld' instead of 'lw'.

   The template writes $t0, $t1 and $t3. They are listed as clobbers by number
   ($12, $13, $15), using the same $t-to-number mapping TEST3 relies on when it
   lists "$14" for $t2, so the compiler does not keep live values in them
   across the asm statement.
*/
#define TEST4_64(instruction, offset, mem)                    \
{                                                             \
   unsigned long out = 0;                                     \
   unsigned long res_mem = 0;                                 \
   __asm__ volatile(                                          \
      "move         $t0, %2"          "\n\t"                  \
      "move         $t1, %3"          "\n\t"                  \
      "daddu        $t0, $t0,   $t1"  "\n\t"                  \
      instruction " $t3, ($t0), $t1"  "\n\t"                  \
      "move         %0,  $t3"         "\n\t"                  \
      "ld           %1,  0($t0)"      "\n\t"                  \
     : "=&r" (out), "=&r"(res_mem)                            \
     : "r" (mem) , "r" (offset)                               \
     : "$12", "$13", "$15", "cc", "memory"                    \
     );                                                       \
   printf("%s :: offset: 0x%x, out: 0x%lx, result: 0x%lx\n",  \
          instruction, offset, out, res_mem);                 \
}
176
/* Instructions exercised by main(). main() walks the enumeration from BADDU
   up to LACD, so the declaration order is also the execution order. */
typedef enum {
   BADDU,   /* unsigned byte add */
   POP,     /* count ones in a word */
   DPOP,    /* count ones in a doubleword */
   SAA,     /* atomic add word */
   SAAD,    /* atomic add doubleword */
   LAA,     /* load atomic add word */
   LAAD,    /* load atomic add doubleword */
   LAW,     /* load atomic swap word */
   LAWD,    /* load atomic swap doubleword */
   LAI,     /* load atomic increment word */
   LAID,    /* load atomic increment doubleword */
   LAD,     /* load atomic decrement word */
   LADD,    /* load atomic decrement doubleword */
   LAS,     /* load atomic set word */
   LASD,    /* load atomic set doubleword */
   LAC,     /* load atomic clear word */
   LACD     /* load atomic clear doubleword */
} cvm_op;
181
/* Run every instruction test, one cvm_op value at a time from BADDU to LACD,
   printing the result of each executed instruction.

   The test body is compiled only when _MIPS_ARCH_OCTEON2 is set; without it
   main() does nothing and returns 0.

   Word-sized tests step through the table in increments of 4, doubleword
   tests in increments of 8. Every test that writes memory first refreshes
   reg_val_double_copy and uses it as the memory operand. */
int main()
{
#if (_MIPS_ARCH_OCTEON2)
   int rs, rt, off;
   cvm_op op;

   init_reg_val_double();

   for (op = BADDU; op <= LACD; op++) {
      switch (op) {
      /* Unsigned Byte Add - BADDU rd, rs, rt; Cavium OCTEON */
      case BADDU:
         for (rs = 4; rs < N; rs += 4) {
            for (rt = 4; rt < N; rt += 4) {
               TEST2("baddu $t3, $t1, $t2", reg_val_double[rs],
                                            reg_val_double[rt]);
            }
         }
         break;

      /* Count Ones in a Word - POP */
      case POP:
         for (rs = 4; rs < N; rs += 4) {
            TEST2("pop $t3, $t1", reg_val_double[rs], 0);
         }
         break;

      /* Count Ones in a Doubleword - DPOP */
      case DPOP:
         for (rs = 8; rs < N; rs += 8) {
            TEST2("dpop $t3, $t1", reg_val_double[rs], 0);
         }
         break;

      /* Atomic Add Word - saa rt, (base). */
      case SAA:
         copy_reg_val_double();
         for (off = 4; off < N; off += 4) {
            TEST3("saa", off, reg_val_double_copy, reg_val_double[off]);
         }
         break;

      /* Atomic Add Double - saad rt, (base). */
      case SAAD:
         copy_reg_val_double();
         for (off = 8; off < N; off += 8) {
            TEST3("saad", off, reg_val_double_copy, reg_val_double[off]);
         }
         break;

      /* Load Atomic Add Word - laa rd, (base), rt. */
      case LAA:
         copy_reg_val_double();
         for (off = 4; off < N; off += 4) {
            TEST4_32("laa", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Add Double - laad rd, (base), rt */
      case LAAD:
         copy_reg_val_double();
         for (off = 8; off < N; off += 8) {
            TEST4_64("laad ", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Swap Word - law rd, (base), rt */
      case LAW:
         copy_reg_val_double();
         for (off = 4; off < N; off += 4) {
            TEST4_32("law", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Swap Double - lawd rd, (base), rt */
      case LAWD:
         copy_reg_val_double();
         for (off = 8; off < N; off += 8) {
            TEST4_64("lawd", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Increment Word - lai rd, (base) */
      case LAI:
         copy_reg_val_double();
         for (off = 4; off < N; off += 4) {
            TEST1_32("lai", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Increment Double - laid rd, (base) */
      case LAID:
         copy_reg_val_double();
         for (off = 8; off < N; off += 8) {
            TEST1_64("laid ", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Decrement Word - lad rd, (base) */
      case LAD:
         copy_reg_val_double();
         for (off = 4; off < N; off += 4) {
            TEST1_32("lad", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Decrement Double - ladd rd, (base) */
      case LADD:
         copy_reg_val_double();
         for (off = 8; off < N; off += 8) {
            TEST1_64("ladd", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Set Word - las rd, (base) */
      case LAS:
         copy_reg_val_double();
         for (off = 4; off < N; off += 4) {
            TEST1_32("las", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Set Doubleword - lasd rd, (base) */
      case LASD:
         copy_reg_val_double();
         for (off = 8; off < N; off += 8) {
            TEST1_64("lasd", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Clear Word - lac rd, (base) */
      case LAC:
         copy_reg_val_double();
         for (off = 4; off < N; off += 4) {
            TEST1_32("lac", off, reg_val_double_copy);
         }
         break;

      /* Load Atomic Clear Double - lacd rd, (base) */
      case LACD:
         copy_reg_val_double();
         for (off = 8; off < N; off += 8) {
            TEST1_64("lacd", off, reg_val_double_copy);
         }
         break;

      default:
         printf("Nothing to be executed \n");
      }
   }
#endif
   return 0;
}
299