1/** Compute the matrix inverse via Gauss-Jordan elimination. 2 * This program uses OpenMP separate computation steps but no 3 * mutexes. It is an example of a race-free program on which no data races 4 * are reported by the happens-before algorithm (drd), but a lot of data races 5 * (all false positives) are reported by the Eraser-algorithm (helgrind). 6 */ 7 8 9#define _GNU_SOURCE 10 11/***********************/ 12/* Include directives. */ 13/***********************/ 14 15#include <assert.h> 16#include <math.h> 17#include <omp.h> 18#include <stdio.h> 19#include <stdlib.h> 20#include <unistd.h> // getopt() 21 22 23/*********************/ 24/* Type definitions. */ 25/*********************/ 26 27typedef double elem_t; 28 29 30/********************/ 31/* Local variables. */ 32/********************/ 33 34static int s_trigger_race; 35 36 37/*************************/ 38/* Function definitions. */ 39/*************************/ 40 41/** Allocate memory for a matrix with the specified number of rows and 42 * columns. 43 */ 44static elem_t* new_matrix(const int rows, const int cols) 45{ 46 assert(rows > 0); 47 assert(cols > 0); 48 return malloc(rows * cols * sizeof(elem_t)); 49} 50 51/** Free the memory that was allocated for a matrix. */ 52static void delete_matrix(elem_t* const a) 53{ 54 free(a); 55} 56 57/** Fill in some numbers in a matrix. */ 58static void init_matrix(elem_t* const a, const int rows, const int cols) 59{ 60 int i, j; 61 for (i = 0; i < rows; i++) 62 { 63 for (j = 0; j < rows; j++) 64 { 65 a[i * cols + j] = 1.0 / (1 + abs(i-j)); 66 } 67 } 68} 69 70/** Print all elements of a matrix. */ 71void print_matrix(const char* const label, 72 const elem_t* const a, const int rows, const int cols) 73{ 74 int i, j; 75 printf("%s:\n", label); 76 for (i = 0; i < rows; i++) 77 { 78 for (j = 0; j < cols; j++) 79 { 80 printf("%g ", a[i * cols + j]); 81 } 82 printf("\n"); 83 } 84} 85 86/** Copy a subset of the elements of a matrix into another matrix. */ 87static void copy_matrix(const elem_t* const from, 88 const int from_rows, 89 const int from_cols, 90 const int from_row_first, 91 const int from_row_last, 92 const int from_col_first, 93 const int from_col_last, 94 elem_t* const to, 95 const int to_rows, 96 const int to_cols, 97 const int to_row_first, 98 const int to_row_last, 99 const int to_col_first, 100 const int to_col_last) 101{ 102 int i, j; 103 104 assert(from_row_last - from_row_first == to_row_last - to_row_first); 105 assert(from_col_last - from_col_first == to_col_last - to_col_first); 106 107 for (i = from_row_first; i < from_row_last; i++) 108 { 109 assert(i < from_rows); 110 assert(i - from_row_first + to_row_first < to_rows); 111 for (j = from_col_first; j < from_col_last; j++) 112 { 113 assert(j < from_cols); 114 assert(j - from_col_first + to_col_first < to_cols); 115 to[(i - from_row_first + to_col_first) * to_cols 116 + (j - from_col_first + to_col_first)] 117 = from[i * from_cols + j]; 118 } 119 } 120} 121 122/** Compute the matrix product of a1 and a2. */ 123static elem_t* multiply_matrices(const elem_t* const a1, 124 const int rows1, 125 const int cols1, 126 const elem_t* const a2, 127 const int rows2, 128 const int cols2) 129{ 130 int i, j, k; 131 elem_t* prod; 132 133 assert(cols1 == rows2); 134 135 prod = new_matrix(rows1, cols2); 136 for (i = 0; i < rows1; i++) 137 { 138 for (j = 0; j < cols2; j++) 139 { 140 prod[i * cols2 + j] = 0; 141 for (k = 0; k < cols1; k++) 142 { 143 prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j]; 144 } 145 } 146 } 147 return prod; 148} 149 150/** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting 151 * at row r0 and up to but not including row r1. It is assumed that as many 152 * threads execute this function concurrently as the count barrier p->b was 153 * initialized with. If the matrix p->a is nonsingular, and if matrix p->a 154 * has at least as many columns as rows, the result of this algorithm is that 155 * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix. 156 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination 157 */ 158static void gj(elem_t* const a, const int rows, const int cols) 159{ 160 int i, j, k; 161 162 for (i = 0; i < rows; i++) 163 { 164 { 165 // Pivoting. 166 j = i; 167 for (k = i + 1; k < rows; k++) 168 { 169 if (a[k * cols + i] > a[j * cols + i]) 170 { 171 j = k; 172 } 173 } 174 if (j != i) 175 { 176 for (k = 0; k < cols; k++) 177 { 178 const elem_t t = a[i * cols + k]; 179 a[i * cols + k] = a[j * cols + k]; 180 a[j * cols + k] = t; 181 } 182 } 183 // Normalize row i. 184 if (a[i * cols + i] != 0) 185 { 186 for (k = cols - 1; k >= 0; k--) 187 { 188 a[i * cols + k] /= a[i * cols + i]; 189 } 190 } 191 } 192 193 // Reduce all rows j != i. 194 195 if (s_trigger_race) 196 { 197# pragma omp parallel for private(j) 198 for (j = 0; j < rows; j++) 199 { 200 if (i != j) 201 { 202 const elem_t factor = a[j * cols + i]; 203 for (k = 0; k < cols; k++) 204 { 205 a[j * cols + k] -= a[i * cols + k] * factor; 206 } 207 } 208 } 209 } 210 else 211 { 212# pragma omp parallel for private(j, k) 213 for (j = 0; j < rows; j++) 214 { 215 if (i != j) 216 { 217 const elem_t factor = a[j * cols + i]; 218 for (k = 0; k < cols; k++) 219 { 220 a[j * cols + k] -= a[i * cols + k] * factor; 221 } 222 } 223 } 224 } 225 } 226} 227 228/** Matrix inversion via the Gauss-Jordan algorithm. */ 229static elem_t* invert_matrix(const elem_t* const a, const int n) 230{ 231 int i, j; 232 elem_t* const inv = new_matrix(n, n); 233 elem_t* const tmp = new_matrix(n, 2*n); 234 copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n); 235 for (i = 0; i < n; i++) 236 for (j = 0; j < n; j++) 237 tmp[i * 2 * n + n + j] = (i == j); 238 gj(tmp, n, 2*n); 239 copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n); 240 delete_matrix(tmp); 241 return inv; 242} 243 244/** Compute the average square error between the identity matrix and the 245 * product of matrix a with its inverse matrix. 246 */ 247static double identity_error(const elem_t* const a, const int n) 248{ 249 int i, j; 250 elem_t e = 0; 251 for (i = 0; i < n; i++) 252 { 253 for (j = 0; j < n; j++) 254 { 255 const elem_t d = a[i * n + j] - (i == j); 256 e += d * d; 257 } 258 } 259 return sqrt(e / (n * n)); 260} 261 262/** Compute epsilon for the numeric type elem_t. Epsilon is defined as the 263 * smallest number for which the sum of one and that number is different of 264 * one. It is assumed that the underlying representation of elem_t uses 265 * base two. 266 */ 267static elem_t epsilon() 268{ 269 elem_t eps; 270 for (eps = 1; 1 + eps != 1; eps /= 2) 271 ; 272 return 2 * eps; 273} 274 275static void usage(const char* const exe) 276{ 277 printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n" 278 "-h: display this information.\n" 279 "-q: quiet mode -- do not print computed error.\n" 280 "-r: trigger a race condition.\n" 281 "-t<n>: use <n> threads.\n" 282 "<m>: matrix size.\n", 283 exe); 284} 285 286int main(int argc, char** argv) 287{ 288 int matrix_size; 289 int nthread = 1; 290 int silent = 0; 291 int optchar; 292 elem_t *a, *inv, *prod; 293 elem_t eps; 294 double error; 295 double ratio; 296 297 while ((optchar = getopt(argc, argv, "hqrt:")) != EOF) 298 { 299 switch (optchar) 300 { 301 case 'h': usage(argv[0]); return 1; 302 case 'q': silent = 1; break; 303 case 'r': s_trigger_race = 1; break; 304 case 't': nthread = atoi(optarg); break; 305 default: 306 return 1; 307 } 308 } 309 310 if (optind + 1 != argc) 311 { 312 fprintf(stderr, "Error: wrong number of arguments.\n"); 313 return 1; 314 } 315 matrix_size = atoi(argv[optind]); 316 317 /* Error checking. */ 318 assert(matrix_size >= 1); 319 assert(nthread >= 1); 320 321 omp_set_num_threads(nthread); 322 omp_set_dynamic(0); 323 324 eps = epsilon(); 325 a = new_matrix(matrix_size, matrix_size); 326 init_matrix(a, matrix_size, matrix_size); 327 inv = invert_matrix(a, matrix_size); 328 prod = multiply_matrices(a, matrix_size, matrix_size, 329 inv, matrix_size, matrix_size); 330 error = identity_error(prod, matrix_size); 331 ratio = error / (eps * matrix_size); 332 if (! silent) 333 { 334 printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n", 335 error, eps, ratio); 336 } 337 if (isfinite(ratio) && ratio < 100) 338 printf("Error within bounds.\n"); 339 else 340 printf("Error out of bounds.\n"); 341 delete_matrix(prod); 342 delete_matrix(inv); 343 delete_matrix(a); 344 345 return 0; 346} 347