1/** Compute the matrix inverse via Gauss-Jordan elimination.
2 *  This program uses OpenMP separate computation steps but no
3 *  mutexes. It is an example of a race-free program on which no data races
4 *  are reported by the happens-before algorithm (drd), but a lot of data races
5 *  (all false positives) are reported by the Eraser-algorithm (helgrind).
6 */
7
8
9#define _GNU_SOURCE
10
11/***********************/
12/* Include directives. */
13/***********************/
14
15#include <assert.h>
16#include <math.h>
17#include <omp.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <unistd.h>  // getopt()
21
22
23/*********************/
24/* Type definitions. */
25/*********************/
26
27typedef double elem_t;
28
29
30/********************/
31/* Local variables. */
32/********************/
33
34static int s_trigger_race;
35
36
37/*************************/
38/* Function definitions. */
39/*************************/
40
41/** Allocate memory for a matrix with the specified number of rows and
42 *  columns.
43 */
44static elem_t* new_matrix(const int rows, const int cols)
45{
46  assert(rows > 0);
47  assert(cols > 0);
48  return malloc(rows * cols * sizeof(elem_t));
49}
50
51/** Free the memory that was allocated for a matrix. */
52static void delete_matrix(elem_t* const a)
53{
54  free(a);
55}
56
57/** Fill in some numbers in a matrix. */
58static void init_matrix(elem_t* const a, const int rows, const int cols)
59{
60  int i, j;
61  for (i = 0; i < rows; i++)
62  {
63    for (j = 0; j < rows; j++)
64    {
65      a[i * cols + j] = 1.0 / (1 + abs(i-j));
66    }
67  }
68}
69
70/** Print all elements of a matrix. */
71void print_matrix(const char* const label,
72                  const elem_t* const a, const int rows, const int cols)
73{
74  int i, j;
75  printf("%s:\n", label);
76  for (i = 0; i < rows; i++)
77  {
78    for (j = 0; j < cols; j++)
79    {
80      printf("%g ", a[i * cols + j]);
81    }
82    printf("\n");
83  }
84}
85
86/** Copy a subset of the elements of a matrix into another matrix. */
87static void copy_matrix(const elem_t* const from,
88                        const int from_rows,
89                        const int from_cols,
90                        const int from_row_first,
91                        const int from_row_last,
92                        const int from_col_first,
93                        const int from_col_last,
94                        elem_t* const to,
95                        const int to_rows,
96                        const int to_cols,
97                        const int to_row_first,
98                        const int to_row_last,
99                        const int to_col_first,
100                        const int to_col_last)
101{
102  int i, j;
103
104  assert(from_row_last - from_row_first == to_row_last - to_row_first);
105  assert(from_col_last - from_col_first == to_col_last - to_col_first);
106
107  for (i = from_row_first; i < from_row_last; i++)
108  {
109    assert(i < from_rows);
110    assert(i - from_row_first + to_row_first < to_rows);
111    for (j = from_col_first; j < from_col_last; j++)
112    {
113      assert(j < from_cols);
114      assert(j - from_col_first + to_col_first < to_cols);
115      to[(i - from_row_first + to_col_first) * to_cols
116         + (j - from_col_first + to_col_first)]
117        = from[i * from_cols + j];
118    }
119  }
120}
121
122/** Compute the matrix product of a1 and a2. */
123static elem_t* multiply_matrices(const elem_t* const a1,
124                                 const int rows1,
125                                 const int cols1,
126                                 const elem_t* const a2,
127                                 const int rows2,
128                                 const int cols2)
129{
130  int i, j, k;
131  elem_t* prod;
132
133  assert(cols1 == rows2);
134
135  prod = new_matrix(rows1, cols2);
136  for (i = 0; i < rows1; i++)
137  {
138    for (j = 0; j < cols2; j++)
139    {
140      prod[i * cols2 + j] = 0;
141      for (k = 0; k < cols1; k++)
142      {
143        prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
144      }
145    }
146  }
147  return prod;
148}
149
150/** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
151 *  at row r0 and up to but not including row r1. It is assumed that as many
152 *  threads execute this function concurrently as the count barrier p->b was
153 *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
154 *  has at least as many columns as rows, the result of this algorithm is that
155 *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
156 * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
157 */
158static void gj(elem_t* const a, const int rows, const int cols)
159{
160  int i, j, k;
161
162  for (i = 0; i < rows; i++)
163  {
164    {
165      // Pivoting.
166      j = i;
167      for (k = i + 1; k < rows; k++)
168      {
169        if (a[k * cols + i] > a[j * cols + i])
170        {
171          j = k;
172        }
173      }
174      if (j != i)
175      {
176        for (k = 0; k < cols; k++)
177        {
178          const elem_t t = a[i * cols + k];
179          a[i * cols + k] = a[j * cols + k];
180          a[j * cols + k] = t;
181        }
182      }
183      // Normalize row i.
184      if (a[i * cols + i] != 0)
185      {
186        for (k = cols - 1; k >= 0; k--)
187        {
188          a[i * cols + k] /= a[i * cols + i];
189        }
190      }
191    }
192
193    // Reduce all rows j != i.
194
195    if (s_trigger_race)
196    {
197#     pragma omp parallel for private(j)
198      for (j = 0; j < rows; j++)
199      {
200        if (i != j)
201        {
202          const elem_t factor = a[j * cols + i];
203          for (k = 0; k < cols; k++)
204          {
205            a[j * cols + k] -= a[i * cols + k] * factor;
206          }
207        }
208      }
209    }
210    else
211    {
212#     pragma omp parallel for private(j, k)
213      for (j = 0; j < rows; j++)
214      {
215        if (i != j)
216        {
217          const elem_t factor = a[j * cols + i];
218          for (k = 0; k < cols; k++)
219          {
220            a[j * cols + k] -= a[i * cols + k] * factor;
221          }
222        }
223      }
224    }
225  }
226}
227
228/** Matrix inversion via the Gauss-Jordan algorithm. */
229static elem_t* invert_matrix(const elem_t* const a, const int n)
230{
231  int i, j;
232  elem_t* const inv = new_matrix(n, n);
233  elem_t* const tmp = new_matrix(n, 2*n);
234  copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
235  for (i = 0; i < n; i++)
236    for (j = 0; j < n; j++)
237      tmp[i * 2 * n + n + j] = (i == j);
238  gj(tmp, n, 2*n);
239  copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
240  delete_matrix(tmp);
241  return inv;
242}
243
244/** Compute the average square error between the identity matrix and the
245 * product of matrix a with its inverse matrix.
246 */
247static double identity_error(const elem_t* const a, const int n)
248{
249  int i, j;
250  elem_t e = 0;
251  for (i = 0; i < n; i++)
252  {
253    for (j = 0; j < n; j++)
254    {
255      const elem_t d = a[i * n + j] - (i == j);
256      e += d * d;
257    }
258  }
259  return sqrt(e / (n * n));
260}
261
262/** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
263 *  smallest number for which the sum of one and that number is different of
264 *  one. It is assumed that the underlying representation of elem_t uses
265 *  base two.
266 */
267static elem_t epsilon()
268{
269  elem_t eps;
270  for (eps = 1; 1 + eps != 1; eps /= 2)
271    ;
272  return 2 * eps;
273}
274
275static void usage(const char* const exe)
276{
277  printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n"
278         "-h: display this information.\n"
279         "-q: quiet mode -- do not print computed error.\n"
280         "-r: trigger a race condition.\n"
281         "-t<n>: use <n> threads.\n"
282         "<m>: matrix size.\n",
283         exe);
284}
285
286int main(int argc, char** argv)
287{
288  int matrix_size;
289  int nthread = 1;
290  int silent = 0;
291  int optchar;
292  elem_t *a, *inv, *prod;
293  elem_t eps;
294  double error;
295  double ratio;
296
297  while ((optchar = getopt(argc, argv, "hqrt:")) != EOF)
298  {
299    switch (optchar)
300    {
301    case 'h': usage(argv[0]); return 1;
302    case 'q': silent = 1; break;
303    case 'r': s_trigger_race = 1; break;
304    case 't': nthread = atoi(optarg); break;
305    default:
306      return 1;
307    }
308  }
309
310  if (optind + 1 != argc)
311  {
312    fprintf(stderr, "Error: wrong number of arguments.\n");
313    return 1;
314  }
315  matrix_size = atoi(argv[optind]);
316
317  /* Error checking. */
318  assert(matrix_size >= 1);
319  assert(nthread >= 1);
320
321  omp_set_num_threads(nthread);
322  omp_set_dynamic(0);
323
324  eps = epsilon();
325  a = new_matrix(matrix_size, matrix_size);
326  init_matrix(a, matrix_size, matrix_size);
327  inv = invert_matrix(a, matrix_size);
328  prod = multiply_matrices(a, matrix_size, matrix_size,
329                           inv, matrix_size, matrix_size);
330  error = identity_error(prod, matrix_size);
331  ratio = error / (eps * matrix_size);
332  if (! silent)
333  {
334    printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
335           error, eps, ratio);
336  }
337  if (isfinite(ratio) && ratio < 100)
338    printf("Error within bounds.\n");
339  else
340    printf("Error out of bounds.\n");
341  delete_matrix(prod);
342  delete_matrix(inv);
343  delete_matrix(a);
344
345  return 0;
346}
347