1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Compute the matrix inverse via Gauss-Jordan elimination.
2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  This program uses OpenMP separate computation steps but no
3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  mutexes. It is an example of a race-free program on which no data races
4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  are reported by the happens-before algorithm (drd), but a lot of data races
5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  (all false positives) are reported by the Eraser-algorithm (helgrind).
6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */
7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define _GNU_SOURCE
10ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/***********************/
12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Include directives. */
13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/***********************/
14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <assert.h>
16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <math.h>
17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <omp.h>
18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <stdio.h>
19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <stdlib.h>
20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <unistd.h>  // getopt()
21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*********************/
24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Type definitions. */
25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*********************/
26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Browntypedef double elem_t;
28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/********************/
31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Local variables. */
32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/********************/
33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic int s_trigger_race;
35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*************************/
38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Function definitions. */
39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*************************/
40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Allocate memory for a matrix with the specified number of rows and
42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  columns.
43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */
44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic elem_t* new_matrix(const int rows, const int cols)
45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  assert(rows > 0);
47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  assert(cols > 0);
48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  return malloc(rows * cols * sizeof(elem_t));
49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Free the memory that was allocated for a matrix. */
52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void delete_matrix(elem_t* const a)
53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  free(a);
55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Fill in some numbers in a matrix. */
58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void init_matrix(elem_t* const a, const int rows, const int cols)
59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int i, j;
61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  for (i = 0; i < rows; i++)
62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    for (j = 0; j < rows; j++)
64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      a[i * cols + j] = 1.0 / (1 + abs(i-j));
66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Print all elements of a matrix. */
71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid print_matrix(const char* const label,
72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                  const elem_t* const a, const int rows, const int cols)
73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int i, j;
75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  printf("%s:\n", label);
76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  for (i = 0; i < rows; i++)
77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    for (j = 0; j < cols; j++)
79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      printf("%g ", a[i * cols + j]);
81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    printf("\n");
83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Copy a subset of the elements of a matrix into another matrix. */
87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void copy_matrix(const elem_t* const from,
88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int from_rows,
89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int from_cols,
90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int from_row_first,
91ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int from_row_last,
92ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int from_col_first,
93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int from_col_last,
94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        elem_t* const to,
95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int to_rows,
96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int to_cols,
97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int to_row_first,
98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int to_row_last,
99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int to_col_first,
100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        const int to_col_last)
101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int i, j;
103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  assert(from_row_last - from_row_first == to_row_last - to_row_first);
105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  assert(from_col_last - from_col_first == to_col_last - to_col_first);
106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  for (i = from_row_first; i < from_row_last; i++)
108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    assert(i < from_rows);
110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    assert(i - from_row_first + to_row_first < to_rows);
111ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    for (j = from_col_first; j < from_col_last; j++)
112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      assert(j < from_cols);
114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      assert(j - from_col_first + to_col_first < to_cols);
115ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      to[(i - from_row_first + to_col_first) * to_cols
116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         + (j - from_col_first + to_col_first)]
117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        = from[i * from_cols + j];
118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Compute the matrix product of a1 and a2. */
123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic elem_t* multiply_matrices(const elem_t* const a1,
124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                 const int rows1,
125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                 const int cols1,
126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                 const elem_t* const a2,
127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                 const int rows2,
128ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                 const int cols2)
129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int i, j, k;
131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  elem_t* prod;
132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  assert(cols1 == rows2);
134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  prod = new_matrix(rows1, cols2);
136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  for (i = 0; i < rows1; i++)
137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    for (j = 0; j < cols2; j++)
139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      prod[i * cols2 + j] = 0;
141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (k = 0; k < cols1; k++)
142ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      {
143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j];
144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  return prod;
148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting
151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  at row r0 and up to but not including row r1. It is assumed that as many
152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  threads execute this function concurrently as the count barrier p->b was
153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  initialized with. If the matrix p->a is nonsingular, and if matrix p->a
154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  has at least as many columns as rows, the result of this algorithm is that
155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix.
156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination
157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */
158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void gj(elem_t* const a, const int rows, const int cols)
159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int i, j, k;
161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
162ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  for (i = 0; i < rows; i++)
163ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // Pivoting.
166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      j = i;
167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (k = i + 1; k < rows; k++)
168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      {
169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        if (a[k * cols + i] > a[j * cols + i])
170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        {
171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          j = k;
172ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        }
173ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (j != i)
175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      {
176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        for (k = 0; k < cols; k++)
177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        {
178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          const elem_t t = a[i * cols + k];
179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          a[i * cols + k] = a[j * cols + k];
180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          a[j * cols + k] = t;
181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        }
182ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
183ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // Normalize row i.
184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (a[i * cols + i] != 0)
185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      {
186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        for (k = cols - 1; k >= 0; k--)
187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        {
188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          a[i * cols + k] /= a[i * cols + i];
189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        }
190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    // Reduce all rows j != i.
194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
195ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    if (s_trigger_race)
196ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
197ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#     pragma omp parallel for private(j)
198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (j = 0; j < rows; j++)
199ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      {
200ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        if (i != j)
201ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        {
202ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          const elem_t factor = a[j * cols + i];
203ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          for (k = 0; k < cols; k++)
204ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          {
205ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            a[j * cols + k] -= a[i * cols + k] * factor;
206ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          }
207ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        }
208ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
209ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
210ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    else
211ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
212ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#     pragma omp parallel for private(j, k)
213ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (j = 0; j < rows; j++)
214ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      {
215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        if (i != j)
216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        {
217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          const elem_t factor = a[j * cols + i];
218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          for (k = 0; k < cols; k++)
219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          {
220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            a[j * cols + k] -= a[i * cols + k] * factor;
221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          }
222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        }
223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Matrix inversion via the Gauss-Jordan algorithm. */
229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic elem_t* invert_matrix(const elem_t* const a, const int n)
230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int i, j;
232ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  elem_t* const inv = new_matrix(n, n);
233ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  elem_t* const tmp = new_matrix(n, 2*n);
234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n);
235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  for (i = 0; i < n; i++)
236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    for (j = 0; j < n; j++)
237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      tmp[i * 2 * n + n + j] = (i == j);
238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  gj(tmp, n, 2*n);
239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n);
240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  delete_matrix(tmp);
241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  return inv;
242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
243ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
244ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Compute the average square error between the identity matrix and the
245ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * product of matrix a with its inverse matrix.
246ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */
247ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic double identity_error(const elem_t* const a, const int n)
248ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
249ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int i, j;
250ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  elem_t e = 0;
251ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  for (i = 0; i < n; i++)
252ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
253ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    for (j = 0; j < n; j++)
254ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
255ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      const elem_t d = a[i * n + j] - (i == j);
256ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      e += d * d;
257ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
258ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
259ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  return sqrt(e / (n * n));
260ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
261ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
262ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Compute epsilon for the numeric type elem_t. Epsilon is defined as the
263ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  smallest number for which the sum of one and that number is different of
264ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  one. It is assumed that the underlying representation of elem_t uses
265ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *  base two.
266ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */
267ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic elem_t epsilon()
268ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
269ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  elem_t eps;
270ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  for (eps = 1; 1 + eps != 1; eps /= 2)
271ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    ;
272ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  return 2 * eps;
273ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
274ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
275ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void usage(const char* const exe)
276ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
277ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n"
278ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "-h: display this information.\n"
279ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "-q: quiet mode -- do not print computed error.\n"
280ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "-r: trigger a race condition.\n"
281ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "-t<n>: use <n> threads.\n"
282ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         "<m>: matrix size.\n",
283ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         exe);
284ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
285ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
286ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownint main(int argc, char** argv)
287ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
288ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int matrix_size;
289ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int nthread = 1;
290ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int silent = 0;
291ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  int optchar;
292ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  elem_t *a, *inv, *prod;
293ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  elem_t eps;
294ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  double error;
295ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  double ratio;
296ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
297ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  while ((optchar = getopt(argc, argv, "hqrt:")) != EOF)
298ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
299ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    switch (optchar)
300ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    {
301ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    case 'h': usage(argv[0]); return 1;
302ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    case 'q': silent = 1; break;
303ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    case 'r': s_trigger_race = 1; break;
304ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    case 't': nthread = atoi(optarg); break;
305ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    default:
306ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return 1;
307ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    }
308ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
309ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
310ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  if (optind + 1 != argc)
311ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
312ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    fprintf(stderr, "Error: wrong number of arguments.\n");
313ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    return 1;
314ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
315ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  matrix_size = atoi(argv[optind]);
316ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
317ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  /* Error checking. */
318ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  assert(matrix_size >= 1);
319ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  assert(nthread >= 1);
320ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
321ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  omp_set_num_threads(nthread);
322ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  omp_set_dynamic(0);
323ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
324ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  eps = epsilon();
325ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  a = new_matrix(matrix_size, matrix_size);
326ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  init_matrix(a, matrix_size, matrix_size);
327ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  inv = invert_matrix(a, matrix_size);
328ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  prod = multiply_matrices(a, matrix_size, matrix_size,
329ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                           inv, matrix_size, matrix_size);
330ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  error = identity_error(prod, matrix_size);
331ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  ratio = error / (eps * matrix_size);
332ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  if (! silent)
333ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  {
334ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n",
335ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown           error, eps, ratio);
336ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  }
337ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  if (isfinite(ratio) && ratio < 100)
338ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    printf("Error within bounds.\n");
339ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  else
340ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    printf("Error out of bounds.\n");
341ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  delete_matrix(prod);
342ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  delete_matrix(inv);
343ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  delete_matrix(a);
344ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
345ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown  return 0;
346ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
347