1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Compute the matrix inverse via Gauss-Jordan elimination. 2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * This program uses OpenMP separate computation steps but no 3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * mutexes. It is an example of a race-free program on which no data races 4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * are reported by the happens-before algorithm (drd), but a lot of data races 5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * (all false positives) are reported by the Eraser-algorithm (helgrind). 6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define _GNU_SOURCE 10ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/***********************/ 12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Include directives. */ 13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/***********************/ 14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <assert.h> 16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <math.h> 17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <omp.h> 18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <stdio.h> 19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <stdlib.h> 20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include <unistd.h> // getopt() 21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*********************/ 24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Type definitions. */ 25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*********************/ 26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Browntypedef double elem_t; 28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/********************/ 31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Local variables. */ 32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/********************/ 33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic int s_trigger_race; 35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*************************/ 38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Function definitions. */ 39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*************************/ 40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Allocate memory for a matrix with the specified number of rows and 42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * columns. 43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic elem_t* new_matrix(const int rows, const int cols) 45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(rows > 0); 47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(cols > 0); 48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return malloc(rows * cols * sizeof(elem_t)); 49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Free the memory that was allocated for a matrix. */ 52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void delete_matrix(elem_t* const a) 53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown free(a); 55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Fill in some numbers in a matrix. */ 58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void init_matrix(elem_t* const a, const int rows, const int cols) 59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int i, j; 61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < rows; i++) 62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = 0; j < rows; j++) 64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a[i * cols + j] = 1.0 / (1 + abs(i-j)); 66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Print all elements of a matrix. */ 71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid print_matrix(const char* const label, 72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const elem_t* const a, const int rows, const int cols) 73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int i, j; 75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown printf("%s:\n", label); 76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < rows; i++) 77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = 0; j < cols; j++) 79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown printf("%g ", a[i * cols + j]); 81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown printf("\n"); 83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Copy a subset of the elements of a matrix into another matrix. */ 87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void copy_matrix(const elem_t* const from, 88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int from_rows, 89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int from_cols, 90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int from_row_first, 91ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int from_row_last, 92ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int from_col_first, 93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int from_col_last, 94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown elem_t* const to, 95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int to_rows, 96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int to_cols, 97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int to_row_first, 98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int to_row_last, 99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int to_col_first, 100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int to_col_last) 101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int i, j; 103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(from_row_last - from_row_first == to_row_last - to_row_first); 105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(from_col_last - from_col_first == to_col_last - to_col_first); 106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = from_row_first; i < from_row_last; i++) 108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(i < from_rows); 110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(i - from_row_first + to_row_first < to_rows); 111ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = from_col_first; j < from_col_last; j++) 112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(j < from_cols); 114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(j - from_col_first + to_col_first < to_cols); 115ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown to[(i - from_row_first + to_col_first) * to_cols 116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown + (j - from_col_first + to_col_first)] 117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown = from[i * from_cols + j]; 118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Compute the matrix product of a1 and a2. */ 123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic elem_t* multiply_matrices(const elem_t* const a1, 124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int rows1, 125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int cols1, 126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const elem_t* const a2, 127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int rows2, 128ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const int cols2) 129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int i, j, k; 131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown elem_t* prod; 132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(cols1 == rows2); 134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown prod = new_matrix(rows1, cols2); 136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < rows1; i++) 137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = 0; j < cols2; j++) 139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown prod[i * cols2 + j] = 0; 141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (k = 0; k < cols1; k++) 142ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown prod[i * cols2 + j] += a1[i * cols1 + k] * a2[k * cols2 + j]; 144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return prod; 148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Apply the Gauss-Jordan elimination algorithm on the matrix p->a starting 151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * at row r0 and up to but not including row r1. It is assumed that as many 152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * threads execute this function concurrently as the count barrier p->b was 153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * initialized with. If the matrix p->a is nonsingular, and if matrix p->a 154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * has at least as many columns as rows, the result of this algorithm is that 155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * submatrix p->a[0..p->rows-1,0..p->rows-1] is the identity matrix. 156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * @see http://en.wikipedia.org/wiki/Gauss-Jordan_elimination 157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void gj(elem_t* const a, const int rows, const int cols) 159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int i, j, k; 161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 162ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < rows; i++) 163ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // Pivoting. 166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown j = i; 167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (k = i + 1; k < rows; k++) 168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (a[k * cols + i] > a[j * cols + i]) 170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown j = k; 172ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 173ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (j != i) 175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (k = 0; k < cols; k++) 177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const elem_t t = a[i * cols + k]; 179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a[i * cols + k] = a[j * cols + k]; 180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a[j * cols + k] = t; 181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 182ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 183ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // Normalize row i. 184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (a[i * cols + i] != 0) 185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (k = cols - 1; k >= 0; k--) 187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a[i * cols + k] /= a[i * cols + i]; 189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // Reduce all rows j != i. 194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 195ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (s_trigger_race) 196ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 197ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown# pragma omp parallel for private(j) 198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = 0; j < rows; j++) 199ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 200ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (i != j) 201ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 202ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const elem_t factor = a[j * cols + i]; 203ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (k = 0; k < cols; k++) 204ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 205ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a[j * cols + k] -= a[i * cols + k] * factor; 206ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 207ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 208ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 209ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 210ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown else 211ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 212ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown# pragma omp parallel for private(j, k) 213ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = 0; j < rows; j++) 214ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (i != j) 216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const elem_t factor = a[j * cols + i]; 218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (k = 0; k < cols; k++) 219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a[j * cols + k] -= a[i * cols + k] * factor; 221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Matrix inversion via the Gauss-Jordan algorithm. */ 229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic elem_t* invert_matrix(const elem_t* const a, const int n) 230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int i, j; 232ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown elem_t* const inv = new_matrix(n, n); 233ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown elem_t* const tmp = new_matrix(n, 2*n); 234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown copy_matrix(a, n, n, 0, n, 0, n, tmp, n, 2 * n, 0, n, 0, n); 235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < n; i++) 236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = 0; j < n; j++) 237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown tmp[i * 2 * n + n + j] = (i == j); 238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown gj(tmp, n, 2*n); 239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown copy_matrix(tmp, n, 2*n, 0, n, n, 2*n, inv, n, n, 0, n, 0, n); 240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown delete_matrix(tmp); 241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return inv; 242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 243ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 244ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Compute the average square error between the identity matrix and the 245ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * product of matrix a with its inverse matrix. 246ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 247ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic double identity_error(const elem_t* const a, const int n) 248ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 249ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int i, j; 250ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown elem_t e = 0; 251ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < n; i++) 252ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 253ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (j = 0; j < n; j++) 254ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 255ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown const elem_t d = a[i * n + j] - (i == j); 256ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown e += d * d; 257ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 258ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 259ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return sqrt(e / (n * n)); 260ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 261ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 262ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/** Compute epsilon for the numeric type elem_t. Epsilon is defined as the 263ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * smallest number for which the sum of one and that number is different of 264ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * one. It is assumed that the underlying representation of elem_t uses 265ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * base two. 266ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 267ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic elem_t epsilon() 268ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 269ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown elem_t eps; 270ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (eps = 1; 1 + eps != 1; eps /= 2) 271ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ; 272ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return 2 * eps; 273ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 274ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 275ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void usage(const char* const exe) 276ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 277ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown printf("Usage: %s [-h] [-q] [-r] [-t<n>] <m>\n" 278ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown "-h: display this information.\n" 279ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown "-q: quiet mode -- do not print computed error.\n" 280ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown "-r: trigger a race condition.\n" 281ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown "-t<n>: use <n> threads.\n" 282ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown "<m>: matrix size.\n", 283ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown exe); 284ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 285ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 286ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownint main(int argc, char** argv) 287ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 288ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int matrix_size; 289ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int nthread = 1; 290ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int silent = 0; 291ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown int optchar; 292ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown elem_t *a, *inv, *prod; 293ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown elem_t eps; 294ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown double error; 295ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown double ratio; 296ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 297ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown while ((optchar = getopt(argc, argv, "hqrt:")) != EOF) 298ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 299ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown switch (optchar) 300ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 301ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 'h': usage(argv[0]); return 1; 302ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 'q': silent = 1; break; 303ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 'r': s_trigger_race = 1; break; 304ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 't': nthread = atoi(optarg); break; 305ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown default: 306ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return 1; 307ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 308ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 309ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 310ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (optind + 1 != argc) 311ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 312ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown fprintf(stderr, "Error: wrong number of arguments.\n"); 313ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return 1; 314ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 315ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown matrix_size = atoi(argv[optind]); 316ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 317ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Error checking. */ 318ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(matrix_size >= 1); 319ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown assert(nthread >= 1); 320ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 321ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown omp_set_num_threads(nthread); 322ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown omp_set_dynamic(0); 323ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 324ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown eps = epsilon(); 325ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a = new_matrix(matrix_size, matrix_size); 326ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown init_matrix(a, matrix_size, matrix_size); 327ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown inv = invert_matrix(a, matrix_size); 328ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown prod = multiply_matrices(a, matrix_size, matrix_size, 329ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown inv, matrix_size, matrix_size); 330ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown error = identity_error(prod, matrix_size); 331ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ratio = error / (eps * matrix_size); 332ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (! silent) 333ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown { 334ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown printf("error = %g; epsilon = %g; error / (epsilon * n) = %g\n", 335ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown error, eps, ratio); 336ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 337ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (isfinite(ratio) && ratio < 100) 338ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown printf("Error within bounds.\n"); 339ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown else 340ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown printf("Error out of bounds.\n"); 341ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown delete_matrix(prod); 342ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown delete_matrix(inv); 343ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown delete_matrix(a); 344ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 345ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return 0; 346ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 347