1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17package org.apache.commons.math.stat.correlation; 18 19import org.apache.commons.math.MathException; 20import org.apache.commons.math.MathRuntimeException; 21import org.apache.commons.math.distribution.TDistribution; 22import org.apache.commons.math.distribution.TDistributionImpl; 23import org.apache.commons.math.exception.util.LocalizedFormats; 24import org.apache.commons.math.exception.NullArgumentException; 25import org.apache.commons.math.exception.DimensionMismatchException; 26import org.apache.commons.math.linear.RealMatrix; 27import org.apache.commons.math.linear.BlockRealMatrix; 28import org.apache.commons.math.stat.regression.SimpleRegression; 29import org.apache.commons.math.util.FastMath; 30 31/** 32 * Computes Pearson's product-moment correlation coefficients for pairs of arrays 33 * or columns of a matrix. 34 * 35 * <p>The constructors that take <code>RealMatrix</code> or 36 * <code>double[][]</code> arguments generate correlation matrices. The 37 * columns of the input matrices are assumed to represent variable values. 38 * Correlations are given by the formula</p> 39 * <code>cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code> 40 * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code> 41 * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations. 42 * 43 * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 août 2010) $ 44 * @since 2.0 45 */ 46public class PearsonsCorrelation { 47 48 /** correlation matrix */ 49 private final RealMatrix correlationMatrix; 50 51 /** number of observations */ 52 private final int nObs; 53 54 /** 55 * Create a PearsonsCorrelation instance without data 56 */ 57 public PearsonsCorrelation() { 58 super(); 59 correlationMatrix = null; 60 nObs = 0; 61 } 62 63 /** 64 * Create a PearsonsCorrelation from a rectangular array 65 * whose columns represent values of variables to be correlated. 66 * 67 * @param data rectangular array with columns representing variables 68 * @throws IllegalArgumentException if the input data array is not 69 * rectangular with at least two rows and two columns. 70 */ 71 public PearsonsCorrelation(double[][] data) { 72 this(new BlockRealMatrix(data)); 73 } 74 75 /** 76 * Create a PearsonsCorrelation from a RealMatrix whose columns 77 * represent variables to be correlated. 78 * 79 * @param matrix matrix with columns representing variables to correlate 80 */ 81 public PearsonsCorrelation(RealMatrix matrix) { 82 checkSufficientData(matrix); 83 nObs = matrix.getRowDimension(); 84 correlationMatrix = computeCorrelationMatrix(matrix); 85 } 86 87 /** 88 * Create a PearsonsCorrelation from a {@link Covariance}. The correlation 89 * matrix is computed by scaling the Covariance's covariance matrix. 90 * The Covariance instance must have been created from a data matrix with 91 * columns representing variable values. 92 * 93 * @param covariance Covariance instance 94 */ 95 public PearsonsCorrelation(Covariance covariance) { 96 RealMatrix covarianceMatrix = covariance.getCovarianceMatrix(); 97 if (covarianceMatrix == null) { 98 throw new NullArgumentException(LocalizedFormats.COVARIANCE_MATRIX); 99 } 100 nObs = covariance.getN(); 101 correlationMatrix = covarianceToCorrelation(covarianceMatrix); 102 } 103 104 /** 105 * Create a PearsonsCorrelation from a covariance matrix. The correlation 106 * matrix is computed by scaling the covariance matrix. 107 * 108 * @param covarianceMatrix covariance matrix 109 * @param numberOfObservations the number of observations in the dataset used to compute 110 * the covariance matrix 111 */ 112 public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) { 113 nObs = numberOfObservations; 114 correlationMatrix = covarianceToCorrelation(covarianceMatrix); 115 116 } 117 118 /** 119 * Returns the correlation matrix 120 * 121 * @return correlation matrix 122 */ 123 public RealMatrix getCorrelationMatrix() { 124 return correlationMatrix; 125 } 126 127 /** 128 * Returns a matrix of standard errors associated with the estimates 129 * in the correlation matrix.<br/> 130 * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard 131 * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code> 132 * <p>The formula used to compute the standard error is <br/> 133 * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code> 134 * where <code>r</code> is the estimated correlation coefficient and 135 * <code>n</code> is the number of observations in the source dataset.</p> 136 * 137 * @return matrix of correlation standard errors 138 */ 139 public RealMatrix getCorrelationStandardErrors() { 140 int nVars = correlationMatrix.getColumnDimension(); 141 double[][] out = new double[nVars][nVars]; 142 for (int i = 0; i < nVars; i++) { 143 for (int j = 0; j < nVars; j++) { 144 double r = correlationMatrix.getEntry(i, j); 145 out[i][j] = FastMath.sqrt((1 - r * r) /(nObs - 2)); 146 } 147 } 148 return new BlockRealMatrix(out); 149 } 150 151 /** 152 * Returns a matrix of p-values associated with the (two-sided) null 153 * hypothesis that the corresponding correlation coefficient is zero. 154 * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability 155 * that a random variable distributed as <code>t<sub>n-2</sub></code> takes 156 * a value with absolute value greater than or equal to <br> 157 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p> 158 * <p>The values in the matrix are sometimes referred to as the 159 * <i>significance</i> of the corresponding correlation coefficients.</p> 160 * 161 * @return matrix of p-values 162 * @throws MathException if an error occurs estimating probabilities 163 */ 164 public RealMatrix getCorrelationPValues() throws MathException { 165 TDistribution tDistribution = new TDistributionImpl(nObs - 2); 166 int nVars = correlationMatrix.getColumnDimension(); 167 double[][] out = new double[nVars][nVars]; 168 for (int i = 0; i < nVars; i++) { 169 for (int j = 0; j < nVars; j++) { 170 if (i == j) { 171 out[i][j] = 0d; 172 } else { 173 double r = correlationMatrix.getEntry(i, j); 174 double t = FastMath.abs(r * FastMath.sqrt((nObs - 2)/(1 - r * r))); 175 out[i][j] = 2 * tDistribution.cumulativeProbability(-t); 176 } 177 } 178 } 179 return new BlockRealMatrix(out); 180 } 181 182 183 /** 184 * Computes the correlation matrix for the columns of the 185 * input matrix. 186 * 187 * @param matrix matrix with columns representing variables to correlate 188 * @return correlation matrix 189 */ 190 public RealMatrix computeCorrelationMatrix(RealMatrix matrix) { 191 int nVars = matrix.getColumnDimension(); 192 RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars); 193 for (int i = 0; i < nVars; i++) { 194 for (int j = 0; j < i; j++) { 195 double corr = correlation(matrix.getColumn(i), matrix.getColumn(j)); 196 outMatrix.setEntry(i, j, corr); 197 outMatrix.setEntry(j, i, corr); 198 } 199 outMatrix.setEntry(i, i, 1d); 200 } 201 return outMatrix; 202 } 203 204 /** 205 * Computes the correlation matrix for the columns of the 206 * input rectangular array. The colums of the array represent values 207 * of variables to be correlated. 208 * 209 * @param data matrix with columns representing variables to correlate 210 * @return correlation matrix 211 */ 212 public RealMatrix computeCorrelationMatrix(double[][] data) { 213 return computeCorrelationMatrix(new BlockRealMatrix(data)); 214 } 215 216 /** 217 * Computes the Pearson's product-moment correlation coefficient between the two arrays. 218 * 219 * </p>Throws IllegalArgumentException if the arrays do not have the same length 220 * or their common length is less than 2</p> 221 * 222 * @param xArray first data array 223 * @param yArray second data array 224 * @return Returns Pearson's correlation coefficient for the two arrays 225 * @throws IllegalArgumentException if the arrays lengths do not match or 226 * there is insufficient data 227 */ 228 public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException { 229 SimpleRegression regression = new SimpleRegression(); 230 if (xArray.length != yArray.length) { 231 throw new DimensionMismatchException(xArray.length, yArray.length); 232 } else if (xArray.length < 2) { 233 throw MathRuntimeException.createIllegalArgumentException( 234 LocalizedFormats.INSUFFICIENT_DIMENSION, xArray.length, 2); 235 } else { 236 for(int i=0; i<xArray.length; i++) { 237 regression.addData(xArray[i], yArray[i]); 238 } 239 return regression.getR(); 240 } 241 } 242 243 /** 244 * Derives a correlation matrix from a covariance matrix. 245 * 246 * <p>Uses the formula <br/> 247 * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where 248 * <code>r(·,·)</code> is the correlation coefficient and 249 * <code>s(·)</code> means standard deviation.</p> 250 * 251 * @param covarianceMatrix the covariance matrix 252 * @return correlation matrix 253 */ 254 public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) { 255 int nVars = covarianceMatrix.getColumnDimension(); 256 RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars); 257 for (int i = 0; i < nVars; i++) { 258 double sigma = FastMath.sqrt(covarianceMatrix.getEntry(i, i)); 259 outMatrix.setEntry(i, i, 1d); 260 for (int j = 0; j < i; j++) { 261 double entry = covarianceMatrix.getEntry(i, j) / 262 (sigma * FastMath.sqrt(covarianceMatrix.getEntry(j, j))); 263 outMatrix.setEntry(i, j, entry); 264 outMatrix.setEntry(j, i, entry); 265 } 266 } 267 return outMatrix; 268 } 269 270 /** 271 * Throws IllegalArgumentException of the matrix does not have at least 272 * two columns and two rows 273 * 274 * @param matrix matrix to check for sufficiency 275 */ 276 private void checkSufficientData(final RealMatrix matrix) { 277 int nRows = matrix.getRowDimension(); 278 int nCols = matrix.getColumnDimension(); 279 if (nRows < 2 || nCols < 2) { 280 throw MathRuntimeException.createIllegalArgumentException( 281 LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS, 282 nRows, nCols); 283 } 284 } 285} 286