1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package org.apache.commons.math.stat.correlation;
18
19import org.apache.commons.math.MathException;
20import org.apache.commons.math.MathRuntimeException;
21import org.apache.commons.math.distribution.TDistribution;
22import org.apache.commons.math.distribution.TDistributionImpl;
23import org.apache.commons.math.exception.util.LocalizedFormats;
24import org.apache.commons.math.exception.NullArgumentException;
25import org.apache.commons.math.exception.DimensionMismatchException;
26import org.apache.commons.math.linear.RealMatrix;
27import org.apache.commons.math.linear.BlockRealMatrix;
28import org.apache.commons.math.stat.regression.SimpleRegression;
29import org.apache.commons.math.util.FastMath;
30
31/**
32 * Computes Pearson's product-moment correlation coefficients for pairs of arrays
33 * or columns of a matrix.
34 *
35 * <p>The constructors that take <code>RealMatrix</code> or
36 * <code>double[][]</code> arguments generate correlation matrices.  The
37 * columns of the input matrices are assumed to represent variable values.
38 * Correlations are given by the formula</p>
39 * <code>cor(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code>
40 * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code>
41 * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations.
42 *
43 * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 août 2010) $
44 * @since 2.0
45 */
46public class PearsonsCorrelation {
47
48    /** correlation matrix */
49    private final RealMatrix correlationMatrix;
50
51    /** number of observations */
52    private final int nObs;
53
54    /**
55     * Create a PearsonsCorrelation instance without data
56     */
57    public PearsonsCorrelation() {
58        super();
59        correlationMatrix = null;
60        nObs = 0;
61    }
62
63    /**
64     * Create a PearsonsCorrelation from a rectangular array
65     * whose columns represent values of variables to be correlated.
66     *
67     * @param data rectangular array with columns representing variables
68     * @throws IllegalArgumentException if the input data array is not
69     * rectangular with at least two rows and two columns.
70     */
71    public PearsonsCorrelation(double[][] data) {
72        this(new BlockRealMatrix(data));
73    }
74
75    /**
76     * Create a PearsonsCorrelation from a RealMatrix whose columns
77     * represent variables to be correlated.
78     *
79     * @param matrix matrix with columns representing variables to correlate
80     */
81    public PearsonsCorrelation(RealMatrix matrix) {
82        checkSufficientData(matrix);
83        nObs = matrix.getRowDimension();
84        correlationMatrix = computeCorrelationMatrix(matrix);
85    }
86
87    /**
88     * Create a PearsonsCorrelation from a {@link Covariance}.  The correlation
89     * matrix is computed by scaling the Covariance's covariance matrix.
90     * The Covariance instance must have been created from a data matrix with
91     * columns representing variable values.
92     *
93     * @param covariance Covariance instance
94     */
95    public PearsonsCorrelation(Covariance covariance) {
96        RealMatrix covarianceMatrix = covariance.getCovarianceMatrix();
97        if (covarianceMatrix == null) {
98            throw new NullArgumentException(LocalizedFormats.COVARIANCE_MATRIX);
99        }
100        nObs = covariance.getN();
101        correlationMatrix = covarianceToCorrelation(covarianceMatrix);
102    }
103
104    /**
105     * Create a PearsonsCorrelation from a covariance matrix.  The correlation
106     * matrix is computed by scaling the covariance matrix.
107     *
108     * @param covarianceMatrix covariance matrix
109     * @param numberOfObservations the number of observations in the dataset used to compute
110     * the covariance matrix
111     */
112    public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) {
113        nObs = numberOfObservations;
114        correlationMatrix = covarianceToCorrelation(covarianceMatrix);
115
116    }
117
118    /**
119     * Returns the correlation matrix
120     *
121     * @return correlation matrix
122     */
123    public RealMatrix getCorrelationMatrix() {
124        return correlationMatrix;
125    }
126
127    /**
128     * Returns a matrix of standard errors associated with the estimates
129     * in the correlation matrix.<br/>
130     * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard
131     * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code>
132     * <p>The formula used to compute the standard error is <br/>
133     * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code>
134     * where <code>r</code> is the estimated correlation coefficient and
135     * <code>n</code> is the number of observations in the source dataset.</p>
136     *
137     * @return matrix of correlation standard errors
138     */
139    public RealMatrix getCorrelationStandardErrors() {
140        int nVars = correlationMatrix.getColumnDimension();
141        double[][] out = new double[nVars][nVars];
142        for (int i = 0; i < nVars; i++) {
143            for (int j = 0; j < nVars; j++) {
144                double r = correlationMatrix.getEntry(i, j);
145                out[i][j] = FastMath.sqrt((1 - r * r) /(nObs - 2));
146            }
147        }
148        return new BlockRealMatrix(out);
149    }
150
151    /**
152     * Returns a matrix of p-values associated with the (two-sided) null
153     * hypothesis that the corresponding correlation coefficient is zero.
154     * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability
155     * that a random variable distributed as <code>t<sub>n-2</sub></code> takes
156     * a value with absolute value greater than or equal to <br>
157     * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p>
158     * <p>The values in the matrix are sometimes referred to as the
159     * <i>significance</i> of the corresponding correlation coefficients.</p>
160     *
161     * @return matrix of p-values
162     * @throws MathException if an error occurs estimating probabilities
163     */
164    public RealMatrix getCorrelationPValues() throws MathException {
165        TDistribution tDistribution = new TDistributionImpl(nObs - 2);
166        int nVars = correlationMatrix.getColumnDimension();
167        double[][] out = new double[nVars][nVars];
168        for (int i = 0; i < nVars; i++) {
169            for (int j = 0; j < nVars; j++) {
170                if (i == j) {
171                    out[i][j] = 0d;
172                } else {
173                    double r = correlationMatrix.getEntry(i, j);
174                    double t = FastMath.abs(r * FastMath.sqrt((nObs - 2)/(1 - r * r)));
175                    out[i][j] = 2 * tDistribution.cumulativeProbability(-t);
176                }
177            }
178        }
179        return new BlockRealMatrix(out);
180    }
181
182
183    /**
184     * Computes the correlation matrix for the columns of the
185     * input matrix.
186     *
187     * @param matrix matrix with columns representing variables to correlate
188     * @return correlation matrix
189     */
190    public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
191        int nVars = matrix.getColumnDimension();
192        RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
193        for (int i = 0; i < nVars; i++) {
194            for (int j = 0; j < i; j++) {
195              double corr = correlation(matrix.getColumn(i), matrix.getColumn(j));
196              outMatrix.setEntry(i, j, corr);
197              outMatrix.setEntry(j, i, corr);
198            }
199            outMatrix.setEntry(i, i, 1d);
200        }
201        return outMatrix;
202    }
203
204    /**
205     * Computes the correlation matrix for the columns of the
206     * input rectangular array.  The colums of the array represent values
207     * of variables to be correlated.
208     *
209     * @param data matrix with columns representing variables to correlate
210     * @return correlation matrix
211     */
212    public RealMatrix computeCorrelationMatrix(double[][] data) {
213       return computeCorrelationMatrix(new BlockRealMatrix(data));
214    }
215
216    /**
217     * Computes the Pearson's product-moment correlation coefficient between the two arrays.
218     *
219     * </p>Throws IllegalArgumentException if the arrays do not have the same length
220     * or their common length is less than 2</p>
221     *
222     * @param xArray first data array
223     * @param yArray second data array
224     * @return Returns Pearson's correlation coefficient for the two arrays
225     * @throws  IllegalArgumentException if the arrays lengths do not match or
226     * there is insufficient data
227     */
228    public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException {
229        SimpleRegression regression = new SimpleRegression();
230        if (xArray.length != yArray.length) {
231            throw new DimensionMismatchException(xArray.length, yArray.length);
232        } else if (xArray.length < 2) {
233            throw MathRuntimeException.createIllegalArgumentException(
234                  LocalizedFormats.INSUFFICIENT_DIMENSION, xArray.length, 2);
235        } else {
236            for(int i=0; i<xArray.length; i++) {
237                regression.addData(xArray[i], yArray[i]);
238            }
239            return regression.getR();
240        }
241    }
242
243    /**
244     * Derives a correlation matrix from a covariance matrix.
245     *
246     * <p>Uses the formula <br/>
247     * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where
248     * <code>r(&middot,&middot;)</code> is the correlation coefficient and
249     * <code>s(&middot;)</code> means standard deviation.</p>
250     *
251     * @param covarianceMatrix the covariance matrix
252     * @return correlation matrix
253     */
254    public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) {
255        int nVars = covarianceMatrix.getColumnDimension();
256        RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars);
257        for (int i = 0; i < nVars; i++) {
258            double sigma = FastMath.sqrt(covarianceMatrix.getEntry(i, i));
259            outMatrix.setEntry(i, i, 1d);
260            for (int j = 0; j < i; j++) {
261                double entry = covarianceMatrix.getEntry(i, j) /
262                       (sigma * FastMath.sqrt(covarianceMatrix.getEntry(j, j)));
263                outMatrix.setEntry(i, j, entry);
264                outMatrix.setEntry(j, i, entry);
265            }
266        }
267        return outMatrix;
268    }
269
270    /**
271     * Throws IllegalArgumentException of the matrix does not have at least
272     * two columns and two rows
273     *
274     * @param matrix matrix to check for sufficiency
275     */
276    private void checkSufficientData(final RealMatrix matrix) {
277        int nRows = matrix.getRowDimension();
278        int nCols = matrix.getColumnDimension();
279        if (nRows < 2 || nCols < 2) {
280            throw MathRuntimeException.createIllegalArgumentException(
281                    LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
282                    nRows, nCols);
283        }
284    }
285}
286