1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package org.apache.commons.math.stat.correlation;
18
19import org.apache.commons.math.MathRuntimeException;
20import org.apache.commons.math.exception.util.LocalizedFormats;
21import org.apache.commons.math.linear.RealMatrix;
22import org.apache.commons.math.linear.BlockRealMatrix;
23import org.apache.commons.math.stat.descriptive.moment.Mean;
24import org.apache.commons.math.stat.descriptive.moment.Variance;
25
26/**
27 * Computes covariances for pairs of arrays or columns of a matrix.
28 *
29 * <p>The constructors that take <code>RealMatrix</code> or
30 * <code>double[][]</code> arguments generate covariance matrices.  The
31 * columns of the input matrices are assumed to represent variable values.</p>
32 *
33 * <p>The constructor argument <code>biasCorrected</code> determines whether or
34 * not computed covariances are bias-corrected.</p>
35 *
36 * <p>Unbiased covariances are given by the formula</p>
37 * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
38 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
39 * is the mean of the <code>Y</code> values.
40 *
41 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
42 *
43 * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
44 * @since 2.0
45 */
46public class Covariance {
47
48    /** covariance matrix */
49    private final RealMatrix covarianceMatrix;
50
51    /**
52     * Create an empty covariance matrix.
53     */
54    /** Number of observations (length of covariate vectors) */
55    private final int n;
56
57    /**
58     * Create a Covariance with no data
59     */
60    public Covariance() {
61        super();
62        covarianceMatrix = null;
63        n = 0;
64    }
65
66    /**
67     * Create a Covariance matrix from a rectangular array
68     * whose columns represent covariates.
69     *
70     * <p>The <code>biasCorrected</code> parameter determines whether or not
71     * covariance estimates are bias-corrected.</p>
72     *
73     * <p>The input array must be rectangular with at least two columns
74     * and two rows.</p>
75     *
76     * @param data rectangular array with columns representing covariates
77     * @param biasCorrected true means covariances are bias-corrected
78     * @throws IllegalArgumentException if the input data array is not
79     * rectangular with at least two rows and two columns.
80     */
81    public Covariance(double[][] data, boolean biasCorrected) {
82        this(new BlockRealMatrix(data), biasCorrected);
83    }
84
85    /**
86     * Create a Covariance matrix from a rectangular array
87     * whose columns represent covariates.
88     *
89     * <p>The input array must be rectangular with at least two columns
90     * and two rows</p>
91     *
92     * @param data rectangular array with columns representing covariates
93     * @throws IllegalArgumentException if the input data array is not
94     * rectangular with at least two rows and two columns.
95     */
96    public Covariance(double[][] data) {
97        this(data, true);
98    }
99
100    /**
101     * Create a covariance matrix from a matrix whose columns
102     * represent covariates.
103     *
104     * <p>The <code>biasCorrected</code> parameter determines whether or not
105     * covariance estimates are bias-corrected.</p>
106     *
107     * <p>The matrix must have at least two columns and two rows</p>
108     *
109     * @param matrix matrix with columns representing covariates
110     * @param biasCorrected true means covariances are bias-corrected
111     * @throws IllegalArgumentException if the input matrix does not have
112     * at least two rows and two columns
113     */
114    public Covariance(RealMatrix matrix, boolean biasCorrected) {
115       checkSufficientData(matrix);
116       n = matrix.getRowDimension();
117       covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
118    }
119
120    /**
121     * Create a covariance matrix from a matrix whose columns
122     * represent covariates.
123     *
124     * <p>The matrix must have at least two columns and two rows</p>
125     *
126     * @param matrix matrix with columns representing covariates
127     * @throws IllegalArgumentException if the input matrix does not have
128     * at least two rows and two columns
129     */
130    public Covariance(RealMatrix matrix) {
131        this(matrix, true);
132    }
133
134    /**
135     * Returns the covariance matrix
136     *
137     * @return covariance matrix
138     */
139    public RealMatrix getCovarianceMatrix() {
140        return covarianceMatrix;
141    }
142
143    /**
144     * Returns the number of observations (length of covariate vectors)
145     *
146     * @return number of observations
147     */
148
149    public int getN() {
150        return n;
151    }
152
153    /**
154     * Compute a covariance matrix from a matrix whose columns represent
155     * covariates.
156     * @param matrix input matrix (must have at least two columns and two rows)
157     * @param biasCorrected determines whether or not covariance estimates are bias-corrected
158     * @return covariance matrix
159     */
160    protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
161        int dimension = matrix.getColumnDimension();
162        Variance variance = new Variance(biasCorrected);
163        RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
164        for (int i = 0; i < dimension; i++) {
165            for (int j = 0; j < i; j++) {
166              double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
167              outMatrix.setEntry(i, j, cov);
168              outMatrix.setEntry(j, i, cov);
169            }
170            outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
171        }
172        return outMatrix;
173    }
174
175    /**
176     * Create a covariance matrix from a matrix whose columns represent
177     * covariates. Covariances are computed using the bias-corrected formula.
178     * @param matrix input matrix (must have at least two columns and two rows)
179     * @return covariance matrix
180     * @see #Covariance
181     */
182    protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
183        return computeCovarianceMatrix(matrix, true);
184    }
185
186    /**
187     * Compute a covariance matrix from a rectangular array whose columns represent
188     * covariates.
189     * @param data input array (must have at least two columns and two rows)
190     * @param biasCorrected determines whether or not covariance estimates are bias-corrected
191     * @return covariance matrix
192     */
193    protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
194        return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
195    }
196
197    /**
198     * Create a covariance matrix from a rectangual array whose columns represent
199     * covariates. Covariances are computed using the bias-corrected formula.
200     * @param data input array (must have at least two columns and two rows)
201     * @return covariance matrix
202     * @see #Covariance
203     */
204    protected RealMatrix computeCovarianceMatrix(double[][] data) {
205        return computeCovarianceMatrix(data, true);
206    }
207
208    /**
209     * Computes the covariance between the two arrays.
210     *
211     * <p>Array lengths must match and the common length must be at least 2.</p>
212     *
213     * @param xArray first data array
214     * @param yArray second data array
215     * @param biasCorrected if true, returned value will be bias-corrected
216     * @return returns the covariance for the two arrays
217     * @throws  IllegalArgumentException if the arrays lengths do not match or
218     * there is insufficient data
219     */
220    public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
221        throws IllegalArgumentException {
222        Mean mean = new Mean();
223        double result = 0d;
224        int length = xArray.length;
225        if (length != yArray.length) {
226            throw MathRuntimeException.createIllegalArgumentException(
227                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
228        } else if (length < 2) {
229            throw MathRuntimeException.createIllegalArgumentException(
230                  LocalizedFormats.INSUFFICIENT_DIMENSION, length, 2);
231        } else {
232            double xMean = mean.evaluate(xArray);
233            double yMean = mean.evaluate(yArray);
234            for (int i = 0; i < length; i++) {
235                double xDev = xArray[i] - xMean;
236                double yDev = yArray[i] - yMean;
237                result += (xDev * yDev - result) / (i + 1);
238            }
239        }
240        return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
241    }
242
243    /**
244     * Computes the covariance between the two arrays, using the bias-corrected
245     * formula.
246     *
247     * <p>Array lengths must match and the common length must be at least 2.</p>
248     *
249     * @param xArray first data array
250     * @param yArray second data array
251     * @return returns the covariance for the two arrays
252     * @throws  IllegalArgumentException if the arrays lengths do not match or
253     * there is insufficient data
254     */
255    public double covariance(final double[] xArray, final double[] yArray)
256        throws IllegalArgumentException {
257        return covariance(xArray, yArray, true);
258    }
259
260    /**
261     * Throws IllegalArgumentException of the matrix does not have at least
262     * two columns and two rows
263     * @param matrix matrix to check
264     */
265    private void checkSufficientData(final RealMatrix matrix) {
266        int nRows = matrix.getRowDimension();
267        int nCols = matrix.getColumnDimension();
268        if (nRows < 2 || nCols < 2) {
269            throw MathRuntimeException.createIllegalArgumentException(
270                    LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
271                    nRows, nCols);
272        }
273    }
274}
275