1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package org.apache.commons.math.stat.inference;
18
19import org.apache.commons.math.MathException;
20import org.apache.commons.math.MathRuntimeException;
21import org.apache.commons.math.distribution.ChiSquaredDistribution;
22import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
23import org.apache.commons.math.exception.util.LocalizedFormats;
24import org.apache.commons.math.util.FastMath;
25
26/**
27 * Implements Chi-Square test statistics defined in the
28 * {@link UnknownDistributionChiSquareTest} interface.
29 *
30 * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 août 2010) $
31 */
32public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
33
34    /** Distribution used to compute inference statistics. */
35    private ChiSquaredDistribution distribution;
36
37    /**
38     * Construct a ChiSquareTestImpl
39     */
40    public ChiSquareTestImpl() {
41        this(new ChiSquaredDistributionImpl(1.0));
42    }
43
44    /**
45     * Create a test instance using the given distribution for computing
46     * inference statistics.
47     * @param x distribution used to compute inference statistics.
48     * @since 1.2
49     */
50    public ChiSquareTestImpl(ChiSquaredDistribution x) {
51        super();
52        setDistribution(x);
53    }
54     /**
55     * {@inheritDoc}
56     * <p><strong>Note: </strong>This implementation rescales the
57     * <code>expected</code> array if necessary to ensure that the sum of the
58     * expected and observed counts are equal.</p>
59     *
60     * @param observed array of observed frequency counts
61     * @param expected array of expected frequency counts
62     * @return chi-square test statistic
63     * @throws IllegalArgumentException if preconditions are not met
64     * or length is less than 2
65     */
66    public double chiSquare(double[] expected, long[] observed)
67        throws IllegalArgumentException {
68        if (expected.length < 2) {
69            throw MathRuntimeException.createIllegalArgumentException(
70                  LocalizedFormats.INSUFFICIENT_DIMENSION, expected.length, 2);
71        }
72        if (expected.length != observed.length) {
73            throw MathRuntimeException.createIllegalArgumentException(
74                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, expected.length, observed.length);
75        }
76        checkPositive(expected);
77        checkNonNegative(observed);
78        double sumExpected = 0d;
79        double sumObserved = 0d;
80        for (int i = 0; i < observed.length; i++) {
81            sumExpected += expected[i];
82            sumObserved += observed[i];
83        }
84        double ratio = 1.0d;
85        boolean rescale = false;
86        if (FastMath.abs(sumExpected - sumObserved) > 10E-6) {
87            ratio = sumObserved / sumExpected;
88            rescale = true;
89        }
90        double sumSq = 0.0d;
91        for (int i = 0; i < observed.length; i++) {
92            if (rescale) {
93                final double dev = observed[i] - ratio * expected[i];
94                sumSq += dev * dev / (ratio * expected[i]);
95            } else {
96                final double dev = observed[i] - expected[i];
97                sumSq += dev * dev / expected[i];
98            }
99        }
100        return sumSq;
101    }
102
103    /**
104     * {@inheritDoc}
105     * <p><strong>Note: </strong>This implementation rescales the
106     * <code>expected</code> array if necessary to ensure that the sum of the
107     * expected and observed counts are equal.</p>
108     *
109     * @param observed array of observed frequency counts
110     * @param expected array of expected frequency counts
111     * @return p-value
112     * @throws IllegalArgumentException if preconditions are not met
113     * @throws MathException if an error occurs computing the p-value
114     */
115    public double chiSquareTest(double[] expected, long[] observed)
116        throws IllegalArgumentException, MathException {
117        distribution.setDegreesOfFreedom(expected.length - 1.0);
118        return 1.0 - distribution.cumulativeProbability(
119            chiSquare(expected, observed));
120    }
121
122    /**
123     * {@inheritDoc}
124     * <p><strong>Note: </strong>This implementation rescales the
125     * <code>expected</code> array if necessary to ensure that the sum of the
126     * expected and observed counts are equal.</p>
127     *
128     * @param observed array of observed frequency counts
129     * @param expected array of expected frequency counts
130     * @param alpha significance level of the test
131     * @return true iff null hypothesis can be rejected with confidence
132     * 1 - alpha
133     * @throws IllegalArgumentException if preconditions are not met
134     * @throws MathException if an error occurs performing the test
135     */
136    public boolean chiSquareTest(double[] expected, long[] observed,
137            double alpha) throws IllegalArgumentException, MathException {
138        if ((alpha <= 0) || (alpha > 0.5)) {
139            throw MathRuntimeException.createIllegalArgumentException(
140                  LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
141                  alpha, 0, 0.5);
142        }
143        return chiSquareTest(expected, observed) < alpha;
144    }
145
146    /**
147     * @param counts array representation of 2-way table
148     * @return chi-square test statistic
149     * @throws IllegalArgumentException if preconditions are not met
150     */
151    public double chiSquare(long[][] counts) throws IllegalArgumentException {
152
153        checkArray(counts);
154        int nRows = counts.length;
155        int nCols = counts[0].length;
156
157        // compute row, column and total sums
158        double[] rowSum = new double[nRows];
159        double[] colSum = new double[nCols];
160        double total = 0.0d;
161        for (int row = 0; row < nRows; row++) {
162            for (int col = 0; col < nCols; col++) {
163                rowSum[row] += counts[row][col];
164                colSum[col] += counts[row][col];
165                total += counts[row][col];
166            }
167        }
168
169        // compute expected counts and chi-square
170        double sumSq = 0.0d;
171        double expected = 0.0d;
172        for (int row = 0; row < nRows; row++) {
173            for (int col = 0; col < nCols; col++) {
174                expected = (rowSum[row] * colSum[col]) / total;
175                sumSq += ((counts[row][col] - expected) *
176                        (counts[row][col] - expected)) / expected;
177            }
178        }
179        return sumSq;
180    }
181
182    /**
183     * @param counts array representation of 2-way table
184     * @return p-value
185     * @throws IllegalArgumentException if preconditions are not met
186     * @throws MathException if an error occurs computing the p-value
187     */
188    public double chiSquareTest(long[][] counts)
189    throws IllegalArgumentException, MathException {
190        checkArray(counts);
191        double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
192        distribution.setDegreesOfFreedom(df);
193        return 1 - distribution.cumulativeProbability(chiSquare(counts));
194    }
195
196    /**
197     * @param counts array representation of 2-way table
198     * @param alpha significance level of the test
199     * @return true iff null hypothesis can be rejected with confidence
200     * 1 - alpha
201     * @throws IllegalArgumentException if preconditions are not met
202     * @throws MathException if an error occurs performing the test
203     */
204    public boolean chiSquareTest(long[][] counts, double alpha)
205    throws IllegalArgumentException, MathException {
206        if ((alpha <= 0) || (alpha > 0.5)) {
207            throw MathRuntimeException.createIllegalArgumentException(
208                  LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
209                  alpha, 0.0, 0.5);
210        }
211        return chiSquareTest(counts) < alpha;
212    }
213
214    /**
215     * @param observed1 array of observed frequency counts of the first data set
216     * @param observed2 array of observed frequency counts of the second data set
217     * @return chi-square test statistic
218     * @throws IllegalArgumentException if preconditions are not met
219     * @since 1.2
220     */
221    public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
222        throws IllegalArgumentException {
223
224        // Make sure lengths are same
225        if (observed1.length < 2) {
226            throw MathRuntimeException.createIllegalArgumentException(
227                  LocalizedFormats.INSUFFICIENT_DIMENSION, observed1.length, 2);
228        }
229        if (observed1.length != observed2.length) {
230            throw MathRuntimeException.createIllegalArgumentException(
231                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
232                  observed1.length, observed2.length);
233        }
234
235        // Ensure non-negative counts
236        checkNonNegative(observed1);
237        checkNonNegative(observed2);
238
239        // Compute and compare count sums
240        long countSum1 = 0;
241        long countSum2 = 0;
242        boolean unequalCounts = false;
243        double weight = 0.0;
244        for (int i = 0; i < observed1.length; i++) {
245            countSum1 += observed1[i];
246            countSum2 += observed2[i];
247        }
248        // Ensure neither sample is uniformly 0
249        if (countSum1 == 0) {
250            throw MathRuntimeException.createIllegalArgumentException(
251                  LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 1);
252        }
253        if (countSum2 == 0) {
254            throw MathRuntimeException.createIllegalArgumentException(
255                  LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 2);
256        }
257        // Compare and compute weight only if different
258        unequalCounts = countSum1 != countSum2;
259        if (unequalCounts) {
260            weight = FastMath.sqrt((double) countSum1 / (double) countSum2);
261        }
262        // Compute ChiSquare statistic
263        double sumSq = 0.0d;
264        double dev = 0.0d;
265        double obs1 = 0.0d;
266        double obs2 = 0.0d;
267        for (int i = 0; i < observed1.length; i++) {
268            if (observed1[i] == 0 && observed2[i] == 0) {
269                throw MathRuntimeException.createIllegalArgumentException(
270                      LocalizedFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
271            } else {
272                obs1 = observed1[i];
273                obs2 = observed2[i];
274                if (unequalCounts) { // apply weights
275                    dev = obs1/weight - obs2 * weight;
276                } else {
277                    dev = obs1 - obs2;
278                }
279                sumSq += (dev * dev) / (obs1 + obs2);
280            }
281        }
282        return sumSq;
283    }
284
285    /**
286     * @param observed1 array of observed frequency counts of the first data set
287     * @param observed2 array of observed frequency counts of the second data set
288     * @return p-value
289     * @throws IllegalArgumentException if preconditions are not met
290     * @throws MathException if an error occurs computing the p-value
291     * @since 1.2
292     */
293    public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
294        throws IllegalArgumentException, MathException {
295        distribution.setDegreesOfFreedom((double) observed1.length - 1);
296        return 1 - distribution.cumulativeProbability(
297                chiSquareDataSetsComparison(observed1, observed2));
298    }
299
300    /**
301     * @param observed1 array of observed frequency counts of the first data set
302     * @param observed2 array of observed frequency counts of the second data set
303     * @param alpha significance level of the test
304     * @return true iff null hypothesis can be rejected with confidence
305     * 1 - alpha
306     * @throws IllegalArgumentException if preconditions are not met
307     * @throws MathException if an error occurs performing the test
308     * @since 1.2
309     */
310    public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
311            double alpha) throws IllegalArgumentException, MathException {
312        if ((alpha <= 0) || (alpha > 0.5)) {
313            throw MathRuntimeException.createIllegalArgumentException(
314                  LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
315                  alpha, 0.0, 0.5);
316        }
317        return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
318    }
319
320    /**
321     * Checks to make sure that the input long[][] array is rectangular,
322     * has at least 2 rows and 2 columns, and has all non-negative entries,
323     * throwing IllegalArgumentException if any of these checks fail.
324     *
325     * @param in input 2-way table to check
326     * @throws IllegalArgumentException if the array is not valid
327     */
328    private void checkArray(long[][] in) throws IllegalArgumentException {
329
330        if (in.length < 2) {
331            throw MathRuntimeException.createIllegalArgumentException(
332                  LocalizedFormats.INSUFFICIENT_DIMENSION, in.length, 2);
333        }
334
335        if (in[0].length < 2) {
336            throw MathRuntimeException.createIllegalArgumentException(
337                  LocalizedFormats.INSUFFICIENT_DIMENSION, in[0].length, 2);
338        }
339
340        checkRectangular(in);
341        checkNonNegative(in);
342
343    }
344
345    //---------------------  Private array methods -- should find a utility home for these
346
347    /**
348     * Throws IllegalArgumentException if the input array is not rectangular.
349     *
350     * @param in array to be tested
351     * @throws NullPointerException if input array is null
352     * @throws IllegalArgumentException if input array is not rectangular
353     */
354    private void checkRectangular(long[][] in) {
355        for (int i = 1; i < in.length; i++) {
356            if (in[i].length != in[0].length) {
357                throw MathRuntimeException.createIllegalArgumentException(
358                      LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
359                      in[i].length, in[0].length);
360            }
361        }
362    }
363
364    /**
365     * Check all entries of the input array are > 0.
366     *
367     * @param in array to be tested
368     * @exception IllegalArgumentException if one entry is not positive
369     */
370    private void checkPositive(double[] in) throws IllegalArgumentException {
371        for (int i = 0; i < in.length; i++) {
372            if (in[i] <= 0) {
373                throw MathRuntimeException.createIllegalArgumentException(
374                      LocalizedFormats.NOT_POSITIVE_ELEMENT_AT_INDEX,
375                      i, in[i]);
376            }
377        }
378    }
379
380    /**
381     * Check all entries of the input array are >= 0.
382     *
383     * @param in array to be tested
384     * @exception IllegalArgumentException if one entry is negative
385     */
386    private void checkNonNegative(long[] in) throws IllegalArgumentException {
387        for (int i = 0; i < in.length; i++) {
388            if (in[i] < 0) {
389                throw MathRuntimeException.createIllegalArgumentException(
390                      LocalizedFormats.NEGATIVE_ELEMENT_AT_INDEX,
391                      i, in[i]);
392            }
393        }
394    }
395
396    /**
397     * Check all entries of the input array are >= 0.
398     *
399     * @param in array to be tested
400     * @exception IllegalArgumentException if one entry is negative
401     */
402    private void checkNonNegative(long[][] in) throws IllegalArgumentException {
403        for (int i = 0; i < in.length; i ++) {
404            for (int j = 0; j < in[i].length; j++) {
405                if (in[i][j] < 0) {
406                    throw MathRuntimeException.createIllegalArgumentException(
407                          LocalizedFormats.NEGATIVE_ELEMENT_AT_2D_INDEX,
408                          i, j, in[i][j]);
409                }
410            }
411        }
412    }
413
414    /**
415     * Modify the distribution used to compute inference statistics.
416     *
417     * @param value
418     *            the new distribution
419     * @since 1.2
420     */
421    public void setDistribution(ChiSquaredDistribution value) {
422        distribution = value;
423    }
424}
425