1dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond/*
2dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * Licensed to the Apache Software Foundation (ASF) under one or more
3dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * contributor license agreements.  See the NOTICE file distributed with
4dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * this work for additional information regarding copyright ownership.
5dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * The ASF licenses this file to You under the Apache License, Version 2.0
6dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * (the "License"); you may not use this file except in compliance with
7dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * the License.  You may obtain a copy of the License at
8dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond *
9dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond *      http://www.apache.org/licenses/LICENSE-2.0
10dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond *
11dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * Unless required by applicable law or agreed to in writing, software
12dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * distributed under the License is distributed on an "AS IS" BASIS,
13dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * See the License for the specific language governing permissions and
15dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * limitations under the License.
16dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond */
17dee0849a9704d532af0b550146cbafbaa6ee1d19Raymondpackage org.apache.commons.math.stat.inference;
18dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
19dee0849a9704d532af0b550146cbafbaa6ee1d19Raymondimport org.apache.commons.math.MathException;
20dee0849a9704d532af0b550146cbafbaa6ee1d19Raymondimport org.apache.commons.math.MathRuntimeException;
21dee0849a9704d532af0b550146cbafbaa6ee1d19Raymondimport org.apache.commons.math.distribution.ChiSquaredDistribution;
22dee0849a9704d532af0b550146cbafbaa6ee1d19Raymondimport org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
23dee0849a9704d532af0b550146cbafbaa6ee1d19Raymondimport org.apache.commons.math.exception.util.LocalizedFormats;
24dee0849a9704d532af0b550146cbafbaa6ee1d19Raymondimport org.apache.commons.math.util.FastMath;
25dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
26dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond/**
27dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * Implements Chi-Square test statistics defined in the
28dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * {@link UnknownDistributionChiSquareTest} interface.
29dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond *
30dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond * @version $Revision: 990655 $ $Date: 2010-08-29 23:49:40 +0200 (dim. 29 août 2010) $
31dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond */
32dee0849a9704d532af0b550146cbafbaa6ee1d19Raymondpublic class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
33dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
34dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /** Distribution used to compute inference statistics. */
35dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    private ChiSquaredDistribution distribution;
36dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
/**
 * Builds a test instance backed by a {@link ChiSquaredDistributionImpl}
 * constructed with the value 1.0. The p-value methods of this class reset
 * the degrees of freedom of the distribution before each use.
 */
public ChiSquareTestImpl() {
    this(new ChiSquaredDistributionImpl(1.0d));
}
43dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
/**
 * Creates a test instance that computes its inference statistics with the
 * supplied distribution. The distribution is installed through
 * {@code setDistribution}.
 *
 * @param x distribution used to compute inference statistics.
 * @since 1.2
 */
public ChiSquareTestImpl(ChiSquaredDistribution x) {
    // the implicit superclass constructor call is sufficient here
    setDistribution(x);
}
54dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     /**
55dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * {@inheritDoc}
56dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * <p><strong>Note: </strong>This implementation rescales the
57dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * <code>expected</code> array if necessary to ensure that the sum of the
58dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * expected and observed counts are equal.</p>
59dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
60dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed array of observed frequency counts
61dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param expected array of expected frequency counts
62dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return chi-square test statistic
63dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
64dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * or length is less than 2
65dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
66dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public double chiSquare(double[] expected, long[] observed)
67dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        throws IllegalArgumentException {
68dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (expected.length < 2) {
69dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
70dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.INSUFFICIENT_DIMENSION, expected.length, 2);
71dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
72dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (expected.length != observed.length) {
73dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
74dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, expected.length, observed.length);
75dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
76dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        checkPositive(expected);
77dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        checkNonNegative(observed);
78dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double sumExpected = 0d;
79dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double sumObserved = 0d;
80dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int i = 0; i < observed.length; i++) {
81dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            sumExpected += expected[i];
82dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            sumObserved += observed[i];
83dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
84dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double ratio = 1.0d;
85dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        boolean rescale = false;
86dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (FastMath.abs(sumExpected - sumObserved) > 10E-6) {
87dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            ratio = sumObserved / sumExpected;
88dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            rescale = true;
89dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
90dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double sumSq = 0.0d;
91dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int i = 0; i < observed.length; i++) {
92dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            if (rescale) {
93dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                final double dev = observed[i] - ratio * expected[i];
94dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                sumSq += dev * dev / (ratio * expected[i]);
95dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            } else {
96dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                final double dev = observed[i] - expected[i];
97dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                sumSq += dev * dev / expected[i];
98dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            }
99dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
100dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return sumSq;
101dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
102dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
103dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
104dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * {@inheritDoc}
105dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * <p><strong>Note: </strong>This implementation rescales the
106dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * <code>expected</code> array if necessary to ensure that the sum of the
107dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * expected and observed counts are equal.</p>
108dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
109dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed array of observed frequency counts
110dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param expected array of expected frequency counts
111dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return p-value
112dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
113dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws MathException if an error occurs computing the p-value
114dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
115dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public double chiSquareTest(double[] expected, long[] observed)
116dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        throws IllegalArgumentException, MathException {
117dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        distribution.setDegreesOfFreedom(expected.length - 1.0);
118dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return 1.0 - distribution.cumulativeProbability(
119dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            chiSquare(expected, observed));
120dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
121dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
122dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
123dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * {@inheritDoc}
124dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * <p><strong>Note: </strong>This implementation rescales the
125dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * <code>expected</code> array if necessary to ensure that the sum of the
126dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * expected and observed counts are equal.</p>
127dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
128dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed array of observed frequency counts
129dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param expected array of expected frequency counts
130dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param alpha significance level of the test
131dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return true iff null hypothesis can be rejected with confidence
132dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * 1 - alpha
133dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
134dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws MathException if an error occurs performing the test
135dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
136dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public boolean chiSquareTest(double[] expected, long[] observed,
137dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            double alpha) throws IllegalArgumentException, MathException {
138dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if ((alpha <= 0) || (alpha > 0.5)) {
139dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
140dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
141dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  alpha, 0, 0.5);
142dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
143dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return chiSquareTest(expected, observed) < alpha;
144dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
145dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
146dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
147dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param counts array representation of 2-way table
148dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return chi-square test statistic
149dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
150dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
151dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public double chiSquare(long[][] counts) throws IllegalArgumentException {
152dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
153dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        checkArray(counts);
154dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        int nRows = counts.length;
155dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        int nCols = counts[0].length;
156dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
157dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        // compute row, column and total sums
158dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double[] rowSum = new double[nRows];
159dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double[] colSum = new double[nCols];
160dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double total = 0.0d;
161dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int row = 0; row < nRows; row++) {
162dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            for (int col = 0; col < nCols; col++) {
163dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                rowSum[row] += counts[row][col];
164dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                colSum[col] += counts[row][col];
165dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                total += counts[row][col];
166dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            }
167dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
168dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
169dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        // compute expected counts and chi-square
170dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double sumSq = 0.0d;
171dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double expected = 0.0d;
172dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int row = 0; row < nRows; row++) {
173dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            for (int col = 0; col < nCols; col++) {
174dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                expected = (rowSum[row] * colSum[col]) / total;
175dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                sumSq += ((counts[row][col] - expected) *
176dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                        (counts[row][col] - expected)) / expected;
177dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            }
178dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
179dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return sumSq;
180dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
181dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
182dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
183dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param counts array representation of 2-way table
184dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return p-value
185dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
186dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws MathException if an error occurs computing the p-value
187dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
188dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public double chiSquareTest(long[][] counts)
189dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    throws IllegalArgumentException, MathException {
190dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        checkArray(counts);
191dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
192dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        distribution.setDegreesOfFreedom(df);
193dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return 1 - distribution.cumulativeProbability(chiSquare(counts));
194dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
195dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
196dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
197dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param counts array representation of 2-way table
198dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param alpha significance level of the test
199dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return true iff null hypothesis can be rejected with confidence
200dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * 1 - alpha
201dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
202dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws MathException if an error occurs performing the test
203dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
204dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public boolean chiSquareTest(long[][] counts, double alpha)
205dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    throws IllegalArgumentException, MathException {
206dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if ((alpha <= 0) || (alpha > 0.5)) {
207dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
208dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
209dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  alpha, 0.0, 0.5);
210dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
211dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return chiSquareTest(counts) < alpha;
212dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
213dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
214dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
215dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed1 array of observed frequency counts of the first data set
216dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed2 array of observed frequency counts of the second data set
217dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return chi-square test statistic
218dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
219dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @since 1.2
220dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
221dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
222dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        throws IllegalArgumentException {
223dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
224dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        // Make sure lengths are same
225dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (observed1.length < 2) {
226dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
227dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.INSUFFICIENT_DIMENSION, observed1.length, 2);
228dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
229dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (observed1.length != observed2.length) {
230dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
231dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
232dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  observed1.length, observed2.length);
233dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
234dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
235dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        // Ensure non-negative counts
236dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        checkNonNegative(observed1);
237dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        checkNonNegative(observed2);
238dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
239dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        // Compute and compare count sums
240dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        long countSum1 = 0;
241dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        long countSum2 = 0;
242dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        boolean unequalCounts = false;
243dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double weight = 0.0;
244dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int i = 0; i < observed1.length; i++) {
245dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            countSum1 += observed1[i];
246dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            countSum2 += observed2[i];
247dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
248dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        // Ensure neither sample is uniformly 0
249dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (countSum1 == 0) {
250dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
251dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 1);
252dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
253dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (countSum2 == 0) {
254dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
255dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.OBSERVED_COUNTS_ALL_ZERO, 2);
256dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
257dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        // Compare and compute weight only if different
258dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        unequalCounts = countSum1 != countSum2;
259dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (unequalCounts) {
260dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            weight = FastMath.sqrt((double) countSum1 / (double) countSum2);
261dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
262dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        // Compute ChiSquare statistic
263dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double sumSq = 0.0d;
264dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double dev = 0.0d;
265dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double obs1 = 0.0d;
266dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        double obs2 = 0.0d;
267dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int i = 0; i < observed1.length; i++) {
268dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            if (observed1[i] == 0 && observed2[i] == 0) {
269dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                throw MathRuntimeException.createIllegalArgumentException(
270dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                      LocalizedFormats.OBSERVED_COUNTS_BOTTH_ZERO_FOR_ENTRY, i);
271dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            } else {
272dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                obs1 = observed1[i];
273dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                obs2 = observed2[i];
274dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                if (unequalCounts) { // apply weights
275dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                    dev = obs1/weight - obs2 * weight;
276dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                } else {
277dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                    dev = obs1 - obs2;
278dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                }
279dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                sumSq += (dev * dev) / (obs1 + obs2);
280dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            }
281dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
282dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return sumSq;
283dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
284dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
285dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
286dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed1 array of observed frequency counts of the first data set
287dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed2 array of observed frequency counts of the second data set
288dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return p-value
289dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
290dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws MathException if an error occurs computing the p-value
291dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @since 1.2
292dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
293dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
294dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        throws IllegalArgumentException, MathException {
295dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        distribution.setDegreesOfFreedom((double) observed1.length - 1);
296dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return 1 - distribution.cumulativeProbability(
297dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                chiSquareDataSetsComparison(observed1, observed2));
298dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
299dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
300dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
301dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed1 array of observed frequency counts of the first data set
302dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param observed2 array of observed frequency counts of the second data set
303dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param alpha significance level of the test
304dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @return true iff null hypothesis can be rejected with confidence
305dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * 1 - alpha
306dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if preconditions are not met
307dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws MathException if an error occurs performing the test
308dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @since 1.2
309dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
310dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
311dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            double alpha) throws IllegalArgumentException, MathException {
312dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if ((alpha <= 0) || (alpha > 0.5)) {
313dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
314dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
315dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  alpha, 0.0, 0.5);
316dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
317dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
318dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
319dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
320dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
321dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * Checks to make sure that the input long[][] array is rectangular,
322dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * has at least 2 rows and 2 columns, and has all non-negative entries,
323dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * throwing IllegalArgumentException if any of these checks fail.
324dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
325dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param in input 2-way table to check
326dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if the array is not valid
327dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
328dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    private void checkArray(long[][] in) throws IllegalArgumentException {
329dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
330dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (in.length < 2) {
331dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
332dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.INSUFFICIENT_DIMENSION, in.length, 2);
333dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
334dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
335dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        if (in[0].length < 2) {
336dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            throw MathRuntimeException.createIllegalArgumentException(
337dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                  LocalizedFormats.INSUFFICIENT_DIMENSION, in[0].length, 2);
338dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
339dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
340dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        checkRectangular(in);
341dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        checkNonNegative(in);
342dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
343dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
344dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
345dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    //---------------------  Private array methods -- should find a utility home for these
346dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
347dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
348dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * Throws IllegalArgumentException if the input array is not rectangular.
349dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
350dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param in array to be tested
351dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws NullPointerException if input array is null
352dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @throws IllegalArgumentException if input array is not rectangular
353dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
354dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    private void checkRectangular(long[][] in) {
355dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int i = 1; i < in.length; i++) {
356dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            if (in[i].length != in[0].length) {
357dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                throw MathRuntimeException.createIllegalArgumentException(
358dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                      LocalizedFormats.DIFFERENT_ROWS_LENGTHS,
359dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                      in[i].length, in[0].length);
360dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            }
361dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
362dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
363dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
364dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
365dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * Check all entries of the input array are > 0.
366dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
367dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param in array to be tested
368dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @exception IllegalArgumentException if one entry is not positive
369dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
370dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    private void checkPositive(double[] in) throws IllegalArgumentException {
371dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int i = 0; i < in.length; i++) {
372dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            if (in[i] <= 0) {
373dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                throw MathRuntimeException.createIllegalArgumentException(
374dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                      LocalizedFormats.NOT_POSITIVE_ELEMENT_AT_INDEX,
375dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                      i, in[i]);
376dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            }
377dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
378dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
379dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
380dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
381dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * Check all entries of the input array are >= 0.
382dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
383dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param in array to be tested
384dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @exception IllegalArgumentException if one entry is negative
385dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
386dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    private void checkNonNegative(long[] in) throws IllegalArgumentException {
387dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int i = 0; i < in.length; i++) {
388dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            if (in[i] < 0) {
389dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                throw MathRuntimeException.createIllegalArgumentException(
390dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                      LocalizedFormats.NEGATIVE_ELEMENT_AT_INDEX,
391dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                      i, in[i]);
392dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            }
393dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
394dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
395dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
396dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
397dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * Check all entries of the input array are >= 0.
398dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
399dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param in array to be tested
400dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @exception IllegalArgumentException if one entry is negative
401dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
402dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    private void checkNonNegative(long[][] in) throws IllegalArgumentException {
403dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        for (int i = 0; i < in.length; i ++) {
404dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            for (int j = 0; j < in[i].length; j++) {
405dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                if (in[i][j] < 0) {
406dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                    throw MathRuntimeException.createIllegalArgumentException(
407dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                          LocalizedFormats.NEGATIVE_ELEMENT_AT_2D_INDEX,
408dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                          i, j, in[i][j]);
409dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond                }
410dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond            }
411dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        }
412dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
413dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond
414dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    /**
415dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * Modify the distribution used to compute inference statistics.
416dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *
417dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @param value
418dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     *            the new distribution
419dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     * @since 1.2
420dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond     */
421dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    public void setDistribution(ChiSquaredDistribution value) {
422dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond        distribution = value;
423dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond    }
424dee0849a9704d532af0b550146cbafbaa6ee1d19Raymond}
425