/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.math.stat.descriptive.moment; import java.io.Serializable; import org.apache.commons.math.exception.NullArgumentException; import org.apache.commons.math.exception.util.LocalizedFormats; import org.apache.commons.math.stat.descriptive.WeightedEvaluation; import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic; /** * Computes the variance of the available values. By default, the unbiased * "sample variance" definitional formula is used: *
* variance = sum((x_i - mean)^2) / (n - 1)
*
* where mean is the {@link Mean} and <code>n</code> is the number
* of sample observations.
* The definitional formula does not have good numerical properties, so
* this implementation does not compute the statistic using the definitional
* formula. <ul>
* <li> The <code>getResult</code> method computes the variance using
* updating formulas based on West's algorithm, as described in
* Chan, T. F. and J. G. Lewis 1979, <i>Communications of the ACM</i>,
* vol. 22 no. 9, pp. 526-531.</li>
* <li> The <code>evaluate</code> methods leverage the fact that they have the
* full array of values in memory to execute a two-pass algorithm.
* Specifically, these methods use the "corrected two-pass algorithm" from
* Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
* American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul>
* Note that adding values using <code>increment</code> or
* <code>incrementAll</code> and then executing <code>getResult</code> will
* sometimes give a different, less accurate, result than executing
* <code>evaluate</code> with the full array of values. The former approach
* should only be used when the full array of values is not available.
*
* The "population variance" ( sum((x_i - mean)^2) / n ) can also
* be computed using this statistic. The <code>isBiasCorrected</code>
* property determines whether the "population" or "sample" value is
* returned by the <code>evaluate</code> and <code>getResult</code> methods.
* To compute population variances, set this property to <code>false</code>.
*
* Note that this implementation is not synchronized. If
* multiple threads access an instance of this class concurrently, and at least
* one of the threads invokes the <code>increment()</code> or
* <code>clear()</code> method, it must be synchronized externally.
*
* Constructs a Variance with the default (true) <code>isBiasCorrected</code>
* property.
*/
public Variance() {
    // A freshly created instance owns its own second moment.
    this.moment = new SecondMoment();
}
/**
* Constructs a Variance based on an external second moment.
*
* @param m2 the SecondMoment (Third or Fourth moments work
* here as well.)
*/
public Variance(final SecondMoment m2) {
    // Adopt the caller's moment instead of creating one.
    this.moment = m2;
    // NOTE(review): presumably tells this instance not to update the externally
    // supplied moment itself -- confirm against increment().
    this.incMoment = false;
}
/**
* Constructs a Variance with the specified isBiasCorrected
* property
*
* @param isBiasCorrected setting for bias correction - true means
* bias will be corrected and is equivalent to using the argumentless
* constructor
*/
public Variance(boolean isBiasCorrected) {
    // Same set-up as the argumentless constructor (fresh SecondMoment) ...
    this();
    // ... followed by the caller's choice of bias correction.
    this.isBiasCorrected = isBiasCorrected;
}
/**
* Constructs a Variance with the specified isBiasCorrected
* property and the supplied external second moment.
*
* @param isBiasCorrected setting for bias correction - true means
* bias will be corrected
* @param m2 the SecondMoment (Third or Fourth moments work
* here as well.)
*/
public Variance(boolean isBiasCorrected, SecondMoment m2) {
    // Same set-up as Variance(SecondMoment): adopt m2 and clear incMoment ...
    this(m2);
    // ... followed by the caller's choice of bias correction.
    this.isBiasCorrected = isBiasCorrected;
}
/**
* Copy constructor, creates a new {@code Variance} identical
* to the {@code original}
*
* @param original the {@code Variance} instance to copy
*/
public Variance(Variance original) {
    // All state transfer (and the null check) lives in the static copy helper.
    Variance.copy(original, this);
}
/**
* {@inheritDoc}
* If all values are available, it is more accurate to use
* {@link #evaluate(double[])} rather than adding values one at a time
* using this method and then executing {@link #getResult}, since
* <code>evaluate</code> leverages the fact that it has the full
* list of values together to execute a two-pass algorithm.
* See {@link Variance}.
*
* Returns the variance of the entries in the input array, or
* <code>Double.NaN</code> if the array is empty.
* <p>
* See {@link Variance} for details on the computing algorithm.</p>
* <p>
* Returns 0 for a single-value (i.e. length = 1) sample.</p>
* <p>
* Throws <code>IllegalArgumentException</code> if the array is null.</p>
* <p>
* Does not change the internal state of the statistic.</p>
*
* @param values the input array
* @return the variance of the values or Double.NaN if length = 0
* @throws IllegalArgumentException if the array is null
*/
@Override
public double evaluate(final double[] values) {
    // With a non-null array, hand the whole range to the ranged overload.
    if (values != null) {
        return evaluate(values, 0, values.length);
    }
    throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
}

/**
* Returns the variance of the entries in the specified portion of
* the input array, or <code>Double.NaN</code>
* if the designated subarray is empty.
* <p>
* See {@link Variance} for details on the computing algorithm.</p>
* <p>
* Returns 0 for a single-value (i.e. length = 1) sample.</p>
* <p>
* Does not change the internal state of the statistic.</p>
* <p>
* Throws <code>IllegalArgumentException</code> if the array is null.</p>
*
* Returns the weighted variance of the entries in the specified portion of
* the input array, or <code>Double.NaN</code> if the designated subarray
* is empty.
* <p>
* Uses the formula <pre>
*   Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1)
* </pre>
* where weightedMean is the weighted mean</p>
* <p>
* This formula will not return the same result as the unweighted variance when all
* weights are equal, unless all weights are equal to 1. The formula assumes that
* weights are to be treated as "expansion values," as will be the case if for example
* the weights represent frequency counts. To normalize weights so that the denominator
* in the variance computation equals the length of the input vector minus one, use <pre>
*   <code>evaluate(values, MathUtils.normalizeArray(weights, values.length));</code>
* </pre>
* * Returns 0 for a single-value (i.e. length = 1) sample.
*
* Throws IllegalArgumentException
if any of the following are true:
*
* Does not change the internal state of the statistic.
*
* Throws IllegalArgumentException
if either array is null.
* Returns the weighted variance of the entries in the input array.
* <p>
* Uses the formula <pre>
*   Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1)
* </pre>
* where weightedMean is the weighted mean</p>
* <p>
* This formula will not return the same result as the unweighted variance when all
* weights are equal, unless all weights are equal to 1. The formula assumes that
* weights are to be treated as "expansion values," as will be the case if for example
* the weights represent frequency counts. To normalize weights so that the denominator
* in the variance computation equals the length of the input vector minus one, use <pre>
*   <code>evaluate(values, MathUtils.normalizeArray(weights, values.length));</code>
* </pre>
* * Returns 0 for a single-value (i.e. length = 1) sample.
*
* Throws IllegalArgumentException
if any of the following are true:
*
* Does not change the internal state of the statistic.
*
* Throws IllegalArgumentException
if either array is null.
*
* Returns the variance of the entries in the specified portion of
* the input array, using the precomputed mean value. Returns
* <code>Double.NaN</code> if the designated subarray is empty.
* <p>
* See {@link Variance} for details on the computing algorithm.</p>
* <p>
* The formula used assumes that the supplied mean value is the arithmetic
* mean of the sample data, not a known population parameter. This method
* is supplied only to save computation when the mean has already been
* computed.</p>
* <p>
* Returns 0 for a single-value (i.e. length = 1) sample.</p>
* <p>
* Throws <code>IllegalArgumentException</code> if the array is null.</p>
* <p>
* Does not change the internal state of the statistic.</p>
* * @param values the input array * @param mean the precomputed mean value * @param begin index of the first array element to include * @param length the number of elements to include * @return the variance of the values or Double.NaN if length = 0 * @throws IllegalArgumentException if the array is null or the array index * parameters are not valid */ public double evaluate(final double[] values, final double mean, final int begin, final int length) { double var = Double.NaN; if (test(values, begin, length)) { if (length == 1) { var = 0.0; } else if (length > 1) { double accum = 0.0; double dev = 0.0; double accum2 = 0.0; for (int i = begin; i < begin + length; i++) { dev = values[i] - mean; accum += dev * dev; accum2 += dev; } double len = length; if (isBiasCorrected) { var = (accum - (accum2 * accum2 / len)) / (len - 1.0); } else { var = (accum - (accum2 * accum2 / len)) / len; } } } return var; } /** * Returns the variance of the entries in the input array, using the * precomputed mean value. ReturnsDouble.NaN
* if the array is empty.
* <p>
* See {@link Variance} for details on the computing algorithm.</p>
* <p>
* If <code>isBiasCorrected</code> is <code>true</code> the formula used
* assumes that the supplied mean value is the arithmetic mean of the
* sample data, not a known population parameter. If the mean is a known
* population parameter, or if the "population" version of the variance is
* desired, set <code>isBiasCorrected</code> to <code>false</code> before
* invoking this method.</p>
* <p>
* Returns 0 for a single-value (i.e. length = 1) sample.</p>
* <p>
* Throws <code>IllegalArgumentException</code> if the array is null.</p>
* <p>
* Does not change the internal state of the statistic.</p>
*
* @param values the input array
* @param mean the precomputed mean value
* @return the variance of the values or Double.NaN if the array is empty
* @throws IllegalArgumentException if the array is null
*/
public double evaluate(final double[] values, final double mean) {
    // Reject null explicitly, as evaluate(double[]) does; otherwise values.length
    // below would surface as a bare NullPointerException instead of the documented
    // argument exception.
    if (values == null) {
        throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY);
    }
    return evaluate(values, mean, 0, values.length);
}

/**
* Returns the weighted variance of the entries in the specified portion of
* the input array, using the precomputed weighted mean value. Returns
* <code>Double.NaN</code>
* if the designated subarray is empty.
* <p>
* Uses the formula <pre>
*   Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1)
* </pre></p>
* <p>
* The formula used assumes that the supplied mean value is the weighted arithmetic
* mean of the sample data, not a known population parameter. This method
* is supplied only to save computation when the mean has already been
* computed.</p>
* <p>
* This formula will not return the same result as the unweighted variance when all
* weights are equal, unless all weights are equal to 1. The formula assumes that
* weights are to be treated as "expansion values," as will be the case if for example
* the weights represent frequency counts. To normalize weights so that the denominator
* in the variance computation equals the length of the input vector minus one, use <pre>
*   <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean);</code>
* </pre>
* * Returns 0 for a single-value (i.e. length = 1) sample.
*
* Throws IllegalArgumentException
if any of the following are true:
*
* Does not change the internal state of the statistic.
* * @param values the input array * @param weights the weights array * @param mean the precomputed weighted mean value * @param begin index of the first array element to include * @param length the number of elements to include * @return the variance of the values or Double.NaN if length = 0 * @throws IllegalArgumentException if the parameters are not valid * @since 2.1 */ public double evaluate(final double[] values, final double[] weights, final double mean, final int begin, final int length) { double var = Double.NaN; if (test(values, weights, begin, length)) { if (length == 1) { var = 0.0; } else if (length > 1) { double accum = 0.0; double dev = 0.0; double accum2 = 0.0; for (int i = begin; i < begin + length; i++) { dev = values[i] - mean; accum += weights[i] * (dev * dev); accum2 += weights[i] * dev; } double sumWts = 0; for (int i = 0; i < weights.length; i++) { sumWts += weights[i]; } if (isBiasCorrected) { var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0); } else { var = (accum - (accum2 * accum2 / sumWts)) / sumWts; } } } return var; } /** *Returns the weighted variance of the values in the input array, using * the precomputed weighted mean value.
* <p>
* Uses the formula <pre>
*   Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1)
* </pre></p>
* <p>
* The formula used assumes that the supplied mean value is the weighted arithmetic
* mean of the sample data, not a known population parameter. This method
* is supplied only to save computation when the mean has already been
* computed.</p>
* <p>
* This formula will not return the same result as the unweighted variance when all
* weights are equal, unless all weights are equal to 1. The formula assumes that
* weights are to be treated as "expansion values," as will be the case if for example
* the weights represent frequency counts. To normalize weights so that the denominator
* in the variance computation equals the length of the input vector minus one, use <pre>
*   <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean);</code>
* </pre>
* * Returns 0 for a single-value (i.e. length = 1) sample.
*
* Throws IllegalArgumentException
if any of the following are true:
*
* Does not change the internal state of the statistic.
* * @param values the input array * @param weights the weights array * @param mean the precomputed weighted mean value * @return the variance of the values or Double.NaN if length = 0 * @throws IllegalArgumentException if the parameters are not valid * @since 2.1 */ public double evaluate(final double[] values, final double[] weights, final double mean) { return evaluate(values, weights, mean, 0, values.length); } /** * @return Returns the isBiasCorrected. */ public boolean isBiasCorrected() { return isBiasCorrected; } /** * @param biasCorrected The isBiasCorrected to set. */ public void setBiasCorrected(boolean biasCorrected) { this.isBiasCorrected = biasCorrected; } /** * {@inheritDoc} */ @Override public Variance copy() { Variance result = new Variance(); copy(this, result); return result; } /** * Copies source to dest. *Neither source nor dest can be null.
*
* @param source Variance to copy
* @param dest Variance to copy to
* @throws NullArgumentException if either source or dest is null
*/
public static void copy(Variance source, Variance dest) {
    final boolean missingArgument = source == null || dest == null;
    if (missingArgument) {
        throw new NullArgumentException();
    }
    // Stored data is passed along by reference.
    dest.setData(source.getDataRef());
    // The moment is duplicated through its own copy().
    dest.moment = source.moment.copy();
    dest.incMoment = source.incMoment;
    dest.isBiasCorrected = source.isBiasCorrected;
}
}