/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
import org.apache.commons.math.stat.descriptive.moment.Mean;
import org.apache.commons.math.stat.descriptive.moment.Variance;
import org.apache.commons.math.stat.descriptive.rank.Max;
import org.apache.commons.math.stat.descriptive.rank.Min;
import org.apache.commons.math.stat.descriptive.rank.Percentile;
import org.apache.commons.math.stat.descriptive.summary.Product;
import org.apache.commons.math.stat.descriptive.summary.Sum;
import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
/**
* StatUtils provides static methods for computing statistics based on data
* stored in double[] arrays.
*
* @version $Revision: 1073276 $ $Date: 2011-02-22 10:34:52 +0100 (mar. 22 févr. 2011) $
*/
public final class StatUtils {
/** sum */
private static final UnivariateStatistic SUM = new Sum();
/** sumSq */
private static final UnivariateStatistic SUM_OF_SQUARES = new SumOfSquares();
/** prod */
private static final UnivariateStatistic PRODUCT = new Product();
/** sumLog */
private static final UnivariateStatistic SUM_OF_LOGS = new SumOfLogs();
/** min */
private static final UnivariateStatistic MIN = new Min();
/** max */
private static final UnivariateStatistic MAX = new Max();
/** mean */
private static final UnivariateStatistic MEAN = new Mean();
/** variance */
private static final Variance VARIANCE = new Variance();
/** percentile */
private static final Percentile PERCENTILE = new Percentile();
/** geometric mean */
private static final GeometricMean GEOMETRIC_MEAN = new GeometricMean();
/**
* Private Constructor
*/
private StatUtils() {
}
/**
* Returns the sum of the values in the input array, or
* Double.NaN
if the array is empty.
*
* Throws IllegalArgumentException
if the input array
* is null.
Double.NaN
if the array
* is empty
* @throws IllegalArgumentException if the array is null
*/
public static double sum(final double[] values) {
return SUM.evaluate(values);
}
/**
* Returns the sum of the entries in the specified portion of
* the input array, or Double.NaN
if the designated subarray
* is empty.
*
* Throws IllegalArgumentException
if the array is null.
Double.NaN
if the array is empty.
*
* Throws IllegalArgumentException
if the array is null.
Double.NaN
if the
* array is empty
* @throws IllegalArgumentException if the array is null
*/
public static double sumSq(final double[] values) {
return SUM_OF_SQUARES.evaluate(values);
}
/**
* Returns the sum of the squares of the entries in the specified portion of
* the input array, or Double.NaN
if the designated subarray
* is empty.
*
* Throws IllegalArgumentException
if the array is null.
Double.NaN
if the array is empty.
*
* Throws IllegalArgumentException
if the array is null.
Double.NaN
if the designated subarray
* is empty.
*
* Throws IllegalArgumentException
if the array is null.
Double.NaN
if the array is empty.
*
* Throws IllegalArgumentException
if the array is null.
* See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}. *
* * @param values the input array * @return the sum of the natural logs of the values or Double.NaN if * the array is empty * @throws IllegalArgumentException if the array is null */ public static double sumLog(final double[] values) { return SUM_OF_LOGS.evaluate(values); } /** * Returns the sum of the natural logs of the entries in the specified portion of * the input array, orDouble.NaN
if the designated subarray
* is empty.
*
* Throws IllegalArgumentException
if the array is null.
* See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}. *
* * @param values the input array * @param begin index of the first array element to include * @param length the number of elements to include * @return the sum of the natural logs of the values or Double.NaN if * length = 0 * @throws IllegalArgumentException if the array is null or the array index * parameters are not valid */ public static double sumLog(final double[] values, final int begin, final int length) { return SUM_OF_LOGS.evaluate(values, begin, length); } /** * Returns the arithmetic mean of the entries in the input array, or *Double.NaN
if the array is empty.
*
* Throws IllegalArgumentException
if the array is null.
* See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for * details on the computing algorithm.
* * @param values the input array * @return the mean of the values or Double.NaN if the array is empty * @throws IllegalArgumentException if the array is null */ public static double mean(final double[] values) { return MEAN.evaluate(values); } /** * Returns the arithmetic mean of the entries in the specified portion of * the input array, orDouble.NaN
if the designated subarray
* is empty.
*
* Throws IllegalArgumentException
if the array is null.
* See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for * details on the computing algorithm.
* * @param values the input array * @param begin index of the first array element to include * @param length the number of elements to include * @return the mean of the values or Double.NaN if length = 0 * @throws IllegalArgumentException if the array is null or the array index * parameters are not valid */ public static double mean(final double[] values, final int begin, final int length) { return MEAN.evaluate(values, begin, length); } /** * Returns the geometric mean of the entries in the input array, or *Double.NaN
if the array is empty.
*
* Throws IllegalArgumentException
if the array is null.
* See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean} * for details on the computing algorithm.
* * @param values the input array * @return the geometric mean of the values or Double.NaN if the array is empty * @throws IllegalArgumentException if the array is null */ public static double geometricMean(final double[] values) { return GEOMETRIC_MEAN.evaluate(values); } /** * Returns the geometric mean of the entries in the specified portion of * the input array, orDouble.NaN
if the designated subarray
* is empty.
*
* Throws IllegalArgumentException
if the array is null.
* See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean} * for details on the computing algorithm.
* * @param values the input array * @param begin index of the first array element to include * @param length the number of elements to include * @return the geometric mean of the values or Double.NaN if length = 0 * @throws IllegalArgumentException if the array is null or the array index * parameters are not valid */ public static double geometricMean(final double[] values, final int begin, final int length) { return GEOMETRIC_MEAN.evaluate(values, begin, length); } /** * Returns the variance of the entries in the input array, or *Double.NaN
if the array is empty.
* * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for * details on the computing algorithm.
** Returns 0 for a single-value (i.e. length = 1) sample.
*
* Throws IllegalArgumentException
if the array is null.
Double.NaN
if the designated subarray
* is empty.
* * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for * details on the computing algorithm.
** Returns 0 for a single-value (i.e. length = 1) sample.
*
* Throws IllegalArgumentException
if the array is null or the
* array index parameters are not valid.
Double.NaN
if the designated subarray is empty.
* * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for * details on the computing algorithm.
** The formula used assumes that the supplied mean value is the arithmetic * mean of the sample data, not a known population parameter. This method * is supplied only to save computation when the mean has already been * computed.
** Returns 0 for a single-value (i.e. length = 1) sample.
*
* Throws IllegalArgumentException
if the array is null or the
* array index parameters are not valid.
Double.NaN
if the array
* is empty.
* * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for * details on the computing algorithm.
** The formula used assumes that the supplied mean value is the arithmetic * mean of the sample data, not a known population parameter. This method * is supplied only to save computation when the mean has already been * computed.
** Returns 0 for a single-value (i.e. length = 1) sample.
*
* Throws IllegalArgumentException
if the array is null.
Double.NaN
if the array is empty.
*
* Throws IllegalArgumentException
if the array is null.
*
NaN
iff all values are NaN
* (i.e. NaN
values have no impact on the value of the statistic).Double.POSITIVE_INFINITY
,
* the result is Double.POSITIVE_INFINITY.
Double.NaN
if the designated subarray
* is empty.
*
* Throws IllegalArgumentException
if the array is null or
* the array index parameters are not valid.
*
NaN
iff all values are NaN
* (i.e. NaN
values have no impact on the value of the statistic).Double.POSITIVE_INFINITY
,
* the result is Double.POSITIVE_INFINITY.
Double.NaN
if the array is empty.
*
* Throws IllegalArgumentException
if the array is null.
*
NaN
iff all values are NaN
* (i.e. NaN
values have no impact on the value of the statistic).Double.NEGATIVE_INFINITY
,
* the result is Double.NEGATIVE_INFINITY.
Double.NaN
if the designated subarray
* is empty.
*
* Throws IllegalArgumentException
if the array is null or
* the array index parameters are not valid.
*
NaN
iff all values are NaN
* (i.e. NaN
values have no impact on the value of the statistic).Double.NEGATIVE_INFINITY
,
* the result is Double.NEGATIVE_INFINITY.
p
th percentile of the values
* in the values
array.
* *
Double.NaN
if values
has length
* 0
p
) values[0]
* if values
has length 1
IllegalArgumentException
if values
* is null or p is not a valid quantile value (p must be greater than 0
* and less than or equal to 100)* See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for * a description of the percentile estimation algorithm used.
* * @param values input array of values * @param p the percentile value to compute * @return the percentile value or Double.NaN if the array is empty * @throws IllegalArgumentException ifvalues
is null
* or p is invalid
*/
public static double percentile(final double[] values, final double p) {
return PERCENTILE.evaluate(values,p);
}
/**
* Returns an estimate of the p
th percentile of the values
* in the values
array, starting with the element in (0-based)
* position begin
in the array and including length
* values.
* *
Double.NaN
if length = 0
p
) values[begin]
* if length = 1
IllegalArgumentException
if values
* is null , begin
or length
is invalid, or
* p
is not a valid quantile value (p must be greater than 0
* and less than or equal to 100)* See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for * a description of the percentile estimation algorithm used.
* * @param values array of input values * @param p the percentile to compute * @param begin the first (0-based) element to include in the computation * @param length the number of array elements to include * @return the percentile value * @throws IllegalArgumentException if the parameters are not valid or the * input array is null */ public static double percentile(final double[] values, final int begin, final int length, final double p) { return PERCENTILE.evaluate(values, begin, length, p); } /** * Returns the sum of the (signed) differences between corresponding elements of the * input arrays -- i.e., sum(sample1[i] - sample2[i]). * * @param sample1 the first array * @param sample2 the second array * @return sum of paired differences * @throws IllegalArgumentException if the arrays do not have the same * (positive) length */ public static double sumDifference(final double[] sample1, final double[] sample2) throws IllegalArgumentException { int n = sample1.length; if (n != sample2.length) { throw MathRuntimeException.createIllegalArgumentException( LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, n, sample2.length); } if (n < 1) { throw MathRuntimeException.createIllegalArgumentException( LocalizedFormats.INSUFFICIENT_DIMENSION, sample2.length, 1); } double result = 0; for (int i = 0; i < n; i++) { result += sample1[i] - sample2[i]; } return result; } /** * Returns the mean of the (signed) differences between corresponding elements of the * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length. * * @param sample1 the first array * @param sample2 the second array * @return mean of paired differences * @throws IllegalArgumentException if the arrays do not have the same * (positive) length */ public static double meanDifference(final double[] sample1, final double[] sample2) throws IllegalArgumentException { return sumDifference(sample1, sample2) / sample1.length; } /** * Returns the variance of the (signed) differences between corresponding elements of the * input arrays -- i.e., var(sample1[i] - sample2[i]). * * @param sample1 the first array * @param sample2 the second array * @param meanDifference the mean difference between corresponding entries * @see #meanDifference(double[],double[]) * @return variance of paired differences * @throws IllegalArgumentException if the arrays do not have the same * length or their common length is less than 2. */ public static double varianceDifference(final double[] sample1, final double[] sample2, double meanDifference) throws IllegalArgumentException { double sum1 = 0d; double sum2 = 0d; double diff = 0d; int n = sample1.length; if (n != sample2.length) { throw MathRuntimeException.createIllegalArgumentException( LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, n, sample2.length); } if (n < 2) { throw MathRuntimeException.createIllegalArgumentException( LocalizedFormats.INSUFFICIENT_DIMENSION, n, 2); } for (int i = 0; i < n; i++) { diff = sample1[i] - sample2[i]; sum1 += (diff - meanDifference) *(diff - meanDifference); sum2 += diff - meanDifference; } return (sum1 - (sum2 * sum2 / n)) / (n - 1); } /** * Normalize (standardize) the series, so in the end it is having a mean of 0 and a standard deviation of 1. * * @param sample sample to normalize * @return normalized (standardized) sample * @since 2.2 */ public static double[] normalize(final double[] sample) { DescriptiveStatistics stats = new DescriptiveStatistics(); // Add the data from the series to stats for (int i = 0; i < sample.length; i++) { stats.addValue(sample[i]); } // Compute mean and standard deviation double mean = stats.getMean(); double standardDeviation = stats.getStandardDeviation(); // initialize the standardizedSample, which has the same length as the sample double[] standardizedSample = new double[sample.length]; for (int i = 0; i < sample.length; i++) { // z = (x- mean)/standardDeviation standardizedSample[i] = (sample[i] - mean) / standardDeviation; } return standardizedSample; } }