1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17package org.apache.commons.math.stat.descriptive;
18
19import java.io.Serializable;
20
21import org.apache.commons.math.MathRuntimeException;
22import org.apache.commons.math.exception.util.LocalizedFormats;
23import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
24import org.apache.commons.math.stat.descriptive.moment.Mean;
25import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
26import org.apache.commons.math.stat.descriptive.moment.Variance;
27import org.apache.commons.math.stat.descriptive.rank.Max;
28import org.apache.commons.math.stat.descriptive.rank.Min;
29import org.apache.commons.math.stat.descriptive.summary.Sum;
30import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
31import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
32import org.apache.commons.math.util.MathUtils;
33import org.apache.commons.math.util.FastMath;
34
35/**
36 * <p>
37 * Computes summary statistics for a stream of data values added using the
38 * {@link #addValue(double) addValue} method. The data values are not stored in
39 * memory, so this class can be used to compute statistics for very large data
40 * streams.
41 * </p>
42 * <p>
43 * The {@link StorelessUnivariateStatistic} instances used to maintain summary
44 * state and compute statistics are configurable via setters. For example, the
45 * default implementation for the variance can be overridden by calling
46 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
47 * these methods must implement the {@link StorelessUnivariateStatistic}
48 * interface and configuration must be completed before <code>addValue</code>
49 * is called. No configuration is necessary to use the default, commons-math
50 * provided implementations.
51 * </p>
52 * <p>
53 * Note: This class is not thread-safe. Use
54 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
55 * threads is required.
56 * </p>
57 * @version $Revision: 1042376 $ $Date: 2010-12-05 16:54:55 +0100 (dim. 05 déc. 2010) $
58 */
59public class SummaryStatistics implements StatisticalSummary, Serializable {
60
61    /** Serialization UID */
62    private static final long serialVersionUID = -2021321786743555871L;
63
64    /** count of values that have been added */
65    protected long n = 0;
66
67    /** SecondMoment is used to compute the mean and variance */
68    protected SecondMoment secondMoment = new SecondMoment();
69
70    /** sum of values that have been added */
71    protected Sum sum = new Sum();
72
73    /** sum of the square of each value that has been added */
74    protected SumOfSquares sumsq = new SumOfSquares();
75
76    /** min of values that have been added */
77    protected Min min = new Min();
78
79    /** max of values that have been added */
80    protected Max max = new Max();
81
82    /** sumLog of values that have been added */
83    protected SumOfLogs sumLog = new SumOfLogs();
84
85    /** geoMean of values that have been added */
86    protected GeometricMean geoMean = new GeometricMean(sumLog);
87
88    /** mean of values that have been added */
89    protected Mean mean = new Mean();
90
91    /** variance of values that have been added */
92    protected Variance variance = new Variance();
93
94    /** Sum statistic implementation - can be reset by setter. */
95    private StorelessUnivariateStatistic sumImpl = sum;
96
97    /** Sum of squares statistic implementation - can be reset by setter. */
98    private StorelessUnivariateStatistic sumsqImpl = sumsq;
99
100    /** Minimum statistic implementation - can be reset by setter. */
101    private StorelessUnivariateStatistic minImpl = min;
102
103    /** Maximum statistic implementation - can be reset by setter. */
104    private StorelessUnivariateStatistic maxImpl = max;
105
106    /** Sum of log statistic implementation - can be reset by setter. */
107    private StorelessUnivariateStatistic sumLogImpl = sumLog;
108
109    /** Geometric mean statistic implementation - can be reset by setter. */
110    private StorelessUnivariateStatistic geoMeanImpl = geoMean;
111
112    /** Mean statistic implementation - can be reset by setter. */
113    private StorelessUnivariateStatistic meanImpl = mean;
114
115    /** Variance statistic implementation - can be reset by setter. */
116    private StorelessUnivariateStatistic varianceImpl = variance;
117
118    /**
119     * Construct a SummaryStatistics instance
120     */
121    public SummaryStatistics() {
122    }
123
124    /**
125     * A copy constructor. Creates a deep-copy of the {@code original}.
126     *
127     * @param original the {@code SummaryStatistics} instance to copy
128     */
129    public SummaryStatistics(SummaryStatistics original) {
130        copy(original, this);
131    }
132
133    /**
134     * Return a {@link StatisticalSummaryValues} instance reporting current
135     * statistics.
136     * @return Current values of statistics
137     */
138    public StatisticalSummary getSummary() {
139        return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
140                getMax(), getMin(), getSum());
141    }
142
143    /**
144     * Add a value to the data
145     * @param value the value to add
146     */
147    public void addValue(double value) {
148        sumImpl.increment(value);
149        sumsqImpl.increment(value);
150        minImpl.increment(value);
151        maxImpl.increment(value);
152        sumLogImpl.increment(value);
153        secondMoment.increment(value);
154        // If mean, variance or geomean have been overridden,
155        // need to increment these
156        if (!(meanImpl instanceof Mean)) {
157            meanImpl.increment(value);
158        }
159        if (!(varianceImpl instanceof Variance)) {
160            varianceImpl.increment(value);
161        }
162        if (!(geoMeanImpl instanceof GeometricMean)) {
163            geoMeanImpl.increment(value);
164        }
165        n++;
166    }
167
168    /**
169     * Returns the number of available values
170     * @return The number of available values
171     */
172    public long getN() {
173        return n;
174    }
175
176    /**
177     * Returns the sum of the values that have been added
178     * @return The sum or <code>Double.NaN</code> if no values have been added
179     */
180    public double getSum() {
181        return sumImpl.getResult();
182    }
183
184    /**
185     * Returns the sum of the squares of the values that have been added.
186     * <p>
187     * Double.NaN is returned if no values have been added.
188     * </p>
189     * @return The sum of squares
190     */
191    public double getSumsq() {
192        return sumsqImpl.getResult();
193    }
194
195    /**
196     * Returns the mean of the values that have been added.
197     * <p>
198     * Double.NaN is returned if no values have been added.
199     * </p>
200     * @return the mean
201     */
202    public double getMean() {
203        if (mean == meanImpl) {
204            return new Mean(secondMoment).getResult();
205        } else {
206            return meanImpl.getResult();
207        }
208    }
209
210    /**
211     * Returns the standard deviation of the values that have been added.
212     * <p>
213     * Double.NaN is returned if no values have been added.
214     * </p>
215     * @return the standard deviation
216     */
217    public double getStandardDeviation() {
218        double stdDev = Double.NaN;
219        if (getN() > 0) {
220            if (getN() > 1) {
221                stdDev = FastMath.sqrt(getVariance());
222            } else {
223                stdDev = 0.0;
224            }
225        }
226        return stdDev;
227    }
228
229    /**
230     * Returns the variance of the values that have been added.
231     * <p>
232     * Double.NaN is returned if no values have been added.
233     * </p>
234     * @return the variance
235     */
236    public double getVariance() {
237        if (varianceImpl == variance) {
238            return new Variance(secondMoment).getResult();
239        } else {
240            return varianceImpl.getResult();
241        }
242    }
243
244    /**
245     * Returns the maximum of the values that have been added.
246     * <p>
247     * Double.NaN is returned if no values have been added.
248     * </p>
249     * @return the maximum
250     */
251    public double getMax() {
252        return maxImpl.getResult();
253    }
254
255    /**
256     * Returns the minimum of the values that have been added.
257     * <p>
258     * Double.NaN is returned if no values have been added.
259     * </p>
260     * @return the minimum
261     */
262    public double getMin() {
263        return minImpl.getResult();
264    }
265
266    /**
267     * Returns the geometric mean of the values that have been added.
268     * <p>
269     * Double.NaN is returned if no values have been added.
270     * </p>
271     * @return the geometric mean
272     */
273    public double getGeometricMean() {
274        return geoMeanImpl.getResult();
275    }
276
277    /**
278     * Returns the sum of the logs of the values that have been added.
279     * <p>
280     * Double.NaN is returned if no values have been added.
281     * </p>
282     * @return the sum of logs
283     * @since 1.2
284     */
285    public double getSumOfLogs() {
286        return sumLogImpl.getResult();
287    }
288
289    /**
290     * Returns a statistic related to the Second Central Moment.  Specifically,
291     * what is returned is the sum of squared deviations from the sample mean
292     * among the values that have been added.
293     * <p>
294     * Returns <code>Double.NaN</code> if no data values have been added and
295     * returns <code>0</code> if there is just one value in the data set.</p>
296     * <p>
297     * @return second central moment statistic
298     * @since 2.0
299     */
300    public double getSecondMoment() {
301        return secondMoment.getResult();
302    }
303
304    /**
305     * Generates a text report displaying summary statistics from values that
306     * have been added.
307     * @return String with line feeds displaying statistics
308     * @since 1.2
309     */
310    @Override
311    public String toString() {
312        StringBuilder outBuffer = new StringBuilder();
313        String endl = "\n";
314        outBuffer.append("SummaryStatistics:").append(endl);
315        outBuffer.append("n: ").append(getN()).append(endl);
316        outBuffer.append("min: ").append(getMin()).append(endl);
317        outBuffer.append("max: ").append(getMax()).append(endl);
318        outBuffer.append("mean: ").append(getMean()).append(endl);
319        outBuffer.append("geometric mean: ").append(getGeometricMean())
320            .append(endl);
321        outBuffer.append("variance: ").append(getVariance()).append(endl);
322        outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
323        outBuffer.append("standard deviation: ").append(getStandardDeviation())
324            .append(endl);
325        outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
326        return outBuffer.toString();
327    }
328
329    /**
330     * Resets all statistics and storage
331     */
332    public void clear() {
333        this.n = 0;
334        minImpl.clear();
335        maxImpl.clear();
336        sumImpl.clear();
337        sumLogImpl.clear();
338        sumsqImpl.clear();
339        geoMeanImpl.clear();
340        secondMoment.clear();
341        if (meanImpl != mean) {
342            meanImpl.clear();
343        }
344        if (varianceImpl != variance) {
345            varianceImpl.clear();
346        }
347    }
348
349    /**
350     * Returns true iff <code>object</code> is a
351     * <code>SummaryStatistics</code> instance and all statistics have the
352     * same values as this.
353     * @param object the object to test equality against.
354     * @return true if object equals this
355     */
356    @Override
357    public boolean equals(Object object) {
358        if (object == this) {
359            return true;
360        }
361        if (object instanceof SummaryStatistics == false) {
362            return false;
363        }
364        SummaryStatistics stat = (SummaryStatistics)object;
365        return MathUtils.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
366               MathUtils.equalsIncludingNaN(stat.getMax(),           getMax())           &&
367               MathUtils.equalsIncludingNaN(stat.getMean(),          getMean())          &&
368               MathUtils.equalsIncludingNaN(stat.getMin(),           getMin())           &&
369               MathUtils.equalsIncludingNaN(stat.getN(),             getN())             &&
370               MathUtils.equalsIncludingNaN(stat.getSum(),           getSum())           &&
371               MathUtils.equalsIncludingNaN(stat.getSumsq(),         getSumsq())         &&
372               MathUtils.equalsIncludingNaN(stat.getVariance(),      getVariance());
373    }
374
375    /**
376     * Returns hash code based on values of statistics
377     * @return hash code
378     */
379    @Override
380    public int hashCode() {
381        int result = 31 + MathUtils.hash(getGeometricMean());
382        result = result * 31 + MathUtils.hash(getGeometricMean());
383        result = result * 31 + MathUtils.hash(getMax());
384        result = result * 31 + MathUtils.hash(getMean());
385        result = result * 31 + MathUtils.hash(getMin());
386        result = result * 31 + MathUtils.hash(getN());
387        result = result * 31 + MathUtils.hash(getSum());
388        result = result * 31 + MathUtils.hash(getSumsq());
389        result = result * 31 + MathUtils.hash(getVariance());
390        return result;
391    }
392
393    // Getters and setters for statistics implementations
394    /**
395     * Returns the currently configured Sum implementation
396     * @return the StorelessUnivariateStatistic implementing the sum
397     * @since 1.2
398     */
399    public StorelessUnivariateStatistic getSumImpl() {
400        return sumImpl;
401    }
402
403    /**
404     * <p>
405     * Sets the implementation for the Sum.
406     * </p>
407     * <p>
408     * This method must be activated before any data has been added - i.e.,
409     * before {@link #addValue(double) addValue} has been used to add data;
410     * otherwise an IllegalStateException will be thrown.
411     * </p>
412     * @param sumImpl the StorelessUnivariateStatistic instance to use for
413     *        computing the Sum
414     * @throws IllegalStateException if data has already been added (i.e if n >
415     *         0)
416     * @since 1.2
417     */
418    public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
419        checkEmpty();
420        this.sumImpl = sumImpl;
421    }
422
423    /**
424     * Returns the currently configured sum of squares implementation
425     * @return the StorelessUnivariateStatistic implementing the sum of squares
426     * @since 1.2
427     */
428    public StorelessUnivariateStatistic getSumsqImpl() {
429        return sumsqImpl;
430    }
431
432    /**
433     * <p>
434     * Sets the implementation for the sum of squares.
435     * </p>
436     * <p>
437     * This method must be activated before any data has been added - i.e.,
438     * before {@link #addValue(double) addValue} has been used to add data;
439     * otherwise an IllegalStateException will be thrown.
440     * </p>
441     * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
442     *        computing the sum of squares
443     * @throws IllegalStateException if data has already been added (i.e if n >
444     *         0)
445     * @since 1.2
446     */
447    public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
448        checkEmpty();
449        this.sumsqImpl = sumsqImpl;
450    }
451
452    /**
453     * Returns the currently configured minimum implementation
454     * @return the StorelessUnivariateStatistic implementing the minimum
455     * @since 1.2
456     */
457    public StorelessUnivariateStatistic getMinImpl() {
458        return minImpl;
459    }
460
461    /**
462     * <p>
463     * Sets the implementation for the minimum.
464     * </p>
465     * <p>
466     * This method must be activated before any data has been added - i.e.,
467     * before {@link #addValue(double) addValue} has been used to add data;
468     * otherwise an IllegalStateException will be thrown.
469     * </p>
470     * @param minImpl the StorelessUnivariateStatistic instance to use for
471     *        computing the minimum
472     * @throws IllegalStateException if data has already been added (i.e if n >
473     *         0)
474     * @since 1.2
475     */
476    public void setMinImpl(StorelessUnivariateStatistic minImpl) {
477        checkEmpty();
478        this.minImpl = minImpl;
479    }
480
481    /**
482     * Returns the currently configured maximum implementation
483     * @return the StorelessUnivariateStatistic implementing the maximum
484     * @since 1.2
485     */
486    public StorelessUnivariateStatistic getMaxImpl() {
487        return maxImpl;
488    }
489
490    /**
491     * <p>
492     * Sets the implementation for the maximum.
493     * </p>
494     * <p>
495     * This method must be activated before any data has been added - i.e.,
496     * before {@link #addValue(double) addValue} has been used to add data;
497     * otherwise an IllegalStateException will be thrown.
498     * </p>
499     * @param maxImpl the StorelessUnivariateStatistic instance to use for
500     *        computing the maximum
501     * @throws IllegalStateException if data has already been added (i.e if n >
502     *         0)
503     * @since 1.2
504     */
505    public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
506        checkEmpty();
507        this.maxImpl = maxImpl;
508    }
509
510    /**
511     * Returns the currently configured sum of logs implementation
512     * @return the StorelessUnivariateStatistic implementing the log sum
513     * @since 1.2
514     */
515    public StorelessUnivariateStatistic getSumLogImpl() {
516        return sumLogImpl;
517    }
518
519    /**
520     * <p>
521     * Sets the implementation for the sum of logs.
522     * </p>
523     * <p>
524     * This method must be activated before any data has been added - i.e.,
525     * before {@link #addValue(double) addValue} has been used to add data;
526     * otherwise an IllegalStateException will be thrown.
527     * </p>
528     * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
529     *        computing the log sum
530     * @throws IllegalStateException if data has already been added (i.e if n >
531     *         0)
532     * @since 1.2
533     */
534    public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
535        checkEmpty();
536        this.sumLogImpl = sumLogImpl;
537        geoMean.setSumLogImpl(sumLogImpl);
538    }
539
540    /**
541     * Returns the currently configured geometric mean implementation
542     * @return the StorelessUnivariateStatistic implementing the geometric mean
543     * @since 1.2
544     */
545    public StorelessUnivariateStatistic getGeoMeanImpl() {
546        return geoMeanImpl;
547    }
548
549    /**
550     * <p>
551     * Sets the implementation for the geometric mean.
552     * </p>
553     * <p>
554     * This method must be activated before any data has been added - i.e.,
555     * before {@link #addValue(double) addValue} has been used to add data;
556     * otherwise an IllegalStateException will be thrown.
557     * </p>
558     * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
559     *        computing the geometric mean
560     * @throws IllegalStateException if data has already been added (i.e if n >
561     *         0)
562     * @since 1.2
563     */
564    public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
565        checkEmpty();
566        this.geoMeanImpl = geoMeanImpl;
567    }
568
569    /**
570     * Returns the currently configured mean implementation
571     * @return the StorelessUnivariateStatistic implementing the mean
572     * @since 1.2
573     */
574    public StorelessUnivariateStatistic getMeanImpl() {
575        return meanImpl;
576    }
577
578    /**
579     * <p>
580     * Sets the implementation for the mean.
581     * </p>
582     * <p>
583     * This method must be activated before any data has been added - i.e.,
584     * before {@link #addValue(double) addValue} has been used to add data;
585     * otherwise an IllegalStateException will be thrown.
586     * </p>
587     * @param meanImpl the StorelessUnivariateStatistic instance to use for
588     *        computing the mean
589     * @throws IllegalStateException if data has already been added (i.e if n >
590     *         0)
591     * @since 1.2
592     */
593    public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
594        checkEmpty();
595        this.meanImpl = meanImpl;
596    }
597
598    /**
599     * Returns the currently configured variance implementation
600     * @return the StorelessUnivariateStatistic implementing the variance
601     * @since 1.2
602     */
603    public StorelessUnivariateStatistic getVarianceImpl() {
604        return varianceImpl;
605    }
606
607    /**
608     * <p>
609     * Sets the implementation for the variance.
610     * </p>
611     * <p>
612     * This method must be activated before any data has been added - i.e.,
613     * before {@link #addValue(double) addValue} has been used to add data;
614     * otherwise an IllegalStateException will be thrown.
615     * </p>
616     * @param varianceImpl the StorelessUnivariateStatistic instance to use for
617     *        computing the variance
618     * @throws IllegalStateException if data has already been added (i.e if n >
619     *         0)
620     * @since 1.2
621     */
622    public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
623        checkEmpty();
624        this.varianceImpl = varianceImpl;
625    }
626
627    /**
628     * Throws IllegalStateException if n > 0.
629     */
630    private void checkEmpty() {
631        if (n > 0) {
632            throw MathRuntimeException.createIllegalStateException(
633                    LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
634                    n);
635        }
636    }
637
638    /**
639     * Returns a copy of this SummaryStatistics instance with the same internal state.
640     *
641     * @return a copy of this
642     */
643    public SummaryStatistics copy() {
644        SummaryStatistics result = new SummaryStatistics();
645        copy(this, result);
646        return result;
647    }
648
649    /**
650     * Copies source to dest.
651     * <p>Neither source nor dest can be null.</p>
652     *
653     * @param source SummaryStatistics to copy
654     * @param dest SummaryStatistics to copy to
655     * @throws NullPointerException if either source or dest is null
656     */
657    public static void copy(SummaryStatistics source, SummaryStatistics dest) {
658        dest.maxImpl = source.maxImpl.copy();
659        dest.meanImpl = source.meanImpl.copy();
660        dest.minImpl = source.minImpl.copy();
661        dest.sumImpl = source.sumImpl.copy();
662        dest.varianceImpl = source.varianceImpl.copy();
663        dest.sumLogImpl = source.sumLogImpl.copy();
664        dest.sumsqImpl = source.sumsqImpl.copy();
665        if (source.getGeoMeanImpl() instanceof GeometricMean) {
666            // Keep geoMeanImpl, sumLogImpl in synch
667            dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
668        } else {
669            dest.geoMeanImpl = source.geoMeanImpl.copy();
670        }
671        SecondMoment.copy(source.secondMoment, dest.secondMoment);
672        dest.n = source.n;
673
674        // Make sure that if stat == statImpl in source, same
675        // holds in dest; otherwise copy stat
676        if (source.geoMean == source.geoMeanImpl) {
677            dest.geoMean = (GeometricMean) dest.geoMeanImpl;
678        } else {
679            GeometricMean.copy(source.geoMean, dest.geoMean);
680        }
681        if (source.max == source.maxImpl) {
682            dest.max = (Max) dest.maxImpl;
683        } else {
684            Max.copy(source.max, dest.max);
685        }
686        if (source.mean == source.meanImpl) {
687            dest.mean = (Mean) dest.meanImpl;
688        } else {
689            Mean.copy(source.mean, dest.mean);
690        }
691        if (source.min == source.minImpl) {
692            dest.min = (Min) dest.minImpl;
693        } else {
694            Min.copy(source.min, dest.min);
695        }
696        if (source.sum == source.sumImpl) {
697            dest.sum = (Sum) dest.sumImpl;
698        } else {
699            Sum.copy(source.sum, dest.sum);
700        }
701        if (source.variance == source.varianceImpl) {
702            dest.variance = (Variance) dest.varianceImpl;
703        } else {
704            Variance.copy(source.variance, dest.variance);
705        }
706        if (source.sumLog == source.sumLogImpl) {
707            dest.sumLog = (SumOfLogs) dest.sumLogImpl;
708        } else {
709            SumOfLogs.copy(source.sumLog, dest.sumLog);
710        }
711        if (source.sumsq == source.sumsqImpl) {
712            dest.sumsq = (SumOfSquares) dest.sumsqImpl;
713        } else {
714            SumOfSquares.copy(source.sumsq, dest.sumsq);
715        }
716    }
717}
718