1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package org.apache.commons.math.stat.descriptive;
19
20import java.io.Serializable;
21import java.util.Collection;
22import java.util.Iterator;
23
24/**
25 * <p>
26 * An aggregator for {@code SummaryStatistics} from several data sets or
27 * data set partitions.  In its simplest usage mode, the client creates an
28 * instance via the zero-argument constructor, then uses
29 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
30 * for each individual data set / partition.  The per-set statistics objects
31 * are used as normal, and at any time the aggregate statistics for all the
32 * contributors can be obtained from this object.
33 * </p><p>
34 * Clients with specialized requirements can use alternative constructors to
35 * control the statistics implementations and initial values used by the
36 * contributing and the internal aggregate {@code SummaryStatistics} objects.
37 * </p><p>
38 * A static {@link #aggregate(Collection)} method is also included that computes
39 * aggregate statistics directly from a Collection of SummaryStatistics instances.
40 * </p><p>
41 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
42 * instances to be aggregated concurrently, the created instances'
43 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
44 * instance maintained by this class.  In multithreaded environments, if the functionality
45 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
 * to avoid unnecessary computation and synchronization delays.</p>
47 *
48 * @since 2.0
49 * @version $Revision: 811833 $ $Date: 2009-09-06 18:27:50 +0200 (dim. 06 sept. 2009) $
50 *
51 */
52public class AggregateSummaryStatistics implements StatisticalSummary,
53        Serializable {
54
55
56    /** Serializable version identifier */
57    private static final long serialVersionUID = -8207112444016386906L;
58
59    /**
60     * A SummaryStatistics serving as a prototype for creating SummaryStatistics
61     * contributing to this aggregate
62     */
63    private final SummaryStatistics statisticsPrototype;
64
65    /**
66     * The SummaryStatistics in which aggregate statistics are accumulated.
67     */
68    private final SummaryStatistics statistics;
69
70    /**
71     * Initializes a new AggregateSummaryStatistics with default statistics
72     * implementations.
73     *
74     */
75    public AggregateSummaryStatistics() {
76        this(new SummaryStatistics());
77    }
78
79    /**
80     * Initializes a new AggregateSummaryStatistics with the specified statistics
81     * object as a prototype for contributing statistics and for the internal
82     * aggregate statistics.  This provides for customized statistics implementations
83     * to be used by contributing and aggregate statistics.
84     *
85     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
86     *      prototype both for the internal aggregate statistics and for
87     *      contributing statistics obtained via the
88     *      {@code createContributingStatistics()} method.  Being a prototype
89     *      means that other objects are initialized by copying this object's state.
90     *      If {@code null}, a new, default statistics object is used.  Any statistic
91     *      values in the prototype are propagated to contributing statistics
92     *      objects and (once) into these aggregate statistics.
93     * @see #createContributingStatistics()
94     */
95    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) {
96        this(prototypeStatistics,
97             prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
98    }
99
100    /**
101     * Initializes a new AggregateSummaryStatistics with the specified statistics
102     * object as a prototype for contributing statistics and for the internal
103     * aggregate statistics.  This provides for different statistics implementations
104     * to be used by contributing and aggregate statistics and for an initial
105     * state to be supplied for the aggregate statistics.
106     *
107     * @param prototypeStatistics a {@code SummaryStatistics} serving as a
108     *      prototype both for the internal aggregate statistics and for
109     *      contributing statistics obtained via the
110     *      {@code createContributingStatistics()} method.  Being a prototype
111     *      means that other objects are initialized by copying this object's state.
112     *      If {@code null}, a new, default statistics object is used.  Any statistic
113     *      values in the prototype are propagated to contributing statistics
114     *      objects, but not into these aggregate statistics.
115     * @param initialStatistics a {@code SummaryStatistics} to serve as the
116     *      internal aggregate statistics object.  If {@code null}, a new, default
117     *      statistics object is used.
118     * @see #createContributingStatistics()
119     */
120    public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
121                                      SummaryStatistics initialStatistics) {
122        this.statisticsPrototype =
123            (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
124        this.statistics =
125            (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
126    }
127
128    /**
129     * {@inheritDoc}.  This version returns the maximum over all the aggregated
130     * data.
131     *
132     * @see StatisticalSummary#getMax()
133     */
134    public double getMax() {
135        synchronized (statistics) {
136            return statistics.getMax();
137        }
138    }
139
140    /**
141     * {@inheritDoc}.  This version returns the mean of all the aggregated data.
142     *
143     * @see StatisticalSummary#getMean()
144     */
145    public double getMean() {
146        synchronized (statistics) {
147            return statistics.getMean();
148        }
149    }
150
151    /**
152     * {@inheritDoc}.  This version returns the minimum over all the aggregated
153     * data.
154     *
155     * @see StatisticalSummary#getMin()
156     */
157    public double getMin() {
158        synchronized (statistics) {
159            return statistics.getMin();
160        }
161    }
162
163    /**
164     * {@inheritDoc}.  This version returns a count of all the aggregated data.
165     *
166     * @see StatisticalSummary#getN()
167     */
168    public long getN() {
169        synchronized (statistics) {
170            return statistics.getN();
171        }
172    }
173
174    /**
175     * {@inheritDoc}.  This version returns the standard deviation of all the
176     * aggregated data.
177     *
178     * @see StatisticalSummary#getStandardDeviation()
179     */
180    public double getStandardDeviation() {
181        synchronized (statistics) {
182            return statistics.getStandardDeviation();
183        }
184    }
185
186    /**
187     * {@inheritDoc}.  This version returns a sum of all the aggregated data.
188     *
189     * @see StatisticalSummary#getSum()
190     */
191    public double getSum() {
192        synchronized (statistics) {
193            return statistics.getSum();
194        }
195    }
196
197    /**
198     * {@inheritDoc}.  This version returns the variance of all the aggregated
199     * data.
200     *
201     * @see StatisticalSummary#getVariance()
202     */
203    public double getVariance() {
204        synchronized (statistics) {
205            return statistics.getVariance();
206        }
207    }
208
209    /**
210     * Returns the sum of the logs of all the aggregated data.
211     *
212     * @return the sum of logs
213     * @see SummaryStatistics#getSumOfLogs()
214     */
215    public double getSumOfLogs() {
216        synchronized (statistics) {
217            return statistics.getSumOfLogs();
218        }
219    }
220
221    /**
222     * Returns the geometric mean of all the aggregated data.
223     *
224     * @return the geometric mean
225     * @see SummaryStatistics#getGeometricMean()
226     */
227    public double getGeometricMean() {
228        synchronized (statistics) {
229            return statistics.getGeometricMean();
230        }
231    }
232
233    /**
234     * Returns the sum of the squares of all the aggregated data.
235     *
236     * @return The sum of squares
237     * @see SummaryStatistics#getSumsq()
238     */
239    public double getSumsq() {
240        synchronized (statistics) {
241            return statistics.getSumsq();
242        }
243    }
244
245    /**
246     * Returns a statistic related to the Second Central Moment.  Specifically,
247     * what is returned is the sum of squared deviations from the sample mean
248     * among the all of the aggregated data.
249     *
250     * @return second central moment statistic
251     * @see SummaryStatistics#getSecondMoment()
252     */
253    public double getSecondMoment() {
254        synchronized (statistics) {
255            return statistics.getSecondMoment();
256        }
257    }
258
259    /**
260     * Return a {@link StatisticalSummaryValues} instance reporting current
261     * aggregate statistics.
262     *
263     * @return Current values of aggregate statistics
264     */
265    public StatisticalSummary getSummary() {
266        synchronized (statistics) {
267            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
268                    getMax(), getMin(), getSum());
269        }
270    }
271
272    /**
273     * Creates and returns a {@code SummaryStatistics} whose data will be
274     * aggregated with those of this {@code AggregateSummaryStatistics}.
275     *
276     * @return a {@code SummaryStatistics} whose data will be aggregated with
277     *      those of this {@code AggregateSummaryStatistics}.  The initial state
278     *      is a copy of the configured prototype statistics.
279     */
280    public SummaryStatistics createContributingStatistics() {
281        SummaryStatistics contributingStatistics
282                = new AggregatingSummaryStatistics(statistics);
283
284        SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
285
286        return contributingStatistics;
287    }
288
289    /**
290     * Computes aggregate summary statistics. This method can be used to combine statistics
291     * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
292     * should contain the same values that would have been obtained by computing a single
293     * StatisticalSummary over the combined dataset.
294     * <p>
295     * Returns null if the collection is empty or null.
296     * </p>
297     *
298     * @param statistics collection of SummaryStatistics to aggregate
299     * @return summary statistics for the combined dataset
300     */
301    public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
302        if (statistics == null) {
303            return null;
304        }
305        Iterator<SummaryStatistics> iterator = statistics.iterator();
306        if (!iterator.hasNext()) {
307            return null;
308        }
309        SummaryStatistics current = iterator.next();
310        long n = current.getN();
311        double min = current.getMin();
312        double sum = current.getSum();
313        double max = current.getMax();
314        double m2 = current.getSecondMoment();
315        double mean = current.getMean();
316        while (iterator.hasNext()) {
317            current = iterator.next();
318            if (current.getMin() < min || Double.isNaN(min)) {
319                min = current.getMin();
320            }
321            if (current.getMax() > max || Double.isNaN(max)) {
322                max = current.getMax();
323            }
324            sum += current.getSum();
325            final double oldN = n;
326            final double curN = current.getN();
327            n += curN;
328            final double meanDiff = current.getMean() - mean;
329            mean = sum / n;
330            m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
331        }
332        final double variance;
333        if (n == 0) {
334            variance = Double.NaN;
335        } else if (n == 1) {
336            variance = 0d;
337        } else {
338            variance = m2 / (n - 1);
339        }
340        return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
341    }
342
343    /**
344     * A SummaryStatistics that also forwards all values added to it to a second
345     * {@code SummaryStatistics} for aggregation.
346     *
347     * @since 2.0
348     */
349    private static class AggregatingSummaryStatistics extends SummaryStatistics {
350
351        /**
352         * The serialization version of this class
353         */
354        private static final long serialVersionUID = 1L;
355
356        /**
357         * An additional SummaryStatistics into which values added to these
358         * statistics (and possibly others) are aggregated
359         */
360        private final SummaryStatistics aggregateStatistics;
361
362        /**
363         * Initializes a new AggregatingSummaryStatistics with the specified
364         * aggregate statistics object
365         *
366         * @param aggregateStatistics a {@code SummaryStatistics} into which
367         *      values added to this statistics object should be aggregated
368         */
369        public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
370            this.aggregateStatistics = aggregateStatistics;
371        }
372
373        /**
374         * {@inheritDoc}.  This version adds the provided value to the configured
375         * aggregate after adding it to these statistics.
376         *
377         * @see SummaryStatistics#addValue(double)
378         */
379        @Override
380        public void addValue(double value) {
381            super.addValue(value);
382            synchronized (aggregateStatistics) {
383                aggregateStatistics.addValue(value);
384            }
385        }
386
387        /**
388         * Returns true iff <code>object</code> is a
389         * <code>SummaryStatistics</code> instance and all statistics have the
390         * same values as this.
391         * @param object the object to test equality against.
392         * @return true if object equals this
393         */
394        @Override
395        public boolean equals(Object object) {
396            if (object == this) {
397                return true;
398            }
399            if (object instanceof AggregatingSummaryStatistics == false) {
400                return false;
401            }
402            AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
403            return super.equals(stat) &&
404                   aggregateStatistics.equals(stat.aggregateStatistics);
405        }
406
407        /**
408         * Returns hash code based on values of statistics
409         * @return hash code
410         */
411        @Override
412        public int hashCode() {
413            return 123 + super.hashCode() + aggregateStatistics.hashCode();
414        }
415    }
416}
417