1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package org.apache.commons.math.stat.descriptive; 19 20import java.io.Serializable; 21import java.util.Collection; 22import java.util.Iterator; 23 24/** 25 * <p> 26 * An aggregator for {@code SummaryStatistics} from several data sets or 27 * data set partitions. In its simplest usage mode, the client creates an 28 * instance via the zero-argument constructor, then uses 29 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics} 30 * for each individual data set / partition. The per-set statistics objects 31 * are used as normal, and at any time the aggregate statistics for all the 32 * contributors can be obtained from this object. 33 * </p><p> 34 * Clients with specialized requirements can use alternative constructors to 35 * control the statistics implementations and initial values used by the 36 * contributing and the internal aggregate {@code SummaryStatistics} objects. 37 * </p><p> 38 * A static {@link #aggregate(Collection)} method is also included that computes 39 * aggregate statistics directly from a Collection of SummaryStatistics instances. 40 * </p><p> 41 * When {@link #createContributingStatistics()} is used to create SummaryStatistics 42 * instances to be aggregated concurrently, the created instances' 43 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating 44 * instance maintained by this class. In multithreaded environments, if the functionality 45 * provided by {@link #aggregate(Collection)} is adequate, that method should be used 46 * to avoid unecessary computation and synchronization delays.</p> 47 * 48 * @since 2.0 49 * @version $Revision: 811833 $ $Date: 2009-09-06 18:27:50 +0200 (dim. 06 sept. 2009) $ 50 * 51 */ 52public class AggregateSummaryStatistics implements StatisticalSummary, 53 Serializable { 54 55 56 /** Serializable version identifier */ 57 private static final long serialVersionUID = -8207112444016386906L; 58 59 /** 60 * A SummaryStatistics serving as a prototype for creating SummaryStatistics 61 * contributing to this aggregate 62 */ 63 private final SummaryStatistics statisticsPrototype; 64 65 /** 66 * The SummaryStatistics in which aggregate statistics are accumulated. 67 */ 68 private final SummaryStatistics statistics; 69 70 /** 71 * Initializes a new AggregateSummaryStatistics with default statistics 72 * implementations. 73 * 74 */ 75 public AggregateSummaryStatistics() { 76 this(new SummaryStatistics()); 77 } 78 79 /** 80 * Initializes a new AggregateSummaryStatistics with the specified statistics 81 * object as a prototype for contributing statistics and for the internal 82 * aggregate statistics. This provides for customized statistics implementations 83 * to be used by contributing and aggregate statistics. 84 * 85 * @param prototypeStatistics a {@code SummaryStatistics} serving as a 86 * prototype both for the internal aggregate statistics and for 87 * contributing statistics obtained via the 88 * {@code createContributingStatistics()} method. Being a prototype 89 * means that other objects are initialized by copying this object's state. 90 * If {@code null}, a new, default statistics object is used. Any statistic 91 * values in the prototype are propagated to contributing statistics 92 * objects and (once) into these aggregate statistics. 93 * @see #createContributingStatistics() 94 */ 95 public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) { 96 this(prototypeStatistics, 97 prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics)); 98 } 99 100 /** 101 * Initializes a new AggregateSummaryStatistics with the specified statistics 102 * object as a prototype for contributing statistics and for the internal 103 * aggregate statistics. This provides for different statistics implementations 104 * to be used by contributing and aggregate statistics and for an initial 105 * state to be supplied for the aggregate statistics. 106 * 107 * @param prototypeStatistics a {@code SummaryStatistics} serving as a 108 * prototype both for the internal aggregate statistics and for 109 * contributing statistics obtained via the 110 * {@code createContributingStatistics()} method. Being a prototype 111 * means that other objects are initialized by copying this object's state. 112 * If {@code null}, a new, default statistics object is used. Any statistic 113 * values in the prototype are propagated to contributing statistics 114 * objects, but not into these aggregate statistics. 115 * @param initialStatistics a {@code SummaryStatistics} to serve as the 116 * internal aggregate statistics object. If {@code null}, a new, default 117 * statistics object is used. 118 * @see #createContributingStatistics() 119 */ 120 public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics, 121 SummaryStatistics initialStatistics) { 122 this.statisticsPrototype = 123 (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics; 124 this.statistics = 125 (initialStatistics == null) ? new SummaryStatistics() : initialStatistics; 126 } 127 128 /** 129 * {@inheritDoc}. This version returns the maximum over all the aggregated 130 * data. 131 * 132 * @see StatisticalSummary#getMax() 133 */ 134 public double getMax() { 135 synchronized (statistics) { 136 return statistics.getMax(); 137 } 138 } 139 140 /** 141 * {@inheritDoc}. This version returns the mean of all the aggregated data. 142 * 143 * @see StatisticalSummary#getMean() 144 */ 145 public double getMean() { 146 synchronized (statistics) { 147 return statistics.getMean(); 148 } 149 } 150 151 /** 152 * {@inheritDoc}. This version returns the minimum over all the aggregated 153 * data. 154 * 155 * @see StatisticalSummary#getMin() 156 */ 157 public double getMin() { 158 synchronized (statistics) { 159 return statistics.getMin(); 160 } 161 } 162 163 /** 164 * {@inheritDoc}. This version returns a count of all the aggregated data. 165 * 166 * @see StatisticalSummary#getN() 167 */ 168 public long getN() { 169 synchronized (statistics) { 170 return statistics.getN(); 171 } 172 } 173 174 /** 175 * {@inheritDoc}. This version returns the standard deviation of all the 176 * aggregated data. 177 * 178 * @see StatisticalSummary#getStandardDeviation() 179 */ 180 public double getStandardDeviation() { 181 synchronized (statistics) { 182 return statistics.getStandardDeviation(); 183 } 184 } 185 186 /** 187 * {@inheritDoc}. This version returns a sum of all the aggregated data. 188 * 189 * @see StatisticalSummary#getSum() 190 */ 191 public double getSum() { 192 synchronized (statistics) { 193 return statistics.getSum(); 194 } 195 } 196 197 /** 198 * {@inheritDoc}. This version returns the variance of all the aggregated 199 * data. 200 * 201 * @see StatisticalSummary#getVariance() 202 */ 203 public double getVariance() { 204 synchronized (statistics) { 205 return statistics.getVariance(); 206 } 207 } 208 209 /** 210 * Returns the sum of the logs of all the aggregated data. 211 * 212 * @return the sum of logs 213 * @see SummaryStatistics#getSumOfLogs() 214 */ 215 public double getSumOfLogs() { 216 synchronized (statistics) { 217 return statistics.getSumOfLogs(); 218 } 219 } 220 221 /** 222 * Returns the geometric mean of all the aggregated data. 223 * 224 * @return the geometric mean 225 * @see SummaryStatistics#getGeometricMean() 226 */ 227 public double getGeometricMean() { 228 synchronized (statistics) { 229 return statistics.getGeometricMean(); 230 } 231 } 232 233 /** 234 * Returns the sum of the squares of all the aggregated data. 235 * 236 * @return The sum of squares 237 * @see SummaryStatistics#getSumsq() 238 */ 239 public double getSumsq() { 240 synchronized (statistics) { 241 return statistics.getSumsq(); 242 } 243 } 244 245 /** 246 * Returns a statistic related to the Second Central Moment. Specifically, 247 * what is returned is the sum of squared deviations from the sample mean 248 * among the all of the aggregated data. 249 * 250 * @return second central moment statistic 251 * @see SummaryStatistics#getSecondMoment() 252 */ 253 public double getSecondMoment() { 254 synchronized (statistics) { 255 return statistics.getSecondMoment(); 256 } 257 } 258 259 /** 260 * Return a {@link StatisticalSummaryValues} instance reporting current 261 * aggregate statistics. 262 * 263 * @return Current values of aggregate statistics 264 */ 265 public StatisticalSummary getSummary() { 266 synchronized (statistics) { 267 return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 268 getMax(), getMin(), getSum()); 269 } 270 } 271 272 /** 273 * Creates and returns a {@code SummaryStatistics} whose data will be 274 * aggregated with those of this {@code AggregateSummaryStatistics}. 275 * 276 * @return a {@code SummaryStatistics} whose data will be aggregated with 277 * those of this {@code AggregateSummaryStatistics}. The initial state 278 * is a copy of the configured prototype statistics. 279 */ 280 public SummaryStatistics createContributingStatistics() { 281 SummaryStatistics contributingStatistics 282 = new AggregatingSummaryStatistics(statistics); 283 284 SummaryStatistics.copy(statisticsPrototype, contributingStatistics); 285 286 return contributingStatistics; 287 } 288 289 /** 290 * Computes aggregate summary statistics. This method can be used to combine statistics 291 * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned 292 * should contain the same values that would have been obtained by computing a single 293 * StatisticalSummary over the combined dataset. 294 * <p> 295 * Returns null if the collection is empty or null. 296 * </p> 297 * 298 * @param statistics collection of SummaryStatistics to aggregate 299 * @return summary statistics for the combined dataset 300 */ 301 public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) { 302 if (statistics == null) { 303 return null; 304 } 305 Iterator<SummaryStatistics> iterator = statistics.iterator(); 306 if (!iterator.hasNext()) { 307 return null; 308 } 309 SummaryStatistics current = iterator.next(); 310 long n = current.getN(); 311 double min = current.getMin(); 312 double sum = current.getSum(); 313 double max = current.getMax(); 314 double m2 = current.getSecondMoment(); 315 double mean = current.getMean(); 316 while (iterator.hasNext()) { 317 current = iterator.next(); 318 if (current.getMin() < min || Double.isNaN(min)) { 319 min = current.getMin(); 320 } 321 if (current.getMax() > max || Double.isNaN(max)) { 322 max = current.getMax(); 323 } 324 sum += current.getSum(); 325 final double oldN = n; 326 final double curN = current.getN(); 327 n += curN; 328 final double meanDiff = current.getMean() - mean; 329 mean = sum / n; 330 m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n; 331 } 332 final double variance; 333 if (n == 0) { 334 variance = Double.NaN; 335 } else if (n == 1) { 336 variance = 0d; 337 } else { 338 variance = m2 / (n - 1); 339 } 340 return new StatisticalSummaryValues(mean, variance, n, max, min, sum); 341 } 342 343 /** 344 * A SummaryStatistics that also forwards all values added to it to a second 345 * {@code SummaryStatistics} for aggregation. 346 * 347 * @since 2.0 348 */ 349 private static class AggregatingSummaryStatistics extends SummaryStatistics { 350 351 /** 352 * The serialization version of this class 353 */ 354 private static final long serialVersionUID = 1L; 355 356 /** 357 * An additional SummaryStatistics into which values added to these 358 * statistics (and possibly others) are aggregated 359 */ 360 private final SummaryStatistics aggregateStatistics; 361 362 /** 363 * Initializes a new AggregatingSummaryStatistics with the specified 364 * aggregate statistics object 365 * 366 * @param aggregateStatistics a {@code SummaryStatistics} into which 367 * values added to this statistics object should be aggregated 368 */ 369 public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) { 370 this.aggregateStatistics = aggregateStatistics; 371 } 372 373 /** 374 * {@inheritDoc}. This version adds the provided value to the configured 375 * aggregate after adding it to these statistics. 376 * 377 * @see SummaryStatistics#addValue(double) 378 */ 379 @Override 380 public void addValue(double value) { 381 super.addValue(value); 382 synchronized (aggregateStatistics) { 383 aggregateStatistics.addValue(value); 384 } 385 } 386 387 /** 388 * Returns true iff <code>object</code> is a 389 * <code>SummaryStatistics</code> instance and all statistics have the 390 * same values as this. 391 * @param object the object to test equality against. 392 * @return true if object equals this 393 */ 394 @Override 395 public boolean equals(Object object) { 396 if (object == this) { 397 return true; 398 } 399 if (object instanceof AggregatingSummaryStatistics == false) { 400 return false; 401 } 402 AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object; 403 return super.equals(stat) && 404 aggregateStatistics.equals(stat.aggregateStatistics); 405 } 406 407 /** 408 * Returns hash code based on values of statistics 409 * @return hash code 410 */ 411 @Override 412 public int hashCode() { 413 return 123 + super.hashCode() + aggregateStatistics.hashCode(); 414 } 415 } 416} 417