/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math.stat.descriptive;

import java.io.Serializable;

import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
import org.apache.commons.math.stat.descriptive.moment.Mean;
import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
import org.apache.commons.math.stat.descriptive.moment.Variance;
import org.apache.commons.math.stat.descriptive.rank.Max;
import org.apache.commons.math.stat.descriptive.rank.Min;
import org.apache.commons.math.stat.descriptive.summary.Sum;
import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
import org.apache.commons.math.util.MathUtils;
import org.apache.commons.math.util.FastMath;

/**
 * <p>
 * Computes summary statistics for a stream of data values added using the
 * {@link #addValue(double) addValue} method. The data values are not stored in
 * memory, so this class can be used to compute statistics for very large data
 * streams.
 * </p>
 * <p>
 * The {@link StorelessUnivariateStatistic} instances used to maintain summary
 * state and compute statistics are configurable via setters. For example, the
 * default implementation for the variance can be overridden by calling
 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
 * these methods must implement the {@link StorelessUnivariateStatistic}
 * interface and configuration must be completed before <code>addValue</code>
 * is called. No configuration is necessary to use the default, commons-math
 * provided implementations.
 * </p>
 * <p>
 * A statistic counts as overridden whenever the configured instance is not
 * the built-in one, even if it is another instance of the same class. The
 * built-in mean and variance are computed from the shared second moment and
 * the built-in geometric mean is constructed over the sum of logs, whereas an
 * overriding instance is fed every added value directly.
 * </p>
 * <p>
 * Note: This class is not thread-safe. Use
 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
 * threads is required.
 * </p>
 * @version $Revision: 1042376 $ $Date: 2010-12-05 16:54:55 +0100 (dim. 05 déc. 2010) $
 */
public class SummaryStatistics implements StatisticalSummary, Serializable {

    /** Serialization UID */
    private static final long serialVersionUID = -2021321786743555871L;

    /** count of values that have been added */
    protected long n = 0;

    /** SecondMoment is used to compute the mean and variance */
    protected SecondMoment secondMoment = new SecondMoment();

    /** sum of values that have been added */
    protected Sum sum = new Sum();

    /** sum of the square of each value that has been added */
    protected SumOfSquares sumsq = new SumOfSquares();

    /** min of values that have been added */
    protected Min min = new Min();

    /** max of values that have been added */
    protected Max max = new Max();

    /** sumLog of values that have been added */
    protected SumOfLogs sumLog = new SumOfLogs();

    /** geoMean of values that have been added */
    protected GeometricMean geoMean = new GeometricMean(sumLog);

    /** mean of values that have been added */
    protected Mean mean = new Mean();

    /** variance of values that have been added */
    protected Variance variance = new Variance();

    /** Sum statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic sumImpl = sum;

    /** Sum of squares statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic sumsqImpl = sumsq;

    /** Minimum statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic minImpl = min;

    /** Maximum statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic maxImpl = max;

    /** Sum of log statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic sumLogImpl = sumLog;

    /** Geometric mean statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic geoMeanImpl = geoMean;

    /** Mean statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic meanImpl = mean;

    /** Variance statistic implementation - can be reset by setter. */
    private StorelessUnivariateStatistic varianceImpl = variance;

    /**
     * Construct a SummaryStatistics instance
     */
    public SummaryStatistics() {
    }

    /**
     * A copy constructor. Creates a deep-copy of the {@code original}.
     *
     * @param original the {@code SummaryStatistics} instance to copy
     */
    public SummaryStatistics(SummaryStatistics original) {
        copy(original, this);
    }

    /**
     * Return a {@link StatisticalSummaryValues} instance reporting current
     * statistics.
     * @return Current values of statistics
     */
    public StatisticalSummary getSummary() {
        return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
                getMax(), getMin(), getSum());
    }

    /**
     * Add a value to the data
     * @param value the value to add
     */
    public void addValue(double value) {
        sumImpl.increment(value);
        sumsqImpl.increment(value);
        minImpl.increment(value);
        maxImpl.increment(value);
        sumLogImpl.increment(value);
        secondMoment.increment(value);
        // The built-in mean and variance are derived from secondMoment by the
        // getters, and the built-in geometric mean sits on top of the sum of
        // logs, so none of them needs its own update. Any replacement must be
        // incremented here. The test is by identity, exactly as in getMean(),
        // getVariance() and clear(): an instanceof test would skip a
        // caller-supplied Mean/Variance/GeometricMean that the getters
        // nevertheless read from.
        if (meanImpl != mean) {
            meanImpl.increment(value);
        }
        if (varianceImpl != variance) {
            varianceImpl.increment(value);
        }
        if (geoMeanImpl != geoMean) {
            geoMeanImpl.increment(value);
        }
        n++;
    }

    /**
     * Returns the number of available values
     * @return The number of available values
     */
    public long getN() {
        return n;
    }

    /**
     * Returns the sum of the values that have been added
     * @return The sum or <code>Double.NaN</code> if no values have been added
     */
    public double getSum() {
        return sumImpl.getResult();
    }

    /**
     * Returns the sum of the squares of the values that have been added.
     * <p>
     * Double.NaN is returned if no values have been added.
     * </p>
     * @return The sum of squares
     */
    public double getSumsq() {
        return sumsqImpl.getResult();
    }

    /**
     * Returns the mean of the values that have been added.
     * <p>
     * Double.NaN is returned if no values have been added.
     * </p>
     * @return the mean
     */
    public double getMean() {
        if (mean == meanImpl) {
            // Default implementation: derive the mean from the shared moment.
            return new Mean(secondMoment).getResult();
        } else {
            return meanImpl.getResult();
        }
    }

    /**
     * Returns the standard deviation of the values that have been added.
     * <p>
     * Double.NaN is returned if no values have been added, and 0 is returned
     * if exactly one value has been added.
     * </p>
     * @return the standard deviation
     */
    public double getStandardDeviation() {
        double stdDev = Double.NaN;
        if (getN() > 0) {
            if (getN() > 1) {
                stdDev = FastMath.sqrt(getVariance());
            } else {
                stdDev = 0.0;
            }
        }
        return stdDev;
    }

    /**
     * Returns the variance of the values that have been added.
     * <p>
     * Double.NaN is returned if no values have been added.
     * </p>
     * @return the variance
     */
    public double getVariance() {
        if (varianceImpl == variance) {
            // Default implementation: derive the variance from the shared moment.
            return new Variance(secondMoment).getResult();
        } else {
            return varianceImpl.getResult();
        }
    }

    /**
     * Returns the maximum of the values that have been added.
     * <p>
     * Double.NaN is returned if no values have been added.
     * </p>
     * @return the maximum
     */
    public double getMax() {
        return maxImpl.getResult();
    }

    /**
     * Returns the minimum of the values that have been added.
     * <p>
     * Double.NaN is returned if no values have been added.
     * </p>
     * @return the minimum
     */
    public double getMin() {
        return minImpl.getResult();
    }

    /**
     * Returns the geometric mean of the values that have been added.
     * <p>
     * Double.NaN is returned if no values have been added.
     * </p>
     * @return the geometric mean
     */
    public double getGeometricMean() {
        return geoMeanImpl.getResult();
    }

    /**
     * Returns the sum of the logs of the values that have been added.
     * <p>
     * Double.NaN is returned if no values have been added.
     * </p>
     * @return the sum of logs
     * @since 1.2
     */
    public double getSumOfLogs() {
        return sumLogImpl.getResult();
    }

    /**
     * Returns a statistic related to the Second Central Moment.  Specifically,
     * what is returned is the sum of squared deviations from the sample mean
     * among the values that have been added.
     * <p>
     * Returns <code>Double.NaN</code> if no data values have been added and
     * returns <code>0</code> if there is just one value in the data set.</p>
     *
     * @return second central moment statistic
     * @since 2.0
     */
    public double getSecondMoment() {
        return secondMoment.getResult();
    }

    /**
     * Generates a text report displaying summary statistics from values that
     * have been added.
     * @return String with line feeds displaying statistics
     * @since 1.2
     */
    @Override
    public String toString() {
        StringBuilder outBuffer = new StringBuilder();
        String endl = "\n";
        outBuffer.append("SummaryStatistics:").append(endl);
        outBuffer.append("n: ").append(getN()).append(endl);
        outBuffer.append("min: ").append(getMin()).append(endl);
        outBuffer.append("max: ").append(getMax()).append(endl);
        outBuffer.append("mean: ").append(getMean()).append(endl);
        outBuffer.append("geometric mean: ").append(getGeometricMean())
            .append(endl);
        outBuffer.append("variance: ").append(getVariance()).append(endl);
        outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
        outBuffer.append("standard deviation: ").append(getStandardDeviation())
            .append(endl);
        outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
        return outBuffer.toString();
    }

    /**
     * Resets all statistics and storage
     */
    public void clear() {
        this.n = 0;
        minImpl.clear();
        maxImpl.clear();
        sumImpl.clear();
        sumLogImpl.clear();
        sumsqImpl.clear();
        geoMeanImpl.clear();
        secondMoment.clear();
        // The built-in mean and variance are never read directly (the getters
        // rebuild them from secondMoment), so only replacements need clearing.
        if (meanImpl != mean) {
            meanImpl.clear();
        }
        if (varianceImpl != variance) {
            varianceImpl.clear();
        }
    }

    /**
     * Returns true iff <code>object</code> is a
     * <code>SummaryStatistics</code> instance and all statistics have the
     * same values as this.
     * @param object the object to test equality against.
     * @return true if object equals this
     */
    @Override
    public boolean equals(Object object) {
        if (object == this) {
            return true;
        }
        if (!(object instanceof SummaryStatistics)) {
            return false;
        }
        SummaryStatistics stat = (SummaryStatistics) object;
        // The counts are compared as longs: sending them through a
        // floating-point comparison would round large counts and could
        // report two different values of n as equal.
        return MathUtils.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
               MathUtils.equalsIncludingNaN(stat.getMax(),           getMax())           &&
               MathUtils.equalsIncludingNaN(stat.getMean(),          getMean())          &&
               MathUtils.equalsIncludingNaN(stat.getMin(),           getMin())           &&
               stat.getN() == getN()                                                     &&
               MathUtils.equalsIncludingNaN(stat.getSum(),           getSum())           &&
               MathUtils.equalsIncludingNaN(stat.getSumsq(),         getSumsq())         &&
               MathUtils.equalsIncludingNaN(stat.getVariance(),      getVariance());
    }

    /**
     * Returns hash code based on values of statistics
     * @return hash code
     */
    @Override
    public int hashCode() {
        // The geometric mean is folded in twice. This is redundant but
        // harmless, and is kept so that existing hash values do not change.
        int result = 31 + MathUtils.hash(getGeometricMean());
        result = result * 31 + MathUtils.hash(getGeometricMean());
        result = result * 31 + MathUtils.hash(getMax());
        result = result * 31 + MathUtils.hash(getMean());
        result = result * 31 + MathUtils.hash(getMin());
        result = result * 31 + MathUtils.hash(getN());
        result = result * 31 + MathUtils.hash(getSum());
        result = result * 31 + MathUtils.hash(getSumsq());
        result = result * 31 + MathUtils.hash(getVariance());
        return result;
    }

    // Getters and setters for statistics implementations
    /**
     * Returns the currently configured Sum implementation
     * @return the StorelessUnivariateStatistic implementing the sum
     * @since 1.2
     */
    public StorelessUnivariateStatistic getSumImpl() {
        return sumImpl;
    }

    /**
     * <p>
     * Sets the implementation for the Sum.
     * </p>
     * <p>
     * This method must be called before any data has been added - i.e.,
     * before {@link #addValue(double) addValue} has been used to add data;
     * otherwise an IllegalStateException will be thrown.
     * </p>
     * @param sumImpl the StorelessUnivariateStatistic instance to use for
     *        computing the Sum
     * @throws IllegalStateException if data has already been added (i.e if n >
     *         0)
     * @since 1.2
     */
    public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
        checkEmpty();
        this.sumImpl = sumImpl;
    }

    /**
     * Returns the currently configured sum of squares implementation
     * @return the StorelessUnivariateStatistic implementing the sum of squares
     * @since 1.2
     */
    public StorelessUnivariateStatistic getSumsqImpl() {
        return sumsqImpl;
    }

    /**
     * <p>
     * Sets the implementation for the sum of squares.
     * </p>
     * <p>
     * This method must be called before any data has been added - i.e.,
     * before {@link #addValue(double) addValue} has been used to add data;
     * otherwise an IllegalStateException will be thrown.
     * </p>
     * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
     *        computing the sum of squares
     * @throws IllegalStateException if data has already been added (i.e if n >
     *         0)
     * @since 1.2
     */
    public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
        checkEmpty();
        this.sumsqImpl = sumsqImpl;
    }

    /**
     * Returns the currently configured minimum implementation
     * @return the StorelessUnivariateStatistic implementing the minimum
     * @since 1.2
     */
    public StorelessUnivariateStatistic getMinImpl() {
        return minImpl;
    }

    /**
     * <p>
     * Sets the implementation for the minimum.
     * </p>
     * <p>
     * This method must be called before any data has been added - i.e.,
     * before {@link #addValue(double) addValue} has been used to add data;
     * otherwise an IllegalStateException will be thrown.
     * </p>
     * @param minImpl the StorelessUnivariateStatistic instance to use for
     *        computing the minimum
     * @throws IllegalStateException if data has already been added (i.e if n >
     *         0)
     * @since 1.2
     */
    public void setMinImpl(StorelessUnivariateStatistic minImpl) {
        checkEmpty();
        this.minImpl = minImpl;
    }

    /**
     * Returns the currently configured maximum implementation
     * @return the StorelessUnivariateStatistic implementing the maximum
     * @since 1.2
     */
    public StorelessUnivariateStatistic getMaxImpl() {
        return maxImpl;
    }

    /**
     * <p>
     * Sets the implementation for the maximum.
     * </p>
     * <p>
     * This method must be called before any data has been added - i.e.,
     * before {@link #addValue(double) addValue} has been used to add data;
     * otherwise an IllegalStateException will be thrown.
     * </p>
     * @param maxImpl the StorelessUnivariateStatistic instance to use for
     *        computing the maximum
     * @throws IllegalStateException if data has already been added (i.e if n >
     *         0)
     * @since 1.2
     */
    public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
        checkEmpty();
        this.maxImpl = maxImpl;
    }

    /**
     * Returns the currently configured sum of logs implementation
     * @return the StorelessUnivariateStatistic implementing the log sum
     * @since 1.2
     */
    public StorelessUnivariateStatistic getSumLogImpl() {
        return sumLogImpl;
    }

    /**
     * <p>
     * Sets the implementation for the sum of logs. The built-in geometric
     * mean is pointed at the new implementation as well.
     * </p>
     * <p>
     * This method must be called before any data has been added - i.e.,
     * before {@link #addValue(double) addValue} has been used to add data;
     * otherwise an IllegalStateException will be thrown.
     * </p>
     * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
     *        computing the log sum
     * @throws IllegalStateException if data has already been added (i.e if n >
     *         0)
     * @since 1.2
     */
    public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
        checkEmpty();
        this.sumLogImpl = sumLogImpl;
        geoMean.setSumLogImpl(sumLogImpl);
    }

    /**
     * Returns the currently configured geometric mean implementation
     * @return the StorelessUnivariateStatistic implementing the geometric mean
     * @since 1.2
     */
    public StorelessUnivariateStatistic getGeoMeanImpl() {
        return geoMeanImpl;
    }

    /**
     * <p>
     * Sets the implementation for the geometric mean.
     * </p>
     * <p>
     * This method must be called before any data has been added - i.e.,
     * before {@link #addValue(double) addValue} has been used to add data;
     * otherwise an IllegalStateException will be thrown.
     * </p>
     * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
     *        computing the geometric mean
     * @throws IllegalStateException if data has already been added (i.e if n >
     *         0)
     * @since 1.2
     */
    public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
        checkEmpty();
        this.geoMeanImpl = geoMeanImpl;
    }

    /**
     * Returns the currently configured mean implementation
     * @return the StorelessUnivariateStatistic implementing the mean
     * @since 1.2
     */
    public StorelessUnivariateStatistic getMeanImpl() {
        return meanImpl;
    }

    /**
     * <p>
     * Sets the implementation for the mean.
     * </p>
     * <p>
     * This method must be called before any data has been added - i.e.,
     * before {@link #addValue(double) addValue} has been used to add data;
     * otherwise an IllegalStateException will be thrown.
     * </p>
     * @param meanImpl the StorelessUnivariateStatistic instance to use for
     *        computing the mean
     * @throws IllegalStateException if data has already been added (i.e if n >
     *         0)
     * @since 1.2
     */
    public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
        checkEmpty();
        this.meanImpl = meanImpl;
    }

    /**
     * Returns the currently configured variance implementation
     * @return the StorelessUnivariateStatistic implementing the variance
     * @since 1.2
     */
    public StorelessUnivariateStatistic getVarianceImpl() {
        return varianceImpl;
    }

    /**
     * <p>
     * Sets the implementation for the variance.
     * </p>
     * <p>
     * This method must be called before any data has been added - i.e.,
     * before {@link #addValue(double) addValue} has been used to add data;
     * otherwise an IllegalStateException will be thrown.
     * </p>
     * @param varianceImpl the StorelessUnivariateStatistic instance to use for
     *        computing the variance
     * @throws IllegalStateException if data has already been added (i.e if n >
     *         0)
     * @since 1.2
     */
    public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
        checkEmpty();
        this.varianceImpl = varianceImpl;
    }

    /**
     * Throws IllegalStateException if n > 0.
     */
    private void checkEmpty() {
        if (n > 0) {
            throw MathRuntimeException.createIllegalStateException(
                    LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC,
                    n);
        }
    }

    /**
     * Returns a copy of this SummaryStatistics instance with the same internal state.
     *
     * @return a copy of this
     */
    public SummaryStatistics copy() {
        SummaryStatistics result = new SummaryStatistics();
        copy(this, result);
        return result;
    }

    /**
     * Copies source to dest.
     * <p>Neither source nor dest can be null.</p>
     * <p>Every configured implementation is copied, and for each statistic
     * the relation "configured implementation is the built-in instance" is
     * reproduced in dest, so that dest updates and reports its statistics the
     * same way source does.</p>
     *
     * @param source SummaryStatistics to copy
     * @param dest SummaryStatistics to copy to
     * @throws NullPointerException if either source or dest is null
     */
    public static void copy(SummaryStatistics source, SummaryStatistics dest) {
        dest.maxImpl = source.maxImpl.copy();
        dest.meanImpl = source.meanImpl.copy();
        dest.minImpl = source.minImpl.copy();
        dest.sumImpl = source.sumImpl.copy();
        dest.varianceImpl = source.varianceImpl.copy();
        dest.sumLogImpl = source.sumLogImpl.copy();
        dest.sumsqImpl = source.sumsqImpl.copy();
        if (source.geoMeanImpl == source.geoMean) {
            // Built-in geometric mean: rebuild it over dest's own sum of logs
            // so the two stay in synch.
            // NOTE(review): the cast assumes the sum-of-logs implementation is
            // a SumOfLogs; a custom sumLogImpl combined with the built-in
            // geometric mean would raise ClassCastException here - confirm
            // whether that combination has to be supported.
            dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
        } else {
            // Replacement geometric mean (of any class): addValue increments
            // it directly, so it must stay independent of dest.sumLogImpl;
            // sharing that instance would count every value twice.
            dest.geoMeanImpl = source.geoMeanImpl.copy();
        }
        SecondMoment.copy(source.secondMoment, dest.secondMoment);
        dest.n = source.n;

        // Make sure that if stat == statImpl in source, same
        // holds in dest; otherwise copy stat
        if (source.geoMean == source.geoMeanImpl) {
            dest.geoMean = (GeometricMean) dest.geoMeanImpl;
        } else {
            GeometricMean.copy(source.geoMean, dest.geoMean);
        }
        if (source.max == source.maxImpl) {
            dest.max = (Max) dest.maxImpl;
        } else {
            Max.copy(source.max, dest.max);
        }
        if (source.mean == source.meanImpl) {
            dest.mean = (Mean) dest.meanImpl;
        } else {
            Mean.copy(source.mean, dest.mean);
        }
        if (source.min == source.minImpl) {
            dest.min = (Min) dest.minImpl;
        } else {
            Min.copy(source.min, dest.min);
        }
        if (source.sum == source.sumImpl) {
            dest.sum = (Sum) dest.sumImpl;
        } else {
            Sum.copy(source.sum, dest.sum);
        }
        if (source.variance == source.varianceImpl) {
            dest.variance = (Variance) dest.varianceImpl;
        } else {
            Variance.copy(source.variance, dest.variance);
        }
        if (source.sumLog == source.sumLogImpl) {
            dest.sumLog = (SumOfLogs) dest.sumLogImpl;
        } else {
            SumOfLogs.copy(source.sumLog, dest.sumLog);
        }
        if (source.sumsq == source.sumsqImpl) {
            dest.sumsq = (SumOfSquares) dest.sumsqImpl;
        } else {
            SumOfSquares.copy(source.sumsq, dest.sumsq);
        }
    }
}