1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#!/usr/local/bin/perl 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# ******************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# * COPYRIGHT: 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# * Copyright (c) 2002, International Business Machines Corporation and 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# * others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# ******************************************************************** 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupackage Dataset; 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuse Statistics::Descriptive; 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuse Statistics::Distributions; 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuse strict; 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Create a new Dataset with the given data. 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub new { 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my ($class) = shift; 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $self = bless { 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _data => \@_, 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _scale => 1.0, 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _mean => 0.0, 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _error => 0.0, 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }, $class; 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $n = @_; 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ($n >= 1) { 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $stats = Statistics::Descriptive::Full->new(); 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $stats->add_data(@{$self->{_data}}); 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $self->{_mean} = $stats->mean(); 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ($n >= 2) { 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru # Use a t distribution rather than Gaussian because (a) we 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru # assume an underlying normal dist, (b) we do not know the 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru # standard deviation -- we estimate it from the data, and (c) 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru # we MAY have a small sample size (also works for large n). 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $t = Statistics::Distributions::tdistr($n-1, 0.005); 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $self->{_error} = $t * $stats->standard_deviation(); 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $self; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Set a scaling factor for all data; 1.0 means no scaling. 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Scale must be > 0. 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub setScale { 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my ($self, $scale) = @_; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $self->{_scale} = $scale; 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Multiply the scaling factor by a value. 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub scaleBy { 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my ($self, $a) = @_; 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $self->{_scale} *= $a; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Return the mean. 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub getMean { 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $self = shift; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return $self->{_mean} * $self->{_scale}; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Return a 99% error based on the t distribution. The dataset 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# is desribed as getMean() +/- getError(). 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub getError { 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $self = shift; 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return $self->{_error} * $self->{_scale}; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Divide two Datasets and return a new one, maintaining the 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# mean+/-error. The new Dataset has no data points. 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub divide { 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $self = shift; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $rhs = shift; 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $minratio = ($self->{_mean} - $self->{_error}) / 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ($rhs->{_mean} + $rhs->{_error}); 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $maxratio = ($self->{_mean} + $self->{_error}) / 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ($rhs->{_mean} - $rhs->{_error}); 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $result = Dataset->new(); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_mean} = ($minratio + $maxratio) / 2; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_error} = $result->{_mean} - $minratio; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_scale} = $self->{_scale} / $rhs->{_scale}; 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result; 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# subtracts two Datasets and return a new one, maintaining the 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# mean+/-error. The new Dataset has no data points. 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub subtract { 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $self = shift; 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $rhs = shift; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $result = Dataset->new(); 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_mean} = $self->{_mean} - $rhs->{_mean}; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_error} = $self->{_error} + $rhs->{_error}; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_scale} = $self->{_scale}; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# adds two Datasets and return a new one, maintaining the 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# mean+/-error. The new Dataset has no data points. 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub add { 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $self = shift; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $rhs = shift; 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $result = Dataset->new(); 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_mean} = $self->{_mean} + $rhs->{_mean}; 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_error} = $self->{_error} + $rhs->{_error}; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_scale} = $self->{_scale}; 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result; 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Divides a dataset by a scalar. 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# The new Dataset has no data points. 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub divideByScalar { 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $self = shift; 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $s = shift; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $result = Dataset->new(); 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_mean} = $self->{_mean}/$s; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_error} = $self->{_error}/$s; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_scale} = $self->{_scale}; 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Divides a dataset by a scalar. 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# The new Dataset has no data points. 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub multiplyByScalar { 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $self = shift; 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $s = shift; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru my $result = Dataset->new(); 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_mean} = $self->{_mean}*$s; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_error} = $self->{_error}*$s; 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result->{_scale} = $self->{_scale}; 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru $result; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru1; 140