1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#!/usr/local/bin/perl
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#  ********************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#  * COPYRIGHT:
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#  * Copyright (c) 2002, International Business Machines Corporation and
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#  * others. All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#  ********************************************************************
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupackage Dataset;
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuse Statistics::Descriptive;
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuse Statistics::Distributions;
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuse strict;
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Create a new Dataset with the given data.
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub new {
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my ($class) = shift;
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $self = bless {
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        _data => \@_,
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        _scale => 1.0,
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        _mean => 0.0,
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        _error => 0.0,
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }, $class;
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $n = @_;
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if ($n >= 1) {
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        my $stats = Statistics::Descriptive::Full->new();
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $stats->add_data(@{$self->{_data}});
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        $self->{_mean} = $stats->mean();
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if ($n >= 2) {
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            # Use a t distribution rather than Gaussian because (a) we
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            # assume an underlying normal dist, (b) we do not know the
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            # standard deviation -- we estimate it from the data, and (c)
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            # we MAY have a small sample size (also works for large n).
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            my $t = Statistics::Distributions::tdistr($n-1, 0.005);
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            $self->{_error} = $t * $stats->standard_deviation();
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $self;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Set a scaling factor for all data; 1.0 means no scaling.
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Scale must be > 0.
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub setScale {
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my ($self, $scale) = @_;
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $self->{_scale} = $scale;
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Multiply the scaling factor by a value.
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub scaleBy {
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my ($self, $a) = @_;
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $self->{_scale} *= $a;
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Return the mean.
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub getMean {
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $self = shift;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return $self->{_mean} * $self->{_scale};
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Return a 99% error based on the t distribution.  The dataset
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# is desribed as getMean() +/- getError().
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub getError {
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $self = shift;
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return $self->{_error} * $self->{_scale};
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Divide two Datasets and return a new one, maintaining the
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# mean+/-error.  The new Dataset has no data points.
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub divide {
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $self = shift;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $rhs = shift;
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $minratio = ($self->{_mean} - $self->{_error}) /
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   ($rhs->{_mean} + $rhs->{_error});
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $maxratio = ($self->{_mean} + $self->{_error}) /
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   ($rhs->{_mean} - $rhs->{_error});
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $result = Dataset->new();
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_mean} = ($minratio + $maxratio) / 2;
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_error} = $result->{_mean} - $minratio;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# subtracts two Datasets and return a new one, maintaining the
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# mean+/-error.  The new Dataset has no data points.
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub subtract {
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $self = shift;
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $rhs = shift;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $result = Dataset->new();
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_mean} = $self->{_mean} - $rhs->{_mean};
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_error} = $self->{_error} + $rhs->{_error};
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_scale} = $self->{_scale};
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# adds two Datasets and return a new one, maintaining the
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# mean+/-error.  The new Dataset has no data points.
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub add {
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $self = shift;
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $rhs = shift;
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $result = Dataset->new();
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_mean} = $self->{_mean} + $rhs->{_mean};
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_error} = $self->{_error} + $rhs->{_error};
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_scale} = $self->{_scale};
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result;
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Divides a dataset by a scalar.
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# The new Dataset has no data points.
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub divideByScalar {
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $self = shift;
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $s = shift;
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $result = Dataset->new();
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_mean} = $self->{_mean}/$s;
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_error} = $self->{_error}/$s;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_scale} = $self->{_scale};
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result;
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# Divides a dataset by a scalar.
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru# The new Dataset has no data points.
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querusub multiplyByScalar {
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $self = shift;
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $s = shift;
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    my $result = Dataset->new();
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_mean} = $self->{_mean}*$s;
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_error} = $self->{_error}*$s;
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result->{_scale} = $self->{_scale};
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    $result;
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru1;
140