1#!/usr/local/bin/perl
2#  ********************************************************************
3#  * COPYRIGHT:
4#  * Copyright (c) 2002, International Business Machines Corporation and
5#  * others. All Rights Reserved.
6#  ********************************************************************
7
8package Dataset;
9use Statistics::Descriptive;
10use Statistics::Distributions;
11use strict;
12
# Create a new Dataset with the given data.
#
# Arguments: the class name followed by zero or more numeric samples.
# Returns a blessed hash with these fields:
#   _data  - array ref holding a private copy of the samples
#   _scale - multiplier applied by getMean()/getError(); starts at 1.0
#   _mean  - unscaled mean of the samples (0.0 when there are none)
#   _error - unscaled error term (0.0 with fewer than two samples)
sub new {
    # Copy the samples out of @_.  Keeping a reference to @_ itself would
    # leave the stored elements aliased to the caller's variables, so a
    # later change in the caller would silently alter this Dataset.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # 0.005 is the upper-tail probability, i.e. 99% two-sided.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);

            # NOTE(review): this multiplies t by the standard deviation
            # itself, not by the standard error (sd / sqrt(n)) -- confirm
            # that this is the intended error definition.
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}
42
# Replace the scaling factor applied to the mean and error; 1.0 means
# no scaling.  The scale is expected to be > 0, but this is not checked
# here.  Returns the newly stored scale.
sub setScale {
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} = $factor;
}
49
# Multiply the current scaling factor by the given value, compounding
# with whatever scale is already set.  Returns the resulting scale.
sub scaleBy {
    my $self       = shift;
    my $multiplier = shift;
    return $self->{_scale} = $self->{_scale} * $multiplier;
}
55
# Return the mean with the current scaling factor applied.
sub getMean {
    my ($self) = @_;
    my $scaled_mean = $self->{_scale} * $self->{_mean};
    return $scaled_mean;
}
61
# Return the 99% error (based on the t distribution) with the current
# scaling factor applied.  The dataset is described as
# getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my $scaled_error = $self->{_scale} * $self->{_error};
    return $scaled_error;
}
68
# Divide this Dataset by another and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# The quotient range is bounded by the extreme ratios of the unscaled
# values:
#   low  = (mean - error) / (rhs mean + rhs error)
#   high = (mean + error) / (rhs mean - rhs error)
# The result's mean is the midpoint of that range and its error is the
# distance from the midpoint down to the low bound.  The result's scale
# is this scale divided by the rhs scale.
#
# Perl raises a division-by-zero error if either rhs bound or the rhs
# scale is zero.
sub divide {
    my ($self, $rhs) = @_;

    my ($num_mean, $num_err) = @{$self}{qw(_mean _error)};
    my ($den_mean, $den_err) = @{$rhs}{qw(_mean _error)};

    my $low  = ($num_mean - $num_err) / ($den_mean + $den_err);
    my $high = ($num_mean + $num_err) / ($den_mean - $den_err);
    my $mid  = ($low + $high) / 2;

    my $quotient = Dataset->new();
    $quotient->{_mean}  = $mid;
    $quotient->{_error} = $mid - $low;
    $quotient->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $quotient;
}
86
# Subtract another Dataset from this one and return a new Dataset,
# maintaining the mean+/-error.  The new Dataset has no data points.
#
# The errors of the two operands are added.  When both operands share
# the same scale, the result keeps that scale and holds the difference
# of the unscaled means.  When the scales differ, each operand's scale
# is folded into its values first and the result gets a scale of 1.0;
# taking the raw difference under one operand's scale would give the
# wrong scaled mean and error.
sub subtract {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();
    if ($self->{_scale} == $rhs->{_scale}) {
        # Same units on both sides: combine the raw values directly.
        $result->{_mean}  = $self->{_mean}  - $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Different units: bring both sides to scale 1.0 before combining.
        $result->{_mean}  = $self->{_mean}  * $self->{_scale}
                          - $rhs->{_mean}   * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error}  * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }
    return $result;
}
99
# Add two Datasets and return a new one, maintaining the mean+/-error.
# The new Dataset has no data points.
#
# The errors of the two operands are added.  When both operands share
# the same scale, the result keeps that scale and holds the sum of the
# unscaled means.  When the scales differ, each operand's scale is
# folded into its values first and the result gets a scale of 1.0;
# taking the raw sum under one operand's scale would give the wrong
# scaled mean and error.
sub add {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();
    if ($self->{_scale} == $rhs->{_scale}) {
        # Same units on both sides: combine the raw values directly.
        $result->{_mean}  = $self->{_mean}  + $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Different units: bring both sides to scale 1.0 before combining.
        $result->{_mean}  = $self->{_mean}  * $self->{_scale}
                          + $rhs->{_mean}   * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error}  * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }
    return $result;
}
112
# Divide a Dataset by a plain number and return a new Dataset.
# Both the unscaled mean and the unscaled error are divided by the
# scalar; the scale is carried over unchanged.
# The new Dataset has no data points.
sub divideByScalar {
    my ($self, $divisor) = @_;

    my $quotient = Dataset->new();
    @{$quotient}{qw(_mean _error _scale)} = (
        $self->{_mean}  / $divisor,
        $self->{_error} / $divisor,
        $self->{_scale},
    );
    return $quotient;
}
125
# Multiply a Dataset by a plain number and return a new Dataset.
# Both the unscaled mean and the unscaled error are multiplied by the
# scalar; the scale is carried over unchanged.
# The new Dataset has no data points.
sub multiplyByScalar {
    my ($self, $factor) = @_;

    my $product = Dataset->new();
    for my $field (qw(_mean _error)) {
        $product->{$field} = $self->{$field} * $factor;
    }
    $product->{_scale} = $self->{_scale};
    return $product;
}
138
1391;
140