1#!/usr/local/bin/perl
2#  ********************************************************************
3#  * Copyright (C) 2016 and later: Unicode, Inc. and others.
4#  * License & terms of use: http://www.unicode.org/copyright.html#License
5#  ********************************************************************
6#  ********************************************************************
7#  * COPYRIGHT:
8#  * Copyright (c) 2002, International Business Machines Corporation and
9#  * others. All Rights Reserved.
10#  ********************************************************************
11
12package Dataset;
13use Statistics::Descriptive;
14use Statistics::Distributions;
15use strict;
16
# Create a new Dataset from a list of numeric samples.
#
# Usage:   Dataset->new(@samples)
# Returns: a blessed hash with the fields
#   _data   array ref holding a private copy of the samples
#   _scale  scaling factor, initially 1.0 (no scaling)
#   _mean   unscaled mean of the samples (0.0 when there are none)
#   _error  unscaled error term (0.0 with fewer than two samples)
sub new {
    my $class = shift;

    # Copy the samples.  A reference to @_ itself would keep aliases to
    # the caller's variables, so a later change on the caller's side
    # would silently change the data stored in this object.
    my @samples = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # An upper-tail probability of 0.005 corresponds to a
            # two-sided 99% interval.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}
46
# Replace the scaling factor used for all data.
# A factor of 1.0 means no scaling; the factor must be > 0.
# Returns the factor that was stored.
sub setScale {
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} = $factor;
}
53
# Multiply the current scaling factor by the given value.
# Returns the updated scaling factor.
sub scaleBy {
    my $self       = shift;
    my $multiplier = shift;
    return $self->{_scale} *= $multiplier;
}
59
# Return the mean with the scaling factor applied.
sub getMean {
    my ($self) = @_;
    my ($mean, $scale) = @{$self}{qw(_mean _scale)};
    return $mean * $scale;
}
65
# Return the 99% error (based on the t distribution) with the scaling
# factor applied.  The dataset is described as getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my $scaled_error = $self->{_error} * $self->{_scale};
    return $scaled_error;
}
72
# Divide this Dataset by another one and return the quotient as a new
# Dataset, maintaining the mean+/-error.  The new Dataset has no data
# points.
#
# The smallest ratio is (lower edge of $self) / (upper edge of $rhs) and
# the largest is (upper edge of $self) / (lower edge of $rhs); the result
# is centred between the two.  The scaling factors are divided as well.
sub divide {
    my ($self, $rhs) = @_;

    # Interval edges of the numerator and the denominator (unscaled).
    my $num_low  = $self->{_mean} - $self->{_error};
    my $num_high = $self->{_mean} + $self->{_error};
    my $den_low  = $rhs->{_mean} - $rhs->{_error};
    my $den_high = $rhs->{_mean} + $rhs->{_error};

    my $smallest = $num_low / $den_high;
    my $largest  = $num_high / $den_low;
    my $centre   = ($smallest + $largest) / 2;

    my $quotient = Dataset->new();
    $quotient->{_mean}  = $centre;
    $quotient->{_error} = $centre - $smallest;
    $quotient->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $quotient;
}
90
# Subtract another Dataset from this one and return a new one,
# maintaining the mean+/-error.  The new Dataset has no data points.
#
# The result keeps this Dataset's scaling factor.  When $rhs uses a
# different scaling factor, its mean and error are first converted into
# this Dataset's units, so that the scaled result equals the difference of
# the two scaled means.  The errors add up.
sub subtract {
    my ($self, $rhs) = @_;

    # Factor that converts $rhs's unscaled values into $self's unscaled
    # units.  Equal scales skip the division so the values stay exact.
    # Scaling factors are required to be > 0.
    my $to_self_units = $rhs->{_scale} == $self->{_scale}
                      ? 1
                      : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  - $rhs->{_mean}  * $to_self_units;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $to_self_units;
    $result->{_scale} = $self->{_scale};
    return $result;
}
103
# Add another Dataset to this one and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# The result keeps this Dataset's scaling factor.  When $rhs uses a
# different scaling factor, its mean and error are first converted into
# this Dataset's units, so that the scaled result equals the sum of the
# two scaled means.  The errors add up.
sub add {
    my ($self, $rhs) = @_;

    # Factor that converts $rhs's unscaled values into $self's unscaled
    # units.  Equal scales skip the division so the values stay exact.
    # Scaling factors are required to be > 0.
    my $to_self_units = $rhs->{_scale} == $self->{_scale}
                      ? 1
                      : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  + $rhs->{_mean}  * $to_self_units;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $to_self_units;
    $result->{_scale} = $self->{_scale};
    return $result;
}
116
# Divides a dataset by a scalar and returns the result as a new Dataset.
# The new Dataset has no data points and keeps this Dataset's scaling
# factor.  The scalar must not be zero.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} / $s;
    # The error is a half-width: a negative scalar flips the sign of the
    # mean but must not flip the sign of the error.
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
129
# Multiplies a dataset by a scalar and returns the result as a new
# Dataset.  The new Dataset has no data points and keeps this Dataset's
# scaling factor.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} * $s;
    # The error is a half-width: a negative scalar flips the sign of the
    # mean but must not flip the sign of the error.
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
142
1431;
144