2 # *******************************************************************************
\r
3 # * Copyright (C) 2002-2004, International Business Machines Corporation and *
\r
4 # * others. All Rights Reserved. *
\r
5 # *******************************************************************************
\r
8 use Statistics::Descriptive;
\r
9 use Statistics::Distributions;
\r
12 # Create a new Dataset with the given data.
\r
14 my ($class) = shift;
\r
25 my $stats = Statistics::Descriptive::Full->new();
\r
26 $stats->add_data(@{$self->{_data}});
\r
27 $self->{_mean} = $stats->mean();
\r
30 # Use a t distribution rather than Gaussian because (a) we
\r
31 # assume an underlying normal dist, (b) we do not know the
\r
32 # standard deviation -- we estimate it from the data, and (c)
\r
33 # we MAY have a small sample size (also works for large n).
\r
34 my $t = Statistics::Distributions::tdistr($n-1, 0.005);
\r
35 $self->{_error} = $t * $stats->standard_deviation();
\r
42 # Set a scaling factor for all data; 1.0 means no scaling.
\r
43 # Scale must be > 0.
\r
45 my ($self, $scale) = @_;
\r
46 $self->{_scale} = $scale;
\r
49 # Multiply the scaling factor by a value.
\r
51 my ($self, $a) = @_;
\r
52 $self->{_scale} *= $a;
\r
58 return $self->{_mean} * $self->{_scale};
\r
61 # Return a 99% error based on the t distribution. The dataset
\r
62 # is described as getMean() +/- getError().
\r
65 return $self->{_error} * $self->{_scale};
\r
68 # Divide two Datasets and return a new one, maintaining the
\r
69 # mean+/-error. The new Dataset has no data points.
\r
74 my $minratio = ($self->{_mean} - $self->{_error}) /
\r
75 ($rhs->{_mean} + $rhs->{_error});
\r
76 my $maxratio = ($self->{_mean} + $self->{_error}) /
\r
77 ($rhs->{_mean} - $rhs->{_error});
\r
79 my $result = Dataset->new();
\r
80 $result->{_mean} = ($minratio + $maxratio) / 2;
\r
81 $result->{_error} = $result->{_mean} - $minratio;
\r
82 $result->{_scale} = $self->{_scale} / $rhs->{_scale};
\r