jars/icu4j-52_1/perf-tests/Dataset.pm

   1 #/**
   2 # *******************************************************************************
   3 # * Copyright (C) 2002-2004, International Business Machines Corporation and    *
   4 # * others. All Rights Reserved.                                                *
   5 # *******************************************************************************
   6 # */
   7 package Dataset;
   8 use Statistics::Descriptive;
   9 use Statistics::Distributions;
  10 use strict;
  11
  12 # Create a new Dataset with the given data.
  13 sub new {
  14     my ($class) = shift;
  15     my $self = bless {
  16         _data => \@_,
  17         _scale => 1.0,
  18         _mean => 0.0,
  19         _error => 0.0,
  20     }, $class;
  21
  22     my $n = @_;
  23
  24     if ($n >= 1) {
  25         my $stats = Statistics::Descriptive::Full->new();
  26         $stats->add_data(@{$self->{_data}});
  27         $self->{_mean} = $stats->mean();
  28
  29         if ($n >= 2) {
  30             # Use a t distribution rather than Gaussian because (a) we
  31             # assume an underlying normal dist, (b) we do not know the
  32             # standard deviation -- we estimate it from the data, and (c)
  33             # we MAY have a small sample size (also works for large n).
  34             my $t = Statistics::Distributions::tdistr($n-1, 0.005);
  35             $self->{_error} = $t * $stats->standard_deviation();
  36         }
  37     }
  38
  39     $self;
  40 }
  41
  42 # Set a scaling factor for all data; 1.0 means no scaling.
  43 # Scale must be > 0.
  44 sub setScale {
  45     my ($self, $scale) = @_;
  46     $self->{_scale} = $scale;
  47 }
  48
  49 # Multiply the scaling factor by a value.
  50 sub scaleBy {
  51     my ($self, $a) = @_;
  52     $self->{_scale} *= $a;
  53 }
  54
  55 # Return the mean.
  56 sub getMean {
  57     my $self = shift;
  58     return $self->{_mean} * $self->{_scale};
  59 }
  60
  61 # Return a 99% error based on the t distribution.  The dataset
  62 # is desribed as getMean() +/- getError().
  63 sub getError {
  64     my $self = shift;
  65     return $self->{_error} * $self->{_scale};
  66 }
  67
  68 # Divide two Datasets and return a new one, maintaining the
  69 # mean+/-error.  The new Dataset has no data points.
  70 sub divide {
  71     my $self = shift;
  72     my $rhs = shift;
  73
  74     my $minratio = ($self->{_mean} - $self->{_error}) /
  75                    ($rhs->{_mean} + $rhs->{_error});
  76     my $maxratio = ($self->{_mean} + $self->{_error}) /
  77                    ($rhs->{_mean} - $rhs->{_error});
  78
  79     my $result = Dataset->new();
  80     $result->{_mean} = ($minratio + $maxratio) / 2;
  81     $result->{_error} = $result->{_mean} - $minratio;
  82     $result->{_scale} = $self->{_scale} / $rhs->{_scale};
  83     $result;
  84 }
  85
  86 1;