[go: up one dir, main page]

Menu

[r9]: / average  Maximize  Restore  History

Download this file

163 lines (132 with data), 4.9 kB

#!/usr/bin/env perl

# Copyright J.M.P. Alves 2008-2014 (alvesjmp@yahoo.com)
# This software is licensed under the GNU General Public License v. 3
# Please see http://www.fsf.org/licensing/licenses/gpl.html for details

# first version 0.2, 2008-03-24, by J.
# Last update 0.7.3 2014-05-22, by J.

use strict;
use warnings;
use Getopt::Long;
use Scalar::Util qw(looks_like_number);
# Single-letter switches may be bundled (-am) and are case sensitive
# (-c and -C are different options).
Getopt::Long::Configure("bundling", "no_ignore_case");

# One variable per command-line switch; see the help text after __DATA__.
#   $A mean only        $s mean and sd      $t total only     $l long format
#   $m median only      $n minimum only     $x maximum only   $E scientific notation
#   $Comm keep '#' lines   $d decimal places   $c column number
#   $h help             $v version
my($A, $s, $t, $l, $m, $h, $v, $d, $x, $n, $E, $c, $Comm);

# GetOptions returns false (after printing its own diagnostic) when it meets
# an unknown switch or a malformed value; stop instead of running with a
# half-parsed command line.
GetOptions ('a' => \$A, 's' => \$s, 't' => \$t, 'l' => \$l, 'x' => \$x, 'e' => \$E, 'C' => \$Comm,
            'n' => \$n, 'm' => \$m, 'h' => \$h, 'v' => \$v, 'd=i' => \$d, 'c=i' => \$c)
    or do {
        print STDERR "ERROR: Invalid command line. Use -h for help.\n";
        exit 2;
    };

# Numeric values accepted from the input; filled by the reading loop.
my @vals;

# NOTE(review): the header comment of this file says "Last update 0.7.3",
# but the version reported here is 0.7.2 -- confirm which one is current.
my $version = "0.7.2";
if($h) { print "Version: $version\n\n"; Help(); }
if($v) { print "$version\n"; exit; }

# Negative values pass the "=i" integer check, but would select a column
# counted from the end of the line, or build an invalid printf conversion.
if(defined $c && $c < 0) {
    print STDERR "ERROR: Column number (-c) cannot be negative.\n";
    exit 2;
}
if(defined $d && $d < 0) {
    print STDERR "ERROR: Number of decimal places (-d) cannot be negative.\n";
    exit 2;
}

# printf conversion letter: "E" for scientific notation, "f" otherwise.
my $type = $E ? "E" : "f";

# Convert the 1-based column number into a 0-based array index; a missing
# -c (or -c 0) selects the first column.
$c = $c ? $c - 1 : 0;

# $d becomes the tail of a printf conversion, e.g. "2f", later used as "%.$d".
$d = (defined $d ? $d : 2) . $type;

# Read every input line (files named on the command line, or standard input)
# and collect the numeric value found in column $c into @vals.
LINE:
while(my $line = <>) {
    next LINE if $line =~ /^\s*$/;               # blank line
    next LINE if !$Comm && $line =~ /^\s*#/;     # comment line, kept only with -C

    chomp $line;
    $line =~ s/^\s+//;    # leading whitespace must not create an empty first column
    my @fields = split /\s+/, $line;

    # Lines with fewer columns than requested have nothing to contribute.
    my $value = $fields[$c];
    next LINE unless defined $value;

    # Quietly skip fields that contain anything other than the characters a
    # decimal or scientific-notation number can be made of.
    next LINE unless $value =~ /^[+\-\d.e]+$/i;

    # Fields made of the right characters in the wrong arrangement ("1.2.3",
    # "--", "e5") are reported. The diagnostic goes to STDERR so that it
    # cannot be mistaken for part of the results.
    unless(looks_like_number($value)) {
        print STDERR "WARNING: Possible non-numeric value found, ignored: $line\n";
        next LINE;
    }

    push @vals, $value;
}

# With fewer than two values the program reports the problem on STDERR and
# leaves with a non-zero status, so that calling scripts can detect it.
unless(@vals) {
    print STDERR "ERROR: No numerical values found. I quit.\n";
    exit 1;
}
if(@vals == 1) {
    print STDERR "ERROR: Only one numerical value (@vals) found. Nothing to do, so I quit.\n";
    exit 1;
}

my($sum, $av, $sd, $median, $min, $max);

($sum, $av) = avrg(@vals);
$sd = stddev($av, @vals);
($median, $min, $max) = median(@vals);

# $d holds precision plus conversion letter (e.g. "2f"), so this is the
# printf conversion used for every statistic.
my $num = "%.$d";

# Exactly one report is printed. -l, or the absence of any output option,
# selects the long format; otherwise the first matching option below wins
# (-s takes precedence over -a).
if($l || !($A || $s || $t || $m || $x || $n)) {
    printf "$num +/- $num, total $num, median $num, minimum $num, maximum $num, n = %d\n",
           $av, $sd, $sum, $median, $min, $max, scalar(@vals);
}
elsif($s) { printf "$num\t$num\n", $av, $sd; }
elsif($A) { printf "$num\n", $av; }
elsif($t) { printf "$num\n", $sum; }
elsif($m) { printf "$num\n", $median; }
elsif($n) { printf "$num\n", $min; }
elsif($x) { printf "$num\n", $max; }

exit;

##############################

# avrg(LIST)
# Returns a two-element list: the sum of LIST and its arithmetic mean.
# For an empty LIST both values are 0.
sub avrg {
   my $count = scalar(@_);
   my $sum = 0;    # start from 0 so that an empty list yields a defined sum
   for my $value (@_) { $sum += $value; }
   my $mean = $count ? $sum / $count : 0;
   return $sum, $mean;
}

##############################

# stddev(MEAN, LIST)
# Returns the sample standard deviation of LIST around the supplied MEAN,
# i.e. sqrt( sum((x - MEAN)**2) / (n - 1) ).
# Returns 0 when LIST has fewer than two values, since the n - 1
# denominator would otherwise be zero (or negative).
sub stddev {
    my($media, @Lista) = @_;
    my $count = scalar(@Lista);
    return 0 if $count < 2;

    my $squares = 0;
    # A lexical loop variable: the global $a is reserved for sort blocks.
    for my $value (@Lista) {
        $squares += ($value - $media) ** 2;
    }
    return sqrt($squares / ($count - 1));
}

##############################

# median(LIST)
# Returns a three-element list: the median, the minimum and the maximum of
# LIST, compared numerically. For an even number of values the median is
# the mean of the two middle values. Returns an empty list for an empty LIST.
sub median {
   my @sorted = sort { $a <=> $b } @_;
   return unless @sorted;

   my $mid = int(scalar(@sorted) / 2);
   my $median = scalar(@sorted) % 2
              ? $sorted[$mid]                              # odd count: middle value
              : ($sorted[$mid - 1] + $sorted[$mid]) / 2;   # even count: mean of the middle pair
   return $median, $sorted[0], $sorted[-1];
}

##############################

# Help()
# Prints everything stored after the __DATA__ marker (the usage text) to
# standard output and ends the program.
sub Help {
  print <DATA>;
  exit;
}

##############################

__DATA__
average
-------

Usage: 
	average [options]

Synopsis:
	Takes a series of numbers and calculates simple statistics: average (arithmetic 
	mean), standard deviation, median, total sum, and minimum and maximum values 
	present. For version 0.6 and later, also works with scientific notation numbers.
	
	Numbers can be in a file or presented from standard input (press control-d
	to end number input after last number). Output is to standard output.
	
	Input can also have more than one column, in which case the column to use
	in calculations can be determined using the -c option. Otherwise, the first
	column is used (leading spaces are ignored; repeated whitespace is considered
	as one).

Options:
	-d	Number of decimal places to show (default: 2);
	-c	Column to use for calculations (default: 1);
	-e	Output in scientific notation (e.g. 1E12);
	-a	Shows only the arithmetic mean;
	-s	Shows arithmetic mean and the standard deviation;
	-t	Shows only the total sum of the numbers;
	-m	Shows only the median;
	-n	Shows only the minimum value;
	-x	Shows only the maximum value;
	-l	Long format, presenting all of the above (default);
        -C      Use lines starting with # (default is to skip such lines);
	-v	Prints program version and exits;
	-h	Prints this help message and exits.

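        * Among the output options, those listed first have precedence over the ones
        below; e.g. if the user uses both -t and -n, only -t will have an effect (only
        the total sum will be shown). There are two exceptions: -l overrides every
        other output option, and -s overrides -a.
        * If average is used without any output options, all statistics are shown
        (same as -l).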

Copyright J.M.P. Alves 2008-2014 (alvesjmp@yahoo.com)
This software is licensed under the GNU General Public License v. 3.
Please see http://www.fsf.org/licensing/licenses/gpl.html for details.