163 lines (132 with data), 4.9 kB
#!/usr/bin/env perl
# Copyright J.M.P. Alves 2008-2014 (alvesjmp@yahoo.com)
# This software is licensed under the GNU General Public License v. 3
# Please see http://www.fsf.org/licensing/licenses/gpl.html for details
# first version 0.2, 2008-03-24, by J.
# Last update 0.7.3 2014-05-22, by J.
use strict;
use warnings;
use Getopt::Long;
use Scalar::Util qw(looks_like_number);
Getopt::Long::Configure ("bundling", "no_ignore_case");

# Command-line switches; their meaning is described in the help text
# stored after __DATA__.
my($A, $s, $t, $l, $m, $h, $v, $d, $x, $n, $E, $c, $Comm);
GetOptions ('a' => \$A, 's' => \$s, 't' => \$t, 'l' => \$l, 'x' => \$x, 'e' => \$E, 'C' => \$Comm,
            'n' => \$n, 'm' => \$m, 'h' => \$h, 'v' => \$v, 'd=i' => \$d, 'c=i' => \$c);

my @vals;
# NOTE(review): the file header mentions "Last update 0.7.3" -- confirm
# which version number is the current one.
my $version = "0.7.2";
if ($h) { print "Version: $version\n\n"; Help(); }
if ($v) { print "$version\n"; exit; }

# Build the printf conversion used for every statistic: "%.<decimals>f",
# or "%.<decimals>E" when -e was given. Two decimals unless -d says otherwise.
my $type = $E ? "E" : "f";
$d = (defined $d ? $d : 2) . $type;
my $fmt = "%.$d";

# -c is 1-based on the command line; convert it to an array index.
$c = $c ? $c - 1 : 0;

# Collect the requested column from every usable input line.
while (my $line = <>) {
    next if $line =~ /^\s*$/;               # blank line
    next if !$Comm && $line =~ /^\s*#/;     # comment line, unless -C was given
    chomp $line;
    $line =~ s/^\s*//;
    my @fields = split /\s+/, $line;
    my $value  = $fields[$c];

    # Lines without the requested column, or whose column holds characters
    # that can never be part of a number, are skipped silently.
    next unless defined $value && $value =~ /^[+\-\d.e]+$/i;

    # The right characters in the wrong arrangement (e.g. "1e", "--5") are
    # reported on STDERR so the diagnostics never mix with the results.
    unless (looks_like_number($value)) {
        print STDERR "WARNING: Possible non-numeric value found, ignored: $line\n";
        next;
    }
    push @vals, $value;
}

# At least two values are needed; failures exit with a non-zero status so
# that calling scripts can detect them.
unless (@vals) {
    print STDERR "ERROR: No numerical values found. I quit.\n";
    exit 1;
}
if (@vals == 1) {
    print STDERR "ERROR: Only one numerical value (@vals) found. Nothing to do, so I quit.\n";
    exit 1;
}

my ($sum, $av)            = avrg(@vals);
my $sd                    = stddev($av, @vals);
my ($median, $min, $max)  = median(@vals);

# Output selection. -l (or no output option at all) wins over everything;
# after that the first matching option in this chain decides what is shown.
if ($l || !($A || $s || $t || $m || $x || $n)) {
    printf "$fmt +/- $fmt, total $fmt, median $fmt, minimum $fmt, maximum $fmt, n = %d\n",
        $av, $sd, $sum, $median, $min, $max, scalar(@vals);
}
elsif ($s) { printf "$fmt\t$fmt\n", $av, $sd; }
elsif ($A) { printf "$fmt\n", $av; }
elsif ($t) { printf "$fmt\n", $sum; }
elsif ($m) { printf "$fmt\n", $median; }
elsif ($n) { printf "$fmt\n", $min; }
elsif ($x) { printf "$fmt\n", $max; }
exit;
##############################
# avrg(LIST)
# Returns a two-element list: the sum of LIST and its arithmetic mean.
# For an empty LIST both values are 0 (the sum is never undef).
sub avrg {
    my $sum = 0;
    for my $value (@_) {
        $sum += $value;
    }
    # Guard against dividing by zero when no values were passed.
    my $mean = @_ ? $sum / scalar(@_) : 0;
    return $sum, $mean;
}
##############################
# stddev(MEAN, LIST)
# Returns the sample standard deviation of LIST around MEAN, i.e. the square
# root of the summed squared deviations divided by (n - 1).
# With fewer than two values the result is 0, since the n - 1 divisor would
# otherwise be zero or negative.
sub stddev {
    my ($media, @Lista) = @_;
    my $count = scalar @Lista;
    return 0 if $count < 2;

    # A lexical loop variable is used so that the package variable $a
    # (reserved for sort blocks) is left untouched.
    my $squares = 0;
    for my $valor (@Lista) {
        $squares += ($valor - $media) ** 2;
    }
    return sqrt($squares / ($count - 1));
}
##############################
# median(LIST)
# Returns a three-element list: the median, the minimum and the maximum of
# LIST, compared numerically. For an even number of values the median is the
# mean of the two middle values. For an empty LIST all three are undef.
sub median {
    return (undef, undef, undef) unless @_;

    my @sorted = sort { $a <=> $b } @_;
    my $mid    = int(@sorted / 2);

    # Odd count: the middle element. Even count: mean of the two middle ones.
    my $median = @sorted % 2
        ? $sorted[$mid]
        : ($sorted[$mid - 1] + $sorted[$mid]) / 2;

    return $median, $sorted[0], $sorted[-1];
}
##############################
# Help()
# Prints the usage text stored after __DATA__ to standard output and then
# terminates the program.
sub Help {
    print <DATA>;
    exit;
}
##############################
__DATA__
average
-------
Usage:
average [options]
Synopsis:
Takes a series of numbers and calculates simple statistics: average (arithmetic
mean), standard deviation, median, total sum, and minimum and maximum values
present. For version 0.6 and later, also works with scientific notation numbers.
Numbers can be in a file or presented from standard input (press control-d
to end number input after last number). Output is to standard output.
Input can also have more than one column, in which case the column to use
in calculations can be determined using the -c option. Otherwise, the first
column is used (leading spaces are ignored; repeated whitespace is considered
as one).
Options:
-d Number of decimal places to show (default: 2);
-c Column to use for calculations (default: 1);
-e Output in scientific notation (e.g. 1E12);
-a Shows only the arithmetic mean;
-s Shows arithmetic mean and the standard deviation;
-t Shows only the total sum of the numbers;
-m Shows only the median;
-n Shows only the minimum value;
-x Shows only the maximum value;
-l Long format, presenting all of the above (default);
-C Use lines starting with # (default is to skip such lines);
-v Prints program version and exits;
-h Prints this help message and exits.
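* Options listed first have precedence over the ones below; e.g. if the user
uses both -t and -n, only -t will have an effect (total sum only will be shown).
There are two exceptions: -l overrides every other output option, and -s
overrides -a (the mean is shown together with the standard deviation).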
* If average is used without any options, all statistics are shown (same as -l).
Copyright J.M.P. Alves 2008-2014 (alvesjmp@yahoo.com)
This software is licensed under the GNU General Public License v. 3.
Please see http://www.fsf.org/licensing/licenses/gpl.html for details.