[go: up one dir, main page]

Menu

[r169]: / src / agf_util.h  Maximize  Restore  History

Download this file

157 lines (135 with data), 6.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
//Copyright (C) 2011 Peter Mills. All rights reserved.
#ifndef AGF_UTIL_H_INCLUDED
#define AGF_UTIL_H_INCLUDED 1
#include <stdio.h>
#include "gsl/gsl_rng.h"
#include "randomize.h"
#include "agf_defs.h"
namespace libagf {
//random number generator type (a GSL gsl_rng_type).
//This is only an extern declaration; the object itself is defined elsewhere.
extern const gsl_rng_type *agf_gsl_rng_type;
//error handler meant to prevent the gsl library from aborting if it
//encounters an error.
//NOTE(review): the parameter list (reason, file, line, gsl_errno) matches
//GSL's gsl_error_handler_t, so this is presumably installed through
//gsl_set_error_handler -- confirm at the call site.
void agf_gsl_handler(const char *reason, const char * file, int line, int gsl_errno);
//Collection of values and flags for the command line options; this is the
//type of the "returned options" argument of agf_parse_command_opts.
//Each member is annotated with the option letter it belongs to.
//TODO: the original author marked this structure as needing a redesign.
//NOTE(review): the scalar types (real_a, cls_ta, dim_ta, nel_ta, enum_a,
//flag_a) are not declared in this header -- presumably they come from
//agf_defs.h; confirm there.
//NOTE(review): ownership/lifetime of the char * members is not visible here.
struct agf_command_opts {
char *normfile; //-a file containing normalization values
char *ofile; //-o output file
char *multicommand; //-O command to use for multi-class class.
real_a missing; //-E value for missing data
real_a ftest; //-f fraction of data to use for testing
real_a hrel; //-h relative difference for numerical derivatives
real_a pmin; //-p threshold prob. for clustering algorithm
real_a rthresh; //-r location of class border
real_a tol; //-t tolerance of border samples
real_a var[2]; //-v 1st variance bracket
//-V 2nd variance bracket/initial filter variance
//(both -v and -V are stored in var; presumably var[0] and var[1]
// respectively -- confirm in the option parser)
real_a W1; //-w minimum total weight
real_a W2; //-W target/maximum total weight
real_a pratio; //-X ratio between sample classes
cls_ta cl_thresh; //-T class threshold: multiclass->2 class
dim_ta svd; //-S perform singular-value-decomposition (number to keep)
nel_ta k; //-k number of nearest-neighbours
nel_ta n; //-s number of border samples
nel_ta nt; //-q number of trials (AGF opt)
nel_ta div; //-d number of divisions ~= -f
enum_a algtype; //-c algorithm to use in n-fold cross-validation
enum_a metrictype; //-m type of metric
enum_a Qtype; //-Q algorithm selection 2
flag_a stdinflag; //-0 read from stdin
flag_a stdoutflag; //-1 write to stdout
flag_a asciiflag; //-A operate on ascii files from stdin & stdout
flag_a Bflag; //-B sort by ordinate
flag_a Cflag; //-C no class data
flag_a errflag; //-e return error estimates for interpolation
flag_a fflag; //-f (same option letter as ftest; presumably records that -f was given -- confirm)
flag_a selectflag; //-F select features
flag_a Hflag; //-H no header/strip header
flag_a jointflag; //-j return joint probabilities (instead of cond.) to stdout
flag_a Kflag; //-K keep temporary files
flag_a Lflag; //-L floating point ordinates
flag_a Mflag; //-M SVM format
flag_a normflag; //-n flag to normalize data
flag_a Nflag; //-N take data from stdin
flag_a Pflag; //-P calculate cross-correlation matrix
flag_a Rflag; //-R randomly sample
flag_a uflag; //-u un-normalize borders and gradients
flag_a Uflag; //-U re-label classes to go from [0, nc)
flag_a zflag; //-z randomize
};
//normalizes a set of vectors using per-dimension averages and standard
//deviations.
//NOTE(review): the summary above reads as if ave/std were inputs, while the
//parameter comments mark them as returned; presumably the statistics are
//computed here and handed back -- confirm against the definition.
//Declaration only; the template is not defined in this header.
template <class real>
void norm_vec(real **mat, //list of vectors
dim_ta D, //dimension
nel_ta n, //number of vectors
real *ave, //returned averages (size D)
real *std); //returned std. dev. (size D)
//un-normalizes a set of vectors from averages and standard deviations
//(the counterpart of the normalization routines declared in this header).
//Declaration only; the template is not defined in this header.
//NOTE(review): ave and std are presumably arrays of size D, as for
//norm_vec -- confirm against the definition.
template <class real>
void unnorm_vec(real **mat, //vectors (test or training)
dim_ta D, //dimension of vectors
nel_ta n, //number of vectors
real *ave, //averages
real *std); //standard deviations
//parses the command line options into an agf_command_opts structure.
//argc and argv are taken by reference, so the function is able to modify
//the caller's argument count and argument list.
//NOTE(review): presumably the parsed options are stripped from argv/argc;
//the format of optlist and the meaning of the int return value are not
//documented here -- confirm against the definition.
int agf_parse_command_opts (int &argc, //number of arguments
char **&argv, //arguments from command line
const char * optlist, //list of options
agf_command_opts *opt_args); //returned options
//sorts a set of vectors by their classes:
//
//returns an array of length ncl+1 that indexes into the rearranged mat,
// giving the starting point of each class
//NOTE(review): who owns (and must free) the returned array is not stated
// here -- confirm against the definition.
template <class real, class cls_t>
nel_ta * sort_classes(real **mat, //coordinate data
nel_ta n, //number of samples
cls_t *cl, //classes
cls_t ncl); //number of classes
//calculates the averages and standard deviations of coordinate data;
//the results are handed back through ave and std, one entry per dimension.
//Declaration only; the template is not defined in this header.
template <class real>
void calc_norm(real **mat, //coordinate samples
dim_ta D, //coordinate dimension
nel_ta n, //number of samples
real *ave, //returned averages (D elements)
real *std); //returned standard deviations (D elements)
//normalizes both the training and the test coordinates with averages and
//standard deviations.
//NOTE(review): no ave/std arguments are passed, so the statistics are
//presumably computed internally (likely from the training set) -- confirm
//against the definition.
template <class real>
void norm_vec_std(real **mat1, //training vectors
dim_ta D, //number of dimensions
nel_ta n1, //number of training vectors
real **mat2, //test vectors
nel_ta n2); //number of test vectors
//randomly permutes the training data (coordinates together with classes).
//NOTE(review): which random number generator/seed is used is not visible
//in this header -- confirm against the definition.
template <class real, class cls_t>
void randomize_vec(real **mat, //coordinates
dim_ta D, //number of dimensions
nel_ta n, //number of samples
cls_t *cls); //classes
//removes duplicate vectors and returns a set of weights holding the
//number of duplicates of each retained vector; the number of vectors left
//is handed back through the reference argument nnew.
//NOTE(review): whether the returned real ** is newly allocated or aliases
//mat, and the required size of wt, are not stated here -- confirm against
//the definition.
template <class real>
real ** remove_duplicates(real **mat, //set of vectors
dim_ta D, //dimension
nel_ta n, //number of vectors
real *wt, //returned weights
nel_ta &nnew); //new number of samples
//calculates the uncertainty coefficient between true and retrieved classes
//and prints out the contingency matrix.
//The output stream argument defaults to NULL.
//NOTE(review): presumably nothing is printed when fs is NULL -- confirm
//against the definition.
template <class cls_t>
double uncertainty_coefficient(cls_t *truth, //actual classes
cls_t *ret, //retrieved classes
nel_ta n, //number of samples
cls_t nclt, //number of classes in first set
cls_t nclr, //number in second set
FILE *fs = NULL); //print to this file stream
//overload for when there are the same number of classes in both sets.
//NOTE(review): the parameters are unnamed; by analogy with the six-argument
//overload they are presumably (actual classes, retrieved classes, number of
//samples, number of classes, output stream) -- confirm against the definition.
template <class cls_t>
double uncertainty_coefficient(cls_t *, cls_t *, nel_ta, cls_t, FILE *fs=NULL);
//prints out a table of accuracies versus the computed confidence ratings.
//The table goes to fs, which defaults to stdout.
template <class real, class cls_t>
void check_confidence(cls_t *truth, //actual classes
cls_t *cls, //retrieved classes
real *con, //retrieved confidence ratings
cls_t ncls, //number of classes
nel_ta n, //number of samples
FILE *fs = stdout); //print to this file stream
}
#endif