Adaptive Gaussian Filtering SVN

Machine learning with Gaussian kernels.

Status: Beta
Brought to you by: peteysoft
[r169]: / src / agf_util.h Maximize Restore History
157 lines (135 with data), 6.1 kB

//Copyright (C) 2011 Peter Mills.  All rights reserved.

#ifndef AGF_UTIL_H_INCLUDED
#define AGF_UTIL_H_INCLUDED 1

#include <stdio.h>

#include "gsl/gsl_rng.h"
#include "randomize.h"

#include "agf_defs.h"

namespace libagf {

  //GSL random number generator type used by the library.
  //This is a declaration only: the definition, and which concrete GSL
  //generator it points to, are not visible in this header.
  extern const gsl_rng_type *agf_gsl_rng_type;

  //error handler to prevent the gsl library from aborting if it encounters
  //an error.
  //  reason    - description of the error
  //  file      - source file in which the error was raised
  //  line      - line number at which the error was raised
  //  gsl_errno - GSL error code
  //NOTE(review): the parameter list matches GSL's error-handler callback, so
  //this is presumably installed via gsl_set_error_handler -- confirm at the
  //call site; what the handler does with the error is not visible here.
  void agf_gsl_handler(const char *reason, const char * file, int line, int gsl_errno);

  //Collected command-line options, as returned through the opt_args
  //parameter of agf_parse_command_opts (declared later in this header).
  //Each field's comment starts with the switch it corresponds to.
  //The field types (real_a, cls_ta, dim_ta, nel_ta, enum_a, flag_a) are not
  //defined in this file -- presumably they come from agf_defs.h.
  //
  //TODO (original author's note): this rubbish should be redone...
  struct agf_command_opts {
    char *normfile;	//-a file containing normalization values
    char *ofile;	//-o output file
    char *multicommand;	//-O command to use for multi-class classification
    real_a missing;	//-E value for missing data
    real_a ftest;	//-f fraction of data to use for testing
    real_a hrel;	//-h relative difference for numerical derivatives
    real_a pmin;	//-p threshold probability for clustering algorithm
    real_a rthresh;	//-r location of class border
    real_a tol;		//-t tolerance of border samples
    real_a var[2];	//-v 1st variance bracket
  			//-V 2nd variance bracket/initial filter variance
    real_a W1;		//-w minimum total weight
    real_a W2;		//-W target/maximum total weight
    real_a pratio;	//-X ratio between sample classes
    cls_ta cl_thresh;	//-T class threshold: multiclass->2 class
    dim_ta svd;		//-S perform singular-value-decomposition (number to keep)
    nel_ta k;		//-k number of nearest-neighbours
    nel_ta n;		//-s number of border samples
    nel_ta nt;		//-q number of trials (AGF opt)
    nel_ta div;		//-d number of divisions ~= -f
    enum_a algtype;	//-c algorithm to use in n-fold cross-validation
    enum_a metrictype;	//-m type of metric
    enum_a Qtype;	//-Q algorithm selection 2
    flag_a stdinflag;	//-0 read from stdin
    flag_a stdoutflag;	//-1 write to stdout
    flag_a asciiflag;	//-A operate on ascii files from stdin & stdout
    flag_a Bflag;	//-B sort by ordinate
    flag_a Cflag;	//-C no class data
    flag_a errflag;	//-e return error estimates for interpolation
    flag_a fflag;	//-f (same switch as ftest; presumably records that -f
			//   was given -- TODO confirm in the parser)
    flag_a selectflag;	//-F select features
    flag_a Hflag;	//-H no header/strip header
    flag_a jointflag;	//-j return joint probabilities (instead of cond.) to stdout
    flag_a Kflag;	//-K keep temporary files
    flag_a Lflag;	//-L floating point ordinates
    flag_a Mflag;	//-M SVM format
    flag_a normflag;	//-n flag to normalize data
    flag_a Nflag;	//-N take data from stdin
			//   NOTE(review): how this differs from -0 is not
			//   visible here
    flag_a Pflag;	//-P calculate cross-correlation matrix
    flag_a Rflag;	//-R randomly sample
    flag_a uflag;	//-u un-normalize borders and gradients
    flag_a Uflag;	//-U re-label classes to go from [0-nc).
    flag_a zflag;	//-z randomize
  };

  //normalizes a set of vectors from averages and standard deviations.
  //mat is taken as an array of n pointers, each to a vector of D elements
  //(per the parameter comments below).
  //NOTE(review): the summary reads as if ave/std are inputs, yet both are
  //labelled "returned" -- confirm against the implementation whether this
  //function computes them, consumes them, or both.
  template <class real>
  void norm_vec(real **mat, 		//list of vectors
		dim_ta D, 		//dimension
		nel_ta n, 		//number of vectors
		real *ave, 		//returned averages (size D)
		real *std);		//returned std. dev. (size D)

  //un-normalizes a set of vectors using the supplied averages and standard
  //deviations (the reverse of the normalization above, going by the name).
  //Returns nothing; mat is passed as non-const pointers, so the vectors are
  //presumably modified in place -- confirm against the implementation.
  template <class real>
  void unnorm_vec(real **mat, 		//vectors (test or training)
		dim_ta D, 		//dimension of vectors
		nel_ta n, 		//number of vectors
		real *ave, 		//averages
		real *std);		//standard deviations

  //parses command line options into an agf_command_opts structure.
  //argc and argv are taken by non-const reference, so the function is able
  //to change the caller's argument count and list (presumably to remove the
  //options it has consumed -- TODO confirm).
  //The meaning of the int return value and the exact syntax of optlist are
  //not visible in this header.
  int agf_parse_command_opts (int &argc, 	//number of arguments
		char **&argv, 			//arguments from command line
		const char * optlist, 		//list of options
		agf_command_opts *opt_args);	//returned options

  //sorts a set of vectors by their classes:
  //
  //returns a nel_ta array of length ncl+1 indexing into the rearranged mat
  //	giving the starting point of each class
  //
  //NOTE(review): who owns (and must free) the returned array, and whether cl
  //is reordered together with mat, cannot be told from this declaration.
  template <class real, class cls_t>
  nel_ta * sort_classes(real **mat, 	//coordinate data
		nel_ta n,		//number of samples
		cls_t *cl, 		//classes
		cls_t ncl);		//number of classes

  //calculates averages and standard deviations of coordinate data.
  //Results are written to the caller-supplied arrays ave and std, each of
  //which receives D values (one per coordinate dimension).
  template <class real>
  void calc_norm(real **mat, 		//coordinate samples
		dim_ta D, 		//coordinate dimension
		nel_ta n, 		//number of samples
		real *ave, 		//D returned averages
		real *std);		//D returned standard deviations

  //normalizes both test and training coordinates with averages and
  //standard deviations.
  //Both sets share the same dimension D.
  //NOTE(review): no ave/std arguments are passed, so the statistics are
  //presumably computed inside -- which set they are derived from is not
  //visible here; confirm against the implementation.
  template <class real>
  void norm_vec_std(real **mat1,	//training vectors
		dim_ta D, 		//number of dimensions
		nel_ta n1, 		//number of training vectors
		real **mat2,		//test vectors
		nel_ta n2);		//number of test vectors

  //randomly permutes training data.
  //Takes the coordinate vectors together with their class labels;
  //presumably both are permuted identically so that sample i keeps its
  //label -- TODO confirm against the implementation.
  template <class real, class cls_t>
  void randomize_vec(real **mat,	//coordinates
		dim_ta D, 		//number of dimensions
		nel_ta n, 		//number of samples
		cls_t *cls);		//classes

  //removes duplicate vectors and returns a set of weights with the
  //number of duplicates.
  //The reduced sample count is passed back through the reference nnew and
  //the weights through wt.
  //NOTE(review): whether the returned real ** is newly allocated or aliases
  //mat, and the size wt must have on entry, are not visible here.
  template <class real>
  real ** remove_duplicates(real **mat, //set of vectors
		dim_ta D, 		//dimension
		nel_ta n, 		//number of vectors
		real *wt, 		//returned weights
		nel_ta &nnew);		//new number of samples

  //calculates the uncertainty coefficient between two sets of class labels
  //and prints out the contingency matrix.
  //This overload takes separate class counts for the true and the retrieved
  //labels. The coefficient is returned as a double.
  //fs defaults to NULL; presumably nothing is printed in that case --
  //TODO confirm against the implementation.
  template <class cls_t>
  double uncertainty_coefficient(cls_t *truth, //actual classes
		cls_t *ret, 		//retrieved classes
		nel_ta n, 		//number of samples
		cls_t nclt, 		//number of classes in first set
		cls_t nclr, 		//number in second set
		FILE *fs = NULL);	//print to this file stream

  //overload for when there are the same number of classes in both sets.
  //The parameters are unnamed here; by analogy with the overload taking two
  //class counts they are, in order: actual classes, retrieved classes,
  //number of samples, number of classes, and the output stream
  //(defaults to NULL).
  template <class cls_t>
  double uncertainty_coefficient(cls_t *, cls_t *, nel_ta, cls_t, FILE *fs=NULL);

  //prints out a table of accuracies versus computed confidence ratings.
  //Returns nothing; output goes to fs, which defaults to stdout.
  //The layout of the table is not visible from this declaration.
  template <class real, class cls_t>
  void check_confidence(cls_t *truth, 	//actual classes
		cls_t *cls, 		//retrieved classes
		real *con, 		//retrieved confidence ratings
		cls_t ncls, 		//number of classes
		nel_ta n, 		//number of samples
		FILE *fs = stdout);	//print to this file stream

}

#endif