Adaptive Gaussian Filtering SVN

Machine learning with Gaussian kernels.

Status: Beta
Brought to you by: peteysoft
[r109]: / src / agf_util.cc Maximize Restore History
830 lines (731 with data), 26.0 kB

//Copyright (C) 2007 Peter Mills.  All rights reserved.

#include <math.h>
#include <stdlib.h>
#include <getopt.h>
#include <string.h>
#include <sys/timeb.h>

#include "kextreme.h"
#include "quicksort.h"
#include "vector_s.h"
//#include "coeffs.h"
//#include "nr.h"
#include "peteys_tmpl_lib.h"
#include "agf_lib.h"

#include "gsl/gsl_rng.h"

#include "agf_util.h"

#include <stdio.h>

using namespace std;
using namespace libpetey;

namespace libagf {

const gsl_rng_type *agf_gsl_rng_type=gsl_rng_mt19937;

//to prevent the gsl library from aborting if it encounters an error:
void agf_gsl_handler(const char *reason, const char * file, int line, int gsl_errno) {
  fprintf(stderr, "Error in %s, line %d: %s\n", file, line, reason);
}

//this whole thing needs to be redone... (does not scale well)
int agf_parse_command_opts(int &argc, char **&argv, const char *optlist, agf_command_opts *opt_args) {
  char c;
  int ncon;
  int errcode;

  int32_t maxitr;
  float tol;

  //the defaults for these parameters are set in the main routines:
  float var_0;
  float var2;
  float Wc;
  float wmax;
  int32_t k;

  //set defaults
  errcode=0;			//no errors

  //"output" file:
  opt_args->ofile=NULL;

  //flags:
  opt_args->asciiflag=0;	//-A
  opt_args->Bflag=0;		//-B
  opt_args->Cflag=0;		//-C
  opt_args->errflag=0;		//-e
  opt_args->Hflag=0;		//-H
  opt_args->jointflag=0;	//-j
  opt_args->Kflag=0;		//-K
  opt_args->Lflag=0;		//-L
  opt_args->Mflag=0;		//-L
  opt_args->normflag=0;		//-n
  opt_args->Nflag=0;		//-N
  opt_args->Oflag=0;		//-O
  opt_args->Pflag=0;		//-P
  opt_args->Rflag=0;		//-R
  opt_args->uflag=0;		//-u
  opt_args->Uflag=0;		//-U
  opt_args->zflag=0;		//-z

  //enumerated selections:
  opt_args->algtype=-1;		//-c
  opt_args->metrictype=0;	//-m

  //integer parameters:
  //opt_args->k=K_DEFAULT_KNN;	//-k
  //for (int j=0; optlist[j]!=NULL; j++) if (optlist[j]=='w') opt_args->k=K_DEFAULT_AGF;
  //number of trials for AGF optimal
  opt_args->nt=NT_DEFAULT;	//-q

  //floating point parameters:
  //for AGF:
  //opt_args->Wc=W_DEFAULT;	//-W
  opt_args->var[0]=-1;		//-v
  opt_args->var[1]=-1;		//-V

  //for AGF borders:
  opt_args->tol=TOL_DEFAULT;	//-t
  opt_args->n=NBORD_DEFAULT;	//-s
  opt_args->rthresh=RTHRESH_DEFAULT;	//-r
  opt_args->cl_thresh=1;		//-T

  //for clustering:
  opt_args->pmin=-1.;		//-p

  //for AGF optimal:
  opt_args->W1=-1;		//-w

  //for validation routines:
  opt_args->ftest=F_DEFAULT;	//-f
  opt_args->fflag=0;
  opt_args->div=-1;		//-d

  //string parameters:
  opt_args->normfile=NULL;	//-a

  //pre-processing:
  opt_args->svd=-1;		//-S
  opt_args->selectflag=0;	//-F

  while ((c = getopt(argc, argv, optlist)) != -1) {
    switch (c) {
      case ('a'):
             opt_args->normfile=new char[strlen(optarg)+1];
             strcpy(opt_args->normfile, optarg);
	     break;
      case ('A'):
             opt_args->asciiflag=1;
	     break;
      case ('c'):
             ncon=sscanf(optarg, "%d", &opt_args->algtype);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -c %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
             }
	     break;
      case ('B'):
             opt_args->Bflag=1;
	     break;
      case ('C'):
             opt_args->Cflag=1;
	     break;
      case ('d'):
             ncon=sscanf(optarg, "%d", &opt_args->div);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -d %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
             }
	     break;
      case ('e'):
             opt_args->errflag=1;
	     break;
      case ('f'):
             ncon=sscanf(optarg, "%f", &opt_args->ftest);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -f %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (opt_args->ftest <= 0.) {
               fprintf(stderr, "Warning: parameter ftest=%g out of range, using default\n", opt_args->ftest);
               errcode=PARAMETER_OUT_OF_RANGE;
               opt_args->ftest=F_DEFAULT;
             }
             opt_args->fflag=1;
	     break;
      case ('F'):
             opt_args->selectflag=1;
	     break;
      case ('h'):
             ncon=sscanf(optarg, "%d", &maxitr);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -h %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (maxitr <= 0.) {
               fprintf(stderr, "Warning: parameter borders_maxiter=%d out of range, using default\n", maxitr);
               errcode=PARAMETER_OUT_OF_RANGE;
             } else {
               agf_global_borders_maxiter=maxitr;
             }
	     break;
      case ('H'):
             opt_args->Hflag=1;
             break;
      case ('i'):
             ncon=sscanf(optarg, "%d", &maxitr);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -i %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (maxitr <= 0.) {
               fprintf(stderr, "Warning: parameter weights_maxiter=%d out of range, using default\n", maxitr);
               errcode=PARAMETER_OUT_OF_RANGE;
             } else {
               agf_global_weights_maxiter=maxitr;
             }
	     break;
      case ('I'):
             ncon=sscanf(optarg, "%d", &maxitr);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -I %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (maxitr <= 0.) {
               fprintf(stderr, "Warning: parameter weights_maxiter=%d out of range, using default\n", maxitr);
               errcode=PARAMETER_OUT_OF_RANGE;
             } else {
               agf_global_borders_maxiter=maxitr;
               agf_global_weights_maxiter=maxitr;
             }
	     break;
      case ('j'):
             opt_args->jointflag=1;
	     break;
      case ('k'):
             ncon=sscanf(optarg, "%d", &opt_args->k);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled command option argument: -k %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     }
	     break;
      case ('K'):
             opt_args->Kflag=1;
	     break;
      case ('l'):
             ncon=sscanf(optarg, "%f", &tol);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -l %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (tol <= 0.) {
               fprintf(stderr, "Warning: parameter weights_maxiter=%g out of range, using default\n", tol);
               errcode=PARAMETER_OUT_OF_RANGE;
             } else {
               agf_global_weights_tol=tol;
             }
	     break;
      case ('L'):
             opt_args->Lflag=1;
	     break;
      case ('m'):
             ncon=sscanf(optarg, "%d", &opt_args->metrictype);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -m %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (opt_args->metrictype < 0 || opt_args->metrictype >= global_nmetric) {
               fprintf(stderr, "Warning: parameter, metric-type=%d, out of range\n", opt_args->metrictype);
               fprintf(stderr, "         using default Cartesian metric\n");
	       opt_args->metrictype=0;
               errcode=PARAMETER_OUT_OF_RANGE;
             }
	     //is it wise to set the metric right here??
	     global_metric2=global_metric2_pointer_list[opt_args->metrictype];
	     break;
      case ('M'):
             opt_args->Mflag=1;
	     break;
      case ('n'):
             opt_args->normflag=1;
	     break;
      case ('N'):
             opt_args->Nflag=1;
	     break;
      case ('o'):
             opt_args->ofile=new char[strlen(optarg)+1];
             strcpy(opt_args->ofile, optarg);
	     break;
      case ('O'):
             opt_args->Oflag=1;
	     break;
      case ('p'):
             ncon=sscanf(optarg, "%f", &opt_args->pmin);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -p %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (opt_args->pmin <= 0.) {
               fprintf(stderr, "Warning: parameter P_min=%g out of range\n", opt_args->pmin);
	       opt_args->pmin=-1;
               errcode=PARAMETER_OUT_OF_RANGE;
             }
	     break;
      case ('P'):
             opt_args->Pflag=1;
	     break;
      case ('q'):
             ncon=sscanf(optarg, "%d", &opt_args->nt);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -q %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (opt_args->nt < 4) {
               fprintf(stderr, "Warning: parameter nt=%d out of range\n", opt_args->nt);
               errcode=PARAMETER_OUT_OF_RANGE;
               opt_args->nt=NT_DEFAULT;
             }
	     break;
      case ('r'):
             ncon=sscanf(optarg, "%f", &opt_args->rthresh);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -r %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (opt_args->rthresh <= -1. || opt_args->rthresh > 1.) {
               fprintf(stderr, "Warning: parameter r0=%g out of range, using default\n", opt_args->rthresh);
               errcode=PARAMETER_OUT_OF_RANGE;
               opt_args->rthresh=RTHRESH_DEFAULT;
             }
	     break;
      case ('R'):
             opt_args->Rflag=1;
	     break;
      case ('s'):
             ncon=sscanf(optarg, "%d", &opt_args->n);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -s %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (opt_args->n <= 0) {
               fprintf(stderr, "Warning: parameter n=%d out of range, using default=%d\n", opt_args->n, opt_args->n);
               errcode=PARAMETER_OUT_OF_RANGE;
	       opt_args->n=NBORD_DEFAULT;
             }
	     break;
      case ('S'):
             ncon=sscanf(optarg, "%d", &opt_args->svd);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -S %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
             }
	     break;
      case ('t'):
             ncon=sscanf(optarg, "%g", &opt_args->tol);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -t %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (opt_args->tol <= 0) {
               fprintf(stderr, "Warning: parameter tol=%g out of range, using default=%g\n", opt_args->tol, TOL_DEFAULT);
               errcode=PARAMETER_OUT_OF_RANGE;
               opt_args->tol=TOL_DEFAULT;
             }
	     break;
      case ('T'):
             ncon=sscanf(optarg, "%d", &opt_args->cl_thresh);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -T %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (opt_args->cl_thresh < 0) {
               fprintf(stderr, "Warning: parameter cl-thresh=%d out of range, using default=%d\n", opt_args->cl_thresh, 1);
               errcode=PARAMETER_OUT_OF_RANGE;
               opt_args->cl_thresh=1;
             }
	     break;
      case ('u'):
             opt_args->uflag=1;
	     break;
      case ('U'):
             opt_args->Uflag=1;
	     break;
      case ('v'):
             ncon=sscanf(optarg, "%f", &var_0);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -v %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (var_0 <= 0. && var_0!=-1) {
               fprintf(stderr, "Warning: parameter var_0=%g out of range, using default\n", var_0);
               errcode=PARAMETER_OUT_OF_RANGE;
             } else {
               opt_args->var[0]=var_0;
             }
	     break;
      case ('V'):
             ncon=sscanf(optarg, "%f", &var2);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -v %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (var2 <= 0. && var2 != -1) {
               fprintf(stderr, "Warning: parameter var_0=%g out of range, using default\n", var2);
               errcode=PARAMETER_OUT_OF_RANGE;
             } else {
               opt_args->var[1]=var2;
             }
	     break;
      case ('w'):
             ncon=sscanf(optarg, "%f", &wmax);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -w %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (wmax <= opt_args->W2) {
               fprintf(stderr, "Warning: parameter ftest=%g out of range, using default: %g\n", wmax, opt_args->W2);
               errcode=PARAMETER_OUT_OF_RANGE;
             } else {
               opt_args->W1=wmax;
             }
             break;
      case ('W'):
             ncon=sscanf(optarg, "%g", &Wc);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -W %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (Wc <= 0) {
               fprintf(stderr, "Warning: parameter Wc=%f out of range, using default=%f\n", Wc, opt_args->W2);
               errcode=PARAMETER_OUT_OF_RANGE;
	     } else {
	       opt_args->W2=Wc;
             }
	     break;
      case ('X'):
             real_a pratio;
             ncon=sscanf(optarg, "%g", &pratio);
             if (ncon != 1) {
               fprintf(stderr, "Warning: garbled option argument: -X %s", optarg);
               errcode=COMMAND_OPTION_PARSE_ERROR;
	     } else if (pratio <= 0) {
               fprintf(stderr, "Warning: parameter pratio=%f out of range, using default=%f\n", pratio, opt_args->pratio);
               errcode=PARAMETER_OUT_OF_RANGE;
	     } else {
	       opt_args->pratio=pratio;
             }
	     break;
      case ('z'):
             opt_args->zflag=1;
	     break;
      case ('?'):
             fprintf(stderr, "Unknown option: -%c -- ignored\n", optopt);
	     errcode=COMMAND_OPTION_PARSE_ERROR;
	     break;
      default:
	     fprintf(stderr, "Error parsing command line\n");
	     errcode=FATAL_COMMAND_OPTION_PARSE_ERROR;
	     break;
    }
  }

  argc-=optind;
  argv+=optind;

  return errcode;
}

//given averages and std. devs., normalize a set of vectors:
template <class real>
void norm_vec(real **mat, dim_ta D, nel_ta n, real *ave, real *std) {
  for (dim_ta j=0; j<D; j++) {
    for (nel_ta i=0; i<n; i++) mat[i][j]=(mat[i][j]-ave[j])/std[j];
  }
}

//un-normalize the vectors:
template <class real>
void unnorm_vec(real **mat, dim_ta D, nel_ta n, real *ave, real *std) {
  for (dim_ta j=0; j<D; j++) {
    for (nel_ta i=0; i<n; i++) mat[i][j]=mat[i][j]*std[j]+ave[j];
  }
}

template void norm_vec<float>(float **mat, dim_ta D, nel_ta n, float *ave, float *std);
template void norm_vec<double>(double **mat, dim_ta D, nel_ta n, double *ave, double *std);

//calculate the averages and standard deviations of a set of vectors:
template <class real>
void calc_norm(real **mat, dim_ta D, nel_ta n, real *ave, real *std) {
  real anom;		//intermediate output

  for (dim_ta i=0; i<D; i++) {
    ave[i]=0;
    for (nel_ta j=0; j<n; j++) ave[i]+=mat[j][i];
    ave[i]/=n;
    std[i]=0;
    for (nel_ta j=0; j<n; j++) {
      anom=mat[j][i]-ave[i];
      std[i]+=anom*anom;
    }
    std[i]=sqrt(std[i]/(n-1));

  }

}

template void calc_norm<float>(float **mat, dim_ta D, nel_ta n, float *ave, float *std);
template void calc_norm<double>(double **mat, dim_ta D, nel_ta n, double *ave, double *std);

//normalize a set of vectors by the std. deviations of each of the variables:
template <class real>
void norm_vec_std(real **mat1, dim_ta m, nel_ta n1, real **mat2, nel_ta n2) {
  real *ave;
  real *std;
  real anom;		//intermediate output

  //allocate space:
  ave=new real[m];
  std=new real[m];

  for (dim_ta i=0; i<m; i++) {
    ave[i]=0;
    for (nel_ta j=0; j<n1; j++) ave[i]+=mat1[j][i];
    ave[i]/=n1;
    std[i]=0;
    for (nel_ta j=0; j<n1; j++) {
      anom=mat1[j][i]-ave[i];
      std[i]+=anom*anom;
      mat1[j][i]=anom;
    }
    std[i]=sqrt(std[i]/(n1-1));

    //normalize the first matrix:
    for (nel_ta j=0; j<n1; j++) mat1[j][i]=mat1[j][i]/std[i];

    //normalize the second matrix:
    for (nel_ta j=0; j<n2; j++) mat2[j][i]=(mat2[j][i]-ave[i])/std[i];

  }

  //clean up:
  delete [] ave;
  delete [] std;

}

template void norm_vec_std<float>(float **mat1, dim_ta m, nel_ta n1, float **mat2, nel_ta n2);
template void norm_vec_std<double>(double **mat1, dim_ta m, nel_ta n1, double **mat2, nel_ta n2);

//randomizes a set of vectors:
template <class real, class cls_t>
void randomize_vec(real **mat, dim_ta m, nel_ta n, cls_t *cls) {
  int32_t *randata;
  long *index;
  gsl_rng *rann;        //GSL random number generator
  real **newmat;
  cls_t *newcls;

  //allocate space:
  randata=new int32_t[n];
  index=new long[n];
  newmat=new real *[n];
  newcls=new cls_t[n];

  //initialize random number generator:
  rann=gsl_rng_alloc(agf_gsl_rng_type);
  gsl_rng_set(rann, seed_from_clock());

  for (nel_ta i=0; i<n; i++) randata[i]=gsl_rng_get(rann);
  heapsort(randata, index, n);

  for (nel_ta i=0; i<n; i++) {
    newmat[i]=mat[index[i]];
    newcls[i]=cls[index[i]];
  }

  //its not so efficient, but it saves some trouble:
  for (nel_ta i=0; i<n; i++) {
    mat[i]=newmat[i];
    cls[i]=newcls[i];
  }

  //clean up:
  gsl_rng_free(rann);

  delete [] randata;
  delete [] index;
  delete [] newmat;
  delete [] newcls;


}

template void randomize_vec<float,cls_ta>(float **mat, dim_ta m, nel_ta n, cls_ta *cls);
template void randomize_vec<double,cls_ta>(double **mat, dim_ta m, nel_ta n, cls_ta *cls);

//sorts a set of vectors by their classes:
//
//mat is the set of vectors
//m is the dimension of each vector
//n is the number of vectors
//cl are the classes
//ncl are the number of class
//clsind is an longword array of length ncl+1 indexing into the rearranged mat
//	giving the starting point of each class
template <class real, class cls_t>
nel_ta * sort_classes(real **mat, nel_ta n, cls_t *cl, cls_t ncl) {
  real *swap;
  nel_ta *clind;
  cls_t swpcls;
  nel_ta last;

  //2013-09-18 PM: class values of -1 are allowed and will be excluded
  //from the analysis; clind[ncl] should be n, not n-1
  clind=new cls_t[ncl+1];
  clind[0]=0;
  for (cls_t i=0; i<ncl; i++) {
    while (cl[clind[i]] == i-1 && clind[i] < n) {
      clind[i]++;
    }
    for (nel_ta j=clind[i]; j<n; j++) {
      if (cl[j] == i-1) {
	swap=mat[j];
        mat[j]=mat[clind[i]];
	mat[clind[i]]=swap;
	swpcls=cl[j];
	cl[j]=cl[clind[i]];
	cl[clind[i]]=swpcls;
	do {
	  clind[i]++;
	} while (cl[clind[i]] == i-1);
	if (clind[i] >= j) j=clind[i]+1;
      }
    }
    clind[i+1]=clind[i];
  }
  clind[ncl]=n;

  return clind;
}

//lets do this the right way
//it should be an O(n) operation:
//(really, how much difference will it make?
template <class real, class cls_t>
nel_ta * sort_classes_n(real **mat, nel_ta n, cls_t *cl, cls_t ncl) {
  real *swap;
  cls_t swpcls;
  nel_ta *clind;
  nel_ta clptr[ncl];
  real **matnew;
  cls_t *clnew;

  clind=new nel_ta[ncl+1];

  //first we simply count the number of each class label:
  clind[0]=0;
  for (nel_ta i=0; i<n; i++) clind[cl[i]+1]++;
  for (cls_t i=1; i<=ncl; i++) {
    clptr[i-1]=clind[i-1];
    clind[i]+=clind[i-1];
  }

  //need to figure out a way to do it in-place:
  matnew=new real[n];
  clnew=new cls_t[n];
  for (nel_ta i=0; i<n; i++) {
    matnew[clptr[cl[i]]]=mat[i];
    clnew[clptr[cl[i]]]=cl[i];
    clptr[cl[i]]++;
  }

  //ruins the neat allocation scheme (but then so did the old algorithm)
  delete [] mat;
  mat=matnew;
  delete [] cl;
  cl=clnew;

  return clind;

}

template nel_ta * sort_classes<float,cls_ta>(float **mat, nel_ta n, cls_ta *cl, cls_ta ncl);
template nel_ta * sort_classes<double,cls_ta>(double **mat, nel_ta n, cls_ta *cl, cls_ta ncl);

template <class real, class cls_t>
real ** remove_duplicates(real **mat, dim_ta m, nel_ta n, cls_t *cls, real *wt, nel_ta &nnew) {
  long *sind;
  long ind[n];
  vector_s<real> *mat2;
  real wtnew[n];
  real **result;
  nel_ta lastind;

  //preparation:
  //-move data into a sortable array (including class values)
  for (nel_ta i=0; i<n; i++) {
    mat2[i]=new vector_s<real>(m+1);
    for (dim_ta j=0; j<m; j++) mat2[i][j]=mat[i][j];
    mat2[i][m]=cls[i];
    wtnew[i]=0;
  }

  //work, part 1: heapsort the data
  sind=heapsort(mat2, n);

  //work, part 2: group duplicates, average weights for duplicates
  nnew=1;
  lastind=0;
  wtnew[0]=wt[0];
  ind[0]=sind[0];
  for (nel_ta i=1; i<n; i++) {
    if (mat2[sind[i-1]]!=mat2[sind[i]]) {
      ind[i]=sind[i];
      nnew++;
      wtnew[i-1]/=(i-lastind);
      lastind=i-1;
    } else {
      wtnew[lastind]+=wt[i];
    }
  }
  wtnew[nnew-1]/=(nnew-lastind);

  //postprocessing: move data, including new weights, into a new array
  result=new real *[nnew];
  result[0]=new real[nnew*(m+1)];
  for (nel_ta i=0; i<nnew; i++) {
    result[i]=result[0]+i*(m+1);
    for (dim_ta j=0; j<m; j++) result[i][j]=mat[ind[i]][j];
    result[i][m]=wtnew[i];
  }

  //cleanup: delete dynamic data, return results
  delete [] mat2;

  return result;

}

      
//calculate the uncertainty coefficient:
template <class cls_t>
double uncertainty_coefficient(cls_t *truth, cls_t *ret, nel_ta n, cls_t nclt, cls_t nclr, FILE *fs) {
  nel_ta **acc_mat;		//histogram (joint probability)
  double hrt, hr, ht;		//entropy measures
  nel_ta *pt, *pr;		//row and column totals
  double uc, ucr, uct;		//uncertainty coefficient
  //cls_t nclt, nclr;		//number of classes

  /*
  for (nel_ta i=0; i<n; i++) {
    if (truth[i]>=nclt) nclt=truth[i]+1;
    if (ret[i]>=nclr) nclr=ret[i]+1;
  }
  */

  pt=new nel_ta[nclt];
  pr=new nel_ta[nclr];
  
  acc_mat=new nel_ta *[nclt];
  acc_mat[0]=new nel_ta[nclt*nclr];
  for (cls_t i=1; i<nclt; i++) acc_mat[i]=&acc_mat[0][i*nclr];
  for (long i=0; i<nclr*nclt; i++) acc_mat[0][i]=0;
  
  for (nel_ta i=0; i<n; i++) {
    acc_mat[truth[i]][ret[i]]++;
  }
  
  //calculate the uncertainty coefficient:
  hrt=0;
  hr=0;
  ht=0;
  for (cls_t i=0; i<nclr; i++) pr[i]=0;
  for (cls_t i=0; i<nclt; i++) {
    //pr[i]=0;
    pt[i]=0;
    for (cls_t j=0; j<nclr; j++) {
      pt[i]+=acc_mat[i][j];
      pr[j]+=acc_mat[i][j];
      if (acc_mat[i][j] != 0) {
        hrt-=(double) acc_mat[i][j]*log((double) acc_mat[i][j]/(double) n);
      }
    }
    ht-=(double) pt[i]*log((double) pt[i]/(double) n);
  }
  for (cls_t i=0; i<nclr; i++) {
    hr-=(double) pr[i]*log((double) pr[i]/(double) n);
  }
  
  uc=(ht-hrt+hr)/ht;
  
  if (fs != NULL) {
    fprintf(fs, "        retrieval\n");
    fprintf(fs, "truth ");
    for (cls_t i=0; i<nclr; i++) fprintf(fs, "%3d ", i);
    fprintf(fs, "  tot\n");
    for (cls_t i=0; i<nclt; i++) {
      fprintf(fs, "%3d   ", i);
      for (cls_t j=0; j<nclr; j++) {
        fprintf(fs, "%3d ", acc_mat[i][j]);
      }
      fprintf(fs, " %4d\n", pt[i]);
    }
    fprintf(fs, "\ntotal ");
    for (cls_t i=0; i<nclr; i++) fprintf(fs, "%3d ", pr[i]);
    fprintf(fs, "\n\n");
    fprintf(fs, "U C (reverse): %f\n", (hr-hrt+ht)/hr);
    fprintf(fs, "U C (total): %f\n", 2*(hr+ht-hrt)/(hr+ht));
    fprintf(fs, "Uncertainty coefficient: %f\n", uc);
  }
  
  //clean up:
  delete [] acc_mat[0];
  delete [] acc_mat;
  delete [] pt;
  delete [] pr;
  
  return uc;
  
}

template <class cls_t>
double uncertainty_coefficient(cls_t *truth, cls_t *ret, nel_ta n, cls_t ncl, FILE *fs) {
  return uncertainty_coefficient(truth, ret, n, ncl, ncl, fs);
}

template double uncertainty_coefficient<cls_ta>(cls_ta *, cls_ta *, nel_ta, cls_ta, FILE *fs);

#define NCONHIST 10

//test the accuracy of the confidence ratings:
template <class real, class cls_t>
void check_confidence(cls_t *truth, cls_t *cls, real *con, cls_t ncls, nel_ta n, FILE *fs) {
  nel_ta total[NCONHIST];
  nel_ta ntrue[NCONHIST];
  real conave[NCONHIST];
  nel_ta ind;
  real acc;

  for (nel_ta i=0; i<NCONHIST; i++) {
    total[i]=0;
    ntrue[i]=0;
    conave[i]=0;
  }

  for (nel_ta i=0; i<n; i++) {
    ind=(nel_ta) (con[i]*NCONHIST);
    if (ind == NCONHIST) ind--;
    total[ind]++;
    if (cls[i] == truth[i]) ntrue[ind]++;
    conave[ind] += con[i];
  }

  fprintf(fs, "\nAccuracy vs. confidence:\n\n");
  for (nel_ta i=0; i<NCONHIST; i++) {
    acc=(real) ntrue[i]/(real) total[i];
    //printf("%ld %ld %f\n", ntrue[i], total[i], acc);
    //fprintf(fs, "%10.3f %10.3f\n", (i+0.5)/NCONHIST, (ncls*acc-1.)/(ncls-1.));
    fprintf(fs, "%10.3f %10.3f\n", conave[i]/total[i], (ncls*acc-1.)/(ncls-1.));
  }
  fprintf(fs, "\n");

}

template void check_confidence<float,cls_ta>(cls_ta *truth, cls_ta *cls, float *con, cls_ta ncls, nel_ta n, FILE *fs);
template void check_confidence<double,cls_ta>(cls_ta *truth, cls_ta *cls, double *con, cls_ta ncls, nel_ta n, FILE *fs);

}