/* -*- mona-c++ -*-
* Copyright (c) Leipzig, Madrid 2004 - 2008
* Max-Planck-Institute for Human Cognitive and Brain Science
* Max-Planck-Institute for Evolutionary Anthropology
* BIT, ETSI Telecomunicacion, UPM
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <stdexcept>
#include <cmath>
#include <cassert>
#include <numeric>
#include <libmona/monaIOStream.hh>
#include <libmona/probmapio.hh>
using namespace mona;
using namespace std;
typedef pair<int, int> CClassRange;
typedef map<double, CClassRange> CClassMap;
struct CForwardTransform {
CForwardTransform(float minh, float maxh):
_M_minh(minh),
_M_div(maxh - minh + 1.0)
{
}
float operator() (float x) const
{
return (x - _M_minh) / _M_div;
}
private:
float _M_minh;
float _M_div;
};
struct CBackTransform {
CBackTransform(float minh, float maxh):
_M_minh(minh),
_M_div(maxh - minh + 1.0)
{
}
float operator() (float x) const
{
return x * _M_div + _M_minh;
}
private:
float _M_minh;
float _M_div;
};
typedef vector<double> CDoubleVector;
typedef vector<CDoubleVector> CProbabilityVector;
class CCMeans {
public:
CCMeans(double k, double epsilon, bool start_even);
CProbabilityVector operator()(CDoubleVector const& histogram, CDoubleVector& class_centers, bool initialise) const;
private:
void initialise_cc(CDoubleVector& class_centers, CDoubleVector const& histogram) const;
void evaluate_probabilities(CDoubleVector const& classes, size_t nvalues , CProbabilityVector& pv)const;
double update_class_centers(CDoubleVector& center, CDoubleVector const& histo, CProbabilityVector const& pv)const;
double _M_k;
double _M_epsilon;
bool _M_even_start;
};
CCMeans::CCMeans(double k, double epsilon, bool start_even):
_M_k(k),
_M_epsilon(epsilon),
_M_even_start(start_even)
{
}
void CCMeans::evaluate_probabilities(CDoubleVector const& classes, size_t nvalues , CProbabilityVector& pv)const
{
vector<float> b(classes.size());
for (size_t i = 0; i < nvalues; ++i) {
double const x = double(i)/ nvalues;
double sum = 0.0;
bool skip_step = false;
for (size_t j = 0; j < classes.size(); ++j) {
double val = x - classes[j];
val *= val;
if (val == 0.0) {
for (size_t k = 0; k < classes.size(); ++k) {
pv[k][i] = j != k ? 0 : 1;
}
skip_step = true;
break;
}
b[j] = val;
sum += 1.0/val;
}
if (!skip_step)
for (size_t j = 0; j < classes.size(); ++j) {
pv[j][i] = 1.0 / (sum * b[j]);
}
}
}
double CCMeans::update_class_centers(CDoubleVector& class_center, CDoubleVector const& histo, CProbabilityVector const& pv)const
{
double residuum = 0.0;
const size_t nvalues = histo.size();
for (size_t i = 0; i < class_center.size(); ++i) {
float cc = class_center[i];
float loc_res = 100;
// try a fix-point iteration for the new class center
while (loc_res > _M_epsilon) {
float cc_old = cc;
CDoubleVector::const_iterator ihelp = histo.begin();
CDoubleVector::const_iterator iprob = pv[i].begin();
CDoubleVector::const_iterator eprob = pv[i].end();
double sum_prob = 0.0;
double sum_weight = 0.0;
for (size_t ix = 0; ix < histo.size(); ++ix, ++iprob, ++ihelp) {
if (*iprob > 0.0f) {
const double x = (double) ix / nvalues;
/*
const double val = (x - cc);
const double val2 = val * val * _M_k;
const double val2mexpval2 = val2 * exp(-val2) * *iprob;
*/
sum_prob += *ihelp * *iprob;
sum_weight += *ihelp * *iprob * x;
}
}
// how do i deal with the (very unprobable case sum_prob==0.0?)
if (sum_prob != 0.0) {
cc += 0.1 * (sum_weight / sum_prob - cc);
} else {
cvwarn() << "class[" << i << "] has no probable members, keeping old value:" <<
sum_prob << ":" <<sum_weight <<endl;
}
loc_res = cc > cc_old ? cc - cc_old : cc_old - cc;
cvdebug() << sum_weight / sum_prob << ": res = " << loc_res << " : cc = "<< cc <<"\n";
}// end while (loc_res > 0.01)
cverb << "\n";
float delta = cc - class_center[i];
residuum += delta * delta;
class_center[i] = cc;
}// end update class centers
return sqrt(residuum);
}
CProbabilityVector CCMeans::operator()(CDoubleVector const& histogram, CDoubleVector& class_centers, bool initialise) const
{
if (initialise)
initialise_cc(class_centers, histogram);
CProbabilityVector pv(class_centers.size(), histogram);
for (size_t i = 0; i < class_centers.size(); ++i)
class_centers[i] /= histogram.size();
const size_t size = histogram.size();
do {
evaluate_probabilities(class_centers, size, pv);
cvmsg() << "Class centers: ";
for (CDoubleVector::const_iterator i = class_centers.begin(), e = class_centers.end();
i != e; ++i) {
cverb << *i << ", ";
}
cverb << '\n';
}while ( update_class_centers(class_centers, histogram, pv) > _M_epsilon);
for (CDoubleVector::iterator i = class_centers.begin(), e = class_centers.end(); i != e; ++i)
*i *= size;
return pv;
}
void CCMeans::initialise_cc(CDoubleVector& class_centers, CDoubleVector const& histogram)const
{
double const size = histogram.size();
double const step = size / double(class_centers.size() + 1);
if (_M_even_start) {
for (size_t i = 0; i < class_centers.size(); ++i)
class_centers[i] = i * step;
}else{
class_centers[0] = 0.0;
CDoubleVector::const_iterator hi = histogram.begin();
CDoubleVector::const_iterator const he = histogram.end();
++hi;
double const thresh = accumulate(hi, he, 0.0) / size;
float hit = 0.0;
size_t i = 1;
float val = step;
while (i < class_centers.size() && hi != he) {
hit += *hi;
if (hit > thresh) {
class_centers[i++] = val;
hit -= thresh;
}
++hi;
val += step;
}
}
}
int main(int argc, const char *args[])
{
int nclasses;
int max_iter;
bool even_start;
double epsilon = 0.00001;
double k = 32.0;
CDoubleVector class_centers;
popt::COptions options;
options.push_back(popt::option( nclasses, "nclasses", 'n',
"number of classes to partition into","3"));
options.push_back(popt::option( max_iter, "max-iter", 'm',
"maximum number of iterations", "100"));
options.push_back(popt::option( even_start, "even-start", 'e', "start with centers evenly distributed over the histogram"));
options.push_back(popt::option( class_centers, "class-centers", 'c', "initial class centers", ""));
options.push_back(popt::option( k, "variance", 'k', "variance parameter", "32"));
try {
vector<string> non_args;
popt::parse_options(argc, args, options, non_args);
if (!non_args.empty()) {
cverr() << "some arguments where not recogniced\n";
cverr() << "try 'eva-kmeans --help' for help\n";
return -1;
}
vector<double> histo;
// read input data
while (cin.good()) {
float val;
cin >> val;
histo.push_back(val);
}
cvmsg() << "got a histogram with " << histo.size() << " values\n";
bool initialise = false;
if (class_centers.empty()) {
class_centers.resize(nclasses);
initialise = true;
}
CCMeans cmeans(k, even_start, epsilon);
CProbabilityVector pv = cmeans(histo, class_centers, initialise);
if (!save_probability_map("-", pv)) {
cverr() << "runtime: unable to save probability map\n";
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
catch (const runtime_error &e){
cerr << args[0] << " runtime: " << e.what() << endl;
}
catch (const invalid_argument &e){
cerr << args[0] << " error: " << e.what() << endl;
}
catch (const exception& e){
cerr << args[0] << " error: " << e.what() << endl;
}
catch (...){
cerr << args[0] << " unknown exception" << endl;
}
return EXIT_FAILURE;
}