/* -*- mona-c++ -*-
* Copyright (c) Leipzig, Madrid 2004 - 2008
* Max-Planck-Institute for Human Cognitive and Brain Science
* Max-Planck-Institute for Evolutionary Anthropology
* BIT, ETSI Telecomunicacion, UPM
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdexcept>
#include <cmath>
#include <cassert>
#include <numeric>
#ifdef HAVE_BLAS
extern "C" {
#include <cblas.h>
}
#endif
#include <boost/lambda/lambda.hpp>
#include <libmona/monaIOStream.hh>
#include <libmona/probmapio.hh>
using namespace mona;
using namespace std;
using namespace boost::lambda;
typedef pair<int, int> CClassRange;
typedef map<double, CClassRange> CClassMap;
struct CForwardTransform {
CForwardTransform(float minh, float maxh):
_M_minh(minh),
_M_div(maxh - minh + 1.0)
{
}
float operator() (float x) const
{
return (x - _M_minh) / _M_div;
}
private:
float _M_minh;
float _M_div;
};
struct CBackTransform {
CBackTransform(float minh, float maxh):
_M_minh(minh),
_M_div(maxh - minh + 1.0)
{
}
float operator() (float x) const
{
return x * _M_div + _M_minh;
}
private:
float _M_minh;
float _M_div;
};
typedef vector<double> CDoubleVector;
typedef vector<CDoubleVector> CProbabilityVector;
class CCMeans {
public:
CCMeans(double k, double epsilon, bool start_even);
CProbabilityVector operator()(CDoubleVector const& histogram,
CDoubleVector& class_centers, bool initialise, bool auto_k) const;
private:
void initialise_cc(CDoubleVector& class_centers, CDoubleVector const& histogram) const;
void evaluate_probabilities(CDoubleVector const& classes, size_t nvalues , CProbabilityVector& pv, double k)const;
double update_class_centers(CDoubleVector& center, CDoubleVector const& histo, CDoubleVector const& shisto,
CProbabilityVector const& pv)const;
double adjust_k(CDoubleVector const& class_centers, CDoubleVector const& histogram, CProbabilityVector const & pv)const;
double _M_k;
double _M_epsilon;
bool _M_even_start;
};
CCMeans::CCMeans(double k, double epsilon, bool start_even):
_M_k(k),
_M_epsilon(epsilon),
_M_even_start(start_even)
{
}
void CCMeans::evaluate_probabilities(CDoubleVector const& classes, size_t nvalues , CProbabilityVector& pv, double k)const
{
// cvmsg() << "evaluate_probabilities:" << k << "\n";
for (size_t i = 0; i < nvalues; ++i) {
double const x = double(i)/ nvalues;
double sum = 0.0;
for (size_t j = 0; j < classes.size(); ++j) {
double val = x - classes[j];
val = exp(- (val * val) / k);
pv[j][i] = val;
sum += val;
}
assert(sum != 0.0);
for (size_t j = 0; j < classes.size(); ++j)
pv[j][i] /= sum;
}
}
double CCMeans::update_class_centers(CDoubleVector& class_center, CDoubleVector const& histo,
CDoubleVector const& shisto,CProbabilityVector const& pv)const
{
double residuum = 0.0;
for (size_t i = 0; i < class_center.size(); ++i) {
float cc = class_center[i];
//float loc_res = 100;
//while (loc_res > _M_epsilon) {
// float cc_old = cc;
#ifdef HAVE_BLAS
double sum_prob = cblas_ddot(histo.size(), &histo[0], 1, &pv[i][0], 1);
double sum_weight = cblas_ddot(histo.size(), &shisto[0], 1, &pv[i][0], 1);
#else
CDoubleVector::const_iterator ihelp = histo.begin();
CDoubleVector::const_iterator shelp = shisto.begin();
CDoubleVector::const_iterator iprob = pv[i].begin();
CDoubleVector::const_iterator eprob = pv[i].end();
double sum_prob = 0.0;
double sum_weight = 0.0;
for (size_t ix = 0; ix < histo.size(); ++ix, ++iprob, ++ihelp, ++shelp) {
if (*iprob > 0.0f) {
sum_prob += *iprob * *ihelp;
sum_weight += *iprob * *shelp;
}
}
#endif
if (sum_prob != 0.0) // move slowly in the direction of new center
cc += 0.5 * (sum_weight / sum_prob - cc);
else {
cvwarn() << "class[" << i << "] has no probable members, keeping old value:" <<
sum_prob << ":" <<sum_weight <<endl;
}
// loc_res = cc > cc_old ? cc - cc_old : cc_old - cc;
// cvdebug() << "res = " << loc_res << " : cc = "<< cc <<"\n";
//}
double delta = cc - class_center[i];
residuum += delta * delta;
class_center[i] = cc;
}// end update class centers
return sqrt(residuum);
}
double CCMeans::adjust_k(CDoubleVector const& class_centers, CDoubleVector const& histogram, CProbabilityVector const & pv)const
{
CDoubleVector cc(class_centers.size());
transform(class_centers.begin(), class_centers.end(), cc.begin(), _1 * histogram.size());
// evaluate best mapping of classes based on maximum probability
vector<int> classmap(histogram.size(), 0);
vector<double> classprob(pv[0]);
for (size_t i = 1; i < cc.size(); ++i) {
CDoubleVector::const_iterator iprob = pv[i].begin();
CDoubleVector::const_iterator eprob = pv[i].end();
vector<int>::iterator cmi = classmap.begin();
vector<double>::iterator cpi = classprob.begin();
while (iprob != eprob) {
if (*cpi < *iprob) {
*cpi = *iprob;
*cmi = i;
}
++iprob;
++cpi;
++cmi;
}
}
double sum = 0.0;
double sum2 = 0.0;
double n = 0.0;
for (size_t i = 1; i < histogram.size(); ++i) {
double delta = i - cc[classmap[i]];
sum += delta * histogram[i];
sum2 += delta * delta * histogram[i];
n += histogram[i];
}
const double avg = sum / n;
float hsize = histogram.size() - 1;
double new_k = 2 * ((sum2 - avg * sum) / (n-1)) / (hsize * hsize);
return new_k > 0.001 ? new_k : 0.001;
}
CProbabilityVector CCMeans::operator()(CDoubleVector const& histogram, CDoubleVector& class_centers, bool initialise, bool auto_k) const
{
if (initialise)
initialise_cc(class_centers, histogram);
CProbabilityVector pv(class_centers.size(), histogram);
CDoubleVector scale_histo(histogram);
const double dx = 1.0 / histogram.size();
double x = 0.0;
for (size_t i = 0; i < histogram.size(); ++i, x+=dx)
scale_histo[i] *= x;
for (size_t i = 0; i < class_centers.size(); ++i)
if (class_centers[i] > 1.0)
class_centers[i] /= histogram.size();
const size_t size = histogram.size();
double k = _M_k;
bool cont = true;
while (cont) {
evaluate_probabilities(class_centers, size, pv, k);
cvmsg() << "Class centers: ";
for (CDoubleVector::const_iterator i = class_centers.begin(), e = class_centers.end();
i != e; ++i) {
cverb << *i << ", ";
}
cont = update_class_centers(class_centers, histogram, scale_histo, pv) > _M_epsilon;
if (auto_k) {
k = adjust_k(class_centers, histogram, pv);
cvmsg() << "k = " << k;
}
cvmsg() << '\n';
};
for (CDoubleVector::iterator i = class_centers.begin(), e = class_centers.end(); i != e; ++i)
*i *= size;
return pv;
}
void CCMeans::initialise_cc(CDoubleVector& class_centers, CDoubleVector const& histogram)const
{
double const size = histogram.size();
double const step = size / double(class_centers.size() + 1);
if (_M_even_start) {
for (size_t i = 0; i < class_centers.size(); ++i)
class_centers[i] = i * step;
}else{
class_centers[0] = 0.0;
CDoubleVector::const_iterator hi = histogram.begin();
CDoubleVector::const_iterator const he = histogram.end();
++hi;
double const thresh = accumulate(hi, he, 0.0) / size;
float hit = 0.0;
size_t i = 1;
float val = step;
while (i < class_centers.size() && hi != he) {
hit += *hi;
if (hit > thresh) {
class_centers[i++] = val;
hit -= thresh;
}
++hi;
val += step;
}
}
}
void test(double k, bool auto_k)
{
const size_t Nh = 1024;
const size_t Nc = 3;
cvdebug() << "k = " << k << "\n";
CDoubleVector class_centers(Nc);
float cstep = 1.0 / float(Nc);
cvdebug() << "class centers: ";
for (size_t i =0; i < Nc; ++i) {
class_centers[i] = i * cstep + cstep / 4.0;
cverb << class_centers[i] << ", ";
}
cverb << "\n";
CDoubleVector histogram(Nh);
CProbabilityVector pv(class_centers.size(), histogram);
for (size_t i = 0; i < Nh; ++i) {
float h = 0;
float x = float(i)/Nh;
for (size_t c = 0; c < Nc; ++c) {
float delta = class_centers[c] - x;
delta *= delta;
delta /= k;
float val = exp(-delta);
pv[c][i] = val;
cout << pv[c][i] << " ";
h += val;
histogram[i] += 1024 * val;
}
cout << histogram[i] << " ";
for (size_t c = 0; c < Nc; ++c) {
pv[c][i] /= h;
}
cout << "\n";
}
CDoubleVector eval_class_centers(Nc);
for (size_t i = 0; i < Nc; ++i)
eval_class_centers[i] = i * cstep;
CCMeans cmeans(k, false, 0.00001);
CProbabilityVector eval_pv = cmeans(histogram, eval_class_centers, false, auto_k);
for (size_t i = 0; i < Nc; ++i) {
if (fabs(eval_class_centers[i] - Nh * class_centers[i]) > 0.5) {
cverr() << i << ": " << eval_class_centers[i] << " vs." << Nh * class_centers[i] << "\n";
}
}
}
int main(int argc, const char *args[])
{
int nclasses;
int max_iter;
bool even_start = false;
bool auto_k = false;
bool cut_histo = false;
double epsilon = 0.00001;
double k = 1.0;
bool self_test = false;
CDoubleVector class_centers;
popt::COptions options;
options.push_back(popt::option( nclasses, "nclasses", 'n', "number of classes to partition into","3"));
options.push_back(popt::option( max_iter, "max-iter", 'm', "maximum number of iterations", "100"));
options.push_back(popt::option( even_start, "even-start", 'e', "start with centers evenly distributed over the histogram"));
options.push_back(popt::option( class_centers, "class-centers", 'c', "initial class centers", ""));
options.push_back(popt::option( auto_k, "auto", 'a', "atomatic adaption of variance (experimental)"));
options.push_back(popt::option( cut_histo,"cut-histo", 't', "cut empty histogram at the end "));
options.push_back(popt::option( k, "variance", 'k', "variance parameter", "256"));
options.push_back(popt::option( self_test, "self-test", 0, "run self test"));
try {
vector<string> non_args;
popt::parse_options(argc, args, options, non_args);
if (!non_args.empty()) {
cverr() << "some arguments where not recogniced\n";
cverr() << "try 'eva-kmeans --help' for help\n";
return -1;
}
if (self_test) {
test(k, auto_k);
return 0;
}
vector<double> histo;
// read input data
size_t sig_size_c = 0;
size_t sig_size = 0;
while (cin.good()) {
float val;
cin >> val;
histo.push_back(val);
++sig_size_c;
if (val > 0)
sig_size = sig_size_c;
}
if (sig_size < sig_size_c && cut_histo)
histo.resize(sig_size);
cvmsg() << "got a histogram with " << histo.size() << " values\n";
bool initialise = false;
if (class_centers.empty()) {
class_centers.resize(nclasses);
initialise = true;
}
CCMeans cmeans(k, even_start, epsilon);
CProbabilityVector pv = cmeans(histo, class_centers, initialise, auto_k);
if (!save_probability_map("-", pv)) {
cverr() << "runtime: unable to save probability map\n";
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
catch (const runtime_error &e){
cerr << args[0] << " runtime: " << e.what() << endl;
}
catch (const invalid_argument &e){
cerr << args[0] << " error: " << e.what() << endl;
}
catch (const exception& e){
cerr << args[0] << " error: " << e.what() << endl;
}
catch (...){
cerr << args[0] << " unknown exception" << endl;
}
return EXIT_FAILURE;
}