/* -*- mona-c++ -*-
* Copyright (c) Leipzig, Madrid 2004 - 2008
* Max-Planck-Institute for Human Cognitive and Brain Science
* Max-Planck-Institute for Evolutionary Anthropology
* BIT, ETSI Telecomunicacion, UPM
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <libmona/monaIOStream.hh>
#include <map>
#include <stdexcept>
#include <cmath>
typedef std::pair<int, int> CClassRange;
typedef std::map<double, CClassRange> CClassMap;
CClassMap kmeans(size_t classes, const std::vector<double>& histo, int max_iter, bool even_start = true)
{
/** now kmeans */
std::vector<double> center(classes);
if (even_start) {
double steps = histo.size() / double(classes);
for (size_t i = 0; i < classes; ++i)
center[i] = (i+1) * steps;
}else{
size_t intensity = 0;
double g_size = 1.0 / classes;
size_t i;
for (i = 0; (i < classes - 1) && (intensity < histo.size()); ++i) {
double val = 0.0;
double l_size = 0;
while (l_size < g_size && intensity < histo.size()) {
l_size += histo[intensity];
val += histo[intensity] * intensity;
++intensity;
}
center[i] = val / l_size;
mona::cvmsg() << "center[" << i << "] = " << center[i] << "\n";
}
{
double val = 0.0;
double l_size = 0;
while (intensity < histo.size()) {
l_size += histo[intensity];
val += histo[intensity] * intensity;
++intensity;
}
center[i] = val / l_size;
mona::cvmsg() << "center[" << i << "] = " << center[i] << "\n";
}
}
std::vector<size_t> next_center(histo.size());
double diff = 10.0;
int iter = 0;
while (diff > 1.0 && iter++ < max_iter) {
mona::cvdebug() << "iterate classes:" << iter << "\n";
diff = 0.0;
for (size_t i = 0; i < next_center.size(); ++i) {
next_center[i] = 0;
double dist = fabs(i - center[0]);
for (size_t k = 1; k < classes; ++k) {
double ndist = fabs(i - center[k]);
if (ndist < dist) {
dist = ndist;
next_center[i] = k;
}
}
}
for (size_t i = 0; i < classes; ++i) {
double xnorm = 0.0;
double sum = 0.0;
for (size_t j = 0; j < next_center.size(); j++)
if (next_center[j] == i) {
xnorm += histo[j];
sum += j * histo[j];
}
if (xnorm > 0)
sum /= xnorm;
else
sum = 0;
double dx = sum - center[i];
center[i] = sum;
diff += dx * dx;
mona::cvdebug() << "center[" << i << "] = " << center[i] << "\n";
}
mona::cvdebug() << "diff = " << diff << std::endl;
}
if (next_center[0] != 0)
throw std::runtime_error("First value not next to the first center - this shouldn't happen: Bye bye\n");
CClassMap result;
CClassRange range;
range.first = range.second = 0;
for (size_t i = 0; i < classes; ++i) {
while (next_center[range.second] == i)
++range.second;
result[center[i]] = range;
range.first = range.second;
}
return result;
}
int main(int argc, const char *args[])
{
int classes;
int max_iter;
bool even_start;
popt::COptions options;
options.push_back(popt::option( classes, "classes", 'n', "number of classes to partition into","10"));
options.push_back(popt::option( max_iter, "max-iter", 'm', "maximum number of iterations", "100"));
options.push_back(popt::option( even_start, "even-start", 'e', "start with centers evenly distributed over the histogram"));
try {
std::vector<std::string> non_args;
popt::parse_options(argc, args, options, non_args);
if (!non_args.empty()) {
mona::cverr() << "some arguments where not recogniced\n";
mona::cverr() << "try 'eva-kmeans --help' for help\n";
return -1;
}
std::vector<double> histo;
// read input data
while (std::cin.good()) {
float val;
std::cin >> val;
histo.push_back(val);
}
CClassMap result = kmeans(classes, histo, max_iter, even_start);
std::cout << result.size() << "\n";
for (CClassMap::const_iterator i = result.begin(); i != result.end(); ++i) {
std::cout << i->first << " " << i->second.first << " " << i->second.second <<"\n";
}
return 0;
}
catch (const std::runtime_error &e){
std::cerr << args[0] << " runtime: " << e.what() << std::endl;
}
catch (const std::invalid_argument &e){
std::cerr << args[0] << " error: " << e.what() << std::endl;
}
catch (const std::exception& e){
std::cerr << args[0] << " error: " << e.what() << std::endl;
}
catch (...){
std::cerr << args[0] << " unknown exception" << std::endl;
}
return EXIT_FAILURE;
}