[go: up one dir, main page]

Menu

[r994]: / libmona / src / kmeans.cc  Maximize  Restore  History

Download this file

192 lines (156 with data), 5.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
/* -*- mona-c++ -*-
* Copyright (c) Leipzig, Madrid 2004 - 2008
* Max-Planck-Institute for Human Cognitive and Brain Science
* Max-Planck-Institute for Evolutionary Anthropology
* BIT, ETSI Telecomunicacion, UPM
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <libmona/monaIOStream.hh>
#include <map>
#include <stdexcept>
#include <cmath>
typedef std::pair<int, int> CClassRange;
typedef std::map<double, CClassRange> CClassMap;
CClassMap kmeans(size_t classes, const std::vector<double>& histo, int max_iter, bool even_start = true)
{
/** now kmeans */
std::vector<double> center(classes);
if (even_start) {
double steps = histo.size() / double(classes);
for (size_t i = 0; i < classes; ++i)
center[i] = (i+1) * steps;
}else{
size_t intensity = 0;
double g_size = 1.0 / classes;
size_t i;
for (i = 0; (i < classes - 1) && (intensity < histo.size()); ++i) {
double val = 0.0;
double l_size = 0;
while (l_size < g_size && intensity < histo.size()) {
l_size += histo[intensity];
val += histo[intensity] * intensity;
++intensity;
}
center[i] = val / l_size;
mona::cvmsg() << "center[" << i << "] = " << center[i] << "\n";
}
{
double val = 0.0;
double l_size = 0;
while (intensity < histo.size()) {
l_size += histo[intensity];
val += histo[intensity] * intensity;
++intensity;
}
center[i] = val / l_size;
mona::cvmsg() << "center[" << i << "] = " << center[i] << "\n";
}
}
std::vector<size_t> next_center(histo.size());
double diff = 10.0;
int iter = 0;
while (diff > 1.0 && iter++ < max_iter) {
mona::cvdebug() << "iterate classes:" << iter << "\n";
diff = 0.0;
for (size_t i = 0; i < next_center.size(); ++i) {
next_center[i] = 0;
double dist = fabs(i - center[0]);
for (size_t k = 1; k < classes; ++k) {
double ndist = fabs(i - center[k]);
if (ndist < dist) {
dist = ndist;
next_center[i] = k;
}
}
}
for (size_t i = 0; i < classes; ++i) {
double xnorm = 0.0;
double sum = 0.0;
for (size_t j = 0; j < next_center.size(); j++)
if (next_center[j] == i) {
xnorm += histo[j];
sum += j * histo[j];
}
if (xnorm > 0)
sum /= xnorm;
else
sum = 0;
double dx = sum - center[i];
center[i] = sum;
diff += dx * dx;
mona::cvdebug() << "center[" << i << "] = " << center[i] << "\n";
}
mona::cvdebug() << "diff = " << diff << std::endl;
}
if (next_center[0] != 0)
throw std::runtime_error("First value not next to the first center - this shouldn't happen: Bye bye\n");
CClassMap result;
CClassRange range;
range.first = range.second = 0;
for (size_t i = 0; i < classes; ++i) {
while (next_center[range.second] == i)
++range.second;
result[center[i]] = range;
range.first = range.second;
}
return result;
}
int main(int argc, const char *args[])
{
int classes;
int max_iter;
bool even_start;
popt::COptions options;
options.push_back(popt::option( classes, "classes", 'n', "number of classes to partition into","10"));
options.push_back(popt::option( max_iter, "max-iter", 'm', "maximum number of iterations", "100"));
options.push_back(popt::option( even_start, "even-start", 'e', "start with centers evenly distributed over the histogram"));
try {
std::vector<std::string> non_args;
popt::parse_options(argc, args, options, non_args);
if (!non_args.empty()) {
mona::cverr() << "some arguments where not recogniced\n";
mona::cverr() << "try 'eva-kmeans --help' for help\n";
return -1;
}
std::vector<double> histo;
// read input data
while (std::cin.good()) {
float val;
std::cin >> val;
histo.push_back(val);
}
CClassMap result = kmeans(classes, histo, max_iter, even_start);
std::cout << result.size() << "\n";
for (CClassMap::const_iterator i = result.begin(); i != result.end(); ++i) {
std::cout << i->first << " " << i->second.first << " " << i->second.second <<"\n";
}
return 0;
}
catch (const std::runtime_error &e){
std::cerr << args[0] << " runtime: " << e.what() << std::endl;
}
catch (const std::invalid_argument &e){
std::cerr << args[0] << " error: " << e.what() << std::endl;
}
catch (const std::exception& e){
std::cerr << args[0] << " error: " << e.what() << std::endl;
}
catch (...){
std::cerr << args[0] << " unknown exception" << std::endl;
}
return EXIT_FAILURE;
}