00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef FSMAPREDUCE_H
00018 #define FSMAPREDUCE_H
00019
00020 #include <iostream>
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <mpi.h>
00024 #include <string.h>
00025 #include <vector>
00026 #include <map>
00027
00028 using namespace std;
00029
00031
00034 template<class T>
00035 class MaPI_FSMapReduce
00036 {
00037 public:
00038
00040
00041
00042
00043
00044
00045
00046
00047
00048 void init(int argc, char **argv, string (*) (string,T), T object);
00049
00051
00052
00053
00054
00055
00056
00057
00058
00059 void init(int argc, char **argv, vector< string (*) (string,T) > functions, T object);
00060
00062
00063
00064
00065
00066
00067
00068 vector<string> * mapper(string (*map) (string,T), vector<string>* inputs);
00069
00071
00072 vector<string> * mapreduce (string (*map) (string,T), vector<string> * (*reduce) (vector<string>*,T), vector<string> * input);
00073
00075
00076 void finalize();
00077
00079
00080 int mapperSize()
00081 { return (MRSize-1); }
00082
00083 private:
00084
00085 T shared;
00086 vector< string (*) (string,T) > functionsT;
00087
00088 void applyFunction();
00089
00090 int MRSize,MRRank;
00091 MPI_Status MRStat;
00092 };
00093
00094
00095
00096
00097
00098 template<class T>
00099 void MaPI_FSMapReduce<T>::init(int argc, char **argv, string (*function) (string,T), T object)
00100 {
00101 MPI_Init(&argc,&argv);
00102 MPI_Comm_size(MPI_COMM_WORLD,&MRSize);
00103 MPI_Comm_rank(MPI_COMM_WORLD,&MRRank);
00104 functionsT.push_back(function);
00105 shared = object;
00106 if (MRRank!=0) applyFunction();
00107 }
00108
00109 template<class T>
00110 void MaPI_FSMapReduce<T>::init(int argc, char **argv, vector< string (*) (string,T) > functions, T object)
00111 {
00112 MPI_Init(&argc,&argv);
00113 MPI_Comm_size(MPI_COMM_WORLD,&MRSize);
00114 MPI_Comm_rank(MPI_COMM_WORLD,&MRRank);
00115 functionsT = functions;
00116 shared = object;
00117 if (MRRank!=0) applyFunction();
00118 }
00119
00120 template<class T>
00121 void MaPI_FSMapReduce<T>::applyFunction()
00122 {
00123 while (true)
00124 {
00125 int id_func;
00126 MPI_Recv(&id_func, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &MRStat);
00127 if (id_func == -1) { MPI_Finalize(); exit(0); }
00128
00129 int input_size;
00130 MPI_Recv(&input_size, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &MRStat);
00131
00132 char input[input_size];
00133 MPI_Recv(&input, input_size, MPI_CHAR, 0, 1, MPI_COMM_WORLD, &MRStat);
00134 if (id_func >= functionsT.size()) { cout << "\nERRO: overflow id_func\n"; exit(1); }
00135
00136 string _output = input[0] == '_' ? "_" : functionsT[id_func](string(input),shared);
00137 int output_size = _output.size() + 1;
00138
00139 char * output = (char*) _output.c_str();
00140 MPI_Send(&output_size, 1, MPI_INT, 0, 1, MPI_COMM_WORLD);
00141 MPI_Send(output, output_size, MPI_CHAR, 0, 1, MPI_COMM_WORLD);
00142 }
00143 }
00144
00145 template<class T>
00146 vector<string> * MaPI_FSMapReduce<T>::mapper(string (*map) (string,T), vector<string>* inputs)
00147 {
00148 vector<string> * outputs = new vector<string>;
00149
00150 int numMapProcs = MRSize-1;
00151 int numInputs = inputs->size();
00152
00153
00154 int id_func = 0;
00155 while (map != functionsT[id_func]) id_func++;
00156
00157
00158 if (numInputs <= numMapProcs)
00159 {
00160
00161
00162 for (int i = 0; i < numInputs; i++)
00163 {
00164
00165 int stsize = inputs->at(i).size() + 1;
00166 char st[stsize];
00167 strcpy(st,inputs->at(i).c_str());
00168
00169 MPI_Send(&id_func, 1, MPI_INT, i+1, 1, MPI_COMM_WORLD);
00170
00171 MPI_Send(&stsize, 1, MPI_INT, i+1, 1, MPI_COMM_WORLD);
00172
00173 MPI_Send(st, stsize, MPI_CHAR, i+1, 1, MPI_COMM_WORLD);
00174
00175 }
00176
00177
00178 for (int i = numInputs; i < numMapProcs; i++)
00179 {
00180
00181 int stsize = 1;
00182 char st[] = "_";
00183 MPI_Send(&id_func, 1, MPI_INT, i+1, 1, MPI_COMM_WORLD);
00184 MPI_Send(&stsize, 1, MPI_INT, i+1, 1, MPI_COMM_WORLD);
00185 MPI_Send(st, stsize, MPI_CHAR, i+1, 1, MPI_COMM_WORLD);
00186 }
00187
00188
00189 for (int i = 1; i <= numMapProcs; i++)
00190 {
00191
00192 int stsize;
00193 MPI_Recv(&stsize, 1, MPI_INT, i, 1, MPI_COMM_WORLD, &MRStat);
00194 char st[stsize];
00195 MPI_Recv(&st, stsize, MPI_CHAR, i, 1, MPI_COMM_WORLD, &MRStat);
00196 if (st[0] != '_')
00197 outputs->push_back(string(st));
00198 }
00199 }
00200
00201 else
00202
00203
00204 {
00205
00206
00207 for (int i = 0; i < numInputs; i++)
00208 {
00209 int dest = i % numMapProcs + 1;
00210
00211
00212 int stsize = inputs->at(i).size();
00213 char st[stsize];
00214 strcpy(st,inputs->at(i).c_str());
00215 MPI_Send(&id_func, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
00216 MPI_Send(&stsize, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
00217 MPI_Send(st, stsize, MPI_CHAR, dest, 1, MPI_COMM_WORLD);
00218 }
00219
00220
00221 for (int i = 0; i < numInputs; i++)
00222 {
00223 int from = i % numMapProcs + 1;
00224 int stsize;
00225 MPI_Recv(&stsize, 1, MPI_INT, from, 1, MPI_COMM_WORLD, &MRStat);
00226 char st[stsize];
00227 MPI_Recv(&st, stsize, MPI_CHAR, from, 1, MPI_COMM_WORLD, &MRStat);
00228 if (st[0] != '_') outputs->push_back(string(st));
00229 }
00230 }
00231
00232 return outputs;
00233 }
00234
00235 template<class T>
00236 vector<string> * MaPI_FSMapReduce<T>::mapreduce (string (*map) (string,T), vector<string> * (*reduce) (vector<string>*,T), vector<string> * input)
00237 {
00238
00239 cout << "== Mapper ==\n";
00240 vector<string> * mapped = mapper(map,input);
00241
00242 cout << "== Reducer ==\n";
00243 vector<string> * output = reduce(mapped,shared);
00244 delete mapped;
00245 return output;
00246 }
00247
00248 template<class T>
00249 void MaPI_FSMapReduce<T>::finalize()
00250 {
00251
00252 int id_func = -1;
00253 for (int i = 1; i < MRSize; i++)
00254 MPI_Send(&id_func, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
00255
00256 MPI_Finalize();
00257 }
00258
00259 #endif
00260