/* giati - Machine Translation via Finite State Models Copyright (C) 2006 Jorge González This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* Usage: giati -c {[-OPTION [param]]} */ #include #include #include #include "op.h" #include "fsm.h" #include "arpa.h" /* #include "giati.h" */ int GetNextTag(char *line, char *parameters) { char tag[MAXCONF]; int i, length; length=strlen(line); line[length-1]='\0'; /* if ((line[0]=='[') && (line[length-2]==']')) { //if (!strcmp(line, "[FITXERS]")) *files = 1; //else *files = 0; return CLASS_TAG; } */ if (strchr(line, '=')) { sscanf( (const char *) line, "%[^= ] = %[^=]", tag, parameters); if (!strcmp(tag, "WORKDIR")) i = 1; else if (!strcmp(tag, "SRILMDIR")) i = 2; else if (!strcmp(tag, "CORPUSDIR")) i = 3; else if (!strcmp(tag, "TMPDIR")) i = 4; else if (!strcmp(tag, "CORPUS")) i = 5; else if (!strcmp(tag, "ALIGN")) i = 6; else if (!strcmp(tag, "SEPARATOR")) i = 7; else if (!strcmp(tag, "PREFIX")) i = 8; else if (!strcmp(tag, "SIMBEXTMAX")) i = 9; else if (!strcmp(tag, "ALGORITHM")) i = 10; else if (!strcmp(tag, "NGRAM")) i = 11; else if (!strcmp(tag, "HASH")) i = 12; else if (!strcmp(tag, "LEMMAS")) i = 13; else if (!strcmp(tag, "EAGLES")) i = 14; else if (!strcmp(tag, "TEST")) i = 15; else if (!strcmp(tag, "OUTPUT")) i = 16; else i = ERROR_TAG; return i; } else return ERROR_TAG; } struct options usage(int argc, char *argv[]) { op_parser_t *parser; char *prefix, *file, *output; FILE *fd_config; char line[MAXCONF], parameters[MAXCONF]; int tag; struct options opt; parser = op_create("%p -c ", PACKAGE_STRING, 1); if (parser == NULL) { fprintf(stderr, "No s'ha pogut crear el parser d'opcions.\n"); exit(EXIT_FAILURE); } op_add_string(parser, 'c', "conf", "FILE", "configuration file", &file, "./giati.cfg"); op_parse(parser, argc, argv); if ((fd_config = fopen(file, "r")) == NULL ) { fprintf(stderr, "Failed open: configuration file <%s>\n", file); exit(EXIT_FAILURE); } while (fgets(line, MAXCONF, fd_config) != NULL) { tag = GetNextTag(line, parameters); //if (files) { switch (tag) { case 1: opt.work_dir = strdup(parameters); break; case 2: opt.srilm_dir = strdup(parameters); break; case 3: opt.corpus_dir = strdup(parameters); break; case 4: opt.tmp_dir = strdup(parameters); break; case 5: opt.corpus = strdup(parameters); break; case 13: opt.lemmas = strdup(parameters); break; case 14: opt.eagles = strdup(parameters); break; case 15: opt.test = strdup(parameters); break; case 6: opt.align = strdup(parameters); break; case 7: opt.separator = strdup(parameters); break; case 8: opt.prefix = strdup(parameters); break; /*} } else { switch (tag) { */ case 9: opt.simbextmax = atoi(parameters); break; case 10: opt.algorithm = atoi(parameters); break; case 11: opt.ngram = atoi(parameters); break; case 12: opt.hts = atoi(parameters); break; case 16: opt.transducer = strdup(parameters); break; } //} } fclose(fd_config); op_add_int(parser, 'n', "ngram", "N", "n-gram order", &opt.ngram, opt.ngram); op_add_string(parser, 'p', "prefix", "STRING", "output prefix", &prefix, opt.prefix); op_add_flag(parser, 's', "segment", "Using provided extended symbols file", &opt.reord); op_add_int(parser, 'F', "filter", "N", "Filter model from test", &opt.type, 0); op_add_flag(parser, 'e', "end", "Show only the separator characters", &opt.beam); op_add_string(parser, 'o', "output", "[arpa|v2]", "Model format", &output, opt.transducer); op_add_float(parser, 'g', "lambda1", "N", "Weight of the model 1: p(f,e)", &opt.l[0], 1); op_add_float(parser, 'i', "lambda2", "N", "Weight of the model 2: p(f|e)", &opt.l[1], 1); op_add_float(parser, 'd', "lambda3", "N", "Weight of the model 3: p(e|f)", &opt.l[2], 1); op_add_float(parser, 'd', "lambda3", "N", "Weight of the model 3: p(e|f)", &opt.l[2], 1); op_add_float(parser, 'l', "lambda4", "N", "Weight of the model 4: Word bonus", &opt.l[3], 1); op_add_float(parser, 'x', "lambda5", "N", "Weight of the model 5: Exponential", &opt.l[4], 1); op_add_float(parser, 'y', "lambda6", "N", "Weight of the model 6: Word penalty", &opt.l[5], 1); op_add_float(parser, 'j', "lambda0", "N", "Weight of the model 0: Language model", &opt.norm, 1); op_add_string(parser, 'a', "model", "STRING", "model file without extension", &opt.automata, NULL); op_parse(parser, argc, argv); if (prefix != opt.prefix) strcpy(opt.prefix, prefix); if (output != opt.transducer) strcpy(opt.transducer, output); op_destroy(parser); return opt; } void write_parameters(struct options *opt) { char params[MAXCONF]; FILE *fd_params; sprintf(params, "%s/%s%s.params", opt->corpus_dir, opt->prefix, opt->corpus); if ((fd_params = fopen(params, "w")) == NULL ) { fprintf(stderr, "Failed write: parameters file <%s>\n", params); exit(EXIT_FAILURE); } fprintf(fd_params, "WORKDIR %s\n", opt->work_dir); fprintf(fd_params, "SRILMDIR %s\n", opt->srilm_dir); fprintf(fd_params, "CORPUSDIR %s\n", opt->corpus_dir); fprintf(fd_params, "TMPDIR %s\n", opt->tmp_dir); fprintf(fd_params, "CORPUS %s\n", opt->corpus); fprintf(fd_params, "LEMMAS %s\n", opt->lemmas); fprintf(fd_params, "EAGLES %s\n", opt->eagles); fprintf(fd_params, "TEST %s\n", opt->test); fprintf(fd_params, "ALIGN %s\n", opt->align); fprintf(fd_params, "SEPARATOR %s\n", opt->separator); fprintf(fd_params, "PREFIX %s\n", opt->prefix); fprintf(fd_params, "SIMBEXTMAX %d\n", opt->simbextmax); fprintf(fd_params, "ALGORITHM %d\n", opt->algorithm); fprintf(fd_params, "NGRAM %d\n", opt->ngram); fprintf(fd_params, "OUTPUT %s\n", opt->transducer); fprintf(fd_params, "HASH %d\n", opt->hts); fclose(fd_params); } void freedom(struct options *opt) { free(opt->work_dir); free(opt->srilm_dir); free(opt->corpus_dir); free(opt->tmp_dir); free(opt->corpus); free(opt->lemmas); free(opt->eagles); free(opt->test); free(opt->align); free(opt->separator); free(opt->prefix); free(opt->transducer); } int main(int argc, char *argv[]) { struct options options; unsigned char c1, c2, c3, c4; h_t *table; T_REDGEN *v2; char train[MAXCONF], test[MAXCONF], fsm[MAXCONF]; options = usage(argc, argv); write_parameters(&options); search_characters(&options, &c1, &c2, &c3, &c4); if (options.beam) { printf("%c(%d) %c(%d)\n", c1, c1, c2, c2); freedom(&options); exit(1); } if (!options.reord) write_extended_simbols(&options, c1, c2); get_coded_corpus(&options); get_automaton(&options); if (!strcmp(options.transducer, "v2")) { sprintf(train, "%s/%s%s", options.corpus_dir, options.prefix, options.corpus); sprintf(test, "%s/%s", options.corpus_dir, options.test); v2 = arpa2fsm(train, test, options.type, options.hts, options.separator, options.algorithm, c1, c2, Training, &options); sprintf(fsm, "%s.tr", train); write_fsm(v2, fsm, 1, &options); free_fsm(v2); } if ((options.algorithm == 2) || (options.algorithm == 4)) get_l2w_table(&options, c1); freedom(&options); }