/* Marcion
Copyright (C) 2009 - 2011 Milan Konvicka
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program */
#include "lsj.h"
/*morph_code const wclass[15]={
{'a',"adjective"},
{'d',"adverb"},
{'t',"participle"},
{'v',"verb"},
{'n',"noun"},
{'x',"x"},
{'r',"preposition"},
{'e',"interjection"},
{'c',"conjunction"},
{'g',"particle"},
{'p',"pronoun"},
{'m',"numeral"},
{'l',"l"},
{'f',"f"},
{'-',""}
};*/
/*WDictItem const CLSJ::wclass[]={
WDictItem('a',QString(tr("adjective"))),
WDictItem('d',QString(tr("adverb"))),
WDictItem('t',QString(tr("participle"))),
WDictItem('v',QString(tr("verb"))),
WDictItem('n',QString(tr("noun"))),
WDictItem('x',QString("x")),
WDictItem('r',QString(tr("preposition"))),
WDictItem('e',QString(tr("interjection"))),
WDictItem('c',QString(tr("conjunction"))),
WDictItem('g',QString(tr("particle"))),
WDictItem('p',QString(tr("pronoun"))),
WDictItem('m',QString(tr("numeral"))),
WDictItem('l',QString("l")),
WDictItem('f',QString("f")),
WDictItem('-',QString())
};
WDictItem const CLSJ::wpers[]={
WDictItem('1',QString("1.")),
WDictItem('2',QString("2.")),
WDictItem('3',QString("3.")),
WDictItem('-',QString())
};
WDictItem const CLSJ::wnum[]={
WDictItem('s',QString(tr("singular"))),
WDictItem('p',QString(tr("plural"))),
WDictItem('d',QString(tr("dual"))),
WDictItem('-',QString())
};
WDictItem const CLSJ::wtens[]={
WDictItem('a',QString(tr("aorist"))),
WDictItem('p',QString(tr("present"))),
WDictItem('i',QString(tr("imperfectum"))),
WDictItem('f',QString(tr("futurum"))),
WDictItem('l',QString(tr("plusquamperfectum"))),
WDictItem('r',QString(tr("perfectum"))),
WDictItem('t',QString("t")),
WDictItem('-',QString())
};
WDictItem const CLSJ::wmode[]={
WDictItem('p',QString(tr("participle"))),
WDictItem('n',QString(tr("infinitive"))),
WDictItem('i',QString(tr("indicative"))),
WDictItem('s',QString(tr("conjunctive"))),
WDictItem('m',QString(tr("imperative"))),
WDictItem('o',QString(tr("optative"))),
WDictItem('g',QString("g")),
WDictItem('u',QString("u")),
WDictItem('-',QString())
};
WDictItem const CLSJ::wvoice[]={
WDictItem('p',QString(tr("passive"))),
WDictItem('a',QString(tr("active"))),
WDictItem('m',QString(tr("medium"))),
WDictItem('e',QString(tr("mediopassive"))),
WDictItem('-',QString())
};
WDictItem const CLSJ::wgender[]={
WDictItem('f',QString(tr("female"))),
WDictItem('m',QString(tr("male"))),
WDictItem('n',QString(tr("neuter"))),
WDictItem('-',QString())
};
WDictItem const CLSJ::wcase[]={
WDictItem('a',QString(tr("accusative"))),
WDictItem('n',QString(tr("nominative"))),
WDictItem('v',QString(tr("vocative"))),
WDictItem('d',QString(tr("dative"))),
WDictItem('g',QString(tr("genitive"))),
WDictItem('b',QString("b")),
WDictItem('-',QString())
};
WDictItem const CLSJ::wadjform[]={
WDictItem('c',QString(tr("comparative"))),
WDictItem('s',QString(tr("superlative"))),
WDictItem('-',QString())
};
WDictItem const *CLSJ::mclist[]=
{
&wclass[0],
&wpers[0],
&wnum[0],
&wtens[0],
&wmode[0],
&wvoice[0],
&wgender[0],
&wcase[0],
&wadjform[0],
0
};*/
// Static lookup tables, one per position of a morphology code string.
// They are defined empty here and filled by CLSJ::init().
WDict CLSJ::wclass;   // word class (adjective, adverb, verb, noun, ...)
WDict CLSJ::wpers;    // person (1., 2., 3.)
WDict CLSJ::wnum;     // number (singular, plural, dual)
WDict CLSJ::wtens;    // tense
WDict CLSJ::wmode;    // mood
WDict CLSJ::wvoice;   // voice
WDict CLSJ::wgender;  // gender
WDict CLSJ::wcase;    // case
WDict CLSJ::wadjform; // adjective degree (comparative, superlative)
// Pointers to the tables above, in code-position order (filled by CLSJ::init()).
// CLSJ::morphCode() decodes the character at index x with the table at index x.
QList<WDict*> CLSJ::mclist;
// Parallel transliteration alphabets used by CLSJ::perseusToUtf8(): the
// character at index i of `lat` is rendered as the character at index i of `gr`.
// The trailing "-,. " characters map to themselves.
QString const lat(QString::fromUtf8("abgdezhqiklmncoprstufxywABGDEZHQIKLMNCOPRSTUFXYW-,. "));
QString const gr(QString::fromUtf8("αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ-,. "));
// ASCII diacritic markers, terminated by 0. Entry i is rendered as gr_acc[i].
unsigned short lat_acc[]={
'(',')','/','$','\\','=','+','|','?','_','^',0
};
// Unicode combining marks, index-parallel to lat_acc and terminated by 0.
unsigned short gr_acc[]={
0x0314, // '('  -> U+0314 COMBINING REVERSED COMMA ABOVE
0x0313, // ')'  -> U+0313 COMBINING COMMA ABOVE
0x0301, // '/'  -> U+0301 COMBINING ACUTE ACCENT
0x0300, // '$'  -> U+0300 COMBINING GRAVE ACCENT
0x0300, // '\\' -> U+0300 COMBINING GRAVE ACCENT (same mark as '$')
0x0342, // '='  -> U+0342 COMBINING GREEK PERISPOMENI
0x0308, // '+'  -> U+0308 COMBINING DIAERESIS
0x0345, // '|'  -> U+0345 COMBINING GREEK YPOGEGRAMMENI
0x0323, // '?'  -> U+0323 COMBINING DOT BELOW
0x0304, // '_'  -> U+0304 COMBINING MACRON
0x0306, // '^'  -> U+0306 COMBINING BREVE
0x0     // terminator
};
// Constructs the LSJ widget.
//   messages - message sink stored in the `messages` member and handed to the
//              output browser; also used later for query logging/errors.
//   parent   - parent widget forwarded to QWidget.
// Builds the child widgets via setupUi(), switches both input fields to the
// Greek script and initialises an empty Greek output browser.
CLSJ::CLSJ(CMessages * const messages,
QWidget *parent) :
QWidget(parent),messages(messages),
_store()
{
setupUi(this);
txtInput->setScript(CTranslit::Greek);
txtInputParse->setScript(CTranslit::Greek);
brOutput->init(messages,windowTitle(),CBookTextBrowser::Greek);
brOutput->browser()->clear();
brOutput->allowChangeScript();
// Project macro defined outside this file.
// NOTE(review): presumably applies the configured icon sizes — confirm.
IC_SIZES
}
// Fills the static morphology tables (wclass ... wadjform) with their
// code-character -> translated label pairs and registers them in mclist in
// code-position order.
//
// The function is idempotent: the tables are static and only ever appended
// to, so a second call used to duplicate every entry and, worse, grow mclist
// beyond nine tables, which breaks the "position x -> table x" mapping that
// morphCode() relies on. Once mclist is populated further calls do nothing.
void CLSJ::init()
{
    if(!mclist.isEmpty())
        return;

    // word class
    wclass << WDictItem('a',tr("adjective"))
           << WDictItem('d',tr("adverb"))
           << WDictItem('t',tr("participle"))
           << WDictItem('v',tr("verb"))
           << WDictItem('n',tr("noun"))
           << WDictItem('x',"x")
           << WDictItem('r',tr("preposition"))
           << WDictItem('e',tr("interjection"))
           << WDictItem('c',tr("conjunction"))
           << WDictItem('g',tr("particle"))
           << WDictItem('p',tr("pronoun"))
           << WDictItem('m',tr("numeral"))
           << WDictItem('l',"l")
           << WDictItem('f',"f")
           << WDictItem('-',QString());

    // person
    wpers << WDictItem('1',"1.")
          << WDictItem('2',"2.")
          << WDictItem('3',"3.")
          << WDictItem('-',QString());

    // number
    wnum << WDictItem('s',tr("singular"))
         << WDictItem('p',tr("plural"))
         << WDictItem('d',tr("dual"))
         << WDictItem('-',QString());

    // tense
    wtens << WDictItem('a',tr("aorist"))
          << WDictItem('p',tr("present"))
          << WDictItem('i',tr("imperfectum"))
          << WDictItem('f',tr("futurum"))
          << WDictItem('l',tr("plusquamperfectum"))
          << WDictItem('r',tr("perfectum"))
          << WDictItem('t',"t")
          << WDictItem('-',QString());

    // mood
    wmode << WDictItem('p',tr("participle"))
          << WDictItem('n',tr("infinitive"))
          << WDictItem('i',tr("indicative"))
          << WDictItem('s',tr("conjunctive"))
          << WDictItem('m',tr("imperative"))
          << WDictItem('o',tr("optative"))
          << WDictItem('g',"g")
          << WDictItem('u',"u")
          << WDictItem('-',QString());

    // voice
    wvoice << WDictItem('p',tr("passive"))
           << WDictItem('a',tr("active"))
           << WDictItem('m',tr("medium"))
           << WDictItem('e',tr("mediopassive"))
           << WDictItem('-',QString());

    // gender
    wgender << WDictItem('f',tr("female"))
            << WDictItem('m',tr("male"))
            << WDictItem('n',tr("neuter"))
            << WDictItem('-',QString());

    // case
    wcase << WDictItem('a',tr("accusative"))
          << WDictItem('n',tr("nominative"))
          << WDictItem('v',tr("vocative"))
          << WDictItem('d',tr("dative"))
          << WDictItem('g',tr("genitive"))
          << WDictItem('b',"b")
          << WDictItem('-',QString());

    // adjective degree
    wadjform << WDictItem('c',tr("comparative"))
             << WDictItem('s',tr("superlative"))
             << WDictItem('-',QString());

    // The order here defines which code position each table decodes.
    mclist << &wclass
           << &wpers
           << &wnum
           << &wtens
           << &wmode
           << &wvoice
           << &wgender
           << &wcase
           << &wadjform;
}
/*void CLSJ::search_in_parses(QString const & word)
{
brOutput->clear();
brOutput->append(word);
QSqlQuery q;
QString query("select * from `lsj_parses` where `bare_form_utf8`='"+word+"'");
messages->MsgMsg("executing query \""+query+"\"");
if(!q.exec(query))
{
messages->MsgErr(q.lastError().text());
return;
}
QList<s_parses> parses;
while(q.next())
{
s_parses p;
p.id=q.value(1).toInt();
p.m_code=q.value(2).toString();
p.word=q.value(3).toString();
parses.append(p);
}
for(int x=0;x<parses.size();x++)
{
QString l;
l.append(parses[x].word+"\t"+
parses[x].m_code);
brOutput->append(l);
QSqlQuery q2;
QString query2("select * from `lsj_lemmas` where `lemma_id`="+q.value(1).toString());
messages->MsgMsg("executing query \""+query2+"\" ...");
if(!q2.exec(query2))
{
messages->MsgErr(q2.lastError().text());
return;
}
while(q2.next())
{
QString l(q2.value(1).toString()+"\t"+
q2.value(4).toString());
brOutput->append(l);
}
}
messages->MsgOk();
}*/
// Runs the query selected by the active input tab.
// The output browser and the result store are emptied first. Tab index 0
// performs a dictionary lookup with the chosen search mode and renders the
// store; any other tab runs the morphological parse (which renders the store
// itself). The history of the input field that was used is updated last.
void CLSJ::on_btQuery_clicked()
{
    brOutput->browser()->clear();
    _store.clear();

    bool const parseTabActive=(tabInput->currentIndex()!=0);
    if(parseTabActive)
    {
        parse();
        txtInputParse->updateHistory();
        return;
    }

    dictionary(txtInput->text_utf8(),searchMode());
    displayStore();
    txtInput->updateHistory();
}
// Converts one raw dictionary entry (markup text) into simple HTML.
//
// Steps:
//  1. Replace the "&mdash" and "&rpar" entities and wrap the content of <tr>
//     and <usg> elements in private "%%%" markers so that their emphasis
//     survives the tag stripping below.
//  2. Transliterate every text run that directly follows a "greek"> attribute
//     end (up to the next '<') with perseusToUtf8().
//  3. Split into lines, drop the last two lines, and for every remaining line
//     concatenate the text found between '>' and '<', turning the markers
//     into <b> / small-italic HTML. Lines starting with "<sense" are prefixed
//     with a running number. Every line ends with "<br>".
//
// Returns the generated HTML; an input with fewer than three lines yields an
// empty string.
QString CLSJ::prepare_sense(QString const & sense)
{
    QString text(sense);
    text.replace("&mdash",QString::fromUtf8("—"));
    text.replace("&rpar",QString::fromUtf8("#)"));
    text.replace("<tr>","<tr>%%%!").
        replace("</tr>","!%%%</tr>");
    text.replace("<usg>","<usg>%%%#").
        replace("</usg>","#%%%</usg>");

    QString const greekMarker(QString::fromUtf8("\"greek\">"));
    int const markerLen=greekMarker.length();
    int pos=0;
    while((pos=text.indexOf(greekMarker,pos))!=-1)
    {
        int const runStart=pos+markerLen;
        int const runEnd=text.indexOf(QChar('<'),runStart);
        if(runEnd==-1)
            break;
        QString const converted(perseusToUtf8(text.mid(runStart,runEnd-runStart)));
        text.replace(runStart,runEnd-runStart,converted);
        // Continue right behind the converted run. The transliteration can
        // change the length of the run, so the pre-replacement index of '<'
        // must not be reused.
        pos=runStart+converted.length();
    }

    QStringList lines(text.split('\n'));
    // Drop the two trailing lines. split() may return a single element, so
    // guard each removal: removeLast() on an empty list is not allowed.
    for(int dropped=0;dropped<2&&!lines.isEmpty();++dropped)
        lines.removeLast();

    QString result;
    int senseNo=0;
    for(int x=0;x<lines.size();++x)
    {
        QString const & line=lines.at(x);
        if(line.startsWith("<sense"))
            result.append(QString::number(++senseNo)+". ");

        // Collect every text fragment between a '>' and the following '<'.
        int from=0;
        while(from!=-1)
        {
            QString piece(enclosedBy(">","<",line,false,&from));
            piece.replace("%%%!","<b>").
                replace("!%%%","</b>");
            piece.replace("%%%#","<small><span style=\"font-style: italic;\">").
                replace("#%%%","</span></small>");
            if(!piece.trimmed().isEmpty())
                result.append(piece);
        }
        result.append("<br>");
    }
    return result;
}
/*QString CLSJ::prepare_sense(QString const & sense) const
{
QString _sense(sense);
_sense.replace("&mdash",QString::fromUtf8("—"));
_sense.replace("&rpar",QString::fromUtf8("#)"));
_sense.replace("<tr>","<tr>%%%!").
replace("</tr>","!%%%</tr>");
_sense.replace("<usg>","<usg>%%%#").
replace("</usg>","#%%%</usg>");
int p=0;
while((p=_sense.indexOf(
QString::fromUtf8(
"\"greek\">"),p))!=-1)
{
int np=_sense.indexOf(
QString::fromUtf8("<"),p+8);
if(np!=-1)
{
QString w(_sense.mid(p+8,np-p-8));
_sense.replace(p+8,np-p-8,perseusToUtf8(w));
p=np+9;
}
else break;
}
QString r;
int i1,i2;
i1=_sense.indexOf("<sense");
i2=_sense.lastIndexOf("</sense>");
if(i1!=-1&&i2!=-1)
{
QString __sense(_sense.mid(i1,i2+8-i1));
_sense.remove(i1,i2-i1+8);
//r.append(_sense+"\n");
p=0;
while(p!=-1)
{
QString s(enclosedBy(">","<",_sense,false,&p));
if(s.trimmed()!=QString())
r.append(s+"<br>");
}
p=0;
while(p!=-1)
{
QString s(enclosedBy("<sense","</sense>",__sense,true,&p));
int p2=0;
while(p2!=-1)
{
QString s2(enclosedBy(">","<",s,false,&p2));
s2.replace("%%%!","<b>").
replace("!%%%","</b>");
s2.replace("%%%#","<small><span style=\"font-style: italic;\">").
replace("#%%%","</span></small>");
r.append(s2);
}
r.append("<br>");
}
}
return r;
}*/
// Returns the first piece of `text` delimited by `begin` ... `end`.
//
//   begin, end - delimiters; `end` is searched after the end of `begin`.
//   text       - text to search.
//   outer      - true: the result includes both delimiters;
//                false: only the text between them.
//   from       - optional in/out cursor. On entry *from is the index where
//                the search for `begin` starts (passed to QString::indexOf
//                unchanged, so a negative value counts from the end). On
//                success it is set to the index just past the first character
//                of the matched `end`; when nothing is found it is set to -1.
//                May be null, in which case the search starts at index 0.
//
// Returns a null QString when either delimiter is missing.
QString CLSJ::enclosedBy(QString const & begin,
                         QString const & end,
                         QString const & text,
                         bool outer,
                         int * from)
{
    // The cursor is optional: never dereference it without checking.
    int const searchStart=from?*from:0;

    int const beginPos=text.indexOf(begin,searchStart);
    if(beginPos!=-1)
    {
        int const innerStart=beginPos+begin.length();
        int const endPos=text.indexOf(end,innerStart);
        if(endPos!=-1)
        {
            if(from)
                *from=endPos+1;
            if(outer)
                return text.mid(beginPos,(endPos+end.length())-beginPos);
            return text.mid(innerStart,endPos-innerStart);
        }
    }

    if(from)
        *from=-1;
    return QString();
}
// Transliterates a Latin-letter encoded Greek string into Unicode Greek.
//
// Rules, applied character by character:
//   '#'    - escape: the marker is dropped and the next character is copied
//            unchanged.
//   digit  - copied, preceded by a space.
//   '*'    - capital marker: the next word character is upper-cased and moved
//            into the position of the '*' (markers in between keep their
//            order and therefore follow the capital).
//   letter - characters found in `lat` are replaced by the character at the
//            same index of `gr`.
//   accent - characters found in `lat_acc` are replaced by the combining mark
//            at the same index of `gr_acc`.
//   other  - copied unchanged.
// Finally every sigma that is followed by a non-word character, or that ends
// the string, is turned into a final sigma.
QString CLSJ::perseusToUtf8(QString const & latin)
{
    QString src(latin); // working copy; '*' handling edits it in place
    QString out;
    QRegExp const digit("\\d");    // built once instead of once per character
    QRegExp const wordChar("\\w");
    bool copyNextVerbatim=false;

    for(int x=0;x<src.length();x++)
    {
        if(copyNextVerbatim)
        {
            out.append(src.at(x));
            copyNextVerbatim=false;
            continue;
        }
        if(src.at(x)==QChar('#'))
        {
            copyNextVerbatim=true;
            continue;
        }
        if(digit.exactMatch(src.mid(x,1)))
        {
            out.append(QString(" ")+src.at(x));
            continue;
        }
        if(src.at(x)==QChar('*'))
        {
            int const letterPos=src.indexOf(wordChar,x+1);
            if(letterPos!=-1)
            {
                src.replace(x,1,src.at(letterPos).toUpper());
                src.remove(letterPos,1);
            }
        }

        // Read the current character from the working copy: after a '*' was
        // processed `src` is shorter than `latin`, so indexing `latin` with x
        // would pick the wrong character.
        QChar const ch(src.at(x));

        int const letterIdx=lat.indexOf(ch);
        if(letterIdx!=-1)
        {
            out.append(gr.at(letterIdx));
            continue;
        }

        bool mapped=false;
        for(int i=0;lat_acc[i]!=0;++i)
            if(ch==QChar(lat_acc[i]))
            {
                out.append(QChar(gr_acc[i]));
                mapped=true;
                break;
            }
        if(!mapped)
            out.append(ch);
    }

    // Final sigma: append a sentinel space so that a sigma at the very end is
    // followed by a non-word character too, then remove the sentinel again.
    QRegExp const sigmaBeforeBoundary(QString::fromUtf8("σ\\W"));
    out.append(" ");
    int p=0;
    while((p=out.indexOf(sigmaBeforeBoundary,p))!=-1)
    {
        out.replace(p,1,QString::fromUtf8("ς"));
        ++p; // everything up to here is already final
    }
    out.chop(1);
    return out;
}
// Looks `str` up in the `lsj_senses` table and appends the result to _store.
//
//   str  - search term (exact word, LIKE pattern or regular expression,
//          depending on `mode`).
//   mode - Exact: `word_utf8` = term; Like: LIKE term; RegExp: REGEXP term.
//
// The query is restricted by limit(). On a query error the error is reported
// through `messages` and nothing is appended. Otherwise a match-count header
// and, per row, a "match i/n page p" line followed by column 2 of the row are
// stored - verbatim when the "raw" box is checked, converted by
// prepare_sense() otherwise. The store is not rendered here; callers do that
// with displayStore().
void CLSJ::dictionary(QString const & str,SearchMode mode)
{
    // Escape the term for use inside a single-quoted SQL string literal.
    // Backslashes must be doubled first: escaping only the quote lets an
    // input such as  \'  or a trailing backslash terminate the literal early.
    // As a consequence a backslash typed by the user now reaches LIKE/REGEXP
    // as a backslash instead of being consumed by the literal.
    QString pstr(str);
    pstr.replace("\\","\\\\").replace("'","\\'");

    QString where;
    switch(mode)
    {
    case Exact :
        where="`word_utf8`='"+pstr+"'";
        break;
    case Like :
        where="`word_utf8` like '"+pstr+"'";
        break;
    case RegExp :
        where="`word_utf8` regexp '"+pstr+"'";
        break;
    }

    QString query("select * from `lsj_senses` where ");
    query.append(where+limit());

    CMySql q;
    messages->MsgMsg(tr("executing query '")+query+"'");
    if(!q.exec(query))
    {
        messages->MsgErr(q.lastError());
        return;
    }

    QString const size(QString::number(q.size()));
    _store.appendItem(false,QString("<br>"+size+tr(" matches<br>")));

    // x is the 1-based number of the current match.
    for(int x=1;q.next();++x)
    {
        _store.appendItem(false,QString(tr("<br>match: ")+QString::number(x)+"/"+size+tr(" page: ")+
                                        QString::number(spnPage->value())+"<br>"));
        if(cbRaw->isChecked())
            _store.appendItem(false,QString(q.value(2)+"<br>"));
        else
            _store.appendItem(false,prepare_sense(QString(q.value(2)+"<br>")));
    }
    messages->MsgOk();
}
void CLSJ::parse()
{
CMySql q;
QString query("select * from `lsj_parses` left join `lsj_lemmas` on `lsj_parses`.`lemma_id`=`lsj_lemmas`.`lemma_id` where `bare_form_utf8`='"+txtInputParse->text_utf8()+"'"+limit());
if(!q.exec(query))
{
messages->MsgErr(q.lastError());
return;
}
QStringList lemmas;
while(q.next())
{
QString morph_code(q.value(2));
QString exp_form(q.value(3));
QString lemma_text(
perseusToUtf8(q.value(8)));
QString seq_num(q.value(9));
QString def(q.value(11));
_store.appendItem(false,QString(morphCode(morph_code)+" - "+morph_code+" | "));
_store.appendItem(true,perseusToUtf8(exp_form));
_store.appendItem(false," | ");
_store.appendItem(true,perseusToUtf8(lemma_text));
_store.appendItem(false,QString(" "+
seq_num+" | "+
def)+"<br>");
/*brOutput->browser()->append(morphCode(morph_code)+" - "+morph_code+" | "+
perseusToUtf8(exp_form)+" | "+
perseusToUtf8(lemma_text)+" "+
seq_num+" | "+
def);*/
if(!lemmas.contains(lemma_text))
lemmas << lemma_text;
}
_store.appendItem(false,QString("<br>"+QString::number(lemmas.size())+tr(" entries<br>")));
/*brOutput->browser()->append(QString::number(lemmas.size())+" entries\n");
//brOutput->browser()->append("\n");*/
for(int x=0;x<lemmas.size();x++)
dictionary(lemmas[x],Exact);
displayStore();
messages->MsgOk();
}
// Maps the search-mode radio buttons to a SearchMode value.
// "Exact" wins whenever it is checked and is also the fallback when none of
// the three buttons is checked.
CLSJ::SearchMode CLSJ::searchMode() const
{
    if(!rbExact->isChecked())
    {
        if(rbLike->isChecked())
            return Like;
        if(rbRegExp->isChecked())
            return RegExp;
    }
    return Exact;
}
void CLSJ::displayStore()
{
QString atxt;
for(int x=0;x<_store.count;x++)
{
QString s(_store.items[x].text);
if(_store.items[x].is_greek)
{
s=CTranslit::tr(s,CTranslit::GreekNToGreekTr,brOutput->rmAccents(),brOutput->rmSpaces());
s=CTranslit::tr(s,CTranslit::GreekTrToGreekN,brOutput->rmAccents(),brOutput->rmSpaces());
/*if(brOutput->isHighlightChecked())
s=brOutput->highlightText(s);*/
}
atxt.append(s);
}
brOutput->browser()->clear();
brOutput->browser()->insertHtml(atxt);
brOutput->finalizeContent();
brOutput->browser()->moveCursor(QTextCursor::Start);
}
/*void CLSJ::on_brOutput_highlightActivated(bool * processed)
{
displayStore();
*processed=true;
}
void CLSJ::on_brOutput_highlightDeactivated(bool * processed)
{
displayStore();
*processed=true;
}*/
// Handler for a content change of the output browser (presumably an
// auto-connected slot of brOutput — confirm). The two bool arguments are
// ignored. Re-renders the stored result and sets *processed to true.
// NOTE(review): processed is dereferenced without a null check; presumably
// the emitter always passes a valid pointer — confirm.
void CLSJ::on_brOutput_contentChanged(bool, bool, bool * processed)
{
displayStore();
*processed=true;
}
// Query request from the dictionary input field (presumably an auto-connected
// slot of txtInput — confirm); behaves exactly like clicking the query button.
void CLSJ::on_txtInput_query()
{
on_btQuery_clicked();
}
// Query request from the parse input field (presumably an auto-connected slot
// of txtInputParse — confirm); behaves exactly like clicking the query button.
void CLSJ::on_txtInputParse_query()
{
on_btQuery_clicked();
}
// Builds the SQL " limit <offset>,<count>" clause for the current page:
// count is the value of spnLimit, offset is page number * count.
QString CLSJ::limit() const
{
    int const rowsPerPage=spnLimit->value();
    int const offset=spnPage->value()*rowsPerPage;

    QString clause(" limit ");
    clause.append(QString::number(offset));
    clause.append(",");
    clause.append(QString::number(rowsPerPage));
    return clause;
}
// Prepares the widget for a morphological parse of `str` without running it:
// empties the result store and the output browser, selects the "exact" search
// mode, activates tab index 1 (on_btQuery_clicked() treats every non-zero tab
// as the parse tab) and puts `str` into both input fields after calling
// setSwitchState(true) on each.
// NOTE(review): the meaning of setSwitchState(true) is defined by the input
// widget class, which is not visible here.
void CLSJ::prepareParse(QString const & str)
{
_store.clear();
brOutput->browser()->clear();
rbExact->setChecked(true);
tabInput->setCurrentIndex(1);
txtInput->setSwitchState(true);
txtInput->setText(str);
txtInputParse->setSwitchState(true);
txtInputParse->setText(str);
}
// Performs an immediate exact dictionary lookup of `str`:
// empties the result store and the output browser, selects the "exact" search
// mode and tab index 0, places `str` in the dictionary input field, then
// queries with the field's text_utf8() value and renders the result.
void CLSJ::directSearch(QString const & str)
{
_store.clear();
brOutput->browser()->clear();
rbExact->setChecked(true);
tabInput->setCurrentIndex(0);
txtInput->setSwitchState(true);
txtInput->setText(str);
// The lookup uses the text as returned by the input widget, not `str` itself.
dictionary(txtInput->text_utf8(),CLSJ::Exact);
displayStore();
}
// Decodes a positional morphology code into a human readable description.
//
// The character at index x of `mcode` is looked up in the table mclist[x];
// for the first matching entry a space followed by the entry's label is
// appended to the result (the '-' entries have an empty label, so they
// contribute a single space). Characters without a matching entry contribute
// nothing.
//
// Codes shorter than the number of tables are decoded as far as they go, and
// characters beyond the last table are ignored; previously a short code was
// read out of range.
QString CLSJ::morphCode(QString const & mcode) const
{
    QString r;
    int const tables=mclist.count();
    int const positions=(mcode.length()<tables)?mcode.length():tables;
    for(int x=0;x<positions;x++)
    {
        QChar const symbol(mcode.at(x));
        WDict const * const table=mclist.at(x);
        for(int y=0;y<table->count();y++)
            if(table->at(y).first==symbol)
            {
                r.append(" "+table->at(y).second);
                break;
            }
    }
    return r;
}
/*void CLSJ::prepareDictionary()
{
txtInput->setSwitchState(false);
rbExact->setChecked(true);
}*/
//
// Creates an empty store: no items and an item counter of zero.
CLSJStore::CLSJStore()
:items(),count(0)
{
}
void CLSJStore::appendItem(bool is_greek,QString const & text)
{
items.append(CLSJPiece(is_greek,text));
count++;
}
// Removes all stored pieces and resets the item counter to zero.
void CLSJStore::clear()
{
    items.clear();
    count=0;
}
//
// Default piece: not Greek, with default-constructed (null) text.
CLSJPiece::CLSJPiece()
:is_greek(false),text()
{
}
// Creates a piece holding a copy of `text` and the given Greek flag.
CLSJPiece::CLSJPiece(bool is_greek,QString const & text)
:is_greek(is_greek),text(text)
{
}