[go: up one dir, main page]

Menu

Tree [r283] / ddc / trunk /
 History

HTTPS access


File Date Author Commit
 Test 2011-11-15 mukau [r3] + initial import of ddc-2.0 branch
 bin 2013-05-31 mukau [r77] + added ddc_dump to bin/Makefile.am
 config 2014-03-25 mukau [r158] * add config/config.rpath to dist (for AM_ICONV)
 doc 2014-09-09 mukau [r283] * updated ddc_opt.pod for v2.0.30
 etc 2014-09-09 mukau [r281] v2.0.29 Tue, 09 Sep 2014 13:15:33 +0200 moocow
 log 2011-12-02 mukau [r22] v2.0.1 Fri, 02 Dec 2011 14:56:18 +0100
 m4 2011-12-02 mukau [r22] v2.0.1 Fri, 02 Dec 2011 14:56:18 +0100
 scripts 2014-06-27 mukau [r261] + moved scripts/ddc-make-bibl.perl to ddc-perl ...
 skel 2013-12-02 mukau [r79] + fixed runcmd problem creating new ddc-common....
 src 2014-09-09 mukau [r281] v2.0.29 Tue, 09 Sep 2014 13:15:33 +0200 moocow
 AUTHORS 2013-03-21 mukau [r44] + v2.0.6: added generic wildcard operator '*'
 COPYING 2011-11-15 mukau [r3] + initial import of ddc-2.0 branch
 COPYING.LESSER 2011-11-15 mukau [r3] + initial import of ddc-2.0 branch
 Changes 2014-09-09 mukau [r283] * updated ddc_opt.pod for v2.0.30
 INSTALL 2011-11-15 mukau [r3] + initial import of ddc-2.0 branch
 Makefile.am 2014-03-27 mukau [r161] v2.0.22(pre) Thu, 27 Mar 2014 13:46:48 +0100 mo...
 README.html 2013-12-06 mukau [r84] v2.0.15: + band-aid fix for lower/upper-bound p...
 README.moo 2011-11-15 mukau [r3] + initial import of ddc-2.0 branch
 README.pod 2014-08-13 mukau [r268] + check for missing/unparsed m_Port in CHost::R...
 README.txt 2013-12-06 mukau [r84] v2.0.15: + band-aid fix for lower/upper-bound p...
 autoreconf.sh 2011-11-15 mukau [r3] + initial import of ddc-2.0 branch
 configure.ac 2014-09-09 mukau [r282] + v2.0.30 : corrected version (revision 281 rep...
 ddc.pc.in 2014-03-27 mukau [r161] v2.0.22(pre) Thu, 27 Mar 2014 13:46:48 +0100 mo...
 distcheck.sh 2011-12-09 mukau [r23] + added PACKAGE_VERSION to ddc_daemon startup me...

Read Me

##-*- Mode: Shell-Script -*-

Tue, 03 May 2011 15:57:41 +0200 (moocow)
 + managed to build ddc from (this) source on linux (debian squeeze, 32bit) using the following procedure:

##-- checkout
$ svn co svn+ssh://moocow@odo.dwds.de/home/svn/dev/ddc/trunk ddc
$ cd ddc

##-- set annoying environment variables
$ export RML=$PWD
$ export RML_PCRE_INCLUDE=/usr/include ##-- annoying (libpcre3-dev=8.02-1.1 installed)
$ export RML_PCRE_LIB=/usr/lib         ##-- annoying (make breaks b/c of dangling -L flag otherwise)

##-- compile basic package
$ ./compile_concord.sh
$ ./gen_graphan_bin.sh German
$ ./gen_graphan_bin.sh Russian
$ ./compile_morph.sh
$ ./generate_morph_bin.sh German
$ ./generate_morph_bin.sh English
$ ./generate_morph_bin.sh Russian

##-- now at least ./Bin/ConcordConsole starts up without errors

##--------------------------------------------------------------
## autotool-ification
##  + re-factored sources from Source/*/* to src/*
##  + lump all sources together and let automake take care
##    of dependencies
##  + libtool libraries instead of static
##  + distribution targets probably broken for now

##----- PREREQUISITES / build trace
./compile_concord.sh:
# + ConcordIndex
#   + ConcordLib/libConcord
#    + GraphanLib/libGraphan
#    + StructDictLib/libStructDict
#    + LemmatizerLib/libLemmatizer
#    + AgramtabLib/libAgramtab
#    + tinyxml/libTinyXml
#    + RmlTar/libRmlTar
#    + MorphWizardLib/libMorphWizard
#  + ConcordSimple
#  + ConcordConsole
#  + ConcordDaemon
#  + Search
#  + ddc_xml
#  + ./gen_graphan_bin.sh Russian
#    + ./compile_ross.sh Obor
#      + make -C src/StructDictLoader
#      + Bin/StructDictLoader FromTxt Dicts/$1/ross.txt Dicts/$1
#  + ./gen_graphan_bin.sh German
#    + ./compile_ross.sh GerObor
#+ ./compile_morph.sh
#  + make -C src/MorphGen
#  + make -C src/TestLem
#  + make -C src/FileLem
#  + ./gen_graphan_bin.sh Russian
#  + ./gen_graphan_bin.sh German
#  + echo "try generate_morph_bin.sh"
#+ ./generate_morph_bin.sh $lang ##-- for $lang in qw(German English Russian)
#  + require $RML
#  + require compile_morph.sh (MorphGen)
#  + require Dicts/SrcMorph/$lang.mwz
#    + echo "Error: you should first download and install the morphological dictionary (http://sf.net/projects/morph-lexicon/)"
#  + compile: $RML/Source/MorphGen/MorphGen Dicts/SrcMorph/$lang.mwz Dicts/Morph/$lang 5 3
#~  + test: Bin/TestLem $1 <Test/Morph/$lang/test.txt  >test.temp
#~  + test: cmp test.temp Test/Morph/$lang/result.txt
#~  + manual test: Bin/TestLem $lang

##--------------------------------------------------------------
## install

+ ConcordConsole
  - require $RML/Dicts/Morph/{Rus,Eng,Ger}/{morph.forms_autom,morph.annot,npredict.bin}
+ ConcordIndex
  - require $RML/Dicts/GraphAn/{space.dic,enames.txt,idents.txt,keyboard.txt,extensions.txt}
  - require/Russian: REG("Software\\Dialing\\Obor\\DictPath") = $RML/Dicts/Obor
  - require/German:  REG("Software\\Dialing\\GerObor\\DictPath") = $RML/Dicts/GerObor
    * $RML/Dicts/GerObor/{*.txt,*.bin}

##--------------------------------------------------------------
## I/O formats

ConcordLib/ConcHolder.cpp:1514: CConcHolder::SetResultFormat(string ResultTypeStr)
 ##-- e.g. Holder.SetResultFormat("table");
 : just sets m_ResultFormat = GetResultFormatByString(ResultTypeStr);

ConcordLib/ConcHolder.cpp:1498: CConcHolder::FormatTypeEnum CConcHolder::GetResultFormatByString(const string& ResultTypeStr )
##-- hacked: added DDC_ResultJson

##-- actual hit-local generation in

ConcordLib/ConcHolder.cpp:1260: DDCErrorEnum CConcHolder::GetHits(string Query,  DWORD& EndHitNo) 
ConcordLib/ConcHolder.cpp:438: bool CConcHolder::GenerateOneHitString(DWORD PageNumber, const CHit& Hit, const vector<COutputToken>& Tokens)

##-- now working on drop-in replacement
#  bool CConcHolder::GenerateOneHitStringJson(DWORD PageNumber, const CHit& Hit, const vector<COutputToken>& Tokens)
#  : need to wrap context-generation
#  : original call(s):
#     LEFT:      GetContext((int)Hit.m_BreakNo-(int)m_pQueryEvaluator->m_ContextSentencesCount, Hit.m_BreakNo,  Hit.m_FileNo,  bConvASCIIToHtml, LeftContext )
#     MID/html:  BuildHtmlHitStrWithHighlighting(Tokens, bConvASCIIToHtml, m_pIndexator->m_HtmlHighlighting, true);
#     MID/text:  BuildHtmlHitStrWithHighlighting(Tokens, bConvASCIIToHtml, m_pIndexator->m_TextHighlighting, false);
#     MID/table: "<s part=\"m\">"+ BuildHtmlHitStrWithHighlighting(Tokens,bConvASCIIToHtml, CHighlightTags(), false) + "</s>";
#     RIGHT:     GetContext(Hit.m_BreakNo+1, Hit.m_BreakNo+m_pQueryEvaluator->m_ContextSentencesCount+1, Hit.m_FileNo, bConvASCIIToHtml, RightContext )

##-- ugly regex stuff: /(?i:sapere)/ matches just about everything from '30sten' to 'Abgerupft' to ...
## + culprit search (backtrace)

#0  RML_RE::TryMatch (this=0xbfffe2ac, text=..., startpos=0, anchor=RML_RE::UNANCHORED, vec=0xbfffe0ec, vecsize=51) at ../PCRE/pcre_rml.cpp:475
#1  0x080b9d45 in RML_RE::DoMatchImpl (this=0xbfffe2ac, text=..., anchor=RML_RE::UNANCHORED, consumed=0xbfffe1f8, args=0xbfffe1b8, n=0, vec=0xbfffe0ec, vecsize=51) at ../PCRE/pcre_rml.cpp:521
#2  0x080b9227 in RML_RE::PartialMatch (this=0xbfffe2ac, text=..., ptr1=..., ptr2=..., ptr3=..., ptr4=..., ptr5=..., ptr6=..., ptr7=..., ptr8=..., ptr9=..., ptr10=..., ptr11=..., ptr12=..., ptr13=..., ptr14=..., ptr15=..., ptr16=...) at ../PCRE/pcre_rml.cpp:295
#3  0x0809284b in CStringIndexSet::QueryTokenListUsingRegExp (this=0x821c6a0, RegExp=..., MatchWords=...) at IndexSet.cpp:84
#4  0x080c9ddd in CQueryTokenNode::BuildRegExp (this=0x9194e10, RegExpStr=..., IndexItems=...) at QueryNode.cpp:581
#5  0x080caca3 in CQueryTokenNode::CreateTokenPattern (this=0x9194e10, pHolder=0xbffff014, src=0x8a455dc "(?i:sapere)", bRegularExpession=true, bExact=false) at QueryNode.cpp:744
#6  0x080e5670 in yyqparse (_parser=0x8219a90) at yyQParser.y:270
#7  0x080ddf5c in CQueryParser::ParseQuery (this=0x8219a90, src=0x821981c "/(?i:sapere)/") at QueryParser.cpp:387
#8  0x08062075 in CConcHolder::GetHits (this=0xbffff014, Query=..., EndHitNo=@0xbffff100) at ConcHolder.cpp:1466
#9  0x08062588 in CConcHolder::SimpleQuery (this=0xbffff014, Query=..., EndHitNo=@0xbffff100, HitsCount=@0xbffff0fc) at ConcHolder.cpp:1533
#10 0x0804cbc1 in main (argc=4, argv=0xbffff274) at ConcordSimple.cpp:263

 with options_ = {match_limit_ = 0, match_limit_recursion_ = 0, all_options_ = 0}
 and extra    = {flags = 0, study_data = 0x0, match_limit = 0, callout_data = 0x0, tables = 0x0, match_limit_recursion = 0}

479     int rc = pcre_exec(re,              // The regular expression object
480                      &extra,
481                      text.data(),
482                      text.size(),
483                      startpos,
484                      (anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,
485                      vec,
486                      vecsize)

 where:
   re = this->re_full_ (== this->re_partial_)
   text.data() = "30sten"
   text.size() = 6
   startpos = 0
   anchor = UNANCHORED (:--> pcre_exec() anchor arg = 0)
   vec = ??
   vecsize = 51

 pcre_exec() returns rc==1.  why?!