[go: up one dir, main page]

File: webqueue.h

package info (click to toggle)
recoll 1.43.0-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 16,400 kB
  • sloc: cpp: 103,890; python: 9,349; xml: 7,305; ansic: 6,447; sh: 1,212; perl: 130; makefile: 72
file content (84 lines) | stat: -rw-r--r-- 3,044 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/* Copyright (C) 2009 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#ifndef _webqueue_h_included_
#define _webqueue_h_included_

#include <list>
#include <string>

/**
 * Process the WEB indexing queue. 
 *
 * This was originally written to reuse the Beagle Firefox plug-in (which
 * copied visited pages and bookmarks to the queue). That plug-in is long
 * dead and has been replaced by a recoll-specific one.
 */

#include "fstreewalk.h"
#include "rcldoc.h"

// Forward declarations. Where this header uses these types at all, it is
// only through pointers, so the full definitions are not needed here.
class CirCache;
class RclConfig;
class WebStore;
namespace Rcl { class Db; }

/**
 * Indexer for the Web queue.
 *
 * Derives from FsTreeWalkerCB and overrides processone(), which is the
 * callback used while walking the queue directory. Instances cannot be
 * copied.
 */
class WebQueueIndexer : public FsTreeWalkerCB {
public:
    WebQueueIndexer(RclConfig *cnf, Rcl::Db *db);
    ~WebQueueIndexer();

    // Copying is disabled.
    WebQueueIndexer(const WebQueueIndexer&) = delete;
    WebQueueIndexer& operator=(const WebQueueIndexer&) = delete;

    /**
     * Entry point called by the top indexer in recollindex.
     * Does the walking and the talking.
     */
    bool index();

    /** Callback invoked when we fstreewalk the queue directory. */
    FsTreeWalker::Status processone(
        const std::string &, FsTreeWalker::CbFlag, const struct PathStat&) override;

    /**
     * Index a list of files, without any db cleaning or stemdb updating.
     * Used by the real time monitor.
     */
    bool indexFiles(std::list<std::string>& files);

    /**
     * Purge a list of files.
     *
     * Deliberately a no-op which always reports success: there is currently
     * no way to do this, and we don't want to do anything anyway, because
     * this is mostly called by the monitor when *I* delete files inside the
     * queue dir.
     */
    bool purgeFiles(std::list<std::string>&) {
        return true;
    }

    /**
     * Called when indexing data from the cache, and from internfile for
     * search result preview.
     *
     * @param hittype optional, may be left out (defaults to nullptr).
     */
    bool getFromCache(
        const std::string& udi, Rcl::Doc &doc, std::string& data,
        std::string *hittype = nullptr);

private:
    RclConfig *m_config = nullptr;
    Rcl::Db *m_db = nullptr;
    WebStore *m_cache = nullptr;
    std::string m_queuedir;

    // When true, the cache is not processed. Set by indexFiles().
    bool m_nocacheindex = false;

    // Configuration: page erase interval. Normally only one instance is
    // kept. The parameter can be set to "day", "week", "month" or "year" to
    // keep more.
    enum KeepInterval {
        WQKI_NONE,
        WQKI_DAY,
        WQKI_WEEK,
        WQKI_MONTH,
        WQKI_YEAR
    };
    KeepInterval m_keepinterval = WQKI_NONE;

    // Private helpers, only declared here (defined outside this header).
    bool indexFromCache(const std::string& udi);
    void updstatus(const std::string& udi);
};

#endif /* _webqueue_h_included_ */