#ifndef LURCH_PATH_TRACKER
#define LURCH_PATH_TRACKER
#include <QFileSystemWatcher>
#include <QMap>
#include <QDateTime>
#include <QStringList>
#include "lob.h"
/** \brief Tracks Lurch Documents in a set of watched folders, including their URNs and any
* other attributes the client chooses, as well as changes to them
*
* Give a LurchPathTracker a list of directories, and it will index the URNs of every
* <code>.lurch</code> file in those directories, and will watch for when those files change,
* when new <code>.lurch</code> files appear in those directories, and when
* <code>.lurch</code> files are deleted from those directories. It will update its
* internal URN/filename index appropriately and emit signals indicating what
* happened, as \ref FileChangeRules "described further below".
*
* All interface methods this class provides deal with URNs, so that clients can simply
* say where documents may be (by giving this class its watched folders) and then not care
* about specific filenames. (Note that a new filename created by URNToNewFilename() will
* be in the first directory in the watched folder list, which therefore acts as a "default
* directory.")
*
* When files or directories are given as input to any routine in this class, the first
* thing it does is convert them to a canonical, absolute path using canonical(). Every
* filename or pathname returned from any routine in this class will be canonical and
* absolute in the same sense. (This is accomplished with the QFileInfo::canonicalPath()
* and QFileInfo::canonicalFilePath() routines.)
*
* Note that no two different documents in the watched folders should have the same URN (as
* in Lob::getURN()). However, as this may happen due to inadvertent copying or modifying of
* files, it is important for Lurch to handle it gracefully. This class calls such a
* situation an "indexing conflict," because it cannot maintain the injectivity of the
* maps filenameToURN() and URNToFilename(). In such a case, the first file encountered
* when Lurch processes the watched folders is indexed, but the second file encountered
* with the same URN (or any subsequent file with the same URN) is not indexed, and errors
* are reported as in the function add(), below, and in the signals documented here.
*
* \anchor FileChangeRules
* <h2>How this class handles changes in the watched folders</h2>
* <ul>
* <li>When a file from the watched folders is deleted (or moved out of the watched
* folders):
* <ol>
* <li>If it was not indexed by this object (perhaps because it was not a
* <code>.lurch</code> file, or it did not contain a valid document according
* to Lob::isDocument(), or it was part of an indexing conflict),
* its disappearance is ignored.</li>
* <li>Otherwise, the signal documentDisappeared() is emitted with the document's
* URN and its data is removed from all internal indexes.</li>
* </ol>
* </li>
* <li>When a new file is created in (or moved into) one of the watched folders:
* <ol>
* <li>If it is not a <code>.lurch</code> file, its arrival is ignored.</li>
* <li>If it does not represent a valid document (according to Lob::isDocument()),
* its arrival is ignored.</li>
* <li>If its URN is already indexed under a different filename, and thus this
* file creates an indexing conflict, then the signal documentAppeared() is
* emitted with the document's URN, and the conflict flag set to true.</li>
* <li>It is a valid document that is not yet indexed, so this object indexes it
* for use in filenameToURN() and URNToFilename(), and emits the signal
* documentAppeared() with the conflict flag set to false.</li>
* </ol>
* </li>
* <li>When a file in one of the watched folders is modified (but not moved, renamed, or
* deleted):
* <ol>
* <li>If it was not indexed by this object (perhaps because it was not a
* <code>.lurch</code> file, or it did not contain a valid document according
* to Lob::isDocument(), or it was part of an indexing conflict),
* its modification is handled just as if it were newly created;
* see above.</li>
* <li>The file was indexed, and so it contained a valid document before this
* change. If it does not now contain one, then emit the documentDisappeared()
* signal with the document's old URN (that signal carries no conflict flag).
* Drop the old indexing information.
* </li>
* <li>The file was indexed, and still should be if possible, because it contains a
* valid document. Proceed as follows.
* <ol>
* <li>If the old and new URNs are the same, no change in indexing information
* need be made except to update the modification time. Do so and emit
* the documentChanged() signal.</li>
* <li>If the new URN is already indexed, then it is indexed under a different
* filename, and thus this change creates an indexing conflict.
* Emit the documentURNChanged() signal, with conflict flag set to true.
* Remove the old indexing information.</li>
* <li>The new URN is not already indexed under a different filename,
* and thus this change simply requires updating internal indexes.
* Remove the old indexing information, add the new indexing information,
* and emit the documentURNChanged() signal,
* with conflict flag set to false.</li>
* </ol>
* </li>
* </ol>
* </li>
* <li>Some other possible filesystem events are combinations of the three above.
* When a file is moved or renamed, and both its original and final filenames (with
* full path) are in the watched folders, this will be treated as two successive
* operations, in one of the following categories.
* <ul>
* <li>If the destination is a new file (not writing over an old one) then the event
* is first a disappearance from the original location,
* then an appearance in the new location.</li>
* <li>If the destination is not a new file (but writing over an old one) then the
* event is first a disappearance from the original location,
* then a modification of the destination file.</li>
* </ul>
* </li>
* </ul>
* Note that if a document changes content and URN, then only the documentURNChanged()
* signal is emitted; this implies that the URN changed and possibly more than that as well.
*/
class LurchPathTracker : public QFileSystemWatcher
{
Q_OBJECT
public:
/** \brief Construct a new LurchPathTracker with no watched folders
*
* Call the member function add() to add watched folders to this object.
* Call the member function addWatchedAttribute() to add an attribute to the list of
* attributes this object watches for in the files it indexes.
*
* This routine is tested in test_lpathtracker::test_paths().
*/
LurchPathTracker ();
/** \brief Add a watched attribute (a symbol Lob to be treated as an attribute key)
*
* It is assumed that \a key is a symbol Lob, and it is added to the list of watched
* attributes, which you can read via watchedAttributes().
* To know what that list is for, see the documentation for watchedAttributes().
*
* Calling this function may result in re-reading many files on disk, to find the values
* for the attribute you pass in. Thus it is sensible to make all necessary calls to
* this function before any calls to add(), i.e., before any files have been read/indexed.
*
* This routine is tested in test_lpathtracker::test_paths().
*/
void addWatchedAttribute ( const Lob key );
/** \brief Remove a watched attribute (a symbol Lob to be treated as an attribute key)
*
* If a Lob that is Lob::equivalentTo() \a key appears on the internal list of watched
* attributes, this routine removes it.
* To know more about that list, see the documentation for watchedAttributes().
*
* Calling this function will not result in re-reading any files on disk, unlike
* addWatchedAttribute(), because it only discards data, and does not fetch any new data.
*/
void removeWatchedAttribute ( const Lob key );
/** \brief Return the list of watched attributes stored in this object
*
* For any <code>.lurch</code> file in the watched folders (see folders()), when its URN
* is read, so are the values of each attribute of that document whose keys are on the
* list returned by this function. You can query the values cached at that time
* using cachedAttributeFor().
*
* This function returns a copy of the list stored internally, so modifying the result
* does not impact the internal state of this object.
*
* This routine is implicitly tested by its use in the implementation of indexFile().
*
* \see addWatchedAttribute(), removeWatchedAttribute(), cachedAttributeFor()
*/
QList<Lob> watchedAttributes () const;
/** \brief Lookup the value of an attribute for a document on disk, from cached values
*
* The given Lob must be a symbol lob on the watchedAttributes() list, and the value
* of that attribute for the document with the given URN will be returned. An empty Lob
* is returned if the document did not have that attribute or if the URN does not
* correspond to any valid Lurch Document stored in a file in the watched folders.
*
* The Lob returned is one that is actually stored in this LurchPathTracker object, but
* it is read-only.
*
* This routine is tested in test_lpathtracker::test_paths().
*/
Lob cachedAttributeFor ( QString urn, Lob keySymbol ) const;
/** \brief Lookup the list of documents that have a certain attribute, with the values
*
* Returns a mapping from document URNs to the values they have for the given attribute.
* If a document in the watched folders did not have the attribute whose key was passed
* in as \a keySymbol, it will not appear in this mapping. The Lobs in this mapping
* are the actual Lobs stored in this LurchPathTracker object, but are read-only.
*/
QMap<QString,Lob> documentsWithAttributes ( Lob keySymbol ) const;
/** \brief Add a new watched folder to this object's list of folders to track
*
* Assuming the path is valid in that it refers to an existing folder,
* this will be added to the list of folders to watch for changes, and all the
* <code>.lurch</code> files in it will be indexed by URN and modification time,
* for later reference when potential changes happen to them which need to be
* emitted as signals.
*
* It is possible that there are two different documents in the set of watched folders
* (the \a folder parameter plus any watched folders added earlier) that have the same
* URN; this is invalid. Whenever it happens, the second of the two encountered is
* considered the invalid one (arbitrarily) and is not added to this object's internal
* index of filenames and URNs, but instead its filename is recorded as a conflict.
* The total list of conflicts is returned from this function.
*
* If the caller needs to find out which other file had the same URN, use
* loadFileURN() to fetch the URN from a file reported as a conflict,
* and then URNToFilename() to find the file that is indexed under that URN.
*
* If the given folder does not refer to an existing directory, this function does
* nothing and returns the empty list.
*
* This routine is tested in test_lpathtracker::test_paths().
*/
QStringList add ( QString folder );
/** \brief Remove a watched folder from this object's list of folders to watch
*
* Information about files in the folder is wiped from this object's internal data
* structures and the folder is no longer watched for changes to files.
*
* This routine is tested in test_lpathtracker::test_paths().
*/
void remove ( QString folder );
/** \brief List of all folders currently being tracked
*
* All folders currently being tracked by this object, with the "default" one first
* (see the description of this class, or the documentation of URNToNewFilename(),
* which places new filenames in that first folder).
*
* This routine is tested in test_lpathtracker::test_paths().
*/
QStringList folders () const;
/** \brief Convert a URN to a filename in the watched folder list
*
* If there exists in the watched folder list a <code>.lurch</code> file whose URN
* (as a document, as in Lob::isDocument() and Lob::getURN()) matches \a urn,
* this returns the filename of that document.
*
* Otherwise, an empty string is returned, to indicate that the URN could not be
* found for any document in the watched folder list. In such a case, you can create
* a reasonable default filename using URNToNewFilename().
*
* The reverse conversion is accomplished by filenameToURN().
*
* This routine is tested in test_lpathtracker::test_paths()
* and test_lpathtracker::test_signals().
*/
QString URNToFilename ( QString urn ) const;
/** \brief Converts a filename in the watched folder list to an URN
*
* If the given filename refers to an existing <code>.lurch</code> document in the
* watched folder list that contains a valid document (see Lob::isDocument()) then
* this function returns that document's URN (see Lob::getURN()).
*
* The exception is if there were conflicts when indexing due to the invalid case
* where two different documents have the same URN. In this case, try loadFileURN().
*
* Otherwise, an empty string is returned (that is, if the filename is outside
* the watched folder, does not exist, or does not contain a valid document).
*
* The reverse conversion is accomplished by URNToFilename().
*
* This routine is tested in test_lpathtracker::test_paths()
* and test_lpathtracker::test_signals().
*/
QString filenameToURN ( QString filename ) const;
/** \brief Reads the URN from a <code>.lurch</code> document file on the filesystem
*
* This does not depend on any internal indexing data, but reads the file afresh
* (or at least enough of it to determine its URN). If the file is not an existing
* <code>.lurch</code> file containing a valid document, the empty string is returned.
* Note that the file need not be in the watched folders.
*
* If the file is in the watched folders and is not involved in any indexing conflicts
* that invalidate it, then filenameToURN() is designed to be much faster, operating
* on indexed data and thus not touching the filesystem.
*
* This routine is implicitly tested whenever URNAlreadyIndexed() and other routines are
* tested, because several such routines call this one.
*/
QString loadFileURN ( QString filename ) const;
/** \brief Does what loadFileURN(QString) does, and captures other document attributes
*
* This routine performs the same job as loadFileURN(QString), and also extracts from
* the document the values of any attributes whose keys are given in the list of
* Lobs in the \a otherData parameter. Each key in \a otherData is replaced with the
* corresponding value of the document's attribute indexed by that key. If the given
* \a filename does not refer to a file containing a valid Lurch Document, then not
* only does this function return an empty string, it clears the list \a otherData
* as well.
*
* This routine exists for the sake of efficiency;
* as long as we must load the document to compute its URN,
* we may as well do all other data extraction necessary while we have it in Lob form.
*
* This routine is implicitly tested whenever filenameToURN() is tested.
* The \a otherData portion of this function's work is tested in
* test_lpathtracker::test_paths().
*/
QString loadFileURN ( QString filename, QList<Lob>& otherData ) const;
/** \brief Creates a new (reasonable) filename in the default folder from the given URN
*
* This is for use when a file with the URN \a urn does not exist, but a reasonable
* default filename for such a file is needed.
*
* It creates a filename from the title in the URN, appended with the
* extension <code>.lurch</code>, and prefixed with the first folder in the watched
* folders list, presumed to be the default. If that filename is in use, this one will
* suffix it with the string " copy <i>n</i>" with <i>n</i> the smallest positive integer
* that creates an as-yet-unused filename.
*
* If \a urn is not a valid URN (according to Lob::isLurchURN()) then an
* empty string is returned. If \a urn is valid but
* folders() is empty, no path is prefixed, and a
* lone (relative) filename is returned (e.g., "My Document Title.lurch").
*
* Note that this function <b>does not</b> check to ensure that the URN is not already
* in use by another filename! It is the responsibility of the caller to do so
* using URNToFilename() before calling this function.
*
* This routine is tested in test_lpathtracker::test_paths().
*/
QString URNToNewFilename ( QString urn ) const;
/** The list of all filenames indexed by this object's internal data structures.
* The filenames will use full, absolute paths.
*/
QStringList indexedFiles () const;
/** The list of all URNs indexed by this object's internal data structures.
*/
QStringList indexedURNs () const;
/** This class deals with paths; all must be canonical for string comparison to be
* useful. This converts any valid path into a canonical one, whether it is a path to
* a file or to a directory. It returns invalid paths unchanged.
*
* This is useful for asking questions like the following one.
* \code
* if ( myPathTracker.folders().contains( myPathTracker.canonical( myFolder ) ) ) {
* ...
* }
* \endcode
*/
static QString canonical ( const QString path );
signals:
/** \brief Signals that a new document was created in one of the watched folders
*
* Emitted when a new <code>.lurch</code> file appears in one of the
* watched folders, if and only if it contains a valid Lurch Document.
*
* The parameter \a conflict is true if and only if
* \a urn was a duplicate of another URN already in the watched folders, and
* thus an indexing conflict prevented the document from being indexed
* (as in URNAlreadyIndexed() and add()).
*
* This signal is tested in test_lpathtracker::test_paths().
*/
void documentAppeared ( QString urn, bool conflict );
/** \brief Signals that a document in one of the watched folders was just deleted
*
* Emitted after an indexed <code>.lurch</code> file is removed from one of the
* watched folders.
*
* This signal is tested in test_lpathtracker::test_paths().
*/
void documentDisappeared ( QString urn );
/** \brief Signals that a document in one of the watched folders changed content
*
* Emitted when a <code>.lurch</code> file in one of the watched folders
* had its content changed, but its URN remained the same.
*
* This signal will be tested in test_lpathtracker::test_paths(), but it is not
* yet being tested there due to a bug in QFileSystemWatcher; this text will be
* updated when tests are possible (and succeed).
*/
void documentChanged ( QString urn );
/** \brief Signals that a document in one of the watched folders changed URN
*
* Emitted when a <code>.lurch</code> file in one of the watched folders
* had its URN changed, but its content may or may not have remained the same.
* The before and after URNs are given, and \a conflict is true if and only if
* \a urnAfter was a duplicate of another URN already in the watched folders, and
* thus an indexing conflict prevented the document from being indexed
* (as in URNAlreadyIndexed() and add()).
*
* This signal will be tested in test_lpathtracker::test_paths(), but it is not
* yet being tested there due to a bug in QFileSystemWatcher; this text will be
* updated when tests are possible (and succeed).
*/
void documentURNChanged ( QString urnBefore, QString urnAfter, bool conflict );
private:
/** Whether the given URN already appears in this object's internal data with a different
* filename; in that case there is an indexing conflict. However if the URN
* is not indexed already, or if it maps to \a filename, then there is no conflict.
*
* If \a urn is empty, this routine will first obtain it from the file, and that value
* will therefore be available to the caller due to the pass-by-reference parameter.
* If a valid URN cannot be obtained from the file, or the file is not a
* <code>.lurch</code> file, this routine returns false.
*/
bool URNAlreadyIndexed ( QString filename, QString& urn );
/** If the file exists and is a valid <code>.lurch</code> document,
* this records its data internally in this object for later lookup.
* This routine does not check whether URNAlreadyIndexed() is true; it simply changes
* the internal data.
*
* The \a urn and \a attributeValues parameters are optional;
* if either is not provided, the routine will attempt to get them from the file.
* \return true if either \a urn was provided or it was loadable from the file,
* and thus the indexing was possible; false otherwise
*
* The \a attributeValues portion of this function's work is tested in
* test_lpathtracker::test_paths().
*/
bool indexFile ( QString filename, QString urn = QString(),
QList<Lob> attributeValues = QList<Lob>() );
/** Call indexFile() on every <code>.lurch</code> file in the directory, except when
* conflicts arise. If for any file URNAlreadyIndexed() returns true, then decide
* which of the conflicting files should be indexed based on which has the latest
* modification time. Do not index the rest. Return the list of files that were not
* indexed.
*
* NOTE(review): the documentation of add() says that the first file encountered wins
* an indexing conflict, whereas this comment says the most recently modified file
* wins; confirm which of the two the implementation actually does.
*
* \return A list of those files which could not be indexed due to conflicts,
* and their last modification times not being the latest in their conflict.
*/
QStringList indexDir ( QString directory );
/** This is used when any indexed file is deleted or changes URN; it seeks to
* promote any unindexed files whose reason for not being indexed was a conflict with
* the deleted (or URN-changed) file, a conflict that no longer exists because it was
* deleted or had its URN changed.
*
* It looks through all unindexed <code>.lurch</code> files to find if any are valid
* documents with the URN \a urn. If any are found, index the most recently modified
* one. This routine assumes that it is called when \a urn is unindexed.
*
* \return true if a conflict actually did exist and
* was fixed (or lessened) by this call, false otherwise.
*/
bool decreaseConflict ( QString urn );
/** Remove the file from all the internal data structures; this is the opposite of
* indexFile().
*/
void dropFile ( QString filename );
/** Call dropFile() on every <code>.lurch</code> file in the directory;
* this is the opposite of indexDir().
*/
void dropDir ( QString directory );
/** Store map from URN to the filename containing the Lurch Document with that URN.
* This map is injective, with inverse fn2urn.
* This map is kept up to date using this object as a QFileSystemWatcher.
*/
QMap<QString,QString> urn2fn;
/** Store map from filename containing a Lurch Document to that document's URN.
* This map is injective, with inverse urn2fn.
* This map is kept up to date using this object as a QFileSystemWatcher.
*/
QMap<QString,QString> fn2urn;
/** Store map from filename to its last modified date.
* This map is kept up to date using this object as a QFileSystemWatcher.
*/
QMap<QString,QDateTime> fn2mod;
/** Store map from filename to its list of cached attributes.
* This map is kept up to date using this object as a QFileSystemWatcher.
*/
QMap<QString,QList<Lob> > fn2attrs;
/** Stores the list of watched attribute keys. The constructor takes no arguments, so
* this list is populated through addWatchedAttribute(), trimmed through
* removeWatchedAttribute(), and read through watchedAttributes().
*/
QList<Lob> watchKeys;
/** When a file disappears, this processes that disappearance according to
* \ref FileChangeRules "the rules specified in the description of this class".
*/
void fileDisappeared ( QString file );
/** When a file appears, this processes that appearance according to
* \ref FileChangeRules "the rules specified in the description of this class".
*/
void fileAppeared ( QString file );
/** When a file changes, this processes that change according to
* \ref FileChangeRules "the rules specified in the description of this class".
*
* NOTE(review): this private member has the same name as the
* QFileSystemWatcher::fileChanged() signal inherited from the base class, and so
* hides that signal inside this class's scope; confirm that any connect() call or
* emit involving fileChanged resolves to the intended member.
*/
void fileChanged ( QString file );
private slots:
/** When a directory's contents change, this slot handles figuring out which file(s)
* changed, updating the correct internal data structures, and emitting one
* of this class's signals.
*/
void dirChanged ( QString dir );
// When the LURCH_UNIT_TEST macro is defined, the class it names is declared a friend,
// which gives that class access to the private members declared above.
#ifdef LURCH_UNIT_TEST
friend class LURCH_UNIT_TEST;
#endif
};
#endif // LURCH_PATH_TRACKER