[go: up one dir, main page]

File: esa.h

package info (click to toggle)
andi 0.13-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 952 kB
  • sloc: ansic: 2,267; sh: 426; cpp: 99; makefile: 76; awk: 51
file content (72 lines) | stat: -rw-r--r-- 1,976 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/**
 * @file
 * @brief This header declares the ESA data types and the functions implemented in esa.c.
 *
 */
#ifndef _ESA_H_
#define _ESA_H_

#include "config.h"
#include "sequence.h"
#include <divsufsort.h>
#include <sys/types.h>

/**
 * @brief Represents LCP-intervals.
 *
 * An LCP-interval is described by its two bounds: the member `i` should
 * coincide with the lower bound whereas `j` is the upper bound. Both bounds
 * are inclusive, so `i == j` denotes an interval that contains exactly one
 * element, namely `i`. To represent an empty interval please use
 * `i == j == -1`. Other variants, such as `i == j == -2`, can be used to
 * indicate an error. The common prefix length is denoted by `l` and should
 * always be non-negative.
 *
 * Variables of this type are often called `ij`.
 */
typedef struct {
	/** @brief The common prefix length; should always be non-negative. */
	saidx_t l;
	/** @brief Lower bound of the interval (inclusive). */
	saidx_t i;
	/** @brief Upper bound of the interval (inclusive). */
	saidx_t j;
	/**
	 * @brief The new middle.
	 *
	 * NOTE(review): presumably an index inside `[i, j]` at which the
	 * interval is split; this header does not say so — confirm in esa.c.
	 */
	saidx_t m;
} lcp_inter_t;

/**
 * @brief The ESA type.
 *
 * This structure holds the arrays and objects associated with an enhanced
 * suffix array (ESA): the base string, the suffix array itself, the LCP,
 * FVC and child arrays, and a cache of LCP-intervals.
 *
 * NOTE(review): ownership of the pointed-to buffers is not visible in this
 * header; presumably esa_init() sets them up and esa_free() releases them —
 * confirm in esa.c.
 */
typedef struct esa_s {
	/** @brief The base string from which the ESA was generated. */
	const char *S;
	/** @brief The actual suffix array; its entries are indexes into `S`. */
	saidx_t *SA;
	/**
	 * @brief The longest common prefix (LCP) array.
	 *
	 * The LCP holds the number of letters up to which a suffix `S[SA[i]]`
	 * equals `S[SA[i-1]]`. Hence the name longest common prefix. For
	 * `i = 0` and `i = len` the LCP value is -1.
	 */
	saidx_t *LCP;
	/** @brief The length of the string `S`. */
	saidx_t len;
	/** @brief A cache for LCP-intervals. */
	lcp_inter_t *cache;
	/** @brief The FVC array holds the character after the LCP. */
	char *FVC;
	/** @brief This is the child array. */
	saidx_t *CLD;
} esa_s;

/**
 * @brief Looks up `query` in an ESA; judging by the name, this variant
 * makes use of the LCP-interval cache (`esa_s::cache`).
 *
 * NOTE(review): the matching semantics are implemented in esa.c and are not
 * visible from this header.
 *
 * @param esa The enhanced suffix array to search in.
 * @param query The query string.
 * @param qlen The length of the query.
 * @return An LCP-interval; see ::lcp_inter_t for how empty intervals and
 * errors are encoded.
 */
lcp_inter_t get_match_cached(const esa_s *esa, const char *query, size_t qlen);

/**
 * @brief Looks up `query` in an ESA.
 *
 * NOTE(review): the matching semantics are implemented in esa.c and are not
 * visible from this header.
 *
 * @param esa The enhanced suffix array to search in.
 * @param query The query string.
 * @param qlen The length of the query.
 * @return An LCP-interval; see ::lcp_inter_t for how empty intervals and
 * errors are encoded.
 */
lcp_inter_t get_match(const esa_s *esa, const char *query, size_t qlen);

/**
 * @brief Initializes an ESA from a subject sequence.
 *
 * @param esa The ESA object to initialize.
 * @param S The subject sequence the ESA is built for.
 * @return An `int` status. NOTE(review): presumably 0 on success and
 * non-zero on failure — confirm in esa.c.
 */
int esa_init(esa_s *esa, const seq_subject *S);

/**
 * @brief Frees an ESA.
 *
 * NOTE(review): presumably releases the arrays held by `esa`; whether the
 * `esa_s` object itself is freed is not visible here — confirm in esa.c.
 *
 * @param esa The ESA to free.
 */
void esa_free(esa_s *esa);

#ifdef DEBUG

/**
 * @brief Debugging helper; only declared when `DEBUG` is defined.
 *
 * NOTE(review): judging by the name it maps a numeric code to a character.
 * The actual mapping is defined outside this header — confirm before
 * relying on it.
 *
 * @param code The code to translate.
 * @return The character for `code`.
 */
char code2char(ssize_t code);

#endif // DEBUG

#endif