15 #ifndef PAAL_PREFIX_TREE_HPP
16 #define PAAL_PREFIX_TREE_HPP
26 Letter letter = DELIMITER;
27 Node *son = CHILDLESS;
28 std::vector<int> prefixes;
30 Node(
int _letter) : letter(_letter) {};
36 std::vector<Letter>
const &sumWords,
37 std::vector<int>
const &
lcp,
38 std::vector<int>
const &lengthSuffixWord)
39 : m_length(length), m_prefix_tree(m_length), m_which_son_am_i(m_length),
40 m_prefix_to_tree(m_length), m_suffix_to_tree(m_length),
41 m_suffix_array(suffix_array), m_sum_words(sumWords), m_lcp(lcp),
42 m_length_suffix_word(lengthSuffixWord) {}
44 void build_prefix_tree() {
45 m_prefix_tree.push_back(Node());
46 for (
auto suffix : m_suffix_array) {
49 if ((suffix != 0) && (m_sum_words[suffix - 1] == DELIMITER)) {
50 add_word_to_prefix_tree(suffix);
55 void erase_word_form_prefix_tree(
int wordBegin) {
56 for (
int letterOfWord = 0;
57 m_sum_words[letterOfWord + wordBegin] != DELIMITER;
59 auto letterIdx = wordBegin + letterOfWord;
60 auto whichSon = m_which_son_am_i[letterIdx];
61 auto &nodePrefixes = m_prefix_to_tree[letterIdx]->prefixes;
62 assert(std::size_t(whichSon) < nodePrefixes.size());
63 int lastPrefix = nodePrefixes.back();
64 nodePrefixes[whichSon] = lastPrefix;
65 m_which_son_am_i[lastPrefix + letterOfWord] = whichSon;
66 nodePrefixes.pop_back();
72 void fill_suffix_to_tree() {
73 for (
int suffix = m_length - 1, lastWord = 0, commonPrefix = 0;
74 suffix > 0; suffix--) {
75 auto beginOfSuffix = m_suffix_array[suffix];
76 if (beginOfSuffix == 0 ||
77 m_sum_words[beginOfSuffix - 1] == DELIMITER) {
78 lastWord = beginOfSuffix;
79 commonPrefix = m_lcp[suffix];
81 if (commonPrefix == m_length_suffix_word[beginOfSuffix]) {
82 m_suffix_to_tree[suffix] =
83 m_prefix_to_tree[lastWord + commonPrefix - 1];
85 if (m_lcp[suffix] < commonPrefix) {
86 commonPrefix = m_lcp[suffix];
92 int get_prefixequal_to_suffix(
int suffix,
int firstWordInBlock) {
93 Node *nodeCorrespondingToSuffix = m_suffix_to_tree[suffix];
94 if (nodeCorrespondingToSuffix == NO_SUFFIX_IN_TREE) {
97 auto const &overlapPrefixes = nodeCorrespondingToSuffix->prefixes;
99 if (overlapPrefixes.size()) {
100 int whichPrefix = ANY_PREFIX;
104 if (overlapPrefixes[whichPrefix] == firstWordInBlock) {
105 if (overlapPrefixes.size() >= 2) {
106 whichPrefix = ANY_OTHER_PREFIX;
111 return overlapPrefixes[whichPrefix];
118 void add_word_to_prefix_tree(
int word) {
119 Node *node = &m_prefix_tree[ROOT];
123 while (node->son != CHILDLESS &&
124 node->son->letter == m_sum_words[letter] &&
125 m_sum_words[letter] != DELIMITER) {
130 while (m_sum_words[letter]) {
132 assert(m_prefix_tree.capacity() > m_prefix_tree.size());
133 m_prefix_tree.push_back(Node(m_sum_words[letter]));
134 node->son = &m_prefix_tree.back();
135 node = &m_prefix_tree.back();
138 node = &m_prefix_tree[ROOT];
144 while (m_sum_words[letter] != DELIMITER) {
146 m_prefix_to_tree[letter] = node;
147 m_which_son_am_i[letter] = node->prefixes.size();
148 node->prefixes.push_back(word);
154 std::vector<Node> m_prefix_tree;
155 std::vector<int> m_which_son_am_i;
157 std::vector<Node *> m_prefix_to_tree;
158 std::vector<Node *> m_suffix_to_tree;
160 const std::vector<int> &m_suffix_array;
161 const std::vector<Letter> &m_sum_words;
162 const std::vector<int> &m_lcp;
163 const std::vector<int> &m_length_suffix_word;
165 const static int ROOT = 0;
166 const static int NOT_PREFIX = -1;
167 const static int ANY_PREFIX = 0;
168 const static int ANY_OTHER_PREFIX = 1;
169 const static std::nullptr_t NO_SUFFIX_IN_TREE;
170 const static std::nullptr_t CHILDLESS;
173 const static Letter DELIMITER = 0;
178 #endif // PAAL_PREFIX_TREE_HPP
void lcp(std::vector< int > const &suffix_array, std::vector< int > const &rank, std::vector< int > &lcp, std::vector< Letter > const &sumWords)
fill array lcp lcp[0] is undefined and lcp[i] stores the largest common prefix of the lexicographical...
void suffix_array(std::vector< Letter > &text, std::vector< int > &SA, Letter max_letter=0)
detail