-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsuffix-tree.py
107 lines (94 loc) · 2.64 KB
/
suffix-tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
class Node:
    """
    One vertex of the suffix tree.

    ``sub`` is the piece of text carried by this node and ``ch`` is the list
    of its children (``SuffixTree`` stores indices into its ``nodes`` list
    there).
    """

    def __init__(self, sub="", children=None):
        # A falsy ``children`` (None or an empty list) is replaced by a fresh
        # list so that nodes never share one; a non-empty list is kept as is.
        if children:
            self.ch = children
        else:
            self.ch = []
        self.sub = sub
class SuffixTree:
    """
    Suffix tree built by inserting every suffix of a string, one at a time.

    ``nodes`` is a flat list of ``Node`` objects.  ``nodes[0]`` is the root and
    the ``ch`` list of each node holds the indices (into ``nodes``) of its
    children; ``sub`` of a node is the label of the edge leading into it.

    If a suffix is a prefix of one that was inserted earlier it is left
    implicit: no leaf is created for it.  This cannot happen when the text
    ends with a character that occurs nowhere else (such as ``"$"``).
    """

    def __init__(self, str):
        """Build the tree for ``str`` by adding ``str[i:]`` for every ``i``."""
        # NOTE: the parameter shadows the builtin ``str``; the name is kept so
        # that callers passing it by keyword keep working.
        self.nodes = [Node()]
        for start in range(len(str)):
            self.addSuffix(str[start:])

    def addSuffix(self, suf):
        """
        Insert the suffix ``suf`` into the tree, starting at the root.

        Walks down from the root, consuming ``suf`` edge by edge.  When no
        child continues the match, a new leaf holding the rest of ``suf`` is
        appended.  When the match stops in the middle of an edge, that edge is
        split first.  When ``suf`` runs out while it is still matching, it is
        already contained in the tree and nothing is changed.

        >>> tree = SuffixTree("aa")
        >>> [node.sub for node in tree.nodes]
        ['', 'aa']
        """
        n = 0  # index of the node we are currently standing on
        i = 0  # number of characters of ``suf`` consumed so far
        while i < len(suf):
            b = suf[i]
            children = self.nodes[n].ch
            # look for the child whose edge label starts with the next char
            for x2, n2 in enumerate(children):
                if self.nodes[n2].sub[0] == b:
                    break
            else:
                # no matching child, remainder of suf becomes new node
                self.nodes.append(Node(suf[i:], []))
                children.append(len(self.nodes) - 1)
                return

            # find prefix of remaining suffix in common with child; the scan
            # is bounded by BOTH strings so it can never index past ``suf``
            sub2 = self.nodes[n2].sub
            limit = min(len(sub2), len(suf) - i)
            j = 0
            while j < limit and suf[i + j] == sub2[j]:
                j += 1

            if j < len(sub2):
                if i + j == len(suf):
                    # ``suf`` ended inside this edge: it is a prefix of a
                    # longer suffix that is already stored, so it stays
                    # implicit (previously this raised IndexError)
                    return
                # mismatch inside the edge: split n2
                n3 = n2
                # new node for the part in common
                n2 = len(self.nodes)
                self.nodes.append(Node(sub2[:j], [n3]))
                # old node loses the part in common
                self.nodes[n3].sub = sub2[j:]
                children[x2] = n2

            i += j  # advance past part in common
            n = n2  # continue down the tree

    def visualize(self):
        """
        Pretty-print the suffix tree to standard output.

        Every node is printed on its own line, preceded by the connector that
        attaches it to its parent.  Inner nodes are shown as ``+- label`` and
        leaves as ``-- label``; the root has an empty label.

        >>> SuffixTree("banana$").visualize()  # doctest: +NORMALIZE_WHITESPACE
        +-
         +- -- banana$
         +- +- a
         |  +- +- na
         |  |  +- -- na$
         |  |  +- -- $
         |  +- -- $
         +- +- na
         |  +- -- na$
         |  +- -- $
         +- -- $
        """
        if not self.nodes:
            print("<empty>")
            return

        def f(n, pre):
            # ``pre`` is the indentation printed in front of the connectors
            # of the children of node ``n``
            children = self.nodes[n].ch
            if not children:
                print("--", self.nodes[n].sub)
                return
            print("+-", self.nodes[n].sub)
            for c in children[:-1]:
                # end=" " keeps the child's label on the connector's line
                print(pre, "+-", end=" ")
                f(c, pre + " | ")
            print(pre, "+-", end=" ")
            # blank prefix of the same width as " | " so that columns line up
            f(children[-1], pre + "   ")

        f(0, "")
# Script entry point: run the doctests embedded in this module's docstrings.
if __name__ == "__main__":
    from doctest import testmod

    testmod()