Skip to content

Commit

Permalink
chap13: pattern matching algos; boyer moore and kmp
Browse files Browse the repository at this point in the history
  • Loading branch information
AliNisarAhmed committed Aug 22, 2023
1 parent 8e4db68 commit 88bca5f
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 0 deletions.
39 changes: 39 additions & 0 deletions python/python-algo-ds/chap13/boyer_moore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
def find_boyer_moore(T, P):
    """Return the lowest index of T at which substring P begins (or else -1).

    Simplified Boyer-Moore search: the pattern is compared against the text
    from right to left, and on a mismatch the alignment is shifted using the
    index of the last occurrence in P of the mismatched text character.

    An empty pattern matches at index 0.
    """
    n, m = len(T), len(P)

    if m == 0:
        return 0

    # last[c] is the index of the last occurrence of c in P; later positions
    # overwrite earlier ones while the dict is built.
    last = {ch: idx for idx, ch in enumerate(P)}

    # Align the end of the pattern at index m - 1 of the text.
    i = m - 1  # an index into T
    k = m - 1  # an index into P

    while i < n:
        current = T[i]
        if current == P[k]:
            if k == 0:
                # Every pattern character matched; the match starts at i.
                return i
            # Examine the previous character of both T and P.
            i -= 1
            k -= 1
        else:
            # j is -1 when the mismatched text character is absent from P.
            j = last.get(current, -1)
            # Jump forward: either line up T[i] with its last occurrence in
            # P, or (if that would move the pattern backwards) slide the
            # pattern one position past the current alignment.
            i += m - min(k, j + 1)
            # Restart the comparison at the end of the pattern.
            k = m - 1

    return -1


if __name__ == "__main__":
    # Small demo: print the index at which P first occurs in T.
    T = "a quick brown fox jumped over a lazy fox"
    P = "over"

    print(find_boyer_moore(T, P))
13 changes: 13 additions & 0 deletions python/python-algo-ds/chap13/find_brute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
def find_brute(T, P):
    """Return the lowest index of T at which substring P begins (or else -1).

    Brute-force search: every candidate starting index of T is tried in
    increasing order and compared with P character by character.
    An empty pattern matches at index 0.
    """
    n, m = len(T), len(P)
    # Only starting positions that leave room for all m characters are
    # tried; when m > n the range is empty and the search fails immediately.
    for i in range(n - m + 1):
        k = 0  # number of pattern characters matched at this alignment
        while k < m and T[i + k] == P[k]:
            k += 1
        if k == m:
            # We matched the full pattern starting at i.
            return i

    return -1
52 changes: 52 additions & 0 deletions python/python-algo-ds/chap13/kmp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
def find_kmp(T, P):
    """Return the lowest index of T at which substring P begins (or else -1).

    Knuth-Morris-Pratt search: the text index j never moves backwards. On a
    mismatch after a partial match, the 'fail' list from compute_kmp_fail
    gives how many leading pattern characters can be kept as already matched.

    An empty pattern matches at index 0.
    """
    n, m = len(T), len(P)

    if m == 0:
        return 0

    fail = compute_kmp_fail(P)

    j = 0  # index into T
    k = 0  # index into P

    while j < n:
        current = T[j]
        if current == P[k]:
            if k == m - 1:
                # The last pattern character matched; the match ends at j,
                # so it begins m - 1 positions earlier.
                return j - m + 1
            j += 1
            k += 1
        elif k > 0:
            # Reuse the part of the partial match that is also a prefix of
            # P; j stays put and T[j] is compared again.
            k = fail[k - 1]
        else:
            # Mismatch on the first pattern character: advance in the text.
            j += 1

    return -1


def compute_kmp_fail(P):
    """Utility that computes and returns KMP 'fail' list.

    fail[j] is the length of the longest prefix of P that is also a suffix
    of P[1:j + 1]. The returned list has len(P) entries; an empty pattern
    yields an empty list.
    """
    m = len(P)
    fail = [0] * m  # by default, presume overlap of 0 everywhere
    j = 1  # position in P whose fail value is being computed
    k = 0  # length of the prefix currently matched ending just before j

    while j < m:
        if P[j] == P[k]:
            # The matched prefix extends by one character: k + 1 matched.
            fail[j] = k + 1
            j += 1
            k += 1
        elif k > 0:
            # k follows a matching prefix; fall back to the next shorter one.
            k = fail[k - 1]
        else:
            # No prefix matches here; fail[j] keeps its default of 0.
            j += 1

    return fail


if __name__ == "__main__":
    # Small demo: print the index at which P first occurs in T.
    T = "abacaabaccabacabaabb"
    P = "abacab"

    print(find_kmp(T, P))

0 comments on commit 88bca5f

Please sign in to comment.