forked from TheAlgorithms/Julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkmp_substring_search.jl
109 lines (88 loc) · 3.45 KB
/
kmp_substring_search.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""
GetIndexWithKMP(string::String, sub_string::String, ignore_case::Bool)::Int
Find if a string contain a given sub string with KMP Search, as an explanation might be too lengthy
As such, a detailed explanation can be found at https://towardsdatascience.com/pattern-search-with-the-knuth-morris-pratt-kmp-algorithm-8562407dba5b
Questions and answers
1. Why KMP instead of naive search?
- Given n = length of string , m = length of string P
- Then, the naive search will have time complexity O(mn) to find all occurrences of pattern P in S
- However, knuth-morris-pratt (KMP) algorithm will have tim complexity of O(m+n) to find all occurrences of pattern P in S
- Hence, KMP is more efficient than the naive approach
Example
```julia
GetIndexWithKMP("Hello I am Gervin", "Hello I am Gervin", false) # returns 1
GetIndexWithKMP("ABABDABACDABABCABAB", "ABABCABAB", false) # returns 11
GetIndexWithKMP("SeEms Like IgNOrE CaSe Work", "seems like ignore case work", true) # returns 1
```
Contributed By:- [Gervin Fung](https://github.com/GervinFung)
Note: This function will also allow ignoring cases also
"""
# by convention -1 will be returned if the string does not contain the sub-string given
# in this case, 0 will be returned for Julia with start index of 1
# reason is that -1 is for language with start index of 0
# Should be convenient for others as most programming language use -1 if the string does not contain the sub-string given
const NO_SUBSTRING_INDEX = 0
# 1 is the first index, unlike others, where 0 is the first index
# Should be convenient for others as most programming language use 0 as first index
const JULIA_FIRST_INDEX = 1
function create_suffic_array(
pattern_length::Int,
sub_string::String,
)::Vector{Int}
# Longest Proper Prefix which is Suffix array
lps::Vector{Int} = ones(Int, pattern_length)
index::Int = JULIA_FIRST_INDEX
i::Int = 1 + JULIA_FIRST_INDEX
while (i < pattern_length)
if sub_string[i] == sub_string[index]
lps[i] = index + 1
index += 1
i += 1
else
if index != 1
index = lps[index-1]
else
lps[i] = 1
i += 1
end
end
end
return lps
end
# this function will be used to obtain the index which the substring was found
function get_index_with_kmp(
string::String,
sub_string::String,
ignore_case::Bool,
)::Int
string = ignore_case ? lowercase(string) : string
sub_string = ignore_case ? lowercase(sub_string) : sub_string
string_length::Int = length(string)
substring_length::Int = length(sub_string)
lps::Vector{Int} = create_suffic_array(substring_length, sub_string)
# likewise, 1 is the first index, unlike others, where 0 is the first index
i::Int = JULIA_FIRST_INDEX
j::Int = JULIA_FIRST_INDEX
while i < string_length + 1 && j < substring_length + 1
if string[i] == sub_string[j]
i += 1
j += 1
else
if j != 1
j = lps[j-1]
else
i += 1
end
end
end
return j == substring_length + 1 ? i - j + 1 : NO_SUBSTRING_INDEX
end
# optional function that returns boolean if string does contain the sub-string given
function contain_substring_with_kmp(
string::String,
sub_string::String,
ignore_case::Bool,
)::Bool
return get_index_with_kmp(string, sub_string, ignore_case) !=
NO_SUBSTRING_INDEX
end