Skip to content

Commit 8d00ff7

Browse files
author
shengshijun
committed
字符串朴素匹配算法:bugfix
Rabin-Karp算法实现
1 parent 85fce78 commit 8d00ff7

File tree

2 files changed

+51
-2
lines changed

2 files changed

+51
-2
lines changed

string/rabin-karp.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/env python
2+
# -*- coding:UTF-8
3+
__author__ = 'shenshijun'
4+
"""
5+
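First compute the hash of the pattern string; then, starting at the beginning of the target string, compute the hash of the substring of the same length. If the two hashes are equal the window is a candidate match (confirmed by a character-by-character comparison); if they differ, shift one position to the right and compute the new hash. The overall process closely resembles brute-force string matching,
6+
but each new hash can reuse the previous one: only the contribution of the newly included character is added and the contribution of the previous window's first character is removed.
7+
The preprocessing time of the Rabin-Karp algorithm is O(m); in the worst case the matching time is O((n-m+1)m), and the expected complexity is O(m+n).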
8+
"""
9+
10+
11+
def match(origin, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``origin``.

    Rabin-Karp search: the hash of ``pattern`` is compared against a rolling
    hash of every ``len(pattern)``-wide window of ``origin``; windows whose
    hash agrees are confirmed character by character, so hash collisions can
    never produce a false match.

    :param origin: the text to search in (a string).
    :param pattern: the string to look for.
    :return: the 0-based index of the first match, ``0`` for an empty
        pattern, or ``None`` when ``pattern`` does not occur in ``origin``.
    """
    # Polynomial hash parameters.  Reducing modulo a prime keeps every hash a
    # small integer, so each rolling update is O(1) instead of growing with
    # the pattern length.
    base = 256
    modulus = 1000000007

    pattern_len = len(pattern)
    origin_len = len(origin)

    if pattern_len == 0:
        # Same convention as str.find: the empty pattern matches at index 0.
        return 0
    if pattern_len > origin_len:
        # No window of the required width exists; hashing would index past
        # the end of ``origin``.
        return None

    # Weight carried by the leading character of a window.
    high_weight = pow(base, pattern_len - 1, modulus)

    def _hash(string, start=0):
        """Hash the ``pattern_len`` characters of ``string`` from ``start``."""
        hash_code = 0
        for x in range(pattern_len):
            hash_code = (hash_code * base + ord(string[start + x])) % modulus
        return hash_code

    def _refresh(old_hash, old_char, new_char):
        """Slide the window one step: drop ``old_char``, append ``new_char``."""
        without_old = old_hash - ord(old_char) * high_weight
        # Python's % always yields a non-negative result for a positive
        # modulus, so the subtraction above needs no extra correction.
        return (without_old * base + ord(new_char)) % modulus

    def test_equal(start_index):
        """Confirm a hash hit by comparing the window with the pattern."""
        for x in range(pattern_len):
            if origin[x + start_index] != pattern[x]:
                return False
        return True

    pattern_hash = _hash(pattern)
    origin_hash = _hash(origin)
    last_start = origin_len - pattern_len  # final valid window position

    for origin_index in range(last_start + 1):
        if pattern_hash == origin_hash and test_equal(origin_index):
            return origin_index
        # Only roll the hash while another window remains; rolling at the
        # last window would read one character past the end of ``origin``.
        if origin_index < last_start:
            origin_hash = _refresh(origin_hash,
                                   origin[origin_index],
                                   origin[origin_index + pattern_len])
    return None
39+
40+
41+
def main():
    """Demo entry point: print where 'sb' first occurs in a sample string."""
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(match("sbsfsdgdgfgasbssssfsfdfeferf", 'sb'))


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

string/simple.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,25 @@
55

66
def match(origin, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``origin``.

    Naive (brute-force) matching: every start position that leaves room for
    the whole pattern is tried in turn and compared element by element.

    :param origin: the sequence (normally a string) to search in.
    :param pattern: the sequence to look for.
    :return: the 0-based index of the first match, or ``None`` when
        ``pattern`` does not occur in ``origin``.
    """
    pattern_len = len(pattern)
    # Start positions beyond this would run off the end of ``origin``;
    # restricting the scan to them is what prevents an out-of-range read
    # when a partial match reaches the end of the text.
    last_start = len(origin) - pattern_len

    for start in range(last_start + 1):
        if all(origin[start + offset] == pattern[offset]
               for offset in range(pattern_len)):
            return start
    return None
2023

2124

2225
def main():
    """Demo entry point: print where 'sb' first occurs in a sample string."""
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(match("absabsvbshsbdhhd", 'sb'))
2427

2528

2629
if __name__ == "__main__":

0 commit comments

Comments
 (0)