Skip to content

Commit fbaf43c

Browse files
committed
alternatives search algo
1 parent 116fd51 commit fbaf43c

File tree

3 files changed

+61
-36
lines changed

3 files changed

+61
-36
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
*.example.py
22
*.pdf
33
*.backup.py
4+
backups
5+
backups/*
46
__pycache__

regex.py

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@
22
def is_start_set(char):
33
return char == '[' or char == '('
44

5-
def match_star(start, end, txt):
6-
# for i in range(len(txt) + 1):
7-
return [True]
8-
9-
105
# extract set from expression
116
def extract_set(char, exp):
127
end_pos = 0
@@ -20,60 +15,86 @@ def extract_set(char, exp):
2015

2116
# match range set
2217
def match_range(exp, txt, pos = 0):
23-
if(len(txt) == 0):
24-
return [True, pos]
25-
18+
if(len(txt) == 0 or len(txt) - 1 == pos):
19+
if(len(txt) - 1 == pos):
20+
return [True, pos - 1]
21+
else:
22+
return [True, pos]
23+
2624
# if the set expression includes the lowercase range (a-z)
2725
if('a-z' in exp):
28-
if(txt[0] >= 'a' and txt[0] <= 'z'):
29-
return match_range(exp, txt[1:], pos + 1)
26+
if(txt[pos] >= 'a' and txt[pos] <= 'z'):
27+
return match_range(exp, txt, pos + 1)
3028

3129
# if the set expression includes the uppercase range (A-Z)
3230
if('A-Z' in exp):
33-
if(txt[0] >= 'A' and txt[0] <= 'Z'):
34-
return match_range(exp, txt[1:], pos + 1)
31+
if(txt[pos] >= 'A' and txt[pos] <= 'Z'):
32+
return match_range(exp, txt, pos + 1)
3533

3634
# if the set expression includes the digit range (0-9)
3735
if('0-9' in exp):
38-
if(txt[0] >= '0' and txt[0] <= '9'):
39-
return match_range(exp, txt[1:], pos + 1)
36+
if(txt[pos] >= '0' and txt[pos] <= '9'):
37+
return match_range(exp, txt, pos + 1)
4038

4139
return [False, pos]
4240

4341
# match a set of '|'-separated alternatives, e.g. (com|net|org)
44-
def match_set(exp, txt):
42+
def match_set(exp, txt, pos = 0, end = 0):
43+
if(end == len(txt)):
44+
return [True, end]
45+
4546
arr = exp.replace('(', '').replace(')', '').split('|')
46-
return [txt in arr]
47+
char = txt[end]
48+
49+
50+
if any(char in s for s in arr):
51+
[matched, end] = match_set(exp, txt, pos, end + 1)
52+
53+
if(matched):
54+
for item in arr:
55+
if(item == txt[pos:end]):
56+
[matched, txt_pos] = match_exp(item, txt[pos:end], 0, 0)
57+
58+
if (matched):
59+
return [True, end]
60+
else:
61+
return [False, pos + txt_pos]
62+
63+
return [False, pos]
4764

4865

4966
# find match of the expression in the text
5067
def match_exp(exp, txt, txt_pos = 0, exp_pos = 0):
5168
# if expression is empty - we have checked all
5269
if (len(exp) == 0):
53-
return True
70+
return [True, txt_pos]
5471

5572
# if the current expression character opens a set ('[' or '(')
5673
if(is_start_set(exp[exp_pos])):
5774
[set_exp, exp_pos] = extract_set(exp[exp_pos], exp)
5875

5976
if(exp[0] == '['):
60-
[matched, txt_pos] = match_range(set_exp, txt)
77+
[matched, txt_pos] = match_range(set_exp, txt, txt_pos)
78+
6179
if(matched):
62-
return True
80+
return [True, txt_pos]
6381

6482
elif(exp[0] == '('):
65-
[matched] = match_set(set_exp, txt)
83+
[matched, txt_pos] = match_set(set_exp, txt, txt_pos, txt_pos)
84+
6685
if(matched):
67-
return True
68-
86+
return [True, txt_pos]
87+
6988
# if character matches
7089
if(len(exp) > exp_pos and len(txt) > txt_pos):
7190
if (exp[exp_pos] == txt[txt_pos]):
72-
if(match_exp(exp[(exp_pos + 1):], txt[(txt_pos + 1):])):
73-
return True
91+
[matched, txt_pos] = match_exp(exp[(exp_pos + 1):], txt, txt_pos + 1)
92+
93+
if(matched):
94+
return [True, txt_pos]
7495

7596
# if nothing matches
76-
return False
97+
return [False, txt_pos]
7798

7899
# if valid to start
79100
def is_valid(exp, txt):
@@ -88,7 +109,9 @@ def init_match(exp, txt):
88109
if (is_valid(exp, txt)):
89110
# naive algorithm
90111
while txt_pos < len(txt) - 1:
91-
if (match_exp(exp, txt[txt_pos:])):
112+
[matched, txt_pos] = match_exp(exp, txt, txt_pos)
113+
114+
if (matched):
92115
matched_count += 1
93116

94117
elif(is_start_set(exp[0])):
@@ -98,9 +121,9 @@ def init_match(exp, txt):
98121

99122
# if the matched count is greater than zero
100123
if (matched_count > 0):
101-
return True
124+
return [True, matched_count]
102125

103-
return False
126+
return [False, 0]
104127

105128

106129
class RegEx:

test.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
# run each test case
44
def test(num, exp, txt):
55
re = RegEx(exp)
6+
result = re.match(txt)
7+
68
print("Pattern: ", exp)
79
print("String: ", txt)
8-
print(f"Test {num} result: ", re.match(txt), end='\n\n')
10+
print(f"Test {num} result: ", result[0])
11+
print(f"Match count: ", result[1], end='\n\n')
912

1013

1114
if __name__ == '__main__':
@@ -32,15 +35,12 @@ def test(num, exp, txt):
3235
'string': '[email protected]',
3336
},
3437
{
35-
'pattern': '[A-Z]',
36-
'string': 'Hello',
38+
'pattern': '[a-z0-9]@[a-z].(moc)',
39+
'string': '[email protected]',
3740
},
38-
]
39-
40-
tests_1 = [
4141
{
42-
'pattern': '[a-z0-9]@[a-z].(com|net|org)',
43-
'string': '[email protected]',
42+
'pattern': '[A-Z]',
43+
'string': 'Hello',
4444
},
4545
]
4646

0 commit comments

Comments
 (0)