2
2
def is_start_set(char):
    """Return True when *char* opens a set in an expression.

    A set is opened by '[' (range set) or '(' (options set); any other
    value, including the empty string, yields False.
    """
    return char in ('[', '(')
4
4
5
- def match_star (start , end , txt ):
6
- # for i in range(len(txt) + 1):
7
- return [True ]
8
-
9
-
10
5
# extract set from expression
11
6
def extract_set (char , exp ):
12
7
end_pos = 0
@@ -20,60 +15,86 @@ def extract_set(char, exp):
20
15
21
16
# match range set
22
17
def match_range(exp, txt, pos=0):
    """Check the characters of *txt*, starting at *pos*, against a range set.

    *exp* is the text of the range set; the ranges 'a-z', 'A-Z' and '0-9'
    are recognised by substring search, so any other content is ignored.

    Returns a two-element list ``[matched, position]``:

    * ``[False, i]`` - ``txt[i]`` is the first character outside every
      enabled range.
    * ``[True, len(txt) - 2]`` - every character from *pos* up to, but not
      including, the last one is inside an enabled range.
    * ``[True, pos]`` - there was nothing to examine (empty *txt*, or *pos*
      at/after the end of *txt*).
    """
    length = len(txt)

    # The enabled ranges depend only on exp, so work them out once.
    allow_lower = 'a-z' in exp
    allow_upper = 'A-Z' in exp
    allow_digit = '0-9' in exp

    # Iterating (instead of recursing once per character) keeps long texts
    # from exhausting the interpreter's recursion limit.
    while True:
        # NOTE(review): the scan stops one character before the end and
        # reports the previous index, so the last character is never
        # examined. Kept as-is because callers may rely on the returned
        # position - confirm whether this is intended.
        if pos == length - 1:
            return [True, pos - 1]

        # Nothing left to examine. This also covers a start position past
        # the end of the text, which would otherwise raise IndexError.
        if length == 0 or pos >= length:
            return [True, pos]

        ch = txt[pos]
        if (
            (allow_lower and 'a' <= ch <= 'z')
            or (allow_upper and 'A' <= ch <= 'Z')
            or (allow_digit and '0' <= ch <= '9')
        ):
            pos += 1
            continue

        return [False, pos]
42
40
43
41
# match options set
44
def match_set(exp, txt, pos=0, end=0):
    """Match *txt* from *pos* against an options set such as ``(ab|cd)``.

    *exp* is the set text; parentheses are stripped and the remainder is
    split on '|' to obtain the alternatives. *end* is the index currently
    being scanned; the function recurses with ``end + 1`` for as long as
    the scanned character occurs in at least one alternative.

    Returns a two-element list ``[matched, position]``. On success the
    position is the scan index reached (``len(txt)``); on failure it is
    *pos*, or *pos* plus the offset reported by ``match_exp``.
    """
    # Scanned past the last character: nothing contradicted the set.
    if end == len(txt):
        return [True, end]

    arr = exp.replace('(', '').replace(')', '').split('|')
    char = txt[end]

    # Keep scanning only while the current character belongs to some
    # alternative.
    if any(char in s for s in arr):
        matched, end = match_set(exp, txt, pos, end + 1)

        if matched:
            # After a successful scan, the scanned slice must equal one of
            # the alternatives exactly; the first equal one decides.
            for item in arr:
                if item == txt[pos:end]:
                    matched, txt_pos = match_exp(item, txt[pos:end], 0, 0)

                    if matched:
                        return [True, end]
                    else:
                        return [False, pos + txt_pos]

    return [False, pos]
47
64
48
65
49
66
# find match of the expression in the text
50
67
def match_exp(exp, txt, txt_pos=0, exp_pos=0):
    """Try to match expression *exp* against *txt* starting at *txt_pos*.

    *exp_pos* is the index in *exp* at which matching starts.

    Returns a two-element list ``[matched, txt_pos]`` where ``txt_pos`` is
    the text position reported by the last matcher that ran.
    """
    # if expression is empty - we have checked all
    if len(exp) == 0:
        return [True, txt_pos]

    # if match set of characters
    if is_start_set(exp[exp_pos]):
        # extract_set is defined elsewhere in this file; presumably it
        # returns the set's text and the expression index where the set
        # ends - confirm against its definition.
        set_exp, exp_pos = extract_set(exp[exp_pos], exp)

        # NOTE(review): the set kind is read from exp[0], not from the
        # position that was tested above; the two agree only when this
        # function is entered with exp_pos == 0.
        if exp[0] == '[':
            matched, txt_pos = match_range(set_exp, txt, txt_pos)

            if matched:
                return [True, txt_pos]

        elif exp[0] == '(':
            matched, txt_pos = match_set(set_exp, txt, txt_pos, txt_pos)

            if matched:
                return [True, txt_pos]

    # if character matches
    if len(exp) > exp_pos and len(txt) > txt_pos:
        if exp[exp_pos] == txt[txt_pos]:
            # Consume one literal character and match the remainder of the
            # expression against the next text position.
            matched, txt_pos = match_exp(exp[(exp_pos + 1):], txt, txt_pos + 1)

            if matched:
                return [True, txt_pos]

    # if nothing matches
    return [False, txt_pos]
77
98
78
99
# if valid to start
79
100
def is_valid (exp , txt ):
@@ -88,7 +109,9 @@ def init_match(exp, txt):
88
109
if (is_valid (exp , txt )):
89
110
# naive algorithm
90
111
while txt_pos < len (txt ) - 1 :
91
- if (match_exp (exp , txt [txt_pos :])):
112
+ [matched , txt_pos ] = match_exp (exp , txt , txt_pos )
113
+
114
+ if (matched ):
92
115
matched_count += 1
93
116
94
117
elif (is_start_set (exp [0 ])):
@@ -98,9 +121,9 @@ def init_match(exp, txt):
98
121
99
122
# if the matched count is greater than zero
100
123
if (matched_count > 0 ):
101
- return True
124
+ return [ True , matched_count ]
102
125
103
- return False
126
+ return [ False , 0 ]
104
127
105
128
106
129
class RegEx :
0 commit comments