From fbaf43c5e686c3510228c8761885aaa78e70c37a Mon Sep 17 00:00:00 2001
From: Terance Edmonds <teranceedmonds20@gmail.com>
Date: Tue, 1 Aug 2023 22:36:56 +0530
Subject: [PATCH 1/3] alternatives search algo

---
 .gitignore |  2 ++
 regex.py   | 79 +++++++++++++++++++++++++++++++++++-------------------
 test.py    | 16 +++++------
 3 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9460d9f..9b9fd10 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
 *.example.py
 *.pdf
 *.backup.py
+backups
+backups/*
 __pycache__
\ No newline at end of file
diff --git a/regex.py b/regex.py
index 92a733a..2d2300c 100644
--- a/regex.py
+++ b/regex.py
@@ -2,11 +2,6 @@
 def is_start_set(char):
     return char == '[' or char == '('
 
-def match_star(start, end, txt):
-    # for i in range(len(txt) + 1):
-    return [True]
-
-
 # extract set from expression
 def extract_set(char, exp):
     end_pos = 0
@@ -20,60 +15,86 @@ def extract_set(char, exp):
 
 # match range set
 def match_range(exp, txt, pos = 0):
-    if(len(txt) == 0):
-        return [True, pos]
-    
+    if(len(txt) == 0 or len(txt) - 1 == pos):
+        if(len(txt) - 1 == pos):
+            return [True, pos - 1]
+        else:
+            return [True, pos]
+
     # if string contains lowercase letters
     if('a-z' in exp):
-        if(txt[0] >= 'a' and txt[0] <= 'z'):
-            return match_range(exp, txt[1:], pos + 1)
+        if(txt[pos] >= 'a' and txt[pos] <= 'z'):
+            return match_range(exp, txt, pos + 1)
         
     # if string contains uppercase letters
     if('A-Z' in exp):
-        if(txt[0] >= 'A' and txt[0] <= 'Z'):
-            return match_range(exp, txt[1:], pos + 1)
+        if(txt[pos] >= 'A' and txt[pos] <= 'Z'):
+            return match_range(exp, txt, pos + 1)
     
     # if string contains integers
     if('0-9' in exp):
-        if(txt[0] >= '0' and txt[0] <= '9'):
-            return match_range(exp, txt[1:], pos + 1)
+        if(txt[pos] >= '0' and txt[pos] <= '9'):
+            return match_range(exp, txt, pos + 1)
     
     return [False, pos]
 
 # match options set
-def match_set(exp, txt):
+def match_set(exp, txt, pos = 0, end = 0):
+    if(end == len(txt)):
+        return [True, end]
+
     arr = exp.replace('(', '').replace(')', '').split('|')
-    return [txt in arr]
+    char = txt[end]
+
+    
+    if any(char in s for s in arr):
+        [matched, end] =  match_set(exp, txt, pos, end + 1)
+        
+        if(matched):
+            for item in arr:
+                if(item == txt[pos:end]):
+                    [matched, txt_pos] = match_exp(item, txt[pos:end], 0, 0)
+                    
+                    if (matched):
+                        return [True, end]
+                    else:
+                        return [False, pos + txt_pos]
+    
+    return [False, pos]
 
 
 # find match of the expression in the text
 def match_exp(exp, txt, txt_pos = 0, exp_pos = 0):
     # if expression is empty - we have checked all
     if (len(exp) == 0):
-        return True
+        return [True, txt_pos]
     
     # if match set of characters
     if(is_start_set(exp[exp_pos])):
         [set_exp, exp_pos] = extract_set(exp[exp_pos], exp)
         
         if(exp[0] == '['):
-            [matched, txt_pos] = match_range(set_exp, txt)
+            [matched, txt_pos] = match_range(set_exp, txt, txt_pos)
+
             if(matched):
-                return True
+                return [True, txt_pos]
             
         elif(exp[0] == '('):
-            [matched] = match_set(set_exp, txt)
+            [matched, txt_pos] = match_set(set_exp, txt, txt_pos, txt_pos)
+
             if(matched):
-                return True
-    
+                return [True, txt_pos]
+
     # if character matches
     if(len(exp) > exp_pos and len(txt) > txt_pos):
         if (exp[exp_pos] == txt[txt_pos]):
-            if(match_exp(exp[(exp_pos + 1):], txt[(txt_pos + 1):])):
-                return True
+            [matched, txt_pos] = match_exp(exp[(exp_pos + 1):], txt, txt_pos + 1)
+            
+            if(matched):
+                return [True, txt_pos]
 
     # if nothing matches
-    return False
+    return [False, txt_pos]
 
 # if valid to start
 def is_valid(exp, txt):
@@ -88,7 +109,9 @@ def init_match(exp, txt):
     if (is_valid(exp, txt)):
         # naive algorithm
         while txt_pos < len(txt) - 1:
-            if (match_exp(exp, txt[txt_pos:])):
+            [matched, txt_pos] = match_exp(exp, txt, txt_pos)
+            
+            if (matched):
                 matched_count += 1
             
             elif(is_start_set(exp[0])):
@@ -98,9 +121,9 @@ def init_match(exp, txt):
 
     # if the matched count is greater than zero
     if (matched_count > 0):
-        return True
+        return [True, matched_count]
 
-    return False
+    return [False, 0]
 
 
 class RegEx:
diff --git a/test.py b/test.py
index 5fbafde..14fe91a 100644
--- a/test.py
+++ b/test.py
@@ -3,9 +3,12 @@
 # run each test case
 def test(num, exp, txt):
     re = RegEx(exp)
+    result = re.match(txt)
+
     print("Pattern: ", exp)
     print("String: ", txt)
-    print(f"Test {num} result: ", re.match(txt), end='\n\n')
+    print(f"Test {num} result: ", result[0])
+    print(f"Match count: ", result[1], end='\n\n')
 
 
 if __name__ == '__main__':
@@ -32,15 +35,12 @@ def test(num, exp, txt):
             'string': 'hello99@gmail.com',
         },
         {
-            'pattern': '[A-Z]',
-            'string': 'Hello',
+            'pattern': '[a-z0-9]@[a-z].(moc)',
+            'string': 'hello99@gmail.com',
         },
-    ]
-    
-    tests_1 = [
         {
-            'pattern': '[a-z0-9]@[a-z].(com|net|org)',
-            'string': 'hello99@mail.net',
+            'pattern': '[A-Z]',
+            'string': 'Hello',
         },
     ]
 

From af9d1847af2912b04c68eae33ef70eb2fe0502d5 Mon Sep 17 00:00:00 2001
From: Terance Edmonds <teranceedmonds20@gmail.com>
Date: Tue, 1 Aug 2023 22:38:47 +0530
Subject: [PATCH 2/3] rename

---
 test.py => tests.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename test.py => tests.py (100%)

diff --git a/test.py b/tests.py
similarity index 100%
rename from test.py
rename to tests.py

From d48a36b382bb6d94259b2ffc5051dbb6310e0206 Mon Sep 17 00:00:00 2001
From: Terance Edmonds <teranceedmonds20@gmail.com>
Date: Fri, 4 Aug 2023 09:57:46 +0530
Subject: [PATCH 3/3] start with

---
 ReadMe.md |  1 +
 regex.py  | 62 +++++++++++++++++++++++++++++++++----------------------
 tests.py  | 15 +++++++++++++-
 3 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/ReadMe.md b/ReadMe.md
index 7195944..426fe54 100644
--- a/ReadMe.md
+++ b/ReadMe.md
@@ -6,6 +6,7 @@ It supports:
   - Literals ( abc )
   - Ranges ( [a-z], [A-Z], [0-9], [a-zA-Z0-9] )
   - Alternatives ( a|b )
+  - Start ( ^[0-9], ^H )
   - Matching in the middle of a string
 
 ## Usage
diff --git a/regex.py b/regex.py
index 2d2300c..3e69325 100644
--- a/regex.py
+++ b/regex.py
@@ -2,6 +2,10 @@
 def is_start_set(char):
     return char == '[' or char == '('
 
+# check whether the character is the start anchor ('^')
+def is_start(char):
+    return char == '^'
+
 # extract set from expression
 def extract_set(char, exp):
     end_pos = 0
@@ -15,12 +19,11 @@ def extract_set(char, exp):
 
 # match range set
 def match_range(exp, txt, pos = 0):
-    if(len(txt) == 0 or len(txt) - 1 == pos):
-        if(len(txt) - 1 == pos):
-            return [True, pos - 1]
-        else:
-            return [True, pos]
-
+    if(len(txt) == 0):
+        return [True, pos]
+    elif(pos > 0 and pos >= len(txt) - 1):
+        return [True, pos - 1]
+    
     # if string contains lowercase letters
     if('a-z' in exp):
         if(txt[pos] >= 'a' and txt[pos] <= 'z'):
@@ -43,13 +46,15 @@ def match_set(exp, txt, pos = 0, end = 0):
     if(end == len(txt)):
         return [True, end]
 
+    # remove parentheses and split by "|"
     arr = exp.replace('(', '').replace(')', '').split('|')
     char = txt[end]
-
     
+    # check if the current character of txt appears in any alternative
     if any(char in s for s in arr):
         [matched, end] =  match_set(exp, txt, pos, end + 1)
         
+        # validate that the matched text is exactly one of the alternatives
         if(matched):
             for item in arr:
                 if(item == txt[pos:end]):
@@ -69,16 +74,24 @@ def match_exp(exp, txt, txt_pos = 0, exp_pos = 0):
     if (len(exp) == 0):
         return [True, txt_pos]
     
+    # handle the start anchor: it can only match at the beginning of the text
+    if (is_start(exp[0]) and txt_pos == 0):
+        return match_exp(exp[1:], txt[0])
+    elif (is_start(exp[0]) and txt_pos != 0):
+        return [False, txt_pos]
+    
     # if match set of characters
     if(is_start_set(exp[exp_pos])):
         [set_exp, exp_pos] = extract_set(exp[exp_pos], exp)
         
+        # if the expression starts with a range
         if(exp[0] == '['):
             [matched, txt_pos] = match_range(set_exp, txt, txt_pos)
-
+        
             if(matched):
                 return [True, txt_pos]
             
+        # if the expression starts with a set
         elif(exp[0] == '('):
             [matched, txt_pos] = match_set(set_exp, txt, txt_pos, txt_pos)
 
@@ -96,28 +109,27 @@ def match_exp(exp, txt, txt_pos = 0, exp_pos = 0):
     # if nothing matches
     return [False, txt_pos]
 
-# if valid to start
-def is_valid(exp, txt):
-    return len(txt) >= len(exp) or is_start_set(exp[0])
-
 # start matching
 def init_match(exp, txt):
     matched_count = 0
     txt_pos = 0
 
-    # if the text length is greater than the expression proceed
-    if (is_valid(exp, txt)):
-        # naive algorithm
-        while txt_pos < len(txt) - 1:
-            [matched, txt_pos] = match_exp(exp, txt, txt_pos)
-            
-            if (matched):
-                matched_count += 1
-            
-            elif(is_start_set(exp[0])):
-                break
-            
-            txt_pos += 1
+    # naive algorithm
+    while txt_pos < len(txt) - 1:
+        [matched, txt_pos] = match_exp(exp, txt, txt_pos)
+        
+        # if matched, increase the count
+        if (matched):
+            matched_count += 1
+        # if not matched by the set pattern then end
+        elif(is_start_set(exp[0])):
+            break
+        # if the start-anchored pattern did not match then end
+        elif(is_start(exp[0])):
+            break
+        
+        # increment the text pointer position
+        txt_pos += 1
 
     # if the matched count is greater than zero
     if (matched_count > 0):
diff --git a/tests.py b/tests.py
index 14fe91a..187a52c 100644
--- a/tests.py
+++ b/tests.py
@@ -5,9 +5,10 @@ def test(num, exp, txt):
     re = RegEx(exp)
     result = re.match(txt)
 
+    print(f"=== Test {num} === ")
     print("Pattern: ", exp)
     print("String: ", txt)
-    print(f"Test {num} result: ", result[0])
+    print("Result: ", result[0])
     print(f"Match count: ", result[1], end='\n\n')
 
 
@@ -42,6 +43,18 @@ def test(num, exp, txt):
             'pattern': '[A-Z]',
             'string': 'Hello',
         },
+        {
+            'pattern': '^[0-9]',
+            'string': 'Hello',
+        },
+        {
+            'pattern': '^[0-9]',
+            'string': '7Hello',
+        },
+        {
+            'pattern': '^H',
+            'string': 'Hello',
+        },
     ]
 
     # run all test cases