-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgroup-identical-strings.py
83 lines (63 loc) · 1.89 KB
/
group-identical-strings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import math
from collections import deque
def computehash(s):
hash_value = 0
p = 51
m = math.e ** 9 + 9
p_pow = 1
for c in s:
hash_value = (hash_value + ((ord(c) - ord("a") + 1) * p_pow)) % m
p_pow = (p_pow * p) % m
return int(hash_value)
def merge(arr, start, mid, end, key="hash"):
start2 = mid + 1
# If the direct merge is already sorted
if arr[mid][key] <= arr[start2][key]:
return
while start <= mid and start2 <= end:
# If element 1 is in right place
if arr[start][key] <= arr[start2][key]:
start += 1
else:
value = arr[start2][key]
index = start2
# Shift all the elements between element 1
# element 2, right by 1.
while index != start:
arr[index] = arr[index - 1]
index -= 1
arr[start][key] = value
# Update all the pointers
start += 1
mid += 1
start2 += 1
def mergeSort(arr, l, r, key="hash"):
if l < r:
m = l + (r - l) // 2
mergeSort(arr, l, m)
mergeSort(arr, m + 1, r)
merge(arr, l, m, r, key=key)
def groupidenticalStrings(s):
n = len(s)
hashes = [{}] * n
for i in range(n):
hashes[i] = {"hash": computehash(s[i]), "index": i}
mergeSort(hashes, 0, n - 1, key="hash")
groups = []
count = -1
for i in range(n):
if i == 0 or hashes[i]["hash"] != hashes[i - 1]["hash"]:
count += 1
groups.append([])
try:
groups[count].append(hashes[i]["index"])
except:
groups.append([hashes[i]["index"]])
return groups
def main():
list_of_strings = ["hello"] * 2 + ["hi"] * 2 + ["fizz"] * 5 + ["hello"] * 2
print(groupidenticalStrings(list_of_strings))
if __name__ == "__main__":
main()
else:
print("None")