-
Notifications
You must be signed in to change notification settings - Fork 0
/
regexfinder.py
2648 lines (2172 loc) · 98.1 KB
/
regexfinder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import copy
import random
from itertools import combinations
import more_itertools as mit
import numpy as np
import pandas as pd
from graphviz import Digraph
from utils import *
class NODE:
def __init__(self, regex:str=None, vector=None, replaced=False, alpha=1, quantifier=None):
"""
Node creation is defined firstly by a regex. If a regex is provided with a vector and/or quantifier,
the latter two will be ignored and the node will be created from the given regex. If a regex
is not given, a vector muse be given. Quantifier is optional, but must have a finite upper bound.
An empty quantifier/none implies a quantifier of {1}
"""
if regex is None and vector is None:
raise Exception('Either regex or vector required.')
self.alpha = alpha
self.regex:str = regex
self.vector:VECTOR = vector
self.id_:str = next(strCounter)
self.reduced:bool = False
self.replaced = replaced
if self.regex is None:
# if merging nodes this will make the characters go in alphabetical order
self.regex:str = self.vector.regex
if quantifier is not None:
regexUpdates:list[str] = setClassQuantList(self.regex, quantifier)
self.classQuantList = regexUpdates[0]
self.regex = regexUpdates[1]
else:
self.classQuantList = getClassQuantList(self.regex)
else:
self.classQuantList = getClassQuantList(self.regex)
self.removeOuterParentheses()
if self.simple:
self.createVector()
else:
pass
@property
def simple(self):
"""
Returns true if a node is simple, false is not. This comes into play when building and
simplifing/partitioning graph objects.
"""
return not ('(' in self.regex or '|' in self.regex or len(self.classQuantList) > 1)
@property
def getQuantifier(self):
"""Returns the quantifier of the given node. Throws Exception if
the node is not simple. If the quantifier is {1}, None is returned.
Raises:
Exception: Node is not simple
Returns:
str: Node quantifier
"""
if self.simple:
return self.classQuantList[0]['quantifier']
else:
raise Exception("Node is not simple")
@property
def getQuantifierMin(self):
"""Returns the lower bound of the given's node quantifier. Throws Exception
if the node is not simple. If the quantifier is {1}, None is returned.
Raises:
Exception: Node is not simple
Returns:
int: Quantifier min
"""
if self.simple:
quantifier = self.getQuantifier
if quantifier == '?':
return 0
else:
return self.classQuantList[0]['min']
else:
raise Exception("Node is not simple")
@property
def getQuantifierMax(self):
"""Returns the upper bound of the given's node quantifier. Throws Exception
if the node is not simple. If the quantifier is {1}, None is returned.
Raises:
Exception: Node is not simple
Returns:
int: Quantifier max
"""
if self.simple:
quantifier = self.getQuantifier
if quantifier == '?':
return 1
else:
return self.classQuantList[0]['max']
else:
raise Exception("Node is not simple")
def mergeQuantifiers(self, nodeList:list):
"""Returns the new lower and upper bounds following the merging of the nodes' quantifier.
Args:
nodeList (list[str]): list of node ids
Returns:
tuple[int,int]: A tuple containing the new minimum and maximum quantifier
"""
nodeList.append(self)
lowestLow = min(lowestMin.getQuantifierMin for lowestMin in nodeList)
highestHigh = max(highestMax.getQuantifierMax for highestMax in nodeList)
if lowestLow == highestHigh:
return lowestLow
else:
return (lowestLow, highestHigh)
@property
def topOrExists(self):
"""Returns boolean if the node has an or statement seperating the whole regex.
Returns:
bool: True or false on whether the top or exists
"""
return topOrExists(self.regex)
def removeOuterParentheses(self):
"""Removes unnecesary outer parenthesis if they exist in a node
i.e. (a).
"""
if hasOuterParentheses(self.regex):
while self.regex[0] == '(' and self.regex[-1] == ')':
self.regex = self.regex[1:-1]
def createVector(self):
"""Creates a vector for the given node.
"""
assert self.simple
reClass = self.classQuantList[0]['class']
vector = np.zeros(128, dtype=int)
if len(reClass) == 1:
vector[ord(reClass)] = 1
elif reClass == '\d':
vector[48:58] = 1
elif reClass == '\D':
vector[31:47] = 1
vector[59:127] = 1
elif reClass == '\w':
vector[48:58] = 1
vector[65:91] = 1
vector[95] = 1
vector[97:123] = 1
elif reClass == '\W':
vector[31:48] = 1
vector[58:65] = 1
vector[91:95] = 1
vector[96] = 1
vector[123:127] = 1
elif '[' in reClass:
pieces = partitionClass(reClass)
for piece in pieces:
if '-' in piece:
start = ord(piece[0])
end = ord(piece[2]) + 1
vector[start:end] = 1
elif piece == '\d':
vector[48:58] = 1
elif piece == '\D':
vector[31:47] = 1
vector[59:127] = 1
elif piece == '\w':
vector[48:58] = 1
vector[65:91] = 1
vector[95] = 1
vector[97:123] = 1
elif piece == '\W':
vector[31:48] = 1
vector[58:65] = 1
vector[91:95] = 1
vector[96] = 1
vector[123:127] = 1
else:
vector[ord(piece)] = 1
self.vector = VECTOR(vector, self.alpha)
def reduce(self):
"""Reduces the regex of a given simple node if possible, i.e. 'abcde' -> 'a-e'.
Same as VECTOR method.
Raises:
Exception: Node is not simple. A node cannot be reduced if it is not simple.
"""
if not self.simple:
raise Exception('Node is not simple. A node cannot be reduced if it is not simple.')
elif not self.isPureSimple:
if not self.vector:
self.createVector()
else:
pass
self.vector.reduce()
self.regex = self.vector.regex + (self.getQuantifier if self.getQuantifier else "")
self.reduced = True
@property
def cardinality(self):
"""Returns the cardinality, the possible number of strings that satisfy its given regex, of the
given node.
Raises:
Exception: Check class quantifier
Returns:
int: Cardinality, number of possible allowable string matches
"""
if self.vector is None:
self.createVector()
valCount = sum(self.vector.v)
quantifier = self.classQuantList[0]['quantifier']
if quantifier is None:
return valCount
elif quantifier == '?':
return valCount + 1
elif self.classQuantList[0]['min'] is not None and self.classQuantList[0]['max']:
return sum([valCount ** exponent for exponent in
range(self.classQuantList[0]['min'], self.classQuantList[0]['max'] + 1)])
else:
raise Exception('Check class quantifier')
@property
def singleQuantifier(self):
"""Returns boolean if the quantifier is the same for its lower bound and upper bound i.e. {5}.
Returns:
bool: True or false on whether there is a single quantifier
"""
return self.getQuantifierMin == self.getQuantifierMax
@property
def entropy(self):
"""Returns the entropy of a node, as defined by Information-Theoretic entropy.
Returns:
float: Node entropy
"""
return round(np.log2(self.cardinality), 4)
@property
def K(self):
"""Returns the length of given node's regex.
Returns:
int: Regex length
"""
return len(self.regex)
@property
def phi(self):
"""Returns the phi of the node, being the entropy of the node added to the product
of the alpha (weight parameter) and its K value.
Returns:
float: Node phi
"""
return self.entropy + self.alpha * self.K
def match(self, inString, boolean=False):
"""
Checks for matches of this node's regex(?)
"""
matches = re.finditer(self.regex, inString)
if boolean:
return bool(list(matches))
else:
return matches
@property
def isPureSimple(self):
"""
Checks if a node's regex is one 'item' i.e. 'a', \d, 'y' not [ab]
Returns:
Boolean
"""
if not self.vector:
self.createVector()
if self.vector.minIndex == self.vector.maxIndex:
return True
rawRegex = getClassQuantList(self.regex)[0]['class']
if rawRegex == '\\d' or rawRegex == '\\w':
return True
else:
return False
@property
def random(self):
"""Returns a random string that satifies a given node's regex.
Raises:
Exception: if min != max
Returns:
str: Random string that matches the regex
"""
if self.vector is None:
self.createVector()
quantifier = self.classQuantList[0]['quantifier']
if quantifier is None:
return self.vector.random
else:
mm = self.classQuantList[0]['min']
mM = self.classQuantList[0]['max']
if mm == mM:
return ''.join([self.vector.random for x in range(mm)])
else:
raise Exception('still need to fix min != max')
class EDGE:
def __init__(self, parent:str, child:str, words=None):
"""CHECK WORDS
An edge is what connects 2 nodes, and exactly two nodes.
Its parent and child must not be the same.
Args:
parent (str): Parent node
child (str): Child node
words (TYPECHECK, optional): CHECK. Defaults to None.
Raises:
Exception: Parent and Child must not be the same
"""
if words is None:
words:list = []
if parent != child:
self.parent = parent
self.child = child
else:
raise Exception("Parent and child cannot be the same")
self.words = words
class VECTOR:
def __init__(self, vector:np.ndarray, alpha=None):
""" A vector is a matrix of 1's and 0's 128 big, each place
representing an ASCII character (i.e. a space 33 in represents the '!' character.
Args:
vector (np.ndarray): An array of size 128, each position denotes an ASCII character
alpha ((float, int), optional): Weighted value. Defaults to None.
"""
self.v = vector
self.alpha = alpha
@property
def regex(self):
"""Returns the regex built from the given vector.
Raises:
Exception: Vector has no nonzero values
Returns:
str: The regex of the given vector
"""
if not self.consecutiveSublists:
raise Exception('Vector has no nonzero values.')
else:
pass
toReturn = '['
for subList in self.consecutiveSublists:
subList = [chr(x) for x in subList]
if len(subList) == 1:
toReturn += subList[0]
elif len(subList) == 2:
toReturn += subList[0]
toReturn += subList[1]
elif subList[0] == '0' and subList[-1] == '9':
toReturn += '\d'
else:
toReturn += subList[0]
toReturn += '-'
toReturn += subList[-1]
toReturn += ']'
if toReturn == '[\\dA-Z_a-z]':
toReturn = '[\\w]'
return toReturn
@property
def getndArray(self):
"""Returns Ndarray representation of this vector
Returns:
ndarray
"""
return self.v
def reduce(self):
"""Reduces the regex of a given vector if possible, i.e. 'abcde' -> 'a-e'.
Same as NODE method.
"""
flatten = lambda x: [y for z in x for y in z]
subLists = self.consecutiveSublists
combs = [list(combinations(subLists, i)) for i in range(1, len(subLists) + 1)]
combs = flatten(combs)
for comb in combs:
support = flatten(comb)
first = min(support)
last = max(support)
temp = VECTOR(self.v.copy(), self.alpha)
temp.v[first:last] = 1
if temp.phi < self.phi:
self.v = temp.v
@property
def consecutiveSublists(self):
"""Returns all consecutive sublists in the vector
Returns:
list: A list of all consecutive subgroups
"""
return [list(group) for group in mit.consecutive_groups(self.support)]
@property
def minIndex(self):
"""CHECKReturns the first (min) index in the given vector field.
Returns:
int: Index of minimum value in ndArray
"""
return np.min(np.where(self.v))
@property
def maxIndex(self):
"""CHECKReturns the last (max) index in the given vector field.
Returns:
int: Index of maximum value
"""
return np.max(np.where(self.v))
@property
def ent(self):
"""Returns the Informatic-Theory entropy of the given vector field, much like
a regex.
Returns:
float: Entropy of the vector
"""
return round(np.log2(np.sum(self.v)), 4)
@property
def support(self):
"""
CHECK
"""
return np.where(self.v)[0]
@property
def K(self):
"""Returns the length of the regex built from the given vector.
Returns:
int: Length of regex
"""
return len(self.regex)
@property
def phi(self):
"""Returns the phi of the vector, being the entropy of the vector added to the product
of the alpha (weight parameter) and its K value.
Returns:
float: Vector Phi value
"""
return self.ent + self.alpha * self.K
@property
def random(self):
"""Returns a random string that satifies a given vector's regex.
Returns:
str: String that matches the regex
"""
return chr(random.sample(list(np.where(self.v)[0]), 1)[0])
class ALPHABET:
def __init__(self, alphabetList):
"""
CHECK
"""
self.alphabetList = alphabetList
def sample(self, count=1):
"""
CHECK
"""
return ''.join(random.choices(self.alphabetList, k=count))
class WORDCOLLECTION:
def __init__(self, words:list):
"""
CHECK
"""
self.words = words
self.maxLength = max([len(word) for word in words])
self.M = [list(x) for x in self.words]
self.df = pd.DataFrame([list(word) for word in words])
self.prefixDicts = []
self.suffixDicts = []
self.spClasses = []
self.setToStr = lambda x: ''.join(sorted(list(x))) if isinstance(x, set) else x
self.dictValSetToStr = lambda d: dict([(k, self.setToStr(v)) for k, v in d.items()])
for i in range(self.maxLength):
prefixes = {}
suffixes = {}
if i != 0:
for v in self.M:
if v[i] in prefixes.keys():
prefixes[v[i]].add(v[i - 1])
else:
prefixes[v[i]] = set(v[i - 1])
prefixes = self.dictValSetToStr(prefixes)
self.prefixDicts.append(prefixes)
else:
pass
if i != self.maxLength - 1:
for v in self.M:
if v[i] in suffixes.keys():
suffixes[v[i]].add(v[i + 1])
else:
suffixes[v[i]] = set(v[i + 1])
suffixes = dict([(k, self.setToStr(v)) for k, v in suffixes.items()])
self.suffixDicts.append(suffixes)
else:
pass
def createClasses(self):
"""
CHECK
"""
eq = {}
# for k in set(list(preClasses.keys()) + list(suffClasses.keys())):
# eq[k] = (preClasses.get(k,None),suffClasses.get(k,None))
for i in range(1, self.maxLength - 1):
for k in set(list(self.prefixDicts[i].keys()) + list(self.suffixDicts[i].keys())):
eq[k] = (
self.setToStr(self.prefixDicts[i].get(k, None)), self.setToStr(self.suffixDicts[i].get(k, None)))
# The keys of eq are the alphabet. The values are each a tuple, where the first value is a
# string of the prefixes and the second is a string of the suffixes.
classes = self.partitionValueEquality(eq)
self.spClasses.append(classes)
self.spClasses = [self.partitionValueEquality(self.prefixDicts[0])] + self.spClasses
self.spClasses = self.spClasses + [self.partitionValueEquality(self.suffixDicts[-1])]
def partitionValueEquality(self, inDict):
"""
Returns a dictionary grouping the input dictionary keys by same value
"""
outDict = {}
for k, v in inDict.items():
if v in outDict.keys():
outDict[v].add(k)
else:
outDict[v] = set(k)
outDict = self.dictValSetToStr(outDict)
return outDict
class GRAPH:
def __init__(self, regex:str=None, wordList=None, nodes:dict[str, NODE]=None, edges:list[EDGE]=None, alpha=1):
"""
A Graph is a data structure made of nodes and edges. Graphs are built by priority from either a regex,
a wordlist, or nodes[dictionary]. If a graph is built from a regex, a non-simple node will be
made and added to the nodes dictionary. Nodes are not required to instantiate a graph object.
Edges are not required to be made with nodes.
Args:
regex (str, optional): Build the graph from a regex. Will take priority if wordlist or nodes are provided. Defaults to None.
wordList (WORDCOLLECTION, optional): Build the graph from a wordcollectionCHECK. Takes priorty over nodes. Defaults to None.
nodes (dict[str, NODE], optional): Build the graph from given nodes (Edges are not included here). Defaults to None.
edges (list[EDGE], optional): Provide a list of edges to pair with nodes. Defaults to None.
alpha (int, optional): Weighted parameter. Defaults to 1.
Raises:
Exception: re.compile failed
"""
self.regex = regex
self.wordList = wordList
self.alpha = alpha
self.nodes:dict[str, NODE] = {}
self.edges:list[EDGE] = []
if self.regex:
try:
re.compile(self.regex)
except:
raise Exception('Invalid argument. re.compile failed')
elif self.wordList:
self.wordCollection = WORDCOLLECTION(self.wordList)
self.wordCollection.createClasses()
self.columnState = 0
elif nodes:
self.nodes = nodes
if edges:
for edge in edges:
self.addEdge(edge)
else:
pass
if self.regex:
self.startNode = NODE(self.regex)
self.nodes.update({self.startNode.id_: self.startNode})
else:
pass
def deepCopy(self):
"""Returns a content-equal copy of the given graph, with a different
memory address
Returns:
GRAPH: Content-equal copy of this graph
"""
return copy.deepcopy(self)
def addNode(self, node:NODE, edges=None):
"""Adds a node into the given graph. Edges not required.
Args:
node (NODE): Node to add
edges (_type_, optional): Edge to connect to the node. Defaults to None.
Raises:
Exception: Node key already exists
"""
if edges is None:
edges = []
if node.id_ in self.nodes.keys():
raise Exception('Node key already exists')
self.nodes[node.id_] = node
if edges:
for currEdge in edges:
self.edges.append(currEdge)
else:
pass
def removeNodeAndEdges(self, nodeList:list[str]):
"""Removes the given nodes, and its edges, from the given graph.
Returns the affected nodes and edges adjecent to the group of nodes removed.
Args:
nodeList (list[str]): THe list of nodes to remove
Returns:
list[list[str], list[str]]: A list of nodes above and below the affected area
"""
upperAffectedNodes = []
lowerAffectedNodes = []
edgesToRemove = []
# Future, could be more efficient by not going through entire edgeList
for edge in self.edges:
if edge.parent in nodeList or edge.child in nodeList:
edgesToRemove.append(edge)
if edge.child in nodeList and edge.parent not in nodeList:
if edge.parent not in upperAffectedNodes:
upperAffectedNodes.append(edge.parent)
elif edge.parent in nodeList and edge.child not in nodeList:
if edge.child not in lowerAffectedNodes:
lowerAffectedNodes.append(edge.child)
[self.edges.remove(x) for x in edgesToRemove]
for node in nodeList:
del self.nodes[node]
return [upperAffectedNodes, lowerAffectedNodes]
def addEdge(self, edge:EDGE):
"""Adds the given edge to the graph ONLY if the parent and child nodes do not already
exist and if an edge for said parent and child nodes does not already exist.
Args:
edge (EDGE): Edge to add
Raises:
Exception: Edge alreay exists
Exception: Parent or child does not exits
"""
for gEdge in self.edges:
if edge == gEdge:
raise Exception("Edge already exists")
if edge.parent not in self.nodes.keys() or edge.child not in self.nodes.keys():
raise Exception("Parent or Child node does not exist")
self.edges.append(edge)
def getEdge(self, parent:str, child:str):
"""Returns EDGE object for a given parent and child node, if it exists. Returns
false otherwise.
Args:
parent (str): Id of parent node
child (str): Id of child node
Raises:
Exception: More than one edge with same parent/child
Returns:
EDGE: The requested edge, or False if nonexistent
"""
matches = [edge for edge in self.edges if (edge.parent == parent and edge.child == child)]
if len(matches) > 1:
raise Exception('More than one edge with same parent/child.')
elif len(matches) == 1:
return matches[0]
else:
return False
def removeEdge(self, parent:str, child:str):
"""Removes edge binding two nodes (parent and child) IF they exist in the given graph.
Args:
parent (str): Id of the parent node
child (str): Id of the child node
Raises:
Exception: Parent or child node does not exists
"""
if parent not in self.nodes.keys() or child not in self.nodes.keys():
raise Exception("Parent or Child node does not exist")
toRemove = [edge for edge in self.edges if (edge.parent == parent and edge.child == child)]
if toRemove:
[self.edges.remove(edge) for edge in toRemove]
def removeEdgesList(self, edgeList:list[EDGE]):
"""Removes all edges in the given graph.
Args:
edgeList (list[EDGE]): Edges to remove
"""
[self.edges.remove(edge) for edge in edgeList]
def removeStraightShots(self):
"""Removes all edges between node and child that are redundant to a path through nodes,
if it exists e.g. a->b, b->c, a->c. a->c is a straight shot.
"""
straightShot = self.doesStraightShotExist
while straightShot:
self.edges.remove(straightShot)
straightShot = self.doesStraightShotExist
def getParents(self, id_:str):
"""Returns node ID(s) of the parent(s) of a given node via ID. Returns false if nonexistent.
Args:
id_ (str): Id of the node in question
Returns:
list[str]: List of nodes that are parents to the nodes in question
"""
return [x.parent for x in self.edges if x.child == id_]
def getChildren(self, id_:str):
"""Returns node ID(s) of the child(ren) of a given node via ID. Returns false if nonexistent.
Args:
id_ (str): Id of the node in question
Returns:
list[str]: Children of the node in question
"""
return [x.child for x in self.edges if x.parent == id_]
def getSiblings(self, id_:str):
"""Returns neighboring nodes, siblings of a given node via ID. Nodes are siblings if they share the same parent
(nodes with no parents are also siblings to eachother)
Args:
id_ (str): Id of the node in question
Returns:
list[str]: A list of the siblings of the node in question
"""
if not self.getParents(id_):
return self.getNodesNoParents()
else:
return sorted(list(set(flatten([self.getChildren(parent) for parent in self.getParents(id_)]))))
def getNodesNoChildren(self):
"""Returns node IDs for all nodes with no children.
Returns:
list[str]: A list of node ids that have no children
"""
return [x.id_ for x in self.nodes.values() if not self.getChildren(x.id_)]
def getNodesNoParents(self):
"""Returns node IDs for all nodes with no parents (nodes that do not extend from any node).
Returns:
list[str]: A list of node ids that have no parents
"""
return [x.id_ for x in self.nodes.values() if not self.getParents(x.id_)]
def getLonelyNodes(self):
"""Returns all nodes with no edges.
Returns:
list[str]: A list of all nodes(ids) with no edges
"""
setOfNoParents = set(self.getNodesNoParents())
setOfNoChildren = set(self.getNodesNoChildren())
return list(setOfNoChildren.intersection(setOfNoParents))
def getNotSimple(self):
"""Returns all nodes that are not simple.
Returns:
list[str]: A list of all nodes(ids) that are not simple
"""
return [id_ for id_, node in self.nodes.items() if (not node.replaced and not node.simple)]
def simplify(self):
"""
Simplifies all existing nodes.
"""
while self.getNotSimple():
self.process(self.getNotSimple()[0])
self.nodes = dict([(name, node) for name, node in self.nodes.items() if node.simple])
def getNodeEqClasses(self):
"""Returns a list of node IDs that have the same parents and children.
Returns:
list[str]: A list of all nodes that have the same parents and children
"""
# two nodes are equivalent if they have the same parents and children
tempDict:dict[tuple[str, str], str] = {}
for id_ in self.nodes.keys():
parents = tuple(sorted(self.getParents(id_)))
children = tuple(sorted(self.getChildren(id_)))
if (parents, children) in tempDict.keys():
tempDict[(parents, children)].append(id_)
else:
tempDict[(parents, children)] = [id_]
return list(tempDict.values())
def getGenerationalSets(self):
"""A method to get a list of how far away each node is from the topNodes (via edges)
Returns:
list[[list[tuple]]]: Returns a list of lists containing tuples (nodeId, distanceFromTopNodes). List groups are ordered.
"""
topNodes = self.getNodesNoParents()
generationMarkedNodes = [(topNode, 1) for topNode in topNodes]
for node in topNodes:
children = self.getChildren(node)
for child in children:
returnList = self.getGenerationalSetsRecursive(child, 2)
for element in returnList:
generationMarkedNodes.append(element)
maxDepth = 0
for element in generationMarkedNodes:
depth = element[1]
if depth > maxDepth:
maxDepth = depth
nodeIdList = []
recDepthList = []
for element in generationMarkedNodes:
if element[0] in nodeIdList:
nodeIdPlace = nodeIdList.index(element[0])
if element[1] > recDepthList[nodeIdPlace]:
nodeIdList.append(element[0])
recDepthList.append(element[1])
nodeIdList.pop(nodeIdPlace)
recDepthList.pop(nodeIdPlace)
else:
nodeIdList.append(element[0])
recDepthList.append(element[1])
generationMarkedNodes = [(nodeIdList[n], recDepthList[n]) for n in range(len(nodeIdList))]
toReturn = [[] for _ in range(maxDepth)]
for element in generationMarkedNodes:
place = element[1]-1
toReturn[place].append(element[0])
return toReturn
def getGenerationalSetsRecursive(self, currNodeId, recursionDepth):
"""Recursive method for getGenerationalSets
Args:
currNodeId (str): The id of the current node being recursed into
recursionDepth (int): The current recursion depth
Returns:
list[tuple]: [Recursive return] Returns information of all children of current Node
"""
'''
Base Case:
At a node. Return (nodeId, depth) + all previous nodes and depths
Else:
Recurse into children, +1 depth
'''
children = self.getChildren(currNodeId)
returnsList = [(currNodeId, recursionDepth)]
if children:
for child in children:
downstream = self.getGenerationalSetsRecursive(child, recursionDepth+1)
for element in downstream:
returnsList.append(element)
return returnsList
def getNodeAncestorsList(self, inList:list[str]):
"""Returns a list of node IDs of ancestors, the parents of all of each node's parents
Args:
inList (list[str]): A list of node ids in question
Returns:
list[str]: A list of all the nodes' ancestors
"""
toReturn = []
for id_ in inList:
if self.getNodeAncestors(id_) not in toReturn:
toReturn += self.getNodeAncestors(id_)
return toReturn
def getNodeDescendantsList(self, inList:list[str]):
"""Returns a list of node IDs of descendants, the children of all of each node's children
Args:
inList (list[str]): A list of the node ids in question
Raises:
Exception: Value inList must be a list
Returns:
list[str]: A list of all the nodes' descendants
"""