Add files via upload

benedekrozemberczki · Dec 29, 2018 · 502fcff · 502fcff
1 parent d8b4d3f
commit 502fcff
Show file tree

Hide file tree

Showing 3 changed files with 80 additions and 80 deletions.
diff --git a/src/calculation_helper.py b/src/calculation_helper.py
@@ -30,7 +30,7 @@ def overlap(g, node_1, node_2):
 
 def unit(g, node_1, node_2):
     """
-    Function to calculate the 'unit' weight.
+    Function to calculate the "unit" weight.
     :param g: NX graph.
     :param node_1: Node 1. of a pair.
     :param node_2: Node 2. of a pair.    
@@ -181,8 +181,8 @@ def __init__(self, nx_G, is_directed, p, q):
         self.nodes = nx.nodes(self.G)
         print("Edge weighting.\n")
         for edge in tqdm(self.G.edges()):
-            self.G[edge[0]][edge[1]]['weight'] = 1.0
-            self.G[edge[1]][edge[0]]['weight'] = 1.0
+            self.G[edge[0]][edge[1]]["weight"] = 1.0
+            self.G[edge[1]][edge[0]]["weight"] = 1.0
         self.is_directed = is_directed
         self.p = p
         self.q = q
@@ -249,11 +249,11 @@ def get_alias_edge(self, src, dst):
         unnormalized_probs = []
         for dst_nbr in sorted(G.neighbors(dst)):
             if dst_nbr == src:
-                unnormalized_probs.append(G[dst][dst_nbr]['weight']/p)
+                unnormalized_probs.append(G[dst][dst_nbr]["weight"]/p)
             elif G.has_edge(dst_nbr, src):
-                unnormalized_probs.append(G[dst][dst_nbr]['weight'])
+                unnormalized_probs.append(G[dst][dst_nbr]["weight"])
             else:
-                unnormalized_probs.append(G[dst][dst_nbr]['weight']/q)
+                unnormalized_probs.append(G[dst][dst_nbr]["weight"]/q)
         norm_const = sum(unnormalized_probs)
         normalized_probs =  [float(u_prob)/norm_const for u_prob in unnormalized_probs]
 
@@ -270,7 +270,7 @@ def preprocess_transition_probs(self):
         print("")
         print("Preprocesing.\n")
         for node in tqdm(G.nodes()):
-             unnormalized_probs = [G[node][nbr]['weight'] for nbr in sorted(G.neighbors(node))]
+             unnormalized_probs = [G[node][nbr]["weight"] for nbr in sorted(G.neighbors(node))]
              norm_const = sum(unnormalized_probs)
              normalized_probs =  [float(u_prob)/norm_const for u_prob in unnormalized_probs]
              alias_nodes[node] = alias_setup(normalized_probs)

diff --git a/src/parser.py b/src/parser.py
@@ -13,137 +13,137 @@ def parameter_parser():
     # Input and output file parameters.
     #------------------------------------------------------------------
 
-    parser.add_argument('--input',
-                        nargs = '?',
-                        default = './data/politician_edges.csv',
-	                help = 'Input graph path.')
-
-    parser.add_argument('--embedding-output',
-                        nargs = '?',
-                        default = './output/embeddings/politician_embedding.csv',
-	                help = 'Embeddings path.')
-
-    parser.add_argument('--cluster-mean-output',
-                        nargs = '?',
-                        default = './output/cluster_means/politician_means.csv',
-	                help = 'Cluster means path.')
-
-    parser.add_argument('--log-output',
-                        nargs = '?',
-                        default = './output/logs/politician.json',
-	                help = 'Log path.')
-
-    parser.add_argument('--assignment-output',
-                        nargs = '?',
-                        default = './output/assignments/politician.json',
-	                help = 'Log path.')
-
-    parser.add_argument('--dump-matrices',
+    parser.add_argument("--input",
+                        nargs = "?",
+                        default = "./data/politician_edges.csv",
+	                help = "Input graph path.")
+
+    parser.add_argument("--embedding-output",
+                        nargs = "?",
+                        default = "./output/embeddings/politician_embedding.csv",
+	                help = "Embeddings path.")
+
+    parser.add_argument("--cluster-mean-output",
+                        nargs = "?",
+                        default = "./output/cluster_means/politician_means.csv",
+	                help = "Cluster means path.")
+
+    parser.add_argument("--log-output",
+                        nargs = "?",
+                        default = "./output/logs/politician.json",
+	                help = "Log path.")
+
+    parser.add_argument("--assignment-output",
+                        nargs = "?",
+                        default = "./output/assignments/politician.json",
+	                help = "Log path.")
+
+    parser.add_argument("--dump-matrices",
                         type = bool,
                         default = True,
-	                help = 'Save the embeddings to disk or not. Default is not.')
+	                help = "Save the embeddings to disk or not. Default is not.")
 
-    parser.add_argument('--model',
-                        nargs = '?',
-                        default = 'GEMSECWithRegularization',
-	                help = 'The model type.')
+    parser.add_argument("--model",
+                        nargs = "?",
+                        default = "GEMSECWithRegularization",
+	                help = "The model type.")
 
     #------------------------------------------------------------------
     # RandomWalk parameters.
     #------------------------------------------------------------------
 
-    parser.add_argument('--P',
+    parser.add_argument("--P",
                         type = float,
                         default = 1,
-	                help = 'Return hyperparameter. Default is 1.')
+	                help = "Return hyperparameter. Default is 1.")
 
-    parser.add_argument('--Q',
+    parser.add_argument("--Q",
                         type = float,
                         default = 1,
-	                help = 'In-out hyperparameter. Default is 1.')
+	                help = "In-out hyperparameter. Default is 1.")
 
-    parser.add_argument('--walker',
-                        nargs = '?',
-                        default = 'first',
-	                help = 'Random walker order. Default is first.')
+    parser.add_argument("--walker",
+                        nargs = "?",
+                        default = "first",
+	                help = "Random walker order. Default is first.")
 
 
     #------------------------------------------------------------------
     # Skipgram parameters.
     #------------------------------------------------------------------
 
-    parser.add_argument('--dimensions',
+    parser.add_argument("--dimensions",
                         type = int,
                         default = 16,
-	                help = 'Number of dimensions. Default is 16.')
+	                help = "Number of dimensions. Default is 16.")
 
-    parser.add_argument('--random-walk-length',
+    parser.add_argument("--random-walk-length",
                         type = int,
                         default = 80,
-	                help = 'Length of random walk per source. Default is 80.')
+	                help = "Length of random walk per source. Default is 80.")
 
-    parser.add_argument('--num-of-walks',
+    parser.add_argument("--num-of-walks",
                         type = int,
                         default = 5,
-	                help = 'Number of random walks per source. Default is 5.')
+	                help = "Number of random walks per source. Default is 5.")
 
-    parser.add_argument('--window-size',
+    parser.add_argument("--window-size",
                         type = int,
                         default = 5,
-                    	help = 'Window size for proximity statistic extraction. Default is 5.')
+                    	help = "Window size for proximity statistic extraction. Default is 5.")
 
-    parser.add_argument('--distortion',
+    parser.add_argument("--distortion",
                         type = float,
                         default = 0.75,
-	                help = 'Downsampling distortion. Default is 0.75.')
+	                help = "Downsampling distortion. Default is 0.75.")
 
-    parser.add_argument('--negative-sample-number',
+    parser.add_argument("--negative-sample-number",
                         type = int,
                         default = 10,
-	                help = 'Number of negative samples to draw. Default is 10.')
+	                help = "Number of negative samples to draw. Default is 10.")
 
     #------------------------------------------------------------------
     # Optimization parameters.
     #------------------------------------------------------------------
 
-    parser.add_argument('--initial-learning-rate',
+    parser.add_argument("--initial-learning-rate",
                         type = float,
                         default = 0.01,
-	                help = 'Initial learning rate. Default is 0.01.')
+	                help = "Initial learning rate. Default is 0.01.")
 
-    parser.add_argument('--minimal-learning-rate',
+    parser.add_argument("--minimal-learning-rate",
                         type = float,
                         default = 0.001,
-	                help = 'Minimal learning rate. Default is 0.001.')
+	                help = "Minimal learning rate. Default is 0.001.")
 
-    parser.add_argument('--annealing-factor',
+    parser.add_argument("--annealing-factor",
                         type = float,
                         default = 1,
-	                help = 'Annealing factor. Default is 1.0.')
+	                help = "Annealing factor. Default is 1.0.")
 
-    parser.add_argument('--initial-gamma',
+    parser.add_argument("--initial-gamma",
                         type = float,
                         default = 0.1,
-	                help = 'Initial clustering weight. Default is 0.1.')
+	                help = "Initial clustering weight. Default is 0.1.")
 
-    parser.add_argument('--lambd',
+    parser.add_argument("--lambd",
                         type = float,
                         default = 2.0**-4,
-	                help = 'Smoothness regularization penalty. Default is 0.0625.')
+	                help = "Smoothness regularization penalty. Default is 0.0625.")
 
-    parser.add_argument('--cluster-number',
+    parser.add_argument("--cluster-number",
                         type = int,
                         default = 20,
-	                help = 'Number of clusters. Default is 20.')
+	                help = "Number of clusters. Default is 20.")
 
-    parser.add_argument('--overlap-weighting',
-                        nargs = '?',
-                        default = 'normalized_overlap',
-	                help = 'Weight construction technique for regularization.')
+    parser.add_argument("--overlap-weighting",
+                        nargs = "?",
+                        default = "normalized_overlap",
+	                help = "Weight construction technique for regularization.")
 
-    parser.add_argument('--regularization-noise',
+    parser.add_argument("--regularization-noise",
                         type = float,
                         default = 10**-8,
-	                help = 'Uniform noise max and min on the feature vector distance.')
+	                help = "Uniform noise max and min on the feature vector distance.")
 
     return parser.parse_args()
diff --git a/src/print_and_read.py b/src/print_and_read.py
@@ -26,7 +26,7 @@ def json_dumper(data, path):
     """
     Function to dump the logs and assignments.
     """    
-    with open(path, 'w') as outfile:
+    with open(path, "w") as outfile:
         json.dump(data, outfile)
 
 def initiate_dump_gemsec(log, assignments, args, final_embeddings, c_means):
@@ -56,15 +56,15 @@ def tab_printer(log):
     Function to print the logs in a nice tabular format.
     """    
     t = Texttable() 
-    t.add_rows([['Epoch', log["losses"][-1][0]]])
+    t.add_rows([["Epoch", log["losses"][-1][0]]])
     print(t.draw())
 
     t = Texttable()
-    t.add_rows([['Loss', round(log["losses"][-1][1],3)]])
+    t.add_rows([["Loss", round(log["losses"][-1][1],3)]])
     print(t.draw()) 
 
     t = Texttable()
-    t.add_rows([['Modularity', round(log["cluster_quality"][-1][1],3)]])
+    t.add_rows([["Modularity", round(log["cluster_quality"][-1][1],3)]])
     print(t.draw()) 
 
 def epoch_printer(repetition):