Parameterize node/edge columns (holoviz#494)
* Instead of requiring hard-coded column names for nodes, edges, and weights, this change provides parameters so they can be specified per call (see the usage sketch below).
* If the name of the weight column is None, weights are ignored.
* Changed forceatlas2_layout to ignore weights by default.
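
A minimal usage sketch of the new keyword parameters, mirroring the renamed-columns test added in this commit ('xx', 'yy', 'src', 'dst', and 'w' are hypothetical column names, not library defaults):

import pandas as pd
from datashader.bundling import hammer_bundle

# 'xx', 'yy', 'src', 'dst', and 'w' are hypothetical column names.
nodes = pd.DataFrame({'xx': [0.0, 1.0, 1.0], 'yy': [0.0, 0.0, 1.0]})
edges = pd.DataFrame({'src': [0, 0], 'dst': [1, 2], 'w': [1.0, 2.0]})

# Column names are passed per call instead of being hard-coded;
# passing weight=None would ignore edge weights entirely.
bundled = hammer_bundle(nodes, edges, x='xx', y='yy',
                        source='src', target='dst', weight='w')
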
jbcrail authored and jbednar committed Oct 17, 2017
1 parent e8de1b5 commit 8ee50c3
Showing 4 changed files with 133 additions and 63 deletions.
101 changes: 70 additions & 31 deletions datashader/bundling.py
@@ -191,10 +191,16 @@ def create_delimiter(cls):

class UnweightedSegment(BaseSegment):
ndims = 3
columns = ['edge_id', 'x', 'y']
merged_columns = ['edge_id', 'src_x', 'src_y', 'dst_x', 'dst_y']
idx, idy = 1, 2

@staticmethod
def get_columns(params):
return ['edge_id', params.x, params.y]

@staticmethod
def get_merged_columns(params):
return ['edge_id', 'src_x', 'src_y', 'dst_x', 'dst_y']

@staticmethod
@nb.jit
def create_segment(edge):
@@ -208,10 +214,16 @@ def accumulate(img, point, accuracy):

class EdgelessUnweightedSegment(BaseSegment):
ndims = 2
columns = ['x', 'y']
merged_columns = ['src_x', 'src_y', 'dst_x', 'dst_y']
idx, idy = 0, 1

@staticmethod
def get_columns(params):
return [params.x, params.y]

@staticmethod
def get_merged_columns(params):
return ['src_x', 'src_y', 'dst_x', 'dst_y']

@staticmethod
@nb.jit
def create_segment(edge):
@@ -225,10 +237,16 @@ def accumulate(img, point, accuracy):

class WeightedSegment(BaseSegment):
ndims = 4
columns = ['edge_id', 'x', 'y', 'weight']
merged_columns = ['edge_id', 'src_x', 'src_y', 'dst_x', 'dst_y', 'weight']
idx, idy = 1, 2

@staticmethod
def get_columns(params):
return ['edge_id', params.x, params.y, params.weight]

@staticmethod
def get_merged_columns(params):
return ['edge_id', 'src_x', 'src_y', 'dst_x', 'dst_y', params.weight]

@staticmethod
@nb.jit
def create_segment(edge):
@@ -242,10 +260,16 @@ def accumulate(img, point, accuracy):

class EdgelessWeightedSegment(BaseSegment):
ndims = 3
columns = ['x', 'y', 'weight']
merged_columns = ['src_x', 'src_y', 'dst_x', 'dst_y', 'weight']
idx, idy = 0, 1

@staticmethod
def get_columns(params):
return [params.x, params.y, params.weight]

@staticmethod
def get_merged_columns(params):
return ['src_x', 'src_y', 'dst_x', 'dst_y', params.weight]

@staticmethod
@nb.jit
def create_segment(edge):
@@ -257,7 +281,7 @@ def accumulate(img, point, accuracy):
img[int(point[0] * accuracy), int(point[1] * accuracy)] += point[2]


def _convert_graph_to_edge_segments(nodes, edges, include_edge_id, ignore_weights=False):
def _convert_graph_to_edge_segments(nodes, edges, params):
"""
Merge graph dataframes into a list of edge segments.
@@ -272,21 +296,21 @@ def _convert_graph_to_edge_segments(nodes, edges, include_edge_id, ignore_weight
the accumulator function for drawing to an image.
"""

df = pd.merge(edges, nodes, left_on=['source'], right_index=True)
df = df.rename(columns={'x': 'src_x', 'y': 'src_y'})
df = pd.merge(edges, nodes, left_on=[params.source], right_index=True)
df = df.rename(columns={params.x: 'src_x', params.y: 'src_y'})

df = pd.merge(df, nodes, left_on=['target'], right_index=True)
df = df.rename(columns={'x': 'dst_x', 'y': 'dst_y'})
df = pd.merge(df, nodes, left_on=[params.target], right_index=True)
df = df.rename(columns={params.x: 'dst_x', params.y: 'dst_y'})

df = df.sort_index()
df = df.reset_index()

if include_edge_id:
if params.include_edge_id:
df = df.rename(columns={'id': 'edge_id'})

include_weight = not ignore_weights and 'weight' in edges
include_weight = params.weight and params.weight in edges

if include_edge_id:
if params.include_edge_id:
if include_weight:
segment_class = WeightedSegment
else:
@@ -297,15 +321,15 @@ def _convert_graph_to_edge_segments(nodes, edges, include_edge_id, ignore_weight
else:
segment_class = EdgelessUnweightedSegment

df = df.filter(items=segment_class.merged_columns)
df = df.filter(items=segment_class.get_merged_columns(params))

edge_segments = []
for edge in df.get_values():
edge_segments.append(segment_class.create_segment(edge))
return edge_segments, segment_class


def _convert_edge_segments_to_dataframe(edge_segments, segment_class):
def _convert_edge_segments_to_dataframe(edge_segments, segment_class, params):
"""
Convert list of edge segments into a dataframe.
@@ -321,7 +345,7 @@ def edge_iterator():
yield segment_class.create_delimiter()

df = DataFrame(np.concatenate(list(edge_iterator())))
df.columns = segment_class.columns
df.columns = segment_class.get_columns(params)
return df


@@ -334,6 +358,21 @@ class directly_connect_edges(param.ParameterizedFunction):
curved or manhattan-style polylines.
"""

x = param.String(default='x', doc="""
Column name for each node's x coordinate.""")

y = param.String(default='y', doc="""
Column name for each node's y coordinate.""")

source = param.String(default='source', doc="""
Column name for each edge's source.""")

target = param.String(default='target', doc="""
Column name for each edge's target.""")

weight = param.String(default=None, allow_None=True, doc="""
Column name for each edge weight. If None, weights are ignored.""")

include_edge_id = param.Boolean(default=False, doc="""
Include edge IDs in bundled dataframe""")

@@ -350,8 +389,8 @@ def __call__(self, nodes, edges, **params):
a point with NaN as the x or y value.
"""
p = param.ParamOverrides(self, params)
edges, segment_class = _convert_graph_to_edge_segments(nodes, edges, p.include_edge_id, ignore_weights=True)
return _convert_edge_segments_to_dataframe(edges, segment_class)
edges, segment_class = _convert_graph_to_edge_segments(nodes, edges, p)
return _convert_edge_segments_to_dataframe(edges, segment_class, p)


@nb.jit
@@ -399,23 +438,23 @@ class hammer_bundle(directly_connect_edges):
max_segment_length = param.Number(default=0.016,bounds=(0,None),precedence=-0.5,doc="""
Maximum length (in data space?) for an edge segment""")

include_edge_id = param.Boolean(default=False, doc="""
Include edge IDs in bundled dataframe""")
weight = param.String(default='weight', allow_None=True, doc="""
Column name for each edge weight. If None, weights are ignored.""")

def __call__(self, nodes, edges, **params):
p = param.ParamOverrides(self, params)

# Calculate min/max for coordinates
xmin, xmax = np.min(nodes['x']), np.max(nodes['x'])
ymin, ymax = np.min(nodes['y']), np.max(nodes['y'])
xmin, xmax = np.min(nodes[p.x]), np.max(nodes[p.x])
ymin, ymax = np.min(nodes[p.y]), np.max(nodes[p.y])

# Normalize coordinates
nodes = nodes.copy()
nodes['x'] = minmax_normalize(nodes['x'], xmin, xmax)
nodes['y'] = minmax_normalize(nodes['y'], ymin, ymax)
nodes[p.x] = minmax_normalize(nodes[p.x], xmin, xmax)
nodes[p.y] = minmax_normalize(nodes[p.y], ymin, ymax)

# Convert graph into list of edge segments
edges, segment_class = _convert_graph_to_edge_segments(nodes, edges, p.include_edge_id)
edges, segment_class = _convert_graph_to_edge_segments(nodes, edges, p)

# This is simply to let the work split out over multiple cores
edge_batches = list(batches(edges, p.batch_size))
@@ -460,10 +499,10 @@ def __call__(self, nodes, edges, **params):
new_segs.extend(batch)

# Convert list of edge segments to Pandas dataframe
df = _convert_edge_segments_to_dataframe(new_segs, segment_class)
df = _convert_edge_segments_to_dataframe(new_segs, segment_class, p)

# Denormalize coordinates
df['x'] = minmax_denormalize(df['x'], xmin, xmax)
df['y'] = minmax_denormalize(df['y'], ymin, ymax)
df[p.x] = minmax_denormalize(df[p.x], xmin, xmax)
df[p.y] = minmax_denormalize(df[p.y], ymin, ymax)

return df
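
For comparison, a sketch of directly_connect_edges with the same hypothetical column names; its weight parameter defaults to None, so edge weights are ignored unless a column is named explicitly:

import pandas as pd
from datashader.bundling import directly_connect_edges

# 'xx', 'yy', 'src', and 'dst' are hypothetical column names.
nodes = pd.DataFrame({'xx': [0.0, 1.0, 1.0], 'yy': [0.0, 0.0, 1.0]})
edges = pd.DataFrame({'src': [0, 0], 'dst': [1, 2]})

# With the default weight=None, no weight column is required or used.
segments = directly_connect_edges(nodes, edges, x='xx', y='yy',
                                  source='src', target='dst')
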
68 changes: 36 additions & 32 deletions datashader/layout.py
@@ -16,6 +16,25 @@ class LayoutAlgorithm(param.ParameterizedFunction):

__abstract = True

seed = param.Integer(default=None, bounds=(0, 2**32-1), doc="""
Random seed used to initialize the pseudo-random number
generator.""")

x = param.String(default='x', doc="""
Column name for each node's x coordinate.""")

y = param.String(default='y', doc="""
Column name for each node's y coordinate.""")

source = param.String(default='source', doc="""
Column name for each edge's source.""")

target = param.String(default='target', doc="""
Column name for each edge's target.""")

weight = param.String(default=None, allow_None=True, doc="""
Column name for each edge weight. If None, weights are ignored.""")

def __call__(self, nodes, edges, **params):
"""
This method takes two dataframes representing a graph's nodes
@@ -36,10 +55,6 @@ class random_layout(LayoutAlgorithm):
Assign coordinates to the nodes randomly.
"""

seed = param.Integer(default=None, bounds=(0, 2**32-1), doc="""
Random seed used to initialize the pseudo-random number
generator.""")

def __call__(self, nodes, edges, **params):
p = param.ParamOverrides(self, params)

@@ -48,8 +63,8 @@ def __call__(self, nodes, edges, **params):
df = nodes.copy()
points = np.asarray(np.random.random((len(df), 2)))

df['x'] = points[:, 0]
df['y'] = points[:, 1]
df[p.x] = points[:, 0]
df[p.y] = points[:, 1]

return df

Expand All @@ -64,10 +79,6 @@ class circular_layout(LayoutAlgorithm):
uniform = param.Boolean(True, doc="""
Whether to distribute nodes evenly on circle""")

seed = param.Integer(default=None, bounds=(0, 2**32-1), doc="""
Random seed used to initialize the pseudo-random number
generator.""")

def __call__(self, nodes, edges, **params):
p = param.ParamOverrides(self, params)

@@ -84,15 +95,15 @@ def __call__(self, nodes, edges, **params):
else:
thetas = np.asarray(np.random.random((len(df),))) * circumference

df['x'] = x0 + r * np.cos(thetas)
df['y'] = y0 + r * np.sin(thetas)
df[p.x] = x0 + r * np.cos(thetas)
df[p.y] = y0 + r * np.sin(thetas)

return df


def _extract_points_from_nodes(nodes, params, dtype=None):
if 'x' in nodes.columns and 'y' in nodes.columns:
points = np.asarray(nodes[['x', 'y']])
if params.x in nodes.columns and params.y in nodes.columns:
points = np.asarray(nodes[[params.x, params.y]])
else:
points = np.asarray(np.random.random((len(nodes), params.dim)), dtype=dtype)
return points
@@ -105,13 +116,13 @@ def _convert_graph_to_sparse_matrix(nodes, edges, params, dtype=None, format='cs
else:
index = dict(zip(nodes.index.values, range(nlen)))

if params.use_weights and 'weight' in edges:
edge_values = edges[['source', 'target', 'weight']].values
if params.weight and params.weight in edges:
edge_values = edges[[params.source, params.target, params.weight]].values
rows, cols, data = zip(*((index[src], index[dst], weight)
for src, dst, weight in edge_values
if src in index and dst in index))
else:
edge_values = edges[['source', 'target']].values
edge_values = edges[[params.source, params.target]].values
rows, cols, data = zip(*((index[src], index[dst], 1)
for src, dst in edge_values
if src in index and dst in index))
@@ -122,15 +133,15 @@ def _convert_graph_to_sparse_matrix(nodes, edges, params, dtype=None, format='cs
c = cols + rows

# Check for nodes pointing to themselves
loops = edges[edges['source'] == edges['target']]
loops = edges[edges[params.source] == edges[params.target]]
if len(loops):
if params.use_weights and 'weight' in edges:
loop_values = loops[['source', 'target', 'weight']].values
if params.weight and params.weight in edges:
loop_values = loops[[params.source, params.target, params.weight]].values
diag_index, diag_data = zip(*((index[src], -weight)
for src, dst, weight in loop_values
if src in index and dst in index))
else:
loop_values = loops[['source', 'target']].values
loop_values = loops[[params.source, params.target]].values
diag_index, diag_data = zip(*((index[src], -1)
for src, dst in loop_values
if src in index and dst in index))
@@ -142,10 +153,10 @@ def _convert_graph_to_sparse_matrix(nodes, edges, params, dtype=None, format='cs
return M.asformat(format)


def _merge_points_with_nodes(nodes, points):
def _merge_points_with_nodes(nodes, points, params):
n = nodes.copy()
n['x'] = points[:, 0]
n['y'] = points[:, 1]
n[params.x] = points[:, 0]
n[params.y] = points[:, 1]
return n


@@ -220,13 +231,6 @@ class forceatlas2_layout(LayoutAlgorithm):
dim = param.Integer(default=2, bounds=(1, None), doc="""
Coordinate dimensions of each node""")

seed = param.Integer(default=None, bounds=(0, 2**32-1), doc="""
Random seed used to initialize the pseudo-random number
generator.""")

use_weights = param.Boolean(True, doc="""
Whether to use weights during layout""")

def __call__(self, nodes, edges, **params):
p = param.ParamOverrides(self, params)

@@ -248,4 +252,4 @@ def __call__(self, nodes, edges, **params):
cooling(matrix, points, temperature, p)

# Return the nodes with updated positions
return _merge_points_with_nodes(nodes, points)
return _merge_points_with_nodes(nodes, points, p)
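
The layout functions gain the same parameters via LayoutAlgorithm; a sketch (again with hypothetical column names) that opts in to weights, since the new default is to ignore them:

import pandas as pd
from datashader.layout import forceatlas2_layout

# 'px', 'py', 'src', 'dst', and 'w' are hypothetical column names.
nodes = pd.DataFrame({'label': ['a', 'b', 'c']})
edges = pd.DataFrame({'src': [0, 1], 'dst': [1, 2], 'w': [1.0, 2.0]})

# Positions are written to the named x/y columns; edge weights are
# used only because a weight column name is supplied.
positions = forceatlas2_layout(nodes, edges, x='px', y='py',
                               source='src', target='dst', weight='w')
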
12 changes: 12 additions & 0 deletions datashader/tests/test_bundling.py
@@ -49,6 +49,18 @@ def test_immutable_nodes(nodes, edges):
assert original.equals(nodes)


@pytest.mark.parametrize('bundle', [directly_connect_edges, hammer_bundle])
def test_renamed_columns(nodes, weighted_edges, bundle):
nodes = nodes.rename(columns={'x': 'xx', 'y': 'yy'})
edges = weighted_edges.rename(columns={'source': 'src', 'target': 'dst', 'weight': 'w'})

df = bundle(nodes, edges, x='xx', y='yy', source='src', target='dst', weight='w')

assert 'xx' in df and 'x' not in df
assert 'yy' in df and 'y' not in df
assert 'w' in df and 'weight' not in df


@pytest.mark.parametrize('bundle', [directly_connect_edges, hammer_bundle])
@pytest.mark.parametrize('layout', [random_layout, circular_layout, forceatlas2_layout])
def test_same_path_endpoints(layout, bundle):