Parameterize node/edge columns (holoviz#494)
* Instead of requiring hard-coded column names for nodes, edges, and weights, this change provides parameters so they can be specified per call (see the usage sketch below).
* If the name of the weight column is None, weights are ignored.
* Changed forceatlas2_layout to ignore weights by default.
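
A minimal usage sketch of the new keyword parameters, mirroring the renamed-columns test added in this commit ('xx', 'yy', 'src', 'dst', and 'w' are hypothetical column names, not library defaults):

import pandas as pd
from datashader.bundling import hammer_bundle

# 'xx', 'yy', 'src', 'dst', and 'w' are hypothetical column names.
nodes = pd.DataFrame({'xx': [0.0, 1.0, 1.0], 'yy': [0.0, 0.0, 1.0]})
edges = pd.DataFrame({'src': [0, 0], 'dst': [1, 2], 'w': [1.0, 2.0]})

# Column names are passed per call instead of being hard-coded;
# passing weight=None would ignore edge weights entirely.
bundled = hammer_bundle(nodes, edges, x='xx', y='yy',
                        source='src', target='dst', weight='w')
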
jbcrail authored and jbednar committed Oct 17, 2017
1 parent e8de1b5 commit 8ee50c3
Showing 4 changed files with 133 additions and 63 deletions.
101 changes: 70 additions & 31 deletions datashader/bundling.py
@@ -191,10 +191,16 @@ def create_delimiter(cls):

class UnweightedSegment(BaseSegment):
ndims = 3
columns = ['edge_id', 'x', 'y']
merged_columns = ['edge_id', 'src_x', 'src_y', 'dst_x', 'dst_y']
idx, idy = 1, 2

@staticmethod
def get_columns(params):
return ['edge_id', params.x, params.y]

@staticmethod
def get_merged_columns(params):
return ['edge_id', 'src_x', 'src_y', 'dst_x', 'dst_y']

@staticmethod
@nb.jit
def create_segment(edge):
@@ -208,10 +214,16 @@ def accumulate(img, point, accuracy):

class EdgelessUnweightedSegment(BaseSegment):
ndims = 2
columns = ['x', 'y']
merged_columns = ['src_x', 'src_y', 'dst_x', 'dst_y']
idx, idy = 0, 1

@staticmethod
def get_columns(params):
return [params.x, params.y]

@staticmethod
def get_merged_columns(params):
return ['src_x', 'src_y', 'dst_x', 'dst_y']

@staticmethod
@nb.jit
def create_segment(edge):
@@ -225,10 +237,16 @@ def accumulate(img, point, accuracy):

class WeightedSegment(BaseSegment):
ndims = 4
columns = ['edge_id', 'x', 'y', 'weight']
merged_columns = ['edge_id', 'src_x', 'src_y', 'dst_x', 'dst_y', 'weight']
idx, idy = 1, 2

@staticmethod
def get_columns(params):
return ['edge_id', params.x, params.y, params.weight]

@staticmethod
def get_merged_columns(params):
return ['edge_id', 'src_x', 'src_y', 'dst_x', 'dst_y', params.weight]

@staticmethod
@nb.jit
def create_segment(edge):
@@ -242,10 +260,16 @@ def accumulate(img, point, accuracy):

class EdgelessWeightedSegment(BaseSegment):
ndims = 3
columns = ['x', 'y', 'weight']
merged_columns = ['src_x', 'src_y', 'dst_x', 'dst_y', 'weight']
idx, idy = 0, 1

@staticmethod
def get_columns(params):
return [params.x, params.y, params.weight]

@staticmethod
def get_merged_columns(params):
return ['src_x', 'src_y', 'dst_x', 'dst_y', params.weight]

@staticmethod
@nb.jit
def create_segment(edge):
@@ -257,7 +281,7 @@ def accumulate(img, point, accuracy):
img[int(point[0] * accuracy), int(point[1] * accuracy)] += point[2]


def _convert_graph_to_edge_segments(nodes, edges, include_edge_id, ignore_weights=False):
def _convert_graph_to_edge_segments(nodes, edges, params):
"""
Merge graph dataframes into a list of edge segments.
@@ -272,21 +296,21 @@ def _convert_graph_to_edge_segments(nodes, edges, include_edge_id, ignore_weight
the accumulator function for drawing to an image.
"""

df = pd.merge(edges, nodes, left_on=['source'], right_index=True)
df = df.rename(columns={'x': 'src_x', 'y': 'src_y'})
df = pd.merge(edges, nodes, left_on=[params.source], right_index=True)
df = df.rename(columns={params.x: 'src_x', params.y: 'src_y'})

df = pd.merge(df, nodes, left_on=['target'], right_index=True)
df = df.rename(columns={'x': 'dst_x', 'y': 'dst_y'})
df = pd.merge(df, nodes, left_on=[params.target], right_index=True)
df = df.rename(columns={params.x: 'dst_x', params.y: 'dst_y'})

df = df.sort_index()
df = df.reset_index()

if include_edge_id:
if params.include_edge_id:
df = df.rename(columns={'id': 'edge_id'})

include_weight = not ignore_weights and 'weight' in edges
include_weight = params.weight and params.weight in edges

if include_edge_id:
if params.include_edge_id:
if include_weight:
segment_class = WeightedSegment
else:
@@ -297,15 +321,15 @@ def _convert_graph_to_edge_segments(nodes, edges, include_edge_id, ignore_weight
else:
segment_class = EdgelessUnweightedSegment

df = df.filter(items=segment_class.merged_columns)
df = df.filter(items=segment_class.get_merged_columns(params))

edge_segments = []
for edge in df.get_values():
edge_segments.append(segment_class.create_segment(edge))
return edge_segments, segment_class


def _convert_edge_segments_to_dataframe(edge_segments, segment_class):
def _convert_edge_segments_to_dataframe(edge_segments, segment_class, params):
"""
Convert list of edge segments into a dataframe.
@@ -321,7 +345,7 @@ def edge_iterator():
yield segment_class.create_delimiter()

df = DataFrame(np.concatenate(list(edge_iterator())))
df.columns = segment_class.columns
df.columns = segment_class.get_columns(params)
return df


@@ -334,6 +358,21 @@ class directly_connect_edges(param.ParameterizedFunction):
curved or manhattan-style polylines.
"""

x = param.String(default='x', doc="""
Column name for each node's x coordinate.""")

y = param.String(default='y', doc="""
Column name for each node's y coordinate.""")

source = param.String(default='source', doc="""
Column name for each edge's source.""")

target = param.String(default='target', doc="""
Column name for each edge's target.""")

weight = param.String(default=None, allow_None=True, doc="""
Column name for each edge weight. If None, weights are ignored.""")

include_edge_id = param.Boolean(default=False, doc="""
Include edge IDs in bundled dataframe""")

@@ -350,8 +389,8 @@ def __call__(self, nodes, edges, **params):
a point with NaN as the x or y value.
"""
p = param.ParamOverrides(self, params)
edges, segment_class = _convert_graph_to_edge_segments(nodes, edges, p.include_edge_id, ignore_weights=True)
return _convert_edge_segments_to_dataframe(edges, segment_class)
edges, segment_class = _convert_graph_to_edge_segments(nodes, edges, p)
return _convert_edge_segments_to_dataframe(edges, segment_class, p)


@nb.jit
@@ -399,23 +438,23 @@ class hammer_bundle(directly_connect_edges):
max_segment_length = param.Number(default=0.016,bounds=(0,None),precedence=-0.5,doc="""
Maximum length (in data space?) for an edge segment""")

include_edge_id = param.Boolean(default=False, doc="""
Include edge IDs in bundled dataframe""")
weight = param.String(default='weight', allow_None=True, doc="""
Column name for each edge weight. If None, weights are ignored.""")

def __call__(self, nodes, edges, **params):
p = param.ParamOverrides(self, params)

# Calculate min/max for coordinates
xmin, xmax = np.min(nodes['x']), np.max(nodes['x'])
ymin, ymax = np.min(nodes['y']), np.max(nodes['y'])
xmin, xmax = np.min(nodes[p.x]), np.max(nodes[p.x])
ymin, ymax = np.min(nodes[p.y]), np.max(nodes[p.y])

# Normalize coordinates
nodes = nodes.copy()
nodes['x'] = minmax_normalize(nodes['x'], xmin, xmax)
nodes['y'] = minmax_normalize(nodes['y'], ymin, ymax)
nodes[p.x] = minmax_normalize(nodes[p.x], xmin, xmax)
nodes[p.y] = minmax_normalize(nodes[p.y], ymin, ymax)

# Convert graph into list of edge segments
edges, segment_class = _convert_graph_to_edge_segments(nodes, edges, p.include_edge_id)
edges, segment_class = _convert_graph_to_edge_segments(nodes, edges, p)

# This is simply to let the work split out over multiple cores
edge_batches = list(batches(edges, p.batch_size))
@@ -460,10 +499,10 @@ def __call__(self, nodes, edges, **params):
new_segs.extend(batch)

# Convert list of edge segments to Pandas dataframe
df = _convert_edge_segments_to_dataframe(new_segs, segment_class)
df = _convert_edge_segments_to_dataframe(new_segs, segment_class, p)

# Denormalize coordinates
df['x'] = minmax_denormalize(df['x'], xmin, xmax)
df['y'] = minmax_denormalize(df['y'], ymin, ymax)
df[p.x] = minmax_denormalize(df[p.x], xmin, xmax)
df[p.y] = minmax_denormalize(df[p.y], ymin, ymax)

return df
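
For comparison, a sketch of directly_connect_edges with the same hypothetical column names; its weight parameter defaults to None, so edge weights are ignored unless a column is named explicitly:

import pandas as pd
from datashader.bundling import directly_connect_edges

# 'xx', 'yy', 'src', and 'dst' are hypothetical column names.
nodes = pd.DataFrame({'xx': [0.0, 1.0, 1.0], 'yy': [0.0, 0.0, 1.0]})
edges = pd.DataFrame({'src': [0, 0], 'dst': [1, 2]})

# With the default weight=None, no weight column is required or used.
segments = directly_connect_edges(nodes, edges, x='xx', y='yy',
                                  source='src', target='dst')
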
68 changes: 36 additions & 32 deletions datashader/layout.py
@@ -16,6 +16,25 @@ class LayoutAlgorithm(param.ParameterizedFunction):

__abstract = True

seed = param.Integer(default=None, bounds=(0, 2**32-1), doc="""
Random seed used to initialize the pseudo-random number
generator.""")

x = param.String(default='x', doc="""
Column name for each node's x coordinate.""")

y = param.String(default='y', doc="""
Column name for each node's y coordinate.""")

source = param.String(default='source', doc="""
Column name for each edge's source.""")

target = param.String(default='target', doc="""
Column name for each edge's target.""")

weight = param.String(default=None, allow_None=True, doc="""
Column name for each edge weight. If None, weights are ignored.""")

def __call__(self, nodes, edges, **params):
"""
This method takes two dataframes representing a graph's nodes
@@ -36,10 +55,6 @@ class random_layout(LayoutAlgorithm):
Assign coordinates to the nodes randomly.
"""

seed = param.Integer(default=None, bounds=(0, 2**32-1), doc="""
Random seed used to initialize the pseudo-random number
generator.""")

def __call__(self, nodes, edges, **params):
p = param.ParamOverrides(self, params)

@@ -48,8 +63,8 @@ def __call__(self, nodes, edges, **params):
df = nodes.copy()
points = np.asarray(np.random.random((len(df), 2)))

df['x'] = points[:, 0]
df['y'] = points[:, 1]
df[p.x] = points[:, 0]
df[p.y] = points[:, 1]

return df

Expand All @@ -64,10 +79,6 @@ class circular_layout(LayoutAlgorithm):
uniform = param.Boolean(True, doc="""
Whether to distribute nodes evenly on circle""")

seed = param.Integer(default=None, bounds=(0, 2**32-1), doc="""
Random seed used to initialize the pseudo-random number
generator.""")

def __call__(self, nodes, edges, **params):
p = param.ParamOverrides(self, params)

@@ -84,15 +95,15 @@ def __call__(self, nodes, edges, **params):
else:
thetas = np.asarray(np.random.random((len(df),))) * circumference

df['x'] = x0 + r * np.cos(thetas)
df['y'] = y0 + r * np.sin(thetas)
df[p.x] = x0 + r * np.cos(thetas)
df[p.y] = y0 + r * np.sin(thetas)

return df


def _extract_points_from_nodes(nodes, params, dtype=None):
if 'x' in nodes.columns and 'y' in nodes.columns:
points = np.asarray(nodes[['x', 'y']])
if params.x in nodes.columns and params.y in nodes.columns:
points = np.asarray(nodes[[params.x, params.y]])
else:
points = np.asarray(np.random.random((len(nodes), params.dim)), dtype=dtype)
return points
@@ -105,13 +116,13 @@ def _convert_graph_to_sparse_matrix(nodes, edges, params, dtype=None, format='cs
else:
index = dict(zip(nodes.index.values, range(nlen)))

if params.use_weights and 'weight' in edges:
edge_values = edges[['source', 'target', 'weight']].values
if params.weight and params.weight in edges:
edge_values = edges[[params.source, params.target, params.weight]].values
rows, cols, data = zip(*((index[src], index[dst], weight)
for src, dst, weight in edge_values
if src in index and dst in index))
else:
edge_values = edges[['source', 'target']].values
edge_values = edges[[params.source, params.target]].values
rows, cols, data = zip(*((index[src], index[dst], 1)
for src, dst in edge_values
if src in index and dst in index))
@@ -122,15 +133,15 @@ def _convert_graph_to_sparse_matrix(nodes, edges, params, dtype=None, format='cs
c = cols + rows

# Check for nodes pointing to themselves
loops = edges[edges['source'] == edges['target']]
loops = edges[edges[params.source] == edges[params.target]]
if len(loops):
if params.use_weights and 'weight' in edges:
loop_values = loops[['source', 'target', 'weight']].values
if params.weight and params.weight in edges:
loop_values = loops[[params.source, params.target, params.weight]].values
diag_index, diag_data = zip(*((index[src], -weight)
for src, dst, weight in loop_values
if src in index and dst in index))
else:
loop_values = loops[['source', 'target']].values
loop_values = loops[[params.source, params.target]].values
diag_index, diag_data = zip(*((index[src], -1)
for src, dst in loop_values
if src in index and dst in index))
@@ -142,10 +153,10 @@ def _convert_graph_to_sparse_matrix(nodes, edges, params, dtype=None, format='cs
return M.asformat(format)


def _merge_points_with_nodes(nodes, points):
def _merge_points_with_nodes(nodes, points, params):
n = nodes.copy()
n['x'] = points[:, 0]
n['y'] = points[:, 1]
n[params.x] = points[:, 0]
n[params.y] = points[:, 1]
return n


@@ -220,13 +231,6 @@ class forceatlas2_layout(LayoutAlgorithm):
dim = param.Integer(default=2, bounds=(1, None), doc="""
Coordinate dimensions of each node""")

seed = param.Integer(default=None, bounds=(0, 2**32-1), doc="""
Random seed used to initialize the pseudo-random number
generator.""")

use_weights = param.Boolean(True, doc="""
Whether to use weights during layout""")

def __call__(self, nodes, edges, **params):
p = param.ParamOverrides(self, params)

@@ -248,4 +252,4 @@ def __call__(self, nodes, edges, **params):
cooling(matrix, points, temperature, p)

# Return the nodes with updated positions
return _merge_points_with_nodes(nodes, points)
return _merge_points_with_nodes(nodes, points, p)
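
The layout functions gain the same parameters via LayoutAlgorithm; a sketch (again with hypothetical column names) that opts in to weights, since the new default is to ignore them:

import pandas as pd
from datashader.layout import forceatlas2_layout

# 'px', 'py', 'src', 'dst', and 'w' are hypothetical column names.
nodes = pd.DataFrame({'label': ['a', 'b', 'c']})
edges = pd.DataFrame({'src': [0, 1], 'dst': [1, 2], 'w': [1.0, 2.0]})

# Positions are written to the named x/y columns; edge weights are
# used only because a weight column name is supplied.
positions = forceatlas2_layout(nodes, edges, x='px', y='py',
                               source='src', target='dst', weight='w')
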
12 changes: 12 additions & 0 deletions datashader/tests/test_bundling.py
@@ -49,6 +49,18 @@ def test_immutable_nodes(nodes, edges):
assert original.equals(nodes)


@pytest.mark.parametrize('bundle', [directly_connect_edges, hammer_bundle])
def test_renamed_columns(nodes, weighted_edges, bundle):
nodes = nodes.rename(columns={'x': 'xx', 'y': 'yy'})
edges = weighted_edges.rename(columns={'source': 'src', 'target': 'dst', 'weight': 'w'})

df = bundle(nodes, edges, x='xx', y='yy', source='src', target='dst', weight='w')

assert 'xx' in df and 'x' not in df
assert 'yy' in df and 'y' not in df
assert 'w' in df and 'weight' not in df


@pytest.mark.parametrize('bundle', [directly_connect_edges, hammer_bundle])
@pytest.mark.parametrize('layout', [random_layout, circular_layout, forceatlas2_layout])
def test_same_path_endpoints(layout, bundle):