Skip to content

Commit

Permalink
KD Tree extensions
Browse files Browse the repository at this point in the history
  • Loading branch information
peekxc committed Aug 23, 2017
1 parent 8e4d889 commit fd3228f
Show file tree
Hide file tree
Showing 15 changed files with 525 additions and 197 deletions.
8 changes: 7 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ export(
glosh,
pointdensity,
hullplot,
as.reachability
as.reachability,
kd_tree
)

S3method(print, optics)
Expand Down Expand Up @@ -61,3 +62,8 @@ S3method(plot, NN)
S3method(sort, kNN)
S3method(sort, frNN)
S3method(sort, sNN)

S3method(print, kd_tree)
S3method(summary, kd_tree)
S3method(str, kd_tree)

78 changes: 47 additions & 31 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,122 +2,138 @@
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

distToAdjacency <- function(constraints, N) {
.Call('dbscan_distToAdjacency', PACKAGE = 'dbscan', constraints, N)
.Call('_dbscan_distToAdjacency', PACKAGE = 'dbscan', constraints, N)
}

buildDendrogram <- function(hcl) {
.Call('dbscan_buildDendrogram', PACKAGE = 'dbscan', hcl)
.Call('_dbscan_buildDendrogram', PACKAGE = 'dbscan', hcl)
}

all_children <- function(hier, key, leaves_only = FALSE) {
.Call('dbscan_all_children', PACKAGE = 'dbscan', hier, key, leaves_only)
.Call('_dbscan_all_children', PACKAGE = 'dbscan', hier, key, leaves_only)
}

node_xy <- function(cl_tree, cl_hierarchy, cid = 0L) {
.Call('dbscan_node_xy', PACKAGE = 'dbscan', cl_tree, cl_hierarchy, cid)
.Call('_dbscan_node_xy', PACKAGE = 'dbscan', cl_tree, cl_hierarchy, cid)
}

simplifiedTree <- function(cl_tree) {
.Call('dbscan_simplifiedTree', PACKAGE = 'dbscan', cl_tree)
.Call('_dbscan_simplifiedTree', PACKAGE = 'dbscan', cl_tree)
}

computeStability <- function(hcl, minPts, compute_glosh = FALSE) {
.Call('dbscan_computeStability', PACKAGE = 'dbscan', hcl, minPts, compute_glosh)
.Call('_dbscan_computeStability', PACKAGE = 'dbscan', hcl, minPts, compute_glosh)
}

validateConstraintList <- function(constraints, n) {
.Call('dbscan_validateConstraintList', PACKAGE = 'dbscan', constraints, n)
.Call('_dbscan_validateConstraintList', PACKAGE = 'dbscan', constraints, n)
}

computeVirtualNode <- function(noise, constraints) {
.Call('dbscan_computeVirtualNode', PACKAGE = 'dbscan', noise, constraints)
.Call('_dbscan_computeVirtualNode', PACKAGE = 'dbscan', noise, constraints)
}

fosc <- function(cl_tree, cid, sc, cl_hierarchy, prune_unstable_leaves = FALSE, alpha = 0, useVirtual = FALSE, n_constraints = 0L, constraints = NULL) {
.Call('dbscan_fosc', PACKAGE = 'dbscan', cl_tree, cid, sc, cl_hierarchy, prune_unstable_leaves, alpha, useVirtual, n_constraints, constraints)
.Call('_dbscan_fosc', PACKAGE = 'dbscan', cl_tree, cid, sc, cl_hierarchy, prune_unstable_leaves, alpha, useVirtual, n_constraints, constraints)
}

extractUnsupervised <- function(cl_tree, prune_unstable = FALSE) {
.Call('dbscan_extractUnsupervised', PACKAGE = 'dbscan', cl_tree, prune_unstable)
.Call('_dbscan_extractUnsupervised', PACKAGE = 'dbscan', cl_tree, prune_unstable)
}

extractSemiSupervised <- function(cl_tree, constraints, alpha = 0, prune_unstable_leaves = FALSE) {
.Call('dbscan_extractSemiSupervised', PACKAGE = 'dbscan', cl_tree, constraints, alpha, prune_unstable_leaves)
.Call('_dbscan_extractSemiSupervised', PACKAGE = 'dbscan', cl_tree, constraints, alpha, prune_unstable_leaves)
}

reach_to_dendrogram <- function(reachability, pl_order) {
.Call('dbscan_reach_to_dendrogram', PACKAGE = 'dbscan', reachability, pl_order)
.Call('_dbscan_reach_to_dendrogram', PACKAGE = 'dbscan', reachability, pl_order)
}

dendrogram_to_reach <- function(x) {
.Call('dbscan_dendrogram_to_reach', PACKAGE = 'dbscan', x)
.Call('_dbscan_dendrogram_to_reach', PACKAGE = 'dbscan', x)
}

mst_to_dendrogram <- function(mst) {
.Call('dbscan_mst_to_dendrogram', PACKAGE = 'dbscan', mst)
.Call('_dbscan_mst_to_dendrogram', PACKAGE = 'dbscan', mst)
}

mrd <- function(dm, cd) {
.Call('dbscan_mrd', PACKAGE = 'dbscan', dm, cd)
.Call('_dbscan_mrd', PACKAGE = 'dbscan', dm, cd)
}

mrd_m <- function(dm, cd) {
.Call('dbscan_mrd_m', PACKAGE = 'dbscan', dm, cd)
.Call('_dbscan_mrd_m', PACKAGE = 'dbscan', dm, cd)
}

coreFromDist <- function(dist, n, minPts) {
.Call('dbscan_coreFromDist', PACKAGE = 'dbscan', dist, n, minPts)
.Call('_dbscan_coreFromDist', PACKAGE = 'dbscan', dist, n, minPts)
}

prims <- function(x_dist, n) {
.Call('dbscan_prims', PACKAGE = 'dbscan', x_dist, n)
.Call('_dbscan_prims', PACKAGE = 'dbscan', x_dist, n)
}

order_ <- function(x) {
.Call('dbscan_order_', PACKAGE = 'dbscan', x)
.Call('_dbscan_order_', PACKAGE = 'dbscan', x)
}

hclustMergeOrder <- function(mst, o) {
.Call('dbscan_hclustMergeOrder', PACKAGE = 'dbscan', mst, o)
.Call('_dbscan_hclustMergeOrder', PACKAGE = 'dbscan', mst, o)
}

dbscan_int <- function(data, eps, minPts, weights, borderPoints, type, bucketSize, splitRule, approx, frNN) {
.Call('dbscan_dbscan_int', PACKAGE = 'dbscan', data, eps, minPts, weights, borderPoints, type, bucketSize, splitRule, approx, frNN)
dbscan_int <- function(data, eps, minPts, weights, borderPoints, type, bucketSize, splitRule, approx, frNN, kd_tree) {
.Call('_dbscan_dbscan_int', PACKAGE = 'dbscan', data, eps, minPts, weights, borderPoints, type, bucketSize, splitRule, approx, frNN, kd_tree)
}

dbscan_density_int <- function(data, eps, type, bucketSize, splitRule, approx) {
.Call('dbscan_dbscan_density_int', PACKAGE = 'dbscan', data, eps, type, bucketSize, splitRule, approx)
.Call('_dbscan_dbscan_density_int', PACKAGE = 'dbscan', data, eps, type, bucketSize, splitRule, approx)
}

frNN_int <- function(data, eps, type, bucketSize, splitRule, approx) {
.Call('dbscan_frNN_int', PACKAGE = 'dbscan', data, eps, type, bucketSize, splitRule, approx)
.Call('_dbscan_frNN_int', PACKAGE = 'dbscan', data, eps, type, bucketSize, splitRule, approx)
}

JP_int <- function(nn, kt) {
.Call('dbscan_JP_int', PACKAGE = 'dbscan', nn, kt)
.Call('_dbscan_JP_int', PACKAGE = 'dbscan', nn, kt)
}

SNN_sim_int <- function(nn) {
.Call('dbscan_SNN_sim_int', PACKAGE = 'dbscan', nn)
.Call('_dbscan_SNN_sim_int', PACKAGE = 'dbscan', nn)
}

kd_tree_int <- function(data, bucketSize, splitRule) {
.Call('_dbscan_kd_tree_int', PACKAGE = 'dbscan', data, bucketSize, splitRule)
}

printKdTree <- function(kdtree_ptr, with_pts) {
invisible(.Call('_dbscan_printKdTree', PACKAGE = 'dbscan', kdtree_ptr, with_pts))
}

printKdTreeStats <- function(kdtree_ptr) {
invisible(.Call('_dbscan_printKdTreeStats', PACKAGE = 'dbscan', kdtree_ptr))
}

test_KDtree <- function(kdtree_ptr, k, approx = 0) {
.Call('_dbscan_test_KDtree', PACKAGE = 'dbscan', kdtree_ptr, k, approx)
}

kNN_int <- function(data, k, type, bucketSize, splitRule, approx) {
.Call('dbscan_kNN_int', PACKAGE = 'dbscan', data, k, type, bucketSize, splitRule, approx)
.Call('_dbscan_kNN_int', PACKAGE = 'dbscan', data, k, type, bucketSize, splitRule, approx)
}

optics_int <- function(data, eps, minPts, type, bucketSize, splitRule, approx, frNN) {
.Call('dbscan_optics_int', PACKAGE = 'dbscan', data, eps, minPts, type, bucketSize, splitRule, approx, frNN)
.Call('_dbscan_optics_int', PACKAGE = 'dbscan', data, eps, minPts, type, bucketSize, splitRule, approx, frNN)
}

lowerTri <- function(m) {
.Call('dbscan_lowerTri', PACKAGE = 'dbscan', m)
.Call('_dbscan_lowerTri', PACKAGE = 'dbscan', m)
}

combine <- function(t1, t2) {
.Call('dbscan_combine', PACKAGE = 'dbscan', t1, t2)
.Call('_dbscan_combine', PACKAGE = 'dbscan', t1, t2)
}

concat_int <- function(container) {
.Call('dbscan_concat_int', PACKAGE = 'dbscan', container)
.Call('_dbscan_concat_int', PACKAGE = 'dbscan', container)
}

19 changes: 17 additions & 2 deletions R/dbscan.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

dbscan <- function(x, eps, minPts = 5, weights = NULL, borderPoints = TRUE,
...) {
kd_tree = NULL, ...) {

if(is(x, "frNN") && missing(eps)) eps <- x$eps

Expand Down Expand Up @@ -87,10 +87,25 @@ dbscan <- function(x, eps, minPts = 5, weights = NULL, borderPoints = TRUE,
if(length(frNN) > 0)
frNN <- lapply(1:length(frNN), FUN = function(i) c(i-1L, frNN[[i]]-1L))

## Check to see if kd tree was given as input
kd_tree_inp <- kd_tree
if (missing(kd_tree) || is.null(kd_tree)){
kd_tree_inp <- NULL
} else {
if (!is(kd_tree, "kd_tree")) stop("object passed to 'kd_tree' parameter not a kd_tree object.")

## Extract the external pointer
kd_tree_inp <- attr(kd_tree, ".kd_tree_ptr")
if (class(kd_tree_inp) != "externalptr" || deparse(kd_tree_inp) == "<pointer: 0x0>"){
stop("kd_tree pointing to invalid memory location. Often this happens when a kd_tree R object
is saved between R sessions. Please rebuild the tree.")
}
}

ret <- dbscan_int(x, as.double(eps), as.integer(minPts),
as.double(weights), as.integer(borderPoints),
as.integer(search), as.integer(bucketSize),
as.integer(splitRule), as.double(approx), frNN)
as.integer(splitRule), as.double(approx), frNN, kd_tree_inp)

structure(list(cluster = ret, eps = eps, minPts = minPts),
class = c("dbscan_fast", "dbscan"))
Expand Down
71 changes: 71 additions & 0 deletions R/kd_tree.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#######################################################################
# dbscan - Density Based Clustering of Applications with Noise
# and Related Algorithms
# Copyright (C) 2015 Michael Hahsler, Matthew Piekenbrock

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

kd_tree <- function(x, bucketSize = 10L, splitRule = "suggest"){

## Check bucket size
bucketSize <- if(missing(bucketSize)) 10L else as.integer(bucketSize)

## Check split rule
splitRule <- pmatch(toupper(splitRule), .ANNsplitRule)-1L
if(is.na(splitRule)) stop("Unknown splitRule!")

## Check data x
if(!.matrixlike(x)) stop("x needs to be a matrix")
## make sure x is numeric
x <- as.matrix(x)
if(storage.mode(x) == "integer") storage.mode(x) <- "double"
if(storage.mode(x) != "double") stop("x has to be a numeric matrix.")

## Get the ANN kd tree Xptr
kdtree_ptr <- kd_tree_int(x, bucketSize, splitRule)

## Sanity check
if (deparse(kdtree_ptr) == "<pointer: 0x0>"){
stop("Unable to create kd tree with the given data.")
}

## Create kd tree R structure
res <- structure(list(bucketSize = bucketSize,
splitRule = .ANNsplitRule[splitRule+1],
call = match.call()), class = "kd_tree")
attr(res, ".kd_tree_ptr") <- kdtree_ptr
return(res)
}

print.kd_tree <- function(x){
writeLines(c(
paste0("KD Tree created from call: ", deparse(x$call)),
paste0("Parameters: bucketSize = ", x$bucketSize, ", splitRule = ", x$splitRule)
))
}

summary.kd_tree <- function(x){
kdtree_ptr <- attr(x, ".kd_tree_ptr")
if (!is.null(kdtree_ptr) && class(kdtree_ptr) == "externalptr" && deparse(kdtree_ptr) != "<pointer: 0x0>"){
printKdTreeStats(kdtree_ptr)
}
}

str.kd_tree <- function(x){
kdtree_ptr <- attr(x, ".kd_tree_ptr")
if (!is.null(kdtree_ptr) && class(kdtree_ptr) == "externalptr" && deparse(kdtree_ptr) != "<pointer: 0x0>"){
printKdTree(kdtree_ptr)
}
}
5 changes: 3 additions & 2 deletions src/ANN/ANN.h
Original file line number Diff line number Diff line change
Expand Up @@ -788,9 +788,10 @@ class DLL_API ANNkd_tree: public ANNpointSet {
ANNpointArray thePoints() // return pointer to points
{ return pts; }

// MJP 08/23/2017
// Removed ostream from print methods
virtual void Print( // print the tree (for debugging)
ANNbool with_pts, // print points as well?
std::ostream& out); // output stream
ANNbool with_pts); // print points as well?

virtual void Dump( // dump entire tree
ANNbool with_pts, // print points as well?
Expand Down
20 changes: 14 additions & 6 deletions src/R_dbscan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <Rcpp.h>
#include "ANN/ANN.h"
#include "R_regionQuery.h"
#include "R_kd_tree.h"

using namespace Rcpp;

Expand All @@ -24,7 +25,7 @@ using namespace Rcpp;
IntegerVector dbscan_int(
NumericMatrix data, double eps, int minPts, NumericVector weights,
int borderPoints, int type, int bucketSize, int splitRule, double approx,
List frNN) {
List frNN, SEXP kd_tree) {

// kd-tree uses squared distances
double eps2 = eps*eps;
Expand All @@ -39,7 +40,7 @@ IntegerVector dbscan_int(
if(frNN.size()) {
// no kd-tree but use frNN list from distances
nrow = frNN.size();
}else{
} else{

// copy data for kd-tree
nrow = data.nrow();
Expand All @@ -53,9 +54,15 @@ IntegerVector dbscan_int(
//Rprintf("Points copied.\n");

// create kd-tree (1) or linear search structure (2)
if (type==1) kdTree = new ANNkd_tree(dataPts, nrow, ncol, bucketSize,
(ANNsplitRule) splitRule);
else kdTree = new ANNbruteForce(dataPts, nrow, ncol);
if (kd_tree == NULL || Rf_isNull(kd_tree)){
// Rcout << "Building kd tree" << std::endl;
if (type==1) kdTree = new ANNkd_tree(dataPts, nrow, ncol, bucketSize,
(ANNsplitRule) splitRule);
else kdTree = new ANNbruteForce(dataPts, nrow, ncol);
} else {//Rcout << "Using pre-built kd tree" << std::endl;
kdTree = getKdTree(kd_tree);
}

//Rprintf("kd-tree ready. starting DBSCAN.\n");
}

Expand Down Expand Up @@ -141,7 +148,8 @@ IntegerVector dbscan_int(
}

// cleanup
if(kdTree != NULL) {
// Only deallocate 'kdTree' if the passed argument 'kd_tree' was null
if(kd_tree == NULL || Rf_isNull(kd_tree)) {
delete kdTree;
annDeallocPts(dataPts);
annClose();
Expand Down
Loading

0 comments on commit fd3228f

Please sign in to comment.