forked from hadley/plyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplitter-d.r
95 lines (85 loc) · 2.98 KB
/
splitter-d.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#' Split a data frame by variables.
#'
#' Split a data frame into pieces based on variable contained in that data frame
#'
#' This is the workhorse of the \code{d*ply} functions. Based on the variables
#' you supply, it breaks up a single data frame into a list of data frames,
#' each containing a single combination from the levels of the specified
#' variables.
#'
#' This is basically a thin wrapper around \code{\link{split}} which
#' evaluates the variables in the context of the data, and includes enough
#' information to reconstruct the labelling of the data frame after
#' other operations.
#'
#' @seealso \code{\link{.}} for quoting variables, \code{\link{split}}
#' @family splitter functions
#' @param data data frame
#' @param .variables a \link{quoted} list of variables
#' @param drop drop unnused factor levels?
#' @return a list of data.frames, with attributes that record split details
#' @keywords internal
#' @examples
#' plyr:::splitter_d(mtcars, .(cyl))
#' plyr:::splitter_d(mtcars, .(vs, am))
#' plyr:::splitter_d(mtcars, .(am, vs))
#'
#' mtcars$cyl2 <- factor(mtcars$cyl, levels = c(2, 4, 6, 8, 10))
#' plyr:::splitter_d(mtcars, .(cyl2), drop = TRUE)
#' plyr:::splitter_d(mtcars, .(cyl2), drop = FALSE)
#'
#' mtcars$cyl3 <- ifelse(mtcars$vs == 1, NA, mtcars$cyl)
#' plyr:::splitter_d(mtcars, .(cyl3))
#' plyr:::splitter_d(mtcars, .(cyl3, vs))
#' plyr:::splitter_d(mtcars, .(cyl3, vs), drop = FALSE)
splitter_d <- function(data, .variables = NULL, drop = TRUE) {
stopifnot(is.quoted(.variables))
if (length(.variables) == 0) {
splitv <- rep(1, nrow(data))
split_labels <- NULL
attr(splitv, "n") <- max(splitv)
vars <- character(0)
} else {
splits <- eval.quoted(.variables, data)
splitv <- id(splits, drop = drop)
split_labels <- split_labels(splits, drop = drop, id = splitv)
vars <- unlist(lapply(.variables, all.vars))
}
index <- split_indices(as.integer(splitv), attr(splitv, "n"))
il <- indexed_df(data, index, vars)
structure(
il,
class = c(class(il), "split", "list"),
split_type = "data.frame",
split_labels = split_labels
)
}
#' Generate labels for split data frame.
#'
#' Create data frame giving labels for split data frame.
#'
#' @param list of variables to split up by
#' @param whether all possible combinations should be considered, or only those present in the data
#' @keywords internal
#' @export
split_labels <- function(splits, drop, id = plyr::id(splits, drop = TRUE)) {
if (length(splits) == 0) return(data.frame())
if (drop) {
# Need levels which occur in data
representative <- which(!duplicated(id))[order(unique(id))]
quickdf(lapply(splits, function(x) x[representative]))
} else {
unique_values <- llply(splits, ulevels)
names(unique_values) <- names(splits)
rev(expand.grid(rev(unique_values), stringsAsFactors = FALSE))
}
}
ulevels <- function(x) {
if (is.factor(x)) {
x <- addNA(x, ifany = TRUE)
levs <- levels(x)
factor(levs, levels = levs)
} else {
sort(unique(x))
}
}