-
-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathdata_rename.R
137 lines (125 loc) · 5.15 KB
/
data_rename.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#' @title Rename columns and variable names
#' @name data_rename
#'
#' @description Safe and intuitive functions to rename variables or rows in
#' data frames. `data_rename()` will rename column names, i.e. it facilitates
#' renaming variables `data_addprefix()` or `data_addsuffix()` add prefixes
#' or suffixes to column names. `data_rename_rows()` is a convenient shortcut
#' to add or rename row names of a data frame, but unlike `row.names()`, its
#' input and output is a data frame, thus, integrating smoothly into a possible
#' pipe-workflow.
#'
#' @param data A data frame, or an object that can be coerced to a data frame.
#' @param pattern Character vector. For `data_rename()`, indicates columns that
#' should be selected for renaming. Can be `NULL` (in which case all columns
#' are selected). For `data_addprefix()` or `data_addsuffix()`, a character
#' string, which will be added as prefix or suffix to the column names.
#' @param replacement Character vector. Indicates the new name of the columns
#' selected in `pattern`. Can be `NULL` (in which case column are numbered
#' in sequential order). If not `NULL`, `pattern` and `replacement` must be
#' of the same length.
#' @param rows Vector of row names.
#' @param safe Do not throw error if for instance the variable to be
#' renamed/removed doesn't exist.
#' @param verbose Toggle warnings and messages.
#' @param ... Other arguments passed to or from other functions.
#'
#' @return A modified data frame.
#'
#' @examples
#' # Rename columns
#' head(data_rename(iris, "Sepal.Length", "length"))
#' # data_rename(iris, "FakeCol", "length", safe=FALSE) # This fails
#' head(data_rename(iris, "FakeCol", "length")) # This doesn't
#' head(data_rename(iris, c("Sepal.Length", "Sepal.Width"), c("length", "width")))
#'
#' # Reset names
#' head(data_rename(iris, NULL))
#'
#' # Change all
#' head(data_rename(iris, replacement = paste0("Var", 1:5)))
#'
#' @seealso
#' - Functions to rename stuff: [data_rename()], [data_rename_rows()], [data_addprefix()], [data_addsuffix()]
#' - Functions to reorder or remove columns: [data_reorder()], [data_relocate()], [data_remove()]
#' - Functions to reshape, pivot or rotate data frames: [data_to_long()], [data_to_wide()], [data_rotate()]
#' - Functions to recode data: [rescale()], [reverse()], [categorize()], [recode_values()], [slide()]
#' - Functions to standardize, normalize, rank-transform: [center()], [standardize()], [normalize()], [ranktransform()], [winsorize()]
#' - Split and merge data frames: [data_partition()], [data_merge()]
#' - Functions to find or select columns: [data_select()], [data_find()]
#' - Functions to filter rows: [data_match()], [data_filter()]
#'
#' @export
data_rename <- function(data,
pattern = NULL,
replacement = NULL,
safe = TRUE,
verbose = TRUE,
...) {
# change all names if no pattern specified
if (is.null(pattern)) {
pattern <- names(data)
}
if (!is.character(pattern)) {
insight::format_error("Argument `pattern` must be of type character.")
}
# name columns 1, 2, 3 etc. if no replacement
if (is.null(replacement)) {
replacement <- paste0(seq_along(pattern))
}
# if duplicated names in replacement, append ".2", ".3", etc. to duplicates
# ex: c("foo", "foo") -> c("foo", "foo.2")
if (anyDuplicated(replacement) > 0L) {
dup <- as.data.frame(table(replacement))
dup <- dup[dup$Freq > 1, ]
for (i in dup$replacement) {
to_replace <- which(replacement == i)[-1]
new_replacement <- paste0(i, ".", 1 + seq_along(to_replace))
replacement[to_replace] <- new_replacement
}
}
if (length(replacement) > length(pattern) && verbose) {
insight::format_alert(
paste0(
"There are more names in `replacement` than in `pattern`. The last ",
length(replacement) - length(pattern), " names of `replacement` are not used."
)
)
} else if (length(replacement) < length(pattern) && verbose) {
insight::format_alert(
paste0(
"There are more names in `pattern` than in `replacement`. The last ",
length(pattern) - length(replacement), " names of `pattern` are not modified."
)
)
}
for (i in seq_along(pattern)) {
if (!is.na(replacement[i])) {
data <- .data_rename(data, pattern[i], replacement[i], safe, verbose)
}
}
data
}
#' @keywords internal
.data_rename <- function(data, pattern, replacement, safe = TRUE, verbose = TRUE) {
if (!pattern %in% names(data)) {
if (isTRUE(safe)) {
# only give message when verbose is TRUE
if (verbose) {
insight::format_alert(paste0("Variable `", pattern, "` is not in your data frame :/"))
}
# if not safe, always error, no matter what verbose is
} else {
insight::format_error(paste0("Variable `", pattern, "` is not in your data frame :/"))
}
}
names(data) <- replace(names(data), names(data) == pattern, replacement)
data
}
# Row.names ----------------------------------------------------------------
#' @rdname data_rename
#' @export
data_rename_rows <- function(data, rows = NULL) {
row.names(data) <- rows
data
}