-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdemo.R
147 lines (121 loc) · 5.53 KB
/
demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
source("./EMR_LIP_tmp.R")
library(readxl)
library(dplyr)
ds <- read_excel("data.xlsx", col_names=T, sheet=1)
var_dict <- read_excel("var_dict.xlsx", col_names=T, sheet=1)
ds_map <- read_excel("var_dict.xlsx", col_names=T, sheet=2)
ds <- rename_long_table(ds, ds_map$old_name, ds_map$new_name, "item_id")
ds <- rename_wide_table(ds, ds_map$old_name, ds_map$new_name)
ds <- remove_extreme_value_long(ds, var_dict$itemid, var_dict$value_type,
"item_id", "value", var_dict)
ds <- remove_extreme_value_wide(ds, var_dict$itemid, var_dict$value_type, 1:3
var_dict)
stat_ds <- get_stat_long(ds,
var_dict$itemid,
var_dict$value_type,
"item_id",
"value",
var_dict$cont,
var_dict)
stat_ds <- get_stat_wide(ds,
var_dict$itemid,
var_dict$value_type,
var_dict$cont,
var_dict)
# undebug(resample_long)
ds_k <- ds[ds$sid == 1,]
ds_k1 <- resample_long(df = ds_k,
itemid_list = var_dict$itemid,
type_list = var_dict$value_type,
agg_f_list = var_dict$agg_fun,
time_list = 1:15,
time_col1 = "time",
time_col2 = "time2",
itemid_col = "item_id",
value_col = "value",
time_window = 1,
direction = "both",
keepNArow = T,
keep_first = T)
ds_k1 <- resample_wide(df = ds_k,
itemid_list = var_dict$itemid,
type_list = var_dict$value_type,
agg_f_list = var_dict$agg_fun,
time_list = 1:4,
time_col1 = "time",
time_col2 = "time2",
time_window = 1,
direction = "both",
keepNArow = T,
keep_first = T)
ind1 <- (ds_k1[,2] == "1")
ds_k1 <- ds_k1[,c(1,3:ncol(ds_k1))]
mask_k1 <- get_mask(ds_k1, names(ds_k1)[2:ncol(ds_k1)], "time")
ds_k1 <- fill(ds_k1, 2:ncol(ds_k1), get_type(stat_ds), var_dict$fill1, var_dict$fill2, stat_ds)
str(ds_k1)
ds_k1 <- fill_last_values(ds_k1, mask_k1, 2:ncol(ds_k1), 1, var_dict)
ds_k1 <- to_onehot(ds_k1, 2:ncol(ds_k1), 1, get_type(stat_ds), stat_ds)
ds <- read_excel("data.xlsx", col_names=T, sheet=1)
var_dict <- read_excel("var_dict.xlsx", col_names=T, sheet=1)
var_dict1 <- var_dict[var_dict$time_type == "point", ,drop=F]
var_dict2 <- var_dict[var_dict$time_type == "interval", ,drop=F]
ds_map <- read_excel("var_dict.xlsx", col_names=T, sheet=2)
ds <- rename_long_table(ds, ds_map$old_name, ds_map$new_name, "item_id")
ds <- remove_extreme_value_long(ds, var_dict$itemid, var_dict$value_type,
"item_id", "value", var_dict)
stat_ds1 <- get_stat_long(ds,
var_dict1$itemid,
var_dict1$value_type,
"item_id",
"value",
var_dict1$cont)
stat_ds2 <- get_stat_long(ds,
var_dict2$itemid,
var_dict2$value_type,
"item_id",
"value",
var_dict2$cont)
ds_k <- ds[ds$sid == 1,]
ds_k1 <- resample_point_long(ds_k,
var_dict1$itemid,
var_dict1$value_type,
var_dict1$agg_fun,
1:4,
"item_id",
"value",
"time",
1,
direction = "both",
keepNArow = T,
keep_first = F)
# undebug(resample_single_long)
ind1 <- (ds_k1[,2] == "1")
ds_k1 <- ds_k1[,c(1,3:ncol(ds_k1))]
mask_k1 <- get_mask(ds_k1, 2:ncol(ds_k1), 1)
ds_k1 <- fill(ds_k1, 2:ncol(ds_k1), 1, get_type(stat_ds1), var_dict1$fill1, var_dict1$fill2, stat_ds1)
ds_k1 <- fill_last_values(ds_k1, mask_k1, 2:ncol(ds_k1), 1, var_dict1)
ds_k1 <- to_onehot(ds_k1, 2:ncol(ds_k1), 1, get_type(stat_ds1), stat_ds1)
ds_k2 <- resample_interval_long(ds_k,
var_dict2$itemid,
var_dict2$value_type,
var_dict2$agg_fun,
1:4,
"item_id",
"value",
"time",
"time2",
1,
direction = "both",
keepNArow = T,
keep_first = F)
ind2 <- (ds_k2[,2] == "1")
ds_k2 <- ds_k2[,c(1,3:ncol(ds_k2))]
mask_k2 <- get_mask(ds_k2, 2:ncol(ds_k2), 1)
ds_k2 <- fill(ds_k2, 2:ncol(ds_k2), 1, get_type(stat_ds2), var_dict2$fill1, var_dict2$fill2, stat_ds2)
ds_k2 <- fill_last_values(ds_k2, mask_k2, 2:ncol(ds_k2), 1, var_dict2)
ds_k2 <- to_onehot(ds_k2, 2:ncol(ds_k2), 1, get_type(stat_ds2), stat_ds2)
ds_k_ <- cbind(ds_k1, ds_k2[,2:ncol(ds_k2),drop=F]) %>% as.data.frame %>% lapply(., as.numeric) %>% as.data.frame
ds_k_ <- ds_k_[which(ind1 | ind2), ]
as.data.frame(ds_k_)
# as.data.frame(ds_k)
cbind(ds_k1, ds_k2[,2:ncol(ds_k2),drop=F])