-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCummulative antihypertensive dosing in children with CKD (CKiD research project) .Rmd
230 lines (195 loc) · 11.9 KB
/
Cummulative antihypertensive dosing in children with CKD (CKiD research project) .Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
---
title: "CKiD research project"
author: "Benjamin Matta, MD"
date: "December 22, 2018"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# CKiD research project
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this...
Importing data from CKiD database:
```{r importing data, echo=FALSE}
#loading libraries
library(readr)
library(corrplot)
library(dplyr)
library(reshape2)
library(histogram)
library(gplots)
library(tidyr)
library(stats)
library(tibble)
library(ggplot2)
# importing data
wd<-if_else(Sys.info()["nodename"]=='matta', "~/Documents/Research/CKiDgit", "C:/Users/Orit/Downloads/CKiD/CKiDgit")
setwd(wd)
# importing data
medsum_full <- read.csv("data/medsum_full.csv")
cardio<-read.csv("data/cardio.csv")
echo<-read.csv("data/echo.csv")
f13 <- read.csv("data/f13.csv")
f14 <- read.csv("data/f14.csv")
f15 <- read.csv("data/f15.csv")
socdem <- read.csv("data/socdem.csv")
growth <- read.csv("data/growth.csv")
pe <- read.csv("data/pe.csv")
gfr <- read.csv("data/gfrcalibratedsummary.csv")
advr <- read.csv("data/advr.csv")
kidhist <- read.csv("data/kidhist.csv")
l05 <- read.csv("data/l05.csv")
l02 <- read.csv("data/l02.csv")
l51 <- read.csv("data/l51.csv")
l31 <- read.csv("data/l31.csv")
gh <- read.csv("data/gh.csv")
```
# Rearranging data
The following section combines all the data into a single dataframe in preparation for analysis
```{r rearranging_data}
test<-f13 %>% select(CASEID,VISIT,GHVISDAT,GHHGRADM,GHHGRADF,GHTOTINC,GHBFAMHB,contains("HBP")) %>% tbl_df
test<-full_join(socdem %>% select(-VERSION),test)
test<-full_join(f14 %>% select(CASEID,VISIT,MHCHRLD,MH_BPD, MHFEMALE),test)
test<-full_join(f15 %>% select(CASEID,VISIT,contains("NSSTER"),NSSTPYBP,NSSTSEBP), test)
test<-full_join(growth %>% select(CASEID,VISIT,LBW,PREMATURE,AVHEIGHT,HTPCTAG,AVWEIGHT,WTPCTAG,BMI,BMIPCTAG),test)
test<-full_join(pe %>% select(CASEID,VISIT,PEPRBLPM,PEPRWEIT),test)
test<-full_join(gfr %>% select(CASEID,VISIT,MALE1FE0,SCR,BUN,CYC_DB,BEDGFR, BSA),test)
test<-full_join(advr %>% select(CASEID,VISIT,AENSHEAD,AENSDIZZ,AENSLWBP),test)
test<-full_join(gh %>% select(CASEID,VISIT,GHGENDER,GHWKSBDD,GHPREMIE),test)
test<-full_join(kidhist %>% select(CASEID,DOB,BSDATE, CKDONST,PRIMDX,GNGDIAG),test)
test<-full_join(l05 %>% select(CASEID,VISIT,RLSERCRE,RLURPROT,RLURMALB,RLURCREA),test)
test<-full_join(echo %>% select(CASEID,VISIT,LVHF,LVHE,LVMI,ECHODATEY,CAUTION),test)
test<-full_join(cardio %>% select(CASEID,VISIT,DB_DATE,SBP,SBPINDXAGH,SBPPCTAGH,SBPZAGH,DBP,DBPINDXAGH,DBPPCTAGH,DBPZAGH,ABPM_DATE,ABPMSUCCESS,SHYPAGH,DHYPAGH),test)
# add new columns for age, Upc, Ualbumin%
# due to DOB provided in integer form, the true age may be off by up to 1 year (eg, born 1/1/1994 = DOB 1994, baseline date 12/31/2000 = BSDATE 2000, calculated age = 6yrs, true age=7yrs)
test<-test %>% mutate(age=(BSDATE-DOB)+DB_DATE,Upc=RLURPROT/RLURCREA, Ualb_pct=RLURMALB/RLURPROT)
# gender: since some values of MALE1FE0 are missing, will solve missing values by taking the mean of MALE1FE0 for each case (no change in gender over time)
test<-test %>% group_by(CASEID) %>% mutate(MALE1FE0=mean(MALE1FE0, na.rm=TRUE))
source('gfr.R')
# gfr will be based on ckidfull equation if the data is available, otherwise bedside schartz equation is used
test$gfr<-mapply(GFR,test$AVHEIGHT,test$SCR,test$CYC_DB,test$BUN,test$MALE1FE0, permissive=TRUE)
# add a column for CKD stage using ckidfull
test$CKD_stage <- cut(test$gfr,
breaks = c(Inf, 90, 60, 30, 15, -Inf),
labels=c(1:5),
ordered_result=TRUE,
right = T)
# create categories for proteinuria based on cutoffs noted
test$Upc.factor<-cut(test$Upc,
breaks=c(-Inf,0.5,1,2,Inf),
labels=c("normal","mild","moderate","severe"),
ordered_result = T,
right=F)
# loads the correct BP medication names into BP_medlist
# and corrects BP medication names for brand names and misspellings
# corrected names are assigned a new variable called med.corrected
# display information about timing of echocardiogram data, grouped by visits
# note: BP_medlist will display the misspelled/brand names of BP meds, frequency at visit 10?, and corrected names
# categorize the BP meds into groups based on BP_medgroups.csv
load("BP_medlist.RData",verbose = TRUE)
BP_medgroups <- read.csv("BP_medgroups.csv")
medsum_full$med.corrected<-BP_medlist$Corrected.Name[match(medsum_full$MSMEDICA,BP_medlist$Var1)]
medsum_full$BPmedgroup<-BP_medgroups$bp_group[match(medsum_full$med.corrected,BP_medgroups$x)]
# remove all unintelligible medications from the data (may be viewed using BP_medlist[which(BP_medlist$Corrected.Name=="*invalid*"),c(2:3)])
medsum_full<-medsum_full %>% filter(med.corrected!="*invalid*")
# combine split dosing medications (eg, some patients taking different doses of same medication at different times of day, eg, labetalol)
medsum_full<-medsum_full %>% group_by(CASEID,VISIT,MSVISDAT, BPmedgroup,med.corrected) %>% summarise_at(vars(DLYDOSE,DLYFREQ,MSMISS7D), sum) %>% ungroup
# determine compliance as percent of doses taken, based on reported missed doses per week and daily frequency of medication
medcompliance<-function(miss7d,dlyfreq) {
if (miss7d<0) miss7d<-0
if (dlyfreq<0) dlyfreq<-NA
return((dlyfreq*7-miss7d)/(dlyfreq*7))
}
medsum_full$compliance<-mapply(medcompliance,medsum_full$MSMISS7D,medsum_full$DLYFREQ)
medsum_full<-medsum_full %>% group_by(CASEID,VISIT) %>% mutate(mean_compliance=mean(compliance,na.rm=TRUE)) %>% ungroup
medsum_full<-left_join(test %>% select(CASEID,VISIT,AVWEIGHT,age,gfr),medsum_full)
medsum_full<-medsum_full %>% mutate(std_dose=ifelse(DLYDOSE>0,DLYDOSE/AVWEIGHT,NA))
source("DDI.R")
medsum_full$ddi<-mapply(DDI,medsum_full$DLYDOSE,medsum_full$med.corrected,medsum_full$age,medsum_full$AVWEIGHT,medsum_full$gfr)
# identify duplicates (cases with visit codes that have coresponding visit dates that are not the same) and eliminate all the corresponding medication data
# justifications 1) unable to determine whether duplicate visits represent 'addition' of medications or 'switching' between meds
# 2) some cases have same visit code and corresponding visit dates separated in time by up to 4.28 years (obviously an error in recording data)
medsum_full<-medsum_full %>% group_by(CASEID,VISIT) %>% mutate(ndup=n_distinct(MSVISDAT))
medsum_full<-medsum_full %>%filter(ndup==1) # remove all duplicates from the data
medsum_full<-medsum_full %>% group_by(CASEID,VISIT) %>% mutate(n_agents=n_distinct(med.corrected, na.rm=TRUE))
medsum_full<-medsum_full %>% group_by(CASEID,VISIT) %>% mutate(sum_DDI=sum(ddi))
medsum_full.old<-medsum_full
medsum_full<-medsum_full %>% ungroup %>% nest(.,BPmedgroup,DLYFREQ,DLYDOSE,std_dose,ddi,compliance, .key="rx_info")
medsum_full<-dcast(medsum_full,CASEID+VISIT+MSVISDAT+n_agents+mean_compliance+sum_DDI~med.corrected, value.var="rx_info")
test<-full_join(medsum_full,test)
# replace all NULL values of drugs with a tibble containing NA's using change_null_to_list function above
# note: the line below takes approx 5 min to complete, so will save results to medsum_full4.RData file and read it into memory
# the line will be commented to save time
change_null_to_list <- function(x) {
# If x is a data frame, do nothing and return x
# Otherwise, return a data frame with 1 row of NAs
if (!is.null(x)) {return(x)}
else {return(as_tibble(t(c(BPmedgroup=as.factor(NA), DLYFREQ=as.numeric(NA), DLYDOSE=as.numeric(NA),std_dose=as.numeric(NA),ddi=as.numeric(NA),compliance=as.numeric(NA)))))}
}
for (i in which(names(test)=="ACETAZOLAMIDE"):which(names(test)=="VERAPAMIL")) {test[[names(test)[i]]]<-lapply(test[[names(test)[i]]],change_null_to_list)}
# parse drug info stored in test as separate variables for analysis
source("get_drug_info.R")
drugname<-names(test)[which(names(test)=="ACETAZOLAMIDE"):which(names(test)=="VERAPAMIL")]
# exclude combo drugs (contain '/', eg "AMLODIPINE/HCTZ")
drugname<-drugname[-grep("\\/",drugname)]
temp <- paste(drugname[1:43], "<- get_drug_info(test,'",drugname,"', c(1:6))", sep="")
eval(parse(text=temp))
temp.1<-paste("test$",drugname[1:43],"_ddi<-",drugname[1:43],"$ddi", sep="")
temp.2<-paste("test$",drugname[1:43],"_std_dose<-",drugname[1:43],"$std_dose", sep="")
eval(parse(text=temp.1))
eval(parse(text=temp.2))
# add columns for new 2017 AAP BP percentiles and z-scores (this takes about 2 minutes to calculate)
source('bpp4.R')
source('BPz/bpzv2.R')
source('BPstatus4th.R')
source('BPstatus2017.R')
source('bpclass.R')
source('bpclass2.R')
test$SBPPCTAGH2017<-mapply(bpp,test$age,test$AVHEIGHT,test$MALE1FE0,1,test$SBP, z=F)
test$DBPPCTAGH2017<-mapply(bpp,test$age,test$AVHEIGHT,test$MALE1FE0,2,test$DBP, z=F)
test$SBPZAGH2017<-mapply(bpp,test$age,test$AVHEIGHT,test$MALE1FE0,1,test$SBP, z=T)
test$DBPZAGH2017<-mapply(bpp,test$age,test$AVHEIGHT,test$MALE1FE0,2,test$DBP, z=T)
# classification of BP status based on 4th report and 2017 guidelines
# note: BP status is NA when BP was unavailable
test$BPstatus2017<-mapply(BPstatus2017,test$SBP,test$DBP,test$age,test$MALE1FE0,test$AVHEIGHT)
# fill in missing height percentiles to reduce number of missing BP status
missing_htpct<-which(is.na(test$HTPCTAG) & !is.na(test$AVHEIGHT))
test$HTPCTAG[missing_htpct]<- mapply(htz,test$AVHEIGHT[missing_htpct], test$age[missing_htpct], test$MALE1FE0[missing_htpct],p=T)
test$BPstatus4th<-mapply(bpstatus4th,test$SBP,test$DBP,test$age,test$MALE1FE0,test$HTPCTAG,test$SBPPCTAGH,test$DBPPCTAGH)
# cardio data organization
# analysis of BP status
# based on 2009 AHA consensus guidelines (old)
# BP status is coded as a variable called BPstatus in cardio
# 0 = normal
# 1 = White coat hypertension (WCH)
# 2 = masked hyeprtension (MH)
# 3 = ambulatory hypertension (AH)
# noctHTN variable: BP% dipping <10%
cardio<-cardio %>%rowwise()%>% mutate(noctHTN=sum(SYSPCTDIPPING<10,DIAPCTDIPPING<10))
cardio<-left_join(cardio,test %>% select(CASEID,VISIT,SBPPCTAGH2017,DBPPCTAGH2017, age,MALE1FE0,AVHEIGHT))
cardio<-cardio %>% rowwise() %>% mutate(BPclass= bpclass2(WKSYSINDX,WKDIAINDX,SLSYSINDX,SLDIAINDX,WKSYSLOAD,WKDIALOAD,SLSYSLOAD, SLDIALOAD, SBP, DBP, SBPPCTAGH2017,DBPPCTAGH2017))
test<-full_join(cardio %>% select(CASEID,VISIT,ABPMSUCCESS,BPclass,noctHTN),test)
# combine BP class variants into larger groups using BP classification from 2014
test$BPclass.factor<-cut(test$BPclass,0:4,right=FALSE, labels=c("NL","WCH","MH","AH"),ordered_result = TRUE)
test$BPclass.2.factor<-factor(test$BPclass.factor<="WCH", labels=c("NL+WCH","MH+AH"))
# Create variable LVMIp using LVMIp function to calculate LVMI percentiles
test<-test %>% filter(!is.na(LVMI)) %>% rowwise() %>% mutate(LVMIp=LVMIp(MALE1FE0,age,LVMI))
test$BMIz<-qnorm(test$BMIPCTAG/100)
test$GNGDIAG.factor<-factor(test$GNGDIAG<=2,labels=c("glom","non-glom"))
```
# Load data from file
```{r}
load("test3.RData")
```
# Define the study population
The following are inclusion criteria:
```{r}
# define study population
combo_cases<-medsum_full.old %>% filter(VISIT==20,length(grep("\\/",med.corrected)!=0L)) %>% ungroup() %>% select(CASEID) %>% unique() %>% unlist() %>% as.numeric()
cases<-test %>% filter(!CASEID %in% combo_cases,VISIT==20, n_agents>0,!is.na(sum_DDI),ABPMSUCCESS==1,!is.na(BUN), !is.na(CYC_DB),!is.na(SCR),!is.na(AVHEIGHT), !is.na(AVWEIGHT), !is.na(BPstatus2017)) %>% ungroup() %>%select(CASEID) %>% unique %>% unlist %>% as.numeric()# exclude those taking combo medications
test.cohort<-test %>% filter(VISIT==20, CASEID %in% cases)
test.cohort<-test.cohort %>% mutate(sum_DDI.ln=log(sum_DDI))
library(summarytools)
dfSummary(test.cohort %>% select(age,MALE1FE0,CKD_stage,gfr,CKDONST,n_agents,BPclass.factor,SBPZAGH2017,DBPPCTAGH2017, LVMIp,Upc,Upc.factor,BMIz,GNGDIAG,GHTOTINC,RACE,sum_DDI))
```