-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNP-TwoSamplesTests.R
185 lines (155 loc) · 6.31 KB
/
NP-TwoSamplesTests.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#############
# KOLMOGOROV - SMIRNOV TWO samples
#############
#' @title Exact p values for Kolmogorov-Smirnov test for compare two samples
#'
#' @description This function compute the exact pvalue for the Kolmogorov-Smirnov test for two samples
#' @param n Size of distribution
#' @param Dn Statistic
#' @return computed pvalue
computeKolmogorovExactProbability <- function(n, Dn){
if(n < 9){
nmDn <- round(n * n * Dn)
pvalue <- KolmogorovTwoSample$distribution[KolmogorovTwoSample$x == n &
KolmogorovTwoSample$y == n &
KolmogorovTwoSample$Dn == nmDn]
pvalue <- ifelse(pvalue == -1, 1, pvalue)
return(pvalue)
}
else
return(1)
}
#' @title Asymptotic p values for Kolmogorov-Smirnov test for compare two samples
#'
#' @description This function compute the asymptotic pvalue for the Kolmogorov-Smirnov test for two samples
#' @param n Size of distribution
#' @param Dn Statistic
#' @return computed pvalue
computeKolmogorovAsymptoticProbability <- function(n, Dn){
if(n < 21){
nmDn <- round(n * n * Dn)
pvalue <- KolmogorovTwoSampleAsymptotic[which(rownames(KolmogorovTwoSampleAsymptotic) == n), 1]
if(pvalue == -1)
return(-1)
condition <- nmDn >= KolmogorovTwoSampleAsymptotic[which(rownames(KolmogorovTwoSampleAsymptotic) == n), 1]
possible.pvalues <- colnames(KolmogorovTwoSampleAsymptotic)[condition]
pvalue <- ifelse(length(possible.pvalues) > 0,
as.numeric(possible.pvalues[length(possible.pvalues)]),
1)
return(pvalue)
}
else{
size <- sqrt(2*n)/n
v <- Dn >= c(1.07,1.22,1.36,1.52,1.63)/size
if(length(v) > 0){
pvalue <- colnames(KolmogorovTwoSampleAsymptotic)[v[length(v)]]
return(pvalue)
}
else
return(1)
}
}
#' @title Kolmogorov-Smirnov test for compare two samples
#'
#' @export
#' @description This function performs the Kolmogorov-Smirnov test for two samples
#' @param matrix Matrix of data
#' @return A list with pvalues for alternative hypothesis, statistics, method and data name
ksTwoSamples.test <- function(matrix){
if(ncol(matrix) != 2)
stop("Kolmogorov-Smirnov test only can be employed with two samples")
sample1 <- matrix[ ,1]
sample2 <- matrix[ ,2]
combined <- c(sample1, sample2)
n <- length(combined)
# Order data
sample1 <- sample1[order(sample1)]
sample2 <- sample2[order(sample2)]
combined <- combined[order(combined)]
# Distict values
distinct <- length(unique(combined))
increment <- 1 / distinct
combinedCdf <- vector(mode = "numeric", length = n)
for(i in 2:n)
combinedCdf[i] <- ifelse(combined[i] == combined[i-1], combinedCdf[i-1],
combinedCdf[i-1] + increment)
# Sn and Sm arrays
# For each element in the sample vector get the element in the same position in the vector of
# accumulated increments that it is in the combined vector
Sm <- sapply(1:(n/2), function(i) combinedCdf[combined == sample1[i] & 1:n >= i][1])
Sn <- sapply(1:(n/2), function(i) combinedCdf[combined == sample2[i] & 1:n >= i][1])
# Diff
diff <- Sm - Sn
DnPos <- max(diff)
DnNeg <- min(diff)
Dn <- max(abs(c(DnPos, DnNeg)))
pvalue <- c("Exact P-Value (Left tail, Y > X)" = computeKolmogorovExactProbability(n, abs(DnNeg)),
"Exact P-Value (Right tail, Y < X)" = computeKolmogorovExactProbability(n, abs(DnPos)),
"Exact P-Value (Double tail, Y != X)" = computeKolmogorovExactProbability(n, Dn),
"Asymptotic Left Tail" = computeKolmogorovAsymptoticProbability(n, abs(DnNeg)),
"Asymptotic Right Tail" = computeKolmogorovAsymptoticProbability(n, abs(DnPos)),
"Asymptotic Double Tail" = computeKolmogorovAsymptoticProbability(n, Dn))
htest <- list(data.name = deparse(substitute(sequence)),
statistic = c("DnPos" = DnPos, "DnNeg" = DnNeg, "Dn" = Dn),
p.value = pvalue,
method = "Kolmogorov-Smirnov")
return(htest)
}
############
# WALD WOLFOWITZ TEST
############
#' @title Wald-Wolfowitz char vector construction
#'
#' @description Get sequence for Wald-Wolfowitz test
#' @param sample1 First sample
#' @param sample2 Second sample
#' @return Sequence to apply Wald-Wolfowitz test
waldWolfowitzSequence <- function(sample1, sample2){
# Sort
sample1 <- sample1[order(sample1)]
sample2 <- sample2[order(sample2)]
n <- length(sample1) + length(sample2)
combined <- c(sample1, sample2)
combined <- combined[order(combined)]
sequence <- sapply(combined, function(i){
if(i %in% sample1 & !(i %in% sample2))
return(1)
else if(!(i %in% sample1) & i %in% sample2)
return(-1)
else
return(0)
})
for(i in 1:n){
if(sequence[i] == 0)
sequence[i] <- ifelse(sequence[i-1] == 1, -1, 1)
}
return(sequence)
}
#' @title Wald-Wolfowitz test for compare two samples
#'
#' @export
#' @description This function performs the Wald-Wolfowitz test for two samples
#' @param matrix Matrix of data
#' @examples
#' waldWolfowitz.test(results[ ,c(1,2)])
#' @return A list with pvalues for alternative hypothesis, statistics, method and data name
waldWolfowitz.test <- function(matrix){
if(ncol(matrix) != 2)
stop("Wald-Wolfowitz test only can be employed with two samples")
# Remove NA numbers
sample1 <- matrix[!is.na(matrix[ ,1]), 1]
sample2 <- matrix[!is.na(matrix[ ,2]), 2]
sequence <- waldWolfowitzSequence(sample1, sample2)
rle <- rle(sequence)
runs <- length(rle$lengths)
exact.pvalue <- computeNumberOfRunsLeftTailProbability(length(sample1),
length(sample2), runs)
asymptotic.pvalue <- computeTotalNumberOfRunsAsymptoticProbability(length(sample1),
length(sample2),
runs)["Asymptotic Left Tail"]
pvalue <- c("Exact p-value" = exact.pvalue,
"Asymptotic p-value" = asymptotic.pvalue)
htest <- list(data.name = deparse(substitute(sequence)),
statistic = c("R" = runs), p.value = pvalue, method = "Wald-Wolfowitz")
return(htest)
}