-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathRandom Forest.Rmd
97 lines (79 loc) · 2.25 KB
/
Random Forest.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
---
title: "Random Forest"
output: html_notebook
---
```{r}
# Load the two training sets and the test set from the working directory.
#
# NOTE: the former `rm(list = ls())` call was dropped. It wipes the caller's
# workspace without resetting loaded packages or options, so it does not give
# a truly fresh start; restart the R session if a clean state is needed.
#
# `stringsAsFactors = TRUE` is passed explicitly: since R 4.0.0 read.csv()
# reads strings as character by default, and the `levels<-` calls below only
# relabel/extend levels when the columns are factors.
train1 <- read.csv("train_dataset01.csv", stringsAsFactors = TRUE)
train2 <- read.csv("train_dataset02.csv", stringsAsFactors = TRUE)
test <- read.csv("test_dataset.csv", stringsAsFactors = TRUE)

# Force the listed STATUS_* columns to carry the level set c("False", "True").
# NOTE(review): presumably this aligns the factor levels of train2 and test so
# predict() accepts the test data -- confirm against the raw CSV contents.
for (status_col in c("STATUS_PU3", "STATUS_PU5", "STATUS_PU8", "STATUS_PU9")) {
  levels(train2[[status_col]]) <- c("False", "True")
}
for (status_col in c("STATUS_PU8", "STATUS_PU9")) {
  levels(test[[status_col]]) <- c("False", "True")
}
```
Split
```{r}
library(caTools)

# Fix the RNG seed so the partition below is reproducible.
set.seed(100)

# Logical mask over the rows of train2: TRUE marks the rows (SplitRatio = 0.7)
# that go to the training part, FALSE the rows held out for validation.
spl <- sample.split(train2$ATT_FLAG, SplitRatio = 0.7)

# Partition train2 by row using the mask and its complement.
attackTrain <- train2[spl, ]
attackTest <- train2[!spl, ]
```
Train and validate (Random Forest)
```{r}
# Random Forest
library(randomForest)

# Build the model on the training split.
# DATETIME is removed so that `ATT_FLAG ~ .` does not use it as a predictor.
attackTrain$DATETIME <- NULL
# randomForest() fits a classifier only when the response is a factor;
# otherwise it fits a regression forest and the confusion matrix below would
# be meaningless. as.factor() leaves an existing factor unchanged.
attackTrain$ATT_FLAG <- as.factor(attackTrain$ATT_FLAG)
model1 <- randomForest(ATT_FLAG ~ ., data = attackTrain)
# print() shows the out-of-bag error summary of the fit; summary() on a
# randomForest object only lists the components of the underlying list.
print(model1)
varImpPlot(model1)

# Prediction on the held-out split.
predict1 <- predict(model1, newdata = attackTest)
# Confusion matrix: rows are predicted classes, columns are actual classes.
cm <- table(predicted = predict1, actual = attackTest$ATT_FLAG)
cm

# NOTE(review): the [2, 2] cell is treated as the true positives, i.e. the
# positive class is assumed to be the second factor level -- confirm.
precision <- cm[2, 2] / sum(cm[2, ])
recall <- cm[2, 2] / sum(cm[, 2])
f1 <- 2 * precision * recall / (precision + recall)
precision
recall
f1
```
Train on whole train2 and predict test
```{r}
# Build the final model on all of train2.
# DATETIME is removed so that `ATT_FLAG ~ .` does not use it as a predictor.
train2$DATETIME <- NULL
# Make sure the response is a factor so randomForest() fits a classifier
# rather than a regression forest; a no-op if ATT_FLAG is already a factor.
train2$ATT_FLAG <- as.factor(train2$ATT_FLAG)
model2 <- randomForest(ATT_FLAG ~ ., data = train2)
# print() shows the out-of-bag error summary of the fit; summary() on a
# randomForest object only lists the components of the underlying list.
print(model2)
varImpPlot(model2)

# Predict the flag for every row of the test set.
predict2 <- predict(model2, newdata = test)
```
Plot each test-set signal with the predicted attack flag overlaid
```{r}
# Attach the predicted flag to the test set so it can be plotted alongside
# the other columns.
test$ATT_FLAG <- predict2
# Full test set as a time series; not referenced again in the visible notebook.
test.ts <- ts(test)

# Columns left out of the overlay plots.
ignore <- c(
  "LEVEL_T5", "FLOW_PU3", "FLOW_PU5", "FLOW_PU9",
  "STATUS_PU3", "STATUS_PU5", "STATUS_PU8", "STATUS_PU9"
)
# Use a logical mask rather than `-which(...)`: when no name matches,
# `-which(...)` is `-integer(0)` and would select zero columns instead of all.
test.small <- test[, !(names(test) %in% ignore), drop = FALSE]
test.small.ts <- ts(test.small)

# One plot per remaining column (black), with the predicted ATT_FLAG series
# drawn on top in red on its own, unlabeled axis.
for (col in setdiff(colnames(test.small.ts), c("DATETIME", "ATT_FLAG"))) {
  plot.ts(test.small.ts[, col], ylab = col, col = c("black"))
  # Keep the current plot so the next plot.ts() call overlays it.
  par(new = TRUE)
  plot.ts(
    test.small.ts[, "ATT_FLAG"],
    axes = FALSE, bty = "n", xlab = "", ylab = "", col = "red"
  )
}
# section <- test.small.ts[6000:7000, ]
# for (col in colnames(section)) {
# if (col != "DATETIME" & col != "ATT_FLAG") {
# plot.ts(section[,col], ylab=col, col=c("black"))
# par(new = TRUE)
# plot.ts(section[,"ATT_FLAG"], axes=FALSE, bty = "n", xlab = "", ylab = "", col="red")
# }
# }
```