####################################################################################################
# title : Machine learning exercise for Sentinel-2 data
# purpose : Implementing a machine learning workflow in R
# author : Abdulhakim M. Abdi (Twitter: @HakimAbdi / www.hakimabdi.com)
# input : A multi-temporal raster stack of Sentinel-2 data comprising scenes from four dates
# output : One classified land cover map from each of three machine learning algorithms
# Note 1 : This brief tutorial assumes that you are already well grounded in R and
# : familiar with image classification procedures and terminology
# Reference : Please cite Abdi (2020): "Land cover and land use classification performance of machine learning
# : algorithms in a boreal landscape using Sentinel-2 data" in GIScience & Remote Sensing if you find this
# : tutorial useful in a publication.
# Reference URL : https://doi.org/10.1080/15481603.2019.1650447
# Data for Code : http://bit.ly/downloadMLtutorialdata
#####################################################################################################
rm(list = ls(all.names = TRUE)) # will clear all objects, including hidden objects
gc() # free up memory and report memory usage
# Load required libraries (note: if any of these packages are not installed, install them first, then load)
#
# library(rgdal)
# library(raster)
# library(caret)
# library(sp)
# library(nnet)
# library(randomForest)
# library(kernlab)
# library(e1071)
# library(pacman)
# rgdal: a comprehensive repository for handling spatial data
# raster: for the manipulation of raster data
# caret: for the machine learning algorithms
# sp: for the manipulation of spatial objects
# nnet: Artificial Neural Network
# randomForest: Random Forest
# kernlab: Support Vector Machines
# e1071: provides miscellaneous functions required by the caret package
# install.packages("pacman")
# Note: rgdal was archived on CRAN in 2023, so on a recent R installation you may need to
# install it from the CRAN archive or port this workflow to the terra/sf packages.
pacman::p_load(rgdal, raster, caret, sp, nnet, randomForest, kernlab, e1071)
# Load the Sentinel-2 stack of the study area
s2data = stack("S2StackSmall.tif")
# Name the layers of the Sentinel-2 stack based on previously saved information
names(s2data) = as.character(read.csv("S2StackSmall_Names.csv")[,1])
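# Optional sanity check: visualize the stack before classifying. A minimal sketch, assuming
# the red, green and blue bands of one date sit at layer indices 3, 2 and 1; adjust the
# indices to match your own band order.
# raster::plotRGB(s2data, r = 3, g = 2, b = 1, stretch = "lin")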
# Load the sample data
# Alternatively, you can use the supplied orthophotos to generate a new set of training and validation data
# Your samples table must have a column for each layer in the raster stack, a column for the land
# cover class each point represents, and X and Y coordinate columns
# You can create such a sample file using QGIS or another GIS software
samples = read.csv("Samples.csv")
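# Optional: if you build your own samples from a point layer instead, the table can be
# assembled in R as well. A minimal sketch, assuming a hypothetical "training_points.shp"
# with a "class" attribute (the file and column names are placeholders):
# pts = rgdal::readOGR("training_points.shp")   # read the sample points
# vals = raster::extract(s2data, pts)           # band values at each point
# xy = sp::coordinates(pts)                     # point coordinates
# samples = data.frame(vals, class = pts$class, x = xy[,1], y = xy[,2])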
# Split the data frame into 70-30 by class
set.seed(42) # fix the random seed so the 70-30 split is reproducible (any constant will do)
trainx = list()
evalx = list()
for (i in 1:8){ # loop through all eight classes
  cls = samples[samples$class == i,]
  smpl <- floor(0.70 * nrow(cls))
  tt <- sample(seq_len(nrow(cls)), size = smpl)
  trainx[[i]] <- cls[tt,]
  evalx[[i]] <- cls[-tt,]
}
# combine them all into training and evaluation data frames
trn = do.call(rbind, trainx)
eva = do.call(rbind, evalx)
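# Optional: caret can produce the same stratified 70-30 split in one call; a sketch that
# replaces the loop above:
# idx = caret::createDataPartition(as.factor(samples$class), p = 0.70, list = FALSE)
# trn = samples[idx,]
# eva = samples[-idx,]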
# Set up a resampling method in the model training process
tc <- trainControl(method = "repeatedcv",  # repeated cross-validation of the training data
                   number = 10,            # number of folds
                   repeats = 5,            # number of repeats
                   allowParallel = TRUE,   # allow use of multiple cores if a parallel backend is registered
                   verboseIter = TRUE)     # view the training iterations
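# Optional: allowParallel only takes effect once a parallel backend is registered. A minimal
# sketch using the doParallel package (not loaded above; install it separately):
# library(doParallel)
# cl <- makePSOCKcluster(4) # start 4 worker processes; adjust to your machine
# registerDoParallel(cl)    # caret's resampling loops will now use the workers
# ... run the caret::train() calls below ...
# stopCluster(cl)           # shut the workers down when training is finished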
# Generate a grid search of candidate hyper-parameter values for inclusion in model training
# These hyper-parameter values are examples; you will need a more thorough tuning process to reach high accuracies
# For example, you can experiment with the parameters to see which combination gives you the highest accuracy
nnet.grid = expand.grid(size = seq(from = 2, to = 10, by = 2),       # number of units (neurons) in the hidden layer
                        decay = seq(from = 0.1, to = 0.5, by = 0.1)) # regularization parameter to avoid over-fitting
rf.grid <- expand.grid(mtry = 1:20) # number of variables available for splitting at each tree node
svm.grid <- expand.grid(sigma = seq(from = 0.01, to = 0.10, by = 0.02), # controls the non-linearity of the hyperplane
                        C = seq(from = 2, to = 10, by = 2))             # controls the influence of each support vector
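# Each grid is just a data frame of candidate combinations: nnet.grid has 5 sizes x 5 decays
# = 25 rows, and with 10-fold cross-validation repeated 5 times each candidate is fitted 50
# times, so the neural network alone involves 25 x 50 = 1250 model fits.
# nrow(nnet.grid) # 25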
## Begin training the models. It took my laptop 8 minutes to train all three algorithms
# Train the neural network model (x = the predictor columns; note that (5:ncol(trn))-1
# evaluates to 4:(ncol(trn)-1) because ":" binds more tightly than "-", so the parentheses
# make that parse explicit; the resampling settings go in through caret's trControl argument)
nnet_model <- caret::train(x = trn[, (5:ncol(trn))-1], y = as.factor(as.integer(as.factor(trn$class))),
                           method = "nnet", metric = "Accuracy", trControl = tc, tuneGrid = nnet.grid)
# Train the random forest model
rf_model <- caret::train(x = trn[, (5:ncol(trn))-1], y = as.factor(as.integer(as.factor(trn$class))),
                         method = "rf", metric = "Accuracy", trControl = tc, tuneGrid = rf.grid)
# Train the support vector machines model
svm_model <- caret::train(x = trn[, (5:ncol(trn))-1], y = as.factor(as.integer(as.factor(trn$class))),
                          method = "svmRadialSigma", metric = "Accuracy", trControl = tc, tuneGrid = svm.grid)
## Apply the models to data. It took my laptop 2 minutes to apply all three models
# Apply the neural network model to the Sentinel-2 data.
nnet_prediction = raster::predict(s2data, model=nnet_model)
# Apply the random forest model to the Sentinel-2 data
rf_prediction = raster::predict(s2data, model=rf_model)
# Apply the support vector machines model to the Sentinel-2 data
svm_prediction = raster::predict(s2data, model=svm_model)
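# Optional: write the classified maps to disk so they can be opened in a GIS; for example:
# raster::writeRaster(rf_prediction, filename = "rf_prediction.tif", format = "GTiff", overwrite = TRUE)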
# Convert the evaluation data into a spatial object using the X and Y coordinates and extract predicted values
eva.sp = SpatialPointsDataFrame(coords = cbind(eva$x, eva$y), data = eva,
                                proj4string = crs("+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"))
## Superimpose evaluation points on the predicted classification and extract the values
# neural network
nnet_Eval = extract(nnet_prediction, eva.sp)
# random forest
rf_Eval = extract(rf_prediction, eva.sp)
# support vector machines
svm_Eval = extract(svm_prediction, eva.sp)
# Create an error matrix for each of the classifiers
# Fixing the factor levels to 1:8 keeps confusionMatrix() from failing when a classifier misses
# a class entirely (nnet is the weakest of the three and may not capture all eight classes)
nnet_errorM = confusionMatrix(factor(nnet_Eval, levels = 1:8), factor(eva$class, levels = 1:8))
rf_errorM = confusionMatrix(factor(rf_Eval, levels = 1:8), factor(eva$class, levels = 1:8))
svm_errorM = confusionMatrix(factor(svm_Eval, levels = 1:8), factor(eva$class, levels = 1:8))
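# Optional: the headline figures can be pulled straight out of each confusionMatrix object:
# rf_errorM$overall[c("Accuracy", "Kappa")] # overall accuracy and kappa for random forest
# rf_errorM$byClass[, "Balanced Accuracy"]  # per-class balanced accuracy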
# Plot the results next to one another along with the 2018 NMD dataset for comparison
nmd2018 = raster("NMD_S2Small.tif") # load the NMD dataset (Nationella Marktäckedata, the Swedish National Land Cover Database)
crs(nmd2018) <- crs(nnet_prediction) # Correct the coordinate reference system so it matches with the rest
rstack = stack(nmd2018, nnet_prediction, rf_prediction, svm_prediction) # combine the layers into one stack
names(rstack) = c("NMD 2018", "Single Layer Neural Network", "Random Forest", "Support Vector Machines") # name the stack
plot(rstack) # plot it!
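# Optional: beyond the visual comparison, tabulate how many pixels each model assigned to
# every class, e.g.:
# raster::freq(rf_prediction) # pixel counts per class value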
# Congratulations! You conducted your first machine learning classification in R.
# Please cite the paper referred to at the beginning if you use any part of this script in a publication. Thank you! :-)