-
Notifications
You must be signed in to change notification settings - Fork 0
/
GTD.Rmd
724 lines (574 loc) · 36.2 KB
/
GTD.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
---
title: "A brief analysis of the Global Terrorism Database"
author: "Julian Hatwell"
date: "April 1st, 2016"
output: html_document
---
```{r prologue, results='hide', echo=FALSE}
knitr::opts_chunk$set(warning = FALSE
, message = FALSE
, echo = FALSE
, results = 'hide'
, fig.show='hide'
)
```
```{r setup}
require(readr)
require(MASS)
require(dplyr)
require(reshape2)
require(ggplot2)
require(vcd)
require(vcdExtra)
require(splines)
require(car)
require(countreg)
intextCols = c("pink3", "darkblue")
yrmnth.grid <- data.frame(nMonthsSinceJan1970 = 1:552)
series_base <- data.frame(n = rep(0,552), nMonthsSinceJan1970 = 1:552)
series_base$yrmnth <- as.character(1:552)
yr <- factor(floor((series_base$nMonthsSinceJan1970-1) / 12 + 1970))
pos.part <- function(x) {
x <- ifelse(x < 0, 0, x)
x
}
```
```{r getDataLocal, cache=TRUE}
gtd <- read_csv("gtd.csv")
```
```{r tidyData}
gtdwe <- gtd[gtd$region_txt == "Western Europe" &
gtd$nkill > 0 & !(is.na(gtd$nkill))
, c("eventid", "iyear", "imonth", "country_txt", "nkill", "gname")]
gtdwe <- within(gtdwe, {
country_txt <- ifelse(country_txt %in% c("Italy", "France"
, "Germany", "United Kingdom", "Spain")
, country_txt, "Other")
imonth[eventid == 197400000001] <- 4 # Red Brigades Genoa 1974 Kidnapping
})
rm("gtd")
```
```{r basePlotData}
gtdfatals <- tapply(gtdwe$nkill, INDEX = list(gtdwe$iyear, gtdwe$country), FUN = sum, na.rm = TRUE)
gtdfatals <- data.frame(year = rownames(gtdfatals), gtdfatals)
gtdfatals <- melt(id.vars = "year"
, variable.name = "country"
, value.name = "fatalities"
, data = gtdfatals)
gtdfatals <- gtdfatals[!is.na(gtdfatals$fatalities),]
gtdfatals <- within(gtdfatals, {
country <- factor(country, levels = c("Other", "United.Kingdom", "Italy", "Spain", "France", "Germany"))
year <- factor(year, levels = 2015:1970)
})
gtdfatals <- rbind(gtdfatals, list(2015, "France", 130))
```
```{r groupsData}
gtdgroups <- tapply(gtdwe$nkill
, INDEX = list(gtdwe$iyear, gtdwe$imonth, gtdwe$gname)
, FUN = sum, na.rm = TRUE)
gtdgroups <- data.frame(ftable(gtdgroups, row.vars = 1:3)) %>%
filter(!(is.na(Freq))) %>%
rename(fatalities = Freq
, tergroup = Var3
, month = Var2) %>%
mutate(year = factor(Var1, levels = 2015:1970))
gtdgroups <- within(gtdgroups, {
levels(tergroup) <- c(levels(tergroup), "Lockerbie"
, "Paris Attacks")
})
gtdgroups <- gtdgroups[, c("year", "month", "tergroup", "fatalities")]
gtdgroups[gtdgroups$year == 1988 &
gtdgroups$tergroup == "Unknown"
, "tergroup"] <- "Lockerbie"
gtdgroups <- rbind(gtdgroups, list(2015, 11, "Paris Attacks", 130))
gtdgroups$tergroup <- make.names(gtdgroups$tergroup)
load("C:\\Dev\\Study\\R\\R_Basics\\tervars.RData")
#load("D:\\github\\R_Basics\\tervars.RData")
tervars <- tervars %>% mutate(tergroup = make.names(tergroup))
gtdgroups <- left_join(gtdgroups, tervars) %>%
mutate(tergroup = factor(tergroup)
, International.Extremism = factor(intext, labels = c("No", "Yes")))
```
```{r numberOfIncidentsData}
gtdgroups <- within(gtdgroups, {
yr <- as.numeric(as.character(year)) - 1970
Year <- as.numeric(as.character(year))
mnth <- as.numeric(as.vector(month))
yrmnth <- yr * 12 + mnth
ieCols <- ifelse(International.Extremism == "No"
, intextCols[1], intextCols[2])
})
nincm <- tapply(gtdgroups$International.Extremism, gtdgroups$yrmnth, function(x)
{sum(1 * (x == "No")) })
nincm <- cbind(series_base, nincm = as.vector(nincm[series_base$yrmnth]))
nincm$nIncidents <- nincm$n + ifelse(!(is.na(nincm$nincm)), nincm$nincm, 0)
nincm$year <- yr
c(mean = mean(nincm$nIncidents), var = var(nincm$nIncidents),
ratio = var(nincm$nIncidents) / mean(nincm$nIncidents))
yincm <- tapply(gtdgroups$International.Extremism, gtdgroups$yrmnth, function(x)
{sum(1 * (x == "Yes")) })
yincm <- cbind(series_base, yincm = as.vector(yincm[series_base$yrmnth]))
yincm$nIncidents <- yincm$n + ifelse(!(is.na(yincm$yincm)), yincm$yincm, 0)
yincm$year <- yr
c(mean = mean(yincm$nIncidents), var = var(yincm$nIncidents),
ratio = var(yincm$nIncidents) / mean(yincm$nIncidents))
```
```{r fatalitiesData}
fatm <- t(tapply(gtdgroups$fatalities, INDEX = list(gtdgroups$International.Extremism, gtdgroups$yrmnth), FUN = sum))
nfatm <- fatm[, "No"]
nfatm <- cbind(series_base, nfatm = as.vector(nfatm[series_base$yrmnth]))
nfatm$fatalities <- nfatm$n + ifelse(!(is.na(nfatm$nfatm)), nfatm$nfatm, 0)
nfatm$year <- yr
yfatm <- fatm[, "Yes"]
yfatm <- cbind(series_base, yfatm = as.vector(yfatm[series_base$yrmnth]))
yfatm$fatalities <- yfatm$n + ifelse(!(is.na(yfatm$yfatm)), yfatm$yfatm, 0)
yfatm$year <- yr
```
```{r nfatPoisFitBad}
nfatc.pois <- goodfit(table(nfatm$fatalities), type = "poisson")
sum.nfatc.pois <- summary(nfatc.pois)
unlist(nfatc.pois$par)
plot(nfatc.pois, shade = TRUE
, main = "poisson model"
, xlab = "Number of incidents\nin a given month")
distplot(table(log(nfatm$fatalities+1)), lambda = unlist(nfatc.pois$par)[1]
, xlab = "Number of incidents\nin a given month")
```
```{r nfatPoisFit}
nfatc.pois <- goodfit(table(log(nfatm$fatalities+1)), type = "poisson")
sum.nfatc.pois <- summary(nfatc.pois)
unlist(nfatc.pois$par)
plot(nfatc.pois, shade = TRUE
, main = "poisson model"
, xlab = "log(fatalities) in a given month")
distplot(table(log(nfatm$fatalities+1)), lambda = unlist(nfatc.pois$par)[1]
, xlab = "log(fatalities) in a given month")
```
```{r nfatNbinFit}
nfatc.nbin <- goodfit(table(log(nfatm$fatalities+1)), type = "nbinom")
sum.nfatc.nbin <- summary(nfatc.nbin)
unlist(nfatc.nbin$par)
plot(nfatc.nbin, shade = TRUE
, main = "negative binomial model"
, xlab = "log(fatalities) in a given month")
distplot(table(log(nfatm$fatalities+1)), type = "nbinomial", size = unlist(nfatc.nbin$par)[1]
, xlab = "log(fatalities) in a given month")
```
```{r yfatPoisFit}
yfatc.pois <- goodfit(table(log(yfatm$fatalities+1)), type = "poisson")
sum.yfatc.pois <- summary(yfatc.pois)
unlist(yfatc.pois$par)
plot(yfatc.pois, shade = TRUE
, main = "poisson model"
, xlab = "log(fatalities) in a given month")
distplot(table(log(yfatm$fatalities+1)), lambda = unlist(yfatc.pois$par)[1]
, xlab = "log(fatalities) in a given month")
```
```{r yfatNbinFit}
yfatc.nbin <- goodfit(table(log(yfatm$fatalities+1)), type = "nbinom")
sum.yfatc.nbin <- summary(yfatc.nbin)
unlist(yfatc.nbin$par)
plot(yfatc.nbin, shade = TRUE
, main = "negative binomial model"
, xlab = "log(fatalities) in a given month")
distplot(table(log(yfatm$fatalities+1)), type = "nbinomial", size = unlist(yfatc.nbin$par)[1]
, xlab = "log(fatalities) in a given month")
```
```{r nfatNbinomGLM}
gtf.glm <- glm(fatalities~nMonthsSinceJan1970, data = nfatm
, family = negative.binomial(theta = unlist(nfatc.nbin$par)[1], link=log))
gtf.nlm <- glm(fatalities~bs(nMonthsSinceJan1970
, knots = c(60, 150, 240, 420))
, data = nfatm
, family = negative.binomial(theta = unlist(nfatc.nbin$par)[1], link=log))
gtf.hur <- countreg::hurdle(fatalities~nMonthsSinceJan1970
, data = nfatm, dist = "negbin", zero.dist = "negbin")
z.cont <- zeroinfl.control(start = gtf.hur$start)
gtf.znb <- countreg::zeroinfl(fatalities~nMonthsSinceJan1970
, data = nfatm, dist = "negbin", zero.dist = "negbin"
, control = z.cont)
```
```{r yfatNbinomGLM}
gtf.glm2 <- glm(fatalities~nMonthsSinceJan1970, data = yfatm
, family = negative.binomial(theta = unlist(yfatc.nbin$par)[1], link=log))
gtf.nlm2 <- glm(fatalities~ns(nMonthsSinceJan1970, knots = c(150, 300))
, data = yfatm
, family = negative.binomial(theta = unlist(yfatc.nbin$par)[1], link=log))
gtf.hur2 <- countreg::hurdle(fatalities~nMonthsSinceJan1970
, data = yfatm, dist = "negbin", zero.dist = "negbin")
z.cont <- zeroinfl.control(start = gtf.hur2$start)
gtf.znb2 <- countreg::zeroinfl(fatalities~nMonthsSinceJan1970
, data = yfatm, dist = "negbin", zero.dist = "negbin"
, control = z.cont)
```
## Introduction
This post is inspired by a recent [article in the Huffington Post](http://www.huffingtonpost.co.uk/2015/11/28/islamic-state-terrorism-threat_n_8670458.html) which contained a barchart (see appendices) of fatalities caused by terrorism in Western Europe over the years since 1970.
The original source of the data is the [Global Terrorism Database](https://www.start.umd.edu/gtd/), which is freely available to download. It's fairly trivial to reconstruct the graph from this base data, as I show here. The original is in the appendices:
```{r basePlot, fig.show='show'}
gg <- ggplot(data = gtdfatals
, aes(x = year, y = fatalities, fill = country))
gg + geom_bar(stat = "identity", colour = "lightgrey") +
scale_fill_manual(values=c("darkgrey", "steelblue", "darkgreen", "gold", "red", "lightblue")) +
coord_flip() + theme_bw() + scale_x_discrete(drop = FALSE) +
theme(legend.position="top", axis.text.y = element_text(size=6)) +
labs(title = "Victims of Terrorist Attacks in Western Europe", y = "Number of fatalities", x = "Year")
```
On initial visual assessment, the eye is drawn to the mountain of bars representing the 70's and 80's and the relative sparsity from the mid-90's onwards. After 2001 there are only the 4 notable peaks in 2004 (Madrid), 2005 (London), 2011 (Norway) and 2015 (Paris). One can immediately gain a sense that the danger from terrorism from 1970 to the end of the 1990's was much greater than it is now.
In my opinion, the graph is a fairly successful attempt to demonstrate that our current perception and fear of global terrorism is caused by something other than factual measures such as the number of deaths (or number of incidents). Our collective perception may be more a symptom of the tactics of terror or simply the dominant socio-political narrative. This unease we feel causes us to find rationales for ignoring the facts of our recent history and to allow ourselves to believe that no time is as dangerous as now.
My reposting of the original graphic on Facebook drew the attention of one particular friend who asserted that the graph wasn't making a valid comparison to current Islamist terrorism because it "includes Northern Ireland in the 1970s and ETA in the 1980s which is not quite the same."
He went on to say "ETA and IRA conflicts were localised not global, secondly, it could be argued that they were liberation movements in search of a political solution and as a result they had the sympathy of parties on the Left. Corbyn and Livingstone supported Sinn Fein but not Al Qaeda or ISIS."
Essentially these comments serve well to emphasise my point. Can we say that people killed in Ireland by the IRA are less dead than people killed in Paris by Islamic extremists? Or were bombs planted by the IRA less dangerous because Jeremy Corbyn liaised with Gerry Adams when he was otherwise shut out of the political discourse? I think not and neither does the [Global Terrorism Database](https://www.start.umd.edu/gtd/) make any such distinction.
This inspired me to do my own analysis of the [Global Terrorism Database](https://www.start.umd.edu/gtd/) to see what (objective) stories the data has to tell and help to cut through some of the political noise and cognitive bias that surrounds such an emotive topic.
## Exploring the data
To examine more closely the different trends in localised terrorism compared to international extremism it's necessary to determine which are the local and international terror groups in the subset of the data pertaining to Western Europe and incidents with 1 or more fatality.
The [Global Terrorism Database](https://www.start.umd.edu/gtd/) does not make this distinction explicit. I had to go through the list myself and label all the groups that I could be sure about (e.g. Basque separatists and Irish Republicans, compared to Secret Organization of al Qa'ida in Europe). I share my categories in the appendices.
My first question, of all the deaths from terrorism recorded in the database since 1970 (plus 130 from Paris 2015), how many are from International terrorism?
```{r AllDeaths, fig.show='show', fig.height=3}
DeathsAll <- with(gtdgroups, tapply(fatalities, International.Extremism, sum))
barplot(DeathsAll, col = intextCols
, main = "Fatalities from Terror Attacks in Western Europe\n1970-2015"
, xlab = "Action by International Extremists?")
text(c(0.7, 1.9), 5600, DeathsAll, xpd = TRUE)
```
Judging by the numbers here, `r round(DeathsAll[1]/DeathsAll[2],1)` times as many deaths were caused by localised terrorism between 1970 and 2015.
Given many of these localised groups have been much quieter since September 11th bombings of 2001 and we've had the Good Friday Agreement (GFA) working well since December 1999, it might help to look at the balance since the GFA:
```{r PostGFADeats, fig.show='show', fig.height=3}
DeathsGFA <- with(gtdgroups[as.numeric(as.character(gtdgroups$year)) >= 2000,], tapply(fatalities, International.Extremism, sum))
barplot(DeathsGFA, col = intextCols
, main = "Fatalities from Terror Attacks in Western Europe\n2001-2015"
, xlab = "Action by International Extremists?"
, ylim = c(0, max(DeathsAll)))
text(c(0.7, 1.9), 1000, DeathsGFA, xpd = TRUE)
```
The balance has shifted to `r round(DeathsGFA[2]/DeathsGFA[1],1)` times as many deaths from international terrorists as localised terrorism. The international threat must feel more current, but is still altogether out of balance given the totals. Take a look at the proportions:
```{r, mosaicPrePostGFA, fig.show='show', fig.height=4, fig.width=4.5}
gtdgroups <- within(gtdgroups, {
GFA <- factor(ifelse(as.numeric(as.character(year)) >= 2000, "Post", "Pre"), levels = c("Pre", "Post"))
})
DeathsMosaic <- with(gtdgroups, tapply(fatalities, INDEX = list(GFA, International.Extremism), sum))
largs <- list(set_varnames = c(B = "Action by International Extremists",
A = "Relative to GFA")
, cex = 0.9)
mosaic(DeathsMosaic
, gp = gpar(col = intextCols, lwd = c(5,2,2,5), lty = c(1,2,2,1))
, labeling_args = largs
, labeling = labeling_values
, legend = FALSE
, keep_aspect_ratio = FALSE
, main = "Fatalities from terrorist attacks 1970-2015"
, main_gp = gpar(fontsize = 12))
mosaic(round(prop.table(DeathsMosaic),2) * 100
, gp = gpar(col = intextCols, lwd = c(5,2,2,5), lty = c(1,2,2,1))
, labeling_args = largs
, labeling = labeling_values
, legend = FALSE
, keep_aspect_ratio = FALSE
, main = "Fatalities (%) from terrorist attacks 1970-2015"
, main_gp = gpar(fontsize = 12))
```
The dashed outlines show where numbers are lower than expected given the overall proportions. In short, the weight of fatalities from international terrorist attacks falls disproportionately after the GFA. This is no surprise.
What's important is this; Based on an assumption that things are more dangerous now, in the 15 years after the GFA you'd expect to see at least half as many fatalities as in the 30 previous years, a ratio of 2:1 (Pre:Post). The observed ratio is 9:1. In other words, 90% of all these fatalities occurred before the beginning of 2001, the year of the September 11th attacks.
Here's the first graph with the local vs international split highlighted:
```{r intextPlot, fig.show='show'}
gg <- ggplot(data = gtdgroups
, aes(x = year, y = fatalities)) +
scale_fill_manual(values=intextCols)
gg + geom_bar(stat = "identity", aes(fill = International.Extremism)) +
coord_flip() + theme_bw() + scale_x_discrete(drop = FALSE) +
theme(legend.position="top", axis.text.y = element_text(size=6)) +
labs(title = "Victims of Terrorist Attacks in Western Europe", y = "Number of fatalities", x = "Year")
```
One should conclude that those `r DeathsGFA[2]` deaths caused by global terrorists in the years since the GFA are almost entirely accounted for in Madrid and London bombings and the Paris attacks. Clearly these are very deadly incidents. However, it is now easy to pick out some equally large events in the data going back to the 1970's and 1980's. It would be useful see if the lethality of the attacks has changed over time.
```{r AvgDeaths, fig.show='show', fig.height=3}
DeathsAllAve <- with(gtdgroups, tapply(fatalities, International.Extremism, mean))
DeathsGFAAve <- with(gtdgroups[as.numeric(as.character(gtdgroups$year)) >= 2000,], tapply(fatalities, International.Extremism, mean))
barplot(c(DeathsAllAve,DeathsGFAAve), col = intextCols
, main = "Average Fatalities per Terrorist Incident in Western Europe"
, xlab = "Action by International Extremists?")
abline(v = 2.5, lwd = 3)
text(c(0.6, 1.85, 3.1, 4.3)
, 102
, round(c(DeathsAllAve,DeathsGFAAve),0)
, xpd = TRUE)
text(2.4, 40, "All Count\n1970-2015", pos = 2)
text(2.6, 40, "Since GFA\n2001-2015", pos = 4)
```
Looking at average death toll per incident seems to indicate that when they strike, the international terrorists are deadlier. They appear to have become vastly more deadly per incident in the last 15 years. However, this simplistic explanation does not tell the whole story, as we shall see.
The following two graphs restate the same data as counts of numbers of incidents that result in fatalities:
```{r intextCountPlot, fig.show='show'}
gg2 <- ggplot(data = gtdgroups
, aes(x = year)) +
scale_fill_manual(values=intextCols)
gg2 + geom_bar(stat = "count", aes(fill = International.Extremism)) +
scale_x_discrete(drop = FALSE) +
coord_flip() + theme_bw() + theme(legend.position="top"
, axis.text.y = element_text(size=6)) +
labs(title = "Number of terrorist incidents
resulting in fatality in Western Europe"
, y = "Number of incidents", x = "Year")
```
Here, the localised terror groups absolutely dominate the graph. It's also obvious that there was a far greater, more constant and continuous threat in the past. All incidents are rapidly diminishing in number to the present day.
Looking at the international incidents alone, we can see that they follow the same trend, decreasing in number since the 1970's-80's:
```{r extCountPlot, fig.show='show'}
gg3 <- ggplot(data = gtdgroups[gtdgroups$International.Extremism == "Yes",]
, aes(x = year))
gg3 + geom_bar(stat = "count", fill = "darkblue") +
scale_x_discrete(drop = FALSE) +
coord_flip() + theme_bw() + theme(legend.position="top"
, axis.text.y = element_text(size=6)) +
labs(title = "Number of terrorist incidents
committed by international extremists
resulting in fatality in Western Europe"
, y = "Number of incidents", x = "Year")
```
This is the clearest evidence that not only are global terror attacks are much rarer than localised groups over period 1970-2015, they have also become far more rare since the 1990's.
So back to the question of whether the attacks have become deadlier in recent years. If that is the case then surely we do have as much to fear from larger scale attacks, even if they are less frequent.
Firstly, a look at the localised groups:
```{r nfatNbinomGLMplot, fig.show='show', fig.height=4, fig.width=4}
gtf.glm.preds <- predict(gtf.glm, newdata = yrmnth.grid, se = TRUE)
gtf.hur.preds <- data.frame(fit = predict(gtf.znb, newdata = yrmnth.grid, type = "response"))
gtf.hur.preds <- within(gtf.hur.preds, {
ci.upr <- fit + 1.96 * gtf.glm.preds$se.fit
ci.lwr <- fit - 1.96 * gtf.glm.preds$se.fit
})
gtf.hur.preds <- lapply(gtf.hur.preds, pos.part)
gtf.nlm.preds <- predict(gtf.nlm, newdata = yrmnth.grid, type = "response", se = TRUE)
gtf.nlm.preds <- within(gtf.nlm.preds, {
ci.upr <- fit + 1.96 * se.fit
ci.lwr <- fit - 1.96 * se.fit
})
gtf.nlm.preds <- lapply(gtf.nlm.preds, pos.part)
plot(log(fatalities+1)~nMonthsSinceJan1970
, data = nfatm#, xlim = c(0,600)
, xaxt = "n"
, col = intextCols[1]
, pch = 20, cex =0.5
, xlab = "Year"
, ylab = "log(Fatalities) in a given month"
, main = "Fatalities from terrorist actions\n (local, non-international)")
with(gtf.nlm.preds
, polygon(c(1:552,552:1)
, log(c(ci.upr, rev(ci.lwr))+1)
, col=rgb(0,0.8,0.5,0.1), border=NA))
lines(1:552, log(gtf.nlm.preds$fit+1), col = "aquamarine1", lwd = 3)
with(gtf.hur.preds
, polygon(c(1:552,552:1)
, log(c(ci.upr, rev(ci.lwr))+1)
, col=rgb(1,0.3,0.4,0.15), border=NA))
lines(1:552, log(gtf.hur.preds$fit+1), col = intextCols[1], lwd = 3)
axis(side = 1, at = seq(0, 600, 60), labels = seq(0, 600, 60)/12 +1970)
points(c(128,499), log(c(85,77)), pch = c(4,1), col = "red", cex = 1.5)
text(c(128,499), log(c(85,77)), pos = c(4,1), c("Bologna, 1980", "Norway, 2011"), cex = 0.6)
legend("bottomleft"
, inset = c(0.15, 0)
, legend = c("log(fatalities)"
, "zinf fit"
, "basis func")
, cex = 0.6
, col = c(intextCols[1], intextCols[1], "aquamarine2")
, pch = c(19, NA, NA)
, lty = c(NA, 1, 1)
, lwd = c(NA, 2, 2))
plot(log(fatalities)~year, nfatm, col = intextCols[1]
, main = "Distribution of fatalities\n(local, non-international)")
points(42, log(77), col = "red", cex = 1.5)
text(42, log(77), pos = 2, "Norway, 2011", cex = 0.6)
```
The points are aggregated such that August 1980 shows up as 137 fatalities. In fact the most lethal non-International terrorist attack in Western Europe was the ["Strage di Bologna"](https://en.wikipedia.org/wiki/Bologna_massacre) which killed 85, allegedly carried out by the neo-fascist Nuclei Armati Rivoluzionari. A mark has been added to indicate where this would lie on the plot as a standalone point. Note how this compares to the second single most lethal attack, carried out by Anders Breivik in Norway (also indicated). The official explanation is that he was an individual unlikely to have been linked to any group.
For the localised terrorist attacks, the steady downward trend is obvious and this echoes what we know to be true from history; In Western Europe, terrorism from local sources (Nationalism, Republicanism, Revolutionary Communism, Fascism) was very prevalent in the 1970's and 1980's. From this peak it has steadily declined and all but disappeared today.
I continue along the same lines for the international extremist terror threat:
```{r yfatNbinomGLMplot, fig.show='show', fig.height=4, fig.width=4}
gtf.glm.preds <- predict(gtf.glm2, newdata = yrmnth.grid, se = TRUE)
gtf.hur.preds <- data.frame(fit = predict(gtf.znb2, newdata = yrmnth.grid, type = "response"))
gtf.hur.preds <- within(gtf.hur.preds, {
ci.upr <- fit + 1.96 * gtf.glm.preds$se.fit
ci.lwr <- fit - 1.96 * gtf.glm.preds$se.fit
})
gtf.hur.preds <- lapply(gtf.hur.preds, pos.part)
gtf.nlm.preds <- predict(gtf.nlm2, newdata = yrmnth.grid
, type = "response"
, se = TRUE)
gtf.nlm.preds <- within(gtf.nlm.preds, {
ci.upr <- fit + 1.96 * se.fit
ci.lwr <- fit - 1.96 * se.fit
})
gtf.nlm.preds <- lapply(gtf.nlm.preds, pos.part)
plot(log(fatalities)~nMonthsSinceJan1970
, data = yfatm
, xaxt = "n"
, col = intextCols[2]
, pch = 20, cex =0.5
, xlab = "Year"
, ylab = "log(Fatalities) in a given month"
, main = "Fatalities from terrorist actions\n (international in Western Europe)")
axis(side = 1, at = seq(0, 600, 60), labels = seq(0, 600, 60)/12 +1970)
with(gtf.nlm.preds
, polygon(c(1:552,552:1)
, log(c(ci.upr, rev(ci.lwr))+1)
, col=rgb(0,0.8,0.5,0.1), border=NA))
lines(1:552, log(gtf.nlm.preds$fit+1), col = "aquamarine2", lwd = 3)
with(gtf.hur.preds
, polygon(c(1:552,552:1)
, log(c(ci.upr, rev(ci.lwr))+1)
, col=rgb(0.2,0.3,1,0.15), border=NA))
lines(1:552, log(gtf.hur.preds$fit+1), col = intextCols[2], lwd = 3)
legend("bottomright", legend = c("log(fatalities)"
, "zinf fit"
, "nspline")
, inset = c(0.1, 0.4)
, cex = 0.6
, col = c(intextCols[2], intextCols[2], "aquamarine3")
, pch = c(19, NA, NA)
, lty = c(NA, 1, 1)
, lwd = c(NA, 2, 2))
points(c(411,427,551)
, log(c(191, 56, 130))
, col = "red", cex = 1.5)
text(c(411,427,551)
, log(c(191, 56, 130))
, pos = c(2,4,2)
, cex = 0.6
, c("Madrid, 2004"
, "London 2005"
, "Paris 2015"))
points(c(2, 191,57,228)
, log(c(55, 60, 88, 270))
, col = "red", cex = 1.5)
text(c(2, 191,57,228), log(c(55, 60, 88, 270))
, pos = c(4, 4,4,2)
, cex = 0.6
, c("\nSwissair 330"
, "EgyptAir 648"
, "TWA 841"
, "Pan Am 103"))
plot(log(fatalities)~year, yfatm, col = intextCols[2]
, main = "Distribution of fatalities\n(international in Western Europe)")
points(c(35,36,46)
, log(c(191, 56, 130))
, col = "red", cex = 1.5)
text(c(35,36,46)
, log(c(191, 56, 130))
, pos = c(2,4,2)
, cex = 0.6
, c("Madrid, 2004"
, "London 2005"
, "Paris 2015"))
points(c(1,16,5,19)
, log(c(55, 60, 88, 270))
, col = "red", cex = 1.5)
text(c(1,16,5,19), log(c(55, 60, 88, 270))
, pos = c(4, 4,4,2)
, cex = 0.6
, c("\nSwissair 330"
, "EgyptAir 648"
, "TWA 841"
, "Pan Am 103"))
```
Here we see a shallow rise in the in the 1970's to 90's followed by a steady decline to the present day with very low fatality rates.
There is a majority of low level attacks with a small number of very serious incidents involving aeroplane bombings in the 70's and 80's, the public transport suicide bombings in Madrid and London ('04 and '05) and the death cell that attacked Paris last year.
The four catastrophic flight bombings easily rival the death tolls of more recent attacks. So, if the fatalities are trending downwards for both groups, is it reasonable to see the apparent 10-fold increase in the average number of fatalities per incident as a sign of greater danger?
The problem is that it's a somewhat crude measure. It takes no account of all the months where there were no fatal attacks. Consider instead the fatalities per month over the whole period. This acts as a weighting against very rare events:
```{r AvgDeathsMonth, fig.show='show', fig.height=4}
DeathsAveMnthAll <- c("No" = round(mean(nfatm$fatalities,0))
, "Yes" = round(mean(yfatm$fatalities),0))
DeathsAveMnthGFA <- c("No" = round(mean(nfatm$fatalities[nfatm$nMonthsSinceJan1970 > 372],0))
, "Yes" = round(mean(yfatm$fatalities[yfatm$nMonthsSinceJan1970 > 372]),0))
barplot(c(DeathsAveMnthAll,DeathsAveMnthGFA), col = intextCols
, main = "Average Terrorist Fatalities per Month in Western Europe"
, xlab = "Action by International Extremists?")
abline(v = 2.5, lwd = 3)
text(c(0.7, 1.9, 3.1, 4.3)
, 9.5
, round(c(DeathsAveMnthAll,DeathsAveMnthGFA),0)
, xpd = TRUE)
text(2.4, 4, "All Count\n1970-2015", pos = 2)
text(2.6, 4, "Since GFA\n2001-2015", pos = 4)
```
Accounting for all the months when there were no incidents, it's clear that the localised terrorists were the more deadly group in the period 1970-2000, by the sheer number of incidents. Even though international terrorists carried out a number of airplane bombing atrocities, these were less than a handful in two decades.
The threat from international terrorists, if measured in a rate of fatalities over a given time period, has remained constant and low and has never come close to the danger we faced in the period 1970 - 2000 from localised terrorism representing a mix of sectarian, separatist, revolutionary communist and fascist ideologies in different countries at the time.
This analysis deals with the situation in Western Europe and widely available, highly credible data. The only conclusion one can draw from this is that we're much safer now than we were in the period 1970-2000.
Of course, there is no way to count the attacks that have been covertly thwarted before they take place. These represent an unknowable, unrealised danger only hinted at by government and media sources. To consider this at all takes us full circle to the question of why we feel so threatened in the first place.
## Appendices
This was purely a theoretical exercise not intended to be taken as a prediction or forecast. There are many volatile factors that this picture could change dramatically and upredictably at any time. I'm looking at trends over time and not taking any influential factors in to consideration.
The analysis is done. If anyone is interested in the workings, read on.
### Original Chart
The original chart from the [article in the Huffington Post](http://www.huffingtonpost.co.uk/2015/11/28/islamic-state-terrorism-threat_n_8670458.html)
![barchart of fatalities][1]
[1]: GTD_files\\figure-html\\o-TERRORISM-570.jpg "Victims of Terrorist Attacks in Western Europe"
### Data Corrections
After downloading the freely available [Global Terrorism Database](https://www.start.umd.edu/gtd/) I found that it contains data only up to 2014. Huffington Post's statistical consultant probably added 2015-16 from other sources, although they don't tell us that. I've added a value of [130](https://en.wikipedia.org/wiki/November_2015_Paris_attacks#Casualties) fatalities in France for 2015.
There is an entry for the Red Brigades 1974 hostage taking in Genoa with 1 killed which has month set as zero. [I have updated this to April](https://web.stanford.edu/group/mappingmilitants/cgi-bin/groups/view/77)
### Categorisation of the terrorist groups
The catogories applied to the various terror groups. Lockerbie was listed under "Unknown" but was easy to single out and label. Paris Attacks are also individually marked as this point has been added in. Any advice on making it more accurate is most welcome:
```{r cats, results='show'}
cats <- unique(gtdgroups[, c("tergroup", "International.Extremism")])
cats[,1] <- abbreviate(as.character(cats[,1]), minlength = 40)
cats
```
### The plot trend lines
The main trend lines were added using a zero inflated model. Traditional linear models would be dragged downwards on the Y axis by the prevalence of zeros in the data, giving too low an estimate among other undesirable characteristics. The zero inflated model is robust to this type of data.
In the non-international plot, a secondary line is created from a basis function with knots that follow the non-linear ups and downs in historical trends.
To make the fairest possible comparison in the international groups plot, the same basis function above was considered. However, it was found to be far too sensitive to outliers. The model used here is a natural spline with 3 degrees of freedom which is a lot smoother and linear at the ends. Still, it is less stable than the hurdle model because of the spread between all the months with zero fatalities recorded and the number of points with very high recorded fatalities. This is reflected in the very large confidence bands and the uplift at the right extremity where it tries to head towards the influential outliers.
### Finding the best fit
For the scatter plots, finding the right models to add trend lines was not a traditional linear regression task as the data are not iid.
I'm really looking at a time series where it is as certain as can be that near neighbours in the data will be correlated. Whatever are the factors that lead to terrorism one month are probably just as true in the previous and next months, but may vary more from year to year, and most as the decades go by.
There are also many months where the number of incidents, and consequently fatalities, is zero. It's not a smooth distribution.
The challenge was to find a robust smoothing line that reveals trends in the count data where there is serious non-linearity and dramatic zero inflation.
I started with the assumption that number of fatalities in a given month follows standard models for count data, but initially struggled with it as can be seen from the first attempt to perform a poisson goodness of fit:
```{r nfatPoisFitBad, fig.show='show', fig.height=4, fig.width=4}
```
What's happening here? The data are very skewed and my obvious next step is to operate on the log transformed data (+1 for the zeros). When thinking about why that is, it might make more sense to think of the incidents themselves as the count data, while the number of deaths for each incident is really some level of intensity. This might even be considered to be quantitative, and certainly leaves me thinking that I'm actually trying to model a multivariate response of frequency and intensity. That's going to take quite a bit of brain power and will require a lot more explanatory variables to solve it. I put this problem aside for another day. I just need something good enough for a trend line.
I try with the log transform to tame the high skew and dispersion.
```{r nfatPoisFit, fig.show='show', fig.height=4, fig.width=4}
```
Here in the poisson model I can see evidence of zero inflation. Note that the integer valued bars aggregate a number of values by the log transform, and how the previous graph has a very pronounced wave shape starting way below the line at zero and the travelling well above the line for many values.
This means there are more times when zero deaths are counted in a given month than the standard poisson model allows for.
```{r nfatNbinFit, fig.show='show', fig.height=4, fig.width=4}
```
These are a little better with less evidence of zero inflation. However, there's still a real lack of fit at $e^2$ and above. I want to find a model suitable for this situation.
I go through the same process with the fatalities data from international terror incidents.
```{r yfatPoisFit, fig.show='show', fig.height=4, fig.width=4}
```
This is a disasterous fit. There is huge zero inflation compared to what the model expects followed by intense overdispersion at higher values. Of course this stands to reason as there are many months out of the time period when there are no fatal incidents.
```{r yfatNbinFit, fig.show='show', fig.height=4, fig.width=4}
```
Again, the negative binomial is a far better, albeit rather imperfect fit. This can be corrected by means of a zero inflated or hurdle model:
Here is the non-international data fitted with linear negative binomial, hurdle and zero-inflated poisson models:
```{r nfatRootFits, fig.show='show', fig.height=4, fig.width=3}
gtn_hnb <- hurdle(fatalities~nMonthsSinceJan1970
, data = nfatm, dist = "negbin")
gtn_znb <- zeroinfl(fatalities~nMonthsSinceJan1970
, data = nfatm, dist = "negbin")
countreg::rootogram(gtf.glm, max = 50, main = "Negative Binomial"
, xlab = "fatalities (non-international)")
countreg::rootogram(gtn_hnb, max = 50, main = "Hurdle Negative Binomial"
, xlab = "fatalities (non-international)")
countreg::rootogram(gtn_znb, max = 50
, main = "Zero-inflated Negative Binomial"
, xlab = "fatalities (non-international)")
```
Here is the international data fitted with linear negative binomial, hurdle and zero-inflated poisson models:
```{r yfatRootFits, fig.show='show', fig.height=4, fig.width=3}
gty_hnb <- hurdle(fatalities~nMonthsSinceJan1970
, data = yfatm, dist = "negbin")
gty_znb <- zeroinfl(fatalities~nMonthsSinceJan1970
, data = yfatm, dist = "negbin")
countreg::rootogram(gtf.glm2, max = 50, main = "Negative Binomial"
, xlab = "fatalities (international)")
countreg::rootogram(gty_hnb, max = 50, main = "Hurdle Negative Binomial"
, xlab = "fatalities (international)")
countreg::rootogram(gty_znb, max = 50
, main = "Zero-inflated Negative Binomial"
, xlab = "fatalities (international)")
```
There's not much to choose between the latter two in each case. Vuong's non-nested hypothesis test is useful here:
```{r vuongTest, results='show'}
require(pscl)
cat("Generic hurdle and zero infl fits to the data. Non-international, then international")
vuong(gtn_hnb, gtn_znb)
vuong(gty_hnb, gty_znb)
cat("Final hurdle and zero infl fits to the data, using parameters discovered from model fitting process. Non-international, then international")
vuong(gtf.hur, gtf.znb)
vuong(gtf.hur2, gtf.znb2)
```
*Negative values favour model 2, in the sequence. My test for the non-international doesn't give a significant result and for International groups favours model 2, zero inflated rather than hurdle.*