Time series analysis is a statistical modeling and forecasting approach used to understand the pattern of an event observed over a period of time. The approach has a wide variety of applications, ranging from predicting stock market prices to modeling climate change. In this introductory tutorial, we will analyse data on conflicts that occurred in Africa, the Middle East and South East Asia.
The data we will use for this tutorial is collected from the Armed Conflict Location & Event Data Project (ACLED). The data contains more than 450,000 conflict-related incidents that occurred in the last 20 years. For a closer look, let’s bring in the data and do some basic exploratory analysis.
The main R libraries we will use are tseries, forecast and ggplot2. Let’s go ahead and load them first.
library(anytime)
library(ggplot2)
library(ggmap)
library(dplyr)
library(lubridate)
library(rgdal)
library(gganimate)
library(plyr)
library(dplyr)
library(lubridate)
library(wesanderson)
library(ggthemes)
library(grid)
library(gridExtra)
library(plotly)
library(ggplotify)
With the libraries loaded, let’s read in the conflict data and list its columns:
# Load the raw conflict records from the CSV file and list the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, and `<-` is used for assignment.
cf <- read.csv("conflicts_in_africa.csv", header = TRUE, sep = ",")
names(cf)
[1] "field_1" "data_id" "iso"
[4] "event_id_cnty" "event_id_no_cnty" "event_date"
[7] "year" "time_precision" "event_type"
[10] "actor1" "assoc_actor_1" "inter1"
[13] "interaction" "region" "country"
[16] "admin1" "admin2" "admin3"
[19] "location" "latitude" "longitude"
[22] "geo_precision" "source" "source_scale"
[25] "notes" "fatalities" "timestamp"
[28] "iso3" "date"
In addition to the day and time of incidents, the data contains the geographic location of incidents, from the sub-continental down to the local administrative level. Let’s take a look at the first 3 rows.
# Print the first three records of the raw data to inspect the columns.
head(cf, 3)
field_1 data_id iso event_id_cnty event_id_no_cnty event_date year
1 364 1592420 204 BEN179 179 26 April 2018 2018
2 365 1592420 204 BEN179 179 26 April 2018 2018
3 626 1592420 204 BEN178 178 25 April 2018 2018
time_precision event_type
1 1 Violence against civilians
2 1 Violence against civilians
3 2 Battle-No change of territory
actor1 assoc_actor_1 inter1 interaction
1 Fulani Ethnic Militia (Benin) 4 47
2 Civilians (Benin) 7 47
3 Fulani Ethnic Militia (Benin) 4 44
region country admin1 admin2 admin3 location latitude longitude
1 Western Africa Benin Plateau Ketou Ketou 7.36332 2.59978
2 Western Africa Benin Plateau Ketou Ketou 7.36332 2.59978
3 Western Africa Benin Plateau Ketou Ketou 7.36332 2.59978
geo_precision source source_scale
1 2 Agence Benin Press National
2 2 Agence Benin Press National
3 2 Agence Benin Press National
notes
1 On 26 April, Fulani herdmen attacked Igboiganan, Ketou, as a reprisal for the 25 April clashes. 5 civilians were killed.
2 On 26 April, Fulani herdmen attacked Igboiganan, Ketou, as a reprisal for the 25 April clashes. 5 civilians were killed.
3 On 25 April, two Fulani herdsmen were killed as a clash erupted with local farmers in Igboiganan, Ketou.
fatalities timestamp iso3 date
1 5 1525110000 BEN 2018-04-26
2 5 1525110000 BEN 2018-04-26
3 2 1525110000 BEN 2018-04-25
Although we’re not primarily interested in the brief descriptions of the incidents or the sources of the information, we can see that the data is well organized and compiled for further processing.
# Number of rows and columns in the raw data.
dim(cf)
[1] 266679 29
# ---- Data preparation ----
# Prepend a constant column of 1s. It is later renamed "Incidents", so that
# counting its entries within a group gives the number of incidents.
cf_2 <- data.frame(1, cf[, ])

# Keep only the columns needed for the analysis. Positions refer to cf_2,
# i.e. the raw column positions shifted by one by the prepended column:
# date, year, country, region, event_type, actor1, the constant column,
# fatalities, latitude, longitude.
cf_3 <- cf_2[, c(30, 8, 16, 15, 10, 11, 1, 27, 21, 22)]

# Rename
colnames(cf_3) <- c(
  "Date", "Year", "Country", "Region",
  "Event Type", "Actor", "Incidents",
  "Fatalities", "Latitude", "Longitude"
)

# Date column as Date format.
# anydate() parses straight to a calendar date. Parsing to a local-time
# POSIXct with anytime() and then calling as.Date(..., tz = "UTC") moves
# every date back by one day on machines whose time zone is ahead of UTC.
# as.character() makes the parse independent of whether the column was read
# as a factor or as character.
ordered_date <- anydate(as.character(cf_3$Date))
cf_3$Date <- ordered_date

# Aggregate and count: FUN = length counts the rows in each group.
cf_agg_1 <- aggregate(Incidents ~ Country, data = cf_3, FUN = length)
cf_agg_2 <- aggregate(
  Incidents ~ Date + Year + Region + Country + Fatalities,
  data = cf_3,
  FUN = length
)
cf_agg_3 <- aggregate(
  Incidents ~ Longitude + Latitude + Date + Fatalities,
  data = cf_3,
  FUN = length
)

# National summaries
# Records with at least one fatality. The call is namespaced so it cannot
# resolve to stats::filter() regardless of package load order.
cf_agg_3_f <- dplyr::filter(cf_3, Fatalities != 0)
# Total fatalities per country.
cf_agg_4 <- aggregate(Fatalities ~ Country, data = cf_agg_3_f, FUN = sum)
# Incident count per country.
cf_agg_5 <- aggregate(Incidents ~ Country, data = cf_3, FUN = length)
# Fatalities and incident counts side by side, matched on Country.
cf_agg_6 <- plyr::join(cf_agg_4, cf_agg_5, by = "Country")
# Convert `x` to a factor whose levels run from the least frequent value to
# the most frequent one, so that plots list categories in order of frequency.
#
# x: a vector of category labels.
# Returns a factor with the same values as `x` and frequency-ordered levels.
reorder_class <- function(x) {
  frequency <- table(x)
  ascending_levels <- names(sort(frequency))
  factor(x, levels = ascending_levels)
}
# Horizontal bar chart of the number of records per country. Countries are
# ordered by frequency via reorder_class(), and each bar gets its own fill
# colour (the legend is hidden).
p1 <- ggplot()
p1 +
  geom_bar(
    data = cf_3,
    mapping = aes(
      x = reorder_class(Country),
      fill = factor(Country)
    )
  ) +
  labs(
    x = "Country",
    y = "Incidents",
    title = "Number of Incidents",
    subtitle = "1997-2018"
  ) +
  coord_flip() +
  theme_hc() +
  theme(legend.position = "none")
# Fatalities over time as line charts, one panel per country with seven
# panels per row. The y scale is limited to 0-100.
p2 <- ggplot(
  data = cf_agg_2,
  mapping = aes(
    x = Date,
    y = Fatalities,
    group = 1,
    fill = factor(Country)
  )
)
p2 +
  geom_line(mapping = aes(colour = Country, group = 1)) +
  scale_y_continuous(limits = c(0, 100)) +
  facet_wrap(~Country, ncol = 7) +
  labs(
    x = "Year",
    y = "Fatalities",
    title = "Recorded Fatalities",
    subtitle = "1997-2018"
  ) +
  theme_hc() +
  theme(legend.position = "none")
# Incident counts by date as bars, one panel per region with five panels
# per row. The y scale is limited to 0-200.
p3 <- ggplot(
  data = cf_agg_2,
  mapping = aes(x = Date, y = Incidents)
)
p3 +
  geom_bar(mapping = aes(colour = Region), stat = "identity") +
  scale_y_continuous(limits = c(0, 200)) +
  facet_wrap(~Region, ncol = 5) +
  labs(
    x = "Year",
    y = "Count",
    title = "Incidents by Sub Continents",
    subtitle = "1997-2018"
  ) +
  theme_hc() +
  theme(legend.position = "none")
# Stacked columns of fatalities per year, coloured by region, with the
# legend placed below the plot.
# The title names fatalities because that is what the y axis shows; the
# "Incidents" title belongs to the incident-count chart.
p4 <- ggplot()
p4 +
  geom_col(
    data = cf_agg_2,
    mapping = aes(
      x = Year,
      y = Fatalities,
      color = Region,
      fill = Region
    )
  ) +
  theme_hc() +
  theme(legend.position = "bottom") +
  labs(
    title = "Fatalities by Sub continents",
    subtitle = "1997-2018",
    y = "Fatalities",
    x = "Year"
  )
# Read the country polygons from the shapefile directory.
# NOTE(review): rgdal (the provider of readOGR) has been retired from CRAN;
# confirm it is still installable in the target environment.
dsn <- "ne_110_africa_admin_0_countries"
africa <- readOGR(
  dsn = dsn,
  layer = "Africa_110m_admin_0_countries",
  verbose = FALSE
)

# Base map shared by the point maps and heat maps: country outlines on a
# dark blue background, with no axes, grid lines or legend.
map <- ggplot() +
  borders(africa, fill = "#1c3075", colour = "#8ec4e2") +
  theme_void() +
  theme(
    panel.grid.minor = element_line(colour = "#1c3075"),
    panel.grid.major = element_line(colour = "#1c3075"),
    panel.background = element_rect(fill = "#1c3075"),
    legend.position = "none"
  )
# Point map of incidents: one point per row of cf_agg_3, with point area
# scaled to the incident count.
p5 <- map +
  geom_point(
    data = cf_agg_3,
    # alpha is deliberately left as a mapped constant: ggplot2 rescales it
    # through the alpha scale instead of using the literal value, and setting
    # 0.000001 as a fixed alpha would make the points invisible.
    # NOTE(review): pick an explicit opacity and set it outside aes().
    mapping = aes(
      x = Longitude,
      y = Latitude,
      size = Incidents,
      group = 1,
      alpha = 0.000001
    ),
    # Fixed appearance is set outside aes(). Inside aes() the hex string is
    # treated as a one-level category and drawn in the default palette
    # colour instead of "#0096ed".
    colour = "#0096ed",
    stroke = 0
  ) +
  scale_size_area()
# Heat map: 2D density of the cf_agg_3 locations drawn as filled contour
# polygons, running from green (low density) to red (high density), with
# opacity increasing with density.
# `..level..` is kept for compatibility with older ggplot2 releases; newer
# releases spell it after_stat(level).
p6 <- map +
  stat_density2d(
    data = cf_agg_3,
    mapping = aes(
      x = Longitude,
      y = Latitude,
      fill = ..level..,
      alpha = ..level..
    ),
    size = 5,
    bins = 250,
    geom = "polygon"
  ) +
  # guide = "none" hides the legend; guide = FALSE is deprecated in newer
  # ggplot2 releases.
  scale_fill_gradient(low = "green", high = "red", guide = "none") +
  scale_alpha(range = c(0, 0.33), guide = "none")

# Show the point map and the heat map side by side under a common title.
grid.arrange(
  p5, p6,
  ncol = 2,
  top = textGrob("Conflicts(1997-2018)", gp = gpar(fontsize = 12))
)
# Geographic location map of fatal incidents: one point per row of
# cf_agg_3_f (records with Fatalities != 0), with point area scaled to the
# number of fatalities.
p7 <- map +
  geom_point(
    data = cf_agg_3_f,
    # alpha is deliberately left as a mapped constant: ggplot2 rescales it
    # through the alpha scale instead of using the literal value, and setting
    # 0.000001 as a fixed alpha would make the points invisible.
    # NOTE(review): pick an explicit opacity and set it outside aes().
    mapping = aes(
      x = Longitude,
      y = Latitude,
      size = Fatalities,
      group = 1,
      alpha = 0.000001
    ),
    # Fixed appearance is set outside aes(). Inside aes() the hex string is
    # treated as a one-level category and drawn in the default palette
    # colour instead of "#0096ed".
    colour = "#0096ed",
    stroke = 0
  ) +
  scale_size_area()
# Heat map: 2D density of the fatal-incident locations (cf_agg_3_f) drawn as
# filled contour polygons, running from green (low density) to red (high
# density), with opacity increasing with density.
# `..level..` is kept for compatibility with older ggplot2 releases; newer
# releases spell it after_stat(level).
p8 <- map +
  stat_density2d(
    data = cf_agg_3_f,
    mapping = aes(
      x = Longitude,
      y = Latitude,
      fill = ..level..,
      alpha = ..level..
    ),
    size = 5,
    bins = 250,
    geom = "polygon"
  ) +
  # guide = "none" hides the legend; guide = FALSE is deprecated in newer
  # ggplot2 releases.
  scale_fill_gradient(low = "green", high = "red", guide = "none") +
  scale_alpha(range = c(0, 0.33), guide = "none")

# Show the point map and the heat map side by side under a common title.
grid.arrange(
  p7, p8,
  ncol = 2,
  top = textGrob("Fatalities(1997-2018)", gp = gpar(fontsize = 12))
)
# Total fatalities per actor.
# FUN = sum adds up the fatalities; FUN = length would only count how many
# records each actor appears in, which is not what the chart labels say.
f_by_actor <- aggregate(Fatalities ~ Actor, data = cf_3, FUN = sum)

# Select the 25 actors with the most recorded fatalities. head() returns
# fewer rows instead of NA-filled ones when there are fewer than 25 actors.
# (The variable keeps its historical name although it holds 25 rows.)
top50_actors <- head(f_by_actor[order(-f_by_actor$Fatalities), ], 25)

# Organize columns: actor as a factor whose levels follow the ranking order.
top50_actors$actor1 <- factor(
  top50_actors$Actor,
  levels = unique(as.character(top50_actors$Actor))
)

# Dot plot of the ranked actors: position, point size and colour all encode
# the fatality total.
p10 <- ggplot(data = top50_actors)
p10 +
  geom_point(
    mapping = aes(
      y = reorder(Actor, Fatalities),
      x = Fatalities,
      size = sqrt(Fatalities),
      colour = -Fatalities
    )
  ) +
  theme_hc() +
  theme(legend.position = "none") +
  labs(
    title = "Top 25 agents of Fatalities",
    subtitle = "1997-2018",
    y = "Groups",
    x = "Recorded Fatalities"
  )