Conflicts in Africa - EDA

Introduction

Time series analysis is a statistical modeling and forecasting approach for understanding the pattern of an event observed over a period of time. The approach has a wide variety of applications, ranging from predicting stock market prices to modeling climate change. In this introductory tutorial, we will analyse data on conflicts that occurred in Africa, the Middle East and South East Asia.

The data we will use for this tutorial is collected from the Armed Conflict Location & Event Data Project (ACLED). The data contains more than 450,000 conflict-related incidents that occurred in the last 20 years. For a closer look, let’s bring in the data and do some basic exploratory analysis.

Libraries

The main R libraries we will use are tseries, forecast and ggplot2. Let’s go ahead and load them first.

library(anytime)
library(ggplot2)
library(ggmap)
library(dplyr)
library(lubridate)
library(rgdal)
library(gganimate)
library(plyr)
library(dplyr)
library(lubridate)
library(wesanderson)
library(ggthemes)
library(grid)
library(gridExtra)
library(plotly)
library(ggplotify)

Data

Let’s read the conflict records from the CSV file:

# Load the conflict records from a CSV file given by a relative path, so the
# file is looked up in the current working directory.
cf <- read.csv("conflicts_in_africa.csv", header = TRUE, sep = ",")

Basic information about the data

# List the column names of the raw data.
names(cf)
 [1] "field_1"          "data_id"          "iso"
 [4] "event_id_cnty"    "event_id_no_cnty" "event_date"
 [7] "year"             "time_precision"   "event_type"
[10] "actor1"           "assoc_actor_1"    "inter1"
[13] "interaction"      "region"           "country"
[16] "admin1"           "admin2"           "admin3"
[19] "location"         "latitude"         "longitude"
[22] "geo_precision"    "source"           "source_scale"
[25] "notes"            "fatalities"       "timestamp"
[28] "iso3"             "date"            

In addition to the day and time of incidents, the data contains the geographic location of incidents, from the sub-continental down to the local administrative level. Let’s take a look at the first 3 rows.

# Print the first three rows of the raw data.
head(cf, 3)
  field_1 data_id iso event_id_cnty event_id_no_cnty    event_date year
1     364 1592420 204        BEN179              179 26 April 2018 2018
2     365 1592420 204        BEN179              179 26 April 2018 2018
3     626 1592420 204        BEN178              178 25 April 2018 2018
  time_precision                    event_type
1              1    Violence against civilians
2              1    Violence against civilians
3              2 Battle-No change of territory
                         actor1 assoc_actor_1 inter1 interaction
1 Fulani Ethnic Militia (Benin)                    4          47
2             Civilians (Benin)                    7          47
3 Fulani Ethnic Militia (Benin)                    4          44
          region country  admin1 admin2 admin3 location latitude longitude
1 Western Africa   Benin Plateau  Ketou           Ketou  7.36332   2.59978
2 Western Africa   Benin Plateau  Ketou           Ketou  7.36332   2.59978
3 Western Africa   Benin Plateau  Ketou           Ketou  7.36332   2.59978
  geo_precision             source source_scale
1             2 Agence Benin Press     National
2             2 Agence Benin Press     National
3             2 Agence Benin Press     National
                                                                                                                     notes
1 On 26 April, Fulani herdmen attacked Igboiganan, Ketou, as a reprisal for the 25 April clashes. 5 civilians were killed.
2 On 26 April, Fulani herdmen attacked Igboiganan, Ketou, as a reprisal for the 25 April clashes. 5 civilians were killed.
3                 On 25 April, two Fulani herdsmen were killed as a clash erupted with local farmers in Igboiganan, Ketou.
  fatalities  timestamp iso3       date
1          5 1525110000  BEN 2018-04-26
2          5 1525110000  BEN 2018-04-26
3          2 1525110000  BEN 2018-04-25

Although we’re not primarily interested in the brief description of the incidents or the source of the information, we can see that the data is well organized and compiled for further processing.

Dimension of the Data

# Number of rows and columns in the raw data.
dim(cf)
[1] 266679     29

Pre-processing

# Add a constant column of 1s (named X1) in front of the data so that every
# row counts as one incident when the data is aggregated below.
cf_2 <- data.frame(X1 = 1, cf[, ])

# Keep only the columns used in the analysis. Selecting by name instead of by
# position keeps the subset correct even if the column order of the CSV
# changes.
cf_3 <- cf_2[, c("date", "year", "country", "region", "event_type", "actor1",
                 "X1", "fatalities", "latitude", "longitude")]

# Rename
colnames(cf_3) <- c("Date", "Year", "Country", "Region",
                    "Event Type", "Actor", "Incidents",
                    "Fatalities", "Latitude", "Longitude")

# Date column as Date format.
# anytime() parses into date-times in the local time zone. Converting those
# with as.Date(..., tz = "UTC") can move a date to the previous day on
# machines east of UTC, so the calendar date is taken from the parsed value's
# own time zone via format() instead.
ordered_date <- anytime(as.character(cf_3$Date))
cf_3$Date <- as.Date(format(ordered_date, "%Y-%m-%d"), format = "%Y-%m-%d")

# Aggregate and count: FUN = length counts the rows in each group.
cf_agg_1 <- data.frame(aggregate(Incidents ~ Country, data = cf_3,
                                 FUN = length))

cf_agg_2 <- aggregate(Incidents ~ Date + Year + Region + Country + Fatalities,
                      data = cf_3, FUN = length)

cf_agg_3 <- aggregate(Incidents ~ Longitude + Latitude + Date + Fatalities,
                      data = cf_3, FUN = length)

# National summaries
# Records with at least one fatality. The namespace is explicit because both
# plyr and dplyr are attached and stats::filter also exists.
cf_agg_3_f <- dplyr::filter(cf_3, Fatalities != 0)

# Total fatalities per country.
cf_agg_4 <- aggregate(Fatalities ~ Country, data = cf_agg_3_f, FUN = sum)

# Number of records per country.
cf_agg_5 <- aggregate(Incidents ~ Country, data = cf_3, FUN = length)

# Fatalities and incident counts side by side, one row per country.
cf_agg_6 <- plyr::join(cf_agg_4, cf_agg_5, by = "Country")

Conflicts by Countries

# Arrange values
#' Order the levels of a vector by how often each value occurs
#'
#' @param x A vector (or factor) of category labels.
#' @return A factor with the same values as `x` whose levels are sorted by
#'   ascending frequency, so the most frequent value is the last level.
reorder_class <- function(x) {
  level_counts <- table(x)
  levels_by_frequency <- names(sort(level_counts))
  factor(x, levels = levels_by_frequency)
}

# Horizontal bar chart of the number of records per country. The countries are
# ordered by their record count through reorder_class(), and each country gets
# its own fill colour (the legend is hidden).
p1 <- ggplot()
p1 +
  geom_bar(
    data = cf_3,
    mapping = aes(
      fill = factor(Country),
      x = reorder_class(Country)
    )
  ) +
  coord_flip() +
  theme_hc() +
  theme(legend.position = "none") +
  labs(
    x = "Country",
    y = "Incidents",
    subtitle = "1997-2018",
    title = "Number of Incidents"
  )

Fatalities by Countries

# Fatalities over time, one panel per country.
# The unused `fill` aesthetic was dropped (geom_line() does not use it) and
# `group = 1` is declared once in the base mapping.
p2 <- ggplot(data = cf_agg_2,
             aes(x = Date,
                 y = Fatalities,
                 group = 1))

# coord_cartesian() zooms the y axis to 0-100 without discarding data.
# Limits set on scale_y_continuous() turn every value above 100 into NA, which
# removes the deadliest events from the plot and breaks the lines.
p2 +
  geom_line(aes(color = Country)) +
  facet_wrap(~Country, ncol = 7) +
  coord_cartesian(ylim = c(0, 100)) +
  theme_hc() +
  theme(legend.position = "none") +
  labs(title = "Recorded Fatalities",
       subtitle = "1997-2018",
       y = "Fatalities",
       x = "Year")

Incidents by sub-continents

# Incident counts over time, one panel per region.
p3 <- ggplot(data = cf_agg_2,
             aes(x = Date,
                 y = Incidents))

# geom_col() is the direct form of geom_bar(stat = "identity").
# coord_cartesian() zooms the y axis to 0-200 without discarding data; limits
# set on scale_y_continuous() replace out-of-range values with NA, so those
# bars are removed from the plot instead of being clipped.
p3 +
  geom_col(aes(color = Region)) +
  theme_hc() +
  facet_wrap(~Region, ncol = 5) +
  coord_cartesian(ylim = c(0, 200)) +
  theme(legend.position = "none") +
  labs(title = "Incidents by Sub Continents",
       subtitle = "1997-2018",
       y = "Count",
       x = "Year")

Fatalities by sub-continents

# Stacked columns of the Fatalities values in cf_agg_2 per year, coloured by
# region. The title now says "Fatalities": the chart plots Fatalities on the
# y axis, not incident counts.
p4 <- ggplot()
p4 +
  geom_col(data = cf_agg_2,
           aes(x = Year,
               y = Fatalities,
               color = Region,
               fill = Region)) +
  theme_hc() +
  theme(legend.position = "bottom") +
  labs(title = "Fatalities by Sub Continents",
       subtitle = "1997-2018",
       y = "Fatalities",
       x = "Year")

Incidents Map

# Read the country polygons from the data source named in `dsn`.
dsn <- "ne_110_africa_admin_0_countries"
africa <- readOGR(
  layer = "Africa_110m_admin_0_countries",
  dsn = dsn,
  verbose = FALSE
)

# Base map shared by the point and heat maps below: country outlines drawn on
# a uniform dark-blue panel, with the grid lines painted in the panel colour
# and the legend hidden.
map <- ggplot() +
  borders(africa, colour = "#8ec4e2", fill = "#1c3075") +
  theme_void() +
  theme(
    panel.grid.minor = element_line(colour = "#1c3075"),
    panel.grid.major = element_line(colour = "#1c3075"),
    panel.background = element_rect(fill = "#1c3075"),
    legend.position = "none"
  )


# Point map: one point per row of cf_agg_3, sized by its incident count.
# colour, alpha and stroke are fixed values, so they are set outside aes().
# Inside aes() the colour and alpha constants were treated as data and passed
# through the colour and alpha scales, so the hex colour was never applied as
# written. A literal alpha of 0.000001 would make the points invisible, so a
# low but visible transparency is used instead.
p5 <- map +
  geom_point(data = cf_agg_3,
             aes(x = Longitude, y = Latitude,
                 size = Incidents,
                 group = 1),
             colour = "#0096ed",
             alpha = 0.2,
             stroke = 0) +
  scale_size_area()

# Heat map: filled 2D density contours of the same locations. Both fill and
# transparency follow the density level.
# guide = "none" replaces the deprecated guide = FALSE.
p6 <- map +
  stat_density2d(data = cf_agg_3,
                 aes(x = Longitude, y = Latitude,
                     fill = ..level..,
                     alpha = ..level..),
                 size = 5,
                 bins = 250,
                 geom = "polygon") +
  scale_fill_gradient(low = "green",
                      high = "red",
                      guide = "none") +
  scale_alpha(range = c(0, 0.33),
              guide = "none")

# Show both maps side by side under a common title.
grid.arrange(p5, p6, ncol = 2,
             top = textGrob("Conflicts(1997-2018)", gp = gpar(fontsize = 12)))

#geographic location map

# Point map: one point per record with at least one fatality, sized by the
# number of fatalities.
# colour, alpha and stroke are fixed values, so they are set outside aes().
# Inside aes() the colour and alpha constants were treated as data and passed
# through the colour and alpha scales, so the hex colour was never applied as
# written. A literal alpha of 0.000001 would make the points invisible, so a
# low but visible transparency is used instead.
p7 <- map +
  geom_point(data = cf_agg_3_f,
             aes(x = Longitude, y = Latitude,
                 size = Fatalities,
                 group = 1),
             colour = "#0096ed",
             alpha = 0.2,
             stroke = 0) +
  scale_size_area()

# Heat map: filled 2D density contours of the same locations. Both fill and
# transparency follow the density level.
# guide = "none" replaces the deprecated guide = FALSE.
p8 <- map +
  stat_density2d(data = cf_agg_3_f,
                 aes(x = Longitude, y = Latitude,
                     fill = ..level..,
                     alpha = ..level..),
                 size = 5,
                 bins = 250,
                 geom = "polygon") +
  scale_fill_gradient(low = "green",
                      high = "red",
                      guide = "none") +
  scale_alpha(range = c(0, 0.33),
              guide = "none")

# Show both maps side by side under a common title.
grid.arrange(p7, p8, ncol = 2,
             top = textGrob("Fatalities(1997-2018)", gp = gpar(fontsize = 12)))

Top 25 Agents of Fatalities

# Group Fatalities by actors
# Total recorded fatalities per actor. FUN = sum adds up the deaths; the
# previous FUN = length only counted the number of records per actor, which
# does not match the "Recorded Fatalities" axis of the chart below.
f_by_actor <- aggregate(Fatalities ~ Actor, data = cf_3, FUN = sum)

# Keep the 25 actors with the most fatalities. head() simply returns fewer
# rows when there are fewer than 25 actors, whereas [1:25, ] would pad the
# result with NA rows. The variable name is kept although it holds 25 rows.
top50_actors <- data.frame(
  head(f_by_actor[order(-f_by_actor$Fatalities), ], 25)
)

# Organize columns: actor names as a factor whose levels follow the ranking.
top50_actors$actor1 <- factor(top50_actors$Actor,
                              levels = unique(as.character(top50_actors$Actor)))

# Dot plot of the ranking: actors on the y axis ordered by fatalities, with
# point size and colour both driven by the fatality total.
p10 <- ggplot(data = top50_actors)
p10 +
  geom_point(aes(y = reorder(Actor, Fatalities),
                 x = Fatalities,
                 size = sqrt(Fatalities),
                 colour = -Fatalities)) +
  theme_hc() +
  theme(legend.position = "none") +
  labs(title = "Top 25 agents of Fatalities",
       subtitle = "1997-2018",
       y = "Groups",
       x = "Recorded Fatalities")