Choropleth maps with R

Examples of plotting region-level data on country maps using the ggplot2 package and shape files from gadm.org.


Overview

  1. Simulated data for Norwegian communes
  2. Region-level literacy data from Ghanaian census

Start by installing the following packages.

## load packages
install.packages("raster")
install.packages("ggplot2")
install.packages("rgeos")
install.packages("maptools")
install.packages("scales")

1. Simulated data for Norwegian communes

## download shapefile from gadm.org level=1 -> region ; level=2 -> commune
library(raster)
states.shp <- getData("GADM", country = "NOR", level = 2)
class(states.shp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
names(states.shp)
##  [1] "OBJECTID"  "ID_0"      "ISO"       "NAME_0"    "ID_1"     
##  [6] "NAME_1"    "ID_2"      "NAME_2"    "HASC_2"    "CCN_2"    
## [11] "CCA_2"     "TYPE_2"    "ENGTYPE_2" "NL_NAME_2" "VARNAME_2"
head(states.shp$NAME_2)
## [1] "Aremark"     "Askim"       "Borge"       "Eidsberg"    "Fredrikstad"
## [6] "Halden"
## fortify shape file to dataframe format
library(rgeos)
library(maptools)
library(ggplot2)
states.shp.f <- fortify(states.shp, region = "NAME_2")
class(states.shp.f)
## [1] "data.frame"
head(states.shp.f)
##       long      lat order  hole piece     id    group
## 1 10.44185 63.75298     1 FALSE     1 Åfjord Åfjord.1
## 2 10.44034 63.75742     2 FALSE     1 Åfjord Åfjord.1
## 3 10.43884 63.76186     3 FALSE     1 Åfjord Åfjord.1
## 4 10.43734 63.76630     4 FALSE     1 Åfjord Åfjord.1
## 5 10.43583 63.77074     5 FALSE     1 Åfjord Åfjord.1
## 6 10.43433 63.77519     6 FALSE     1 Åfjord Åfjord.1
## create random data to plot on map
num.states <- length(states.shp$NAME_2)
set.seed(123)
mydata <- data.frame(id = states.shp$NAME_2, prevalence = rnorm(num.states, 
    55, 20))
head(mydata)
##            id prevalence
## 1     Aremark   43.79049
## 2       Askim   50.39645
## 3       Borge   86.17417
## 4    Eidsberg   56.41017
## 5 Fredrikstad   57.58575
## 6      Halden   89.30130
## merge shape file with data
merge.shp.coef <- merge(states.shp.f, mydata, by = "id")
head(merge.shp.coef)
##       id     long      lat order  hole piece    group prevalence
## 1 Åfjord 10.44185 63.75298     1 FALSE     1 Åfjord.1   69.07048
## 2 Åfjord 10.44034 63.75742     2 FALSE     1 Åfjord.1   69.07048
## 3 Åfjord 10.43884 63.76186     3 FALSE     1 Åfjord.1   69.07048
## 4 Åfjord 10.43734 63.76630     4 FALSE     1 Åfjord.1   69.07048
## 5 Åfjord 10.43583 63.77074     5 FALSE     1 Åfjord.1   69.07048
## 6 Åfjord 10.43433 63.77519     6 FALSE     1 Åfjord.1   69.07048
## basic plot
ggplot() + geom_polygon(data = merge.shp.coef, aes(x = long, y = lat, group = group, 
    fill = prevalence), color = "black", size = 0.25) + coord_map()

## nicer plot
library(scales)
library(ggmap)
ggplot() + geom_polygon(data = merge.shp.coef, aes(x = long, y = lat, group = group, 
    fill = prevalence), color = "black", size = 0.25) + coord_map() + scale_fill_distiller(name = "prevalence", 
    palette = "YlGn", breaks = pretty_breaks(n = 5)) + theme_nothing(legend = TRUE) + 
    labs(title = "Prevalence of X in Norway")

Back to top


2. Region-level literacy data from Ghanaian census

Now, let us try to use the above code snippets to produce regional-level plots for Ghana based on the 2010 census. Take the literacy rate as an example.

## read individual-level literacy data from 2010 census
load(url("http://klein.uk/R/Viz/litGH.RData"))

## drop population below 11 age
litGH <- litGH[litGH$LITERACY != "Less than 11", ]

## define binary literacy variable
litGH$literacy <- ifelse(litGH$LITERACY == "None (Not literate)", 0, 1)
litGH$id <- litGH$REGION

## obtain region-level literacy rates
(litGH <- aggregate(formula = literacy ~ id, data = litGH, FUN = mean))
##               id  literacy
## 1        Western 0.7646089
## 2        Central 0.7809877
## 3  Greater Accra 0.8930270
## 4          Volta 0.7350536
## 5        Eastern 0.8113143
## 6        Ashanti 0.8276943
## 7    Brong Ahafo 0.6979829
## 8       Northern 0.3715225
## 9     Upper East 0.4757000
## 10    Upper West 0.4646903
mydata <- litGH

Start over from 1. to download the shapefiles for Ghana (ISO code 'GHA') and produce the choropleth maps. Note: make sure to use regional levels, i.e. level = 1 and NAME_1, instead of the commune-level variables in the Norwegian data above.

## download shapefile from gadm.org level=1 -> region ; level=2 -> commune
library(raster)
states.shp <- getData("GADM", country = "GHA", level = 1)
class(states.shp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
names(states.shp)
##  [1] "OBJECTID"  "ID_0"      "ISO"       "NAME_0"    "ID_1"     
##  [6] "NAME_1"    "HASC_1"    "CCN_1"     "CCA_1"     "TYPE_1"   
## [11] "ENGTYPE_1" "NL_NAME_1" "VARNAME_1"
states.shp$NAME_1
##  [1] "Ashanti"       "Brong Ahafo"   "Central"       "Eastern"      
##  [5] "Greater Accra" "Northern"      "Upper East"    "Upper West"   
##  [9] "Volta"         "Western"
## fortify shape file to dataframe format
library(rgeos)
library(maptools)
library(ggplot2)
states.shp.f <- fortify(states.shp, region = "NAME_1")
class(states.shp.f)
## [1] "data.frame"
head(states.shp.f)
##        long      lat order  hole piece      id     group
## 1 -1.260922 7.456426     1 FALSE     1 Ashanti Ashanti.1
## 2 -1.253900 7.456198     2 FALSE     1 Ashanti Ashanti.1
## 3 -1.236103 7.456228     3 FALSE     1 Ashanti Ashanti.1
## 4 -1.229099 7.456490     4 FALSE     1 Ashanti Ashanti.1
## 5 -1.225717 7.456898     5 FALSE     1 Ashanti Ashanti.1
## 6 -1.222512 7.457714     6 FALSE     1 Ashanti Ashanti.1
## merge shape file with data
merge.shp.coef <- merge(states.shp.f, mydata, by = "id")
head(merge.shp.coef)
##        id      long      lat order  hole piece     group  literacy
## 1 Ashanti -1.260922 7.456426     1 FALSE     1 Ashanti.1 0.8276943
## 2 Ashanti -1.253900 7.456198     2 FALSE     1 Ashanti.1 0.8276943
## 3 Ashanti -1.236103 7.456228     3 FALSE     1 Ashanti.1 0.8276943
## 4 Ashanti -1.229099 7.456490     4 FALSE     1 Ashanti.1 0.8276943
## 5 Ashanti -1.225717 7.456898     5 FALSE     1 Ashanti.1 0.8276943
## 6 Ashanti -1.222512 7.457714     6 FALSE     1 Ashanti.1 0.8276943
## basic plot
ggplot() + geom_polygon(data = merge.shp.coef, aes(x = long, y = lat, group = group, 
    fill = literacy), color = "black", size = 0.25) + coord_map()

Back to top