Choropleth maps with R
Examples of plotting region-level data on country maps using the ggplot2 package and shape files from gadm.org.
Overview
Start by installing the following packages.
## load packages
install.packages("raster")
install.packages("ggplot2")
install.packages("rgeos")
install.packages("maptools")
install.packages("scales")
1. Simulated data for Norwegian communes
## download shapefile from gadm.org level=1 -> region ; level=2 -> commune
library(raster)
states.shp <- getData("GADM", country = "NOR", level = 2)
class(states.shp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
names(states.shp)
## [1] "OBJECTID" "ID_0" "ISO" "NAME_0" "ID_1"
## [6] "NAME_1" "ID_2" "NAME_2" "HASC_2" "CCN_2"
## [11] "CCA_2" "TYPE_2" "ENGTYPE_2" "NL_NAME_2" "VARNAME_2"
head(states.shp$NAME_2)
## [1] "Aremark" "Askim" "Borge" "Eidsberg" "Fredrikstad"
## [6] "Halden"
## fortify shape file to dataframe format
library(rgeos)
library(maptools)
library(ggplot2)
states.shp.f <- fortify(states.shp, region = "NAME_2")
class(states.shp.f)
## [1] "data.frame"
head(states.shp.f)
## long lat order hole piece id group
## 1 10.44185 63.75298 1 FALSE 1 Åfjord Åfjord.1
## 2 10.44034 63.75742 2 FALSE 1 Åfjord Åfjord.1
## 3 10.43884 63.76186 3 FALSE 1 Åfjord Åfjord.1
## 4 10.43734 63.76630 4 FALSE 1 Åfjord Åfjord.1
## 5 10.43583 63.77074 5 FALSE 1 Åfjord Åfjord.1
## 6 10.43433 63.77519 6 FALSE 1 Åfjord Åfjord.1
## create random data to plot on map
num.states <- length(states.shp$NAME_2)
set.seed(123)
mydata <- data.frame(id = states.shp$NAME_2, prevalence = rnorm(num.states,
55, 20))
head(mydata)
## id prevalence
## 1 Aremark 43.79049
## 2 Askim 50.39645
## 3 Borge 86.17417
## 4 Eidsberg 56.41017
## 5 Fredrikstad 57.58575
## 6 Halden 89.30130
## merge shape file with data
merge.shp.coef <- merge(states.shp.f, mydata, by = "id")
head(merge.shp.coef)
## id long lat order hole piece group prevalence
## 1 Åfjord 10.44185 63.75298 1 FALSE 1 Åfjord.1 69.07048
## 2 Åfjord 10.44034 63.75742 2 FALSE 1 Åfjord.1 69.07048
## 3 Åfjord 10.43884 63.76186 3 FALSE 1 Åfjord.1 69.07048
## 4 Åfjord 10.43734 63.76630 4 FALSE 1 Åfjord.1 69.07048
## 5 Åfjord 10.43583 63.77074 5 FALSE 1 Åfjord.1 69.07048
## 6 Åfjord 10.43433 63.77519 6 FALSE 1 Åfjord.1 69.07048
## basic plot
ggplot() + geom_polygon(data = merge.shp.coef, aes(x = long, y = lat, group = group,
fill = prevalence), color = "black", size = 0.25) + coord_map()
## nicer plot
library(scales)
library(ggmap)
ggplot() + geom_polygon(data = merge.shp.coef, aes(x = long, y = lat, group = group,
fill = prevalence), color = "black", size = 0.25) + coord_map() + scale_fill_distiller(name = "prevalence",
palette = "YlGn", breaks = pretty_breaks(n = 5)) + theme_nothing(legend = TRUE) +
labs(title = "Prevalence of X in Norway")
2. Region-level literacy data from Ghanaian census
Now, let us try to use the above code snippets to produce regional-level plots for Ghana based on the 2010 census. Take the literacy rate as an example.
## read individual-level literacy data from 2010 census
load(url("http://klein.uk/R/Viz/litGH.RData"))
## drop population below 11 age
litGH <- litGH[litGH$LITERACY != "Less than 11", ]
## define binary literacy variable
litGH$literacy <- ifelse(litGH$LITERACY == "None (Not literate)", 0, 1)
litGH$id <- litGH$REGION
## obtain region-level literacy rates
(litGH <- aggregate(formula = literacy ~ id, data = litGH, FUN = mean))
## id literacy
## 1 Western 0.7646089
## 2 Central 0.7809877
## 3 Greater Accra 0.8930270
## 4 Volta 0.7350536
## 5 Eastern 0.8113143
## 6 Ashanti 0.8276943
## 7 Brong Ahafo 0.6979829
## 8 Northern 0.3715225
## 9 Upper East 0.4757000
## 10 Upper West 0.4646903
mydata <- litGH
Start over from 1. to download the shapefiles for Ghana (ISO code 'GHA'
) and produce the choropleth maps. Note: make sure to use regional levels, i.e. level = 1
and NAME_1
, instead of the commune-level variables in the Norwegian data above.
## download shapefile from gadm.org level=1 -> region ; level=2 -> commune
library(raster)
states.shp <- getData("GADM", country = "GHA", level = 1)
class(states.shp)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
names(states.shp)
## [1] "OBJECTID" "ID_0" "ISO" "NAME_0" "ID_1"
## [6] "NAME_1" "HASC_1" "CCN_1" "CCA_1" "TYPE_1"
## [11] "ENGTYPE_1" "NL_NAME_1" "VARNAME_1"
states.shp$NAME_1
## [1] "Ashanti" "Brong Ahafo" "Central" "Eastern"
## [5] "Greater Accra" "Northern" "Upper East" "Upper West"
## [9] "Volta" "Western"
## fortify shape file to dataframe format
library(rgeos)
library(maptools)
library(ggplot2)
states.shp.f <- fortify(states.shp, region = "NAME_1")
class(states.shp.f)
## [1] "data.frame"
head(states.shp.f)
## long lat order hole piece id group
## 1 -1.260922 7.456426 1 FALSE 1 Ashanti Ashanti.1
## 2 -1.253900 7.456198 2 FALSE 1 Ashanti Ashanti.1
## 3 -1.236103 7.456228 3 FALSE 1 Ashanti Ashanti.1
## 4 -1.229099 7.456490 4 FALSE 1 Ashanti Ashanti.1
## 5 -1.225717 7.456898 5 FALSE 1 Ashanti Ashanti.1
## 6 -1.222512 7.457714 6 FALSE 1 Ashanti Ashanti.1
## merge shape file with data
merge.shp.coef <- merge(states.shp.f, mydata, by = "id")
head(merge.shp.coef)
## id long lat order hole piece group literacy
## 1 Ashanti -1.260922 7.456426 1 FALSE 1 Ashanti.1 0.8276943
## 2 Ashanti -1.253900 7.456198 2 FALSE 1 Ashanti.1 0.8276943
## 3 Ashanti -1.236103 7.456228 3 FALSE 1 Ashanti.1 0.8276943
## 4 Ashanti -1.229099 7.456490 4 FALSE 1 Ashanti.1 0.8276943
## 5 Ashanti -1.225717 7.456898 5 FALSE 1 Ashanti.1 0.8276943
## 6 Ashanti -1.222512 7.457714 6 FALSE 1 Ashanti.1 0.8276943
## basic plot
ggplot() + geom_polygon(data = merge.shp.coef, aes(x = long, y = lat, group = group,
fill = literacy), color = "black", size = 0.25) + coord_map()