Now that we have geocoded and visualized where our ANC sites are located, let’s map the HIV prevalence data!
# Set working directory
# Load packages
library(rgdal) # rgdal automatically loads SP package with it
library(prettymapr)
# Options
options(stringsAsFactors = F)
# import our previously geocoded HIV prevalence data
ANC2012_geocoded <- read.csv("ANC2012_geocoded.csv")
ANC_shp <- ANC2012_geocoded
coordinates(ANC_shp) <- ~lon + lat
class(ANC_shp)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
# import our provinces shapefile to map in background
provinces_shp <- readOGR("zwe_polbnda_adm1_250k_cso.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "zwe_polbnda_adm1_250k_cso.shp", layer: "zwe_polbnda_adm1_250k_cso"
## with 10 features
## It has 5 fields
# Label HIV prevalence
plot(provinces_shp, col = "azure")
plot(ANC_shp, add = T, pch = 15)
text(ANC_shp$lon, ANC_shp$lat - 0.1, labels = ANC_shp$prevalence, cex = 0.8)
Labeling is nice, but it doesn’t really help us visualize the data. Let’s try again.
We’re going to want a few more packages to help us create data categories and assign nice colors to each category.
# Load packages
library(classInt)
library(RColorBrewer)
#check out the ColorBrewer palette options
display.brewer.all()
### Represent HIV prevalence with colored symbols
# first set some parameters
plotvar <- ANC_shp$prevalence #variable to display
ncolors <- 5 #number of colors
pal <- brewer.pal(ncolors,"Reds") #color palette
# 1. Equal-interval class intervals
breaks <- classIntervals(plotvar, ncolors, style="equal") #category breaks
colcode <- findColours(breaks, pal) #assign colors based on category
plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Equal Intervals")
legend("bottomleft", legend = names(attr(colcode, "table")),
fill = attr(colcode, "palette"), cex = 1, bty = "n")
NOTE: In the legend, square brackets indicate that a number is included in the category, whereas parentheses indicate that a number is not included. For example, in the above map, the first category reads “From 7.1 up to but not including 10.88” and the second category is “From 10.88 up to but not including 14.66”. If a clinic’s HIV prevalence is exactly 10.88, it is included in the second category.
# 2. Quantile class intervals
breaks <- classIntervals(plotvar, ncolors, style="quantile")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Quantile Intervals")
legend("bottomleft", legend = names(attr(colcode, "table")),
fill = attr(colcode, "palette"), cex = 1, bty = "n")
These two maps show the same data, but first map uses equal intervals (each category is 3.78 pct points) while the second map uses quantiles (each category contains 20% of the data).
A third type of interval classification method is called “Natural Breaks” or “Jenks”. This method seeks to reduce the variance within classes and maximize the variance between classes. Essentially, it tries to break the data into natural groupings.
# 3. Jenks class intervals
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Natural Breaks (Jenks)")
legend("bottomleft", legend = names(attr(colcode, "table")),
fill = attr(colcode, "palette"), cex = 1, bty = "n")
Now, we can go one step further and add varying circle sizes to enhance the visualization.
# vary the size of the site symbol
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col = "grey93", border = "grey")
symbols(ANC_shp$lon, ANC_shp$lat, add = TRUE,
circles = ANC_shp$prevalence, bg = colcode, inches = 0.1)
title("HIV Prevalence at ANC Sites", sub = "Natural Breaks (Jenks)")
legend("bottomleft", legend = names(attr(colcode, "table")),
fill = attr(colcode, "palette"), cex = 1, bty = "n")
# Import the provinces shapefile
provinces_shp <- readOGR("zwe_polbnda_adm1_250k_cso.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "zwe_polbnda_adm1_250k_cso.shp", layer: "zwe_polbnda_adm1_250k_cso"
## with 10 features
## It has 5 fields
class(provinces_shp) # check the class of data
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
class(provinces_shp@data) # @data is the data frame embedded in the shapefile
## [1] "data.frame"
provinces_shp@data #view the shapefile's data frame
## PROVINCE PROV_CODE HRName HRPCode HRParent
## 0 Matabeleland North 15 Matabeleland North ZW15 ZW
## 1 Matabeleland South 16 Matabeleland South ZW16 ZW
## 2 Manicaland 11 Manicaland ZW11 ZW
## 3 Mashonaland Central 12 Mashonaland Central ZW12 ZW
## 4 Masvingo 18 Masvingo ZW18 ZW
## 5 Midlands 17 Midlands ZW17 ZW
## 6 Bulawayo 10 Bulawayo ZW10 ZW
## 7 Mashonaland West 14 Mashonaland West ZW14 ZW
## 8 Mashonaland East 13 Mashonaland East ZW13 ZW
## 9 Harare 19 Harare ZW19 ZW
plot(provinces_shp) #map the shapefile
# Import the Census Data CSV spreadsheet
Census2012 <- read.csv("Census2012.csv")
class(Census2012)
## [1] "data.frame"
head(Census2012)
## Province Male_pct Female_pct Population Pop_pct SexRatio CBR CDR RNI IMR MMR Electricity
## 1 Bulawayo 46.4 53.6 653337 5.0 87 27.3 9.3 1.8 46 550 90.9
## 2 Manicaland 47.4 52.6 1752698 13.4 90 33.4 10.3 2.3 69 505 37.2
## 3 Mashonaland Central 49.2 50.8 1152520 8.8 97 34.1 11.4 2.3 77 619 19.9
## 4 Mashonaland East 48.5 51.5 1344955 10.3 94 32.5 11.6 2.1 71 588 24.9
## 5 Mashonaland West 49.8 50.2 1501656 11.5 99 34.0 10.7 2.3 65 536 42.9
## 6 Matabeleland North 48.2 51.8 749017 5.7 93 27.5 10.0 1.8 54 578 22.6
We’ll use the inner_join()
function from the dplyr
package to join the data.
# Load dplyr package to get the inner_join function
library(dplyr)
### Merge the Census data with the province shapefile
# First, make sure the column we'll use to merge is the same
# rename the 1st column, currently province, to PROVINCE as in the shp
colnames(Census2012)[1] <- "PROVINCE"
head(Census2012)
## PROVINCE Male_pct Female_pct Population Pop_pct SexRatio CBR CDR RNI IMR MMR Electricity
## 1 Bulawayo 46.4 53.6 653337 5.0 87 27.3 9.3 1.8 46 550 90.9
## 2 Manicaland 47.4 52.6 1752698 13.4 90 33.4 10.3 2.3 69 505 37.2
## 3 Mashonaland Central 49.2 50.8 1152520 8.8 97 34.1 11.4 2.3 77 619 19.9
## 4 Mashonaland East 48.5 51.5 1344955 10.3 94 32.5 11.6 2.1 71 588 24.9
## 5 Mashonaland West 49.8 50.2 1501656 11.5 99 34.0 10.7 2.3 65 536 42.9
## 6 Matabeleland North 48.2 51.8 749017 5.7 93 27.5 10.0 1.8 54 578 22.6
# check that province names match
cbind(sort(provinces_shp@data$PROVINCE), sort(Census2012$PROVINCE))
## [,1] [,2]
## [1,] "Bulawayo" "Bulawayo"
## [2,] "Harare" "Harare"
## [3,] "Manicaland" "Manicaland"
## [4,] "Mashonaland Central" "Mashonaland Central"
## [5,] "Mashonaland East" "Mashonaland East"
## [6,] "Mashonaland West" "Mashonaland West"
## [7,] "Masvingo" "Masvingo"
## [8,] "Matabeleland North" "Matabeleland North"
## [9,] "Matabeleland South" "Matabeleland South"
## [10,] "Midlands" "Midlands"
cbind(table(provinces_shp@data$PROVINCE), table(Census2012$PROVINCE))
## [,1] [,2]
## Bulawayo 1 1
## Harare 1 1
## Manicaland 1 1
## Mashonaland Central 1 1
## Mashonaland East 1 1
## Mashonaland West 1 1
## Masvingo 1 1
## Matabeleland North 1 1
## Matabeleland South 1 1
## Midlands 1 1
# Now join using the PROVINCE variable
provinces_shp@data <- inner_join(provinces_shp@data, Census2012, by="PROVINCE")
provinces_shp@data #check the result
## PROVINCE PROV_CODE HRName HRPCode HRParent Male_pct Female_pct Population Pop_pct SexRatio CBR CDR RNI IMR MMR Electricity
## 1 Matabeleland North 15 Matabeleland North ZW15 ZW 48.2 51.8 749017 5.7 93 27.5 10.0 1.8 54 578 22.6
## 2 Matabeleland South 16 Matabeleland South ZW16 ZW 47.8 52.2 683893 5.2 92 26.8 12.5 1.4 50 677 25.4
## 3 Manicaland 11 Manicaland ZW11 ZW 47.4 52.6 1752698 13.4 90 33.4 10.3 2.3 69 505 37.2
## 4 Mashonaland Central 12 Mashonaland Central ZW12 ZW 49.2 50.8 1152520 8.8 97 34.1 11.4 2.3 77 619 19.9
## 5 Masvingo 18 Masvingo ZW18 ZW 46.5 53.5 1485090 11.4 87 30.9 10.6 2.0 63 568 18.5
## 6 Midlands 17 Midlands ZW17 ZW 48.1 51.9 1614941 12.4 93 31.7 10.1 2.2 69 502 31.7
## 7 Bulawayo 10 Bulawayo ZW10 ZW 46.4 53.6 653337 5.0 87 27.3 9.3 1.8 46 550 90.9
## 8 Mashonaland West 14 Mashonaland West ZW14 ZW 49.8 50.2 1501656 11.5 99 34.0 10.7 2.3 65 536 42.9
## 9 Mashonaland East 13 Mashonaland East ZW13 ZW 48.5 51.5 1344955 10.3 94 32.5 11.6 2.1 71 588 24.9
## 10 Harare 19 Harare ZW19 ZW 48.3 51.7 2123132 16.3 93 33.3 7.7 2.6 53 371 75.8
# Simply label the provinces
plot(provinces_shp, col = "azure", border = "grey")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=as.character(provinces_shp$MMR), cex=0.8)
title("Maternal Mortality Ratio 2012 (per 100,000 live births)")
# Set parameters
plotvar <- provinces_shp$MMR
ncolors <- 4
pal <- brewer.pal(ncolors,"PuRd")
#Set plot window to combine 4 maps (2x2)
par(mfrow=c(2,2))
# 1. Fixed intervals
breaks <- classIntervals(plotvar, ncolors, style="fixed",
fixedBreaks=c(300,400,500,600,700))
colcode <- findColours(breaks, pal)
plot(provinces_shp, col=colcode, border = "grey",
main="Fixed Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)
# 2. Equal intervals
breaks <- classIntervals(plotvar, ncolors, style="equal")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col = colcode, border = "grey",
main="Equal Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)
# 3. Quantile intervals
breaks <- classIntervals(plotvar, ncolors, style="quantile")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col=colcode, border = "grey",
main="Quantile Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)
# 4. Jenks intervals
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col=colcode, border = "grey",
main="Natural Intervals (Jenks)")
legend("bottomleft", legend=names(attr(colcode, "table")),
fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)
#Title for combined plot
title("Maternal Mortality Ratio 2012 (per 100,000 live births)",
outer=TRUE, line = -1, cex = 1)
#Return to a single plot format
par(mfrow=c(1,1))
# Prettier quantiles: round quantiles to nearest 5
breaks <- classIntervals(plotvar, ncolors, style="fixed",
fixedBreaks=c(370,515,560,585,680))
colcode <- findColours(breaks, pal)
# Parameters for legend
sym <- c(15,15,15,15,19)
labels <- c(paste0("MMR: ", names(attr(colcode, "table"))),
"ANC HIV Prevalence")
labels
## [1] "MMR: [370,515)" "MMR: [515,560)" "MMR: [560,585)" "MMR: [585,680]" "ANC HIV Prevalence"
colors <- c(attr(colcode, "palette"), "black")
colors
## [1] "#F1EEF6" "#D7B5D8" "#DF65B0" "#CE1256" "black"
#Plot
plot(provinces_shp, col=colcode, border = "grey",
main="Zimbabwe Maternal Health 2012")
symbols(ANC_shp$lon, ANC_shp$lat, add = TRUE,
circles = ANC_shp$prevalence, bg = "black", inches = 0.1)
legend("topleft", legend=labels, pch=sym, col=colors,
cex=1, pt.cex=2, bty="y")
addscalebar(style = "ticks")
addnortharrow(cols = c("white", "grey50"), scale = 0.6)