<< Workshop Home

Visualizing Quantitative Data: Points

Now that we have geocoded and visualized where our ANC sites are located, let’s map the HIV prevalence data!

# Set working directory

# Load packages
library(rgdal) # rgdal automatically loads SP package with it
library(prettymapr)

# Options
options(stringsAsFactors = F)
# import our previously geocoded HIV prevalence data
ANC2012_geocoded <- read.csv("ANC2012_geocoded.csv")
ANC_shp <- ANC2012_geocoded
coordinates(ANC_shp) <- ~lon + lat
class(ANC_shp)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"
# import our provinces shapefile to map in background
provinces_shp <- readOGR("zwe_polbnda_adm1_250k_cso.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "zwe_polbnda_adm1_250k_cso.shp", layer: "zwe_polbnda_adm1_250k_cso"
## with 10 features
## It has 5 fields

Labeling Points

# Label HIV prevalence
plot(provinces_shp, col = "azure")
plot(ANC_shp, add = T, pch = 15)
text(ANC_shp$lon, ANC_shp$lat - 0.1, labels = ANC_shp$prevalence, cex = 0.8)

Labeling is nice, but it doesn’t really help us visualize the data. Let’s try again.

Varying Colors

We’re going to want a few more packages to help us create data categories and assign nice colors to each category.

# Load packages
library(classInt)
library(RColorBrewer)
#check out the ColorBrewer palette options
display.brewer.all() 

### Represent HIV prevalence with colored symbols

# first set some parameters
plotvar <- ANC_shp$prevalence #variable to display
ncolors <- 5 #number of colors
pal <- brewer.pal(ncolors,"Reds") #color palette

Equal Intervals

# 1. Equal-interval class intervals
breaks <- classIntervals(plotvar, ncolors, style="equal") #category breaks
colcode <- findColours(breaks, pal) #assign colors based on category

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Equal Intervals")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")

NOTE: In the legend, square brackets indicate that a number is included in the category, whereas parentheses indicate that a number is not included. For example, in the above map, the first category reads “From 7.1 up to but not including 10.88” and the second category is “From 10.88 up to but not including 14.66”. If a clinic’s HIV prevalence is exactly 10.88, it is included in the second category.

Quantile Intervals

# 2. Quantile class intervals
breaks <- classIntervals(plotvar, ncolors, style="quantile")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Quantile Intervals")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")

These two maps show the same data, but first map uses equal intervals (each category is 3.78 pct points) while the second map uses quantiles (each category contains 20% of the data).

Natural Breaks (Jenks)

A third type of interval classification method is called “Natural Breaks” or “Jenks”. This method seeks to reduce the variance within classes and maximize the variance between classes. Essentially, it tries to break the data into natural groupings.

# 3. Jenks class intervals
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Natural Breaks (Jenks)")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")

Varying Symbol Sizes

Now, we can go one step further and add varying circle sizes to enhance the visualization.

# vary the size of the site symbol
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col = "grey93", border = "grey")
symbols(ANC_shp$lon, ANC_shp$lat, add = TRUE,
        circles = ANC_shp$prevalence, bg = colcode, inches = 0.1)
title("HIV Prevalence at ANC Sites", sub = "Natural Breaks (Jenks)")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")

Visualizing Quantitative Data: Polygons

Joining Data to a Shapefile

# Import the provinces shapefile
provinces_shp <- readOGR("zwe_polbnda_adm1_250k_cso.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "zwe_polbnda_adm1_250k_cso.shp", layer: "zwe_polbnda_adm1_250k_cso"
## with 10 features
## It has 5 fields
class(provinces_shp) # check the class of data
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
class(provinces_shp@data) # @data is the data frame embedded in the shapefile
## [1] "data.frame"
provinces_shp@data #view the shapefile's data frame
##              PROVINCE PROV_CODE              HRName HRPCode HRParent
## 0  Matabeleland North        15  Matabeleland North    ZW15       ZW
## 1  Matabeleland South        16  Matabeleland South    ZW16       ZW
## 2          Manicaland        11          Manicaland    ZW11       ZW
## 3 Mashonaland Central        12 Mashonaland Central    ZW12       ZW
## 4            Masvingo        18            Masvingo    ZW18       ZW
## 5            Midlands        17            Midlands    ZW17       ZW
## 6            Bulawayo        10            Bulawayo    ZW10       ZW
## 7    Mashonaland West        14    Mashonaland West    ZW14       ZW
## 8    Mashonaland East        13    Mashonaland East    ZW13       ZW
## 9              Harare        19              Harare    ZW19       ZW
plot(provinces_shp) #map the shapefile

# Import the Census Data CSV spreadsheet
Census2012 <- read.csv("Census2012.csv")
class(Census2012)
## [1] "data.frame"
head(Census2012)
##              Province Male_pct Female_pct Population Pop_pct SexRatio  CBR  CDR RNI IMR MMR Electricity
## 1            Bulawayo     46.4       53.6     653337     5.0       87 27.3  9.3 1.8  46 550        90.9
## 2          Manicaland     47.4       52.6    1752698    13.4       90 33.4 10.3 2.3  69 505        37.2
## 3 Mashonaland Central     49.2       50.8    1152520     8.8       97 34.1 11.4 2.3  77 619        19.9
## 4    Mashonaland East     48.5       51.5    1344955    10.3       94 32.5 11.6 2.1  71 588        24.9
## 5    Mashonaland West     49.8       50.2    1501656    11.5       99 34.0 10.7 2.3  65 536        42.9
## 6  Matabeleland North     48.2       51.8     749017     5.7       93 27.5 10.0 1.8  54 578        22.6

We’ll use the inner_join() function from the dplyr package to join the data.

# Load dplyr package to get the inner_join function
library(dplyr)

### Merge the Census data with the province shapefile 

# First, make sure the column we'll use to merge is the same
# rename the 1st column, currently province, to PROVINCE as in the shp
colnames(Census2012)[1] <- "PROVINCE"
head(Census2012)
##              PROVINCE Male_pct Female_pct Population Pop_pct SexRatio  CBR  CDR RNI IMR MMR Electricity
## 1            Bulawayo     46.4       53.6     653337     5.0       87 27.3  9.3 1.8  46 550        90.9
## 2          Manicaland     47.4       52.6    1752698    13.4       90 33.4 10.3 2.3  69 505        37.2
## 3 Mashonaland Central     49.2       50.8    1152520     8.8       97 34.1 11.4 2.3  77 619        19.9
## 4    Mashonaland East     48.5       51.5    1344955    10.3       94 32.5 11.6 2.1  71 588        24.9
## 5    Mashonaland West     49.8       50.2    1501656    11.5       99 34.0 10.7 2.3  65 536        42.9
## 6  Matabeleland North     48.2       51.8     749017     5.7       93 27.5 10.0 1.8  54 578        22.6
# check that province names match
cbind(sort(provinces_shp@data$PROVINCE), sort(Census2012$PROVINCE))
##       [,1]                  [,2]                 
##  [1,] "Bulawayo"            "Bulawayo"           
##  [2,] "Harare"              "Harare"             
##  [3,] "Manicaland"          "Manicaland"         
##  [4,] "Mashonaland Central" "Mashonaland Central"
##  [5,] "Mashonaland East"    "Mashonaland East"   
##  [6,] "Mashonaland West"    "Mashonaland West"   
##  [7,] "Masvingo"            "Masvingo"           
##  [8,] "Matabeleland North"  "Matabeleland North" 
##  [9,] "Matabeleland South"  "Matabeleland South" 
## [10,] "Midlands"            "Midlands"
cbind(table(provinces_shp@data$PROVINCE), table(Census2012$PROVINCE))
##                     [,1] [,2]
## Bulawayo               1    1
## Harare                 1    1
## Manicaland             1    1
## Mashonaland Central    1    1
## Mashonaland East       1    1
## Mashonaland West       1    1
## Masvingo               1    1
## Matabeleland North     1    1
## Matabeleland South     1    1
## Midlands               1    1
# Now join using the PROVINCE variable
provinces_shp@data <- inner_join(provinces_shp@data, Census2012, by="PROVINCE")
provinces_shp@data #check the result
##               PROVINCE PROV_CODE              HRName HRPCode HRParent Male_pct Female_pct Population Pop_pct SexRatio  CBR  CDR RNI IMR MMR Electricity
## 1   Matabeleland North        15  Matabeleland North    ZW15       ZW     48.2       51.8     749017     5.7       93 27.5 10.0 1.8  54 578        22.6
## 2   Matabeleland South        16  Matabeleland South    ZW16       ZW     47.8       52.2     683893     5.2       92 26.8 12.5 1.4  50 677        25.4
## 3           Manicaland        11          Manicaland    ZW11       ZW     47.4       52.6    1752698    13.4       90 33.4 10.3 2.3  69 505        37.2
## 4  Mashonaland Central        12 Mashonaland Central    ZW12       ZW     49.2       50.8    1152520     8.8       97 34.1 11.4 2.3  77 619        19.9
## 5             Masvingo        18            Masvingo    ZW18       ZW     46.5       53.5    1485090    11.4       87 30.9 10.6 2.0  63 568        18.5
## 6             Midlands        17            Midlands    ZW17       ZW     48.1       51.9    1614941    12.4       93 31.7 10.1 2.2  69 502        31.7
## 7             Bulawayo        10            Bulawayo    ZW10       ZW     46.4       53.6     653337     5.0       87 27.3  9.3 1.8  46 550        90.9
## 8     Mashonaland West        14    Mashonaland West    ZW14       ZW     49.8       50.2    1501656    11.5       99 34.0 10.7 2.3  65 536        42.9
## 9     Mashonaland East        13    Mashonaland East    ZW13       ZW     48.5       51.5    1344955    10.3       94 32.5 11.6 2.1  71 588        24.9
## 10              Harare        19              Harare    ZW19       ZW     48.3       51.7    2123132    16.3       93 33.3  7.7 2.6  53 371        75.8

Labeling Polygons

# Simply label the provinces
plot(provinces_shp, col = "azure", border = "grey")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=as.character(provinces_shp$MMR), cex=0.8)
title("Maternal Mortality Ratio 2012 (per 100,000 live births)")

Choropleth Maps

# Set parameters
plotvar <- provinces_shp$MMR
ncolors <- 4
pal <- brewer.pal(ncolors,"PuRd")

#Set plot window to combine 4 maps (2x2)
par(mfrow=c(2,2))

# 1. Fixed intervals
breaks <- classIntervals(plotvar, ncolors, style="fixed",
                         fixedBreaks=c(300,400,500,600,700))
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Fixed Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 2. Equal intervals
breaks <- classIntervals(plotvar, ncolors, style="equal")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = colcode, border = "grey", 
     main="Equal Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 3. Quantile intervals
breaks <- classIntervals(plotvar, ncolors, style="quantile")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Quantile Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 4. Jenks intervals
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Natural Intervals (Jenks)")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

#Title for combined plot
title("Maternal Mortality Ratio 2012 (per 100,000 live births)",
       outer=TRUE, line = -1, cex = 1)

#Return to a single plot format
par(mfrow=c(1,1))

Layering Points and Polygons

# Prettier quantiles: round quantiles to nearest 5
breaks <- classIntervals(plotvar, ncolors, style="fixed",
fixedBreaks=c(370,515,560,585,680))
colcode <- findColours(breaks, pal)

# Parameters for legend
sym <- c(15,15,15,15,19)
labels <- c(paste0("MMR: ", names(attr(colcode, "table"))),
                   "ANC HIV Prevalence")
labels
## [1] "MMR: [370,515)"     "MMR: [515,560)"     "MMR: [560,585)"     "MMR: [585,680]"     "ANC HIV Prevalence"
colors <- c(attr(colcode, "palette"), "black")
colors
## [1] "#F1EEF6" "#D7B5D8" "#DF65B0" "#CE1256" "black"
#Plot
plot(provinces_shp, col=colcode, border = "grey",
     main="Zimbabwe Maternal Health 2012")
symbols(ANC_shp$lon, ANC_shp$lat, add = TRUE,
        circles = ANC_shp$prevalence, bg = "black", inches = 0.1)
legend("topleft", legend=labels, pch=sym, col=colors,
       cex=1, pt.cex=2, bty="y")
addscalebar(style = "ticks")
addnortharrow(cols = c("white", "grey50"), scale = 0.6)

<< Workshop Home