<< Workshop Home

Visualizing Quantitative Data: Points

Now that we have geocoded and visualized where our ANC sites are located, let’s map the HIV prevalence data!

# Set working directory

# Load packages
library(rgdal) # rgdal automatically loads SP package with it
library(prettymapr)

# Options
options(stringsAsFactors = F)
# import our previously geocoded HIV prevalence data
ANC2012_geocoded <- read.csv("ANC2012_geocoded.csv")
ANC_shp <- ANC2012_geocoded
coordinates(ANC_shp) <- ~lon + lat
class(ANC_shp)

# import our provinces shapefile to map in background
provinces_shp <- readOGR("zwe_polbnda_adm1_250k_cso.shp")

Labeling Points

# Label HIV prevalence
plot(provinces_shp, col = "azure")
plot(ANC_shp, add = T, pch = 15)
text(ANC_shp$lon, ANC_shp$lat - 0.1, labels = ANC_shp$prevalence, cex = 0.8)

Labeling is nice, but it doesn’t really help us visualize the data. Let’s try again.

Varying Colors

We’re going to want a few more packages to help us create data categories and assign nice colors to each category.

# Load packages
library(classInt)
library(RColorBrewer)
#check out the ColorBrewer palette options
display.brewer.all() 
### Represent HIV prevalence with colored symbols

# first set some parameters
plotvar <- ANC_shp$prevalence #variable to display
ncolors <- 5 #number of colors
pal <- brewer.pal(ncolors,"Reds") #color palette

Equal Intervals

# 1. Equal-interval class intervals
breaks <- classIntervals(plotvar, ncolors, style="equal") #category breaks
colcode <- findColours(breaks, pal) #assign colors based on category

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Equal Intervals")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")

NOTE: In the legend, square brackets indicate that a number is included in the category, whereas parentheses indicate that a number is not included. For example, in the above map, the first category reads “From 7.1 up to but not including 10.88” and the second category is “From 10.88 up to but not including 14.66”. If a clinic’s HIV prevalence is exactly 10.88, it is included in the second category.

Quantile Intervals

# 2. Quantile class intervals
breaks <- classIntervals(plotvar, ncolors, style="quantile")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Quantile Intervals")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")

These two maps show the same data, but first map uses equal intervals (each category is 3.78 pct points) while the second map uses quantiles (each category contains 20% of the data).

Natural Breaks (Jenks)

A third type of interval classification method is called “Natural Breaks” or “Jenks”. This method seeks to reduce the variance within classes and maximize the variance between classes. Essentially, it tries to break the data into natural groupings.

# 3. Jenks class intervals
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Natural Breaks (Jenks)")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")

Varying Symbol Sizes

Now, we can go one step further and add varying circle sizes to enhance the visualization.

# vary the size of the site symbol
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col = "grey93", border = "grey")
symbols(ANC_shp$lon, ANC_shp$lat, add = TRUE,
        circles = ANC_shp$prevalence, bg = colcode, inches = 0.1)
title("HIV Prevalence at ANC Sites", sub = "Natural Breaks (Jenks)")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")

Visualizing Quantitative Data: Polygons

Joining Data to a Shapefile

# Import the provinces shapefile
provinces_shp <- readOGR("zwe_polbnda_adm1_250k_cso.shp")
class(provinces_shp) # check the class of data
class(provinces_shp@data) # @data is the data frame embedded in the shapefile
provinces_shp@data #view the shapefile's data frame
plot(provinces_shp) #map the shapefile
# Import the Census Data CSV spreadsheet
Census2012 <- read.csv("Census2012.csv")
class(Census2012)
head(Census2012)

We’ll use the inner_join() function from the dplyr package to join the data.

# Load dplyr package to get the inner_join function
library(dplyr)

### Merge the Census data with the province shapefile 

# First, make sure the column we'll use to merge is the same
# rename the 1st column, currently province, to PROVINCE as in the shp
colnames(Census2012)[1] <- "PROVINCE"
head(Census2012)

# check that province names match
cbind(sort(provinces_shp@data$PROVINCE), sort(Census2012$PROVINCE))
cbind(table(provinces_shp@data$PROVINCE), table(Census2012$PROVINCE))

# Now join using the PROVINCE variable
provinces_shp@data <- inner_join(provinces_shp@data, Census2012, by="PROVINCE")
provinces_shp@data #check the result

Labeling Polygons

# Simply label the provinces
plot(provinces_shp, col = "azure", border = "grey")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=as.character(provinces_shp$MMR), cex=0.8)
title("Maternal Mortality Ratio 2012 (per 100,000 live births)")

Choropleth Maps

# Set parameters
plotvar <- provinces_shp$MMR
ncolors <- 4
pal <- brewer.pal(ncolors,"PuRd")

#Set plot window to combine 4 maps (2x2)
par(mfrow=c(2,2))

# 1. Fixed intervals
breaks <- classIntervals(plotvar, ncolors, style="fixed",
                         fixedBreaks=c(300,400,500,600,700))
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Fixed Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 2. Equal intervals
breaks <- classIntervals(plotvar, ncolors, style="equal")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = colcode, border = "grey", 
     main="Equal Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 3. Quantile intervals
breaks <- classIntervals(plotvar, ncolors, style="quantile")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Quantile Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 4. Jenks intervals
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Natural Intervals (Jenks)")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

#Title for combined plot
title("Maternal Mortality Ratio 2012 (per 100,000 live births)",
       outer=TRUE, line = -1, cex = 1)
#Return to a single plot format
par(mfrow=c(1,1))

Layering Points and Polygons


# Prettier quantiles: round quantiles to nearest 5
breaks <- classIntervals(plotvar, ncolors, style="fixed",
fixedBreaks=c(370,515,560,585,680))
colcode <- findColours(breaks, pal)

# Parameters for legend
sym <- c(15,15,15,15,19)
labels <- c(paste0("MMR: ", names(attr(colcode, "table"))),
                   "ANC HIV Prevalence")
labels
colors <- c(attr(colcode, "palette"), "black")
colors

#Plot
plot(provinces_shp, col=colcode, border = "grey",
     main="Zimbabwe Maternal Health 2012")
symbols(ANC_shp$lon, ANC_shp$lat, add = TRUE,
        circles = ANC_shp$prevalence, bg = "black", inches = 0.1)
legend("topleft", legend=labels, pch=sym, col=colors,
       cex=1, pt.cex=2, bty="y")
addscalebar(style = "ticks")
addnortharrow(cols = c("white", "grey50"), scale = 0.6)

<< Workshop Home

---
title: "Introduction to Mapping Spatial Data in R"
subtitle: "Part 4: Visualizing Quantitative Data"
output: 
  html_document:
    toc: true
    toc_float: 
      collapsed: false
      smooth_scroll: false
    toc_depth: 2
    number_sections: false 
    theme: lumen
    highlight: textmate
  html_notebook:
    toc: true
    toc_float: 
      collapsed: false
      smooth_scroll: false
    toc_depth: 2
    number_sections: false 
    theme: lumen
    highlight: textmate  

---
  
```{css}
code {
  white-space: pre !important;
  overflow-x: scroll !important;
  word-break: keep-all !important;
  word-wrap: initial !important;
}
body{ /* Normal  */
      font-size: 16px;
  }

```

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
options(width=500)

```

[**<< Workshop Home**](https://cafahey.github.io/spatial)




# Visualizing Quantitative Data: Points

Now that we have geocoded and visualized where our ANC sites are located, let's map the HIV prevalence data!

```{r}
# Set working directory

# Load packages
library(rgdal) # rgdal automatically loads SP package with it
library(prettymapr)

# Options
options(stringsAsFactors = F)
```


```{r }
# import our previously geocoded HIV prevalence data
ANC2012_geocoded <- read.csv("ANC2012_geocoded.csv")
ANC_shp <- ANC2012_geocoded
coordinates(ANC_shp) <- ~lon + lat
class(ANC_shp)

# import our provinces shapefile to map in background
provinces_shp <- readOGR("zwe_polbnda_adm1_250k_cso.shp")
```

## Labeling Points
```{r, fig.height=10, fig.width=10}
# Label HIV prevalence
plot(provinces_shp, col = "azure")
plot(ANC_shp, add = T, pch = 15)
text(ANC_shp$lon, ANC_shp$lat - 0.1, labels = ANC_shp$prevalence, cex = 0.8)
```
Labeling is nice, but it doesn't really help us visualize the data. Let's try again. 

## Varying Colors
We're going to want a few more packages to help us create data categories and assign nice colors to each category.
```{r}
# Load packages
library(classInt)
library(RColorBrewer)
```

```{r, fig.height=8, fig.width=10}
#check out the ColorBrewer palette options
display.brewer.all() 
```

```{r}
### Represent HIV prevalence with colored symbols

# first set some parameters
plotvar <- ANC_shp$prevalence #variable to display
ncolors <- 5 #number of colors
pal <- brewer.pal(ncolors,"Reds") #color palette
```

###Equal Intervals
```{r}
# 1. Equal-interval class intervals
breaks <- classIntervals(plotvar, ncolors, style="equal") #category breaks
colcode <- findColours(breaks, pal) #assign colors based on category

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Equal Intervals")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")
```

**NOTE:** In the legend, square brackets indicate that a number is included in the category, whereas parentheses indicate that a number is not included. For example, in the above map, the first category reads "From 7.1 up to *but not including* 10.88" and the second category is "From 10.88 up to *but not including* 14.66". If a clinic's HIV prevalence is exactly 10.88, it is included in the second category.

###Quantile Intervals
```{r}
# 2. Quantile class intervals
breaks <- classIntervals(plotvar, ncolors, style="quantile")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Quantile Intervals")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")
```

These two maps show the same data, but first map uses equal intervals (each category is 3.78 pct points) while the second map uses quantiles (each category contains 20% of the data).

###Natural Breaks (Jenks)

A third type of interval classification method is called "Natural Breaks" or "Jenks". This method seeks to reduce the variance within classes and maximize the variance between classes. Essentially, it tries to break the data into natural groupings.

```{r}
# 3. Jenks class intervals
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = "grey93", border = "grey")
points(ANC_shp, pch=16, col=colcode, cex=2)
title("HIV Prevalence at ANC Sites", sub = "Natural Breaks (Jenks)")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")
```

## Varying Symbol Sizes
Now, we can go one step further and add varying circle sizes to enhance the visualization.

```{r}
# vary the size of the site symbol
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)
plot(provinces_shp, col = "grey93", border = "grey")
symbols(ANC_shp$lon, ANC_shp$lat, add = TRUE,
        circles = ANC_shp$prevalence, bg = colcode, inches = 0.1)
title("HIV Prevalence at ANC Sites", sub = "Natural Breaks (Jenks)")
legend("bottomleft", legend = names(attr(colcode, "table")),
       fill = attr(colcode, "palette"), cex = 1, bty = "n")
```

# Visualizing Quantitative Data: Polygons

## Joining Data to a Shapefile

```{r}
# Import the provinces shapefile
provinces_shp <- readOGR("zwe_polbnda_adm1_250k_cso.shp")
class(provinces_shp) # check the class of data
class(provinces_shp@data) # @data is the data frame embedded in the shapefile
provinces_shp@data #view the shapefile's data frame
plot(provinces_shp) #map the shapefile

```

```{r}
# Import the Census Data CSV spreadsheet
Census2012 <- read.csv("Census2012.csv")
class(Census2012)
head(Census2012)
```

We'll use the `inner_join()` function from the `dplyr` package to join the data.
```{r}
# Load dplyr package to get the inner_join function
library(dplyr)

### Merge the Census data with the province shapefile 

# First, make sure the column we'll use to merge is the same
# rename the 1st column, currently province, to PROVINCE as in the shp
colnames(Census2012)[1] <- "PROVINCE"
head(Census2012)

# check that province names match
cbind(sort(provinces_shp@data$PROVINCE), sort(Census2012$PROVINCE))
cbind(table(provinces_shp@data$PROVINCE), table(Census2012$PROVINCE))

# Now join using the PROVINCE variable
provinces_shp@data <- inner_join(provinces_shp@data, Census2012, by="PROVINCE")
provinces_shp@data #check the result
```


## Labeling Polygons

```{r}
# Simply label the provinces
plot(provinces_shp, col = "azure", border = "grey")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=as.character(provinces_shp$MMR), cex=0.8)
title("Maternal Mortality Ratio 2012 (per 100,000 live births)")
```

## Choropleth Maps

```{r, fig.height=10, fig.width=10}
# Set parameters
plotvar <- provinces_shp$MMR
ncolors <- 4
pal <- brewer.pal(ncolors,"PuRd")

#Set plot window to combine 4 maps (2x2)
par(mfrow=c(2,2))

# 1. Fixed intervals
breaks <- classIntervals(plotvar, ncolors, style="fixed",
                         fixedBreaks=c(300,400,500,600,700))
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Fixed Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 2. Equal intervals
breaks <- classIntervals(plotvar, ncolors, style="equal")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col = colcode, border = "grey", 
     main="Equal Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 3. Quantile intervals
breaks <- classIntervals(plotvar, ncolors, style="quantile")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Quantile Intervals")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

# 4. Jenks intervals
breaks <- classIntervals(plotvar, ncolors, style="jenks")
colcode <- findColours(breaks, pal)

plot(provinces_shp, col=colcode, border = "grey",
     main="Natural Intervals (Jenks)")
legend("bottomleft", legend=names(attr(colcode, "table")),
       fill=attr(colcode, "palette"), cex=1, bty="n")
text(getSpPPolygonsLabptSlots(provinces_shp),
labels=plotvar, cex=0.8)

#Title for combined plot
title("Maternal Mortality Ratio 2012 (per 100,000 live births)",
       outer=TRUE, line = -1, cex = 1)
```

```{r, fig.height=10, fig.width=10}
#Return to a single plot format
par(mfrow=c(1,1))
```

## Layering Points and Polygons
```{r, fig.height=10, fig.width=10}

# Prettier quantiles: round quantiles to nearest 5
breaks <- classIntervals(plotvar, ncolors, style="fixed",
fixedBreaks=c(370,515,560,585,680))
colcode <- findColours(breaks, pal)

# Parameters for legend
sym <- c(15,15,15,15,19)
labels <- c(paste0("MMR: ", names(attr(colcode, "table"))),
                   "ANC HIV Prevalence")
labels
colors <- c(attr(colcode, "palette"), "black")
colors

#Plot
plot(provinces_shp, col=colcode, border = "grey",
     main="Zimbabwe Maternal Health 2012")
symbols(ANC_shp$lon, ANC_shp$lat, add = TRUE,
        circles = ANC_shp$prevalence, bg = "black", inches = 0.1)
legend("topleft", legend=labels, pch=sym, col=colors,
       cex=1, pt.cex=2, bty="y")
addscalebar(style = "ticks")
addnortharrow(cols = c("white", "grey50"), scale = 0.6)
```





[**<< Workshop Home**](https://cafahey.github.io/spatial)
