R Code: Where did my ancestors live?

The following code pertains to this post.

Load R packages

The USAboundaries package provides historical boundaries for United States states, counties, and other regions.

library(ggplot2)
library(reshape2)
library(magrittr)
library(dplyr)
library(USAboundaries)
library(sf)

Read in Census County Assignment Table

This reads in a tab-delimited text file in which I have listed one individual per row, with columns named “Name” (person’s ID), “Branch” (branch of the family, not used for this analysis), and one column for each census from 1850 to 1940. My year columns are headed with numbers, so “1850” became “X1850” when read into R.

I only read in part of the .txt file I created, including the subset of individuals for whom I had data (indicated by a 1 in column “Include”). I have also only read in a subset of columns (ranging from column “Name” to column “X1850”), to avoid dealing with columns irrelevant to this analysis.

Counties were entered into my .txt file in the same format as in the USAboundaries dataset (e.g. Philadelphia County, Pennsylvania is “pas_philadelphia”)

# Read the census table, keeping only the rows flagged with Include == 1 and
# the columns from "Name" through "X1850".
res <- read.delim("CensusCounties.txt", header = TRUE) %>%
  subset(Include == 1, select = c(Name:X1850))

# Reshape to long form: "Name" and "Branch" stay as identifiers, and every
# remaining column is stacked so each person gets one row per census column.
resmelt <- res %>%
  melt(id.vars = c("Name", "Branch"))

Get residency for each county, for each census

Count the number of times each county appeared in each year, and then filter out some of the non-data values (such as “NA” or “??”) that had been in my table. The filters below are specific to my own data’s messiness.

# Cross-tabulate county codes against census columns to get a tally for every
# county/census combination.
counttab <- as.data.frame(table(resmelt$value, resmelt$variable))

# Drop entries matching the non-data patterns, keep only codes containing "s",
# give the columns descriptive names, and blank out zero tallies as NA.
clean <- counttab %>%
  filter(!grepl("xd|xm|xn|xp", Var1), grepl("s", Var1)) %>%
  rename(id = Var1, year = Var2, count = Freq) %>%
  mutate(count = replace(count, count == 0, NA))

Get subset of county data by State and Date

Subset the data from the USAboundaries package for just the states needed, and for the historical boundaries for each census’s date.

Also remove data for Deseret, which was somehow not filtered out using the state list.

# States whose counties are requested from USAboundaries.
list_state <- c(
  "Delaware", "Pennsylvania", "New Jersey",
  "Maryland", "New York", "Massachusetts"
)

# Fetch high-resolution county boundaries for `list_state` as of `map_date`.
get_counties <- function(map_date) {
  us_counties(map_date = map_date, states = list_state, resolution = "high")
}

# Drop rows whose state_terr is "Deseret", then merge in the rows of `clean`
# by county id; all.x = TRUE keeps counties that have no match in `clean`.
merge_counts <- function(counties) {
  kept <- counties[counties$state_terr != "Deseret", ]
  merge(kept, clean, by = "id", all.x = TRUE)
}

# Keep the rows belonging to one census column, plus rows with no year at all
# (counties that found no match in `clean`).
pick_year <- function(data, year_col) {
  data %>% filter(year == year_col | is.na(year))
}

# get list of counties for states of interest, by year
counties_1850 <- get_counties("1850-06-01")
counties_1860 <- get_counties("1860-06-01")
counties_1870 <- get_counties("1870-06-01")
counties_1880 <- get_counties("1880-06-01")
counties_1900 <- get_counties("1900-06-01")
counties_1910 <- get_counties("1910-04-15")
counties_1920 <- get_counties("1920-01-01")
counties_1930 <- get_counties("1930-04-01")
counties_1940 <- get_counties("1940-04-01")

# merge in count data for each year
data_1850 <- merge_counts(counties_1850)
data_1860 <- merge_counts(counties_1860)
data_1870 <- merge_counts(counties_1870)
data_1880 <- merge_counts(counties_1880)
data_1900 <- merge_counts(counties_1900)
data_1910 <- merge_counts(counties_1910)
data_1920 <- merge_counts(counties_1920)
data_1930 <- merge_counts(counties_1930)
data_1940 <- merge_counts(counties_1940)

# filter data by year for plotting
toplot_1850 <- pick_year(data_1850, "X1850")
toplot_1860 <- pick_year(data_1860, "X1860")
toplot_1870 <- pick_year(data_1870, "X1870")
toplot_1880 <- pick_year(data_1880, "X1880")
toplot_1900 <- pick_year(data_1900, "X1900")
toplot_1910 <- pick_year(data_1910, "X1910")
toplot_1920 <- pick_year(data_1920, "X1920")
toplot_1930 <- pick_year(data_1930, "X1930")
toplot_1940 <- pick_year(data_1940, "X1940")

Draw plots for each of the censuses

# State boundaries for the states of interest, used for the outline layer.
plot_states <- us_states(states = list_state, resolution = "high")

# Layers, scale and theme settings shared by every census map.
# County polygons filled by count, drawn without borders.
g_sf_aes <- geom_sf(aes(fill = count), color = NA, lwd = 0.5)
# Unfilled black state outlines.
g_sf_data <- geom_sf(data = plot_states, fill = NA, color = "black", lwd = 0.5)
scale_fg <- scale_fill_gradientn(
  colors = c("blue", "green", "yellow", "orange"),
  breaks = c(1, 3, 5),
  limits = c(1, 5),
  name = "Households"
)
thm <- theme_void()
thm_title <- theme(
  plot.title = element_text(),
  legend.justification = c(1, 0),
  legend.position = c(0.945, 0.08),
  legend.title = element_text(size = 7.5, face = "bold"),
  plot.margin = margin(-3.5, -5, 0, -9, "cm")
)

# Build one census map: the shared layers applied to `county_data`, with
# `year_label` written in bold text at a fixed position on the map.
census_map <- function(county_data, year_label) {
  ggplot(county_data) + g_sf_aes + g_sf_data + scale_fg +
    thm + thm_title +
    annotate(
      "text",
      x = -75.5, y = 42.5,
      label = year_label, size = 6, fontface = "bold"
    )
}

# make plot for each year
p1850 <- census_map(toplot_1850, "1850")
p1860 <- census_map(toplot_1860, "1860")
p1870 <- census_map(toplot_1870, "1870")
p1880 <- census_map(toplot_1880, "1880")
p1900 <- census_map(toplot_1900, "1900")
p1910 <- census_map(toplot_1910, "1910")
p1920 <- census_map(toplot_1920, "1920")
p1930 <- census_map(toplot_1930, "1930")
p1940 <- census_map(toplot_1940, "1940")

Save each plot to file

path <- "path/to/folder"  # <-- update to desired path

# Plots to write out, keyed by the census year used in each file name.
census_plots <- list(
  "1850" = p1850,
  "1860" = p1860,
  "1870" = p1870,
  "1880" = p1880,
  "1900" = p1900,
  "1910" = p1910,
  "1920" = p1920,
  "1930" = p1930,
  "1940" = p1940
)

# Save every map as a JPEG at the same 3 x 3 size so the images line up when
# shown side by side. The 1940 map was previously the only one written with
# width = 4, which looks like a copy-paste slip; set it back to 4 here if the
# wider image was intentional. File names are unchanged.
for (census_year in names(census_plots)) {
  ggsave(
    paste0("CountyChloropleth_", census_year, ".jpg"),
    census_plots[[census_year]],
    device = "jpeg",
    path = path,
    width = 3,
    height = 3
  )
}

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

Create a free website or blog at WordPress.com.

Up ↑

<span>%d</span> bloggers like this: