R Code: Mining Census Data for Historical Context

Load necessary R packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Read in IPUMS .csv file

Data were downloaded from the IPUMS 1920 full dataset, for the state of Connecticut.
Filter to include only data from Fairfield County.

# Load the IPUMS extract; the first row of the file holds the column names
ctdat <- read.csv(file = "usa_00003.csv", header = TRUE)
# Keep only the rows whose COUNTY code equals 10 (rows with a missing COUNTY
# are dropped, as which() ignores NA)
fairfield <- ctdat[which(ctdat$COUNTY == 10), , drop = FALSE]

Where were residents born?

Among residents of Fairfield County in 1920, eight birth places outside of the United States are each represented by at least 5000 residents. Let’s categorize these individually, and then group everyone else by birth place as either “United States” or “Other.” People born in either the United States Virgin Islands or Puerto Rico (the only U.S. Outlying Areas or Territories found in this dataset, represented by 44 people in total) were classified as born outside the United States for this analysis.

# Builds fairBP: one row per resident with an area-of-residence label
# (CityName, derived from the CITY code) and a BirthPlace category.
# NOTE(review): this pipeline is incomplete as written. The first line has lost
# its assignment and input (presumably `fairBP <- fairfield %>%` - confirm
# against the original script), and every case_when() arm that assigns
# BirthPlace is missing after `BPL`. Both must be restored before this runs.
fairBP %
  mutate(CityName = ifelse(CITY==830, "Bridgeport", ifelse(CITY==4870, "Norwalk", ifelse(CITY==6730, "Stamford", "Outside Cities")))) %>%
  mutate(BirthPlace = case_when(
    BPL%
  select(SERIAL, PERNUM, BPL, YRIMMIG, CityName, BirthPlace)

Get Frequency of each birthplace in each place of residence

# Cross-tabulate area of residence against birth place in long format:
# one row per (City, Birth) pair with its count in Freq
bpTab <- as.data.frame(table(fairBP$CityName, fairBP$BirthPlace))
colnames(bpTab) <- c("City", "Birth", "Freq")

# Total number of residents in each area of residence
cityTab <- as.data.frame(table(fairBP$CityName))
colnames(cityTab) <- c("City", "Total")

# Attach each area's total to its birth-place counts, then express every count
# as a proportion of that area's residents
bpCity <- merge(bpTab, cityTab, by = "City", all.x = TRUE)
bpProp <- bpCity %>%
  mutate(Prop = Freq / Total)

Plot birth place composition for each area of residence

# Stacked bar of birth-place proportions, one panel per area of residence
pie <- ggplot(data = bpProp,
              mapping = aes(x = "", y = Prop, fill = factor(Birth))) +
  facet_wrap(~ City) +
  geom_col(width = 1)

# Wrap each stacked bar into a pie chart, then apply the palette, labels and
# text sizes. theme() must follow theme_minimal() so its settings are kept.
pie_format <- pie +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Set3") +
  labs(title = "Country of Birth (for each area of Fairfield County)",
       fill = "Birth Place",
       x = NULL,
       y = NULL) +
  theme_minimal() +
  theme(axis.line = element_blank(),
        axis.text.x = element_blank(),
        plot.title = element_text(size = 14, face = "bold", hjust = 0.25),
        strip.text.x = element_text(size = 12),
        legend.title = element_text(size = 10),
        legend.text = element_text(size = 8))

# Display the chart
pie_format

# Folder that receives the exported image
path <- "path/to/folder"  # <-- update to desired path

ggsave(filename = "1920_Fairfield_BPbyCityPie.jpg",
       plot = pie_format,
       device = "jpeg",
       path = path,
       width = 4.75,
       height = 3.25)

1920_Fairfield_BPbyCityPie

How big was each of these areas?

# Count the rows of fairBP (one per resident) in each area of residence
table(fairBP$CityName)
## 
##     Bridgeport        Norwalk Outside Cities       Stamford 
##         143677          27741         114563          35111

How many Polish people lived in each area?

# Residents of Polish origin, labelled with their area of residence.
#   "Polish born"    - own birth place code (BPL) is 455
#   "US-born Polish" - YRIMMIG is 0 (presumably "not an immigrant" - confirm
#                      against the IPUMS codebook) and the MBPL or FBPL code
#                      (presumably mother's / father's birth place) is 455
# Everyone else is labelled "Neither" and filtered out.
fairPol <- fairfield %>%
  mutate(CityName = ifelse(CITY == 830, "Bridgeport",
                    ifelse(CITY == 4870, "Norwalk",
                    ifelse(CITY == 6730, "Stamford", "Outside Cities")))) %>%
  mutate(Polish = case_when(
    BPL == 455 ~ "Polish born",
    YRIMMIG == 0 & (MBPL == 455 | FBPL == 455) ~ "US-born Polish",
    TRUE ~ "Neither")) %>%
  filter(Polish != "Neither") %>%
  select(SERIAL, PERNUM, RELATE, BPL, MBPL, FBPL, CityName, Polish)

Plot counts

# Stacked count of Polish-born and US-born Polish residents, one panel per
# area of residence. theme() must follow theme_minimal() so its settings are kept.
bar <- ggplot(data = fairPol,
              mapping = aes(x = "", fill = factor(Polish))) +
  geom_bar(width = 1) +
  facet_wrap(~ CityName) +
  scale_fill_brewer(palette = "Set3") +
  labs(title = "Polish residents (in each area of Fairfield County)",
       fill = "Birth Place",
       x = NULL,
       y = "Number of People") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", hjust = 0.1),
        strip.text.x = element_text(size = 12),
        legend.title = element_blank(),
        axis.line = element_blank())

# Display the chart
bar

# Folder that receives the exported image
path <- "path/to/folder"  # <-- update to desired path

ggsave(filename = "1920_Fairfield_PolishbyCityBar.jpg",
       plot = bar,
       device = "jpeg",
       path = path,
       width = 5,
       height = 3)

1920_Fairfield_PolishbyCityBar

When did the immigrants arrive in Fairfield County?

Look at year of immigration (as reported in the 1920 census), by country of birth, across all years

# Keep only residents with a recorded year of immigration (YRIMMIG > 0)
fairBP.imm <- fairBP %>%
  filter(YRIMMIG > 0)

# Shared base plot: year of immigration on the x axis, Spectral fill palette
h <- ggplot(data = fairBP.imm, mapping = aes(x = YRIMMIG)) +
  scale_fill_brewer(palette = "Spectral")

# One bar per year, stacked by birth place, across every reported year
hist_all <- h +
  geom_histogram(mapping = aes(fill = BirthPlace),
                 binwidth = 1,
                 colour = "black") +
  labs(x = "Year",
       y = "Number of Immigrants",
       title = "Fairfield County Residents'\nYear of Immigration (All Years)") +
  theme_minimal() +
  theme(legend.title = element_text(size = 10, face = "bold"),
        legend.text = element_text(size = 9),
        plot.title = element_text(face = "bold", hjust = 0.5))

# Display the chart
hist_all

# Folder that receives the exported image
path <- "path/to/folder"  # <-- update to desired path

ggsave(filename = "1920_Fairfield_YearImmHistAll.jpg",
       plot = hist_all,
       device = "jpeg",
       path = path,
       width = 5,
       height = 3)

1920_Fairfield_YearImmHistAll

# Year-of-immigration histogram restricted to arrivals from 1900 through 1920,
# stacked by birth place.
# The x limits reach half a bin (0.5 years) beyond 1900 and 1920. With
# binwidth = 1 each bar spans year +/- 0.5, and a bar whose edge lies outside
# the scale limits is removed, so limits of exactly c(1900, 1920) would drop
# the 1900 and 1920 bars from the chart.
hist_late <- h + geom_histogram(aes(fill = BirthPlace),
                   binwidth = 1,
                   col = "black") +
  scale_x_continuous(limits = c(1899.5, 1920.5)) +
  labs(title = "Fairfield County Residents'\nYear of Immigration (1900-1920)",
       x = "Year",
       y = "Number of Immigrants") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5),
        legend.title = element_text(size = 10, face = "bold"),
        legend.text = element_text(size = 9))

# Display the chart
hist_late

# Folder that receives the exported image
path <- "path/to/folder"  # <-- update to desired path

ggsave("1920_Fairfield_YearImmHistLate.jpg", hist_late, device = "jpeg",
       path = path, width = 5, height = 3)

1920_Fairfield_YearImmHistLate

Heads of Household

# all Bridgeport heads of household
# (rows with RELATE == 1 and CITY == 830), tagged with a BirthPlace category
# and a constant Group of "All".
# NOTE(review): this pipeline is incomplete as written. The first line has lost
# its assignment and input (presumably `br.head <- fairfield %>%` - confirm
# against the original script), and every case_when() arm that assigns
# BirthPlace is missing after `BPL`. Both must be restored before this runs.
br.head %
  filter(RELATE==1 & CITY==830) %>%
    mutate(BirthPlace = case_when(
      BPL%
  mutate(Group = "All") %>%
  select(SERIAL, PERNUM, AGE, BirthPlace, Group)
# Age distribution of all Bridgeport heads of household
summary(br.head$AGE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.00   32.00   40.00   42.36   50.00   98.00
# Age distribution of the heads of household whose BirthPlace is "Poland"
summary(subset(br.head, BirthPlace=="Poland")$AGE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   20.00   30.00   35.00   37.13   43.00   78.00

How many households in Bridgeport had children?

All Bridgeport residents

# Bridgeport households
# All residents whose CITY code is 830 (compared as a number, matching the
# other CITY comparisons in this script)
bport <- fairfield %>%
  filter(CITY == 830)
# Total number of residents (one row per person)
bptotal <- nrow(bport)
# Distinct households (SERIAL is shared by the members of a household)
bphouse <- n_distinct(bport$SERIAL)

# number with children under 18 (including children, step-children, and adopted children of the head of household)
bport_ch <- bport %>%
  filter(RELATE == 3, AGE < 18)
# Total number of children
bpchtotal <- nrow(bport_ch)
# Number of distinct households with children
bpchhouse <- n_distinct(bport_ch$SERIAL)

Households with immigrant head of household

# List immigrant heads of household: Bridgeport residents with RELATE == 1
# and a birth place code (BPL) above 100.
# Table listing household IDs with immigrant heads; RELATE is renamed to
# ImmHead so it does not clash with the RELATE column of bport_ch in the merge.
bport_imm <- bport %>%
  filter(RELATE == 1, BPL > 100) %>%
  select(SERIAL, ImmHead = RELATE)
# Total number of immigrant heads of household (same as #households)
bpimmhouse <- nrow(bport_imm)

# merge immigrant-headed household IDs with households with children;
# the result has one row per child living in an immigrant-headed household
bport_imm_ch <- merge(bport_imm, bport_ch, by = "SERIAL")

# Total number of children in immigrant-headed households
bpimmchtotal <- nrow(bport_imm_ch)

# Number of distinct immigrant-headed households with children
bpimmchhouse <- n_distinct(bport_imm_ch$SERIAL)

Households with Polish-born head of household

# List households with Polish-born heads: Bridgeport residents with
# RELATE == 1 and a birth place code (BPL) of 455.
# Table listing household IDs with Polish-born heads; RELATE is renamed to
# PolHead so it does not clash with the RELATE column of bport_ch in the merge.
bport_pol <- bport %>%
  filter(RELATE == 1, BPL == 455) %>%
  select(SERIAL, PolHead = RELATE)
# Total number of Polish-born heads of household (same as #households)
bppolhouse <- nrow(bport_pol)

# merge Polish-headed household IDs with households with children;
# the result has one row per child living in a Polish-headed household
bport_pol_ch <- merge(bport_pol, bport_ch, by = "SERIAL")

# Total number of children in Polish-headed households
bppolchtotal <- nrow(bport_pol_ch)

# Number of distinct Polish-headed households with children
bppolchhouse <- n_distinct(bport_pol_ch$SERIAL)

Table of family counts to plot

# Household counts for plotting: one row per (Group, Children) combination.
# The first three rows count households with children; the last three are the
# remainder of each group (all households minus those with children).
famtab <- data.frame(
  Group = rep(c("All", "Immigrant", "Polish"), times = 2),
  Children = rep(c("yes", "no"), each = 3),
  Number = c(bpchhouse,
             bpimmchhouse,
             bppolchhouse,
             bphouse - bpchhouse,
             bpimmhouse - bpimmchhouse,
             bppolhouse - bppolchhouse),
  stringsAsFactors = FALSE
)
famtab
##       Group Children Number
## 1       All      yes  19318
## 2 Immigrant      yes  12217
## 3    Polish      yes    897
## 4       All       no  13603
## 5 Immigrant       no   5430
## 6    Polish       no    186

Percentage of households with children

# Each ratio below is households with children divided by all households in
# the group; the result is a proportion (multiply by 100 for a percentage).
# All
bpchhouse/bphouse
## [1] 0.5867987
# Immigrant
bpimmchhouse/bpimmhouse
## [1] 0.692299
# Polish
bppolchhouse/bppolhouse
## [1] 0.8282548

Number of Children per household

# Each ratio below is the total number of children divided by the number of
# households that have children, so households without children are excluded
# from the average.
# All
bpchtotal/bpchhouse
## [1] 2.397298
# Immigrant
bpimmchtotal/bpimmchhouse
## [1] 2.661046
# Polish
bppolchtotal/bppolchhouse
## [1] 2.754738

Barplot to show counts

# Stacked bars of household counts per group, split by whether the household
# has children. The text labels are hard-coded and placed above the bars at
# x positions 1, 2, 3 (the groups in axis order).
bar_ch <- ggplot(data = famtab,
                 mapping = aes(x = Group, y = Number, fill = Children)) +
  geom_col() +
  scale_fill_brewer(palette = "Paired") +
  labs(title = "Number of Bridgeport Households\nwith Children Under 18") +
  annotate("text",
           x = c(1, 2, 3),
           y = 26000,
           label = c("59% have\nchildren\n(2.4 kids per\nhousehold)",
                     "69% have\nchildren\n(2.7 kids per\nhousehold)",
                     "83% have\nchildren\n(2.8 kids per\nhousehold)"),
           size = 3.5) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# Display the chart
bar_ch

# Folder that receives the exported image
path <- "path/to/folder"  # <-- update to desired path

ggsave(filename = "1920_Fairfield_HouseholdChildrenBar.jpg",
       plot = bar_ch,
       device = "jpeg",
       path = path,
       width = 5,
       height = 3)

1920_Fairfield_HouseholdChildrenBar

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

Blog at WordPress.com.

Up ↑

%d bloggers like this: