Load necessary R packages
library(dplyr)
## ## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats': ## ## filter, lag
## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union
library(ggplot2)
Read in IPUMS .csv file
Data were downloaded from the IPUMS 1920 full dataset, for the state of Connecticut.
Filter to include only data from Fairfield County.
# Load the IPUMS extract (the first row of the CSV holds the column names).
ctdat <- read.csv("usa_00003.csv", header = TRUE)

# Keep the rows whose COUNTY code is 10 (described above as Fairfield County).
# which() discards rows where the comparison is NA, matching subset().
fairfield <- ctdat[which(ctdat$COUNTY == 10), , drop = FALSE]
Where were residents born?
Among residents of Fairfield County in 1920, eight birth places outside of the United States are each represented by at least 5,000 residents. Let’s categorize these individually, and then group everyone else by birth place as either “United States” or “Other.” People born in either the United States Virgin Islands or Puerto Rico (the only U.S. Outlying Areas or Territories found in this dataset, represented by 44 people in total) were classified as born outside the United States for this analysis.
# Per-person table used by the birthplace and year-of-immigration plots:
# one row per resident with a residence-area label and a birthplace label.
#
# NOTE(review): the published copy of this chunk lost every span between a
# "<" and the following ">", i.e. the `<- fairfield %>%` of the first line
# and nearly all of the case_when() arms. The assignment and pipes are
# restored here. Only the birthplace arms that can be confirmed from the
# rest of this script are restored (BPL code 455 is used for Polish-born
# residents, and the label "Poland" is filtered on in a later chunk).
# TODO: re-enter the other seven individually reported countries from the
# IPUMS BPL code list; until then those residents are counted as "Other".
fairBP <- fairfield %>%
  mutate(
    # CITY codes for the three named cities; every other code is grouped.
    CityName = ifelse(
      CITY == 830, "Bridgeport",
      ifelse(
        CITY == 4870, "Norwalk",
        ifelse(CITY == 6730, "Stamford", "Outside Cities")
      )
    )
  ) %>%
  mutate(
    BirthPlace = case_when(
      # presumably BPL codes below 100 are the U.S. states; Puerto Rico and
      # the U.S. Virgin Islands must NOT match this arm -- TODO confirm
      BPL < 100 ~ "United States",
      BPL == 455 ~ "Poland",
      TRUE ~ "Other"
    )
  ) %>%
  select(SERIAL, PERNUM, BPL, YRIMMIG, CityName, BirthPlace)
Get Frequency of each birthplace in each place of residence
# Count residents for every (residence area, birthplace) combination.
bpTab <- as.data.frame(table(fairBP$CityName, fairBP$BirthPlace))
colnames(bpTab) <- c("City", "Birth", "Freq")

# Total residents per residence area (the denominator for the shares).
cityTab <- as.data.frame(table(fairBP$CityName))
colnames(cityTab) <- c("City", "Total")

# Attach each area's total to its birthplace rows; all.x keeps every
# birthplace row even if an area were missing from cityTab.
bpCity <- merge(bpTab, cityTab, by = "City", all.x = TRUE)

# Share of each area's residents born in each place.
# (The `<- bpCity %>%` part of this statement was lost in the published copy.)
bpProp <- bpCity %>%
  mutate(Prop = Freq / Total)
Plot birth place composition for each area of residence
# One stacked bar per residence area; bar segments are the pre-computed
# birthplace shares (geom_col() plots the y values as given).
pie <- ggplot(bpProp, aes(x = "", y = Prop, fill = factor(Birth))) +
  geom_col(width = 1) +
  facet_wrap(~ City)

# Wrapping the y axis around a circle turns each stacked bar into a pie.
pie_format <- pie +
  scale_fill_brewer(palette = "Set3") +
  coord_polar(theta = "y") +
  labs(
    title = "Country of Birth (for each area of Fairfield County)",
    fill = "Birth Place",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.25),
    strip.text.x = element_text(size = 12),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8),
    axis.text.x = element_blank(),
    axis.line = element_blank()
  )
pie_format

# Write the figure to disk as a JPEG.
path <- "path/to/folder" # <-- update to desired path
ggsave(
  filename = "1920_Fairfield_BPbyCityPie.jpg",
  plot = pie_format,
  device = "jpeg",
  path = path,
  width = 4.75,
  height = 3.25
)
How big was each of these areas?
# Number of residents recorded in each residence area.
table(fairBP[["CityName"]])
## ## Bridgeport Norwalk Outside Cities Stamford ## 143677 27741 114563 35111
How many Polish people lived in each area?
# Residents with Polish origins, labelled by residence area.
# (The `<- fairfield %>%` part of the first line was lost in the published
# copy; the raw county table is the source because MBPL, FBPL, RELATE and
# CITY are needed here.)
fairPol <- fairfield %>%
  mutate(
    CityName = ifelse(
      CITY == 830, "Bridgeport",
      ifelse(
        CITY == 4870, "Norwalk",
        ifelse(CITY == 6730, "Stamford", "Outside Cities")
      )
    )
  ) %>%
  mutate(
    Polish = case_when(
      # born in Poland (BPL code 455)
      BPL == 455 ~ "Polish born",
      # no year of immigration recorded and mother or father born in Poland
      YRIMMIG == 0 & (MBPL == 455 | FBPL == 455) ~ "US-born Polish",
      TRUE ~ "Neither"
    )
  ) %>%
  filter(Polish != "Neither") %>%
  select(SERIAL, PERNUM, RELATE, BPL, MBPL, FBPL, CityName, Polish)
Plot counts
# Stacked counts of Polish-born and US-born Polish residents,
# one panel per residence area (geom_bar() counts the rows itself).
bar <- ggplot(fairPol, aes(x = "", fill = factor(Polish))) +
  geom_bar(width = 1) +
  facet_wrap(~ CityName) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Polish residents (in each area of Fairfield County)",
    fill = "Birth Place",
    x = NULL,
    y = "Number of People"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.1),
    strip.text.x = element_text(size = 12),
    legend.title = element_blank(),
    axis.line = element_blank()
  )
bar

# Write the figure to disk as a JPEG.
path <- "path/to/folder" # <-- update to desired path
ggsave(
  filename = "1920_Fairfield_PolishbyCityBar.jpg",
  plot = bar,
  device = "jpeg",
  path = path,
  width = 5,
  height = 3
)
When did the immigrants arrive in Fairfield County?
Look at year of immigration (as reported in the 1920 census), by country of birth, across all years
# Keep only residents with a recorded year of immigration (YRIMMIG > 0).
# (The `<- fairBP %>%` part of this statement was lost in the published copy.)
fairBP.imm <- fairBP %>%
  filter(YRIMMIG > 0)
# Shared base plot for both immigration histograms: year of immigration on
# the x axis, "Spectral" palette for the birthplace fill.
h <- ggplot(fairBP.imm, aes(YRIMMIG)) +
  scale_fill_brewer(palette = "Spectral")

# One bin per calendar year, stacked by birthplace, across all years.
hist_all <- h +
  geom_histogram(aes(fill = BirthPlace), binwidth = 1, colour = "black") +
  labs(
    title = "Fairfield County Residents'\nYear of Immigration (All Years)",
    x = "Year",
    y = "Number of Immigrants"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9)
  )
hist_all

# Write the figure to disk as a JPEG.
path <- "path/to/folder" # <-- update to desired path
ggsave(
  filename = "1920_Fairfield_YearImmHistAll.jpg",
  plot = hist_all,
  device = "jpeg",
  path = path,
  width = 5,
  height = 3
)
# Same histogram restricted to arrivals from 1900 through 1920.
#
# With binwidth = 1 the bins are centred on whole years, so the 1900 bar
# spans 1899.5-1900.5 and the 1920 bar spans 1919.5-1920.5. Scale limits of
# exactly c(1900, 1920) censor any bar whose edge falls outside the limits,
# which silently drops the first and last years from the plot. Padding the
# limits by half a bin width keeps both bars while still excluding 1899 and
# earlier.
hist_late <- h +
  geom_histogram(aes(fill = BirthPlace), binwidth = 1, col = "black") +
  scale_x_continuous(limits = c(1899.5, 1920.5)) +
  labs(
    title = "Fairfield County Residents'\nYear of Immigration (1900-1920)",
    x = "Year",
    y = "Number of Immigrants"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.title = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9)
  )
hist_late

# Write the figure to disk as a JPEG.
path <- "path/to/folder" # <-- update to desired path
ggsave("1920_Fairfield_YearImmHistLate.jpg", hist_late, device = "jpeg",
       path = path, width = 5, height = 3)
Heads of Household
# All Bridgeport heads of household (RELATE == 1 within CITY code 830),
# with a birthplace label and a constant Group tag.
#
# NOTE(review): the published copy of this chunk lost every span between a
# "<" and the following ">", i.e. the `<- fairfield %>%` of the first line
# and nearly all of the case_when() arms. The assignment and pipes are
# restored here; only the arms that can be confirmed from this script are
# restored (BPL code 455 with the label "Poland", which the summary below
# filters on).
# TODO: re-enter the remaining country arms from the IPUMS BPL code list;
# until then those heads of household are labelled "Other".
br.head <- fairfield %>%
  filter(RELATE == 1 & CITY == 830) %>%
  mutate(
    BirthPlace = case_when(
      # presumably BPL codes below 100 are the U.S. states -- TODO confirm
      BPL < 100 ~ "United States",
      BPL == 455 ~ "Poland",
      TRUE ~ "Other"
    )
  ) %>%
  mutate(Group = "All") %>%
  select(SERIAL, PERNUM, AGE, BirthPlace, Group)
# Age distribution of every Bridgeport head of household.
summary(br.head[["AGE"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 9.00 32.00 40.00 42.36 50.00 98.00
# Same summary for heads whose BirthPlace label is "Poland"
# (which() drops rows where the comparison is NA, as subset() does).
summary(br.head$AGE[which(br.head$BirthPlace == "Poland")])
## Min. 1st Qu. Median Mean 3rd Qu. Max. ## 20.00 30.00 35.00 37.13 43.00 78.00
How many households in Bridgeport had children?
All Bridgeport residents
# Everyone living in Bridgeport (CITY code 830).
# (The `<- fairfield %>%` part of this statement was lost in the published
# copy. The code is compared as a number, matching the other CITY filters
# in this script.)
bport <- fairfield %>%
  filter(CITY == 830)

# Total number of residents (one row per person)
bptotal <- nrow(bport)
# Distinct households (SERIAL is the household identifier)
bphouse <- length(unique(bport$SERIAL))

# Children under 18 of the head of household (RELATE == 3, which includes
# children, step-children, and adopted children).
# (The `<- bport %>%` part of this statement was lost in the published copy.)
bport_ch <- bport %>%
  filter(RELATE == 3 & AGE < 18)

# Total number of children
bpchtotal <- nrow(bport_ch)
# Number of distinct households with children
bpchhouse <- length(unique(bport_ch$SERIAL))
Households with immigrant head of household
# Immigrant heads of household in Bridgeport: RELATE == 1 with a BPL code
# above 100. The RELATE column is renamed to ImmHead so it does not clash
# with the children's RELATE column in the merge below.
# (The `<- bport %>%` part of this statement was lost in the published copy.)
bport_imm <- bport %>%
  filter(RELATE == 1 & BPL > 100) %>%
  select(SERIAL, ImmHead = RELATE)

# Total number of immigrant heads of household (same as #households)
bpimmhouse <- nrow(bport_imm)

# Keep only the children who live in an immigrant-headed household
# (inner join on the household identifier).
bport_imm_ch <- merge(bport_imm, bport_ch, by = "SERIAL")

# Total number of children in immigrant-headed households
bpimmchtotal <- nrow(bport_imm_ch)
# Number of distinct immigrant-headed households with children
bpimmchhouse <- length(unique(bport_imm_ch$SERIAL))
Households with Polish-born head of household
# Polish-born heads of household in Bridgeport: RELATE == 1 with BPL code
# 455. The RELATE column is renamed to PolHead so it does not clash with
# the children's RELATE column in the merge below.
# (The `<- bport %>%` part of this statement was lost in the published copy.)
bport_pol <- bport %>%
  filter(RELATE == 1 & BPL == 455) %>%
  select(SERIAL, PolHead = RELATE)

# Total number of Polish-born heads of household (same as #households)
bppolhouse <- nrow(bport_pol)

# Keep only the children who live in a Polish-headed household
# (inner join on the household identifier).
bport_pol_ch <- merge(bport_pol, bport_ch, by = "SERIAL")

# Total number of children in Polish-headed households
bppolchtotal <- nrow(bport_pol_ch)
# Number of distinct Polish-headed households with children
bppolchhouse <- length(unique(bport_pol_ch$SERIAL))
Table of family counts to plot
# Long-format table for the stacked bar chart: for each household group,
# one row counting households with children and one counting those without.
famtab <- data.frame(
  Group = rep(c("All", "Immigrant", "Polish"), times = 2),
  Children = rep(c("yes", "no"), each = 3),
  Number = c(
    # households with children
    bpchhouse, bpimmchhouse, bppolchhouse,
    # households without children = all households minus those with children
    bphouse - bpchhouse, bpimmhouse - bpimmchhouse, bppolhouse - bppolchhouse
  ),
  stringsAsFactors = FALSE
)
famtab
## Group Children Number ## 1 All yes 19318 ## 2 Immigrant yes 12217 ## 3 Polish yes 897 ## 4 All no 13603 ## 5 Immigrant no 5430 ## 6 Polish no 186
Proportion of households with children
# All Bridgeport households: households with children / all households
bpchhouse / bphouse
## [1] 0.5867987
# Immigrant-headed households
bpimmchhouse / bpimmhouse
## [1] 0.692299
# Polish-headed households
bppolchhouse / bppolhouse
## [1] 0.8282548
Number of children per household (among households with children)
# All Bridgeport households: children / households that have children
bpchtotal / bpchhouse
## [1] 2.397298
# Immigrant-headed households
bpimmchtotal / bpimmchhouse
## [1] 2.661046
# Polish-headed households
bppolchtotal / bppolchhouse
## [1] 2.754738
Barplot to show counts
# Annotation text for each bar, computed from the household counts so the
# labels cannot drift out of sync with the data. Order matches the bars:
# All, Immigrant, Polish.
ch_share <- c(bpchhouse, bpimmchhouse, bppolchhouse) /
  c(bphouse, bpimmhouse, bppolhouse)
ch_per_house <- c(bpchtotal, bpimmchtotal, bppolchtotal) /
  c(bpchhouse, bpimmchhouse, bppolchhouse)
ch_labels <- sprintf(
  "%.0f%% have\nchildren\n(%.1f kids per\nhousehold)",
  100 * ch_share,
  ch_per_house
)

# Stacked bars: households with and without children for each group.
bar_ch <- ggplot(data = famtab, aes(x = Group, y = Number, fill = Children)) +
  geom_col() +
  scale_fill_brewer(palette = "Paired") +
  ggtitle("Number of Bridgeport Households\nwith Children Under 18") +
  # x = 1, 2, 3 are the positions of the Group bars on the discrete axis;
  # y = 26000 is a fixed height chosen for the label row.
  annotate("text", x = 1:3, y = 26000, label = ch_labels, size = 3.5) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))
bar_ch

# Write the figure to disk as a JPEG.
path <- "path/to/folder" # <-- update to desired path
ggsave("1920_Fairfield_HouseholdChildrenBar.jpg", bar_ch, device = "jpeg",
       path = path, width = 5, height = 3)
Leave a Reply