library(tidyverse)
library(readxl)
# Data source:
# https://www.census.gov/data/tables/time-series/demo/popest/2010s-state-total.html
#
# Read the raw population estimates and reshape them to one row per state and
# year, with columns `state`, `year` and `population`.
census <- read_xlsx("census-raw.xlsx", skip = 3) %>%
  # Drop unneeded columns
  select(-c("Census", "Estimates Base", "2016":"2019")) %>%
  # Drop unneeded rows
  slice(6:56) %>%
  # Fix name of first column
  rename("state" = "...1") %>%
  # Remove the opening period from state names. The pattern is a regular
  # expression, so the period is escaped and anchored to the start; a bare "."
  # would match (and delete) whatever the first character happens to be.
  mutate(state = str_remove(state, "^\\.")) %>%
  # Pivot the remaining year columns into long format
  pivot_longer(-state, names_to = "year", values_to = "population")
# All set with this!
head(census)
# Next, load the land area for each state
# Data source:
# https://19january2017snapshot.epa.gov/sites/production/files/2016-11/fy2017_proposed105_allocation_state_land_pop_data.xlsx
state_land <- read_xlsx("state-land-raw.xlsx", sheet = "LandArea", skip = 2) %>%
  # Keep only the two columns we need, renaming them as they are selected
  select(state = "State/Territory", land_area = "Sq. Mi....4")
# Preview the result
head(state_land)
# Left join the land data onto the census data, keeping every census row.
# `state` is the only column the two tables share, so name it as the key
# explicitly rather than relying on the natural-join default.
census <- left_join(census, state_land, by = "state")
head(census)
# Write out our data
write_csv(census, "census.csv")
# Now let's contend with the divisions info
census_divisions <- read_csv("census-divisions-raw.csv")
# Merge this. No `by` is given, so the join uses every column name the two
# tables have in common.
# NOTE(review): the columns of the divisions file are not visible here --
# confirm the shared key and spell it out with `by =`.
census_full <- left_join(census, census_divisions)
# View() opens a data viewer, so only call it from an interactive session
if (interactive()) {
  View(census_full)
}
head(census_full)
# Write to a new file
write_csv(census_full, "census_with_divisions.csv")
library(tidyverse)
library(readxl)
# Read the raw population estimates from raw-data/ and reshape them to one row
# per state and year, with columns `state`, `year` and `population`.
census <- read_xlsx("raw-data/census-raw.xlsx", skip = 3) %>%
  # Drop unneeded columns
  select(-c("Census", "Estimates Base", "2016":"2019")) %>%
  # Drop unneeded rows
  slice(6:56) %>%
  # Fix name of first column
  rename("state" = "...1") %>%
  # Remove the opening period from state names. The pattern is a regular
  # expression, so the period is escaped and anchored to the start; a bare "."
  # would match (and delete) whatever the first character happens to be.
  mutate(state = str_remove(state, "^\\.")) %>%
  # Pivot the remaining year columns into long format
  pivot_longer(-state, names_to = "year", values_to = "population")
# All set with this!
head(census)
# Load the land area for each state from the "LandArea" sheet
state_land <- read_xlsx(
  "raw-data/state-land-raw.xlsx",
  sheet = "LandArea",
  skip = 2
) %>%
  # Keep only the two columns we need, renaming them as they are selected
  select(state = "State/Territory", land_area = "Sq. Mi....4")
# Preview the result
head(state_land)
# Left join the land data onto the census data, keeping every census row.
# `state` is the only column the two tables share, so name it as the key
# explicitly rather than relying on the natural-join default.
census <- left_join(census, state_land, by = "state")
head(census)
# Write out our data
write_csv(census, "census.csv")
glimpse(census)
# Reload the prepared census and divisions tables from the shared datasets
# folder, then attach the divisions columns to the census rows.
# The paths are relative, so they resolve against the current working
# directory.
census <- read_csv('../datasets/census/census.csv')
glimpse(census)
divisions <- read_csv('../datasets/census/divisions.csv')
glimpse(divisions)
# Print the working directory, then switch to the solutions folder.
# NOTE(review): this setwd() path is specific to one machine; everything below
# that uses '../datasets/...' depends on it -- confirm before running elsewhere.
getwd()
setwd("~/GitHub/advancing-into-analytics-book/solutions")
# Same reads as above, repeated after the working directory change.
census <- read_csv('../datasets/census/census.csv')
glimpse(census)
divisions <- read_csv('../datasets/census/divisions.csv')
glimpse(divisions)
# `divisions` is read again from a differently named file, replacing the value
# assigned just above.
# NOTE(review): presumably 'census-divisions.csv' is the correct file name and
# the 'divisions.csv' reads are leftovers -- verify which file exists.
divisions <- read_csv('../datasets/census/census-divisions.csv')
glimpse(divisions)
# Left join with no `by`, so the key is every column name the two tables share.
census <- left_join(census, divisions)
head(census)
# Sort by region (ascending), then by division (descending) within each region.
# Each sort key is passed as its own argument: wrapping them in c() would
# concatenate the two columns into a single vector twice as long as the data,
# which arrange() rejects.
arrange(census, region, desc(division))
# Open the help page for arrange()
?arrange
library(writexl)
# Drop the postal code column before exporting
census <- select(census, -postal_code)
head(census)
# Sort by region, then division, then largest population first, and export the
# full sorted table once. Earlier drafts wrote this same file three times
# (including a head()-truncated version); only the last write determined its
# contents, so a single write produces the same file.
census %>%
  arrange(region, division, desc(population)) %>%
  write_xlsx("../datasets/census/solutions-data/census-sorted.xlsx")
# Add a density column: population divided by land area.
# mutate() takes the data frame as its first argument; without it the column
# names cannot be resolved and the call fails.
census <- mutate(census, density = population / land_area)
head(census)
# Scatterplot of population against land area, using every year in the data
ggplot(census, aes(land_area, population)) +
  geom_point()
# Print only the 2015 observations
filter(census, year == 2015)
# The same scatterplot restricted to 2015
ggplot(filter(census, year == 2015), aes(land_area, population)) +
  geom_point()
# Keep the 2015 subset as its own object, then plot it once more
census_2015 <- census %>%
  filter(year == 2015)
ggplot(census_2015, aes(land_area, population)) +
  geom_point()
# Rank the 2015 rows from largest to smallest land area
arrange(census_2015, desc(land_area))
# Total 2015 population within each region
summarise(
  group_by(census_2015, region),
  ttl_population = sum(population)
)
