Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(tidycensus)
- library(tidyverse)
- library(cowplot)
- library(viridis)
- library(scales)
- library(sf)
- library(tigris)
- library(rgeos)
- library(geosphere)
- library(gridExtra)
- ### Request your Census API key here:
- # https://api.census.gov/data/key_signup.html
- #
- api_key_census <- "INSERT_YOUR_CENSUS_API_KEY_HERE"
- census_api_key(api_key_census, install = TRUE)
- ### How to search Census for series (Painful!!!)
- # vars <- load_variables(2018, "acs5", cache = TRUE)
- # search <- vars %>% filter(grepl("TOTAL POPULATION", concept))
- #1 B02001_001 Estimate!!Total RACE
- #2 B02001_002 Estimate!!Total!!White alone RACE
- #3 B02001_003 Estimate!!Total!!Black or African American alone RACE
- #4 B02001_004 Estimate!!Total!!American Indian and Alaska Native alone RACE
- #5 B02001_005 Estimate!!Total!!Asian alone RACE
- #6 B02001_006 Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone RACE
- #7 B02001_007 Estimate!!Total!!Some other race alone RACE
- #8 B02001_008 Estimate!!Total!!Two or more races RACE
- #9 B02001_009 Estimate!!Total!!Two or more races!!Two races including Some other race RACE
- #10 B02001_010 Estimate!!Total!!Two or more races!!Two races excluding Some other race, and three or more races RACE
- ### African-American
- my_demographic <- c("B02001_003")
- my_demographic_txt <- "African Americans"
- ### Hispanics
- #my_demographic <- c("B03002_012")
- #my_demographic_txt <- "Hispanics"
- ### Non-Hispanic whites
- #my_demographic <- c("B03002_003")
- #my_demographic_txt <- "Non-Hispanics Whites"
- ### Native American
- #my_demographic <- c("B02001_004")
- #my_demographic_txt <- "Native American"
- ### Asian
- #my_demographic <- c("B02001_005")
- #my_demographic_txt <- "Asian"
- ### FIPS code for Syracuse, NY
- my_states <- c("36")
- my_counties <- c("067")
- ### Pull tract area so we can compute population density
- area_tract <-
- tracts(year = 2018,
- state = my_states,
- cb = TRUE,
- class = "sf",
- progress_bar = FALSE) %>%
- mutate(area = ALAND / 2589988) %>%
- select(GEOID, STATEFP, COUNTYFP, area) %>%
- st_drop_geometry() %>%
- filter(COUNTYFP %in% my_counties) %>%
- select(GEOID, area)
- ### Pull the demographic population of your chosen subset
- subset_pop <-
- get_acs(geography = "tract",
- variables = my_demographic,
- state = my_states,
- county = my_counties,
- year = 2018,
- geometry = TRUE,
- cache_table = TRUE) %>%
- select(GEOID, estimate) %>%
- rename(subset_pop = estimate)
- ### Pull the total population
- total_pop <-
- get_acs(geography = "tract",
- variables = c("B02001_001"),
- state = my_states,
- county = my_counties,
- year = 2018,
- geometry = FALSE,
- cache_table = TRUE) %>%
- select(GEOID, estimate) %>%
- rename(total_pop = estimate) %>%
- left_join(area_tract, by = "GEOID") %>%
- mutate(pop_density = total_pop / area)
- ### Pull median housing costs per month
- housing_costs <-
- get_acs(geography = "tract",
- variables = c("B25105_001"),
- state = my_states,
- county = my_counties,
- year = 2018,
- geometry = TRUE,
- cache_table = TRUE) %>%
- select(GEOID, estimate) %>%
- rename(housing_costs = estimate)
- ### Pull median income
- median_income <-
- get_acs(geography = "tract",
- variables = c("B19013_001"),
- state = my_states,
- county = my_counties,
- year = 2018,
- geometry = FALSE,
- cache_table = TRUE) %>%
- select(GEOID, estimate) %>%
- rename(income = estimate)
- ### Determine distance of tract from center of city, to see if housing costs depends on distance to center.
- ### Lat/lon are for Nashville TN.
- center_lat <- 36.16587
- center_lon <- -86.78434
- tract_centers <- housing_costs$geometry %>% as_Spatial() %>% gCentroid(byid = TRUE)
- dst <- tibble(tract_centers$x, tract_centers$y) %>% rename(lon = "tract_centers$x", lat = "tract_centers$y")
- ctr <- c(center_lon, center_lat)
- dist <- distVincentyEllipsoid(ctr, dst, a=6378137, b=6356752.3142, f=1/298.257223563) / 1609.34
- housing_costs$distance_to_center <- dist
- map_housing_to_income <-
- housing_costs %>%
- left_join(median_income, by = "GEOID") %>%
- mutate(housing_per = (12 * housing_costs) / income) #%>%
- #mutate(housing_per = ifelse(housing_per < 0.28, NA, housing_per))
- map_subset_per_pop <-
- subset_pop %>%
- left_join(total_pop, by = "GEOID") %>%
- mutate(subset_per = subset_pop / total_pop)
- data_combined <-
- (subset_pop %>% st_drop_geometry()) %>%
- left_join(total_pop, by = "GEOID") %>%
- left_join((housing_costs %>% st_drop_geometry()), by = "GEOID") %>%
- left_join(median_income, by = "GEOID") %>%
- mutate(housing_per = (12 * housing_costs) / income) %>%
- mutate(subset_per = subset_pop / total_pop) %>%
- as_tibble()
- areawater <-
- area_water(state = my_states,
- county = my_counties,
- class = "sf")
- linearwater <-
- linear_water(state = my_states,
- county = my_counties,
- class = "sf")
- roads <-
- roads(state = my_states,
- county = my_counties,
- class = "sf")
- interstates <- roads %>% filter(grepl("^I-|State Rte 840|Winfield", FULLNAME))
- hwy <- roads %>% filter(RTTYP %in% c("C", "S", "U")) %>% filter(!grepl("^Old", FULLNAME))
- rivers <- linearwater %>% filter(grepl(" Riv$", FULLNAME))
- lakes <- areawater
- color_water <- "steelblue2"
- color_interstate <- "darkred"
- ### Use this if you want to plot population density on an absolute scale
- graph_scale <-
- scale_fill_viridis(direction = -1,
- option = "inferno",
- labels = percent_format(accuracy = 0.1))
- ### Graph the counties
- p_subset_per_pop <-
- ggplot() +
- theme_void() +
- theme(
- legend.title = element_blank(),
- legend.position = "right",
- plot.title = element_text(hjust = 0.5),
- ) +
- labs(title = paste(my_demographic_txt, " as % of Population\n[ ",
- my_demographic, " / B02001_001 ]", sep = "")) +
- geom_sf(data = map_subset_per_pop, size = 0.05, alpha = 0.5,
- aes(fill = subset_per)) +
- geom_sf(data = areawater, size = 0.2, fill = color_water, color = color_water) +
- geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
- geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
- #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +
- graph_scale
- print(p_subset_per_pop)
- ### Graph the counties
- p_housing_to_income <-
- ggplot() +
- theme_void() +
- theme(
- legend.title = element_blank(),
- legend.position = "right",
- plot.title = element_text(hjust = 0.5),
- ) +
- labs(title = "Median Yearly Housing Costs as a percent of median Houshold Income exceeds 28%\n(12 * [B25105_001] / [B19013_001] > 0.28)") +
- geom_sf(data = map_housing_to_income, size = 0.05, alpha = 0.5,
- aes(fill = housing_per)) +
- geom_sf(data = lakes, size = 0.2, fill = color_water, color = color_water) +
- geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
- geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
- #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +
- graph_scale
- fit <- lm(housing_per ~ subset_per * pop_density * income * distance_to_center, data = data_combined)
- predict_fit <- 0.282728 -0.005900 * data_combined$distance_to_center
- p_fit <-
- ggplot(data = data_combined, aes(y = housing_per, x = subset_per)) +
- theme_classic() +
- geom_point(shape = 19, size = 0.5, color = "black") +
- geom_smooth(method = "lm", formula = y ~ x) +
- geom_hline(yintercept = 0.28, alpha = 0.5, linetype = "dotted") +
- labs(
- x = paste(my_demographic_txt, " % of Population", sep = ""),
- y = "Median Housing to Income Ratio") +
- scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
- scale_y_continuous(labels = scales::percent_format(accuracy = 1))
- plot_list <- list(p_subset_per_pop, p_housing_to_income, p_fit)
- layout_list <- rbind(
- c(1, 2),
- c(1, 2),
- c(1, 2),
- c(3, 3),
- c(3, 3)
- )
- charts <-
- grid.arrange(grobs = plot_list,
- layout_matrix = layout_list,
- ncols = 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement