Advertisement
MetricT

Demographic vs Housing-to-Income ratio

Jun 16th, 2020
2,217
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 8.88 KB | None | 0 0
  1. library(tidycensus)
  2. library(tidyverse)
  3. library(cowplot)
  4. library(viridis)
  5. library(scales)
  6. library(sf)
  7. library(tigris)
  8. library(rgeos)
  9. library(geosphere)
  10. library(gridExtra)
  11.  
  12. ### Request your Census API key here:
  13. # https://api.census.gov/data/key_signup.html
  14. #
  15. api_key_census <- "INSERT_YOUR_CENSUS_API_KEY_HERE"
  16. census_api_key(api_key_census, install = TRUE)
  17.  
  18. ### How to search Census for series (Painful!!!)
  19. # vars <- load_variables(2018, "acs5", cache = TRUE)
  20. # search <- vars %>% filter(grepl("TOTAL POPULATION", concept))
  21.  
  22. #1 B02001_001 Estimate!!Total                                                                                  RACE  
  23. #2 B02001_002 Estimate!!Total!!White alone                                                                     RACE  
  24. #3 B02001_003 Estimate!!Total!!Black or African American alone                                                 RACE  
  25. #4 B02001_004 Estimate!!Total!!American Indian and Alaska Native alone                                         RACE  
  26. #5 B02001_005 Estimate!!Total!!Asian alone                                                                     RACE  
  27. #6 B02001_006 Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone                                RACE  
  28. #7 B02001_007 Estimate!!Total!!Some other race alone                                                           RACE  
  29. #8 B02001_008 Estimate!!Total!!Two or more races                                                               RACE  
  30. #9 B02001_009 Estimate!!Total!!Two or more races!!Two races including Some other race                          RACE  
  31. #10 B02001_010 Estimate!!Total!!Two or more races!!Two races excluding Some other race, and three or more races RACE  
  32.  
  33. ### African-American
  34. my_demographic <- c("B02001_003")
  35. my_demographic_txt <- "African Americans"
  36.  
  37. ### Hispanics
  38. #my_demographic <- c("B03002_012")
  39. #my_demographic_txt <- "Hispanics"
  40.  
  41. ### Non-Hispanic whites
  42. #my_demographic <- c("B03002_003")
  43. #my_demographic_txt <- "Non-Hispanics Whites"
  44.  
  45. ### Native American
  46. #my_demographic <- c("B02001_004")
  47. #my_demographic_txt <- "Native American"
  48.  
  49. ### Asian
  50. #my_demographic <- c("B02001_005")
  51. #my_demographic_txt <- "Asian"
  52.  
  53. ### FIPS code for Syracuse, NY
  54. my_states <- c("36")
  55. my_counties <- c("067")
  56.  
  57. ### Pull tract area so we can compute population density
  58. area_tract <-
  59.   tracts(year = 2018,
  60.          state = my_states,
  61.          cb = TRUE,
  62.          class = "sf",
  63.          progress_bar = FALSE) %>%
  64.   mutate(area = ALAND / 2589988) %>%
  65.   select(GEOID, STATEFP, COUNTYFP, area) %>%
  66.   st_drop_geometry() %>%
  67.   filter(COUNTYFP %in% my_counties) %>%
  68.   select(GEOID, area)
  69.  
  70. ### Pull the demographic population of your chosen subset
  71. subset_pop <-
  72.   get_acs(geography   = "tract",
  73.           variables   = my_demographic,
  74.           state       = my_states,
  75.           county      = my_counties,
  76.           year        = 2018,
  77.           geometry    = TRUE,
  78.           cache_table = TRUE) %>%
  79.   select(GEOID, estimate) %>%
  80.   rename(subset_pop = estimate)
  81.  
  82. ### Pull the total population
  83. total_pop <-
  84.   get_acs(geography   = "tract",
  85.           variables   = c("B02001_001"),
  86.           state       = my_states,
  87.           county      = my_counties,
  88.           year        = 2018,
  89.           geometry    = FALSE,
  90.           cache_table = TRUE) %>%
  91.   select(GEOID, estimate) %>%
  92.   rename(total_pop = estimate) %>%
  93.   left_join(area_tract, by = "GEOID") %>%
  94.   mutate(pop_density = total_pop / area)
  95.  
  96. ### Pull median housing costs per month
  97. housing_costs <-
  98.   get_acs(geography   = "tract",
  99.           variables   = c("B25105_001"),
  100.           state       = my_states,
  101.           county      = my_counties,
  102.           year        = 2018,
  103.           geometry    = TRUE,
  104.           cache_table = TRUE) %>%
  105.   select(GEOID, estimate) %>%
  106.   rename(housing_costs = estimate)
  107.  
  108. ### Pull median income
  109. median_income <-
  110.   get_acs(geography   = "tract",
  111.           variables   = c("B19013_001"),
  112.           state       = my_states,
  113.           county      = my_counties,
  114.           year        = 2018,
  115.           geometry    = FALSE,
  116.           cache_table = TRUE) %>%
  117.   select(GEOID, estimate) %>%
  118.   rename(income = estimate)
  119.  
  120. ### Determine distance of tract from center of city, to see if housing costs depends on distance to center.
  121. ### Lat/lon are for Nashville TN.
  122. center_lat <-  36.16587
  123. center_lon <- -86.78434
  124. tract_centers <- housing_costs$geometry %>% as_Spatial() %>% gCentroid(byid = TRUE)
  125. dst <- tibble(tract_centers$x, tract_centers$y) %>% rename(lon = "tract_centers$x", lat = "tract_centers$y")
  126. ctr <- c(center_lon, center_lat)
  127. dist <- distVincentyEllipsoid(ctr, dst, a=6378137, b=6356752.3142, f=1/298.257223563) / 1609.34
  128. housing_costs$distance_to_center <- dist
  129.  
  130. map_housing_to_income <-
  131.   housing_costs %>%
  132.   left_join(median_income, by = "GEOID") %>%
  133.   mutate(housing_per = (12 * housing_costs) / income) #%>%
  134.   #mutate(housing_per = ifelse(housing_per < 0.28, NA, housing_per))
  135.  
  136. map_subset_per_pop <-
  137.   subset_pop %>%
  138.   left_join(total_pop, by = "GEOID") %>%
  139.   mutate(subset_per = subset_pop / total_pop)
  140.  
  141. data_combined <-
  142.   (subset_pop %>% st_drop_geometry()) %>%
  143.   left_join(total_pop, by = "GEOID") %>%
  144.   left_join((housing_costs %>% st_drop_geometry()), by = "GEOID") %>%
  145.   left_join(median_income, by = "GEOID") %>%
  146.   mutate(housing_per = (12 * housing_costs) / income) %>%
  147.   mutate(subset_per = subset_pop / total_pop) %>%
  148.   as_tibble()
  149.  
  150. areawater <-
  151.   area_water(state = my_states,
  152.              county = my_counties,
  153.              class = "sf")
  154.  
  155. linearwater <-
  156.   linear_water(state = my_states,
  157.                county = my_counties,
  158.                class = "sf")
  159.  
  160. roads <-
  161.   roads(state = my_states,
  162.         county = my_counties,
  163.         class = "sf")
  164.  
  165. interstates <- roads %>% filter(grepl("^I-|State Rte 840|Winfield", FULLNAME))
  166. hwy         <- roads %>% filter(RTTYP %in% c("C", "S", "U")) %>% filter(!grepl("^Old", FULLNAME))
  167. rivers      <- linearwater %>% filter(grepl(" Riv$", FULLNAME))
  168. lakes       <- areawater
  169.  
  170. color_water      <- "steelblue2"
  171. color_interstate <- "darkred"
  172.  
  173. ### Use this if you want to plot population density on an absolute scale
  174. graph_scale <-
  175.   scale_fill_viridis(direction = -1,
  176.                      option    = "inferno",
  177.                      labels    = percent_format(accuracy = 0.1))
  178.  
  179. ### Graph the counties
  180. p_subset_per_pop <-
  181.   ggplot() +
  182.   theme_void() +
  183.   theme(
  184.     legend.title    = element_blank(),
  185.     legend.position = "right",
  186.     plot.title      = element_text(hjust = 0.5),
  187.   ) +
  188.   labs(title = paste(my_demographic_txt, " as % of Population\n[ ",
  189.                      my_demographic, " / B02001_001 ]", sep = "")) +
  190.   geom_sf(data = map_subset_per_pop, size = 0.05, alpha = 0.5,
  191.           aes(fill = subset_per)) +
  192.  
  193.   geom_sf(data = areawater, size = 0.2, fill = color_water, color = color_water) +
  194.   geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
  195.   geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
  196.   #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +
  197.  
  198.   graph_scale
  199. print(p_subset_per_pop)
  200.  
  201. ### Graph the counties
  202. p_housing_to_income <-
  203.   ggplot() +
  204.   theme_void() +
  205.   theme(
  206.     legend.title    = element_blank(),
  207.     legend.position = "right",
  208.     plot.title      = element_text(hjust = 0.5),
  209.   ) +
  210.   labs(title = "Median Yearly Housing Costs as a percent of median Houshold Income exceeds 28%\n(12 * [B25105_001] / [B19013_001] > 0.28)") +
  211.   geom_sf(data = map_housing_to_income, size = 0.05, alpha = 0.5,
  212.           aes(fill = housing_per)) +
  213.  
  214.   geom_sf(data = lakes, size = 0.2, fill = color_water, color = color_water) +
  215.   geom_sf(data = rivers, size = 0.2, fill = color_water, color = color_water) +
  216.   geom_sf(data = interstates, size = 0.7, color = color_interstate, alpha = 1.0) +
  217.   #geom_sf(data = hwy, size = 0.2, color = color_interstate, alpha = 1.0) +
  218.  
  219.   graph_scale
  220.  
  221. fit <- lm(housing_per ~ subset_per * pop_density * income * distance_to_center, data = data_combined)
  222. predict_fit <- 0.282728 -0.005900 * data_combined$distance_to_center
  223.  
  224. p_fit <-
  225.   ggplot(data = data_combined, aes(y = housing_per, x = subset_per)) +
  226.   theme_classic() +
  227.   geom_point(shape = 19, size = 0.5, color = "black") +
  228.   geom_smooth(method = "lm", formula = y ~ x) +
  229.   geom_hline(yintercept = 0.28, alpha = 0.5, linetype = "dotted") +
  230.   labs(
  231.     x = paste(my_demographic_txt, " % of Population", sep = ""),
  232.     y = "Median Housing to Income Ratio") +
  233.   scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  234.   scale_y_continuous(labels = scales::percent_format(accuracy = 1))
  235.  
  236. plot_list <- list(p_subset_per_pop, p_housing_to_income, p_fit)
  237.  
  238. layout_list <- rbind(
  239.   c(1, 2),
  240.   c(1, 2),
  241.   c(1, 2),
  242.   c(3, 3),
  243.   c(3, 3)
  244. )
  245.  
  246. charts <-
  247.   grid.arrange(grobs         = plot_list,
  248.                layout_matrix = layout_list,
  249.                ncols = 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement