Advertisement
MetricT

TN COVID-19 script, Apr 21

Apr 21st, 2020
2,389
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 35.87 KB | None | 0 0
  1. ################################################################################
  2. ### Render a map of Tennessee using TN Dept. of Health data on COVID-19
  3. ### infections showing total cases, new cases, cases by age, and a map of
  4. ### confirmed infected.
  5. ###
  6. ### TN Dept. of Health COVID-19 website:
  7. ###     https://www.tn.gov/health/cedep/ncov.html
  8. ###
  9. ### Note: You will need a Census API key to access their data.  You
  10. ###       can get a key by going to this website:
  11. ###
  12. ###       https://api.census.gov/data/key_signup.html
  13. ###
  14. ### Once you have the key, just add it below.
  15. ################################################################################
  16.  
  17. ### Set the Census API key
  18. census_key <- "PUT_YOUR_CENSUS_API_KEY_HERE"
  19.  
  20. ### Let's start by cleaning the environment, it currently causes the graphs
  21. ### some problems if the environment is already populated
  22. rm(list = ls())
  23.  
  24. # Need to install the github version of googlesheets4
  25. # install.packages("devtools")
  26. devtools::install_github("tidyverse/googlesheets4")
  27.  
  28. ### Load the necessary libraries
  29. packages <- c("tidyverse", "dplyr", "readxl", "ggplot2", "sf", "rgeos", "TTR",
  30.               "scales", "cowplot", "viridis", "gridExtra", "tidycensus",
  31.               "googlesheets4")
  32.  
  33. new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
  34. if (length(new_packages)) install.packages(new_packages, quiet = TRUE)
  35. invisible(lapply(packages, "library", quietly = TRUE,
  36.                  character.only = TRUE, warn.conflicts = FALSE))
  37.  
  38. ### Set the Census API key
  39. census_api_key(census_key)
  40.  
  41. ### Enable map caching
  42. options(tigris_use_cache = TRUE)
  43.  
  44.  
  45. ################################################################################
  46. ### Read data from my spreadsheet
  47. ################################################################################
  48. ### Load COVID data from Google Sheets
  49. gs4_deauth()
  50. covid_data <- "https://docs.google.com/spreadsheets/d/1Opqs3DIYTlPbPhqrUa66RfKBBDVo0u5sntFM6uYZDzk/edit?usp=sharing"
  51. infected_df  <- read_sheet(covid_data, sheet = "Infected")
  52. deaths_df    <- read_sheet(covid_data, sheet = "Deaths")
  53. hospital_df  <- read_sheet(covid_data, sheet = "Hospitalizations")
  54. testres_df   <- read_sheet(covid_data, sheet = "Test_Results")
  55. testrates_df <- read_sheet(covid_data, sheet = "Testing_Rates") %>%
  56.                 mutate(County = gsub(" County", "", County))
  57. recover_df   <- read_sheet(covid_data, sheet = "Recovered")
  58. age_df       <- read_sheet(covid_data, sheet = "Age_Data")
  59.  
  60.  
  61. ################################################################################
  62. ### Subset the the data here (if desired)
  63. ################################################################################
  64. ### For the map, we only want the latest total for each county.
  65. non_county_fields <- c("Date", "Total", "Other_State_Country",
  66.                        "Unknown", "ERROR!!!")
  67.  
  68. ### All counties
  69. all_counties <-
  70.   colnames(infected_df) %>%
  71.   enframe() %>%
  72.   filter(!colnames(infected_df) %in% non_county_fields) %>%
  73.   select("value") %>%
  74.   deframe()
  75.  
  76. ### Counties in the Nashville Metropolitan Statistical Area
  77. nashville_msa <-
  78.   c("Cannon", "Cheatham", "Davidson", "Dickson", "Macon", "Maury", "Robertson",
  79.     "Rutherford", "Smith", "Sumner", "Trousdale", "Williamson", "Wilson")
  80.  
  81. middle_tn <- c(
  82.   "Bedford",    "Cannon",  "Cheatham",  "Clay",       "Coffee",    "Davidson",
  83.   "DeKalb",     "Dickson", "Fentress",  "Franklin",   "Giles",     "Grundy",
  84.   "Hickman",    "Houston", "Humphreys", "Jackson",    "Lawrence",  "Lewis",
  85.   "Lincoln",    "Macon",   "Marshall",  "Maury",      "Montgomery", "Moore",
  86.   "Overton",    "Perry",   "Pickett",   "Putnam",     "Robertson", "Rutherford",
  87.   "Sequatchie", "Smith",   "Stewart",   "Sumner",     "Trousdale", "Van Buren",
  88.   "Warren",     "Wayne",   "White",     "Williamson", "Wilson")
  89.  
  90.  
  91. ################################################################################
  92. ### Select the map you want
  93. ################################################################################
  94. #graph_counties <- nashville_msa
  95. #map_counties <- nashville_msa
  96. #location <- "Nashville MSA"
  97.  
  98. #graph_counties <- c("Davidson")
  99. #map_counties <- c("Davidson")
  100. #location <- "Davidson County"
  101.  
  102. #graph_counties <- middle_tn
  103. #map_counties <- middle_tn
  104. #location <- "Middle TN"
  105.  
  106. graph_counties <- c("Total")
  107. map_counties <- all_counties
  108. location <- "Tennessee"
  109.  
  110. ### Use TidyCensus to pull population data as well as map geometry
  111. ### from the Census bureau
  112. county_acs <-
  113.   get_acs(geography   = "county",
  114.           variables   = c(POP2018 = "B01003_001"),
  115.           state       = c("47"),
  116.           county      = map_counties,
  117.           year        = 2018,
  118.           geometry    = TRUE,
  119.           cache_table = TRUE) %>%
  120.   rename(POP2018 = estimate)
  121.  
  122. ### Split the Census data apart into a map...
  123. tn_map_df <-
  124.   county_acs %>%
  125.   select(GEOID, geometry)
  126.  
  127. ### And a population dataframe
  128. tn_pop_df <-
  129.   county_acs %>%
  130.   st_set_geometry(NULL) %>%
  131.   mutate(NAME = str_replace(NAME, " County, Tennessee", "")) %>%
  132.   select(GEOID, NAME, POP2018) %>%
  133.   rename(County = NAME)
  134.  
  135.  
  136. ################################################################################
  137. ### A few visual configuration settings
  138. ################################################################################
  139. map_palette <- c("#c4c4c4", viridis(n = 1000, direction = -1))
  140. map_fontsize <- 3
  141. graph_color <- "darkseagreen4"
  142. geom_thickness <- 0.25
  143. geom_age_orientation <- coord_flip()
  144. ENABLE_LOG_SCALE <- TRUE
  145.  
  146. ### If TRUE, graph total cases, new cases, and total deaths on a log scale
  147. ### If FALSE, graph them normally
  148. if (isTRUE(ENABLE_LOG_SCALE)) {
  149.      graph_log10_opts1 <- scale_y_log10(breaks = c(1, 10, 100, 1000))
  150.      graph_log10_opts2 <- annotation_logticks()
  151. } else {
  152.      graph_log10_opts1 <- geom_blank()
  153.      graph_log10_opts2 <- geom_blank()
  154. }
  155.  
  156.  
  157. ################################################################################
  158. ### Adjusting the geometry of our map
  159. ################################################################################
  160. ### Massage the state/county map data a bit
  161. state_mapdata <- tn_pop_df
  162.  
  163. ### Make a copy of the state geometry
  164. counties <- state_mapdata
  165.  
  166. ### Find the center of each county so we can add the number of infected
  167. county_centers <-
  168.   tn_map_df$geometry %>%
  169.   as_Spatial() %>%
  170.   gCentroid(byid = TRUE)
  171.  
  172. ### The centroid function is very good, but occasionally can give less-than-
  173. ### perfect results, often due to counties with extreme concave shape.   We
  174. ### can use "nudges" to adjust the location of the number to make it look good.
  175. ### Default nudge is 0.
  176. counties$nudge_y <- 0
  177. counties$nudge_x <- 0
  178.  
  179. counties$nudge_y[counties$County == "Chester"]   <-  0.035
  180. counties$nudge_y[counties$County == "Houston"]   <-  0.015
  181. counties$nudge_y[counties$County == "Putnam"]    <-  0.025
  182. counties$nudge_y[counties$County == "Loudon"]    <-  0.015
  183. counties$nudge_y[counties$County == "Anderson"]  <- -0.015
  184. counties$nudge_x[counties$County == "Pickett"]   <- -0.065
  185. counties$nudge_x[counties$County == "Unicoi"]    <- -0.070
  186. counties$nudge_y[counties$County == "Unicoi"]    <- -0.035
  187. counties$nudge_x[counties$County == "Hancock"]   <- -0.035
  188. counties$nudge_x[counties$County == "Trousdale"] <- -0.020
  189. counties$nudge_x[counties$County == "Lake"]      <-  0.030
  190. counties$nudge_x[counties$County == "Moore"]     <-  0.005
  191. counties$nudge_y[counties$County == "Moore"]     <-  0.015
  192.  
  193.  
  194. ################################################################################
  195. ### Math section for maps
  196. ################################################################################
  197. ### Get the number of total confirmed cases by county
  198. totalinfected_by_county <-
  199.   infected_df %>%
  200.   arrange(Date) %>%
  201.   tail(n = 1) %>%
  202.   select(-all_of(non_county_fields)) %>%
  203.   gather(Date) %>%
  204.   rename(County = Date) %>%
  205.   rename(totalinfected = value)
  206.  
  207. ### Get the number of new confirmed cases by county
  208. new_date <-
  209.   infected_df %>%
  210.   pull(Date) %>%
  211.   max() %>%
  212.   as_tibble() %>%
  213.   rename(Date = value) %>%
  214.   pull()
  215.  
  216. newinfected_by_county <-
  217.   infected_df %>%
  218.   arrange(Date) %>%
  219.   tail(n = 2) %>%
  220.   select(-all_of(non_county_fields))
  221.  
  222. newinfected_by_county <-
  223.   (newinfected_by_county[2, ] - newinfected_by_county[1, ]) %>%
  224.   as_tibble() %>%
  225.   add_column(new_date, .before = 1) %>%
  226.   rename(Date = new_date) %>%
  227.   gather(Date) %>%
  228.   rename(County = Date) %>%
  229.   rename(newinfected = value)
  230.  
  231. ### Get the number of total deaths by county
  232. totaldeaths_by_county <-
  233.   deaths_df %>%
  234.   filter(Date > "2020-03-31") %>%
  235.   head(n = 1) %>%
  236.   select("Date", all_of(all_counties)) %>%
  237.   gather(Date) %>%
  238.   rename(County = Date) %>%
  239.   rename(totaldeaths = value)
  240.  
  241. ### Get the number of new deaths by county
  242. new_date <-
  243.   deaths_df %>%
  244.   select("Date") %>%
  245.   filter(!is.na(Date)) %>%
  246.   pull(Date) %>%
  247.   max() %>%
  248.   as_tibble() %>%
  249.   pull()
  250.  
  251. newdeaths_by_county <-
  252.   deaths_df %>%
  253.   filter(!is.na(Date)) %>%
  254.   arrange(Date) %>%
  255.   tail(n = 2) %>%
  256.   select(-all_of(non_county_fields))
  257.  
  258. newdeaths_by_county <-
  259.   (newdeaths_by_county[2, ] - newdeaths_by_county[1, ]) %>%
  260.   as_tibble() %>%
  261.   add_column(new_date, .before = 1) %>%
  262.   rename(Date = new_date) %>%
  263.   gather(Date) %>%
  264.   rename(County = Date) %>%
  265.   rename(newdeaths = value)
  266.  
  267. ### Get the number of total deaths by county
  268. recovered_by_county <-
  269.   recover_df %>%
  270.   filter(Date > "2020-03-31") %>%
  271.   arrange(Date) %>%
  272.   tail(n = 1) %>%
  273.   select("Date", all_of(all_counties)) %>%
  274.   gather(Date) %>%
  275.   rename(County = Date) %>%
  276.   rename(recovered = value)
  277.  
  278. ### Still sick
  279. sick_by_county <-
  280.   totalinfected_by_county %>%
  281.   left_join(recovered_by_county, by = "County") %>%
  282.   left_join(totaldeaths_by_county, by = "County") %>%
  283.   mutate(sick = totalinfected - recovered - totaldeaths) %>%
  284.   select(County, sick)
  285.  
  286. ### Combine the data
  287. stats_df <-
  288.   totalinfected_by_county %>%
  289.   left_join(newinfected_by_county, by = "County") %>%
  290.   left_join(totaldeaths_by_county, by = "County") %>%
  291.   left_join(newdeaths_by_county, by = "County")   %>%
  292.   left_join(sick_by_county, by = "County") %>%
  293.   left_join(recovered_by_county, by = "County")
  294.  
  295. ### Combine all the above together
  296. counties <-
  297.   counties %>%
  298.   left_join(tn_map_df, by = "GEOID") %>%
  299.   left_join(stats_df,  by = "County") %>%
  300.   mutate(newinfected = if_else(newinfected == -1, 0, newinfected))
  301.  
  302.  
  303. ################################################################################
  304. ### Map of total COVID-19 cases
  305. ################################################################################
  306. ### We want to add text with the number of infected in each county to that
  307. ### county on the map.   But we *don't* want to label counties that don't yet
  308. ### have any confirmed cases.
  309. totalinfected_label <- counties$totalinfected
  310. totalinfected_label[totalinfected_label == 0] <- ""
  311.  
  312. ### By default use black for the font, but if the value is over 1/2's of the way
  313. ### to the max, use the white font instead as it looks nicer
  314. frac <- 0.5 * max(counties$totalinfected)
  315. counties$textcolor <- if_else(counties$totalinfected >= frac, "white", "black")
  316.  
  317. ### Map:  COVID-19 confirmed infected per county
  318. p_map_totalinfected <-
  319.   ggplot(counties) +
  320.   theme_void() +
  321.   theme(legend.title = element_blank()) +
  322.   theme(plot.title = element_text(hjust = 0.5)) +
  323.   theme(legend.position = "none") +
  324.   geom_sf(data = counties$geometry, aes(fill = counties$totalinfected),
  325.           size = geom_thickness) +
  326.   geom_text(data = counties, size = map_fontsize, fontface = "bold",
  327.             color = counties$textcolor,
  328.             aes(x     = county_centers$x,
  329.                 y     = county_centers$y,
  330.                 label = totalinfected_label),
  331.             nudge_x = counties$nudge_x,
  332.             nudge_y = counties$nudge_y) +
  333.   labs(title = "Total Cases") +
  334.   scale_fill_gradientn(colours = map_palette)
  335.  
  336.  
  337. ################################################################################
  338. ### Map of new COVID-19 cases
  339. ################################################################################
  340. ### We want to add text with the number of infected in each county to that
  341. ### county on the map.   But we *don't* want to label counties that don't yet
  342. ### have any confirmed cases.
  343. newinfected_label <- counties$newinfected
  344. newinfected_label[newinfected_label <= 0] <- ""
  345.  
  346. ### By default use black for the font, but if the value is over 1/2's of the way
  347. ### to the max, use the white font instead as it looks nicer
  348. frac <- 0.5 * max(counties$newinfected)
  349. counties$textcolor <- if_else(counties$newinfected >= frac, "white", "black")
  350.  
  351. ### Map:  COVID-19 confirmed new infected per county
  352. p_map_newinfected <-
  353.   ggplot(counties) +
  354.   theme_void() +
  355.   theme(legend.title = element_blank()) +
  356.   theme(plot.title = element_text(hjust = 0.5)) +
  357.   theme(legend.position = "none") +
  358.   geom_sf(data = counties$geometry, aes(fill = counties$newinfected),
  359.           size = geom_thickness) +
  360.   geom_text(data = counties, size = map_fontsize, fontface = "bold",
  361.             color = counties$textcolor,
  362.             aes(x     = county_centers$x,
  363.                 y     = county_centers$y,
  364.                 label = newinfected_label),
  365.             nudge_x = counties$nudge_x,
  366.             nudge_y = counties$nudge_y) +
  367.   labs(title = "New Cases") +
  368.   scale_fill_gradientn(colours = map_palette)
  369.  
  370.  
  371. ################################################################################
  372. ### Map of deaths by county
  373. ################################################################################
  374. total_deaths_label <- counties$totaldeaths
  375. total_deaths_label[total_deaths_label == 0] <- ""
  376.  
  377. ### By default use black for the font, but if the value is over 1/2's of the way
  378. ### to the max, use the white font instead as it looks nicer
  379. frac <- 0.5 * max(counties$totaldeaths)
  380. counties$textcolor <- if_else(counties$totaldeaths >= frac, "white", "black")
  381.  
  382. p_map_totaldeaths <- ggplot(counties) +
  383.   theme_void() +
  384.   theme(legend.title = element_blank()) +
  385.   theme(plot.title = element_text(hjust = 0.5)) +
  386.   theme(legend.position = "none") +
  387.   geom_sf(data = counties$geometry, size = geom_thickness,
  388.           aes(fill = counties$totaldeaths)) +
  389.   geom_text(data = counties, size = map_fontsize, fontface = "bold",
  390.             color = counties$textcolor,
  391.             aes(x     = county_centers$x,
  392.                 y     = county_centers$y,
  393.                 label = total_deaths_label),
  394.             nudge_x = counties$nudge_x,
  395.             nudge_y = counties$nudge_y) +
  396.   labs(title = "Total Deaths") +
  397.   scale_fill_gradientn(colours = map_palette)
  398.  
  399. new_deaths_label <- counties$newdeaths
  400. new_deaths_label[new_deaths_label == 0] <- ""
  401.  
  402. ### By default use black for the font, but if the value is over 1/2's of the way
  403. ### to the max, use the white font instead as it looks nicer
  404. frac <- 0.5 * max(counties$newdeaths)
  405. counties$textcolor <- if_else(counties$newdeaths > frac, "white", "black")
  406.  
  407. p_map_newdeaths <- ggplot(counties) +
  408.   theme_void() +
  409.   theme(legend.title = element_blank()) +
  410.   theme(plot.title = element_text(hjust = 0.5)) +
  411.   theme(legend.position = "none") +
  412.   geom_sf(data = counties$geometry, aes(fill = counties$newdeaths),
  413.           size = geom_thickness) +
  414.   geom_text(data = counties, size = map_fontsize, fontface = "bold",
  415.             color = counties$textcolor,
  416.             aes(x     = county_centers$x,
  417.                 y     = county_centers$y,
  418.                 label = new_deaths_label),
  419.             nudge_x = counties$nudge_x,
  420.             nudge_y = counties$nudge_y) +
  421.   labs(title = "New Deaths") +
  422.   scale_fill_gradientn(colours = map_palette)
  423.  
  424.  
  425. ################################################################################
  426. ### Map of deaths per 100k by county
  427. ################################################################################
  428. counties$deaths_per100k <- round(100000 * counties$totaldeaths /
  429.                                    counties$POP2018, 3)
  430. deaths100k_label <- round(counties$deaths_per100k, 1)
  431. deaths100k_label[deaths100k_label == 0] <- ""
  432.  
  433. ### By default use black for the font, but if the value is over 1/2's of the way
  434. ### to the max, use the white font instead as it looks nicer
  435. frac <- 0.5 * max(counties$deaths_per100k)
  436. counties$textcolor <- if_else(counties$deaths_per100k >= frac, "white", "black")
  437.  
  438. p_map_totaldeaths_100k <- ggplot(counties) +
  439.   theme_void() +
  440.   theme(legend.title = element_blank()) +
  441.   theme(plot.title = element_text(hjust = 0.5)) +
  442.   theme(legend.position = "none") +
  443.   geom_sf(data = counties$geometry, aes(fill = counties$deaths_per100k),
  444.           size = geom_thickness) +
  445.   geom_text(data = counties, size = map_fontsize, fontface = "bold",
  446.             color = counties$textcolor,
  447.             aes(x     = county_centers$x,
  448.                 y     = county_centers$y,
  449.                 label = deaths100k_label),
  450.             nudge_x = counties$nudge_x,
  451.             nudge_y = counties$nudge_y) +
  452.   labs(title = "Total Deaths per 100k") +
  453.   scale_fill_gradientn(colours = map_palette)
  454.  
  455.  
  456. ################################################################################
  457. ### Map of total COVID-19 cases per 100k
  458. ################################################################################
  459. counties$infected_percapita <-
  460.   round(100000 * counties$totalinfected / counties$POP2018, 0)
  461.  
  462. frac <- 0.5 * max(counties$infected_percapita)
  463. counties$textcolor <- if_else(counties$infected_percapita >= frac,
  464.                               "white", "black")
  465.  
  466. infected_label_per  <- round(counties$infected_percapita, 1)
  467. infected_label_per[infected_label_per == 0] <- ""
  468.  
  469. p_map_totalinfected_per <- ggplot(counties) +
  470.    theme_void() +
  471.    theme(plot.title = element_text(hjust = 0.5)) +
  472.    theme(plot.caption = element_text(hjust = 0.5)) +
  473.    theme(legend.title = element_blank()) +
  474.    theme(legend.position = "none") +
  475.    geom_sf(data = counties$geometry, size = geom_thickness,
  476.              aes(fill = counties$infected_percapita)) +
  477.    geom_text(data = counties, size = map_fontsize, fontface = "bold",
  478.              color = counties$textcolor,
  479.              aes(x     = county_centers$x,
  480.                  y     = county_centers$y,
  481.                  label = infected_label_per),
  482.                nudge_x = counties$nudge_x,
  483.                nudge_y = counties$nudge_y) +
  484.    labs(title = "Total Cases per 100k") +
  485.      scale_fill_gradientn(colours = map_palette)
  486.  
  487.  
  488. ################################################################################
  489. ### Map of people still sick (not recovered) by county
  490. ################################################################################
  491. frac <- 0.5 * max(counties$sick)
  492. counties$textcolor <- if_else(counties$sick >= frac, "white", "black")
  493.  
  494. sick_label_per  <- counties$sick
  495. sick_label_per[sick_label_per == 0] <- ""
  496.  
  497. p_map_sick <- ggplot(counties) +
  498.   theme_void() +
  499.   theme(plot.title = element_text(hjust = 0.5)) +
  500.   theme(plot.caption = element_text(hjust = 0.5)) +
  501.   theme(legend.title = element_blank()) +
  502.   theme(legend.position = "none") +
  503.   geom_sf(data = counties$geometry, size = geom_thickness,
  504.           aes(fill = counties$sick)) +
  505.   geom_text(data = counties, size = map_fontsize, fontface = "bold",
  506.             color = counties$textcolor,
  507.             aes(x     = county_centers$x,
  508.                 y     = county_centers$y,
  509.                 label = sick_label_per),
  510.             nudge_x = counties$nudge_x,
  511.             nudge_y = counties$nudge_y) +
  512.   labs(title = "Still Sick") +
  513.   scale_fill_gradientn(colours = map_palette)
  514.  
  515.  
  516. ################################################################################
  517. ### Map of people recovered by county
  518. ################################################################################
  519. frac <- 0.5 * max(counties$recovered)
  520. counties$textcolor <- if_else(counties$recovered >= frac, "white", "black")
  521.  
  522. recovered_label_per  <- counties$recovered
  523. recovered_label_per[recovered_label_per == 0] <- ""
  524.  
  525. p_map_recovered <- ggplot(counties) +
  526.   theme_void() +
  527.   theme(plot.title = element_text(hjust = 0.5)) +
  528.   theme(plot.caption = element_text(hjust = 0.5)) +
  529.   theme(legend.title = element_blank()) +
  530.   theme(legend.position = "none") +
  531.   geom_sf(data = counties$geometry, size = geom_thickness,
  532.           aes(fill = counties$recovered)) +
  533.   geom_text(data = counties, size = map_fontsize, fontface = "bold",
  534.             color = counties$textcolor,
  535.             aes(x     = county_centers$x,
  536.                 y     = county_centers$y,
  537.                 label = recovered_label_per),
  538.             nudge_x = counties$nudge_x,
  539.             nudge_y = counties$nudge_y) +
  540.   labs(title = "Recovered") +
  541.   scale_fill_gradientn(colours = map_palette)
  542.  
  543.  
  544. ################################################################################
  545. ### Math section for charts
  546. ################################################################################
  547. infected_data <-
  548.   infected_df %>%
  549.   arrange(Date) %>%
  550.   select("Date", all_of(graph_counties)) %>%
  551.   mutate(Total = rowSums(select(., !starts_with("Date")))) %>%
  552.   select(Date, Total) %>%
  553.   rename(totalinfected = Total) %>%
  554.   rename(date = Date) %>%
  555.   filter(totalinfected != 0) %>%
  556.   mutate(newinfected = c(totalinfected[1], diff(totalinfected))) %>%
  557.   mutate(numdays = difftime(as.POSIXct(date),
  558.                             as.POSIXct(as.Date(min(date)) - 1, tz = "UTC"),
  559.                             units = "days")) %>%
  560.   mutate(numdays = as.numeric(numdays))
  561.  
  562. hospital_data <-
  563.   hospital_df %>%
  564.   arrange(Date) %>%
  565.   mutate(numdays = difftime(as.POSIXct(Date),
  566.                             as.POSIXct(as.Date(min(Date)) - 1, tz = "UTC"),
  567.                             units = "days")) %>%
  568.   mutate(numdays = as.numeric(numdays)) %>%
  569.   rename(total_hospitalizations = Hospitalizations) %>%
  570.   mutate(new_hospitalizations = c(total_hospitalizations[1],
  571.                                   diff(total_hospitalizations)))
  572.  
  573. recovered_data <-
  574.   recover_df %>%
  575.   rename(Total = Recovered) %>%
  576.   select("Date", all_of(graph_counties)) %>%
  577.   mutate(Total = rowSums(select(., !starts_with("Date")))) %>%
  578.   select(Date, Total) %>%
  579.   rename(totalrecovered = Total) %>%
  580.   rename(date = Date) %>%
  581.   filter(totalrecovered != 0) %>%
  582.   arrange(date) %>%
  583.   mutate(newrecovered = c(totalrecovered[1], diff(totalrecovered))) %>%
  584.   mutate(numdays = difftime(as.POSIXct(date),
  585.                             as.POSIXct(as.Date(min(date)) - 1, tz = "UTC"),
  586.                             units = "days")) %>%
  587.   mutate(numdays = as.numeric(numdays))
  588.  
  589. deaths_data <-
  590.   deaths_df %>%
  591.   na.omit() %>%
  592.   arrange(Date) %>%
  593.   select("Date", all_of(graph_counties)) %>%
  594.   mutate(Total = rowSums(select(., !starts_with("Date")))) %>%
  595.   mutate(New = c(1, diff(Total))) %>%
  596.   select(Date, Total, New) %>%
  597.   filter(Total != 0) %>%
  598.   rename(totaldeaths = Total) %>%
  599.   rename(newdeaths = New) %>%
  600.   rename(date = Date) %>%
  601.   mutate(numdays = difftime(as.POSIXct(date),
  602.                             as.POSIXct(as.Date(min(date)) - 1, tz = "UTC"),
  603.                             units = "days")) %>%
  604.   mutate(numdays = as.numeric(numdays))
  605.  
  606. sick_data <-
  607.   infected_data %>%
  608.   left_join(recovered_data, by = "date") %>%
  609.   left_join(deaths_data,   by = "date") %>%
  610.   select(date, totalinfected, totaldeaths, totalrecovered) %>%
  611.   filter(date >= as.Date("2020-04-10")) %>%
  612.   filter(!is.na(totaldeaths)) %>%
  613.   filter(!is.na(totalrecovered)) %>%
  614.   filter(!is.na(totalinfected)) %>%
  615.   mutate(current_sick = totalinfected - totaldeaths - totalrecovered) %>%
  616.   select(date, current_sick)
  617.  
  618.  
  619. ################################################################################
  620. ### Graph:  COVID-19 Total Confirmed Cases
  621. ################################################################################
  622. ### Use non-linear fitting to do the exponential fit
  623. fit_totalinfected <-
  624.   nls(totalinfected ~ a * exp(b * numdays),
  625.       data = infected_data,
  626.       start = list(a = 334,
  627.                    b = 0.0677))
  628.  
  629. ### Extract the regression coefficents so we can add the regression formula
  630. ### to the graph
  631. intercept  <- coef(fit_totalinfected)[1]
  632. d_constant <- coef(fit_totalinfected)[2]
  633.  
  634. ### Add the fit to our dataframe
  635. infected_data <-
  636.   infected_data %>%
  637.   mutate(fit_totalinfected = intercept *
  638.            exp(d_constant * infected_data$numdays))
  639.  
  640. ### Compute the doubling time
  641. doubling_time <- log(2) / d_constant
  642.  
  643. ### Compute the "order-of-magnitude" time
  644. magnitude_time <- log(10) / d_constant
  645.  
  646. ### Growth rate
  647. growth_rate <- exp(log(2) / doubling_time) - 1
  648.  
  649. ### What's the current number of total infected cases
  650. total_num <-
  651.   infected_data %>%
  652.   tail(n = 1) %>%
  653.   select("totalinfected") %>%
  654.   pull()
  655.  
  656. ### What's the date of the last data point
  657. data_date <- infected_data  %>% select("date") %>% tail(n = 1) %>% pull()
  658.  
  659. p_total <- ggplot(data = infected_data) +
  660.   theme_classic() +
  661.   theme(axis.text.x = element_text(angle = 45)) +
  662.   theme(axis.text.x = element_text(vjust = 0.7)) +
  663.   theme(axis.text.x = element_text(hjust = 0.8)) +
  664.   theme(legend.title = element_blank()) +
  665.   theme(legend.position = "none") +
  666.   geom_point(data = infected_data, shape = 19, size = 0.5,
  667.              aes(x = as.Date(date), y = totalinfected), color = "black") +
  668.   geom_line(data = infected_data, color = graph_color,
  669.             aes(x = as.Date(date), y = fit_totalinfected)) +
  670.   scale_x_date(date_breaks = "3 days", date_labels = "%m/%d") +
  671.   graph_log10_opts1 +
  672.   graph_log10_opts2 +
  673.   labs(title = paste("Total Cases: ", total_num, sep = ""), x = "", y = "")
  674.  
  675.  
  676. ################################################################################
  677. ### Graph:  COVID-19 New Confirmed Cases
  678. ################################################################################
  679. ### If we assume infected = a * B^(c * days), then
  680. ###
  681. ###  d(infected)/dt = c * log_e(B) * infected
  682. ###
  683. ### We can use the same constants from the total fit on the new fit.   If new
  684. ### cases are still growing exponentially, it will work.   And when it doesn't
  685. ### work, that's a sign that we're either "flattening the curve", or something
  686. ### else weird is going on
  687.  
  688. ### Add the fit to our dataframe
  689. infected_data <-
  690.   infected_data %>%
  691.   mutate(fit_newinfected = fit_totalinfected * d_constant)
  692.  
  693. ### What's the current number of total infected cases
  694. new_num <- infected_data %>% tail(n = 1) %>% select("newinfected") %>% pull()
  695. SMA_num <- infected_data$newinfected %>% SMA(n = 7) %>% tail(n = 1) %>% round()
  696.  
  697. ### Graph:  new COVID-19 Cases
  698. p_new <- ggplot(data = infected_data) +
  699.   theme_classic() +
  700.   theme(axis.text.x = element_text(angle = 45)) +
  701.   theme(axis.text.x = element_text(vjust = 0.7)) +
  702.   theme(axis.text.x = element_text(hjust = 0.8)) +
  703.   theme(legend.title = element_blank()) +
  704.   theme(legend.position = "none") +
  705.   geom_point(data = infected_data, shape = 19, size = 0.5,
  706.              aes(x = as.Date(date), y = newinfected), color = "black") +
  707.   geom_line(data = infected_data, color = graph_color,
  708.             size = 1.0, aes(x = as.Date(date) - 3,
  709.                             y = SMA(newinfected, n = 7))) +
  710.   scale_x_date(date_breaks = "3 days", date_labels = "%m/%d") +
  711.   graph_log10_opts1 +
  712.   graph_log10_opts2 +
  713.   labs(title = paste("New Cases: ", new_num, ", SMA: ", SMA_num, sep = ""),
  714.        x = "", y = "")
  715.  
  716.  
  717. ################################################################################
  718. ### Graph:  COVID-19 Cases [ log(Total) vs log(New) ]
  719. ### Reference:  https://aatishb.com/covidtrends/
  720. ################################################################################
  721. p_total_new <- ggplot(data = infected_data,
  722.                       aes(x = totalinfected, y = newinfected)) +
  723.   theme_classic() +
  724.   theme(axis.text.x = element_text(angle = 45)) +
  725.   theme(axis.text.x = element_text(vjust = 0.7)) +
  726.   theme(axis.text.x = element_text(hjust = 0.8)) +
  727.   theme(legend.position = "none") +
  728.   geom_point(data = infected_data, shape = 19, size = 0.5,
  729.              aes(x = totalinfected, y = newinfected), color = "black") +
  730.   geom_smooth(method = "loess", formula = y ~ x,
  731.               se = FALSE, size = 0.5, color = graph_color) +
  732.   scale_x_log10(breaks = c(1, 10, 100, 1000)) +
  733.   scale_y_log10(breaks = c(1, 10, 100, 1000)) +
  734.   annotation_logticks() +
  735.   labs(title = "Log(New vs Total)", x = "", y = "")
  736.  
  737.  
  738. ################################################################################
  739. ### Graph:  COVID-19 Hospitalizations
  740. ################################################################################
  741. hospital_num <-
  742.   hospital_data %>%
  743.   arrange(Date) %>%
  744.   tail(n = 1) %>%
  745.   select("total_hospitalizations") %>%
  746.   pull()
  747.  
  748. p_total_hospital <- ggplot(data = hospital_data,
  749.                            aes(x = as.Date(Date), y = total_hospitalizations)) +
  750.   theme_classic() +
  751.   theme(axis.text.x = element_text(angle = 45)) +
  752.   theme(axis.text.x = element_text(vjust = 0.7)) +
  753.   theme(axis.text.x = element_text(hjust = 0.8)) +
  754.   theme(legend.title = element_blank()) +
  755.   theme(legend.position = "none") +
  756.   geom_bar(stat = "identity", fill = graph_color) +
  757.   scale_x_date(date_breaks = "3 days", date_labels = "%m/%d") +
  758.   labs(title = paste("TN Hospitalizations: ", hospital_num, sep = ""),
  759.        x = "", y = "")
  760.  
  761.  
  762. ################################################################################
  763. ### Graph:  COVID-19 Total Recovered
  764. ################################################################################\
  765. new_num <-
  766.   recovered_data %>%
  767.   tail(n = 1) %>%
  768.   select("totalrecovered") %>%
  769.   pull()
  770.  
  771. p_recover <- ggplot(data = recovered_data,
  772.                     aes(x = as.Date(date), y = totalrecovered)) +
  773.   theme_classic() +
  774.   theme(axis.text.x = element_text(angle = 45)) +
  775.   theme(axis.text.x = element_text(vjust = 0.7)) +
  776.   theme(axis.text.x = element_text(hjust = 0.8)) +
  777.   theme(legend.title = element_blank()) +
  778.   theme(legend.position = "none") +
  779.   geom_point(data = recovered_data, shape = 19, size = 0.5,
  780.              aes(x = as.Date(date), y = totalrecovered), color = "black") +
  781.   scale_x_date(date_breaks = "3 days", date_labels = "%m/%d") +
  782.   labs(title = paste("Total Recovered: ", new_num, sep = ""), x = "", y = "")
  783.  
  784.  
  785. ################################################################################
  786. ### Graph:  COVID-19 Total Infected by Age
  787. ################################################################################
  788. p_age_infected <- ggplot(data = age_df, aes(x = infected_statewide, y = age)) +
  789.   theme_classic() +
  790.   theme(axis.text.x = element_text(angle = 45)) +
  791.   theme(axis.text.x = element_text(vjust = 0.7)) +
  792.   theme(axis.text.x = element_text(hjust = 0.8)) +
  793.   geom_age_orientation +
  794.   geom_bar(stat = "identity", fill = graph_color) +
  795.   labs(title = "TN Infected by Age", x = "", y = "")
  796.  
  797.  
  798. ################################################################################
  799. ### Graph:  COVID-19 Total Infected by Age
  800. ################################################################################
  801. p_age_deaths <- ggplot(data = age_df, aes(x = deaths_statewide, y = age)) +
  802.   theme_classic() +
  803.   theme(axis.text.x = element_text(angle = 45)) +
  804.   theme(axis.text.x = element_text(vjust = 0.7)) +
  805.   theme(axis.text.x = element_text(hjust = 0.8)) +
  806.   geom_age_orientation +
  807.   geom_bar(stat = "identity", fill = graph_color) +
  808.   labs(title = "TN Deaths by Age", x = "", y = "")
  809.  
  810.  
  811. ################################################################################
  812. ### Graph:  COVID-19 Total/New Deaths
  813. ################################################################################
  814. ### Use non-linear fitting to do the exponential fit
  815. fit_totaldeaths <- nls(totaldeaths ~ a * exp(b * numdays),
  816.                   data = deaths_data, start = list(a = 12.51, b = 0.082))
  817.  
  818. ### Extract the regression coefficents so we can add the regression formula
  819. ### to the graph
  820. intercept  <- coef(fit_totaldeaths)[1]
  821. d_constant <- coef(fit_totaldeaths)[2]
  822.  
  823. ### Add fit for total/new deaths
  824. deaths_data <-
  825.   deaths_data %>%
  826.   mutate(fit_totaldeaths = intercept * exp(d_constant * numdays)) %>%
  827.   mutate(fit_newdeaths   = fit_totaldeaths * d_constant)
  828.  
  829. p_totaldeaths <- ggplot(data = deaths_data) +
  830.   theme_classic() +
  831.   theme(axis.text.x = element_text(angle = 45)) +
  832.   theme(axis.text.x = element_text(vjust = 0.7)) +
  833.   theme(axis.text.x = element_text(hjust = 0.8)) +
  834.   theme(legend.title = element_blank()) +
  835.   theme(legend.position = "none") +
  836.   geom_point(shape = 19, size = 0.5,
  837.              aes(x = as.Date(date), y = totaldeaths), color = "black") +
  838.   geom_line(color = graph_color,
  839.             aes(x = as.Date(date), y = fit_totaldeaths)) +
  840.   scale_x_date(date_breaks = "3 days", date_labels = "%m/%d") +
  841.   graph_log10_opts1 +
  842.   graph_log10_opts2 +
  843.   labs(title = paste("Total Deaths: ", max(deaths_data$totaldeaths),  sep = ""),
  844.        x = "", y = "")
  845.  
  846. p_newdeaths <- ggplot(data = deaths_data) +
  847.   theme_classic() +
  848.   theme(axis.text.x = element_text(angle = 45)) +
  849.   theme(axis.text.x = element_text(vjust = 0.7)) +
  850.   theme(axis.text.x = element_text(hjust = 0.8)) +
  851.   theme(legend.title = element_blank()) +
  852.   theme(legend.position = "none") +
  853.   geom_point(shape = 19, size = 0.5,
  854.              aes(x = as.Date(date), y = newdeaths), color = "black") +
  855.   geom_line(color = graph_color,
  856.             aes(x = as.Date(date), y = fit_newdeaths)) +
  857.   scale_x_date(date_breaks = "3 days", date_labels = "%m/%d") +
  858.   graph_log10_opts1 +
  859.   graph_log10_opts2 +
  860.   labs(title = paste("New Deaths: ", max(deaths_data$newdeaths),  sep = ""),
  861.        x = "", y = "")
  862.  
  863.  
  864. ################################################################################
  865. ### Graph:  COVID-19 Current Sick
  866. ################################################################################
  867. new_num <- sick_data %>% tail(n = 1) %>% select("current_sick") %>% pull()
  868.  
  869. p_sick <- ggplot(data = sick_data, aes(x = as.Date(date), y = current_sick)) +
  870.   theme_classic() +
  871.   theme(axis.text.x = element_text(angle = 45)) +
  872.   theme(axis.text.x = element_text(vjust = 0.7)) +
  873.   theme(axis.text.x = element_text(hjust = 0.8)) +
  874.   theme(legend.title = element_blank()) +
  875.   theme(legend.position = "none") +
  876.   geom_point(data = sick_data, shape = 19, size = 0.5,
  877.              aes(x = as.Date(date), y = current_sick), color = "black") +
  878.   scale_x_date(date_breaks = "3 days", date_labels = "%m/%d") +
  879.   labs(title = paste("Currently Sick: ", new_num, sep = ""), x = "", y = "")
  880.  
  881.  
  882. ################################################################################
  883. ### Arrange the graphs into the final product
  884. ################################################################################
  885. # What's the date of the last data point
  886. data_date <- infected_df %>% arrange(Date) %>% tail(n = 1) %>% pull("Date")
  887.  
  888. # Title
  889. title_string <- paste("COVID-19 Confirmed Cases in ", location,
  890.                       " [", as.Date(data_date), "]", sep = "")
  891.  
  892. # Footer
  893. footer_string <- paste("Source:  Tennessee Department of Health\n",
  894.                        "https://www.tn.gov/health/cedep/ncov.html", sep = "")
  895.  
  896. grob_charts <-
  897.   plot_grid(p_total,        p_new,        p_totaldeaths,    p_recover,   p_sick,
  898.             p_age_infected, p_age_deaths, p_total_hospital, p_total_new, NA,
  899.             ncol = 5, nrow = 2, align = "hv")
  900.  
  901. grob_maps <-
  902.   plot_grid(p_map_recovered,    p_map_totalinfected,
  903.             p_map_sick,         p_map_totalinfected_per,
  904.             p_map_totaldeaths,  p_map_newinfected,
  905.             ncol = 2, nrow = 3, align = "hv")
  906.  
  907. charts <- plot_grid(grob_maps,
  908.                     grob_charts,
  909.                     ncol = 1, nrow = 2,
  910.                     rel_widths  = c(1.2, 1),
  911.                     rel_heights = c(1, 1))
  912.  
  913. title <- ggdraw() + draw_label(title_string, fontface = "bold")
  914. Sys.sleep(2)
  915.  
  916. footer <- ggdraw() + draw_label(footer_string, size = 10)
  917. Sys.sleep(2)
  918.  
  919. final <- plot_grid(title, charts, footer, nrow = 3,
  920.                    rel_heights = c(0.05, 1, 0.05))
  921. Sys.sleep(2)
  922.  
  923. print(final)
  924.  
  925. ### Save image to file and we're done!
  926. scale <- 1.3
  927. width <- round(1300 * scale)
  928. height <- round(900 * scale)
  929. png("TN_COVID_Viridis.png", width = width, height = height)
  930. plot(final)
  931. dev.off()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement