Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Load libraries we need...
- packages <- c("tidyverse", "dplyr", "ggplot2", "scales", "ggthemes")
- new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
- if (length(new_packages)) install.packages(new_packages, quiet = TRUE)
- invisible(lapply(packages, "library", quietly = TRUE,
- character.only = TRUE, warn.conflicts = FALSE))
- # As our data starts on 2020-03-05, we want to make our reference date the
- # day before the first case was announced
- ref_date <- as.Date("2020-03-04", tz = "UTC")
- # A table with info on TN COVID cases
- # Data from: https://www.tn.gov/health/cedep/ncov.html
- tn_covid_df <- read.table(textConnection(
- "date, totalinfected, totaldeaths
- 2020-03-05, 1, 0
- 2020-03-06, 1, 0
- 2020-03-07, 1, 0
- 2020-03-08, 3, 0
- 2020-03-09, 4, 0
- 2020-03-10, 7, 0
- 2020-03-11, 9, 0
- 2020-03-12, 18, 0
- 2020-03-13, 26, 0
- 2020-03-14, 32, 0
- 2020-03-15, 39, 0
- 2020-03-16, 52, 0
- 2020-03-17, 73, 0
- 2020-03-18, 98, 0"),
- sep = ",",
- colClasses = c("Date", "integer", "integer"),
- header = TRUE)
- # Take the difference to get a vector of new daily cases.
- tn_covid_df$newinfected <- c(tn_covid_df$totalinfected[1],
- diff(tn_covid_df$totalinfected))
- tn_covid_df$newdeaths <- c(tn_covid_df$totaldeaths[1],
- diff(tn_covid_df$totaldeaths))
- # What base log do you want to use (2, e, 10, 3.14159, 42, or something else).
- # This allows you to change the base for the exponential formula in the graph.
- # So if you want to know a * 2^b, a' * 10^b', etc, just change the base here.
- log_base <- 2
- # Compute the number of days since our reference date and the given date
- tn_covid_df$numdays <- difftime(as.POSIXct(tn_covid_df$date),
- as.POSIXct(ref_date, tz = "UTC"),
- units = "days") %>%
- as.numeric()
- # Use non-linear fitting to do the exponential fit
- fit_totalinfected <- nls(totalinfected ~ I(a * log_base ^ (b * numdays)),
- data = tn_covid_df, start = list(a = 0.5, b = 0.5))
- summary(fit_totalinfected)
- # Extract the regression coefficents so we can add the regression formula
- # to the graph
- intercept <- coef(fit_totalinfected)[1]
- d_constant <- coef(fit_totalinfected)[2]
- # Add the fit to our dataframe
- tn_covid_df$fit_totalinfected <- intercept *
- log_base ^ (d_constant * tn_covid_df$numdays)
- # Compute the doubling time
- doubling_time <- log(2, log_base) / d_constant
- ################################################################################
- ### Compute "doomsday" - the day we run out of hospital beds
- ################################################################################
- # My estimate for the number of hospital beds, made by adding up the number of
- # beds each major hospital lists:
- # Vanderbilt University Medical - 1,019
- # TriStar Centennial - 741
- # St Thomas Midtown - 683
- # TriStar Skyline - 353
- # St Thomas West - 350
- # TriStar Southern Hills - 126
- # Nashville General - 150
- total_beds_nashville <- 3500
- # What fraction of those beds are currently empty?
- # Data from: https://www.statista.com/statistics/185904/hospital-occupancy-rate-in-the-us-since-2001/
- bed_vacancy_rate <- 0.341
- # What fraction of confirmed infected require hospitalization. This has been
- # ~20% in data from Wuhan, Italy, and NY State
- critical_fraction <- 0.2
- # We will run out of hospital beds once infected cases reaches this level
- critical_number <- total_beds_nashville * bed_vacancy_rate / critical_fraction
- # How many days until we exceed the critical number of cases beds (ie, run out)
- doomsdays <- ceiling(log(critical_number / 2^intercept, log_base) / d_constant)
- doomsdate <- ref_date + doomsdays
- ################################################################################
- ### Graph the number of total confirmed infected cases
- ################################################################################
- # Create a text string with the growth formula based on current regression
- caption <- paste("Number of Total Confirmed Cases = ",
- round(log_base^intercept, 3),
- " * ", round(log_base, 3), "^(",
- round(d_constant, 3),
- " * Days since 2020-03-04)\nDoubling time = ",
- round(doubling_time, 2),
- " days\nEstimated date of last available bed: ",
- doomsdate, sep = "")
- ylab <- "Total Confirmed Cases"
- xlab <- "Date"
- title <- "Total Confirmed COVID-19 cases in TN"
- subtitle <- caption
- p_total <- ggplot(data = tn_covid_df) +
- theme_classic() +
- theme(legend.title = element_blank()) +
- theme(legend.position = "none") +
- geom_point(data = tn_covid_df,
- aes(x = date, y = totalinfected), color = "black") +
- geom_line(data = tn_covid_df,
- aes(x = date, y = fit_totalinfected), color = "darkred") +
- scale_y_continuous(breaks = pretty_breaks()) +
- labs(title = title, subtitle = caption, x = xlab, y = ylab)
- print(p_total)
- readline(prompt = "Press [ENTER] to continue...")
- ################################################################################
- ### Graph the number of new confirmed infected cases
- ################################################################################
- ### If we assume infected = a * B^(c * days), then
- ###
- ### d(infected)/dt = c * log_e(B) * infected
- # Add the fit to our dataframe
- tn_covid_df$fit_newinfected <- tn_covid_df$fit * log(log_base) * d_constant
- # Create a text string with the growth formula based on current regression
- caption <- paste("Number of New Confirmed Cases = ",
- round(d_constant * log(log_base) * log_base^intercept, 3),
- " * ", round(log_base, 3), "^(",
- round(d_constant, 3),
- " * Days since 2020-03-04)", sep = "")
- # Graph the number of new confirmed infected cases
- ylab <- "New Confirmed Cases"
- xlab <- "Date"
- title <- "New Confirmed COVID-19 cases in TN"
- p_new <- ggplot(data = tn_covid_df) +
- theme_classic() +
- theme(legend.title = element_blank()) +
- theme(legend.position = "none") +
- geom_point(data = tn_covid_df,
- aes(x = date, y = newinfected), color = "black") +
- geom_line(data = tn_covid_df,
- aes(x = date, y = fit_newinfected), color = "darkred") +
- scale_y_continuous(breaks = pretty_breaks()) +
- labs(title = title, subtitle = caption, x = xlab, y = ylab)
- print(p_new)
- readline(prompt = "Press [ENTER] to continue...")
- ################################################################################
- ### Project expected confirmed cases forward 4 weeks (output to console window)
- ################################################################################
- for (day in seq(1, 28)) {
- this_date <- ref_date + day
- total_cases <- round(intercept * log_base ^ (d_constant * day), 0)
- new_cases <- round(log(log_base) * d_constant *
- intercept * log_base ^ (d_constant * day), 0)
- print(paste(this_date, " - ", total_cases, "-", new_cases))
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement