Advertisement
bdill

coronavirus_johns_hopkins_2020-03-16.R

Mar 16th, 2020
848
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 6.45 KB | None | 0 0
  1.  
  2. #----- Johns Hopkins CSSE data repository -----
  3. # https://pastebin.com/g8BTSmwN
  4. # https://github.com/CSSEGISandData/COVID-19
  5. # https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports
  6. # https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-15-2020.csv
  7. # https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series
  8. # https://datahub.io/JohnSnowLabs/population-figures-by-country
  9. #library(data.table)
  10. library(tidyverse)
  11. library(lubridate)
  # Send outputs to whichever of the author's two Downloads folders exists on
  # this machine. If both exist the later entry wins, which is the net effect of
  # calling setwd() on each in turn; if neither exists the current working
  # directory is kept instead of stopping the script with an error.
  download_dirs <- c(
    "C:/Users/brian.dill/Downloads/",
    "C:/Users/bdill/Downloads/"
  )
  # The trailing slash is stripped for the existence check only.
  # NOTE(review): guards against platforms that reject a trailing separator
  # here - confirm whether that is still needed.
  download_dirs <- download_dirs[dir.exists(sub("/+$", "", download_dirs))]
  if (length(download_dirs) > 0) {
    setwd(download_dirs[length(download_dirs)])
  }
  14.  
  #----- Read in data -----
  # Download the three Johns Hopkins CSSE time-series tables (confirmed, deaths,
  # recovered). The URLs track the repository's master branch, so the files can
  # change or move upstream.
  # stringsAsFactors = FALSE keeps Province.State / Country.Region as character
  # on R < 4.0 too, so the joins further down compare plain strings rather than
  # factors with differing level sets.
  jh_series_url <- paste0(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
    "csse_covid_19_data/csse_covid_19_time_series/"
  )
  jhconfirmed <- read.csv(
    paste0(jh_series_url, "time_series_19-covid-Confirmed.csv"),
    stringsAsFactors = FALSE
  )
  jhdeaths <- read.csv(
    paste0(jh_series_url, "time_series_19-covid-Deaths.csv"),
    stringsAsFactors = FALSE
  )
  jhrecovered <- read.csv(
    paste0(jh_series_url, "time_series_19-covid-Recovered.csv"),
    stringsAsFactors = FALSE
  )

  # Location lookup taken from the confirmed table: one row per input row, with
  # the short column names used by the rest of the script
  jhcountries <- jhconfirmed %>%
    select(Province = Province.State, Country = Country.Region, Lat, Long)
  21.  
  #----- data wrangle -----
  # Reshape one wide time-series table (one column per day) into long form.
  #
  # wide:       data frame with Province.State, Country.Region, Lat, Long and
  #             date columns whose names start with "X" followed by a
  #             month-day-year date.
  # value_name: name given to the column that receives the per-day values.
  # Returns a data frame with columns Province, Country, Date, <value_name>,
  # where Date has been parsed with lubridate::mdy().
  jh_to_long <- function(wide, value_name) {
    wide %>%
      select(-Lat, -Long) %>%
      pivot_longer(
        cols = -c(Province.State, Country.Region),
        names_to = "Date",
        names_prefix = "X", # drop the leading "X" so mdy() sees only the date
        values_to = value_name
      ) %>%
      mutate(Date = mdy(Date)) %>%
      rename(Province = Province.State, Country = Country.Region)
  }

  # Same reshape for each of the three measures; only the value column differs
  jhconfirmed2 <- jh_to_long(jhconfirmed, "Confirmed")
  jhdeaths2 <- jh_to_long(jhdeaths, "Deaths")
  jhrecovered2 <- jh_to_long(jhrecovered, "Recovered")
  40. # head(jhrecovered2)
  41.  
  # Combine the three long tables into one row per Province / Country / Date,
  # sorted newest-first within each location. The join keys are spelled out so
  # the result does not depend on dplyr inferring the shared columns.
  jh_keys <- c("Province", "Country", "Date")
  jh <- jhconfirmed2 %>%
    inner_join(jhdeaths2, by = jh_keys) %>%
    inner_join(jhrecovered2, by = jh_keys) %>%
    arrange(Country, Province, desc(Date))

  head(jh)

  # Same rows with each location's Lat / Long attached
  jh_gis <- inner_join(jh, jhcountries, by = c("Province", "Country"))

  # Per-country, per-date sums over all provinces, newest first per country.
  # ungroup() removes the Country grouping that summarize() leaves behind, so
  # later verbs on jh_country are not silently applied per group.
  jh_country <- jh %>%
    group_by(Country, Date) %>%
    summarize(
      Confirmed = sum(Confirmed),
      Deaths = sum(Deaths),
      Recovered = sum(Recovered)
    ) %>%
    ungroup() %>%
    arrange(Country, desc(Date))
  55.  
  #----- Save csv files -----
  # Each table is written into the current working directory. The destination
  # is passed positionally because readr renamed this argument from `path` to
  # `file`; the positional form is accepted under either name.
  write_csv(jh, file.path(getwd(), "covid19_jh_timeseries.csv"))
  write_csv(jh_country, file.path(getwd(), "covid19_jh_country_timeseries.csv"))
  write_csv(jhcountries, file.path(getwd(), "covid19_jh_country_lat_long.csv"))
  # NOTE(review): "timeseris" looks like a typo for "timeseries"; the name is
  # left unchanged in case something already reads this file.
  write_csv(jh_gis, file.path(getwd(), "covid19_jh_timeseris_with_latlong.csv"))
  61.  
  62.  
  #----- graphs -----

  #----- China by province -----
  # Line chart of confirmed cases over time, one coloured line per province,
  # restricted to rows whose Country is China
  china_rows <- filter(jh, Country %in% c("China"))

  ggplot(china_rows, aes(x = Date, y = Confirmed, color = Province)) +
    geom_line() +
    labs(
      title = "Confirmed covid-19 cases by Province in China",
      subtitle = "Data Repository by Johns Hopkins CSSE",
      caption = "Source: https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series"
    )

  # No plot argument is given, so ggsave() writes the most recent plot (the
  # chart built just above) into the working directory
  ggsave(
    filename = paste0(getwd(), "/covid19_by_china_province.png"),
    width = 10,
    height = 6,
    dpi = 120
  )
  75.  
  76.  
  #----- Top countries -----
  # Countries drawn on both charts and listed in the table printed at the end
  top_countries <- c("Italy", "Iran", "US", "Spain", "Germany", "China")
  jh_top <- filter(jh_country, Country %in% top_countries)
  jh_caption <- "Source: https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series"

  # Confirmed cases per country on a log10 y axis limited to 1 - 100,000
  cases_plot <- ggplot(jh_top, aes(x = Date, y = Confirmed, color = Country)) +
    geom_line() +
    scale_y_log10(limits = c(1, 100000)) +
    labs(
      title = "Confirmed covid-19 cases by Country",
      subtitle = "Data Repository by Johns Hopkins CSSE",
      caption = jh_caption
    )
  cases_plot
  # The plot is passed explicitly so the saved image cannot depend on whichever
  # plot happens to have been created last
  ggsave(
    filename = paste0(getwd(), "/covid19_cases_by_country.png"),
    plot = cases_plot, width = 10, height = 6, dpi = 120
  )

  # Deaths per country on a log10 y axis limited to 10 - 10,000
  deaths_plot <- ggplot(jh_top, aes(x = Date, y = Deaths, color = Country)) +
    geom_line() +
    scale_y_log10(limits = c(10, 10000)) +
    labs(
      title = "Confirmed covid-19 Deaths by Country",
      subtitle = "Data Repository by Johns Hopkins CSSE",
      caption = jh_caption
    )
  deaths_plot
  ggsave(
    filename = paste0(getwd(), "/covid19_deaths_by_country.png"),
    plot = deaths_plot, width = 10, height = 6, dpi = 120
  )

  # NOTE(review): the deaths chart is written a second time under this generic
  # file name. That keeps the set of output files unchanged, but it looks like
  # a leftover - confirm whether covid19_by_country.png is still wanted and
  # which chart it should contain.
  ggsave(
    filename = paste0(getwd(), "/covid19_by_country.png"),
    plot = deaths_plot, width = 10, height = 6, dpi = 120
  )

  # Print the selected countries' rows, newest date first
  jh_top %>% arrange(desc(Date), Country)
  100.  
  #----- Country populations -----

  # Population figures by country, downloaded from datahub.io
  country_pop <- read_csv(
    "https://datahub.io/JohnSnowLabs/population-figures-by-country/r/population-figures-by-country-csv.csv"
  )

  # Rewrite two country names to the spellings used in the filters elsewhere in
  # this script ("US", "Iran"), then keep only the name and the Year_2016
  # figure (as `pop`)
  country_pop2 <- country_pop %>%
    mutate(
      Country = case_when(
        Country == "United States" ~ "US",
        Country == "Iran, Islamic Rep." ~ "Iran",
        TRUE ~ Country
      )
    ) %>%
    select(Country, pop = Year_2016)
  109.  
  #----- daily update v population -----

  # Single-day snapshot from the daily reports folder of the same repository
  jh_daily <- read_csv(paste0(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/",
    "csse_covid_19_data/csse_covid_19_daily_reports/03-21-2020.csv"
  ))

  # Columns are renamed by position - this assumes the report has exactly these
  # eight columns in this order (TODO confirm whenever the report date changes)
  names(jh_daily) <- c("Province", "Country", "Date", "Confirmed", "Deaths", "Recovered", "Lat", "Long")

  countries_of_interest <- c("US", "Italy", "China", "Canada", "Iran", "United Kingdom", "Germany", "France", "Spain", "Switzerland")

  # Per-country totals joined to population, limited to countries with more
  # than one million people, expressed per million inhabitants.
  # Note that PopMillions is rounded to one decimal before the per-million
  # figures are derived from it.
  jh_daily %>%
    #filter(Country %in% countries_of_interest ) %>%
    group_by(Country) %>%
    summarise(Confirmed = sum(Confirmed), Deaths = sum(Deaths)) %>%
    inner_join(country_pop2, by = "Country") %>%
    filter(pop > 1000000) %>%
    mutate(
      PopMillions = round(pop / 1000000, digits = 1),
      ConfirmedPerMill = round(Confirmed / PopMillions, digits = 1),
      DeathsPerMill = round(Deaths / PopMillions, digits = 1),
      MortalityRate = round((Deaths / Confirmed) * 100, digits = 1)
    ) %>%
    select(Country, PopMillions, Confirmed, ConfirmedPerMill, Deaths, DeathsPerMill, MortalityRate) %>%
    arrange(desc(ConfirmedPerMill)) %>%
    # Rank explicitly by ConfirmedPerMill: without a weight column top_n() ranks
    # by the last column of the table, which here is MortalityRate
    top_n(50, ConfirmedPerMill) %>%
    write_csv(file.path(getwd(), "covid19_high_confirmed_per_pop.csv"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement