Advertisement
binkleym

Cheatham County Unemployment

Dec 20th, 2019
726
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 7.65 KB | None | 0 0
  1. ### R Script to analyze Cheatham County Unemployment
  2. ###  - by <Mathew.Binkley@Vanderbilt.edu>
  3.  
  4. ### You need to get an API key to pull data from FRED.  
  5. ### You may request an API key at:
  6. ### https://research.stlouisfed.org/useraccount/apikeys
  7. api_key_fred = "INSERT_YOUR_FRED_API_KEY_HERE"
  8.  
  9. ### We need the following packages for this example.
  10. packages <- c("fredr", "lubridate", "fredr", "ggplot2", "forecast",
  11.               "ggthemes", "tsibble", "dplyr", "magrittr", "broom", "ggfortify")
  12.  
  13. ### Install packages if needed, then load them quietly
  14. new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
  15. if (length(new_packages)) install.packages(new_packages, quiet = TRUE)
  16. invisible(lapply(packages, "library",
  17.                  quietly = TRUE,
  18.                  character.only = TRUE,
  19.                  warn.conflicts = FALSE))
  20.  
  21. ### Set FRED API Keys
  22. fredr_set_key(api_key_fred)
  23.  
  24. # I like the "Economist" ggplot theme, so use it by default.
  25. # theme_set(theme_economist() + scale_colour_economist())
  26. theme_set(theme_economist())
  27.  
  28. ####################################################################
  29. ### Add recession bars to ggplot graphs
  30. ####################################################################
  31.  
  32. geom_recession_bars <- function (date_df_start, date_df_end) {
  33.  
  34.   recessions_df = read.table(textConnection(
  35.     "peak, trough
  36.   1857-06-01, 1858-12-01
  37.   1860-10-01, 1861-06-01
  38.   1865-04-01, 1867-12-01
  39.   1869-06-01, 1870-12-01
  40.   1873-10-01, 1879-03-01
  41.   1882-03-01, 1885-05-01
  42.   1887-03-01, 1888-04-01
  43.   1890-07-01, 1891-05-01
  44.   1893-01-01, 1894-06-01
  45.   1895-12-01, 1897-06-01
  46.   1899-06-01, 1900-12-01
  47.   1902-09-01, 1904-08-01
  48.   1907-05-01, 1908-06-01
  49.   1910-01-01, 1912-01-01
  50.   1913-01-01, 1914-12-01
  51.   1918-08-01, 1919-03-01
  52.   1920-01-01, 1921-07-01
  53.   1923-05-01, 1924-07-01
  54.   1926-10-01, 1927-11-01
  55.   1929-08-01, 1933-03-01
  56.   1937-05-01, 1938-06-01
  57.   1945-02-01, 1945-10-01
  58.   1948-11-01, 1949-10-01
  59.   1953-07-01, 1954-05-01
  60.   1957-08-01, 1958-04-01
  61.   1960-04-01, 1961-02-01
  62.   1969-12-01, 1970-11-01
  63.   1973-11-01, 1975-03-01
  64.   1980-01-01, 1980-07-01
  65.   1981-07-01, 1982-11-01
  66.   1990-07-01, 1991-03-01
  67.   2001-03-01, 2001-11-01
  68.   2007-12-01, 2009-06-01"),
  69.     sep = ',',
  70.     colClasses = c('Date', 'Date'),
  71.     header = TRUE)
  72.  
  73.    recessions_trim <- subset(recessions_df, peak >= min(date_df_start) &
  74.                               trough <= max(date_df_end))
  75.  
  76.   if (nrow(recessions_trim) > 0) {
  77.     recession_bars = geom_rect(data = recessions_trim, inherit.aes = F,
  78.                                fill = "darkgray", alpha = 0.25,
  79.                                aes(xmin = peak, xmax = trough, ymin = -Inf, ymax = +Inf))
  80.   } else {
  81.     recession_bars = geom_blank()
  82.   }
  83. }
  84.  
  85.  
  86. ####################################################################
  87. ### Fetch unemployment data from FRED
  88. ####################################################################
  89.  
  90. ### Load unemployment from FRED
  91.  
  92. #target <- c("UNRATE",     "US")
  93. #target <- c("TNUR",       "Tennessee")
  94. target <- c("TNCHEA1URN", "Cheatham County")
  95.  
  96. series <- target[1]
  97. name   <- target[2]
  98.  
  99. data <- as_tsibble(fredr(series_id = series, frequency = "m"), index = "date")
  100.  
  101. date   <- data %>% pull("date")
  102. values <- data %>% pull("value")
  103.  
  104. # Decompose the data into trend/seasonal/remainder
  105. values_ts         <- ts(values, frequency = 12)
  106.  
  107. # Use stl() decomposition
  108. values_decomposed <- stl(values_ts, s.window = "periodic",
  109.                          t.window = 13, robust = TRUE)
  110. seasonal  <- values_decomposed$time.series[, 1]
  111. trend     <- values_decomposed$time.series[, 2]
  112. remainder <- values_decomposed$time.series[, 3]
  113.  
  114. data_df <- data.frame(date = date, values = values, trend = trend,
  115.                       seasonal = seasonal, remainder = remainder)
  116.  
  117. ####################################################################
  118. ### Graph 1:  stl() decomposition of unemployment data
  119. ####################################################################
  120. p1 = autoplot(values_decomposed, range.bars = TRUE) +
  121.     ggtitle("Cheatham County Unemployment Decomposition") +
  122.     xlab("Year")
  123.  
  124. print(p1)
  125.  
  126. readline(prompt = "Press [ENTER] to continue...")
  127.  
  128. ###################################################################
  129. ### Graph 2:  Closeup on last 3 years of unemployment with trend
  130. ###################################################################
  131. date_start <- as.Date("2017-01-01")
  132. date_end   <- as.Date(now())
  133.  
  134. data_df_subset_a <- subset(data_df, date >= date_start)
  135.  
  136. c1 <- paste("U.S. Bureau of Labor Statistics, Unemployment Rate in ",
  137.             name, " [", series, "]\n", sep = "")
  138. c2 <- "Retrieved from FRED, Federal Reserve Bank of St. Louis;\n"
  139. c3 <- paste("https://fred.stlouisfed.org/series/", series, "\n")
  140. c4 <- paste("Data retrieved on", format(Sys.time(), "%B %d, %Y at %I:%M %p %Z"))
  141.  
  142. title    <- paste(name, "Trend Unemployment Rate\nbased on unemployment data from",
  143.                   month.name[month(last(date))], year(last(date)))
  144. subtitle <- "Trend in red, seasonal in blue, raw data in grey\nRecessions marked with vertical bars"
  145. xlab     <- "Year"
  146. ylab     <- "Unemployment Rate"
  147. caption  <- paste(c1, c2, c3, c4)
  148.  
  149. p2 <- ggplot(data = data_df_subset_a, aes(x = date, y = values)) +
  150.       geom_point(color = "grey60", size = 3, shape = 1) +
  151.       geom_line(data = data_df_subset_a, aes(x = date, y = values),
  152.                 size = 1.3, color = "grey", linetype = "dotted") +
  153.       geom_line(data = data_df_subset_a, aes(x = date, y = trend),
  154.                 size = 1.3, color = "red3") +
  155.       geom_line(data = data_df_subset_a, aes(x = date, y = mean(trend) + seasonal),
  156.                 size = 1.3, color = "steelblue2", alpha = 0.4) +
  157.       geom_recession_bars(min(data_df_subset_a$date), max(data_df_subset_a$date)) +
  158.       labs(title = title, subtitle = subtitle, caption = caption,
  159.            x = xlab, y = ylab)
  160. print(p2)
  161.  
  162. readline(prompt = "Press [ENTER] to continue...")
  163.  
  164. #####################################################################
  165. ### Graph 3:  Unemployment Rate Forecast
  166. #####################################################################
  167. s1 <- "Raw unemployment data in black, seasonal unemployment in green\n"
  168. s2 <- "Forecast with 1σ confidence bands in blue\n"
  169. s3 <- "Red dotted line shows current month's value"
  170.  
  171. title    <- paste(name, "Unemployment Six Month Forecast\nbased on unemployment data from",
  172.                   month.name[month(last(date))], year(last(date)))
  173. subtitle <- paste(s1, s2, s3)
  174. xlab     <- "Year"
  175. ylab     <- "Unemployment Rate"
  176. caption  <- paste(c1, c2, c3, c4)
  177.  
  178. fc_start_date = as.Date("2017-01-01")
  179. data_df_subset_b <- subset(data_df, date >= fc_start_date)
  180.  
  181. data_ts <- ts(data_df_subset_b$values, freq = 12,
  182.               start = c(year(fc_start_date), month(fc_start_date)))
  183.  
  184. forecast <- data_ts %>%
  185.             stl(s.window = "periodic") %>%
  186.             forecast(h = 6, level = c(68.27, 95.45))
  187.  
  188. p3 <- autoplot(forecast, size = 1.3, predict.size = 1.3,
  189.                conf.int.fill = '#0000FF') +
  190.                ggtitle(title, subtitle = subtitle) +
  191.                labs(caption = caption) +
  192.                xlab(xlab) +
  193.                ylab(ylab) +
  194.       geom_line(data = data_df_subset_a,
  195.                 aes(x = date, y = 1.5*mean(trend) + seasonal),
  196.                 size = 1.3, color = "darkolivegreen4", alpha = 0.4) +
  197.       geom_vline(xintercept = as.Date("2020-01-01"), alpha = 0.4) +
  198.       geom_hline(yintercept = tail(data_ts, n = 1)[1],
  199.                  size = 1, linetype = "dotted", color = "red") +
  200.       geom_recession_bars(min(data_df_subset_b$date),
  201.                           max(data_df_subset_b$date))
  202. print(p3)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement