Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ### R Script to analyze Cheatham County Unemployment
- ### - by <Mathew.Binkley@Vanderbilt.edu>
- ### You need to get an API key to pull data from FRED.
- ### You may request an API key at:
- ### https://research.stlouisfed.org/useraccount/apikeys
- api_key_fred = "INSERT_YOUR_FRED_API_KEY_HERE"
- ### We need the following packages for this example.
- packages <- c("fredr", "lubridate", "fredr", "ggplot2", "forecast",
- "ggthemes", "tsibble", "dplyr", "magrittr", "broom", "ggfortify")
- ### Install packages if needed, then load them quietly
- new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
- if (length(new_packages)) install.packages(new_packages, quiet = TRUE)
- invisible(lapply(packages, "library",
- quietly = TRUE,
- character.only = TRUE,
- warn.conflicts = FALSE))
- ### Set FRED API Keys
- fredr_set_key(api_key_fred)
- # I like the "Economist" ggplot theme, so use it by default.
- # theme_set(theme_economist() + scale_colour_economist())
- theme_set(theme_economist())
- ####################################################################
- ### Add recession bars to ggplot graphs
- ####################################################################
- geom_recession_bars <- function (date_df_start, date_df_end) {
- recessions_df = read.table(textConnection(
- "peak, trough
- 1857-06-01, 1858-12-01
- 1860-10-01, 1861-06-01
- 1865-04-01, 1867-12-01
- 1869-06-01, 1870-12-01
- 1873-10-01, 1879-03-01
- 1882-03-01, 1885-05-01
- 1887-03-01, 1888-04-01
- 1890-07-01, 1891-05-01
- 1893-01-01, 1894-06-01
- 1895-12-01, 1897-06-01
- 1899-06-01, 1900-12-01
- 1902-09-01, 1904-08-01
- 1907-05-01, 1908-06-01
- 1910-01-01, 1912-01-01
- 1913-01-01, 1914-12-01
- 1918-08-01, 1919-03-01
- 1920-01-01, 1921-07-01
- 1923-05-01, 1924-07-01
- 1926-10-01, 1927-11-01
- 1929-08-01, 1933-03-01
- 1937-05-01, 1938-06-01
- 1945-02-01, 1945-10-01
- 1948-11-01, 1949-10-01
- 1953-07-01, 1954-05-01
- 1957-08-01, 1958-04-01
- 1960-04-01, 1961-02-01
- 1969-12-01, 1970-11-01
- 1973-11-01, 1975-03-01
- 1980-01-01, 1980-07-01
- 1981-07-01, 1982-11-01
- 1990-07-01, 1991-03-01
- 2001-03-01, 2001-11-01
- 2007-12-01, 2009-06-01"),
- sep = ',',
- colClasses = c('Date', 'Date'),
- header = TRUE)
- recessions_trim <- subset(recessions_df, peak >= min(date_df_start) &
- trough <= max(date_df_end))
- if (nrow(recessions_trim) > 0) {
- recession_bars = geom_rect(data = recessions_trim, inherit.aes = F,
- fill = "darkgray", alpha = 0.25,
- aes(xmin = peak, xmax = trough, ymin = -Inf, ymax = +Inf))
- } else {
- recession_bars = geom_blank()
- }
- }
- ####################################################################
- ### Fetch unemployment data from FRED
- ####################################################################
- ### Load unemployment from FRED
- #target <- c("UNRATE", "US")
- #target <- c("TNUR", "Tennessee")
- target <- c("TNCHEA1URN", "Cheatham County")
- series <- target[1]
- name <- target[2]
- data <- as_tsibble(fredr(series_id = series, frequency = "m"), index = "date")
- date <- data %>% pull("date")
- values <- data %>% pull("value")
- # Decompose the data into trend/seasonal/remainder
- values_ts <- ts(values, frequency = 12)
- # Use stl() decomposition
- values_decomposed <- stl(values_ts, s.window = "periodic",
- t.window = 13, robust = TRUE)
- seasonal <- values_decomposed$time.series[, 1]
- trend <- values_decomposed$time.series[, 2]
- remainder <- values_decomposed$time.series[, 3]
- data_df <- data.frame(date = date, values = values, trend = trend,
- seasonal = seasonal, remainder = remainder)
- ####################################################################
- ### Graph 1: stl() decomposition of unemployment data
- ####################################################################
- p1 = autoplot(values_decomposed, range.bars = TRUE) +
- ggtitle("Cheatham County Unemployment Decomposition") +
- xlab("Year")
- print(p1)
- readline(prompt = "Press [ENTER] to continue...")
- ###################################################################
- ### Graph 2: Closeup on last 3 years of unemployment with trend
- ###################################################################
- date_start <- as.Date("2017-01-01")
- date_end <- as.Date(now())
- data_df_subset_a <- subset(data_df, date >= date_start)
- c1 <- paste("U.S. Bureau of Labor Statistics, Unemployment Rate in ",
- name, " [", series, "]\n", sep = "")
- c2 <- "Retrieved from FRED, Federal Reserve Bank of St. Louis;\n"
- c3 <- paste("https://fred.stlouisfed.org/series/", series, "\n")
- c4 <- paste("Data retrieved on", format(Sys.time(), "%B %d, %Y at %I:%M %p %Z"))
- title <- paste(name, "Trend Unemployment Rate\nbased on unemployment data from",
- month.name[month(last(date))], year(last(date)))
- subtitle <- "Trend in red, seasonal in blue, raw data in grey\nRecessions marked with vertical bars"
- xlab <- "Year"
- ylab <- "Unemployment Rate"
- caption <- paste(c1, c2, c3, c4)
- p2 <- ggplot(data = data_df_subset_a, aes(x = date, y = values)) +
- geom_point(color = "grey60", size = 3, shape = 1) +
- geom_line(data = data_df_subset_a, aes(x = date, y = values),
- size = 1.3, color = "grey", linetype = "dotted") +
- geom_line(data = data_df_subset_a, aes(x = date, y = trend),
- size = 1.3, color = "red3") +
- geom_line(data = data_df_subset_a, aes(x = date, y = mean(trend) + seasonal),
- size = 1.3, color = "steelblue2", alpha = 0.4) +
- geom_recession_bars(min(data_df_subset_a$date), max(data_df_subset_a$date)) +
- labs(title = title, subtitle = subtitle, caption = caption,
- x = xlab, y = ylab)
- print(p2)
- readline(prompt = "Press [ENTER] to continue...")
- #####################################################################
- ### Graph 3: Unemployment Rate Forecast
- #####################################################################
- s1 <- "Raw unemployment data in black, seasonal unemployment in green\n"
- s2 <- "Forecast with 1σ confidence bands in blue\n"
- s3 <- "Red dotted line shows current month's value"
- title <- paste(name, "Unemployment Six Month Forecast\nbased on unemployment data from",
- month.name[month(last(date))], year(last(date)))
- subtitle <- paste(s1, s2, s3)
- xlab <- "Year"
- ylab <- "Unemployment Rate"
- caption <- paste(c1, c2, c3, c4)
- fc_start_date = as.Date("2017-01-01")
- data_df_subset_b <- subset(data_df, date >= fc_start_date)
- data_ts <- ts(data_df_subset_b$values, freq = 12,
- start = c(year(fc_start_date), month(fc_start_date)))
- forecast <- data_ts %>%
- stl(s.window = "periodic") %>%
- forecast(h = 6, level = c(68.27, 95.45))
- p3 <- autoplot(forecast, size = 1.3, predict.size = 1.3,
- conf.int.fill = '#0000FF') +
- ggtitle(title, subtitle = subtitle) +
- labs(caption = caption) +
- xlab(xlab) +
- ylab(ylab) +
- geom_line(data = data_df_subset_a,
- aes(x = date, y = 1.5*mean(trend) + seasonal),
- size = 1.3, color = "darkolivegreen4", alpha = 0.4) +
- geom_vline(xintercept = as.Date("2020-01-01"), alpha = 0.4) +
- geom_hline(yintercept = tail(data_ts, n = 1)[1],
- size = 1, linetype = "dotted", color = "red") +
- geom_recession_bars(min(data_df_subset_b$date),
- max(data_df_subset_b$date))
- print(p3)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement