Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ### Compute the ratio of patents granted per 100k US population, as a measure
- ### of innovation over time.
- library(tidyverse)
- library(rvest)
- library(scales)
- library(broom)
- ### US Population since 1900
- us_pop_pres <-
- "https://multpl.com/united-states-population/table/by-year" %>%
- read_html() %>%
- html_nodes("table") %>%
- .[[1]] %>%
- html_table() %>%
- filter(grepl("Jul", Date)) %>%
- rename(Year = Date) %>%
- mutate(
- Year = gsub("Jul 1, ", "", Year) %>% as.integer(),
- Value = gsub(" million", "", Value) %>% as.numeric(),
- Date = as.Date(paste(Year, "-01-01", sep = "")),
- Value = Value * 1000000
- ) %>%
- select(Date, Value)
- # US population before 1900 from Wikipedia
- # https://en.wikipedia.org/wiki/Demographic_history_of_the_United_States
- us_pop_hist <-
- tribble(
- ~year, ~Value,
- 1610, 350,
- 1620, 2302,
- 1630, 4646,
- 1640, 26634,
- 1650, 50368,
- 1660, 75058,
- 1670, 111935,
- 1680, 151507,
- 1690, 210372,
- 1700, 250888,
- 1710, 331711,
- 1720, 466185,
- 1730, 629445,
- 1740, 905563,
- 1750, 1170760,
- 1760, 1593625,
- 1770, 2148076,
- 1780, 2780369,
- 1790, 3929214,
- 1800, 5308483,
- 1810, 7239881,
- 1820, 9638453,
- 1830, 12866020,
- 1840, 17069453,
- 1850, 23191876,
- 1860, 31443321,
- 1870, 38558371,
- 1880, 50189209,
- 1890, 62979766
- ) %>%
- mutate(Date = as.Date(paste(year, "-01-01", sep = ""))) %>%
- select(Date, Value)
- ### Combine past/present history into one dataset
- us_pop <-
- us_pop_pres %>%
- bind_rows(us_pop_hist) %>%
- arrange(Date) %>%
- rename(date = Date, us_pop = Value)
- # Info on number of patents from US Patent & Trademark Office
- patents <-
- "https://www.uspto.gov/web/offices/ac/ido/oeip/taf/h_counts.htm" %>%
- read_html() %>%
- html_elements("table") %>%
- .[[3]] %>%
- html_table() %>%
- janitor::clean_names() %>%
- mutate_all(~sub(",", "", .)) %>%
- select(-notes) %>%
- rename(
- year = 1,
- utility_patent_applications = 2,
- design_patent_applications = 3,
- plant_patent_applications = 4,
- utility_patents = 5,
- design_patents = 6,
- plant_patents = 7,
- patents_to_foreign_residents = 8
- ) %>%
- mutate(
- year = ifelse(year == "1836 (c)", "1836", year),
- design_patents = ifelse(design_patents == "(b)", "", design_patents)
- ) %>%
- mutate_all(~sub("n/a", "", .)) %>%
- mutate_all(as.numeric) %>%
- replace(is.na(.), 0) %>%
- mutate(
- date = as.Date(paste(year, "-01-01", sep = "")),
- total_patents = utility_patents + design_patents + plant_patents
- ) %>%
- select(date, total_patents)
- # Combine patents and population into one table so we can compute the rate of
- # patents per 100k population
- combo <-
- us_pop %>%
- full_join(patents, by = "date") %>%
- filter(!is.na(total_patents), !is.na(us_pop)) %>%
- mutate(
- year = year(date),
- rate = 100000 * total_patents / us_pop)
- # Compute a linear fit of the data to detrend
- fit <- combo %>% lm(rate ~ year, data = .) %>% tidy()
- c0 <- fit %>% filter(term == "(Intercept)") %>% pull(estimate)
- c1 <- fit %>% filter(term == "year") %>% pull(estimate)
- combo <-
- combo %>%
- mutate(
- fit = c0 + c1 * year,
- detrended_rate = rate - fit
- )
- # Graph the raw patent per 100k rate, showing the trend line we will subtract
- g_patents <-
- ggplot(data = combo) +
- theme_bw() +
- geom_line(aes(x = date, y = rate)) +
- geom_line(aes(x = date, y = fit), color = "firebrick2") +
- scale_x_date(breaks = pretty_breaks(10)) +
- scale_y_continuous(breaks = pretty_breaks(10)) +
- labs(x = "", y = "Patent Rate per 100k population", title = "US Patent Rate per 100k population")
- print(g_patents)
- # Detrend and graph the z-score of the patent rate
- g_patents_detrend <-
- ggplot(data = combo) +
- theme_bw() +
- geom_line(aes(x = date, y = (detrended_rate - mean(detrended_rate))/sd(detrended_rate))) +
- geom_hline(yintercept = 0, linetype = "dashed", color = "steelblue2") +
- scale_x_date(breaks = pretty_breaks(10)) +
- scale_y_continuous(breaks = pretty_breaks(10)) +
- labs(x = "", y = "Z-Score",
- title = "Z-Score of Detrended Patent Rate per 100k population")
- print(g_patents_detrend)
- plot_grid(g_patents, g_patents_detrend, ncol = 1, align = "hv")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement