Nate Silver’s 538 US presidential election forecasts include 80% intervals for 51 locations (50 states + Washington DC). Here’s a reproducible notebook looking at how well 538 did this year. Click on “Code” to see the code used to perform each step of this analysis.

library(tidyverse)
library(jsonlite)
library(lubridate)
library(forcats)
library(plotly)

We start by loading the election results as displayed by the New York Times (scraped by Alex Gaynor). The results shown here are based on data downloaded on Nov 8, 2020.

# Parse the scraped results file (JSON) into one row per race: the state
# name, Biden's and Trump's reported percent, and the electoral votes.
results <- jsonlite::read_json("results.json")

# Race-level fields can be extracted by name directly.
state_name <- map_chr(results$data$races, "state_name")
electoral_votes <- map_dbl(results$data$races, "electoral_votes")

# Candidate-level fields: within each race keep the candidate entries whose
# last name matches, then read their "percent". The outer map_dbl() needs
# exactly one value per race, so zero or several matches raise an error.
biden <- map_dbl(results$data$races, function(race) {
  matched <- keep(race$candidates, function(cand) cand$last_name == "Biden")
  map_dbl(matched, "percent")
})
trump <- map_dbl(results$data$races, function(race) {
  matched <- keep(race$candidates, function(cand) cand$last_name == "Trump")
  map_dbl(matched, "percent")
})

actual <- tibble(
  state = state_name,
  biden = biden,
  trump = trump,
  electoral_votes = electoral_votes
)

We next get the 538 forecasts from their GitHub repository. In particular, we are interested in what they predicted on November 3, 2020.

# Load 538's state-level forecast file and add a proper Date column;
# `modeldate` is parsed as month/day/year by mdy().
five38 <- read_csv(
  "election-forecasts-2020/presidential_state_toplines_2020.csv"
) %>%
  mutate(date = mdy(modeldate))

# Keep only the forecast dated 2020-11-03 and rename the columns we need.
# The `*_inc` columns are mapped to Trump and the `*_chal` columns to Biden;
# `_lo` / `_hi` are the interval bounds drawn in the plots below.
forecasts <- five38 %>%
  filter(date == ymd("2020-11-03")) %>%
  select(
    state,
    trump = voteshare_inc,
    trump_lo = voteshare_inc_lo,
    trump_hi = voteshare_inc_hi,
    biden = voteshare_chal,
    biden_lo = voteshare_chal_lo,
    biden_hi = voteshare_chal_hi,
    biden_win_prob = winstate_chal
  )

# Join actual results to forecasts. The shared `biden` / `trump` columns get
# the suffixes `_actual` and `_538`. Then derive:
#   state          - factor ordered by Biden's actual share (plot ordering)
#   cover80        - did the actual share fall inside 538's interval?
#   biden_win      - did Biden actually out-poll Trump?
#   pred_biden_win - did 538's point forecast have Biden ahead?
both <- actual %>%
  left_join(forecasts, by = "state", suffix = c("_actual", "_538")) %>%
  mutate(
    state = fct_reorder(state, biden_actual),
    cover80 = biden_lo <= biden_actual & biden_actual <= biden_hi,
    biden_win = trump_actual < biden_actual,
    pred_biden_win = trump_538 < biden_538
  )

This plot shows the actual percentage voting for Biden (in black) for each location along with 538’s 80% interval.

# Per-location comparison: 538's interval for Biden's vote share (error bars,
# coloured by whether the interval covered the result) with the actual share
# overlaid as a black point. Axes are flipped so locations read top to bottom.
ggplot(
  both,
  aes(
    x = state,
    y = biden_538,
    ymin = biden_lo,
    ymax = biden_hi,
    color = cover80
  )
) +
  geom_errorbar() +
  # The point layer inherits the plot data and `x = state`; the fixed black
  # colour overrides the inherited `color = cover80` mapping.
  geom_point(aes(y = biden_actual), color = "black") +
  labs(
    x = "State",
    y = "Percent Voting Biden",
    title = "How Well Did 538 Do?"
  ) +
  theme(legend.position = "none") +
  coord_flip()

# Coverage: binom.test() confidence interval (default confidence level), as
# rounded percentages, for the proportion of locations whose actual Biden
# share fell inside 538's interval. The number of trials is taken from the
# data rather than hard-coded, so it stays correct if the row count changes.
coverage <- round(
  100 * binom.test(sum(both$cover80), nrow(both))$conf.int
)

# Absolute error: mean absolute difference, in percentage points, between
# Biden's actual share and 538's forecast share across locations.
ae <- both %>%
  summarize(ae = mean(abs(biden_actual - biden_538))) %>%
  pull(ae)

# Number of wrong calls: 2x2 table of actual winner vs. forecast winner.
# Reuses the `biden_win` / `pred_biden_win` columns already stored in `both`
# (same definitions: Biden share greater than Trump share).
confusion_matrix <- both %>%
  transmute(
    biden_win_actual = biden_win,
    biden_win_538 = pred_biden_win
  ) %>%
  table()

Some observations

# Common axis limits (percent) so predicted and actual shares share one scale;
# also used by the one-month-earlier scatter plot further down.
lims <- c(25, 95)

# Predicted vs. actual Biden share, one point per location, sized by
# electoral votes and filled by whether 538 called the winner correctly.
# The diagonal marks a perfect forecast.
g <- both %>%
  mutate(correct_forecast = biden_win == pred_biden_win) %>%
  ggplot(aes(
    x = biden_actual,
    y = biden_538,
    size = electoral_votes,
    fill = correct_forecast,
    # Label from the row's own `state` column. The free-standing `state_name`
    # vector only lines up with the rows by position, so any filtering or
    # reordering of the data would mislabel points.
    label = state
  )) +
  geom_point(color = "black", pch = 21) +
  geom_abline(slope = 1, intercept = 0) +
  labs(
    x = "Actual % Voting for Biden",
    y = "Predicted % Voting for Biden"
  ) +
  theme(legend.position = "none") +
  xlim(lims) +
  ylim(lims)

# Interactive version; the tooltip lists the mapped variables named here.
ggplotly(g, tooltip = c("state",
                        "biden_actual",
                        "biden_538",
                        "electoral_votes"))
# Table of the locations where Biden's actual share exceeded 538's forecast.
both %>%
  select(state, biden_actual, biden_538) %>%
  filter(biden_538 < biden_actual) %>%
  knitr::kable()
state biden_actual biden_538
California 65.1 64.06170
Colorado 55.3 54.53553
District of Columbia 92.6 91.27472
# For each location, plot the probability 538 gave to its favoured candidate
# (`how_sure`, always >= 0.5) against whether that call turned out right.
g <- both %>%
  mutate(
    how_sure = pmax(biden_win_prob, 1 - biden_win_prob),
    were_correct = if_else(
      biden_win == pred_biden_win,
      "Forecast Was Right",
      "Forecast Was Wrong"
    )
  ) %>%
  # Map the tooltip label from the data's own `state` column instead of the
  # global `state_name` vector, which matches rows only by position.
  ggplot(aes(x = how_sure, y = were_correct, label = state)) +
  geom_point() +
  labs(
    x = "Probability given by 538 model",
    y = "",
    title = "When 538 gave a high probability, did it tend to be right?"
  ) +
  xlim(0.5, 1)

# Interactive version showing only the location name on hover.
ggplotly(g, tooltip = "state")
# Simulate how many calls 538 would get right if every location were an
# independent Bernoulli trial whose success probability is the probability
# the model gave its favoured candidate, then compare with the real count.
set.seed(123)

# Probability assigned to the favoured candidate in each location.
p <- both %>%
  mutate(p = pmax(biden_win_prob, 1 - biden_win_prob)) %>%
  pull(p)

nsim <- 5000

# One column per simulated election and one row per location. The matrix is
# filled column by column and rbinom() recycles `p`, so row i uses p[i].
num_right <- colSums(
  matrix(rbinom(n = nsim * length(p), size = 1, prob = p), ncol = nsim)
)

# Histogram of simulated correct-call counts; the red line marks the number
# of calls 538 actually got right. after_stat(density) replaces the
# deprecated `..density..` notation.
tibble(num_right = num_right) %>%
  ggplot(aes(x = num_right, y = after_stat(density))) +
  geom_histogram(binwidth = 1, color = "white") +
  geom_vline(
    xintercept = sum(both$biden_win == both$pred_biden_win),
    col = "red",
    lwd = 2
  ) +
  labs(
    x = "Number of correct forecasts",
    title = "Did 538 do better than they might have expected?"
  )

It appears that their actual performance was within what they might have expected (and in fact if one were to actually account for the positive correlation in errors, one would expect the distribution shown to be even wider).

How well would 538 have done one month before?

Let’s repeat this for 538’s forecasts from one month earlier (Oct. 4, 2020).

# 538's forecast dated 2020-10-04, with the same column renaming as the
# election-day forecast: `*_inc` columns are mapped to Trump, `*_chal` to
# Biden, and `_lo` / `_hi` are the interval bounds.
old_forecasts <- five38 %>%
  filter(date == ymd("2020-10-04")) %>%
  select(
    state,
    trump = voteshare_inc,
    trump_lo = voteshare_inc_lo,
    trump_hi = voteshare_inc_hi,
    biden = voteshare_chal,
    biden_lo = voteshare_chal_lo,
    biden_hi = voteshare_chal_hi,
    biden_win_prob = winstate_chal
  )

# Join the actual results to the earlier forecast and derive the same helper
# columns: ordered `state` factor, interval coverage flag, actual winner flag
# and forecast winner flag.
old_both <- actual %>%
  left_join(old_forecasts, by = "state", suffix = c("_actual", "_538")) %>%
  mutate(
    state = fct_reorder(state, biden_actual),
    cover80 = biden_lo <= biden_actual & biden_actual <= biden_hi,
    biden_win = trump_actual < biden_actual,
    pred_biden_win = trump_538 < biden_538
  )
# Same interval plot as for the election-day forecast, but using the forecast
# made one month earlier: error bars are 538's interval for Biden's share
# (coloured by whether it covered the result), black points are actual shares.
old_both %>%
  ggplot(aes(
    x = state,
    y = biden_538,
    ymin = biden_lo,
    ymax = biden_hi,
    color = cover80
  )) +
  geom_errorbar() +
  # Draw the actual shares from `old_both` itself (inherited plot data) rather
  # than from the separate `both` data frame, so both layers always describe
  # the same rows.
  geom_point(aes(y = biden_actual), color = "black") +
  theme(legend.position = "none") +
  labs(
    x = "State",
    y = "Percent Voting Biden",
    # Title fixed: the original was missing the verb "Do".
    title = "How Well Did 538 Do One Month in Advance?"
  ) +
  coord_flip()

# Predicted (one month out) vs. actual Biden share, one point per location,
# sized by electoral votes and filled by whether the winner was called
# correctly. The diagonal marks a perfect forecast.
g <- old_both %>%
  mutate(correct_forecast = biden_win == pred_biden_win) %>%
  ggplot(aes(
    x = biden_actual,
    y = biden_538,
    size = electoral_votes,
    fill = correct_forecast,
    # Label from the row's own `state` column. The free-standing `state_name`
    # vector only lines up with the rows by position, so any filtering or
    # reordering of the data would mislabel points.
    label = state
  )) +
  geom_point(color = "black", pch = 21) +
  geom_abline(slope = 1, intercept = 0) +
  labs(
    x = "Actual % Voting for Biden",
    y = "Predicted % Voting for Biden"
  ) +
  theme(legend.position = "none") +
  xlim(lims) +
  ylim(lims)

# Interactive version; the tooltip lists the mapped variables named here.
ggplotly(g, tooltip = c("state",
                        "biden_actual",
                        "biden_538",
                        "electoral_votes"))
# For the one-month-earlier forecast: probability 538 gave to its favoured
# candidate (`how_sure`, always >= 0.5) against whether that call was right.
g <- old_both %>%
  mutate(
    how_sure = pmax(biden_win_prob, 1 - biden_win_prob),
    were_correct = if_else(
      biden_win == pred_biden_win,
      "Forecast Was Right",
      "Forecast Was Wrong"
    )
  ) %>%
  # Map the tooltip label from the data's own `state` column instead of the
  # global `state_name` vector, which matches rows only by position.
  ggplot(aes(x = how_sure, y = were_correct, label = state)) +
  geom_point() +
  labs(
    x = "Probability given by 538 model",
    y = "",
    title = "When 538 gave a high probability, did it tend to be right?"
  ) +
  xlim(0.5, 1)

# Interactive version showing only the location name on hover.
ggplotly(g, tooltip = "state")

Apparently, their forecasts were doing well even one month before the election.