Install Packages

We are going to use a suite of packages. If those are not installed on your machine, please run the following code. {ggplot2} is part of the {tidyverse} package collection, tgther with other helpful packages for a data science workflow such as {dplyr}, {tibble}, {tidyr}, and {readr}.

#install.packages("ggplot2")
install.packages("tidverse")
install.packages("here")
install.packages("gghighlight")

Import the Data

We can import the data with the {readr} package. One could alternatively use the base function readRDS().

library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2.9000     v purrr   0.3.4     
## v tibble  3.0.3          v dplyr   1.0.1     
## v tidyr   1.1.1          v stringr 1.4.0     
## v readr   1.3.1          v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
spendings <- readr::read_rds(here::here("data", "data-jpm.Rds"))

Explore the Raw Data

Let’s have a look at the data set:

tibble::glimpse(spendings)
## Rows: 165
## Columns: 4
## Groups: category [11]
## $ date     <dbl> -0.01870324, 1.08478803, 2.11346633, 3.23566085, 4.3017456...
## $ change   <dbl> -2.197802e-01, -2.197802e+00, -5.494505e+00, 6.593407e-01,...
## $ category <fct> Groceries, Groceries, Groceries, Groceries, Groceries, Gro...
## $ id       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3...

Story: Compare changes in year-to-year spendings (change) by essential categories (category)
Goal: Show winners and losers of the COVID-19 epidemic


The Original Plot

The original plot was created by the JPMorgan Chase Institute:

We can create a similar plot with {ggplot2}:

## change default theme
theme_set(theme_minimal())

theme_update(
  panel.grid.minor = element_blank(),
  axis.line.x = element_line(color = "black"),
  axis.ticks.x = element_line(color = "black"),
  plot.title = element_text(hjust = .5, margin = margin(b = 10)),
  plot.title.position = "plot",
  plot.caption.position = "plot",
  plot.margin = margin(rep(15, 4))
)

g <- 
  ggplot(data = spendings,
         aes(x = id, y = change,
             color = category)) +
    ## add line national emergency
    geom_vline(xintercept = 10.85, color = "grey40", linetype = "dashed") +
    ## add baseline at zero
    geom_hline(yintercept = 0, color = "grey30", size = .8) +
    ## ad connected timeseries
    geom_line(size = .8) +
    geom_point(size = 1.6) +
    ## add label national emergenecy
    geom_text(
      data = tibble(
        date = 10.6,
        change = 65,
        label = "National emergency\ndeclared March 13"
      ),
      aes(x = date, y = change,
          label = label),
      inherit.aes = F,
      size = 2.8,
      lineheight = .95,
      hjust = 1
    ) +
    ## avoid clipping of points close to the border
    coord_cartesian(clip = "off") +
    ## change default colors
    rcartocolor::scale_color_carto_d(palette = "Prism") +
    ## modify axes and plot appearance
    scale_x_continuous(
      expand = c(.003, .003),
      breaks = seq(1, 15, by = 2),
      labels = c("Jan 4", "Jan 18", "Feb 1", "Feb 15", "Feb 29", "Mar 14", "Mar 28", "Apr 11")
    ) +
    scale_y_continuous(
      breaks = seq(-80, 60, by = 20),
      labels = glue::glue("{seq(-80, 60, by = 20)}%")
    ) +
    labs(x = NULL, y = "End of week",
         color = NULL, 
         title = "Year-over-year percent change in spending by essential category",
         caption = "Source: JPMorgan Chase Institute")

g


Small Multiples

One solution to resolve the overplotting-spaghetti-mess: small multiples, called facets in {ggplot2}:

g + 
  facet_wrap(~ category) +
  theme(legend.position = "none",
        strip.text = element_text(size = 13, face = "bold"),
        panel.spacing = unit(2, "lines"))

The Final Plot

We can combine both approaches by plotting small multiples but adding the other categories in the background as grey lines. We also keep the “national emergency” line for all facets but remove redundant labels.

gg <-
  ggplot(spendings, aes(id, change, color = category)) +
    geom_point() +
    geom_line(size = .8, alpha = .5) +
    gghighlight::gghighlight(
      use_direct_label = F,
      unhighlighted_params = list(color = "grey70", size = .5)
    ) +
    facet_wrap(~ category, ncol = 4, scales = "free_x") +
    coord_cartesian(clip = "off") +
    scale_x_continuous(
      expand = c(.003, .003),
      breaks = seq(1, 15, by = 2),
      labels = c("Jan 4", "Jan 18", "Feb 1", "Feb 15", "Feb 29", "Mar 14", "Mar 28", "Apr 11")
    ) +
    scale_y_continuous(
      breaks = seq(-80, 60, by = 20),
      labels = glue::glue("{seq(-80, 60, by = 20)}%")
    ) +
    scale_color_viridis_d(option = "inferno", end = .8, begin = .1, guide = "none") +
    labs(
      x = "End of week", y = NULL,
      title = "Year-over-year percent change in spending by essential category",
      caption = "Source: JPMorgan Chase Institute | Makeover: Cedric Scherer, Frontpage Data"
    )

gg + 
  geom_vline(xintercept = 10.85, color = "grey40", linetype = "dashed") +
  geom_hline(yintercept = 0, color = "grey30", size = .8) +
  geom_area(alpha = .1) +
  geom_line(size = 1.2) +
  geom_point(size = 1.8) +
  geom_text(
    data = tibble(
      id = 10.6,
      change = -60,
      label = "National emergency\ndeclared March 13",
      category = factor("Groceries", levels = levels(spendings$category)) 
    ),
    aes(label = label),
    color = "grey40",
    lineheight = .95,
    hjust = 1
  ) +
  theme(panel.grid.major.x = element_blank(),
        panel.spacing = unit(2, "lines"),
        strip.text = element_text(size = 13, face = "bold"),
        plot.title = element_text(size = 20, hjust = .5, margin = margin(b = 10)))

Session Info

## [1] "2020-12-26 18:48:34 CET"
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.1252    
## system code page: 65001
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] forcats_0.5.0      stringr_1.4.0      dplyr_1.0.1        purrr_0.3.4       
## [5] readr_1.3.1        tidyr_1.1.1        tibble_3.0.3       ggplot2_3.3.2.9000
## [9] tidyverse_1.3.0   
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.0  xfun_0.16         haven_2.3.1       colorspace_1.4-1 
##  [5] vctrs_0.3.2       generics_0.0.2    viridisLite_0.3.0 htmltools_0.5.0  
##  [9] yaml_2.2.1        utf8_1.1.4        blob_1.2.1        rlang_0.4.7      
## [13] pillar_1.4.6      glue_1.4.1        withr_2.2.0       DBI_1.1.0        
## [17] dbplyr_1.4.4      modelr_0.1.8      readxl_1.3.1      lifecycle_0.2.0  
## [21] munsell_0.5.0     gtable_0.3.0      cellranger_1.1.0  rvest_0.3.6      
## [25] evaluate_0.14     knitr_1.29        rmdformats_0.3.7  fansi_0.4.1      
## [29] broom_0.7.0       Rcpp_1.0.5        scales_1.1.1      backports_1.1.7  
## [33] showtext_0.8-1    jsonlite_1.7.0    sysfonts_0.8.1    farver_2.0.3     
## [37] fs_1.5.0          hms_0.5.3         digest_0.6.25     stringi_1.4.6    
## [41] bookdown_0.20     showtextdb_3.0    rprojroot_1.3-2   grid_4.0.2       
## [45] here_0.1          cli_2.0.2         tools_4.0.2       gghighlight_0.3.0
## [49] magrittr_1.5      rcartocolor_2.0.0 crayon_1.3.4      pkgconfig_2.0.3  
## [53] ellipsis_0.3.1    xml2_1.3.2        reprex_0.3.0      lubridate_1.7.9  
## [57] assertthat_0.2.1  rmarkdown_2.3     httr_1.4.2        rstudioapi_0.11  
## [61] R6_2.4.1          compiler_4.0.2