cedricscherer.com/2019/08/05/a-ggplot2-tutorial-for-beautiful-plotting-in-r
{ggplot2} is a system for declaratively creating graphics,
based on “The Grammar of Graphics” (Wilkinson, 2005).
data +
mapping +
geometry
data +
mapping +
geometry +
statistics +
facets +
coordinate systems +
scales +
theme
Illustration by Allison Horst
ggplot2 Examples featured on ggplot2.tidyverse.org
Illustration by Allison Horst
Selection of visualizations created 100% with ggplot2 by Thomas Lin Pedersen,
Georgios Karamanis, Tanya Shapiro, Jake Kaupp, Jack Davison, and myself.
Bike sharing counts in London, UK, powered by TfL Open Data
| Variable | Description | Class |
|---|---|---|
| date | Date encoded as `YYYY-MM-DD` | date |
| day_night | `day` (6:00am–5:59pm) or `night` (6:00pm–5:59am) | character |
| year | `2015` or `2016` | factor |
| month | `1` (January) to `12` (December) | factor |
| season | `winter`, `spring`, `summer`, or `autumn` | factor |
| count | Sum of reported bikes rented | integer |
| is_workday | `TRUE` being Monday to Friday and no bank holiday | logical |
| is_weekend | `TRUE` being Saturday or Sunday | logical |
| is_holiday | `TRUE` being a bank holiday in the UK | logical |
| temp | Average air temperature (°C) | double |
| temp_feel | Average feels like temperature (°C) | double |
| humidity | Average air humidity (%) | double |
| wind_speed | Average wind speed (km/h) | double |
| weather_type | Most common weather type | character |
{ggdist}

My reinterpreted The Economist graphic
My Contribution to the SWD Challenge “Visualizing Uncertainty”
Source: mjskay.github.io/ggdist
{ggridges}{ggpointdensity}{ggblend}ggplot(bikes, aes(x = humidity, y = temp, color = day_night, partition = day_night)) +
list(geom_point(size = 5, alpha = .5) * (blend("lighten") + blend("multiply", alpha = 0.5)),
geom_vline(xintercept = mean(bikes$humidity), color = "grey", linewidth = 7)) |> blend("hard.light") +
scale_color_manual(values = c("#EFAC00", "#9C55E3"), name = NULL){ggdensity}

{geomtextpath}bikes_monthly |>
mutate(day_night = if_else(
day_night == "day",
"Day period (6am-6pm)",
"Night period (6pm-6am)"
)) |>
ggplot(aes(x = month, y = count,
color = day_night,
group = day_night)) +
geomtextpath::geom_textline(
aes(label = day_night),
linewidth = 1,
family = "Asap SemiCondensed",
fontface = "bold",
size = 6.5,
vjust = -.5,
hjust = .05
) +
scale_color_manual(
values = c("#EFAC00", "#9C55E3"),
guide = "none"
)
{ggforce}g +
ggforce::geom_mark_hull(
aes(label = "Tube Network Strikes 2015",
filter = count > 40000),
description = "Commuters had to deal with severe disruptions in public transport on July 9 and August 6",
color = "black",
label.family = "Asap SemiCondensed",
label.fontsize = c(18, 14),
expand = unit(8, "pt"),
con.cap = unit(0, "pt"),
label.buffer = unit(15, "pt"),
con.type = "straight",
label.fill = "transparent"
)
“Verbraucherumfrage zur Zukunft nach der Krise”, kuendigung.org
{ggtext}
<b style='font-size:40pt;font-family:times;'>TfL</b> bike sharing trends by *<b style='color:#B48200;'>day</b>* and *<b style='color:#663399;'>night</b>*
“Chats about Friends and their Past, Present, and Future Partners”
# Draw the plot title as a ggtext text box with a grey fill and a dark
# border, using a plain-text title.
g +
  ggtitle("TfL bike sharing trends in London for the years 2015 and 2016 during day and night") +
  theme(
    plot.title = ggtext::element_textbox_simple(
      margin = margin(t = 12, b = 12),
      # margin() takes one scalar per side (t, r, b, l). Passing a length-4
      # vector as `t` only worked by accident, so spell out all four sides.
      padding = margin(12, 12, 12, 12),
      fill = "grey90",
      box.colour = "grey30",
      linetype = "13",
      r = unit(9, "pt"),
      halign = .5,
      lineheight = 1
    )
  )
# Same title box as a plain-text version would use, but the title carries
# inline HTML/markdown that colours "day" and "night"; the colour legend is
# hidden via legend.position = "none".
g +
  ggtitle("TfL bike sharing trends in London for the years 2015 and 2016 during *<b style='color:#B48200;'>day</b>* and *<b style='color:#663399;'>night</b>*") +
  theme(
    plot.title = ggtext::element_textbox_simple(
      margin = margin(t = 12, b = 12),
      # margin() takes one scalar per side (t, r, b, l). Passing a length-4
      # vector as `t` only worked by accident, so spell out all four sides.
      padding = margin(12, 12, 12, 12),
      fill = "grey90",
      box.colour = "grey30",
      linetype = "13",
      r = unit(9, "pt"),
      halign = .5,
      lineheight = 1
    ),
    legend.position = "none"
  )
{ggiraph}p1 <-
ggplot(bikes_monthly, aes(x = month, y = count, color = day_night, group = day_night)) +
ggiraph::geom_line_interactive(aes(tooltip = day_night, data_id = day_night), linewidth = 1) +
scale_color_manual(values = c("#EFAC00", "#9C55E3"), guide = "none")
ggiraph::set_girafe_defaults(
opts_zoom = ggiraph::opts_zoom(min = 1, max = 4),
opts_toolbar = ggiraph::opts_toolbar(position = "bottomright")
)
ggiraph::girafe(
ggobj = p1, width_svg = 12, height_svg = 7,
options = list(
ggiraph::opts_hover_inv(css = "opacity:0.3;"),
ggiraph::opts_hover(css = "stroke-width:5;")
)
)p2 <-
ggplot(bikes, aes(x = temp, y = count, color = day_night)) +
ggiraph::geom_point_interactive(aes(tooltip = date, data_id = date), size = 3, alpha = .7) +
ggforce::geom_mark_hull(
aes(label = "Tube Network Strikes 2015", filter = count > 40000),
description = "Commuters had to deal with severe disruptions in public transport on July 9 and August 6",
color = "black", label.family = "Asap SemiCondensed", label.fontsize = c(18, 14)
) +
scale_color_manual(values = c("#EFAC00", "#9C55E3"), guide = "none") +
ggtitle("TfL bike sharing trends by *<b style='color:#B48200;'>day</b>* and *<b style='color:#663399;'>night</b>*") +
theme(plot.title = ggtext::element_markdown())
ggiraph::girafe(
ggobj = p2, width_svg = 12, height_svg = 7,
options = list(
ggiraph::opts_tooltip(use_fill = TRUE, css = "font-size:18pt;font-weight:600;color:white;padding:7px;"),
ggiraph::opts_hover(css = "fill:black;stroke:black;stroke-width:8px;opacity:1;"),
ggiraph::opts_hover_inv(css = "opacity:0.3;")
)
){geofacet} — tile grid maps{ggalluvial} — alluvial plots{ggalt} — dumbbell, horizon, and lollipop charts, splines, …{ggbeeswarm} — beeswarm plots and variants{ggbraid} — ribbons for alternating groups{ggbump} — bump charts{ggdensity} — improved density plots{ggdist} — uncertainty visualizations{ggforce} — several interesting layers (and more){ggpattern} — pattern fills for layers{ggpointdensity} — density gradients for scatter plots{ggraph} — networks, graphs & trees{ggridges} — ridgeline plots{ggsankey} — sankey diagrams{ggsignif} — significance levels{ggstar} — more point shapes{ggstream} — stream graphs{ggupset} — upset graphs{treemapify} — treemaps{cowplot} — combine ggplots{ggannotate} — point-n-click annotations{ggblend} — blend, compose, adjust layers{ggfittext} — scale text according to space{ggfx} — shaders and filters for layers{ggh4x} — facets, positions, and more{ggtext} — text rendering for theme elements + text layers{lemon} — axis lines (and a few layers){patchwork} — combine ggplots{scales} — control scales{ggdark}{ggsci} (also color scales){ggtech} (also color scales){ggthemes} (also color scales){ggthemr}{hrbrthemes} (also color scales){tvthemes} (also color scales){ggiraph}{plotly}{echarts4r}*{highcharter}*{charter}*{streamgraph}*{tmap}*{leaflet}*{globe4r}*{grapher}** not using ggplot2

{ggdist}ggplot(bikes, aes(x = season, y = humidity)) +
ggdist::stat_interval(.width = 1:4*.25) +
ggdist::stat_halfeye(aes(fill = day_night), slab_alpha = .3, shape = 21, .width = 0, color = "white", position = position_nudge(x = .025)) +
scale_color_grey(start = .9, end = .2) +
scale_fill_manual(values = c("#EFAC00", "#9C55E3"), name = NULL){ggtext}friends <- readr::read_csv(
"https://cedricscherer.com/data/friends-mentions-partners.csv"
)
friends# A tibble: 725 × 6
id season episode key partners mentions
<dbl> <dbl> <dbl> <chr> <chr> <dbl>
1 1 1 1 Ross & Rachel Ross & Rachel 4
2 1 1 1 Rachel & Joey Rachel & Joey 1
3 2 1 2 Ross & Rachel Ross & Rachel 1
4 2 1 2 Ross Ross & Carol 2
5 2 1 2 Rachel & Joey Rachel & Joey 1
6 2 1 2 Rachel Rachel & Barry 3
7 2 1 2 Monica & Chandler Monica & Chandler 1
8 5 1 5 Ross & Rachel Ross & Rachel 3
9 5 1 5 Ross Ross & Carol 1
10 5 1 5 Chandler Chandler & Janice 2
# ℹ 715 more rows
# Lookup table: one hex colour per character or couple (`key`).
match_colors <- tibble::tribble(
  ~key,                ~color,
  "Chandler",          "#48508c",
  "Joey",              "#55331d",
  "Monica",            "#a64d64",
  "Monica & Chandler", "#774f78",
  "Phoebe",            "#5b7233",
  "Rachel",            "#ba2a22",
  "Rachel & Joey",     "#882f20",
  "Ross",              "#f6ab18",
  "Ross & Rachel",     "#d86b1d"
)
match_colors# A tibble: 9 × 2
key color
<chr> <chr>
1 Chandler #48508c
2 Joey #55331d
3 Monica #a64d64
4 Monica & Chandler #774f78
5 Phoebe #5b7233
6 Rachel #ba2a22
7 Rachel & Joey #882f20
8 Ross #f6ab18
9 Ross & Rachel #d86b1d
# Derive the colour key: the three listed couples keep their full label,
# every other pairing is keyed by the first word of `partners`.
# Then attach the matching colour.
friends |>
  mutate(key = if_else(
    !partners %in% c("Ross & Rachel", "Rachel & Joey", "Monica & Chandler"),
    word(partners, 1), partners
  )) |>
  left_join(
    match_colors,
    by = "key" # explicit key: no "Joining with ..." message, no accidental extra join columns
  ) # A tibble: 725 × 7
id season episode key partners mentions color
<dbl> <dbl> <dbl> <chr> <chr> <dbl> <chr>
1 1 1 1 Ross & Rachel Ross & Rachel 4 #d86b1d
2 1 1 1 Rachel & Joey Rachel & Joey 1 #882f20
3 2 1 2 Ross & Rachel Ross & Rachel 1 #d86b1d
4 2 1 2 Ross Ross & Carol 2 #f6ab18
5 2 1 2 Rachel & Joey Rachel & Joey 1 #882f20
6 2 1 2 Rachel Rachel & Barry 3 #ba2a22
7 2 1 2 Monica & Chandler Monica & Chandler 1 #774f78
8 5 1 5 Ross & Rachel Ross & Rachel 3 #d86b1d
9 5 1 5 Ross Ross & Carol 1 #f6ab18
10 5 1 5 Chandler Chandler & Janice 2 #48508c
# ℹ 715 more rows
# Build the data used for plotting: add the colour key, join the colours,
# and wrap the coloured part of each `partners` label in an HTML <b> tag.
friends_render <- friends |>
  mutate(key = if_else(
    !partners %in% c("Ross & Rachel", "Rachel & Joey", "Monica & Chandler"),
    word(partners, 1), partners
  )) |>
  left_join(
    match_colors,
    by = "key" # explicit key: no "Joining with ..." message, no accidental extra join columns
  ) |>
  mutate(
    partners = if_else(
      key %in% c("Ross & Rachel", "Rachel & Joey", "Monica & Chandler"),
      # couples: colour the whole label
      paste0("<b style='color:", color, "'>", partners, "</b>"),
      # otherwise: colour only the key (first name) inside the label
      str_replace(partners, key, paste0("<b style='color:", color, "'>", key, "</b>"))
    )
  ) # A tibble: 25 × 3
key color partners
<chr> <chr> <chr>
1 Ross & Rachel #d86b1d <b style='color:#d86b1d'>Ross & Rachel</b>
2 Rachel & Joey #882f20 <b style='color:#882f20'>Rachel & Joey</b>
3 Ross #f6ab18 <b style='color:#f6ab18'>Ross</b> & Carol
4 Rachel #ba2a22 <b style='color:#ba2a22'>Rachel</b> & Barry
5 Monica & Chandler #774f78 <b style='color:#774f78'>Monica & Chandler</b>
6 Chandler #48508c <b style='color:#48508c'>Chandler</b> & Janice
7 Rachel #ba2a22 <b style='color:#ba2a22'>Rachel</b> & Paolo
8 Phoebe #5b7233 <b style='color:#5b7233'>Phoebe</b> & David
9 Rachel #ba2a22 <b style='color:#ba2a22'>Rachel</b> & Tag
10 Ross #f6ab18 <b style='color:#f6ab18'>Ross</b> & Julie
# ℹ 15 more rows
ggplot(friends_render,
aes(x = id, y = partners)) +
geom_point(aes(size = mentions, color = color), alpha = .3) +
scale_color_identity() +
scale_size_area(max_size = 5, guide = "none") +
coord_cartesian(expand = FALSE, clip = "off") +
labs(x = "Episodes", y = NULL) +
theme_minimal(base_family = "Asap SemiCondensed") +
theme(
axis.text.y = ggtext::element_markdown(hjust = 0),
axis.text.x = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank()
){patchwork}Illustration by Allison Horst

# Session-wide default theme: theme_minimal in the "Pally" typeface.
theme_set(theme_minimal(base_family = "Pally", base_size = 18))

# Adjustments layered on top of the default theme.
theme_update(
  # text elements
  text = element_text(family = "Pally"),
  axis.text = element_text(color = "grey50", size = 12),
  axis.title = element_text(color = "grey40", face = "bold"),
  axis.title.x = element_text(margin = margin(t = 12)),
  axis.title.y = element_text(margin = margin(r = 12)),
  legend.text = element_text(color = "grey50", size = 12),
  plot.tag = element_text(size = 40, margin = margin(b = 15)),
  # lines and backgrounds
  axis.line = element_line(color = "grey80", linewidth = .4),
  panel.grid = element_blank(),
  plot.background = element_rect(fill = "white", color = "white")
)
# Rows with a known weather type, plus the total count per weather type
# (`sum`); `weather_type` becomes a factor of wrapped, title-cased labels
# ordered by that total.
bikes_sorted <- bikes %>%
  filter(!is.na(weather_type)) %>%
  group_by(weather_type) %>%
  mutate(sum = sum(count)) %>%
  ungroup() %>%
  mutate(
    weather_type = forcats::fct_reorder(
      .f = weather_type %>% str_wrap(5) %>% str_to_title(),
      .x = sum
    )
  )
# P1: horizontal lollipop chart of the summed bike counts per weather type.
p1 <- ggplot(
  bikes_sorted,
  aes(x = weather_type, y = count, color = weather_type)
) +
  geom_hline(yintercept = 0, color = "grey80", linewidth = .4) +
  # lollipop head: sum of `count` per weather type
  stat_summary(
    geom = "point", fun = "sum", size = 12
  ) +
  # lollipop stick from 0 to the sum; lines take `linewidth`, not `size`
  stat_summary(
    geom = "linerange", ymin = 0, fun.max = function(y) sum(y),
    linewidth = 2, show.legend = FALSE
  ) +
  coord_flip(ylim = c(0, NA), clip = "off") +
  scale_y_continuous(
    expand = c(0, 0), limits = c(0, 8500000),
    # "K" means thousands, so divide by 1,000 (was .0001, labels were 10x too small)
    labels = scales::comma_format(scale = .001, suffix = "K")
  ) +
  scale_color_viridis_d(
    option = "magma", direction = -1, begin = .1, end = .9, name = NULL,
    guide = guide_legend(override.aes = list(size = 7))
  ) +
  labs(
    x = NULL, y = "Sum of reported bike shares", tag = "P1"
  ) +
  theme(
    axis.line.y = element_blank(),
    axis.text.y = element_text(
      family = "Pally", color = "grey50", face = "bold",
      margin = margin(r = 15), lineheight = .9
    )
  )
p1
# P2: one dot per record for winter weekend nights, stacked per weather type.
# `id` is the running row number within each weather type and serves as the
# y position; .drop = FALSE is passed to group_by() for the factor grouping.
p2 <- bikes_sorted %>%
  filter(season == "winter", is_weekend == TRUE, day_night == "night") %>%
  group_by(weather_type, .drop = FALSE) %>%
  mutate(id = row_number()) %>%
  ggplot(aes(x = weather_type, y = id, color = weather_type)) +
  geom_point(size = 4.5) +
  coord_cartesian(ylim = c(.5, NA), clip = "off") +
  scale_color_viridis_d(
    option = "magma",
    direction = -1,
    begin = .1,
    end = .9,
    name = NULL,
    guide = guide_legend(override.aes = list(size = 7))
  ) +
  labs(
    x = NULL,
    y = "Reported bike shares on\nweekend winter nights",
    tag = "P2"
  )
p2
# Colours for the day and night average lines and their text labels.
my_colors <- c("#cc0000", "#000080")

# P3: weekly summed counts per year and day/night period (grey lines), with
# the per-week average across years drawn on top in colour.
p3 <- bikes %>%
  group_by(week = lubridate::week(date), day_night, year) %>%
  # state the grouping result explicitly; the data is regrouped right after
  summarize(count = sum(count), .groups = "drop") %>%
  group_by(week, day_night) %>%
  mutate(avg = mean(count)) %>%
  ggplot(aes(x = week, y = count, group = interaction(day_night, year))) +
  # lines take `linewidth`, not `size`
  geom_line(color = "grey65", linewidth = 1) +
  geom_line(aes(y = avg, color = day_night), stat = "unique", linewidth = 1.7) +
  annotate(
    geom = "text", label = c("Day", "Night"), color = my_colors,
    x = c(5, 18), y = c(125000, 29000), size = 8, fontface = "bold", family = "Pally"
  ) +
  scale_x_continuous(breaks = c(1, 1:10 * 5)) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_color_manual(values = my_colors, guide = "none") +
  labs(
    x = "Week of the Year", y = "Reported bike shares\n(cumulative # per week)", tag = "P3"
  )
p3

Cédric Scherer // Data Visualization & Information Design