Selected British literary prizes (1990–2022) analysis

Among nominees from the top 15 universities, what share are winners rather than shortlisted?

Code
# Horizontal stacked bars: one bar per degree institution, split by
# person_role into winner and shortlisted shares.
# Assumes `percent` holds proportions that sum to 1 within each institution
# (the n labels below are anchored at y = 1) -- TODO confirm upstream.
top15_win_percent_plot <- ggplot(
  data = top15_win_percent,
  mapping = aes(x = degree_institution, y = percent, fill = person_role)
) +
  geom_col(width = 0.75) +
  # Pre-formatted `label` column, centred inside each stacked segment.
  geom_text(
    mapping = aes(label = label),
    position = position_stack(vjust = 0.5),
    colour = "black",
    size = 3.5
  ) +
  # Nominee count per institution, read from the separate totals table.
  # Anchored at y = 1 and left-justified so the text starts at the end of
  # the bar; the leading spaces in the label add a small gap.
  geom_text(
    data = top15_win_percent_totals,
    mapping = aes(
      x = degree_institution,
      y = 1.0,
      label = paste0("    (n = ", total_nom, ")")
    ),
    inherit.aes = FALSE,
    colour = "grey40",
    size = 3.5,
    hjust = 0,
    vjust = 0.5
  ) +
  # No padding on the share axis, so bars span the panel edge to edge.
  scale_y_continuous(
    labels = percent,
    expand = expansion(mult = 0)
  ) +
  scale_fill_manual(
    name = NULL,
    values = c(winner = "#66c2a5", shortlisted = "#fc8d62"),
    breaks = c("winner", "shortlisted"),
    labels = c("Winner", "Shortlisted")
  ) +
  # clip = "off" lets the n labels draw beyond the panel boundary.
  coord_flip(clip = "off") +
  labs(
    title = str_wrap(
      "British literary prize nominees (1990–2022): Share of winners versus shortlisted for top 15 universities",
      width = 70
    ),
    subtitle = "Only nominees with known institutions included | n = total nominees",
    caption = "Top 15 universities selected by total nominations\nData: Data Science Learning Community (2024) | Visualisation: Lewis TJ Ward",
    x = NULL,
    y = "Share of nominees"
  ) +
  theme_tt()

# Write the figure to this week's output folder as a 12 x 8 in, 300 dpi PNG.
ggsave(
  filename = here(
    "years", "2025", "43", "output",
    "top15_universities_winner_percent.png"
  ),
  plot = top15_win_percent_plot,
  width = 12,
  height = 8,
  dpi = 300
)

# Display the plot in the rendered document.
top15_win_percent_plot

Do certain prizes have a higher proportion of LGBTQ+ nominees?

Code
# Horizontal stacked bars: one bar per prize, split by the `lgbtq` flag, with
# a dashed reference line at 3.7% (described in the subtitle and caption as
# the UK population share from the 2021 Census).
# Assumes `prop` sums to 1 within each prize (the n labels are anchored at
# y = 1) -- TODO confirm upstream.
lgbtq_representation_plot <- ggplot(
  lgbtq_by_prize,
  aes(x = prize_alias, y = prop, fill = lgbtq)
) +
  geom_col(width = 0.75) +
  # Whole-percent label for each stacked segment.
  # NOTE(review): hjust = 0.2 shifts the text right of the segment midpoint,
  # presumably to keep small-segment labels clear of the dashed baseline.
  geom_text(
    aes(label = percent(prop, accuracy = 1)),
    position = position_stack(vjust = 0.5),
    size = 3.5,
    color = "black",
    hjust = 0.2
  ) +
  # Nominee count per prize, read from the separate totals table. Anchored at
  # y = 1 and left-justified so the text starts at the end of the bar.
  geom_text(
    data = lgbtq_prize_totals,
    inherit.aes = FALSE,
    aes(
      x = prize_alias,
      y = 1.0,
      label = paste0("    (n = ", total, ")")
    ),
    size = 3.5,
    hjust = 0,
    vjust = 0.5,
    color = "grey40"
  ) +
  # Baseline reference line; the value must stay in sync with the 3.7% quoted
  # in the subtitle and caption below.
  geom_hline(
    yintercept = 0.037,
    linetype = "dashed",
    color = "black",
    linewidth = 0.5,
    alpha = 0.5
  ) +
  # clip = "off" lets the n labels draw beyond the panel boundary.
  coord_flip(clip = "off") +
  # `lgbtq` is matched by its "TRUE"/"FALSE" values; breaks put LGBTQ+ first
  # in the legend.
  scale_fill_manual(
    values = c("FALSE" = "#b2df8a", "TRUE" = "#fb9a99"),
    labels = c("FALSE" = "Not LGBTQ+", "TRUE" = "LGBTQ+"),
    breaks = c("TRUE", "FALSE"),
    name = NULL
  ) +
  # Evenly spaced quarter ticks. The previous breaks skipped 0.75, leaving an
  # irregular axis that disagreed with the ethnicity plot's share axis.
  scale_y_continuous(
    labels = percent,
    breaks = seq(0, 1, by = 0.25),
    expand = expansion(mult = c(0, 0))
  ) +
  labs(
    title = str_wrap(
      "British literary prize nominees (1990–2022): LGBTQ+ representation versus UK baseline",
      70
    ),
    subtitle = "Prizes with ≥1 LGBTQ+ nominee | Dashed line = UK population (3.7%) | n = total nominees",
    x = NULL,
    y = "Share of nominees",
    caption = "Dashed line shows UK population baseline (3.7%, 2021 Census) | Nominees limited to UK residents\nLGBTQ+ status = self-identified non-cisgender and/or non-heterosexual nominees\nData: Data Science Learning Community (2024) | Census data: UK Parliament Commons Library | Visualisation: Lewis TJ Ward"
  ) +
  theme_tt()

# Write the figure to this week's output folder as a 12 x 8 in, 300 dpi PNG.
ggsave(
  here("years", "2025", "43", "output", "lgbtq_representation_plot.png"),
  plot = lgbtq_representation_plot,
  width = 12,
  height = 8,
  dpi = 300
)

# Display the plot in the rendered document.
lgbtq_representation_plot

How does the proportion of LGBTQ+ nominees change over time?

Code
# Yearly LGBTQ+ share of nominees: a smoothed trend line plus one point per
# row of lgbtq_over_time (sized by total_nominees), against a dashed
# reference line at 3.7% (described in the subtitle and caption as the UK
# population share from the 2021 Census).
lgbtq_over_time_plot <- ggplot(
  lgbtq_over_time,
  aes(x = prize_year, y = prop_lgbtq)
) +
  # Smoother pinned to loess with an explicit formula. `span` is a loess
  # parameter, and leaving `method` unset makes ggplot2 choose the smoother
  # from the row count and print a message on every render.
  geom_smooth(
    method = "loess",
    formula = y ~ x,
    linewidth = 1.5,
    color = "#fb9a99",
    se = FALSE,
    span = 1
  ) +
  geom_point(aes(size = total_nominees), color = "#fb9a99") +
  # Baseline reference line; the value must stay in sync with the 3.7% quoted
  # in the subtitle and caption below.
  geom_hline(
    yintercept = 0.037,
    linetype = "dashed",
    color = "black",
    linewidth = 0.6,
    alpha = 0.6
  ) +
  scale_size_continuous(name = "Total nominees", range = c(1.5, 6)) +
  # Axis starts at zero with a data-driven upper limit. The breaks function
  # adds a tick at the 3.7% baseline alongside the regular pretty() breaks.
  scale_y_continuous(
    limits = c(0, NA),
    labels = percent_format(accuracy = 0.1),
    breaks = function(lims) sort(unique(c(0, pretty(lims, n = 5), 0.037))),
    expand = expansion(mult = c(0, 0.05))
  ) +
  # One tick per year across the observed range, but only years divisible by
  # four get a text label; the rest are left blank.
  scale_x_continuous(
    breaks = seq(
      min(lgbtq_over_time$prize_year, na.rm = TRUE),
      max(lgbtq_over_time$prize_year, na.rm = TRUE),
      by = 1
    ),
    labels = function(x) ifelse(x %% 4 == 0, x, ""),
    expand = expansion(mult = c(0.01, 0.01))
  ) +
  labs(
    title = str_wrap(
      "British literary prize nominees (1990–2022): LGBTQ+ representation over time versus UK baseline",
      70
    ),
    subtitle = "All UK-resident nominees across all prizes | Dashed line = UK population baseline (3.7%, 2021 Census)",
    x = "Year",
    y = "Share of nominees identifying as LGBTQ+",
    caption = "Point size = total nominees that year | Dashed line shows UK population baseline (3.7%, 2021 Census)\nNominees limited to UK residents | LGBTQ+ status = self-identified non-cisgender and/or non-heterosexual nominees\nData: Data Science Learning Community (2024) | Census data: UK Parliament Commons Library | Visualisation: Lewis TJ Ward"
  ) +
  theme_tt() +
  # Tweaks applied on top of theme_tt(): size legend placed vertically on the
  # right.
  theme(
    axis.title.y = element_text(size = 11),
    legend.position = "right",
    legend.direction = "vertical",
    legend.title = element_text(size = 11)
  )

# Write the figure to this week's output folder as a 12 x 8 in, 300 dpi PNG.
ggsave(
  here("years", "2025", "43", "output", "lgbtq_over_time_plot.png"),
  plot = lgbtq_over_time_plot,
  width = 12,
  height = 8,
  dpi = 300
)

# Display the plot in the rendered document.
lgbtq_over_time_plot

How does ethnic diversity vary across prizes?

Code
# Horizontal stacked bars: one bar per prize institution, split by
# ethnicity_macro.
# Assumes `prop` sums to 1 within each institution (the n labels are anchored
# at y = 1) -- TODO confirm upstream.
ethnicity_plot <- ggplot(
  data = ethnicity_prizes_plot_data,
  mapping = aes(x = prize_institution, y = prop, fill = ethnicity_macro)
) +
  geom_col(width = 0.75) +
  # Segment labels come from `label_display`; na.rm = TRUE drops missing
  # labels silently (the caption states values under 3% are not labelled).
  geom_text(
    mapping = aes(label = label_display),
    position = position_stack(vjust = 0.5),
    colour = "black",
    size = 3.5,
    na.rm = TRUE
  ) +
  # Nominee count per institution, read from the separate totals table.
  # Anchored at y = 1 and left-justified so the text starts at the end of
  # the bar.
  geom_text(
    data = ethnicity_totals,
    mapping = aes(
      x = prize_institution,
      y = 1,
      label = paste0("    (n = ", total_nominees, ")")
    ),
    inherit.aes = FALSE,
    colour = "grey40",
    size = 3.5,
    hjust = 0,
    vjust = 0.5,
    na.rm = TRUE
  ) +
  # Quarter ticks on the share axis, with no padding at either end.
  scale_y_continuous(
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = percent,
    expand = expansion(mult = 0)
  ) +
  # Colours and legend entries are supplied by `palette_used` and
  # `present_levels`, defined elsewhere. The legend is laid out as one row
  # with its title centred on top.
  scale_fill_manual(
    name = "Ethnicity",
    values = palette_used,
    breaks = present_levels,
    guide = guide_legend(
      nrow = 1,
      byrow = TRUE,
      title.position = "top",
      title.hjust = 0.5,
      override.aes = list(size = 4)
    )
  ) +
  # clip = "off" lets the n labels draw beyond the panel boundary.
  coord_flip(clip = "off") +
  labs(
    title = str_wrap(
      "British literary prize nominees (1990–2022): Ethnic representation per prize institution",
      width = 70
    ),
    subtitle = "Prizes with ≥1 nominee | n = total nominees",
    caption = "Values < 3% not labelled\nData: Data Science Learning Community (2024) | Visualisation: Lewis TJ Ward",
    x = NULL,
    y = "Share of nominees"
  ) +
  theme_tt() +
  # Legend sizing and spacing tweaks applied on top of theme_tt().
  theme(
    legend.box = "horizontal",
    legend.box.just = "center",
    legend.margin = margin(t = 6, b = 6),
    legend.spacing.x = unit(0.12, "cm"),
    legend.key.width = unit(0.7, "cm"),
    legend.key.height = unit(0.45, "cm"),
    legend.title = element_text(size = 11, face = "plain", hjust = 0.5),
    legend.text = element_text(size = 9)
  )

# Write the figure to this week's output folder as a 12 x 8 in, 300 dpi PNG.
ggsave(
  filename = here(
    "years", "2025", "43", "output",
    "ethnicity_representation_plot.png"
  ),
  plot = ethnicity_plot,
  width = 12,
  height = 8,
  dpi = 300
)

# Display the plot in the rendered document.
ethnicity_plot