Code and values for Figures 4 and 5 in Section 4 of the report “Excess deaths”
Raw code can be downloaded by selecting the Download Rmd option from the Code drop down menu at the top of this page.
R packages and required helper functions.
library(tidyverse)
library(curl)
library(readxl)
library(lubridate)
library(forcats)
library(patchwork)
library(ggthemes)
library(socviz)
library(here)
#Helper function
`%nin%` <- negate(`%in%`)
#Short cut for csv output with html tables
my_datatable <- function(x){
DT::datatable(x, extensions = "Buttons", options = list(dom = "Bfrtip",
buttons = c("csv")))
}
#Baseline plot settings
theme_set(theme_minimal(base_family = "Roboto", base_size = 20) +
theme(panel.grid.minor = element_blank(),
axis.title.y = element_text(margin = margin(0, 20, 0, 0)),
axis.title.x = element_text(margin = margin(20, 0, 0, 0)),
plot.caption = element_text(colour = "#AAAAAA")))#,
#plot.margin = margin(3,15,3,3,"mm")))
#global options for scientific numbers and significant digits.
options(scipen = 10,
digits = 1)Data came from disparate sources. The tabset below identifies where and how it was wrangled in order to be combined into one UK table.
Data sourced from here and here and collated into .csv files by Pietro Patrignani. I have saved this file in the excess_mort_data folder available on the project GitHub repository.
england <- read_csv(here("excess_mort_data/Weekly Summary England.csv"))
england %<>%
slice(-1) %>%
select(location,
week_number = `week number`,
all = `all deaths`,
non_covid, covid,
average = `average 2015-2019`,
excess = `excess deaths`,
country) %>%
mutate(location = factor(location,
levels = c("Hospital", "Care Home", "Home", "Other"),
labels = c("Hospital", "Care Home", "Home", "Other"))) %>%
mutate_at(.vars = c("week_number", "all", "covid"), as.integer)
my_datatable(england)This comes from two files on the National Records of Scotland (NRS) site. One has weekly COIVD and all-cause deaths, the other the weekly numbers (including averages) for the previous 5 years. URLs are shown in the code chunk. The data is downloaded directly evertime this chunk is run.
temp_1 <- tempfile()
temp_2 <- tempfile()
source <- "https://www.nrscotland.gov.uk/files//statistics/covid19/covid-deaths-data-week-26.zip"
temp_1 <- curl_download(url = source, destfile = temp_1, quiet = FALSE)
unzip(temp_1, exdir = temp_2)
ch_death_covid <- read_csv(file.path(temp_2,"covid-deaths-data-week-26_Table 1 - COVID deaths.csv"), skip = 3)
ch_death_all <- read_csv(file.path(temp_2, "covid-deaths-data-week-26_Table 2 - All deaths.csv"), skip = 3)
#5 year average can be downladed directly from this link...
ch_death_average <- read_csv("https://www.nrscotland.gov.uk/files//statistics/covid19/weekly-deaths-by-location-2015-2019.csv", skip = 2)
#Now tidy and join together
loc_names <- c("Care Home", "Home and other non institution", "Hospital",
"Other institution")
# Create a look-up table for week numbers to dates
week_lookup <- tibble(
week_number = 1:52,
week_date = seq(ymd(20191230), ymd(20201221), by = "1 week"))
#Wrangle Average deaths
ch_death_average %>%
slice(2:6, 9:13, 16:20, 23:27) %>%
mutate(location = rep(loc_names, each = 5, times = 1)) %>%
select(year = `Week number2`, location, everything(), -`53`) %>%
pivot_longer(cols = `1`:`52`, names_to = "week_number",
values_to = "n_deaths") %>%
group_by(location, week_number) %>%
mutate(min_deaths = min(n_deaths),
max_deaths = max(n_deaths),
mean_deaths = mean(n_deaths)) %>%
distinct(location, week_number, .keep_all = TRUE) %>%
select(-year, -n_deaths) %>%
ungroup %>%
mutate(week_number = as.integer(week_number)) -> sc
#Wrangle all deaths
ch_death_all %>%
select(location = X2, everything(), -`Week beginning`, -X30:-X65) %>%
slice(85:88) %>%
mutate_at(vars(`30-Dec-19`:`22-Jun-20`), as.numeric) %>%
pivot_longer(cols = `30-Dec-19`:`22-Jun-20`, names_to = "date",
values_to = "deaths_all_2020") %>%
mutate(date = dmy(date),
week_number = rep(1:26, each = 1, times = 4),
location = rep(loc_names, each = 26, times = 1)) %>%
select(-date) %>%
left_join(sc, .) -> sc
#Wrangle Covid deaths and join together
ch_death_covid %>%
select(location = X2, everything(), -`Week beginning`,
-`Year to Date`:-X44) %>%
slice(83:86) %>%
pivot_longer(cols = `30-Dec-19`:`22-Jun-20`, names_to = "date",
values_to = "deaths_covid_2020") %>%
mutate(date = dmy(date),
week_number = rep(1:26, each = 1, times = 4),
location = rep(loc_names, each = 26, times = 1)) %>%
select(-date) %>%
left_join(sc, .) %>%
mutate(deaths_nonCovid_2020 = deaths_all_2020 - deaths_covid_2020,
location = factor(location,
levels = c("Hospital", "Care Home",
"Home and other non institution",
"Other institution"),
labels = c("Hospital", "Care Home", "Home", "Other"))) %>%
left_join(., week_lookup) %>%
filter(week_number >=11 & week_number <= 26) %>%
select(-min_deaths, -max_deaths, -X29, -week_date) %>%
rename(all = deaths_all_2020,
covid = deaths_covid_2020,
non_covid = deaths_nonCovid_2020,
average = mean_deaths) %>%
mutate(excess = all - average,
country = "Scotland") -> sc
sc %>%
round_df(.) %>%
my_datatable(.)Same source as England data. Also wrangled into a .csv by Pietro.
wales <- read_csv(here("excess_mort_data/Weekly Summary Wales.csv"))
wales %<>%
slice(-1) %>%
select(location,
week_number = `week number`,
all = `all deaths`,
non_covid, covid,
average = `average 2015-2019`,
excess = `excess deaths`,
country) %>%
mutate(location = factor(location,
levels = c("Hospital", "Care Home", "Home", "Other"),
labels = c("Hospital", "Care Home", "Home", "Other"))) %>%
mutate_at(.vars = c("week_number", "all", "covid"), as.integer)
my_datatable(wales)Multisheet Excel file via Siobhán Murphy as requested and provided direct from the Northern Ireland Statistics and Research Agency (NISRA) who are keen to highlight these figures are provisional. Also need to point out here the figures we got from NISRA don’t break down by COVID/non-COVID deaths. Needs some wrangling….
#Start with hospital deaths on sheet 3
ni_hosp <- read_xlsx(here("excess_mort_data/NI results_revised.xlsx"), sheet = 3,
range = "A2:I18") %>%
mutate(location = "Hospital") %>%
select(location,
week_number = `...1`,
all = `2020`,
average = Average,
excess)
#Now ingest care home data - sheet 2
ni_ch <- read_xlsx(here("excess_mort_data/NI results_revised.xlsx"), sheet = 2,
range = "A2:I18") %>%
mutate(location = "Care Home") %>%
select(location,
week_number = `...1`,
all = `2020`,
average = Average,
excess)
#Home deaths - sheet 4
ni_home <- read_xlsx(here("excess_mort_data/NI results_revised.xlsx"), sheet = 4,
range = "A2:I18") %>%
mutate(location = "Home") %>%
select(location,
week_number = `...1`,
all = `2020`,
average = Average,
excess)
#And other place of death - sheet 5
ni_other <- read_xlsx(here("excess_mort_data/NI results_revised.xlsx"), sheet = 5,
range = "A2:I18") %>%
mutate(location = "Other") %>%
select(location,
week_number = `...1`,
all = `2020`,
average = Average,
excess)
#Join these objects together
ni <-
bind_rows(ni_hosp, ni_ch, ni_home, ni_other) %>%
mutate(country = "Northern Ireland") %>%
separate(week_number, c("drop", "week_number"), sep = " ") %>%
mutate(week_number = as.integer(week_number)) %>%
select(-drop)
my_datatable(ni)With each nations data now wrangled into the same format we can join them all together into one object - uk.
uk <- bind_rows(england, sc, wales, ni)
uk %>%
mutate(country = factor(country,
levels = c("England", "Scotland",
"Wales", "Northern Ireland"))) %>%
select(country, everything()) -> uk
my_datatable(uk)No we have all the information combined in the uk object we can start plotting. Firstly the weekly excess by nation for Figure 5.
fig_5 <-
uk %>%
group_by(country, week_number) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average) %>%
mutate(pct_change = (excess / average * 100)) %>%
ggplot(aes(week_number, pct_change, colour = country)) +
geom_point(size = 1.5) +
geom_line(size = 1.5) +
scale_colour_wsj(guide = guide_legend(keywidth = 2)) +
scale_y_continuous(limits = c(-15, 120),
breaks = scales::pretty_breaks(n = 6),
labels = scales::percent_format(scale = 1)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 12)) +
theme(legend.position = "top") +
labs(title = "Weeekly excess mortality in the UK.",
subtitle = "Weeks 11-26 2020",
x = "Week number",
y = "% change from 5-yr average",
colour = "",
caption = "Source: ONS, NRS & NISRA\nFigures are provisional\nCode:https://github.com/davidhen/ltc_covid_uk")
fig_5uk %>%
group_by(country, week_number) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average) %>%
mutate(pct_change = (excess / average * 100)) %>%
round_df(.) %>%
my_datatable(.)Figure 6 is a combination of 3 plots. First we show the individual plots and underlying tables
Summarising the excess across all weeks 11-26 by nation
uk %>%
group_by(country) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average,
pct_change = excess / average * 100) %>%
ggplot(aes(country, pct_change, fill = country)) +
geom_col() +
scale_fill_wsj() +
scale_y_continuous(limits = c(0,40),
labels = scales::percent_format(scale = 1)) +
#scale_x_discrete(position = "top") +
theme(legend.position = "none",
axis.title.y = element_text(size = 16)) +
labs(title = "Overall",
x = "",
y = "% difference from 5 year average",
caption = "") -> uk_excess
uk_excessuk %>%
group_by(country) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average) %>%
mutate(pct_change = (excess / average * 100)) %>%
round_df(.) %>%
my_datatable(.)Again a summary across all weeks 11-26 but split by location of death.
fig_excess_uk_2 <-
uk %>%
group_by(country, location) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average,
pct_change = excess / average * 100) %>%
ggplot(aes(country, pct_change, fill = country)) +
geom_col() +
facet_grid(~location, switch = "x") +
scale_fill_wsj() +
scale_y_continuous(limits = c(-20,80),
labels = scales::percent_format(scale = 1),
breaks = scales::pretty_breaks()) +
theme(legend.position = "top",
axis.text.x = element_blank(),
strip.placement = "outside",
axis.title.y = element_text(size = 16)) +
labs(title = "by Location",
x = "",
y = "% difference from 5 year average",
fill = "")
fig_excess_uk_2uk %>%
group_by(country, location) %>%
summarise(all = sum(all),
covid = sum(covid),
non_covid = sum(non_covid),
average = sum(average),
excess = all - average) %>%
mutate(pct_change = (excess / average * 100)) %>%
arrange(location) %>%
round_df(.) %>%
my_datatable(.)Finally, weekly change by location and country.
fig_uk_excess_1 <-
uk %>%
mutate(pct_change = (excess / average * 100)) %>%
ggplot(aes(week_number, pct_change, fill = country)) +
geom_col() +
facet_grid(location~country) +
scale_fill_wsj() +
scale_y_continuous(limits = c(-100, 300),
labels = scales::percent_format(scale = 1)) +
theme(legend.position = "none",
axis.title.y = element_text(size = 16),
panel.spacing.y = unit(2, "lines")) +
labs(title = "by Location and Week",
x = "Week Number",
y = "% difference from 5 year average")#,
#caption = "Source: ONS & NRS\nCode:https://github.com/davidhen/ltc_covid_uk/blob/master/excess_mort_plot.Rmd")
fig_uk_excess_1uk %>%
mutate(pct_change = (excess / average * 100)) %>%
arrange(country, location, week_number) %>%
round_df(.) %>%
my_datatable(.)Using the patchwork package to combine together.
(uk_excess + fig_excess_uk_2) / fig_uk_excess_1 +
plot_annotation(title = "Excess mortality in the UK. Weeks 11-26 2020",
subtitle = "Breakdown",
caption = "Source: ONS, NRS & NISRA\nFigures are provisional\nCode:https://github.com/davidhen/ltc_covid_uk") +
plot_layout(heights = c(1, 1.2)) -> fig_6
fig_6devtools::session_info()## ─ Session info ────────────────────────────────────────
## setting value
## version R version 4.1.0 (2021-05-18)
## os macOS Big Sur 11.4
## system aarch64, darwin20
## ui RStudio
## language (EN)
## collate en_GB.UTF-8
## ctype en_GB.UTF-8
## tz Europe/London
## date 2021-06-14
##
## ─ Packages ────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.0)
## backports 1.2.1 2020-12-09 [1] CRAN (R 4.1.0)
## broom 0.7.7 2021-06-13 [1] CRAN (R 4.1.0)
## bslib 0.2.5.1 2021-05-18 [1] CRAN (R 4.1.0)
## cachem 1.0.5 2021-05-15 [1] CRAN (R 4.1.0)
## callr 3.7.0 2021-04-20 [1] CRAN (R 4.1.0)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.1.0)
## cli 2.5.0 2021-04-26 [1] CRAN (R 4.1.0)
## colorspace 2.0-1 2021-05-04 [1] CRAN (R 4.1.0)
## crayon 1.4.1 2021-02-08 [1] CRAN (R 4.1.0)
## crosstalk 1.1.1 2021-01-12 [1] CRAN (R 4.1.0)
## curl * 4.3.1 2021-04-30 [1] CRAN (R 4.1.0)
## DBI 1.1.1 2021-01-15 [1] CRAN (R 4.1.0)
## dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.1.0)
## desc 1.3.0 2021-03-05 [1] CRAN (R 4.1.0)
## devtools 2.4.2 2021-06-07 [1] CRAN (R 4.1.0)
## digest 0.6.27 2020-10-24 [1] CRAN (R 4.1.0)
## dplyr * 1.0.6 2021-05-05 [1] CRAN (R 4.1.0)
## DT 0.18 2021-04-14 [1] CRAN (R 4.1.0)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 4.1.0)
## fansi 0.5.0 2021-05-25 [1] CRAN (R 4.1.0)
## farver 2.1.0 2021-02-28 [1] CRAN (R 4.1.0)
## fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.0)
## feather 0.3.5 2019-09-15 [1] CRAN (R 4.1.0)
## forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.1.0)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.1.0)
## generics 0.1.0 2020-10-31 [1] CRAN (R 4.1.0)
## ggplot2 * 3.3.3 2020-12-30 [1] CRAN (R 4.1.0)
## ggthemes * 4.2.4 2021-01-20 [1] CRAN (R 4.1.0)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.1.0)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.1.0)
## haven 2.4.1 2021-04-23 [1] CRAN (R 4.1.0)
## here * 1.0.1 2020-12-13 [1] CRAN (R 4.1.0)
## highr 0.9 2021-04-16 [1] CRAN (R 4.1.0)
## hms 1.1.0 2021-05-17 [1] CRAN (R 4.1.0)
## htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.1.0)
## htmlwidgets 1.5.3 2020-12-10 [1] CRAN (R 4.1.0)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.1.0)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
## jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.1.0)
## knitr 1.33 2021-04-24 [1] CRAN (R 4.1.0)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.1.0)
## lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.1.0)
## lubridate * 1.7.10 2021-02-26 [1] CRAN (R 4.1.0)
## magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.1.0)
## memoise 2.0.0 2021-01-26 [1] CRAN (R 4.1.0)
## modelr 0.1.8 2020-05-19 [1] CRAN (R 4.1.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.0)
## patchwork * 1.1.1 2020-12-17 [1] CRAN (R 4.1.0)
## pillar 1.6.1 2021-05-16 [1] CRAN (R 4.1.0)
## pkgbuild 1.2.0 2020-12-15 [1] CRAN (R 4.1.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.0)
## pkgload 1.2.1 2021-04-06 [1] CRAN (R 4.1.0)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.1.0)
## processx 3.5.2 2021-04-30 [1] CRAN (R 4.1.0)
## ps 1.6.0 2021-02-28 [1] CRAN (R 4.1.0)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.1.0)
## R6 2.5.0 2020-10-28 [1] CRAN (R 4.1.0)
## Rcpp 1.0.6 2021-01-15 [1] CRAN (R 4.1.0)
## readr * 1.4.0 2020-10-05 [1] CRAN (R 4.1.0)
## readxl * 1.3.1 2019-03-13 [1] CRAN (R 4.1.0)
## rematch 1.0.1 2016-04-21 [1] CRAN (R 4.1.0)
## remotes 2.4.0 2021-06-02 [1] CRAN (R 4.1.0)
## reprex 2.0.0 2021-04-02 [1] CRAN (R 4.1.0)
## rlang 0.4.11 2021-04-30 [1] CRAN (R 4.1.0)
## rmarkdown * 2.8 2021-05-07 [1] CRAN (R 4.1.0)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.1.0)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.0)
## rvest 1.0.0 2021-03-09 [1] CRAN (R 4.1.0)
## sass 0.4.0 2021-05-12 [1] CRAN (R 4.1.0)
## scales 1.1.1 2020-05-11 [1] CRAN (R 4.1.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.1.0)
## socviz * 1.2 2020-06-10 [1] CRAN (R 4.1.0)
## stringi 1.6.2 2021-05-17 [1] CRAN (R 4.1.0)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.1.0)
## testthat 3.0.2 2021-02-14 [1] CRAN (R 4.1.0)
## tibble * 3.1.2 2021-05-16 [1] CRAN (R 4.1.0)
## tidyr * 1.1.3 2021-03-03 [1] CRAN (R 4.1.0)
## tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.1.0)
## tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.1.0)
## usethis 2.0.1 2021-02-10 [1] CRAN (R 4.1.0)
## utf8 1.2.1 2021-03-12 [1] CRAN (R 4.1.0)
## vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.1.0)
## withr 2.4.2 2021-04-18 [1] CRAN (R 4.1.0)
## xfun 0.23 2021-05-15 [1] CRAN (R 4.1.0)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 4.1.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.1.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library