Code and values for Figures 4 and 5 in Section 4 of the report “Excess deaths”
Raw code can be downloaded by selecting the Download Rmd
option from the Code
drop down menu at the top of this page.
R packages and required helper functions.
library(tidyverse)
library(curl)
library(readxl)
library(lubridate)
library(forcats)
library(patchwork)
library(ggthemes)
library(socviz)
library(here)
#Helper function
`%nin%` <- negate(`%in%`)
#Short cut for csv output with html tables
<- function(x){
my_datatable ::datatable(x, extensions = "Buttons", options = list(dom = "Bfrtip",
DTbuttons = c("csv")))
}
#Baseline plot settings
theme_set(theme_minimal(base_family = "Roboto", base_size = 20) +
theme(panel.grid.minor = element_blank(),
axis.title.y = element_text(margin = margin(0, 20, 0, 0)),
axis.title.x = element_text(margin = margin(20, 0, 0, 0)),
plot.caption = element_text(colour = "#AAAAAA")))#,
#plot.margin = margin(3,15,3,3,"mm")))
#global options for scientific numbers and significant digits.
options(scipen = 10,
digits = 1)
Data came from disparate sources. The tabset below identifies where and how it was wrangled in order to be combined into one UK table.
Data sourced from here and here and collated into .csv
files by Pietro Patrignani. I have saved this file in the excess_mort_data
folder available on the project GitHub repository.
<- read_csv(here("excess_mort_data/Weekly Summary England.csv"))
england
%<>%
england slice(-1) %>%
select(location,
week_number = `week number`,
all = `all deaths`,
non_covid, covid, average = `average 2015-2019`,
excess = `excess deaths`,
%>%
country) mutate(location = factor(location,
levels = c("Hospital", "Care Home", "Home", "Other"),
labels = c("Hospital", "Care Home", "Home", "Other"))) %>%
mutate_at(.vars = c("week_number", "all", "covid"), as.integer)
my_datatable(england)
This comes from two files on the National Records of Scotland (NRS) site. One has weekly COIVD and all-cause deaths, the other the weekly numbers (including averages) for the previous 5 years. URLs are shown in the code chunk. The data is downloaded directly evertime this chunk is run.
<- tempfile()
temp_1 <- tempfile()
temp_2 <- "https://www.nrscotland.gov.uk/files//statistics/covid19/covid-deaths-data-week-26.zip"
source
<- curl_download(url = source, destfile = temp_1, quiet = FALSE)
temp_1 unzip(temp_1, exdir = temp_2)
<- read_csv(file.path(temp_2,"covid-deaths-data-week-26_Table 1 - COVID deaths.csv"), skip = 3)
ch_death_covid <- read_csv(file.path(temp_2, "covid-deaths-data-week-26_Table 2 - All deaths.csv"), skip = 3)
ch_death_all #5 year average can be downladed directly from this link...
<- read_csv("https://www.nrscotland.gov.uk/files//statistics/covid19/weekly-deaths-by-location-2015-2019.csv", skip = 2)
ch_death_average
#Now tidy and join together
<- c("Care Home", "Home and other non institution", "Hospital",
loc_names "Other institution")
# Create a look-up table for week numbers to dates
<- tibble(
week_lookup week_number = 1:52,
week_date = seq(ymd(20191230), ymd(20201221), by = "1 week"))
#Wrangle Average deaths
%>%
ch_death_average slice(2:6, 9:13, 16:20, 23:27) %>%
mutate(location = rep(loc_names, each = 5, times = 1)) %>%
select(year = `Week number2`, location, everything(), -`53`) %>%
pivot_longer(cols = `1`:`52`, names_to = "week_number",
values_to = "n_deaths") %>%
group_by(location, week_number) %>%
mutate(min_deaths = min(n_deaths),
max_deaths = max(n_deaths),
mean_deaths = mean(n_deaths)) %>%
distinct(location, week_number, .keep_all = TRUE) %>%
select(-year, -n_deaths) %>%
%>%
ungroup mutate(week_number = as.integer(week_number)) -> sc
#Wrangle all deaths
%>%
ch_death_all select(location = X2, everything(), -`Week beginning`, -X30:-X65) %>%
slice(85:88) %>%
mutate_at(vars(`30-Dec-19`:`22-Jun-20`), as.numeric) %>%
pivot_longer(cols = `30-Dec-19`:`22-Jun-20`, names_to = "date",
values_to = "deaths_all_2020") %>%
mutate(date = dmy(date),
week_number = rep(1:26, each = 1, times = 4),
location = rep(loc_names, each = 26, times = 1)) %>%
select(-date) %>%
left_join(sc, .) -> sc
#Wrangle Covid deaths and join together
%>%
ch_death_covid select(location = X2, everything(), -`Week beginning`,
-`Year to Date`:-X44) %>%
slice(83:86) %>%
pivot_longer(cols = `30-Dec-19`:`22-Jun-20`, names_to = "date",
values_to = "deaths_covid_2020") %>%
mutate(date = dmy(date),
week_number = rep(1:26, each = 1, times = 4),
location = rep(loc_names, each = 26, times = 1)) %>%
select(-date) %>%
left_join(sc, .) %>%
mutate(deaths_nonCovid_2020 = deaths_all_2020 - deaths_covid_2020,
location = factor(location,
levels = c("Hospital", "Care Home",
"Home and other non institution",
"Other institution"),
labels = c("Hospital", "Care Home", "Home", "Other"))) %>%
left_join(., week_lookup) %>%
filter(week_number >=11 & week_number <= 26) %>%
select(-min_deaths, -max_deaths, -X29, -week_date) %>%
rename(all = deaths_all_2020,
covid = deaths_covid_2020,
non_covid = deaths_nonCovid_2020,
average = mean_deaths) %>%
mutate(excess = all - average,
country = "Scotland") -> sc
%>%
sc round_df(.) %>%
my_datatable(.)
Same source as England data. Also wrangled into a .csv
by Pietro.
<- read_csv(here("excess_mort_data/Weekly Summary Wales.csv"))
wales
%<>%
wales slice(-1) %>%
select(location,
week_number = `week number`,
all = `all deaths`,
non_covid, covid, average = `average 2015-2019`,
excess = `excess deaths`,
%>%
country) mutate(location = factor(location,
levels = c("Hospital", "Care Home", "Home", "Other"),
labels = c("Hospital", "Care Home", "Home", "Other"))) %>%
mutate_at(.vars = c("week_number", "all", "covid"), as.integer)
my_datatable(wales)
Multisheet Excel file via Siobhán Murphy as requested and provided direct from the Northern Ireland Statistics and Research Agency (NISRA) who are keen to highlight these figures are provisional. Also need to point out here the figures we got from NISRA don’t break down by COVID/non-COVID deaths. Needs some wrangling….
#Start with hospital deaths on sheet 3
<- read_xlsx(here("excess_mort_data/NI results_revised.xlsx"), sheet = 3,
ni_hosp range = "A2:I18") %>%
mutate(location = "Hospital") %>%
select(location,
week_number = `...1`,
all = `2020`,
average = Average,
excess)
#Now ingest care home data - sheet 2
<- read_xlsx(here("excess_mort_data/NI results_revised.xlsx"), sheet = 2,
ni_ch range = "A2:I18") %>%
mutate(location = "Care Home") %>%
select(location,
week_number = `...1`,
all = `2020`,
average = Average,
excess)
#Home deaths - sheet 4
<- read_xlsx(here("excess_mort_data/NI results_revised.xlsx"), sheet = 4,
ni_home range = "A2:I18") %>%
mutate(location = "Home") %>%
select(location,
week_number = `...1`,
all = `2020`,
average = Average,
excess)
#And other place of death - sheet 5
<- read_xlsx(here("excess_mort_data/NI results_revised.xlsx"), sheet = 5,
ni_other range = "A2:I18") %>%
mutate(location = "Other") %>%
select(location,
week_number = `...1`,
all = `2020`,
average = Average,
excess)
#Join these objects together
<-
ni bind_rows(ni_hosp, ni_ch, ni_home, ni_other) %>%
mutate(country = "Northern Ireland") %>%
separate(week_number, c("drop", "week_number"), sep = " ") %>%
mutate(week_number = as.integer(week_number)) %>%
select(-drop)
my_datatable(ni)
With each nations data now wrangled into the same format we can join them all together into one object - uk
.
<- bind_rows(england, sc, wales, ni)
uk
%>%
uk mutate(country = factor(country,
levels = c("England", "Scotland",
"Wales", "Northern Ireland"))) %>%
select(country, everything()) -> uk
my_datatable(uk)
No we have all the information combined in the uk
object we can start plotting. Firstly the weekly excess by nation for Figure 5.
<-
fig_5 %>%
uk group_by(country, week_number) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average) %>%
mutate(pct_change = (excess / average * 100)) %>%
ggplot(aes(week_number, pct_change, colour = country)) +
geom_point(size = 1.5) +
geom_line(size = 1.5) +
scale_colour_wsj(guide = guide_legend(keywidth = 2)) +
scale_y_continuous(limits = c(-15, 120),
breaks = scales::pretty_breaks(n = 6),
labels = scales::percent_format(scale = 1)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 12)) +
theme(legend.position = "top") +
labs(title = "Weeekly excess mortality in the UK.",
subtitle = "Weeks 11-26 2020",
x = "Week number",
y = "% change from 5-yr average",
colour = "",
caption = "Source: ONS, NRS & NISRA\nFigures are provisional\nCode:https://github.com/davidhen/ltc_covid_uk")
fig_5
%>%
uk group_by(country, week_number) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average) %>%
mutate(pct_change = (excess / average * 100)) %>%
round_df(.) %>%
my_datatable(.)
Figure 6 is a combination of 3 plots. First we show the individual plots and underlying tables
Summarising the excess across all weeks 11-26 by nation
%>%
uk group_by(country) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average,
pct_change = excess / average * 100) %>%
ggplot(aes(country, pct_change, fill = country)) +
geom_col() +
scale_fill_wsj() +
scale_y_continuous(limits = c(0,40),
labels = scales::percent_format(scale = 1)) +
#scale_x_discrete(position = "top") +
theme(legend.position = "none",
axis.title.y = element_text(size = 16)) +
labs(title = "Overall",
x = "",
y = "% difference from 5 year average",
caption = "") -> uk_excess
uk_excess
%>%
uk group_by(country) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average) %>%
mutate(pct_change = (excess / average * 100)) %>%
round_df(.) %>%
my_datatable(.)
Again a summary across all weeks 11-26 but split by location of death.
<-
fig_excess_uk_2 %>%
uk group_by(country, location) %>%
summarise(all = sum(all),
average = sum(average),
excess = all - average,
pct_change = excess / average * 100) %>%
ggplot(aes(country, pct_change, fill = country)) +
geom_col() +
facet_grid(~location, switch = "x") +
scale_fill_wsj() +
scale_y_continuous(limits = c(-20,80),
labels = scales::percent_format(scale = 1),
breaks = scales::pretty_breaks()) +
theme(legend.position = "top",
axis.text.x = element_blank(),
strip.placement = "outside",
axis.title.y = element_text(size = 16)) +
labs(title = "by Location",
x = "",
y = "% difference from 5 year average",
fill = "")
fig_excess_uk_2
%>%
uk group_by(country, location) %>%
summarise(all = sum(all),
covid = sum(covid),
non_covid = sum(non_covid),
average = sum(average),
excess = all - average) %>%
mutate(pct_change = (excess / average * 100)) %>%
arrange(location) %>%
round_df(.) %>%
my_datatable(.)
Finally, weekly change by location and country.
<-
fig_uk_excess_1 %>%
uk mutate(pct_change = (excess / average * 100)) %>%
ggplot(aes(week_number, pct_change, fill = country)) +
geom_col() +
facet_grid(location~country) +
scale_fill_wsj() +
scale_y_continuous(limits = c(-100, 300),
labels = scales::percent_format(scale = 1)) +
theme(legend.position = "none",
axis.title.y = element_text(size = 16),
panel.spacing.y = unit(2, "lines")) +
labs(title = "by Location and Week",
x = "Week Number",
y = "% difference from 5 year average")#,
#caption = "Source: ONS & NRS\nCode:https://github.com/davidhen/ltc_covid_uk/blob/master/excess_mort_plot.Rmd")
fig_uk_excess_1
%>%
uk mutate(pct_change = (excess / average * 100)) %>%
arrange(country, location, week_number) %>%
round_df(.) %>%
my_datatable(.)
Using the patchwork
package to combine together.
+ fig_excess_uk_2) / fig_uk_excess_1 +
(uk_excess plot_annotation(title = "Excess mortality in the UK. Weeks 11-26 2020",
subtitle = "Breakdown",
caption = "Source: ONS, NRS & NISRA\nFigures are provisional\nCode:https://github.com/davidhen/ltc_covid_uk") +
plot_layout(heights = c(1, 1.2)) -> fig_6
fig_6
::session_info() devtools
## ─ Session info ────────────────────────────────────────
## setting value
## version R version 4.1.0 (2021-05-18)
## os macOS Big Sur 11.4
## system aarch64, darwin20
## ui RStudio
## language (EN)
## collate en_GB.UTF-8
## ctype en_GB.UTF-8
## tz Europe/London
## date 2021-06-14
##
## ─ Packages ────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.0)
## backports 1.2.1 2020-12-09 [1] CRAN (R 4.1.0)
## broom 0.7.7 2021-06-13 [1] CRAN (R 4.1.0)
## bslib 0.2.5.1 2021-05-18 [1] CRAN (R 4.1.0)
## cachem 1.0.5 2021-05-15 [1] CRAN (R 4.1.0)
## callr 3.7.0 2021-04-20 [1] CRAN (R 4.1.0)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.1.0)
## cli 2.5.0 2021-04-26 [1] CRAN (R 4.1.0)
## colorspace 2.0-1 2021-05-04 [1] CRAN (R 4.1.0)
## crayon 1.4.1 2021-02-08 [1] CRAN (R 4.1.0)
## crosstalk 1.1.1 2021-01-12 [1] CRAN (R 4.1.0)
## curl * 4.3.1 2021-04-30 [1] CRAN (R 4.1.0)
## DBI 1.1.1 2021-01-15 [1] CRAN (R 4.1.0)
## dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.1.0)
## desc 1.3.0 2021-03-05 [1] CRAN (R 4.1.0)
## devtools 2.4.2 2021-06-07 [1] CRAN (R 4.1.0)
## digest 0.6.27 2020-10-24 [1] CRAN (R 4.1.0)
## dplyr * 1.0.6 2021-05-05 [1] CRAN (R 4.1.0)
## DT 0.18 2021-04-14 [1] CRAN (R 4.1.0)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 4.1.0)
## fansi 0.5.0 2021-05-25 [1] CRAN (R 4.1.0)
## farver 2.1.0 2021-02-28 [1] CRAN (R 4.1.0)
## fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.0)
## feather 0.3.5 2019-09-15 [1] CRAN (R 4.1.0)
## forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.1.0)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.1.0)
## generics 0.1.0 2020-10-31 [1] CRAN (R 4.1.0)
## ggplot2 * 3.3.3 2020-12-30 [1] CRAN (R 4.1.0)
## ggthemes * 4.2.4 2021-01-20 [1] CRAN (R 4.1.0)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.1.0)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.1.0)
## haven 2.4.1 2021-04-23 [1] CRAN (R 4.1.0)
## here * 1.0.1 2020-12-13 [1] CRAN (R 4.1.0)
## highr 0.9 2021-04-16 [1] CRAN (R 4.1.0)
## hms 1.1.0 2021-05-17 [1] CRAN (R 4.1.0)
## htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.1.0)
## htmlwidgets 1.5.3 2020-12-10 [1] CRAN (R 4.1.0)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.1.0)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.1.0)
## jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.1.0)
## knitr 1.33 2021-04-24 [1] CRAN (R 4.1.0)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.1.0)
## lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.1.0)
## lubridate * 1.7.10 2021-02-26 [1] CRAN (R 4.1.0)
## magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.1.0)
## memoise 2.0.0 2021-01-26 [1] CRAN (R 4.1.0)
## modelr 0.1.8 2020-05-19 [1] CRAN (R 4.1.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.0)
## patchwork * 1.1.1 2020-12-17 [1] CRAN (R 4.1.0)
## pillar 1.6.1 2021-05-16 [1] CRAN (R 4.1.0)
## pkgbuild 1.2.0 2020-12-15 [1] CRAN (R 4.1.0)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.0)
## pkgload 1.2.1 2021-04-06 [1] CRAN (R 4.1.0)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.1.0)
## processx 3.5.2 2021-04-30 [1] CRAN (R 4.1.0)
## ps 1.6.0 2021-02-28 [1] CRAN (R 4.1.0)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.1.0)
## R6 2.5.0 2020-10-28 [1] CRAN (R 4.1.0)
## Rcpp 1.0.6 2021-01-15 [1] CRAN (R 4.1.0)
## readr * 1.4.0 2020-10-05 [1] CRAN (R 4.1.0)
## readxl * 1.3.1 2019-03-13 [1] CRAN (R 4.1.0)
## rematch 1.0.1 2016-04-21 [1] CRAN (R 4.1.0)
## remotes 2.4.0 2021-06-02 [1] CRAN (R 4.1.0)
## reprex 2.0.0 2021-04-02 [1] CRAN (R 4.1.0)
## rlang 0.4.11 2021-04-30 [1] CRAN (R 4.1.0)
## rmarkdown * 2.8 2021-05-07 [1] CRAN (R 4.1.0)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.1.0)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.0)
## rvest 1.0.0 2021-03-09 [1] CRAN (R 4.1.0)
## sass 0.4.0 2021-05-12 [1] CRAN (R 4.1.0)
## scales 1.1.1 2020-05-11 [1] CRAN (R 4.1.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.1.0)
## socviz * 1.2 2020-06-10 [1] CRAN (R 4.1.0)
## stringi 1.6.2 2021-05-17 [1] CRAN (R 4.1.0)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.1.0)
## testthat 3.0.2 2021-02-14 [1] CRAN (R 4.1.0)
## tibble * 3.1.2 2021-05-16 [1] CRAN (R 4.1.0)
## tidyr * 1.1.3 2021-03-03 [1] CRAN (R 4.1.0)
## tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.1.0)
## tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.1.0)
## usethis 2.0.1 2021-02-10 [1] CRAN (R 4.1.0)
## utf8 1.2.1 2021-03-12 [1] CRAN (R 4.1.0)
## vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.1.0)
## withr 2.4.2 2021-04-18 [1] CRAN (R 4.1.0)
## xfun 0.23 2021-05-15 [1] CRAN (R 4.1.0)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 4.1.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.1.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library