河流图
Last updated
Last updated
install.packages("ggstream")
Sys.setlocale(locale = 'EN')
# read data
DataRaw <- read.csv('./data/covid-variants-bar.csv')
# filter USA data
DataUSA <- DataRaw |>
filter(Code == 'USA') |>
select(-c(Entity, Code))
# transformer data
DataUSA <- DataUSA |>
pivot_longer(cols = -Day,
names_to = 'Variants',
values_to = 'Values') |>
group_by(Variants) |>
mutate(Percentage = Values/sum(Values),
Date = as.Date(Day),
Day = Date - as.Date('2021-1-1')) |>
filter(Values != 0)
# transformer lower percentage to others
DataUSA$Labels <- DataUSA$Variants
DataUSA$Labels[DataUSA$Percentage < 0.01] <- 'Others'
DataUSA$Labels[DataUSA$Labels == "non_who"] <- 'Others'
DataUSA$Labels[DataUSA$Labels == "Recombinant"] <- 'Others'
DataPeak <- DataUSA |>
summarise(Date = max(Date)) |>
arrange(Date)
# combind others
DataUSA <- DataUSA |>
group_by(Date, Labels) |>
summarise(Values = sum(Values),
.groups = 'drop') |>
group_by(Date) |>
mutate(Percentage = Values/sum(Values))
# reorder color
fill_label <- DataPeak$Variants
fill_label <- fill_label[fill_label != "Others"]
fill_label <- c(fill_label, 'Others')
fill_color <- viridis(n = length(fill_label)-1,
option = 'A')
fill_color <- c(fill_color, 'grey50')
names(fill_color) <- fill_label
# setting axis breaks
axis_break <- seq.Date(min(DataUSA$Date),
max(DataUSA$Date),
by = '3 months')
# the newest data
DataNew <- DataUSA |>
filter(Date == max(DataUSA$Date)) |>
mutate(Percentages = paste0(Labels, " (", sprintf("%.2f", Percentage*100), "%)"),
Labels = factor(Labels,
levels = rev(fill_label))) |>
arrange(Labels)
DataNew$index <- (1:nrow(DataNew))*0.1
# set order
DataUSA <- DataUSA |>
mutate(Labels = factor(Labels,
levels = fill_label))
ggplot(data = DataUSA)+
geom_stream(type = 'proportional',
mapping = aes(x = Date,
y = Percentage,
fill = Labels))+
geom_label(data = DataNew,
mapping = aes(x = Date + 15,
y = index,
label = Percentages),
color = 'black',
fill = '#FCFDBFFF',
label.size = NA,
size = 2.5,
hjust = 0)+
scale_fill_manual(values = fill_color)+
scale_x_date(expand = expansion(mult = c(0.1, 0.3)),
breaks = axis_break,
date_labels = '%b %Y')+
scale_y_continuous(expand = c(0, 0),
labels = scales::percent_format(scale = 100),
breaks = seq(0, 1, 0.1),
sec.axis = sec_axis(trans = ~.,
labels = scales::percent_format(scale = 100),
breaks = seq(0, 1, 0.1))
)+
theme_classic()+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.line.x = element_blank(),
legend.position = 'bottom',
legend.spacing.y = unit(50, 'pt'),
legend.key.height = unit(0.3, 'cm'),
legend.key.width = unit(0.7, 'cm'),
plot.title = element_text(hjust = 1, face = 'bold'),
plot.subtitle = element_text(hjust = 1, face = 'bold'),
plot.caption = element_text(hjust = 0),
plot.margin = margin(10, 10, 10, 10))+
labs(x = NULL,
y = NULL,
fill = NULL,
title = paste0("Timecourse of SARS-Cov-2 Variants sublineage distribution"),
subtitle = max(DataNew$Date),
caption = "See https://www.who.int/activities/tracking-SARS-CoV-2-variants for variant information and definitions.")
ggsave('outcome.png',
width = 14,
height = 6)