🌊河流图

1. SARS-Cov-2变异株变化

原图

获取数据

依赖包

install.packages("ggstream")

运行代码

Sys.setlocale(locale = 'EN')

# read data
DataRaw <- read.csv('./data/covid-variants-bar.csv')

# filter USA data
DataUSA <- DataRaw |> 
  filter(Code == 'USA') |> 
  select(-c(Entity, Code))

# transformer data
DataUSA <- DataUSA |> 
  pivot_longer(cols = -Day,
               names_to = 'Variants',
               values_to = 'Values') |> 
  group_by(Variants) |> 
  mutate(Percentage = Values/sum(Values),
         Date = as.Date(Day),
         Day = Date - as.Date('2021-1-1')) |> 
  filter(Values != 0)

# transformer lower percentage to others
DataUSA$Labels <- DataUSA$Variants
DataUSA$Labels[DataUSA$Percentage < 0.01] <- 'Others'
DataUSA$Labels[DataUSA$Labels == "non_who"] <- 'Others'
DataUSA$Labels[DataUSA$Labels == "Recombinant"] <- 'Others'

DataPeak <- DataUSA |> 
  summarise(Date = max(Date)) |> 
  arrange(Date)

# combind others
DataUSA <- DataUSA |> 
  group_by(Date, Labels) |> 
  summarise(Values = sum(Values),
            .groups = 'drop') |> 
  group_by(Date) |> 
  mutate(Percentage = Values/sum(Values))

# reorder color
fill_label <- DataPeak$Variants
fill_label <- fill_label[fill_label != "Others"]
fill_label <- c(fill_label, 'Others')

fill_color <- viridis(n = length(fill_label)-1,
                      option = 'A')
fill_color <- c(fill_color, 'grey50')
names(fill_color) <- fill_label

# setting axis breaks
axis_break <- seq.Date(min(DataUSA$Date),
                       max(DataUSA$Date),
                       by = '3 months')


# the newest data
DataNew <- DataUSA |> 
  filter(Date == max(DataUSA$Date)) |> 
  mutate(Percentages = paste0(Labels, " (", sprintf("%.2f", Percentage*100), "%)"),
         Labels = factor(Labels,
                         levels = rev(fill_label))) |> 
  arrange(Labels)
DataNew$index <- (1:nrow(DataNew))*0.1

# set order
DataUSA <- DataUSA |> 
  mutate(Labels = factor(Labels,
                         levels = fill_label))

ggplot(data = DataUSA)+
  geom_stream(type = 'proportional',
       mapping = aes(x = Date,
                     y = Percentage,
                     fill = Labels))+
  geom_label(data = DataNew,
             mapping = aes(x = Date + 15,
                           y = index,
                           label = Percentages),
             color = 'black',
             fill = '#FCFDBFFF',
             label.size = NA,
             size = 2.5,
             hjust = 0)+
  scale_fill_manual(values = fill_color)+
  scale_x_date(expand = expansion(mult = c(0.1, 0.3)),
               breaks = axis_break,
               date_labels = '%b %Y')+
  scale_y_continuous(expand = c(0, 0),
                     labels = scales::percent_format(scale = 100),
                     breaks = seq(0, 1, 0.1),
                     sec.axis = sec_axis(trans = ~.,
                                         labels = scales::percent_format(scale = 100),
                                         breaks = seq(0, 1, 0.1))
                     )+
  theme_classic()+
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.line.x = element_blank(),
        legend.position = 'bottom',
        legend.spacing.y = unit(50, 'pt'),
        legend.key.height = unit(0.3, 'cm'),
        legend.key.width = unit(0.7, 'cm'),
        plot.title = element_text(hjust = 1, face = 'bold'),
        plot.subtitle = element_text(hjust = 1, face = 'bold'),
        plot.caption = element_text(hjust = 0),
        plot.margin = margin(10, 10, 10, 10))+
  labs(x = NULL,
       y = NULL,
       fill = NULL,
       title = paste0("Timecourse of SARS-Cov-2 Variants sublineage distribution"),
       subtitle = max(DataNew$Date),
       caption = "See https://www.who.int/activities/tracking-SARS-CoV-2-variants for variant information and definitions.")

ggsave('outcome.png',
       width = 14,
       height = 6)

运行结果

Last updated