library(here)
library(tidyverse)
# Read the metrics CSV from the project's data directory, normalise the
# column names, and keep rows from 2018-05-01 onwards in ascending month order.
metrics <- read_csv(here("data/metrics.csv"), show_col_types = FALSE) |>
  janitor::clean_names() |>
  filter(month >= "2018-05-01") |>
  arrange(month)

# Latest month present in the filtered data.
current_month <- metrics |>
  pull(month) |>
  max()
| month | total_pageview | pageview_multiplier | total_pageview_corrected | previews_seen | interactions | interactions_corrected |
|---|---|---|---|---|---|---|
| 2018-05-01 | 16081901671 | 1 | 16081901671 | 1965116275 | 18047017946 | 18047017946 |
| 2018-11-01 | 15838868761 | 1 | 15838868761 | 1818341997 | 17657210758 | 17657210758 |
| 2019-02-01 | 14994451563 | 1 | 14994451563 | 1738551430 | 16733002993 | 16733002993 |
| 2019-10-01 | 16722401874 | 1 | 16722401874 | 1936366629 | 18658768503 | 18658768503 |
| 2020-04-01 | 19825763318 | 1 | 19825763318 | 2419940921 | 22245704239 | 22245704239 |
| 2020-05-01 | 19295670656 | 1 | 19295670656 | 2399548068 | 21695218724 | 21695218724 |
| 2021-04-01 | 17594157293 | 1 | 17594157293 | 2129806204 | 19723963497 | 19723963497 |
| 2021-05-01 | 17783793360 | 1 | 17783793360 | 2054246112 | 19838039472 | 19838039472 |
| 2022-02-01 | 16040523442 | 1 | 16040523442 | 1833566161 | 17874089603 | 17874089603 |
| 2022-05-01 | 16386936815 | 1 | 16386936815 | 1788004827 | 18174941642 | 18174941642 |
# Extend `metrics` with two derived columns:
#   dataloss - TRUE when pageview_multiplier exceeds 1, or when the month is
#              one month before the earliest / one month after the latest
#              month whose multiplier exceeds 1.
#   period   - "before dataloss", "during dataloss" or "after dataloss",
#              split at the fixed dates 2021-06-01 and 2022-02-01.
# Helper values live inside local() so no extra names reach the global scope.
metrics_ext <- local({
  # First and last month with a multiplier above 1.
  affected_span <- metrics |>
    filter(pageview_multiplier > 1.0) |>
    pull(month) |>
    range()

  # Shift the first month back and the last month forward by one month each.
  buffer_months <- affected_span + months(c(-1, 1))

  metrics |>
    mutate(
      dataloss = pageview_multiplier > 1.0 | month %in% buffer_months,
      # Arms are tried in order, so the second arm only sees months that are
      # already on or after 2021-06-01.
      period = case_when(
        month < "2021-06-01" ~ "before dataloss",
        month < "2022-02-01" ~ "during dataloss",
        month >= "2022-02-01" ~ "after dataloss"
      )
    )
})
`dataloss` is `TRUE` for months during the data loss, as well as for the one month immediately before and the one month immediately after it.
| month | other columns | dataloss | period |
|---|---|---|---|
| 2018-07-01 | ... | FALSE | before dataloss |
| 2019-12-01 | ... | FALSE | before dataloss |
| 2020-06-01 | ... | FALSE | before dataloss |
| 2021-06-01 | ... | TRUE | during dataloss |
| 2021-08-01 | ... | TRUE | during dataloss |
| 2022-01-01 | ... | TRUE | during dataloss |
| 2022-02-01 | ... | TRUE | after dataloss |
| 2022-03-01 | ... | FALSE | after dataloss |
| 2022-06-01 | ... | FALSE | after dataloss |