old_query <- "
SELECT
year,
month,
day,
CASE WHEN (user_agent LIKE '%iOS%' OR user_agent LIKE '%iPhone%') THEN 'iOS' ELSE 'Android' END AS platform,
COUNT(DISTINCT COALESCE(x_analytics_map['wmfuuid'],
parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'appInstallID'))) AS n_users
FROM wmf.webrequest
{date_clause}
AND user_agent LIKE('WikipediaApp%')
AND ((parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'action') = 'mobileview' AND uri_path == '/w/api.php')
OR (uri_path LIKE '/api/rest_v1%' AND uri_query == ''))
AND COALESCE(x_analytics_map['wmfuuid'],
parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'appInstallID')) IS NOT NULL
AND webrequest_source IN ('text')
GROUP BY year, month, day, CASE WHEN (user_agent LIKE '%iOS%' OR user_agent LIKE '%iPhone%') THEN 'iOS' ELSE 'Android' END
"
new_query <- "
SELECT
year,
month,
day,
CASE WHEN (user_agent LIKE '%iOS%' OR user_agent LIKE '%iPhone%') THEN 'iOS' ELSE 'Android' END AS platform,
COUNT(DISTINCT COALESCE(x_analytics_map['wmfuuid'],
parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'appInstallID'))) AS n_users
FROM wmf.webrequest
{date_clause}
AND access_method='mobile app'
AND COALESCE(x_analytics_map['wmfuuid'],
parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'appInstallID')) IS NOT NULL
AND webrequest_source IN ('text')
GROUP BY year, month, day, CASE WHEN (user_agent LIKE '%iOS%' OR user_agent LIKE '%iPhone%') THEN 'iOS' ELSE 'Android' END
"
start_date <- as.Date("2018-05-27")
end_date <- Sys.Date() - 1
date_clause <- wmf::date_clause(seq(start_date, end_date, "day"))$date_clause
uniques_old_query <- purrr::map_df(
.x=date_clause,
.f=~wmf::query_hive(glue::glue(old_query, date_clause=.x))
)
uniques_new_query <- purrr::map_df(
.x=date_clause,
.f=~wmf::query_hive(glue::glue(new_query, date_clause=.x))
)
suppressPackageStartupMessages({library(tidyverse); library(magrittr)})
uniques_old_query <- uniques_old_query %>%
dplyr::mutate(query = "old")
uniques_new_query <- uniques_new_query %>%
dplyr::mutate(query = "new")
data <- rbind(uniques_old_query, uniques_new_query) %>%
dplyr::mutate(date = lubridate::as_date(paste(year, month, day, sep = "-"))) %>%
dplyr::select(-year, -month, -day)
head(data)
data %>%
ggplot2::ggplot(aes(x=date, y=n_users, colour=query)) +
ggplot2::geom_line(size=1.2) +
ggplot2::scale_x_date(name = "Date", date_breaks = "1 month", date_labels = "%b %d") +
ggplot2::scale_y_continuous(labels=polloi::compress, name = "App Unique Devices") +
ggplot2::scale_color_brewer("Query", palette = "Set1") +
ggplot2::facet_wrap(~ platform, scales = "free_y", ncol = 1) +
ggplot2::labs(title = "App unique device counts - query comparison", subtitle = "From May 27 to Aug 23, 2018") +
wmf::theme_facet(base_size = 15)
data %>%
tidyr::spread(query, n_users) %>%
dplyr::mutate(diff = new - old) %>%
dplyr::group_by(platform) %>%
dplyr::summarize(mean_diff = mean(diff), mean_diff_perc = mean(diff/old))