Apps Unique Devices - Query Comparison

In [1]:
old_query <- "
    SELECT
        year,
        month,
        day,
        CASE WHEN (user_agent LIKE '%iOS%' OR user_agent LIKE '%iPhone%') THEN 'iOS' ELSE 'Android' END AS platform,
        COUNT(DISTINCT COALESCE(x_analytics_map['wmfuuid'],
              parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'appInstallID'))) AS n_users
    FROM wmf.webrequest
        {date_clause}
        AND user_agent LIKE('WikipediaApp%')
        AND ((parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'action') = 'mobileview' AND uri_path == '/w/api.php')
            OR (uri_path LIKE '/api/rest_v1%' AND uri_query == ''))
        AND COALESCE(x_analytics_map['wmfuuid'],
                     parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'appInstallID')) IS NOT NULL
        AND webrequest_source IN ('text')
    GROUP BY year, month, day, CASE WHEN (user_agent LIKE '%iOS%' OR user_agent LIKE '%iPhone%') THEN 'iOS' ELSE 'Android' END
"
In [2]:
new_query <- "
    SELECT
        year,
        month,
        day,
        CASE WHEN (user_agent LIKE '%iOS%' OR user_agent LIKE '%iPhone%') THEN 'iOS' ELSE 'Android' END AS platform,
        COUNT(DISTINCT COALESCE(x_analytics_map['wmfuuid'],
              parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'appInstallID'))) AS n_users
    FROM wmf.webrequest
        {date_clause}
        AND access_method='mobile app'
        AND COALESCE(x_analytics_map['wmfuuid'],
                     parse_url(concat('http://bla.org/woo/', uri_query), 'QUERY', 'appInstallID')) IS NOT NULL
        AND webrequest_source IN ('text')
    GROUP BY year, month, day, CASE WHEN (user_agent LIKE '%iOS%' OR user_agent LIKE '%iPhone%') THEN 'iOS' ELSE 'Android' END
"
In [3]:
start_date <- as.Date("2018-05-27")
end_date <- Sys.Date() - 1
date_clause <- wmf::date_clause(seq(start_date, end_date, "day"))$date_clause

uniques_old_query <- purrr::map_df(
  .x=date_clause,
  .f=~wmf::query_hive(glue::glue(old_query, date_clause=.x))
)
In [4]:
uniques_new_query <- purrr::map_df(
  .x=date_clause,
  .f=~wmf::query_hive(glue::glue(new_query, date_clause=.x))
)
In [8]:
suppressPackageStartupMessages({library(tidyverse); library(magrittr)})
uniques_old_query <- uniques_old_query %>%
  dplyr::mutate(query = "old")
uniques_new_query <- uniques_new_query %>%
  dplyr::mutate(query = "new")
data <- rbind(uniques_old_query, uniques_new_query) %>%
  dplyr::mutate(date = lubridate::as_date(paste(year, month, day, sep = "-"))) %>%
  dplyr::select(-year, -month, -day)
head(data)
platformn_usersquerydate
iOS 143384 old 2018-05-27
Android 1059783 old 2018-05-27
iOS 148700 old 2018-05-28
Android 984404 old 2018-05-28
iOS 147002 old 2018-05-29
Android 953854 old 2018-05-29
In [13]:
data %>%
  ggplot2::ggplot(aes(x=date, y=n_users, colour=query)) +
  ggplot2::geom_line(size=1.2) +
  ggplot2::scale_x_date(name = "Date", date_breaks = "1 month", date_labels = "%b %d") +
  ggplot2::scale_y_continuous(labels=polloi::compress, name = "App Unique Devices") +
  ggplot2::scale_color_brewer("Query", palette = "Set1") +
  ggplot2::facet_wrap(~ platform, scales = "free_y", ncol = 1) +
  ggplot2::labs(title = "App unique device counts - query comparison", subtitle = "From May 27 to Aug 23, 2018") +
  wmf::theme_facet(base_size = 15)
In [15]:
data %>%
  tidyr::spread(query, n_users) %>%
  dplyr::mutate(diff = new - old) %>%
  dplyr::group_by(platform) %>%
  dplyr::summarize(mean_diff = mean(diff), mean_diff_perc = mean(diff/old))
platformmean_diffmean_diff_perc
Android 39949.4831 0.042190373
iOS 295.8652 0.002102671