In FY18-19, Android team will focus editing features.
This is an initial exploration with two goals:
When possible, we will compute daily counts that can be easily aggregated to a weekly/monthly/quarterly resolution. We will also perform a minor "edit drift" adjustment by recalculating the most recent 5-7 days (TBD) of edit metrics by filtering out reverted edits. That is, at any point the most recent 5-7 days of metrics are subject to change.
Note: platform-specific edit data was not available until mid-June via T194424. Prior to the deployment of the patch, all edits made in iOS and Android apps were lumped together under a single change tag "mobile app edit". Since multi-platform usage and platform switching are not common occurrences, any editor who has made a "mobile app edit"-tagged contribution previously is ineligble to be considered a new Android editor.
Here is where we'll measure our impact first:
# https://stackoverflow.com/a/35018739/1091835
library(IRdisplay)
display_html(
'<script>
code_show=true;
function code_toggle() {
if (code_show){
$(\'div.input\').hide();
} else {
$(\'div.input\').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()">
<input type="submit" value="Click here to toggle on/off the raw code.">
</form>'
)
shhh <- function(expr) suppressPackageStartupMessages(suppressWarnings(suppressMessages(expr)))
shhh({
library(magrittr); library(zeallot); library(glue); library(tidyverse)
})
start_date <- "20180710"
start_date_human <- format(as.Date(start_date, '%Y%m%d'), '%d %B %Y')
end_date <- "20190627"
end_date_human <- format(as.Date(end_date, '%Y%m%d'), '%d %B %Y')
target_wikis <- c(
"arwiki" = "Arabic", "bnwiki" = "Bangla", "fawiki" = "Persian",
"fiwiki" = "Finnish", "hewiki"= "Hebrew", "hiwiki" = "Hindi",
"itwiki" = "Italian", "mrwiki" = "Marathi", "ptwiki" = "Portuguese",
"ruwiki" = "Russian", "svwiki" = "Swedish", "tawiki" = "Tamil",
"zhwiki" = "Chinese"
)
collapse_targets <- function(x) {
return(forcats::fct_collapse(x,
`Hindi & Marathi` = c("Hindi", "Marathi"),
`Swedish & Finnish` = c("Swedish", "Finnish")
))
}
message("Baselines last computed up to: ", end_date_human)
options(jupyter.plot_mimetypes = "image/png", digits = 4)
# Interactive time series visualization:
shhh({
library(xts)
library(dygraphs)
})
# For working in Jupyter:
library(htmlwidgets)
library(IRdisplay)
# Helper functions:
make_dygraph <- function(xts_data, brewer_palette = "Paired", legend_width = 600, ...) {
if (ncol(xts_data) > 10) {
color_palette <- scales::hue_pal()(ncol(xts_data))
} else {
color_palette <- RColorBrewer::brewer.pal(ncol(xts_data), brewer_palette)
}
dy <- dygraph(xts_data, ...)%>%
dyAxis("x", drawGrid = FALSE) %>%
dyLegend(width = legend_width) %>%
dyRangeSelector() %>%
dyOptions(colors = color_palette)
return(dy)
}
output_jupyter <- function(html_widget, name, width = 800, height = 600) {
file_name <- glue("widget_{name}.html")
# dir_name <- sub(".html", "_files", file_name, fixed = TRUE)
saveWidget(html_widget, file_name, selfcontained = TRUE)
display_html(glue('<iframe src="{file_name}" width="{width}" height="{height}"></iframe>'))
# message(glue("Remember to download {file_name} when exporting notebook\nand to share it together with the exported HTML."))
}
query <- "SELECT
DATE(LEFT(rev_timestamp, 8)) AS date,
COUNT(*) AS n_edits
FROM (
SELECT ct_rev_id AS rev_id
FROM change_tag
INNER JOIN change_tag_def ON (
change_tag.ct_tag_id = change_tag_def.ctd_id
AND change_tag_def.ctd_name = 'android app edit'
)
) AS android_app_edits
INNER JOIN (
-- Edits on visible pages:
SELECT
rev_id,
rev_timestamp
FROM revision
WHERE rev_timestamp >= '{start_date}'
AND rev_timestamp < '{end_date}'
UNION ALL
-- Edits on deleted pages:
SELECT
ar_rev_id AS rev_id,
ar_timestamp AS rev_timestamp
FROM archive
WHERE ar_timestamp >= '{start_date}'
AND ar_timestamp < '{end_date}'
) AS combined_revisions ON android_app_edits.rev_id = combined_revisions.rev_id
GROUP BY date;"
if (file.exists("per_wiki_edit_counts.csv")) {
per_wiki_edit_counts <- readr::read_csv("per_wiki_edit_counts.csv", col_types = "Dci")
} else {
per_wiki_edit_counts <- map_df(
set_names(names(target_wikis), names(target_wikis)),
~ shhh(wmf::mysql_read(glue(query), .x)),
.id = "wiki"
) %>%
complete(date, wiki, fill = list(n_edits = 0)) %>%
mutate(
date = as.Date(date),
wiki = collapse_targets(factor(target_wikis[wiki]))
) %>%
group_by(date, wiki) %>%
summarize(n_edits = sum(n_edits)) %>%
ungroup
readr::write_csv(per_wiki_edit_counts, "per_wiki_edit_counts.csv")
}
dy_edits_per_wiki <- per_wiki_edit_counts %>%
mutate(week = lubridate::floor_date(date, "week")) %>%
group_by(date = week, wiki) %>%
summarize(n_edits = sum(n_edits)) %>%
ungroup %>%
spread(wiki, n_edits) %>%
{ xts(select(., -date), order.by = .$date) } %>%
make_dygraph(
main = "Weekly number of edits made with Android app, by Wikipedia",
xlab = "Date", ylab = "Wikipedia edits"
) %>%
dyOptions(stackedGraph = TRUE) %>%
dyEvent("2018-10-24", "Nav Update", labelLoc = "bottom") %>%
dyEvent("2019-04-23", "Suggested Edits v1", labelLoc = "bottom")
output_jupyter(dy_edits_per_wiki, "edit_counts_per_wiki", width = 900, height = 450)
major_releases <- dplyr::tibble(
date = as.Date(c("2018-10-24", "2019-04-23")),
release = c("Nav Update", "Suggested Edits v1")
)
options(repr.plot.width = 14, repr.plot.height = 7)
per_wiki_edit_counts %>%
group_by(date) %>%
summarize(n_edits = sum(n_edits)) %>%
ggplot(aes(x = date)) +
geom_ribbon(aes(ymin = 0, ymax = n_edits), fill = "gray90") +
geom_line(aes(y = n_edits)) +
geom_smooth(aes(y = n_edits), se = FALSE, method = "loess", size = 2) +
scale_y_continuous(breaks = seq(0, 1000, 100)) +
scale_x_date(date_breaks = "2 months", date_minor_breaks = "1 month", date_labels = "%a\n%m/%d") +
geom_vline(aes(xintercept = date), data = major_releases, linetype = "dashed") +
geom_label(aes(label = release, y = 0), data = major_releases) +
hrbrthemes::theme_ipsum(base_size = 14, axis_title_size = 18) +
labs(
title = "Number of Wikipedia edits made with Android app",
subtitle = paste("Across the following languages:", paste0(sort(target_wikis), collapse = ", ")),
x = "Date", y = "Wikipedia edits"
)
wd_desc_edits_query <- "SET mapreduce.map.memory.mb=4096;
USE wmf;
WITH android_app_title_description_edits AS (
SELECT
SUBSTR(event_timestamp, 1, 10) AS date,
event_comment AS revision_summary
FROM mediawiki_history
WHERE snapshot = '2019-05'
AND wiki_db = 'wikidatawiki'
AND ARRAY_CONTAINS(revision_tags, 'android app edit')
AND event_entity = 'revision'
AND event_timestamp >= '${start_date}'
AND event_timestamp < '${end_date}'
AND INSTR(event_comment, 'wbsetdescription') > 0
)
SELECT
date,
REGEXP_EXTRACT(revision_summary, '\\\\|([a-z]{2,})\\\\s', 1) AS language_code,
COUNT(1) AS n_edits
FROM android_app_title_description_edits
WHERE REGEXP_EXTRACT(revision_summary, '\\\\|([a-z]{2,})\\\\s', 1) IN('ar', 'bn', 'fa', 'fi', 'he', 'hi', 'it', 'mr', 'pt', 'ru', 'sv', 'ta', 'zh')
GROUP BY date, REGEXP_EXTRACT(revision_summary, '\\\\|([a-z]{2,})\\\\s', 1);"
start_dates <- seq(as.Date("2018-08-01"), as.Date("2019-06-01"), by = "month")
end_dates <- start_dates
start_dates <- c(as.Date("2018-07-10"), start_dates[-11])
start_dates <- as.character(start_dates)
end_dates <- as.character(end_dates)
if (file.exists("wikidata_edit_counts.csv")) {
wikidata_edit_counts <- readr::read_csv("wikidata_edit_counts.csv", col_types = "Dci")
} else {
wikidata_edit_counts <- purrr::map2_dfr(start_dates, end_dates, function(start_date, end_date) {
message("Counting title descriptions from ", substr(start_date, 1, 7))
query <- glue(wd_desc_edits_query, .open = "${")
result <- shhh(wmf::query_hive(query, heap_size = 2048))
return(result)
}) %>% mutate(date = as.Date(date))
readr::write_csv(wikidata_edit_counts, "wikidata_edit_counts.csv")
}
wikidata_edit_counts %<>%
mutate(
wiki = paste0(language_code, "wiki"),
wiki = forcats::fct_collapse(factor(target_wikis[wiki]), `Hindi & Marathi` = c("Hindi", "Marathi"))
)
dy_wikidata_edits_per_language <- wikidata_edit_counts %>%
complete(date, wiki, fill = list(n_edits = 0)) %>%
mutate(month = lubridate::floor_date(date, "month")) %>%
group_by(date = month, wiki) %>%
summarize(n_edits = sum(n_edits)) %>%
ungroup %>%
spread(wiki, n_edits) %>%
{ xts(select(., -date), order.by = .$date) } %>%
make_dygraph(
main = "Number of Wikidata description edits made with Android app, by language",
xlab = "Date", ylab = "Wikidata edits"
) %>%
dyOptions(stackedGraph = TRUE) %>%
dyEvent("2018-10-24", "Nav Update", labelLoc = "bottom") %>%
dyEvent("2019-04-05", "Suggested Edits v1 beta", labelLoc = "bottom") %>%
dyEvent("2019-04-23", "Suggested Edits v1 prod", labelLoc = "bottom")
output_jupyter(dy_wikidata_edits_per_language, "wikidata_edit_counts_per_language", width = 900, height = 900)
options(repr.plot.width = 14, repr.plot.height = 7)
wikidata_edit_counts %>%
complete(date, wiki, fill = list(n_edits = 0)) %>%
mutate(month = lubridate::floor_date(date, "month")) %>%
group_by(date = month, wiki) %>%
summarize(n_edits = sum(n_edits)) %>%
ungroup %>%
ggplot(aes(x = date)) +
geom_area(aes(y = n_edits, fill = wiki), position = "stack", color = "black", size = 0.25) +
scale_x_date(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
# scale_fill_brewer(palette = "Paired") +
geom_vline(aes(xintercept = date), data = major_releases, linetype = "dashed") +
geom_label(aes(label = release, y = 0), data = major_releases) +
hrbrthemes::theme_ipsum(base_size = 14, axis_title_size = 18) +
labs(
title = "Number of Wikidata description edits made with Android app",
subtitle = "This shows the total number of edits across languages and enables\nus to see how much each language contributed to the total count",
x = "Date", y = "Wikidata edits"
)
options(repr.plot.width = 14, repr.plot.height = 7)
wikidata_edit_counts %>%
group_by(date) %>%
summarize(n_edits = sum(n_edits)) %>%
ggplot(aes(x = date)) +
geom_ribbon(aes(ymin = 0, ymax = n_edits), fill = "gray90") +
geom_line(aes(y = n_edits)) +
geom_smooth(aes(y = n_edits), method = "loess", se = FALSE, size = 1.1) +
scale_x_date(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
geom_vline(aes(xintercept = date), data = major_releases, linetype = "dashed") +
geom_label(aes(label = release, y = 0), data = major_releases) +
hrbrthemes::theme_ipsum(base_size = 14, axis_title_size = 18) +
labs(
title = "Number of Wikidata description edits made with Android app",
x = "Date", y = "Wikidata edits"
)
Here we take users who have registered on or after 10 July 2018 who have made at least one edit with the Android app. In this case we are restricting ourselves to users who created new accounts on the wikis, not those who had pre-existing accounts from elsewhere whose accounts were auto-created when they visited the wiki.
query <- "SELECT
-- For every new user (and their registration date),
-- count how many edits they made on each day:
user_id,
registration_date,
edit_date,
COUNT(*) AS n_edits
FROM (
SELECT
new_users.user_id AS user_id,
registration_date,
edit_date, edit_id
FROM (
-- New users (self-registered, not auto-created):
SELECT
DATE(LEFT(log_timestamp, 8)) AS registration_date,
log_user AS user_id
FROM logging
WHERE log_type = 'newusers'
AND log_action = 'create'
AND log_timestamp >= '{start_date}'
AND log_timestamp < '{end_date}'
) AS new_users
INNER JOIN (
(
-- Edits made on visible pages with the Android app:
SELECT
DATE(LEFT(rev_timestamp, 8)) AS edit_date,
rev_user AS user_id,
rev_id AS edit_id
FROM change_tag
INNER JOIN change_tag_def ON (
change_tag.ct_tag_id = change_tag_def.ctd_id
AND change_tag_def.ctd_name = 'android app edit'
)
INNER JOIN revision ON (
revision.rev_id = change_tag.ct_rev_id
AND rev_timestamp >= '{start_date}'
AND rev_timestamp < '{end_date}'
)
)
UNION ALL
(
-- Edits made on deleted pages with the Android app:
SELECT
DATE(LEFT(ar_timestamp, 8)) AS edit_date,
ar_user AS user_id,
ar_id AS edit_id
FROM change_tag
INNER JOIN change_tag_def ON (
change_tag.ct_tag_id = change_tag_def.ctd_id
AND change_tag_def.ctd_name = 'android app edit'
)
INNER JOIN archive ON (
archive.ar_id = change_tag.ct_rev_id
AND ar_timestamp >= '{start_date}'
AND ar_timestamp < '{end_date}'
)
)
) AS android_app_edits ON new_users.user_id = android_app_edits.user_id
) AS new_android_user_edits
GROUP BY user_id, registration_date, edit_date;"
if (file.exists("per_wiki_per_user_edit_counts.csv")) {
per_wiki_per_user_edit_counts <- readr::read_csv("per_wiki_per_user_edit_counts.csv", col_types = "ciDDi")
} else {
per_wiki_per_user_edit_counts <- set_names(names(target_wikis), names(target_wikis)) %>%
map_df(~ shhh(wmf::mysql_read(glue(query), .x)), .id = "wiki") %>%
mutate(
registration_date = as.Date(registration_date),
edit_date = as.Date(edit_date),
wiki = factor(target_wikis[wiki])
)
readr::write_csv(per_wiki_per_user_edit_counts, "per_wiki_per_user_edit_counts.csv")
}
dy_new_contributor_acquisition <- per_wiki_per_user_edit_counts %>%
distinct(wiki, user_id, registration_date) %>%
mutate(wiki = collapse_targets(wiki)) %>%
group_by(wiki, registration_date) %>%
summarize(n_users = n()) %>%
ungroup %>%
complete(registration_date, wiki, fill = list(n_users = 0)) %>%
mutate(month = lubridate::floor_date(registration_date, "month")) %>%
filter(month < lubridate::floor_date(Sys.Date(), "month")) %>%
group_by(registration_date = month, wiki) %>%
summarize(n_users = sum(n_users)) %>%
ungroup %>%
spread(wiki, n_users) %>%
{ xts(select(., -registration_date), order.by = .$registration_date) } %>%
make_dygraph(
main = "Acquisition of new contributors, by language",
xlab = "Date", ylab = "New Android editors"
) %>%
dyOptions(stackedGraph = TRUE) %>%
dyEvent("2018-10-24", "Nav Update", labelLoc = "bottom") %>%
dyEvent("2019-04-05", "Suggested Edits v1 beta", labelLoc = "bottom") %>%
dyEvent("2019-04-23", "Suggested Edits v1 prod", labelLoc = "bottom")
output_jupyter(dy_new_contributor_acquisition, "new_contributor_acquisition", width = 900, height = 450)
options(repr.plot.width = 14, repr.plot.height = 10)
per_wiki_per_user_edit_counts %>%
distinct(wiki, user_id, registration_date) %>%
mutate(wiki = collapse_targets(wiki)) %>%
group_by(wiki, registration_date) %>%
summarize(n_users = n()) %>%
ungroup %>%
complete(registration_date, wiki, fill = list(n_users = 0)) %>%
mutate(month = lubridate::floor_date(registration_date, "month")) %>%
filter(month < lubridate::floor_date(Sys.Date(), "month")) %>%
group_by(registration_date = month, wiki) %>%
summarize(n_users = sum(n_users)) %>%
ungroup %>%
ggplot() +
geom_area(aes(x = registration_date, y = n_users, fill = wiki), position = "stack", color = "white") +
scale_x_date(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
geom_vline(aes(xintercept = date), data = major_releases, linetype = "dashed") +
geom_label(aes(x = date, label = release, y = 0), data = major_releases) +
hrbrthemes::theme_ipsum(base_size = 14, axis_title_size = 18) +
labs(
title = "Acquisition of new contributors by month",
subtitle = glue("A new contributor is any newly-created account that has
made at least one edit with the Android app since {start_date_human}"),
x = "Account creation date", y = "New contributors"
)
New contributor retention: of the users who registered in a given time span and made at least one edit in their first 30 days, the percentage that made at least one edit in their second 30 days.
check_retention <- function(registration_date, edit_dates, n_days = 30) {
edit_dates <- sort(edit_dates)
if (length(unique(edit_dates)) < 2) return(FALSE)
# Check if user made an edit in the first n_days following their registration:
first_milestone <- registration_date + n_days
if (!any(edit_dates >= registration_date & edit_dates < first_milestone)) return(FALSE)
# Check if user made another edit in the second set of n_days:
second_milestone <- first_milestone + n_days
return(any(edit_dates >= first_milestone & edit_dates < second_milestone))
}
per_wiki_new_editors <- per_wiki_per_user_edit_counts %>%
group_by(wiki, user_id) %>%
summarize(
registration_date = min(registration_date),
total_edits = sum(n_edits),
days_edited = length(unique(edit_date)),
retained_7 = check_retention(registration_date, edit_date, 7),
retained_15 = check_retention(registration_date, edit_date, 15),
retained_30 = check_retention(registration_date, edit_date)
) %>%
ungroup
# per_wiki_per_user_edit_counts %>%
# left_join(per_wiki_new_editors, by = c("wiki", "user_id", "registration_date")) %>%
# arrange(wiki, user_id)
The thing to remember here is the definition of the retention metric, which considers a user's editing activity in two sets of $N$ days. If someone edits in the first week following their registration and then doesn't edit again for another two weeks (for example), that's a negative on 7-day retention but a positive on the 15-day retention.
per_wiki_new_editors %>%
group_by(wiki) %>%
summarize(
n_users = n(),
median_edits_per_user = median(total_edits),
total_edits = sum(total_edits),
retention_7_days = scales::percent(mean(retained_7)),
retention_15_days = scales::percent(mean(retained_15)),
retention_30_days = scales::percent(mean(retained_30))
)
These are users who may be existing editors but have not made any contributions on mobile apps before 10 July 2018 (when platform-specific change tags were deployed).
# Boy I wish our MariaDB wasn't several major versions behind and we at least had 10.2.1+ so that we could use CTEs
# and make this query so much smaller and more readable by replacing & reusing the sub-queries using WITH *le sigh*
query <- "SELECT
new_android_editors.user_id AS user_id,
edit_date,
n_edits
FROM (
SELECT
user_id,
edit_date,
COUNT(*) AS n_edits
FROM (
-- Edits made with Android app on visible pages:
SELECT
rev_user AS user_id,
DATE(LEFT(rev_timestamp, 8)) AS edit_date
FROM change_tag
INNER JOIN change_tag_def ON (
change_tag.ct_tag_id = change_tag_def.ctd_id
AND change_tag_def.ctd_name = 'android app edit'
)
INNER JOIN revision ON (
revision.rev_id = change_tag.ct_rev_id
AND revision.rev_timestamp >= '20180710'
AND revision.rev_timestamp < '20180817'
)
UNION ALL
-- Edits made with Android app on deleted pages:
SELECT
ar_user AS user_id,
DATE(LEFT(ar_timestamp, 8)) AS edit_date
FROM change_tag
INNER JOIN change_tag_def ON (
change_tag.ct_tag_id = change_tag_def.ctd_id
AND change_tag_def.ctd_name = 'android app edit'
)
INNER JOIN archive ON (
archive.ar_id = change_tag.ct_rev_id
AND archive.ar_timestamp >= '20180710'
AND archive.ar_timestamp < '20180817'
)
) AS android_app_edits
WHERE user_id != 0
GROUP BY user_id, edit_date
) AS android_app_edit_counts
INNER JOIN (
-- Editors who have used Android app edit to edit pages since
-- the 2018-07-10 deployment of the platform-specific tags,
-- but who have not previously used a mobile app (Android/iOS)
-- to edit Wikipedia:
SELECT
all_android_app_editors.user_id AS user_id
FROM (
SELECT DISTINCT user_id FROM (
-- Editors who have used Android app to edit visible pages:
SELECT rev_user AS user_id
FROM change_tag
INNER JOIN change_tag_def ON (
change_tag.ct_tag_id = change_tag_def.ctd_id
AND change_tag_def.ctd_name = 'android app edit'
)
INNER JOIN revision ON (
revision.rev_id = change_tag.ct_rev_id
AND revision.rev_timestamp >= '20180710'
AND revision.rev_timestamp < '20180817'
)
UNION ALL
-- Editors who have used Android app to edit deleted pages:
SELECT ar_user AS user_id
FROM change_tag
INNER JOIN change_tag_def ON (
change_tag.ct_tag_id = change_tag_def.ctd_id
AND change_tag_def.ctd_name = 'android app edit'
)
INNER JOIN archive ON (
archive.ar_id = change_tag.ct_rev_id
AND archive.ar_timestamp >= '20180710'
AND archive.ar_timestamp < '20180817'
)
) AS combined_revisions
WHERE user_id != 0
) AS all_android_app_editors
-- LEFT JOIN + WHERE t2.id IS NULL performs an ANTI-JOIN:
LEFT JOIN (
SELECT DISTINCT user_id FROM (
-- Editors who have previously used a mobile app to edit visible pages:
SELECT rev_user AS user_id
FROM change_tag
INNER JOIN revision ON (
revision.rev_id = change_tag.ct_rev_id
AND revision.rev_timestamp < '20180710'
AND change_tag.ct_tag = 'mobile app edit'
)
UNION ALL
-- Editors who have previously used a mobile app to edit deleted pages:
SELECT ar_user AS user_id
FROM change_tag
INNER JOIN archive ON (
archive.ar_id = change_tag.ct_rev_id
AND archive.ar_timestamp < '20180710'
AND change_tag.ct_tag = 'mobile app edit'
)
) AS combined_revisions
WHERE user_id != 0
) AS previous_app_editors
ON all_android_app_editors.user_id = previous_app_editors.user_id
WHERE previous_app_editors.user_id IS NULL
) AS new_android_editors
ON new_android_editors.user_id = android_app_edit_counts.user_id;"
# per_wiki_new_android_editor_edit_counts <- set_names(names(target_wikis), names(target_wikis)) %>%
# map_df(~ shhh(wmf::mysql_read(glue(query), .x)), .id = "wiki") %>%
# mutate(
# edit_date = as.Date(edit_date),
# user_id2 = paste0(wiki, "-", user_id),
# wiki = factor(target_wikis[wiki])
# )
per_wiki_new_android_editor_edit_counts %>%
mutate(wiki = collapse_targets(wiki)) %>%
group_by(wiki, edit_date) %>%
summarize(n_edits = sum(n_edits)) %>%
filter(n_edits > 0) %>%
mutate(week = lubridate::floor_date(edit_date, "week")) %>%
group_by(edit_date = week, wiki) %>%
summarize(n_edits = sum(n_edits)) %>%
ungroup %>%
ggplot() +
geom_area(aes(x = edit_date, y = n_edits, fill = wiki), position = "stack", color = "white") +
scale_y_continuous(breaks = seq(0, 200, 20), minor_breaks = seq(0, 200, 10)) +
scale_x_date(date_breaks = "2 months", date_minor_breaks = "1 month", date_labels = "%a\n%m/%d") +
geom_vline(aes(xintercept = date), data = major_releases, linetype = "dashed") +
geom_label(aes(x = date, label = release, y = 0), data = major_releases) +
hrbrthemes::theme_ipsum(base_size = 14, axis_title_size = 18) +
labs(
title = "New Android editor activity",
subtitle = "(Does NOT include Wikidata description edits)",
caption = glue("A new Android editor is any account that has made at least one edit with
the Android app since – but not any edits with either app prior to –
{start_date_human}, which is when platform-specific tags were deployed. This
includes users who have previously edited on desktop and/or mobile web."),
x = "Date", y = "Edits"
)
per_wiki_new_android_editor_edit_counts %>%
mutate(wiki = collapse_targets(wiki)) %>%
group_by(wiki, user_id2) %>%
summarize(first_edit_date = min(edit_date)) %>%
group_by(wiki, first_edit_date) %>%
summarize(n_users = n()) %>%
mutate(week = lubridate::floor_date(first_edit_date, "week")) %>%
group_by(first_edit_date = week, wiki) %>%
summarize(n_users = sum(n_users)) %>%
ungroup %>%
ggplot() +
geom_bar(aes(x = first_edit_date, y = n_users, fill = wiki), stat = "identity", color = "white", size = 0.5) +
scale_y_continuous(breaks = seq(0, 40, 5), minor_breaks = seq(0, 40)) +
scale_x_date(date_breaks = "2 months", date_minor_breaks = "1 month", date_labels = "%a\n%m/%d") +
scale_fill_manual(values = brewer_palette_paired11) +
geom_vline(aes(xintercept = date), data = major_releases, linetype = "dashed") +
geom_label(aes(x = date, label = release, y = 0), data = major_releases) +
hrbrthemes::theme_ipsum(base_size = 14, axis_title_size = 18) +
labs(
title = "New Android editor acquisition",
subtitle = "(Does NOT include Wikidata description edits)",
caption = glue("A new Android editor is any account that has made at least one edit with
the Android app since – but not any edits with either app prior to –
{start_date_human}, which is when platform-specific tags were deployed. This
includes users who have previously edited on desktop and/or mobile web."),
x = "Date of first edit with Android app", y = "New Android editors"
)
per_wiki_new_android_editor_edit_counts %>%
group_by(wiki, user_id) %>%
summarize(
total_edits = sum(n_edits),
first_edit_date = min(edit_date),
retained_7 = check_retention(first_edit_date, edit_date, 7),
retained_15 = check_retention(first_edit_date, edit_date, 15),
retained_30 = check_retention(first_edit_date, edit_date, 30)
) %>%
summarize(
n_users = n(),
median_edits_per_user = median(total_edits),
total_edits = sum(total_edits),
retention_7_days = scales::percent(mean(retained_7)),
retention_15_days = scales::percent(mean(retained_15)),
retention_30_days = scales::percent(mean(retained_30))
)
The statistics in this table are based on edits from users who:
As of right now this does not include Wikidata description edits.