This dashboard displays an overview of the growth in usage of Computer-Aided Tagging on Commons. It is updated once a week, shortly after midnight UTC on Mondays.
Data prior to 2020-06-01 are estimates based on the files available on Commons on 2020-05-29, meaning that files deleted before that date are not counted. From 2020-06-01 onwards, the data reflect the state of Commons on the given date.
## Configuration variables
## Location of the tab-separated file with the weekly measurements; the
## historic data is read from it and the extended dataset is written back to it.
cat_dataset_filename <- 'datasets/weekly-CAT-measurements.tsv'
# Packages:
import::from(dplyr, group_by, keep_where = filter, ungroup, summarize,
mutate, rename, select, arrange, n, left_join, n_distinct, count)
import::from(tidyr, spread, gather)
## Helper methods
## NOTE(review): the source reference that followed "from" and the body of this
## helper are not present here — restore both from the original notebook.
## Judging by its name and by how it wraps the MySQL read further down, it
## presumably evaluates `x` with messages and warnings suppressed; confirm.
suppress_messages_warnings <- function(x) {
# Read in the historic dataset
## The snapshot_timestamp column is parsed accepting both date-only ('ymd')
## and full date-time ('ymdHMS') values.
## NOTE(review): the pipe is followed by a bare `snapshot_timestamp = ...`
## assignment; a wrapping `mutate(` ... `)` appears to be missing — confirm.
cat_dataset = fread(cat_dataset_filename) %>%
snapshot_timestamp = lubridate::parse_date_time(snapshot_timestamp, c('ymd', 'ymdHMS'))
## Query to get a dataset of all currently available CAT edits
## For revisions tagged 'computer-aided-tagging' or
## 'computer-aided-tagging-manual' the query selects the time of the query
## (NOW()) as snapshot_timestamp, rev_id, actor_user (0 when the actor has no
## user ID), rev_page, and two 0/1 flags: mobile_edit and android_edit, set
## when the same rev_id is found among the revisions tagged 'mobile edit' and
## 'android app edit' respectively.
## NOTE(review): the lines that open the three subqueries (presumably "FROM ("
## followed by two "LEFT JOIN (") are not present in the string below —
## confirm against the original notebook before running.
cat_revisions_query = "
SELECT NOW() AS snapshot_timestamp, cat_edits.rev_id, actor_user, rev_page,
IF(mobile_edits.rev_id IS NOT NULL, 1, 0) AS mobile_edit,
IF(android_edits.rev_id IS NOT NULL, 1, 0) AS android_edit
SELECT rv.rev_id, coalesce(ac.actor_user, 0) AS actor_user, rev_timestamp, rev_page
FROM revision rv
INNER JOIN change_tag ct
ON rev_id = ct_rev_id
INNER JOIN change_tag_def ctd
ON ct_tag_id = ctd_id
INNER JOIN actor ac
ON rv.rev_actor = ac.actor_id
WHERE ctd_name IN ('computer-aided-tagging', 'computer-aided-tagging-manual')
) AS cat_edits
SELECT ct_rev_id AS rev_id
FROM change_tag
JOIN change_tag_def
ON ct_tag_id = ctd_id
WHERE ctd_name = 'mobile edit'
) AS mobile_edits
ON cat_edits.rev_id = mobile_edits.rev_id
SELECT ct_rev_id AS rev_id
FROM change_tag
JOIN change_tag_def
ON ct_tag_id = ctd_id
WHERE ctd_name = 'android app edit'
) AS android_edits
ON cat_edits.rev_id = android_edits.rev_id"
## Run the query against 'commonswiki' and parse snapshot_timestamp into a
## date-time with lubridate::ymd_hms().
## NOTE(review): a `mutate(` around the assignment after the pipe and the
## closing parentheses of both calls appear to be missing — confirm.
cat_revisions = suppress_messages_warnings(
wmfdata::mysql_read(cat_revisions_query, 'commonswiki') %>%
snapshot_timestamp = lubridate::ymd_hms(snapshot_timestamp)
## Generate summary statistics for the current snapshot
count_cat_contribs = function(cat_data) {
## From a dataset of CAT-tagged revisions, generate a data.table with summary statistics
## cat_data must provide the columns read below: snapshot_timestamp,
## actor_user, rev_page, mobile_edit and android_edit.
## The summary holds four entries, one per platform ('all', 'desktop',
## 'mobile', 'android'), each with the number of distinct contributors
## (n_contributors) and distinct files (n_files), all marked with
## snapshot_method 'live'.
## NOTE(review): [1:4] takes the timestamps of the first four rows, so a
## cat_data with fewer than four rows would produce NA timestamps;
## rep(cat_data$snapshot_timestamp[1], 4) may be what is intended — confirm.
snapshot_timestamps = cat_data$snapshot_timestamp[1:4]
snapshot_methods = rep('live', 4)
platforms = c('all', 'desktop', 'mobile', 'android')
## Overall counts of number of contributors making CAT edits,
## and number of files with CAT edits:
n_c_all = n_distinct(cat_data$actor_user)
n_f_all = n_distinct(cat_data$rev_page)
## Desktop: revisions without the mobile flag
n_c_desktop = n_distinct(cat_data$actor_user[cat_data$mobile_edit == 0])
n_f_desktop = n_distinct(cat_data$rev_page[cat_data$mobile_edit == 0])
## Mobile: revisions with the mobile flag but without the Android flag
n_c_mobile = n_distinct(cat_data$actor_user[cat_data$mobile_edit == 1 & cat_data$android_edit == 0])
n_f_mobile = n_distinct(cat_data$rev_page[cat_data$mobile_edit == 1 & cat_data$android_edit == 0])
## Android: revisions with the Android flag
n_c_android = n_distinct(cat_data$actor_user[cat_data$android_edit == 1])
n_f_android = n_distinct(cat_data$rev_page[cat_data$android_edit == 1])
## NOTE(review): the `data.table(` call that should wrap the five column
## definitions below, and the closing `)` and `}`, are not present here —
## confirm against the original notebook.
snapshot_timestamp = snapshot_timestamps,
snapshot_method = snapshot_methods,
platform = platforms,
n_contributors = c(n_c_all, n_c_desktop, n_c_mobile, n_c_android),
n_files = c(n_f_all, n_f_desktop, n_f_mobile, n_f_android)
## Summarise the current snapshot of CAT revisions
cat_summary <- count_cat_contribs(cat_revisions)
## Append the new summary rows to the historic measurements
cat_dataset <- rbindlist(list(cat_dataset, cat_summary))
## Persist the combined dataset as tab-separated text without row names or
## quoting, overwriting the file the historic data was read from
write.table(
  cat_dataset,
  file = cat_dataset_filename,
  quote = FALSE,
  sep = '\t',
  row.names = FALSE
)
## Print the contributor count from the last 'all' row of the dataset
cat(paste("Overall number of contributors making CAT edits:",
tail(cat_dataset[platform == 'all']$n_contributors, 1)))
## Set the repr.plot.* options that give the plot width and height
options(repr.plot.width = 14, repr.plot.height = 7)
## Plot the overall number of contributors over time; the x axis has one break
## per month, labelled with abbreviated month and year on separate lines
ggplot(cat_dataset[platform == 'all'],
aes(x = snapshot_timestamp, y = n_contributors)) +
scale_y_continuous() +
scale_x_datetime(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
hrbrthemes::theme_ipsum("DejaVu Sans", base_size = 16, strip_text_face = "bold", strip_text_size = 18,
caption_size = 12, axis_title_size = 14, subtitle_size = 14,
axis_title_just = 'cm') +
labs(x = "Date", y = "Contributors",
title = "Overall number of contributors making CAT edits") +
## NOTE(review): the chain ends on `+` and contains no geom layer; a final
## layer (e.g. geom_line()) seems to be missing — confirm against the notebook.
## Print the contributor count from the last 'desktop' row of the dataset
cat(paste("Number of contributors making CAT edits on desktop:",
tail(cat_dataset[platform == 'desktop']$n_contributors, 1)))
## Set the repr.plot.* options that give the plot width and height
options(repr.plot.width = 14, repr.plot.height = 7)
## Plot the number of desktop contributors over time; the x axis has one break
## per month, labelled with abbreviated month and year on separate lines
ggplot(cat_dataset[platform == 'desktop'],
aes(x = snapshot_timestamp, y = n_contributors)) +
scale_y_continuous() +
scale_x_datetime(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
hrbrthemes::theme_ipsum("DejaVu Sans", base_size = 16, strip_text_face = "bold", strip_text_size = 18,
caption_size = 12, axis_title_size = 14, subtitle_size = 14,
axis_title_just = 'cm') +
labs(x = "Date", y = "Contributors",
title = "Number of contributors making CAT edits on desktop") +
## NOTE(review): the chain ends on `+` and contains no geom layer; a final
## layer (e.g. geom_line()) seems to be missing — confirm against the notebook.
## Print the contributor count from the last 'mobile' row of the dataset
cat(paste("Number of contributors making CAT edits on mobile (excluding Android app edits):",
tail(cat_dataset[platform == 'mobile']$n_contributors, 1)))
## Set the repr.plot.* options that give the plot width and height
options(repr.plot.width = 14, repr.plot.height = 7)
## Plot the number of mobile contributors over time; the x axis has one break
## per month, labelled with abbreviated month and year on separate lines
ggplot(cat_dataset[platform == 'mobile'],
aes(x = snapshot_timestamp, y = n_contributors)) +
scale_y_continuous() +
scale_x_datetime(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
hrbrthemes::theme_ipsum("DejaVu Sans", base_size = 16, strip_text_face = "bold", strip_text_size = 18,
caption_size = 12, axis_title_size = 14, subtitle_size = 14,
axis_title_just = 'cm') +
labs(x = "Date", y = "Contributors",
title = "Number of contributors making CAT edits on mobile",
subtitle = "Excluding Android app edits") +
## NOTE(review): the chain ends on `+` and contains no geom layer; a final
## layer (e.g. geom_line()) seems to be missing — confirm against the notebook.
## Print the file count from the last 'all' row of the dataset
cat(paste("Overall number of files with CAT edits:",
tail(cat_dataset[platform == 'all']$n_files, 1)))
## Set the repr.plot.* options that give the plot width and height
options(repr.plot.width = 14, repr.plot.height = 7)
## Plot the overall number of files over time; the x axis has one break per
## month, labelled with abbreviated month and year on separate lines
ggplot(cat_dataset[platform == 'all'],
aes(x = snapshot_timestamp, y = n_files)) +
scale_y_continuous() +
scale_x_datetime(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
hrbrthemes::theme_ipsum("DejaVu Sans", base_size = 16, strip_text_face = "bold", strip_text_size = 18,
caption_size = 12, axis_title_size = 14, subtitle_size = 14,
axis_title_just = 'cm') +
labs(x = "Date", y = "Files",
title = "Overall number of files with CAT edits") +
## NOTE(review): the chain ends on `+` and contains no geom layer; a final
## layer (e.g. geom_line()) seems to be missing — confirm against the notebook.
## Print the file count from the last 'desktop' row of the dataset
cat(paste("Number of files with CAT edits on desktop:",
tail(cat_dataset[platform == 'desktop']$n_files, 1)))
## Set the repr.plot.* options that give the plot width and height
options(repr.plot.width = 14, repr.plot.height = 7)
## Plot the number of files edited on desktop over time; the x axis has one
## break per month, labelled with abbreviated month and year on separate lines
ggplot(cat_dataset[platform == 'desktop'],
aes(x = snapshot_timestamp, y = n_files)) +
scale_y_continuous() +
scale_x_datetime(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
hrbrthemes::theme_ipsum("DejaVu Sans", base_size = 16, strip_text_face = "bold", strip_text_size = 18,
caption_size = 12, axis_title_size = 14, subtitle_size = 14,
axis_title_just = 'cm') +
labs(x = "Date", y = "Files",
title = "Number of files with CAT edits on desktop") +
## NOTE(review): the chain ends on `+` and contains no geom layer; a final
## layer (e.g. geom_line()) seems to be missing — confirm against the notebook.
## Print the file count from the last 'mobile' row of the dataset
cat(paste("Number of files with CAT edits on mobile (excluding Android app edits):",
tail(cat_dataset[platform == 'mobile']$n_files, 1)))
## Set the repr.plot.* options that give the plot width and height
options(repr.plot.width = 14, repr.plot.height = 7)
## Plot the number of files edited on mobile over time; the x axis has one
## break per month, labelled with abbreviated month and year on separate lines
ggplot(cat_dataset[platform == 'mobile'],
aes(x = snapshot_timestamp, y = n_files)) +
scale_y_continuous() +
scale_x_datetime(date_breaks = "1 month", minor_breaks = NULL, date_labels = "%b\n%Y") +
hrbrthemes::theme_ipsum("DejaVu Sans", base_size = 16, strip_text_face = "bold", strip_text_size = 18,
caption_size = 12, axis_title_size = 14, subtitle_size = 14,
axis_title_just = 'cm') +
labs(x = "Date", y = "Files",
title = "Number of files with CAT edits on mobile",
subtitle = "Excluding Android app edits") +
## NOTE(review): the chain ends on `+` and contains no geom layer; a final
## layer (e.g. geom_line()) seems to be missing — confirm against the notebook.
The Phabricator task for this work is T251637. The underlying code is stored in this GitHub repository. For more information about this report or to bring up issues, contact Morten Warncke-Wang. For more information about Computer-Aided Tagging on Commons, see the project page on Commons.
## Print when the report was generated. The current time is requested
## explicitly in UTC so that it matches the "UTC" label in the output,
## regardless of the time zone of the machine running the notebook.
cat("Report generated on", format(lubridate::now(tzone = "UTC"), "%Y-%m-%d at %H:%M:%S UTC"))