Ptwiki community decided to turn off editing for IP editors. This has been done using an AbuseFilter since October 4th, 2020 (Week 40th). This dashboard is to monitor related metrics to see how this impacts the health of the project in the long term. T264940

Metrics:

In [4]:
%%capture
%set_env http_proxy=http://webproxy.eqiad.wmnet:8080
%set_env https_proxy=https://webproxy.eqiad.wmnet:8080
%set_env no_proxy=localhost,127.0.0.1
In [5]:
import wmfdata
wmfdata.utils.insert_code_toggle()
In [6]:
from wmfdata import hive, mariadb
import pandas as pd
from datetime import datetime, timedelta, date
In [7]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
In [8]:
@ticker.FuncFormatter
def million_formatter(x, pos):
    return "%d M" % round(x/1000000)
@ticker.FuncFormatter
def thousand_formatter(x, pos):
    return "%d K" % round(x/1000)
In [148]:
now = pd.Timestamp.utcnow()
today=now.date()
In [57]:
last_Sunday=(today - timedelta(days=today.weekday()+1)).strftime("%Y-%m-%d")
month_1st_day= datetime.today().replace(day=1).strftime("%Y-%m-%d")
In [60]:
start_date_YYYY_MM_DD = '2020-01-01'
end_date_YYYY_MM_DD=last_Sunday
yr='2020'
In [36]:
start_date_YYYYMMDD = '20200101'
#end_date_YYYYMMDD=datetime.today().replace(day=1).strftime("%Y%m%d")
In [131]:
end_date_YYYYMMDD=(today - timedelta(days=today.weekday()+1)).strftime("%Y%m%d")
end_date_YYYYMMDD='20201231'
In [63]:
snapshot='2020-12'
In [132]:
#if datetime.today().day < 7:
#    snapshot = datetime.today().replace(month=datetime.today().month-2).strftime("%Y-%m")
#else:
#    snapshot = datetime.today().replace(month=datetime.today().month-1).strftime("%Y-%m")
In [68]:
lastyear_start_date_YYYY_MM_DD = '2019-01-01'
lastyear_end_date_YYYY_MM_DD='2019-12-31'
lastyear_start_date_YYYYMMDD = '20190101'
lastyear_end_date_YYYYMMDD='20191231'

Number of active editors (registered users)

In [69]:
weekly_user_editors_query='''
SELECT tmp.week_n, COUNT(tmp.user_id) AS user_editors
FROM
(SELECT  WEEKOFYEAR(rev_timestamp) AS week_n,  performer.user_id AS user_id, max(performer.user_text) AS user_text
FROM event.mediawiki_revision_create
WHERE year='{YEAR_YYYY}' AND  (rev_timestamp >= '{START_YYYY_MM_DD}' AND rev_timestamp <= '{END_YYYY_MM_DD}') 
AND performer.user_id IS NOT null 
AND page_namespace IN (0,6,12,640,100,102,104,106,108,110,112,114,124,146,250,252) 
AND `database`='ptwiki'
GROUP BY   WEEKOFYEAR(rev_timestamp), performer.user_id 
)AS tmp
GROUP BY tmp.week_n
'''
In [70]:
df_weekly_user_editors_2020=hive.run(weekly_user_editors_query.format(YEAR_YYYY=yr, START_YYYY_MM_DD=start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [71]:
weekly_user_editors_query='''
SELECT
       weekofyear(event_timestamp) AS week_n, COUNT(DISTINCT event_user_id) wiki_user_editors
FROM wmf.mediawiki_history
WHERE
        event_entity = 'revision' AND
        event_type = 'create' AND
        DATE(event_timestamp) >= '{START_YYYY_MM_DD}' AND DATE(event_timestamp) <= '{END_YYYY_MM_DD}' AND
        event_user_is_anonymous = false  AND page_namespace_is_content
        AND snapshot = '{SNAPSHOT}' AND wiki_db='ptwiki'
GROUP BY weekofyear(event_timestamp) 
'''
In [72]:
df2_weekly_user_editors_2019=hive.run(weekly_user_editors_query.format(SNAPSHOT=snapshot, START_YYYY_MM_DD=lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [73]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki Weekly User Editors',fontweight="bold",fontsize = 16,y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('Editors',  fontsize = 16)

ax.plot(df_weekly_user_editors_2020['week_n'], df_weekly_user_editors_2020['user_editors'], 'o-',label='Year 2020')
ax.plot(df2_weekly_user_editors_2019['week_n'], df2_weekly_user_editors_2019['wiki_user_editors'], 'o--',label='Year 2019')
ax.set_xlim(1,52)
ax.set_ylim(0,5000)
ax.vlines(40, 0, 5000, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')
ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)
                    

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')

plt.show()

Number of edits

In [74]:
weekly_edits_query='''
SELECT WEEKOFYEAR(rev_timestamp) AS week_n, COUNT(*) AS edits
FROM event.mediawiki_revision_create
WHERE year='{YEAR_YYYY}' AND  (rev_timestamp >= '{START_YYYY_MM_DD}' AND rev_timestamp <= '{END_YYYY_MM_DD}') 
AND `database`='ptwiki'
GROUP BY WEEKOFYEAR(rev_timestamp) 
'''
In [75]:
df_weekly_edits_2020=hive.run(weekly_edits_query.format(YEAR_YYYY=yr, START_YYYY_MM_DD=start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [76]:
weekly_edits_query_2='''
SELECT
       weekofyear(event_timestamp) AS week_n, COUNT(revision_id) AS edits
FROM wmf.mediawiki_history
WHERE
        event_entity = 'revision' AND
        event_type = 'create' AND
        DATE(event_timestamp) >= '{START_YYYY_MM_DD}' AND DATE(event_timestamp) <= '{END_YYYY_MM_DD}' AND
        snapshot = '{SNAPSHOT}'
        AND wiki_db='ptwiki'
GROUP BY weekofyear(event_timestamp) 
'''
In [77]:
df2_weekly_edits_2019_query=hive.run(weekly_edits_query_2.format(SNAPSHOT=snapshot, START_YYYY_MM_DD=lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [78]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki Weekly Edits',fontweight="bold",fontsize = 16,y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('Edits',  fontsize = 16)
ax.plot(df_weekly_edits_2020['week_n'], df_weekly_edits_2020['edits'], 'o-',label='Year 2020')
ax.plot(df2_weekly_edits_2019_query['week_n'], df2_weekly_edits_2019_query['edits'], 'o--',label='Year 2019')

ax.set_xlim(1,52)
ax.set_ylim(0,150000)
ax.vlines(40, 0, 150000, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')

ax.yaxis.set_major_formatter(thousand_formatter)

ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
#plt.savefig("1_ptwiki_weekly_edits.png")

plt.show()

Number of reverts

Definition 1: number of edits reverted by the snapshot time

In [79]:
query_weekly_reverts='''

SELECT weekofyear(from_unixtime(UNIX_timestamp(event_timestamp,'yyyy-MM-dd HH:mm:ss'))) AS week_n,
 count(revision_id) AS reverted_edits
FROM wmf.mediawiki_history
WHERE snapshot= '{SNAPSHOT}' AND wiki_db='ptwiki'
AND substr(event_timestamp,1,10) BETWEEN '{START_YYYY_MM_DD}' AND '{END_YYYY_MM_DD}'
AND event_entity = "revision" AND revision_is_identity_reverted 
GROUP BY weekofyear(from_unixtime(UNIX_timestamp(event_timestamp,'yyyy-MM-dd HH:mm:ss')))
'''
In [80]:
df_weekly_reverts_2020=hive.run(query_weekly_reverts.format(SNAPSHOT=snapshot,START_YYYY_MM_DD= start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [81]:
df_weekly_reverts_2019=hive.run(query_weekly_reverts.format(SNAPSHOT=snapshot,START_YYYY_MM_DD= lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [82]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki Weekly Reverted Edits',fontweight="bold",fontsize = 16,y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('Reverts',  fontsize = 16)
ax.plot(df_weekly_reverts_2020['week_n'][:-1], df_weekly_reverts_2020['reverted_edits'][:-1], 'o-',label='Year 2020')
ax.plot(df_weekly_reverts_2019['week_n'], df_weekly_reverts_2019['reverted_edits'], 'o--',label='Year 2019')

ax.set_xlim(1,52)
ax.set_ylim(0,15000)
ax.vlines(40, 0, 15000, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')

ax.yaxis.set_major_formatter(thousand_formatter)

ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
plt.show()

Definition 2: number of edits reverted within 48 hours

In [83]:
query_weekly_reverts_48hrs='''
SELECT weekofyear(from_unixtime(UNIX_timestamp(event_timestamp,'yyyy-MM-dd HH:mm:ss'))) AS week_n,
 count(revision_id) AS reverted_edits
FROM wmf.mediawiki_history
WHERE snapshot= '{SNAPSHOT}' AND wiki_db='ptwiki'
AND substr(event_timestamp,1,10) BETWEEN '{START_YYYY_MM_DD}' AND '{END_YYYY_MM_DD}'
AND event_entity = "revision" AND revision_is_identity_reverted AND revision_seconds_to_identity_revert <= 172800
GROUP BY weekofyear(from_unixtime(UNIX_timestamp(event_timestamp,'yyyy-MM-dd HH:mm:ss')))
'''
In [84]:
df_weekly_reverts_48hrs_2020=hive.run(query_weekly_reverts_48hrs.format(SNAPSHOT=snapshot,START_YYYY_MM_DD= start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [85]:
df_weekly_reverts_48hrs_2019=hive.run(query_weekly_reverts_48hrs.format(SNAPSHOT=snapshot,START_YYYY_MM_DD= lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [86]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki Weekly Reverts Within 48 Hours of Editing',fontweight="bold",fontsize = 16,y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('Reverts',  fontsize = 16)
ax.plot(df_weekly_reverts_48hrs_2020['week_n'][:-1], df_weekly_reverts_48hrs_2020['reverted_edits'][:-1], 'o-',label='Year 2020')
ax.plot(df_weekly_reverts_48hrs_2019['week_n'], df_weekly_reverts_48hrs_2019['reverted_edits'], 'o--',label='Year 2019')

ax.set_xlim(1,52)
ax.set_ylim(0,15000)
ax.vlines(40, 0, 15000, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')

ax.yaxis.set_major_formatter(thousand_formatter)

ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
plt.show()

Note: Around 60% reverts happened within 48 hours of editing on ptwiki.

Number of non-reverted edits

Definition 1: number of edits which were not reverted by the snapshot time

In [87]:
query_weekly_non_reverted_edits='''

SELECT weekofyear(from_unixtime(UNIX_timestamp(event_timestamp,'yyyy-MM-dd HH:mm:ss'))) AS week_n,
 count(revision_id) AS non_reverted_edits
FROM wmf.mediawiki_history
WHERE snapshot= '{SNAPSHOT}' AND wiki_db='ptwiki'
AND substr(event_timestamp,1,10) BETWEEN '{START_YYYY_MM_DD}' AND '{END_YYYY_MM_DD}'
AND event_entity = "revision" AND NOT revision_is_identity_reverted 
GROUP BY weekofyear(from_unixtime(UNIX_timestamp(event_timestamp,'yyyy-MM-dd HH:mm:ss')))
'''
In [88]:
df_weekly_non_reverted_edits_2020=hive.run(query_weekly_non_reverted_edits.format(SNAPSHOT=snapshot,START_YYYY_MM_DD= start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [89]:
df_weekly_non_reverted_edits_2019=hive.run(query_weekly_non_reverted_edits.format(SNAPSHOT=snapshot,START_YYYY_MM_DD= lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [90]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki Weekly Non-reverted Edits',fontweight="bold",fontsize = 16, y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('Non-reverted Edits',  fontsize = 16)
ax.plot(df_weekly_non_reverted_edits_2020['week_n'][:-1], df_weekly_non_reverted_edits_2020['non_reverted_edits'][:-1], 'o-',label='Year 2020')
ax.plot(df_weekly_non_reverted_edits_2019['week_n'], df_weekly_non_reverted_edits_2019['non_reverted_edits'], 'o--',label='Year 2019')

ax.set_xlim(1,52)
ax.set_ylim(0,150000)
ax.vlines(40, 0, 150000, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')

ax.yaxis.set_major_formatter(thousand_formatter)

ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
plt.show()

Definition 2: number of edits which were not reverted within 48 hours

In [91]:
query_weekly_non_reverted_48hrs_edits='''

SELECT weekofyear(from_unixtime(UNIX_timestamp(event_timestamp,'yyyy-MM-dd HH:mm:ss'))) AS week_n,
 count(revision_id) AS non_reverted_edits
FROM wmf.mediawiki_history
WHERE snapshot= '{SNAPSHOT}' AND wiki_db='ptwiki'
AND substr(event_timestamp,1,10) BETWEEN '{START_YYYY_MM_DD}' AND '{END_YYYY_MM_DD}'
AND event_entity = "revision" AND (NOT revision_is_identity_reverted  OR revision_seconds_to_identity_revert > 172800)
GROUP BY weekofyear(from_unixtime(UNIX_timestamp(event_timestamp,'yyyy-MM-dd HH:mm:ss')))
'''
In [92]:
df_weekly_non_reverted_48hrs_edits_2020=hive.run(query_weekly_non_reverted_48hrs_edits.format(SNAPSHOT=snapshot,START_YYYY_MM_DD= start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [93]:
df_weekly_non_reverted_48hrs_edits_2019=hive.run(query_weekly_non_reverted_48hrs_edits.format(SNAPSHOT=snapshot,START_YYYY_MM_DD= lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [94]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki Weekly Edits Which Were Not Reverted in 48 Hours',fontweight="bold",fontsize = 16, y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('Non-reverted Edits',  fontsize = 16)
ax.plot(df_weekly_non_reverted_48hrs_edits_2020['week_n'][:-1], df_weekly_non_reverted_48hrs_edits_2020['non_reverted_edits'][:-1], 'o-',label='Year 2020')
ax.plot(df_weekly_non_reverted_48hrs_edits_2019['week_n'], df_weekly_non_reverted_48hrs_edits_2019['non_reverted_edits'], 'o--',label='Year 2019')

ax.set_xlim(1,52)
ax.set_ylim(0,150000)
ax.vlines(40, 0, 150000, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')

ax.yaxis.set_major_formatter(thousand_formatter)

ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
plt.show()

Edits by bot and non-bot registered editors

In [95]:
weekly_edits_bot_query='''
SELECT WEEKOFYEAR(rev_timestamp) AS week_n, COUNT(*) AS bot_edits
FROM event.mediawiki_revision_create
WHERE year='{YEAR_YYYY}' AND  (rev_timestamp >= '{START_YYYY_MM_DD}' AND rev_timestamp <= '{END_YYYY_MM_DD}') 
AND performer.user_id IS not null AND `database`='ptwiki'
AND  ( performer.user_is_bot=true OR performer.user_text regexp "^.*bot([^a-z].*$|$)")
GROUP BY  WEEKOFYEAR(rev_timestamp) 
'''
In [96]:
weekly_edits_nonbot_query='''
SELECT WEEKOFYEAR(rev_timestamp) AS week_n,  COUNT(*) AS nonbot_edits
FROM event.mediawiki_revision_create
WHERE year='{YEAR_YYYY}' AND  (rev_timestamp >= '{START_YYYY_MM_DD}' AND rev_timestamp <= '{END_YYYY_MM_DD}') 
AND performer.user_id IS not null AND `database`='ptwiki'
AND  ( performer.user_is_bot=false AND performer.user_text not regexp "^.*bot([^a-z].*$|$)")
GROUP BY  WEEKOFYEAR(rev_timestamp) 
'''
In [97]:
df_weekly_bot_edits_2020=hive.run(weekly_edits_bot_query.format(YEAR_YYYY=yr, START_YYYY_MM_DD=start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [98]:
df_weekly_nonbot_edits_2020=hive.run(weekly_edits_nonbot_query.format(YEAR_YYYY=yr, START_YYYY_MM_DD=start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [99]:
weekly_edits_bot_query_2='''
SELECT
       weekofyear(event_timestamp) AS week_n, COUNT(revision_id) AS edits
FROM wmf.mediawiki_history
WHERE
        event_entity = 'revision' AND
        event_type = 'create' AND
        DATE(event_timestamp) >= '{START_YYYY_MM_DD}' AND DATE(event_timestamp) <= '{END_YYYY_MM_DD}' AND
        event_user_is_anonymous = false AND
        (size(event_user_is_bot_by_historical) > 0 OR size(event_user_is_bot_by) > 0  
        OR event_user_text  regexp "^.*bot([^a-z].*$|$)" )
        AND snapshot = '{SNAPSHOT}' AND wiki_db='ptwiki'
GROUP BY weekofyear(event_timestamp) 
'''
In [100]:
df2_weekly_bot_edits_2019_query=hive.run(weekly_edits_bot_query_2.format(SNAPSHOT=snapshot, START_YYYY_MM_DD=lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [101]:
weekly_edits_nonbot_query_2='''
SELECT
       weekofyear(event_timestamp) AS week_n, COUNT(revision_id) AS edits
FROM wmf.mediawiki_history
WHERE
        event_entity = 'revision' AND
        event_type = 'create' AND
        DATE(event_timestamp) >= '{START_YYYY_MM_DD}' AND DATE(event_timestamp) <= '{END_YYYY_MM_DD}' AND
        event_user_is_anonymous = false AND 
        (size(event_user_is_bot_by_historical) = 0 AND size(event_user_is_bot_by) = 0  
        AND event_user_text not regexp "^.*bot([^a-z].*$|$)" )
        AND snapshot = '{SNAPSHOT}' AND wiki_db='ptwiki'
GROUP BY weekofyear(event_timestamp) 
'''
In [102]:
df2_weekly_nonbot_edits_2019_query=hive.run(weekly_edits_nonbot_query_2.format(SNAPSHOT=snapshot, START_YYYY_MM_DD=lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [103]:
fig, ax = plt.subplots(nrows=2, ncols=1,figsize=(16,12))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax[0].set_title('Weekly Edits by Registered Bot Editors',fontweight="bold",fontsize = 16,y=1.08)
ax[0].set_xlabel('Week',  fontsize = 16)
ax[0].set_ylabel('Edits',  fontsize = 16)
ax[0].plot(df_weekly_bot_edits_2020['week_n'], df_weekly_bot_edits_2020['bot_edits'], 'o-',label='Year 2020')
ax[0].plot(df2_weekly_bot_edits_2019_query['week_n'], df2_weekly_bot_edits_2019_query['edits'], 'o--',label='Year 2019')


ax[0].set_xlim(1,52)
ax[0].set_ylim(0,80000)
ax[0].vlines(40, 0, 80000, colors='k', linestyles='dashdot', label='Turned off')
ax[0].legend(loc='upper right')
ax[0].yaxis.set_major_formatter(thousand_formatter)
ax2 = ax[0].twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')

ax[1].set_title('Weekly Edits by Registered Non-bot Editors',fontweight="bold",fontsize = 16,y=1.08)
ax[1].set_xlabel('Week',fontsize = 16)
ax[1].set_ylabel('Edits',fontsize = 16)

ax[1].set_xlim(1,52)
ax[1].set_ylim(0,80000)
ax[1].vlines(40, 0, 80000, colors='k', linestyles='dashdot', label='Turned off')
ax3=ax[1].twiny()
ax3.set_xlim(0,12)
ax3.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax3.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax3.tick_params(length=0)
#ax3.set_xlabel('Month',fontsize=16)
ax3.xaxis.set_minor_locator(miloc)
ax3.grid(axis='x', which='minor')

ax[1].plot(df_weekly_nonbot_edits_2020['week_n'], df_weekly_nonbot_edits_2020['nonbot_edits'], 'o-',label='Year 2020')
ax[1].plot(df2_weekly_nonbot_edits_2019_query['week_n'], df2_weekly_nonbot_edits_2019_query['edits'], 'o--',label='Year 2019')
ax[1].legend(loc='upper right')
ax[1].yaxis.set_major_formatter(thousand_formatter)

plt.show()

Number of blocks

In [127]:
query_block_weekly='''
SELECT WEEKofyear(from_unixtime(UNIX_timestamp(log_timestamp,'yyyyMMddHHmmss'))) AS week_n, 
COUNT(1) AS blocks
FROM wmf_raw.mediawiki_logging
WHERE snapshot ='{SNAPSHOT}' AND log_type = 'block'  AND wiki_db='ptwiki'
AND  log_action IN ('block','reblock')
AND substr(log_timestamp,1,8) BETWEEN '{START_YYYYMMDD}' AND '{END_YYYYMMDD}'
GROUP BY WEEKofyear(from_unixtime(UNIX_timestamp(log_timestamp,'yyyyMMddHHmmss')))
ORDER BY week_n
LIMIT 100000
'''
In [133]:
df_block_weekly_2020=hive.run(query_block_weekly.format(SNAPSHOT=snapshot, START_YYYYMMDD=start_date_YYYYMMDD, END_YYYYMMDD=end_date_YYYYMMDD))
In [106]:
df_block_weekly_2019=hive.run(query_block_weekly.format(SNAPSHOT=snapshot, START_YYYYMMDD=lastyear_start_date_YYYYMMDD, END_YYYYMMDD=lastyear_end_date_YYYYMMDD))
In [134]:
fig, ax = plt.subplots(nrows=2, ncols=1,figsize=(16,12))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax[0].set_title('Ptwiki Weekly Blocks',fontweight="bold",fontsize = 16,y=1.08)
ax[0].set_xlabel('Week',  fontsize = 16)
ax[0].set_ylabel('Blocks',  fontsize = 16)
ax[0].plot(df_block_weekly_2020['week_n'], df_block_weekly_2020['blocks'], 'o-',label='Year 2020')
ax[0].plot(df_block_weekly_2019['week_n'], df_block_weekly_2019['blocks'], 'o--',label='Year 2019')

ax[0].set_xlim(1,52)
ax[0].set_ylim(0,14000)
ax[0].vlines(40, 0, 140000, colors='k', linestyles='dashdot', label='Turned off')
ax[0].legend(loc='upper right')

ax[0].yaxis.set_major_formatter(thousand_formatter)

ax2 = ax[0].twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')



ax[1].set_title('Ptwiki Weekly Blocks',fontweight="bold",fontsize = 16,y=1.08)
ax[1].set_xlabel('Week',fontsize = 16)
ax[1].set_ylabel('Blocks',fontsize = 16)

ax[1].set_xlim(1,52)
ax[1].set_ylim(0,4000)
ax[1].vlines(40, 0, 4000, colors='k', linestyles='dashdot', label='Turned off')
ax3=ax[1].twiny()
ax3.set_xlim(0,12)
ax3.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax3.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax3.tick_params(length=0)
ax3.set_xlabel('Month',fontsize=16)
ax3.xaxis.set_minor_locator(miloc)
ax3.grid(axis='x', which='minor')

ax[1].plot(df_block_weekly_2020['week_n'], df_block_weekly_2020['blocks'], 'o-',label='Year 2020')
ax[1].legend(loc='upper right')


plt.show()

Note

Investigated the spike in the 43rd week of 2019. The blocks are mainly on IP range blocks with 3 years block duration. Blocks are granted by the same actor.

Number of accounts created

In [136]:
query_accounts_created='''
SELECT WEEKofyear(from_unixtime(UNIX_timestamp(user_registration,'yyyyMMddHHmmss'))) AS week_n ,
COUNT(DISTINCT user_id) AS new_accounts
FROM wmf_raw.mediawiki_user
WHERe snapshot ='{SNAPSHOT}' AND substr(user_registration, 1,8) BETWEEN '{START_YYYYMMDD}' AND '{END_YYYYMMDD}'
 AND wiki_db='ptwiki' AND user_registration IS NOT NULL
 GROUP BY WEEKofyear(from_unixtime(UNIX_timestamp(user_registration,'yyyyMMddHHmmss'))) 
 ORDER BY week_n 
 LIMIT 10000
'''
In [137]:
df_accounts_created_2020=hive.run(query_accounts_created.format(SNAPSHOT=snapshot,START_YYYYMMDD= start_date_YYYYMMDD, END_YYYYMMDD=end_date_YYYYMMDD ))
In [110]:
df_accounts_created_2019=hive.run(query_accounts_created.format(SNAPSHOT=snapshot, START_YYYYMMDD= lastyear_start_date_YYYYMMDD, END_YYYYMMDD=lastyear_end_date_YYYYMMDD ))
In [138]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki Created Accounts',fontweight="bold",fontsize = 16,y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('New Accounts',  fontsize = 16)
ax.plot(df_accounts_created_2020['week_n'][:-1], df_accounts_created_2020['new_accounts'][:-1], 'o-',label='Year 2020')
ax.plot(df_accounts_created_2019['week_n'], df_accounts_created_2019['new_accounts'], 'o--',label='Year 2019')

ax.set_xlim(1,52)
ax.set_ylim(0,6000)
ax.vlines(40, 0, 6000, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')
ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
plt.show()

Retention rate

Definition: Out of the non-bot users who registered in the week before the previous and made at least one edit in their first 30 days, the proportion who also edited during their second 30 days.
Due to the nature of the definition, this metric surfaces 2 months after user account creation.

In [112]:
query_retention_rate='''
-- only count non-bot user 
SELECT 
          weekofyear(1st_month.user_creation_date) AS user_creation_cohort_week, 
          ROUND(SUM(CASE WHEN 2nd_month.edits IS NOT NULL THEN 1 ELSE 0 END) /COUNT(1), 6)          AS retention_rate
FROM      ( 
                   SELECT   event_user_text                           AS user_name, 
                            event_user_id                             AS user_id, 
                            substr(event_user_creation_timestamp,1,10) AS user_creation_date, 
                            count(*)                                  AS edits 
                   FROM     wmf.mediawiki_history 
                   WHERE    snapshot = "{SNAPSHOT}" 
                   AND      wiki_db='ptwiki' 
                   AND      event_entity = "revision" 
                   AND      event_type = "create" 
                   AND      NOT event_user_is_created_by_system 
                   AND      size(event_user_is_bot_by) = 0 
                   AND      event_user_creation_timestamp BETWEEN "{START_YYYY_MM_DD}" AND      "{END_YYYY_MM_DD}" 
                   AND      unix_timestamp(event_timestamp, "yyyy-MM-dd HH:mm:ss.0") < (unix_timestamp(event_user_creation_timestamp, "yyyy-MM-dd HH:mm:ss.0") + (30*24*60*60)) 
                   GROUP BY event_user_text, 
                            event_user_id, 
                            event_user_creation_timestamp
                            ) 1st_month 
LEFT JOIN 
          ( 
                   SELECT   event_user_text                           AS user_name, 
                            event_user_id                             AS user_id, 
                            substr(event_user_creation_timestamp,1,10) AS user_creation_date, 
                            count(*)                                  AS edits 
                   FROM     wmf.mediawiki_history 
                   WHERE    snapshot = "{SNAPSHOT}" 
                   AND      wiki_db='ptwiki' 
                   AND      event_entity = "revision" 
                   AND      event_type = "create" 
                   AND      NOT event_user_is_created_by_system 
                   AND      size(event_user_is_bot_by) = 0 
                   AND      event_user_creation_timestamp BETWEEN "{START_YYYY_MM_DD}" AND      "{END_YYYY_MM_DD}" 
                   AND      unix_timestamp(event_timestamp, "yyyy-MM-dd HH:mm:ss.0") >= (unix_timestamp(event_user_creation_timestamp, "yyyy-MM-dd HH:mm:ss.0") + (30*24*60*60)) 
                   AND      unix_timestamp(event_timestamp, "yyyy-MM-dd HH:mm:ss.0") < (unix_timestamp(event_user_creation_timestamp, "yyyy-MM-dd HH:mm:ss.0")  + (60*24*60*60)) 
                   GROUP BY event_user_text, 
                            event_user_id, 
                            event_user_creation_timestamp
                            ) 2nd_month 
ON        ( 
                    1st_month.user_id = 2nd_month.user_id 
          AND       1st_month.user_creation_date = 2nd_month.user_creation_date)
GROUP BY weekofyear(1st_month.user_creation_date)        
'''
In [113]:
df_retention_rate=hive.run(query_retention_rate.format(SNAPSHOT=snapshot,START_YYYY_MM_DD=start_date_YYYY_MM_DD, END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [114]:
df_retention_rate_2020=df_retention_rate[:-8]
In [115]:
df_retention_rate_2019=hive.run(query_retention_rate.format(SNAPSHOT=snapshot,START_YYYY_MM_DD=lastyear_start_date_YYYY_MM_DD, END_YYYY_MM_DD=lastyear_end_date_YYYY_MM_DD))
In [116]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki Retention Rate',fontweight="bold",fontsize = 16,y=1.08)
ax.set_xlabel('User Cohort (Creation Week)',  fontsize = 16)
ax.set_ylabel('Retention Rate',  fontsize = 16)
ax.plot(df_retention_rate_2020['user_creation_cohort_week'], df_retention_rate_2020['retention_rate'], 'o-',label='Year 2020')
ax.plot(df_retention_rate_2019['user_creation_cohort_week'], df_retention_rate_2019['retention_rate'], 'o--',label='Year 2019')

ax.set_xlim(1,52)
ax.set_ylim(0,0.3)
ax.vlines(40, 0, 0.3, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')
ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
plt.show()

Checkuser checks

In [117]:
query_check_user='''
 SELECT weekofyear(from_unixtime(UNIX_timestamp(cul_timestamp))) AS week_n,
   CASE WHEN cul_type='investigate' THEN 'investigate' ELSE 'checkuser' END AS tool,
   COUNT(cul_id) AS requests
 FROM cu_log WHERE substr(cul_timestamp,1,8)  BETWEEN '{START_YYYYMMDD}' AND '{END_YYYYMMDD}'
 GROUP BY weekofyear(from_unixtime(UNIX_timestamp(cul_timestamp)))
 , CASE WHEN cul_type='investigate' THEN 'investigate' ELSE 'checkuser' END 
 ORDER BY week_n
 LIMIT 10000
'''
In [139]:
df_check_user_2020=mariadb.run(query_check_user.format(START_YYYYMMDD=start_date_YYYYMMDD, END_YYYYMMDD=end_date_YYYYMMDD),'ptwiki')
In [119]:
df_check_user_2019=mariadb.run(query_check_user.format(START_YYYYMMDD=lastyear_start_date_YYYYMMDD, END_YYYYMMDD=lastyear_end_date_YYYYMMDD),'ptwiki')
In [140]:
fig, ax = plt.subplots(nrows=2, ncols=1,figsize=(16,12))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax[0].set_title('Checkuser Requests',fontweight="bold",fontsize = 16,y=1.08)
ax[0].set_xlabel('Week',  fontsize = 16)
ax[0].set_ylabel('Requests',  fontsize = 16)
ax[0].plot(df_check_user_2020[df_check_user_2020['tool']=='checkuser']['week_n'], df_check_user_2020[df_check_user_2020['tool']=='checkuser']['requests'], 'o-',label='Year 2020')
ax[0].plot(df_check_user_2019[df_check_user_2019['tool']=='checkuser']['week_n'], df_check_user_2019[df_check_user_2019['tool']=='checkuser']['requests'], 'o--',label='Year 2019')


ax[0].set_xlim(1,52)
ax[0].set_ylim(0,300)
ax[0].vlines(40, 0, 300, colors='k', linestyles='dashdot', label='Turned off')
ax[0].legend(loc='upper right')
ax2 = ax[0].twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')

ax[1].set_title('Special Investigate Requests',fontweight="bold",fontsize = 16,y=1.08)
ax[1].set_xlabel('Week',fontsize = 16)
ax[1].set_ylabel('Requests',fontsize = 16)

ax[1].set_xlim(1,52)
ax[1].set_ylim(0,300)
ax[1].vlines(40, 0, 300, colors='k', linestyles='dashdot', label='Turned off')
ax3=ax[1].twiny()
ax3.set_xlim(0,12)
ax3.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax3.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax3.tick_params(length=0)
#ax3.set_xlabel('Month',fontsize=16)
ax3.xaxis.set_minor_locator(miloc)
ax3.grid(axis='x', which='minor')

ax[1].plot(df_check_user_2020[df_check_user_2020['tool']=='investigate']['week_n'], df_check_user_2020[df_check_user_2020['tool']=='investigate']['requests'], 'o-',label='Year 2020')
ax[1].plot(df_check_user_2019[df_check_user_2019['tool']=='investigate']['week_n'], df_check_user_2019[df_check_user_2019['tool']=='investigate']['requests'], 'o--',label='Year 2019')


ax[1].legend(loc='upper right')
plt.show()

Quality of edits with ORES

We don't have a perfect model to measure edits quality. ORES model is known for being biased against IP editors. (Reference: https://arxiv.org/pdf/2006.03121.pdf). If including IP editors in baseline measurement. It's not a surprise that the damages/edits rate for all edits dropped since the 40th week when IP editing was turned off. If we exclude IP editors in baseline, we would get an apple to apple comparison of edits quality of registered editors. Meanwhile, ORES model is tested to be friendly to bot editors, not marking bot edits as damages usually. However, the number of bot edits could fluctuate dramatically month by month due to the nature of the bot function. To reduce the fluctuation in monthly trends, we'd better exclude bot editors in analysis of registered editors.

Following analysis will focus on edits quality by all editors and by registered non-bot editors only.

In [143]:
query_edits_quality_damaging_model='''
SELECT WEEKOFYEAR(rev_timestamp) AS week_n, 
ROUND(SUM(case when scores["damaging"].prediction[0]='true' then 1 else 0 end)/count(rev_id) ,4) AS damaging_rate
FROM event.mediawiki_revision_score
WHERE year='{YEAR}' AND  (rev_timestamp >= '{START_YYYY_MM_DD}' AND rev_timestamp < '{END_YYYY_MM_DD}') 
AND `database`='ptwiki'
GROUP BY WEEKOFYEAR(rev_timestamp)
ORDER BY week_n
LIMIT 10000
'''
In [144]:
df_edits_quality_damaging_rate=hive.run(query_edits_quality_damaging_model.format(YEAR=yr, START_YYYY_MM_DD=start_date_YYYY_MM_DD , END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [147]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki ORES Damaging Rate (Including IP Editors)',fontweight="bold",fontsize = 16,y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('Damages/Edits Rate',  fontsize = 16)
ax.plot(df_edits_quality_damaging_rate['week_n'], df_edits_quality_damaging_rate['damaging_rate'], 'o-',label='Year 2020')

ax.set_xlim(1,52)
ax.set_ylim(0,0.3)
ax.legend(loc='upper right')
ax.vlines(40, 0, 0.3, colors='k', linestyles='dashdot', label='Turned off')
ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
plt.show()
In [121]:
query_edits_quality_damaging_model_nonbot_registered_user='''
SELECT WEEKOFYEAR(rev_timestamp) AS week_n, 
ROUND(SUM(case when scores["damaging"].prediction[0]='true' then 1 else 0 end)/count(rev_id) ,4) AS damaging_rate
FROM event.mediawiki_revision_score
WHERE year='{YEAR}' AND  (rev_timestamp >= '{START_YYYY_MM_DD}' AND rev_timestamp <= '{END_YYYY_MM_DD}') 
AND `database`='ptwiki' 
-- exclude IP editors in baseline as ORES is biased against anonymous editors
AND performer.user_id IS NOT NULL
-- exclude bot editors
AND not performer.user_is_bot
AND performer.user_text not regexp "^.*bot([^a-z].*$|$)"
GROUP BY WEEKOFYEAR(rev_timestamp)
ORDER BY week_n
LIMIT 10000
'''
In [122]:
df_edits_quality_damaging_rate_nonbot_registered_user=hive.run(query_edits_quality_damaging_model_nonbot_registered_user.format(YEAR=yr, START_YYYY_MM_DD=start_date_YYYY_MM_DD , END_YYYY_MM_DD=end_date_YYYY_MM_DD))
In [141]:
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=(16,6))
fig.tight_layout(pad=1.08, w_pad=0.5, h_pad=6)
ax.set_title('Ptwiki ORES Damaging Rate (Registered Non-bot Editors)',fontweight="bold",fontsize = 16,y=1.08)
ax.set_xlabel('Week',  fontsize = 16)
ax.set_ylabel('Damages/Edits Rate',  fontsize = 16)
ax.plot(df_edits_quality_damaging_rate_nonbot_registered_user['week_n'], df_edits_quality_damaging_rate_nonbot_registered_user['damaging_rate'], 'o-',label='Year 2020')

ax.set_xlim(1,52)
ax.set_ylim(0,0.15)
ax.vlines(40, 0, 0.15, colors='k', linestyles='dashdot', label='Turned off')
ax.legend(loc='upper right')
ax2 = ax.twiny()  # instantiate a second axes that shares the same x-axis
ax2.set_xlim(0,12)
ax2.set_xticklabels(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
ax2.set_xticks([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5,9.5,10.5,11.5])
ax2.tick_params(length=0)
ax2.set_xlabel('Month',fontsize=16)

miloc = plt.MultipleLocator(1)

ax2.xaxis.set_minor_locator(miloc)
ax2.grid(axis='x', which='minor')
plt.show()
In [142]:
print("This dashboard was generated on {} at {}. It is maintained by Jennifer Wang, Product Analytics. If you have questions or feedback please email jwang@wikimedia.org or product-analytics@wikimedia.org.".format(today.strftime("%Y-%m-%d"), now.strftime("%H:%M %Z")))
This dashboard was generated on 2021-01-04 at 22:41 UTC. It is maintained by Jennifer Wang, Product Analytics. If you have questions or feedback please email jwang@wikimedia.org or product-analytics@wikimedia.org.
In [ ]: