Load data¶
The data was downloaded from Read the Docs, where this site is hosted. See the Read the Docs documentation.
import pandas as pd
analytics = pd.read_csv("assets/data/readthedocs_traffic_analytics_computing-in-context_2024-09-15_2024-12-14.csv")
analytics
<frozen importlib._bootstrap>:491: RuntimeWarning: The global interpreter lock (GIL) has been enabled to load module 'pandas._libs.pandas_parser', which has not declared that it can run safely without the GIL. To override this behavior and keep the GIL disabled (at your own risk), run with PYTHON_GIL=0 or -Xgil=0.
Loading...
Cleaning¶
analytics.dtypes
Date object
Version object
Path object
Views int64
dtype: object
analytics["Date"] = pd.to_datetime(analytics["Date"])
analytics
Loading...
Ensure legend and x axis are in order.
analytics = analytics.sort_values(["Path", "Date"])
Only include real pages.
html_only = analytics["Path"].str.endswith(".html")
no_redirects = ~analytics["Path"].str.startswith("/redirects/")
analytics = analytics[html_only & no_redirects]
Pageviews per day by page¶
def add_important_dates(fig):
    """Mark key course dates on ``fig`` with dashed, annotated vertical lines.

    Calls ``fig.add_vline`` once per date, so ``fig`` is modified in place;
    nothing is returned.

    Args:
        fig: Figure-like object exposing ``add_vline``.
    """
    # Imported locally so the function does not depend on a later cell
    # having already run its own ``datetime`` import.
    from datetime import datetime, timezone

    def epoch_ms(year, month, day):
        # Workaround from the linked issue: pass ``x`` as milliseconds since
        # the epoch rather than as a datetime.
        # https://github.com/plotly/plotly.py/issues/3065#issuecomment-778652215
        # Pinned to UTC: timestamp() on a naive datetime is interpreted in
        # the machine's local time zone, which would shift the line by the
        # local UTC offset depending on where the notebook is executed.
        moment = datetime(year, month, day, tzinfo=timezone.utc)
        return moment.timestamp() * 1000

    # https://bulletin.columbia.edu/sipa/registration/
    fig.add_vline(
        x=epoch_ms(2024, 11, 18),
        line_dash="dash",
        annotation_text="Registration start",
    )
    fig.add_vline(
        x=epoch_ms(2024, 12, 3),
        line_dash="dash",
        annotation_text="Test",
    )


import plotly.io as pio
# Default Plotly renderer(s) used by every figure shown below.
pio.renderers.default = "notebook_connected+plotly_mimetype"

import plotly.express as px
from datetime import datetime

# One line per page path, plotted over time.
fig = px.line(
    data_frame=analytics,
    x="Date",
    y="Views",
    color="Path",
    title="Pageviews per day",
)
add_important_dates(fig)
fig.show()Loading...
Loading...
Site-wide pageviews per day¶
def aggregate_by(df, offset):
    """Total the ``Views`` column of ``df`` per ``offset`` period of ``Date``.

    Args:
        df: DataFrame with a datetime ``Date`` column and a ``Views`` column.
        offset: Frequency passed to ``DataFrame.resample`` (e.g. ``"D"``
            or ``"W"``).

    Returns:
        DataFrame indexed by period with a single ``Views`` column; the
        final period is omitted.
    """
    # Select the relevant column *before* summing, so the remaining columns
    # are never aggregated only to be thrown away afterwards.
    aggregated = df.resample(offset, on="Date")[["Views"]].sum()
    # don't include the last period of the dataset, as it's incomplete and
    # thus misleadingly low
    return aggregated.iloc[:-1]


views_by_day = aggregate_by(analytics, "D")
views_by_dayLoading...
# No ``x`` is given, so Plotly Express falls back to the DataFrame index
# (the resampled dates) for the horizontal axis.
fig = px.line(
    data_frame=views_by_day,
    y="Views",
    title="Pageviews per day",
)
add_important_dates(fig)
fig.show()Loading...
Site-wide pageviews per week (total)¶
views_by_week = aggregate_by(analytics, "W")
# Weekly ("W") resampling labels each bin by its right edge, i.e. the Sunday
# that ends the week, so the index holds week-ending dates.
# Renaming the index: https://stackoverflow.com/a/19851521/358804
views_by_week.index.names = ["Week ending"]
views_by_weekLoading...
px.line(
views_by_week,
y="Views",
title="Pageviews per week",
)Loading...