import plotly.io as pio
pio.renderers.default = "notebook_connected+plotly_mimetype"Download data¶
More about the URL below:
The code below handles the download and unzipping; you are not expected to write that part yourself.
import urllib.request
from pathlib import Path
# Everything this notebook fetches is stored under data/.
data_dir = Path("data")
zip_path = data_dir / "electricity.zip"

# World Bank API request: indicator EG.ELC.ACCS.ZS, all countries,
# years 2002-2022, delivered in the "csv" download format.
url = "https://api.worldbank.org/v2/country/all/indicator/EG.ELC.ACCS.ZS?source=2&date=2002:2022&downloadformat=csv"

# Make sure data/ exists (no error if it already does), then fetch the archive.
data_dir.mkdir(exist_ok=True)
urllib.request.urlretrieve(url, zip_path)
print(f"Downloaded {zip_path}.")
Downloaded data/electricity.zip.
from zipfile import ZipFile
# Unpack the downloaded archive into data/electricity/.
electricity_dir = data_dir / "electricity"
with ZipFile(zip_path, "r") as zip_ref:
    # The archive is closed automatically when the with-block exits.
    zip_ref.extractall(electricity_dir)
print(f"Extracted to {electricity_dir}.")
# Output: Extracted to data/electricity.
Load data¶
import pandas as pd
# Preferred file: the exact name seen when this notebook was written.
electricity_file = electricity_dir / "API_EG.ELC.ACCS.ZS_DS2_EN_csv_v2_225978.csv"
if not electricity_file.exists():
    # NOTE(review): the numeric suffix looks like a release identifier that can
    # differ between downloads. Fall back to any data file for this indicator;
    # the "Metadata_*" companions do not match this pattern.
    candidates = sorted(electricity_dir.glob("API_EG.ELC.ACCS.ZS_DS2_*.csv"))
    if candidates:
        electricity_file = candidates[-1]
# header=2: column names are taken from the third line of the file; the lines
# above it are skipped. If no file was found, read_csv raises as before.
electricity = pd.read_csv(electricity_file, header=2)
electricity<frozen importlib._bootstrap>:491: RuntimeWarning:
The global interpreter lock (GIL) has been enabled to load module 'pandas._libs.pandas_parser', which has not declared that it can run safely without the GIL. To override this behavior and keep the GIL disabled (at your own risk), run with PYTHON_GIL=0 or -Xgil=0.
Loading...
# Reshape from wide (one column per year) to long (one row per country-year):
#   1. discard the two indicator description columns,
#   2. melt every remaining non-id column into Year / value pairs,
#   3. drop rows that contain any missing value.
electricity_by_country_by_year = (
    electricity
    .drop(["Indicator Name", "Indicator Code"], axis="columns")
    .melt(
        id_vars=["Country Name", "Country Code"],
        var_name="Year",
        value_name="Access to electricity (% of population)",
    )
    .dropna()
)
electricity_by_country_by_yearLoading...
Plot¶
import plotly.express as px
# Line chart of electricity access over the years, one coloured line per country.
fig = px.line(
    data_frame=electricity_by_country_by_year,
    x="Year",
    y="Access to electricity (% of population)",
    color="Country Name",
    title="Access to electricity over time",
)
fig.show()Loading...
Loading...
When Plotly shows that many tick values on an axis (Year, in this case), it indicates that the column holds strings rather than numbers.
Fix year dtype¶
electricity_by_country_by_year.dtypesCountry Name object
Country Code object
Year object
Access to electricity (% of population) float64
dtype: objectelectricity_by_country_by_year["Year"] = electricity_by_country_by_year["Year"].astype(int)Filter¶
# Mean access to electricity per country across all available years.
avg_electricity = (
    electricity_by_country_by_year
    .groupby("Country Name")["Access to electricity (% of population)"]
    .mean()
)
avg_electricityCountry Name
Afghanistan 61.876190
Africa Eastern and Southern 32.881725
Africa Western and Central 45.452160
Albania 99.757143
Algeria 99.142857
...
West Bank and Gaza 99.742857
World 85.250561
Yemen, Rep. 63.404762
Zambia 30.523810
Zimbabwe 40.633333
Name: Access to electricity (% of population), Length: 263, dtype: float64bottom_cutoff = avg_electricity.quantile(0.05)
bottom_cutoffnp.float64(21.837142857142858)bottom_countries = avg_electricity[avg_electricity < bottom_cutoff]
bottom_countriesCountry Name
Burkina Faso 14.676190
Burundi 6.547619
Central African Republic 10.990476
Chad 7.566667
Congo, Dem. Rep. 14.438095
Guinea-Bissau 16.771429
Liberia 14.768750
Madagascar 21.823810
Malawi 9.547619
Mozambique 20.528571
Niger 14.133333
Papua New Guinea 16.957143
Sierra Leone 17.604762
South Sudan 4.475000
Name: Access to electricity (% of population), dtype: float64bottom_countries_electricity = electricity_by_country_by_year[electricity_by_country_by_year["Country Name"].isin(bottom_countries.index)]
# Same line chart as before, restricted to the countries whose average access
# falls below the 5% quantile cutoff.
fig = px.line(
    data_frame=bottom_countries_electricity,
    x="Year",
    y="Access to electricity (% of population)",
    color="Country Name",
    title="Access to electricity over time by country, 5th percentile",
)
fig.show()Loading...
# Read the World Happiness Report workbook directly from its URL
# (needs network access at run time).
happiness = pd.read_excel(
    io="https://files.worldhappiness.report/WHR25_Data_Figure_2.1v3.xlsx",
)
happinessLoading...
# Life evaluation over the years, one coloured line per country.
fig = px.line(
    happiness,
    x="Year",
    y="Life evaluation (3-year average)",
    color="Country name",
    title="World Happiness",
)
# show() renders the figure and returns None, so its result must not be
# assigned back to fig (that would discard the figure object).
fig.show()
# Notebook output placeholder: Loading...
happiness.dtypesYear int64
Rank int64
Country name object
Life evaluation (3-year average) float64
Lower whisker float64
Upper whisker float64
Explained by: Log GDP per capita float64
Explained by: Social support float64
Explained by: Healthy life expectancy float64
Explained by: Freedom to make life choices float64
Explained by: Generosity float64
Explained by: Perceptions of corruption float64
Dystopia + residual float64
dtype: objectSingle country¶
# Country to examine in detail; it is reused for the happiness filter and the
# figure title further down.
country_name = "Madagascar"
# Keep only the electricity rows belonging to that country.
madagascar_electricity = electricity_by_country_by_year.loc[
    electricity_by_country_by_year["Country Name"].eq(country_name)
]
madagascar_electricityLoading...
# Keep only the happiness rows for the selected country. This dataset spells
# the column "Country name" with a lowercase "n".
madagascar_happiness = happiness.loc[happiness["Country name"].eq(country_name)]
madagascar_happinessLoading...
Two Y Axes¶
Based on the Plotly documentation example for charts with two y-axes.
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# A single-cell subplot grid whose only cell is allowed a secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Primary (secondary_y=False) axis: electricity access for the selected country.
fig.add_trace(
    go.Scatter(
        x=madagascar_electricity["Year"],
        y=madagascar_electricity["Access to electricity (% of population)"],
        name="Access to electricity",
    ),
    secondary_y=False,
)

# Secondary (secondary_y=True) axis: life evaluation for the same country.
fig.add_trace(
    go.Scatter(
        x=madagascar_happiness["Year"],
        y=madagascar_happiness["Life evaluation (3-year average)"],
        name="Life evaluation (3-year average)",
    ),
    secondary_y=True,
)

# Overall title, including the country being shown.
fig.update_layout(title_text=f"Access to electricity vs. Happiness, {country_name}")

# Shared x-axis label.
fig.update_xaxes(title_text="Year")

# Each y-axis gets its own label because the two series use different units.
fig.update_yaxes(title_text="% of population", secondary_y=False)
fig.update_yaxes(title_text="Happiness score", secondary_y=True)
fig.show()Loading...