{
"cells": [
{
"cell_type": "markdown",
"id": "0a97b2a0",
"metadata": {},
"source": [
"# Recreate Gapminder"
]
},
{
"cell_type": "markdown",
"id": "10ae0f9c",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e75a6d3",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go"
]
},
{
"cell_type": "markdown",
"id": "bc574529",
"metadata": {},
"source": [
"## Load the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "088f9d42",
"metadata": {},
"outputs": [],
"source": [
"# Load the Gapminder indicator table (comma-separated, which is the pandas default)\n",
"df = pd.read_csv('Gapminder-data.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3323617",
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a4f03ca",
"metadata": {},
"outputs": [],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9515c150",
"metadata": {},
"outputs": [],
"source": [
"# Load the per-attribute settings table; its first column becomes the row index\n",
"df_info = pd.read_csv('Gapminder-info.csv', index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c27a468d",
"metadata": {},
"outputs": [],
"source": [
"df_info"
]
},
{
"cell_type": "markdown",
"id": "cbabf0e8",
"metadata": {},
"source": [
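"Sort by year, then by population (largest first), so that within each year the larger bubbles are drawn first and the smaller ones stay visible on top"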
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4d1def4",
"metadata": {},
"outputs": [],
"source": [
"# Order rows by year (ascending), then by population (descending) within each year\n",
"df = df.sort_values(by=['Year', 'Population'], ascending=[True, False])"
]
},
{
"cell_type": "markdown",
"id": "d096bd8f",
"metadata": {},
"source": [
"## Create the bubble chart"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "27ea103f",
"metadata": {},
"outputs": [],
"source": [
"# Bubble colour assigned to each value of the 'Region' column\n",
"color_map = dict(\n",
"    Asia='#ff5872',\n",
"    Americas='#7feb00',\n",
"    Africa='#00d5e9',\n",
"    Europe='#ffe700',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14d7d515",
"metadata": {},
"outputs": [],
"source": [
"# Static bubble chart for the year 2020: income on a log-scaled x axis against\n",
"# life expectancy, bubble size taken from population and colour from region.\n",
"fig = px.scatter(\n",
"    df.query('Year==2020'),\n",
"    x='Income',\n",
"    y='Life expectancy',\n",
"    color='Region',\n",
"    size='Population',\n",
"    size_max=60,\n",
"    log_x=True,\n",
"    hover_name='Country',\n",
"    # Switch off the per-column hover rows so only the country name is shown\n",
"    hover_data={c: False for c in df.columns},\n",
"    color_discrete_map=color_map,\n",
"    # <br> starts a new line inside a Plotly title; a raw line break inside the\n",
"    # quoted literal would be a syntax error\n",
"    title='Gapminder<br><sup>Data from gapminder.org, CC-BY license</sup>'\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32b109e2",
"metadata": {},
"outputs": [],
"source": [
"# Outline every bubble in black and make it slightly transparent\n",
"marker_style = dict(line=dict(color='black', width=0.8), opacity=0.9)\n",
"fig.update_traces(marker=marker_style)\n",
"\n",
"# White plotting area with gray text\n",
"fig.update_layout(plot_bgcolor='white', font=dict(color='dimgray'))\n",
"\n",
"# Both axes get the same axis line, grid colour and spike-line settings\n",
"axis_style = dict(\n",
"    linewidth=1,\n",
"    linecolor='dimgray',\n",
"    gridcolor='lightgray',\n",
"    showspikes=True,\n",
"    spikethickness=1,\n",
"    spikecolor='dimgray'\n",
")\n",
"fig.update_xaxes(**axis_style)\n",
"fig.update_yaxes(**axis_style)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "e5629a50",
"metadata": {},
"source": [
"## Add animation"
]
},
{
"cell_type": "markdown",
"id": "da14209b",
"metadata": {},
"source": [
"Add the year to the background"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "030957f8",
"metadata": {},
"outputs": [],
"source": [
"def background_year(year, xpos=2000, ypos=50):\n",
"    \"\"\"Build the properties of a one-point text trace that displays the year.\n",
"\n",
"    The returned dictionary places the text form of `year` at (`xpos`, `ypos`),\n",
"    centered on that point, in size-200 light-gray type and without a legend entry.\n",
"    \"\"\"\n",
"    year_label = f'{year}'\n",
"    text_style = {'size': 200, 'color': 'lightgray'}\n",
"    return {\n",
"        'x': [xpos],\n",
"        'y': [ypos],\n",
"        'mode': 'text',\n",
"        'showlegend': False,\n",
"        'text': [year_label],\n",
"        'textposition': 'middle center',\n",
"        'textfont': text_style,\n",
"    }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be55748f",
"metadata": {},
"outputs": [],
"source": [
"def gapminder_fig(xaxis='Income', yaxis='Life expectancy', min_year=2010):\n",
"    \"\"\"Build the animated Gapminder bubble chart for two attributes.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    xaxis, yaxis : str\n",
"        Columns of `df` plotted on the x and y axis. Each must also be a row\n",
"        label of `df_info`, whose 'LogScale', 'Min', 'Max', 'Mid' and 'Meaning'\n",
"        columns configure the axis scale, range, year position and caption.\n",
"    min_year : int, default 2010\n",
"        Earliest value of the 'Year' column to include in the animation.\n",
"\n",
"    Returns\n",
"    -------\n",
"    plotly.graph_objects.Figure\n",
"        An 800x600 figure animated over 'Year', with the year drawn as large\n",
"        text behind the bubbles.\n",
"    \"\"\"\n",
"    color_map = {\n",
"        'Asia': '#ff5872',\n",
"        'Americas': '#7feb00',\n",
"        'Africa': '#00d5e9',\n",
"        'Europe': '#ffe700'\n",
"    }\n",
"    data = df[df['Year'] >= min_year]\n",
"    # The year label sits at the 'Mid' position of both axes in every frame\n",
"    year_pos = dict(xpos=df_info.loc[xaxis, 'Mid'], ypos=df_info.loc[yaxis, 'Mid'])\n",
"    fig = px.scatter(\n",
"        data,\n",
"        x=xaxis,\n",
"        y=yaxis,\n",
"        color='Region',\n",
"        size='Population',\n",
"        size_max=60,\n",
"        log_x=df_info.loc[xaxis, 'LogScale'],\n",
"        log_y=df_info.loc[yaxis, 'LogScale'],\n",
"        hover_name='Country',\n",
"        # Switch off the per-column hover rows so only the country name is shown\n",
"        hover_data={c: False for c in df.columns},\n",
"        color_discrete_map=color_map,\n",
"        # <br> starts a new line inside a Plotly title; a raw line break inside\n",
"        # the quoted literal would be a syntax error\n",
"        title='Gapminder<br><sup>Data from gapminder.org, CC-BY license</sup>',\n",
"        animation_frame='Year',\n",
"        animation_group='Country',\n",
"        range_x=[df_info.loc[xaxis, 'Min'], df_info.loc[xaxis, 'Max']],\n",
"        range_y=[df_info.loc[yaxis, 'Min'], df_info.loc[yaxis, 'Max']]\n",
"    )\n",
"    # Adjust markers\n",
"    fig.update_traces(marker=dict(line=dict(color='black', width=0.8), opacity=0.9))\n",
"    # Adjust figure layout\n",
"    fig.update_layout(\n",
"        plot_bgcolor='white',\n",
"        font=dict(color='dimgray')\n",
"    )\n",
"    # Adjust axes and grid (identical settings for both axes)\n",
"    axis_style = dict(\n",
"        linewidth=1,\n",
"        linecolor='dimgray',\n",
"        gridcolor='lightgray',\n",
"        showspikes=True,\n",
"        spikethickness=1,\n",
"        spikecolor='dimgray'\n",
"    )\n",
"    fig.update_xaxes(**axis_style)\n",
"    fig.update_yaxes(**axis_style)\n",
"    # Show the year in the background\n",
"    if fig.frames:\n",
"        first_year = fig.frames[0].name  # Get the year in the first frame\n",
"    else:\n",
"        # Plotly Express creates no frames when the data holds a single year,\n",
"        # so take the label from the data instead of indexing an empty tuple\n",
"        first_year = data['Year'].min()\n",
"    # Add the scatter plot showing the year to the figure\n",
"    fig.add_trace(go.Scatter(background_year(first_year, **year_pos)))\n",
"    # Change the order (put the year at the beginning so it is drawn first)\n",
"    fig.data = (fig.data[-1], ) + fig.data[:-1]\n",
"    # Add the year to each of the frames\n",
"    for frame in fig.frames:\n",
"        frame.data = (background_year(frame.name, **year_pos), ) + frame.data\n",
"    fig.update(frames=fig.frames)\n",
"    # Add annotations to the axes\n",
"    fig.add_annotation(\n",
"        x=1, xref='x domain',\n",
"        y=0, yref='y domain',\n",
"        text=df_info.loc[xaxis, 'Meaning'],\n",
"        showarrow=False,\n",
"        align='right'\n",
"    )\n",
"    fig.add_annotation(\n",
"        x=0, xref='x domain',\n",
"        y=1, yref='y domain',\n",
"        text=df_info.loc[yaxis, 'Meaning'],\n",
"        showarrow=False,\n",
"        valign='top',\n",
"        textangle=-90\n",
"    )\n",
"    # Set the size of the figure (fixed, so it doesn't change with window size)\n",
"    fig.update_layout(autosize=False, width=800, height=600)\n",
"    return fig\n",
"\n",
"\n",
"fig = gapminder_fig()\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "2a7ea93d",
"metadata": {},
"source": [
"## Use Dash"
]
},
{
"cell_type": "markdown",
"id": "2851266c",
"metadata": {},
"source": [
"- Dash html components: https://dash.plotly.com/dash-html-components\n",
"- Dash core components: https://dash.plotly.com/dash-core-components"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5c9e9d57",
"metadata": {},
"outputs": [],
"source": [
"import dash\n",
"from dash import html, dcc\n",
"from jupyter_dash import JupyterDash\n",
"from dash.dependencies import Input, Output"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a6a5520",
"metadata": {},
"outputs": [],
"source": [
"attributes = ['Income', 'Life expectancy', 'Fertility', 'Child mortality']\n",
"\n",
"external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']\n",
"app = JupyterDash(__name__, external_stylesheets=external_stylesheets)\n",
"\n",
"\n",
"def axis_dropdown(label, component_id, default):\n",
"    \"\"\"Return a captioned dropdown for choosing the attribute shown on one axis.\n",
"\n",
"    `label` is the caption text, `component_id` the id the callback listens to,\n",
"    and `default` the initially selected entry of `attributes`.\n",
"    \"\"\"\n",
"    return html.Div(\n",
"        [\n",
"            html.P(label),\n",
"            dcc.Dropdown(\n",
"                id=component_id,\n",
"                options=[{'label': a, 'value': a} for a in attributes],\n",
"                value=default,\n",
"                clearable=False\n",
"            )\n",
"        ],\n",
"        # Keys of a Dash style dictionary are camelCased CSS property names\n",
"        style={'width': '390px', 'display': 'inline-block', 'paddingRight': '10px'}\n",
"    )\n",
"\n",
"\n",
"app.layout = html.Div([\n",
"    html.H1(\n",
"        'Interactive data visualization',\n",
"        style={'fontSize': 50}\n",
"    ),\n",
"    axis_dropdown('X axis', 'dropdown_x', 'Income'),\n",
"    axis_dropdown('Y axis', 'dropdown_y', 'Life expectancy'),\n",
"    dcc.Graph(\n",
"        id='plot',\n",
"        figure=gapminder_fig()\n",
"    )\n",
"])\n",
"\n",
"\n",
"@app.callback(\n",
"    Output('plot', 'figure'),\n",
"    [Input('dropdown_x', 'value'), Input('dropdown_y', 'value')]\n",
")\n",
"def update_plot(value_x, value_y):\n",
"    \"\"\"Rebuild the chart from the attributes currently selected in both dropdowns.\"\"\"\n",
"    return gapminder_fig(xaxis=value_x, yaxis=value_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "de4ca77d",
"metadata": {},
"outputs": [],
"source": [
"app.run_server(debug=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a670d1c6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}