{
"cells": [
{
"cell_type": "markdown",
"id": "2e8cff9a",
"metadata": {},
"source": [
"# Goal: Recreate Gapminder's Bubble Chart*"
]
},
{
"cell_type": "markdown",
"id": "0592be0b",
"metadata": {},
"source": [
"![Gapminder](Gapminder.png)"
]
},
{
"cell_type": "markdown",
"id": "7205f9df",
"metadata": {},
"source": [
"*As much as possible..."
]
},
{
"cell_type": "markdown",
"id": "a51e5440",
"metadata": {},
"source": [
"See https://www.gapminder.org/tools/"
]
},
{
"cell_type": "markdown",
"id": "fea83b75",
"metadata": {},
"source": [
"## Import libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f2b16b8",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.graph_objs as go"
]
},
{
"cell_type": "markdown",
"id": "bc574529",
"metadata": {},
"source": [
"## Load the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "088f9d42",
"metadata": {},
"outputs": [],
"source": [
"# Read the main Gapminder table from a comma-separated file in the working directory\n",
"df = pd.read_csv(\n",
"    'Gapminder-data.csv',\n",
"    sep=','\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3323617",
"metadata": {},
"outputs": [],
"source": [
"# Preview the first rows of the freshly loaded table\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b703d8cd-31f4-44c3-b25a-5e5fcb57640f",
"metadata": {},
"outputs": [],
"source": [
"# Summary statistics for the loaded table\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9515c150",
"metadata": {},
"outputs": [],
"source": [
"# Per-indicator metadata; the first CSV column becomes the row index,\n",
"# so rows can later be looked up with df_info.loc[<indicator name>, ...]\n",
"df_info = pd.read_csv(\n",
"    'Gapminder-info.csv',\n",
"    sep=',',\n",
"    index_col=0\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c27a468d",
"metadata": {},
"outputs": [],
"source": [
"# Display the whole metadata table\n",
"df_info"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "064dda93-71ee-4187-a9b8-80f79129e1d5",
"metadata": {},
"outputs": [],
"source": [
"# Order rows by year (ascending), then by population (largest first)\n",
"# NOTE(review): presumably so large bubbles are drawn before small ones - confirm\n",
"df = df.sort_values(\n",
"    by=['Year', 'Population'],\n",
"    ascending=[True, False]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9a5e6a5-976c-4c24-9290-2fe7f6a3be28",
"metadata": {},
"outputs": [],
"source": [
"# Check the row order after sorting\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "d096bd8f",
"metadata": {},
"source": [
"## Create the bubble chart"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc418cc2-2653-4c33-a127-99eedf742904",
"metadata": {},
"outputs": [],
"source": [
"# Fixed bubble colour for each value of the Region column\n",
"color_map = {\n",
"    'Asia': '#ff798e',\n",
"    'Europe': '#ffeb33',\n",
"    'Africa': '#33dded',\n",
"    'Americas': '#98ef33',\n",
"}\n",
"# Marker styling: fully opaque bubbles with a thin black outline\n",
"marker_dict = {\n",
"    'opacity': 1,\n",
"    'line': {'color': 'black', 'width': 0.8},\n",
"}\n",
"# Figure-level styling\n",
"layout_dict = {\n",
"    'plot_bgcolor': 'white',\n",
"    'font': {'color': 'dimgray'},\n",
"}\n",
"# Styling applied to both the x and the y axis\n",
"axes_dict = {\n",
"    'gridcolor': 'lightgray',\n",
"    'showline': True,\n",
"    'linecolor': 'dimgray',\n",
"    'linewidth': 1,\n",
"    'showspikes': True,\n",
"    'spikethickness': 1,\n",
"    'spikecolor': 'dimgray',\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "872e455c",
"metadata": {},
"outputs": [],
"source": [
"# Static bubble chart of the 2020 rows\n",
"fig = px.scatter(\n",
"    df.query('Year==2020'),\n",
"    x='Income',\n",
"    y='Life expectancy',\n",
"    color='Region',\n",
"    size='Population',\n",
"    hover_name='Country',\n",
"    color_discrete_map=color_map,\n",
"    size_max=60,\n",
"    log_x=True,\n",
"    # Switch off every column in the hover box; hover_name still shows the country\n",
"    hover_data={c: False for c in df.columns},\n",
"    # '<br>' is Plotly's line break; a raw newline inside the quotes\n",
"    # would end the string literal early and the cell would not parse\n",
"    title='Gapminder<br>Data by gapminder.org, CC-BY license'\n",
")\n",
"# Adjust markers\n",
"fig.update_traces(marker=marker_dict)\n",
"# Adjust figure layout\n",
"fig.update_layout(layout_dict)\n",
"# Adjust axes/grid\n",
"fig.update_xaxes(axes_dict)\n",
"fig.update_yaxes(axes_dict)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "e5629a50",
"metadata": {},
"source": [
"## Add animation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61ee19a5-f57f-4b97-a61a-f5ead203da70",
"metadata": {},
"outputs": [],
"source": [
"# Fixed bubble colour for each value of the Region column\n",
"color_map = {\n",
"    'Asia': '#ff798e',\n",
"    'Europe': '#ffeb33',\n",
"    'Africa': '#33dded',\n",
"    'Americas': '#98ef33',\n",
"}\n",
"# Marker styling: fully opaque bubbles with a thin black outline\n",
"marker_dict = {\n",
"    'opacity': 1,\n",
"    'line': {'color': 'black', 'width': 0.8},\n",
"}\n",
"# Figure-level styling: fixed 800x600 canvas, and clicks both fire an\n",
"# event and select the clicked point\n",
"layout_dict = {\n",
"    'plot_bgcolor': 'white',\n",
"    'font': {'color': 'dimgray'},\n",
"    'autosize': False,\n",
"    'width': 800,\n",
"    'height': 600,\n",
"    'clickmode': 'event+select',\n",
"}\n",
"# Styling applied to both the x and the y axis\n",
"axes_dict = {\n",
"    'gridcolor': 'lightgray',\n",
"    'showline': True,\n",
"    'linecolor': 'dimgray',\n",
"    'linewidth': 1,\n",
"    'showspikes': True,\n",
"    'spikethickness': 1,\n",
"    'spikecolor': 'dimgray',\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70661829-afc6-4773-a0cb-8cbe4eba8316",
"metadata": {},
"outputs": [],
"source": [
"def background_year(year, xaxis, yaxis):\n",
"    \"\"\"Return the properties of a text-only trace that prints the year.\n",
"\n",
"    The label is placed at the 'Mid' values that the global ``df_info``\n",
"    table stores for the two plotted indicators.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    year : value converted with ``str()`` and used as the label text\n",
"    xaxis : row label of ``df_info`` for the x-axis indicator\n",
"    yaxis : row label of ``df_info`` for the y-axis indicator\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict of trace properties (x, y, mode, text, showlegend, textfont,\n",
"    textposition)\n",
"    \"\"\"\n",
"    center_x = df_info.loc[xaxis, 'Mid']\n",
"    center_y = df_info.loc[yaxis, 'Mid']\n",
"    label_font = {'size': 200, 'color': 'lightgray'}\n",
"    return {\n",
"        'x': [center_x],\n",
"        'y': [center_y],\n",
"        'mode': 'text',\n",
"        'text': [str(year)],\n",
"        'showlegend': False,\n",
"        'textfont': label_font,\n",
"        'textposition': 'middle center',\n",
"    }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aed99d63",
"metadata": {},
"outputs": [],
"source": [
"def gapminder_fig(xaxis='Income', yaxis='Life expectancy'):\n",
"    \"\"\"Build the animated bubble chart for two indicators.\n",
"\n",
"    Uses the notebook globals ``df`` (rows with Year >= 2010), ``df_info``\n",
"    (per-indicator 'LogScale', 'Min', 'Max', 'Mid', 'Meaning') and the\n",
"    styling dicts ``color_map``, ``marker_dict``, ``layout_dict`` and\n",
"    ``axes_dict``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    xaxis : column of ``df`` (and row label of ``df_info``) for the x axis\n",
"    yaxis : column of ``df`` (and row label of ``df_info``) for the y axis\n",
"\n",
"    Returns\n",
"    -------\n",
"    The Plotly figure, animated over 'Year', with the year printed as a\n",
"    large text trace and one annotation per axis.\n",
"    \"\"\"\n",
"    fig = px.scatter(\n",
"        df.query('Year>=2010'),\n",
"        x=xaxis,\n",
"        y=yaxis,\n",
"        color='Region',\n",
"        size='Population',\n",
"        hover_name='Country',\n",
"        color_discrete_map=color_map,\n",
"        size_max=60,\n",
"        log_x=df_info.loc[xaxis, 'LogScale'],\n",
"        log_y=df_info.loc[yaxis, 'LogScale'],\n",
"        # Switch off every column in the hover box; hover_name still shows the country\n",
"        hover_data={c: False for c in df.columns},\n",
"        # '<br>' is Plotly's line break; a raw newline inside the quotes\n",
"        # would end the string literal early and the cell would not parse\n",
"        title='Gapminder<br>Data by gapminder.org, CC-BY license',\n",
"        animation_frame='Year',\n",
"        animation_group='Country',\n",
"        # Fixed ranges keep the axes from rescaling between frames\n",
"        range_x=[df_info.loc[xaxis, 'Min'], df_info.loc[xaxis, 'Max']],\n",
"        range_y=[df_info.loc[yaxis, 'Min'], df_info.loc[yaxis, 'Max']]\n",
"    )\n",
"    # Adjust markers\n",
"    fig.update_traces(marker=marker_dict)\n",
"    # Adjust figure layout\n",
"    fig.update_layout(layout_dict)\n",
"    # Adjust axes/grid\n",
"    fig.update_xaxes(axes_dict)\n",
"    fig.update_yaxes(axes_dict)\n",
"\n",
"    # Show the year in the background: add the text trace, then move it\n",
"    # from the last to the first position of fig.data\n",
"    frame_year = fig.frames[0].name\n",
"    fig.add_trace(go.Scatter(background_year(frame_year, xaxis, yaxis)))\n",
"    fig.data = (fig.data[-1], ) + fig.data[:-1]\n",
"\n",
"    # Prepend the matching year label to every frame so that the frame\n",
"    # traces stay in the same order as fig.data\n",
"    for frame in fig.frames:\n",
"        frame.data = (background_year(frame.name, xaxis, yaxis), ) + frame.data\n",
"    fig.update(frames=fig.frames)\n",
"\n",
"    # Add annotations to the axes\n",
"    fig.add_annotation(\n",
"        x=1, y=0, xref='x domain', yref='y domain',\n",
"        text=df_info.loc[xaxis, 'Meaning'],\n",
"        showarrow=False, align='right'\n",
"    )\n",
"    fig.add_annotation(\n",
"        x=0, y=1, xref='x domain', yref='y domain',\n",
"        text=df_info.loc[yaxis, 'Meaning'],\n",
"        showarrow=False, valign='top', textangle=-90\n",
"    )\n",
"\n",
"    return fig\n",
"\n",
"\n",
"fig = gapminder_fig(yaxis='Fertility')\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"id": "2a7ea93d",
"metadata": {},
"source": [
"## Use Dash"
]
},
{
"cell_type": "markdown",
"id": "2851266c",
"metadata": {},
"source": [
"- Dash html components: https://dash.plotly.com/dash-html-components\n",
"- Dash core components: https://dash.plotly.com/dash-core-components"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5c9e9d57",
"metadata": {},
"outputs": [],
"source": [
"from dash import Dash, html, dcc, callback, Output, Input"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2ec3451-67cf-482d-a5e1-9e61ffbf2a2f",
"metadata": {},
"outputs": [],
"source": [
"import json"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a6a5520",
"metadata": {},
"outputs": [],
"source": [
"# Indicators offered in the two axis dropdowns\n",
"attributes = [\n",
"    'Income',\n",
"    'Life expectancy',\n",
"    'Fertility',\n",
"    'Child mortality',\n",
"]\n",
"# Remote CSS file handed to Dash for page styling\n",
"external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']\n",
"app = Dash(__name__, external_stylesheets=external_stylesheets)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a670d1c6",
"metadata": {},
"outputs": [],
"source": [
"# Both dropdowns offer the same indicator list\n",
"axis_options = [{'label': a, 'value': a} for a in attributes]\n",
"\n",
"app.layout = html.Div([\n",
"    html.H1('Interactive data visualization',\n",
"            style={'fontSize': 50, 'color': 'brown'}),\n",
"    html.P('X axis'),\n",
"    dcc.Dropdown(\n",
"        id='dropdown_x',\n",
"        options=axis_options,\n",
"        value='Income',\n",
"        # A cleared dropdown would pass None to update_plot, and\n",
"        # gapminder_fig cannot look None up in df_info\n",
"        clearable=False\n",
"    ),\n",
"    html.P('Y axis'),\n",
"    dcc.Dropdown(\n",
"        id='dropdown_y',\n",
"        options=axis_options,\n",
"        value='Life expectancy',\n",
"        # Same reason as for dropdown_x\n",
"        clearable=False\n",
"    ),\n",
"    dcc.Graph(\n",
"        id='plot',\n",
"        figure=gapminder_fig()\n",
"    ),\n",
"    dcc.Textarea(\n",
"        id='text',\n",
"        value=''\n",
"    )\n",
"])\n",
"\n",
"\n",
"@app.callback(\n",
"    Output('plot', 'figure'),\n",
"    [Input('dropdown_x', 'value'), Input('dropdown_y', 'value')]\n",
")\n",
"def update_plot(x, y):\n",
"    \"\"\"Rebuild the chart for the indicators chosen in the two dropdowns.\"\"\"\n",
"    return gapminder_fig(x, y)\n",
"\n",
"\n",
"@app.callback(\n",
"    Output('text', 'value'),\n",
"    Input('plot', 'selectedData')\n",
")\n",
"def print_value(selected_data):\n",
"    \"\"\"Write the graph's selectedData into the text area as indented JSON.\"\"\"\n",
"    return json.dumps(selected_data, indent=2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0bd60b14-4b4a-4bee-a858-14cc8c430efd",
"metadata": {},
"outputs": [],
"source": [
"# Start the Dash server for the app configured in the cells above\n",
"app.run()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be842f1d-5519-4482-9b40-9f065dc07c7c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}