# %% [markdown]
# # Goal: Recreate Gapminder's Bubble Chart*

# %% [markdown]
# ![Gapminder](Gapminder.png)

# %% [markdown]
# *As much as possible...

# %% [markdown]
# See https://www.gapminder.org/tools/

# %% [markdown]
# ## Import libraries

# %%
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

# %% [markdown]
# ## Load the data

# %%
# Indicator table. Later cells read its 'Year', 'Population', 'Region' and
# 'Country' columns plus one column per plotted attribute.
df = pd.read_csv('Gapminder-data.csv', sep=',')

# %%
df.head()

# %%
df.describe()

# %%
# Per-attribute plotting metadata, indexed by its first column. Later cells
# look up 'Min', 'Max', 'Mid', 'LogScale' and 'Meaning' for an attribute name.
df_info = pd.read_csv('Gapminder-info.csv', sep=',', index_col=0)

# %%
df_info

# %%
# Order rows by year (ascending) and, within a year, by population (descending).
# NOTE(review): presumably so that large bubbles are drawn first and do not
# hide small ones -- confirm.
df = df.sort_values(by=['Year', 'Population'], ascending=[True, False])

# %%
df.head()
# %% [markdown]
# ## Create the bubble chart

# %%
# Styling shared by the static and the animated chart.
color_map = dict(
    Asia='#ff798e',
    Europe='#ffeb33',
    Africa='#33dded',
    Americas='#98ef33'
)
marker_dict = dict(
    opacity=1,
    line=dict(
        color='black',
        width=0.8
    )
)
layout_dict = dict(
    plot_bgcolor='white',
    font=dict(color='dimgray')
)
axes_dict = dict(
    gridcolor='lightgray',
    showline=True,
    linecolor='dimgray',
    linewidth=1,
    showspikes=True,
    spikethickness=1,
    spikecolor='dimgray'
)
# Two-line figure title used by both charts. Plotly renders '<br>' as a line
# break inside text.
# NOTE(review): the literal was split by a raw line break in the source, which
# left the string unterminated; '<br>' is assumed to be the original separator.
FIG_TITLE = 'Gapminder<br>Data by gapminder.org, CC-BY license'

# %%
# Static chart for a single year.
fig = px.scatter(
    df.query('Year==2020'),
    x='Income',
    y='Life expectancy',
    color='Region',
    size='Population',
    hover_name='Country',
    color_discrete_map=color_map,
    size_max=60,
    log_x=True,
    # Switch off every per-column hover row; only hover_name stays visible.
    hover_data={c: False for c in df.columns},
    title=FIG_TITLE
)
# Adjust markers
fig.update_traces(marker=marker_dict)
# Adjust figure layout
fig.update_layout(layout_dict)
# Adjust axes/grid
fig.update_xaxes(axes_dict)
fig.update_yaxes(axes_dict)
fig.show()

# %% [markdown]
# ## Add animation

# %%
# color_map, marker_dict and axes_dict are reused unchanged from above; only
# the layout differs: the animated chart gets a fixed size and click selection.
layout_dict = dict(
    plot_bgcolor='white',
    font=dict(color='dimgray'),
    autosize=False,
    width=800,
    height=600,
    clickmode='event+select'
)

# %%
def background_year(year, xaxis, yaxis):
    """Return the properties of a text-only trace that prints `year` behind the bubbles.

    Args:
        year: Value to display; it is converted with ``str``.
        xaxis: Attribute on the x axis; must be a row label of ``df_info``.
        yaxis: Attribute on the y axis; must be a row label of ``df_info``.

    Returns:
        A dict of scatter-trace properties. The label is positioned at the
        'Mid' value that ``df_info`` stores for each of the two attributes.
    """
    return dict(
        x=[df_info.loc[xaxis, 'Mid']],
        y=[df_info.loc[yaxis, 'Mid']],
        mode='text',
        text=[str(year)],
        showlegend=False,
        textfont=dict(size=200, color='lightgray'),
        textposition='middle center'
    )

# %%
def gapminder_fig(xaxis='Income', yaxis='Life expectancy', min_year=2010):
    """Build the animated bubble chart for two attributes.

    Args:
        xaxis: Column of ``df`` (and row label of ``df_info``) for the x axis.
        yaxis: Column of ``df`` (and row label of ``df_info``) for the y axis.
        min_year: Only rows with ``Year >= min_year`` are animated.

    Returns:
        A Plotly figure with one frame per year. Its first trace is the large
        background year label; the region traces follow.

    Raises:
        KeyError: If `xaxis` or `yaxis` is not a row label of ``df_info``.
    """
    fig = px.scatter(
        df[df['Year'] >= min_year],
        x=xaxis,
        y=yaxis,
        color='Region',
        size='Population',
        hover_name='Country',
        color_discrete_map=color_map,
        size_max=60,
        log_x=df_info.loc[xaxis, 'LogScale'],
        log_y=df_info.loc[yaxis, 'LogScale'],
        # Switch off every per-column hover row; only hover_name stays visible.
        hover_data={c: False for c in df.columns},
        title=FIG_TITLE,
        animation_frame='Year',
        animation_group='Country',
        # Fixed ranges keep the axes stable while the animation plays.
        range_x=[df_info.loc[xaxis, 'Min'], df_info.loc[xaxis, 'Max']],
        range_y=[df_info.loc[yaxis, 'Min'], df_info.loc[yaxis, 'Max']]
    )
    # Adjust markers
    fig.update_traces(marker=marker_dict)
    # Adjust figure layout
    fig.update_layout(layout_dict)
    # Adjust axes/grid
    fig.update_xaxes(axes_dict)
    fig.update_yaxes(axes_dict)

    # Show the year in the background: add the label trace, then move it to
    # the front of the trace list so that it is listed before the bubbles.
    frame_year = fig.frames[0].name
    fig.add_trace(go.Scatter(background_year(frame_year, xaxis, yaxis)))
    fig.data = (fig.data[-1], ) + fig.data[:-1]

    # Every frame needs the label at the same position in its trace list, with
    # the text of that frame's year.
    for frame in fig.frames:
        frame.data = (background_year(frame.name, xaxis, yaxis), ) + frame.data
    fig.update(frames=fig.frames)

    # Add annotations to the axes
    fig.add_annotation(
        x=1, y=0, xref='x domain', yref='y domain',
        text=df_info.loc[xaxis, 'Meaning'],
        showarrow=False, align='right'
    )
    fig.add_annotation(
        x=0, y=1, xref='x domain', yref='y domain',
        text=df_info.loc[yaxis, 'Meaning'],
        showarrow=False, valign='top', textangle=-90
    )

    return fig

# %%
fig = gapminder_fig(yaxis='Fertility')
fig.show()

# %% [markdown]
# ## Use Dash

# %% [markdown]
# - Dash html components: https://dash.plotly.com/dash-html-components
# - Dash core components: https://dash.plotly.com/dash-core-components

# %%
from dash import Dash, html, dcc, callback, Output, Input

# %%
import json
# %%
# Attributes offered in both axis dropdowns; each must be a column of `df`
# and a row label of `df_info`, because gapminder_fig looks it up in both.
attributes = ['Income', 'Life expectancy', 'Fertility', 'Child mortality']
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = Dash(__name__, external_stylesheets=external_stylesheets)

# %%
app.layout = html.Div([
    html.H1('Interactive data visualization',
            style={'fontSize': 50, 'color': 'brown'}),
    html.P('X axis'),
    dcc.Dropdown(
        id='dropdown_x',
        options=[{'label': a, 'value': a} for a in attributes],
        value='Income',
        # A cleared dropdown would send None to update_plot, and the df_info
        # lookup inside gapminder_fig would then raise KeyError.
        clearable=False
    ),
    html.P('Y axis'),
    dcc.Dropdown(
        id='dropdown_y',
        options=[{'label': a, 'value': a} for a in attributes],
        value='Life expectancy',
        clearable=False
    ),
    dcc.Graph(
        id='plot',
        figure=gapminder_fig()
    ),
    dcc.Textarea(
        id='text',
        value=''
    )
])


@app.callback(
    Output('plot', 'figure'),
    [Input('dropdown_x', 'value'), Input('dropdown_y', 'value')]
)
def update_plot(x, y):
    """Rebuild the bubble chart for the attributes chosen in the two dropdowns."""
    return gapminder_fig(x, y)


@app.callback(
    Output('text', 'value'),
    Input('plot', 'selectedData')
)
def print_value(selected_data):
    """Show the graph's current selection as indented JSON in the textarea.

    Returns an empty string while nothing is selected, so the textarea does
    not display the literal text ``null``.
    """
    if selected_data is None:
        return ''
    return json.dumps(selected_data, indent=2)

# %%
app.run()
"pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }