{
 "cells": [
  { "cell_type": "markdown", "metadata": {}, "source": [
    "# Eurovision Song Contest 5 - Country success"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "This notebook computes how often the jury votes placed each country among the top 25% of the competing countries and shows the result on a map, first with GeoPandas and then with Plotly.\n",
    "\n",
    "This notebook requires that the file `ESC_points_1957-2017.csv` is in the same directory."
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# numpy\n",
    "import numpy as np\n",
    "# matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.colors import Normalize\n",
    "from matplotlib import cm\n",
    "from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
    "%matplotlib inline\n",
    "# pandas\n",
    "import pandas as pd"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# Fraction of the competing countries that counts as a success (top 25%)\n",
    "TOP_FRACTION = 0.25"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## Read the data"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Read the jury points from the `ESC_points_1957-2017.csv` file"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "esc_points = pd.read_csv(\"ESC_points_1957-2017.csv\", sep=';', index_col=0)\n",
    "esc_points.head()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## Compute the country success"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "We wish to show the proportion of times a country was among the top 25% of the competing countries in terms of jury votes"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "First compute the number of countries per year and the number of times each country competed"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "num_countries = (\n",
    "    esc_points[['From country', 'Year']]\n",
    "    .groupby('Year')\n",
    "    .count()\n",
    "    .rename(columns={'From country': 'Num countries'})\n",
    ")\n",
    "num_countries.head()"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "num_competitions = (\n",
    "    esc_points[['From country', 'Year']]\n",
    "    .groupby('From country')\n",
    "    .count()\n",
    "    .rename(columns={'Year': 'Num competitions'})\n",
    "    .rename_axis('Country')\n",
    ")\n",
    "num_competitions.head()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Then compute the total number of points that were awarded to each country by year"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# numeric_only=True restricts the totals to the numeric (points) columns;\n",
    "# pandas >= 2.0 no longer drops non-numeric columns such as 'From country' on its own\n",
    "points_per_year = esc_points.groupby('Year').sum(numeric_only=True)\n",
    "points_per_year.head()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Next, rank the countries by their success"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# Rank 1 is the country with the most points in that year; ties share the best rank\n",
    "ranks = (\n",
    "    points_per_year\n",
    "    .rank(axis=1, method='min', ascending=False)\n",
    "    .join(num_countries)\n",
    ")\n",
    "ranks.head()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Express each rank as a fraction of the number of competing countries"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "relative_ranks = ranks.div(ranks['Num countries'], axis=0)\n",
    "relative_ranks.head()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Keep only the ranks within the top 25%; everything else becomes `NaN`"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "top_ranks = relative_ranks.where(relative_ranks <= TOP_FRACTION)\n",
    "top_ranks.head()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Next, compute success as fraction of the number of competitions"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# count() ignores NaN, so it yields the number of top placements per country\n",
    "success_counts = (\n",
    "    top_ranks\n",
    "    .count()\n",
    "    .drop('Num countries')\n",
    "    .to_frame('Num successes')\n",
    "    .join(num_competitions)\n",
    ")\n",
    "success_counts.head()"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# Success in percent of the competitions the country took part in\n",
    "success_by_country = success_counts.assign(\n",
    "    Success=lambda df: df['Num successes'].div(df['Num competitions']) * 100\n",
    ")\n",
    "success_by_country.head()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "## Visualize the country success"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "### With GeoPandas"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# geopandas\n",
    "import geopandas as gpd"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Download the vector map containing the shapes of 247 countries from:\n",
    "- https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/\n",
    "\n",
    "Rename all the files to `world_countries.*`"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "The file `world_countries.shp` contains (among other things) the following information:\n",
    "- `ADM0_A3`: Three-letter abbreviation of the country\n",
    "- `NAME`: The short name of the country\n",
    "- `NAME_LONG`: The long name of the country\n",
    "- `FORMAL_EN`: The formal name of the country in English\n",
    "- `geometry`: Polygons defining the geometry of the country"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "shapefile = 'world_countries.shp'\n",
    "shape = (\n",
    "    gpd.read_file(shapefile)[['ADM0_A3', 'NAME', 'NAME_LONG', 'FORMAL_EN', 'geometry']]\n",
    "    .rename(columns={'ADM0_A3': 'Code', 'NAME': 'Name', 'geometry': 'Geometry'})\n",
    "    # The active geometry column is tracked by name, so register the renamed column again\n",
    "    .set_geometry('Geometry')\n",
    ")\n",
    "shape.sample(5)"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "country_success = shape[['Code', 'Name', 'Geometry']].merge(\n",
    "    success_by_country, left_on='Name', right_index=True, how='right'\n",
    ")\n",
    "# Contest entries without a country code found no match in the shapefile\n",
    "country_success[country_success['Code'].isna()]"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Two countries ('Yugoslavia' and 'Serbia & Montenegro') no longer exist and therefore cannot be shown on a contemporary map"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "The other four countries need to be renamed to the names used in the shapefile, which are listed below"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "with pd.option_context('display.max_rows', None):\n",
    "    print(shape[['Name']].sort_values(by='Name'))"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# Contest name -> shapefile name\n",
    "mapping = {\n",
    "    'Bosnia & Herzegovina': 'Bosnia and Herz.',\n",
    "    'Czech Republic': 'Czechia',\n",
    "    'F.Y.R. Macedonia': 'Macedonia',\n",
    "    'The Netherlands': 'Netherlands',\n",
    "}\n",
    "success_on_map = (\n",
    "    success_by_country\n",
    "    .rename(index=mapping)\n",
    "    .drop(['Yugoslavia', 'Serbia & Montenegro'])\n",
    ")\n",
    "success_on_map"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "country_success = gpd.GeoDataFrame(\n",
    "    shape[['Code', 'Name', 'Geometry']].merge(\n",
    "        success_on_map, left_on='Name', right_index=True, how='right'\n",
    "    )\n",
    ").set_geometry('Geometry')\n",
    "country_success"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Create a choropleth map"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# Colormap and value range (in percent) shared by all maps below\n",
    "cmap = 'YlGnBu'\n",
    "vmin = 0\n",
    "vmax = 100"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "fig, ax = plt.subplots(figsize=(12, 10))\n",
    "country_success.plot(ax=ax, column='Success',\n",
    "                     cmap=cmap, vmin=vmin, vmax=vmax,\n",
    "                     legend=True)\n",
    "plt.show()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Refine the visualization:\n",
    "- Resize the colorbar and add percentage sign\n",
    "- Zoom in on Europe\n",
    "- Show borders\n",
    "- Hide the axes\n",
    "- Add title and data source"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "title = 'Country success (1957 - 2017) \\n [Proportion of times jury votes placed the country among the top {:.0%}]'.format(TOP_FRACTION)\n",
    "data_source = 'Data sources: https://github.com/mantzaris/eurovision and Wikipedia'"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "fig, ax = plt.subplots(figsize=(12, 12))\n",
    "country_success.plot(ax=ax, column='Success',\n",
    "                     cmap=cmap, vmin=vmin, vmax=vmax,\n",
    "                     # Show borders\n",
    "                     linewidth=0.3, edgecolor='k',\n",
    "                     legend=False)\n",
    "# Resize the colorbar and add percentage signs: the built-in legend is switched off\n",
    "# above and a colorbar with the same colormap and value range is drawn on its own axes\n",
    "norm = Normalize(vmin=vmin, vmax=vmax)\n",
    "n_cmap = cm.ScalarMappable(norm=norm, cmap=cmap)\n",
    "n_cmap.set_array([])\n",
    "divider = make_axes_locatable(ax)\n",
    "cax = divider.append_axes(\"right\", size=\"5%\", pad=0.2)\n",
    "fig.colorbar(n_cmap, cax=cax, format='%d%%')\n",
    "# Zoom in on Europe\n",
    "ax.set_xlim([-25, 55])\n",
    "ax.set_ylim([30, 75])\n",
    "# Hide the axes\n",
    "ax.axis('off')\n",
    "ax.set_title(title)\n",
    "ax.annotate(data_source,\n",
    "            color='dimgray',\n",
    "            xy=(0, 0),\n",
    "            xycoords='figure fraction',\n",
    "            xytext=(60, 110),\n",
    "            textcoords='offset points',\n",
    "            ha='left', va='bottom',\n",
    "            )\n",
    "plt.show()"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "### With Plotly"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# plotly\n",
    "from plotly.offline import init_notebook_mode, plot, iplot\n",
    "import plotly.graph_objs as go\n",
    "init_notebook_mode(connected=True)"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [
    "data = [\n",
    "    go.Choropleth(\n",
    "        locations = country_success['Code'],\n",
    "        z = country_success['Success'],\n",
    "        text = country_success['Name'],\n",
    "        colorscale = cmap,\n",
    "        reversescale = True,\n",
    "        zmin = vmin,\n",
    "        zmax = vmax,\n",
    "    )\n",
    "]\n",
    "\n",
    "layout = dict(\n",
    "    geo = dict(\n",
    "        showframe = False,\n",
    "        # Zoom in on Europe\n",
    "        lonaxis = dict(range = [-25.0, 55.0]),\n",
    "        lataxis = dict(range = [ 28.0, 75.0]),\n",
    "        showland = True,\n",
    "        landcolor = 'rgb(217, 217, 217)',\n",
    "        # Optionally pick one of the projections listed below,\n",
    "        # e.g. projection = dict(type = 'eckert4')\n",
    "    )\n",
    ")\n",
    "\n",
    "iplot(dict(data=data, layout=layout))"
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Supported projections: \n",
    "- \"equirectangular\"\n",
    "- \"mercator\"\n",
    "- \"orthographic\"\n",
    "- \"natural earth\"\n",
    "- \"kavrayskiy7\"\n",
    "- \"miller\"\n",
    "- \"robinson\"\n",
    "- \"eckert4\"\n",
    "- \"azimuthal equal area\"\n",
    "- \"azimuthal equidistant\"\n",
    "- \"conic equal area\"\n",
    "- \"conic conformal\"\n",
    "- \"conic equidistant\"\n",
    "- \"gnomonic\"\n",
    "- \"stereographic\"\n",
    "- \"mollweide\"\n",
    "- \"hammer\"\n",
    "- \"transverse mercator\"\n",
    "- \"albers usa\"\n",
    "- \"winkel tripel\"\n",
    "- \"aitoff\"\n",
    "- \"sinusoidal\" "
  ] },
  { "cell_type": "markdown", "metadata": {}, "source": [
    "Refine the visualization:\n",
    "- Add Australia\n",
    "- Use greater detail in borders\n",
    "- Add percentage signs to colorbar\n",
    "- Add title and data source"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
    "# Define function to avoid code duplication\n",
    "def choropleth(geo):\n",
    "    \"\"\"Return a Choropleth trace of the country success for one geo subplot.\n",
    "\n",
    "    The trace is built from the notebook-level `country_success`, `cmap`,\n",
    "    `vmin` and `vmax`, so those cells must have been run first.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    geo : str\n",
    "        Key of the layout entry the trace is drawn on, e.g. 'geo' or 'geo2'.\n",
    "    \"\"\"\n",
    "    return go.Choropleth(\n",
    "        locations = country_success['Code'],\n",
    "        z = country_success['Success'],\n",
    "        text = country_success['Name'],\n",
    "        colorscale = cmap,\n",
    "        reversescale = True,\n",
    "        zmin = vmin,\n",
    "        zmax = vmax,\n",
    "        # Add percentage sign\n",
    "        colorbar = dict(ticksuffix = \"%\"),\n",
    "        geo = geo\n",
    "    )"
  ] },
  { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [
    "data = [\n",
    "    choropleth('geo'),\n",
    "    choropleth('geo2')\n",
    "]\n",
    "\n",
    "layout = dict(\n",
    "    # Plotly breaks lines in text on <br>, not on newline characters\n",
    "    title = title.replace('\\n', '<br>'),\n",
    "    # Main map: Europe\n",
    "    geo = dict(\n",
    "        showframe = False,\n",
    "        # Borders with greater detail\n",
    "        resolution = 50,\n",
    "        lonaxis = dict(range = [-25.0, 55.0]),\n",
    "        lataxis = dict(range = [ 28.0, 75.0]),\n",
    "        showland = True,\n",
    "        landcolor = 'rgb(217, 217, 217)',\n",
    "        domain = dict(\n",
    "            x = [ 0, 1 ],\n",
    "            y = [ 0, 1 ]\n",
    "        )\n",
    "    ),\n",
    "    # Inset in the lower left corner: Australia\n",
    "    geo2 = dict(\n",
    "        showframe = True,\n",
    "        # Borders with greater detail\n",
    "        resolution = 50,\n",
    "        lonaxis = dict(range = [112.0, 155.0]),\n",
    "        lataxis = dict(range = [-45.0, -10.0]),\n",
    "        showland = True,\n",
    "        landcolor = 'rgb(217, 217, 217)',\n",
    "        domain = dict(\n",
    "            x = [ 0, 0.3 ],\n",
    "            y = [ 0, 0.3 ]\n",
    "        ),\n",
    "    ),\n",
    "    annotations = [\n",
    "        dict(xref = 'paper', x = -0.08,\n",
    "             yref = 'paper', y = -0.22,\n",
    "             text = data_source,\n",
    "             showarrow = False)\n",
    "    ]\n",
    ")\n",
    "\n",
    "iplot(dict(data=data, layout=layout))"
  ] }
 ],
 "metadata": {
  "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" },
  "language_info": {
   "codemirror_mode": { "name": "ipython", "version": 3 },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}