{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Eurovision Song Contest 3 - Jury points distribution"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook requires that the file `ESC_points_1957-2017.csv` is in the same directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# matplotlib\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.colors import to_hex\n",
    "%matplotlib inline\n",
    "# pandas\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read the data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read the jury points from the `ESC_points_1957-2017.csv` file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "esc_points = pd.read_csv(\"ESC_points_1957-2017.csv\", sep=';', index_col=0)\n",
    "esc_points.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute the normalized jury points"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We wish to show the distribution of jury points for each country (since the voting systems have been changed several times, the points need to be normalized)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First find the cumulative jury points for each competing country and each year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Sum the points column by column within each year. `skipna=False` keeps a\n",
    "# total as NaN whenever any of its entries is missing.\n",
    "# Aggregating per column never touches the grouping key, so the result does\n",
    "# not depend on whether the installed pandas hands the 'Year' column to the\n",
    "# applied function (newer pandas versions stop doing so for `groupby.apply`).\n",
    "yearly_points = (\n",
    "    esc_points\n",
    "    .drop(columns=['From country'])\n",
    "    .groupby('Year')\n",
    "    .agg(lambda points: points.sum(skipna=False))\n",
    ")\n",
    "yearly_points.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then divide the points by the largest value in that year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Row-wise scaling: every year (row) is divided by that year's largest total.\n",
    "# Built from `yearly_points` so that re-running this cell is idempotent.\n",
    "norm_points = yearly_points.div(yearly_points.max(axis=1), axis=0)\n",
    "norm_points.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualize the normalized jury points"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Box plots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ax = norm_points.boxplot(figsize=(10,20), vert=False, grid=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Look only at a subset of countries, for example, the big five (France, Germany, Italy, Spain, United Kingdom)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "big5 = norm_points[['France', 'Germany', 'Italy', 'Spain', 'United Kingdom']]\n",
    "big5.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "title = 'Distribution of normalized jury points (1957 - 2017)'\n",
    "data_source = 'Data sources: https://github.com/mantzaris/eurovision and Wikipedia'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def annotate_data_source(ax, y=0):\n",
    "    \"\"\"Write the `data_source` credit near the bottom-left corner of the figure.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ax : matplotlib.axes.Axes\n",
    "        Axes of the target figure that the annotation is attached to. The\n",
    "        anchor point is given in figure-fraction coordinates.\n",
    "    y : float, optional\n",
    "        Vertical anchor as a fraction of the figure height (default 0, the\n",
    "        bottom edge of the figure).\n",
    "    \"\"\"\n",
    "    ax.annotate(data_source,\n",
    "                color='dimgray',\n",
    "                xy=(0, y),\n",
    "                xycoords='figure fraction',\n",
    "                xytext=(30, 0),  # shift 30 points to the right of the anchor\n",
    "                textcoords='offset points',\n",
    "                ha='left', va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ax = big5.boxplot(grid=False, figsize=(10, 5))\n",
    "ax.set_title(title)\n",
    "annotate_data_source(ax)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Violin plots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# seaborn\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 5))\n",
    "sns.violinplot(ax=ax, data=big5)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Refine the visualization:\n",
    "- Show more details (decrease the bandwidth parameter of the estimation)\n",
    "- Cut the distributions at the min and max values\n",
    "- Add title and data source"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 5))\n",
    "ax.set_title(title)\n",
    "# The bandwidth (bw) parameter controls how tightly the estimation is fit\n",
    "# to the data, much like the bin size in a histogram\n",
    "# NOTE(review): seaborn 0.13 deprecates `bw` in favour of `bw_method` --\n",
    "# confirm against the installed seaborn version before switching.\n",
    "sns.violinplot(ax=ax, data=big5, inner='stick', bw=0.2, cut=0)\n",
    "annotate_data_source(ax)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "See how the distributions of jury points changed over time (starting in 1958 gives six decades)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "first_year = 1958\n",
    "last_year = 2017\n",
    "year_increment = 10\n",
    "# First year of every decade that is plotted below\n",
    "decade_starts = list(range(first_year, last_year, year_increment))\n",
    "print(decade_starts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "n_decades = len(decade_starts)\n",
    "# One panel per decade. `squeeze=False` always returns a 2-D array of axes,\n",
    "# so flattening it works even when there is a single decade.\n",
    "fig, axes = plt.subplots(figsize=(10, 5 * n_decades), nrows=n_decades, ncols=1, squeeze=False)\n",
    "axes = axes.ravel()\n",
    "fig.suptitle('Distribution of normalized jury points by decades', y=.9, fontsize='x-large')\n",
    "for year, ax in zip(decade_starts, axes):\n",
    "    # Rows whose index (the year) falls inside [year, year + year_increment)\n",
    "    in_decade = (big5.index >= year) & (big5.index < year + year_increment)\n",
    "    sns.violinplot(ax=ax, data=big5.loc[in_decade], inner='stick', bw=0.2, cut=0)\n",
    "    ax.set_title('{} - {}'.format(year, year + year_increment - 1))\n",
    "# Attach the credit to the last panel explicitly instead of relying on the\n",
    "# loop variable still being bound after the loop.\n",
    "annotate_data_source(axes[-1], y=0.055)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Add interactivity"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### With Plotly"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# plotly\n",
    "from plotly.offline import init_notebook_mode, plot, iplot\n",
    "import plotly.graph_objs as go\n",
    "init_notebook_mode(connected=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# One violin trace per country column of `big5`\n",
    "traces = []\n",
    "for country in big5.columns:\n",
    "    trace = dict(\n",
    "        type = 'violin',\n",
    "        x = country,\n",
    "        y = big5[country],\n",
    "        name = country,\n",
    "        box = dict(visible = True),\n",
    "        meanline = dict(visible = True),\n",
    "        bandwidth = 0.1\n",
    "    )\n",
    "    traces.append(trace)\n",
    "\n",
    "layout = dict(\n",
    "    title = title,\n",
    "    yaxis = dict(range = [0, 1], zeroline = False),\n",
    "    showlegend = False,\n",
    "    annotations = [\n",
    "        # Data-source credit, positioned in paper coordinates below the plot area\n",
    "        dict(xref = 'paper', x = -0.08,\n",
    "             yref = 'paper', y = -0.22,\n",
    "             text = data_source,\n",
    "             showarrow = False)\n",
    "    ]\n",
    ")\n",
    "\n",
    "iplot(dict(data=traces, layout=layout), validate=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}