# Eurovision Song Contest 3 - Jury points distribution

This notebook requires that the file `ESC_points_1957-2017.csv` is in the same directory.

In [None]:
# matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex
%matplotlib inline
# pandas
import pandas as pd

## Read the data

Read the jury points from the `ESC_points_1957-2017.csv` file

In [None]:
# Load the jury points: the file is ';'-delimited and its first column
# becomes the DataFrame index.
esc_points = pd.read_csv(
    "ESC_points_1957-2017.csv",
    sep=';',
    index_col=0,
)
esc_points.head()

## Compute the normalized jury points

We wish to show the distribution of jury points for each country (since the voting systems have changed several times, the points need to be normalized)

First find the cumulative jury points for each competing country and each year

In [None]:
# Total points received by each competing country in each year
# (one row per year, one column per competing country).
#
# `agg` is evaluated column by column inside every 'Year' group and never
# hands the grouping key to the function, so the result has no 'Year' column
# to remove. The previous `groupby(...).apply(...)` form depended on 'Year'
# being passed to the lambda and dropped afterwards, which newer pandas
# releases deprecate.
#
# skipna=False is deliberate: a column that contains a NaN within a year sums
# to NaN for that year instead of being treated as 0.
norm_points = (
    esc_points
    .drop(columns=['From country'])
    .groupby('Year')
    .agg(lambda points: points.sum(skipna=False))
)
norm_points.head()

Then divide the points by the largest value in that year

In [None]:
# Scale each year (row) by the largest total in that row, so the
# highest-scoring country of every year ends up at 1.0.
yearly_max = norm_points.max(axis=1)
norm_points = norm_points.div(yearly_max, axis=0)
norm_points.head()

## Visualize the normalized jury points

### Box plots

In [None]:
# Horizontal box plot with one box per column of norm_points; the tall
# figure leaves room for all the labels.
ax = norm_points.boxplot(
    vert=False,
    grid=False,
    figsize=(10, 20),
)

Look only at a subset of countries, for example, the big five (France, Germany, Italy, Spain, United Kingdom)

In [None]:
# Keep only the columns of the "big five" countries
big5_countries = ['France', 'Germany', 'Italy', 'Spain', 'United Kingdom']
big5 = norm_points[big5_countries]
big5.head()

In [None]:
# Text reused by the figures below: the plot title and the data attribution
# shown in the corner of each figure.
title = "Distribution of normalized jury points (1957 - 2017)"
data_source = "Data sources: https://github.com/mantzaris/eurovision and Wikipedia"

In [None]:
# Box plot of the big five with the shared title and a source note placed
# in the bottom-left corner of the figure.
ax = big5.boxplot(figsize=(10, 5), grid=False)
ax.set_title(title)
ax.annotate(
    data_source,
    xy=(0, 0),
    xycoords='figure fraction',    # anchor: bottom-left corner of the figure
    xytext=(30, 0),
    textcoords='offset points',    # shift the text 30 points to the right of the anchor
    ha='left',
    va='bottom',
    color='dimgray',
)
plt.show()

### Violin plots

In [None]:
# seaborn
import seaborn as sns

In [None]:
# Violin plot of the big five using seaborn's default settings
fig, ax = plt.subplots(figsize=(10, 5))
sns.violinplot(data=big5, ax=ax)
plt.show()

Refine the visualization:
- Show more details (decrease the bandwidth parameter of the estimation)
- Cut the distributions at the min and max values
- Add title and data source

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title(title)
# bw:    controls how tightly the density estimate is fit to the data,
#        much like the bin size in a histogram
# cut:   0 stops each violin at the smallest/largest observed value
# inner: 'stick' draws one line per observation inside the violin
# NOTE(review): recent seaborn releases appear to rename `bw` to `bw_method`;
# confirm against the installed version before changing it.
sns.violinplot(data=big5, ax=ax, inner='stick', bw=0.2, cut=0)
# Source note in the bottom-left corner of the figure, shifted 30 points right
ax.annotate(
    data_source,
    xy=(0, 0),
    xycoords='figure fraction',
    xytext=(30, 0),
    textcoords='offset points',
    ha='left',
    va='bottom',
    color='dimgray',
)
plt.show()

See how the distributions of jury points changed over time (starting in 1958 gives exactly six decades up to 2017)

In [None]:
# Range of years covered by the per-decade plots and the width of one period
first_year, last_year = 1958, 2017
year_increament = 10
# Print the first year of every period as a sanity check
print(list(range(first_year, last_year, year_increament)))

In [None]:
# One violin plot per decade, stacked vertically in a single figure.
decade_starts = list(range(first_year, last_year, year_increament))

# Size the grid from the configured year range instead of hard-coding six
# rows: with a fixed row count, zip() below would silently drop decades if the
# range grew, or leave empty axes if it shrank.
# squeeze=False keeps `axes` an array even when there is only one row.
fig, axes = plt.subplots(
    figsize=(10, 5 * len(decade_starts)),  # 5 inches per decade -> (10, 30) for six decades
    nrows=len(decade_starts),
    ncols=1,
    squeeze=False,
)
axes = axes.ravel()
# NOTE(review): y=.9 here and the 0.055 annotation offset below look tuned for
# the six-row layout; re-check them if the number of decades changes.
fig.suptitle('Distribution of normalized jury points by decades', y=.9, fontsize='x-large')

for year, ax in zip(decade_starts, axes):
    # Rows of big5 are indexed by year; keep the years of this decade only
    decade_points = big5.loc[(big5.index >= year) & (big5.index < year + year_increament)]
    sns.violinplot(ax=ax, data=decade_points, inner='stick', bw=0.2, cut=0)
    ax.set_title('{} - {}'.format(year, year + year_increament - 1))

# Attach the source note to the bottom axes explicitly rather than relying on
# the loop variable still being bound after the loop. The position is given in
# figure coordinates, so it does not depend on which axes carries it.
axes[-1].annotate(
    data_source,
    color='dimgray',
    xy=(0, 0.055),
    xycoords='figure fraction',
    xytext=(30, 0),
    textcoords='offset points',
    ha='left',
    va='bottom',
)
plt.show()

## Add interactivity

### With Plotly

In [None]:
# plotly
from plotly.offline import init_notebook_mode, plot, iplot
import plotly.graph_objs as go
# Initialise plotly's offline notebook mode before the iplot() call below.
# NOTE(review): connected=True presumably makes the notebook fetch plotly.js
# from a CDN (i.e. needs internet access when the figure is rendered) — confirm.
init_notebook_mode(connected=True)

In [None]:
# One violin trace per big-five country.
# The position of each violin is given with `x0` (a single coordinate for the
# whole trace). The earlier version assigned the country name to `x`, which
# expects an array of per-sample positions, not a scalar.
data = [
    dict(
        type='violin',
        x0=country,
        y=big5[country],
        name=country,
        box=dict(visible=True),        # draw a miniature box plot inside the violin
        meanline=dict(visible=True),   # mark the mean inside the violin
        bandwidth=0.1,
    )
    for country in big5.columns
]

layout = dict(
    title=title,
    yaxis=dict(range=[0, 1], zeroline=False),  # normalized points lie in [0, 1]
    showlegend=False,
    annotations=[
        # Data attribution, positioned relative to the plotting area ('paper'
        # coordinates); the negative values place it below and left of the axes
        dict(
            xref='paper', x=-0.08,
            yref='paper', y=-0.22,
            text=data_source,
            showarrow=False,
        )
    ],
)

# validate=False is kept from the original call.
iplot(dict(data=data, layout=layout), validate=False)