# Eurovision Song Contest 5 - Country success

This notebook requires that the file `ESC_points_1957-2017.csv` is in the same directory.

In [None]:
# numpy
import numpy as np
# matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from matplotlib import cm
from mpl_toolkits.axes_grid1 import make_axes_locatable
%matplotlib inline
# pandas
import pandas as pd

## Read the data

Read the jury points from the `ESC_points_1957-2017.csv` file

In [None]:
# The file is semicolon-separated; its first column is used as the row index
points_file = "ESC_points_1957-2017.csv"
esc_points = pd.read_csv(points_file, sep=';', index_col=0)
esc_points.head()

## Compute the country success

We wish to show the proportion of times a country was among the top 25% of the competing countries in terms of jury votes

First compute the number of countries per year and the number of times each country competed

In [None]:
# Count the non-missing 'From country' entries of each year and label the
# resulting column 'Num countries'
num_countries = (
    esc_points[['From country', 'Year']]
    .groupby('Year')
    .count()
    .rename(columns={'From country': 'Num countries'})
)
num_countries.head()

In [None]:
# Count the non-missing 'Year' entries of each 'From country' value and label
# the resulting column 'Num competitions'
num_competitions = (
    esc_points[['From country', 'Year']]
    .groupby('From country')
    .count()
    .rename(columns={'Year': 'Num competitions'})
)
# The index holds the country names, so call it 'Country'
num_competitions.index.name = 'Country'
num_competitions.head()

Then compute the total number of points that were awarded to each country by year

In [None]:
# Sum the points of every numeric column per year.
# numeric_only=True keeps the text column 'From country' out of the sum:
# older pandas dropped it silently, but pandas >= 2.0 aggregates it as well,
# which leaves a non-numeric column in the result and breaks the row-wise
# ranking that follows.
success1 = esc_points.groupby('Year').sum(numeric_only=True)
success1.head()

Next, rank the countries by their success

In [None]:
# Rank across the columns of each year: the largest total gets rank 1 and
# tied totals share the lowest rank of their group (method='min')
yearly_rank = success1.rank(axis='columns', method='min', ascending=False)
# Append 'Num countries' so the ranks can be scaled per year afterwards
success2 = yearly_rank.join(num_countries)
success2.head()

Compute the success as a percentage of competing countries

In [None]:
# Divide every row by that year's 'Num countries' value
# (the 'Num countries' column itself is divided too; it is dropped later)
countries_per_year = success2['Num countries']
success3 = success2.div(countries_per_year, axis='index')
success3.head()

Show only the countries from the top 25%

In [None]:
# Replace every relative rank above 0.25 by NaN, so that only the
# placements within the top 25% keep a value
outside_top_quarter = success3 > 0.25
success3 = success3.mask(outside_top_quarter)
success3.head()

Next, compute success as a percentage of the number of competitions

In [None]:
# count() gives the number of non-NaN entries per column, i.e. how often each
# column stayed within the top 25%; the helper column is not a country
num_successes = success3.count().drop('Num countries')
success4 = pd.DataFrame({'Num successes': num_successes}).join(num_competitions)
success4.head()

In [None]:
# Successes per competition, scaled by 100 to obtain a percentage
success_ratio = success4['Num successes'] / success4['Num competitions']
success4['Success'] = success_ratio * 100
success4.head()

## Visualize the country success

### With GeoPandas

In [None]:
# geopandas
import geopandas as gpd

Download the vector map containing the shapes of 247 countries from:
- https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/

Rename all the files to `world_countries.*`

The file `world_countries.shp` contains (among other things) the following information:
- `ADM0_A3`: Three-letter abbreviation of the country
- `NAME`: The short name of the country
- `NAME_LONG`: The long name of the country
- `FORMAL_EN`: The formal name of the country in English
- `geometry`: Polygons defining the geometry of the country

In [None]:
shapefile = 'world_countries.shp'
# Columns to keep from the shapefile, and the shorter labels used from here on
wanted_columns = ['ADM0_A3', 'NAME', 'NAME_LONG', 'FORMAL_EN', 'geometry']
short_labels = {'ADM0_A3': 'Code', 'NAME': 'Name', 'geometry': 'Geometry'}

shape = gpd.read_file(shapefile)[wanted_columns]
shape.rename(columns=short_labels, inplace=True)
# Show five randomly chosen rows
shape.sample(5)

In [None]:
# Right merge: every row of success4 is kept, so countries whose index label
# has no matching 'Name' in the shapefile show up with missing shape columns
shape_subset = shape[['Code', 'Name', 'Geometry']]
country_success = shape_subset.merge(
    success4,
    how='right',
    left_on='Name',
    right_index=True,
)
country_success

Two countries ('Yugoslavia' and 'Serbia & Montenegro') no longer exist and therefore cannot be shown on a contemporary map

The other four countries need to be renamed

In [None]:
# List every name of the shapefile alphabetically, without row truncation
sorted_names = shape[['Name']].sort_values(by='Name')
with pd.option_context('display.max_rows', None):
    print(sorted_names)

In [None]:
# Index label in success4 -> name used in the shapefile
mapping = {
    'Bosnia & Herzegovina': 'Bosnia and Herz.',
    'Czech Republic': 'Czechia',
    'F.Y.R. Macedonia': 'Macedonia',
    'The Netherlands': 'Netherlands',
}
# Rows that are removed because they cannot be shown on the map
former_countries = ['Yugoslavia', 'Serbia & Montenegro']

success = success4.rename(index=mapping).drop(index=former_countries)
success

In [None]:
# Repeat the right merge with the renamed index labels
merged = shape[['Code', 'Name', 'Geometry']].merge(
    success,
    how='right',
    left_on='Name',
    right_index=True,
)
# Wrap the result in a GeoDataFrame and declare 'Geometry' as its geometry column
country_success = gpd.GeoDataFrame(merged).set_geometry('Geometry')
country_success

Create a choropleth map

In [None]:
# Colour settings shared by the maps below: colormap name and the value
# range mapped onto it
cmap = 'YlGnBu'
vmin, vmax = 0, 100

In [None]:
# First version of the map: colour by 'Success' with the default legend
fig, ax = plt.subplots(figsize=(12, 10))
country_success.plot(
    column='Success',
    cmap=cmap,
    vmin=vmin,
    vmax=vmax,
    legend=True,
    ax=ax,
)
plt.show()

Refine the visualization:
- Resize the colorbar and add percentage sign
- Zoom in on Europe
- Show borders
- Hide the axes
- Add title and data source

In [None]:
# Source note shown below the maps
data_source = 'Data sources: https://github.com/mantzaris/eurovision and Wikipedia'
# Two-line map title; the '\n' marks the line break
title = 'Country success (1957 - 2017) \n [Proportion of times jury votes placed the country among the top 25%]'

In [None]:
fig, ax = plt.subplots(figsize=(12, 12))

# Map with thin black borders; the built-in legend is switched off because
# the colorbar is created separately below
p = country_success.plot(
    column='Success',
    cmap=cmap, vmin=vmin, vmax=vmax,
    edgecolor='k', linewidth=0.3,
    legend=False,
    ax=ax,
)

# Colorbar: a mappable built from the same colormap and value range, drawn in
# a narrow axes attached to the right of the map, ticks formatted with '%'
norm = Normalize(vmin=vmin, vmax=vmax)
n_cmap = cm.ScalarMappable(norm=norm, cmap=cmap)
n_cmap.set_array([])
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.2)
plt.colorbar(n_cmap, cax=cax, format='%d%%')

# Zoom in on Europe
ax.set_xlim(-25, 55)
ax.set_ylim(30, 75)

# Hide the axes, then add the title
ax.axis('off')
ax.set_title(title)

# Data source note, positioned relative to the lower-left figure corner
ax.annotate(
    data_source,
    xy=(0, 0), xycoords='figure fraction',
    xytext=(60, 110), textcoords='offset points',
    ha='left', va='bottom',
    color='dimgray',
)
plt.show()

### With Plotly

In [None]:
# plotly: offline plotting helpers and the graph objects used to build traces
from plotly.offline import init_notebook_mode, plot, iplot
import plotly.graph_objs as go
# Set up plotly's notebook mode before the iplot calls in the cells below
# NOTE(review): connected=True presumably makes the notebook load plotly.js
# from the web instead of embedding it - confirm against the plotly docs
init_notebook_mode(connected=True)

In [None]:
# Single choropleth trace: countries identified by 'Code', coloured by
# 'Success', with 'Name' as the text of each location
success_trace = go.Choropleth(
    locations = country_success['Code'],
    z = country_success['Success'],
    text = country_success['Name'],
    colorscale = cmap,
    reversescale = True,
    zmin = vmin,
    zmax = vmax,
)
data = [success_trace]

# Map settings; the longitude/latitude ranges zoom in on Europe.
# A projection can be chosen by adding e.g. projection = dict(type = 'eckert4')
europe_geo = dict(
    showframe = False,
    lonaxis = dict(range = [-25.0, 55.0]),
    lataxis = dict(range = [ 28.0, 75.0]),
    showland = True,
    landcolor = 'rgb(217, 217, 217)',
)
layout = dict(geo = europe_geo)

iplot(dict(data=data, layout=layout))

Supported projections: 
- "equirectangular"
- "mercator"
- "orthographic"
- "natural earth"
- "kavrayskiy7"
- "miller"
- "robinson"
- "eckert4"
- "azimuthal equal area"
- "azimuthal equidistant"
- "conic equal area"
- "conic conformal"
- "conic equidistant"
- "gnomonic"
- "stereographic"
- "mollweide"
- "hammer"
- "transverse mercator"
- "albers usa"
- "winkel tripel"
- "aitoff"
- "sinusoidal" 

Refine the visualization:
- Add Australia
- Use greater detail in borders
- Add percentage signs to colorbar
- Add title and data source

In [None]:
# Define function to avoid code duplication
def choropleth(geo):
    """Build the 'Success' choropleth trace for one geo subplot.

    The trace is built from the notebook-level `country_success` frame
    ('Code' as locations, 'Success' as z, 'Name' as text) and from the shared
    `cmap`, `vmin` and `vmax` settings, so the same definition can be reused
    for several map panels.

    Parameters
    ----------
    geo
        Value forwarded as the trace's `geo` argument; the notebook passes
        'geo' and 'geo2', the keys of the geo entries in the layout.

    Returns
    -------
    The `go.Choropleth` trace.
    """
    return go.Choropleth(
        locations = country_success['Code'],
        z = country_success['Success'],
        text = country_success['Name'],
        colorscale = cmap,
        reversescale = True,
        zmin = vmin,
        zmax = vmax,
        # Append a percentage sign to the colorbar ticks
        colorbar = dict(ticksuffix = "%"),
        geo = geo,
    )

In [None]:
# One trace per map panel: 'geo' is the main map, 'geo2' the inset
data = [choropleth(panel) for panel in ('geo', 'geo2')]

# Settings common to both panels (resolution = 50 gives more detailed borders)
shared_geo = dict(
    resolution = 50,
    showland = True,
    landcolor = 'rgb(217, 217, 217)',
)

# Main panel: Europe, filling the whole plotting area, no frame
europe_geo = dict(
    shared_geo,
    showframe = False,
    lonaxis = dict(range = [-25.0, 55.0]),
    lataxis = dict(range = [ 28.0, 75.0]),
    domain = dict(x = [ 0, 1 ], y = [ 0, 1 ]),
)

# Inset panel: Australia, framed, in the lower-left 30% of the plotting area
australia_geo = dict(
    shared_geo,
    showframe = True,
    lonaxis = dict(range = [112.0, 155.0]),
    lataxis = dict(range = [-45.0, -10.0]),
    domain = dict(x = [ 0, 0.3 ], y = [ 0, 0.3 ]),
)

# Data source note, placed in paper coordinates without an arrow
source_note = dict(
    xref = 'paper', x = -0.08,
    yref = 'paper', y = -0.22,
    text = data_source,
    showarrow = False,
)

layout = dict(
    # The line break of the title is converted to an HTML break
    title = title.replace('\n', '<br>'),
    geo = europe_geo,
    geo2 = australia_geo,
    annotations = [source_note],
)

iplot(dict(data=data, layout=layout))