{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Eurovision Song Contest 1 - Reading the data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook requires that the following files (from https://github.com/mantzaris/eurovision) are in the same directory:\n", "- 1957.csv\n", "- 1958.csv\n", "- ...\n", "- 2017.csv" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# pandas\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"1957.csv\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"2017.csv\")\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "By chance we spot that Romania is misspelled, so we check for spelling errors" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for year in range(1957, 2018):\n", " df = pd.read_csv(\"{}.csv\".format(year))\n", " countries1 = df['From country'].tolist()\n", " countries2 = df.columns.tolist()\n", " countries2.remove('From country')\n", " for c2 in countries2:\n", " if c2 not in countries1:\n", " print(year, c2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We additionally check that the total number of votes that one country assigned to others is equal for all countries" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for year in range(1957, 2018):\n", " df = pd.read_csv(\"{}.csv\".format(year))\n", " if df.sum(axis=1).nunique() != 1:\n", " print(year, df.sum(axis=1).tolist())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Only the votes for 1972 and 1973 are correct, the others all have errors" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"1960.csv\")\n", "df.at[df.index[df['From country'] == 'The Netherlands'][0], 'United Kingdom'] = 5\n", "df\n", "df.sum(axis=1).nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"1963.csv\")\n", "df.at[df.index[df['From country'] == 'Denmark'][0], 'United Kingdom'] = 3\n", "df.at[df.index[df['From country'] == 'Luxembourg'][0], 'Monaco'] = 2\n", "df\n", "df.sum(axis=1).nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"1964.csv\")\n", "df.at[df.index[df['From country'] == 'Belgium'][0], 'Norway'] = 0\n", "df.at[df.index[df['From country'] == 'United Kingdom'][0], 'Monaco'] = 0\n", "df.at[df.index[df['From country'] == 'United Kingdom'][0], 'The Netherlands'] = 1\n", "df\n", "df.sum(axis=1).nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"1965.csv\")\n", "df.at[df.index[df['From country'] == 'Finland'][0], 'United Kingdom'] = 0\n", "df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'Monaco'] = 1\n", "df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'Ireland'] = 3\n", "df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'France'] = 5\n", "df\n", "df.sum(axis=1).nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"1969.csv\")\n", "df.at[df.index[df['From country'] == 'Ireland'][0], 'United Kingdom'] = 0\n", "df\n", "df.sum(axis=1).nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"1971.csv\")\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Ireland'] = 4\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Italy'] = 6\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Luxembourg'] = 2\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Malta'] = 3\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Monaco'] = 10\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Norway'] = 2\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Portugal'] = 5\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Spain'] = 7\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Sweden'] = 4\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Switzerland'] = 6\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'The Netherlands'] = 4\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'United Kingdom'] = 5\n", "df.at[df.index[df['From country'] == 'Germany'][0], 'Yugoslavia'] = 7" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"1974.csv\")\n", "df.at[df.index[df['From country'] == 'Belgium'][0], 'Monaco'] = 2\n", "df\n", "df.sum(axis=1).nunique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Store all the data into a single data frame (and take care of all the errors) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "esc_points = pd.DataFrame()\n", "for year in range(1957, 2018):\n", " df = pd.read_csv(\"{}.csv\".format(year))\n", " df['Year'] = year\n", " if year == 1960:\n", " df.at[df.index[df['From country'] == 'The Netherlands'][0], 'United Kingdom'] = 5\n", " if year == 1963:\n", " df.at[df.index[df['From country'] == 'Denmark'][0], 'United Kingdom'] = 3\n", " df.at[df.index[df['From country'] == 'Luxembourg'][0], 'Monaco'] = 2\n", " if year == 1964:\n", " df.at[df.index[df['From country'] == 'Belgium'][0], 'Norway'] = 0\n", " df.at[df.index[df['From country'] == 'United Kingdom'][0], 'Monaco'] = 0\n", " df.at[df.index[df['From country'] == 'United Kingdom'][0], 'The Netherlands'] = 1\n", " if year == 1965:\n", " df.at[df.index[df['From country'] == 'Finland'][0], 'United Kingdom'] = 0\n", " df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'Monaco'] = 1\n", " df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'Ireland'] = 3\n", " df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'France'] = 5\n", " if year == 1974:\n", " df.at[df.index[df['From country'] == 'Belgium'][0], 'Monaco'] = 2\n", " if year == 1971:\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Ireland'] = 4\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Italy'] = 6\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Luxembourg'] = 2\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Malta'] = 3\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Monaco'] = 10\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Norway'] = 2\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Portugal'] = 5\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Spain'] = 7\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Sweden'] = 4\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Switzerland'] = 6\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'The Netherlands'] = 4\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'United Kingdom'] = 5\n", " df.at[df.index[df['From country'] == 'Germany'][0], 'Yugoslavia'] = 7\n", " if year == 2017:\n", " df = df.rename(index=str, columns={\"Romainia\": \"Romania\"})\n", " esc_points = esc_points.append(df, ignore_index=True, sort=True)\n", "esc_points.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Move the columns 'Year' and 'From country' to the beginning" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "cols = esc_points.columns.tolist()\n", "cols.insert(0, cols.pop(cols.index('Year')))\n", "cols.insert(1, cols.pop(cols.index('From country')))\n", "esc_points = esc_points.reindex(columns=cols)\n", "esc_points.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Save the table to a file" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "esc_points.to_csv(\"ESC_points_1957-2017.csv\", sep=';', na_rep='NaN')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }