{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Eurovision Song Contest 1 - Reading the data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook requires that the following files (from https://github.com/mantzaris/eurovision) are in the same directory:\n",
    "- 1957.csv\n",
    "- 1958.csv\n",
    "- ...\n",
    "- 2017.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pandas\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"1957.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"2017.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By chance we spot that Romania is misspelled, so we check for spelling errors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for year in range(1957, 2018):\n",
    "    df = pd.read_csv(\"{}.csv\".format(year))\n",
    "    countries1 = df['From country'].tolist()\n",
    "    countries2 = df.columns.tolist()\n",
    "    countries2.remove('From country')\n",
    "    for c2 in countries2:\n",
    "        if c2 not in countries1:\n",
    "            print(year, c2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We additionally check that the total number of votes that one country assigned to others is equal for all countries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for year in range(1957, 2018):\n",
    "    df = pd.read_csv(\"{}.csv\".format(year))\n",
    "    if df.sum(axis=1).nunique() != 1:\n",
    "        print(year, df.sum(axis=1).tolist())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Only the votes for 1972 and 1973 are correct, the others all have errors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"1960.csv\")\n",
    "df.at[df.index[df['From country'] == 'The Netherlands'][0], 'United Kingdom'] = 5\n",
    "df\n",
    "df.sum(axis=1).nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"1963.csv\")\n",
    "df.at[df.index[df['From country'] == 'Denmark'][0], 'United Kingdom'] = 3\n",
    "df.at[df.index[df['From country'] == 'Luxembourg'][0], 'Monaco'] = 2\n",
    "df\n",
    "df.sum(axis=1).nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"1964.csv\")\n",
    "df.at[df.index[df['From country'] == 'Belgium'][0], 'Norway'] = 0\n",
    "df.at[df.index[df['From country'] == 'United Kingdom'][0], 'Monaco'] = 0\n",
    "df.at[df.index[df['From country'] == 'United Kingdom'][0], 'The Netherlands'] = 1\n",
    "df\n",
    "df.sum(axis=1).nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"1965.csv\")\n",
    "df.at[df.index[df['From country'] == 'Finland'][0], 'United Kingdom'] = 0\n",
    "df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'Monaco'] = 1\n",
    "df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'Ireland'] = 3\n",
    "df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'France'] = 5\n",
    "df\n",
    "df.sum(axis=1).nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"1969.csv\")\n",
    "df.at[df.index[df['From country'] == 'Ireland'][0], 'United Kingdom'] = 0\n",
    "df\n",
    "df.sum(axis=1).nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"1971.csv\")\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Ireland'] = 4\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Italy'] = 6\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Luxembourg'] = 2\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Malta'] = 3\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Monaco'] = 10\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Norway'] = 2\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Portugal'] = 5\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Spain'] = 7\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Sweden'] = 4\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Switzerland'] = 6\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'The Netherlands'] = 4\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'United Kingdom'] = 5\n",
    "df.at[df.index[df['From country'] == 'Germany'][0], 'Yugoslavia'] = 7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"1974.csv\")\n",
    "df.at[df.index[df['From country'] == 'Belgium'][0], 'Monaco'] = 2\n",
    "df\n",
    "df.sum(axis=1).nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Store all the data into a single data frame (and take care of all the errors) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "esc_points = pd.DataFrame()\n",
    "for year in range(1957, 2018):\n",
    "    df = pd.read_csv(\"{}.csv\".format(year))\n",
    "    df['Year'] = year\n",
    "    if year == 1960:\n",
    "        df.at[df.index[df['From country'] == 'The Netherlands'][0], 'United Kingdom'] = 5\n",
    "    if year == 1963:\n",
    "        df.at[df.index[df['From country'] == 'Denmark'][0], 'United Kingdom'] = 3\n",
    "        df.at[df.index[df['From country'] == 'Luxembourg'][0], 'Monaco'] = 2\n",
    "    if year == 1964:\n",
    "        df.at[df.index[df['From country'] == 'Belgium'][0], 'Norway'] = 0\n",
    "        df.at[df.index[df['From country'] == 'United Kingdom'][0], 'Monaco'] = 0\n",
    "        df.at[df.index[df['From country'] == 'United Kingdom'][0], 'The Netherlands'] = 1\n",
    "    if year == 1965:\n",
    "        df.at[df.index[df['From country'] == 'Finland'][0], 'United Kingdom'] = 0\n",
    "        df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'Monaco'] = 1\n",
    "        df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'Ireland'] = 3\n",
    "        df.at[df.index[df['From country'] == 'Yugoslavia'][0], 'France'] = 5\n",
    "    if year == 1974:\n",
    "        df.at[df.index[df['From country'] == 'Belgium'][0], 'Monaco'] = 2\n",
    "    if year == 1971:\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Ireland'] = 4\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Italy'] = 6\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Luxembourg'] = 2\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Malta'] = 3\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Monaco'] = 10\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Norway'] = 2\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Portugal'] = 5\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Spain'] = 7\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Sweden'] = 4\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Switzerland'] = 6\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'The Netherlands'] = 4\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'United Kingdom'] = 5\n",
    "        df.at[df.index[df['From country'] == 'Germany'][0], 'Yugoslavia'] = 7\n",
    "    if year == 2017:\n",
    "        df = df.rename(index=str, columns={\"Romainia\": \"Romania\"})\n",
    "    esc_points = esc_points.append(df, ignore_index=True, sort=True)\n",
    "esc_points.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Move the columns 'Year' and 'From country' to the beginning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols = esc_points.columns.tolist()\n",
    "cols.insert(0, cols.pop(cols.index('Year')))\n",
    "cols.insert(1, cols.pop(cols.index('From country')))\n",
    "esc_points = esc_points.reindex(columns=cols)\n",
    "esc_points.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the table to a file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "esc_points.to_csv(\"ESC_points_1957-2017.csv\", sep=';', na_rep='NaN')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}