{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# A simple PageRank implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_graph(filename):\n",
    "    \"\"\"Load a graph stored as a JSON adjacency list.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    filename : str or path-like\n",
    "        Path of the JSON file to read.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    object\n",
    "        Whatever `json.load` decodes from the file. The rest of this\n",
    "        notebook uses it as a dict mapping each node to the list of\n",
    "        nodes it links to.\n",
    "    \"\"\"\n",
    "    # JSON text is UTF-8 by specification; naming the encoding keeps the\n",
    "    # result independent of the platform's default locale encoding.\n",
    "    with open(filename, 'r', encoding='utf-8') as f:\n",
    "        # The data structure read from JSON is already \"good enough\" for us\n",
    "        return json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_R(graph):\n",
    "    \"\"\"Build the row-stochastic link matrix of `graph` (no teleportation).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    graph : dict\n",
    "        Adjacency list: each key is a node and its value is the list of\n",
    "        nodes it links to. Every destination must also be a key.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        An (n, n) array whose entry [i, j] is the probability of moving\n",
    "        from node i to node j by following a link. Rows and columns\n",
    "        follow the iteration order of `graph`.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    KeyError\n",
    "        If an adjacency list mentions a node that is not a key of `graph`.\n",
    "    \"\"\"\n",
    "    n = len(graph)\n",
    "    # Map each node key to its row/column index in the matrix\n",
    "    key_to_pos = {key: pos for pos, key in enumerate(graph)}\n",
    "    R = np.zeros((n, n))\n",
    "    for i, neighbours in enumerate(graph.values()):\n",
    "        # The out degree of a node is simply the length of its adjacency list\n",
    "        out_deg = len(neighbours)\n",
    "        if out_deg == 0:\n",
    "            # Dangling node: an all-zero row would make R non-stochastic and\n",
    "            # leak probability mass at every iteration, so spread the row\n",
    "            # uniformly over all nodes instead.\n",
    "            R[i, :] = 1 / n\n",
    "            continue\n",
    "        for dest in neighbours:\n",
    "            # Accumulate rather than assign, so a link listed more than once\n",
    "            # keeps its full weight and the row still sums to one.\n",
    "            R[i, key_to_pos[dest]] += 1 / out_deg\n",
    "    return R"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def PageRank_iteration(x, R, J, alpha):\n",
    "    \"\"\"Perform one power-iteration step x' = x (alpha 1 J + (1 - alpha) R).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x : array-like\n",
    "        Current score vector, either 1-D of length n or a 1 x n row.\n",
    "    R : array-like, shape (n, n)\n",
    "        Link-following transition matrix.\n",
    "    J : array-like, length n\n",
    "        Jump (teleportation) vector.\n",
    "    alpha : float\n",
    "        Weight of the jump term; 1 - alpha weighs the link term.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.matrix\n",
    "        The updated scores as a 1 x n row, so result[0, i] is the score\n",
    "        of node i.\n",
    "    \"\"\"\n",
    "    # Work on plain 2-D rows: `np.mat` no longer exists in NumPy 2.0, and\n",
    "    # len() of a 1 x n matrix is 1, so a ones-vector sized with len(x) is\n",
    "    # the wrong size on every call after the first.\n",
    "    x_row = np.asarray(x, dtype=float).reshape(1, -1)\n",
    "    J_row = np.asarray(J, dtype=float).reshape(1, -1)\n",
    "    # x (1 J) equals sum(x) * J, so the dense n x n jump matrix never has\n",
    "    # to be built.\n",
    "    jump_part = alpha * x_row.sum() * J_row\n",
    "    link_part = (1 - alpha) * (x_row @ np.asarray(R, dtype=float))\n",
    "    # Callers index the result as a 1 x n matrix, so keep that return type.\n",
    "    return np.asmatrix(jump_part + link_part)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_PageRank(graph, alpha, epsilon, max_iter=None, seed=None):\n",
    "    \"\"\"Compute PageRank scores of `graph` by power iteration.\n",
    "\n",
    "    The L1 change of every iteration and the final per-node scores are\n",
    "    printed.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    graph : dict\n",
    "        Adjacency list mapping each node to the list of nodes it links to.\n",
    "    alpha : float\n",
    "        Weight given to the jump vector in each iteration.\n",
    "    epsilon : float\n",
    "        Iteration stops once the L1 norm of the last change is not\n",
    "        greater than this value. Must be non-negative.\n",
    "    max_iter : int, optional\n",
    "        Upper bound on the number of iterations. The default (None) keeps\n",
    "        iterating until the epsilon criterion is met.\n",
    "    seed : int, optional\n",
    "        Seed for the random starting vector. The default (None) draws from\n",
    "        NumPy's global random state.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.matrix\n",
    "        A 1 x n row of scores, ordered like the keys of `graph`.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If `epsilon` is negative or NaN, or if `max_iter` is less than 1.\n",
    "    \"\"\"\n",
    "    import warnings\n",
    "\n",
    "    # The error is a sum of absolute values, so it can never drop below a\n",
    "    # negative threshold, and a NaN threshold would skip the loop entirely.\n",
    "    if not epsilon >= 0:\n",
    "        raise ValueError(f\"epsilon must be a non-negative number, got {epsilon!r}\")\n",
    "    if max_iter is not None and max_iter < 1:\n",
    "        raise ValueError(f\"max_iter must be at least 1, got {max_iter!r}\")\n",
    "    n = len(graph)\n",
    "    # We compute the transition matrix without the teleportation\n",
    "    R = compute_R(graph)\n",
    "    # The jump vector is simply a vector of ones divided by its length\n",
    "    J = np.ones(n) / n\n",
    "    # The starting point is a random stochastic vector\n",
    "    if seed is None:\n",
    "        x = np.random.rand(n)\n",
    "    else:\n",
    "        x = np.random.default_rng(seed).random(n)\n",
    "    x = x / x.sum()\n",
    "    # We can now iterate until the norm one of the changes in the\n",
    "    # last iteration goes below epsilon\n",
    "    err = np.inf  # initially infinity\n",
    "    iterations = 0\n",
    "    while err > epsilon:\n",
    "        if max_iter is not None and iterations >= max_iter:\n",
    "            warnings.warn(\n",
    "                f\"PageRank stopped after {max_iter} iterations \"\n",
    "                f\"with error {err} > epsilon {epsilon}\",\n",
    "                RuntimeWarning,\n",
    "            )\n",
    "            break\n",
    "        x_new = PageRank_iteration(x, R, J, alpha)\n",
    "        err = (abs(x_new - x)).sum()\n",
    "        print(err)\n",
    "        x = x_new\n",
    "        iterations += 1\n",
    "    print(\"PageRank scores:\")\n",
    "    for i, k in enumerate(graph.keys()):\n",
    "        print(f\"{k}: {x[0,i]}\")\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the example graph, stored as a JSON adjacency list\n",
    "G = read_graph(\"example.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.3159023406390806\n",
      "0.2337546613070939\n",
      "0.08459514295259896\n",
      "0.0755958382438805\n",
      "0.04061314926038158\n",
      "0.03655183433434331\n",
      "0.021537593389194754\n",
      "0.016702853568551265\n",
      "0.010926842593441594\n",
      "0.007688983130882704\n",
      "PageRank scores:\n",
      "a: 0.30501128984684917\n",
      "b: 0.10747696979546045\n",
      "c: 0.10747696979546045\n",
      "d: 0.25462368253018236\n",
      "e: 0.16072498269070257\n",
      "f: 0.06468610534134528\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "matrix([[0.30501129, 0.10747697, 0.10747697, 0.25462368, 0.16072498,\n",
       "         0.06468611]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# alpha = 0.1 weights the jump vector; epsilon = 0.01 is the stopping threshold\n",
    "compute_PageRank(G, 0.1, 0.01)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}