library(igraph)
library(igraphdata)
library(statnet)
library(sand)

############################################################################
#cliques and their weakened versions

karate_uw <- graph.famous("Zachary")
plot(karate_uw)
# For the karate network a census of this sort reflects that there are 34 nodes (cliques
# of size one) and 78 edges (cliques of size two), followed by 45 triangles (cliques of size three).

table(sapply(cliques(karate_uw), length))

# the largest cliques are of size five, of which there are only two. 
#These two both involve four actors in common, including
# actor 1, i.e., the head instructor.

cliques(karate_uw)[sapply(cliques(karate_uw), length) == 5]

# let's look at the maximal cliques (A maximal clique is a clique that is not a subset of a larger clique.)
data(karate)
table(sapply(maximal.cliques(karate_uw), length)) # this applies only for 2 cliques of size 4

maximal.cliques(karate_uw)[sapply(maximal.cliques(karate_uw), length) == 4]

# a k-core of a graph G is a subgraph of G for which all vertex degrees are at least k, and such that no other
# subgraph obeying the same condition contains it (i.e., it is maximal in this property).

cores <- graph.coreness(karate_uw)
table(cores)

# cores are used to plot networks as layers (use the gplot function from statnet)

sna::gplot.target(get.adjacency(karate_uw, sparse=F), cores, circ.lab = FALSE, circ.col="skyblue", usearrows = FALSE, vertex.col=cores, edge.col="darkgray")
detach("package:sna")
detach("package:network")

##################################################################
### dyad census

data("aidsblog")
aidsblog <- simplify(aidsblog) # simplify to get rid of self loops
igraph::dyad.census(aidsblog)

reciprocity(aidsblog, mode="ratio") # which is the ratio between reciprocated ties and the total number of ties 
# 3/180


##################################################################
# subgraph density

# ego-centric density (karate example)
data(karate)
?graph.neighborhood
k.nbhds <- graph.neighborhood(karate_uw, nodes=1:3 , order=1)
class(k.nbhds)

class(k.nbhds[[1]])

sapply(k.nbhds, vcount)

k.1 <- k.nbhds[[1]]

edge_density(k.1)

k.34 <- k.nbhds[[34]]

edge_density(k.34)

edge_density(karate_uw)


# Relative frequency also is used in defining notions of ‘clustering’ in a graph.

transitivity(karate_uw)  #about one quarter of the connected triples close in a triangle

# the "local" transitivity is from the perspective of egos

transitivity(karate_uw, "local", vids=c(1,34))


##################################################################
# Connectivity, Cuts, and Flows

data(yeast)
igraph::is.connected(yeast) # yeast protein interactions

# components census

comps <- decompose.graph(yeast)

table(sapply(comps, vcount)) # there's a giant commponent

yeast.gc <- decompose.graph(yeast)[[1]] # select the giant comp
summary(yeast.gc)

# If the removal of a particular set of vertices (edges) in a graph disconnects the
# graph, that set is called a vertex-cut (edge-cut).

# A single vertex that disconnects the graph is called a cut vertex, or sometimes an articulation point.
# Identification of such vertices can provide a sense of where a network is vulnerable

yeast.cut.vertices <- articulation.points(yeast.gc)

length(yeast.cut.vertices)

length(yeast.cut.vertices)/vcount(yeast.gc) # abbout 15% of the vertices in the yeast.gc are cut vertices


#### weak and strong connectivity (digraph)

igraph::is.connected(aidsblog, mode="strong")

igraph::is.connected(aidsblog, mode=c("weak"))

# strong connectivity in digraphs

aidsblog.scc <- clusters(aidsblog, mode=c("strong")) # Extract the maximal (weakly or strongly) connected components of a graph

table(aidsblog.scc$csize)

table(aidsblog.scc$membership)

scc <- induced.subgraph(aidsblog, vids= aidsblog.scc$membership==28)

plot(scc)

#################################################################################

load("netgot_data.RData")

g <- union_graph
class(g)


#The most straightforward way to partition a network is into connected components. 
#Each component is a group of nodes that are connected to each other

comp <- igraph::components(g)

plot(g)

groups(comp)
str(components(g))


unique(igraph::components(g)$membership)

component_distribution(g, cumulative = FALSE, mul.size = FALSE) # just one connected component

# let's look at the friend GOT graph

gfr <- friends_graph

comp <- igraph::components(gfr)

groups(comp)

component_distribution(gfr, cumulative = FALSE, mul.size = FALSE)

giant <- decompose(gfr, mode="strong")

plot(giant[[1]], vertex.size=8, vertex.label.cex=0.7)

plot(giant[[4]], vertex.size=8, vertex.label.cex=0.7)

plot(gfr)


#K-core decomposition allows us to identify the core and the periphery of the network. A k-core is a maximal subnet of a network such that all nodes have at least degree K.

# compute coreness
kcores <- coreness(g)
coreness <- kcores
str(coreness(g))


V(g)$core <- coreness(g, "all") # assign as node attribute the k-core to which nodes belong


# create layout

CorenessLayout <- function(g) {
  coreness <- graph.coreness(g);
  xy <- array(NA, dim=c(length(coreness), 2));
  
  shells <- sort(unique(coreness));
  for(shell in shells) {
    v <- 1 - ((shell-1) / max(shells));
    nodes_in_shell <- sum(coreness==shell);
    angles <- seq(0,360,(360/nodes_in_shell));
    angles <- angles[-length(angles)]; # remove last element
    xy[coreness==shell, 1] <- sin(angles) * v;
    xy[coreness==shell, 2] <- cos(angles) * v;
  }
  return(xy);
}



ll <- CorenessLayout(g)

# plot
plot(g, layout=ll, vertex.size=6, vertex.label.cex=0.8, main='Coreness')



# if you just need to know the vertices and not to build the subgraph 
# 
maxCoreness <- max(coreness)
verticesHavingMaxCoreness <- which(coreness == 13) 
kcore <- induced.subgraph(graph=g,vids=verticesHavingMaxCoreness)

plot(kcore,  vertex.label=igraph::get.vertex.attribute(kcore,name='vert.names',index=V(kcore)))

##################################
##################################
########## Community detection

#The edge-betweenness method iteratively removes edges with high betweenness, with the idea that they are likely to connect different parts of the network. Here betweenness (gatekeeping potential) applies to edges, but the intuition is the same.
#The label propagation method labels each node with unique labels, and then updates these labels by choosing the label assigned to the majority of their neighbors, and repeat this iteratively until each node has the most common labels among its neighbors.
#The Louvain algorithm initially assigns each node to its own community; nodes are then sequentially assigned to the community that increases modularity (if any) so that communities are merged; this merging process continues until modularity cannot increase or only one community remains.


##################################
# Girvan-newman algorithm
gn.comm <- cluster_edge_betweenness(g)

#How many communities?

table(gn.comm$membership)

is_hierarchical(gn.comm)
plot_dendrogram(gn.comm)

#attach community labels as vertex attribute
V(g)$GN.cluster <- membership(gn.comm)

V(g)$name[V(g)$GN.cluster==1]

V(g)$name[V(g)$GN.cluster==4]


# visualizing the result of dividing the network into communities
colors <- rainbow(max(membership(gn.comm)))
plot(gn.comm, g, vertex.size = 6, vertex.color=colors[membership(gn.comm)], vertex.label = NA, edge.width = 1)



# computing modularity
mod.gn <- modularity(g,membership(gn.comm))


# compute the intra-cluster density
sapply(unique(membership(gn.comm)), function(gg) {
  subg1<-induced.subgraph(g, which(membership(gn.comm)==gg)) #membership id differs for each cluster
  edge_density(subg1)
})

edge_density(g)
#get all combinations of communities and get inter-cluster edges

cs <- data.frame(combn(unique(membership(gn.comm)),2))
cx <- sapply(cs, function(x) {
  es<-E(g)[V(g)[membership(gn.comm)==x[1]] %--% 
                  V(g)[membership(gn.comm)==x[2]]]    
  length(es)
})
cbind(t(cs),inter.edges=cx)


#evaluating cluster homogeneity

#Gini index (complementary). The larget its value the largest cluster homogeneity according to a given actor attribute
gini <- function(x){
  f <- table(x)/length(x)
  sum(f^2)
}

igraph::list.vertex.attributes(g)


gini(V(g)$Faction[V(g)$GN.cluster==4])
V(g)$name[V(g)$GN.cluster==4]
V(g)$Faction[V(g)$GN.cluster==4]


##################################
# Louvain algorithm (modularity optimization)

louv.clu <- cluster_louvain(g)

membership(louv.clu)
length(unique(membership(louv.clu)))

modularity(g,membership(louv.comm))

# visualizing the result of dividing the network into communities
colors <- rainbow(max(membership(louv.clu)))
plot(louv.clu, g, vertex.size = 6, vertex.color=colors[membership(louv.clu)], vertex.label = NA, edge.width = 1)



# evaluate the community as in the previous example


##################################
# Clique percolation algorithm

install.packages("CliquePercolation")

library(CliquePercolation)

cp.comm <-  cpAlgorithm(get.adjacency(g, sparse = F), k=4, method = "unweighted")
?cpAlgorithm

cp.comm$list.of.communities.labels
cp.comm$shared.nodes.labels

##################################
# weighted example

################## KARATE EXAMPLE #####################

karate_uw <- graph.famous("Zachary")

data(karate)

# weighted density
sum(E(karate)$weight)/(vcount(karate)*(vcount(karate)-1)/2)

# unweighted density

length(E(karate)$weight)/(vcount(karate)*(vcount(karate)-1)/2)

#############################
# Louvain example

karate

?cluster_louvain
louv.clu_kw <- cluster_louvain(karate, weights=E(karate)$weight)

membership(louv.clu_kw)
length(unique(membership(louv.clu_kw)))


# visualizing the result of dividing the network into communities
colors <- rainbow(max(membership(louv.clu_kw)))
coord <- layout_with_fr(karate)

plot(louv.clu_kw, karate, layout=coord, vertex.size = 6, vertex.color=colors[membership(louv.clu_kw)], vertex.label = NA, edge.width = E(karate)$weight)

louv.clu_k <- cluster_louvain(karate_uw)

membership(louv.clu_k)
length(unique(membership(louv.clu_k)))


# visualizing the result of dividing the network into communities
colors <- rainbow(max(membership(louv.clu_k)))
plot(louv.clu_k, karate_uw, layout=coord, vertex.size = 6, vertex.color=colors[membership(louv.clu_k)], vertex.label = NA, edge.width = 1)


#############################
# CP example

cp.comm_k <-  cpAlgorithm(get.adjacency(karate_uw, sparse = F), k=4, method = "unweighted")

cp.comm_k$list.of.communities.labels
cp.comm_k$shared.nodes.labels

cp.comm_kw <-  cpAlgorithm(get.adjacency(karate, attr = "weight", sparse = F), k=4, method = "weighted", I=0.5)

cp.comm_kw$list.of.communities.labels
cp.comm_kw$shared.nodes.labels