library(igraph) library(igraphdata) library(statnet) library(sand) ############################################################################ #cliques and their weakened versions karate_uw <- graph.famous("Zachary") plot(karate_uw) # For the karate network a census of this sort reflects that there are 34 nodes (cliques # of size one) and 78 edges (cliques of size two), followed by 45 triangles (cliques of size three). table(sapply(cliques(karate_uw), length)) # the largest cliques are of size five, of which there are only two. #These two both involve four actors in common, including # actor 1, i.e., the head instructor. cliques(karate_uw)[sapply(cliques(karate_uw), length) == 5] # let's look at the maximal cliques (A maximal clique is a clique that is not a subset of a larger clique.) data(karate) table(sapply(maximal.cliques(karate_uw), length)) # this applies only for 2 cliques of size 4 maximal.cliques(karate_uw)[sapply(maximal.cliques(karate_uw), length) == 4] # a k-core of a graph G is a subgraph of G for which all vertex degrees are at least k, and such that no other # subgraph obeying the same condition contains it (i.e., it is maximal in this property). cores <- graph.coreness(karate_uw) table(cores) # cores are used to plot networks as layers (use the gplot function from statnet) sna::gplot.target(get.adjacency(karate_uw, sparse=F), cores, circ.lab = FALSE, circ.col="skyblue", usearrows = FALSE, vertex.col=cores, edge.col="darkgray") detach("package:sna") detach("package:network") ################################################################## ### dyad census data("aidsblog") aidsblog <- simplify(aidsblog) # simplify to get rid of self loops igraph::dyad.census(aidsblog) reciprocity(aidsblog, mode="ratio") # which is the ratio between reciprocated ties and the total number of ties # 3/180 ################################################################## # subgraph density # ego-centric density (karate example) data(karate) ?graph.neighborhood k.nbhds <- graph.neighborhood(karate_uw, nodes=1:3 , order=1) class(k.nbhds) class(k.nbhds[[1]]) sapply(k.nbhds, vcount) k.1 <- k.nbhds[[1]] edge_density(k.1) k.34 <- k.nbhds[[34]] edge_density(k.34) edge_density(karate_uw) # Relative frequency also is used in defining notions of ‘clustering’ in a graph. transitivity(karate_uw) #about one quarter of the connected triples close in a triangle # the "local" transitivity is from the perspective of egos transitivity(karate_uw, "local", vids=c(1,34)) ################################################################## # Connectivity, Cuts, and Flows data(yeast) igraph::is.connected(yeast) # yeast protein interactions # components census comps <- decompose.graph(yeast) table(sapply(comps, vcount)) # there's a giant commponent yeast.gc <- decompose.graph(yeast)[[1]] # select the giant comp summary(yeast.gc) # If the removal of a particular set of vertices (edges) in a graph disconnects the # graph, that set is called a vertex-cut (edge-cut). # A single vertex that disconnects the graph is called a cut vertex, or sometimes an articulation point. # Identification of such vertices can provide a sense of where a network is vulnerable yeast.cut.vertices <- articulation.points(yeast.gc) length(yeast.cut.vertices) length(yeast.cut.vertices)/vcount(yeast.gc) # abbout 15% of the vertices in the yeast.gc are cut vertices #### weak and strong connectivity (digraph) igraph::is.connected(aidsblog, mode="strong") igraph::is.connected(aidsblog, mode=c("weak")) # strong connectivity in digraphs aidsblog.scc <- clusters(aidsblog, mode=c("strong")) # Extract the maximal (weakly or strongly) connected components of a graph table(aidsblog.scc$csize) table(aidsblog.scc$membership) scc <- induced.subgraph(aidsblog, vids= aidsblog.scc$membership==28) plot(scc) ################################################################################# load("netgot_data.RData") g <- union_graph class(g) #The most straightforward way to partition a network is into connected components. #Each component is a group of nodes that are connected to each other comp <- igraph::components(g) plot(g) groups(comp) str(components(g)) unique(igraph::components(g)$membership) component_distribution(g, cumulative = FALSE, mul.size = FALSE) # just one connected component # let's look at the friend GOT graph gfr <- friends_graph comp <- igraph::components(gfr) groups(comp) component_distribution(gfr, cumulative = FALSE, mul.size = FALSE) giant <- decompose(gfr, mode="strong") plot(giant[[1]], vertex.size=8, vertex.label.cex=0.7) plot(giant[[4]], vertex.size=8, vertex.label.cex=0.7) plot(gfr) #K-core decomposition allows us to identify the core and the periphery of the network. A k-core is a maximal subnet of a network such that all nodes have at least degree K. # compute coreness kcores <- coreness(g) coreness <- kcores str(coreness(g)) V(g)$core <- coreness(g, "all") # assign as node attribute the k-core to which nodes belong # create layout CorenessLayout <- function(g) { coreness <- graph.coreness(g); xy <- array(NA, dim=c(length(coreness), 2)); shells <- sort(unique(coreness)); for(shell in shells) { v <- 1 - ((shell-1) / max(shells)); nodes_in_shell <- sum(coreness==shell); angles <- seq(0,360,(360/nodes_in_shell)); angles <- angles[-length(angles)]; # remove last element xy[coreness==shell, 1] <- sin(angles) * v; xy[coreness==shell, 2] <- cos(angles) * v; } return(xy); } ll <- CorenessLayout(g) # plot plot(g, layout=ll, vertex.size=6, vertex.label.cex=0.8, main='Coreness') # if you just need to know the vertices and not to build the subgraph # maxCoreness <- max(coreness) verticesHavingMaxCoreness <- which(coreness == 13) kcore <- induced.subgraph(graph=g,vids=verticesHavingMaxCoreness) plot(kcore, vertex.label=igraph::get.vertex.attribute(kcore,name='vert.names',index=V(kcore))) ################################## ################################## ########## Community detection #The edge-betweenness method iteratively removes edges with high betweenness, with the idea that they are likely to connect different parts of the network. Here betweenness (gatekeeping potential) applies to edges, but the intuition is the same. #The label propagation method labels each node with unique labels, and then updates these labels by choosing the label assigned to the majority of their neighbors, and repeat this iteratively until each node has the most common labels among its neighbors. #The Louvain algorithm initially assigns each node to its own community; nodes are then sequentially assigned to the community that increases modularity (if any) so that communities are merged; this merging process continues until modularity cannot increase or only one community remains. ################################## # Girvan-newman algorithm gn.comm <- cluster_edge_betweenness(g) #How many communities? table(gn.comm$membership) is_hierarchical(gn.comm) plot_dendrogram(gn.comm) #attach community labels as vertex attribute V(g)$GN.cluster <- membership(gn.comm) V(g)$name[V(g)$GN.cluster==1] V(g)$name[V(g)$GN.cluster==4] # visualizing the result of dividing the network into communities colors <- rainbow(max(membership(gn.comm))) plot(gn.comm, g, vertex.size = 6, vertex.color=colors[membership(gn.comm)], vertex.label = NA, edge.width = 1) # computing modularity mod.gn <- modularity(g,membership(gn.comm)) # compute the intra-cluster density sapply(unique(membership(gn.comm)), function(gg) { subg1<-induced.subgraph(g, which(membership(gn.comm)==gg)) #membership id differs for each cluster edge_density(subg1) }) edge_density(g) #get all combinations of communities and get inter-cluster edges cs <- data.frame(combn(unique(membership(gn.comm)),2)) cx <- sapply(cs, function(x) { es<-E(g)[V(g)[membership(gn.comm)==x[1]] %--% V(g)[membership(gn.comm)==x[2]]] length(es) }) cbind(t(cs),inter.edges=cx) #evaluating cluster homogeneity #Gini index (complementary). The larget its value the largest cluster homogeneity according to a given actor attribute gini <- function(x){ f <- table(x)/length(x) sum(f^2) } igraph::list.vertex.attributes(g) gini(V(g)$Faction[V(g)$GN.cluster==4]) V(g)$name[V(g)$GN.cluster==4] V(g)$Faction[V(g)$GN.cluster==4] ################################## # Louvain algorithm (modularity optimization) louv.clu <- cluster_louvain(g) membership(louv.clu) length(unique(membership(louv.clu))) modularity(g,membership(louv.comm)) # visualizing the result of dividing the network into communities colors <- rainbow(max(membership(louv.clu))) plot(louv.clu, g, vertex.size = 6, vertex.color=colors[membership(louv.clu)], vertex.label = NA, edge.width = 1) # evaluate the community as in the previous example ################################## # Clique percolation algorithm install.packages("CliquePercolation") library(CliquePercolation) cp.comm <- cpAlgorithm(get.adjacency(g, sparse = F), k=4, method = "unweighted") ?cpAlgorithm cp.comm$list.of.communities.labels cp.comm$shared.nodes.labels ################################## # weighted example ################## KARATE EXAMPLE ##################### karate_uw <- graph.famous("Zachary") data(karate) # weighted density sum(E(karate)$weight)/(vcount(karate)*(vcount(karate)-1)/2) # unweighted density length(E(karate)$weight)/(vcount(karate)*(vcount(karate)-1)/2) ############################# # Louvain example karate ?cluster_louvain louv.clu_kw <- cluster_louvain(karate, weights=E(karate)$weight) membership(louv.clu_kw) length(unique(membership(louv.clu_kw))) # visualizing the result of dividing the network into communities colors <- rainbow(max(membership(louv.clu_kw))) coord <- layout_with_fr(karate) plot(louv.clu_kw, karate, layout=coord, vertex.size = 6, vertex.color=colors[membership(louv.clu_kw)], vertex.label = NA, edge.width = E(karate)$weight) louv.clu_k <- cluster_louvain(karate_uw) membership(louv.clu_k) length(unique(membership(louv.clu_k))) # visualizing the result of dividing the network into communities colors <- rainbow(max(membership(louv.clu_k))) plot(louv.clu_k, karate_uw, layout=coord, vertex.size = 6, vertex.color=colors[membership(louv.clu_k)], vertex.label = NA, edge.width = 1) ############################# # CP example cp.comm_k <- cpAlgorithm(get.adjacency(karate_uw, sparse = F), k=4, method = "unweighted") cp.comm_k$list.of.communities.labels cp.comm_k$shared.nodes.labels cp.comm_kw <- cpAlgorithm(get.adjacency(karate, attr = "weight", sparse = F), k=4, method = "weighted", I=0.5) cp.comm_kw$list.of.communities.labels cp.comm_kw$shared.nodes.labels