# [R] Kosaraju's SCC Algorithm Running

Jeff Newmiller jdnewmil at dcn.davis.ca.us
Mon Nov 7 08:28:05 CET 2016

```Please read the Posting Guide mentioned at the bottom of this and every post, which warns you that homework is off topic on this mailing list. Use the support provided by your institution of learning (Coursera in this case).
--
Sent from my phone. Please excuse my brevity.

On November 6, 2016 8:12:38 PM PST, Megan <megan.fight at gmail.com> wrote:
>To whom it may concerns,
>
>We encountered stack overflow issues when we implemented DFS(depth
>first search) algorithm on a directed graph having 800,000+ vertices
>and millions of edges.  The purpose of running DFS is to use Kosaraju
>Algorithm to calculate the size of SCC(strongly connected componment)
>in the graph. This is an assignment from Coursea.org
><http://coursea.org/>. We know the maximum size of SCC in the graph is
>434,821, which means the maximum recursion depth during code running is
>434,821. We know it is a large calculation behind the scene, therefore,
>we’ve done the below pre-setup hopefully to solve the stack overflow
>issue. However, we have not got the luck yet. We’ve tried running on
>both R and RStudio.
>
>We would really appreciate if someone in your team can help to
>investigate. We can’t wait to see if our code is working in R.
>
>System Information:
>Model Name:	MacBook Pro
>  Model Identifier:	MacBookPro11,1
>  Processor Name:	Intel Core i5
>  Processor Speed:	2.4 GHz
>  Number of Processors:	1
>  Total Number of Cores:	2
>  L2 Cache (per Core):	256 KB
>  L3 Cache:	3 MB
>  Memory:	8 GB
>
>System settings we have tried:
>1. ulimit- s 65000 (to increase stack size before starting up R)
>2. R --max-ppsize =500000 (start R with max ppsize)
>3. options(expression=500000)
>
>
>The data we used can be found on
>https://d18ky98rnyall9.cloudfront.net/_410e934e6553ac56409b2cb7096a44aa_SCC.txt?Expires=1478649600&Signature=YC0OjTn4hmzWpzAMw3WkQjXgxkAc0qhZrCd07JAPtud3dGQqywpQgkAASf-bWX6qgGafVvObciJ3ww-7wbNqY0YhWxwcg-IxmCnz1xJu1SORdDobiVKmhhfSaqgTyullX1hFmcxAA4y6Ud33hY1dhIP~UTAlW~IV8Y-zSliAts8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A
><https://d18ky98rnyall9.cloudfront.net/_410e934e6553ac56409b2cb7096a44aa_SCC.txt?Expires=1478649600&Signature=YC0OjTn4hmzWpzAMw3WkQjXgxkAc0qhZrCd07JAPtud3dGQqywpQgkAASf-bWX6qgGafVvObciJ3ww-7wbNqY0YhWxwcg-IxmCnz1xJu1SORdDobiVKmhhfSaqgTyullX1hFmcxAA4y6Ud33hY1dhIP~UTAlW~IV8Y-zSliAts8_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A>
>
>Data discription provided as follow:
>https://www.coursera.org/learn/algorithm-design-analysis/exam/rOtFq/programming-assignment-4
><https://www.coursera.org/learn/algorithm-design-analysis/exam/rOtFq/programming-assignment-4>
>
>
>Below is our code:
>
>options(expressions=500000)
>#Prepare input test file
>x<<-as.matrix(g)
>remove(g)
>vector.is.empty<<-function(x) {return(length(x)==0)}
>'%!in%' <- function(x,y)!('%in%'(x,y))
>#g<-c(2,1,3,1,3,4,4,2,5,4,5,6,6,7,7,8,8,9,2,3)
>#x<<-matrix(g,nrow=10,ncol=2, byrow=TRUE)
>#g<-c(1,4,2,8,3,6,4,7,5,2,6,9,7,1,8,5,8,6,9,3,9,7,10,2)
>#x<<-matrix(g,nrow=12,ncol=2, byrow=TRUE)
>#g<-c(1,2,2,3,2,4,3,4,3,5,4,1,4,13,5,6,6,7,7,8,8,9,9,6,10,9,10,11,11,8,11,12,12,13,12,14,13,10,14,15)
>#x<<-matrix(g,20,2,byrow=TRUE)
>
>u1<-unique(x[,1])
>u2<-unique(x[,2])
>u<-c(u1,u2)
>n<<-length(unique(u))
>remove(u1,u2,u)
>
>G <<- vector("list", length=n)
>G_REV <<- vector("list", length=n)
>P = numeric(n)
>FT = numeric(n)
>
>for (i in 1:nrow(x)) {
>  a = x[i,1]
>  b = x[i,2]
>  #for G
>  if (is.null(G[[a]])) {
>    G[[a]] = c(b)
>  } else {
>    G[[a]] = c(G[[a]], b)
>  }
>  if (is.null(G[[b]])) {
>    G[[b]] = numeric()
>  }
>  #for G_VEV
>  if (is.null(G_REV[[b]])) {
>    G_REV[[b]] = c(a)
>  } else {
>    G_REV[[b]] = c(G_REV[[b]], a)
>  }
>  if (is.null(G_REV[[a]])) {
>    G_REV[[a]] = numeric()
>  }
>}
>
>G_TEMP <<- G_REV
>
>#G_REV<<-x[,c(2,1)]
>
>#P<<-matrix(c(1:n,rep(0,2*n)),nrow=n,ncol=3) #tracking whether vertex
>is explored or not
>
>#colnames(P)<-c("node","f","parent_vertex")
>explore<<-numeric()
>assign("time",0,envir=globalenv())
>#Algorithm -DFS
>
>DFS_LOOP<<-function(G){
>  counter = n
>  for(i in n : 1){
>    if (i < counter) {
>      counter = counter - 1000;
>      print(i);
>    }
>    if (i %in% explore){next}
>    else {
>      DFS(i)
>    }
>  }
>}
>
>DFS<<-function(i){
>  #if(time>=n){return(p)}
>  #else{
>  #print(c("i=",i))
>  explore<<-c(explore,i)
>  #print(c("explore",explore))
>
>  #P[i,2] <<- 1 # gray
>  v=G_TEMP[[i]]
>
>  #print(c("v=",v))
>  if (vector.is.empty(v) ==TRUE){
>    len=1
>    v=i
>  }
>
>  if(vector.is.empty(v)==FALSE){
>    len=length(v)
>  }
>
>  for(j in 1: len){
>    if(v[j] %!in% explore){
>      DFS(v[j])
>      #P[v[j],3] <<-i
>      P[v[j]] <<- i
>      # print(c("child",j,"parent",i))
>    }
>  }
>
>  time<<-time + 1
>  FT[i] <<- time
>  #P[i,2] <<- time
>  #P[i,2] <<- 2 #black
>  # } <<-else
>}
>print('Starting DFS_loop on G_REV')
>DFS_LOOP(G_REV)
>###################################################
>#temp0<-matrix(1:n,n,1)
># temp1<-P[,c(1,2)]
># colnames(temp1)<-c("before","after")
># temp1<-as.data.frame(temp1)
># colnames(x)<-c("vertex","edge")
># X<-as.data.frame(x)
># X_NEW<-merge(x=X,y=temp1,by.x="vertex",by.y="before")
># remove(X)
># names(X_NEW)[names(X_NEW)=="after"]<-"vertex_new"
># X_NEW2<-merge(x=X_NEW,y=temp1,by.x="edge",by.y="before")
># remove(X_NEW,temp1)
># names(X_NEW2)[names(X_NEW2)=="after"]<-"edge_new"
># G2<-as.matrix(X_NEW2)
># remove(X_NEW2)
># G2<-G2[,c(3,4)]
># u1<-unique(G2[,1])
># u2<-unique(G2[,2])
># u<-c(u1,u2)
># n<<-length(unique(u))
># remove(u1,u2,u)
>
>FT_SORTED = numeric(n)
>for (i in length(FT):1) {
>  finish_time = FT[i]
>  FT_SORTED[finish_time] = i
>}
>
>P = numeric(n)
>FT = numeric(n)
>
>#P<<-matrix(c(1:n,rep(0,2*n)),nrow=n,ncol=3) #tracking whether vertex
>is explored or not
>#colnames(P)<-c("vertex","f","parent_vertex")
>explore<<-numeric()
>assign("time",0,envir=globalenv())
>
>print('Starting DFS_loop on G')
>#DFS_LOOP(G2)#2nd DFS
>explore<<-numeric()
>
>G_TEMP <<- G
>
>for (i in length(FT_SORTED):1) {
>  k = FT_SORTED[i]
>  if (k %!in% explore) {
>    DFS(k)
>  }
>}
>
>mscc_temp = which(P==0)
>scc_temp=FT[mscc_temp]
>#scc_temp<-P[P[,3]==0,2]
>scc_temp=sort(scc_temp,decreasing=TRUE)
>m=length(scc_temp)
>scc=numeric()
>for (i in 1:(m-1)){
>  scc[i]=scc_temp[i]-scc_temp[i+1]
>}
>scc[m]<-scc_temp[m]
>scc_top_5<-tail(sort(scc),5)
>
>
>Thanks,
>Jing
>	[[alternative HTML version deleted]]
>
>______________________________________________
>R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>https://stat.ethz.ch/mailman/listinfo/r-help