package com.datastax.data.prepare.spark.dataset

import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.graphx.{Edge, Graph}

import scala.collection.mutable.ListBuffer

/**
  * Clusters vertices of an undirected edge list by running GraphX
  * connected components over it.
  */
object GraphXBFS {

  /**
    * Splits the graph described by `listRemain` into its connected components.
    *
    * Each input entry is expected to look like `"srcId,dstId"`, where both ids
    * parse as `Long`. Whitespace around an id is ignored, and any fields after
    * the second one are not read. Null or blank entries are skipped, and a null
    * list is treated as empty.
    *
    * @param sc         Java Spark context; its underlying `SparkContext` is used
    *                   to distribute the edges
    * @param listRemain edge descriptions in `"srcId,dstId"` form
    * @return one string per connected component, holding the ids of that
    *         component's vertices joined with `";"`. No ordering is imposed on
    *         the components or on the ids inside a component. The list is
    *         empty when there are no usable edges.
    * @throws NumberFormatException          if an id is not a valid `Long`
    * @throws ArrayIndexOutOfBoundsException if an entry has fewer than two fields
    */
  def devideCluster(sc:JavaSparkContext, listRemain:java.util.List[String]):java.util.List[String] = {
    import scala.collection.JavaConverters._

    // Parse the edge list up front on the driver; the edge attribute is unused,
    // so every edge simply carries 0.
    val edges = Option(listRemain).map(_.asScala.toList).getOrElse(Nil)
      .filter(line => line != null && line.trim.nonEmpty)
      .map { line =>
        val splits = line.split(",")
        Edge(splits(0).trim.toLong, splits(1).trim.toLong, 0)
      }

    if (edges.isEmpty) {
      // Nothing to cluster: avoid launching a Spark job for an empty graph.
      ListBuffer.empty[String].asJava
    } else {
      // Use the wrapped SparkContext explicitly: GraphX needs a Scala RDD.
      val edgeRdd = sc.sc.parallelize(edges)
      val graph = Graph.fromEdges(edgeRdd, 0.0)

      // connectedComponents labels every vertex with a component id; regroup by
      // that label and build the joined strings on the executors so that only
      // the final strings are shipped back to the driver.
      val clusters = graph.connectedComponents().vertices
        .map { case (vertexId, componentId) => (componentId, vertexId) }
        .groupByKey()
        .map { case (_, members) => members.mkString(";") }
        .collect()

      // Keep handing back a mutable, ListBuffer-backed Java list as before.
      (ListBuffer.empty[String] ++= clusters).asJava
    }
  }
}

