package com.datastax.data.prepare.spark.dataset.database

import com.datastax.insight.core.driver.SparkContextBuilder
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
import org.apache.hadoop.hbase.filter.FilterList.Operator
import org.apache.hadoop.hbase.filter._
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{Cell, HBaseConfiguration, KeyValue, TableName}
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.api.java.JavaPairRDD
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row}

import scala.collection.mutable
import scala.collection.mutable.ListBuffer

object HBaseOperator extends Serializable {

  // Keys looked up by getFilter in each filter-parameter map.
  /** Key selecting which kind of filter to build. */
  val FILTER: String = "filter"
  /** Key holding the comparison operator name (resolved by getCompareOp). */
  val COMPAREOP: String = "compareOp"
  /** Key holding the comparator name (resolved by getComparator). */
  val COMPARATOR: String = "comparator"
  /** Key holding the column family used by the "simpleColumn" filter. */
  val FAMILY: String = "family"
  /** Key holding the column qualifier used by the "simpleColumn" filter. */
  val QUALIFIER: String = "qualifier"
  /** Key holding the ';'-separated qualifier prefixes used by the "multipleColumn" filter. */
  val QUALIFIERPREFIXS: String = "qualifierPrefixs"
  /** Key holding the textual value handed to the comparator. */
  val VALUE: String = "value"
  /** Key holding the type name used to encode the value into bytes. */
  val VALUE_TYPE: String = "valueType"

  /**
    * Builds the HBase client configuration used by the operations in this object.
    *
    * Sets the ZooKeeper quorum, the client port ("2181"), the znode parent ("/hbase-unsecure"),
    * the per-region/per-family bulk-load HFile limit and the input table, then applies the
    * caller-supplied overrides on top (so overrides win over the defaults above).
    *
    * @param table      table name stored under `TableInputFormat.INPUT_TABLE`
    * @param zookeepers ZooKeeper hosts separated by ';'
    * @param confParams optional overrides as `key=value` pairs separated by ';'; may be null or
    *                   empty. Entries without '=', or with a blank key or value, are ignored.
    * @return the populated configuration
    */
  def init(table:String, zookeepers:String,confParams:String): Configuration = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", zookeepers.split(";").mkString(","))
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("zookeeper.znode.parent", "/hbase-unsecure")
    conf.setInt("hbase.mapreduce.bulkload.max.hfiles.perRegion.perFamily", 10240)
    conf.set(TableInputFormat.INPUT_TABLE, table)

    if (confParams != null && !confParams.isEmpty) {
      confParams.split(";").foreach { entry =>
        // Split on the first '=' only, so a value that itself contains '=' is kept intact
        // instead of the whole entry being silently dropped.
        entry.split("=", 2) match {
          case Array(rawKey, rawValue) =>
            val key = rawKey.trim
            val value = rawValue.trim
            if (key.nonEmpty && value.nonEmpty) conf.set(key, value)
          case _ => // no '=' in this entry: nothing to apply
        }
      }
    }
    conf
  }

  /**
    * Writes the RDD out as HFiles and then bulk-loads those HFiles into an HBase table.
    *
    * @param rdd        row-key / KeyValue pairs to write
    * @param tableName  target HBase table
    * @param zookeepers ZooKeeper address(es), ';'-separated
    * @param confParams extra connection settings as ';'-separated `key=value` pairs
    * @param hdfsPath   HDFS connection address (currently unused; see the commented-out line below)
    * @param hfilePath  temporary path the HFiles are written to before loading
    */
  def saveByGenerateHFile(rdd: JavaPairRDD[ImmutableBytesWritable, KeyValue], tableName:String, zookeepers:String,confParams:String,hdfsPath:String,hfilePath:String):Unit = {
    // Diagnostic output; note that count() runs a full Spark job over the RDD.
    print("=====" +rdd.count())
    val conf = init(tableName,zookeepers,confParams)
    // conf.set("fs.defaultFS",hdfsPath) // would direct the generated files to HDFS
    val job = Job.getInstance(conf)
    job.setMapOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setMapOutputValueClass(classOf[KeyValue])

    val conn = ConnectionFactory.createConnection(conf)
    try {
      val table = conn.getTable(TableName.valueOf(tableName)).asInstanceOf[HTable]
      try {
        print(table)
        HFileOutputFormat.configureIncrementalLoad(job, table)
        // Job.getInstance copies the configuration, so the settings applied by
        // configureIncrementalLoad live in job.getConfiguration, not in `conf`.
        // Open question from the original author: the HFiles were observed to land on the
        // local file system; how to direct them to HDFS is unresolved.
        rdd.saveAsNewAPIHadoopFile(hfilePath, classOf[ImmutableBytesWritable], classOf[KeyValue],
          classOf[HFileOutputFormat2], job.getConfiguration)

        // Bulk load the generated HFiles into HBase.
        val bulkLoader = new LoadIncrementalHFiles(conf)
        bulkLoader.doBulkLoad(new Path(hfilePath), table)
      } finally {
        table.close()
      }
    } finally {
      // Release the connection even when writing or loading fails.
      conn.close()
    }
  }

  /**
    * Writes an RDD of Puts into an HBase table using the old-API (mapred) output format.
    *
    * @param rdd        row-key / Put pairs to store
    * @param table      target table name
    * @param zookeepers ZooKeeper hosts, ';'-separated
    * @param confParams extra settings as ';'-separated `key=value` pairs
    */
  def save2(rdd: JavaPairRDD[ImmutableBytesWritable, Put], table:String, zookeepers:String,confParams:String):Unit = {
    // TableOutputFormat must be the one from org.apache.hadoop.hbase.mapred, hence the local
    // import that takes precedence over the file-level mapreduce wildcard import.
    import org.apache.hadoop.hbase.mapred.TableOutputFormat
    import org.apache.hadoop.mapred.JobConf

    val outputJob = new JobConf(init(table, zookeepers, confParams))
    outputJob.set(TableOutputFormat.OUTPUT_TABLE, table)
    outputJob.setOutputFormat(classOf[TableOutputFormat])

    rdd.saveAsHadoopDataset(outputJob)
  }

  /**
    * Reads the given columns of one column family from an HBase table into a DataFrame of strings.
    *
    * Every cell value is decoded with `Bytes.toString`; a cell missing from a row becomes null.
    *
    * @param table      table to read
    * @param zookeepers ZooKeeper hosts, ';'-separated
    * @param confParams extra settings as ';'-separated `key=value` pairs
    * @param family     column family all requested columns belong to
    * @param columns    column qualifiers separated by ';'; they become the DataFrame column names
    * @return the DataFrame, or null when the table is not available
    */
  def hbase2dataset(table:String, zookeepers:String,confParams:String,family:String,columns:String):DataFrame = {
    val sc = SparkContextBuilder.getContext
    val conf = init(table,zookeepers,confParams)
    val admin = new HBaseAdmin(conf)
    try {
      if (admin.isTableAvailable(table)) {
        val columnNames = columns.trim.split(";").toList
        val schema = StructType(columnNames.map(name => StructField(name, StringType, nullable = true)))

        // Encode names once, with HBase's own UTF-8 encoding rather than the platform default
        // charset of String.getBytes, and outside the per-row closure.
        val familyBytes = Bytes.toBytes(family)
        val qualifierBytes = columnNames.map(name => Bytes.toBytes(name))

        val rows = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
          classOf[ImmutableBytesWritable],
          classOf[Result]).map { case (_, result) =>
          Row.fromSeq(qualifierBytes.map(q => Bytes.toString(result.getValue(familyBytes, q))))
        }
        SparkContextBuilder.getSession.createDataFrame(rows, schema)
      } else {
        null
      }
    } finally {
      admin.close()
    }
  }

  /**
    * Reads columns from several column families of an HBase table into a DataFrame of strings.
    *
    * Each cell is decoded according to the type name registered for its column in `mapType`
    * (see convert2String). A cell that is absent from a row yields a null column value.
    *
    * @param table      table to read
    * @param zookeepers ZooKeeper hosts, ';'-separated
    * @param confParams extra settings as ';'-separated `key=value` pairs
    * @param map        column family -> column qualifiers to read from that family
    * @param mapType    column qualifier -> type name understood by convert2String; must contain
    *                   an entry for every column that is present in a row
    * @return the DataFrame, or null when the table is not available
    */
  def load(table:String, zookeepers:String,confParams:String,map: scala.collection.mutable.Map[String,Array[String]],mapType: scala.collection.mutable.Map[String,String]):DataFrame = {
    // Freeze the (family, column) order once so the schema and every row agree on it.
    val orderedColumns: List[(String, String)] =
      for {
        (family, columns) <- map.toList
        column <- columns.toList
      } yield (family, column)

    val sc = SparkContextBuilder.getContext
    val conf = init(table,zookeepers,confParams)
    val admin = new HBaseAdmin(conf)
    try {
      if (admin.isTableAvailable(table)) {
        val schema = StructType(orderedColumns.map { case (_, column) =>
          StructField(column, StringType, nullable = true)
        })

        // Encode names once with HBase's UTF-8 encoding instead of the platform default
        // charset used by String.getBytes.
        val lookups = orderedColumns.map { case (family, column) =>
          (Bytes.toBytes(family), Bytes.toBytes(column), column)
        }

        val rows = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
          classOf[ImmutableBytesWritable],
          classOf[Result]).map { case (_, result) =>
          Row.fromSeq(lookups.map { case (familyBytes, qualifierBytes, column) =>
            val raw = result.getValue(familyBytes, qualifierBytes)
            // getValue returns null for an absent cell; numeric decoding would throw on it.
            if (raw == null) null else convert2String(mapType(column), raw)
          })
        }
        SparkContextBuilder.getSession.createDataFrame(rows, schema)
      } else {
        null
      }
    } finally {
      admin.close()
    }
  }

  /**
    * Scans a table with a list of filters and returns the matching rows as a DataFrame of strings.
    *
    * The scan runs on the driver and all matching rows are buffered in memory before being
    * turned into an RDD. The schema is taken from the qualifiers of the first returned row.
    *
    * @param table          table to scan
    * @param zookeepers     ZooKeeper hosts, ';'-separated
    * @param confParams     extra settings as ';'-separated `key=value` pairs
    * @param filterOperator "pass_one" combines the filters with MUST_PASS_ONE; any other value
    *                       (including "pass_all") uses MUST_PASS_ALL
    * @param filterParams   one parameter map per filter, interpreted by getFilter
    * @param mapType        column qualifier -> type name understood by convert2String
    * @return the DataFrame built from the scanned rows
    */
  def filterOperator(table:String, zookeepers:String,confParams:String,filterOperator:String,filterParams:List[java.util.Map[String,String]],mapType: scala.collection.mutable.Map[String,String]):DataFrame= {
    import scala.collection.JavaConverters._
    val conf = init(table,zookeepers,confParams)
    val connection = ConnectionFactory.createConnection(conf)
    try {
      val tb = connection.getTable(TableName.valueOf(table))
      try {
        val filterList = filterOperator match {
          case "pass_one" => new FilterList(Operator.MUST_PASS_ONE)
          case _ => new FilterList(Operator.MUST_PASS_ALL)
        }
        for (filterParam <- filterParams) {
          filterList.addFilter(getFilter(filterParam))
        }
        val scan = new Scan()
        scan.setFilter(filterList)

        val scanner = tb.getScanner(scan)
        try {
          val columnNames = new ListBuffer[String]
          val rows = new ListBuffer[Row]
          for (result: Result <- scanner.iterator().asScala) {
            val cells = result.rawCells()
            // Column names come from the first row only.
            if (rows.isEmpty) {
              cells.foreach(cell => columnNames.append(Bytes.toString(cell.getQualifier)))
            }
            val values = cells.map { cell =>
              convert2String(mapType(Bytes.toString(cell.getQualifier)), cell.getValue)
            }
            rows.append(Row.fromSeq(values.toList))
          }
          val schema = StructType(columnNames.map(name => StructField(name, StringType, nullable = true)))
          SparkContextBuilder.getSession.createDataFrame(SparkContextBuilder.getContext.makeRDD(rows), schema)
        } finally {
          scanner.close()
        }
      } finally {
        tb.close()
      }
    } finally {
      // The rows are already buffered in memory, so the connection can be released here.
      connection.close()
    }
  }

  /**
    * Builds one HBase filter from a parameter map.
    *
    * Recognised values under the FILTER key: "rowKey", "family", "simpleColumn", "qualifier"
    * and "multipleColumn". The compare operator and comparator are resolved only for the filter
    * kinds that use them.
    *
    * @param map filter parameters keyed by the constants declared in this object
    * @return the filter, or null when the filter kind is unknown or when the family/qualifier
    *         ("simpleColumn") or the qualifier prefixes ("multipleColumn") are missing or empty
    */
  def getFilter(map:java.util.Map[String,String]):Filter = {
    val compareOp = map.get(COMPAREOP)
    val comparator = map.get(COMPARATOR)
    val family = map.get(FAMILY)
    val qualifier = map.get(QUALIFIER)
    val qualifierPrefixs = map.get(QUALIFIERPREFIXS)
    val value = map.get(VALUE)
    val valueType = map.get(VALUE_TYPE)

    // Resolved on demand: filters that do not compare values must not fail on absent settings.
    def op: CompareOp = getCompareOp(compareOp)
    def cmp: ByteArrayComparable = getComparator(comparator, value, valueType)
    // java.util.Map.get returns null for an absent key; treat that like an empty string.
    def present(text: String): Boolean = text != null && !text.isEmpty

    map.get(FILTER) match {
      case "rowKey" => new RowFilter(op, cmp)
      case "family" => new FamilyFilter(op, cmp)
      case "simpleColumn" =>
        if (present(family) && present(qualifier)) {
          new SingleColumnValueFilter(Bytes.toBytes(family), Bytes.toBytes(qualifier), op, cmp)
        } else {
          null
        }
      case "qualifier" => new QualifierFilter(op, cmp)
      case "multipleColumn" =>
        if (present(qualifierPrefixs)) {
          new MultipleColumnPrefixFilter(qualifierPrefixs.split(";").map(prefix => Bytes.toBytes(prefix)))
        } else {
          null
        }
      case _ => null
    }
  }

  /**
    * Resolves a comparison operator by its enum constant name ("LESS", "LESS_OR_EQUAL", "EQUAL",
    * "NOT_EQUAL", "GREATER_OR_EQUAL", "GREATER", "NO_OP").
    *
    * @param compareOp operator name; matching is exact and case-sensitive
    * @return the matching operator, or null when the name is null or not recognised
    */
  def getCompareOp(compareOp:String):CompareOp =
    // Relies on CompareOp declaring exactly the seven constants listed above.
    CompareOp.values().find(candidate => candidate.name() == compareOp).orNull

  /**
    * Creates the comparator named by `comparator` for the given textual value.
    *
    * @param comparator comparator class name: "BinaryComparator", "BinaryPrefixComparator",
    *                   "NullComparator", "RegexStringComparator", "SubstringComparator" or
    *                   "LongComparator"
    * @param value      value to compare against, as text
    * @param valueType  type name used by convert2Bytes to encode `value` for the binary comparators
    * @return the comparator, or null when the name is not recognised
    */
  def getComparator(comparator:String,value:String,valueType:String): ByteArrayComparable = {
    // Evaluated only by the binary comparators, which compare against the typed encoding.
    def typedValue: Array[Byte] = convert2Bytes(value, valueType)

    comparator match {
      case "BinaryComparator" => new BinaryComparator(typedValue)
      case "BinaryPrefixComparator" => new BinaryPrefixComparator(typedValue)
      // "BitComparator" is intentionally not supported here.
      case "NullComparator" => new NullComparator()
      case "RegexStringComparator" => new RegexStringComparator(value)
      case "SubstringComparator" => new SubstringComparator(value)
      case "LongComparator" => new LongComparator(value.toLong)
      case _ => null
    }
  }

  /**
    * Decodes an HBase cell value into its string form according to a type name.
    *
    * @param dataType one of "Int", "Short", "Long", "BigDecimal", "Double", "Float", "String",
    *                 "Boolean"
    * @param bytes    raw cell value; may be null (e.g. the cell is absent from the row)
    * @return the decoded value as text, or null when `bytes` is null or the type is not recognised
    */
  def convert2String(dataType:String,bytes:Array[Byte]):String = {
    if (bytes == null) {
      // Without this guard only "String" tolerated a missing value; the other decoders throw.
      null
    } else {
      dataType match {
        case "Int" => Bytes.toInt(bytes).toString
        case "Short" => Bytes.toShort(bytes).toString
        case "Long" => Bytes.toLong(bytes).toString
        case "BigDecimal" => Bytes.toBigDecimal(bytes).toString
        case "Double" => Bytes.toDouble(bytes).toString
        case "Float" => Bytes.toFloat(bytes).toString
        case "String" => Bytes.toString(bytes)
        case "Boolean" => Bytes.toBoolean(bytes).toString
        case _ => null
      }
    }
  }

  /**
    * Encodes a textual value into HBase bytes according to a type name.
    *
    * NOTE(review): "BigDecimal" is parsed through `toDouble` first, so digits beyond double
    * precision are not preserved -- confirm this matches how the stored data was written.
    *
    * @param value     value as text; parsed with the conversion matching `valueType`
    * @param valueType one of "Int", "Short", "Long", "BigDecimal", "Double", "Float", "String",
    *                  "Boolean"
    * @return the encoded bytes, or null when the type name is null or not recognised
    */
  def convert2Bytes(value:String,valueType:String):Array[Byte] =
    Option(valueType).collect {
      case "Int" => Bytes.toBytes(value.toInt)
      case "Short" => Bytes.toBytes(value.toShort)
      case "Long" => Bytes.toBytes(value.toLong)
      case "BigDecimal" => Bytes.toBytes(java.math.BigDecimal.valueOf(value.toDouble))
      case "Double" => Bytes.toBytes(value.toDouble)
      case "Float" => Bytes.toBytes(value.toFloat)
      case "String" => Bytes.toBytes(value)
      case "Boolean" => Bytes.toBytes(value.toBoolean)
    }.orNull

}
