package ca.training.bigdata.spark.ml

import org.apache.spark.sql.SparkSession
import ml.dmlc.xgboost4j.scala.spark.{DataUtils, XGBoost}
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.evaluation.RegressionEvaluator

/**
  * Created by BigDataTraining on 2018-03-30.
  *
  * Spark driver that trains an XGBoost regression model on a tab-separated
  * power plant data file, evaluates it on a held-out split using RMSE, and
  * saves the trained model.
  *
  * Optional command-line arguments:
  *   - `args(0)`: path of the tab-separated input file
  *                (defaults to [[PowerPlantPrediction.DefaultInputPath]])
  *   - `args(1)`: path the trained model is saved to
  *                (defaults to [[PowerPlantPrediction.DefaultModelPath]])
  */
object PowerPlantPrediction {

  /**
    * One parsed row of the input file. The field names mirror the column
    * names used by the rest of the pipeline: `AT`, `V`, `AP` and `RH` are
    * assembled into the feature vector and `PE` is used as the label.
    */
  case class PowerPlantTable(AT: Double,
                             V : Double,
                             AP : Double,
                             RH : Double,
                             PE : Double)

  /** Input file used when no path is given on the command line. */
  private val DefaultInputPath = "file:///root/TrainingOnHDP/dataset/spark/powerplant.txt"

  /** Model output location used when no path is given on the command line. */
  private val DefaultModelPath = "/tmp/myXgboostModel"

  /**
    * Runs the full pipeline: load, vectorise, split, train, evaluate, save.
    *
    * @param args optional `[inputPath, modelPath]`; missing entries fall back
    *             to the defaults declared on this object
    */
  def main(args: Array[String]): Unit = {
    val inputPath = if (args.length > 0) args(0) else DefaultInputPath
    val modelPath = if (args.length > 1) args(1) else DefaultModelPath

    val spark = SparkSession.builder().appName("Power Plant Prediction Using XGBoost").getOrCreate()

    // Always stop the session, even when a stage of the job throws.
    try {
      import spark.implicits._

      val df = spark.sparkContext.textFile(inputPath)
        // Blank lines (e.g. a trailing newline) would otherwise blow up in
        // split/toDouble; anything else that is malformed still fails loudly.
        .filter(_.trim.nonEmpty)
        .map(_.split("\t"))
        // Drop the header row, identified by its first column name.
        .filter(fields => fields(0) != "AT")
        .map(fields => PowerPlantTable(
          fields(0).toDouble,
          fields(1).toDouble,
          fields(2).toDouble,
          fields(3).toDouble,
          fields(4).toDouble))
        .toDF

      df.printSchema()
      df.show(10)

      val featureCols = Array("AT", "V", "AP", "RH")
      val assembler = new VectorAssembler()
        .setInputCols(featureCols)
        .setOutputCol("features")

      // Keep only the assembled feature vector and the target, renamed to "label".
      val vected = assembler.transform(df)
        .withColumnRenamed("PE", "label")
        .drop(featureCols: _*)

      // Fixed seed so the 20/80 test/training split is reproducible.
      val Array(split20, split80) = vected.randomSplit(Array(0.20, 0.80), 1800009193L)
      val testSet = split20.cache()
      val trainingSet = split80.cache()

      val paramMap = Map[String, Any](
        "eta" -> 0.3,
        "max_depth" -> 6,
        "objective" -> "reg:linear",
        "early_stopping_rounds" -> 10)

      val numRounds = 5
      val numWorkers = 1
      val xgboostModel =
        XGBoost.trainWithDataFrame(trainingSet, paramMap, numRounds, numWorkers, useExternalMemory = true)

      val predictions = xgboostModel.transform(testSet)

      predictions.show(10)

      val evaluator = new RegressionEvaluator()
        .setLabelCol("label")
        .setPredictionCol("prediction")
        .setMetricName("rmse")

      val rmse = evaluator.evaluate(predictions)

      println(s"Root mean squared error: $rmse")

      xgboostModel.save(modelPath)
    } finally {
      spark.stop()
    }
  }

}
