package ca.training.bigdata.spark.ml

import org.apache.spark.sql.SparkSession
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.mllib.evaluation.{RankingMetrics, RegressionMetrics}
import org.apache.spark.sql.functions.{col, expr}

/**
  * Created by BigDataTraining on 2018-04-01.
  *
  * Trains an ALS collaborative-filtering model on a ratings file whose lines have the
  * form `userId::movieId::rating::timestamp`, prints top-10 recommendations for users
  * and items, and reports regression and ranking metrics on a held-out 20% split.
  */
object MovieRecommendation {

  /** Ratings file read when no path is supplied as the first command-line argument. */
  private val DefaultRatingsPath =
    "file:///root/TrainingOnHDP/dataset/spark/movielens_ratings.txt"

  /** Number of highest-scored predictions per user handed to the ranking metrics. */
  private val MaxRankedPredictions = 15

  /**
    * Entry point.
    *
    * @param args optional; `args(0)` overrides the ratings file location,
    *             otherwise [[DefaultRatingsPath]] is used
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("Movie Recommendationn using ALS").getOrCreate()
    try {
      run(spark, args.headOption.getOrElse(DefaultRatingsPath))
    } finally {
      // Release the session even when the job fails part-way through.
      spark.stop()
    }
  }

  /** Loads the ratings at `ratingsPath`, fits ALS and prints recommendations and metrics. */
  private def run(spark: SparkSession, ratingsPath: String): Unit = {
    val ratings = spark.read.textFile(ratingsPath)
      .selectExpr("split(value , '::') as col")
      .selectExpr(
        "cast(col[0] as int) as userId",
        "cast(col[1] as int) as movieId",
        "cast(col[2] as float) as rating",
        "cast(col[3] as long) as timestamp")
    val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))

    val als = new ALS()
      .setMaxIter(5)
      .setRegParam(0.01)
      .setUserCol("userId")
      .setItemCol("movieId")
      .setRatingCol("rating")
      // Users/items that appear only in the test split get NaN predictions, which
      // would turn every aggregate metric below into NaN; drop those rows instead.
      .setColdStartStrategy("drop")

    println(als.explainParams())

    val alsModel = als.fit(training)
    val predictions = alsModel.transform(test)

    alsModel.recommendForAllUsers(10)
      .selectExpr("userId", "explode(recommendations)").show()

    alsModel.recommendForAllItems(10)
      .selectExpr("movieId", "explode(recommendations)").show()

    val evaluator = new RegressionEvaluator()
      .setMetricName("rmse")
      .setLabelCol("rating")
      .setPredictionCol("prediction")
    val rmse = evaluator.evaluate(predictions)

    println(s"Root-mean-square error = $rmse")

    // RegressionMetrics expects (prediction, observation) pairs, in that order.
    val regComparison = predictions.select("prediction", "rating")
      .rdd.map(row => (row.getFloat(0).toDouble, row.getFloat(1).toDouble))
    val metrics = new RegressionMetrics(regComparison)

    println(s"Regression metrics: RMSE = ${metrics.rootMeanSquaredError}, " +
      s"MAE = ${metrics.meanAbsoluteError}, R2 = ${metrics.r2}")

    // Ground truth: the movies each test user rated above 2.5.
    val perUserActual = predictions
      .where("rating > 2.5")
      .groupBy("userId")
      .agg(expr("collect_set(movieId) as movies"))

    // Predicted ranking: sort inside the aggregate, because a global orderBy is not
    // guaranteed to survive the shuffle performed by groupBy/collect_list.
    val perUserPredictions = predictions
      .groupBy("userId")
      .agg(expr("sort_array(collect_list(struct(prediction, movieId)), false) as ranked"))
      .select(col("userId"), expr("ranked.movieId as movies"))

    // After the join the columns are: 0 = userId, 1 = actual movies, 2 = ranked predictions.
    // RankingMetrics expects (predicted ranking, ground truth) pairs, in that order.
    val perUserPredvActual = perUserActual.join(perUserPredictions, Seq("userId"))
      .rdd
      .map(row => (
        row.getSeq[Int](2).toArray.take(MaxRankedPredictions),
        row.getSeq[Int](1).toArray
      ))
    val ranks = new RankingMetrics(perUserPredvActual)

    println(s"Mean average precision = ${ranks.meanAveragePrecision}")
    println(s"Precision at 5 = ${ranks.precisionAt(5)}")
  }

}
