package ca.training.bigdata.spark.streaming

import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * Created by BigDataTraining on 2018-03-14.
  *
  * Minimal `mapWithState` example: a constant stream of `(Int, String)` pairs is
  * fed through a stateful mapping function that keeps, per key, the running sum
  * of the lengths of all values seen so far.
  */
object MapWithStateExample {

  /**
    * Builds the streaming job, starts it and blocks until the streaming context
    * terminates.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    import org.apache.spark.streaming.dstream.ConstantInputDStream
    import org.apache.spark.streaming.{State, StateSpec, Time}

    val spark = SparkSession.builder().appName("MapWithState Example").getOrCreate()
    val sc = spark.sparkContext
    val ssc = new StreamingContext(sc, batchDuration = Seconds(5))

    // Stateful transformations such as mapWithState need a checkpoint directory.
    ssc.checkpoint("_checkpoints")

    // Fixed input: keys 0..9, each paired with "0" (even key) or "1" (odd key).
    // Explicit method-call syntax is used instead of the postfix form
    // `n % 2 toString`, which requires scala.language.postfixOps.
    val rdd = sc.parallelize(0 to 9).map(n => (n, (n % 2).toString))
    // ConstantInputDStream replays the same RDD in every batch.
    val sessions = new ConstantInputDStream(ssc, rdd)

    // State mapping function: logs its inputs, adds the length of the incoming
    // value (0 when there is none) to the key's previous total (0 when the key
    // has no state yet), stores the new total and emits (key, value, total).
    val updateState = (batchTime: Time, key: Int, value: Option[String], state: State[Int]) => {
      println(s">>> batchTime = $batchTime")
      println(s">>> key       = $key")
      println(s">>> value     = $value")
      println(s">>> state     = $state")
      val sum = value.fold(0)(_.length) + state.getOption.getOrElse(0)
      state.update(sum)
      Some((key, value, sum)) // mapped value
    }
    val spec = StateSpec.function(updateState)
    val mappedStatefulStream = sessions.mapWithState(spec)

    mappedStatefulStream.print()

    ssc.start()
    ssc.awaitTermination()
  }
}