package ca.training.bigdata.spark.streaming

import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.ConstantInputDStream

/**
  * Minimal Spark Streaming demo of `updateStateByKey`.
  *
  * Every 5-second batch re-emits the same ten `(Int, String)` pairs through a
  * `ConstantInputDStream`. The state kept per key is a call counter: `Some(1)` the first
  * time the update function sees a key without state, incremented by one on each
  * subsequent call.
  *
  * Created by BigDataTraining on 2018-03-14.
  */
object UpdateStateByKeyExample {

  /**
    * Builds the streaming job, starts it and blocks until the streaming context terminates.
    * The streaming context (and the underlying Spark context) is stopped on the way out,
    * whether the job ended normally or with an exception.
    *
    * @param args command-line arguments; not used
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("UpdateStateByKey Example").getOrCreate()
    val sc = spark.sparkContext
    val ssc = new StreamingContext(sc, batchDuration = Seconds(5))

    try {
      // Stateful operations such as updateStateByKey need a checkpoint directory
      // (see the Spark Streaming programming guide).
      ssc.checkpoint("_checkpoints")

      // Keys 0..9, each paired with its parity rendered as "0" or "1".
      // Explicit method call instead of the postfix form `n % 2 toString`.
      val rdd = sc.parallelize(0 to 9).map(n => (n, (n % 2).toString))
      val clicks = new ConstantInputDStream(ssc, rdd)

      val inc = (n: Int) => n + 1

      // Side-effecting (prints), hence declared and called with parentheses.
      def buildState(): Option[Int] = {
        println(">>> >>> Initial execution to build state or state is deliberately uninitialized yet")
        println(">>> >>> Building the state being the number of calls to update state function, i.e. the number of batches")
        Some(1)
      }

      // Receives only the values collected for a key plus that key's previous state;
      // the key itself is not passed in.
      val updateFn: (Seq[String], Option[Int]) => Option[Int] = (vs, state) => {
        println(">>> update state function with values only, i.e. no keys")
        println(s">>> vs    = $vs")
        println(s">>> state = $state")
        // `orElse` takes its argument by name, so buildState() runs only when there is
        // no previous state for the key.
        state.map(inc).orElse(buildState())
      }

      val statefulStream = clicks.updateStateByKey(updateFn)

      statefulStream.print()

      ssc.start()
      ssc.awaitTermination()
    } finally {
      // Release the streaming and Spark contexts even if start/awaitTermination threw.
      // Non-graceful stop so that a failed job cannot keep the driver waiting on pending batches.
      ssc.stop(stopSparkContext = true, stopGracefully = false)
    }
  }
}
