package ca.training.bigdata.spark.streaming

import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * Minimal Spark Streaming example showing how `DStream.transform` exposes the
  * RDD behind each micro-batch so that it can be processed with arbitrary
  * RDD / Spark SQL code.
  *
  * Created by BigDataTraining on 2018-03-14.
  */
object TransformExample {

  /**
    * Builds a stream that replays the numbers 0 to 9 every 5 seconds, shows each
    * batch as a single-column DataFrame named "num", and prints the batch through
    * the standard DStream `print()` output operation.
    *
    * The application name is set here; the master is not, so it has to come from
    * the launch configuration (e.g. `spark-submit --master ...`).
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    import org.apache.spark.rdd.RDD
    import org.apache.spark.streaming.dstream.ConstantInputDStream

    val spark = SparkSession.builder().appName("Transform Example").getOrCreate()
    val sc = spark.sparkContext
    val ssc = new StreamingContext(sc, batchDuration = Seconds(5))

    try {
      // ConstantInputDStream hands back this same RDD at every batch interval,
      // which gives a deterministic input without any external source.
      val rdd = sc.parallelize(0 to 9)
      val clicks = new ConstantInputDStream(ssc, rdd)

      // Invoked once per batch interval with that batch's RDD.
      val transformFunc: RDD[Int] => RDD[Int] = { inputRDD =>
        println(s">>> inputRDD: $inputRDD")

        // Use SparkSQL's DataFrame to manipulate the input records
        import spark.implicits._
        inputRDD.toDF("num").show()

        // Pass the batch through unchanged; the DataFrame above is for display only.
        inputRDD
      }
      clicks.transform(transformFunc).print()

      ssc.start()
      ssc.awaitTermination()
    } finally {
      // Release the streaming and Spark contexts even when start() or a batch
      // fails. stop() only logs a warning if the streaming context was never
      // started or has already been stopped, so calling it unconditionally is safe.
      ssc.stop(stopSparkContext = true, stopGracefully = false)
    }
  }
}
