package ca.training.bigdata.spark.streaming.bidding

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
import org.apache.spark.sql.ForeachWriter

/**
  * Created by BigDataTraining on 2018-03-15.
  */

/**
  * One record of the bid log, with one constructor parameter per column.
  *
  * Field names and their order mirror the schema assembled in `UsingDataset.main`.
  * `region` and `cityID` are boxed `java.lang.Integer` values, so they may hold `null`;
  * every other field is carried as a raw `String`.
  *
  * Note: `UsingDataset` declares its own nested `Bid` with the same fields, so inside
  * that object the name `Bid` refers to the nested class rather than to this one.
  */
case class Bid(
  bidid: String,
  timestamp: String,
  ipinyouid: String,
  useragent: String,
  IP: String,
  region: Integer,
  cityID: Integer,
  adexchange: String,
  domain: String,
  turl: String,
  urlid: String,
  slotid: String,
  slotwidth: String,
  slotheight: String,
  slotvisibility: String,
  slotformat: String,
  slotprice: String,
  creative: String,
  bidprice: String
)

/**
  * Structured Streaming job that reads tab-separated bid logs from a directory as a
  * typed `Dataset[Bid]`, keeps the rows whose `adexchange` equals "3", and prints the
  * `useragent` of each remaining row through a foreach sink.
  *
  * Usage: `UsingDataset [inputPath]`. When no (or an empty) path is supplied, the
  * job falls back to [[UsingDataset.DefaultInputPath]].
  */
object UsingDataset {

  /** Directory watched for bid log files when no path is passed on the command line. */
  private val DefaultInputPath = "file:///root/TrainingOnHDP/dataset/spark/bid"

  /**
    * One record of the bid log; field names and order match the schema built in `main`.
    *
    * `region` and `cityID` are boxed `java.lang.Integer` values and may therefore be
    * `null`; all other columns are kept as raw strings.
    *
    * NOTE(review): this nested class duplicates and shadows the package-level `Bid`
    * declared in the same file. It is kept so that `UsingDataset.Bid` keeps resolving
    * for any external code that refers to it.
    */
  case class Bid(bidid: String,
                 timestamp: String,
                 ipinyouid: String,
                 useragent: String,
                 IP: String,
                 region: Integer,
                 cityID: Integer,
                 adexchange: String,
                 domain: String,
                 turl: String,
                 urlid: String,
                 slotid: String,
                 slotwidth: String,
                 slotheight: String,
                 slotvisibility: String,
                 slotformat: String,
                 slotprice: String,
                 creative: String,
                 bidprice: String)

  /**
    * Entry point: starts the streaming query and blocks until it terminates.
    *
    * @param args optional; `args(0)` is the directory to read bid files from
    */
  def main(args: Array[String]): Unit = {
    // Optional first argument overrides the default input directory.
    val inputPath = args.headOption.filter(_.nonEmpty).getOrElse(DefaultInputPath)

    val spark = SparkSession.builder()
      .appName("Real time Bidding - Using Dataset using Spark Streaming")
      .getOrCreate()
    import spark.implicits._

    try {
      // Define the incoming data schema
      val bidSchema = new StructType()
        .add("bidid", StringType)
        .add("timestamp", StringType)
        .add("ipinyouid", StringType)
        .add("useragent", StringType)
        .add("IP", StringType)
        .add("region", IntegerType)
        .add("cityID", IntegerType)
        .add("adexchange", StringType)
        .add("domain", StringType)
        .add("turl", StringType)
        .add("urlid", StringType)
        .add("slotid", StringType)
        .add("slotwidth", StringType)
        .add("slotheight", StringType)
        .add("slotvisibility", StringType)
        .add("slotformat", StringType)
        .add("slotprice", StringType)
        .add("creative", StringType)
        .add("bidprice", StringType)

      // Incoming data stream. The schema is given explicitly, so no "inferSchema"
      // option is set: it would have no effect alongside a user-supplied schema.
      val streamingInputDF = spark
        .readStream
        .format("csv")
        .schema(bidSchema)
        .option("header", false)
        .option("sep", "\t")
        .option("maxFilesPerTrigger", 1)
        .load(inputPath)

      // Print out Schema
      streamingInputDF.printSchema()

      // Typed view of the stream; `Bid` here is the nested case class above.
      val ds = streamingInputDF.as[Bid]

      // Foreach sink: accepts every partition/epoch and prints each value it receives.
      val writer = new ForeachWriter[String] {
        override def open(partitionId: Long, version: Long): Boolean = true
        override def process(value: String): Unit = println(value)
        override def close(errorOrNull: Throwable): Unit = {}
      }

      val dsForeach = ds
        .filter(_.adexchange == "3")
        .map(_.useragent)
        .writeStream
        .foreach(writer)
        .start()

      dsForeach.awaitTermination()
    } finally {
      // Release the session whether the query ended normally or with an error.
      spark.stop()
    }
  }

}
