package ca.training.bigdata.spark.streaming.bidding

import org.apache.spark.sql.{ForeachWriter, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}

/**
  * Created by BigDataTraining on 2018-03-15.
  *
  * Structured Streaming job that reads tab-separated bid records from a directory,
  * counts them per `adexchange`, keeps the running counts in the in-memory table
  * `aggregateTable`, and prints that table once with Spark SQL.
  *
  * Optional command-line arguments (both fall back to the original hard-coded values):
  *   - `args(0)`: input directory to stream CSV files from
  *   - `args(1)`: maximum number of seconds to let the stream run before the table is printed
  */
object SparkSQLOverMemory {

  /** Input directory used when no path is passed on the command line. */
  private val DefaultInputPath = "file:///root/TrainingOnHDP/dataset/spark/bid"

  /** Seconds to let the stream run before querying the table when no value is passed. */
  private val DefaultWarmUpSeconds = 120L

  /**
    * Entry point: starts the streaming aggregation, prints a snapshot of the in-memory
    * result table, then blocks until the streaming query terminates.
    *
    * @param args optional `[inputPath, warmUpSeconds]`; a non-numeric `warmUpSeconds`
    *             raises `NumberFormatException`, a negative one `IllegalArgumentException`
    */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val warmUpSeconds = args.lift(1).map(_.trim.toLong).getOrElse(DefaultWarmUpSeconds)
    require(warmUpSeconds >= 0, s"warmUpSeconds must not be negative, got $warmUpSeconds")

    val spark = SparkSession.builder().appName("Real time Bidding - Spark SQL over Memory using Spark Streaming").getOrCreate()
    import spark.implicits._

    try {
      // Define the incoming data schema
      val bidSchema = new StructType()
        .add("bidid", StringType)
        .add("timestamp", StringType)
        .add("ipinyouid", StringType)
        .add("useragent", StringType)
        .add("IP", StringType)
        .add("region", IntegerType)
        .add("cityID", IntegerType)
        .add("adexchange", StringType)
        .add("domain", StringType)
        .add("turl", StringType)
        .add("urlid", StringType)
        .add("slotid", StringType)
        .add("slotwidth", StringType)
        .add("slotheight", StringType)
        .add("slotvisibility", StringType)
        .add("slotformat", StringType)
        .add("slotprice", StringType)
        .add("creative", StringType)
        .add("bidprice", StringType)

      // Incoming data stream. The schema is supplied explicitly, so no "inferSchema"
      // option is set: it would have no effect next to .schema(...).
      val streamingInputDF = spark
        .readStream
        .format("csv")
        .schema(bidSchema)
        .option("header", false)
        .option("sep", "\t")
        .option("maxFilesPerTrigger", 1)
        .load(inputPath)

      // Print out the schema
      streamingInputDF.printSchema()

      val aggAdexchangeDF = streamingInputDF.groupBy($"adexchange").count()

      // Start the aggregation; results are materialised in the in-memory table
      // registered under the query name.
      val aggQuery = aggAdexchangeDF
        .writeStream
        .queryName("aggregateTable")
        .outputMode("complete")
        .format("memory")
        .start()

      // Give the stream time to ingest data before taking a snapshot. Unlike a plain
      // Thread.sleep, this returns early if the query stops and rethrows its failure
      // instead of sleeping through it.
      aggQuery.awaitTermination(warmUpSeconds * 1000L)

      spark.sql("select * from aggregateTable").show()

      aggQuery.awaitTermination()
    } finally {
      // Release the session whether the query ended normally or with an error.
      spark.stop()
    }
  }

}
