package ca.training.bigdata.hbase

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog

/**
 * One airline record as it is written to HBase.
 *
 * The field names are identical to the column names declared in the connector
 * catalog inside `AirlineTestDataToHBase`, where `col0` is mapped to the row key.
 *
 * @param col0          value stored as the HBase row key
 * @param Year          value of the `Year` column
 * @param Quarter       value of the `Quarter` column
 * @param Month         value of the `Month` column
 * @param DayofMonth    value of the `DayofMonth` column
 * @param DayOfWeek     value of the `DayOfWeek` column
 * @param FlightDate    value of the `FlightDate` column (declared as an int)
 * @param UniqueCarrier value of the `UniqueCarrier` column
 * @param AirlineID     value of the `AirlineID` column
 */
case class HBaseRecordAirline(
  col0: String,
  Year: Int,
  Quarter: Int,
  Month: Int,
  DayofMonth: Int,
  DayOfWeek: Int,
  FlightDate: Int,
  UniqueCarrier: String,
  AirlineID: String
)

/** Factory for synthetic [[HBaseRecordAirline]] rows used as test data. */
object HBaseRecordAirlineTest {

  /**
   * Builds a record in which every field is derived from `i`.
   *
   * All string fields receive the label `"row"` followed by `i` zero-padded to at
   * least three digits (for example `row007`); all integer fields receive `i` itself.
   *
   * @param i index the record is generated from
   * @return the generated record
   */
  def apply(i: Int): HBaseRecordAirline = {
    val label = f"row$i%03d"
    HBaseRecordAirline(
      col0 = label,
      Year = i,
      Quarter = i,
      Month = i,
      DayofMonth = i,
      DayOfWeek = i,
      FlightDate = i,
      UniqueCarrier = label,
      AirlineID = label
    )
  }
}

/**
 * Spark job that generates a handful of synthetic airline records and writes them
 * to the HBase table `default:airdelaydata` through the
 * `org.apache.spark.sql.execution.datasources.hbase` data source.
 */
object AirlineTestDataToHBase {

  /**
   * Creates nine test records (indices 0 through 8), turns them into a DataFrame and
   * saves it to HBase using the catalog below. The Spark session is always stopped
   * before the method returns, whether or not the write succeeds.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {

    // Connector catalog. `col0` is mapped to the row key (cf "rowkey", col "key");
    // every other field is stored in a column family named after the field, under a
    // qualifier of the same name.
    // NOTE(review): this yields one column family per field — confirm that is intended
    // for the target table.
    // The literal contains no `$` and no `|` margins, so it needs neither the `s`
    // interpolator nor `stripMargin`.
    val cat =
      """{
                  "table":{"namespace":"default", "name":"airdelaydata"},
                  "rowkey":"key",
                  "columns":{
                    "col0":{"cf":"rowkey", "col":"key", "type":"string"},
                    "Year":{"cf":"Year", "col":"Year", "type":"int"},
                    "Quarter":{"cf":"Quarter", "col":"Quarter", "type":"int"},
                    "Month":{"cf":"Month", "col":"Month", "type":"int"},
                    "DayofMonth":{"cf":"DayofMonth", "col":"DayofMonth", "type":"int"},
                    "DayOfWeek":{"cf":"DayOfWeek", "col":"DayOfWeek", "type":"int"},
                    "FlightDate":{"cf":"FlightDate", "col":"FlightDate", "type":"int"},
                    "UniqueCarrier":{"cf":"UniqueCarrier", "col":"UniqueCarrier", "type":"string"},
                    "AirlineID":{"cf":"AirlineID", "col":"AirlineID", "type":"string"}
                  }
                  }"""

    val spark = SparkSession.builder().appName("Airline Test Data to HBase").getOrCreate()
    try {
      import spark.implicits._

      val data = (0 to 8).map(i => HBaseRecordAirlineTest(i))

      // NOTE(review): `newTable -> "5"` is presumably the region count the connector
      // uses when it has to create the table — confirm against the connector docs.
      val writeOptions = Map(
        HBaseTableCatalog.tableCatalog -> cat,
        HBaseTableCatalog.newTable -> "5"
      )

      spark.sparkContext
        .parallelize(data)
        .toDF()
        .write
        .options(writeOptions)
        .format("org.apache.spark.sql.execution.datasources.hbase")
        .save()
    } finally {
      // Release the session's resources even when the write throws.
      spark.stop()
    }

  }

}
