package ca.training.bigdata.hbase

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.execution.datasources.hbase.HBaseTableCatalog

/**
 * Spark driver that loads the `airdelaydata` HBase table through the
 * `org.apache.spark.sql.execution.datasources.hbase` data source, exposes it
 * as a temporary SQL view and prints the first rows.
 */
object AirlineTestDataFromHBase {

  /** Fully qualified name of the data source used to read from HBase. */
  private val HBaseFormat = "org.apache.spark.sql.execution.datasources.hbase"

  /**
   * Builds a DataFrame from an HBase table described by a JSON catalog.
   *
   * @param spark active Spark session used to create the reader
   * @param cat   JSON catalog string, passed to the data source under the
   *              `HBaseTableCatalog.tableCatalog` option
   * @return the DataFrame returned by the data source's `load()`
   */
  def withCatalog(spark: SparkSession, cat: String): DataFrame =
    spark.read
      .options(Map(HBaseTableCatalog.tableCatalog -> cat))
      .format(HBaseFormat)
      .load()

  /**
   * Entry point: reads the table, registers it as the temp view
   * `airdelaydata`, and shows the result of `select * from airdelaydata`.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    // Catalog mapping DataFrame columns to HBase column family / qualifier.
    // The literal needs neither interpolation nor margin stripping (it has no
    // `$` and no `|` margins), so it is a plain raw string.
    // NOTE(review): "FlightDate" is declared as "int" and every column uses a
    // column family named after itself — confirm this matches the real table.
    val cat =
      """{
                  "table":{"namespace":"default", "name":"airdelaydata"},
                  "rowkey":"key",
                  "columns":{
                    "col0":{"cf":"rowkey", "col":"key", "type":"string"},
                    "Year":{"cf":"Year", "col":"Year", "type":"int"},
                    "Quarter":{"cf":"Quarter", "col":"Quarter", "type":"int"},
                    "Month":{"cf":"Month", "col":"Month", "type":"int"},
                    "DayofMonth":{"cf":"DayofMonth", "col":"DayofMonth", "type":"int"},
                    "DayOfWeek":{"cf":"DayOfWeek", "col":"DayOfWeek", "type":"int"},
                    "FlightDate":{"cf":"FlightDate", "col":"FlightDate", "type":"int"},
                    "UniqueCarrier":{"cf":"UniqueCarrier", "col":"UniqueCarrier", "type":"string"},
                    "AirlineID":{"cf":"AirlineID", "col":"AirlineID", "type":"string"}
                  }
                  }"""

    val spark = SparkSession.builder().appName("Airline Test Data from HBase").getOrCreate()

    try {
      val df = withCatalog(spark, cat)

      // createOrReplaceTempView replaces registerTempTable, which has been
      // deprecated since Spark 2.0.
      df.createOrReplaceTempView("airdelaydata")

      spark.sql("select * from airdelaydata").show()
    } finally {
      // Always release the session, even when the load or query fails.
      spark.stop()
    }

  }

}
