-- Reset step: remove tables left over from a previous run so that the
-- CREATE TABLE ... AS statements further down do not fail on existing tables.
-- The drops are qualified with the DEFAULT database while the creates in this
-- script are unqualified, so run the script with DEFAULT as the current database.
-- Derived tables are dropped first, then the external source tables.
DROP TABLE IF EXISTS DEFAULT.flight_2007_kylindemo_join;
DROP TABLE IF EXISTS DEFAULT.weather_2007_kylindemo_join;
DROP TABLE IF EXISTS DEFAULT.flight_2007_kylindemo_enrich;
DROP TABLE IF EXISTS DEFAULT.weather_2007_kylindemo;
DROP TABLE IF EXISTS DEFAULT.flight_2007_kylindemo;

-- Load the UDF jar and register class CalHolidaysGap as the session-scoped
-- function calgap. The last CREATE TABLE in this script calls
-- calgap(year, month, day) to produce its gap column.
-- NOTE(review): the class name suggests a distance to holidays - confirm in the jar.
ADD JAR /training/apps/kylin/project1/demo_cube/mini-labs-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION calgap AS 'ca.bin.training.hive.udf.CalHolidaysGap';

-- External text table over the raw 2007 flight CSV files.
-- Fields are comma separated and the first line of each file is skipped as a header.
-- NOTE(review): day and week look like day-of-month and day-of-week in the usual
-- airline on-time layout - confirm against the data files.
CREATE EXTERNAL TABLE IF NOT EXISTS flight_2007_kylindemo (
    year              INT,
    month             INT,
    day               INT,
    week              INT,
    deptime           STRING,
    crsdeptime        STRING,
    arrtime           STRING,
    crsarrtime        STRING,
    uniquecarrier     STRING,
    flightnum         STRING,
    tailnum           STRING,
    actualelapsedtime INT,
    crselapsedtime    INT,
    airtime           INT,
    arrdelay          INT,
    depdelay          INT,
    origin            STRING,
    dest              STRING,
    distance          INT,
    taxiin            INT,
    taxiout           INT,
    cancelled         INT,
    cancelledcode     STRING,
    diverted          INT,
    carrierdelay      INT,
    weatherdelay      INT,
    nasdelay          INT,
    securitydelay     INT,
    lateaircraftdel   INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/tmp/kylin/demo_cube/data/flight2007'
TBLPROPERTIES ("skip.header.line.count"="1");


-- Flights that departed from ORD and were not cancelled, with derived fields:
--   fdate   - flight date as a yyyymmdd string (month and day zero padded)
--   crshour - scheduled departure hour derived from crsdeptime, kept as a string
--   status  - 1 when depdelay is greater than 15, otherwise 0
-- crsdeptime is treated as an hhmm number without leading zeros, so the hour is
-- its integer value divided by 100. Picking leading characters by string length
-- mislabels times before 01:00 (for example 45 would give hour 4 instead of 0).
-- NOTE(review): assumes crsdeptime is numeric hhmm - non-numeric values give NULL.
CREATE TABLE flight_2007_kylindemo_enrich AS
SELECT
    concat(
        cast(year AS STRING),
        lpad(cast(month AS STRING), 2, '0'),
        lpad(cast(day AS STRING), 2, '0')
    ) AS fdate,
    year,
    month,
    day,
    week,
    cast((cast(crsdeptime AS INT) DIV 100) AS STRING) AS crshour,
    CASE WHEN depdelay > 15 THEN 1 ELSE 0 END AS status,
    crsdeptime,
    depdelay,
    origin,
    dest,
    distance,
    cancelled
FROM flight_2007_kylindemo
WHERE cancelled = 0
  AND origin = 'ORD';


-- External text table over the raw 2007 weather CSV files, comma separated.
-- Later statements in this script filter on station and metrics and read
-- mvalue per wdate. Columns c1 to c4 are trailing fields that the statements
-- visible in this script never read.
CREATE EXTERNAL TABLE IF NOT EXISTS weather_2007_kylindemo (
    station STRING,
    wdate   STRING,
    metrics STRING,
    mvalue  STRING,
    c1      STRING,
    c2      STRING,
    c3      STRING,
    c4      STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/tmp/kylin/demo_cube/data/weather2007';


-- One row per date for station USW00094846 with the TMAX, TMIN, PRCP, SNOW and
-- AWND readings side by side. Each CTE picks one metric for that station.
-- The TMIN rows drive the left joins, so wdate is taken from the TMIN side.
-- Reading wdate from the outer-joined TMAX side would yield NULL for any date
-- that has no TMAX row, and such a row could never match in the later date join.
CREATE TABLE weather_2007_kylindemo_join AS
WITH tmin AS (
    SELECT wdate, mvalue AS tmin
    FROM weather_2007_kylindemo
    WHERE station = 'USW00094846' AND metrics = 'TMIN'
),
tmax AS (
    SELECT wdate, mvalue AS tmax
    FROM weather_2007_kylindemo
    WHERE station = 'USW00094846' AND metrics = 'TMAX'
),
prcp AS (
    SELECT wdate, mvalue AS prcp
    FROM weather_2007_kylindemo
    WHERE station = 'USW00094846' AND metrics = 'PRCP'
),
snow AS (
    SELECT wdate, mvalue AS snow
    FROM weather_2007_kylindemo
    WHERE station = 'USW00094846' AND metrics = 'SNOW'
),
awnd AS (
    SELECT wdate, mvalue AS awnd
    FROM weather_2007_kylindemo
    WHERE station = 'USW00094846' AND metrics = 'AWND'
)
SELECT
    tmin.wdate AS wdate,
    tmax.tmax,
    tmin.tmin,
    prcp.prcp,
    snow.snow,
    awnd.awnd
FROM tmin
LEFT JOIN tmax ON tmin.wdate = tmax.wdate
LEFT JOIN prcp ON tmin.wdate = prcp.wdate
LEFT JOIN snow ON tmin.wdate = snow.wdate
LEFT JOIN awnd ON tmin.wdate = awnd.wdate;


-- Final table: one row per enriched flight with the weather readings of the
-- flight date attached through fdate = wdate. The left join keeps flights that
-- have no weather row, leaving their weather columns NULL.
-- crshour and the weather readings are cast to INT, and gap comes from the
-- calgap function registered earlier in this script.
CREATE TABLE flight_2007_kylindemo_join AS
SELECT
    f.status                       AS label,
    f.month                        AS cmonth,
    f.day                          AS cday,
    f.week                         AS cweek,
    cast(f.crshour AS INT)         AS crshour,
    f.distance,
    calgap(f.year, f.month, f.day) AS gap,
    cast(w.tmax AS INT)            AS tmax,
    cast(w.tmin AS INT)            AS tmin,
    cast(w.prcp AS INT)            AS prcp,
    cast(w.snow AS INT)            AS snow,
    cast(w.awnd AS INT)            AS awnd
FROM flight_2007_kylindemo_enrich f
LEFT JOIN weather_2007_kylindemo_join w
    ON f.fdate = w.wdate;
