G-SQL

Load Flights Lookup Data into HBase Datasets

// Copy each flights lookup dataset from its HDFS source into the matching HBase dataset.
// The row-key property is (re)set to "Code" before every insert.
Seq("cancellation_code", "airline_id", "carrier_code").foreach { lookup =>
  gsql("set gimel.hbase.rowkey=Code")
  gsql(s"insert into pcatalog.flights_lookup_${lookup}_hbase select * from pcatalog.flights_lookup_${lookup}_hdfs")
}

Cache lookup Tables from HBase

// Cache each HBase lookup dataset under a short lkp_* alias.
Seq(
  "lkp_carrier" -> "pcatalog.flights_lookup_carrier_code_hbase",
  "lkp_airline" -> "pcatalog.flights_lookup_airline_id_hbase",
  "lkp_cancellation" -> "pcatalog.flights_lookup_cancellation_code_hbase"
).foreach { case (alias, dataset) =>
  gsql(s"cache table $alias select * from $dataset")
}

Read Data from HBase

// Query each cached lookup table and display up to 10 rows of it.
Seq("lkp_carrier", "lkp_airline", "lkp_cancellation").foreach { table =>
  gsql(s"select * from $table").show(10)
}

Scala API for Catalog Provider-USER

Please execute the steps in this section if you have chosen USER as the CatalogProvider, or if you executed the following command:

// Set the gimel.catalog.provider property to USER for the steps in this section.
gsql("set gimel.catalog.provider=USER")

Set options

// JSON dataset properties for the HBase dataset: two non-nullable string fields
// (Code, Description), row key "Code", plus the HBase table, namespace and column mapping.
val datasetPropsJson = """{
                                  "datasetType": "HBASE",
                                  "fields": [
                                      {
                                          "fieldName": "Code",
                                          "fieldType": "string",
                                          "isFieldNullable": false
                                      },
                                      {
                                          "fieldName": "Description",
                                          "fieldType": "string",
                                          "isFieldNullable": false
                                      }
                                  ],
                                  "partitionFields": [],
                                  "props": {
                                      "gimel.hbase.rowkey":"Code",
                                      "gimel.hbase.table.name":"flights:flights_lookup_cancellation_code",
                                      "gimel.hbase.namespace.name":"flights",
                                      "gimel.hbase.columns.mapping":":key,flights:Description",
                                       "datasetName":"pcatalog.flights_lookup_cancellation_code_hbase"
                                  }
                              }"""

// JSON dataset properties for the HDFS source dataset: CSV data at the given HDFS location.
// No fields are declared here ("fields" is an empty list).
val datasetHivePropsJson = """{ 
                                         "datasetType": "HDFS",
                                         "fields": [],
                                         "partitionFields": [],
                                         "props": {
                                              "gimel.hdfs.data.format":"csv",
                                              "location":"hdfs://namenode:8020/flights/lkp/cancellation_code",
                                              "datasetName":"pcatalog.flights_lookup_cancellation_code_hdfs"
                                         }
                                     }"""

// Options for the HBase dataset, keyed as "<datasetName>.dataSetProperties" -> JSON properties.
val hbaseoptions = Map("pcatalog.flights_lookup_cancellation_code_hbase.dataSetProperties"->datasetPropsJson)

// Options for the HDFS dataset, using the same "<datasetName>.dataSetProperties" key form.
val hiveOptions = Map("pcatalog.flights_lookup_cancellation_code_hdfs.dataSetProperties"->datasetHivePropsJson)

Load Flights Data into HBase Dataset

import com.paypal.gimel._
val dataSet = DataSet(spark)
// Read the lookup data from the HDFS dataset, passing the hiveOptions dataset properties.
val hiveDf = dataSet.read("pcatalog.flights_lookup_cancellation_code_hdfs",hiveOptions)
// Count the rows that were read before writing them out.
hiveDf.count
// Write the rows read above into the HBase dataset, passing the hbaseoptions dataset properties.
// The DataFrame must be referenced as `hiveDf`: Scala identifiers are case-sensitive.
val df =  dataSet.write("pcatalog.flights_lookup_cancellation_code_hbase",hiveDf,hbaseoptions)

Read Data from HBase

import com.paypal.gimel._
val dataSet = DataSet(spark)
// Read the HBase dataset back, passing the hbaseoptions dataset properties.
val df = dataSet.read("pcatalog.flights_lookup_cancellation_code_hbase",hbaseoptions)
// Display up to 10 rows of the result.
df.show(10)

Scala API for Catalog Provider-HIVE

Please execute the steps in this section if you have chosen HIVE as the CatalogProvider, or if you executed the following command:

// Set the gimel.catalog.provider property to HIVE for the steps in this section.
gsql("set gimel.catalog.provider=HIVE")

Load Flights Data into HBase Dataset

import com.paypal.gimel._
val dataSet = DataSet(spark)
// Read the lookup data from the HDFS dataset; no per-dataset options are passed here.
val hiveDf = dataSet.read("pcatalog.flights_lookup_cancellation_code_hdfs")
// Count the rows that were read before writing them out.
hiveDf.count
// Only the HBase row key is supplied as a write option.
val options = Map("gimel.hbase.rowkey"->"Code")
// Write the rows read above into the HBase dataset.
// The DataFrame must be referenced as `hiveDf`: Scala identifiers are case-sensitive.
val df =  dataSet.write("pcatalog.flights_lookup_cancellation_code_hbase",hiveDf,options)

Read Data from HBase

import com.paypal.gimel._
val dataSet = DataSet(spark)
// Read the HBase dataset back; no per-dataset options are passed here.
val df = dataSet.read("pcatalog.flights_lookup_cancellation_code_hbase")
// Display up to 10 rows of the result.
df.show(10)