Note

The quickstart/start-gimel by default creates all the hive DDLs which you are seeing below. If you want to build a custom dataset hive table, these can be used as a reference.

Also if you want to see the tables which got created. Please execute the following command in a new terminal.

docker exec -it hive-server bash -c 'hive -e "show tables in  pcatalog"'

Setting Catalog Provider

In this step, developer/user can choose a catalog provider. The default option is HIVE.

Catalog Provider Command Notes
USER gsql("set gimel.catalog.provider=USER") To override the default option we need to use the option
HIVE gsql("set gimel.catalog.provider=HIVE") This is the default option provided to the users

Bootstrap Data

Bootstrap Flights Data

Create HDFS Dataset for loading Flights Data

Catalog Provider Command
USER

  gsql("""set pcatalog.flights_hdfs.dataSetProperties={ 
      "datasetType": "HDFS",
      "fields": [],
      "partitionFields": [],
      "props": {
           "gimel.hdfs.data.format":"csv",
           "location":"hdfs://namenode:8020/flights/data",
           "datasetName":"pcatalog.flights_hdfs"
      }
  }""")

HIVE

drop table if exists pcatalog.flights_hdfs;
CREATE external TABLE if not exists pcatalog.flights_hdfs(
    payload string)
PARTITIONED BY (year string, month string)
LOCATION 'hdfs://namenode:8020/flights/data'
TBLPROPERTIES (
    'gimel.storage.type'='HDFS',
    'gimel.hdfs.data.format'='csv'
);


Bootstrap Flights Lookup Data

Create HDFS Datasets for loading Flights Lookup Data

Catalog Provider Command
USER

  gsql("""set pcatalog.flights_lookup_carrier_code_hdfs.dataSetProperties={
      "datasetType": "HDFS",
      "fields": [],
      "partitionFields": [],
      "props": {
           "gimel.hdfs.data.format":"csv",
           "location":"hdfs://namenode:8020/flights/lkp/carrier_code",
           "datasetName":"pcatalog.flights_lookup_carrier_code_hdfs"
      }
  }""")

HIVE

  drop table if exists pcatalog.flights_lookup_carrier_code_hdfs;
  CREATE external TABLE if not exists pcatalog.flights_lookup_carrier_code_hdfs(
    payload string)
  LOCATION 'hdfs://namenode:8020/flights/lkp/carrier_code'
  TBLPROPERTIES (
    'gimel.storage.type'='HDFS',
    'gimel.hdfs.data.format'='csv'
  );


Catalog Provider Command
USER

  gsql("""set pcatalog.flights_lookup_airline_id_hdfs.dataSetProperties={
      "datasetType": "HDFS",
      "fields": [],
      "partitionFields": [],
      "props": {
           "gimel.hdfs.data.format":"csv",
           "location":"hdfs://namenode:8020/flights/lkp/airline_id",
           "datasetName":"pcatalog.flights_lookup_airline_id_hdfs"
      }
  }""")

HIVE

  drop table if exists pcatalog.flights_lookup_airline_id_hdfs;   
  CREATE external TABLE if not exists pcatalog.flights_lookup_airline_id_hdfs(
    payload string)
  LOCATION 'hdfs://namenode:8020/flights/lkp/airline_id'
  TBLPROPERTIES (
    'gimel.storage.type'='HDFS',
    'gimel.hdfs.data.format'='csv'
  );


Catalog Provider Command
USER

  gsql("""set pcatalog.flights_lookup_cancellation_code_hdfs.dataSetProperties={
      "datasetType": "HDFS",
      "fields": [],
      "partitionFields": [],
      "props": {
           "gimel.hdfs.data.format":"csv",
           "location":"hdfs://namenode:8020/flights/lkp/cancellation_code",
           "datasetName":"pcatalog.flights_lookup_cancellation_code_hdfs"
      }
  }""")

HIVE

  drop table if exists pcatalog.flights_lookup_cancellation_code_hdfs;   
  CREATE external TABLE if not exists pcatalog.flights_lookup_cancellation_code_hdfs(
    payload string)
  LOCATION 'hdfs://namenode:8020/flights/lkp/cancellation_code'
  TBLPROPERTIES (
    'gimel.storage.type'='HDFS',
    'gimel.hdfs.data.format'='csv'
  );


Catalog Provider Command
USER

  gsql("""set pcatalog.flights_lookup_airports_hdfs.dataSetProperties={
      "datasetType": "HDFS",
      "fields": [],
      "partitionFields": [],
      "props": {
           "gimel.hdfs.data.format":"csv",
           "location":"hdfs://namenode:8020/flights/lkp/airports",
           "datasetName":"pcatalog.flights_lookup_airports_hdfs"
      }
  }""")

HIVE

  drop table if exists pcatalog.flights_lookup_airports_hdfs;  
  CREATE external TABLE if not exists pcatalog.flights_lookup_airports_hdfs(
    payload string)
  LOCATION 'hdfs://namenode:8020/flights/lkp/airports'
  TBLPROPERTIES (
    'gimel.storage.type'='HDFS',
    'gimel.hdfs.data.format'='csv'
  );