kylin-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [kylin] hit-lacus edited a comment on issue #1172: KYLIN-4385 Fix HiveProducer can not write to Hive Table(AWS S3)
Date Wed, 01 Apr 2020 10:01:57 GMT
hit-lacus edited a comment on issue #1172: KYLIN-4385 Fix HiveProducer can not write to Hive
Table(AWS S3)
URL: https://github.com/apache/kylin/pull/1172#issuecomment-607089486
 
 
   # Test on AWS EMR 5.28
   
   > Glue metadata is enabled.
   
   - Create the Hive tables for the system cube
   
   ```sql
   -- Make sure the target database exists before any table is (re)created.
   CREATE DATABASE IF NOT EXISTS KYLIN;
   
   -- Recreate KYLIN.HIVE_METRICS_QUERY_QA from scratch: comma-delimited text files,
   -- partitioned by KDAY_DATE, stored at an explicit S3 location.
   -- NOTE(review): the table is not declared EXTERNAL, so the DROP presumably also
   -- removes any existing data under LOCATION - confirm this is intended.
   DROP TABLE IF EXISTS KYLIN.HIVE_METRICS_QUERY_QA;
   
   CREATE TABLE KYLIN.HIVE_METRICS_QUERY_QA (
       QUERY_HASH_CODE BIGINT,
       HOST STRING,
       KUSER STRING,
       PROJECT STRING,
       REALIZATION STRING,
       REALIZATION_TYPE INT,
       QUERY_TYPE STRING,
       EXCEPTION STRING,
       QUERY_TIME_COST BIGINT,
       CALCITE_COUNT_RETURN BIGINT,
       STORAGE_COUNT_RETURN BIGINT,
       CALCITE_COUNT_AGGREGATE_FILTER BIGINT,
       KTIMESTAMP BIGINT,
       KYEAR_BEGIN_DATE STRING,
       KMONTH_BEGIN_DATE STRING,
       KWEEK_BEGIN_DATE STRING,
       KDAY_TIME STRING,
       KTIME_HOUR INT,
       KTIME_MINUTE INT,
       KTIME_SECOND INT
   )
   PARTITIONED BY (KDAY_DATE STRING)
   ROW FORMAT DELIMITED
       FIELDS TERMINATED BY ','
   STORED AS TEXTFILE
   LOCATION 's3://xiaoxiang-yu/kylin/system_cube_003/HIVE_METRICS_QUERY_QA';
   
   
   -- Recreate KYLIN.HIVE_METRICS_QUERY_CUBE_QA from scratch: comma-delimited text
   -- files, partitioned by KDAY_DATE, stored at an explicit S3 location.
   -- NOTE(review): the table is not declared EXTERNAL, so the DROP presumably also
   -- removes any existing data under LOCATION - confirm this is intended.
   DROP TABLE IF EXISTS KYLIN.HIVE_METRICS_QUERY_CUBE_QA;
   
   CREATE TABLE KYLIN.HIVE_METRICS_QUERY_CUBE_QA (
       HOST STRING,
       PROJECT STRING,
       CUBE_NAME STRING,
       SEGMENT_NAME STRING,
       CUBOID_SOURCE BIGINT,
       CUBOID_TARGET BIGINT,
       IF_MATCH BOOLEAN,
       FILTER_MASK BIGINT,
       IF_SUCCESS BOOLEAN,
       WEIGHT_PER_HIT DOUBLE,
       STORAGE_CALL_COUNT BIGINT,
       STORAGE_CALL_TIME_SUM BIGINT,
       STORAGE_CALL_TIME_MAX BIGINT,
       STORAGE_COUNT_SKIP BIGINT,
       STORAGE_COUNT_SCAN BIGINT,
       STORAGE_COUNT_RETURN BIGINT,
       STORAGE_COUNT_AGGREGATE_FILTER BIGINT,
       STORAGE_COUNT_AGGREGATE BIGINT,
       KTIMESTAMP BIGINT,
       KYEAR_BEGIN_DATE STRING,
       KMONTH_BEGIN_DATE STRING,
       KWEEK_BEGIN_DATE STRING,
       KDAY_TIME STRING,
       KTIME_HOUR INT,
       KTIME_MINUTE INT,
       KTIME_SECOND INT
   )
   PARTITIONED BY (KDAY_DATE STRING)
   ROW FORMAT DELIMITED
       FIELDS TERMINATED BY ','
   STORED AS TEXTFILE
   LOCATION 's3://xiaoxiang-yu/kylin/system_cube_003/HIVE_METRICS_QUERY_CUBE_QA';
   
   -- Recreate KYLIN.HIVE_METRICS_QUERY_RPC_QA from scratch: comma-delimited text
   -- files, partitioned by KDAY_DATE, stored at an explicit S3 location.
   -- NOTE(review): the table is not declared EXTERNAL, so the DROP presumably also
   -- removes any existing data under LOCATION - confirm this is intended.
   DROP TABLE IF EXISTS KYLIN.HIVE_METRICS_QUERY_RPC_QA;
   
   CREATE TABLE KYLIN.HIVE_METRICS_QUERY_RPC_QA (
       HOST STRING,
       PROJECT STRING,
       REALIZATION STRING,
       RPC_SERVER STRING,
       EXCEPTION STRING,
       CALL_TIME BIGINT,
       COUNT_RETURN BIGINT,
       COUNT_SCAN BIGINT,
       COUNT_SKIP BIGINT,
       COUNT_AGGREGATE_FILTER BIGINT,
       COUNT_AGGREGATE BIGINT,
       KTIMESTAMP BIGINT,
       KYEAR_BEGIN_DATE STRING,
       KMONTH_BEGIN_DATE STRING,
       KWEEK_BEGIN_DATE STRING,
       KDAY_TIME STRING,
       KTIME_HOUR INT,
       KTIME_MINUTE INT,
       KTIME_SECOND INT
   )
   PARTITIONED BY (KDAY_DATE STRING)
   ROW FORMAT DELIMITED
       FIELDS TERMINATED BY ','
   STORED AS TEXTFILE
   LOCATION 's3://xiaoxiang-yu/kylin/system_cube_003/HIVE_METRICS_QUERY_RPC_QA';
   
   -- Recreate KYLIN.HIVE_METRICS_JOB_QA from scratch: comma-delimited text files,
   -- partitioned by KDAY_DATE, stored at an explicit S3 location.
   -- NOTE(review): the table is not declared EXTERNAL, so the DROP presumably also
   -- removes any existing data under LOCATION - confirm this is intended.
   DROP TABLE IF EXISTS KYLIN.HIVE_METRICS_JOB_QA;
   
   CREATE TABLE KYLIN.HIVE_METRICS_JOB_QA (
       JOB_ID STRING,
       HOST STRING,
       KUSER STRING,
       PROJECT STRING,
       CUBE_NAME STRING,
       JOB_TYPE STRING,
       CUBING_TYPE STRING,
       DURATION BIGINT,
       TABLE_SIZE BIGINT,
       CUBE_SIZE BIGINT,
       PER_BYTES_TIME_COST DOUBLE,
       WAIT_RESOURCE_TIME BIGINT,
       STEP_DURATION_DISTINCT_COLUMNS BIGINT,
       STEP_DURATION_DICTIONARY BIGINT,
       STEP_DURATION_INMEM_CUBING BIGINT,
       STEP_DURATION_HFILE_CONVERT BIGINT,
       KTIMESTAMP BIGINT,
       KYEAR_BEGIN_DATE STRING,
       KMONTH_BEGIN_DATE STRING,
       KWEEK_BEGIN_DATE STRING,
       KDAY_TIME STRING,
       KTIME_HOUR INT,
       KTIME_MINUTE INT,
       KTIME_SECOND INT
   )
   PARTITIONED BY (KDAY_DATE STRING)
   ROW FORMAT DELIMITED
       FIELDS TERMINATED BY ','
   STORED AS TEXTFILE
   LOCATION 's3://xiaoxiang-yu/kylin/system_cube_003/HIVE_METRICS_JOB_QA';
   
   -- Recreate KYLIN.HIVE_METRICS_JOB_EXCEPTION_QA from scratch: comma-delimited text
   -- files, partitioned by KDAY_DATE, stored at an explicit S3 location.
   -- NOTE(review): the table is not declared EXTERNAL, so the DROP presumably also
   -- removes any existing data under LOCATION - confirm this is intended.
   DROP TABLE IF EXISTS KYLIN.HIVE_METRICS_JOB_EXCEPTION_QA;
   
   CREATE TABLE KYLIN.HIVE_METRICS_JOB_EXCEPTION_QA (
       JOB_ID STRING,
       HOST STRING,
       KUSER STRING,
       PROJECT STRING,
       CUBE_NAME STRING,
       JOB_TYPE STRING,
       CUBING_TYPE STRING,
       EXCEPTION STRING,
       KTIMESTAMP BIGINT,
       KYEAR_BEGIN_DATE STRING,
       KMONTH_BEGIN_DATE STRING,
       KWEEK_BEGIN_DATE STRING,
       KDAY_TIME STRING,
       KTIME_HOUR INT,
       KTIME_MINUTE INT,
       KTIME_SECOND INT
   )
   PARTITIONED BY (KDAY_DATE STRING)
   ROW FORMAT DELIMITED
       FIELDS TERMINATED BY ','
   STORED AS TEXTFILE
   LOCATION 's3://xiaoxiang-yu/kylin/system_cube_003/HIVE_METRICS_JOB_EXCEPTION_QA';
   ```
   
   - tomcat/webapps/kylin/WEB-INF/classes/kylinMetrics.xml
   
   ```xml
   <!-- Spring MethodInvokingFactoryBean configured to call initMetricsManager on
        org.apache.kylin.metrics.MetricsManager with the two arguments listed below.
        The referenced beans hiveSink and blockingReservoir are not defined in this
        snippet and must exist elsewhere in the application context. -->
   <bean id="initMetricsManager" class="org.springframework.beans.factory.config.MethodInvokingFactoryBean">
       <property name="targetClass" value="org.apache.kylin.metrics.MetricsManager"/>
       <property name="targetMethod" value="initMetricsManager"/>
       <property name="arguments">
           <list>
               <!-- Argument 1: reference to the hiveSink bean. -->
               <ref bean="hiveSink"/>
               <!-- Argument 2: map keyed by ActiveReservoir whose values are lists of
                    Pair(first = reporter class name, second = reporter properties). -->
               <map key-type="org.apache.kylin.metrics.lib.ActiveReservoir" value-type="java.util.List">
                   <entry key-ref="blockingReservoir">
                       <list>
                           <bean class="org.apache.kylin.common.util.Pair">
                               <property name="first"
                                         value="org.apache.kylin.metrics.lib.impl.hive.HiveReservoirReporter"/>
                               <property name="second">
                                   <props>
                                       <!-- Points fs.defaultFS at the S3 bucket. BUCKET_NAME is
                                            presumably a placeholder for the real bucket name
                                            (TODO confirm before reuse). -->
                                       <prop key="fs.defaultFS">s3://BUCKET_NAME</prop>
                                   </props>
                               </property>
                           </bean>
                       </list>
                   </entry>
               </map>
           </list>
       </property>
   </bean>
   ``` 
   
   - conf/kylin.properties
   
   ```properties
   # Metadata URL plus the S3-backed working directory and HBase cluster filesystem.
   # BUCKET_NAME is presumably a placeholder for the real S3 bucket (TODO confirm).
   kylin.metadata.url=kylin_metadata_failover@hbase
   kylin.env.hdfs-working-dir=s3://BUCKET_NAME/kylin/
   kylin.storage.hbase.cluster-fs=s3://BUCKET_NAME
   kylin.source.hive.redistribute-flat-table=false
   # Hive metadata type is set to gluecatalog (this test ran with Glue metadata enabled).
   kylin.source.hive.metadata-type=gluecatalog
   
   # Spark event log and history directories on S3.
   # NOTE(review): assuming this file is parsed with standard Java .properties rules,
   # the escaped colon in "s3\://" reads as a plain colon, so these values are
   # equivalent to the unescaped "s3://" form used on the other lines.
   kylin.engine.spark-conf.spark.eventLog.dir=s3\://BUCKET_NAME/kylin/spark-history
   kylin.engine.spark-conf.spark.history.fs.logDirectory=s3\://BUCKET_NAME/kylin/spark-history
   
   kylin.query.max-return-rows=10000000
   kylin.engine.spark-conf.spark.yarn.archive=s3://BUCKET_NAME/package/kylin_binary_AWS_GLUE/spark-libs.jar
   
   # Metrics monitoring, the job and query reporters, and the web dashboard are all
   # switched on for this test.
   kylin.metrics.monitor-enabled=true
   kylin.metrics.reporter-job-enabled=true
   kylin.metrics.reporter-query-enabled=true
   kylin.web.dashboard-enabled=true
   ```
   
   
   - Hive Table
   <img width="1071" alt="image" src="https://user-images.githubusercontent.com/14030549/78124604-7e121800-7442-11ea-8656-f62392a91dfd.png">
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message