- 设置参数:
- set hive.fetch.task.conversion=more;
- set hive.exec.dynamic.partition=true;
- set hive.exec.dynamic.partition.mode=nonstrict;
- set hive.exec.max.dynamic.partitions=100000;
- set hive.exec.max.dynamic.partitions.pernode=1000000;
- set mapreduce.job.reduces=10;
- set mapreduce.input.fileinputformat.split.maxsize=1024000000;
- set mapreduce.input.fileinputformat.split.minsize=1;
- set mapreduce.user.classpath.first=true;
- set mapreduce.task.io.sort.mb=500;
- 创建表语句:
- create table daxue_dict(id string,university string,university_alias string) row format delimited fields terminated by ',' stored as textfile;
- alter table daxue_dict set serdeproperties('serialization.null.format' = '');
- 自动分区:
- insert overwrite table daxue_data_words partition(public_day) select key,site_cls,area_code,url,sentiment,title,txt,default.seg_words(title,txt),public_time,public_day from daxue_data where 1=1;
- 导出数据:
- insert overwrite local directory '/tmp/data' row format delimited fields terminated by '\\t' select unis_id,word,count(*) cnt from (select unis_id,word from (select unis_id,words from (select default.unis_check(words) unis_ids,words from daxue_data_words where public_day>='2016-04-01' and default.unis_check(words)!='0') t LATERAL VIEW explode(split(unis_ids,',')) aa as unis_id) tt LATERAL VIEW explode(split(words,',')) bb as word) cc where length(word)>1 group by unis_id,word;
- insert overwrite local directory '/tmp/data2' row format delimited fields terminated by '\\t' select unis_id,site_cls,if(area_code is null,'',area_code),sentiment,count(*) from (select unis_id,site_cls,area_code,sentiment from (select default.unis_check(words) unis_ids,words,site_cls,area_code,sentiment from daxue_data_words where default.unis_check(words)!='0') t LATERAL VIEW explode(split(unis_ids,',')) aa as unis_id) bb group by unis_id,site_cls,area_code,sentiment;
- 操作json的sql:
- select concat_ws(',',regexp_replace(get_json_object(article_split,'$.titleSplit'),'\\\\[|\\\\]|\\\\"',''),regexp_replace(get_json_object(article_split,'$.contextSplit'),'\\\\[|\\\\]|\\\\"','')) from yuqing_new_hive limit 1;
- --该片段来自于http://www.codesnippet.cn/detail/0607201614787.html
来源: http://www.codesnippet.cn/detail/0607201614787.html