alter table ad_app.app_accounting_daily add columns (return_cost bigint) cascade;
数据倾斜
- set hive.map.aggr=true;
- set hive.groupby.skewindata=true;
- set mapreduce.job.queuename = root.data.adonline;
- set mapred.reduce.tasks=8192;
- set mapreduce.job.priority=VERY_HIGH;
- set hive.groupby.mapaggr.checkinterval=100000;
- set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
- set mapred.map.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
- set hive.exec.compress.intermediate=true;
- set mapred.compress.map.output=true;
set mapred.max.split.size=256000000; -- 决定每个 map 处理的最大的文件大小, 单位为 B
set mapred.min.split.size.per.node=1; -- 节点中可以处理的最小的文件大小
set mapred.min.split.size.per.rack=1;
控制 map 数量
- set mapred.max.split.size=128000000;
- set mapred.min.split.size.per.node=64000000;
- set mapred.min.split.size.per.rack=64000000;
- set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
设置 reducer 最大个数
set hive.exec.reducers.max=8192;
控制 map reduce 内存
- set mapreduce.map.memory.mb=4096;
- set mapreduce.reduce.memory.mb=4096;
- set mapreduce.reduce.java.opts=-Xmx3800M;
- set mapred.map.child.java.opts=-Xmx3800M;
- set mapreduce.map.java.opts=-Xmx3800M;
- ALTER TABLE table_name ADD IF NOT EXISTS PARTITION (dt='20130101') LOCATION '/user/hadoop/warehouse/table_name/dt=20130101'; -- 一次添加一个分区
来源: http://www.bubuko.com/infodetail-2536857.html