-- Create a table
create table if not exists student(
name string,
age int,
score int)
row format delimited fields terminated by '\t';
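-- Illustrative load into the table above (the local file path is an assumption,
-- not from the original notes); the file should be tab-separated to match the
-- row format declared above.
load data local inpath '/root/data_emp/student.txt' into table student;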
-- Create a derived table (CTAS) containing only the columns we need
create table if not exists db_hive.emp_cats
as
select name, age from student;
-- Truncate: remove all rows but keep the table
truncate table dept_cats;
-- Create a table with the same schema as an existing one
create table if not exists student_like
like
test.student;
-- Rename a table
alter table dept_like rename to dept_like_rename;
-- Drop a table (this sometimes appears to hang)
drop table if exists dept_like_rename;
-- Create an external table
-- Dropping an external table does not delete the underlying data files
create external table if not exists db_hive.emp_external(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
)
row format delimited fields terminated by '\t'
location '/user/root/hive/warehouse/emp_external';
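-- Illustrative check of the point above: "desc formatted" reports
-- Table Type: EXTERNAL_TABLE, and dropping such a table removes only the
-- metastore entry, leaving the files under its LOCATION on HDFS untouched.
desc formatted db_hive.emp_external;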
-- Create a table partitioned on two levels
set hive.exec.dynamic.partition.mode=nonstrict; -- allow dynamic partitioning
-- Static partitioning
create external table if not exists db_hive.emp_partition2(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
)
PARTITIONED BY (month string, day string)
row format delimited fields terminated by '\t'
location '/user/root/hive/warehouse/emp_partition2';
load data local inpath '/root/data_emp/emp.txt' into table db_hive.emp_partition2
partition (month='201810', day='21');
SELECT * FROM db_hive.emp_partition2 where month='201810' and day='21';
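-- Illustrative follow-up: list the partitions that now exist for the table.
show partitions db_hive.emp_partition2;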
-- Dynamic partitioning (the partition columns must come last in the SELECT list)
FROM raw_access_log
INSERT OVERWRITE TABLE partitioned_raw_access_log
PARTITION (year, month, day)
SELECT ip, ..., year, month, day;
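-- Self-contained sketch of a dynamic-partition insert (the table and column
-- names below are illustrative assumptions, not from the notes above):
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
create table if not exists access_log_p (ip string, url string)
partitioned by (dt string);
insert overwrite table access_log_p partition (dt)
select ip, url, dt from access_log_raw; -- dt, the partition column, comes last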
-- Mixed mode: some partition values static, the rest dynamic
FROM raw_access_log
INSERT OVERWRITE TABLE partitioned_raw_access_log
PARTITION (year='2007', month, day)
SELECT ip, ..., month, day;
-- Bucketing
-- CLUSTERED BY belongs in the CREATE TABLE statement; unlike partition columns,
-- the bucketing column must be an existing column of the table.
create table table_test (column_name string, ...)
CLUSTERED BY (column_name) INTO 8 BUCKETS;
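-- Complete sketch of a bucketed table (table and column names are illustrative
-- assumptions):
create table if not exists user_bucketed (
user_id int,
name string
)
clustered by (user_id) sorted by (user_id) into 8 buckets
row format delimited fields terminated by '\t';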
-- Load data into a bucketed table
-- First set the number of reducers equal to the number of buckets; this decides
-- how many output files are produced.
-- With hive.enforce.sorting and hive.enforce.bucketing set to true, the explicit
-- DISTRIBUTE BY and SORT BY clauses can be omitted.
SET hive.enforce.bucketing=true;
SET hive.enforce.sorting=true;
SET mapred.reduce.tasks=200;
FROM table_fa
INSERT OVERWRITE TABLE table_son
SELECT ...,request_date
WHERE ...
[DISTRIBUTE BY ...][SORT BY ...]
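-- Concrete sketch of the pattern above, filling the illustrative user_bucketed
-- table; the source table user_raw is an assumption.
FROM user_raw
INSERT OVERWRITE TABLE user_bucketed
SELECT user_id, name
DISTRIBUTE BY user_id
SORT BY user_id;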
-- Repair partitions: register partition directories created directly on HDFS
-- with the metastore
MSCK REPAIR TABLE table_name;
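-- Manual alternative for a single partition (illustrative, using the static
-- partition table defined above):
alter table db_hive.emp_partition2 add if not exists partition (month='201810', day='22');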