Hive tables
Managed (internal) tables:
managed by Hive (dropping the table also deletes the data)
External tables:
managed by the user (dropping the table does not delete the data)
Three ways to create a table (a combined sketch follows the format notes below):
1.create [external] table [if not exists] [db_name.]table_name (col1_name col1_type [comment col1_comment],...)
[comment table_comment]
[partitioned by(col_name col_type [comment col_comment],...)]
[clustered by (col_name,col_name,...)[sorted by (col_name [asc|desc],...)] into num_buckets buckets] --buckets
[row format row_format]
[stored as file_format]
[location hdfs_path];
2.create table ... AS select ... (the query result is copied, so data is produced)
3.create table table_name like existing_table_name (copies only the table definition)
row_format:
delimited fields terminated by '\001'
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
null defined as '\\N';
file_format:
sequencefile, textfile (default), rcfile, orc, parquet, avro
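A minimal sketch that puts the template, row_format, and file_format together (the database, table, columns, and path are made up for illustration):
create table if not exists test_db.web_logs(ip string comment 'client ip', url string, hits int)
comment 'sample log table'
partitioned by (dt string)
clustered by (ip) sorted by (hits desc) into 4 buckets
row format delimited fields terminated by '\001'
stored as textfile
location '/test_db/web_logs';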
Examples
eg1:
1.create table test_manager(id int);
2.create external table test_external(id int);
3.create table test_location(id int) location '/test_location';
4.drop table test_external;
5.drop table test_manager;
6.drop table test_location;
When a table is dropped, a managed (internal) table's directory is deleted whether or not a location was specified; an external table's directory is never deleted.
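describe formatted shows the table type (MANAGED_TABLE vs EXTERNAL_TABLE) before dropping, and a managed table can be switched to external through tblproperties; a sketch against the tables above:
describe formatted test_location;
alter table test_location set tblproperties('EXTERNAL'='TRUE'); -- managed -> external ('FALSE' switches back)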
eg2:
1.create table customers(id int, name string, phone string) comment 'this is customers table' row format delimited fields terminated by ',' location '/customers';
2.create table customers2 like customers;
3.create table customers3 as select * from customers; (runs a MapReduce job)
eg3:
create table complex_table_test(id int,name string,flag boolean,score array<int>,tech map<string,string>,other struct<phone:string,email:string>)
row format delimited fields terminated by '\;'
collection items terminated by ','
map keys terminated by ':'
location 'hdfs://lvmama:8020/complex_table_test';
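A sketch of querying the complex columns (the array index, map key, and filter are made up for illustration):
select id, name, score[0], tech['java'], other.phone, other.email
from complex_table_test
where flag = true;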
eg4: Hive & HBase integration
create external table hive_users(key string, id int, name string,phone string)
row format serde 'org.apache.hadoop.hive.hbase.HBaseSerDe'
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties ('hbase.columns.mapping'=':key,f:id,f:name,f:phone')
tblproperties('hbase.table.name'='users');
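This assumes the HBase table 'users' (with column family f) already exists; once mapped, it can be queried like any other Hive table, e.g.:
select key, name, phone from hive_users where id > 100;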
View a table's detailed format information:
describe formatted hive_users;
View the CREATE TABLE statement of a table:
show create table hive_users;
DDL:
describe, drop, truncate, alter (examples below)
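A few common DDL forms, sketched against the customers table from eg2:
describe customers;
alter table customers rename to customers_old;
alter table customers_old add columns (email string comment 'newly added column');
truncate table customers_old; -- removes the data, keeps the schema (managed tables only)
drop table if exists customers_old;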
Importing data:
1.load data [local] inpath 'filepath' [overwrite] into table table_name;
2.insert (overwrite|into) table tablename1 select_statement1 from from_statement where_statement
Examples
eg1:
create database test_hbase;
use test_hbase;
set hive.cli.print.current.db=true; (print the current database name in the prompt)
create table classes(classid int comment 'this is classid,not null', classname string comment 'this is class name') row format delimited fields terminated by ',';
create table students(studentid int comment 'this is student id, not null',classid int comment 'this is classid,can set to null',studentname string) row format delimited fields terminated by ',';
load data local inpath '/home/hadoop/datas/classes.txt' [overwrite] into table classes;
load data local inpath '/home/hadoop/datas/students.txt' [overwrite] into table students;
create table test1(id int);
create table test2(id int);
from students insert into table test1 select studentid insert overwrite table test2 select distinct classid where classid is not null;
SELECT statement breakdown (a combined example follows the list):
1.[with CommonTableExpression(,CommonTableExpression)*] -- common table expression (CTE)
2.select [all|distinct] select_expr,select_expr,.. -- the list of columns/expressions to return
3.from table_reference -- FROM clause; may also be placed after WITH and before SELECT
4.[where where_condition] -- WHERE filter
5.[group by col_name,...] -- grouping
6.[order by|sort by col_name (asc|desc),...] -- ordering
7.[limit number] -- row limit
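A sketch combining these clauses on the students table from the earlier example:
with valid_students as (
select studentid, classid from students where classid is not null
)
select classid, count(distinct studentid) as student_cnt
from valid_students
group by classid
order by student_cnt desc
limit 10;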
Join statements:
inner join, outer join, semi join, Cartesian (cross) join (examples below)
Format:
left_table_reference [join_type] right_table_reference [join_condition] ([join_type] right_table_reference [join_condition])*
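A sketch of the four join types on classes/students:
select c.classname, s.studentname from classes c join students s on c.classid = s.classid; -- inner join
select c.classname, s.studentname from classes c left outer join students s on c.classid = s.classid; -- outer join
select c.* from classes c left semi join students s on c.classid = s.classid; -- semi join
select * from classes cross join students; -- Cartesian join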
Exporting data:
insert overwrite [local] directory directory1 [row format row_format] [stored as file_format] select ... from ... (with local the target is the local file system, otherwise HDFS)
eg1:
from (select classes.classname, students.studentname from classes join students on classes.classid = students.classid) tmp
insert overwrite local directory '/home/hadoop/datas/result/' select *
insert overwrite directory '/test/result/01' row format delimited fields terminated by ',' select classname, studentname;
Execute a SQL file and write the results to a file:
hive --database test -f test.sql >> result.txt
-e "hql":在linux系统中执行hive语句
-f "filepath":执行linux中的包含hql的文件
HQL functions (a registration sketch follows at the end of this section):
UDF: one row in, one row out
extend the class org.apache.hadoop.hive.ql.exec.UDF
overload the evaluate method
UDAF: multiple rows in, one row out
extend the class org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver
UDTF: one row in, multiple rows out (extend org.apache.hadoop.hive.ql.udf.generic.GenericUDTF)
show functions;
add jar linux_jar_path
create [temporary] function [dbname.]function_name as class_name;
drop [temporary] function [if exists] [dbname.]function_name;
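A sketch of registering and calling a custom function (the jar path, class name, and function name are made up):
add jar /home/hadoop/jars/my_udf.jar;
create temporary function my_lower as 'com.example.hive.udf.MyLower';
select my_lower(studentname) from students limit 10;
drop temporary function if exists my_lower;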