版本:
hive-0.13.1-cdh5.3.6
介绍:
数据仓库平台 将hadoop上的数据操作同SQL结合 类SQL语言HiveSQL 转换为相应的Mapreduce代码进行执行
用户接口(shell客户端 JDBC ODBC web接口) 元数据库(定义在hive中的表结构信息 默认derby 一般mysql) 解析器(HQL=>mapreduce) 数据仓库(hdfs组成的数据存储容器)
hive安装:
内嵌模式 本地模式 *远程模式
配置信息:http://archive.cloudera.com/cdh5/cdh/5/hive-0.13.1-cdh5.3.6/
地址:http://archive.cloudera.com/cdh5/cdh/5/
1.安装mysql数据库
安装完数据库(能正常使用)后:
1
2
3
4
5
6
| sudo vi /etc/my.cnf
[mysql]
default-character-set=utf8
[mysqld]
character-set-server=utf8
lower_case_table_names=1 # 不区分大小写
|
创建hive元数据的mysql用户
1
2
3
4
5
6
7
8
| mysqladmin -u root password root
use mysql;
select user,host,password from user;
delete from user where password="";
create user 'hive' identified by 'hive';
create database hive;
grant all privileges on hive.* to 'hive'@'%' with grant option;
FLUSH PRIVILEGES;
|
使用新用户登录然后修改数据库字符编码
1
| alter database hive character set latin1;
|
2.hive本地模式安装
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
| tar -xvf hive-0.13.1-cdh5.3.6.tar.gz -C /opt/softs/
sudo ln -s /opt/softs/hive-0.13.1-cdh5.3.6/ /opt/hive
sudo vi /etc/profile
export HIVE_HOME=/opt/hive
export PATH=$PATH:$HIVE_HOME/bin
cp mysql-connector-java-5.1.38.jar $HIVE_HOME/lib/
cp $HIVE_HOME/conf/hive-default.xml.template $HIVE_HOME/conf/hive-site.xml
vi $HIVE_HOME/conf/hive-site.xml
<configuration>
<property>
<name>hive.metastore.uris</name>
<value>thrift://lvmama:9083</value>
<description>该参数指定了远程 metastore 服务的 Thrift 地址,Hive 客户端通过该地址连接 metastore。</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/hive</value>
<description>该参数指定了 Hive 的数据存储目录,默认位置在 HDFS 上面的 /user/hive/warehouse 路径下。</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://lvmama:3306/hive?createDatabaseIfNotExist=true&amp;useUnicode=true&amp;characterEncoding=utf8</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hive</value>
</property>
<property>
<name>hive.querylog.location</name>
<value>/data/hive/querylog</value> <!-- 模板中的原值: ${system:java.io.tmpdir}/${system:user.name} -->
<description>Location of Hive run time structured log file</description>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/data/hive/log</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
<property>
<name>hive.exec.local.scratchdir</name>
<value>/data/hive/tmp</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>/data/hive/tmp/resources</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
</configuration>
|
运行:
1
2
3
4
| schematool -initSchema -dbType mysql
hive --service metastore &
hive # 进入hive-shell
create table t(id int); # 执行后hdfs中会出现hive文件夹
|
hive web界面的安装:(没有hwi,待补全)
hive shell 命令:
1
2
3
4
5
6
7
8
9
10
11
| hive --service help # 查看有哪些服务
hive -H # 查看帮助信息
HQL:
TINYINT(1byte),SMALLINT(2byte),INT(4byte),BIGINT(8byte),FLOAT(4byte),DOUBLE(8byte),BOOLEAN(-),STRING(2G)
show describe(databases tables partition) explain(执行计划)
create (database|schema) [if not exists] database_name [comment database_comment] [location hdfs_path] [with dbproperties (property_name=value,name=value,...)]
eg.create database if not exists bigdata comment 'this is a test database';
describe database|schema [extended] database_name
drop database|schema [if exists] database_name [restrict|cascade]
eg.drop database bigdata;
use database_name
|
开启服务:
1
| zkServer.sh start && start-all.sh && start-hbase.sh
|
停止服务:
1
| stop-hbase.sh && stop-all.sh && zkServer.sh stop
|
删除目录:
1
| rm -rf ~/hdfs/* ~/zookeeper/logs/* ~/zookeeper.out ~/zookeeper/data/* $HADOOP_HOME/logs/* /home/hadoop/hbase/tmp/* && hdfs namenode -format
|
hadoop日志:
1
| sz $HADOOP_HOME/logs/*.log
|
问题总结
问题
1
| ls: 无法访问/opt/spark/lib/spark-assembly-*.jar: 没有那个文件或目录
|
解决
1
| vim $HIVE_HOME/bin/hive 将 lib/spark-assembly-*.jar 替换成 jars/*.jar
|
问题
1
2
| Mkdirs failed to create file:/XXX/XXXX
at org.apache.hadoop.util.RunJar.ensureDirectory(RunJar.java:111)
|
解决2
1
| hive.exec.scratchdir该参数指定了 Hive 的数据临时文件目录,默认位置为 HDFS 上面的 /tmp/hive 路径下
|