1、先對即將安裝的服務進行規劃
| IP | host | 安裝軟件 | 進程 |
| --- | --- | --- | --- |
| 10.10.10.5 | master | hadoop、zookeeper | NameNode、DFSZKFailoverController、JournalNode、DataNode、ResourceManager、JobHistoryServer、NodeManager、QuorumPeerMain |
| 10.10.10.6 | slave1 | hadoop、zookeeper | NameNode、DFSZKFailoverController、JournalNode、DataNode、ResourceManager、NodeManager、QuorumPeerMain |
| 10.10.10.7 | slave2 | hadoop、zookeeper | JournalNode、DataNode、NodeManager、QuorumPeerMain |
環境準備
關閉防火牆
systemctl stop iptables.service
systemctl disable iptables.service
1、上傳安裝包 hadoop-2.6.0-cdh5.16.2.tar.gz zookeeper-3.4.5-cdh5.16.2.tar.gz 到 /opt/soft 目錄下
2、設置主機名
master:
hostname master
vi /etc/sysconfig/network
slave1:
hostname slave1
slave2:
hostname slave2
配置ip 和hostname 的映射關係
vim /etc/hosts
通過 將修改後的文件發到slave1 和slave2
scp /etc/hosts root@slave1:/etc/
scp /etc/hosts root@slave2:/etc/
我在三臺服務上配置了互信,因此可以直接發送,若不能直接發送,可百度看看互信怎麼配置
3、 配置jdk 環境 hadoop zookeeper
如圖我的jdk jar 包 解壓的文件在 /usr/local/jdk 、hadoop:/opt/soft2/hadoop zookeeper:/opt/soft2/zookeeper
4、修改zookeeper 配置
cd /opt/soft2/zookeeper/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
主要修改dataDir,zk 存放數據的路徑
mkdir /opt/soft2/zookeeper/zkData
使用 scp -r zookeeper slave1:/opt/soft2/
使用 scp -r zookeeper slave2:/opt/soft2/
將zookeeper 文件整個拷貝到其餘節點
在每個節點的 dataDir 目錄中,根據配置文件 zoo.cfg 中 server.X 的編號寫入對應的 myid 文件:
master中 echo 1 > /opt/soft2/zookeeper/zkData/myid
slave1中 echo 2 > /opt/soft2/zookeeper/zkData/myid
slave2中 echo 3 > /opt/soft2/zookeeper/zkData/myid
安裝hadoop
修改hadoop 的配置文件
cd /opt/soft2/hadoop/etc/hadoop
vim hadoop-env.sh
配置jdk 環境
配置hadoop的核心配置
vim core-site.xml
<configuration>
<!-- Default filesystem URI; YARN/HDFS clients resolve the NameNode through this
     HA nameservice name (must match dfs.nameservices in hdfs-site.xml) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- ============================== Trash mechanism ============================== -->
<!-- Interval (minutes) between trash checkpoint runs; 0 means use fs.trash.interval -->
<property>
<name>fs.trash.checkpoint.interval</name>
<value>0</value>
</property>
<property>
<!-- Minutes after which a trash checkpoint is deleted (10080 = 7 days).
     The server-side setting takes precedence over the client's; default 0 = never delete -->
<name>fs.trash.interval</name>
<value>10080</value>
</property>
<!-- Base directory for Hadoop's local working data -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/soft2/hadoop/data</value>
</property>
<!-- ZooKeeper quorum used by ZKFC for NameNode automatic failover -->
<property>
<name>ha.zookeeper.quorum</name>
<value>master:2181,slave1:2181,slave2:2181</value>
</property>
<!-- ZooKeeper session timeout in milliseconds.
     NOTE(review): 2000 ms is aggressive; transient GC pauses may trigger failover -->
<property>
<name>ha.zookeeper.session-timeout.ms</name>
<value>2000</value>
</property>
<!-- Allow the "hadoop" proxy user to impersonate from any host / for any group -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<!-- Compression codecs available to MapReduce/HDFS clients -->
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec
</value>
</property>
</configuration>
vim hdfs-site.xml 配置hdfs
<configuration>
  <!-- Group treated as the HDFS superuser group -->
  <property>
    <name>dfs.permissions.superusergroup</name>
    <value>hadoop</value>
  </property>
  <!-- Enable WebHDFS REST API -->
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/opt/soft2/hadoop/data/dfsname</value>
    <description>Local directory where the NameNode stores the name table (fsimage). Adjust per deployment.</description>
  </property>
  <property>
    <name>dfs.namenode.edits.dir</name>
    <value>${dfs.namenode.name.dir}</value>
    <description>Local directory where the NameNode stores its transaction (edits) files. Adjust per deployment.</description>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/soft2/hadoop/data/dfsdata</value>
    <description>Local directory where DataNodes store blocks. Adjust per deployment.</description>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <!-- Block size 128M (the default is also 128M) -->
  <!-- FIX: removed an empty <property/> element and a duplicated copy of this
       dfs.blocksize property that appeared twice in the original file -->
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
  <!-- ======================= HDFS High Availability ======================= -->
  <!-- Nameservice id; must match fs.defaultFS in core-site.xml (hdfs://mycluster) -->
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>
  <property>
    <!-- NameNode ids; this version supports at most two NameNodes -->
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>
  <!-- HDFS HA RPC addresses: dfs.namenode.rpc-address.[nameservice ID].[nn id] -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>master:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>slave1:8020</value>
  </property>
  <!-- HDFS HA HTTP addresses: dfs.namenode.http-address.[nameservice ID].[nn id] -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>master:50070</value>
  </property>
  <!-- FIX: the original read dfs.namenode.http-address.ruozeclusterg10.nn2 with
       value ruozedata002:50070 — copied from a different cluster's tutorial.
       nn2 is slave1 in this cluster (see its rpc-address above). -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>slave1:50070</value>
  </property>
  <!-- ================== NameNode editlog sync (JournalNodes) ================== -->
  <property>
    <name>dfs.journalnode.http-address</name>
    <value>0.0.0.0:8480</value>
  </property>
  <property>
    <name>dfs.journalnode.rpc-address</name>
    <value>0.0.0.0:8485</value>
  </property>
  <property>
    <!-- JournalNode quorum used by QuorumJournalManager to store the editlog.
         Format: qjournal://host1:port1;host2:port2;host3:port3/journalId
         (port must match dfs.journalnode.rpc-address) -->
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value>
  </property>
  <property>
    <!-- Local directory where JournalNodes keep edits.
         NOTE(review): path is under /home/hadoop while all other data dirs are
         under /opt/soft2/hadoop/data — confirm it exists and is writable on all nodes -->
    <name>dfs.journalnode.edits.dir</name>
    <value>/home/hadoop/data/dfs/jn</value>
  </property>
  <!-- ================== Client/DataNode active-NameNode selection ================== -->
  <property>
    <!-- Proxy provider DataNodes/clients use to find the active NameNode
         (automatic failover on the client side).
         FIX: the key suffix was ruozeclusterg10; it must be this cluster's
         nameservice (mycluster) or clients cannot resolve hdfs://mycluster -->
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- ================== NameNode fencing ================== -->
  <!-- Prevent a failed-over NameNode from continuing to serve (split-brain) -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <!-- NOTE(review): commands elsewhere in this guide run as root; confirm this
         key path matches the user that performs fencing -->
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <property>
    <!-- Milliseconds after which the fencing attempt is considered failed -->
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
  <!-- ============ Automatic failover via ZKFC and ZooKeeper ============ -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- File listing the DataNodes permitted to connect to the NameNode -->
  <property>
    <name>dfs.hosts</name>
    <value>/opt/soft2/hadoop/etc/hadoop/slaves</value>
  </property>
</configuration>
修改mapred-site.xml
默認配置目錄中沒有 mapred-site.xml,需要先從模板文件複製一份:
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<configuration>
  <!-- Run MapReduce applications on YARN -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- ========================= JobHistory Server ========================= -->
  <!-- JobHistory Server RPC address (default port 10020) -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>master:10020</value>
  </property>
  <!-- JobHistory Server web UI address (default port 19888).
       FIX: was slave1:19888, but mapreduce.jobhistory.address above and
       yarn.log.server.url (http://master:19888/jobhistory/logs in yarn-site.xml)
       both point at master — the JobHistoryServer runs on master, so its web UI
       must be master:19888 or aggregated-log links break -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master:19888</value>
  </property>
  <!-- Compress map-side output with Snappy -->
  <property>
    <name>mapreduce.map.output.compress</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress.codec</name>
    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
</configuration>
vim slaves 將下列添加進去
master
slave1
slave2
vim yarn-env.sh
vim yarn-site.xml
<configuration>
  <!-- ========================= NodeManager ========================= -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.nodemanager.localizer.address</name>
    <value>0.0.0.0:23344</value>
    <description>Address where the localizer IPC is.</description>
  </property>
  <property>
    <name>yarn.nodemanager.webapp.address</name>
    <value>0.0.0.0:23999</value>
    <description>NM Webapp address.</description>
  </property>
  <!-- ===================== ResourceManager HA ===================== -->
  <property>
    <name>yarn.resourcemanager.connect.retry-interval.ms</name>
    <value>2000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- Embedded automatic failover; cooperates with ZKRMStateStore for fencing -->
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <!-- Cluster id used during HA leader election -->
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-cluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <!-- Optionally pin this node's RM id; set per host if needed (kept commented out)
  <property>
    <name>yarn.resourcemanager.ha.id</name>
    <value>rm2</value>
  </property>
  -->
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
    <value>5000</value>
  </property>
  <!-- ZKRMStateStore: persist RM state in ZooKeeper for recovery -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk.state-store.address</name>
    <value>master:2181,slave1:2181,slave2:2181</value>
  </property>
  <!-- Client -> RM RPC (applications manager interface) -->
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>master:23140</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>slave1:23140</value>
  </property>
  <!-- AM -> RM RPC (scheduler interface) -->
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>master:23130</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>slave1:23130</value>
  </property>
  <!-- RM admin interface -->
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>master:23141</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>slave1:23141</value>
  </property>
  <!-- NM -> RM RPC -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>master:23125</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>slave1:23125</value>
  </property>
  <!-- RM web application addresses -->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>master:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>slave1:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm1</name>
    <value>master:23189</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm2</name>
    <value>slave1:23189</value>
  </property>
  <!-- Aggregate container logs to HDFS and serve them via the JobHistory web UI -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://master:19888/jobhistory/logs</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>2048</value>
  </property>
  <!-- FIX: the two elements below were misspelled <discription>, which Hadoop
       silently ignores as an unknown element -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
    <description>Minimum memory a single task may request; default 1024MB</description>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>2048</value>
    <description>Maximum memory a single task may request; default 8192MB</description>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>2</value>
  </property>
</configuration>
啓動zk
三臺電腦均執行 : 啓動 zkServer.sh start 查看狀態 zkServer.sh status
啓動hadoop(hdfs+yarn)
1、三臺電腦均啓動日誌 JournalNode
hadoop-daemon.sh start journalnode
2、格式化hadoop
hdfs namenode -format  (hadoop namenode -format 已棄用)
將生成的元數據發送到各個節點
[root@master hadoop]# scp -r data slave1:/opt/soft2/hadoop/
fsimage_0000000000000000000 100% 317 0.3KB/s 00:00
VERSION 100% 202 0.2KB/s 00:00
fsimage_0000000000000000000.md5 100% 62 0.1KB/s 00:00
seen_txid
3、初始化zkfc
hdfs zkfc -formatZK
4、啓動hdfs的分佈式文件系統
start-dfs.sh
5、啓動yarn
start-yarn.sh
關閉集羣
關閉yarn stop-yarn.sh
關閉hdfs stop-dfs.sh
關閉zookeeper: 所有的節點都執行 zkServer.sh stop
啓動集羣
1、啓動zookeeper 所有的節點都執行 zkServer.sh start 2、啓動hadoop start-dfs.sh start-yarn.sh 另一個備份節點 yarn-daemon.sh start resourcemanager
監控集羣: hdfs dfsadmin -report