Corosync是用來實現多個機器互相通訊(維持心跳)的,而pacemaker是在corosync上層來統一管理整個集羣的運行。Corosync是未來的發展方向,在以後的新項目裏一般採用Corosync;hb_gui可以提供很好的HA管理功能,可以實現圖形化的管理。另外,相關的圖形化工具還有RHCS的套件luci+ricci。
高可用羣集的一致性:硬件、軟件以及時間的一致性
ip地址規劃:
node1.a.com 192.168.101.10
node2.a.com 192.168.101.20
VIP 192.168.101.100
拓撲:
一:環境準備:
node1.a.com配置:
1:ip地址設置:
2:修改主機名:
[root@lyt ~]# vim /etc/sysconfig/network
[root@lyt ~]# init 6 #重啓使主機名生效
[root@node1 ~]# hostname #查看主機名
3:編輯dns緩存文件:
[root@node1 ~]# vim /etc/hosts
4:同步時間:
[root@node1 ~]# hwclock -s
5:編輯本地yum:
[root@node1 ~]# vim /etc/yum.repos.d/rhel-debuginfo.repo
node2.a.com配置:
1:ip地址配置
2:修改主機名:
[root@lyt ~]# vim /etc/sysconfig/network
[root@lyt ~]# init 6 #重啓使主機名生效
[root@node2 ~]# hostname #查看主機名
3:同步時間:
[root@node2 ~]# hwclock -s
在node1.a.com和node2.a.com上設置無障礙通訊:
[root@node1 ~]# ssh-keygen -t rsa #使用rsa算法生成一個鑰匙對
[root@node1 ~]# cd .ssh/
[root@node1 .ssh]# ssh-copy-id -i id_rsa.pub node2 #將公鑰拷貝到node2.a.com中,此處使用它的別名node2,他會自動尋找位置,不用指明存放位置
[root@node1 .ssh]# scp /etc/hosts node2:/etc/ #將dns緩存文件拷貝到node2中
[root@node2 ~]# ssh-keygen -t rsa #使用rsa算法算出一個鑰匙對
[root@node2 ~]# cd .ssh/
[root@node2 .ssh]# ssh-copy-id -i id_rsa.pub node1 #將公鑰拷貝到node1
[root@node2 .ssh]# scp node1:/etc/yum.repos.d/rhel-debuginfo.repo /etc/yum.repos.d/ #將node1上的yum文件拷貝到node2上
二:安裝相關軟件包:
node1上的配置:
[root@node1 ~]# ll
total 3376
-rw-r--r-- 1 root root 271360 Jul 15 23:25 cluster-glue-1.0.6-1.6.el5.i386.rpm
-rw-r--r-- 1 root root 133254 Jul 15 23:25 cluster-glue-libs-1.0.6-1.6.el5.i386.rpm
-rw-r--r-- 1 root root 170052 Jul 15 23:25 corosync-1.2.7-1.1.el5.i386.rpm
-rw-r--r-- 1 root root 158502 Jul 15 23:25 corosynclib-1.2.7-1.1.el5.i386.rpm
-rw-r--r-- 1 root root 165591 Jul 15 23:25 heartbeat-3.0.3-2.3.el5.i386.rpm
-rw-r--r-- 1 root root 289600 Jul 15 23:25 heartbeat-libs-3.0.3-2.3.el5.i386.rpm
-rw-r--r-- 1 root root 60458 Jul 15 23:25 libesmtp-1.0.4-5.el5.i386.rpm
-rw-r--r-- 1 root root 207085 Jul 15 23:25 openais-1.1.3-1.6.el5.i386.rpm
-rw-r--r-- 1 root root 94614 Jul 15 23:25 openaislib-1.1.3-1.6.el5.i386.rpm
-rw-r--r-- 1 root root 796813 Jul 15 23:25 pacemaker-1.1.5-1.1.el5.i386.rpm
-rw-r--r-- 1 root root 207925 Jul 15 23:25 pacemaker-cts-1.1.5-1.1.el5.i386.rpm
-rw-r--r-- 1 root root 332026 Jul 15 23:25 pacemaker-libs-1.1.5-1.1.el5.i386.rpm
-rw-r--r-- 1 root root 32818 Jul 15 23:25 perl-TimeDate-1.16-5.el5.noarch.rpm
-rw-r--r-- 1 root root 388632 Jul 15 23:25 resource-agents-1.0.4-1.1.el5.i386.rpm
[root@node1 ~]# mkdir /mnt/cdrom
[root@node1 ~]# mount /dev/cdrom /mnt/cdrom/ #掛載本地光盤
[root@node1 ~]# yum localinstall *.rpm -y --nogpgcheck #安裝該目錄所有的rpm包
[root@node1 ~]# scp *.rpm node2:/root #將所有的rpm軟件包拷貝到node2的/root下
[root@node1 ~]# yum install httpd -y #安裝httpd服務器
[root@node1 ~]# echo "node1.a.com" >/var/www/html/index.html #編輯網頁
node2上的配置:
[root@node2 ~]# mkdir /mnt/cdrom
[root@node2 ~]# mount /dev/cdrom /mnt/cdrom/ #創建掛載點用於掛載本地光盤
[root@node2 ~]# yum localinstall *.rpm -y --nogpgcheck #localinstall用於安裝不是系統自帶的本地rpm包,其依賴的軟件包可以從光盤(本地yum源)上調用
[root@node2 ~]# yum install httpd -y #安裝httpd服務器
[root@node2 ~]# echo "node2.a.com" >/var/www/html/index.html #編輯網頁內容
三:修改配置文件
node1.a.com配置
[root@node1 ~]# cd /etc/corosync/
[root@node1 corosync]# cp -p corosync.conf.example corosync.conf #生成corosync的配置文件
[root@node1 corosync]# vim corosync.conf #編輯該文件
1 # Please read the corosync.conf.5 manual page
2 compatibility: whitetank
3
4 totem {
5 version: 2 #版本號
6 secauth: off #是否打開安全認證
7 threads: 0 #使用多少個線程進行認證,0表示無限制
8 interface {
9 ringnumber: 0
10 bindnetaddr: 192.168.101.0 #通過哪個網絡地址進行通訊,此處填寫網絡地址(本例爲192.168.101.0)
11 mcastaddr: 226.94.1.1
12 mcastport: 5405
13 }
14 }
15
16 logging {
17 fileline: off
18 to_stderr: no #是否發送標準出錯
19 to_logfile: yes #日誌
20 to_syslog: yes #系統日誌 (建議關掉一個),會降低性能
21 logfile: /var/log/cluster/corosync.log #(手動創建目錄)
22 debug: off
23 timestamp: on #日誌中是否記錄時間
24 logger_subsys {
25 subsys: AMF
26 debug: off
27 }
28 }
29
30 amf {
31 mode: disabled
32 }
33 service {
34 ver: 0
35 name: pacemaker #使用到了pacemaker
36 }
37 aisexec { #使用到openais的一些子選項
38 user: root
39 group: root
40 }
[root@node1 corosync]# corosync-keygen #產生認證文件
[root@node1 corosync]# scp -p authkey corosync.conf node2:/etc/corosync/ #將文件拷貝到node2節點(-p表示帶上文件屬性)
[root@node1 corosync]# mkdir /var/log/cluster #創建目錄cluster
[root@node1 corosync]# ssh node2 'mkdir /var/log/cluster' #在node1上爲node2創建目錄cluster
[root@node1 corosync]# service corosync start #啓動corosync服務
[root@node1 corosync]# ssh node2 'service corosync start' #在node1上將node2上的corosync服務啓動
[root@node1 corosync]# grep -i -e "corosync cluster engine" -e "configuration file" /var/log/messages #驗證corosync引擎是否正常啓動了
Jul 15 13:24:50 lyt smartd[3205]: Opened configuration file /etc/smartd.conf
Jul 15 13:24:50 lyt smartd[3205]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 15 13:31:09 lyt smartd[3030]: Opened configuration file /etc/smartd.conf
Jul 15 13:31:09 lyt smartd[3030]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 15 20:04:48 lyt smartd[3060]: Opened configuration file /etc/smartd.conf
Jul 15 20:04:48 lyt smartd[3060]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 15 21:37:01 lyt smartd[3286]: Opened configuration file /etc/smartd.conf
Jul 15 21:37:01 lyt smartd[3286]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 15 22:25:02 lyt smartd[2994]: Opened configuration file /etc/smartd.conf
Jul 15 22:25:02 lyt smartd[2994]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 16 00:41:47 node1 smartd[3000]: Opened configuration file /etc/smartd.conf
Jul 16 00:41:47 node1 smartd[3000]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 16 01:08:14 node1 corosync[3190]: [MAIN ] Corosync Cluster Engine ('1.2.7'): started and ready to provide service.
Jul 16 01:08:14 node1 corosync[3190]: [MAIN ] Successfully read main configuration file '/etc/corosync/corosync.conf'.
[root@node1 corosync]# grep -i totem /var/log/messages #查看初始化成員節點通知是否發出
Jul 16 01:08:14 node1 corosync[3190]: [TOTEM ] Initializing transport (UDP/IP).
Jul 16 01:08:14 node1 corosync[3190]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Jul 16 01:08:14 node1 corosync[3190]: [TOTEM ] The network interface is down.
Jul 16 01:08:15 node1 corosync[3190]: [TOTEM ] Process pause detected for 649 ms, flushing membership messages.
Jul 16 01:08:15 node1 corosync[3190]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
Jul 16 01:22:04 node1 corosync[3279]: [TOTEM ] Initializing transport (UDP/IP).
Jul 16 01:22:04 node1 corosync[3279]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Jul 16 01:22:04 node1 corosync[3279]: [TOTEM ] The network interface [192.168.101.10] is now up.
Jul 16 01:22:07 node1 corosync[3279]: [TOTEM ] Process pause detected for 536 ms, flushing membership messages.
Jul 16 01:22:07 node1 corosync[3279]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
[root@node1 corosync]# grep -i pcmk_startup /var/log/messages #檢查pacemaker是否已經啓動了
Jul 16 01:08:15 node1 corosync[3190]: [pcmk ] info: pcmk_startup: CRM: Initialized
Jul 16 01:08:15 node1 corosync[3190]: [pcmk ] Logging: Initialized pcmk_startup
Jul 16 01:08:15 node1 corosync[3190]: [pcmk ] info: pcmk_startup: Maximum core file size is: 4294967295
Jul 16 01:08:15 node1 corosync[3190]: [pcmk ] info: pcmk_startup: Service: 9
Jul 16 01:08:15 node1 corosync[3190]: [pcmk ] info: pcmk_startup: Local hostname: node1.a.com
Jul 16 01:22:06 node1 corosync[3279]: [pcmk ] info: pcmk_startup: CRM: Initialized
Jul 16 01:22:06 node1 corosync[3279]: [pcmk ] Logging: Initialized pcmk_startup
Jul 16 01:22:06 node1 corosync[3279]: [pcmk ] info: pcmk_startup: Maximum core file size is: 4294967295
Jul 16 01:22:06 node1 corosync[3279]: [pcmk ] info: pcmk_startup: Service: 9
Jul 16 01:22:06 node1 corosync[3279]: [pcmk ] info: pcmk_startup: Local hostname: node1.a.com
[root@node1 corosync]# grep -i error: /var/log/messages #檢查啓動過程中是否有錯誤產生(此處報出stonith的錯誤,需要在後面關閉stonith;若要排除stonith相關的錯誤,可追加 |grep -v unpack_resources)
Jul 16 01:09:18 node1 pengine: [3200]: ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
Jul 16 01:09:18 node1 pengine: [3200]: ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
Jul 16 01:09:18 node1 pengine: [3200]: ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
Jul 16 01:21:59 node1 pengine: [3200]: ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
Jul 16 01:21:59 node1 pengine: [3200]: ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
Jul 16 01:21:59 node1 pengine: [3200]: ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
Jul 16 01:23:11 node1 pengine: [3289]: ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
Jul 16 01:23:11 node1 pengine: [3289]: ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
Jul 16 01:23:11 node1 pengine: [3289]: ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
在node2.a.com上查看:
[root@node2 ~]# grep -i -e "corosync cluster engine" -e "configuration file" /var/log/messages #驗證corosync引擎是否正常啓動了
Jul 15 13:24:50 lyt smartd[3205]: Opened configuration file /etc/smartd.conf
Jul 15 13:24:50 lyt smartd[3205]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 15 13:31:09 lyt smartd[3030]: Opened configuration file /etc/smartd.conf
Jul 15 13:31:09 lyt smartd[3030]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 15 20:04:48 lyt smartd[3060]: Opened configuration file /etc/smartd.conf
Jul 15 20:04:48 lyt smartd[3060]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 15 21:37:01 lyt smartd[3286]: Opened configuration file /etc/smartd.conf
Jul 15 21:37:01 lyt smartd[3286]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 16 01:07:12 lyt smartd[3361]: Opened configuration file /etc/smartd.conf
Jul 16 01:07:12 lyt smartd[3361]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 16 01:10:47 lyt smartd[3364]: Opened configuration file /etc/smartd.conf
Jul 16 01:10:47 lyt smartd[3364]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 16 03:26:50 node2 smartd[3033]: Opened configuration file /etc/smartd.conf
Jul 16 03:26:50 node2 smartd[3033]: Configuration file /etc/smartd.conf was parsed, found DEVICESCAN, scanning devices
Jul 16 03:53:49 node2 corosync[3267]: [MAIN ] Corosync Cluster Engine ('1.2.7'): started and ready to provide service.
Jul 16 03:53:49 node2 corosync[3267]: [MAIN ] Successfully read main configuration file '/etc/corosync/corosync.conf'.
[root@node2 ~]# grep -i totem /var/log/messages #查看初始化成員節點通知是否發出
Jul 16 03:53:49 node2 corosync[3267]: [TOTEM ] Initializing transport (UDP/IP).
Jul 16 03:53:49 node2 corosync[3267]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Jul 16 03:53:49 node2 corosync[3267]: [TOTEM ] The network interface is down.
Jul 16 03:53:51 node2 corosync[3267]: [TOTEM ] Process pause detected for 744 ms, flushing membership messages.
Jul 16 03:53:51 node2 corosync[3267]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
Jul 16 04:06:48 node2 corosync[29324]: [TOTEM ] Initializing transport (UDP/IP).
Jul 16 04:06:48 node2 corosync[29324]: [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
Jul 16 04:06:48 node2 corosync[29324]: [TOTEM ] The network interface [192.168.101.20] is now up.
Jul 16 04:06:57 node2 corosync[29324]: [TOTEM ] Process pause detected for 2825 ms, flushing membership messages.
Jul 16 04:06:58 node2 corosync[29324]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
Jul 16 04:07:00 node2 corosync[29324]: [TOTEM ] A processor joined or left the membership and a new membership was formed.
[root@node2 ~]# grep -i pcmk_startup /var/log/messages #檢查pacemaker是否已經啓動了
Jul 16 03:53:49 node2 corosync[3267]: [pcmk ] info: pcmk_startup: CRM: Initialized
Jul 16 03:53:50 node2 corosync[3267]: [pcmk ] Logging: Initialized pcmk_startup
Jul 16 03:53:50 node2 corosync[3267]: [pcmk ] info: pcmk_startup: Maximum core file size is: 4294967295
Jul 16 03:53:50 node2 corosync[3267]: [pcmk ] info: pcmk_startup: Service: 9
Jul 16 03:53:50 node2 corosync[3267]: [pcmk ] info: pcmk_startup: Local hostname: node2.a.com
Jul 16 04:06:50 node2 corosync[29324]: [pcmk ] info: pcmk_startup: CRM: Initialized
Jul 16 04:06:50 node2 corosync[29324]: [pcmk ] Logging: Initialized pcmk_startup
Jul 16 04:06:50 node2 corosync[29324]: [pcmk ] info: pcmk_startup: Maximum core file size is: 4294967295
Jul 16 04:06:51 node2 corosync[29324]: [pcmk ] info: pcmk_startup: Service: 9
Jul 16 04:06:51 node2 corosync[29324]: [pcmk ] info: pcmk_startup: Local hostname: node2.a.com
[root@node2 ~]# grep -i error: /var/log/messages #檢查啓動過程中是否有錯誤產生(此處報出stonith的錯誤,需要在後面關閉stonith;若要排除stonith相關的錯誤,可追加 |grep -v unpack_resources)
Jul 16 03:54:53 node2 pengine: [3277]: ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
Jul 16 03:54:53 node2 pengine: [3277]: ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
Jul 16 03:54:53 node2 pengine: [3277]: ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
Jul 16 04:06:45 node2 pengine: [3277]: ERROR: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
Jul 16 04:06:45 node2 pengine: [3277]: ERROR: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
Jul 16 04:06:45 node2 pengine: [3277]: ERROR: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
四:定義羣集資源:
node1.a.com的配置:
[root@node1 ~]# crm
crm(live)# configure
crm(live)configure# property stonith-enabled=false #在上一步驟中,stonith報錯,所以將stonith關閉
crm(live)configure# commit #提交
crm(live)configure# primitive webip ocf:heartbeat:IPaddr params ip=192.168.101.100 #資源名稱是webip,後邊的ip地址是VIP
crm(live)configure# commit #提交
crm(live)configure# primitive webserver lsb:httpd #定義資源名稱webserver,資源是httpd服務
crm(live)configure# commit #提交
crm(live)configure# group web webip webserver #定義組名web,包含了webip和webserver
crm(live)configure# show #查看定義的資源
crm(live)configure# commit #提交
crm(live)configure# end #結束
crm(live)# status #查看狀態
[root@node1 ~]# service httpd status #查看httpd的運行狀態
在node2.a.com上查看:
[root@node2 ~]# service httpd status #查看httpd的運行狀態
[root@node2 ~]# crm configure
crm(live)configure# property no-quorum-policy=ignore #在節點node2上關閉票數(quorum)檢查的功能
crm(live)configure# commit
測試:
模擬node1節點停止工作:
[root@node1 ~]# service corosync stop #關閉node1節點的corosync服務
在node2上查看:
[root@node2 ~]# service httpd status #查看httpd狀態
注:在node1.a.com上啓用corosync服務後,節點node1並不會將資源搶奪過來,這是爲了保證羣集的穩定性!!!