一、環境說明
192.168.5.65 master(redis、sentinel)
192.168.5.66 slave1(redis、sentinel)
192.168.5.71 slave2(redis、sentinel)
# /etc/hosts entries: map each node's IP address to its own hostname.
# Every node gets a distinct name (192.168.5.66 is redis66, not redis65).
cat >> /etc/hosts << EOF
192.168.5.65 redis65
192.168.5.66 redis66
192.168.5.71 redis71
EOF
二、修改系統參數
# Raise the maximum number of open files (soft and hard limit, all users)
cat >> /etc/security/limits.conf << EOF
* soft nofile 102400
* hard nofile 102400
EOF
# TCP listen backlog size
echo "net.core.somaxconn = 32767" >> /etc/sysctl.conf
sysctl -p
# OOM related: vm.overcommit_memory
echo "vm.overcommit_memory=1" >> /etc/sysctl.conf
sysctl -p
# Disable the kernel's "Transparent Huge Pages (THP)" feature:
# first for the running system, then persistently via /etc/rc.local
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo "echo never > /sys/kernel/mm/transparent_hugepage/enabled" >> /etc/rc.local
chmod +x /etc/rc.local
三、安裝redis
[root@redis65 /]# cd /opt [root@redis65 opt]# wget http://download.redis.io/releases/redis-5.0.0.tar.gz [root@redis65 opt]# tar -zxvf redis-5.0.0.tar.gz [root@redis65 opt]# cd redis-5.0.0/ [root@redis65 redis-5.0.0]# make [root@redis65 redis-5.0.0]# make install PREFIX=/usr/local/redis
#創建實例目錄
[root@redis65 /]# mkdir -p /data/redis/redis_7001
## redis65: configuration file for instance 7001 (master)
# The heredoc body below is written verbatim to redis_7001.conf; each
# directive and each '#' comment must sit on its own line for Redis to parse it.
cat >> /data/redis/redis_7001/redis_7001.conf <<EOF
bind 0.0.0.0
protected-mode no
port 7001
timeout 0
tcp-keepalive 0
databases 16
daemonize yes
loglevel notice
requirepass zxc789
#redis主從
masterauth zxc789
slave-serve-stale-data yes
slave-read-only yes
repl-disable-tcp-nodelay no
slave-priority 90
#rdb持久化
dir /data/redis/redis_7001
dbfilename dump.rdb
#分別表示900秒(15分鐘)內有1個更改,300秒(5分鐘)內有10個更改以及60秒內有10000個更改
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
#AOF持久化
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 1024mb
aof-load-truncated yes
aof-rewrite-incremental-fsync yes
lua-time-limit 5000
#記錄慢查詢
slowlog-log-slower-than 10000
slowlog-max-len 128
notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-entries 512
list-max-ziplist-value 64
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
activerehashing yes
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
hz 10
pidfile /data/redis/redis_7001/redis_7001.pid
logfile /data/redis/redis_7001/redis_7001.log
EOF
## redis66: configuration file for instance 7001 (slave)
# Same layout as the master's file, plus the "slaveof" directive that points
# this instance at the master. The heredoc body is written verbatim.
cat >> /data/redis/redis_7001/redis_7001.conf <<EOF
bind 0.0.0.0
protected-mode no
port 7001
daemonize yes
timeout 0
tcp-keepalive 0
loglevel notice
databases 16
requirepass zxc789
#連接redis master
slaveof 192.168.5.65 7001
masterauth zxc789
slave-serve-stale-data yes
slave-read-only yes
repl-disable-tcp-nodelay no
slave-priority 90
#rdb持久化
dir /data/redis/redis_7001
dbfilename dump.rdb
##分別表示900秒(15分鐘)內有1個更改,300秒(5分鐘)內有10個更改以及60秒內有10000個更改
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
#AOF持久化
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 1024mb
aof-load-truncated yes
aof-rewrite-incremental-fsync yes
lua-time-limit 5000
#記錄慢查詢
slowlog-log-slower-than 10000
slowlog-max-len 128
notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-entries 512
list-max-ziplist-value 64
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
activerehashing yes
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
hz 10
pidfile /data/redis/redis_7001/redis_7001.pid
logfile /data/redis/redis_7001/redis_7001.log
EOF
## Redis init script (SysV style, managed by chkconfig)
# The heredoc delimiter is quoted ('EOF') so that $1, $PIDFILE, $(...) etc. are
# written literally into the init script instead of being expanded by the
# shell that creates the file. '>' is used so that re-running this step does
# not append a second copy of the script.
# NOTE(review): EXEC/REDIS_CLI point at /usr/local/redis/bin to match
# "make install PREFIX=/usr/local/redis" from the install step — adjust if
# Redis was installed under a different prefix.
cat > /etc/init.d/redis_7001 <<'EOF'
#!/bin/sh
# chkconfig: 2345 10 90
# description: Start and Stop redis
PATH=/usr/local/bin:/sbin:/usr/bin:/bin
REDISPORT=7001
EXEC=/usr/local/redis/bin/redis-server
REDIS_CLI=/usr/local/redis/bin/redis-cli
PIDFILE=/data/redis/redis_7001/redis_7001.pid
CONF="/data/redis/redis_7001/redis_7001.conf"
#AUTH="1234"
case "$1" in
    start)
        if [ -f "$PIDFILE" ]
        then
            echo "$PIDFILE exists, process is already running or crashed."
        else
            echo "Starting Redis server..."
            # Report success only when redis-server itself exited with 0.
            if "$EXEC" "$CONF"
            then
                echo "Redis is running..."
            fi
        fi
        ;;
    stop)
        if [ ! -f "$PIDFILE" ]
        then
            echo "$PIDFILE does not exist, process is not running."
        else
            PID=$(cat "$PIDFILE")
            echo "Stopping..."
            "$REDIS_CLI" -p "$REDISPORT" -a 'zxc789' SHUTDOWN 2>/dev/null
            sleep 2
            # Wait until the server process is really gone.
            while [ -x "/proc/${PID}" ]
            do
                echo "Waiting for Redis to shutdown..."
                sleep 1
            done
            echo "Redis stopped"
        fi
        ;;
    restart|force-reload)
        "$0" stop
        "$0" start
        ;;
    *)
        echo "Usage: /etc/init.d/redis_7001 {start|stop|restart|force-reload}" >&2
        exit 1
esac
EOF
# Make the script executable, start the instance, and enable it at boot
chmod +x /etc/init.d/redis_7001
/etc/init.d/redis_7001 start
chkconfig redis_7001 on
# 在redis-cli -h 127.0.0.1 -a 'password'這裏會有一個問題,當在shell中輸入以上命令時,控制檯總會輸出一串“Warning: Using a password with '-a' option on the command line interface may not be safe.”
#解決辦法將標準錯誤去除即可,加了2>/dev/null,將標準錯誤丟棄即可,如:redis-cli -h 192.168.5.65 -p 7001 -c -a 'zxc789' 2>/dev/null
# 同步確認
[root@redis65 data]# redis-cli -h 192.168.5.65 -p 7001 -c -a 'zxc789' 2>/dev/null 192.168.5.65:7001> info replication # Replication role:master connected_slaves:3 slave0:ip=192.168.5.66,port=7001,state=online,offset=14919,lag=1 slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1 slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0 master_replid:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0 master_replid2:0000000000000000000000000000000000000000 master_repl_offset:14919 second_repl_offset:-1 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:1 repl_backlog_histlen:14919
[root@redis66 /]# redis-cli -h 192.168.5.66 -p 7001 -c -a 'zxc789' 2>/dev/null
192.168.5.66:7001> INFO replication # Replication role:slave master_host:192.168.5.65 master_port:7001 master_link_status:up master_last_io_seconds_ago:9 master_sync_in_progress:0 slave_repl_offset:112 slave_priority:100 slave_read_only:1 connected_slaves:0 master_replid:c8cbd4ae635da25193060cacaaebf8a696227476 master_replid2:0000000000000000000000000000000000000000 master_repl_offset:112 second_repl_offset:-1 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:1 repl_backlog_histlen:112
同步正常時:
master_link_status:up
master_repl_offset #和slave_repl_offset相等,
master_last_io_seconds_ago #在10秒內。
# Test: bulk-insert keys name0..name99 into the local instance via a script.
# The heredoc delimiter is quoted so "$i" stays literal in the generated
# script; unquoted, it would expand to an empty string at write time and
# every iteration would overwrite the same key "name".
cat > /data/11.sh <<'EOF'
#!/bin/bash
for ((i=0;i<100;i++))
do
printf '%s' "cldjiowehjijcvjsopdjfjpjj" | redis-cli -p 7001 -a 'zxc789' -x set "name$i" 2>/dev/null
done
EOF
chmod +x /data/11.sh
# Run with bash: the C-style for loop is a bash feature, not POSIX sh
time bash /data/11.sh
# slave只讀不允許寫
192.168.5.66:7001> set yaya 465464654 (error) READONLY You can't write against a read only replica.
# After stopping the slave, write more data (name101..name149) to the master.
# '>' replaces the previous /data/11.sh instead of appending to it, so the
# earlier 0..99 loop is not executed again; the quoted delimiter keeps "$i"
# literal in the generated script.
cat > /data/11.sh <<'EOF'
#!/bin/bash
for ((i=101;i<150;i++))
do
printf '%s' "64we64f54f4qwe464cvw6e4" | redis-cli -p 7001 -a 'zxc789' -x set "name$i" 2>/dev/null
done
EOF
#master寫完數據再開啓slave,看slave是否有同步數據
192.168.5.65:7001> info keyspace # Keyspace db0:keys=149,expires=0,avg_ttl=0 192.168.5.65:7001> DBSIZE (integer) 149 192.168.5.65:7001> get name112 "64we64f54f4qwe464cvw6e4" 192.168.5.65:7001> INFO replication # Replication role:master connected_slaves:0 master_replid:c8cbd4ae635da25193060cacaaebf8a696227476 master_replid2:0000000000000000000000000000000000000000 master_repl_offset:14549 second_repl_offset:-1 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:1 repl_backlog_histlen:14549 192.168.5.65:7001> INFO replication # Replication role:master connected_slaves:3 slave0:ip=192.168.5.66,port=7001,state=online,offset=14563,lag=1 slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1 slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0 master_replid:c8cbd4ae635da25193060cacaaebf8a696227476 master_replid2:0000000000000000000000000000000000000000 master_repl_offset:14563 second_repl_offset:-1 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:1 repl_backlog_histlen:14563 192.168.5.66:7001> info keyspace # Keyspace db0:keys=149,expires=0,avg_ttl=0 192.168.5.66:7001> DBSIZE (integer) 149 192.168.5.66:7001> get name112 "64we64f54f4qwe464cvw6e4" 192.168.5.66:7001> INFO replication # Replication role:slave master_host:192.168.5.65 master_port:7001 master_link_status:up master_last_io_seconds_ago:1 master_sync_in_progress:0 slave_repl_offset:14633 slave_priority:100 slave_read_only:1 connected_slaves:0 master_replid:c8cbd4ae635da25193060cacaaebf8a696227476 master_replid2:0000000000000000000000000000000000000000 master_repl_offset:14633 second_repl_offset:-1 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:14550 repl_backlog_histlen:84
# 從上面可以看到數據已同步
四、redis哨兵模式
Redis哨兵爲Redis提供了高可用性。實際上這意味着你可以使用哨兵模式創建一個可以不用人爲干預而應對各種故障的Redis部署,哨兵模式還提供了其他的附加功能,如監控,通知,爲客戶端提供配置。
監控:哨兵不斷的檢查master和slave是否正常的運行。
通知:當監控的某臺Redis實例發生問題時,可以通過API通知系統管理員和其他的應用程序。
自動故障轉移:如果一個master不正常運行了,哨兵可以啓動一個故障轉移進程,將一個slave升級成爲master,其他的slave被重新配置使用新的master,並且應用程序使用Redis服務端通知的新地址。
配置提供者:哨兵作爲Redis客戶端發現的權威來源:客戶端連接到哨兵請求當前可靠的master的地址。如果發生故障,哨兵將報告新地址。
1、sentinel.conf配置文件(主從都一樣)
[root@redis65 data]# mkdir -p /data/redis/sentinel_27001 [root@redis65 data]# cat >> /data/redis/sentinel_27001/sentinel_27001.conf <<EOF
port 27001
#末尾的2表示在sentinel集羣中只要有兩個節點檢測到redis主節點出故障就進行切換
#如果3s內mymaster無響應,則認爲mymaster宕機了
#如果10秒後,mymaster仍沒活過來,則啓動failover
sentinel monitor mymaster 192.168.5.65 7001 2
sentinel down-after-milliseconds mymaster 3000
sentinel failover-timeout mymaster 10000
daemonize yes
#指定工作目錄
dir "/data/redis/sentinel_27001/"
protected-mode no
logfile "/data/redis/sentinel_27001/sentinel_27001.log"
#redis主節點密碼
sentinel auth-pass mymaster zxc789
# Generated by CONFIG REWRITE
EOF
2、啓動所有的sentinel實例
[root@redis65 data]# redis-sentinel /data/redis/sentinel_27001/sentinel_27001.conf [root@redis65 data]# netstat -tnlp|grep 27001 tcp 0 0 0.0.0.0:27001 0.0.0.0:* LISTEN 17731/redis-sentine tcp6 0 0 :::27001 :::* LISTEN 17731/redis-sentine
[root@redis66 redis_7001]# netstat -tnlp|grep 27001 tcp 0 0 0.0.0.0:27001 0.0.0.0:* LISTEN 15100/redis-sentine tcp6 0 0 :::27001 :::* LISTEN 15100/redis-sentine
[root@db71 /]# netstat -tnlp|grep 27001 tcp 0 0 0.0.0.0:27001 0.0.0.0:* LISTEN 13831/redis-sentine tcp6 0 0 :::27001 :::* LISTEN 13831/redis-sentine
#sentinel監控日誌,通過日誌可以看到一個master、三個slave
15100:X 11 Jan 2019 16:33:19.813 * +slave slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 7001 15100:X 11 Jan 2019 16:33:19.998 * +slave slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.65 7001 15100:X 11 Jan 2019 16:33:20.042 * +slave slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001 15100:X 11 Jan 2019 16:33:21.797 * +sentinel sentinel bd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee1 192.168.5.65 27001 @ mymaster 192.168.5.65 7001 15100:X 11 Jan 2019 16:34:00.915 * +sentinel sentinel 8437221283819083fba5f2f07f9fb3ec98169af4 192.168.5.71 27001 @ mymaster 192.168.5.65 7001
3、sentinel常用命令
[root@db71 aa]# redis-cli -p 27001 127.0.0.1:27001> sentinel masters 1) 1) "name" 2) "mymaster" 3) "ip" 4) "192.168.5.65" 5) "port" 6) "7001" 7) "runid" 8) "29541866b0235fb9d8b11ca9e8e0a334cb547e47" 127.0.0.1:27001> sentinel slaves mymaster 1) 1) "name" 2) "192.168.5.71:7001" 3) "ip" 4) "192.168.5.71" 5) "port" 6) "7001" 2) 1) "name" 2) "192.168.5.65:7002" 3) "ip" 4) "192.168.5.65" 5) "port" 6) "7002" 7) "runid" 8) "a27281370ab282263e9e38766d3743112b52ae99" 9) "flags" 10) "slave" 3) 1) "name" 2) "192.168.5.66:7001" 3) "ip" 4) "192.168.5.66" 5) "port" 6) "7001" 7) "runid" 8) "496bc3511d0ecc4e8d4965d699f851f053c94f37" 9) "flags" 10) "slave"
sentinel masters 列出所有監視的主節點
sentinel slaves <master name> 獲取某主節點的所有從節點信息
sentinel get-master-addr-by-name <master> 根據主節點名稱來獲取其對應的ip地址
sentinel reset 清除所有操作狀態,包括故障轉移
sentinel failover <master name> 手動將主節點轉移到某節點
4、master宕機,故障轉移
[root@redis65 data]# redis-cli -h 192.168.5.65 -p 7001 -c -a 'zxc789' 2>/dev/null 192.168.5.65:7001> info replication # Replication role:master connected_slaves:3 slave0:ip=192.168.5.66,port=7001,state=online,offset=14919,lag=1 slave1:ip=192.168.5.65,port=7002,state=online,offset=14919,lag=1 slave2:ip=192.168.5.71,port=7001,state=online,offset=14919,lag=0
#關閉master
[root@redis65 /]# /etc/init.d/redis_7001 stop
#通過sentinel日誌查看sentinel選舉新的master的過程
cat /data/redis/sentinel_27001/sentinel_27001.log 17799:X 11 Jan 2019 22:51:51.383 # +sdown master mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:51.383 # +odown master mymaster 192.168.5.65 7001 #quorum 1/1 17799:X 11 Jan 2019 22:51:51.383 # +new-epoch 1 17799:X 11 Jan 2019 22:51:51.383 # +try-failover master mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:51.423 # +vote-for-leader bd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee1 1 17799:X 11 Jan 2019 22:51:51.443 # 0d39de811fec35f16192a0143b4099822837b34b voted for 0d39de811fec35f16192a0143b4099822837b34b 1 17799:X 11 Jan 2019 22:51:51.462 # 8437221283819083fba5f2f07f9fb3ec98169af4 voted for bd0bf09a1dcb2cadce4bed1c19e2b0c766dbeee1 1 17799:X 11 Jan 2019 22:51:51.489 # +elected-leader master mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:51.489 # +failover-state-select-slave master mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:51.579 # +selected-slave slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:51.579 * +failover-state-send-slaveof-noone slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:51.650 * +failover-state-wait-promotion slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:52.701 # +promoted-slave slave 192.168.5.71:7001 192.168.5.71 7001 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:52.701 # +failover-state-reconf-slaves master mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:52.702 * +slave-reconf-sent slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:53.462 * +slave-reconf-inprog slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:53.462 * +slave-reconf-done slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:53.520 * +slave-reconf-sent slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 
7001 17799:X 11 Jan 2019 22:51:54.469 * +slave-reconf-inprog slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:54.469 * +slave-reconf-done slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:54.568 # +failover-end master mymaster 192.168.5.65 7001 17799:X 11 Jan 2019 22:51:54.568 # +switch-master mymaster 192.168.5.65 7001 192.168.5.71 7001 17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.65:7002 192.168.5.65 7002 @ mymaster 192.168.5.71 7001 17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.66:7001 192.168.5.66 7001 @ mymaster 192.168.5.71 7001 17799:X 11 Jan 2019 22:51:54.568 * +slave slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001 17799:X 11 Jan 2019 22:51:57.648 # +sdown slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
// 通過日誌可以看到選舉192.168.5.71 7001爲新的master。爲什麼是192.168.5.71 7001成爲新的master呢?那是因爲在所有slave中,192.168.5.71 7001的配置文件裏replica-priority(舊名slave-priority)參數設得最小;sentinel會優先選擇該值最小的slave(0表示永不被選爲master)
#查看新的主從信息
[root@db71 aa]# redis-cli -h 192.168.5.71 -p 7001 -c -a 'zxc789' 2>/dev/null 192.168.5.71:7001> info replication # Replication role:master connected_slaves:2 slave0:ip=192.168.5.65,port=7002,state=online,offset=3341911,lag=0 slave1:ip=192.168.5.66,port=7001,state=online,offset=3341911,lag=1 master_replid:b9e65b745379e49560ae5647a2b35f59417e6451 master_replid2:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0 master_repl_offset:3341911 second_repl_offset:3305650 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:2293336 repl_backlog_histlen:1048576
#查看其它slave的配置文件,發現自動修改爲同步新的master
#192.168.5.71 7001配置文件中連接master的配置已刪除
#開啓原來的master 192.168.5.65 7001
[root@redis65 data]# /etc/init.d/redis_7001 start Starting Redis server... Redis is running...
#通過sentinel日誌查看原master的啓動變化(-sdown:說明恢復服務)
cat /data/redis/sentinel_27001/sentinel_27001.log 17799:X 12 Jan 2019 10:22:17.351 * +reboot slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001 17799:X 12 Jan 2019 10:22:17.402 # -sdown slave 192.168.5.65:7001 192.168.5.65 7001 @ mymaster 192.168.5.71 7001
#在新的master實例上看看原來的master是否變成slave
192.168.5.71:7001> info Replication # Replication role:master connected_slaves:3 slave0:ip=192.168.5.65,port=7002,state=online,offset=11868218,lag=0 slave1:ip=192.168.5.66,port=7001,state=online,offset=11868079,lag=1 slave2:ip=192.168.5.65,port=7001,state=online,offset=11868218,lag=0 master_replid:b9e65b745379e49560ae5647a2b35f59417e6451 master_replid2:c38f8eee4a6a7e71614cf4d0bf38add46a5f8cb0 master_repl_offset:11868218 second_repl_offset:3305650 repl_backlog_active:1 repl_backlog_size:1048576 repl_backlog_first_byte_offset:10819643 repl_backlog_histlen:1048576
// 可以看到原來的master變成slave了,原master 192.168.5.65:7001配置文件也發生了變化,自動添加了連接新master的命令
#如果原master 192.168.5.65:7001重啓後,因爲配置文件或者網絡的原因,沒有變成新master下面的一個slave,而客戶端仍然向它寫數據,那麼一旦恢復新的主從關係,這部份寫入的數據將會丟失。爲了避免這種情況出現,就要在redis配置文件中做以下的設置
min-slaves-to-write 1
min-slaves-max-lag 10
// 通過上面的配置,當一個redis是master時,如果它不能向至少一個slave寫數據(上面的min-slaves-to-write指定了slave的數量),它將會拒絕接受客戶端的寫請求。由於複製是異步的,master無法向slave寫數據意味着slave要麼斷開連接了,要麼不在指定時間內向master發送同步數據的請求了(上面的min-slaves-max-lag指定了這個時間)。