1
2
3
4
5
6
7
8
9
10
11
|
# vmstat 3 輸出樣例:
procs -----------memory---------- ---swap-- -----io---- --system-- -----cpu------
 r  b   swpd    free   buff   cache   si   so    bi    bo   in   cs us sy id wa st
 0  0      0 2540988 522188 5130400    0    0     2    32    4    2  4  1 96  0  0
 1  0      0 2540988 522188 5130400    0    0     0   720 1199  665  1  0 99  0  0
 0  0      0 2540956 522188 5130400    0    0     0     0 1151 1569  4  1 95  0  0
 0  0      0 2540956 522188 5130500    0    0     0     6 1117  439  1  0 99  0  0
 0  0      0 2540940 522188 5130512    0    0     0   536 1189  932  1  0 98  0  0
 0  0      0 2538444 522188 5130588    0    0     0     0 1187 1417  4  1 96  0  0
 0  0      0 2490060 522188 5130640    0    0     0    18 1253 1123  5  1 94  0  0
1
2
|
# uptime
 18:02:41 up 41 days, 23:42,  1 user,  load average: 0.00, 0.00, 0.00
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
#!/bin/bash
# performance.sh - record the per-core 15-minute load average and the CPU idle
# percentage of this host in a daily log, and mail an alert when the per-core
# load reaches 1.0.
#
# Environment (optional):
#   ALERT_MAILTO  recipient of alert mails (default: root). The address in the
#                 published listing was redacted, so set this per deployment.

readonly LOG_DIR=/usr/monitor/performance
readonly ALERT_MAILTO=${ALERT_MAILTO:-root}

# Read `ifconfig <iface>` output on stdin and print the first IPv4 address.
# Handles both "inet addr:1.2.3.4" (old net-tools) and "inet 1.2.3.4".
parse_ipv4() {
  awk '/inet / { sub(/^addr:/, "", $2); print $2; exit }'
}

# Print load ($1) divided by the core count ($2) with two decimals.
# A core count below 1 (e.g. /proc/cpuinfo without "model name" lines) is
# treated as 1 so the division can never fail.
per_core_load() {
  awk -v load="$1" -v cores="$2" 'BEGIN {
    if (cores + 0 < 1) cores = 1
    printf "%.2f\n", load / cores
  }'
}

# Read `top -b -n 1` output on stdin and print the integer part of the idle
# percentage. Supports "99.5%id," (old procps), "99.5 id," (procps-ng) and
# the glued form "ni,100.0 id," that procps-ng emits at 100% idle.
parse_cpu_idle() {
  awk '/Cpu\(s\)/ {
    for (i = 2; i <= NF; i++) {
      if ($i ~ /^[0-9.]+%id/) { print int($i); exit }
      if ($i ~ /^id,?$/) {
        v = $(i - 1)
        sub(/^[^0-9]*/, "", v)
        print int(v)
        exit
      }
    }
  }'
}

main() {
  local ip cores load_15 average_load cpu_idle log_file now message

  # Fail early when the log directory cannot be created/written.
  mkdir -p "$LOG_DIR" || return 1
  log_file="${LOG_DIR}/performance_$(date +%Y%m%d).log"

  ip=$(LC_ALL=C ifconfig eth0 2>/dev/null | parse_ipv4)

  # --- system load -------------------------------------------------------
  cores=$(grep -c 'model name' /proc/cpuinfo)
  # The third field of /proc/loadavg is the 15-minute average; reading it
  # directly avoids depending on the variable layout of `uptime` output.
  if ! read -r _ _ load_15 _ < /proc/loadavg; then
    printf 'cannot read /proc/loadavg\n' >&2
    return 1
  fi
  average_load=$(per_core_load "$load_15" "$cores")
  now=$(date +%Y%m%d/%H:%M:%S)

  if awk -v v="$average_load" 'BEGIN { exit !(v + 0 >= 1.0) }'; then
    message="${ip}服務器單個核心15分鐘的平均負載爲${average_load},超過警戒值1.0,請立即處理!!!${now}"
    printf '%s\n' "$message" >> "$log_file"
    printf '%s\n' "$message" | mail -s "${ip}服務器系統負載嚴重告警" "$ALERT_MAILTO"
  else
    printf '%s\n' "${ip}服務器單個核心15分鐘的平均負載值爲${average_load},負載正常 ${now}" >> "$log_file"
  fi

  # --- CPU usage ---------------------------------------------------------
  cpu_idle=$(LC_ALL=C top -b -n 1 | parse_cpu_idle)
  if [[ ! "$cpu_idle" =~ ^[0-9]+$ ]]; then
    printf 'could not determine CPU idle percentage from top\n' >&2
    return 1
  fi
  if (( cpu_idle < 20 )); then
    printf '%s\n' "${ip}服務器cpu剩餘${cpu_idle}%,使用率已經超過80%,請及時處理。" >> "$log_file"
  else
    printf '%s\n' "${ip}服務器cpu剩餘${cpu_idle}%,使用率正常" >> "$log_file"
  fi
}

main "$@"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
#!/bin/bash
# process.sh - check whether tomcat, mysql, vsftp and ssh are running, append
# one status line per service to a daily log, and mail an alert for every
# service that is down.
#
# Environment (optional):
#   ALERT_MAILTO  recipient of alert mails (default: root). The address in the
#                 published listing was redacted, so set this per deployment.

readonly LOG_DIR=/usr/monitor/process
readonly ALERT_MAILTO=${ALERT_MAILTO:-root}

# "service-name:path-that-appears-in-the-process-command-line"
readonly -a SERVICES=(
  "tomcat:/opt/apache-tomcat-7.0.8"
  "mysql:/usr/local/mysql/bin/mysqld_safe"
  "vsftp:/usr/sbin/vsftpd"
  "ssh:/usr/sbin/sshd"
)

# Read `ifconfig <iface>` output on stdin and print the first IPv4 address.
# Handles both "inet addr:1.2.3.4" (old net-tools) and "inet 1.2.3.4".
parse_ipv4() {
  awk '/inet / { sub(/^addr:/, "", $2); print $2; exit }'
}

# Print how many processes have the fixed string $1 in their command line.
# The string reaches awk through the environment, not argv, so neither awk
# nor ps can match itself (no `grep -v grep` needed).
count_matching() {
  ps -eo args= 2>/dev/null \
    | PATTERN="$1" awk 'index($0, ENVIRON["PATTERN"]) { n++ } END { print n + 0 }'
}

# Print the log line for service $1 given its process count $2 and a
# timestamp $3.
status_line() {
  local service=$1 count=$2 stamp=$3
  if (( count == 0 )); then
    printf '%s is down at %s\n' "$service" "$stamp"
  else
    printf '%s is running at %s\n' "$service" "$stamp"
  fi
}

main() {
  local ip entry service pattern count line log_file

  # Fail early when the log directory cannot be created/written.
  mkdir -p "$LOG_DIR" || return 1
  log_file="${LOG_DIR}/process_$(date +%Y%m%d).log"
  ip=$(LC_ALL=C ifconfig eth0 2>/dev/null | parse_ipv4)

  for entry in "${SERVICES[@]}"; do
    service=${entry%%:*}
    pattern=${entry#*:}
    count=$(count_matching "$pattern")
    line=$(status_line "$service" "$count" "$(date +%Y%m%d%H:%M:%S)")

    printf '%s\n' "$line" >> "$log_file"
    if (( count == 0 )); then
      printf '%s\n' "$line" | mail -s "${ip}服務器 ${service}服務關閉告警" "$ALERT_MAILTO"
    fi
  done
}

main "$@"
1
2
3
4
5
6
7
8
9
10
11
12
|
#!/bin/bash
# Sample the receive/transmit byte counters of one network interface twice,
# one second apart, and append the resulting rates (integer KB per second) to
# the daily network log.
#
# Usage: <script> [interface]      (interface defaults to eth0)
#
# The log line layout ("<label> <iface>: <tx> kb/s <label> <iface>: <rx> kb/s
# at <time>") is consumed by field position elsewhere, so it must not change.

# Print the numeric content of the sysfs counter file $1.
# Returns non-zero when the file is unreadable or not a plain integer.
read_counter() {
  local value
  value=$(cat -- "$1" 2>/dev/null) || return 1
  [[ "$value" =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$value"
}

main() {
  local iface=${1:-eth0}
  local stats_dir="/sys/class/net/${iface}/statistics"
  local log_dir=/usr/monitor/network
  local rx1 tx1 rx2 tx2 tx_kbps rx_kbps

  if ! rx1=$(read_counter "${stats_dir}/rx_bytes") \
    || ! tx1=$(read_counter "${stats_dir}/tx_bytes"); then
    printf 'cannot read byte counters for interface %s\n' "$iface" >&2
    return 1
  fi

  sleep 1

  if ! rx2=$(read_counter "${stats_dir}/rx_bytes") \
    || ! tx2=$(read_counter "${stats_dir}/tx_bytes"); then
    printf 'cannot read byte counters for interface %s\n' "$iface" >&2
    return 1
  fi

  # Bytes transferred during the one-second window, converted to KB.
  tx_kbps=$(( (tx2 - tx1) / 1024 ))
  rx_kbps=$(( (rx2 - rx1) / 1024 ))

  mkdir -p "$log_dir" || return 1
  printf '%s\n' \
    "上傳速率 ${iface}: ${tx_kbps} kb/s 下載速率 ${iface}: ${rx_kbps} kb/s at $(date +%Y%m%d%H:%M:%S)" \
    >> "${log_dir}/network_$(date +%Y%m%d).log"
}

main "$@"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
#!/bin/bash
# tongji.sh - scan today's network log for the highest upload and download
# rates recorded for eth0 and append both maxima to the summary log.

# Print "<max_tx> <max_rx>" for the network log file $1.
# Only lines mentioning eth0 are considered; field 3 is the upload rate and
# field 7 the download rate. Non-numeric fields are ignored, and "0 0" is
# printed when nothing matches.
max_rates() {
  awk '
    /eth0/ {
      if ($3 ~ /^[0-9]+$/ && $3 + 0 > tx) tx = $3 + 0
      if ($7 ~ /^[0-9]+$/ && $7 + 0 > rx) rx = $7 + 0
    }
    END { print tx + 0, rx + 0 }
  ' "$1"
}

main() {
  local log_dir=/usr/monitor/network
  local today source_log max_tx max_rx

  today=$(date +%Y%m%d)
  source_log="${log_dir}/network_${today}.log"

  if [[ ! -r "$source_log" ]]; then
    printf 'cannot read %s\n' "$source_log" >&2
    return 1
  fi

  # One awk pass over the file instead of two grep|awk pipelines per line.
  read -r max_tx max_rx < <(max_rates "$source_log")

  {
    printf '%s\n' "最高上傳速度爲 ${max_tx} kb/s at ${today}"
    printf '%s\n' "最高下載速度爲 ${max_rx} kb/s at ${today}"
  } >> "${log_dir}/tongji.log"
}

main "$@"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
#!/bin/bash
# sys-warning.sh - check system load, CPU, swap, root-partition usage and the
# number of logged-in users, and mail an alert (via mutt) for every value that
# is past its threshold.
#
# Thresholds:
#   per-core 15-minute load  >= 1.0 critical, > 0.70 warning
#   CPU idle                 <  20%
#   swap free                <  20% of total (only once swap is in use)
#   root partition used      >  80%
#   logged-in users          >= 3
#
# Environment (optional):
#   ALERT_MAILTO  recipient of alert mails (default: root). The address in the
#                 published listing was redacted, so set this per deployment.

readonly ALERT_MAILTO=${ALERT_MAILTO:-root}

# Read `ifconfig <iface>` output on stdin and print the first IPv4 address.
# Handles both "inet addr:1.2.3.4" (old net-tools) and "inet 1.2.3.4".
parse_ipv4() {
  awk '/inet / { sub(/^addr:/, "", $2); print $2; exit }'
}

# Print load ($1) divided by the core count ($2) with two decimals.
# A core count below 1 is treated as 1 so the division can never fail.
per_core_load() {
  awk -v load="$1" -v cores="$2" 'BEGIN {
    if (cores + 0 < 1) cores = 1
    printf "%.2f\n", load / cores
  }'
}

# Classify a per-core load value $1 as "critical", "warning" or "ok".
# The comparison is numeric; comparing decimals with expr would compare them
# as strings.
load_level() {
  awk -v v="$1" 'BEGIN {
    v += 0
    if (v >= 1.0)      print "critical"
    else if (v > 0.70) print "warning"
    else               print "ok"
  }'
}

# Read `top -b -n 1` output on stdin and print the integer part of the idle
# percentage. Supports "99.5%id," (old procps), "99.5 id," (procps-ng) and
# the glued form "ni,100.0 id," that procps-ng emits at 100% idle.
parse_cpu_idle() {
  awk '/Cpu\(s\)/ {
    for (i = 2; i <= NF; i++) {
      if ($i ~ /^[0-9.]+%id/) { print int($i); exit }
      if ($i ~ /^id,?$/) {
        v = $(i - 1)
        sub(/^[^0-9]*/, "", v)
        print int(v)
        exit
      }
    }
  }'
}

# Succeed when swap is in use and less than 20% of it is still free.
# Arguments: $1 total, $2 free, $3 used (same unit). Non-numeric input or a
# host without swap (total 0) never triggers.
swap_is_low() {
  local total=$1 free=$2 used=$3
  [[ "$total" =~ ^[0-9]+$ && "$free" =~ ^[0-9]+$ && "$used" =~ ^[0-9]+$ ]] || return 1
  (( total > 0 && used != 0 && free * 100 < total * 20 ))
}

# Mail body $2 with subject $1 to the configured recipient.
send_alert() {
  printf '%s\n' "$2" | mutt -s "$1" "$ALERT_MAILTO"
}

main() {
  local ip cores load_15 average_load cpu_idle
  local swap_total swap_used swap_free disk_used users

  # Nothing useful can happen without the mailer.
  if ! command -v mutt > /dev/null 2>&1; then
    printf 'mutt is required to send alerts but was not found\n' >&2
    return 1
  fi

  ip=$(LC_ALL=C ifconfig eth0 2>/dev/null | parse_ipv4)

  # 1. System load: per-core 15-minute average.
  cores=$(grep -c "model name" /proc/cpuinfo)
  # The third field of /proc/loadavg is the 15-minute average; the position
  # of that value in `uptime` output varies with uptime and user count.
  if read -r _ _ load_15 _ < /proc/loadavg; then
    average_load=$(per_core_load "$load_15" "$cores")
    case "$(load_level "$average_load")" in
      critical)
        send_alert "${ip} 服務器系統負載嚴重告警." \
          "${ip}服務器單個核心15分鐘的系統平均負載爲${average_load},超過警戒值1.0,請立即處理."
        ;;
      warning)
        send_alert "${ip} 服務器系統負載告警" \
          "${ip}服務器單個核心15分鐘的系統平均負載爲${average_load},超過警戒值0.70,請及時處理."
        ;;
    esac
  else
    printf 'cannot read /proc/loadavg\n' >&2
  fi

  # 2. CPU: alert when less than 20% is idle.
  cpu_idle=$(LC_ALL=C top -b -n 1 | parse_cpu_idle)
  if [[ "$cpu_idle" =~ ^[0-9]+$ ]] && (( cpu_idle < 20 )); then
    send_alert "${ip} 服務器CPU告警" \
      "${ip}服務器cpu剩餘${cpu_idle}%,使用率已經超過80%,請及時處理。"
  fi

  # 3. Swap: total/used/free in MB from the Swap row of `free -m`.
  read -r swap_total swap_used swap_free < <(
    LC_ALL=C free -m | awk '/^Swap/ { print $2, $3, $4 }'
  )
  if swap_is_low "$swap_total" "$swap_free" "$swap_used"; then
    send_alert "${ip} 服務器內存告警" \
      "${ip}服務器swap交換分區只剩下 ${swap_free} M 未使用,剩餘不足20%,使用率已經超過80%,請及時處理。"
  fi

  # 4. Root partition: ask df about "/" itself instead of assuming which
  #    device it lives on.
  disk_used=$(df -P / | awk 'NR == 2 { print int($5) }')
  if [[ "$disk_used" =~ ^[0-9]+$ ]] && (( disk_used > 80 )); then
    send_alert "${ip} 服務器硬盤告警" \
      "${ip} 服務器 /根分區 使用率已經超過80%,請及時處理."
  fi

  # 5. Logged-in users: one line of `who` per session.
  users=$(who | wc -l)
  users=$(( users ))
  if (( users >= 3 )); then
    send_alert "${ip} 服務器用戶登錄告警" \
      "${ip} 服務器用戶數已經達到${users}個,請及時處理。"
  fi
}

main "$@"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
|
#!/bin/bash
# check_linux.sh - interactive menu for quick host diagnostics: CPU load,
# disk I/O load, disk and inode usage, memory usage, TCP connection states,
# top CPU/memory consumers and network throughput.
# Must be run as root. Installs procps / sysstat when vmstat / iostat are
# missing.

readonly SEPARATOR="---------------------------------------"

# Set the global P_M to the package manager of this distribution (yum or
# apt-get); print a message and exit 1 on an unsupported system.
os_check() {
  local redhat="" debian=""
  if [ -e /etc/redhat-release ]; then
    redhat=$(cut -d' ' -f1 /etc/redhat-release)
  else
    debian=$(cut -d' ' -f1 /etc/issue 2>/dev/null)
  fi
  if [[ "$redhat" == "CentOS" || "$redhat" == "Red" ]]; then
    P_M=yum
  elif [[ "$debian" == "Ubuntu" || "$debian" == "ubuntu" || "$debian" == "Debian" ]]; then
    P_M=apt-get
  else
    echo "Operating system does not support."
    exit 1
  fi
}

# Install package $2 when command $1 is not available.
ensure_tool() {
  local tool=$1 package=$2
  command -v "$tool" > /dev/null 2>&1 && return 0
  echo "$tool command not found, now the install."
  sleep 1
  os_check
  "$P_M" install "$package" -y
  echo "-----------------------------------------------------------------------"
}

# Print the green "sample N" heading used by the repeating reports.
print_round() {
  printf '\033[32m 參考值%s\033[0m\n' "$1"
}

# CPU utilisation: three samples of the first data row of vmstat.
show_cpu_load() {
  local round util user_pct sys_pct iowait
  echo "$SEPARATOR"
  for (( round = 1; round <= 3; round++ )); do
    print_round "$round"
    # One vmstat call per sample; columns 13-16 are us, sy, id, wa.
    read -r util user_pct sys_pct iowait < <(
      vmstat | awk 'NR == 3 { print (100 - $15) "%", $13 "%", $14 "%", $16 "%" }'
    )
    echo "Util: $util"
    echo "User use: $user_pct"
    echo "System use: $sys_pct"
    echo "I/O wait: $iowait"
    sleep 1
  done
  echo "$SEPARATOR"
}

# Read an `iostat -x -k` report on stdin and print "<device>: <value>KB" for
# the column whose header is $1. The column is located through the "Device"
# header row because its position differs between sysstat versions; $2 is the
# position used when no header row was seen.
iostat_column() {
  awk -v want="$1" -v fallback="$2" '
    /^Device/ {
      col = 0
      for (i = 1; i <= NF; i++) if ($i == want) col = i
      next
    }
    /^(s|v|xvd|nvme)/ { print $1 ": " $(col ? col : fallback) "KB" }
  '
}

# Disk I/O load: three samples of per-device utilisation and throughput.
show_disk_load() {
  local round report util read_rate write_rate iowait
  echo "$SEPARATOR"
  for (( round = 1; round <= 3; round++ )); do
    print_round "$round"
    report=$(iostat -x -k)
    # %util is the last column of every device row.
    util=$(awk '/^(s|v|xvd|nvme)/ { print $1 ": " $NF "%" }' <<< "$report")
    read_rate=$(iostat_column "rkB/s" 6 <<< "$report")
    write_rate=$(iostat_column "wkB/s" 7 <<< "$report")
    iowait=$(vmstat | awk 'NR == 3 { print $16 "%" }')
    echo -e "Util:"
    echo -e "${util}"
    echo -e "I/O Wait: $iowait"
    echo -e "Read/s:\n$read_rate"
    echo -e "Write/s:\n$write_rate"
    sleep 1
  done
  echo "$SEPARATOR"
}

# Disk sizes plus every /dev partition that is more than 90% full.
show_disk_use() {
  local disk_total over_limit
  disk_total=$(fdisk -l 2>/dev/null \
    | awk '/^Disk.*bytes/ && /\/dev/ { printf "%s %dGB\n", $2, $3 }')
  over_limit=$(df -hP | awk '/^\/dev/ && int($5) > 90 { print $6 " = " int($5) "%" }')

  echo "$SEPARATOR"
  echo -e "Disk total:\n${disk_total}"
  echo "$SEPARATOR"
  if [[ -n "$over_limit" ]]; then
    echo "$over_limit"
  else
    echo "Disk use rate no than 90% of the partition."
  fi
  echo "$SEPARATOR"
}

# Every /dev partition whose inode usage is above 90%.
show_disk_inode() {
  local over_limit
  # Mount point and percentage come from the same `df -i` row, so the
  # inode percentage is never matched against block-usage percentages.
  over_limit=$(df -iP | awk '/^\/dev/ && int($5) > 90 { print $6 " = " int($5) "%" }')
  echo "$SEPARATOR"
  if [[ -n "$over_limit" ]]; then
    echo "$over_limit"
  else
    echo "Inode use rate no than 90% of the partition."
  fi
  echo "$SEPARATOR"
}

# Memory usage in GiB, derived from /proc/meminfo (values there are in kB).
# "Use" excludes buffers/cache and "Free" includes them, i.e. the figures of
# the "-/+ buffers/cache" row of older `free` output.
show_mem_use() {
  echo "$SEPARATOR"
  awk '
    /^MemTotal:/ { total = $2 }
    /^MemFree:/  { free = $2 }
    /^Buffers:/  { buffers = $2 }
    /^Cached:/   { cached = $2 }
    END {
      gib = 1024 * 1024
      reclaimable = buffers + cached
      printf "Total: %.1fG\n", total / gib
      printf "Use: %.1fG\n", (total - free - reclaimable) / gib
      printf "Free: %.1fG\n", (free + reclaimable) / gib
      printf "Cache: %.1fG\n", reclaimable / gib
    }
  ' /proc/meminfo
  echo "$SEPARATOR"
}

# Number of TCP connections per state. Uses netstat when present, otherwise
# ss (whose state names are abbreviated differently).
show_tcp_status() {
  local counts
  echo "$SEPARATOR"
  if command -v netstat > /dev/null 2>&1; then
    # Restrict to tcp rows so the two header lines are not counted.
    counts=$(netstat -ant \
      | awk '/^tcp/ { status[$6]++ } END { for (s in status) print s, status[s] }')
  else
    counts=$(ss -ant \
      | awk 'NR > 1 { status[$1]++ } END { for (s in status) print s, status[s] }')
  fi
  echo -e "TCP connection status:\n$counts"
  echo "$SEPARATOR"
}

# Print, three times, the ten processes with the highest value in `ps aux`
# column $2 (3 = %CPU, 4 = %MEM), labelled $1. Stops early when no process
# is above 0.1%.
show_top10() {
  local label=$1 column=$2 round listing
  echo "$SEPARATOR"
  for (( round = 1; round <= 3; round++ )); do
    listing=$(ps aux \
      | awk -v label="$label" -v col="$column" '
          NR > 1 && $col > 0.1 {
            cmd = $11
            for (i = 12; i <= NF; i++) cmd = cmd " " $i
            printf "PID: %s %s: %s%% --> %s\n", $2, label, $col, cmd
          }' \
      | sort -k4 -nr | head -10)
    if [[ -n "$listing" ]]; then
      print_round "$round"
      echo "$listing"
    else
      echo "No process using the $label."
      break
    fi
    sleep 1
  done
  echo "$SEPARATOR"
}

# Ask for an interface name and print three one-second throughput samples.
# Counters are read from sysfs, which has the same layout on every
# distribution (ifconfig output does not).
show_traffic() {
  local eth stats_dir round old_in old_out new_in new_out
  while true; do
    # Stop asking when stdin is closed instead of looping forever.
    read -r -p "Please enter the network card name(eth[0-9] or em[0-9]): " eth || return 0
    if [[ -n "$eth" && "$eth" != */* && -r "/sys/class/net/${eth}/statistics/rx_bytes" ]]; then
      break
    fi
    echo "Input format error or Don't have the card name, please input again."
  done

  stats_dir="/sys/class/net/${eth}/statistics"
  echo "$SEPARATOR"
  echo -e " In ------ Out"
  for (( round = 1; round <= 3; round++ )); do
    old_in=$(< "${stats_dir}/rx_bytes")
    old_out=$(< "${stats_dir}/tx_bytes")
    sleep 1
    new_in=$(< "${stats_dir}/rx_bytes")
    new_out=$(< "${stats_dir}/tx_bytes")
    # bytes / 1024 / 128 is megabits, hence the Mb/s label.
    awk -v rx="$(( new_in - old_in ))" -v tx="$(( new_out - old_out ))" \
      'BEGIN { printf "%.1fMb/s %.1fMb/s\n", rx / 1024 / 128, tx / 1024 / 128 }'
    sleep 1
  done
  echo "$SEPARATOR"
}

main() {
  local input picked

  if (( EUID != 0 )); then
    echo "Please use the root account operation."
    exit 1
  fi

  ensure_tool vmstat procps
  ensure_tool iostat sysstat

  while true; do
    picked=0
    select input in cpu_load disk_load disk_use disk_inode mem_use tcp_status cpu_top10 mem_top10 traffic quit; do
      picked=1
      case "$input" in
        cpu_load)   show_cpu_load ;;
        disk_load)  show_disk_load ;;
        disk_use)   show_disk_use ;;
        disk_inode) show_disk_inode ;;
        mem_use)    show_mem_use ;;
        tcp_status) show_tcp_status ;;
        cpu_top10)  show_top10 CPU 3 ;;
        mem_top10)  show_top10 Memory 4 ;;
        traffic)    show_traffic ;;
        quit)       exit 0 ;;
        *)
          echo "$SEPARATOR"
          echo "Please enter the number."
          echo "$SEPARATOR"
          ;;
      esac
      # Leave select after every action so the menu is printed again.
      break
    done
    # select ends without running its body only on end-of-input; leave
    # instead of re-entering the menu forever.
    if (( ! picked )); then
      echo
      break
    fi
  done
}

main "$@"