nagios安裝
apt-get install nagios3
#Nagios web administration password
#123123
默認安裝apache2 監聽80端口
訪問 http://10.10.100.54/nagios3/
默認管理員是nagiosadmin 密碼自己設置的
密碼文件/etc/nagios3/htpasswd.users
修改管理員密碼
#注意:-c 會重新創建密碼文件並清空已有用戶(包括默認的nagiosadmin);若只想新增用戶或修改已有用戶的密碼,請去掉 -c
htpasswd -c /etc/nagios3/htpasswd.users admin
New password:
123123
Re-type new password:
123123
設置管理員權限
vim /etc/nagios3/cgi.cfg
use_authentication=1
authorized_for_system_information=admin
authorized_for_configuration_information=admin
authorized_for_system_commands=admin
authorized_for_all_services=admin
authorized_for_all_hosts=admin
authorized_for_all_service_commands=admin
authorized_for_all_host_commands=admin
手動執行任務
vim /etc/nagios3/nagios.cfg
#允許在頁面 **System>Scheduling Queue** 上手動執行任務(nagios.cfg只支持以#開頭的整行註釋,不能寫在行尾)
check_external_commands=1
錯誤
#有可能報錯
Error: Could not stat() command file '/var/lib/nagios3/rw/nagios.cmd'!
sudo /etc/init.d/nagios3 stop
sudo dpkg-statoverride --update --add nagios www-data 2710 /var/lib/nagios3/rw
sudo dpkg-statoverride --update --add nagios nagios 751 /var/lib/nagios3
sudo /etc/init.d/nagios3 start
重啓服務就可以看到頁面的Current Status>Hosts已經默認監控本機
添加需要監控的主機
vim /etc/nagios3/conf.d/hosts.cfg #默認沒這文件
define host {
use generic-host
host_name lvs ;主機名稱
alias lvs ;主機別名不設置默認爲host_name
address 10.10.100.100 ;需要監控主機的IP
check_interval 1 ;檢查的間隔 1分鐘
}
定義主機組
vim /etc/nagios3/conf.d/hostgroups_nagios2.cfg
# Some generic hostgroup definitions
# A simple wildcard hostgroup
define hostgroup {
hostgroup_name all
alias All Servers
members * ;所有的主機的組
}
# A list of your Debian GNU/Linux servers
define hostgroup {
hostgroup_name debian-servers
alias Debian GNU/Linux Servers
members localhost
}
# A list of your web servers
define hostgroup {
hostgroup_name http-servers
alias HTTP servers
members localhost,lvs ;指定組的主機,按,分割,可以添加多個
}
# A list of your ssh-accessible servers
define hostgroup {
hostgroup_name ssh-servers
alias SSH servers
members localhost
}
#添加ftp監控
define hostgroup {
hostgroup_name ftp-servers
alias FTP Servers
members lvs
}
定義服務項
vim /etc/nagios3/conf.d/services_nagios2.cfg
# check that web services are running
define service {
hostgroup_name http-servers
service_description HTTP
check_command check_http
use generic-service
notification_interval 0 ; set > 0 if you want to be renotified
}
# check that ssh services are running
define service {
hostgroup_name ssh-servers
service_description SSH
check_command check_ssh
use generic-service
notification_interval 0 ; set > 0 if you want to be renotified
}
# check that ftp services are running
define service {
hostgroup_name ftp-servers ;這必須在hostgroups_nagios2.cfg有這個主機組
service_description FTP
check_command check_ftp ;檢查FTP,插件默認路徑爲/usr/lib/nagios/plugins/
use generic-service
notification_interval 1 ; 重複通知的間隔(分鐘),0爲只通知一次、不重複通知
}
NRPE
我要監控遠程主機的 CPU、硬盤空間、內存等等
Nagios 提供了一個外掛插件,叫 NRPE
它可以讓 nagios server 在固定時間去抓 nagios client 被監控的項目回來判斷是否ok。 和zabbix agent功能類似
遠端
#這需要監控的機器上安裝
apt-get install nagios-nrpe-server
vim /etc/nagios/nrpe.cfg
......
#Nagios服務器端的地址(nrpe.cfg的註釋須獨立成行,不能寫在行尾)
allowed_hosts=10.10.100.54
......
#添加監控項
#當前登錄的用戶大於1警告(warning),大於2危急(critical)
command[check_users]=/usr/lib/nagios/plugins/check_users -w 1 -c 2
#同load average
#當1分鐘多於15個進程等待,5分鐘多於10個,15分鐘多於5個則爲警告狀態
#當1分鐘多於30個進程等待,5分鐘多於25個,15分鐘多於20個則爲危急狀態
command[check_load]=/usr/lib/nagios/plugins/check_load -w 15,10,5 -c 30,25,20
#如果空閒空間小於40%就是警告閥值
#如果空閒空間小於10%就是危急閥值
#-p分區
command[check_hda1]=/usr/lib/nagios/plugins/check_disk -w 40% -c 10% -p /dev/sda1
#檢查進程
command[check_zombie_procs]=/usr/lib/nagios/plugins/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/lib/nagios/plugins/check_procs -w 150 -c 200
......
#重啓服務
service nagios-nrpe-server restart
監控端
apt-get install nagios-nrpe-plugin
#檢查通訊是否正常
/usr/lib/nagios/plugins/check_nrpe -H 10.10.100.100
NRPE v2.15
vim /etc/nagios-plugins/config/check_nrpe.cfg
.......
#添加
define service {
use generic-service
hostgroup_name all
service_description NRPE check_hda1 ;check_hda1 和遠端(被監控端)nrpe.cfg中的 command[check_hda1]對應
check_command check_nrpe_1arg!check_hda1
notification_interval 0
}
define service {
use generic-service
hostgroup_name all
service_description NRPE check_total_procs
check_command check_nrpe_1arg!check_total_procs
notification_interval 0
}
define service {
use generic-service
hostgroup_name all
service_description NRPE check_users
check_command check_nrpe_1arg!check_users
notification_interval 0
}
重啓服務後就可以看到效果
通知
定義聯繫人
vim /etc/nagios3/conf.d/contacts_nagios2.cfg
define contact{
contact_name hu ;聯繫人稱呼
service_notification_period 24x7 ;當服務出現異常時,發送通知的時間段,這個時間段"24x7"在timeperiods_nagios2.cfg文件中定義
host_notification_period 24x7 ;當主機出現異常時,發送通知的時間段,這個時間段"24x7"在timeperiods_nagios2.cfg文件中定義
service_notification_options w,u,c,r ;這個定義的是"通知可以被發出的情況"。w(warning)表示警告狀態,u(unknown)表示不明狀態,c(critical)表示緊急狀態,r(recovery)表示恢復狀態。也就是在服務出現警告狀態、未知狀態、緊急狀態和重新恢復狀態時都發送通知給使用者。
host_notification_options d,r ;定義主機在什麼狀態下需要發送通知給使用者,d(down)表示宕機狀態,r(recovery)表示重新恢復狀態。
service_notification_commands notify-service-by-email ;服務故障時,發送通知的方式,可以是郵件和短信,這裏發送的方式是郵件,在commands.cfg文件中定義
host_notification_commands notify-host-by-email ;主機故障時,發送通知的方式,可以是郵件和短信,這裏發送的方式是郵件,在commands.cfg文件中定義
email xxx@xxx.com ;接收通知郵件的郵箱
}
定義聯繫人組
define contactgroup{
contactgroup_name hus
members hu ;多個人用,分割
}
定義服務器異常的聯繫人
vim /etc/nagios-plugins/config/check_nrpe.cfg
define service {
use generic-service
hostgroup_name all
service_description NRPE check_hda1
check_command check_nrpe_1arg!check_hda1
contact_groups hus ;如果有異常通知hus組的人
}
添加發送郵件
vim /etc/nagios3/commands.cfg
#測試使用sendEmail發送QQ郵件
define command{
command_name notify-host-by-email ;contact裏面定義的host_notification_commands
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" |sendEmail -f 9656951@qq.com -t $CONTACTEMAIL$ -s smtp.qq.com -u "** 主機: $HOSTALIAS$ is $HOSTSTATE$ **" -xu 9656951@qq.com -xp xxxxxx
}
define command{
command_name notify-service-by-email ;contact裏面定義的service_notification_commands
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$" |sendEmail -f 9656951@qq.com -t $CONTACTEMAIL$ -s smtp.qq.com -u "** 主機: $HOSTALIAS$ 服務: $SERVICEDESC$ is $SERVICESTATE$ **" -xu 9656951@qq.com -xp xxx
}