需求
1.jenkins在應用發版過程中的java進程重啓,會導致監控系統報警,此類發版過程報警運維人員可忽略;
2.jenkins在應用發版過程中重啓java進程時,nginx代理會進行摘節點操作,保證對外服務不受影響;
3.zabbix監控系統需要對nginx的摘節點行爲進行監控(發版過程中的摘節點不進行報警);
4.對nginx摘節點行爲進行excel記錄,記錄time,upstream,name
爲保證監控系統檢查到切實有效的報警信息,我們需要對監控進行優化。
思路
1.通過nginx的後端節點狀態頁面,獲取當前nginx中有哪些後端節點;
2.調用jenkins api獲取jenkins當前處於構建狀態的任務列表:若running列表中沒有對應的構建任務,說明不是發版導致的摘節點,需要報警並記錄到excel中;若有,則說明是jenkins發版導致的摘節點,不進行報警。
實現
1.python 腳本
#!/usr/bin/env python
#-*- coding: UTF-8 -*-
'''
auth:yanggd
date:2019-03-25
comment:
1.結合zabbix監控內網nginx的節點狀態,若爲down則報警
2.生成摘節點報表,記錄time,upstream,name
3.安裝python-jenkins,requests,xlwt,xlrd,xlutils
4.使用jenkins api排查發版過程中的摘節點
'''
import jenkins
import json
import requests
import datetime
#寫excel
import xlwt
#讀excel
import xlrd
import os
import sys
from xlutils.copy import copy
#連接jenkins api
def getJenkins():
    """Build a python-jenkins client for the Jenkins server.

    The URL, user name and password default to the values that used to be
    hard-coded here. Each one can be overridden with the JENKINS_URL,
    JENKINS_USERNAME and JENKINS_PASSWORD environment variables, so real
    credentials do not have to live in the script itself.

    Returns:
        The object produced by ``jenkins.Jenkins(url, username, password)``.
    """
    jenkins_url = os.environ.get('JENKINS_URL', 'http://jenkins.test.cn')
    username = os.environ.get('JENKINS_USERNAME', 'admin')
    password = os.environ.get('JENKINS_PASSWORD', '12345678')
    return jenkins.Jenkins(jenkins_url, username, password)
#新建xls並添加數據
def writeExcel(down_list, file):
    """Create a new .xls workbook at ``file`` holding the given records.

    The workbook gets a single sheet named 'nginx_upstream_status' whose
    first row is the header (time, upstream, name); every following row is
    one entry of ``down_list``.

    Args:
        down_list: iterable of dicts, each providing the keys 'time',
            'upstream' and 'name'.
        file: path the workbook is saved to.
    """
    columns = ['time', 'upstream', 'name']
    book = xlwt.Workbook()
    sheet = book.add_sheet('nginx_upstream_status')

    # Header row.
    for col_idx, title in enumerate(columns):
        sheet.write(0, col_idx, title)

    # Data rows start right below the header.
    for row_idx, record in enumerate(down_list, 1):
        for col_idx, key in enumerate(columns):
            sheet.write(row_idx, col_idx, record[key])

    book.save(file)
#追加xls數據
def writeExcel_add(down_list, file):
    """Append the given records to an existing .xls report.

    The number of rows already present is read from the sheet named
    'nginx_upstream_status'; the workbook is then copied with xlutils and
    the new rows are written to the copy's first sheet, starting at the
    first unused row, before the copy is saved back to ``file``.

    Args:
        down_list: iterable of dicts, each providing the keys 'time',
            'upstream' and 'name'.
        file: path of the existing workbook; it is overwritten on save.
    """
    columns = ['time', 'upstream', 'name']
    source_book = xlrd.open_workbook(file)
    first_free_row = source_book.sheet_by_name('nginx_upstream_status').nrows

    # The xlrd workbook is read-only, so write through an xlutils copy.
    writable_book = copy(source_book)
    target_sheet = writable_book.get_sheet(0)

    for row_idx, record in enumerate(down_list, first_free_row):
        for col_idx, key in enumerate(columns):
            target_sheet.write(row_idx, col_idx, record[key])

    writable_book.save(file)
#自動發現
def nodeDiscovery():
    """Print the Zabbix low-level-discovery JSON for the nginx upstream nodes.

    Fetches the nginx upstream status page and prints
    ``{"data": [{"{#NAME}": ..., "{#UPSTREAM}": ...}, ...]}`` with one entry
    per server listed under ``servers.server`` in the response.

    Raises:
        requests.exceptions.RequestException: if the status page cannot be
            fetched in time or answers with an HTTP error status.
    """
    # nginx upstream status page. nodeStatus() uses the same URL, so change
    # both places together.
    upstream_url = 'http://192.168.3.141/upstream_status?format=json'
    # Without a timeout an unresponsive nginx would block this check forever.
    res = requests.get(upstream_url, timeout=5)
    res.raise_for_status()

    node_list = [
        {'{#UPSTREAM}': node['upstream'], '{#NAME}': node['name']}
        for node in res.json()['servers']['server']
    ]
    node_info = {"data": node_list}
    print(json.dumps(node_info, sort_keys=True, indent=4, separators=(', ', ': ')))
#節點狀態
def nodeStatus(upstream, name):
    """Print the Zabbix status value for one nginx upstream node.

    Exactly one line is printed:

    * ``1`` -- the node is reported 'down' and no Jenkins build of the job
      mapped to ``upstream`` is running. The event is also appended to the
      upstream_status.xls report (time, upstream, name).
    * ``2`` -- the node is not down, or it is down while the mapped Jenkins
      job is building (a deploy-induced removal that must not alert).

    Args:
        upstream: nginx upstream name, as emitted by nodeDiscovery().
        name: node name within that upstream, as emitted by nodeDiscovery().

    Raises:
        requests.exceptions.RequestException: if the status page cannot be
            fetched in time or answers with an HTTP error status.
    """
    # Location of the report file.
    report_dir = '/App/zabbix/etc/zabbix_agentd.conf.d'
    report_name = 'upstream_status.xls'
    # Maps an nginx upstream name to the Jenkins job that deploys it.
    upstream_dic = {
        "test1": "prod-test1",
        "test2": "prod-test2",
    }
    now = datetime.datetime.now()

    # nginx upstream status page (same URL as in nodeDiscovery()).
    upstream_url = 'http://192.168.3.141/upstream_status?format=json'
    # Without a timeout an unresponsive nginx would block this check forever.
    res = requests.get(upstream_url, timeout=5)
    res.raise_for_status()

    # Match on upstream as well as name so that the same address configured
    # in several upstreams is not reported (and logged) more than once.
    is_down = any(
        node['upstream'] == upstream
        and node['name'] == name
        and node['status'] == 'down'
        for node in res.json()['servers']['server']
    )
    if not is_down:
        print(2)
        return

    # Only upstreams with a Jenkins mapping can be excused by a deploy; an
    # unmapped upstream is always treated as a genuine outage.
    job_name = upstream_dic.get(upstream)
    if job_name is not None:
        running_list = getJenkins().get_running_builds()
        if any(item['name'] == job_name for item in running_list):
            # Node was taken out by an ongoing release: report healthy.
            print(2)
            return

    # Emit the alert value before touching the report so that a failure to
    # write the spreadsheet cannot suppress the alert.
    print(1)
    down_list = [{'time': str(now), 'upstream': upstream, 'name': name}]
    report_path = os.path.join(report_dir, report_name)
    # Append to the report when it exists, otherwise create it.
    if os.path.exists(report_path):
        writeExcel_add(down_list, report_path)
    else:
        writeExcel(down_list, report_path)
if __name__ == '__main__':
    # Command-line dispatch:
    #   <script> nodeDiscovery
    #   <script> nodeStatus <upstream> <name>
    generic_usage = "Usage: python " + sys.argv[0] + ' [nodeDiscovery|nodeStatus]'
    if len(sys.argv) < 2:
        print(generic_usage)
    elif sys.argv[1] == 'nodeDiscovery':
        nodeDiscovery()
    elif sys.argv[1] == 'nodeStatus':
        if len(sys.argv) != 4:
            # nodeStatus takes exactly two arguments; the old message also
            # listed a 'status' argument that this check would reject.
            print("Usage: python " + sys.argv[0] + ' nodeStatus upstream name')
        else:
            nodeStatus(sys.argv[2], sys.argv[3])
    else:
        print(generic_usage)
2.zabbix監控設置
#nginx upstream status監控
UserParameter=upstream.discovery,python /App/zabbix/etc/zabbix_agentd.conf.d/upstream_status.py nodeDiscovery
UserParameter=upstream.status[*],python /App/zabbix/etc/zabbix_agentd.conf.d/upstream_status.py nodeStatus '$1' '$2'