實時文本採集器

背景: 類似tail命令,實時採集文本增加內容。下列實時採集zabbix的指標文件內容並輸出到Influxdb。

初版:

# coding=utf-8
import sys
import time
import requests
import json

class Tail():
        # def __init__(self,file_name,callback=sys.stdout.write):
        def __init__(self,file_name, influx_url, db_name, table_name):
                self.file_name = file_name
                self.influx_url = influx_url
                self.db_name = db_name
                self.table_name = table_name
                # self.callback = callback

        def follow(self,n=10):
                try:
                        with open(self.file_name) as f:
                                self._file = f
                                self._file.seek(0,2)
                                self.file_length = self._file.tell()
                                self.showLastLine(n)
                                while True:
                                        line = self._file.readline()
                                        if line:
                                                self.inser_db(line)
                                                print(line)
                                                # self.callback(line)
                                        time.sleep(1)
                except Exception as e:
                        print('打開文件失敗,囧,看看文件是不是不存在,或者權限有問題')
                        print(e)

        def showLastLine(self, n):
                len_line = 100
                read_len = len_line*n
                while True:
                        if read_len>self.file_length:
                                self._file.seek(0)
                                last_lines = self._file.read().split('\n')[-n:]
                                break
                        self._file.seek(-read_len, 2)
                        last_words = self._file.read(read_len)
                        count = last_words.count('\n')
                        if count>=n:
                                last_lines = last_words.split('\n')[-n:]
                                break
                        else:
                                if count==0:
                                        len_perline = read_len
                                else:
                                        len_perline = read_len/count
                                read_len = len_perline * n
                # self.inser_db(last_lines)
                print('last:\n')
                for line in last_lines:
                        print(line+'\n')
                print('end last.\n')
                        # self.callback(line+'\n')

        def inser_db(self, i):
                if i:
                        url1 = 'http://%s/write?db=%s' % (self.influx_url, self.db_name)
                        # data_str = ''
                        # for i in dataList:
                                # if 'host' in i.keys():
                        print(i)
                        i = json.loads(i)
                        data_str = '%s,host=%s,label=%s,name=%s value=%s %s000000000' % (self.table_name, str(i['host']['host']).replace(' ', '\ '), str(i['applications'][0]).replace(' ', '\ '), str(i['name']).replace(' ', '\ '), str(i['value']), str(i['clock'])) + '\n'

                        print(data_str)
                        rep = requests.post(url1, data=data_str)
                        print(rep)
                        print(rep.status_code)
                return ''
                # for i in dataList:
                #     data_str = ''


if __name__ == '__main__':
        py_tail = Tail('/data/zabbix_export_data/history-history-syncer-1.ndjson', '127.0.0.1:8086', 'test', 'test3')
        py_tail.follow()

 

上面基本實現了實時採集且推送數據功能。

# curl -i -XPOST 'http://127.0.0.1:8086/write?db=metrics' --data-binary 'test,host=127.0.0.1,monitor_name=test count=1'

但是!!

有個問題是,當文件切割或文件達到限定大小後將成備份文件而系統會產生新的同名文件這個就不能用了。

 

解決上面問題版本:

#!-*- coding: utf-8 -*-

################################################################################
#
# Copyright (c) 2015 XX.com, Inc. All Rights Reserved
#
################################################################################

################################################################################
# This module provide ...
# Third libs those depend on:
################################################################################

"""
Compiler Python 2.7.10
Authors: y
"""

"""SYS LIBS
"""


import os
import re
import sys
import time
import json
import requests

import traceback

"""THIRD LIBS
"""

try:
    # import the third libs there.
    pass
except ImportError as e:
    print e
    os._exit(-1)

"""CUSTOM libs
Strongly recommend using abs path when using custmo libs.
"""

# Good behaviors.
# It means refusing called like from xxx import *
# When `__all__` is []
__all__ = []

reload(sys)
sys.setdefaultencoding('utf-8')


def send_error(msg):
    """ Send error to email.
    """

    print msg


#********************************************************
#* Global defines start.                                *
#********************************************************

#********************************************************
#* Global defines end.                                  *
#********************************************************


class Tail(object):
    """
    Python-Tail - Unix tail follow implementation in Python.

    python-tail can be used to monitor changes to a file.

    Example:
        import tail

        # Create a tail instance
        t = tail.Tail('file-to-be-followed')

        # Register a callback function to be called when a new line is found in the followed file.
        # If no callback function is registerd, new lines would be printed to standard out.
        t.register_callback(callback_function)

        # Follow the file with 5 seconds as sleep time between iterations.
        # If sleep time is not provided 1 second is used as the default time.
        t.follow(s=5)
    """

    ''' Represents a tail command. '''
    def __init__(self, tailed_file):
        ''' Initiate a Tail instance.
            Check for file validity, assigns callback function to standard out.

            Arguments:
                tailed_file - File to be followed. '''
        self.check_file_validity(tailed_file)
        self.tailed_file = tailed_file
        self.callback = sys.stdout.write

        self.try_count = 0

        try:
            self.file_ = open(self.tailed_file, "r")
            self.size = os.path.getsize(self.tailed_file)

            # Go to the end of file
            self.file_.seek(0, 2)
        except:
            raise

    def reload_tailed_file(self):
        """ Reload tailed file when it be empty be `echo "" > tailed file`, or
            segmentated by logrotate.
        """
        try:
            self.file_ = open(self.tailed_file, "r")
            self.size = os.path.getsize(self.tailed_file)

            # Go to the head of file
            self.file_.seek(0, 1)

            return True
        except:
            return False



    def follow(self, s=0.01):
        """ Do a tail follow. If a callback function is registered it is called with every new line.
        Else printed to standard out.

        Arguments:
            s - Number of seconds to wait between each iteration; Defaults to 1. """

        while True:
            try:
                _size = os.path.getsize(self.tailed_file)
            except Exception as e:
                print('[ERROR] no such file or directory')
                time.sleep(1)
                continue
            if _size < self.size:
                while self.try_count < 10:
                    if not self.reload_tailed_file():
                        self.try_count += 1
                    else:
                        self.try_count = 0
                        self.size = os.path.getsize(self.tailed_file)
                        break
                    time.sleep(0.1)

                if self.try_count == 10:
                    raise Exception("Open %s failed after try 10 times" % self.tailed_file)
            else:
                self.size = _size

            curr_position = self.file_.tell()
            line = self.file_.readline()
            if not line:
                self.file_.seek(curr_position)
            elif not line.endswith("\n"):
                self.file_.seed(curr_position)
            else:
                self.callback(line)
            time.sleep(s)

    def register_callback(self, func):
        """ Overrides default callback function to provided function. """
        self.callback = func

    def check_file_validity(self, file_):
        """ Check whether the a given file exists, readable and is a file """
        if not os.access(file_, os.F_OK):
            raise TailError("File '%s' does not exist" % (file_))
        if not os.access(file_, os.R_OK):
            raise TailError("File '%s' not readable" % (file_))
        if os.path.isdir(file_):
            raise TailError("File '%s' is a directory" % (file_))


class TailError(Exception):
    """ Custom error type.
    """

    def __init__(self, msg):
        """ Init.
        """
        self.message = msg

    def __str__(self):
        """ str.
        """
        return self.message

if __name__ == '__main__':
    t = Tail("/data/zabbix_export_data/history-history-syncer-1.ndjson")
    def inser_db(insert_str):
        if insert_str:
            url1 = 'http://127.0.0.1:8086/write?db=test'
            # data_str = ''
            # for i in dataList:
                    # if 'host' in i.keys():
            # print(i)
            i = json.loads(insert_str)
            data_str = '%s,host=%s,label=%s,name=%s value=%s %s000000000' % ('test3', str(i['host']['host']).replace(' ', '\ '), str(i['applications'][0]).replace(' ', '\ '), str(i['name']).replace(' ', '\ '), str(i['value']), str(i['clock'])) + '\n'

            print(data_str)
            rep = requests.post(url1, data=data_str)
            print(rep)
            print(rep.status_code)
        return ''
    # def print_msg(msg):
    #     print msg

    t.register_callback(inser_db)

    t.follow()

""" vim: set ts=4 sw=4 sts=4 tw=100 et: """

  

就這樣哈~  

 

 

Thanks all~

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章