實時文本採集器

背景：類似tail命令，實時採集文本增加內容。下列實時採集zabbix的指標文件內容並輸出到Influxdb。

初版：

# coding=utf-8
import sys
import time
import requests
import json

class Tail():
        # def __init__(self,file_name,callback=sys.stdout.write):
        def __init__(self,file_name, influx_url, db_name, table_name):
                self.file_name = file_name
                self.influx_url = influx_url
                self.db_name = db_name
                self.table_name = table_name
                # self.callback = callback

        def follow(self,n=10):
                try:
                        with open(self.file_name) as f:
                                self._file = f
                                self._file.seek(0,2)
                                self.file_length = self._file.tell()
                                self.showLastLine(n)
                                while True:
                                        line = self._file.readline()
                                        if line:
                                                self.inser_db(line)
                                                print(line)
                                                # self.callback(line)
                                        time.sleep(1)
                except Exception as e:
                        print('打開文件失敗，囧，看看文件是不是不存在，或者權限有問題')
                        print(e)

        def showLastLine(self, n):
                len_line = 100
                read_len = len_line*n
                while True:
                        if read_len>self.file_length:
                                self._file.seek(0)
                                last_lines = self._file.read().split('\n')[-n:]
                                break
                        self._file.seek(-read_len, 2)
                        last_words = self._file.read(read_len)
                        count = last_words.count('\n')
                        if count>=n:
                                last_lines = last_words.split('\n')[-n:]
                                break
                        else:
                                if count==0:
                                        len_perline = read_len
                                else:
                                        len_perline = read_len/count
                                read_len = len_perline * n
                # self.inser_db(last_lines)
                print('last:\n')
                for line in last_lines:
                        print(line+'\n')
                print('end last.\n')
                        # self.callback(line+'\n')

        def inser_db(self, i):
                if i:
                        url1 = 'http://%s/write?db=%s' % (self.influx_url, self.db_name)
                        # data_str = ''
                        # for i in dataList:
                                # if 'host' in i.keys():
                        print(i)
                        i = json.loads(i)
                        data_str = '%s,host=%s,label=%s,name=%s value=%s %s000000000' % (self.table_name, str(i['host']['host']).replace(' ', '\ '), str(i['applications'][0]).replace(' ', '\ '), str(i['name']).replace(' ', '\ '), str(i['value']), str(i['clock'])) + '\n'

                        print(data_str)
                        rep = requests.post(url1, data=data_str)
                        print(rep)
                        print(rep.status_code)
                return ''
                # for i in dataList:
                #     data_str = ''


if __name__ == '__main__':
        py_tail = Tail('/data/zabbix_export_data/history-history-syncer-1.ndjson', '127.0.0.1:8086', 'test', 'test3')
        py_tail.follow()

上面基本實現了實時採集且推送數據功能。

# curl -i -XPOST 'http://127.0.0.1:8086/write?db=metrics' --data-binary 'test,host=127.0.0.1,monitor_name=test count=1'

但是！！

有個問題是，當文件切割或文件達到限定大小後將成備份文件而系統會產生新的同名文件這個就不能用了。

解決上面問題版本:

#!-*- coding: utf-8 -*-

################################################################################
#
# Copyright (c) 2015 XX.com, Inc. All Rights Reserved
#
################################################################################

################################################################################
# This module provide ...
# Third libs those depend on:
################################################################################

"""
Compiler Python 2.7.10
Authors: y
"""

"""SYS LIBS
"""


import os
import re
import sys
import time
import json
import requests

import traceback

"""THIRD LIBS
"""

try:
    # import the third libs there.
    pass
except ImportError as e:
    print e
    os._exit(-1)

"""CUSTOM libs
Strongly recommend using abs path when using custmo libs.
"""

# Good behaviors.
# It means refusing called like from xxx import *
# When `__all__` is []
__all__ = []

reload(sys)
sys.setdefaultencoding('utf-8')


def send_error(msg):
    """ Send error to email.
    """

    print msg


#********************************************************
#* Global defines start.                                *
#********************************************************

#********************************************************
#* Global defines end.                                  *
#********************************************************


class Tail(object):
    """
    Python-Tail - Unix tail follow implementation in Python.

    python-tail can be used to monitor changes to a file.

    Example:
        import tail

        # Create a tail instance
        t = tail.Tail('file-to-be-followed')

        # Register a callback function to be called when a new line is found in the followed file.
        # If no callback function is registerd, new lines would be printed to standard out.
        t.register_callback(callback_function)

        # Follow the file with 5 seconds as sleep time between iterations.
        # If sleep time is not provided 1 second is used as the default time.
        t.follow(s=5)
    """

    ''' Represents a tail command. '''
    def __init__(self, tailed_file):
        ''' Initiate a Tail instance.
            Check for file validity, assigns callback function to standard out.

            Arguments:
                tailed_file - File to be followed. '''
        self.check_file_validity(tailed_file)
        self.tailed_file = tailed_file
        self.callback = sys.stdout.write

        self.try_count = 0

        try:
            self.file_ = open(self.tailed_file, "r")
            self.size = os.path.getsize(self.tailed_file)

            # Go to the end of file
            self.file_.seek(0, 2)
        except:
            raise

    def reload_tailed_file(self):
        """ Reload tailed file when it be empty be `echo "" > tailed file`, or
            segmentated by logrotate.
        """
        try:
            self.file_ = open(self.tailed_file, "r")
            self.size = os.path.getsize(self.tailed_file)

            # Go to the head of file
            self.file_.seek(0, 1)

            return True
        except:
            return False



    def follow(self, s=0.01):
        """ Do a tail follow. If a callback function is registered it is called with every new line.
        Else printed to standard out.

        Arguments:
            s - Number of seconds to wait between each iteration; Defaults to 1. """

        while True:
            try:
                _size = os.path.getsize(self.tailed_file)
            except Exception as e:
                print('[ERROR] no such file or directory')
                time.sleep(1)
                continue
            if _size < self.size:
                while self.try_count < 10:
                    if not self.reload_tailed_file():
                        self.try_count += 1
                    else:
                        self.try_count = 0
                        self.size = os.path.getsize(self.tailed_file)
                        break
                    time.sleep(0.1)

                if self.try_count == 10:
                    raise Exception("Open %s failed after try 10 times" % self.tailed_file)
            else:
                self.size = _size

            curr_position = self.file_.tell()
            line = self.file_.readline()
            if not line:
                self.file_.seek(curr_position)
            elif not line.endswith("\n"):
                self.file_.seed(curr_position)
            else:
                self.callback(line)
            time.sleep(s)

    def register_callback(self, func):
        """ Overrides default callback function to provided function. """
        self.callback = func

    def check_file_validity(self, file_):
        """ Check whether the a given file exists, readable and is a file """
        if not os.access(file_, os.F_OK):
            raise TailError("File '%s' does not exist" % (file_))
        if not os.access(file_, os.R_OK):
            raise TailError("File '%s' not readable" % (file_))
        if os.path.isdir(file_):
            raise TailError("File '%s' is a directory" % (file_))


class TailError(Exception):
    """ Custom error type.
    """

    def __init__(self, msg):
        """ Init.
        """
        self.message = msg

    def __str__(self):
        """ str.
        """
        return self.message

if __name__ == '__main__':
    t = Tail("/data/zabbix_export_data/history-history-syncer-1.ndjson")
    def inser_db(insert_str):
        if insert_str:
            url1 = 'http://127.0.0.1:8086/write?db=test'
            # data_str = ''
            # for i in dataList:
                    # if 'host' in i.keys():
            # print(i)
            i = json.loads(insert_str)
            data_str = '%s,host=%s,label=%s,name=%s value=%s %s000000000' % ('test3', str(i['host']['host']).replace(' ', '\ '), str(i['applications'][0]).replace(' ', '\ '), str(i['name']).replace(' ', '\ '), str(i['value']), str(i['clock'])) + '\n'

            print(data_str)
            rep = requests.post(url1, data=data_str)
            print(rep)
            print(rep.status_code)
        return ''
    # def print_msg(msg):
    #     print msg

    t.register_callback(inser_db)

    t.follow()

""" vim: set ts=4 sw=4 sts=4 tw=100 et: """

就這樣哈~　　

Thanks all~

實時文本採集器

10分鐘搞定Mysql主從部署配置

如何使用 JS 判斷用戶是否處於活躍狀態

「Pygors跨平臺GUI」2：安裝MinGW-w64、MSYS2還是WSL2

[轉帖]

python列出centos7內存使用前50的進程信息

「Pygors跨平臺GUI」1：Pygors跨平臺GUI應用研究

一鍵自動化博客發佈工具,用過的人都說好(掘金篇)

lightdb數據庫超時相關控制參數

lightdb秒級增加列和刪除列（not null帶默認值）

Java ThreadPoolShutdown

遞歸處理複製變量目錄按原路徑複製到新目錄的腳本

fastapi後臺任務模塊<BackgroundTasks>源碼理解

django插入數據庫報錯 1366, "Incorrect string value: '\\xF0\\x9F\\x9A\\xA8

centos7安裝mysql 5.7

html橫向類時間軸的任務流寫法

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結