Elasticsearch 數據遷移至 InfluxDB(Python)
需求:將 Elasticsearch 的部分數據遷移至 InfluxDB 中。
見過從 MySQL、InfluxDB 遷移至 Elasticsearch 的,沒見過從 Elasticsearch 遷移至 InfluxDB 的。遷移的數據是一些實時性的流量數據,InfluxDB 這類時序數據庫對這類數據的支撐比較可觀。
解決方案:大批量從 Elasticsearch 取數據有兩種方案:1. from...size;2. scroll(類似於數據庫的遊標)。腳本採用第二種 scroll 方案對 Elasticsearch 查詢取數據,循環通過 scroll_id 進行查詢並寫入 InfluxDB 中。
#!/usr/bin/env python
# coding=utf-8
"""Copy documents from Elasticsearch into InfluxDB using the scroll API.

The script opens a scroll over every document matching ``pipefilter_meters*``
/ ``canaledge.flow.bytes``, converts each hit into an InfluxDB point in the
``es_net`` measurement and writes the points one scroll page at a time.
"""
import sys
import json
import datetime

import elasticsearch
from influxdb import InfluxDBClient


class ES(object):
    """Factory for the Elasticsearch client used by this script."""

    @classmethod
    def connect_host(cls):
        """Return an Elasticsearch client bound to the source cluster."""
        url = "http://192.168.121.33:9202/"
        es = elasticsearch.Elasticsearch(url, timeout=120)
        return es


def build_point(hit):
    """Convert one Elasticsearch hit into an InfluxDB point dict.

    ``hit['_source']['resource_id']`` must have the form ``<id>:<type>``;
    any other shape raises ``ValueError`` from the tuple unpacking, exactly
    as the original inline code did.
    """
    source = hit['_source']
    r_id, r_type = source['resource_id'].split(':')
    return {
        "measurement": "es_net",
        "tags": {
            "resource_id": r_id,
            "type": r_type
        },
        "time": source['timestamp'],
        "fields": {
            "counter_volume": source['counter_volume']
        }
    }


# Connect to Elasticsearch.
es = ES.connect_host()

# Connect to InfluxDB and create the target database.
client = InfluxDBClient(host="192.168.121.33", port=8086, username='admin',
                        password='admin', database='esl')
client.create_database('esl')

# DSL query: match every document, 100 per scroll page.
data = {
    "query": {"match_all": {}},
    "size": 100
}

# Response fields to keep.  '_scroll_id' has to be listed as well: the
# response is filtered by filter_path, and the id is read back from every
# scroll response to request the next page.
return_fields = [
    '_scroll_id',
    'hits.hits._source.resource_id',
    'hits.hits._source.timestamp',
    'hits.hits._source.counter_volume',
    'hits.hits._source.@timestamp',
]

# Open the scroll.  With search_type="scan" this first response is used only
# for its _scroll_id; the documents are fetched through es.scroll below.
# NOTE(review): the "scan" search type and doc_type are not available on
# newer Elasticsearch releases -- confirm against the cluster version.
res = es.search(
    index='pipefilter_meters*',
    doc_type='canaledge.flow.bytes',
    body=data,
    search_type="scan",
    scroll="10m"
)
scroll_id = res['_scroll_id']

while True:
    response = es.scroll(scroll_id=scroll_id, scroll="10m",
                         filter_path=return_fields)
    # On the final (empty) page the filtered response may not contain a
    # 'hits' key at all, so read it defensively instead of indexing.
    hits = response.get('hits', {}).get('hits', [])
    if not hits:
        break

    # Write one scroll page per request to InfluxDB.
    client.write_points([build_point(hit) for hit in hits])

    # Each response carries the id to use for the next page; fall back to
    # the current id if the response does not include one.
    scroll_id = response.get('_scroll_id', scroll_id)