Python多進程示例

下面這個示例的功能包含:

(1)實現python多進程;

(2)進程間共享變量company_queue(通過Manager().Queue()實現);

(3)每個進程最後輸出一個獨立的結果(保存路徑通過參數傳入);

(4)實現json數據整合成一個dataframe並輸出到文件(支持csv和excel兩種格式);

(5)部分函數的具體實現略去。

 

示例代碼:

#coding=utf-8
import json
import xlsxwriter
import pandas as pd
from multiprocessing import Pool, Process, Manager, Queue
    
def get_all_data(companyName):
    """Fetch the data records associated with one company.

    The real retrieval logic is omitted in this example, so the function
    currently always returns an empty list.

    Args:
        companyName: Name of the company to look up.

    Returns:
        A list of result records (always empty in this stub).
    """
    # Bind the result before the try block so the final return can never hit
    # an unbound name if the (omitted) retrieval code raises early.
    all_result = []
    try:
        # Placeholder: the actual data retrieval is omitted in this example.
        pass
    except Exception as e:
        # Best-effort: report the failure and fall through to return whatever
        # was collected so far.
        print(str(e))
    return all_result

def get_all_company(file_path):
    """Load the list of all company names.

    The real loading logic is omitted in this example, so ``file_path`` is
    not read and an empty list is always returned.

    Args:
        file_path: Path of the file that lists the companies.

    Returns:
        A list of company names (always empty in this stub).
    """
    company_list = []
    return company_list

def get_all_dataframe(all_result, temp_dic):
    """Append one record to the accumulated results as a single DataFrame row.

    Every value in ``temp_dic`` is converted with ``str()`` so that each
    value, whatever its type, occupies exactly one cell of the new row.

    Args:
        all_result: Rows accumulated so far. Anything with length 0 (an empty
            DataFrame, an empty list, ...) means there are no rows yet.
        temp_dic: Mapping of column name to value for the new row. It is left
            unmodified.

    Returns:
        A DataFrame holding the previous rows followed by the new row. The
        new row keeps index label 0, so index labels repeat across rows.
    """
    # Build a stringified copy rather than overwriting the caller's dict.
    stringified = {key: str(value) for key, value in temp_dic.items()}
    row = pd.DataFrame(stringified, index=[0])
    if len(all_result) == 0:
        return row
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    return pd.concat([all_result, row])

def Run(company_queue, output_path, mode):
    """Drain the shared company queue and write this worker's results to a file.

    Each company name taken from the queue is passed to ``get_all_data`` and
    the returned records are merged into one DataFrame, which is written out
    once the queue is exhausted.

    Args:
        company_queue: Queue of company names shared between worker processes.
        output_path: Output file path without extension.
        mode: 0 writes ``<output_path>.csv``; any other value writes
            ``<output_path>.xlsx``.
    """
    import os
    import queue

    all_result = pd.DataFrame()
    while True:
        try:
            # Checking empty() and then calling a blocking get() can hang
            # forever when another worker takes the last item in between, so
            # fetch without blocking and stop once the queue reports empty.
            companyName = company_queue.get_nowait()
        except queue.Empty:
            break
        # NOTE(review): assumes get_all_data returns an iterable of dict
        # records, one per output row -- confirm against the real
        # implementation. Accumulating through get_all_dataframe keeps
        # all_result a DataFrame instead of replacing it with a list.
        for record in get_all_data(companyName):
            all_result = get_all_dataframe(all_result, record)

    # Create the target directory up front so the write below cannot fail
    # merely because it does not exist yet.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    if mode == 0:
        file_path = output_path + '.csv'
        all_result.to_csv(file_path, index=False)
    else:
        file_path = output_path + '.xlsx'
        # The context manager saves and closes the workbook; the explicit
        # writer.save() call was removed in pandas 2.0.
        with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
            all_result.to_excel(writer, sheet_name='Sheet1', index=False)

if __name__ == '__main__':
    # Alternative input file:
    #file_path = './data/all_test_company.txt'
    file_path = './company_file.txt'
    output_path = './result/result_'
    company_list = get_all_company(file_path)

    max_process = 10

    # Manager-backed queue so every pool worker pulls from the same queue.
    company_queue = Manager().Queue()
    for company_name in company_list:
        company_queue.put(company_name)

    pool = Pool(processes=max_process)
    # Keep the AsyncResult handles: apply_async swallows worker exceptions
    # unless get() is called on the handle afterwards.
    # Each worker writes to its own file (output_path + index); the trailing
    # 1 is the mode argument, which selects xlsx output in Run.
    async_results = [
        pool.apply_async(Run, args=(company_queue, output_path + str(i), 1))
        for i in range(max_process)
    ]

    # close() must come before join(): after close() no new tasks can be
    # submitted, and join() then waits for all worker processes to finish.
    pool.close()
    pool.join()

    # Re-raise here any exception that occurred inside a worker.
    for async_result in async_results:
        async_result.get()

    print('finished!')

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章