前言
女朋友想考中傳,我幫她看了中傳官網的成績統計,結果那到底是個啥啊……所有成績全糊在一坨,根本不分專業統計錄取情況。還是我郵好,各個專業每年的錄取統計都列得明明白白。後來看到一位(可能已經被錄取的)小哥的 GitHub,他分析了考生準考證號的組成,我算是搞懂了怎麼按專業拆分並分析錄取情況,於是用 Python 代碼實現了一下。
小哥的github:參考了部分代碼
首先解析這一坨的全部成績
import re
import os
import pandas as pd
def selcet(rule, read_path, write_path):
    '''
    Append every row whose exam number matches ``rule`` to another CSV file.

    rule: regular expression, applied with ``re.match`` semantics to the
        last 15 characters of each value in the first column
    read_path: path of the GBK-encoded input CSV (its first line is
        consumed as the header row by ``pd.read_csv``)
    write_path: path of the GBK-encoded output CSV; matching rows are
        appended without a header line and without the index column.
        Nothing is written when no row matches.

    Prints the number of matching rows; returns nothing.
    '''
    with open(read_path, encoding="gbk") as file:
        data = pd.read_csv(file)

    pattern = re.compile(rule)
    # Read the cell value itself instead of slicing Series.to_string(),
    # whose output also contains the index label and padding spaces.
    numbers = (str(value)[-15:] for value in data.iloc[:, 0])
    hits = [pos for pos, number in enumerate(numbers) if pattern.match(number)]

    cnt = len(hits)
    if cnt:
        # One append for all matches instead of re-opening the file per row.
        # index must be False, otherwise an extra index column is written.
        data.iloc[hits].to_csv(write_path, mode="a", index=False,
                               header=None, encoding="gbk")
    print("一共找到了{}個數據".format(cnt))
if __name__ == "__main__":
    # Example exam number: 10033 0001 16 1 005
    # Layout: school code, region code, college code, direction, serial number
    front_rule = "^10033[0-9]{4}"
    last_rule = "[0-9]{4}$"
    # Input and output live in ./data below the current working directory.
    # os.path.join keeps the paths valid on every platform, not only Windows.
    data_dir = os.path.join(os.getcwd(), 'data')
    read_path = os.path.join(data_dir, '完整成績.csv')
    for college_number in range(20):
        # Two-digit, zero-padded college code: 00, 01, ..., 19.
        college_code = str(college_number).rjust(2, '0')
        new_rule = front_rule + college_code + last_rule
        write_path = os.path.join(data_dir, 'outdata' + college_code + '.csv')
        # One output file per college code.
        selcet(new_rule, read_path, write_path)
這裏小改了他的代碼:讀取完整成績的 csv,然後按學院號(00~19)把 20 個學院的數據分別輸出到各自對應的 csv 裏面:
裏面長這樣:
然後用py畫成散點圖分析平均分和中位數
import csv
import numpy as np
import matplotlib.pyplot as plt
import os
def myPlot(number,score,label,picname):
    """Save a scatter plot of ``score`` with mean and median reference lines.

    Args:
        number: number of points; x positions are ``range(number)`` and the
            reference lines span from 0 to ``number``.
        score: non-empty sequence of numeric scores, one per point.
        label: plot title; also used as the image file name (without ".png").
        picname: path prefix the file name is appended to verbatim, so a
            directory must end with a path separator.

    Raises:
        ValueError: if ``score`` is empty.
    """
    # Fail before any figure is created, so no figure is left open.
    if len(score) == 0:
        raise ValueError("myPlot() needs at least one score to plot")

    savepath = picname + label + ".png"

    # SimHei lets the Chinese title render; the second setting keeps the
    # minus sign displayable with that font.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    x = range(number)
    plt.scatter(x, score, marker='*', color='r', s=30)

    average = np.mean(score)
    median = np.median(score)
    plt.hlines(average, 0, number, 'r', '--', label='average:' + str(round(average,2)))
    plt.hlines(median, 0, number, 'b', '--', label='median:' + str(round(median,2)))
    plt.legend(loc='lower right')
    plt.title(label)
    plt.xticks(x)

    lowest, highest = min(score), max(score)
    if highest == lowest:
        # All scores equal (e.g. a single student): the step would be zero,
        # which np.arange cannot handle, so use one tick.
        my_y_ticks = [lowest]
    else:
        # Eleven evenly spaced ticks from the lowest to the highest score;
        # linspace avoids the stray extra tick a float step can give arange.
        my_y_ticks = np.linspace(lowest, highest, 11)
    plt.yticks(my_y_ticks)

    # The figure is only written to disk; plt.show() is deliberately not called.
    plt.savefig(savepath)
    plt.close()
# Common prefix of the per-college CSV files and of the per-college image
# directories: ./data/outdataNN.csv and ./data/outdataNN/
base_dir = os.path.join('.', 'data', 'outdata')
dataForm = r'.csv'
for collegeNumber in range(20):
    # Two-digit, zero-padded college code: 00, 01, ..., 19.
    college_code = str(collegeNumber).rjust(2,'0')
    currentCSV = base_dir + college_code + dataForm
    print("當前處理" + currentCSV)
    try:
        # The per-college files are written GBK-encoded, so read them the
        # same way; newline="" is what the csv module expects.
        with open(currentCSV, encoding="gbk", newline="") as f:
            rows = list(csv.reader(f))
    except FileNotFoundError:
        # No file is produced for a college without matching rows.
        rows = []
    if not rows:
        print("沒有數據,跳過:" + currentCSV)
        continue
    # Columns 2-5 hold the four subject scores, column 6 the total score.
    lesson1 = [int(row[2]) for row in rows]
    lesson2 = [int(row[3]) for row in rows]
    lesson3 = [int(row[4]) for row in rows]
    lesson4 = [int(row[5]) for row in rows]
    score = [int(row[6]) for row in rows]
    # The trailing separator is required: myPlot appends the file name
    # directly to this prefix.
    save_path = os.path.join(base_dir + college_code, '')
    os.makedirs(save_path, exist_ok=True)
    myPlot(len(score), score,'總成績',save_path)
    myPlot(len(lesson1), lesson1, '政治', save_path)
    myPlot(len(lesson2), lesson2, '英語', save_path)
    myPlot(len(lesson3), lesson3, '科一', save_path)
    myPlot(len(lesson4), lesson4, '科二', save_path)
效果如下(以學院16爲例子):
完整解析的數據下載
內含各個專業錄取人數和分數以及上述數據處理圖片:傳送門