股票數據分析(雷達圖)--python

CSV 數據下載鏈接:https://pan.baidu.com/s/1KTS5WzfH4z9Y4U4rIG-3Ig
在這裏插入圖片描述

代碼:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans # 引入機器學習


# Configure matplotlib once for the whole script: SimHei is selected so that
# Chinese text displays correctly, and unicode_minus is disabled so that the
# minus sign displays correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

class GuPiaoData:
    """Staged pipeline that clusters stock indicators and draws a radar chart.

    Every stage reads the file written by the previous one and stores its own
    result under ``data/``; none of the methods creates that directory.
    Intended order: ``detedData`` -> ``cleanData`` -> ``chooseData`` ->
    ``transformData`` -> ``standarData`` -> ``classifyData``.
    """

    # Indicator columns as they are named in the raw data set.
    SOURCE_COLUMNS = [
        'Turnover rate',           # turnover rate
        'Range of Rise and Fall',  # price change range
        'Price-earning ratio',     # price-to-earnings ratio
        'Price-to-book ratio',     # price-to-book ratio
        'Marketing rate',          # price-to-sales ratio
        'Realization rate',        # price-to-cash-flow ratio
    ]
    # Chinese display names, in the same order as SOURCE_COLUMNS. They are
    # used as column names after transformData and as radar axis labels.
    FEATURE_COLUMNS = ['換手率', '漲跌率', '市盈率', '市淨率', '市銷率', '市現率']

    def detedData(self, filePath):
        """Explore the raw CSV and dump it, plus summary statistics, to Excel.

        :param filePath: path of the raw CSV file.
        :return: None. Writes ``data/Gupiaodata01.xls`` (the raw table) and
            ``data/gupiaodata_describe.xls`` (transposed ``describe`` output).
        """
        df = pd.read_csv(filePath)
        summary = df.describe(include='all')
        df.to_excel('data/Gupiaodata01.xls')
        summary.T.to_excel('data/gupiaodata_describe.xls')

    def cleanData(self, filePath):
        """Drop every row that has a missing value in any indicator column.

        :param filePath: path of the Excel file produced by ``detedData``.
        :return: None. Writes ``data/gupiaodata_clean.xls``.
        """
        df = pd.read_excel(filePath)
        # Keep only rows where all six indicators are present.
        df = df.dropna(subset=self.SOURCE_COLUMNS)
        df.to_excel('data/gupiaodata_clean.xls')

    def chooseData(self, filepath):
        """Keep only the indicator columns of the cleaned data.

        :param filepath: path of the cleaned Excel file.
        :return: None. Writes ``data/Gupiaodata_coredata.xls``.
        """
        df = pd.read_excel(filepath)
        df[self.SOURCE_COLUMNS].to_excel('data/Gupiaodata_coredata.xls')

    def transformData(self, filePath):
        """Rename the indicator columns to their Chinese display names.

        :param filePath: path of the Excel file produced by ``chooseData``.
        :return: None. Writes ``data/Gupiaodata_coretransformdata.xls``
            containing only the renamed indicator columns.
        """
        df = pd.read_excel(filePath)
        mapping = dict(zip(self.SOURCE_COLUMNS, self.FEATURE_COLUMNS))
        df = df.rename(columns=mapping)[self.FEATURE_COLUMNS]
        df.to_excel('data/Gupiaodata_coretransformdata.xls')

    def standarData(self, filepath):
        """Standardise each indicator as (value - mean) / standard deviation.

        :param filepath: path of the Excel file produced by ``transformData``.
        :return: None. Writes ``data/Gupiao_stdcoredata.xls``.
        """
        df = pd.read_excel(filepath)
        # Only the indicator columns are written out, so only they are
        # standardised; the index column saved by the previous stage is
        # left out of the arithmetic.
        features = df[self.FEATURE_COLUMNS]
        standardized = (features - np.mean(features, axis=0)) / np.std(features, axis=0)
        standardized.to_excel('data/Gupiao_stdcoredata.xls')

    @staticmethod
    def _radar_series(centers):
        """Turn cluster centres into closed polygons for a radar chart.

        :param centers: 2-D array-like, one row per cluster and one column
            per feature.
        :return: ``(angles, polygons)``. ``angles`` holds one evenly spaced
            angle (radians) per feature followed by the first angle again;
            ``polygons`` is ``centers`` with its first column appended so
            that each row ends where it starts.
        """
        centers = np.asarray(centers, dtype=float)
        feature_count = centers.shape[1]
        angles = np.linspace(0, 2 * np.pi, feature_count, endpoint=False)
        angles = np.concatenate((angles, angles[:1]))
        polygons = np.concatenate((centers, centers[:, :1]), axis=1)
        return angles, polygons

    def classifyData(self, filepath, k=5):
        """Cluster the standardised indicators with k-means and plot them.

        The cluster centres and the per-row labels are printed, then a radar
        chart is shown with one axis per indicator and one dashed polygon per
        cluster centre.

        :param filepath: path of the Excel file produced by ``standarData``.
        :param k: number of clusters.
        :return: None.
        """
        df = pd.read_excel(filepath)
        kmeans = KMeans(n_clusters=k)
        kmeans.fit(df[self.FEATURE_COLUMNS])
        print(kmeans.cluster_centers_)
        print(kmeans.labels_)

        # The axes follow the number of features, not the number of clusters,
        # so the chart is valid for any k.
        angles, polygons = self._radar_series(kmeans.cluster_centers_)

        fig = plt.figure()
        ax = fig.add_subplot(111, polar=True)
        for index, polygon in enumerate(polygons):
            # No colour in the format string: matplotlib's colour cycle gives
            # each cluster its own colour.
            ax.plot(angles, polygon, '--', linewidth=1,
                    label='cluster {}'.format(index))

        # The closing angle duplicates the first one, so it gets no tick;
        # that keeps the tick count equal to the label count.
        ax.set_thetagrids(angles[:-1] * 180 / np.pi, self.FEATURE_COLUMNS)

        ax.set_rlim(-2, 4)
        plt.legend(loc='best')
        plt.show()


if __name__ == '__main__':
    analyzer = GuPiaoData()
    # Earlier pipeline stages. Uncomment them, in this order, to rebuild the
    # intermediate files under data/ from the raw CSV.
    # analyzer.detedData('szgupiaodata.csv')
    # analyzer.cleanData('data/Gupiaodata01.xls')
    # analyzer.chooseData('data/gupiaodata_clean.xls')
    # analyzer.transformData('data/Gupiaodata_coredata.xls')
    # analyzer.standarData('data/Gupiaodata_coretransformdata.xls')

    # Final stage: cluster the standardised data into six groups and plot.
    analyzer.classifyData(filepath='data/Gupiao_stdcoredata.xls', k=6)
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章