2020美賽建模D題(mysql+SQLAlchemy+pytorch)數據處理方面總結

MySQL 數據庫,SQLAlchemy ORM

用的pyplot作圖
pytorch建模

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column,Float, Integer, String,or_
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import random
class dataAnalyzer(object):
    """Query the ``passdata`` MySQL database and plot statistics for the Huskies.

    The class owns three ORM mappings (``Matches``, ``Fullevents``,
    ``Passingevents``) and one SQLAlchemy ``Session``.  Every ``get_*`` method
    runs a query through that session and either prints a number or opens a
    matplotlib window.
    """
    Base = declarative_base()

    # NOTE(review): this URL embeds the database password in source control.
    # It is kept as the default so existing callers behave the same; pass
    # ``db_url`` to ``__init__`` to supply credentials from elsewhere.
    DEFAULT_DB_URL = "mysql+mysqlconnector://root:000422@localhost:3306/passdata"

    class Matches(Base):
        """ORM mapping of the ``matches`` table, keyed by ``MatchID``."""
        __tablename__ = 'matches'  # table name
        # MySQL cannot emit VARCHAR without a length, so every String is sized;
        # otherwise create_all() fails whenever the table does not exist yet.
        MatchID = Column(String(20), primary_key=True)
        OpponentID = Column(String(20))
        Outcome = Column(String(10))
        OwnScore = Column(Integer)
        OpponentScore = Column(Integer)
        Side = Column(String(10))
        CoachID = Column(String(10))

    class Fullevents(Base):
        """ORM mapping of the ``fullevents`` table (all event types).

        The ORM identity is the combination of match, team, originating
        player, period and event time.  Using ``EventTime`` alone made the
        session return the same object for different events that happen to
        share a timestamp.  Two events that agree on all five columns would
        still be merged.
        """
        __tablename__ = 'fullevents'  # table name
        MatchID = Column(String(20), primary_key=True)
        TeamID = Column(String(20), primary_key=True)
        OriginPlayerID = Column(String(32), primary_key=True)
        DestinationPlayerID = Column(String(32))
        MatchPeriod = Column(String(10), primary_key=True)
        EventTime = Column(Float, primary_key=True)
        EventType = Column(String(32))
        EventSubType = Column(String(32))
        EventOrigin_x = Column(Float)
        EventOrigin_y = Column(Float)
        EventDestination_x = Column(Float)
        EventDestination_y = Column(Float)

    class Passingevents(Base):
        """ORM mapping of the ``passingevents`` table (passes only).

        Uses the same composite identity as ``Fullevents`` so that passes with
        an identical ``EventTime`` are not collapsed into one object.
        """
        __tablename__ = 'passingevents'  # table name
        MatchID = Column(String(20), primary_key=True)
        TeamID = Column(String(20), primary_key=True)
        OriginPlayerID = Column(String(32), primary_key=True)
        DestinationPlayerID = Column(String(32))
        MatchPeriod = Column(String(10), primary_key=True)
        EventTime = Column(Float, primary_key=True)
        EventSubType = Column(String(32))
        EventOrigin_x = Column(Float)
        EventOrigin_y = Column(Float)
        EventDestination_x = Column(Float)
        EventDestination_y = Column(Float)

    def __init__(self, db_url=None):
        """Open a session and create any mapped table that is missing.

        Args:
            db_url: SQLAlchemy database URL.  When omitted, ``DEFAULT_DB_URL``
                is used, which matches the previously hard-coded connection.
        """
        engine = create_engine(db_url or self.DEFAULT_DB_URL, max_overflow=5)
        self.session = Session(engine)
        self.Base.metadata.create_all(engine)

    def close(self):
        """Close the session opened by ``__init__``."""
        self.session.close()

    @staticmethod
    def _player_position(player_id):
        """Return the position letter of a player ID, or ``""`` if missing.

        Assumes IDs look like ``<team>_<letter><number>`` (e.g. ``Huskies_F1``),
        which is what the earlier ``player_id[-2] == "F"`` check implied.
        Taking the first character after the last underscore also works for
        two-digit numbers, where ``[-2]`` returned a digit instead.
        """
        if not player_id:
            return ""
        return player_id.rsplit("_", 1)[-1][:1]

    def _huskies_passes(self, passtype, Period, match_id):
        """Return Huskies passes of one sub-type in one period of one match."""
        passes = self.Passingevents
        return self.session.query(passes).filter(
            passes.TeamID == "Huskies",
            passes.MatchID == match_id,
            passes.EventSubType == passtype,
            passes.MatchPeriod == Period,
        ).all()

    @staticmethod
    def _plot_pass_segments(rows, label):
        """Draw each pass as a dashed red origin-to-destination segment.

        Only the first segment carries ``label`` so the legend shows a single
        entry instead of one per pass.
        """
        for index, row in enumerate(rows):
            plt.plot(
                [row.EventOrigin_x, row.EventDestination_x],
                [row.EventOrigin_y, row.EventDestination_y],
                'r--',
                label=label if index == 0 else "_nolegend_",
            )

    def _win_rate(self, side, total_matches):
        """Print and return wins on ``side`` divided by ``total_matches``."""
        if total_matches <= 0:
            raise ValueError("total_matches must be positive")
        wins = self.session.query(self.Matches).filter(
            self.Matches.Outcome == "win",
            self.Matches.Side == side,
        ).count()
        rate = wins / total_matches
        print(rate)
        return rate

    def get_TheAllPassMap(self,passtype="Simple pass",Period="1H",match_id=1):
        """Plot every Huskies pass of ``passtype`` in one period of one match.

        Args:
            passtype: value matched against ``EventSubType``.
            Period: value matched against ``MatchPeriod``.
            match_id: value matched against ``MatchID``.
        """
        rows = self._huskies_passes(passtype, Period, match_id)
        title = 'The picture of the {passtype} of Huskies in the {Period} of the match {match_id}'.format(passtype=passtype,Period=Period,match_id=match_id)
        plt.title(title)
        plt.xlabel('row')
        plt.ylabel('column')
        self._plot_pass_segments(rows, passtype)
        # legend() warns when nothing is labelled, which happens with no rows.
        if rows:
            plt.legend()
        plt.show()

    def get_homewin_rate(self, total_matches=19):
        """Print and return the share of matches won at home.

        Args:
            total_matches: denominator of the rate; defaults to the previously
                hard-coded 19.
        """
        return self._win_rate("home", total_matches)

    def get_awaywin_rate(self, total_matches=19):
        """Print and return the share of matches won away.

        Args:
            total_matches: denominator of the rate; defaults to the previously
                hard-coded 19.
        """
        return self._win_rate("away", total_matches)

    def get_attackpointMap(self,Player_type="F"):
        """Scatter-plot where the Huskies took free kicks and shots.

        Shots are split into those taken by players whose position letter
        equals ``Player_type`` and all other shots.

        Args:
            Player_type: position letter to highlight.  This argument used to
                be ignored in favour of a hard-coded ``"F"``.
        """
        events = self.Fullevents
        rows = self.session.query(events).filter(
            events.TeamID == "Huskies",
            or_(events.EventType == "Free Kick",
                events.EventType == "Shot"),
        ).all()

        freepos_x, freepos_y = [], []
        shot_x, shot_y = [], []
        shot_x_sel, shot_y_sel = [], []
        for row in rows:
            if row.EventType == "Free Kick":
                freepos_x.append(row.EventOrigin_x)
                freepos_y.append(row.EventOrigin_y)
            elif self._player_position(row.OriginPlayerID) == Player_type:
                shot_x_sel.append(row.EventOrigin_x)
                shot_y_sel.append(row.EventOrigin_y)
            else:
                shot_x.append(row.EventOrigin_x)
                shot_y.append(row.EventOrigin_y)

        # Keep the original legend text for the default position letter.
        if Player_type == "F":
            selected_label = "Forward player"
        else:
            selected_label = "{} player".format(Player_type)

        # Colours: other shots red, free kicks blue, highlighted position green.
        title = 'The picture of the shot position of Huskies '
        plt.title(title)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.scatter(shot_x,shot_y, c="red", label="shot", alpha=0.6, edgecolors='white')
        plt.scatter(freepos_x,freepos_y, c="blue", label="free kick", alpha=0.6, edgecolors='white')
        plt.scatter(shot_x_sel, shot_y_sel, c="green", label=selected_label, alpha=0.6, edgecolors='white')
        plt.legend()
        plt.grid(True)
        plt.show()

    def get_pass_F(self,passtype="Simple pass",Period="1H",match_id=1):
        """Plot Huskies passes and mark those received by a forward.

        Args:
            passtype: value matched against ``EventSubType``.
            Period: value matched against ``MatchPeriod``.
            match_id: value matched against ``MatchID``.
        """
        rows = self._huskies_passes(passtype, Period, match_id)
        title = 'The picture of the fowardplayer when {passtype}ing in the {Period} of the match {match_id}'.format(passtype=passtype,
                                                                                                        Period=Period,
                                                                                                        match_id=match_id)
        plt.title(title)
        plt.xlabel('x')
        plt.ylabel('y')
        self._plot_pass_segments(rows, passtype)

        # Destination points of passes whose receiver is a forward.
        forward_rows = [row for row in rows
                        if self._player_position(row.DestinationPlayerID) == "F"]
        pos_F_x = [row.EventDestination_x for row in forward_rows]
        pos_F_y = [row.EventDestination_y for row in forward_rows]
        # These points are pass receptions, not shots; the legend used to say "shot".
        plt.scatter(pos_F_x,pos_F_y,c="black", label="Forward player", alpha=1.0, edgecolors='white')
        plt.legend()
        plt.show()

    def get_shotAnalyze(self):
        """Scatter-plot Huskies shot positions in two colour groups.

        NOTE(review): the "Effective attack point" group is not derived from
        the data.  Each shot lands in it when ``random.uniform(0, 10) <= 3``,
        so the split differs on every run.  Replace this with a real criterion
        before using the figure as evidence.
        """
        events = self.Fullevents
        rows = self.session.query(events).filter(
            events.TeamID == "Huskies",
            events.EventType == "Shot",
        ).all()

        shot_x, shot_y = [], []
        shot_x_A, shot_y_A = [], []
        for row in rows:
            if random.uniform(0,10) <= 3:
                shot_x_A.append(row.EventOrigin_x)
                shot_y_A.append(row.EventOrigin_y)
            else:
                shot_x.append(row.EventOrigin_x)
                shot_y.append(row.EventOrigin_y)

        title = 'The picture of the shot analyze position of Huskies '
        plt.title(title)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.scatter(shot_x, shot_y, c="green", label="shot point", alpha=0.6, edgecolors='white')
        plt.scatter(shot_x_A, shot_y_A, c="red", label="Effective attack point", alpha=0.6, edgecolors='white')
        plt.legend()
        plt.grid(True)
        plt.show()


# Script entry point: only connect to the database and open plots when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    d = dataAnalyzer()
    # d.get_TheAllPassMap(passtype="High pass",Period="2H")
    # d.get_pass_F()
    # d.get_homewin_rate()
    # d.get_awaywin_rate()
    d.get_shotAnalyze()
# The string literal below is a disabled scratch snippet (it refers to `self`,
# so it only makes sense inside a dataAnalyzer method); it is never executed.
'''
rows = self.session.query(self.Fullevents.TeamID,self.Fullevents.EventType,self.Fullevents.EventSubType).all()
        flag = False
        for row in rows:
            if(flag):
                print(row.TeamID,row.EventType,row.EventSubType)
                flag = False
            if(row.EventType=="Shot"):
                print(row.TeamID, row.EventType, row.EventSubType)
                flag  = True
'''

在比賽過程中,主要問題集中在:

  1. 數據的存取
  2. 對數據的分析,以及根據模型需要進行提取
  3. 製圖
  4. 模型擬合

數據存取

因爲給的是 CSV 文件,所以先用 Navicat 把數據導入了數據庫。以前一直用的是 Django 裏現成的 ORM,這次需要直接處理數據,選擇了 SQLAlchemy;用起來很生疏,還遇到了不少問題。

數據庫連接:

# Create the engine for the MySQL database; the parts of the URL are explained below.
engine = create_engine("mysql+mysqlconnector://root:XXX@localhost:3306/passdata", max_overflow=5)
        # root: MySQL user name
        # XXX: password
        # localhost:3306: connection address (host:port)
        # passdata: database (schema) name

對已有的表關係進行映射:

# Reflect a single existing table from the database.
# NOTE(review): `Table` and `metadata` are not defined in this snippet —
# presumably `from sqlalchemy import Table, MetaData` and
# `metadata = MetaData()` are expected beforehand; confirm.
Admin = Table('admin', metadata, autoload=True, autoload_with=engine)
session = Session(engine)

# The string literal below is a disabled alternative that reflects every
# table through automap_base(); it is not executed.
'''反射數據庫所有的表
Base = automap_base()
Base.prepare(engine, reflect=True)
Admin = Base.classes.admin
'''

這種方法,反射單表可以操作,但是之後進行數據處理,filter無法識別表名,下面的方法我嘗試無效
2. 使用 declarative_base 手動聲明映射類:

Base = declarative_base()


class Matches(Base):
    """Declarative mapping of the existing ``matches`` table."""
    __tablename__ = 'matches'  # table name
    # MySQL needs a VARCHAR length, so the key column is sized as well;
    # without it create_all() cannot emit the CREATE TABLE statement.
    MatchID = Column(String(20), primary_key=True)
    OpponentID = Column(String(20))
    Outcome = Column(String(10))
    OwnScore = Column(Integer)
    OpponentScore = Column(Integer)
    Side = Column(String(10))
    CoachID = Column(String(10))


# Creates only the mapped tables that do not exist yet.
Base.metadata.create_all(engine)

對已有的表關係,以Base爲基類,根據數據類型進行構造,然後執行create_all即可

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章