# 今天用 sklearn 模塊中的決策樹部分簡單地嘗試了一下泰坦尼克生還問題
import pandas as pd
from sklearn.tree import DecisionTreeClassifier as DTC
# Feature columns shared by training and prediction, so that ``fit`` and
# ``predict`` always receive the same columns in the same order.
FEATURE_COLUMNS = ['Pclass', 'Sex', 'Parch', 'Fare', 'SibSp']

# --- Training data ----------------------------------------------------------
data = pd.read_csv('train.csv')
# Pass the axis by keyword: the positional form ``drop('PassengerId', 1)`` was
# deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
data = data.drop('PassengerId', axis=1)
# Encode the textual 'Sex' column as numbers (male -> 1, female -> 0) so it
# can be used as a numeric feature by the classifier.
data.loc[data['Sex'] == 'male', 'Sex'] = 1
data.loc[data['Sex'] == 'female', 'Sex'] = 0
x = data.loc[:, FEATURE_COLUMNS]
y = data.loc[:, 'Survived']

# --- Model ------------------------------------------------------------------
# Split on Gini impurity; pass criterion='entropy' to use information gain
# instead.
dtc = DTC(criterion='gini')
dtc.fit(x, y)
# NOTE: this is the accuracy on the very rows the tree was fitted on, so it
# says how well the tree reproduces the training set, not how well it
# generalises to unseen passengers.
print(dtc.score(x, y))

# --- Prediction -------------------------------------------------------------
test = pd.read_csv('test.csv')
# Apply the same 'Sex' encoding that was used for the training data.
test.loc[test['Sex'] == 'male', 'Sex'] = 1
test.loc[test['Sex'] == 'female', 'Sex'] = 0
# ``.loc`` slices by label and includes both end points: with the default
# integer index produced by ``read_csv`` this selects the rows labelled
# 1 through 100 (row 0 is not included).
testpart = test.loc[1:100, FEATURE_COLUMNS]
print(dtc.predict(testpart))