1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
import numpy as np
import matplotlib.pyplot as plt
# Load the iris dataset, keeping only the first two feature columns.
from sklearn.datasets import load_iris

iris = load_iris()
X = iris.data[:, :2]
y = iris.target

# Hold out 30% of the samples as a test set; the split is stratified on the
# labels and seeded so that it is reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Fit a decision tree with default settings and score it on the test set.
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)
# Bare expression: the score is computed but only displayed when this runs in
# an interactive / notebook session.
accuracy_score(y_test, y_pred)
# Draw a diagram of the fitted decision tree.
from sklearn.tree import export_graphviz
from io import StringIO
from IPython.display import Image
import pydot


def create_png(clf):
    """Return PNG image data for a diagram of the decision tree ``clf``.

    The tree is exported as Graphviz DOT text into an in-memory buffer,
    parsed with pydot, and the first resulting graph is rendered to PNG.

    Args:
        clf: A fitted decision tree. Node labels use
            ``iris.feature_names[:2]``, so the tree is expected to have been
            trained on the first two iris features.

    Returns:
        The PNG data produced by pydot's ``create_png()``.
    """
    dot_iris = StringIO()
    # The estimator is the first positional argument; the DOT text is written
    # to the buffer instead of a file on disk.
    export_graphviz(
        clf,
        out_file=dot_iris,
        feature_names=iris.feature_names[:2],
        filled=True,
    )
    # graph_from_dot_data returns a list of graphs; only the first is used.
    graphs = pydot.graph_from_dot_data(dot_iris.getvalue())
    return graphs[0].create_png()


Image(create_png(dtc))
# Search for the best hyper-parameters with GridSearchCV.
from sklearn.model_selection import GridSearchCV

dtc = DecisionTreeClassifier()
grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [3, 5, 7, 9, 20],
}
gs = GridSearchCV(dtc, param_grid=grid, cv=5)
gs.fit(X_train, y_train)

# Test-set accuracy of the model obtained from the grid search.
accuracy_score(y_test, gs.predict(X_test))

# Inspect the search results and the best estimator found.
gs.cv_results_
gs.best_estimator_

# Diagram of the decision tree selected by the grid search.
Image(create_png(gs.best_estimator_))
# Explore how the maximum depth affects decision-tree performance.
# The candidate depths are defined once so that the searched grid and the
# x-axis of the plot below always stay in sync.
depths = list(range(3, 50))
# param_grid must be a dict mapping a parameter name to its candidate values.
grid = {'max_depth': depths}
gs2 = GridSearchCV(dtc, param_grid=grid, cv=5)
gs2.fit(X_train, y_train)
gs2.cv_results_['mean_test_score']
# Plot the mean cross-validation score against each candidate depth.
plt.plot(depths, gs2.cv_results_['mean_test_score'])
|