生活随笔
收集整理的这篇文章主要介绍了《机器学习实战-集成学习-23》,小编觉得挺不错的,现在分享给大家,供大家参考。
集成學習-泰坦尼克號船員獲救預測
# Ensemble learning - Titanic survival prediction.
# Step 1: load the data and preprocess it into purely numeric features.
import pandas

titanic = pandas.read_csv("titanic_train.csv")

# Fill missing ages with the median age so the Age column has no NaNs.
titanic["Age"] = titanic["Age"].fillna(titanic["Age"].median())
print(titanic.describe())

# Encode Sex as an integer: 0 = male, 1 = female.
print(titanic["Sex"].unique())
titanic.loc[titanic["Sex"] == "male", "Sex"] = 0
titanic.loc[titanic["Sex"] == "female", "Sex"] = 1

# Fill missing embarkation ports with the most common value 'S',
# then encode S/C/Q as 0/1/2.
print(titanic["Embarked"].unique())
titanic["Embarked"] = titanic["Embarked"].fillna('S')
titanic.loc[titanic["Embarked"] == "S", "Embarked"] = 0
titanic.loc[titanic["Embarked"] == "C", "Embarked"] = 1
titanic.loc[titanic["Embarked"] == "Q", "Embarked"] = 2

from sklearn.preprocessing import StandardScaler

# Feature columns used by every model below; target is Survived.
predictors = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
x_data = titanic[predictors]
y_data = titanic["Survived"]

# Standardize features to zero mean / unit variance (important for the
# logistic-regression and neural-network models below).
scaler = StandardScaler()
x_data = scaler.fit_transform(x_data)
# Logistic regression baseline, scored with 3-fold cross-validation.
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
scores = model_selection.cross_val_score(LR, x_data, y_data, cv=3)
print(scores.mean())
# Neural network: a small MLP with two hidden layers (20 and 10 units),
# scored with 3-fold cross-validation.
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(hidden_layer_sizes=(20, 10), max_iter=1000)
scores = model_selection.cross_val_score(mlp, x_data, y_data, cv=3)
print(scores.mean())
# K-nearest neighbors with k=21, scored with 3-fold cross-validation.
from sklearn import neighbors

knn = neighbors.KNeighborsClassifier(21)
scores = model_selection.cross_val_score(knn, x_data, y_data, cv=3)
print(scores.mean())
# Decision tree, depth-limited to reduce overfitting,
# scored with 3-fold cross-validation.
from sklearn import tree

dtree = tree.DecisionTreeClassifier(max_depth=5, min_samples_split=4)
scores = model_selection.cross_val_score(dtree, x_data, y_data, cv=3)
print(scores.mean())
# Random forest: first a small forest (10 trees), then a larger one
# (100 trees) to show the effect of more estimators. Both scored with
# 3-fold cross-validation.
from sklearn.ensemble import RandomForestClassifier

RF1 = RandomForestClassifier(random_state=1, n_estimators=10, min_samples_split=2)
scores = model_selection.cross_val_score(RF1, x_data, y_data, cv=3)
print(scores.mean())

RF2 = RandomForestClassifier(n_estimators=100, min_samples_split=4)
scores = model_selection.cross_val_score(RF2, x_data, y_data, cv=3)
print(scores.mean())
# Bagging: 20 bootstrap-resampled copies of the larger random forest (RF2),
# scored with 3-fold cross-validation.
from sklearn.ensemble import BaggingClassifier

bagging_clf = BaggingClassifier(RF2, n_estimators=20)
scores = model_selection.cross_val_score(bagging_clf, x_data, y_data, cv=3)
print(scores.mean())
# AdaBoost built on top of the bagging classifier (10 boosting rounds),
# scored with 3-fold cross-validation.
from sklearn.ensemble import AdaBoostClassifier

adaboost = AdaBoostClassifier(bagging_clf, n_estimators=10)
scores = model_selection.cross_val_score(adaboost, x_data, y_data, cv=3)
print(scores.mean())
# Stacking / voting ensembles.
# NOTE: StackingClassifier comes from the third-party mlxtend package;
# sclf is built here but only the voting ensemble (sclf2) is actually
# cross-validated below, matching the original tutorial.
from sklearn.ensemble import VotingClassifier
from mlxtend.classifier import StackingClassifier

sclf = StackingClassifier(
    classifiers=[bagging_clf, mlp, LR],
    meta_classifier=LogisticRegression(),
)

# Majority-vote ensemble over the previously trained model family.
sclf2 = VotingClassifier([
    ('adaboost', adaboost),
    ('mlp', mlp),
    ('LR', LR),
    ('knn', knn),
    ('dtree', dtree),
])
scores = model_selection.cross_val_score(sclf2, x_data, y_data, cv=3)
print(scores.mean())
# Ensemble learning - breast cancer prediction.
# Load the dataset, encode the target, and run quick exploratory analysis.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")

df = pd.read_csv("data.csv")
df.head()

# Drop the id column — it carries no predictive information.
df = df.drop('id', axis=1)

# Encode the diagnosis label: M (malignant) -> 1, B (benign) -> 0.
df.diagnosis.unique()
df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})
df.head()
df.describe()

# Correlation heatmap of all features.
plt.figure(figsize=(20, 20))
p = sns.heatmap(df.corr(), annot=True, square=True)
plt.show()

# Class balance of the target.
print(df.diagnosis.value_counts())
p = df.diagnosis.value_counts().plot(kind="bar")
plt.show()

# Split into features and target.
x_data = df.drop(['diagnosis'], axis=1)
y_data = df['diagnosis']
# Hold out 30% for testing; stratify so both splits keep the same
# malignant/benign class ratio.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.3, stratify=y_data
)
# Train a family of classifiers on the same split and record each one's
# test accuracy in `log` for the comparison chart below.
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier

classifiers = [
    KNeighborsClassifier(3),
    LogisticRegression(),
    MLPClassifier(hidden_layer_sizes=(20, 50), max_iter=10000),
    DecisionTreeClassifier(),
    RandomForestClassifier(max_depth=9, min_samples_split=3),
    AdaBoostClassifier(),
    BaggingClassifier(),
]

log = []
for clf in classifiers:
    clf.fit(x_train, y_train)
    name = clf.__class__.__name__
    print("=" * 30)
    print(name)
    print('****Results****')
    test_predictions = clf.predict(x_test)
    acc = accuracy_score(y_test, test_predictions)
    print("Accuracy: {:.4%}".format(acc))
    # Store accuracy as a percentage for the bar chart.
    log.append([name, acc * 100])
    print("=" * 30)
# Turn the accuracy log into a DataFrame and plot a horizontal bar chart
# comparing all classifiers.
log = pd.DataFrame(log)
log
log.rename(columns={0: 'Classifier', 1: 'Accuracy'}, inplace=True)

sns.barplot(x='Accuracy', y='Classifier', data=log, color="b")
plt.xlabel('Accuracy %')
plt.title('Classifier Accuracy')
plt.show()
總結
以上是生活随笔为你收集整理的《机器学习实战-集成学习-23》的全部内容,希望文章能够帮你解决所遇到的问题。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。