梳理各算法基础应用及场景
knn:
# k-nearest neighbours: classify a 2-D point from its 3 nearest training
# samples.
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Five labelled training points: three of class 'A', two of class 'B'.
X = np.array([[1, 1], [1, 1.5], [2, 2], [4, 3], [4, 4]])
y = np.array(['A', 'A', 'A', 'B', 'B'])

knn = KNeighborsClassifier(n_neighbors=3)
# Train the model.
knn.fit(X, y)

# Predict the class of one query point and its per-class probabilities.
query = [[3, 2]]
pred = knn.predict(query)
pred_proba = knn.predict_proba(query)
LinearRegression:
# Ordinary least-squares fit of a noisy straight line y = 5x + 6.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Simulated data: 50 evenly spaced x values plus uniform noise in [-2, 2).
x = np.linspace(0, 10, 50)
noise = np.random.uniform(-2, 2, size=50)
y = 5 * x + 6 + noise

# The estimator expects a 2-D feature matrix, so reshape to columns once
# and reuse them for both fitting and prediction.
x_col = np.reshape(x, (-1, 1))
y_col = np.reshape(y, (-1, 1))

# Create and fit the model.
liner = LinearRegression()
liner.fit(x_col, y_col)

# Predict on the training inputs.
y_pred = liner.predict(x_col)
logistic regression:
# Logistic regression on polynomial features, with a decision-region plot.
# NOTE(review): x_data and y_data are not defined in this snippet. The
# indexing below requires x_data to be a 2-D array with at least two feature
# columns; y_data is presumably the matching label vector -- confirm against
# the original data-loading code.
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures

# Polynomial feature expansion; `degree` controls the highest polynomial
# degree of the generated features.
poly_reg = PolynomialFeatures(degree=5)
# Expand the raw features.
x_poly = poly_reg.fit_transform(x_data)

# Define and train the logistic regression model.
logistic = linear_model.LogisticRegression()
logistic.fit(x_poly, y_data)

# Range covered by the two plotted features, padded by 1 on each side.
x_min, x_max = x_data[:, 0].min() - 1, x_data[:, 0].max() + 1
y_min, y_max = x_data[:, 1].min() - 1, x_data[:, 1].max() + 1

# Build a dense grid over that range.
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# ravel() and flatten() both turn a multi-dimensional array into 1-D.
# flatten() always returns a copy; ravel() returns a view when possible, so
# writing to its result can modify the original array.
grid = np.c_[xx.ravel(), yy.ravel()]
# Use transform() here: the expander was already fitted on the training data
# and must not be refitted on the plotting grid.
z = logistic.predict(poly_reg.transform(grid))
z = z.reshape(xx.shape)

# Filled contour plot of the predicted class over the grid.
cs = plt.contourf(xx, yy, z)
# Scatter plot of the samples, coloured by label.
plt.scatter(x_data[:, 0], x_data[:, 1], c=y_data)
plt.show()

# Score on the training data (the original author notes it is very high).
print('score:', logistic.score(x_poly, y_data))
Decision Tree:
# Regression tree limited to depth 3.
# NOTE(review): x and y are not defined in this snippet. x_test below is a
# single-column matrix, so x is presumably shaped (n_samples, 1) -- confirm.
import numpy as np
from sklearn.tree import DecisionTreeRegressor

# 'squared_error' replaces the old 'mse' spelling, which was deprecated in
# scikit-learn 1.0 and removed in 1.2 (passing 'mse' now raises an error).
dt_reg = DecisionTreeRegressor(criterion='squared_error', max_depth=3)
dt_reg.fit(x, y)

# Predict on 50 evenly spaced points in [-3, 3], as a single-column matrix.
x_test = np.linspace(-3, 3, 50).reshape(-1, 1)
y_hat = dt_reg.predict(x_test)
RandomForestClassifier:
# Random forest on the first two iris features, reporting the out-of-bag score.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

iris = load_iris()
X = iris.data[:, :2]  # sepal length and sepal width
y = iris.target

# Hold out 33% of the samples; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

# oob_score=True makes the fitted model expose oob_score_.
rnd_clf = RandomForestClassifier(
    n_estimators=15, max_leaf_nodes=16, n_jobs=1, oob_score=True)
rnd_clf.fit(X_train, y_train)
print(rnd_clf.oob_score_)
kmeans:
# K-means clustering into 3 clusters with k-means++ initialisation.
# NOTE(review): X is not defined in this snippet; it must be the feature
# matrix to cluster -- confirm against the original data-loading code.
import time

from sklearn.cluster import KMeans

k_means = KMeans(init='k-means++', n_clusters=3, n_init=10)
# Record when training starts. NOTE(review): the snippet never reads this
# value back, so the elapsed-time report appears to have been cut off.
begin_time = time.time()
k_means.fit(X)  # fit the clustering model
dbscan:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN

# Input file, resolved relative to the current working directory.
data_path = 'data.csv'
# Load the data file; it is decoded as GBK.
data_frame = pd.read_csv(data_path, encoding='gbk')
# DBSCAN clustering
def dbscan_cluster(x_label, y_label, eps=4, min_samples=5):
    """Cluster two columns of the module-level ``data_frame`` with DBSCAN.

    Prints the cluster label assigned to every row, then shows a scatter
    plot of the two columns coloured by cluster label.

    Args:
        x_label: Name of the column used as first feature and x axis.
        y_label: Name of the column used as second feature and y axis.
        eps: Value passed to ``DBSCAN(eps=...)``; defaults to 4, the value
            that used to be hard-coded.
        min_samples: Value passed to ``DBSCAN(min_samples=...)``; defaults
            to 5, the value that used to be hard-coded.
    """
    # Build the DBSCAN model.
    clu = DBSCAN(eps=eps, min_samples=min_samples)
    X_value = data_frame[[x_label, y_label]].values
    # Run the DBSCAN clustering.
    clu.fit(X_value)
    # Print the cluster each sample belongs to.
    print('樣本所屬簇編號:', clu.labels_)

    # Visualise the clustering as a scatter plot.
    # Plot settings:
    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels correctly
    plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
    # Colour each point by its cluster label.
    plt.scatter(data_frame[x_label], data_frame[y_label], c=clu.labels_)
    plt.title('DBSCAN聚類結果')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()
# Run the clustering only when this file is executed as a script.
if __name__ == '__main__':
    dbscan_cluster('當月MOU', '當月DOU')
數據標準化:
from sklearn.preprocessing import StandardScaler

# Standardisation: rescale each column to zero mean and unit variance.
data = [[0, 0], [0, 0], [1, 1], [1, 1]]
scaler = StandardScaler()
print(scaler.fit(data))  # fit() returns the estimator, so this prints its repr
print(scaler.mean_)  # per-column mean learned by fit()
print(scaler.transform(data))  # the standardised data
from sklearn.preprocessing import MinMaxScaler

# Min-max scaling: rescale each column to the [0, 1] range.
data = [[-1, 2], [-0.5, 6], [1, 10], [0, 18]]
scaler = MinMaxScaler()
print(scaler.fit_transform(data))
# The scaler was fitted by fit_transform() above; print the estimator itself
# rather than refitting on the same data only to show its repr.
print(scaler)
print(scaler.data_max_)  # per-column maximum seen during fitting
梯度下降 SGDRegressor(注意:scikit-learn 只提供 SGDRegressor,並沒有 BGDRegressor、MBGDRegressor 這兩個類):
# Linear regression trained with stochastic gradient descent.
# NOTE(review): x_train_standard, y_train1, x_test_standard and y_test1 are
# not defined in this snippet; the names suggest already-standardised
# train/test splits -- confirm against the original preprocessing code.
from sklearn.linear_model import SGDRegressor

sgd1 = SGDRegressor()
sgd1.fit(x_train_standard, y_train1)
print(sgd1.coef_)  # learned feature weights
print(sgd1.intercept_)  # learned bias term
print(sgd1.score(x_test_standard, y_test1))  # score on the test split
總結
以上是生活随笔為你收集整理的梳理各算法基础应用及场景的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: mysql分区-索引
- 下一篇: kibana安装步骤