當前位置：首頁 > 编程语言 > python >内容正文

python

Python—实训day10—Matplotlib数据可视化和scikit-learn构建模型

發布時間：2023/12/18 python 23 豆豆

生活随笔收集整理的這篇文章主要介紹了 Python—实训day10—Matplotlib数据可视化和scikit-learn构建模型，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

1.Matplotlib數據可視化基礎

import matplotlib.pyplot as plt

import numpy as np

# Select the SimHei font for sans-serif text; presumably this is so the Chinese
# titles and labels used below render correctly (the font must be installed).
plt.rcParams['font.sans-serif'] = 'SimHei'

#------------ 1. Scatter plot: plt.scatter

# Sample data shared by the scatter and line examples: x runs from 0 up to
# (but not including) 1 in steps of 0.05; y and y1 are x squared and x to the 4th.
x = np.arange(0, 1, 0.05)

y = x**2

y1 = x**4

# Basic scatter plot with default settings.

plt.scatter(x, y)

plt.show()  # display the figure

##---- A single, customised figure

plt.figure(figsize=(8, 6))  # set the canvas size

plt.scatter(x, y)

plt.title('散點圖')  # figure title

plt.xlabel('x')  # x-axis label

plt.ylabel('y')  # y-axis label

plt.xlim(0.2, 0.6)  # restrict the visible x-axis range

plt.ylim(0, 0.4)  # restrict the visible y-axis range

plt.show()  # display the figure

##----- Several subplots on one canvas
# Step 1: create the canvas.
fig = plt.figure(figsize=(12, 8))

# Step 2: add each subplot in a 1x2 grid and draw into it. The plt.* calls
# are issued right after add_subplot, exactly as in a hand-unrolled version,
# so each scatter/title pair lands in the subplot that was just added.
panels = [(y, 'y=x^2'), (y1, 'y=x^4')]
for position, (values, caption) in enumerate(panels, start=1):
    fig.add_subplot(1, 2, position)
    plt.scatter(x, values)
    plt.title(caption)

# Step 3: display the figure.
plt.show()

##---- Several series drawn on the same axes
for values in (y, y1):
    plt.scatter(x, values)
plt.legend(['y=x^2', 'y=x^4'])  # legend entries follow the drawing order
plt.show()

#------------ 2. Line chart: plt.plot
# Plain line chart with default styling.
plt.plot(x, y)
plt.show()

# Styled line chart.
line_style = {
    'c': 'r',           # line colour
    'linestyle': '--',  # line type
    'marker': '*',      # marker drawn at each data point
}
plt.plot(x, y, **line_style)
plt.title('折線圖')  # figure title
plt.xlabel('x')      # x-axis label
plt.ylabel('y')      # y-axis label
plt.show()

# Two stacked subplots: a scatter plot on top, a line chart below.
## Step 1: create the canvas.
fig = plt.figure(figsize=(4, 8))

## Step 2: add each subplot in a 2x1 grid and draw into it.
for row, (draw, values) in enumerate(((plt.scatter, y), (plt.plot, y1)), start=1):
    fig.add_subplot(2, 1, row)
    draw(x, values)

## Step 3: save, then display (the original note says to save before showing).
# NOTE(review): the output path is a hard-coded Windows location; adjust it
# for the machine this runs on.
plt.savefig(r'F:\Desktop\a.png')
plt.show()

#------------ 3. Bar chart: plt.bar
# (The original heading called this a histogram; plt.bar draws a bar chart.)

# One value per entry in `label` below; the axis label and title further
# down describe these as this week's sales figures.
data = [100, 170, 160, 250, 150, 160, 120]

# Weekday names (Monday through Sunday) used as x-axis tick labels.
label = ['星期一', '星期二', '星期三', '星期四', '星期五', '星期六', '星期日']

# Bars are placed at integer positions 0..len(data)-1.
plt.bar(range(len(data)), data)

plt.xticks(range(len(data)), label)  # replace the numeric ticks with the weekday names

plt.xlabel('星期')  # x-axis label

plt.ylabel('銷售額')  # y-axis label

plt.title('本周銷售額')  # figure title

plt.show()  # display the figure

#------------ 4. Pie chart: plt.pie

# Plain pie chart of `data` with default settings.

plt.pie(data)

plt.show()

# Enriched pie chart.

# labels names each wedge; autopct formats the percentage shown on each wedge
# (two decimal places followed by a literal percent sign).
plt.pie(data, labels=label, autopct='%.2f%%')

plt.title('本周銷售額餅圖')  # figure title

plt.show()

#------------ 5. Box plot: plt.boxplot

# Single box summarising `data`.
plt.boxplot(data)

plt.show()

# Several boxes on one figure: pass a list of datasets, one box per dataset.

data1 = [100, 170, 120, 200, 150, 160, 120]

plt.boxplot([data, data1])

plt.show()

2.使用scikit-learn構建模型

#================== 1. Processing data with sklearn transformers
#------------ 1.1 Load a dataset from the datasets module
# Install scikit-learn with:
#   pip install scikit-learn -i https://mirrors.aliyun.com/pypi/simple
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this import only works with an older scikit-learn release.
from sklearn.datasets import load_boston

boston = load_boston()  # load the Boston house-price data

# Bind the features and labels to the names used by the split below.
# Previously these were bare expressions, so X was undefined (NameError) and
# y still referred to the x**2 array from the plotting section.
X = boston['data']    # feature data
y = boston['target']  # label data

#------------ 1.2 Split the dataset into a training set and a test set
from sklearn.model_selection import train_test_split

# test_size=0.2 gives an 8:2 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

print('訓練集的特征數據形狀為：', X_train.shape)
print('測試集的特征數據形狀為：', X_test.shape)
print('訓練集的標簽數據形狀為：', y_train.shape)
print('測試集的標簽數據形狀為：', y_test.shape)

#------------ 1.3 Preprocessing and dimensionality reduction with sklearn transformers
#---- 1.3.1 Preprocessing
#-- Min-max scaling
from sklearn.preprocessing import MinMaxScaler

# Learn the scaling rule from the training set only, then apply that same
# rule to both the training set and the test set.
minmaxscaler = MinMaxScaler()
minmaxscaler.fit(X_train)
X_train_scaler, X_test_scaler = (
    minmaxscaler.transform(part) for part in (X_train, X_test)
)

#-- Standardisation (zero mean, unit variance)
from sklearn.preprocessing import StandardScaler

standardscaler = StandardScaler()
standardscaler.fit(X_train)  # learn the rule from the training set
X_train_standard, X_test_standard = (
    standardscaler.transform(part) for part in (X_train, X_test)
)

#---- 1.3.2 Dimensionality reduction
from sklearn.decomposition import PCA

# Fit PCA with 8 components on the standardised training features, then
# project both splits with the fitted model.
pca = PCA(n_components=8)
pca.fit(X_train_standard)
X_train_standard_8 = pca.transform(X_train_standard)
X_test_standard_8 = pca.transform(X_test_standard)

print('X_train_standard_8的形狀為：', X_train_standard_8.shape)
print('X_test_standard_8的形狀為：', X_test_standard_8.shape)

#================== 2. Build and evaluate a clustering model
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.metrics import calinski_harabasz_score

iris = load_iris()   # load the iris dataset
data = iris['data']  # feature data (rebinds the earlier `data` list)

# Build a 3-cluster KMeans model and train it on the features.
model = KMeans(n_clusters=3).fit(data)

# Bare expressions: they display the value in an interactive session and
# have no visible effect when the file is run as a script.
model.labels_           # cluster label assigned to each sample
model.cluster_centers_  # coordinates of the cluster centres

# Visualise the clusters: one scatter series per cluster label, plotting
# feature column 0 against feature column 1.
cluster_ids = [0, 1, 2]
for cluster_id in cluster_ids:
    members = model.labels_ == cluster_id
    plt.scatter(data[members, 0], data[members, 1])
plt.legend(cluster_ids)
plt.show()

# Index-based evaluation (Calinski-Harabasz score); the result is only shown
# in an interactive session.
calinski_harabasz_score(data, model.labels_)

總結

以上是生活随笔為你收集整理的Python—实训day10—Matplotlib数据可视化和scikit-learn构建模型的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：学习方法书籍很好推荐
下一篇： Python—实训day11—Pyech