當前位置：首頁 > 编程语言 > python >内容正文

python

python mse函数_Python 线性回归处理糖尿病数据计算MSE等

發布時間：2024/3/12 python 32 豆豆

生活随笔收集整理的這篇文章主要介紹了「python mse函数_Python 线性回归处理糖尿病数据计算MSE等」，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

import pandas as pd

import seaborn as sns

from math import sqrt

import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

from sklearn.model_selection import train_test_split

# Load the target dataset from CSV into a DataFrame.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
adv_data = pd.read_csv(
    'C:/Users/24224/Documents/課程內容集合/srf/diabetes.csv',
    engine='python',
)

# Show the first rows of the frame and its (rows, columns) shape.
preview = adv_data.head()
print('head:', preview, '\nShape:', adv_data.shape)
print("1CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")

# --- Disabled: summary statistics, missing-value check, box plot ---------
# Summary statistics
# print(adv_data.describe())
# print("2CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")
#
# Missing-value check
# NOTE(review): as written this masks the frame down to its null cells and
# then counts the non-null ones, so it reports 0 for every column;
# `adv_data.isnull().sum()` is the usual per-column missing count.
# print(adv_data[adv_data.isnull() == True].count())
# print("3CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")
#
# adv_data.boxplot()
# plt.savefig("boxplot.jpg")
# plt.show()

# Correlation matrix of all columns.
# Correlation coefficient: r = cov(x, y) / (sigma_x * sigma_y).
# Rule of thumb: 0-0.3 weak, 0.3-0.6 moderate, 0.6-1 strong correlation.
correlations = adv_data.corr()
print(correlations)
print("4CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")

# --- Disabled: scatter plots of each feature against Outcome -------------
# seaborn's pairplot draws one panel per feature in x_vars against y_vars;
# height and aspect control the size and proportions of each panel, and
# kind='reg' overlays a regression fit.
# sns.pairplot(adv_data, x_vars=['Pregnancies', 'Glucose', 'BloodPressure','SkinThickness','Insulin','BMI','DiabetesPedigreeFunction','Age'], y_vars='Outcome', height=7, aspect=0.8, kind='reg')
# plt.savefig("pairplot.jpg")
# plt.show()

# Partition the dataset into training and test sets with scikit-learn.
# The feature matrix is every column from 'Pregnancies' through 'Age';
# the label is the 'Outcome' column. train_size / test_size are the
# fractions of rows assigned to each partition.
# NOTE(review): no random_state is passed, so the split (and every number
# printed afterwards) differs from run to run.
features = adv_data.loc[:, 'Pregnancies':'Age']
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    adv_data.Outcome,
    test_size=.20,
    train_size=.80,
)

print(
    "原始數據特征:", features.shape,
    ",訓練數據特征:", X_train.shape,
    ",測試數據特征:", X_test.shape,
)
print("5CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")

# --- Disabled: shapes of the label vectors --------------------------------
# print("原始數據標簽:", adv_data.Outcome.shape,
#       ",訓練數據標簽:", Y_train.shape,
#       ",測試數據標簽:", Y_test.shape)
# print("6CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")

# Fit an ordinary least-squares linear model on the training partition.
model = LinearRegression().fit(X_train, Y_train)

# a: intercept of the fitted line; b: one regression coefficient per feature.
a, b = model.intercept_, model.coef_
print("最佳擬合線:截距", a, ",回歸系數：", b)
print("7CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")

# --- Disabled: model.score (R^2) on the test partition --------------------
# score = model.score(X_test, Y_test)
# print(score)
# print("8CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC")

# Compute per-row errors of the fitted linear model and report MSE, RMSE
# and MAE.
#
# The fitted value for each row is intercept + sum(coef_i * feature_i);
# model.predict evaluates exactly that over all feature columns, so no term
# can be dropped by a broken line continuation and the feature count is not
# hard-coded.
# NOTE(review): the metrics are computed over every row of adv_data, which
# includes the rows the model was trained on, so they are not a held-out
# estimate. Use X_test / Y_test instead if that is what is wanted.
predicted = model.predict(adv_data.loc[:, 'Pregnancies':'Age'])  # model output
actual = adv_data.iloc[:, -1]  # last column of the frame: the true label

# Signed error per row: fitted value minus true value.
error = [float(fit) - float(truth) for fit, truth in zip(predicted, actual)]
print("Errors: ", error)

squaredError = [val * val for val in error]  # squared error per row
absError = [abs(val) for val in error]  # absolute error per row
print("Square Error: ", squaredError)
print("Absolute Value of Error: ", absError)

mse = sum(squaredError) / len(squaredError)
print("MSE = ", mse)  # mean squared error
print("RMSE = ", sqrt(mse))  # root mean squared error
print("MAE = ", sum(absError) / len(absError))  # mean absolute error

總結

以上是生活随笔為你收集整理的python mse函数_Python 线性回归处理糖尿病数据计算MSE等的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： python工具集
下一篇： websocket python爬虫_p

python

python mse函数_Python 线性回归处理糖尿病数据 计算MSE等

總結

python mse函数_Python 线性回归处理糖尿病数据计算MSE等