# python数据分类_Python数据挖掘实践—KNN分类
# -*- coding: UTF-8 -*-
import math
import csv
import random
import operator
'''@author:hunter@time:2017.03.31'''
class KNearestNeighbor(object):
    """Minimal k-nearest-neighbour classifier driven by a CSV data file.

    Every data row is treated as a sequence of numeric feature columns
    followed by the class label in the last column.
    """

    def __init__(self):
        pass

    def loadDataset(self, filename, split, trainingSet, testSet):
        """Read a CSV file and randomly split its rows into train and test.

        All columns except the last one are converted to ``float``; the last
        column is kept as the (string) class label. Blank lines are skipped.

        Args:
            filename: Path of the CSV file to read.
            split: Threshold compared against ``random.random()``; a row goes
                to ``trainingSet`` when the random draw is below it.
            trainingSet: List that receives the training rows (mutated).
            testSet: List that receives the test rows (mutated).

        Raises:
            ValueError: If a feature column cannot be converted to float.
        """
        # newline='' is what the csv module asks for when reading files.
        with open(filename, 'r', newline='') as csvfile:
            dataset = list(csv.reader(csvfile))
        for row in dataset:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            for y in range(len(row) - 1):
                row[y] = float(row[y])
            if random.random() < split:
                trainingSet.append(row)
            else:
                testSet.append(row)

    def calculateDistance(self, testdata, traindata, length):
        """Return the Euclidean distance over the first ``length`` columns.

        Args:
            testdata: Row being classified.
            traindata: Training row to compare against.
            length: Number of leading (feature) columns to include.
        """
        distance = 0
        for x in range(length):
            distance += pow((testdata[x] - traindata[x]), 2)
        return math.sqrt(distance)

    def getNeighbors(self, trainingSet, testInstance, k):
        """Return the (at most) ``k`` training rows closest to ``testInstance``.

        The last column of ``testInstance`` is treated as the label and is
        excluded from the distance. If ``k`` exceeds the number of training
        rows, all training rows are returned (nearest first).
        """
        distances = []
        length = len(testInstance) - 1
        for trainRow in trainingSet:
            dist = self.calculateDistance(testInstance, trainRow, length)
            print('訓練集:{}-距離:{}'.format(trainRow, dist))
            distances.append((trainRow, dist))
        # Ascending by distance; the sort is stable so ties keep file order.
        distances.sort(key=operator.itemgetter(1))
        # Slicing (instead of indexing 0..k-1) tolerates k > len(trainingSet).
        neighbors = [row for row, _ in distances[:max(k, 0)]]
        print(neighbors)
        return neighbors

    def getResponse(self, neighbors):
        """Return the majority class label among ``neighbors``.

        The label is read from the last column of each neighbour row. On a
        tie, the label that was seen first wins.
        """
        classVotes = {}
        for row in neighbors:
            response = row[-1]
            classVotes[response] = classVotes.get(response, 0) + 1
        print(classVotes.items())
        # Descending by vote count; stable, so earlier labels win ties.
        sortedVotes = sorted(classVotes.items(),
                             key=operator.itemgetter(1), reverse=True)
        return sortedVotes[0][0]

    def getAccuracy(self, testSet, predictions):
        """Return the percentage of test rows whose label was predicted.

        Args:
            testSet: Test rows; the true label is the last column of each.
            predictions: Predicted labels, aligned with ``testSet`` by index.

        Returns:
            Accuracy as a float in ``[0.0, 100.0]``; ``0.0`` for an empty
            test set (which a random split can legitimately produce).
        """
        correct = 0
        for x, row in enumerate(testSet):
            if row[-1] == predictions[x]:
                correct += 1
        print('共有{}個預測正確,共有{}個測試數據'.format(correct, len(testSet)))
        if not testSet:
            # Avoid ZeroDivisionError when no rows landed in the test split.
            return 0.0
        return (correct / float(len(testSet))) * 100.0

    def Run(self, filename=r'testdata.txt', split=0.75, k=3):
        """Load the data, classify every test row and print the accuracy.

        Args:
            filename: CSV file to load (defaults to ``testdata.txt``).
            split: Train/test split threshold passed to ``loadDataset``.
            k: Number of nearest neighbours that vote on each prediction.
        """
        trainingSet = []
        testSet = []
        self.loadDataset(filename, split, trainingSet, testSet)
        print('Train set: ' + str(len(trainingSet)))
        print('Test set: ' + str(len(testSet)))
        # Generate one prediction per test row.
        predictions = []
        for testRow in testSet:
            neighbors = self.getNeighbors(trainingSet, testRow, k)
            result = self.getResponse(neighbors)
            predictions.append(result)
        accuracy = self.getAccuracy(testSet, predictions)
        print('Accuracy: ' + repr(accuracy) + '%')
# Script entry point: run the classifier with its default settings.
if __name__ == '__main__':
    classifier = KNearestNeighbor()
    classifier.Run()
# 總結
# 以上是生活随笔為你收集整理的python数据分类_Python数据挖掘实践—KNN分类的全部內容,希望文章能夠幫你解決所遇到的問題。
# - 上一篇: 苹果x的型号有哪些
# - 下一篇: python2异步编程_python异步