KNN算法基本实例
KNN算法是機器學習領域中一個最基本的經典算法。它屬於監督學習領域的算法(需要帶有類別標簽的訓練數據),並且在模式識別、數據挖掘和特徵提取領域有著廣泛的應用。
給定一些先驗數據(也稱訓練數據),其中的坐標點已經按照某個屬性被劃分到不同的組。KNN的思路就是利用這些已經分類的數據,來判定新的數據點屬於哪個組。
下面,舉個例子來說明一下。圖中的數據點包含兩個特征:
現在,給出另外一組數據點(測試數據),通過分析訓練數據來為這些點分類。尚未分類的節點我們標記為白色,如下所示:
直觀來講,如果我們把這些節點畫到一張圖上,就可能看出一些簇(組)。現在,給定一個沒有分類的點,我們可以通過觀察它距離哪個組最近來確定它屬於哪個組。也就是說,假如一個點距離紅色的組最近,我們就可以把這個點歸為紅色的組。簡而言之,我們可以把第一個點(2.5,7)歸類為綠色,把第二個點(5.5,4.5)歸類為紅色。
算法流程:
假設m是訓練樣本的數量,p是一個未知的節點。
1 把所有訓練樣本放到一個數組arr[]中。也就是說,這個數組中的每個元素都可以用一個元組(x,y)表示。
2 偽碼
for i=0 to m: Calculate Euclidean distance d(arr[i], p).
3 取距離最小的K個值組成集合S。這裡每個距離都對應一個已經分類的數據點。
4 返回S中出現次數最多的標簽。
實際程序C++代碼:
// C++ program to find groups of unknown
// Points using K nearest neighbour algorithm.
#include <bits/stdc++.h>
using namespace std;
// One labelled 2-D sample used by the nearest-neighbour classifier.
struct Point
{
    int val;         // Group (class label) of the point
    double x;        // x co-ordinate of the point
    double y;        // y co-ordinate of the point
    double distance; // Distance from the test point
};
// Ordering predicate for sorting points by increasing distance:
// yields true exactly when a's distance is smaller than b's.
bool comparison(Point a, Point b)
{
    const bool aIsNearer = b.distance > a.distance;
    return aIsNearer;
}
// Classifies point p with the k nearest neighbour algorithm, assuming only
// two groups (labels 0 and 1).
//
// Parameters:
//   arr - training points. As a side effect every arr[i].distance is
//         overwritten with the Euclidean distance to p, and the array is
//         re-ordered by increasing distance.
//   n   - number of elements in arr.
//   k   - number of nearest neighbours that take part in the vote. Values
//         larger than n are clamped to n so the vote never reads past the
//         end of arr.
//   p   - the point to classify; only p.x and p.y are read.
//
// Returns 0 when strictly more voting neighbours have val == 0 than
// val == 1; otherwise returns 1 (this includes ties and the case where no
// neighbour votes). Neighbours whose val is neither 0 nor 1 are ignored.
int classifyAPoint(Point arr[], int n, int k, Point p)
{
    // Without training data nobody can vote; return the same answer the
    // vote below gives for "no voters" instead of touching arr at all.
    if (n <= 0)
        return 1;

    // Fill distances of all points from p
    for (int i = 0; i < n; i++)
    {
        const double dx = arr[i].x - p.x;
        const double dy = arr[i].y - p.y;
        arr[i].distance = std::sqrt(dx * dx + dy * dy);
    }

    // Sort the points by distance from p, nearest first
    std::sort(arr, arr + n, comparison);

    // Only the first `voters` elements vote. Clamping to n prevents the
    // out-of-bounds read that would otherwise happen when k > n.
    const int voters = std::min(k, n);

    int freq0 = 0; // Votes for group 0
    int freq1 = 0; // Votes for group 1
    for (int i = 0; i < voters; i++)
    {
        if (arr[i].val == 0)
            freq0++;
        else if (arr[i].val == 1)
            freq1++;
    }

    return (freq0 > freq1 ? 0 : 1);
}
// Driver code
int main()
{
int n = 17; // Number of data points
Point arr[n];
arr[0].x = 1;
arr[0].y = 12;
arr[0].val = 0;
arr[1].x = 2;
arr[1].y = 5;
arr[1].val = 0;
arr[2].x = 5;
arr[2].y = 3;
arr[2].val = 1;
arr[3].x = 3;
arr[3].y = 2;
arr[3].val = 1;
arr[4].x = 3;
arr[4].y = 6;
arr[4].val = 0;
arr[5].x = 1.5;
arr[5].y = 9;
arr[5].val = 1;
arr[6].x = 7;
arr[6].y = 2;
arr[6].val = 1;
arr[7].x = 6;
arr[7].y = 1;
arr[7].val = 1;
arr[8].x = 3.8;
arr[8].y = 3;
arr[8].val = 1;
arr[9].x = 3;
arr[9].y = 10;
arr[9].val = 0;
arr[10].x = 5.6;
arr[10].y = 4;
arr[10].val = 1;
arr[11].x = 4;
arr[11].y = 2;
arr[11].val = 1;
arr[12].x = 3.5;
arr[12].y = 8;
arr[12].val = 0;
arr[13].x = 2;
arr[13].y = 11;
arr[13].val = 0;
arr[14].x = 2;
arr[14].y = 5;
arr[14].val = 1;
arr[15].x = 2;
arr[15].y = 9;
arr[15].val = 0;
arr[16].x = 1;
arr[16].y = 7;
arr[16].val = 0;
/*Testing Point*/
Point p;
p.x = 2.5;
p.y = 7;
// Parameter to decide groupr of the testing point
int k = 3;
printf ("The value classified to unknown point"
" is %d.
", classifyAPoint(arr, n, k, p));
return 0;
}
View Code
實際程序python代碼:
# Python3 program to find groups of unknown
# Points using K nearest neighbour algorithm.

import math


def classifyAPoint(points, p, k=3):
    '''
    Find the classification of p using the k nearest neighbour
    algorithm. Only two groups are assumed.

    Parameters -
        points : Dictionary of training points with two keys - 0 and 1.
                 Each key maps to a list of the (x, y) training points
                 that belong to that group.

        p      : A tuple, the test data point of the form (x, y).

        k      : Number of nearest neighbours to consider, default is 3.

    Returns 0 if strictly more of the selected neighbours are in group 0
    than in group 1, else 1 (ties therefore give 1).
    '''

    distance = []
    for group in points:
        for feature in points[group]:

            # Calculate the euclidean distance of p from the training point
            euclidean_distance = math.sqrt((feature[0] - p[0]) ** 2 +
                                           (feature[1] - p[1]) ** 2)

            # Add a tuple of the form (distance, group) to the distance list
            distance.append((euclidean_distance, group))

    # Sort the distance list in ascending order
    # and select the first k distances
    distance = sorted(distance)[:k]

    freq1 = 0  # frequency of group 0
    freq2 = 0  # frequency of group 1

    for d in distance:
        if d[1] == 0:
            freq1 += 1
        elif d[1] == 1:
            freq2 += 1

    return 0 if freq1 > freq2 else 1


# driver function
def main():

    # Dictionary of training points having two keys - 0 and 1
    # key 0 has the points that belong to class 0
    # key 1 has the points that belong to class 1

    points = {0: [(1, 12), (2, 5), (3, 6), (3, 10), (3.5, 8), (2, 11), (2, 9), (1, 7)],
              1: [(5, 3), (3, 2), (1.5, 9), (7, 2), (6, 1), (3.8, 1), (5.6, 4), (4, 2), (2, 5)]}

    # testing point p(x, y)
    p = (2.5, 7)

    # Number of neighbours
    k = 3

    print("The value classified to unknown point is: {}".
          format(classifyAPoint(points, p, k)))


if __name__ == '__main__':
    main()

# This code is contributed by Atul Kumar (www.fb.com/atul.kr.007)
View Code
總結
- 上一篇: 图像传感器的光电参数和选择标准
- 下一篇: python写课堂派的登录,考勤,以及测