當前位置：首頁 > 编程资源 > 综合教程 >内容正文

综合教程

KNN算法基本实例

發布時間：2023/12/31 综合教程 29 生活家

生活随笔收集整理的這篇文章主要介紹了 KNN算法基本实例小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

　　KNN算法是機器學習領域中一個最基本的經典算法。它屬于監督學習領域的算法，并且在模式識別，數據挖掘和特征提取領域有著廣泛的應用。

給定一些已經分好類的先驗數據（即訓練數據），其中每個坐標點都通過一個屬性被劃分到不同的組。這就是KNN的思路。

　　下面，舉個例子來說明一下。圖中的數據點包含兩個特征：

　　現在，給出另外一組數據點（即測試數據），通過分析訓練節點來把這些節點分類。尚未分類的節點我們標記為白色，如下所示：

　　直觀來講，如果我們把那些節點畫到一張圖上，我們可能就能確定一些簇，或組。現在，給一個沒有分類的點，我們可以通過觀察它距離哪個組最近來確定它屬于哪個組。意思就是，假如一個點距離紅色的組最近，我們就可以把這個點歸為紅色的組。簡而言之，我們可以把第一個點（2.5,7）歸類為綠色，把第二個點（5.5,4.5）歸類為紅色。

　　算法流程：

　　假設m是訓練樣本的數量，p是一個未知的節點。

　　1 把所有訓練的樣本放到一個數組arr[]中。也就是說，這個數組中的每個元素都可以用一個元組（x,y）表示。

　　2 偽碼

for i=0 to m-1:
  Calculate Euclidean distance d(arr[i], p).

　　3 取所得距離中最小的K個組成集合S。這里每個距離都和一個已經分類的數據點相關。

　　4 返回S中占多數的標簽。

　　實際程序C++代碼：

// C++ program to find groups of unknown
// Points using K nearest neighbour algorithm.
#include <bits/stdc++.h>
using namespace std;
 
// One 2-D sample handled by the k-nearest-neighbour classifier.
struct Point
{
    int val;         // Group the point belongs to
    double x;        // x co-ordinate of the point
    double y;        // y co-ordinate of the point
    double distance; // Distance from the test point
};
 
// Used to sort an array of points by increasing
// order of distance
bool comparison(Point a, Point b)
{
    return (a.distance < b.distance);
}
 
// Finds the classification of point p using the k nearest neighbour
// algorithm. It assumes only two groups (val 0 and val 1).
//
// arr : training points. The array is modified in place: every
//       arr[i].distance is overwritten with the Euclidean distance to p and
//       the elements are then sorted by increasing distance.
// n   : number of elements in arr.
// k   : number of nearest neighbours that vote. Values larger than n are
//       clamped to n so the vote never reads past the end of arr.
// p   : test point; only p.x and p.y are read.
//
// Returns 0 when group 0 has strictly more votes than group 1 among the
// considered neighbours, otherwise 1 (ties and "no votes" also give 1).
int classifyAPoint(Point arr[], int n, int k, Point p)
{
    // Fill distances of all points from p
    for (int i = 0; i < n; i++)
    {
        const double dx = arr[i].x - p.x;
        const double dy = arr[i].y - p.y;
        arr[i].distance = std::sqrt(dx * dx + dy * dy);
    }

    // Sort the points by distance from p
    std::sort(arr, arr + n, comparison);

    // Only the points that actually exist can vote.
    const int neighbours = std::min(k, n);

    int freq0 = 0; // Votes for group 0
    int freq1 = 0; // Votes for group 1
    for (int i = 0; i < neighbours; i++)
    {
        if (arr[i].val == 0)
            freq0++;
        else if (arr[i].val == 1)
            freq1++;
    }

    return (freq0 > freq1 ? 0 : 1);
}
 
// Driver code
int main()
{
    int n = 17; // Number of data points
    Point arr[n];
 
    arr[0].x = 1;
    arr[0].y = 12;
    arr[0].val = 0;
 
    arr[1].x = 2;
    arr[1].y = 5;
    arr[1].val = 0;
 
    arr[2].x = 5;
    arr[2].y = 3;
    arr[2].val = 1;
 
    arr[3].x = 3;
    arr[3].y = 2;
    arr[3].val = 1;
 
    arr[4].x = 3;
    arr[4].y = 6;
    arr[4].val = 0;
 
    arr[5].x = 1.5;
    arr[5].y = 9;
    arr[5].val = 1;
 
    arr[6].x = 7;
    arr[6].y = 2;
    arr[6].val = 1;
 
    arr[7].x = 6;
    arr[7].y = 1;
    arr[7].val = 1;
 
    arr[8].x = 3.8;
    arr[8].y = 3;
    arr[8].val = 1;
 
    arr[9].x = 3;
    arr[9].y = 10;
    arr[9].val = 0;
 
    arr[10].x = 5.6;
    arr[10].y = 4;
    arr[10].val = 1;
 
    arr[11].x = 4;
    arr[11].y = 2;
    arr[11].val = 1;
 
    arr[12].x = 3.5;
    arr[12].y = 8;
    arr[12].val = 0;
 
    arr[13].x = 2;
    arr[13].y = 11;
    arr[13].val = 0;
 
    arr[14].x = 2;
    arr[14].y = 5;
    arr[14].val = 1;
 
    arr[15].x = 2;
    arr[15].y = 9;
    arr[15].val = 0;
 
    arr[16].x = 1;
    arr[16].y = 7;
    arr[16].val = 0;
 
    /*Testing Point*/
    Point p;
    p.x = 2.5;
    p.y = 7;
 
    // Parameter to decide groupr of the testing point
    int k = 3;
    printf ("The value classified to unknown point"
            " is %d.
", classifyAPoint(arr, n, k, p));
    return 0;
}

View Code

　　實際程序python代碼：

 # Python3 program to find groups of unknown
# Points using K nearest neighbour algorithm.

import heapq
import math


def classifyAPoint(points, p, k=3):
    """Classify ``p`` into group 0 or group 1 by a k-nearest-neighbour vote.

    Only two groups are assumed. The ``k`` training points closest to ``p``
    (Euclidean distance) vote with their group key.

    Parameters:
        points: Dictionary of training points having two keys - 0 and 1.
            Each key maps to a list of ``(x, y)`` training points that
            belong to that group.
        p: A tuple, the test data point of the form ``(x, y)``.
        k: Number of nearest neighbours to consider, default is 3. When
            ``k`` exceeds the number of training points, all of them vote.

    Returns:
        0 if group 0 has strictly more votes than group 1 among the
        neighbours, otherwise 1 (a tie therefore yields 1).

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # One (distance, group) tuple per training point.
    distances = [
        (math.hypot(feature[0] - p[0], feature[1] - p[1]), group)
        for group, features in points.items()
        for feature in features
    ]

    # The k smallest tuples, in ascending order.
    nearest = heapq.nsmallest(k, distances)

    freq0 = sum(1 for _, group in nearest if group == 0)  # votes for group 0
    freq1 = sum(1 for _, group in nearest if group == 1)  # votes for group 1

    return 0 if freq0 > freq1 else 1


# driver function
def main():
    """Classify the sample test point (2.5, 7) and print the result."""
    # Dictionary of training points having two keys - 0 and 1
    # key 0 holds the points that belong to class 0
    # key 1 holds the points that belong to class 1
    points = {
        0: [(1, 12), (2, 5), (3, 6), (3, 10), (3.5, 8), (2, 11), (2, 9), (1, 7)],
        1: [(5, 3), (3, 2), (1.5, 9), (7, 2), (6, 1), (3.8, 1), (5.6, 4), (4, 2), (2, 5)],
    }

    # testing point p(x,y)
    p = (2.5, 7)

    # Number of neighbours
    k = 3

    print("The value classified to unknown point is: {}".
          format(classifyAPoint(points, p, k)))


if __name__ == '__main__':
    main()

# This code is contributed by Atul Kumar (www.fb.com/atul.kr.007)

View Code

總結

以上是生活随笔為你收集整理的KNN算法基本实例的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：图像传感器的光电参数和选择标准
下一篇： python写课堂派的登录，考勤，以及测