Annotated example code from the sklearn KNN documentation
The documentation link is:
http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.radius_neighbors_graph
Example1.py
#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

import numpy as np
from sklearn.neighbors import NearestNeighbors

samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]
#--------------------------------------------------
neigh = NearestNeighbors(n_neighbors=2, radius=0.4)
neigh.fit(samples)  # unsupervised fit: samples is the candidate (training) list
print neigh
# Output:
# NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
#                  metric_params=None, n_jobs=1, n_neighbors=2, p=2, radius=0.4)
print "--------------------------------------------------"
print neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)
# The 2 points nearest to (0,0,1.3) are index 2 ([0,0,1]) and index 0 ([0,0,2])
print "--------------------------------------------------"
nbrs = neigh.radius_neighbors([[0, 0, 1.3]], 0.4, return_distance=False)
print nbrs  # within radius 0.4, the only point near (0,0,1.3) is the one at index 2 of samples (counting from 0)
print np.asarray(nbrs[0][0])  # unlike np.array, np.asarray does not allocate new memory when the input is already an ndarray
# Reference: https://blog.csdn.net/gobsd/article/details/56485177
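To make the np.array vs. np.asarray remark in the comment above concrete, here is a minimal standalone sketch (plain NumPy; written with Python 3 print() syntax, unlike the Python 2 examples in this post, and the variable names a, b, c are just illustrative):

import numpy as np

a = np.array([1.0, 2.0, 3.0])

b = np.asarray(a)   # no copy: b is the same underlying array as a
c = np.array(a)     # copy=True by default: c is a new array

print(b is a)       # True  -> asarray reused the existing ndarray
print(c is a)       # False -> array allocated new memory

a[0] = 99.0
print(b[0])         # 99.0, because b shares memory with a
print(c[0])         # 1.0, because c is an independent copy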
Example2.py

#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=1)
neigh.fit(samples)  # unsupervised fit: the three points above are the candidates for the query point (1,1,1)

print(neigh.kneighbors([[1., 1., 1.]]))
# (array([[ 0.5]]), array([[2]], dtype=int64))
# Meaning: the smallest distance is 0.5, and the point nearest to (1,1,1) is index 2 of the training set (counting from 0),
# i.e. the point nearest to (1,1,1) is (1, 1, 0.5)
print "---------------------------------------------------------"
X = [[0., 1., 0.], [1., 0., 1.]]
print neigh.kneighbors(X, return_distance=False)
# Result: [[1] [2]]
# The training point nearest to (0,1,0) is index 1 (counting from 0)
# The training point nearest to (1,0,1) is index 2 (counting from 0)
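Because kneighbors returns indices into the fitted data, it is often convenient to map them back to coordinates. A small sketch with the same three training points (Python 3 syntax, keyword arguments as in recent sklearn versions; distances/indices are my variable names):

import numpy as np
from sklearn.neighbors import NearestNeighbors

samples = np.array([[0., 0., 0.], [0., .5, 0.], [1., 1., .5]])
neigh = NearestNeighbors(n_neighbors=1).fit(samples)

distances, indices = neigh.kneighbors([[1., 1., 1.]])
print(distances)            # [[0.5]]
print(indices)              # [[2]]
print(samples[indices[0]])  # [[1.  1.  0.5]] -> the coordinates of the nearest training point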
Example3.py

#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

import numpy as np
samples = [[0., 0., 0.], [0., .5, 0.], [1., 1., .5]]
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(radius=1.6)
neigh.fit(samples)
rng = neigh.radius_neighbors([[1., 1., 1.]])

print rng
print "rng[0][0]=", rng[0][0]  # result is [1.5 0.5]: e.g. (0-1)^2 + (0.5-1)^2 + (0-1)^2 = 2.25, and sqrt(2.25) = 1.5
print "rng[1][0]=", rng[1][0]  # result is [1 2]: the two training points within radius 1.6 of (1,1,1) are index 1 and index 2

print(np.asarray(rng[0][0]))
print(np.asarray(rng[1][0]))
# The first array returned contains the distances to all points which are closer than 1.6,
# while the second array returned contains their indices.
# In general, multiple points can be queried at the same time.

print "-------------------- compute the Euclidean distance by hand --------------------"
def calEuclideanDistance(vec1, vec2):
    print "vec1 - vec2 =", vec1 - vec2
    print "sum of squares =", np.sum(np.square(vec1 - vec2))
    dist = np.sqrt(np.sum(np.square(vec1 - vec2)))
    return dist

v1 = np.array([0, 0.5, 0])
v2 = np.array([1, 1, 1])
print calEuclideanDistance(v1, v2)
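The hand-rolled calEuclideanDistance above can be cross-checked with np.linalg.norm. This short sketch (Python 3 syntax; the array names are mine) recomputes all three distances to (1,1,1) in one call and recovers the same indices that radius_neighbors reports for radius 1.6:

import numpy as np

samples = np.array([[0., 0., 0.], [0., .5, 0.], [1., 1., .5]])
query = np.array([1., 1., 1.])

# Euclidean distance from the query to every training point in one call
dists = np.linalg.norm(samples - query, axis=1)
print(dists)                     # [1.732... 1.5 0.5]
print(np.where(dists < 1.6)[0])  # [1 2] -> the same indices radius_neighbors returns for radius 1.6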
Example4.py

#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

print "-------------------------------------------"
X = [[0], [3], [1]]  # three one-dimensional points
print "X=", X
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=2, radius=1)
neigh.fit(X)
print "-------------------------------------------"
A = neigh.kneighbors_graph(X)
print A.toarray()
# The returned matrix marks entry (i, j) with 1 if point j is among the k nearest points of point i, and 0 otherwise.
# Each row therefore has exactly k nonzero entries (a point counts as one of its own neighbors here).
# [[1. 0. 1.]
#  [0. 1. 1.]
#  [1. 0. 1.]]

print "-------------------------------------------"
A = neigh.kneighbors_graph(X, mode='distance')  # with mode='distance', the 1s are replaced by the actual distances
print A.toarray()
# [[0. 0. 1.]
#  [0. 0. 2.]
#  [1. 0. 0.]]
# This is the same k=2 graph as above, but each connectivity mark is replaced by the distance between the two points.
# The self-neighbor entries become 0 because a point's distance to itself is 0, which is why each row appears to
# mark only one neighbor.

# Reference:
# http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.radius_neighbors_graph
# Some graph-theory background may help when reading these graph outputs.
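The reference link above actually points at the companion method radius_neighbors_graph, which builds the same kind of sparse graph but connects every pair of points within a given radius instead of a fixed k. A hedged sketch with the same X and radius=1 (Python 3 syntax; the commented matrix is what I would expect from the distances, not output copied from the docs):

from sklearn.neighbors import NearestNeighbors

X = [[0], [3], [1]]
neigh = NearestNeighbors(n_neighbors=2, radius=1)
neigh.fit(X)

# Connect every pair of points whose distance is <= radius (1 here), instead of a fixed k.
B = neigh.radius_neighbors_graph(X)
print(B.toarray())
# Expected: [0] and [1] are within distance 1 of each other, while [3] is only "near" itself:
# [[1. 0. 1.]
#  [0. 1. 0.]
#  [1. 0. 1.]]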
Additionally, the usage of the predict_proba function:

#-*- encoding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
# This example is supervised learning.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
print(neigh.predict([[1.1]]))
print(neigh.predict_proba([[0]]))  # probability estimates: P(label 0 | x=0) and P(label 1 | x=0)
print(neigh.predict_proba([[3]]))  # probability estimates: P(label 0 | x=3) and P(label 1 | x=3)
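With uniform weights, predict_proba is simply the fraction of the k nearest training points that carry each label, which can be checked by hand through the classifier's kneighbors method. A small verification sketch (Python 3 syntax; using np.bincount for the manual count is my choice):

import numpy as np
from sklearn.neighbors import KNeighborsClassifier

X = [[0], [1], [2], [3]]
y = np.array([0, 0, 1, 1])

neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)

# Indices of the 3 nearest training points to x = 0 -> points [0], [1], [2] with labels 0, 0, 1
_, idx = neigh.kneighbors([[0]])
counts = np.bincount(y[idx[0]], minlength=2)
print(counts / counts.sum())        # [0.666... 0.333...]
print(neigh.predict_proba([[0]]))   # should match the manual count above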
Note:

For simplicity, the kneighbors_graph and KNeighborsClassifier examples above use one-dimensional data (the first three examples already use three-dimensional points); nothing in the API changes with the number of features, as sketched below.
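A hedged sketch with made-up two-dimensional points (the data here is purely illustrative and not from the sklearn documentation):

from sklearn.neighbors import KNeighborsClassifier

# Made-up 2-D training points: the first three cluster near the origin (label 0),
# the last three cluster near (5, 5) (label 1).
X = [[0, 0], [1, 0], [0, 1], [5, 5], [6, 5], [5, 6]]
y = [0, 0, 0, 1, 1, 1]

clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X, y)

print(clf.predict([[0.5, 0.5], [5.5, 5.5]]))        # expected: [0 1]
print(clf.predict_proba([[0.5, 0.5], [5.5, 5.5]]))  # expected: [[1. 0.] [0. 1.]]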