K-Means聚类算法Java实现
生活随笔
收集整理的這篇文章主要介紹了
K-Means聚类算法Java实现
小編覺得挺不錯(cuò)的,現(xiàn)在分享給大家,幫大家做個(gè)參考.
K-Means聚類算法
目的:將數(shù)據(jù)分為K組
基本思路
初始聚類中心的選擇會(huì)對(duì)最終求出的分類結(jié)果有一定的影響,所以初始點(diǎn)的選取盡量離散,間隔大
K-Means算法對(duì)大數(shù)據(jù)挖掘有很高的效率,它的時(shí)間復(fù)雜度為O(NKT),其中N表示數(shù)據(jù)集中的對(duì)象個(gè)數(shù),K表示聚類個(gè)數(shù),T表示迭代次數(shù)
例題
? 將以下數(shù)據(jù)分為三類 consumption.csv
Id,R,F,M 1,27,6,232.61 2,3,5,1507.11 3,4,16,817.62 4,3,11,232.81 5,14,7,1913.05 6,19,6,220.07 7,5,2,615.83 8,26,2,1059.66 9,21,9,304.82 10,2,21,1227.96 11,15,2,521.02 12,26,3,438.22 13,17,11,1744.55 14,30,16,1957.44 15,5,7,1713.79 16,4,21,1768.11 17,93,2,1016.34 18,16,3,950.36 19,4,1,754.93 20,27,1,294.23 21,5,1,195.3 22,17,3,1845.34 23,12,13,1434.29 24,21,3,275.85 25,18,5,449.76 26,30,21,1628.68 27,4,2,1795.41 28,7,12,1786.24 29,18,1,679.44 30,60,7,5318.81 31,4,22,873.68 32,16,1,654.69 33,3,2,230.37 34,14,11,1165.68 35,13,21,1276.31 36,10,16,334.21 37,5,5,759.19 38,1,1,1383.39 39,24,8,3280.77 40,19,4,154.65 41,9,1,501.38 42,1,24,1721.93 43,14,1,107.18 44,10,1,973.36 45,10,17,764.55 46,7,6,1251.4 47,23,11,923.28 48,15,1,1011.18 49,1,15,1847.61 50,3,21,1669.46 51,10,3,1758.05 52,30,8,1865.99 53,28,8,1791.44 54,4,15,874.6 55,24,5,557.17 56,16,2,1025.35 57,7,2,1261.47 58,66,4,2920.81 59,4,2,1266.02 60,21,11,626.37 61,6,4,1105.63 62,26,21,1465.58 63,8,21,630.74 64,26,2,1546.45 65,14,11,1577.91 66,17,6,170.16 67,20,5,1558.75 68,5,5,1272.06 69,26,3,111.02 70,15,7,1578.37 71,26,24,720.26 72,25,16,873.22 73,4,7,935.19 74,23,11,723.67 75,15,9,1833.01 76,6,3,681.26 77,78,11,1461.63 78,15,17,560.57 79,9,18,1761.19 80,8,7,1707.25 81,28,2,227.14 82,22,3,223.57 83,8,6,940.46 84,23,6,256.3 85,5,1,312.44 86,15,14,929.52 87,27,15,1296.66 88,22,11,591.62 89,2,2,755.72 90,18,17,1424.07 91,61,8,940.93 92,3,7,414.24 93,1,14,576.56 94,12,22,1037.14 95,26,5,1200.17 96,1,3,1727.36 97,13,16,503.71 98,19,7,703.36 99,12,17,1583.05 100,3,18,602.9 101,5,1,798.41 102,25,7,1202.09 103,85,4,1605.36 104,28,21,1222.34 105,25,19,593.17 106,8,6,94.75 107,14,1,89.7 108,21,15,1061.56 109,29,15,978.85 110,14,3,155.9 111,20,5,938.15 112,3,24,1477.97 113,10,6,1976.23 114,8,5,181.17 115,17,4,499.65 116,49,1,76.22 117,13,11,267.1 118,23,1,137.62 119,65,5,1383.47 120,20,22,1311.2 121,22,13,496.61 122,21,6,1921.8 123,14,11,304.1 124,26,1,468.09 125,27,9,432.67 126,30,1,368.35 127,11,4,759.69 128,26,3,1110.81 129,28,1,53 130,39,11,1314.21 131,11,6,1895.95 132,23,1,417.23 133,3,2,679.58 134,5,1,533.97 135,24,8,1134.64 136,25,6,825.39 137,10,6,165.39 138,29,9,1234.64 139,80,11,1829.32 140,23,1,89 141,4,2,1557.88 142,3,8,1328.01 143,15,7,304.65 144,17,23,1505.55 145,16,7,711.1 146,16,1,539.76 147,5,1,65.83 148,16,3,776.21 149,22,18,1820.61 150,19,4,1997 151,4,22,1846.69 152,23,7,1252.41 153,7,13,987.17 154,3,6,1130.03 155,18,1,148.32 156,28,1,135.57 157,6,2,1641.79 158,7,2,242.83 159,21,8,1803.02 160,12,12,1557.95 161,25,4,1494.81 162,26,13,1280.06 163,28,1,160 164,22,9,440.12 165,14,1,746.95 166,12,2,351.09 167,6,2,556.91 168,7,3,957.83 169,16,16,1212.37 170,11,2,946.65 171,16,13,1442.68 172,5,12,1612.7 173,0,21,1281.68 174,9,13,1928.8 175,24,7,335.35 176,3,8,1589.35 177,20,11,797.72 178,17,1,793.47 179,13,16,569.47 180,10,3,149.5 181,17,21,515.38 182,8,4,187.76 183,20,7,1441.83 184,27,1,121.61 185,25,11,934.58 186,16,15,591.06 187,15,4,951.31 188,12,11,914 189,3,22,1058 190,9,2,1111.51 191,17,9,458.52 192,27,18,927.59 193,73,1,1370.25 194,17,1,946.53 195,10,1,1474.41 196,16,3,1661.03 197,0,9,1465.18 198,17,3,1813.45 199,5,7,772.54 200,4,1,172.82 201,14,4,552.37 202,12,8,946.28 203,26,2,651.99 204,6,9,857.79 205,7,4,1016.55 206,5,6,1766.44 207,25,3,908.53 208,28,2,403.75 209,25,4,1270.75 210,13,3,1157.92 211,13,1,497.09 212,2,1,216.78 213,23,16,1454.58 214,2,17,1027.58 215,24,12,722.09 216,15,7,282.19 217,11,4,106.96 218,18,1,999.75 219,24,14,1139.33 220,24,5,836.72 221,3,2,1678.54 222,3,17,1337.34 223,1,4,1335.77 224,11,2,810.2 225,29,11,943.9 226,51,12,5135.77 227,6,9,984.12 228,6,5,1413.55 229,1,6,381.95 230,6,14,788.22 231,29,1,80.8 232,21,1,611.13 233,24,4,1766.35 234,0,2,1516.76 235,9,6,1925.2 236,17,1,344.23 237,49,1,204.1 238,5,2,1257.59 239,7,3,1095.09 240,2,1,123.76 241,3,2,696.82 242,26,2,1487.35 243,19,3,1278.43 244,28,14,627.97 245,12,1,95 246,14,4,1827.01 247,10,6,754.05 248,19,2,922.93 249,12,12,257.4 250,1,14,676.34 251,3,19,984.32 252,27,32,1914.06 253,13,4,1953.81 254,1,4,768.02 255,61,13,1379.86 256,42,1,1054.24 257,21,11,298.34 258,17,5,841.04 259,8,9,1757.87 260,22,11,1010.7算法分析
? 采用map存儲(chǔ)數(shù)據(jù),key存id,value使用List存儲(chǔ)R、F、M,中心點(diǎn)可以使用三個(gè)List存儲(chǔ),最大迭代次數(shù)m由命令行輸入
算法代碼實(shí)現(xiàn)
import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.*;public class Test {static String filePath = System.getProperty("user.dir")+"\\src\\sources\\consumption.csv";static Map<Integer,List<Float>> map = new HashMap<>();//總數(shù)據(jù)static Map<Integer,List<Float>> map1 = new HashMap<>();//第一類數(shù)據(jù)static Map<Integer,List<Float>> map2 = new HashMap<>();//第二類數(shù)據(jù)static Map<Integer,List<Float>> map3 = new HashMap<>();//第三類數(shù)據(jù)static List<Float> list1 = new ArrayList();//第一個(gè)中心static List<Float> list2 = new ArrayList();//第二個(gè)中心static List<Float> list3 = new ArrayList();//第三個(gè)中心//判斷是否是數(shù)字public static boolean isNumeric(String str){for (int i = str.length();--i>=0;){if (!Character.isDigit(str.charAt(i))){return false;}}return true;}//讀取數(shù)據(jù),存入mappublic static void ReadFile(){BufferedReader br = null;String line = "";String csvSplitBy = ",";try {br = new BufferedReader(new FileReader(filePath));while ((line = br.readLine()) != null) {// 分割點(diǎn)為List<String> post = Arrays.asList(line.split(csvSplitBy));if (isNumeric(post.get(0))) {int x = Integer.parseInt(post.get(0));List<Float> list = new ArrayList<>();list.add(Float.valueOf(post.get(1)));list.add(Float.valueOf(post.get(2)));list.add(Float.valueOf(post.get(3)));map.put(x,list);}}} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {if (br != null) {try {br.close();} catch (IOException e) {e.printStackTrace();}}}}//第一次,產(chǎn)生三個(gè)隨機(jī)點(diǎn)public static void RandPoint(){Random r = new Random();list1 = map.get((r.nextInt(942)));list2 = map.get((r.nextInt(942)));list3 = map.get((r.nextInt(942)));System.out.print(list1.toString());System.out.print(list2.toString());System.out.println(list3.toString());}//給定一個(gè)map的value,判斷他是哪個(gè)類,給數(shù)據(jù)分類public static void IsKM(List<Float> list,int index){float x1 = Math.abs(list1.get(0)-list.get(0))+Math.abs(list1.get(1)-list.get(1))+Math.abs(list1.get(2)-list.get(2));float x2 = Math.abs(list2.get(0)-list.get(0))+Math.abs(list2.get(1)-list.get(1))+Math.abs(list2.get(2)-list.get(2));float x3 = Math.abs(list3.get(0)-list.get(0))+Math.abs(list3.get(1)-list.get(1))+Math.abs(list3.get(2)-list.get(2));float min = (x1<x2)?x1:x2;min = (min<x3)?min:x3;if (min == x1){map1.put(index,list);//System.out.println(index + "屬于第1類,中心點(diǎn)為"+list1.toString());}if(min == x2){map2.put(index,list);//System.out.println(index + "屬于第2類,中心點(diǎn)為"+list2.toString());}if(min == x3){map3.put(index,list);//System.out.println(index + "屬于第3類,中心點(diǎn)為"+list3.toString());}}//計(jì)算map中數(shù)據(jù)與中心點(diǎn)的距離public static void KMeans(int m) {for (int i = 0;i<m;i++){map1.clear();map2.clear();map3.clear();for (Map.Entry<Integer,List<Float>> entry : map.entrySet()) {IsKM(entry.getValue(),entry.getKey());}NewPoint();System.out.print(list1.toString());System.out.print(list2.toString());System.out.println(list3.toString());}System.out.println("第一個(gè)中心點(diǎn)"+map1);System.out.println("第二個(gè)中心點(diǎn)"+map2);System.out.println("第三個(gè)中心點(diǎn)"+map3);}//計(jì)算三個(gè)類的新中心public static void NewPoint(){//重置中心點(diǎn)list1.clear();list2.clear();list3.clear();//一列數(shù)據(jù)的和float sum1 = 0;float sum2 = 0;float sum3 = 0;for (Map.Entry<Integer,List<Float>> entry : map1.entrySet()) {//System.out.println(entry.getValue());//map最后一個(gè)value為空,要進(jìn)行一波判斷if (entry.getValue().size()>0) {sum1 = sum1 + entry.getValue().get(0);sum2 = sum2 + entry.getValue().get(1);sum3 = sum3 + entry.getValue().get(2);}}list1.add(sum1/map1.size());list1.add(sum2/map1.size());list1.add(sum3/map1.size());sum1=0;sum2=0;sum3=0;for (Map.Entry<Integer,List<Float>> entry : map2.entrySet()) {if (entry.getValue().size()>0){sum1 = sum1 + entry.getValue().get(0);sum2 = sum2 + entry.getValue().get(1);sum3 = sum3 + entry.getValue().get(2);}}list2.add(sum1/map2.size());list2.add(sum2/map2.size());list2.add(sum3/map2.size());sum1=0;sum2=0;sum3=0;for (Map.Entry<Integer,List<Float>> entry : map3.entrySet()) {if (entry.getValue().size()>0){sum1 = sum1 + entry.getValue().get(0);sum2 = sum2 + entry.getValue().get(1);sum3 = sum3 + entry.getValue().get(2);}}list3.add(sum1/map3.size());list3.add(sum2/map3.size());list3.add(sum3/map3.size());}public static void main(String[] args) {System.out.print("請(qǐng)輸入迭代次數(shù):");Scanner input = new Scanner(System.in);int m = input.nextInt();//讀取數(shù)據(jù)ReadFile();//生成第一次的中心點(diǎn)System.out.print("第一次隨機(jī)生成中心點(diǎn):");RandPoint();//分類,求中心,再分類KMeans(m);} }總結(jié)
以上是生活随笔為你收集整理的K-Means聚类算法Java实现的全部?jī)?nèi)容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: NASA 机智号火星直升机完成第 64
- 下一篇: java美元兑换,(Java实现) 美元