當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

44 R关联分析——Apriori算法

發布時間：2023/12/31 编程问答 29 豆豆

生活随笔收集整理的這篇文章主要介紹了 44 R关联分析——Apriori算法，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

# ---- Setup -----------------------------------------------------------------
# Install only the packages that are not available yet, so that re-running
# the script does not reinstall everything each time.
required_pkgs <- c("gridBase", "arules", "arulesViz", "graphlayouts")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(gridBase)
library(arules)
library(arulesViz)

# ---- Transactions from a list ----------------------------------------------
# Existing purchase records: one character vector of items per basket.
tr_list <- list(
  c("Bread", "Milk"),
  c("Bread", "Diaper", "Beer", "Eggs"),
  c("Milk", "Diaper", "Beer", "Coke"),
  c("Bread", "Milk", "Diaper", "Beer"),
  c("Bread", "Milk", "Diaper", "Coke")
)

# Name each shopping cart tr1, tr2, ...
names(tr_list) <- paste0("tr", seq_along(tr_list))
tr_list

# Coerce the list to the "transactions" class with as().
trans <- as(tr_list, "transactions")
trans
summary(trans)

# Display the transactions, first with LIST() ...
LIST(trans)

# ... then with inspect().
inspect(trans)

# Visually check the transaction data with image().
image(trans)

# Look at the individual slots of the transactions object.
trans@data
trans@data@i
trans@data@p
trans@itemInfo                   # item information (data frame)
trans@itemInfo$labels            # item labels
trans@itemsetInfo                # per-transaction information (data frame)
trans@itemsetInfo$transactionID  # shopping cart transaction IDs

# Number of items in each basket.
size(trans)

# Filter transactions by size: keep baskets with at least 3 items.
filter_trans <- trans[size(trans) >= 3]
inspect(filter_trans)

# ---- Transactions from a 0/1 matrix ----------------------------------------
# One indicator row per basket; the columns follow the item order given in
# dimnames() below.
tr1 <- c(0, 1, rep(0, 3), 1)
tr2 <- c(1, 1, 0, 1, 1, 0)
tr3 <- c(1, 0, 1, 1, 0, 1)
tr4 <- c(1, 1, 0, 1, 0, 1)
tr5 <- c(0, 1, 1, 1, 0, 1)

# Stack the indicator vectors as rows (one row per transaction).
tr_matrix <- rbind(tr1, tr2, tr3, tr4, tr5)

# The first item is "Beer" (the original spelled it "Bear", which made this
# item set disagree with the list-based transactions above).
dimnames(tr_matrix) <- list(
  paste0("tr", seq_len(nrow(tr_matrix))),
  c("Beer", "Bread", "Coke", "Diaper", "Eggs", "Milk")
)
tr_matrix

trans2 <- as(tr_matrix, "transactions")
inspect(trans2)
trans2@data

# ---- Transactions from long-format (ID, item) data -------------------------
trID <- c(rep(1, 2), rep(2:5, each = 4))
item <- c(
  "Bread", "Milk",
  "Bread", "Diaper", "Beer", "Eggs",
  "Milk", "Diaper", "Beer", "Coke",
  "Bread", "Milk", "Diaper", "Beer",
  "Bread", "Milk", "Diaper", "Coke"
)
tran <- cbind(trID, item)
tran

# Wrong practice (kept for reference only, intentionally not executed):
# coercing the two-column matrix directly is the wrong approach because the
# items are not grouped by trID first. The original call also misspelled the
# class as "transactionss", which raised an error and aborted the script.
# trans3 <- as(tran, "transactions")

# Correct practice: split the items by transaction ID, then coerce the
# resulting list.
tr_df <- as.data.frame(tran)
tr_split <- split(tr_df[, "item"], tr_df[, "trID"])
trans3 <- as(tr_split, "transactions")

# The same thing starting from a data frame built directly.
tr_dataf <- data.frame(
  trID = c(rep(1, 2), rep(2:5, each = 4)),
  item = c(
    "Bread", "Milk",
    "Bread", "Diaper", "Beer", "Eggs",
    "Milk", "Diaper", "Beer", "Coke",
    "Bread", "Milk", "Diaper", "Beer",
    "Bread", "Milk", "Diaper", "Coke"
  )
)
tr_dataf
trans4 <- as(
  split(tr_dataf[, "item"], tr_dataf[, "trID"]),
  "transactions"
)
trans4
inspect(trans4)
as.data.frame(tr_dataf[, 1])

# ---- Transactions from files -----------------------------------------------
# Install openxlsx only when it is missing.
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
library(openxlsx)

# Load the data.
readt <- read.xlsx("shoppingcart.xlsx")
readt

# View the type of each column. apply() would first coerce the data frame to
# a matrix and report the same class for every column, so iterate over the
# columns instead.
vapply(readt, function(column) class(column)[1], character(1))

# Transform the data into a form that the apriori algorithm can process.
# NOTE(review): the item column is "ProID" here but "ProId" in the CSV call
# below -- confirm the actual header spelling in each file.
trans5 <- as(split(readt[, "ProID"], readt[, "UserId"]), "transactions")
inspect(trans5)

# Read long-format (one row per user/item pair) data straight from a CSV.
read2 <- read.transactions(
  "shoppingcart2.csv",
  format = "single",
  sep = ",",
  cols = c("UserId", "ProId"),
  header = TRUE
)
inspect(read2)

# When the rows have no user ID, use format = "basket" instead. In that
# format `cols` must be NULL (no transaction IDs) or a single column number,
# not a pair of column names.
# read2 <- read.transactions("shoppingcart2.csv", format = "basket",
#                            sep = ",", cols = NULL, skip = 1)

############################################# remove spaces
# Disabled draft from the original script, kept as-is for reference.
# splitblank=function(x){
#   for (i in 1:length(read2[,2])) {
#
#     a=unlist(strsplit(read2[,2][i],split=" "))
#     b=which(a!="")
#     c=paste(a[b],collapse = "")
#     read2[,2][i]=c
#   }
#   read2[,2][2]
#
# }
#
# d=splitblank(read2[,2])
# read2
# e=vector()
# for (i in 1:length(read2[,2])){
#   a=print(read2[,2][i])
#   e=append(e,a)
# }
# e
# read2[,2]=e
# read2
# colnames(read2)[2]=e

# ---- Item frequency --------------------------------------------------------
# View the support of each item, as a proportion and as a count.
itemFrequency(trans, type = "relative")
itemFrequency(trans, type = "absolute")

# Plot the item frequency/support bar chart.
itemFrequencyPlot(
  trans,
  col = c("orange", "yellow", "brown", "green", "tomato", "violet")
)

# ---- Frequent itemsets with Eclat ------------------------------------------
# Mine frequent itemsets with the default parameters.
freqsets <- eclat(trans)
inspect(freqsets)

# Parameter restrictions can be added as well.
frequentsets <- eclat(trans, parameter = list(support = 0.25, maxlen = 10))
summary(frequentsets)
inspect(sort(frequentsets, by = "support"))

# ---- Association rules with Apriori ----------------------------------------
# With n items there are up to 2^n - 1 non-empty itemsets and up to
# 3^n - 2^(n + 1) + 1 possible rules.
rules <- apriori(
  trans,
  parameter = list(support = 0.25, confidence = 0.5, target = "rules")
)
inspect(rules)
summary(rules)

# Sort the rules by confidence (highest first) and view them.
rules.sorted <- sort(rules, by = "confidence", decreasing = TRUE)
rules.sorted
inspect(rules.sorted)

# Judge whether each rule is redundant.
redundant <- is.redundant(rules.sorted)
redundant

# Find the redundant rules.
rules.redundant <- rules.sorted[redundant]
inspect(rules.redundant)

# Drop the redundant rules.
rules.pruned <- rules.sorted[!redundant]
inspect(rules.pruned)

# Rules sorted by lift.
sortrules <- sort(rules, by = "lift")
inspect(sortrules)

# ---- Plots -----------------------------------------------------------------
library(arulesViz)

# Relation diagram (graph) of the pruned rules.
plot(
  rules.pruned,
  measure = "confidence",
  method = "graph",
  control = list(type = "items"),
  shading = "lift"
)

# Interactive plot.
# NOTE(review): recent arulesViz releases deprecate `interactive` in favour
# of the `engine` argument -- confirm against the installed version.
plot(
  rules,
  measure = c("support", "lift"),
  shading = "confidence",
  interactive = TRUE
)

# View the rules that have Milk on the left-hand side.
Milk_rule <- apriori(
  data = trans,
  parameter = list(support = 0.2, confidence = 0.5, minlen = 2),
  appearance = list(default = "rhs", lhs = "Milk")
)
inspect(Milk_rule)

# Colour by lift. The original passed by = "lift", which is not an argument
# of this plot method; `shading` is the argument that selects the measure.
plot(
  Milk_rule,
  shading = "lift",
  main = "Milk_rule by lift",
  method = "graph",
  control = list(type = "items")
)

# Plot several rule sets together: first with the default method, then as a
# grouped (balloon) diagram.
plot(c(rules.pruned, Milk_rule), main = "Milk_rules by grouped")
plot(
  c(rules.pruned, Milk_rule),
  method = "grouped",
  main = "Milk_rules by grouped"
)

# Use Apriori to generate the rules that have Milk on the right-hand side.
Rhs_Milk <- apriori(
  data = trans,
  parameter = list(support = 0.2, confidence = 0.5, minlen = 2),
  appearance = list(default = "lhs", rhs = "Milk")
)
inspect(Rhs_Milk)

# Remove the redundant rules from that set.
redundant1 <- is.redundant(Rhs_Milk)
Rhr <- Rhs_Milk[!redundant1]
inspect(Rhr)

# Install wordcloud2 only when it is missing, then load it.
if (!requireNamespace("wordcloud2", quietly = TRUE)) {
  install.packages("wordcloud2")
}
library(wordcloud2)

總結

以上是生活随笔為你收集整理的44 R关联分析——Apriori算法的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：高德地图WebAPI：行驶距离测量
下一篇： 2022.11.6 第二十九次周报