44 R关联分析——Apriori算法
生活随笔
收集整理的這篇文章主要介紹了
44 R关联分析——Apriori算法
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
# Package setup ----
# Install only the packages that are not yet available, so re-running the
# script does not trigger a fresh download every time, then attach the ones
# the analysis uses directly.
required_pkgs <- c("gridBase", "arules", "arulesViz", "graphlayouts")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(gridBase)
library(arules)
library(arulesViz)

# Existing purchase records
# Transactions from a list ----
# Five sample shopping baskets, one character vector of items per basket.
tr_list <- list(
  c("Bread", "Milk"),
  c("Bread", "Diaper", "Beer", "Eggs"),
  c("Milk", "Diaper", "Beer", "Coke"),
  c("Bread", "Milk", "Diaper", "Beer"),
  c("Bread", "Milk", "Diaper", "Coke")
)

# Name each shopping basket tr1, tr2, ...
names(tr_list) <- paste0("tr", seq_along(tr_list))
tr_list

# Coerce the list to the arules "transactions" class with as().
trans <- as(tr_list, "transactions")
trans
summary(trans)

# Display the transactions ----
# Using the LIST() function
LIST(trans)

# View the data
inspect(trans)

# Use image() to inspect the transaction data visually.
image(trans)

# Look at the slots of the transactions object.
trans@data
trans@data@i
trans@data@p
trans@itemInfo                   # data.frame
trans@itemInfo$labels            # item labels
trans@itemsetInfo                # data.frame
trans@itemsetInfo$transactionID  # shopping cart transactionID

# Number of items in each basket
size(trans)

# Filter by transaction size: keep baskets with at least three items.
filter_trans <- trans[size(trans) >= 3]
inspect(filter_trans)

# Transactions from a matrix ----
# One 0/1 indicator vector per basket; positions follow the column names
# assigned below (Beer, Bread, Coke, Diaper, Eggs, Milk).
tr1 <- c(0, 1, rep(0, 3), 1)
tr2 <- c(1, 1, 0, 1, 1, 0)
tr3 <- c(1, 0, 1, 1, 0, 1)
tr4 <- c(1, 1, 0, 1, 0, 1)
tr5 <- c(0, 1, 1, 1, 0, 1)

# Stack the vectors as rows: one row per basket, one column per item.
tr_matrix <- rbind(tr1, tr2, tr3, tr4, tr5)
dimnames(tr_matrix) <- list(
  paste0("tr", seq_len(nrow(tr_matrix))),
  c("Beer", "Bread", "Coke", "Diaper", "Eggs", "Milk")
)
tr_matrix

trans2 <- as(tr_matrix, "transactions")
inspect(trans2)
trans2@data

# Convert data-frame style data to the transactions type
# Transactions from long-format (transaction id, item) data ----
# trID repeats each basket id once per item in that basket.
trID <- c(rep(1, 2), rep(2:5, each = 4))
item <- c(
  "Bread", "Milk",
  "Bread", "Diaper", "Beer", "Eggs",
  "Milk", "Diaper", "Beer", "Coke",
  "Bread", "Milk", "Diaper", "Beer",
  "Bread", "Milk", "Diaper", "Coke"
)
tran <- cbind(trID, item)
tran

# Wrong practice: coercing the long-format matrix directly. It is kept only
# as a demonstration and wrapped in try() so that a failure here does not
# stop the rest of the script.
trans3 <- try(as(tran, "transactions"))

# Correct practice: split the items by transaction id first, then coerce
# the resulting list of baskets.
tr_df <- as.data.frame(tran)
tr_split <- split(tr_df[, "item"], tr_df[, "trID"])
trans3 <- as(tr_split, "transactions")

# The same thing can be done starting from a data frame.
tr_dataf <- data.frame(
  trID = c(rep(1, 2), rep(2:5, each = 4)),
  item = c(
    "Bread", "Milk",
    "Bread", "Diaper", "Beer", "Eggs",
    "Milk", "Diaper", "Beer", "Coke",
    "Bread", "Milk", "Diaper", "Beer",
    "Bread", "Milk", "Diaper", "Coke"
  )
)
tr_dataf
trans4 <- as(split(tr_dataf[, "item"], tr_dataf[, "trID"]), "transactions")
trans4
inspect(trans4)

as.data.frame(tr_dataf[, 1])

# openxlsx provides read.xlsx() for the spreadsheet import that follows.
library(openxlsx)
# Transactions from files ----
# Load the data from an Excel workbook.
readt <- openxlsx::read.xlsx("shoppingcart.xlsx")
readt

# View the class of each column. Iterating over the data frame's columns
# avoids apply(), which would first coerce the whole frame to a matrix.
vapply(readt, function(column) class(column)[1L], character(1))

# Transform the data into a form that the apriori algorithm can process.
# NOTE(review): this uses column "ProID" while the CSV import below uses
# "ProId"; confirm both spellings against the actual file headers.
trans5 <- as(split(readt[, "ProID"], readt[, "UserId"]), "transactions")
inspect(trans5)

# Read long-format (one row per user/product pair) data straight from CSV.
read2 <- read.transactions(
  "shoppingcart2.csv",
  format = "single",
  sep = ",",
  cols = c("UserId", "ProId"),
  header = TRUE
)
inspect(read2)

# When the examples don't have a user ID, you must use format="basket"
#read2=read.transactions("shoppingcart2.csv",format="basket",sep=",",cols=c("UserId","ProId"),skip=1)

############################################# remove spaces
# splitblank=function(x){
# for (i in 1:length(read2[,2])) {
#
# a=unlist(strsplit(read2[,2][i],split=" "))
# b=which(a!="")
# c=paste(a[b],collapse = "")
# read2[,2][i]=c
# }
# read2[,2][2]
#
# }
#
# d=splitblank(read2[,2])
# read2
# e=vector()
# for (i in 1:length(read2[,2])){
# a=print(read2[,2][i])
# e=append(e,a)
# }
# e
# read2[,2]=e
# read2
# colnames(read2)[2]=e

# View the support of each item
itemFrequency(trans, type = "relative")
# Item frequencies ----
itemFrequency(trans, type = "absolute")

# Plot frequency/support bar chart, displays the set of related items for
# the item
itemFrequencyPlot(
  trans,
  col = c("orange", "yellow", "brown", "green", "tomato", "violet")
)

# Frequent itemsets ----
# Use eclat() to mine frequent itemsets.
freqsets <- eclat(trans)
inspect(freqsets)

# You can also add parameter restrictions.
frequentsets <- eclat(trans, parameter = list(support = 0.25, maxlen = 10))
summary(frequentsets)
inspect(sort(frequentsets, by = "support"))

# Generate association rules ----
# n items give up to 2^n - 1 itemsets and up to 3^n - 2^(n+1) + 1 rules.
rules <- apriori(
  trans,
  parameter = list(support = 0.25, confidence = 0.5, target = "rules")
)
inspect(rules)
summary(rules)

# Sort rules according to confidence, and view the top-ranked rules.
rules.sorted <- sort(rules, by = "confidence", decreasing = TRUE)
rules.sorted
inspect(rules.sorted)

# Judge whether each rule is redundant.
redundant <- is.redundant(rules.sorted)
redundant

# Find redundant rules
rules.redundant <- rules.sorted[redundant]
inspect(rules.redundant)

# Drop redundant rules
rules.pruned <- rules.sorted[!redundant]
inspect(rules.pruned)

# Relation diagram
sortrules <- sort(rules, by = "lift")
inspect(sortrules)

# Draw a picture ----
library(arulesViz)
plot(
  rules.pruned,
  measure = "confidence",
  method = "graph",
  control = list(type = "items"),
  shading = "lift"
)

# Interactive
# NOTE(review): recent arulesViz releases appear to prefer `engine =` over
# `interactive =`; confirm against the installed version.
plot(
  rules,
  measure = c("support", "lift"),
  shading = "confidence",
  interactive = TRUE
)

# View one rule: rules with Milk on the left-hand side ----
Milk_rule <- apriori(
  data = trans,
  parameter = list(support = 0.2, confidence = 0.5, minlen = 2),
  appearance = list(default = "rhs", lhs = "Milk")
)
inspect(Milk_rule)
plot(
  Milk_rule,
  by = "lift",
  main = "Milk_rule by lift",
  method = "graph",
  control = list(type = "items")
)

# Draw a balloon diagram of association rules; more than two rules can be
# drawn.
plot(c(rules.pruned, Milk_rule), main = "Milk_rules by grouped")
plot(
  c(rules.pruned, Milk_rule),
  method = "grouped",
  main = "Milk_rules by grouped"
)

# Use the Apriori algorithm to generate rules with Milk on the right-hand
# side.
Rhs_Milk <- apriori(
  data = trans,
  parameter = list(support = 0.2, confidence = 0.5, minlen = 2),
  appearance = list(default = "lhs", rhs = "Milk")
)
inspect(Rhs_Milk)

redundant1 <- is.redundant(Rhs_Milk)
Rhr <- Rhs_Milk[!redundant1]
inspect(Rhr)

# Install wordcloud2 only when it is missing, then attach it.
if (!requireNamespace("wordcloud2", quietly = TRUE)) {
  install.packages("wordcloud2")
}
library(wordcloud2)
總結
以上是生活随笔為你收集整理的44 R关联分析——Apriori算法的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 高德地图WebAPI:行驶距离测量
- 下一篇: 2022.11.6 第二十九次周报