|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 糖逗 于 2020-11-18 15:35 编辑
参考书籍:《机器学习实战》
- import numpy as np
def loadDataSet():
    """Return the small demo transaction database used by the example run.

    Returns:
        A list of transactions; each transaction is a list of integer
        item ids.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list with one single-element ``frozenset`` per distinct item,
        ordered by ascending item value. ``frozenset`` is used (rather than
        ``set``) because the itemsets are later used as dictionary keys.
    """
    # A set gives O(1) duplicate detection instead of scanning a list.
    unique_items = {item for transaction in dataSet for item in transaction}
    return [frozenset([item]) for item in sorted(unique_items)]
def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and filter by support.

    Args:
        D: Iterable of transactions (each an iterable of items, typically
            a ``set``).
        Ck: Iterable of candidate itemsets (``frozenset`` objects).
        minSupport: Minimum fraction of transactions that must contain an
            itemset for it to be kept.

    Returns:
        A tuple ``(retList, supportData)``. ``retList`` holds the candidates
        whose support is at least ``minSupport``, in reverse order of first
        occurrence. ``supportData`` maps every candidate that occurred in at
        least one transaction to its support, including those below the
        threshold.
    """
    # Materialize both inputs: D needs len(), and Ck is re-iterated for
    # every transaction, so one-shot iterators (e.g. ``map``) would break.
    transactions = list(D)
    candidates = list(Ck)

    ssCnt = {}
    for tid in transactions:
        for can in candidates:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = float(len(transactions))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        # numItems cannot be zero here: ssCnt is empty when there are no
        # transactions, so this loop body never runs in that case.
        support = count / numItems
        supportData[key] = support
        if support >= minSupport:
            retList.append(key)
    # Newest-first ordering, equivalent to inserting each key at index 0.
    retList.reverse()
    return retList, supportData
def aprioriGen(Lk, k):
    """Generate candidate k-itemsets by joining (k-1)-itemsets.

    Two itemsets are merged when their first ``k - 2`` items, taken in
    sorted order, are identical. Comparing sorted prefixes means each
    k-item union is produced by exactly one pair, so no duplicates are
    generated (assuming ``Lk`` itself has no duplicates).

    Args:
        Lk: Sequence of ``frozenset`` itemsets, all of size ``k - 1``.
        k: Size of the itemsets to generate.

    Returns:
        A list of ``frozenset`` candidates of size ``k``.
    """
    # Sort BEFORE slicing: set iteration order is arbitrary, so slicing the
    # raw iteration order would compare meaningless prefixes and could miss
    # valid candidates.
    prefixes = [sorted(itemset)[: k - 2] for itemset in Lk]

    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])  # set union
    return retList
def apriori(dataSet, minSupport=0.5):
    """Find all frequent itemsets in ``dataSet`` with the Apriori algorithm.

    Args:
        dataSet: Iterable of transactions, each an iterable of items.
        minSupport: Minimum support (fraction of transactions) an itemset
            needs in order to be considered frequent.

    Returns:
        A tuple ``(L, supportData)``. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; the final element of ``L`` is always an
        empty list (the level at which the search stopped). ``supportData``
        maps every counted itemset to its support.
    """
    # Build the transaction sets first so that a one-shot iterable is
    # consumed exactly once and is still available for every scan.
    D = [set(transaction) for transaction in dataSet]
    C1 = createC1(D)
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2  # size of the itemsets generated in this round; L[k - 2] is the previous level
    while L[k - 2]:
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData
def generateRules(L, supportData, minConf=0.7):
    """Generate association rules from the frequent itemsets.

    Args:
        L: List of frequent-itemset levels as returned by ``apriori``;
            ``L[i]`` holds itemsets of size ``i + 1``. These have already
            been filtered by minimum support.
        supportData: Mapping from itemset (``frozenset``) to support.
        minConf: Minimum confidence a rule must reach to be kept.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    bigRuleList = []
    # Level 0 holds single items, which cannot be split into a rule.
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            if i > 1:
                # Three or more items: consequents of several sizes are
                # possible, so delegate to the recursive generator.
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
            else:
                # Two items: only single-item consequents exist.
                calcConf(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList
def calcConf(freqSet, H, supportData, brl, minConf=0.7):
    """Evaluate candidate consequents of ``freqSet`` and keep confident rules.

    For each consequent ``c`` in ``H`` the rule ``(freqSet - c) --> c`` has
    confidence ``support(freqSet) / support(freqSet - c)``.

    Args:
        freqSet: The frequent itemset (``frozenset``) the rules are built from.
        H: Iterable of candidate consequents, each a proper subset of
            ``freqSet``.
        supportData: Mapping from itemset to support; must contain
            ``freqSet`` and every ``freqSet - c``.
        brl: List that accepted ``(antecedent, consequent, confidence)``
            tuples are appended to (mutated in place).
        minConf: Minimum confidence for a rule to be accepted.

    Returns:
        The consequents from ``H`` whose rule met ``minConf``. Each accepted
        rule is also printed.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = supportData[freqSet] / supportData[antecedent]
        if conf >= minConf:
            print(antecedent, '-->', conseq, 'conf:', conf)
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH
def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively generate rules from ``freqSet`` with growing consequents.

    The consequents in ``H`` are evaluated first; only those whose rule
    meets ``minConf`` are merged into consequents one item larger, which are
    then processed the same way. Dropping a failed consequent is safe:
    enlarging a consequent shrinks the antecedent, which can only raise the
    antecedent's support and therefore lower the confidence.

    Args:
        freqSet: The frequent itemset (``frozenset``) the rules are built from.
        H: List of candidate consequents, all of the same size.
        supportData: Mapping from itemset to support.
        brl: List that accepted ``(antecedent, consequent, confidence)``
            tuples are appended to (mutated in place).
        minConf: Minimum confidence for a rule to be accepted.

    Returns:
        None. Results are delivered through ``brl``.
    """
    if not H:
        return
    m = len(H[0])
    # A consequent must leave at least one item for the antecedent.
    if len(freqSet) <= m:
        return
    # Test the consequents at this size before growing them; otherwise the
    # rules for the initial (single-item) consequents would never be emitted.
    H = calcConf(freqSet, H, supportData, brl, minConf)
    # Merging needs at least two surviving consequents, and the merged
    # consequent (size m + 1) must still be a proper subset of freqSet.
    if len(H) > 1 and len(freqSet) > m + 1:
        Hmp1 = aprioriGen(H, m + 1)
        rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)
if __name__ == "__main__":
    # Demo run: mine frequent itemsets from the sample data, then derive
    # the association rules (accepted rules are printed as they are found).
    dataSet = loadDataSet()
    L, suppData = apriori(dataSet, minSupport=0.5)
    rules = generateRules(L, suppData, minConf=0.7)
复制代码 |
评分
-
参与人数 1 | 荣誉 +5 |
鱼币 +5 |
贡献 +3 |
收起
理由
|
昨非
| + 5 |
+ 5 |
+ 3 |
无条件支持楼主! |
查看全部评分
|