python3关联规则Apriori代码模版.pdfVIP

  • 0
  • 0
  • 约5.01千字
  • 约 10页
  • 2022-11-26 发布于上海
  • 举报
# Python 3 association-rule mining: Apriori code template
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from numpy import *

# NOTE: the wildcard import above can shadow builtins such as ``all``, ``any``
# and ``sum`` with their NumPy counterparts, so the code below deliberately
# avoids calling them.
#
# NOTE(review): this listing was recovered from a text extraction that had
# collapsed the program onto one line and dropped quotes and the ``>=`` / ``==``
# operators.  Restored operators and indentation are marked where they are
# not certain.


def loadDataSet():
    """Return the built-in sample database: ten transactions of item labels."""
    return [
        ['a', 'c', 'e'],
        ['b', 'd'],
        ['b', 'c'],
        ['a', 'b', 'c', 'd'],
        ['a', 'b'],
        ['b', 'c'],
        ['a', 'b'],
        ['a', 'b', 'c', 'e'],
        ['a', 'b', 'c'],
        ['a', 'c', 'e'],
    ]


def createC1(dataSet):
    """Build the candidate 1-itemsets.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list with one single-element ``frozenset`` per distinct item, in
        ascending item order.  ``frozenset`` is used because it is hashable
        and can therefore serve as a dictionary key.
    """
    distinct_items = {item for transaction in dataSet for item in transaction}
    return [frozenset([item]) for item in sorted(distinct_items)]


def _count_candidates(D, Ck):
    """Count, for each candidate in Ck, the transactions of D containing it.

    Candidates that occur in no transaction are absent from the result.
    Keys appear in order of first match (transactions outer, candidates
    inner).
    """
    counts = {}
    for transaction in D:
        for candidate in Ck:
            if candidate.issubset(transaction):
                counts[candidate] = counts.get(candidate, 0) + 1
    return counts


# From candidate k-itemsets to frequent k-itemsets (support counting).
def scanD(D, Ck, minSupport):
    """Filter candidate itemsets by minimum support.

    Args:
        D: list of transactions (each an iterable of items).
        Ck: list of candidate itemsets (``frozenset`` objects).
        minSupport: minimum fraction of transactions, inclusive.

    Returns:
        ``(retList, supportData)``.  ``retList`` holds the candidates whose
        support is at least ``minSupport``, in reverse order of first match
        (the original prepended each hit).  ``supportData`` maps every
        candidate that matched at least one transaction to its support.
    """
    numItems = float(len(D))
    retList = []
    supportData = {}
    for key, count in _count_candidates(D, Ck).items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        # NOTE(review): recorded for every counted candidate, not only the
        # frequent ones; the collapsed source does not show the indentation
        # of this statement -- confirm against the intended behaviour.
        supportData[key] = support
    retList.reverse()
    return retList, supportData


def calSupport(D, Ck, min_support):
    """Variant of ``scanD`` that keeps support data for frequent itemsets only.

    Args:
        D: list of transactions (each an iterable of items).
        Ck: list of candidate itemsets (``frozenset`` objects).
        min_support: minimum fraction of transactions, inclusive.

    Returns:
        ``(relist, supportData)``.  ``relist`` holds the frequent candidates
        in order of first match; ``supportData`` maps each of them to its
        support.
    """
    sumCount = float(len(D))
    relist = []
    supportData = {}
    for itemset, count in _count_candidates(D, Ck).items():
        temp_sup = count / sumCount
        if temp_sup >= min_support:
            relist.append(itemset)
            # This is the place to choose between returning all support
            # data or only that of the frequent itemsets.
            # NOTE(review): placement inside the ``if`` is inferred from the
            # collapsed source -- confirm.
            supportData[itemset] = temp_sup
    return relist, supportData


# Improved candidate generation with pruning.
def aprioriGen(Lk, k):
    """Generate candidate k-itemsets from the frequent (k-1)-itemsets.

    Two itemsets are joined when their first ``k - 2`` items, taken in sorted
    order, are equal; this yields every k-candidate exactly once.  A
    candidate is kept only if all of its (k-1)-subsets are themselves in
    ``Lk`` (the Apriori pruning step).

    The prefix is taken after sorting.  Slicing the raw ``list(frozenset)``
    first, as the original listing did, makes the prefix depend on the
    arbitrary iteration order of the set and can drop or duplicate
    candidates.

    Args:
        Lk: list of frequent (k-1)-itemsets as ``frozenset`` objects whose
            items are mutually comparable.
        k: size of the itemsets to generate.

    Returns:
        List of candidate k-itemsets (``frozenset`` objects).
    """
    frequent = set(Lk)  # O(1) membership tests while pruning
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    retList = []
    for i, prefix in enumerate(prefixes):
        for j in range(i + 1, len(Lk)):
            if prefix != prefixes[j]:
                continue
            candidate = Lk[i] | Lk[j]
            # Every subset obtained by removing one item must be frequent.
            subsets = {candidate - {item} for item in candidate}
            if subsets <= frequent:
                retList.append(candidate)
    return retList


# NOTE(review): the source listing is cut off inside ``apriori``; the visible
# fragment is preserved below and must be completed from the full document.
#
# def apriori(dataSet, minSupport=0.2):
#     C1 = createC1(dataSet)
#     D = list(map(set, dataSet))  # use list() to convert to a list
#     L1, support

文档评论(0)

1亿VIP精品文档

相关文档