OoDickoO 发表于 2018-3-29 11:07:30

决策树运行错误

代码如下:
from numpy import *
import math
import copy
import pickle as pp
class ID3DTree(object):
        def __init__(self):
                self.tree = {}
                self.dataSet = []
                self.labels = []

def loadDataSet(self,path,labels):
        recordlist = []
        fp = open(path,"rb")
        content = fb.read()
        fp.close()
        rowlist = content.splitlines()
        recordlist =
        self.dataSet = recordlist
        self.labels = labels

def train(self):
        labels = copy.deepcopy(self.labels)
        self.tree = self.buildTree(self.dataSet,labels)

def buildTree(self,dataSet,labels):
        catelist = for data in dataSet]
        if catelist.count(catelist) == len(catelist):
                return catelist
        if len(dataSet) == 1:
                return self.maxCate(catelist)
       
        besfFeat = self.getBestFeat(dataSet)
        bestFeatLabel = labels
        tree = {bestFeatLabel:{}}
        del(labels)
        uniqueVals = set( for data in dataSet])
        for value in uniqueVals:
                subLabels = labels[:]
                splitDataSet = self.splitDataSet(dataSet,besfFeat,value)
                subTree = self.buildTree(splitDataSet,subLabels)
                tree = subTree
        return tree
       
def maxCate(self,catelist):
        items = dict([(catelist.count(i),i) for i in catelist])
        return items
       
def getBestFeat(self,dataSet):
        numFeatures = len(dataSet) - 1
        baseEntropy = self.computeEntropy(dataSet)
        bestInfoGain = 0.0;
        besfFeature = -1
        for i in xrange(numFeatures):
                uniqueVals = set( for data in dataSet])
                newEntropy = 0.0
                for value in uniqueVals:
                        subDataSet = self.splitDataSet(dataSet,i,value)
                        prob = len(subDataSet)/float(len(dataSet))
                        newEntropy += prob * self.computeEntropy(subDataSet)
                infoGain = baseEntropy - newEntropy
                if (infoGain > bestInfoGain):
                        bestInfoGain = infoGain
                        besfFeature = id
        return besfFeature
       
def computeEntropy(self,dataSet):
        datalen = float(len(dataSet))
        catelist = for data in dataSet]
        items = dict([(i,catelist.count(i)) for i in catelist])
        infoEntropy = 0.0
        for key in items:
                prob = float(items)/datalen
                infoEntropy -= prob * math.log(prob,2)
        return infoEntropy

def splitDataSet(self,dataSet,axis,value):
        rtnList = []
        for featVec in dataSet:
                if featVec == value:
                        rFeatVec = featVec[:axis]
                        rFeatVec.extend(featVec)
                        rtnList.append(rFeatVec)
        return rtnList

BngThea 发表于 2018-3-29 11:40:32

难道是缩进问题?从你贴出的缩进来看

OoDickoO 发表于 2018-3-29 14:47:56

BngThea 发表于 2018-3-29 11:40
难道是缩进问题?从你贴出的缩进来看

缩进没问题,要是有问题早就报错了

BngThea 发表于 2018-3-29 15:41:59

OoDickoO 发表于 2018-3-29 14:47
缩进没问题,要是有问题早就报错了

提示的信息说类中没有这个函数

OoDickoO 发表于 2018-3-29 15:44:06

BngThea 发表于 2018-3-29 15:41
提示的信息说类中没有这个函数

有这个函数,提示是说我这个ID3DTree调用不了loadDataSet这个函数
页: [1]
查看完整版本: 决策树运行错误