def buildStump(dataArr, classLabels, D):
    """Find the best single-level decision tree (decision stump) for weighted data.

    Every feature column is scanned with evenly spaced thresholds between its
    minimum and maximum (plus one step below the minimum), and for each
    threshold both inequality directions ('lt' and 'gt') are tried. The
    candidate with the lowest weighted error ``D.T * errArr`` is kept.

    Args:
        dataArr: 2-D array-like of samples; rows are samples, columns are
            features (it is converted to a numpy matrix).
        classLabels: one label per sample; converted to a column matrix.
            Predictions from ``stumpClassify`` are +1.0 / -1.0, so the labels
            presumably use the same encoding -- verify against the caller.
        D: sample weights, used as ``D.T * errArr``. Assumed to be an
            (m, 1) numpy matrix -- TODO confirm against the caller.

    Returns:
        A tuple ``(bestStump, minError, bestClasEst)``:
        bestStump is a dict with keys 'dim' (feature index), 'thresh'
        (threshold value) and 'ineq' ('lt' or 'gt'); it is empty when the
        data has no feature columns.
        minError is the lowest weighted error found, exactly as produced by
        ``D.T * errArr`` (``inf`` when no candidate was evaluated).
        bestClasEst holds the predictions of the best stump as an (m, 1)
        column (all zeros when no candidate was evaluated).
    """
    # ``mat`` was removed in NumPy 2.0; ``asmatrix`` is the same function
    # under its remaining name. Imported locally because the file's import
    # block is not visible from this block.
    from numpy import asmatrix

    dataMatrix = asmatrix(dataArr)
    labelMat = asmatrix(classLabels).T
    m, n = shape(dataMatrix)
    numSteps = 10.0
    stepCount = int(numSteps)
    bestStump = {}
    bestClasEst = asmatrix(zeros((m, 1)))
    minError = inf

    for i in range(n):  # every feature
        column = dataMatrix[:, i]
        rangeMin = column.min()
        rangeMax = column.max()
        stepSize = (rangeMax - rangeMin) / numSteps
        # j starts at -1 so one threshold lies below the column minimum.
        for j in range(-1, stepCount + 1):
            # The threshold does not depend on the inequality direction,
            # so it is computed once per step.
            threshVal = rangeMin + float(j) * stepSize
            for inequal in ('lt', 'gt'):
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                # 1 marks a misclassified sample, 0 a correct one.
                errArr = asmatrix(ones((m, 1)))
                errArr[predictedVals == labelMat] = 0
                weightedError = D.T * errArr
                # Strict '<' keeps the first candidate found on ties.
                if weightedError < minError:
                    minError = weightedError
                    bestClasEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClasEst

def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
    """Classify samples by comparing one feature column against a threshold.

    Args:
        dataMatrix: 2-D numpy array or matrix; rows are samples.
        dimen: index of the feature column to compare.
        threshVal: threshold value.
        threshIneq: 'lt' labels samples with value <= threshVal as -1.0;
            any other value (callers in this file pass 'gt') labels samples
            with value > threshVal as -1.0.

    Returns:
        An (m, 1) float array holding +1.0 for every sample except those
        selected by the comparison, which hold -1.0.
    """
    # Start with every sample in the +1 class, then flip the selected rows.
    retArray = ones((shape(dataMatrix)[0], 1))
    column = dataMatrix[:, dimen]
    if threshIneq == 'lt':
        retArray[column <= threshVal] = -1.0
    else:
        retArray[column > threshVal] = -1.0
    return retArray
来源: