|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- # -*- coding: utf-8 -*-
- from numpy import *
- import time
- import matplotlib.pyplot as plt
- import operator
- #核函数(点积函数)
- def calcKernelValue(x1,x2):
- value=0
- for i in range(len(x1)):
- value=value+x1[i]*x2[i]
- return float(value)
- #计算核函数矩阵,存放K<x1,x2>的值
- def calcKernelMatrix(dataSetIn):
- kernelMatrix=zeros((len(dataSetIn),len(dataSetIn)))
- for i in range(len(dataSetIn)):
- for j in range(len(dataSetIn)):
- kernelMatrix[i][j]=calcKernelValue(dataSetIn[i],dataSetIn[j])
- return kernelMatrix
- class SVM:
- def __init__(self,dataSetIn,labelsIn,maxiter,C,error_admit):
- self.X=dataSetIn
- self.Y=labelsIn
- self.m=len(labelsIn)#样本数
- self.alphas = [0]*self.m#阿尔法参数数组
- self.b=0
- self.maxiter=maxiter#最大迭代次数
- self.C=C#惩罚因子
- self.error_admit=error_admit#容错值
- self.kernelValue = calcKernelMatrix(dataSetIn)#核函数矩阵
- self.fx = [0]*self.m
- self.E = [0]*self.m
- #计算g(xi)=∑[k从1到N]αk*yk*K(xi,xk)+b
- def calcfx(svm):
- for i in range(svm.m):
- svm.fx[i]=sum(map(operator.mul,map(operator.mul,svm.alphas,svm.Y),svm.kernelValue[i]))+svm.b
- #计算E(xi)
- def calcE(svm):
- for k in range(svm.m):
- svm.E[k]=svm.fx[k]-svm.Y[k]
- #通过比较|Ei-Ek|,获取第二个阿尔法参数下标j
- def findSecondAlphas(svm,i):
- maxEdiff=-99999
- j=-1
- for k in range(svm.m):
- if i!=k and abs(svm.E[i]-svm.E[k])>maxEdiff:
- maxEdiff=abs(svm.E[i]-svm.E[k])
- j=k
- j=int(random.uniform(0, svm.m))
- #这里如果用<span style="font-family: Arial, Helvetica, sans-serif;">比较|Ei-Ek|的方法获取第二个拉格朗日乘子的话,会迭代进入死循环,所以我改为用随机获取第二个乘子,则结果好很多.最终的支持向量也分布在分割面附近</span>
- while i==j:
- j=int(random.uniform(0, svm.m))
- return j
- #构建SVM分类器
- def structure_svm(dataSet,labels,maxiter=1000,C=0.6,error_admit=0):
- svm=SVM(dataSet,labels,maxiter,C,error_admit)
- continue_iter=True
- iter_num=1
- while iter_num<svm.maxiter and continue_iter:
- continue_iter=False
- iter_num+=1
- #更新fx(x)和E(x)
- calcfx(svm)
- calcE(svm)
- #KKT条件:
- #当 αi=0 <=> yi*Ei>0
- #0<αi<C <=> yi*Ei=0
- #αi=C <=> yi*Ei<0
- #所以
- #如果yi*Ei < 0并且αi < C 则违反了KKT条件
- #因为原本yi*Ei < 0 应该对应的是αi = C
- #同理,yi*Ei > 0并且αi > 0则违反了KKT条件
- #因为原本yi*Ei > 0对应的应该是αi =0
- i=-1
- for k in range(svm.m):
- yi_Ei=svm.Y[k]*svm.E[k]
- #先遍历间隔边界上的支持向量点,即0<αi<C的点,也叫做非边界数据样本
- if svm.alphas[k]>0 and svm.alphas[k]<C and yi_Ei!=0 :
- i=k
- continue_iter=True
- break
- #在遍历整个样本集,查找违反KKT条件的样本
- if not continue_iter:
- for k in range(svm.m):
- yi_Ei=svm.Y[k]*svm.E[k]
- if (yi_Ei<-error_admit and svm.alphas[k]<C) or (yi_Ei>error_admit and svm.alphas[k]>0) or (yi_Ei==0 and (svm.alphas[k]==0 or svm.alphas[k]==C)):
- i=k
- continue_iter=True
- break
-
-
- if continue_iter and i>=0:
- j=findSecondAlphas(svm,i)
- #确定边界
- #如果yi!=yj 则:L=max(0,αj-αi),H=min(C,C+αj-αi)
- #如果yi=yj 则:L=max(0,αi+αj-C),H=min(C,αi+αj)
- #αj=αj-yi(Ei-Ej)/η η=2K<xi,xj>-K<xi,xi>-K<xj,xj>
- #αj= H if αj>H
- # = αj if L<=αj<=H
- # = L if αj<L
- #αi=αi+yiyj(αj'old-αj)
- L,H=0,0
- if svm.Y[i]!=svm.Y[j]:
- L=max(0,svm.alphas[j]-svm.alphas[i])
- H=min(C,C+svm.alphas[j]-svm.alphas[i])
- else:
- L=max(0,svm.alphas[j]+svm.alphas[i]-C)
- H=min(C,svm.alphas[j]+svm.alphas[i])
-
- eta=2*svm.kernelValue[i,j]-svm.kernelValue[i,i]-svm.kernelValue[j,j]
- #更新alpha_j
- #print(svm.E[i],svm.E[j],svm.E[i]-svm.E[j],eta)
- alphas_j_new=svm.alphas[j]-svm.Y[j]*(svm.E[i]-svm.E[j])/eta
- if alphas_j_new>H:
- alphas_j_new=H
- elif alphas_j_new<L:
- alphas_j_new=L
- else:
- alphas_j_new=alphas_j_new
- #更新alpha_i
- #print(svm.alphas[j],alphas_j_new,svm.alphas[j]-alphas_j_new)
- alphas_i_new=svm.alphas[i]+svm.Y[i]*svm.Y[j]*(svm.alphas[j]-alphas_j_new)
- #更新b值
- #b1=b-Ei-yi*(αi-αi'old)K<xi,xi>-yj*(αj-αj'old)K<xi,xj>
- #b2=b=Ej-yi*(αi-αi'old)K<xi,xj>-yj*(αj-αj'old)K<xj,xj>
- #如果0<αi<C和0<αi<C,那么b1=b2
- #如果αi=0或者αi=C,同时αj=0或者αj=C,则取中间值b=(b1+b2)/2
- #总之:
- #b= b1 if 0<αi<C
- # = b2 if 0<αj<C
- # = (b1+b2)/2 otherwise
- b1=svm.b-svm.E[i]-svm.Y[i]*(alphas_i_new-svm.alphas[i])*svm.kernelValue[i,i]-svm.Y[j]*(alphas_j_new-svm.alphas[j])*svm.kernelValue[i,j]
- b2=svm.b-svm.E[j]-svm.Y[i]*(alphas_i_new-svm.alphas[i])*svm.kernelValue[i,j]-svm.Y[j]*(alphas_j_new-svm.alphas[j])*svm.kernelValue[j,j]
- if alphas_i_new>0 and alphas_i_new<C:
- b=b1
- elif alphas_j_new>0 and alphas_j_new<C:
- b=b2
- else:
- b=(b1+b2)/2
- alphas_i_old=svm.alphas[i]
- alphas_j_old=svm.alphas[j]
- b_old=svm.b
- svm.alphas[i]=alphas_i_new
- svm.alphas[j]=alphas_j_new
- svm.b=b
- print(svm.alphas)
- return svm
- #画图
- def showSVM(svm):
- #画出所有样本点
- for i in xrange(svm.m):
- if svm.Y[i] == -1:
- plt.plot(svm.X[i][0], svm.X[i][1], 'or' ,c='red')
- elif svm.Y[i] == 1:
- plt.plot(svm.X[i][0], svm.X[i][1], 'or' ,c='green')
- for i in xrange(svm.m):
- if svm.alphas[i] != 0:
- plt.plot(svm.X[i][0], svm.X[i][1], 'or' ,c='blue')
-
- w1,w2=0,0
- min_x,max_x=99999,-99999
- #计算w,和最小,最大的x1
- for i in xrange(svm.m):
- w1 += svm.alphas[i]*svm.Y[i]*svm.X[i][0]
- w2 += svm.alphas[i]*svm.Y[i]*svm.X[i][1]
- if min_x>svm.X[i][0]:
- min_x=svm.X[i][0]
- if max_x<svm.X[i][0]:
- max_x=svm.X[i][0]
- #计算x2
- y_min_x = float(-svm.b - w1 * min_x) / w2
- y_max_x = float(-svm.b - w1 * max_x) / w2
- #画出分界线
- plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')
- plt.show()
-
- dataSet = []
- labels = []
- fileIn = open('E:\\testSet.txt')
- for line in fileIn.readlines():
- lineArr = line.strip().split(' ')
- dataSet.append([float(lineArr[0]), float(lineArr[1])])
- labels.append(float(lineArr[2]))
- svm=structure_svm(dataSet,labels)
- showSVM(svm)
复制代码
看提示应该是
operator.mul 函数不支持一个 list (map函数返回的list) 和一个 int (svm.kernelValue)作为实参进行计算吧
|
|