|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 糖逗 于 2020-11-14 13:01 编辑
参考书籍:《机器学习实战》
1. 传统线性回归
import numpy as np
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into features and labels.

    The number of feature columns is taken from the first line of the file
    (its tab-separated field count minus one). For every non-blank line the
    first ``numFeat`` fields become one feature row and the last field
    becomes the label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of feature rows (lists of
        floats) and a list of float labels. Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single ``with`` block guarantees the handle is closed; the file is
    # read once instead of being opened twice.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count comes from the first line as read.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def standRegres(xArr, yArr):
    """Fit ordinary least squares via the normal equations.

    Args:
        xArr: Feature rows, shape (m, n); include a constant column if an
            intercept is wanted.
        yArr: The m target values.

    Returns:
        The (n, 1) weight matrix solving ``(X^T X) w = X^T y``, or ``None``
        (after printing a message) when the determinant of ``X^T X`` is zero.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    xTx = xMat.T * xMat
    if np.linalg.det(xTx) == 0.0:  # zero determinant: not invertible
        print("This matrix is singular, cannot do inverse")
        return None
    # Solve the linear system directly instead of forming the inverse.
    return np.linalg.solve(xTx, xMat.T * yMat)
if __name__ == "__main__":
    # Demo: fit ordinary least squares and plot the fitted line over the
    # raw samples.
    import matplotlib.pyplot as plt

    xArr, yArr = loadDataSet(r'C:\...\ex0.txt')
    ws = standRegres(xArr, yArr)
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Scatter the second feature column against the labels.
    # https://blog.csdn.net/lilong117194/article/details/78288795
    ax.scatter(xMat[:, 1].flatten().A[0], np.asarray(yArr))

    # Sort a copy column-wise (ascending) so the fitted line is drawn from
    # left to right; predictions are computed on the sorted copy only.
    xCopy = xMat.copy()
    xCopy.sort(0)
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat)
    plt.show()
2. 局部加权线性回归
import numpy as np
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into features and labels.

    The number of feature columns is taken from the first line of the file
    (its tab-separated field count minus one). For every non-blank line the
    first ``numFeat`` fields become one feature row and the last field
    becomes the label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of feature rows (lists of
        floats) and a list of float labels. Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single ``with`` block guarantees the handle is closed; the file is
    # read once instead of being opened twice.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count comes from the first line as read.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict one point with locally weighted linear regression.

    Each training row gets the weight ``exp(-d^2 / (2 k^2))``, where ``d``
    is its Euclidean distance to ``testPoint`` (a Gaussian / radial basis
    kernel). A weighted least-squares fit is then evaluated at ``testPoint``.

    Args:
        testPoint: Feature row of length n to predict for.
        xArr: Training feature rows, shape (m, n).
        yArr: The m training targets.
        k: Kernel bandwidth; smaller values make the fit more local.

    Returns:
        A 1x1 matrix holding the prediction, or ``None`` (after printing a
        message) when the weighted ``X^T W X`` has zero determinant.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    point = np.asmatrix(testPoint)

    # Kernel weights for all rows at once. The weight matrix is diagonal,
    # so only its diagonal is stored rather than a dense m x m matrix.
    diff = np.asarray(xMat - point)
    w = np.exp((diff ** 2).sum(axis=1) / (-2.0 * k ** 2))
    weightedX = np.asmatrix(w[:, np.newaxis] * np.asarray(xMat))  # W * X

    xTx = xMat.T * weightedX
    if np.linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    # (W X)^T y equals X^T W y because W is diagonal.
    ws = np.linalg.solve(xTx, weightedX.T * yMat)
    return point * ws
def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run ``lwlr`` for every row of ``testArr``.

    Args:
        testArr: Rows to predict for.
        xArr: Training feature rows.
        yArr: Training targets.
        k: Kernel bandwidth passed through to ``lwlr``.

    Returns:
        A 1-D float array with one prediction per row of ``testArr``. A row
        for which ``lwlr`` returns ``None`` is stored as NaN.
    """
    m = np.shape(testArr)[0]
    yHat = np.zeros(m)
    for i in range(m):
        estimate = lwlr(testArr[i], xArr, yArr, k)
        # Extract the scalar explicitly: assigning a 1x1 matrix to a float
        # slot relies on a deprecated array-to-scalar conversion.
        yHat[i] = np.nan if estimate is None else np.asarray(estimate).item()
    return yHat
if __name__ == "__main__":
    # Demo: locally weighted regression on ex0.txt, plotted over the samples.
    xArr, yArr = loadDataSet(r'C:\...\ex0.txt')
    lwlr(xArr[0], xArr, yArr, 1)  # estimate for a single point
    yHat = lwlrTest(xArr, xArr, yArr, 0.003)

    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    # Order the samples by the second feature column so the fitted curve is
    # drawn from left to right. A flat index array avoids indexing the
    # matrix with an (m, 1) matrix index.
    srtInd = np.asarray(xMat[:, 1]).ravel().argsort()
    xSort = xMat[srtInd]

    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(xSort[:, 1], yHat[srtInd])
    ax.scatter(xMat[:, 1].flatten().A[0], np.asarray(yArr),
               s=2, c='red')
    plt.show()
3. 岭回归
import numpy as np
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into features and labels.

    The number of feature columns is taken from the first line of the file
    (its tab-separated field count minus one). For every non-blank line the
    first ``numFeat`` fields become one feature row and the last field
    becomes the label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of feature rows (lists of
        floats) and a list of float labels. Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single ``with`` block guarantees the handle is closed; the file is
    # read once instead of being opened twice.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count comes from the first line as read.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def ridgeRegres(xMat, yMat, lam=0.2):
    """Solve the ridge regression system ``(X^T X + lam * I) w = X^T y``.

    Args:
        xMat: Feature matrix, shape (m, n). Plain 2-D arrays are accepted
            and treated as matrices.
        yMat: Target column, shape (m, 1).
        lam: Ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        The (n, 1) weight matrix, or ``None`` (after printing a message)
        when the penalized matrix has zero determinant.
    """
    # Coerce to np.matrix so ``*`` is a matrix product even for ndarray
    # inputs (for ndarrays ``*`` would multiply elementwise).
    xMat = np.asmatrix(xMat)
    yMat = np.asmatrix(yMat)
    denom = xMat.T * xMat + np.eye(np.shape(xMat)[1]) * lam
    if np.linalg.det(denom) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    # Solve the linear system directly instead of forming the inverse.
    return np.linalg.solve(denom, xMat.T * yMat)
def ridgeTest(xArr, yArr, numTestPts=30):
    """Compute ridge weights over a sweep of penalty values.

    Targets are centered; each feature column is centered and divided by
    its variance. ``ridgeRegres`` is then run with ``lam = exp(i - 10)``
    for ``i = 0 .. numTestPts - 1``.

    Args:
        xArr: Feature rows, shape (m, n).
        yArr: The m target values.
        numTestPts: Number of penalty values to try (default 30).

    Returns:
        A ``(numTestPts, n)`` array; row ``i`` holds the weights for
        ``lam = exp(i - 10)``.

    Note:
        A ``None`` result from ``ridgeRegres`` is not handled here.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    yMat = yMat - np.mean(yMat, 0)

    xMeans = np.mean(xMat, 0)
    xVar = np.asarray(np.var(xMat, 0))
    # A constant column has zero variance; divide it by 1 so it becomes all
    # zeros after centering instead of NaN.
    xVar = np.where(xVar == 0, 1.0, xVar)
    xMat = (xMat - xMeans) / xVar

    wMat = np.zeros((numTestPts, np.shape(xMat)[1]))
    for i in range(numTestPts):
        ws = ridgeRegres(xMat, yMat, np.exp(i - 10))
        wMat[i, :] = ws.T
    return wMat
if __name__ == "__main__":
    # Demo: plot how the ridge weights change across the penalty sweep.
    features, targets = loadDataSet(r'C:\...\abalone.txt')
    weightPaths = ridgeTest(features, targets)

    import matplotlib.pyplot as plt
    axes = plt.figure().add_subplot(111)
    # Eight lines in total: the value of each of the 8 feature weights
    # under the different lambda values.
    axes.plot(weightPaths)
    plt.show()
4. 前向逐步回归
import numpy as np
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into features and labels.

    The number of feature columns is taken from the first line of the file
    (its tab-separated field count minus one). For every non-blank line the
    first ``numFeat`` fields become one feature row and the last field
    becomes the label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple: a list of feature rows (lists of
        floats) and a list of float labels. Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first line.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # A single ``with`` block guarantees the handle is closed; the file is
    # read once instead of being opened twice.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count comes from the first line as read.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def regularize(xMat):
    """Center each column and divide it by its variance.

    Args:
        xMat: 2-D feature matrix or array; it is not modified.

    Returns:
        A new matrix/array of the same shape with column means removed and
        each column divided by its variance. Columns with zero variance are
        returned as all zeros rather than NaN.
    """
    inMeans = np.mean(xMat, 0)
    inVar = np.asarray(np.var(xMat, 0))
    # Guard constant columns: dividing by a zero variance would yield NaN.
    inVar = np.where(inVar == 0, 1.0, inVar)
    # The arithmetic builds a new object, so no explicit copy is needed.
    return (xMat - inMeans) / inVar
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Args:
        yArr: Observed values (any array-like).
        yHatArr: Predicted values, broadcastable against ``yArr``.

    Returns:
        The sum of squared elementwise differences.
    """
    # np.asarray makes the square elementwise for lists and np.matrix
    # inputs as well (``**`` on an np.matrix is a matrix power).
    residual = np.asarray(yArr) - np.asarray(yHatArr)
    return (residual ** 2).sum()
def stageWise(xArr, yArr, eps=0.01, numIt=100):
    """Run forward stagewise linear regression.

    Targets are centered and features are passed through ``regularize``.
    Each iteration tries changing every single weight by ``+eps`` and
    ``-eps``, keeps the one change with the lowest residual sum of squares,
    and records the resulting weight vector.

    Args:
        xArr: Feature rows, shape (m, n).
        yArr: The m target values.
        eps: Step size applied to one weight per iteration.
        numIt: Number of iterations.

    Returns:
        A ``(numIt, n)`` array; row ``i`` holds the weights after
        iteration ``i``.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    yMat = yMat - np.mean(yMat, 0)
    xMat = regularize(xMat)
    n = np.shape(xMat)[1]

    ws = np.zeros((n, 1))
    returnMat = np.zeros((numIt, n))
    wsMax = ws.copy()
    for i in range(numIt):
        lowestError = np.inf
        for j in range(n):
            for sign in (-1, 1):  # try decreasing and increasing weight j
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                yTest = xMat * wsTest
                rssE = rssError(yMat.A, yTest.A)
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat
if __name__ == "__main__":
    # Demo: plot the weight trajectories of forward stagewise regression.
    features, targets = loadDataSet(r'C:\...\abalone.txt')
    weightHistory = stageWise(features, targets, 0.01, 5000)

    import matplotlib.pyplot as plt
    axes = plt.figure().add_subplot(111)
    # One line per feature column: its weight after each iteration.
    axes.plot(weightHistory)
    plt.show()
|
|