|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 糖逗 于 2020-11-14 13:01 编辑
参考书籍:《机器学习实战》
1.传统线性回归
- import numpy as np
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into features and labels.

    Each non-blank line holds tab-separated floats. The last column is the
    target value; the columns before it are the features. The number of
    feature columns is taken from the first non-blank line.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        per-row feature lists and ``labelMat`` is the list of target values.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer columns than the first row.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # One ``with`` block: the file is read a single time and always closed.
    with open(fileName) as fr:
        for line in fr:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if numFeat is None:
                numFeat = len(line.split('\t')) - 1
            curLine = line.strip().split('\t')
            # Index explicitly so a short row still raises IndexError.
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def standRegres(xArr, yArr):
    """Fit ordinary least squares through the normal equations.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 1-D array-like of target values, one per sample.

    Returns:
        The weights as an ``(n, 1)`` ``numpy.matrix``, or ``None`` (after
        printing a message) when the determinant of ``X^T X`` is exactly 0.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    xTx = xMat.T * xMat
    if np.linalg.det(xTx) == 0:
        # A zero determinant means X^T X has no inverse.
        print("This matrix is singular, cannot do inverse")
        return None
    return np.linalg.inv(xTx) * (xMat.T * yMat)
if __name__ == "__main__":
    # Demo: fit a straight line to ex0.txt and plot it over the raw points.
    import matplotlib.pyplot as plt

    xArr, yArr = loadDataSet(r'C:\...\ex0.txt')
    ws = standRegres(xArr, yArr)
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Background on flatten()/.A usage:
    # https://blog.csdn.net/lilong117194/article/details/78288795
    ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])

    # Sort a copy of the samples in ascending order so the fitted line is
    # drawn left to right. sort(0) sorts every column independently.
    xCopy = xMat.copy()
    xCopy.sort(0)
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat)
    plt.show()
复制代码
2.局部加权线性回归
- import numpy as np
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into features and labels.

    Each non-blank line holds tab-separated floats. The last column is the
    target value; the columns before it are the features. The number of
    feature columns is taken from the first non-blank line.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        per-row feature lists and ``labelMat`` is the list of target values.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer columns than the first row.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # One ``with`` block: the file is read a single time and always closed.
    with open(fileName) as fr:
        for line in fr:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if numFeat is None:
                numFeat = len(line.split('\t')) - 1
            curLine = line.strip().split('\t')
            # Index explicitly so a short row still raises IndexError.
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def lwlr(testPoint, xArr, yArr, k=1.0):
    """Predict one point with locally weighted linear regression.

    Every training sample is weighted by a Gaussian (radial basis) kernel
    of its distance to ``testPoint``,
    ``w_j = exp(-|testPoint - x_j|^2 / (2 k^2))``, and the weighted normal
    equations ``ws = (X^T W X)^-1 X^T W y`` are solved.

    Args:
        testPoint: 1-D array-like with the features of the query point.
        xArr: 2-D array-like of training samples, one row per sample.
        yArr: 1-D array-like of training targets.
        k: Kernel bandwidth; smaller values weight nearby samples more.

    Returns:
        The prediction as a 1x1 ``numpy.matrix``, or ``None`` (after
        printing a message) when the determinant of ``X^T W X`` is exactly 0.
    """
    samples = np.asarray(xArr, dtype=float)
    targets = np.asarray(yArr, dtype=float).reshape(-1, 1)
    point = np.asarray(testPoint, dtype=float).reshape(1, -1)

    # The weight matrix is diagonal, so a length-m vector of kernel weights
    # is enough; this avoids allocating a dense m x m matrix per query.
    diff = point - samples
    weights = np.exp(np.sum(diff * diff, axis=1) / (-2.0 * k ** 2))
    weightedSamples = weights[:, np.newaxis] * samples  # W X

    xTx = samples.T.dot(weightedSamples)  # X^T W X
    if np.linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    # (W X)^T y equals X^T W y because W is diagonal.
    ws = np.linalg.inv(xTx).dot(weightedSamples.T.dot(targets))
    return np.asmatrix(point.dot(ws))
def lwlrTest(testArr, xArr, yArr, k=1.0):
    """Run ``lwlr`` for every row of ``testArr``.

    Args:
        testArr: 2-D array-like of query points, one row per point.
        xArr: 2-D array-like of training samples.
        yArr: 1-D array-like of training targets.
        k: Kernel bandwidth forwarded to ``lwlr``.

    Returns:
        A 1-D ``numpy.ndarray`` with one prediction per query point. A
        point whose fit was singular (``lwlr`` returned ``None``) is
        recorded as NaN.
    """
    m = np.shape(testArr)[0]
    yHat = np.zeros(m)
    for i in range(m):
        estimate = lwlr(testArr[i], xArr, yArr, k)
        # lwlr yields a 1x1 matrix; pull out the scalar explicitly, because
        # storing a size-1 array into a scalar slot is deprecated in NumPy.
        yHat[i] = np.nan if estimate is None else estimate.item()
    return yHat
if __name__ == "__main__":
    # Demo: locally weighted regression on ex0.txt, plotted over the data.
    import matplotlib.pyplot as plt

    xArr, yArr = loadDataSet(r'C:\...\ex0.txt')
    lwlr(xArr[0], xArr, yArr, 1)  # estimate for a single point
    yHat = lwlrTest(xArr, xArr, yArr, 0.003)

    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    # A flat sort index on the second column orders both the samples and
    # their predictions, so the fitted curve is drawn left to right.
    order = xMat[:, 1].A1.argsort()
    xSort = xMat[order]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(xSort[:, 1].A1, yHat[order])
    ax.scatter(xMat[:, 1].A1, np.asarray(yArr), s=2, c='red')
    plt.show()
复制代码
3.岭回归
- import numpy as np
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into features and labels.

    Each non-blank line holds tab-separated floats. The last column is the
    target value; the columns before it are the features. The number of
    feature columns is taken from the first non-blank line.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        per-row feature lists and ``labelMat`` is the list of target values.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer columns than the first row.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # One ``with`` block: the file is read a single time and always closed.
    with open(fileName) as fr:
        for line in fr:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if numFeat is None:
                numFeat = len(line.split('\t')) - 1
            curLine = line.strip().split('\t')
            # Index explicitly so a short row still raises IndexError.
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def ridgeRegres(xMat, yMat, lam=0.2):
    """Solve ridge regression for a single regularisation strength.

    Computes ``ws = (X^T X + lam * I)^-1 X^T y``.

    Args:
        xMat: 2-D samples, one row per sample. A ``numpy.matrix`` as before;
            ndarrays and nested lists are now accepted as well.
        yMat: Targets as a column vector; a flat or row-shaped sequence is
            transposed into a column.
        lam: Ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        The weights as an ``(n, 1)`` ``numpy.matrix``, or ``None`` (after
        printing a message) when the determinant of the regularised matrix
        is exactly 0.
    """
    # Coerce to matrices so ``*`` is always a matrix product; with plain
    # ndarrays it would silently be an elementwise product.
    xMat = np.asmatrix(xMat)
    yMat = np.asmatrix(yMat)
    if yMat.shape[0] != xMat.shape[0]:
        yMat = yMat.T
    denom = xMat.T * xMat + np.eye(np.shape(xMat)[1]) * lam
    if np.linalg.det(denom) == 0:
        print("This matrix is singular, cannot do inverse")
        return None
    return np.linalg.inv(denom) * (xMat.T * yMat)
def ridgeTest(xArr, yArr, numTestPts=30):
    """Compute ridge weights over an exponential sweep of lambda values.

    The targets are centred, the features are centred and divided by their
    variance, and ``ridgeRegres`` is run with ``lam = exp(i - 10)`` for
    ``i = 0 .. numTestPts - 1``.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 1-D array-like of target values.
        numTestPts: Number of lambda values to try (default 30).

    Returns:
        A ``(numTestPts, n)`` ``numpy.ndarray``; row ``i`` holds the weights
        obtained with ``lam = exp(i - 10)``.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    yMat = yMat - np.mean(yMat, 0)
    xMeans = np.mean(xMat, 0)
    # NOTE(review): this scales by the variance, not the standard deviation,
    # and a constant (zero-variance) column would divide by zero here.
    xVar = np.var(xMat, 0)
    xMat = (xMat - xMeans) / xVar
    wMat = np.zeros((numTestPts, np.shape(xMat)[1]))
    for i in range(numTestPts):
        ws = ridgeRegres(xMat, yMat, np.exp(i - 10))
        wMat[i, :] = ws.T
    return wMat
if __name__ == "__main__":
    # Demo: plot how the ridge weights change across the lambda sweep.
    features, targets = loadDataSet(r'C:\...\abalone.txt')
    weightPaths = ridgeTest(features, targets)

    import matplotlib.pyplot as plt
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    # One line per feature (eight for this data set, per the original
    # note); the x axis is the index of the lambda value tried.
    axes.plot(weightPaths)
    plt.show()
复制代码
4.前向逐步回归
- import numpy as np
def loadDataSet(fileName):
    """Load a tab-separated numeric text file into features and labels.

    Each non-blank line holds tab-separated floats. The last column is the
    target value; the columns before it are the features. The number of
    feature columns is taken from the first non-blank line.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        per-row feature lists and ``labelMat`` is the list of target values.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a row has fewer columns than the first row.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # One ``with`` block: the file is read a single time and always closed.
    with open(fileName) as fr:
        for line in fr:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            if numFeat is None:
                numFeat = len(line.split('\t')) - 1
            curLine = line.strip().split('\t')
            # Index explicitly so a short row still raises IndexError.
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat
def regularize(xMat):
    """Centre each column and scale it by its variance.

    Args:
        xMat: 2-D samples (``numpy.matrix`` or ndarray), one row per sample.
            The input is not modified.

    Returns:
        A new array of the same shape holding ``(x - mean) / var`` per
        column. A constant (zero-variance) column comes back as all zeros
        instead of NaN from a division by zero.
    """
    inMeans = np.mean(xMat, 0)
    inVar = np.asarray(np.var(xMat, 0))
    # A constant column has zero variance; divide by 1 so its centred
    # (all-zero) values stay zero instead of becoming NaN.
    safeVar = np.where(inVar == 0, 1.0, inVar)
    return (xMat - inMeans) / safeVar
def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Args:
        yArr: Array-like of observed values.
        yHatArr: Array-like of predicted values, same shape as ``yArr``.

    Returns:
        The sum of the squared elementwise differences.
    """
    # Convert to plain ndarrays: lists do not support subtraction, and on a
    # numpy.matrix ``** 2`` would be a matrix power, not an elementwise one.
    residual = np.asarray(yArr) - np.asarray(yHatArr)
    return (residual ** 2).sum()
def stageWise(xArr, yArr, eps=0.01, numIt=100):
    """Run forward stagewise linear regression.

    The targets are centred and the features passed through ``regularize``.
    Starting from all-zero weights, every iteration tries moving each single
    weight by ``+eps`` and ``-eps`` and keeps the one change that gives the
    lowest residual sum of squares.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 1-D array-like of target values.
        eps: Step size applied to one weight per iteration.
        numIt: Number of iterations.

    Returns:
        A ``(numIt, n)`` ``numpy.ndarray``; row ``i`` holds the weights
        after iteration ``i``.
    """
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    yMat = yMat - np.mean(yMat, 0)
    xMat = regularize(xMat)
    n = np.shape(xMat)[1]
    ws = np.zeros((n, 1))
    returnMat = np.zeros((numIt, n))
    wsMax = ws.copy()
    for i in range(numIt):
        lowestError = np.inf
        for j in range(n):
            for sign in (-1, 1):  # try decreasing and increasing weight j
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                yTest = xMat * wsTest
                rssE = rssError(yMat.A, yTest.A)
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        # Adopt the best single-weight change found in this iteration.
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat
if __name__ == "__main__":
    # Demo: plot the stagewise weight trajectories for the abalone data.
    features, targets = loadDataSet(r'C:\...\abalone.txt')
    weightHistory = stageWise(features, targets, 0.01, 5000)

    import matplotlib.pyplot as plt
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    # One line per feature (eight for this data set, per the original
    # note); the x axis is the iteration number.
    axes.plot(weightHistory)
    plt.show()
复制代码 |
|