鱼C论坛

 找回密码
 立即注册
查看: 2006|回复: 3

[技术交流] python实现线性回归

[复制链接]
发表于 2020-11-14 10:23:01 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能^_^

您需要 登录 才可以下载或查看,没有账号?立即注册

x
本帖最后由 糖逗 于 2020-11-14 13:01 编辑

参考书籍:《机器学习实战》

1.传统线性回归
import numpy as np

def loadDataSet(fileName):
    """Load a tab-separated numeric data file into features and labels.

    Every non-blank line is split on tabs. The number of feature columns is
    the field count of the file's first line minus one; the last field of
    each line is taken as the label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of per-row
        feature lists (floats) and ``labelMat`` is the list of float labels.
        Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first line implies.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count is derived from the raw first line of the file.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines such as a trailing newline
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat


def standRegres(xArr, yArr):
    """Fit ordinary least squares with the normal equations.

    Computes ``ws = (X^T X)^-1 X^T y``.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 1-D sequence of target values, one per sample.

    Returns:
        The weights as an ``n x 1`` ``np.matrix``, or ``None`` (after
        printing a message) when the determinant of ``X^T X`` is exactly 0.
    """
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    xTx = xMat.T * xMat
    if np.linalg.det(xTx) == 0:  # zero determinant: the matrix is not invertible
        print("This matrix is singular, cannot do inverse")
        return None
    ws = np.linalg.inv(xTx) * (xMat.T * yMat)
    return ws




if __name__ == "__main__":
    import matplotlib.pyplot as plt

    xArr, yArr = loadDataSet(r'C:\...\ex0.txt')
    ws = standRegres(xArr, yArr)
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # See https://blog.csdn.net/lilong117194/article/details/78288795
    # Scatter the raw samples: feature column 1 against the labels.
    ax.scatter(xMat[:, 1].flatten().A[0], yMat.T[:, 0].flatten().A[0])

    # Sort a copy of the features in ascending order (per column) so the
    # fitted line is drawn from left to right.
    xCopy = xMat.copy()
    xCopy.sort(0)
    yHat = xCopy * ws
    ax.plot(xCopy[:, 1], yHat)
    plt.show()


2.局部加权线性回归
import numpy as np

def loadDataSet(fileName):
    """Load a tab-separated numeric data file into features and labels.

    Every non-blank line is split on tabs. The number of feature columns is
    the field count of the file's first line minus one; the last field of
    each line is taken as the label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of per-row
        feature lists (floats) and ``labelMat`` is the list of float labels.
        Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first line implies.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count is derived from the raw first line of the file.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines such as a trailing newline
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

def lwlr(testPoint, xArr, yArr, k = 1.0):
    """Predict one point with locally weighted linear regression.

    Each training row gets a Gaussian (radial basis) weight
    ``exp(-||testPoint - x_j||^2 / (2 * k^2))``, and a weighted least-squares
    fit is solved for this query point alone.

    Args:
        testPoint: Feature vector of the query point (same width as a row
            of ``xArr``).
        xArr: 2-D array-like of training samples, one row per sample.
        yArr: 1-D sequence of training targets.
        k: Kernel bandwidth; smaller values weight nearby samples more.

    Returns:
        ``testPoint * ws`` (the prediction), or ``None`` (after printing a
        message) when the weighted ``X^T W X`` has a determinant of exactly 0.
    """
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    # Squared Euclidean distance from the query point to every training row,
    # computed in one vectorized pass instead of a per-row Python loop.
    diffs = np.asarray(xMat - testPoint)
    sqDist = np.sum(diffs ** 2, axis=1)
    # Diagonal weight matrix built from the radial basis kernel.
    weights = np.asmatrix(np.diag(np.exp(sqDist / (-2.0 * k ** 2))))
    xTx = xMat.T * (weights * xMat)
    if np.linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return None
    ws = np.linalg.inv(xTx) * (xMat.T * weights * yMat)
    return testPoint * ws

def lwlrTest(testArr, xArr, yArr, k = 1.0):
    """Run locally weighted regression for every row of ``testArr``.

    Args:
        testArr: 2-D array-like of query points, one row per point.
        xArr: 2-D array-like of training samples.
        yArr: 1-D sequence of training targets.
        k: Kernel bandwidth forwarded to ``lwlr``.

    Returns:
        1-D ``np.ndarray`` with one prediction per query point. An entry is
        NaN when ``lwlr`` returned ``None`` for that point.
    """
    m = np.shape(testArr)[0]
    yHat = np.zeros(m)
    for i in range(m):
        pred = lwlr(testArr[i], xArr, yArr, k)
        # Pull out the single value explicitly: storing a 1x1 matrix into a
        # float slot relies on a deprecated implicit conversion, and a None
        # result (singular system) cannot be stored at all.
        yHat[i] = np.nan if pred is None else np.asarray(pred).item()
    return yHat

if __name__ == "__main__":
    xArr, yArr = loadDataSet(r'C:\...\ex0.txt')
    lwlr(xArr[0], xArr, yArr, 1)  # estimate for a single point
    yHat = lwlrTest(xArr, xArr, yArr, 0.003)
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    # Order the samples by feature column 1 so the curve is drawn left to right.
    srtInd = xMat[:, 1].argsort(0)
    xSort = xMat[srtInd][:, 0, :]
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(xSort[:, 1], yHat[srtInd])
    # Overlay the raw samples as small red dots.
    ax.scatter(xMat[:, 1].flatten().A[0], np.asmatrix(yArr).T.flatten().A[0],
               s = 2, c = 'red')
    plt.show()


3.岭回归
import numpy as np

def loadDataSet(fileName):
    """Load a tab-separated numeric data file into features and labels.

    Every non-blank line is split on tabs. The number of feature columns is
    the field count of the file's first line minus one; the last field of
    each line is taken as the label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of per-row
        feature lists (floats) and ``labelMat`` is the list of float labels.
        Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first line implies.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count is derived from the raw first line of the file.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines such as a trailing newline
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

def ridgeRegres(xMat, yMat, lam = 0.2):
    """Solve ridge regression: ``ws = (X^T X + lam * I)^-1 X^T y``.

    Args:
        xMat: Feature matrix; ``*`` is used as a matrix product, so an
            ``np.matrix`` is expected.
        yMat: Column matrix of targets.
        lam: Ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        The weight column matrix, or ``None`` (after printing a message)
        when the penalized matrix has a determinant of exactly 0.
    """
    featureCount = np.shape(xMat)[1]
    penalty = np.eye(featureCount) * lam
    regularized = xMat.T * xMat + penalty
    if np.linalg.det(regularized) != 0:
        return np.linalg.inv(regularized) * (xMat.T * yMat)
    print("This matrix is singular, cannot do inverse")
    return None

def ridgeTest(xArr, yArr):
    """Compute ridge weights for 30 exponentially spaced penalties.

    The targets are centred and every feature column is centred and scaled
    before fitting. The penalties are ``exp(i - 10)`` for ``i`` in 0..29.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 1-D sequence of target values.

    Returns:
        ``np.ndarray`` of shape ``(30, n_features)``; row ``i`` holds the
        weights fitted with penalty ``exp(i - 10)``.
    """
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    yMat = yMat - np.mean(yMat, 0)
    xMeans = np.mean(xMat, 0)
    xVar = np.var(xMat, 0)
    # NOTE(review): columns are divided by their variance, not their standard
    # deviation; kept as-is so the resulting weights do not change.
    xMat = (xMat - xMeans) / xVar
    numTestPts = 30  # number of different lambda values tried
    wMat = np.zeros((numTestPts, np.shape(xMat)[1]))
    for i in range(numTestPts):
        ws = ridgeRegres(xMat, yMat, np.exp(i - 10))
        wMat[i, :] = ws.T
    return wMat

if __name__ == "__main__":
    abX, abY = loadDataSet(r'C:\...\abalone.txt')
    ridgeWeights = ridgeTest(abX, abY)
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    # One curve per feature (eight in total), showing how that feature's
    # weight changes across the different lambda values.
    ax.plot(ridgeWeights)
    plt.show()

4.前向逐步回归
import numpy as np

def loadDataSet(fileName):
    """Load a tab-separated numeric data file into features and labels.

    Every non-blank line is split on tabs. The number of feature columns is
    the field count of the file's first line minus one; the last field of
    each line is taken as the label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of per-row
        feature lists (floats) and ``labelMat`` is the list of float labels.
        Both are empty for an empty file.

    Raises:
        ValueError: If a field cannot be parsed as a float.
        IndexError: If a line has fewer fields than the first line implies.
    """
    dataMat = []
    labelMat = []
    numFeat = None
    # The context manager guarantees the handle is closed even if parsing fails.
    with open(fileName) as fr:
        for line in fr:
            if numFeat is None:
                # Column count is derived from the raw first line of the file.
                numFeat = len(line.split('\t')) - 1
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines such as a trailing newline
            curLine = stripped.split('\t')
            dataMat.append([float(curLine[i]) for i in range(numFeat)])
            labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

def regularize(xMat):
    """Return a centred and scaled copy of ``xMat``.

    Each column has its mean subtracted and is then divided by its variance.
    The input itself is left untouched.

    Args:
        xMat: 2-D NumPy array or matrix, one row per sample.

    Returns:
        A new array/matrix of the same shape holding the scaled columns.
    """
    work = xMat.copy()
    colMeans = work.mean(axis=0)
    colVars = work.var(axis=0)
    return (work - colMeans) / colVars

def rssError(yArr, yHatArr):
    """Return the residual sum of squares between targets and predictions.

    Args:
        yArr: NumPy array of observed values.
        yHatArr: NumPy array of predicted values, compatible in shape.

    Returns:
        The sum of the squared element-wise differences.
    """
    residuals = yArr - yHatArr
    squared = residuals ** 2
    return squared.sum()

def stageWise(xArr, yArr, eps = 0.01, numIt = 100):
    """Run forward stagewise regression and record the weights per iteration.

    On every iteration each single weight is tried with a step of ``+eps``
    and ``-eps``. The one change that gives the lowest residual sum of
    squares is kept.

    Args:
        xArr: 2-D array-like of samples, one row per sample.
        yArr: 1-D sequence of target values.
        eps: Step size applied to one weight per iteration.
        numIt: Number of iterations.

    Returns:
        ``np.ndarray`` of shape ``(numIt, n_features)``; row ``i`` is the
        weight vector after iteration ``i``.
    """
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    yMat = yMat - np.mean(yMat, 0)
    xMat = regularize(xMat)
    n = np.shape(xMat)[1]
    ws = np.zeros((n, 1))
    returnMat = np.zeros((numIt, n))
    wsMax = ws.copy()
    for i in range(numIt):
        lowestError = np.inf
        for j in range(n):
            for sign in (-1, 1):  # try decreasing and increasing weight j
                wsTest = ws.copy()
                wsTest[j] += eps * sign
                yTest = xMat * wsTest
                rssE = rssError(yMat.A, yTest.A)
                if rssE < lowestError:
                    lowestError = rssE
                    wsMax = wsTest
        # Commit the best single-weight change found in this iteration.
        ws = wsMax.copy()
        returnMat[i, :] = ws.T
    return returnMat

if __name__ == "__main__":
    xArr, yArr = loadDataSet(r'C:\...\abalone.txt')
    res = stageWise(xArr, yArr, 0.01, 5000)
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    # One curve per feature (eight in total), showing how that feature's
    # weight changes from one iteration to the next.
    ax.plot(res)
    plt.show()

本帖被以下淘专辑推荐:

想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复

使用道具 举报

 楼主| 发表于 2020-11-14 12:32:37 | 显示全部楼层
想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复

使用道具 举报

 楼主| 发表于 2020-11-14 13:08:09 | 显示全部楼层
补充一个知识点:对 NumPy 的矩阵对象(np.matrix)x,x.I 表示求 x 的逆矩阵,等价于 np.linalg.inv(x)
想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复 支持 反对

使用道具 举报

 楼主| 发表于 2020-12-7 16:06:04 | 显示全部楼层
本帖最后由 糖逗 于 2020-12-7 16:20 编辑

基于批量梯度下降进行参数更新的线性回归
import numpy as np
import matplotlib.pyplot as plt
# Loss function: mean squared error of the line over the samples.
def compute_error_for_line_given_points(b, w, points):
    """Return the mean squared error of ``y = w * x + b`` over ``points``.

    Args:
        b: Intercept of the line.
        w: Slope of the line.
        points: 2-D NumPy array; column 0 holds x and column 1 holds y.

    Returns:
        The squared residuals summed over all rows, divided by the row count.

    Raises:
        ZeroDivisionError: If ``points`` has no rows.
    """
    count = len(points)
    squared_sum = sum(
        (points[k, 1] - (w * points[k, 0] + b)) ** 2 for k in range(count)
    )
    return squared_sum / count

# Gradient computation (batch gradient descent: every sample contributes to
# each gradient) followed by the parameter update.
# This performs exactly one update step.
def step_gradient(b_current, w_current, points, learningRate):
    """Apply one batch gradient-descent step to the line parameters.

    Args:
        b_current: Current intercept.
        w_current: Current slope.
        points: 2-D NumPy array; column 0 holds x and column 1 holds y.
        learningRate: Step size multiplied into both gradients.

    Returns:
        ``[new_b, new_w]``, the updated intercept and slope.
    """
    grad_b = 0
    grad_w = 0
    N = len(points)
    for row in range(N):
        x_val = points[row, 0]
        y_val = points[row, 1]
        residual = (w_current * x_val + b_current) - y_val
        # d(loss)/db contribution: 2 * (w*x + b - y), averaged over N
        grad_b += (2 / N) * residual
        # d(loss)/dw contribution: 2 * (w*x + b - y) * x, averaged over N
        grad_w += (2 / N) * x_val * residual
    updated_b = b_current - (learningRate * grad_b)
    updated_w = w_current - (learningRate * grad_w)
    return [updated_b, updated_w]

# Repeats the gradient-descent update for a fixed number of iterations.
def gradient_descent_runner(points, starting_b, starting_w, learning_rate, num_iterations):
    """Run batch gradient descent for a fixed number of steps.

    Args:
        points: Sequence of ``[x, y]`` pairs (anything ``np.array`` accepts).
        starting_b: Initial intercept.
        starting_w: Initial slope.
        learning_rate: Step size passed to ``step_gradient``.
        num_iterations: Number of update steps to perform.

    Returns:
        ``[b, w]`` after the final step; the starting values are returned
        unchanged when ``num_iterations`` is 0.
    """
    b = starting_b
    w = starting_w
    # Convert once: the sample array does not change between iterations.
    data = np.array(points)
    for _ in range(num_iterations):
        b, w = step_gradient(b, w, data, learning_rate)
    return [b, w]


if __name__ == "__main__":
    points = [[6, 24], [2, 80], [9, 2], [1, 100], [2, 98], [10, 12], [4, 77], [4.5, 60],[6.5, 15]]
    data = np.array(points)
    plt.scatter(x = data[:, 0], y = data[:, 1])
    # The choice of starting parameters matters a lot, especially the
    # learning rate.
    b, w = gradient_descent_runner(points, 100, -10, 0.02, 500)
    x = np.random.rand(100) * 10
    # Evaluate the fitted line for all x at once instead of growing an array
    # with np.append, which copies the whole array on every call.
    y = w * x + b
    plt.plot(x, y)
    # Without show() nothing is displayed when this runs as a plain script.
    plt.show()

代码源自:《Tensorflow深度学习》(龙良曲)
想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

小黑屋|手机版|Archiver|鱼C工作室 ( 粤ICP备18085999号-1 | 粤公网安备 44051102000585号)

GMT+8, 2025-1-17 06:10

Powered by Discuz! X3.4

© 2001-2023 Discuz! Team.

快速回复 返回顶部 返回列表