手撕经典算法 #5 机器学习篇

本文最后更新于：2025年5月4日下午

本文对机器学习中经典的算法进行了简单的实现和注释。包括：

回归算法（线性回归、逻辑回归、Softmax 回归）
反向传播算法
SGD 优化器

线性回归

import numpy as np

class LinearRegression:
    """Linear regression fitted with full-batch gradient descent.

    The model is ``y_hat = X @ weights + bias`` and the parameters are
    updated along the gradient of the halved mean squared error.

    Attributes:
        lr: Gradient-descent step size.
        n_iters: Number of gradient-descent iterations performed by ``fit``.
        weights: Coefficient vector of shape ``(n_features,)``; ``None``
            until ``fit`` has been called.
        bias: Scalar intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate      # step size
        self.n_iters = n_iters       # number of gradient steps
        self.weights = None          # set by fit()
        self.bias = None             # set by fit()

    def fit(self, X, y):
        """Fit the model to the training data.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` targets; a flat vector or a
                column vector of shape ``(n_samples, 1)``.

        Raises:
            ValueError: If ``X`` is not two-dimensional or ``y`` does not
                contain exactly one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the targets: a column vector of shape (n, 1) would otherwise
        # broadcast against the (n,) predictions into an (n, n) matrix and
        # silently corrupt the gradient.
        y = np.asarray(y, dtype=float).reshape(-1)

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        # Start from the all-zero model.
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            # Forward pass: plain linear combination.
            y_pred = np.dot(X, self.weights) + self.bias

            # The residual drives both gradients, so compute it once.
            error = y_pred - y
            dw = (1 / n_samples) * np.dot(X.T, error)  # gradient w.r.t. weights
            db = (1 / n_samples) * np.sum(error)       # gradient w.r.t. bias

            # Gradient-descent step.
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return the predicted targets for ``X``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples,)`` with the model outputs.
        """
        return np.dot(np.asarray(X, dtype=float), self.weights) + self.bias

逻辑回归

import numpy as np

class LogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    The model is ``p = sigmoid(X @ weights + bias)`` and the parameters are
    updated along the gradient of the mean binary cross-entropy.

    Attributes:
        lr: Gradient-descent step size.
        n_iters: Number of gradient-descent iterations performed by ``fit``.
        weights: Coefficient vector of shape ``(n_features,)``; ``None``
            until ``fit`` has been called.
        bias: Scalar intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.1, n_iters=1000):
        self.lr = learning_rate      # step size
        self.n_iters = n_iters       # number of gradient steps
        self.weights = None          # set by fit()
        self.bias = None             # set by fit()

    def _sigmoid(self, z):
        """Return the logistic sigmoid of ``z`` without overflowing.

        ``1 / (1 + exp(-z))`` overflows in ``exp`` for large negative ``z``.
        Using ``exp(-|z|)``, which always lies in ``[0, 1]``, and picking the
        algebraically equivalent form per sign avoids that.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z)
        return np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

    def fit(self, X, y):
        """Fit the model to the training data.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` binary labels (0 or 1); a flat
                vector or a column vector of shape ``(n_samples, 1)``.

        Raises:
            ValueError: If ``X`` is not two-dimensional or ``y`` does not
                contain exactly one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the labels so a (n, 1) column does not broadcast against
        # the (n,) probabilities into an (n, n) matrix.
        y = np.asarray(y, dtype=float).reshape(-1)

        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        # Start from the all-zero model.
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            # Forward pass: linear score squashed to a probability.
            linear = np.dot(X, self.weights) + self.bias
            y_pred = self._sigmoid(linear)

            # The cross-entropy gradient w.r.t. the linear score is simply
            # (p - y), so the update has the same form as linear regression.
            error = y_pred - y
            dw = (1 / n_samples) * np.dot(X.T, error)  # gradient w.r.t. weights
            db = (1 / n_samples) * np.sum(error)       # gradient w.r.t. bias

            # Gradient-descent step.
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        linear = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return self._sigmoid(linear)

    def predict(self, X, threshold=0.5):
        """Return hard 0/1 predictions.

        A sample is assigned class 1 when its predicted probability is at
        least ``threshold`` (default 0.5).
        """
        return (self.predict_prob(X) >= threshold).astype(int)

Softmax 回归

class SoftmaxRegression:
    """Multiclass (softmax) regression fitted with full-batch gradient descent.

    The model is ``P = softmax(X @ W.T + b)`` and the parameters are updated
    along the gradient of the mean categorical cross-entropy.

    Attributes:
        n_classes: Number of classes ``k``; labels must lie in ``[0, k)``.
        lr: Gradient-descent step size.
        n_iters: Number of gradient-descent iterations performed by ``fit``.
        W: Weight matrix of shape ``(k, n_features)``; ``None`` until ``fit``
            has been called.
        b: Bias vector of shape ``(k,)``; ``None`` until ``fit`` has been
            called.
    """

    def __init__(self, n_classes, learning_rate=0.01, n_iters=1000):
        self.n_classes = n_classes    # number of classes k
        self.lr = learning_rate       # step size
        self.n_iters = n_iters        # number of gradient steps
        self.W = None                 # weight matrix (k x n), set by fit()
        self.b = None                 # bias vector (k,), set by fit()

    def _softmax(self, Z):
        """Return the row-wise softmax of the score matrix ``Z`` (m x k)."""
        # Subtracting each row's maximum leaves the result unchanged but
        # keeps exp() from overflowing.
        exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
        return exp_Z / exp_Z.sum(axis=1, keepdims=True)

    def _one_hot(self, y):
        """Convert class labels into a one-hot matrix of shape (m x k).

        Args:
            y: Array-like of ``m`` integer-valued labels; a flat vector or a
                column vector of shape ``(m, 1)``.

        Raises:
            ValueError: If a label is not integer-valued.
            IndexError: If a label lies outside ``[0, n_classes)``.
        """
        # Flatten so a column vector does not broadcast the fancy index below
        # into an (m, m) selection.
        labels = np.asarray(y).reshape(-1)
        if not np.issubdtype(labels.dtype, np.integer):
            as_int = labels.astype(int)
            if not np.array_equal(as_int, labels):
                raise ValueError("class labels must be integer-valued")
            labels = as_int
        # Negative labels would otherwise wrap around silently and mark one
        # of the last classes instead of failing.
        if labels.size and (labels.min() < 0 or labels.max() >= self.n_classes):
            raise IndexError(
                f"class labels must lie in [0, {self.n_classes})"
            )

        m = labels.shape[0]
        y_one_hot = np.zeros((m, self.n_classes))
        y_one_hot[np.arange(m), labels] = 1
        return y_one_hot

    def fit(self, X, y):
        """Fit the model to the training data.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` integer labels in
                ``[0, n_classes)``.

        Raises:
            ValueError: If ``X`` is not two-dimensional, a label is not
                integer-valued, or ``y`` does not contain one label per row
                of ``X``.
            IndexError: If a label lies outside ``[0, n_classes)``.
        """
        X = np.asarray(X, dtype=float)
        m, n = X.shape
        y_one_hot = self._one_hot(y)            # (m x k)
        if y_one_hot.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y has {y_one_hot.shape[0]} labels"
            )

        # Start from the all-zero model.
        self.W = np.zeros((self.n_classes, n))  # (k x n)
        self.b = np.zeros(self.n_classes)       # (k,)

        for _ in range(self.n_iters):
            # Forward pass: class scores -> class probabilities.
            Z = np.dot(X, self.W.T) + self.b    # (m x k)
            P = self._softmax(Z)                # (m x k)

            # Cross-entropy gradient w.r.t. the scores is (P - Y).
            dZ = P - y_one_hot                  # (m x k)
            dW = (1 / m) * np.dot(dZ.T, X)      # (k x n)
            db = (1 / m) * np.sum(dZ, axis=0)   # (k,)

            # Gradient-descent step.
            self.W -= self.lr * dW
            self.b -= self.lr * db

    def predict_prob(self, X):
        """Return the class-probability matrix (m x k) for ``X``."""
        Z = np.dot(np.asarray(X, dtype=float), self.W.T) + self.b
        return self._softmax(Z)

    def predict(self, X):
        """Return, for each row of ``X``, the index of the most probable class."""
        return np.argmax(self.predict_prob(X), axis=1)