单层神经网络:结构与TensorFlow实现

1. 单层神经网络结构

基本结构组成:

输入层 → 输出层
    ↖ W, b ↗

组件说明:

输入层:接收原始数据,节点数 = 特征维度

输出层:产生预测结果,节点数 = 输出类别数(二分类配合 Sigmoid 时只需 1 个节点)

权重矩阵 W:连接输入和输出的参数

偏置向量 b:每个输出节点的偏置项

激活函数:引入非线性变换


2. 数学原理

前向传播公式:

z = W · x + b
a = σ(z)

其中:

x:输入向量 (n×1)

W:权重矩阵 (m×n)。注意:下文代码按批量行向量约定计算 z = x·W + b,因此代码中 W 的形状为 (n×m),即 [input_dim, output_dim]

b:偏置向量 (m×1)

σ:激活函数

a:输出向量 (m×1)

常用激活函数:

Sigmoid:σ(z) = 1/(1+e^(-z))

ReLU:σ(z) = max(0, z)

Softmax:σ(z)_i = e^(z_i) / Σ_j e^(z_j),用于多分类


3. TensorFlow 代码实现

基础版本实现:

import tensorflow as tf
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
 
class SingleLayerNN:
    """Single dense layer with a sigmoid activation, trained with Adam.

    The forward pass computes ``sigmoid(x @ W + b)`` with ``W`` of shape
    ``[input_dim, output_dim]`` and ``b`` of shape ``[output_dim]``.
    """

    def __init__(self, input_dim, output_dim, learning_rate=0.01):
        """Create the trainable parameters and the optimizer.

        Args:
            input_dim: Number of input features (rows of ``W``).
            output_dim: Number of output units (columns of ``W``).
            learning_rate: Learning rate handed to the Adam optimizer.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim

        # Weights start as small Gaussian noise, biases start at zero.
        self.W = tf.Variable(
            tf.random.normal([input_dim, output_dim], stddev=0.1),
            name="weights"
        )
        self.b = tf.Variable(
            tf.zeros([output_dim]),
            name="bias"
        )

        self.optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

    def forward(self, x):
        """Return the sigmoid activations ``sigmoid(x @ W + b)``.

        Args:
            x: Batch of inputs whose last dimension is ``input_dim``.
        """
        # NumPy arrays default to float64 while the parameters are float32;
        # tf.matmul does not accept mixed dtypes, so align the input first.
        x = tf.cast(x, self.W.dtype)
        z = tf.matmul(x, self.W) + self.b
        return tf.nn.sigmoid(z)

    def compute_loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy of ``y_pred`` against ``y_true``."""
        return tf.reduce_mean(
            tf.keras.losses.binary_crossentropy(y_true, y_pred)
        )

    def train_step(self, x, y):
        """Run one gradient update on ``(x, y)``.

        Returns:
            The loss computed during the forward pass, i.e. before the
            parameters are updated.
        """
        with tf.GradientTape() as tape:
            predictions = self.forward(x)
            loss = self.compute_loss(y, predictions)

        # Differentiate the loss w.r.t. both parameters and apply the update.
        gradients = tape.gradient(loss, [self.W, self.b])
        self.optimizer.apply_gradients(zip(gradients, [self.W, self.b]))

        return loss

    def predict(self, x):
        """Return the predicted probabilities for ``x`` (same as ``forward``)."""
        return self.forward(x)

    def accuracy(self, y_true, y_pred):
        """Return the fraction of predictions that match the labels.

        Probabilities above 0.5 are counted as class 1.
        """
        predictions = tf.cast(y_pred > 0.5, tf.float32)
        # tf.equal needs both operands in the same dtype; labels may arrive
        # as float64 or integer arrays.
        labels = tf.cast(y_true, tf.float32)
        return tf.reduce_mean(
            tf.cast(tf.equal(labels, predictions), tf.float32)
        )
 
# 生成示例数据
def generate_data():
    """Build the toy binary-classification dataset used by the training demo.

    Returns:
        A ``(X, y)`` pair: the feature matrix cast to float32 and the labels
        reshaped to a single column and cast to float32.
    """
    # Two informative features, nothing redundant, one cluster per class;
    # the fixed seed keeps the dataset reproducible.
    dataset_options = {
        "n_samples": 1000,
        "n_features": 2,
        "n_redundant": 0,
        "n_informative": 2,
        "n_clusters_per_class": 1,
        "random_state": 42,
    }
    features, labels = make_classification(**dataset_options)
    label_column = labels.reshape(-1, 1)
    return features.astype(np.float32), label_column.astype(np.float32)
 
# 训练模型
def train_model():
    """Train a ``SingleLayerNN`` on the generated data with full-batch updates.

    Every 100th epoch (starting with epoch 0) the model is evaluated on the
    train and test splits, the training loss and the test accuracy are
    recorded, and a progress line is printed.

    Returns:
        ``(model, train_losses, test_accuracies)`` where the two lists hold
        one entry per evaluated epoch.
    """
    features, labels = generate_data()
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    model = SingleLayerNN(input_dim=2, output_dim=1, learning_rate=0.01)

    num_epochs = 1000
    eval_interval = 100
    train_losses = []
    test_accuracies = []

    for epoch in range(num_epochs):
        train_loss = model.train_step(X_train, y_train)

        # Only evaluate and log on the reporting epochs.
        if epoch % eval_interval != 0:
            continue

        train_acc = model.accuracy(y_train, model.predict(X_train))
        test_acc = model.accuracy(y_test, model.predict(X_test))

        train_losses.append(train_loss.numpy())
        test_accuracies.append(test_acc.numpy())

        print(f"Epoch {epoch}: Loss = {train_loss:.4f}, "
              f"Train Acc = {train_acc:.4f}, Test Acc = {test_acc:.4f}")

    return model, train_losses, test_accuracies
 
# Run the training demo as a top-level statement and keep the trained model
# together with the loss / test-accuracy lists that train_model() returns.
model, losses, accuracies = train_model()

使用 Keras 的高级实现:

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
 
# 生成模拟数据
def generate_data(n_samples=1000):
    """Generate a shuffled two-blob binary classification dataset.

    Class 0 is drawn from a unit Gaussian centred at (-2, -2) and class 1
    from a unit Gaussian centred at (2, 2). The global NumPy random state is
    re-seeded with 42 on every call, so repeated calls with the same
    ``n_samples`` return identical data.

    Args:
        n_samples: Total number of rows to generate. When it is odd, class 1
            receives the extra sample.

    Returns:
        ``(X, y)`` with ``X`` of shape ``(n_samples, 2)`` and ``y`` of shape
        ``(n_samples, 1)`` holding 0.0 / 1.0 labels.

    Raises:
        ValueError: If ``n_samples`` is negative.
    """
    if n_samples < 0:
        raise ValueError(f"n_samples must be non-negative, got {n_samples}")

    np.random.seed(42)

    # Class 0 takes the floor half and class 1 the remainder, so an odd
    # n_samples still produces exactly n_samples rows. Without this the
    # permutation below would index one row past the end of the arrays.
    n_class0 = n_samples // 2
    n_class1 = n_samples - n_class0

    # Class 0: centred at (-2, -2)
    class0_x = np.random.randn(n_class0, 2) + np.array([-2, -2])
    class0_y = np.zeros((n_class0, 1))

    # Class 1: centred at (2, 2)
    class1_x = np.random.randn(n_class1, 2) + np.array([2, 2])
    class1_y = np.ones((n_class1, 1))

    X = np.vstack([class0_x, class1_x])
    y = np.vstack([class0_y, class1_y])

    # Shuffle features and labels with the same permutation.
    indices = np.random.permutation(n_samples)
    return X[indices], y[indices]
 
# Hand-written NumPy network with one hidden layer (input -> hidden -> output)
class SimpleNeuralNetwork:
    """Fully connected network with one sigmoid hidden layer, in plain NumPy.

    Both the hidden layer and the output layer use a sigmoid activation, and
    training minimises the mean binary cross-entropy with full-batch
    gradient descent.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logarithms.
    _EPS = 1e-12

    def __init__(self, input_size, hidden_size, output_size):
        """Initialise weights with small Gaussian noise and biases with zeros.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
        """
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, x):
        """Return the element-wise logistic sigmoid of ``x``.

        The input is clipped to [-250, 250] so ``np.exp`` cannot overflow.
        """
        return 1 / (1 + np.exp(-np.clip(x, -250, 250)))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``.

        The argument is the activation ``sigmoid(z)``, not the
        pre-activation ``z``; the derivative is then ``x * (1 - x)``.
        """
        return x * (1 - x)

    def forward(self, X):
        """Run the forward pass and cache the intermediates for ``backward``.

        Returns:
            The output activations ``a2``.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, output):
        """Compute the gradients of the mean cross-entropy loss.

        Must be called after ``forward`` on the same ``X`` because it reads
        the cached hidden activations ``self.a1``.

        Returns:
            ``(dW1, db1, dW2, db2)``, each shaped like its parameter.
        """
        m = X.shape[0]
        # For a sigmoid output with cross-entropy loss the gradient w.r.t.
        # the pre-activation simplifies to (output - y).
        dz2 = output - y
        dW2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        da1 = np.dot(dz2, self.W2.T)
        dz1 = da1 * self.sigmoid_derivative(self.a1)
        dW1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        return dW1, db1, dW2, db2

    def update_weights(self, dW1, db1, dW2, db2, learning_rate):
        """Apply one in-place gradient-descent step to all parameters."""
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, X, y, epochs=1000, learning_rate=0.01):
        """Train with full-batch gradient descent.

        Args:
            X: Input features, one row per sample.
            y: 0/1 labels with the same shape as the network output.
            epochs: Number of gradient steps.
            learning_rate: Step size for ``update_weights``.

        Returns:
            ``(losses, accuracies)`` sampled (and printed) every 100th epoch,
            starting with epoch 0.
        """
        losses = []
        accuracies = []

        for i in range(epochs):
            output = self.forward(X)

            # A saturated sigmoid returns exactly 1.0 in float64, which makes
            # log(1 - output) = -inf and the loss NaN. Clipping keeps the
            # loss finite; the gradient below still uses the raw output.
            clipped = np.clip(output, self._EPS, 1 - self._EPS)
            loss = -np.mean(
                y * np.log(clipped) + (1 - y) * np.log(1 - clipped)
            )

            predictions = (output > 0.5).astype(int)
            accuracy = np.mean(predictions == y)

            dW1, db1, dW2, db2 = self.backward(X, y, output)
            self.update_weights(dW1, db1, dW2, db2, learning_rate)

            if i % 100 == 0:
                losses.append(loss)
                accuracies.append(accuracy)
                print(f'Epoch {i}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}')

        return losses, accuracies
 
def train_model():
    """Fit the hand-written NumPy network and report its final accuracy.

    Returns:
        ``(model, losses, accuracies)``: the trained network and the loss /
        accuracy values returned by its ``train`` method.
    """
    features, labels = generate_data()

    network = SimpleNeuralNetwork(input_size=2, hidden_size=4, output_size=1)
    losses, accuracies = network.train(
        features, labels, epochs=1000, learning_rate=0.01
    )

    # Accuracy is measured on the same data the network was trained on.
    predicted_positive = network.forward(features) > 0.5
    final_accuracy = np.mean(predicted_positive == labels)
    print(f"Custom Model - Final Accuracy: {final_accuracy:.4f}")

    return network, losses, accuracies
 
def create_keras_model(input_dim=2):
    """Build and compile a single-layer Keras classifier.

    The model is one ``Dense`` unit with a sigmoid activation, compiled with
    the Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        input_dim: Number of input features. Defaults to 2, the feature
            count of the generated demo data.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Local import: the file only imports Dense from keras.layers.
    from tensorflow.keras.layers import Input

    model = Sequential([
        # Declare the input shape with an explicit Input object; passing
        # ``input_dim`` to the first layer is deprecated in current Keras.
        Input(shape=(input_dim,)),
        Dense(1, activation='sigmoid', name='single_layer'),
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model
 
# 使用Keras模型
def train_keras_model():
    """Train the Keras single-layer model and print its test accuracy.

    The generated data is split 80/20; the held-out part serves both as
    validation data during fitting and as the final evaluation set.

    Returns:
        ``(model, history)``: the fitted model and the object returned by
        ``model.fit``.
    """
    features, labels = generate_data()
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )
    held_out = (X_test, y_test)

    model = create_keras_model()

    fit_options = dict(
        epochs=1000,
        batch_size=32,
        validation_data=held_out,
        verbose=0,
    )
    history = model.fit(X_train, y_train, **fit_options)

    # evaluate() returns the loss first; only the accuracy is reported.
    _, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Keras Model - Test Accuracy: {test_acc:.4f}")

    return model, history
 
# 可视化结果
def plot_results(model, X, y):
    """Show the raw data next to the model's predictions over a 2-D grid.

    Args:
        model: Object whose ``predict`` method accepts an array of
            two-column points.
        X: Feature array; columns 0 and 1 are plotted.
        y: Label array indexed as ``y[:, 0]`` with values 0 and 1.
    """
    class_styles = (
        (y[:, 0] == 0, 'red', 'Class 0'),
        (y[:, 0] == 1, 'blue', 'Class 1'),
    )

    def scatter_classes():
        # One scatter call per class so each gets its own legend entry.
        for mask, colour, label in class_styles:
            plt.scatter(X[mask, 0], X[mask, 1], c=colour, label=label)

    def decorate(title):
        plt.title(title)
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.legend()

    plt.figure(figsize=(12, 4))

    # Left panel: the data points only.
    plt.subplot(1, 2, 1)
    scatter_classes()
    decorate('Original Data')

    # Right panel: predictions on a grid covering the data plus a margin of 1.
    plt.subplot(1, 2, 2)
    x_lo, x_hi = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_lo, y_hi = X[:, 1].min() - 1, X[:, 1].max() + 1
    grid_x, grid_y = np.meshgrid(
        np.arange(x_lo, x_hi, 0.1),
        np.arange(y_lo, y_hi, 0.1),
    )

    grid_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    surface = model.predict(grid_points)
    surface = surface.reshape(grid_x.shape)

    plt.contourf(grid_x, grid_y, surface, alpha=0.4)
    scatter_classes()
    decorate('Decision Boundary')

    plt.tight_layout()
    plt.show()
 
# 运行示例
if __name__ == "__main__":
    # Generate the dataset that is plotted at the end of the script.
    X, y = generate_data()

    # Train the Keras implementation (it prints its own test accuracy).
    print("=== Keras单层神经网络 ===")
    keras_model, history = train_keras_model()

    # Plot the data and the trained model's predictions over the plane.
    plot_results(keras_model, X, y)

运行得到下图



4. 关键特点

优点:

  • 简单易懂:结构清晰,易于理解和实现
  • 训练快速:参数少,计算复杂度低
  • 可解释性强:权重直接对应特征重要性

局限性:

  • 只能解决线性可分问题
  • 表达能力有限
  • 无法学习复杂特征

5. 应用场景

1. 二分类问题(如垃圾邮件分类)

2. 线性回归(使用线性激活函数)

3. 简单模式识别

4. 教学和入门学习

这个单层神经网络是理解深度学习的基础,虽然简单但包含了神经网络的所有核心概念。


版权声明:本文为CSDN博主「你一身傲骨怎能输」的原创文章,
遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/limenga102/article/details/154789681

最新文章