Adaboost完整案例：使用Iris數據集從零實現Adaboost二分類

完整案例

我們使用Kaggle上的Iris數據集來分類不同類型的鳶尾花。為便於二分類，代碼會去掉Iris-virginica類別，只保留兩個類別，並只使用花萼長度（SepalLengthCm）和花萼寬度（SepalWidthCm）兩個特徵。我們將用代碼從零開始實現Adaboost算法，包含數據預處理、模型訓練和圖形可視化。

在代碼中：

1. 從零實現Adaboost算法，以單特徵閾值的決策樹樁作為弱分類器。

2. 使用決策邊界、樣本權重分布、弱分類器權重（alpha）以及每輪加權誤差等可視化圖形來直觀展示訓練過程。

# AdaBoost implemented from scratch with decision stumps as weak learners,
# demonstrated on a two-class, two-feature subset of the Iris data set.
#
# Only numpy is needed by the classes below.  The demo-only dependencies
# (pandas, matplotlib, scikit-learn) are imported inside main() so that
# WeakClassifier and AdaBoost can be imported without them and without
# triggering file reads or plot windows.
import numpy as np


class WeakClassifier:
    """Decision stump: thresholds one feature and predicts -1 or +1.

    Attributes are set by ``train``:
        threshold: cut value on the chosen feature.
        feature_index: column of ``X`` the stump looks at.
        polarity: +1 predicts -1 for values below the threshold,
            -1 predicts -1 for values above it.
    """

    def __init__(self):
        self.threshold = None
        self.feature_index = None
        self.polarity = 1

    def train(self, X, y, weights):
        """Pick the (feature, threshold, polarity) with the lowest weighted error.

        Args:
            X: 2-D array of samples (rows) by features (columns).
            y: 1-D array of labels in {-1, +1}.
            weights: 1-D array of per-sample weights, same length as ``y``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        weights = np.asarray(weights)

        n_features = X.shape[1]
        min_error = float('inf')

        # Exhaustive search: every feature, every observed value as a
        # candidate threshold, both orientations of the inequality.
        for feature_i in range(n_features):
            feature_values = X[:, feature_i]
            for threshold in np.unique(feature_values):
                for polarity in (1, -1):
                    predictions = np.ones(y.shape)
                    predictions[polarity * feature_values < polarity * threshold] = -1
                    error = np.sum(weights[y != predictions])

                    # Strict '<' keeps the first candidate found on ties.
                    if error < min_error:
                        self.polarity = polarity
                        self.threshold = threshold
                        self.feature_index = feature_i
                        min_error = error

    def predict(self, X):
        """Return a float array of -1/+1 predictions, one per row of ``X``."""
        X = np.asarray(X)
        feature_values = X[:, self.feature_index]
        predictions = np.ones(X.shape[0])
        predictions[self.polarity * feature_values < self.polarity * self.threshold] = -1
        return predictions


class AdaBoost:
    """Binary AdaBoost over ``WeakClassifier`` stumps (labels in {-1, +1}).

    After ``train`` the following lists hold one entry per boosting round:
        classifiers: the fitted stumps.
        alphas: the vote weight of each stump.
        errors: the weighted training error of each stump.
        sample_weights_history: the normalised sample weights after the
            round's update.
    """

    def __init__(self, n_classifiers=10):
        self.n_classifiers = n_classifiers
        self.classifiers = []
        self.alphas = []
        self.errors = []
        self.sample_weights_history = []

    def train(self, X, y):
        """Fit ``n_classifiers`` stumps, re-weighting samples after each one.

        Calling ``train`` again discards the previously fitted ensemble.

        Args:
            X: 2-D array of samples by features.
            y: 1-D array of labels in {-1, +1}.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Start from a clean state so a second call does not stack a new
        # ensemble on top of the previous one.
        self.classifiers = []
        self.alphas = []
        self.errors = []
        self.sample_weights_history = []

        n_samples = X.shape[0]
        weights = np.full(n_samples, 1 / n_samples)

        for _ in range(self.n_classifiers):
            classifier = WeakClassifier()
            classifier.train(X, y, weights)
            predictions = classifier.predict(X)

            # Weighted fraction of misclassified samples.
            error = np.dot(weights, predictions != y)
            # The small constant avoids division by zero for a perfect stump.
            alpha = 0.5 * np.log((1 - error) / (error + 1e-10))

            # y * predictions is +1 when correct and -1 when wrong, so wrong
            # samples are scaled up and correct ones down; then renormalise.
            weights *= np.exp(-alpha * y * predictions)
            weights /= np.sum(weights)

            self.classifiers.append(classifier)
            self.alphas.append(alpha)
            self.errors.append(error)
            self.sample_weights_history.append(weights.copy())

    def predict(self, X):
        """Return the alpha-weighted majority vote as a float array of -1/+1.

        An exactly tied vote is resolved to +1 (np.sign would give 0, which
        is not a valid label).

        Raises:
            RuntimeError: if called before ``train``.
        """
        if not self.classifiers:
            raise RuntimeError("AdaBoost.predict called before train")

        clf_preds = [
            alpha * clf.predict(X)
            for clf, alpha in zip(self.classifiers, self.alphas)
        ]
        scores = np.sum(clf_preds, axis=0)
        return np.where(scores >= 0, 1.0, -1.0)


def plot_decision_boundary(X, y, model, ax):
    """Draw ``model``'s predicted regions on ``ax`` and overlay the points.

    Args:
        X: 2-D array with exactly the two features to plot (x and y axes).
        y: labels used to colour the scattered points.
        model: object with a ``predict`` method accepting an (n, 2) array.
        ax: matplotlib axes to draw on.
    """
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # Evaluate the model on a 0.1-step grid covering the data plus a margin.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.3, cmap='coolwarm')
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolor='k', s=20)


def main():
    """Load Iris.csv, train AdaBoost on two classes and show four plots."""
    # Demo-only dependencies; see the note at the top of this script.
    import matplotlib.pyplot as plt
    import pandas as pd
    from sklearn.model_selection import train_test_split

    # Load and prepare the data set.
    data = pd.read_csv("Iris.csv")
    # Keep two classes only, to make this a binary problem.
    data = data[data["Species"] != "Iris-virginica"]
    X = data[["SepalLengthCm", "SepalWidthCm"]].values
    # Map the class labels to -1 and +1.
    y = np.where(data["Species"] == "Iris-setosa", -1, 1)

    # Train/test split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Train the model.
    adaboost = AdaBoost(n_classifiers=10)
    adaboost.train(X_train, y_train)

    # Plot 1: decision boundary, with the test points overlaid.
    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    plot_decision_boundary(X_test, y_test, adaboost, ax)
    ax.set_title("Adaboost Decision Boundary")
    plt.show()

    # Plot 2: sample weights after each boosting round.
    plt.figure(figsize=(10, 6))
    for i, weights in enumerate(adaboost.sample_weights_history):
        plt.plot(range(1, len(weights) + 1), weights, label=f'Iteration {i + 1}')
    plt.xlabel('Sample Index')
    plt.ylabel('Sample Weight')
    plt.title('Sample Weight Distribution Over Iterations')
    plt.legend(loc='upper right')
    plt.show()

    # Plot 3: vote weight (alpha) of each weak classifier.
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(adaboost.alphas) + 1), adaboost.alphas, marker='o', color='b')
    plt.xlabel('Iteration')
    plt.ylabel('Alpha (Classifier Weight)')
    plt.title('Weak Classifier Weights (Alpha) Over Iterations')
    plt.grid(True)
    plt.show()

    # Plot 4: error per boosting round.
    # NOTE: the plotted values are each weak classifier's weighted training
    # error, not the error of the combined ensemble.
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(adaboost.errors) + 1), adaboost.errors, marker='o', color='r')
    plt.xlabel('Iteration')
    plt.ylabel('Error Rate')
    plt.title('Model Error Rate Over Iterations')
    plt.grid(True)
    plt.show()


if __name__ == "__main__":
    main()