国产**a大片毛片,欧美高清不卡视频,亚洲精品欧美精品中文字幕

from sklearn.preprocessing import LabelEncoder

# Map each distinct label in df['Type'] to an integer code and store the
# codes in a new 'Type_encoded' column.
label_encoder = LabelEncoder()
df['Type_encoded'] = label_encoder.fit_transform(df['Type'])

# One row per distinct (label, code) pair, re-indexed from 0, so the mapping
# can be read off directly.
encoded_types = (
    df[['Type', 'Type_encoded']]
    .drop_duplicates()
    .reset_index(drop=True)
)
print(encoded_types)

使用 LabelEncoder 將數據框 df 中的 Type 列的類別標簽轉換為數值編碼，并輸出每個類別標簽及其對應的編碼結果。

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from hyperopt import fmin, tpe, hp, rand
from sklearn.metrics import accuracy_score
from sklearn import svm
from sklearn import datasets

# Features are every column except the raw label and its integer encoding;
# the target is the integer-encoded label.
X = df.drop(['Type', 'Type_encoded'], axis=1)
y = df['Type_encoded']

# Hold out 30% of the rows as the test set, stratified on the target so both
# splits keep the same class proportions. The fixed random_state makes the
# split (and everything computed from it afterwards) reproducible between runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

超參數搜索

import numpy as np  # np.log is used below; imported here so this cell runs on its own

# Candidate kernels. The same list feeds the search space and the decoding
# step after the search, because fmin reports an hp.choice result as an index
# into the list of options rather than as the option itself.
kernel_list = ['rbf', 'poly']

# Hyperparameter search space.
parameter_space_svc = {
    # Penalty term, sampled log-uniformly between 100 and 1000.
    'C': hp.loguniform('C', np.log(100), np.log(1000)),
    # Kernel type (rbf or poly).
    'kernel': hp.choice('kernel', kernel_list),
    # Kernel coefficient, sampled log-uniformly between 100 and 1000.
    'gamma': hp.loguniform('gamma', np.log(100), np.log(1000)),
}

# Number of objective evaluations so far (only used for progress output).
count = 0


def func(args):
    """Objective for hyperopt: negative accuracy of an SVC built from ``args``.

    Args:
        args: dict of SVC keyword arguments sampled from
            ``parameter_space_svc`` (keys 'C', 'kernel', 'gamma').

    Returns:
        The negated accuracy on (Xtest, Ytest). fmin minimizes its objective,
        so returning the negative makes it maximize accuracy.
    """
    global count
    count += 1
    print(f"\nIteration {count}: Hyperparameters - {args}")

    # Fit a classifier with the sampled hyperparameters.
    clf = svm.SVC(**args)
    clf.fit(Xtrain, Ytrain)

    # NOTE(review): the score is computed on the test split, so the chosen
    # hyperparameters are tuned to the test data and the accuracy reported
    # afterwards is optimistic. Consider scoring with cross-validation on
    # the training split instead.
    prediction = clf.predict(Xtest)
    score = accuracy_score(Ytest, prediction)
    print(f'Test accuracy: {score}')

    return -score


# Run the TPE search for at most 100 evaluations.
best = fmin(func, parameter_space_svc, algo=tpe.suggest, max_evals=100)

# Convert the kernel index reported by fmin back to the kernel name.
best['kernel'] = kernel_list[best['kernel']]

# Collect the best hyperparameters for the final model.
best_params_ = {
    'C': best['C'],
    'kernel': best['kernel'],
    'gamma': best['gamma'],
}

print('\nBest hyperparameters:', best_params_)

使用貝葉斯優化方法，通過定義懲罰項、核函數類型和核函數系數的超參數空間，并利用 hyperopt 庫中的 TPE 算法在 100 次迭代內尋找支持向量機 (SVM) 的最佳超參數組合，以最大化模型在測試集上的預測準確率，最終輸出優化后的超參數配置。需要注意的是，這里直接以測試集準確率作為優化目標，會使之后報告的測試集表現偏于樂觀；更嚴謹的做法是在訓練集上用交叉驗證來評估每組超參數。

模型訓練

# Settings for the final classifier: the tuned values from the search plus
# fixed options.
final_svc_kwargs = {
    'C': best_params_['C'],              # penalty term
    'kernel': best_params_['kernel'],    # kernel type
    'gamma': best_params_['gamma'],      # kernel coefficient
    'decision_function_shape': 'ovr',    # one-vs-rest decision function for multi-class
    'cache_size': 5000,                  # kernel cache size, in MB
    'probability': True,                 # needed for clf.predict_proba
}

# Build the classifier and fit it on the training split.
clf = SVC(**final_svc_kwargs)
clf.fit(Xtrain, Ytrain)

模型評價指標輸出

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Predicted labels for the test split. The original snippet used `pred`
# without ever assigning it, which raises NameError.
pred = clf.predict(Xtest)

# Confusion matrix: rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(Ytest, pred)

# Draw the matrix as an annotated heat map with integer cell counts.
plt.figure(figsize=(10, 7), dpi=1200)
sns.heatmap(
    conf_matrix,
    annot=True,
    annot_kws={'size': 15},
    fmt='d',
    cmap='YlGnBu',
)
plt.xlabel('Predicted Label', fontsize=12)
plt.ylabel('True Label', fontsize=12)
plt.title('Confusion matrix heat map', fontsize=15)
plt.show()

SHAP 分析

import shap

# Background data for the explainer: a sample of 100 rows from Xtest.
background = shap.sample(Xtest, 100)

# Model-agnostic explainer built around the classifier's class probabilities.
explainer = shap.KernelExplainer(clf.predict_proba, background)

# SHAP values for the first 20 test rows only. How many rows to explain is a
# speed trade-off; use more rows (or all of Xtest) if run time allows.
shap_values = explainer.shap_values(Xtest.iloc[:20])

import numpy as np  # imported here so this cell runs on its own

# Split shap_values into one (n_samples, n_features) array per class. The
# original snippet used shap_values_class_0..4 without ever defining them.
# NOTE(review): older shap releases return a list with one array per class,
# newer ones a single array shaped (n_samples, n_features, n_classes) --
# confirm against the installed shap version.
if isinstance(shap_values, list):
    per_class_shap = [np.asarray(values) for values in shap_values]
else:
    stacked = np.asarray(shap_values)
    per_class_shap = [stacked[:, :, k] for k in range(stacked.shape[2])]

# This analysis is written for exactly five classes; the unpacking raises
# ValueError if the model has a different number of classes.
(
    shap_values_class_0,
    shap_values_class_1,
    shap_values_class_2,
    shap_values_class_3,
    shap_values_class_4,
) = per_class_shap

# Per-class feature contribution: mean absolute SHAP value over the samples.
importance_class_0 = np.abs(shap_values_class_0).mean(axis=0)
importance_class_1 = np.abs(shap_values_class_1).mean(axis=0)
importance_class_2 = np.abs(shap_values_class_2).mean(axis=0)
importance_class_3 = np.abs(shap_values_class_3).mean(axis=0)
importance_class_4 = np.abs(shap_values_class_4).mean(axis=0)

# One column per class (keyed by class number), one row per feature.
per_class_importance = [
    importance_class_0,
    importance_class_1,
    importance_class_2,
    importance_class_3,
    importance_class_4,
]
importance_df = pd.DataFrame(
    {f'類別{k}': values for k, values in enumerate(per_class_importance)},
    index=Xtrain.columns,
)

# Rename the columns from class numbers to class names, following the
# Type / Type_encoded lookup table.
type_mapping = {
    0: '類型A',
    1: '類型B',
    2: '類型C',
    3: '類型D',
    4: '類型E',
}
renamed_columns = []
for col in importance_df.columns:
    class_number = int(col.split('類別')[1])
    renamed_columns.append(type_mapping[class_number])
importance_df.columns = renamed_columns

# Bare expression: displays the table when run as a notebook cell.
importance_df

創建一個數據框 importance_df，將每個類別的特征重要性值進行匯總，并根據 Type 和 Type_encoded 對照表將列名從類別編號轉換為實際的類別名稱。

import numpy as np  # imported here so this cell runs on its own

# Order the features by their total importance across classes, ascending, so
# the largest total ends up at the top of the horizontal bar chart. The order
# is computed on the side instead of through a temporary 'row_sum' column, so
# importance_df itself is left unchanged.
row_order = np.argsort(importance_df.sum(axis=1).to_numpy(), kind='stable')
sorted_importance_df = importance_df.iloc[row_order]

elements = sorted_importance_df.index

# One Set2 palette colour per class.
colors = sns.color_palette("Set2", n_colors=len(sorted_importance_df.columns))

# 12x6 inch figure at 1200 DPI.
fig, ax = plt.subplots(figsize=(12, 6), dpi=1200)

# Numeric bar positions, one per feature. The text labels below use the same
# positions, so bars and labels line up whatever the index values are.
y_positions = np.arange(len(elements))

# Running right edge of the stacked bars for each feature, starting at 0.
bottom = np.zeros(len(elements))

# Stack one horizontal bar segment per class.
for color, column in zip(colors, sorted_importance_df.columns):
    widths = sorted_importance_df[column].to_numpy()
    ax.barh(
        y_positions,    # one bar per feature
        widths,         # this class's importance values
        left=bottom,    # start where the previous class ended
        color=color,
        label=column,   # class name for the legend
    )
    # Advance the start position for the next class's segment.
    bottom = bottom + widths

# Axis label and title.
ax.set_xlabel('mean(|SHAP value|) (average impact on model output magnitude)', fontsize=12)
ax.set_title('Feature Importance by Class (各類別下的特征重要性)', fontsize=15)

# Write each feature name at the end of its stacked bar.
for i, el in enumerate(elements):
    ax.text(bottom[i], i, ' ' + str(el), va='center', fontsize=9)

ax.legend(title='Class (類別)', fontsize=10, title_fontsize=12)

# The feature names are drawn next to the bars, so hide the y axis ticks,
# tick labels and axis label.
ax.set_yticks([])
ax.set_yticklabels([])
ax.set_ylabel('')

# Drop the top and right borders for a cleaner look.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.show()

對特征的 SHAP 重要性值進行排序，以便在水平堆疊條形圖中按從最長到最短的順序展示每個特征在各類別下的貢獻度，同時使用 Seaborn 的調色板和 matplotlib 繪制了圖形，以清晰地展示每個類別的特征重要性。

我們有何不同？

API服務商零注冊

多API并行試用

數據驅動選型，提升決策效率

查看全部API→

#AI文本生成大模型API

對比大模型API的內容創意新穎性、情感共鳴力、商業轉化潛力

25個渠道

一鍵對比試用API 限時免費

#AI深度推理大模型API

對比大模型API的邏輯推理準確性、分析深度、可視化建議合理性

10個渠道

一鍵對比試用API 限時免費

數據預處理

超參數搜索

模型訓練

模型評價指標輸出

SHAP 分析

灰狼優化算法(GWO)：從理論到深度學習中的實踐應用

梯度提升集成：LightGBM與XGBoost組合預測

我們有何不同？

熱門場景實測，選對API

#AI文本生成大模型API

#AI深度推理大模型API