亚洲国产精品成人精品软件,伊人色综合久久天天伊,国产免费一区二区三区香蕉精

import matplotlib.pyplot as plt
import pandas as pd
plt.rcParams['font.sans-serif'] = 'SimHei' # 設置中文顯示
plt.rcParams['axes.unicode_minus'] = False

df = pd.read_excel('數據.xlsx',index_col=0, parse_dates=['數據時間'])

2.2 在不同的數據集上進行EMD分解

2.2.1 數據集劃分

train_ratio = 0.7  # 訓練集所占比例

n = len(df)

train_size = int(n * train_ratio)



train_df = df.iloc[:train_size]

test_df = df.iloc[train_size:]



print("訓練集長度:", len(train_df))

print("測試集長度:", len(test_df))

在這里只把數據集劃分為訓練集、測試集來驗證EMD分解可能會在不同數據集下產生不同數量的分解數IMF，從而不能建立模型。

2.2.2 EMD在訓練集上分解

from PyEMD import EMD

# 執行EMD分解

emd = EMD()

IMFs = emd(np.array(train_df).reshape(-1))



# 可視化原始信號和IMFs

plt.figure(figsize=(20, 15))



plt.subplot(len(IMFs) + 1, 1, 1)

plt.plot(train_df.index, np.array(train_df).reshape(-1), 'r')

plt.title('原始信號')



for i, IMF in enumerate(IMFs):

    plt.subplot(len(IMFs) + 1, 1, i + 2)

    plt.plot(train_df.index, IMF)

    plt.title(f'IMF {i + 1}')



plt.tight_layout()

plt.show()

可以發現在訓練集上EMD分解存在8個IMF。

2.2.3 EMD在測試集上分解

emd = EMD()

IMFs = emd(np.array(test_df).reshape(-1))



plt.figure(figsize=(20, 15))



plt.subplot(len(IMFs) + 1, 1, 1)

plt.plot(test_df.index, np.array(test_df).reshape(-1), 'r')

plt.title('原始信號')



for i, IMF in enumerate(IMFs):

    plt.subplot(len(IMFs) + 1, 1, i + 2)

    plt.plot(test_df.index, IMF)

    plt.title(f'IMF {i + 1}')



plt.tight_layout()

plt.show()

可以發現在測試集上EMD分解只存在6個IMF，與在訓練集上的8個IMF數量不相等，從而導致模型不能正常構建于是只能考慮對整體數據進行EMD分解保證IMF數量一致，然后在進行數據集劃分，當然這種做法存在數據泄露問題。

2.3 整體數據進行EMD分解

from PyEMD import EMD



emd = EMD()

IMFs = emd(np.array(df).reshape(-1))



# 計算殘差

residue = np.array(df).reshape(-1) - np.sum(IMFs, axis=0)



plt.figure(figsize=(20, 15))



plt.subplot(len(IMFs) + 2, 1, 1)

plt.plot(df.index, np.array(df).reshape(-1), 'r')

plt.title('原始信號')



for i, IMF in enumerate(IMFs):

    plt.subplot(len(IMFs) + 2, 1, i + 2)

    plt.plot(df.index, IMF)

    plt.title(f'IMF {i + 1}')



plt.subplot(len(IMFs) + 2, 1, len(IMFs) + 2)

plt.plot(df.index, residue)

plt.title('殘差')



plt.tight_layout()

plt.show()

EMD分解信號時會產生殘差，這些殘差是原始信號與所有提取的固有模態函數（IMFs）之和的差異，在建模時，可以考慮將殘差加入模型中，以提高模型的性能，也可以不納入模型構建，根據實際情況選擇，這里我們只考慮IMF。

2.4 對各個IMF劃分數據集

# 保存模態分解結果

imf_df = pd.DataFrame(index=df.index)

for i, IMF in enumerate(IMFs):

    imf_df[f'IMF{i + 1}'] = IMF



# 定義劃分比例

train_ratio = 0.7

val_ratio = 0.1

test_ratio = 0.2



# 計算劃分的索引

train_split = int(train_ratio * len(imf_df))

val_split = int((train_ratio + val_ratio) * len(imf_df))



# 劃分數據集

train_set = imf_df.iloc[:train_split]

val_set = imf_df.iloc[train_split:val_split]

test_set = imf_df.iloc[val_split:]

將數據集按照70%的訓練集、10%的驗證集和20%的測試集進行劃分，訓練集用于訓練模型，驗證集用于調整模型超參數和監控模型性能，測試集用于評估模型的泛化性能。

2.5 數據集標準化

from sklearn.preprocessing import MinMaxScaler



def normalize_dataframe(train_set, val_set, test_set):

    scaler = MinMaxScaler()

    scaler.fit(train_set)  # 在訓練集上擬合歸一化模型



    train = pd.DataFrame(scaler.transform(train_set), columns=train_set.columns, index = train_set.index)

    val = pd.DataFrame(scaler.transform(val_set), columns=val_set.columns, index = val_set.index)

    test = pd.DataFrame(scaler.transform(test_set), columns=test_set.columns, index = test_set.index)

    return train, val, test, scaler



train, val, test, scaler = normalize_dataframe(train_set, val_set, test_set)

對數據進行標準化處理，以提高模型的訓練穩定性和收斂速度。

2.6 時間窗口劃分

def prepare_data(data, win_size, target_feature_idxs):

    num_features = data.shape[1]

    X = []  

    y = []  

    for i in range(len(data) - win_size):

        temp_x = data[i:i + win_size, :]  

        temp_y = [data[i + win_size, idx] for idx in target_feature_idxs]  

        X.append(temp_x)

        y.append(temp_y)

    X = np.asarray(X)

    y = np.asarray(y)



    return X, y



win_size = 12 # 時間窗口

target_feature_idxs = np.arange(9) # 指定待預測特征列索引

train_x, train_y = prepare_data(train.values, win_size, target_feature_idxs)

val_x, val_y = prepare_data(val.values, win_size, target_feature_idxs)

test_x, test_y = prepare_data(test.values, win_size, target_feature_idxs)

print("訓練集形狀:", train_x.shape, train_y.shape)

print("驗證集形狀:", val_x.shape, val_y.shape)

print("測試集形狀:", test_x.shape, test_y.shape)

這里不對時間窗口這個內容作過多描述。

2.7 模型構建

from tensorflow.keras.layers import LSTM, Dense

from tensorflow.keras.models import Model

from tensorflow.keras.layers import Input

from tensorflow.keras.layers import Conv1D, MaxPooling1D, Reshape, Flatten



# 輸入維度

input_shape = Input(shape=(train_x.shape[1], train_x.shape[2]))



# LSTM層

lstm_layer = LSTM(256, activation='relu')(input_shape)

# 卷積

shape = Reshape((256, 1))(lstm_layer)

conv = Conv1D(filters=64, kernel_size=7, activation='relu')(shape)

maxpool = MaxPooling1D(pool_size=2)(conv)

fal = Flatten()(maxpool)

# 全連接層

dense_1 = Dense(128, activation='relu')(fal)

dense_2 = Dense(64, activation='relu')(dense_1)



# 輸出層

output_1 = Dense(1, name='IMF1')(dense_2)

output_2 = Dense(1, name='IMF2')(dense_2)

output_3 = Dense(1, name='IMF3')(dense_2)

output_4 = Dense(1, name='IMF4')(dense_2)

output_5 = Dense(1, name='IMF5')(dense_2)

output_6 = Dense(1, name='IMF6')(dense_2)

output_7 = Dense(1, name='IMF7')(dense_2)

output_8 = Dense(1, name='IMF8')(dense_2)

output_9 = Dense(1, name='IMF9')(dense_2)



model = Model(inputs = input_shape, outputs=[output_1, output_2, output_3, output_4, output_5, output_6, 

                                             output_7, output_8, output_9])

model.compile(loss='mse', optimizer='adam')

# 模型擬合

history = model.fit(train_x, [train_y[:,i] for i in range(train_y.shape[1])], epochs=50, batch_size=32, 

                    validation_data=(val_x, [val_y[:,i] for i in range(val_y.shape[1])]))



plt.figure()

plt.plot(history.history['loss'], c='b', label='loss')

plt.plot(history.history['val_loss'], c='g', label='val_loss')

plt.legend()

plt.show()

model.summary()

該模型是一個結合了LSTM和卷積神經網絡的多輸入多輸出模型，用于處理時序數據，其中每個輸出對應于經驗模態分解（EMD）產生的不同的固有模態函數（IMF），當然也可以單獨對每一個IMF建立一個不同參數的預測模型，不一定要像這樣統一建模。

2.8 模型向后預測

def predict_next_11_days(model, input_data):

    input_sequence = input_data.copy()



    # 預測未來 11 天的數據

    future_predictions = []

    for _ in range(11):

        predictions = model.predict(np.expand_dims(input_sequence[-1], axis=0))

        next_data = np.append(input_sequence[-1, 1:], np.array(predictions).reshape(1,9), axis=0)

        input_sequence = np.append(input_sequence, [next_data], axis=0)

        future_predictions.append(predictions)

    future_predictions = np.array(future_predictions).reshape(11, 9)



    return future_predictions



future_predictions = predict_next_11_days(model, test_x[-1:])

future_predictions

2.9 反歸一化

min_values = train_set.min()

max_values = train_set.max()



# 測試集預測結果反歸一化

IMFs = []

for i in range(9):  

    IMF = y_pred[i] * (max_values[i] - min_values[i]) + min_values[i]

    IMFs.append(IMF)



# 向后預測反歸一化

IMFn = []

for i in range(9): 

    IMF = future_predictions[:,i] * (max_values[i] - min_values[i]) + min_values[i]

    IMFn.append(IMF)

2.10 模型預測可視化

plt.figure(figsize=(20, 30), dpi=300)



for i in range(9):  # 假設有12個IMF

    plt.subplot(3, 3, i+1)

    plt.plot(train_set[f'IMF{i+1}'], label='訓練集', color='blue', alpha=0.8)

    plt.plot(val_set[f'IMF{i+1}'], label='驗證集', color='gold', alpha=0.8)

    plt.plot(test_set[f'IMF{i+1}'], label='測試集', color='r', alpha=0.8)

    plt.plot(pd.date_range(start='2020-12-19', end='2021-08-31', freq='D'), IMFs[i].reshape(-1), label='測試集預測', color='navy', alpha=0.8)

    plt.plot(pd.date_range(start='2021-08-31', end='2021-09-10', freq='D'), IMFn[i].reshape(-1), label='向后預測10天', color='limegreen', alpha=0.8)

    plt.legend()

    plt.title(f'IMF{i+1}')

    plt.grid(True)



plt.tight_layout()

plt.show()

可以發現模型預測，在每一個IMF的表現是不一樣的，甚至可能差異比較大，這里我們就可以對這種預測結果不理想的IMF進行單獨建立模型，以提高其模型擬合度，模型擬合效果較好的還是考慮統一建模以減輕工作量。

2.11 各IMF預測結果相加得到最后的預測結果

sum_IMFs = np.sum(IMFs, axis=0)

sum_IMFn = np.sum(IMFn, axis=0)



plt.figure(figsize=(15, 5))

plt.plot(np.sum(train_set, axis=1), label='訓練集', color='blue', alpha=0.8)

plt.plot(np.sum(val_set, axis=1), label='驗證集', color='gold', alpha=0.8)

plt.plot(np.sum(test_set, axis=1), label='測試集', color='r', alpha=0.8)

plt.plot(pd.date_range(start='2020-12-19', end='2021-08-31', freq='D'), sum_IMFs, label='測試集預測', color='navy', alpha=0.8)

plt.plot(pd.date_range(start='2021-08-31', end='2021-09-10', freq='D'), sum_IMFn, label='向后預測10天', color='limegreen', alpha=0.8)

plt.legend()

plt.title('預測結果')

plt.grid(True)

plt.tight_layout()

plt.show()

mse = metrics.mean_squared_error(np.sum(test_set.iloc[12:], axis=1), np.array([i for arr in sum_IMFs for i in arr]))

rmse = np.sqrt(mse)

mae = metrics.mean_absolute_error(np.sum(test_set.iloc[12:], axis=1), np.array([i for arr in sum_IMFs for i in arr]))

r2 = r2_score(np.sum(test_set.iloc[12:], axis=1), np.array([i for arr in sum_IMFs for i in arr]))

print("均方誤差 (MSE):", mse)

print("均方根誤差 (RMSE):", rmse)

print("平均絕對誤差 (MAE):", mae)

print("擬合優度:", r2)