import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Select a CJK-capable sans-serif font so the Chinese titles/legends render,
# and draw negative tick labels with an ASCII hyphen instead of the Unicode
# minus sign.
plt.rcParams.update({
    'font.sans-serif': 'SimHei',
    'axes.unicode_minus': False,
})

# Load the training set (Excel) and the test set (CSV); the first column is
# used as the index and the 'date' column is parsed as datetimes.
df_train = pd.read_excel(
    '訓練集.xlsx',
    index_col=0,
    parse_dates=['date'],
)
df_test = pd.read_csv(
    '測試集.csv',
    index_col=0,
    parse_dates=['date'],
)
df_train  # bare expression; presumably a notebook-cell preview of the frame
其中df_train和df_test分別為模型的訓練集和測試集,數據不存在缺失值、異常值。
2.2.1 訓練集時序圖
# Training-set time series: one stacked panel per feature column.
plt.figure(figsize=(15, 10))
for panel, feature in enumerate(('meantemp', 'humidity', 'meanpressure'), start=1):
    plt.subplot(3, 1, panel)
    plt.plot(df_train[feature], color='y', alpha=0.3)
    plt.title(f'{feature}時序圖')
    plt.grid(True)
plt.show()
2.2.2 測試集時序圖
# Test-set time series: one stacked panel per feature column.
plt.figure(figsize=(15, 10))
for panel, feature in enumerate(('meantemp', 'humidity', 'meanpressure'), start=1):
    plt.subplot(3, 1, panel)
    plt.plot(df_test[feature], color='g', alpha=0.3)
    plt.title(f'{feature}時序圖')
    plt.grid(True)
plt.show()
from sklearn.preprocessing import MinMaxScaler
def normalize_dataframe(train_df, test_df):
    """Min-max scale two frames using statistics from the training frame only.

    The scaler is fitted on ``train_df`` and the same fitted transform is then
    applied to both frames, so no information from ``test_df`` influences the
    scaling.

    Args:
        train_df: DataFrame used to fit the scaler.
        test_df: DataFrame transformed with the scaler fitted on ``train_df``.
            It must have the same columns as ``train_df``.

    Returns:
        A ``(train_data, test_data)`` tuple of DataFrames holding the scaled
        values, each keeping the columns and index of its own input frame.
    """
    scaler = MinMaxScaler()
    scaler.fit(train_df)  # fit on the training data only

    # Use the index of the frames that were passed in. The previous version
    # read the module-level df_train/df_test here, which made the function
    # wrong (or raise on a length mismatch) for any other pair of frames.
    train_data = pd.DataFrame(
        scaler.transform(train_df),
        columns=train_df.columns,
        index=train_df.index,
    )
    test_data = pd.DataFrame(
        scaler.transform(test_df),
        columns=test_df.columns,
        index=test_df.index,
    )
    return train_data, test_data
# Scale the training and test frames; normalize_dataframe fits its scaler on
# the first argument (the training set) only.
data_train, data_test = normalize_dataframe(df_train, df_test)
# Bare expression; presumably a notebook-cell preview of the scaled frame.
data_train
歸一化時只使用訓練集的統計量(各特徵的最小值與最大值)進行擬合,並將同一個擬合後的轉換應用於訓練集和測試集,避免直接對所有數據集一起進行歸一化處理從而產生信息泄露。
def prepare_data(data, win_size, target_feature_idxs):
    """Slice a 2-D series into sliding input windows and next-step targets.

    Sample ``i`` uses rows ``i .. i + win_size - 1`` as input and the selected
    columns of row ``i + win_size`` as target.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_features)``.
        win_size: Number of consecutive rows in each input window.
        target_feature_idxs: Column indices to extract as prediction targets.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_samples, win_size, n_features)``
        and ``y`` has shape ``(n_samples, len(target_feature_idxs))``, with
        ``n_samples = max(n_rows - win_size, 0)``.

    Raises:
        ValueError: If ``win_size`` is negative.
    """
    if win_size < 0:
        raise ValueError("win_size must be non-negative")

    data = np.asarray(data)
    target_feature_idxs = list(target_feature_idxs)
    num_samples = len(data) - win_size

    if num_samples <= 0:
        # Not enough rows for a single window: return empty arrays that still
        # carry the window/feature dimensions, so downstream shape handling
        # does not break on a 1-D empty array.
        empty_x = np.empty((0, win_size) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0, len(target_feature_idxs)), dtype=data.dtype)
        return empty_x, empty_y

    X = np.stack([data[i:i + win_size, :] for i in range(num_samples)])
    # The target of window i is row i + win_size, i.e. every row from
    # win_size onwards, restricted to the requested columns.
    y = data[win_size:][:, target_feature_idxs]
    return X, y
# Build the supervised samples: each input is win_size consecutive rows and
# the target is the selected columns of the row that follows.
win_size = 12 # window length: number of time steps per input sample
target_feature_idxs = [0, 1, 2] # column indices of the features to predict
train_x, train_y = prepare_data(data_train.values, win_size, target_feature_idxs)
test_x, test_y = prepare_data(data_test.values, win_size, target_feature_idxs)
# Report the resulting (samples, window, features) / (samples, targets) shapes.
print("訓練集形狀:", train_x.shape, train_y.shape)
print("測試集形狀:", test_x.shape, test_y.shape)
訓練集形狀 (1449, 12, 3) 表示:
1449:樣本數(= 訓練集行數 − 時間窗口大小),即訓練集中有1449個樣本。
12:時間窗口大小,每個樣本有12個時間步長的數據,用於預測下一個時間步的數據。
3:特徵數,每個時間步長有3個特徵(meantemp、humidity 和 meanpressure)。
測試集形狀 (102, 12, 3) 表示:
102:樣本數(= 測試集行數 − 時間窗口大小),即測試集中有102個樣本。
12:時間窗口大小,每個樣本有12個時間步長的數據,用於預測下一個時間步的數據。
3:特徵數,每個時間步長有3個特徵(meantemp、humidity 和 meanpressure)。
from keras.layers import LSTM, Dense
from keras.models import Model
from keras.layers import Input
# Input tensor: one window of (time steps, features), sized from train_x.
n_steps, n_feats = train_x.shape[1:]
input_shape = Input(shape=(n_steps, n_feats))

# Recurrent layer over the window.
lstm_layer = LSTM(units=128, activation='relu')(input_shape)

# Shared fully connected layers.
dense_1 = Dense(units=64, activation='relu')(lstm_layer)
dense_2 = Dense(units=32, activation='relu')(dense_1)

# One single-unit output head per predicted feature.
output_1 = Dense(units=1, name='meantemp')(dense_2)
output_2 = Dense(units=1, name='humidity')(dense_2)
output_3 = Dense(units=1, name='meanpressure')(dense_2)

model = Model(inputs=input_shape, outputs=[output_1, output_2, output_3])
model.compile(optimizer='adam', loss='mse')

# Fit the model. Each output head gets its own target column, and the test
# windows are passed as validation data.
train_targets = [train_y[:, col] for col in range(train_y.shape[1])]
test_targets = [test_y[:, col] for col in range(test_y.shape[1])]
history = model.fit(
    train_x,
    train_targets,
    epochs=100,
    batch_size=32,
    validation_data=(test_x, test_targets),
)
# Plot training and validation loss per epoch, then print the model summary.
plt.figure()
for curve, colour in (('loss', 'b'), ('val_loss', 'g')):
    plt.plot(history.history[curve], c=colour, label=curve)
plt.legend()
plt.show()
model.summary()
這是一個單輸入多輸出的 LSTM 模型,接受包含12個時間步長和3個特徵的輸入序列,在經過一層128個神經元的 LSTM 層和兩個全連接層後,輸出三個單獨的預測結果,分別是 meantemp、humidity 和 meanpressure。
from sklearn import metrics
from sklearn.metrics import r2_score # goodness of fit (R^2)


def _score_target(label, actual, predicted):
    """Compute and print MSE, RMSE, MAE and R^2 for one output head.

    ``predicted`` is flattened to 1-D before scoring. The values are computed
    on whatever scale ``actual`` and ``predicted`` are given in.
    Returns ``(mse, rmse, mae, r2)``.
    """
    flat_pred = np.asarray(predicted).ravel()
    mse = metrics.mean_squared_error(actual, flat_pred)
    rmse = np.sqrt(mse)
    mae = metrics.mean_absolute_error(actual, flat_pred)
    r2 = r2_score(actual, flat_pred)
    print(f"{label}均方誤差 (MSE):", mse)
    print(f"{label}均方根誤差 (RMSE):", rmse)
    print(f"{label}平均絕對誤差 (MAE):", mae)
    print(f"{label}擬合優度:", r2)
    return mse, rmse, mae, r2


# The model returns one prediction array per output head, in head order.
y_pred = model.predict(test_x)

mse_meantemp, rmse_meantemp, mae_meantemp, r2_meantemp = _score_target(
    'meantemp', test_y[:, 0], y_pred[0]
)
mse_humidity, rmse_humidity, mae_humidity, r2_humidity = _score_target(
    'humidity', test_y[:, 1], y_pred[1]
)
mse_meanpressure, rmse_meanpressure, mae_meanpressure, r2_meanpressure = _score_target(
    'meanpressure', test_y[:, 2], y_pred[2]
)
def predict_next_11_days(model, input_data, n_days=11):
    """Roll the model forward step by step, feeding predictions back as input.

    The last window of ``input_data`` is the starting point. At every step the
    model predicts the next row, that row is recorded, and the window is
    shifted by one: the oldest row is dropped and the prediction is appended.

    Args:
        model: Object with a ``predict(batch)`` method. For a batch of shape
            ``(1, win_size, n_features)`` it must return ``n_features`` values
            in total, either as one array or as a list of per-output arrays.
        input_data: Array of shape ``(n_windows, win_size, n_features)``; only
            the last window is used and it is not modified.
        n_days: Number of steps to predict. Defaults to 11.

    Returns:
        Array of shape ``(n_days, n_features)`` with the predicted rows in
        chronological order. The first row is the step immediately after the
        last row of the starting window.

    Raises:
        ValueError: If ``n_days`` is negative.
    """
    if n_days < 0:
        raise ValueError("n_days must be non-negative")

    # Work on a private copy of the most recent window only; there is no need
    # to keep (and repeatedly re-copy) the history of earlier windows.
    window = np.array(input_data[-1], copy=True)
    n_features = window.shape[-1]

    predicted_rows = []
    for _ in range(n_days):
        raw = model.predict(np.expand_dims(window, axis=0))
        # A multi-output model returns a list of (1, 1) arrays; flatten
        # whatever comes back into a single row of n_features values.
        next_row = np.asarray(raw).reshape(n_features)
        predicted_rows.append(next_row)
        window = np.vstack([window[1:], next_row])

    return np.asarray(predicted_rows).reshape(n_days, n_features)
# Forecast forward, starting from the last test window.
future_predictions = predict_next_11_days(model, test_x[-1:])
future_predictions

# Undo the min-max scaling by hand: x = x_scaled * (max - min) + min, using
# the per-feature extremes of the (unscaled) training set.
train_max_meantemp = df_train['meantemp'].max()
train_min_meantemp = df_train['meantemp'].min()
train_max_humidity = df_train['humidity'].max()
train_min_humidity = df_train['humidity'].min()
train_max_meanpressure = df_train['meanpressure'].max()
train_min_meanpressure = df_train['meanpressure'].min()

span_meantemp = train_max_meantemp - train_min_meantemp
span_humidity = train_max_humidity - train_min_humidity
span_meanpressure = train_max_meanpressure - train_min_meanpressure

series_meantemp = future_predictions[:, 0] * span_meantemp + train_min_meantemp
series_humidity = future_predictions[:, 1] * span_humidity + train_min_humidity
series_meanpressure = future_predictions[:, 2] * span_meanpressure + train_min_meanpressure
# Overview figure: one row per feature. The left panel shows training data,
# test data, test predictions and the forward forecast; the right panel shows
# the same curves without the training data.
#
# The date axes are hard-coded and must match the data lengths: the training
# curve skips the first 12 rows, and the test predictions start 12 days into
# the test period.
train_dates = pd.date_range(start='2013-01-13', end='2016-12-31', freq='D')
test_dates = pd.date_range(start='2017-01-01', end='2017-04-24', freq='D')
pred_dates = pd.date_range(start='2017-01-13', end='2017-04-24', freq='D')
# NOTE(review): this range holds 11 dates and begins on the last test date,
# while the legend text says 10 days; the legend string is left unchanged.
future_dates = pd.date_range(start='2017-04-24', end='2017-05-04', freq='D')

# (feature, scaled test prediction, train min, train max, forecast, y label)
panels = [
    ('meantemp', y_pred[0], train_min_meantemp, train_max_meantemp,
     series_meantemp, '°C'),
    # NOTE(review): the humidity unit is not stated in this file, so the axis
    # is labelled generically; it was previously mislabelled '°C'.
    ('humidity', y_pred[1], train_min_humidity, train_max_humidity,
     series_humidity, 'humidity'),
    # Previously the left meanpressure panel had xlabel 'pa' and ylabel '°C'.
    ('meanpressure', y_pred[2], train_min_meanpressure, train_max_meanpressure,
     series_meanpressure, 'pa'),
]

plt.figure(figsize=(20, 15), dpi=300)
for row, (feature, scaled_pred, low, high, forecast, unit) in enumerate(panels):
    # Map the scaled test predictions back to the original units.
    test_pred = scaled_pred * (high - low) + low
    for col, with_train in enumerate((True, False)):
        plt.subplot(3, 2, 2 * row + col + 1)
        if with_train:
            plt.plot(train_dates, df_train.iloc[12:][feature],
                     label='訓練集', color='blue', alpha=0.8)
        plt.plot(test_dates, df_test[feature],
                 label='測試集', color='gold', alpha=0.8)
        plt.plot(pred_dates, test_pred,
                 label='測試集預測', color='navy', alpha=0.8)
        plt.plot(future_dates, forecast,
                 label='向后預測10天', color='limegreen', alpha=0.8)
        plt.legend()
        plt.title(feature)
        plt.grid(True)
        plt.xlabel('time')
        plt.ylabel(unit)
plt.show()