1. Abstract
This post walks through: Python + Empirical Mode Decomposition (EMD) + Particle Swarm Optimization (PSO) + LSTM for short-term bus passenger flow prediction.
Main steps:
- Assemble the features: weather, wind force, time, workday vs. non-workday, holiday vs. non-holiday, temperature, and so on
- Apply Empirical Mode Decomposition (EMD) to the passenger flow series (the results are not great, so use it at your own discretion; a minimal usage sketch follows this list)
- Build the LSTM network, with Adam as the optimizer
- Reshape the training and test sets to match the LSTM's expected input shape
- Set batch_size and epochs, then start training
- Use Particle Swarm Optimization (PSO) to search over the LSTM configuration (this takes a while) and find the best parameters; the parameters searched are: number of neurons in the first layer, dropout rate, and batch_size
- Evaluate the model, save it, and plot its predictions
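As a quick illustration of the decomposition step, here is a minimal, self-contained sketch of PyEMD's EMD on a 1-D series (the synthetic signal is only a stand-in for the real passenger flow column):

import numpy as np
from PyEMD import EMD

# synthetic stand-in for the passenger flow series
t = np.linspace(0, 1, 200)
signal = np.sin(11 * 2 * np.pi * t) + 0.5 * t

# decompose the signal into intrinsic mode functions (IMFs)
imfs = EMD().emd(signal)
print('number of IMFs:', imfs.shape[0])  # imfs[i] is the i-th IMF, same length as signal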
2. Data Description
3. Complete Code
import csv
import math
import os
import random
import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PyEMD import EMD
from keras.layers import Dense, Dropout, LSTM
from keras.models import Sequential
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from other.bus_flow_pred.pso import PSO
# working directory containing the data files
os.chdir(r'E:\项目文件\基于改进的LSTM短时客流预测\数据')
# output directory for saved models and result CSVs
src = 'E:\\项目文件\\基于改进的LSTM短时客流预测\\emd_pso_lstm\\'
total_data = pd.read_excel(r'上车训练数据.xlsx', usecols=range(1, 9))
test_data = pd.read_excel(r'上车测试数据.xlsx', usecols=range(1, 9))
# scaler for the boarding counts
min_max_scaler = preprocessing.MinMaxScaler()
# fit the scaler on the training data only, then apply the same scale to the test data
total_data['card_id'] = min_max_scaler.fit_transform(total_data['card_id'].values.reshape(-1, 1))
test_data['card_id'] = min_max_scaler.transform(test_data['card_id'].values.reshape(-1, 1))
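# Note: predictions come out in the scaled [0, 1] range; to report errors in
# original passenger counts, invert the scaling afterwards, e.g.:
#   y_orig = min_max_scaler.inverse_transform(y_scaled.reshape(-1, 1))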
class DataLoader():
"""一个用于EMD-lstm模型数据加载和转换的类"""
def __init__(self, filename, cols, input_timesteps, seq_len):
"""
:param filename: the name of the file contains the data, type: .csv
:param cols: the features
:param input_timesteps: the length of looking back (1 month or 1 year), unit: hours
:param seq_len: the sum of input_timesteps and pre_len
"""
self.dataframe = pd.read_excel(filename)
self.test_data = pd.read_excel(r'上车测试数据.xlsx', usecols=range(1, 9))
        # prepend one day (61 rows) as padding, mirroring the training side
        self.test_data = pd.concat([self.test_data[:61], self.test_data], axis=0)
self.cols = cols
self.len_train_windows = None
self.input_timesteps = input_timesteps
self.seq_len = seq_len
print('the input cols are:', self.cols)
self.Normalization()
def scale_EMD(self):
train_pro = self.cols[1:]
self.IMFs = EMD().emd(self.dataframe['card_id'].values)
print('the signal is decomposed into ' + str(self.IMFs.shape[0]) + ' parts')
        self.df_names_IMF = {}  # maps 'IMF<i>' to a DataFrame of that IMF plus the other features
for ind, IMF in enumerate(self.IMFs):
IMF_name = 'IMF' + str(ind) + '_card_id'
data = {
IMF_name: self.IMFs[ind]}
IMF_i = pd.DataFrame(data=data).reset_index()
fe = self.dataframe[train_pro].reset_index()
self.df_names_IMF['IMF' + str(ind)] = pd.merge(IMF_i, fe, on='index')
self.test_IMFs = EMD().emd(self.test_data['card_id'].values)
        self.test_names_IMF = {}
for ind, IMF in enumerate(self.test_IMFs):
IMF_name = 'IMF' + str(ind) + '_card_id'
data = {
IMF_name: self.test_IMFs[ind]}
IMF_i = pd.DataFrame(data=data).reset_index()
fe = self.test_data[train_pro].reset_index()
self.test_names_IMF['IMF' + str(ind)] = pd.merge(IMF_i, fe, on='index')
def make_train_test_data(self):
        ts = 61  # look-back window: 61 records per day
train_x = self.data_train[:, 2:]
train_y = self.data_train[:, 1:2]
test_x = self.test_data[:, 2:]
test_y = self.test_data[:, 1:2]
        # ############# build the training and test sets ###################
ts_train_x = np.array([])
ts_train_y = np.array([])
ts_test_x = np.array([])
ts_test_y = np.array([])
        # build the training set as sliding windows of length ts
        print('original shape of the training data:', train_x.shape)
for i in range(train_x.shape[0]):
if i + ts == train_x.shape[0]:
break
ts_train_x = np.append(ts_train_x, train_x[i: i + ts, :])
ts_train_y = np.append(ts_train_y, train_y[i + ts])
        # build the test set the same way
        print('original shape of the test data:', test_x.shape)
for i in range(test_x.shape[0]):
if i + ts == test_x.shape[0]:
break
ts_test_x = np.append(ts_test_x, test_x[i: i + ts, :])
ts_test_y = np.append(ts_test_y, test_y[i + ts])
return ts_train_x.reshape((train_x.shape[0] - ts, ts, train_x.shape[1])), ts_train_y, \
ts_test_x.reshape((test_x.shape[0] - ts, ts, test_x.shape[1])), ts_test_y
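    # Shape illustration (hypothetical numbers): with 600 rows and 7 feature
    # columns, the loops above yield ts_train_x of shape (539, 61, 7) -- one
    # 61-step window per sample -- and ts_train_y of shape (539,).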
def Normalization(self):
        '''
        Normalise the training data and apply the same scaling to the validation/test data.
        '''
self.scale_EMD()
        IMF_number = 8
        print('processing the data of IMF' + str(IMF_number))
        if IMF_number in range(self.IMFs.shape[0]):
            self.data_train_original = self.df_names_IMF['IMF' + str(IMF_number)]
        else:
            print("Oops! IMF_number is not a valid index; it must be between 0 and " + str(self.IMFs.shape[0] - 1))
        # the sliding-window step drops the first day, so prepend one day (61 rows) as padding
        self.data_train_original = pd.concat([self.data_train_original[:61], self.data_train_original], axis=0)
self.min_max_scaler = preprocessing.MinMaxScaler()
self.data_train = self.min_max_scaler.fit_transform(self.data_train_original.values)
self.len_train = len(self.data_train_original)
        IMF_test_number = 6
        if IMF_test_number in range(self.test_IMFs.shape[0]):
            self.test_original = self.test_names_IMF['IMF' + str(IMF_test_number)]
        else:
            print("Oops! IMF_test_number is not a valid index; it must be between 0 and " + str(self.test_IMFs.shape[0] - 1))
self.min_max_scaler = preprocessing.MinMaxScaler()
self.test_data = self.min_max_scaler.fit_transform(self.test_original.values)
self.len_test = len(self.test_original)
data = DataLoader(
filename=os.path.join(r'E:\项目文件\基于改进的LSTM短时客流预测\数据\\', r'上车训练数据.xlsx'),
cols=['card_id', 'feng_1.0', 'feng_2.0', 'feng_3.0', 'work_1', 'work_2', 'tianqi_average_scale',
'temperature_average_scale'],
input_timesteps=61,
seq_len=61
)
X_train, y_train, X_test, y_test = data.make_train_test_data()
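# At this point X_train has shape (n_train_samples, 61, 7) and X_test has
# shape (n_test_samples, 61, 7), matching the LSTM input_shape used below.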
def model_test_score(model, X_test, y_test):
    y_hat = model.predict(X_test)
    y_t = y_test.reshape(-1, 1)
    temp = pd.DataFrame({'yhat': y_hat.ravel(), 'y': y_t.ravel()})
    temp_rmse = np.sqrt(mean_squared_error(temp.y, temp.yhat))
    temp_mse = mean_squared_error(temp.y, temp.yhat)
    print('test RMSE: %.3f' % temp_rmse)
    print('test MSE: %.3f' % temp_mse)
    return temp_rmse, temp_mse
def build_model(layers, neurons, d):
    model_lstm = Sequential()
    # the first LSTM layer consumes the 61-step daily windows; it must return
    # the full sequence so the second LSTM layer receives 3-D input
    model_lstm.add(LSTM(neurons[0], input_shape=(layers[1], layers[2]), return_sequences=True))
    model_lstm.add(Dropout(d))
    model_lstm.add(LSTM(neurons[1], return_sequences=False))
    model_lstm.add(Dropout(d))
    model_lstm.add(Dense(8, kernel_initializer="uniform", activation='relu'))
    model_lstm.add(Dense(1, kernel_initializer="uniform", activation='linear'))
    # adam = keras.optimizers.Adam(decay=0.2)
    # accuracy is meaningless for regression, so only the mse loss is tracked
    model_lstm.compile(loss='mse', optimizer='adam')
    model_lstm.summary()
    return model_lstm
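# Example call matching training_bus below (hypothetical values):
#   model = build_model([1, 61, 7], [48, 32, 1], 0.1)
# i.e. 61 timesteps of 7 features in, 48 units in the first LSTM layer.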
def model_score(model, X_train, y_train, X_test, y_test):
    # with no extra metrics compiled, evaluate() returns the scalar mse loss
    trainScore = model.evaluate(X_train, y_train, verbose=0)
    print('Train Score: %.5f MSE (%.2f RMSE)' % (trainScore, math.sqrt(trainScore)))
    testScore = model.evaluate(X_test, y_test, verbose=0)
    print('Test Score: %.5f MSE (%.2f RMSE)' % (testScore, math.sqrt(testScore)))
    return trainScore, testScore
def writeOneCsv(relate_record, src):
with open(src, 'a', newline='\n') as csvFile:
writer = csv.writer(csvFile)
writer.writerow(relate_record)
def training_bus(X):
    shape = [1, 61, 7]  # [unused, timesteps, features]
    neurons = [int(X[0]), 32, 1]  # first-layer units come from the particle
    dropout = round(X[1], 4)
    batch_size = int(X[2])
model = build_model(shape, neurons, dropout)
model.fit(
X_train,
y_train,
batch_size=batch_size,
epochs=12,
validation_data=(X_test, y_test),
verbose=1)
trainScore, testScore = model_score(model, X_train, y_train, X_test, y_test)
model_test_score(model, X_test, y_test)
model.save(
src + 'neurons' + str(int(X[0])) + '_dropout' + str(dropout) + '_batch_size' + str(batch_size) + '.h5')
finish = [int(X[0]), dropout, batch_size, round(trainScore, 4), round(testScore, 4)]
model_scores.append(finish)
writeOneCsv(finish, src + '模型参数效果比较.csv')
    # once trained, a saved model can be reloaded directly, e.g.:
    # model_lstm = load_model(src + 'neurons' + str(int(X[0])) + '_dropout' + str(dropout) + '_batch_size' + str(batch_size) + '.h5')
pred = model.predict(X_test)
le = len(pred)
y_t = y_test.reshape(-1, 1)
return pred, le, y_t
def function(ps, test, le):
    # mean absolute percentage error between predictions ps and targets test
    ss = sum(((abs(test - ps)) / test) / le)
    return ss
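# The fitness above is the mean absolute percentage error,
#   MAPE = (1/n) * sum(|y_i - yhat_i| / y_i),
# so a smaller value means a better particle.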
# (1) data size
INPUT_SIZE = 1
OUTPUT_SIZE = 1
# (2) PSO Parameters
MAX_EPISODES = 20
MAX_EP_STEPS = 20
c1 = 2
c2 = 2
w = 0.5
pN = 20  # number of particles
model_scores = [['neuron0', 'Dropout', 'batch_size', 'trainScore', 'testScore']]
# (3) LSTM Parameters
dim = 3  # search dimensions: [first-layer neurons, dropout rate, batch_size]
X = np.zeros((pN, dim))  # positions of all particles
V = np.zeros((pN, dim))  # velocities of all particles
pbest = np.zeros((pN, dim))  # best position found by each particle
gbest = np.zeros(dim)  # global best position
p_fit = np.zeros(pN)  # best fitness seen by each particle
print(p_fit.shape)
t1 = time.time()
# demo run of the imported stand-alone PSO implementation (independent of the search below)
pso_example = PSO(popsize=pN, maxgen=20, dim=dim, popmin=-50, popmax=50, c1=c1, c2=c2, w1=w, vmax=3, vmin=-3)
pso_information = pso_example.iter_optimize()
pso_example.drawpicrure(pso_information)
# search bounds for [first-layer neurons, dropout rate, batch_size]
UP = [64, 0.14, 29]
DOWN = [32, 0.05, 16]
# (4) start the search
for i_episode in range(MAX_EPISODES):
    # initialisation
    random.seed(8)
    fit = 1e5  # global best (lowest) fitness so far; the fitness is a MAPE, so smaller is better
    # fitness of the initial particles
    print("computing the initial global best")
    for i in range(pN):
        for j in range(dim):
            V[i][j] = random.uniform(0, 1)
            if j == 1:
                # the dropout rate is continuous
                X[i][j] = random.uniform(DOWN[j], UP[j])
            else:
                # neuron count and batch_size are integers
                X[i][j] = round(random.randint(DOWN[j], UP[j]), 0)
        pbest[i] = X[i]
        pred, le, y_t = training_bus(X[i])
        tmp = function(pred, y_t, le)
        p_fit[i] = tmp
        if tmp < fit:  # lower error means a better global best
            fit = tmp
            gbest = X[i]
    print("initial global best parameters: {:}".format(gbest))
    fitness = []  # best global fitness after each search step
    for j in range(MAX_EP_STEPS):
        fit2 = []
        plt.title("iteration {}".format(i_episode))
        for i in range(pN):
            pred, le, y_t = training_bus(X[i])
            temp = function(pred, y_t, le)
            fit2.append(temp / 1000)
            if temp < p_fit[i]:  # update the individual best
                p_fit[i] = temp
                pbest[i] = X[i]
                if p_fit[i] < fit:  # update the global best
                    gbest = X[i]
                    fit = p_fit[i]
        print("search step: {:}".format(j))
        print("individual best parameters: {:}".format(pbest))
        print("global best parameters: {:}".format(gbest))
        for i in range(pN):
            V[i] = w * V[i] + c1 * random.uniform(0, 1) * (pbest[i] - X[i]) + c2 * random.uniform(0, 1) * (gbest - X[i])
            ww = 1
            for k in range(dim):
                # freeze the particle if the move would leave the search bounds
                if DOWN[k] < X[i][k] + V[i][k] < UP[k]:
                    continue
                else:
                    ww = 0
            X[i] = X[i] + V[i] * ww
        fitness.append(fit)
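# Velocity and position update used above (standard PSO):
#   v_i <- w * v_i + c1 * r1 * (pbest_i - x_i) + c2 * r2 * (gbest - x_i)
#   x_i <- x_i + v_i   (applied only if x_i + v_i stays inside [DOWN, UP])
# with r1, r2 drawn uniformly from [0, 1].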
print('Running time: ', time.time() - t1)
def writeCsv(relate_record, src):
    # csv is already imported at the top of the script
    with open(src, 'w', newline='\n') as csvFile:
        writer = csv.writer(csvFile)
        for row in relate_record:
            try:
                writer.writerow(row)
            except Exception as e:
                print(e)
                print(row)
writeCsv(model_scores, src + 'model_scores.csv')
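After the search finishes, any of the saved models can be reloaded for inference, and the abstract's final step (plotting the predictions) can be done along the following lines. This is a minimal sketch; the .h5 file name is hypothetical, so substitute one actually written by training_bus:

from keras.models import load_model

# hypothetical file name -- use one produced by training_bus above
model = load_model(src + 'neurons48_dropout0.1_batch_size20.h5')
model_test_score(model, X_test, y_test)  # RMSE / MSE on the scaled test set

# plot predicted vs. actual boarding counts (still in the scaled range)
plt.plot(y_test, label='actual')
plt.plot(model.predict(X_test).ravel(), label='predicted')
plt.legend()
plt.show()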
Reposted from: https://blog.csdn.net/qq_30803353/article/details/114791140