Pytorch LSTM预测趋势

数据集-data.csv

使用文章:https://www.jianshu.com/p/38df71cad1f6里的数据集(经典的航空乘客数据集,按月统计的乘客数变化趋势),这里直接拷贝在下面:

Jan-49,112
Feb-49,118
Mar-49,132
Apr-49,129
May-49,121
Jun-49,135
Jul-49,148
Aug-49,148
Sep-49,136
Oct-49,119
Nov-49,104
Dec-49,118
Jan-50,115
Feb-50,126
Mar-50,141
Apr-50,135
May-50,125
Jun-50,149
Jul-50,170
Aug-50,170
Sep-50,158
Oct-50,133
Nov-50,114
Dec-50,140
Jan-51,145
Feb-51,150
Mar-51,178
Apr-51,163
May-51,172
Jun-51,178
Jul-51,199
Aug-51,199
Sep-51,184
Oct-51,162
Nov-51,146
Dec-51,166
Jan-52,171
Feb-52,180
Mar-52,193
Apr-52,181
May-52,183
Jun-52,218
Jul-52,230
Aug-52,242
Sep-52,209
Oct-52,191
Nov-52,172
Dec-52,194
Jan-53,196
Feb-53,196
Mar-53,236
Apr-53,235
May-53,229
Jun-53,243
Jul-53,264
Aug-53,272
Sep-53,237
Oct-53,211
Nov-53,180
Dec-53,201
Jan-54,204
Feb-54,188
Mar-54,235
Apr-54,227
May-54,234
Jun-54,264
Jul-54,302
Aug-54,293
Sep-54,259
Oct-54,229
Nov-54,203
Dec-54,229
Jan-55,242
Feb-55,233
Mar-55,267
Apr-55,269
May-55,270
Jun-55,315
Jul-55,364
Aug-55,347
Sep-55,312
Oct-55,274
Nov-55,237
Dec-55,278
Jan-56,284
Feb-56,277
Mar-56,317
Apr-56,313
May-56,318
Jun-56,374
Jul-56,413
Aug-56,405
Sep-56,355
Oct-56,306
Nov-56,271
Dec-56,306
Jan-57,315
Feb-57,301
Mar-57,356
Apr-57,348
May-57,355
Jun-57,422
Jul-57,465
Aug-57,467
Sep-57,404
Oct-57,347
Nov-57,305
Dec-57,336
Jan-58,340
Feb-58,318
Mar-58,362
Apr-58,348
May-58,363
Jun-58,435
Jul-58,491
Aug-58,505
Sep-58,404
Oct-58,359
Nov-58,310
Dec-58,337
Jan-59,360
Feb-59,342
Mar-59,406
Apr-59,396
May-59,420
Jun-59,472
Jul-59,548
Aug-59,559
Sep-59,463
Oct-59,407
Nov-59,362
Dec-59,405
Jan-60,417
Feb-60,391
Mar-60,419
Apr-60,461
May-60,472
Jun-60,535
Jul-60,622
Aug-60,606
Sep-60,508
Oct-60,461
Nov-60,390
Dec-60,432

导入模块

这个示例比较简单,导入几个常用的就够了:

import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

初始化定义

设置下gpu就行了:

# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

数据预处理

需要读入数据,然后按照序列长度2来设置数据集,代码如下:

# Build a sliding-window dataset: each sample's input is `seq` consecutive
# monthly values and its target is the value of the month right after the window.
with open("data.csv", "r", encoding="utf-8") as f:
    data = f.read()
# Skip blank rows (e.g. a trailing newline) so int() never sees an empty field.
data = [row.split(',') for row in data.split("\n") if row.strip()]
value = [int(each[1]) for each in data]
# The series is the monthly number of airline passengers.
li_x = []
li_y = []
seq = 2
# The dataset is small, so a long sequence length would hurt the result.
for i in range(len(value) - seq):
    # Input: months [i, i+seq); target: month i+seq (the very next month).
    # The original appended value[i+seq+1], which skipped a month (off by one)
    # and contradicted its own comment ("output is day x+2").
    li_x.append(value[i: i+seq])
    li_y.append(value[i+seq])
# Train/test split; scale to ~[0, 1) and shape as (batch, seq, feature).
train_x = (torch.tensor(li_x[:-30]).float() / 1000.).reshape(-1, seq, 1).to(device)
train_y = (torch.tensor(li_y[:-30]).float() / 1000.).reshape(-1, 1).to(device)
test_x = (torch.tensor(li_x[-30:]).float() / 1000.).reshape(-1, seq, 1).to(device)
test_y = (torch.tensor(li_y[-30:]).float() / 1000.).reshape(-1, 1).to(device)

定义网络模型

网络模型就使用一个lstm+全连接实现,代码如下:

class Net(nn.Module):
    """LSTM + linear head mapping a (batch, seq_len, 1) window to one scalar.

    `seq_len` defaults to 2, matching the global `seq` this script uses, so a
    plain `Net()` behaves exactly as before; parameterizing it removes the
    hidden dependency on the module-level global. `hidden_size` (default 16)
    is likewise configurable.
    """
    def __init__(self, seq_len=2, hidden_size=16):
        super(Net, self).__init__()
        # input_size=1: one scalar per time step. Keep num_layers=1 — with so
        # little data a deeper LSTM stack only makes results worse.
        # batch_first=True switches the expected input layout from
        # (seq, batch, feature) to the more familiar (batch, seq, feature).
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=1, batch_first=True)
        # Flatten every per-step hidden state into one feature vector.
        self.linear = nn.Linear(hidden_size * seq_len, 1)
    def forward(self, x):
        # x: (batch, seq_len, 1) -> lstm output: (batch, seq_len, hidden_size)
        x, (h, c) = self.lstm(x)
        x = x.reshape(-1, self.hidden_size * self.seq_len)
        x = self.linear(x)
        return x

定义损失函数和优化器

# Instantiate the model and move it to the selected device.
model = Net().to(device)
# Adam with lr=0.005; MSE loss since this is scalar regression.
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
loss_fun = nn.MSELoss()

训练模型

# Full-batch training: the dataset is tiny, so every epoch is a single
# forward/backward pass over the entire training set.
model.train()
for epoch in range(300):
    output = model(train_x)
    loss = loss_fun(output, train_y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 20 == 0 and epoch > 0:
        # Evaluate without gradient tracking — cheaper and avoids building
        # an unused autograd graph for the test forward pass.
        with torch.no_grad():
            test_loss = loss_fun(model(test_x), test_y)
        print("epoch:{}, loss:{}, test_loss: {}".format(epoch, loss, test_loss))

测试模型

通过可视化查看预测趋势和实际趋势的差别:

model.eval()
# Predict both splits under no_grad (the deprecated `.data` access is
# replaced by an explicit no-gradient context), undo the /1000 scaling,
# and concatenate into one series aligned with `value`.
with torch.no_grad():
    result = list(model(train_x).reshape(-1) * 1000) + list(model(test_x).reshape(-1) * 1000)
plt.plot(value, label="real")   # actual trend
plt.plot(result, label="pred")  # predicted trend
plt.legend(loc='best')


测试结果(根据seed的不同,每次跑出的结果可能会有偏差)

完整代码

import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

# Select GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- Data preparation ----
# Build a sliding-window dataset: each sample's input is `seq` consecutive
# monthly values; its target is the value of the month right after the window.
with open("data.csv", "r", encoding="utf-8") as f:
    data = f.read()
# Skip blank rows (e.g. a trailing newline) so int() never sees an empty field.
data = [row.split(',') for row in data.split("\n") if row.strip()]
value = [int(each[1]) for each in data]
# The series is the monthly number of airline passengers.
li_x = []
li_y = []
seq = 2
# The dataset is small, so a long sequence length would hurt the result.
for i in range(len(value) - seq):
    # Input: months [i, i+seq); target: month i+seq (the very next month).
    # The original appended value[i+seq+1], which skipped a month (off by one).
    li_x.append(value[i: i+seq])
    li_y.append(value[i+seq])
# Train/test split; scale to ~[0, 1) and shape as (batch, seq, feature).
train_x = (torch.tensor(li_x[:-30]).float() / 1000.).reshape(-1, seq, 1).to(device)
train_y = (torch.tensor(li_y[:-30]).float() / 1000.).reshape(-1, 1).to(device)
test_x = (torch.tensor(li_x[-30:]).float() / 1000.).reshape(-1, seq, 1).to(device)
test_y = (torch.tensor(li_y[-30:]).float() / 1000.).reshape(-1, 1).to(device)

# ---- Model ----
class Net(nn.Module):
    """LSTM + linear head mapping a (batch, seq_len, 1) window to one scalar."""
    def __init__(self, seq_len=2, hidden_size=16):
        super(Net, self).__init__()
        # input_size=1: one scalar per time step. Keep num_layers=1 — with so
        # little data a deeper LSTM stack only makes results worse.
        # batch_first=True switches the expected input layout from
        # (seq, batch, feature) to the more familiar (batch, seq, feature).
        self.seq_len = seq_len
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, num_layers=1, batch_first=True)
        # Flatten every per-step hidden state into one feature vector.
        self.linear = nn.Linear(hidden_size * seq_len, 1)
    def forward(self, x):
        x, (h, c) = self.lstm(x)
        x = x.reshape(-1, self.hidden_size * self.seq_len)
        x = self.linear(x)
        return x

# ---- Training ----
model = Net(seq_len=seq).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
loss_fun = nn.MSELoss()
model.train()
for epoch in range(300):
    output = model(train_x)
    loss = loss_fun(output, train_y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if epoch % 20 == 0 and epoch > 0:
        # Evaluate without gradient tracking — cheaper, no stray graph.
        with torch.no_grad():
            test_loss = loss_fun(model(test_x), test_y)
        print("epoch:{}, loss:{}, test_loss: {}".format(epoch, loss, test_loss))

# ---- Evaluation / visualization ----
model.eval()
# Predict both splits, undo the /1000 scaling, and plot against the truth.
with torch.no_grad():
    result = list(model(train_x).reshape(-1) * 1000) + list(model(test_x).reshape(-1) * 1000)
plt.plot(value, label="real")   # actual trend
plt.plot(result, label="pred")  # predicted trend
plt.legend(loc='best')

https://www.jianshu.com/p/894268d66a5d

「点点赞赏,手留余香」

    还没有人赞赏,快来当第一个赞赏的人吧!
0 条回复 A 作者 M 管理员
    所有的伟大,都源于一个勇敢的开始!
欢迎您,新朋友,感谢参与互动!欢迎您 {{author}},您在本站有{{commentsCount}}条评论