# -*- coding: utf-8 -*-
"""time_series_prediction.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/136ZcJz0lL8rjgGo58Q0sbaxYKSo4d1Jr
https://medium.com/dejunhuang/learning-day-27-implementing-rnn-in-pytorch-for-time-series-prediction-3ddb6190e83d
A simple prediction task
- train the model with 50 data points generated by the sin function
- feed only 1 point and predict the next point, then feed each prediction back in for the next prediction, approx. 50 times: x -> pred1 -> pred2 -> ... -> pred50
- since there is only 1 point at each step, feature_dim = 1
- the data representation puts batch first, so x = [batch, seq_len, feature_dim]; accordingly, the RNN is created with batch_first=True

Implement the RNN with nn.Module
- largely similar to the way a CNN is set up
- a linear layer at the end compresses the output from [batch, seq_len, mem_dim] to [seq_len, 1] so that the output can be compared with y
- hidden_size = memory_dim = 10
"""
import torch
from torch import nn, optim
import numpy as np
import matplotlib.pyplot as plt
# number of points
num_time_steps = 50
hidden_size = 10
input_size = 1
output_size = 1
lr = 0.01
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
            # batch_first=True expects the input shaped [batch, seq_len, feature_dim]
        )
        # compress output to the same dim as y
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden_prev):
        out, hidden_prev = self.rnn(x, hidden_prev)
        # [1, seq, h] => [seq, h] (batch=1)
        out = out.reshape(-1, hidden_size)  # stack batch and seq
        # linear layer so that the output is [seq, 1] instead of [seq, h],
        # making it comparable with y for the loss calculation
        out = self.linear(out)  # [seq, h] => [seq, 1]
        out = out.unsqueeze(dim=0)  # => [1, seq, 1]
        return out, hidden_prev
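
# A quick shape check of the forward pass above (an illustrative sketch, not
# part of the original notebook): a dummy window of 49 points shaped
# [batch=1, seq_len=49, feature_dim=1] comes back as [1, 49, 1], and the
# hidden state is [num_layers=1, batch=1, hidden_size=10].
_check_net = Net()
_check_out, _check_h = _check_net(
    torch.zeros(1, num_time_steps - 1, input_size),
    torch.zeros(1, 1, hidden_size),
)
print(_check_out.shape, _check_h.shape)  # torch.Size([1, 49, 1]) torch.Size([1, 1, 10])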
model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)
hidden_prev = torch.zeros(1, 1, hidden_size)  # [num_layers, batch, hidden_size]
for iter in range(6000):
    # randomly pick a start point from 0 to 2
    start = np.random.randint(3, size=1)[0]
    # e.g. from 0 to 10, create 50 evenly spaced points
    time_steps = np.linspace(start, start + 10, num_time_steps)
    data = np.sin(time_steps)
    data = data.reshape(num_time_steps, 1)
    # x: the first 49 points (indices 0-48); y: the next 49 points (indices 1-49)
    x = torch.tensor(data[:-1]).float().reshape(1, num_time_steps - 1, 1)  # [b, seq_len, fea_len]
    y = torch.tensor(data[1:]).float().reshape(1, num_time_steps - 1, 1)  # [b, seq_len, fea_len]

    output, hidden_prev = model(x, hidden_prev)
    hidden_prev = hidden_prev.detach()
    # print(f"output {output.shape}, y {y.shape}")
    loss = criterion(output, y)
    model.zero_grad()
    # optimizer.zero_grad()
    # both zero_grad() calls do the same thing if all of model.parameters() was fed to this optimizer;
    # the choice only matters when several models share one optimizer, or one model uses several optimizers
    loss.backward()
    optimizer.step()

    if iter % 1000 == 0:
        print(f"iteration: {iter:4d}, loss {loss.item()}")
start = np.random.randint(3, size=1)[0]
time_steps = np.linspace(start, start+10, num_time_steps)
data = np.sin(time_steps)
data = data.reshape(num_time_steps, 1)
x = torch.tensor(data[:-1]).float().reshape(1, num_time_steps-1, 1)
preds = []
input_x = x[:, 0, :] # select first point
for _ in range(x.shape[1]):
    input_x = input_x.reshape(1, 1, 1)  # reshape to [b, seq_len, fea_len] before feeding the model
    pred, hidden_prev = model(input_x, hidden_prev)
    # print(pred.shape)
    # print(hidden_prev.shape)
    input_x = pred  # feed the prediction back in as the next input
    preds.append(pred.detach().numpy().ravel()[0])
x = x.data.numpy().ravel()
plt.scatter(time_steps[:-1], x.ravel(), s=10, label='original')
plt.plot(time_steps[:-1], x.ravel())
plt.scatter(time_steps[1:], preds, label='predicted')
plt.legend()
plt.show()
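
# The RNN itself is agnostic to seq_len (see "Additional learning points" at
# the end of the file): the same trained model accepts a single point or a
# whole window. A quick shape demonstration, not part of the original notebook.
_one_step, _ = model(torch.zeros(1, 1, input_size), torch.zeros(1, 1, hidden_size))
_full_seq, _ = model(torch.zeros(1, num_time_steps - 1, input_size), torch.zeros(1, 1, hidden_size))
print(_one_step.shape, _full_seq.shape)  # torch.Size([1, 1, 1]) torch.Size([1, 49, 1])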
# Step-by-step walkthrough of a single training and prediction step,
# starting from a freshly initialised model and hidden state
model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)
hidden_prev = torch.zeros(1, 1, hidden_size)  # [num_layers, batch, hidden_size]
# randomly generate start point from 0 to 2
start = np.random.randint(3, size=1)[0]
print(start)
# e.g. from 0 to 10, create 50 evenly spaced points
time_steps = np.linspace(start, start + 10, num_time_steps)
print(time_steps)
data = np.sin(time_steps)
print(data)
data = data.reshape(num_time_steps, 1)
print(data)
# x: the first 49 points (indices 0-48); y: the next 49 points (indices 1-49)
x = torch.tensor(data[:-1]).float().reshape(1, num_time_steps - 1, 1) # [b, seq_len, fea_len]
y = torch.tensor(data[1:]).float().reshape(1, num_time_steps - 1, 1) # [b, seq_len, fea_len]
print(x)
print(y)
output, hidden_prev = model(x, hidden_prev)
print(output)
hidden_prev = hidden_prev.detach()
loss = criterion(output, y)
print(loss)
print(x)
print(x.shape)
preds = []
input_x = x[:, 0, :] # select first point
print(input_x)
input_x = input_x.reshape(1, 1, 1)
print(input_x)
pred, hidden_prev = model(input_x, hidden_prev)
print(pred)
input_x = pred
print(preds)
preds.append(pred.detach().numpy().ravel()[0])
print(preds)
"""Additional learning points
model.zero_grad() vs optimizer.zero_grad()
- the are the same if all model.parameters() were fed to the optimizer
- it only matters when multiple models are using the same optimizer, or multiple optimizers were used for different parts of a model (ref)
Prediction is made point by point
- RNN is flexible in the seq_len, ie. the number of data points/words
- How many points fed in = how many points being predicted
- It is interesting that the input seq_len can be as small as 1 as shown in the above example; do not have to feed in a bunch of data points for prediction
Question
- At the linear step, why batch size and seq_len can be stacked before linear layer? (out = out.reshape(-1, hidden_size) # stack batch and seq
Ans: perhaps it doesn’t matter.
- for b=1, [1, seq, h] => [seq, h], output = [seq, 1] after linear layer
- for b=3, [3, seq, h] => [3*seq, h], output = [3*seq, 1] after linear layer
- We just need to segregate each sequence again from the output to get the prediction for each sequence
Reference
https://www.udemy.com/course/deeplearning-pytorch/
"""