# Source file: src/colab/time_series_prediction.py

# -*- coding: utf-8 -*-
"""time_series_prediction.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/136ZcJz0lL8rjgGo58Q0sbaxYKSo4d1Jr

https://medium.com/dejunhuang/learning-day-27-implementing-rnn-in-pytorch-for-time-series-prediction-3ddb6190e83d

A simple prediction task
- train model with 50 data points generated by sin function
- feed only 1 point and predict the next point, then feed each prediction back in as the next input, 49 times in total: x -> pred1 -> pred2 -> ... -> pred49
- since there is only 1 point at each step, the feature_dim=1
- change data representation to bring batch forward as first dimension, therefore x = [batch, seq_len, feature_dim]. So in RNN setup, add new argument batch_first=True

Implement RNN with nn.Module
- Largely similar to the way a CNN is set up
- Involve a linear layer at the end to compress output from [batch, seq_len, mem_dim] to [batch, seq_len, 1] so that output can be compared with y
- hidden_size = memory_dim = 10
"""

import torch
from torch import nn, optim
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameters shared by the model definition and the script below.
# number of points sampled along the sine curve for each sequence
num_time_steps = 50
# size of the RNN hidden state (the "memory" dimension)
hidden_size = 10
# one scalar feature per time step
input_size = 1
# one scalar prediction per time step
output_size = 1
# learning rate passed to the Adam optimizer
lr = 0.01


class Net(nn.Module):
    """Single-layer RNN followed by a linear read-out applied at every time step.

    Layer sizes are taken from the module-level ``input_size``,
    ``hidden_size`` and ``output_size`` constants.
    """

    def __init__(self):
        super().__init__()

        # batch_first=True makes the RNN take and return tensors laid out as
        # [batch, seq_len, feature] instead of [seq_len, batch, feature].
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        # Compress every hidden vector to the same dimensionality as y.
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden_prev):
        """Run the RNN over ``x`` and project each step to the output size.

        Args:
            x: input of shape [batch, seq_len, input_size].
            hidden_prev: initial hidden state of shape
                [num_layers, batch, hidden_size].

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            [batch, seq_len, output_size] and ``hidden`` is the final hidden
            state with the same shape as ``hidden_prev``.
        """
        out, hidden_prev = self.rnn(x, hidden_prev)  # [batch, seq_len, hidden]

        # nn.Linear only transforms the last dimension, so it can be applied
        # to the 3-D tensor directly. This keeps the batch axis intact;
        # flattening and then unsqueezing would merge batch and seq_len
        # whenever batch > 1.
        out = self.linear(out)  # [batch, seq_len, hidden] => [batch, seq_len, out]
        return out, hidden_prev



model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)

# Initial hidden state for nn.RNN: [num_layers, batch, hidden_size].
# (This layout is not affected by batch_first=True.)
hidden_prev = torch.zeros(1, 1, hidden_size)

# Train on randomly shifted sine segments: given point t, predict point t+1.
# NOTE(review): the hidden state is carried over from one random segment to
# the next rather than being reset; kept as in the original tutorial.
for step in range(6000):
    # random integer start point in {0, 1, 2}
    start = np.random.randint(3, size=1)[0]
    # num_time_steps evenly spaced points on [start, start + 10]
    time_steps = np.linspace(start, start + 10, num_time_steps)
    data = np.sin(time_steps)
    data = data.reshape(num_time_steps, 1)
    # x: points 0..48 (inputs); y: points 1..49 (next-step targets)
    x = torch.tensor(data[:-1]).float().reshape(1, num_time_steps - 1, 1)  # [b, seq_len, fea_len]
    y = torch.tensor(data[1:]).float().reshape(1, num_time_steps - 1, 1)  # [b, seq_len, fea_len]

    output, hidden_prev = model(x, hidden_prev)
    # Detach so the next iteration's backward() does not try to propagate
    # through this iteration's graph.
    hidden_prev = hidden_prev.detach()

    loss = criterion(output, y)
    # Equivalent to model.zero_grad() here, because every model parameter
    # was handed to this single optimizer.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step % 1000 == 0:
        print(f"iteration: {step:4d}, loss {loss.item()}")

# Build a fresh sine segment to predict on.
start = np.random.randint(3, size=1)[0]  # random integer start in {0, 1, 2}
time_steps = np.linspace(start, start + 10, num_time_steps)
data = np.sin(time_steps)
data = data.reshape(num_time_steps, 1)
x = torch.tensor(data[:-1]).float().reshape(1, num_time_steps - 1, 1)  # [b, seq_len, fea_len]

# Autoregressive roll-out: feed the first true point, then feed every
# prediction back in as the next input. The roll-out continues from the
# current value of hidden_prev.
preds = []
input_x = x[:, 0, :]  # select first point
# Inference only: no_grad() avoids building an autograd graph that would
# otherwise grow with every step through the chained hidden state.
with torch.no_grad():
    for _ in range(x.shape[1]):
        input_x = input_x.reshape(1, 1, 1)  # [b, seq_len, fea_len] with one point
        pred, hidden_prev = model(input_x, hidden_prev)
        input_x = pred
        preds.append(pred.item())

x = x.numpy().ravel()
plt.scatter(time_steps[:-1], x, s=10, label='original')
plt.plot(time_steps[:-1], x)

# Prediction i is the model's estimate for point i + 1, hence time_steps[1:].
plt.scatter(time_steps[1:], preds, label='predicted')
plt.legend()
plt.show()

import torch
from torch import nn, optim
import numpy as np
import matplotlib.pyplot as plt

# Hyperparameters shared by the model definition and the walk-through below.
# number of points sampled along the sine curve for each sequence
num_time_steps = 50
# size of the RNN hidden state (the "memory" dimension)
hidden_size = 10
# one scalar feature per time step
input_size = 1
# one scalar prediction per time step
output_size = 1
# learning rate passed to the Adam optimizer
lr = 0.01


class Net(nn.Module):
    """Single-layer RNN followed by a linear read-out applied at every time step.

    Layer sizes are taken from the module-level ``input_size``,
    ``hidden_size`` and ``output_size`` constants.
    """

    def __init__(self):
        super().__init__()

        # batch_first=True makes the RNN take and return tensors laid out as
        # [batch, seq_len, feature] instead of [seq_len, batch, feature].
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        # Compress every hidden vector to the same dimensionality as y.
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden_prev):
        """Run the RNN over ``x`` and project each step to the output size.

        Args:
            x: input of shape [batch, seq_len, input_size].
            hidden_prev: initial hidden state of shape
                [num_layers, batch, hidden_size].

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            [batch, seq_len, output_size] and ``hidden`` is the final hidden
            state with the same shape as ``hidden_prev``.
        """
        out, hidden_prev = self.rnn(x, hidden_prev)  # [batch, seq_len, hidden]

        # nn.Linear only transforms the last dimension, so it can be applied
        # to the 3-D tensor directly. This keeps the batch axis intact;
        # flattening and then unsqueezing would merge batch and seq_len
        # whenever batch > 1.
        out = self.linear(out)  # [batch, seq_len, hidden] => [batch, seq_len, out]
        return out, hidden_prev



model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)

# Initial hidden state for nn.RNN: [num_layers, batch, hidden_size]
hidden_prev = torch.zeros((1, 1, hidden_size))

# ---- Walk through one training sample, printing every intermediate ----

# random integer start point in {0, 1, 2}
start = np.random.randint(3, size=1)[0]
print(start)

# num_time_steps evenly spaced points on [start, start + 10]
time_steps = np.linspace(start, start + 10, num_time_steps)
print(time_steps)

data = np.sin(time_steps)
print(data)

# column vector: one feature per time step
data = data.reshape(-1, 1)
print(data)

# x: points 0..48 (inputs); y: points 1..49 (next-step targets)
# unsqueeze(0) adds the batch axis => [b, seq_len, fea_len]
x = torch.tensor(data[:-1], dtype=torch.float32).unsqueeze(0)
y = torch.tensor(data[1:], dtype=torch.float32).unsqueeze(0)
print(x)
print(y)

output, hidden_prev = model(x, hidden_prev)
print(output)

loss = criterion(output, y)
print(loss)

# cut the hidden state loose from the graph of the forward pass above
hidden_prev = hidden_prev.detach()

print(x)
print(x.shape)

# ---- Walk through a single prediction step ----

preds = []

# first point of the sequence
input_x = x[:, 0]
print(input_x)

# back to [b, seq_len, fea_len] with a single point
input_x = input_x.view(1, 1, 1)
print(input_x)

pred, hidden_prev = model(input_x, hidden_prev)
print(pred)

# the prediction becomes the next input
input_x = pred

# still empty at this point
print(preds)

preds.append(pred.detach().numpy()[0, 0, 0])
print(preds)

"""Additional learning points

model.zero_grad() vs optimizer.zero_grad()
- they are the same if all model.parameters() were fed to the optimizer
- it only matters when multiple models are using the same optimizer, or multiple optimizers were used for different parts of a model (ref)

Prediction is made point by point
- RNN is flexible in the seq_len, ie. the number of data points/words
- How many points fed in = how many points being predicted
- It is interesting that the input seq_len can be as small as 1 as shown in the above example; do not have to feed in a bunch of data points for prediction

Question
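- At the linear step, why can batch size and seq_len be stacked before the linear layer? (out = out.reshape(-1, hidden_size)  # stack batch and seq)

Ans: it doesn't matter. nn.Linear is applied to the last dimension only, so every [h] vector is mapped independently of which batch element or time step it came from.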

- for b=1, [1, seq, h] => [seq, h], output = [seq, 1] after linear layer
- for b=3, [3, seq, h] => [3*seq, h], output = [3*seq, 1] after linear layer
- We just need to segregate each sequence again from the output to get the prediction for each sequence

Reference

https://www.udemy.com/course/deeplearning-pytorch/

"""