This is a minimalistic demo on how to model time-series with RNN, including training and inference.

Rather than relying on the mx.symbol.RNN operator which provides a convenient and efficient implementation of common RNN cells such as the LSTM and GRU, the following example relies on a manual construction of the RNN cells and provides a structure from which alternative cells can be implemented.

library("readr")
library("dplyr")
library("plotly")
library("mxnet")

Data preparation

In this step, a dataset made of multiple univariate time-series is built. 500 independent times series (n) with each 100 observations (seq_len) are created.

The modeling task will be to predict the next value given the previous ones. The target variable can therefore be obtained by shifting the features by one timestep in the future.

The resulting data is of shape [1, 100, 500] corresponding to [num_features, seq_length, samples]. In multivariate time-series, the dataset would follow the same structure, except that the num_features would be > 1.

# number of timestamps
seq_len = 100

# number of samples
n = 500

# return a random starting point of the time series
set.seed(12)
seeds <- runif(n, min = 0, max = 24)

# generate the time series of seq_length for each starting point
pts <- sapply(seeds, function(x) sin(x + pi/12 * (0:(seq_len))))

# build the features matrix
x <- pts[1:seq_len, ]
x <- array(x, dim = c(1, seq_len, n))

# build the target array - same as feature but one timestep forward
y <- pts[-1, ]

# plot time series from 5 first samples
plot_ly(x = 1:dim(x)[2], y = x[,,1], type = "scatter", mode = "lines") %>% 
  add_trace(x = 1:dim(x)[2], y = x[,,2], type = "scatter", mode = "lines") %>% 
  add_trace(x = 1:dim(x)[2], y = x[,,3], type = "scatter", mode = "lines") %>% 
  add_trace(x = 1:dim(x)[2], y = x[,,4], type = "scatter", mode = "lines") %>% 
  add_trace(x = 1:dim(x)[2], y = x[,,5], type = "scatter", mode = "lines") %>% 
  layout(title = "Sample of 5 time series")
# plot first time series and its associated target
plot_ly(x = 1:dim(x)[2], y = x[,,1], type = "scatter", mode = "lines", name = "feature") %>% 
  add_trace(x = 1:dim(x)[2], y = y[,1], type = "scatter", mode = "lines", name = "target") %>% 
  layout(title = "Sample of one time series")

Create data iterators

The training and evaluation data are obtained by splitting the first 400 and the remaining observations into different mx.io.arrayiter iterators.

batch.size = 32

# take first 400 samples for train - remaining 100 for evaluation
train_ids <- 1:400

train.data <- mx.io.arrayiter(data = x[,,train_ids, drop = F], label = y[, train_ids], 
                              batch.size = batch.size, shuffle = TRUE)

eval.data <- mx.io.arrayiter(data = x[,,-train_ids, drop = F], label = y[, -train_ids], 
                              batch.size = batch.size, shuffle = FALSE)

Model architecture

R-package provides a helper function, rnn.graph.unroll, to build the explicit graphs for LSTM and GRU. Here a graph with only 2 time-steps is built so as to have a reasonable size for display.

symbol <- rnn.graph.unroll(seq_len = 2, 
                           num_rnn_layer =  1, 
                           num_hidden = 50,
                           input_size = NULL,
                           num_embed = NULL, 
                           num_decode = 1,
                           masking = F, 
                           loss_output = "linear",
                           dropout = 0.2, 
                           ignore_label = -1,
                           cell_type = "lstm",
                           output_last_state = F,
                           config = "one-to-one")

graph = graph.viz(symbol, type = "graph", direction = "TD", 
                  shape=list(data = c(1, 2, batch.size), 
                             label = c(2, batch.size)))

Now the complete graph can be built by specifying a sequence length matching the data (seq_len = 50).

symbol <- rnn.graph.unroll(seq_len = seq_len, 
                           num_rnn_layer =  2, 
                           num_hidden = 50,
                           input_size = NULL,
                           num_embed = NULL, 
                           num_decode = 1,
                           masking = F, 
                           loss_output = "linear",
                           dropout = 0.1, 
                           ignore_label = -1,
                           cell_type = "lstm",
                           output_last_state = F,
                           config = "one-to-one")

Fit a LSTM model

Prior to training, a custom metric is defined to handle labels that are in a 2D (matrix) format rather than in a flat vector as in vanilla regression models.

mx.metric.mse.seq <- mx.metric.custom("MSE", function(label, pred) {
  label = mx.nd.reshape(label, shape = -1)
  pred = mx.nd.reshape(pred, shape = -1)
  res <- mx.nd.mean(mx.nd.square(label-pred))
  return(as.array(res))
})
ctx <- mx.cpu()

initializer <- mx.init.Xavier(rnd_type = "gaussian", 
                              factor_type = "avg", 
                              magnitude = 2.5)

optimizer <- mx.opt.create("adadelta", rho = 0.9, eps = 1e-5, wd = 0,
                           clip_gradient = 1, rescale.grad = 1/batch.size)

logger <- mx.metric.logger()
epoch.end.callback <- mx.callback.log.train.metric(period = 10, logger = logger)

system.time(
  model <- mx.model.buckets(symbol = symbol,
                            train.data = train.data, 
                            eval.data = eval.data, 
                            num.round = 60, ctx = ctx, verbose = TRUE,
                            metric = mx.metric.mse.seq, 
                            initializer = initializer, optimizer = optimizer, 
                            batch.end.callback = NULL, 
                            epoch.end.callback = epoch.end.callback)
)
##    user  system elapsed 
##  428.75  361.55  156.08
plot_ly(x = seq_len(length(logger$train)), y = logger$train, type = "scatter", 
        mode = "markers+lines", name = "train") %>% 
  add_trace(y = logger$eval, type = "scatter", 
            mode = "markers+lines", name = "eval")

Inference on test data

Setup inference data. Need to apply preprocessing to inference sequence and convert into a infer data iterator.

Build the inference graph

Two graphs are built:

  1. Graph whose seq length matches the number of timesteps of the inference data. This will provide the state of the model that will subsequently feed the inference iterations.
  2. Graph with a seq length of one used to iterate from the previous prediction state up up to the number of the desired inference steps.
ctx <- mx.cpu()

pred_length = 80
data = mx.nd.array(x[, , 1, drop = F])
infer_length_ini <- dim(data)[2]

symbol.infer.ini <- rnn.graph.unroll(seq_len = infer_length_ini,
                                     num_rnn_layer = 2, 
                                     num_hidden = 50,
                                     input_size = NULL,
                                     num_embed = NULL, 
                                     num_decode = 1,
                                     masking = F, 
                                     loss_output = "linear",
                                     dropout = 0, 
                                     ignore_label = -1,
                                     cell_type = "lstm",
                                     output_last_state = T,
                                     config = "one-to-one")

symbol.infer <- rnn.graph.unroll(seq_len = 1,
                                 num_rnn_layer = 2, 
                                 num_hidden = 50,
                                 input_size = NULL,
                                 num_embed = NULL, 
                                 num_decode = 1,
                                 masking = F, 
                                 loss_output = "linear",
                                 dropout = 0, 
                                 ignore_label = -1,
                                 cell_type = "lstm",
                                 output_last_state = T,
                                 config = "one-to-one")

Inference

predict <- numeric()

infer <- mx.infer.rnn.one.unroll(infer.data = data, 
                                 symbol = symbol.infer.ini,
                                 num_hidden = 50,
                                 arg.params = model$arg.params,
                                 aux.params = model$aux.params,
                                 init_states = NULL,
                                 ctx = ctx)

pred = mx.nd.array(y[seq_len, 1, drop = F])
real = sin(seeds[1] + pi/12 * (seq_len+1):(seq_len+pred_length))

for (i in 1:pred_length) {
  
  data = mx.nd.reshape(pred, shape = c(1,1,1))
  
  infer <- mx.infer.rnn.one.unroll(infer.data = data, 
                                   symbol = symbol.infer,
                                   num_hidden = 50,
                                   arg.params = model$arg.params,
                                   aux.params = model$aux.params,
                                   init_states = infer[-1],
                                   ctx = ctx)
  
  pred <- infer[[1]]
  predict <- c(predict, as.numeric(as.array(pred)))
}

Plot predictions against real values

data = mx.nd.array(x[, , 1, drop = F])
label = mx.nd.array(y[, 1, drop = F])

plot_ly(x = 1:dim(y)[1], y = y[,1], type = "scatter", mode="lines", name = "hist") %>% 
  add_trace(x = dim(y)[1] + 1:length(real), y = real, type = "scatter", mode="lines", name = "real") %>% 
  add_trace(x = dim(y)[1] + 1:length(predict), y = predict, type = "scatter", mode="lines", name = "pred")