This is a minimalistic demo on how to model time-series with RNN, including training and inference. It uses fused RNN using the mx.symbol.RNN operator. Starting from MXNet 1.3.1, CPU is now supported. On prior versions, needs to be run on GPU.

library("readr")
library("dplyr")
library("plotly")
library("mxnet")

Data preparation

In this step, a dataset made of multiple univariate time-series is built. 500 independent times series (n) with each 100 observations (seq_len) are created.

The modeling task will be to predict the next value given the previous ones. The target variable can therefore be obtained by shifting the features by one timestep in the future.

The resulting data is of shape [1, 100, 500] corresponding to [num_features, seq_length, samples]. In multivariate time-series, the dataset would follow the same structure, except that the num_features would be > 1.

# number of timestamps
seq_len <- 100

# number of samples
n <- 500

# return a random starting point of the time series
set.seed(12)
seeds <- runif(n, min = 0, max = 24)

# generate the time series of seq_length for each starting point
pts <- sapply(seeds, function(x) sin(x + pi/12 * (0:(seq_len))))

# build the features matrix
x <- pts[1:seq_len, ]
x <- array(x, dim = c(1, seq_len, n))

# build the target array - same as feature but one timestep forward
y <- pts[-1, ]

# plot time series from 5 first samples
plot_ly(x = 1:dim(x)[2], y = x[,,1], type = "scatter", mode = "lines") %>% 
  add_trace(x = 1:dim(x)[2], y = x[,,2], type = "scatter", mode = "lines") %>% 
  add_trace(x = 1:dim(x)[2], y = x[,,3], type = "scatter", mode = "lines") %>% 
  add_trace(x = 1:dim(x)[2], y = x[,,4], type = "scatter", mode = "lines") %>% 
  add_trace(x = 1:dim(x)[2], y = x[,,5], type = "scatter", mode = "lines") %>% 
  layout(title = "Sample of 5 time series")
# plot first time series and its associated target
plot_ly(x = 1:dim(x)[2], y = x[,,1], type = "scatter", mode = "lines", name = "feature") %>% 
  add_trace(x = 1:dim(x)[2], y = y[,1], type = "scatter", mode = "lines", name = "target") %>% 
  layout(title = "Sample of one time series")

Create data iterators

The training and evaluation data are obtained by splitting the first 400 and the remaining observations into different mx.io.arrayiter iterators.

batch.size = 32

# take first 400 samples for train - remaining 100 for evaluation
train_ids <- 1:400

train.data <- mx.io.arrayiter(data = x[,,train_ids, drop = F], label = y[, train_ids], 
                              batch.size = batch.size, shuffle = TRUE)

eval.data <- mx.io.arrayiter(data = x[,,-train_ids, drop = F], label = y[, -train_ids], 
                              batch.size = batch.size, shuffle = FALSE)

Model architecture

data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
parameters <- mx.symbol.Variable("rnn_weight")
rnn_input_state <- mx.symbol.Variable("rnn_input_state")
rnn_input_state_cell <- mx.symbol.Variable("rnn_input_state_cell")

swap_in <- mx.symbol.swapaxes(data, dim1 = 0, dim2 = 1)
RNN <- mx.symbol.RNN(data = swap_in, 
                     parameters = parameters,
                     state = rnn_input_state,
                     state_cell = rnn_input_state_cell,
                     state_size = 32, 
                     num_layers = 2,
                     p = 0.1,
                     bidirectional = F,
                     state_outputs = T,
                     mode = "lstm",
                     name = "RNN")
swap_out <- mx.symbol.swapaxes(data = RNN[[1]], dim1 = 0, dim2 = 1)
decode <- mx.symbol.FullyConnected(data = swap_out, num_hidden = 1, flatten = F)
decode <- mx.symbol.reshape(decode, shape = c(1, -1))
label <- mx.symbol.reshape(label, shape = -1)
loss <- mx.symbol.LinearRegressionOutput(data = decode, label = label, name = "loss")

graph.viz(symbol = loss, type = "graph", direction = "LR", 
          shape=list(data = c(1, seq_len, batch.size), 
                     label = c(seq_len, batch.size)),
          graph.width.px = 800, graph.height.px = 220)

Fit a LSTM model

Needs a custom metric to handle labels in a matrix rather than flat format in the iterator.

mx.metric.mse.seq <- mx.metric.custom("MSE", function(label, pred) {
  label <- mx.nd.reshape(label, shape = -1)
  pred <- mx.nd.reshape(pred, shape = -1)
  res <- mx.nd.mean(mx.nd.square(label-pred))
  return(as.array(res))
})
ctx <- mx.gpu()

initializer <- mx.init.Xavier(rnd_type = "gaussian", 
                              factor_type = "avg", 
                              magnitude = 2.5)

optimizer <- mx.opt.create("adam", learning.rate = 1e-3, beta1 = 0.9, beta2 = 0.999, 
                           wd = 1e-8, clip_gradient = 1, rescale.grad = 1 / batch.size)

logger <- mx.metric.logger()
epoch.end.callback <- mx.callback.log.train.metric(period = 10, logger = logger)

system.time(
  model <- mx.model.buckets(symbol = loss,
                            train.data = train.data, 
                            eval.data = eval.data, 
                            num.round = 50, ctx = ctx, verbose = TRUE,
                            metric = mx.metric.mse.seq, 
                            initializer = initializer, optimizer = optimizer, 
                            batch.end.callback = NULL, 
                            epoch.end.callback = epoch.end.callback)
)
##    user  system elapsed 
##   13.93    4.24   18.71
plot_ly(x = seq_len(length(logger$train)), y = logger$train, type = "scatter", 
        mode = "markers+lines", name = "train") %>% 
  add_trace(y = logger$eval, type = "scatter", 
            mode = "markers+lines", name = "eval")

Inference on test data

Setup inference data. Need to apply preprocessing to inference sequence and convert into a infer data iterator.

Inference

internals <- model$symbol$get.internals()
sym_state <- internals$get.output(which(internals$outputs %in% "RNN_state"))
sym_state_cell <- internals$get.output(which(internals$outputs %in% "RNN_state_cell"))
sym_output <- internals$get.output(which(internals$outputs %in% "loss_output"))
symbol <- mx.symbol.Group(sym_output, sym_state, sym_state_cell)

pred_length = 100
predict <- numeric()

data <- mx.nd.array(x[, , 1, drop = F])
label <- mx.nd.array(y[, 1, drop = F])

pred <- mx.nd.array(y[seq_len, 1, drop = F])
real <- sin(seeds[1] + pi/12 * (seq_len+1):(seq_len+pred_length))

infer.data <- mx.io.arrayiter(data = data, label = label, 
                              batch.size = 1, shuffle = FALSE)

infer <- mx.infer.rnn.one(infer.data = infer.data, 
                          symbol = symbol,
                          arg.params = model$arg.params, 
                          aux.params = model$aux.params, 
                          input.params = NULL,
                          ctx = ctx)

for (i in 1:pred_length) {
  
  data <- mx.nd.reshape(pred, shape = c(1,1,1))
  label <- pred
  
  infer.data <- mx.io.arrayiter(data = data, label = label,  
                                batch.size = 1, shuffle = FALSE)
  
  infer <- mx.infer.rnn.one(infer.data = infer.data, 
                            symbol = symbol,
                            ctx = ctx,
                            arg.params = model$arg.params,
                            aux.params = model$aux.params,
                            input.params = list(rnn_input_state = infer[[2]], 
                                                rnn_input_state_cell = infer[[3]]))
  
  pred <- infer[[1]]
  predict <- c(predict, as.numeric(as.array(pred)))
  
}

Plot predictions against real values

data <- mx.nd.array(x[, , 1, drop = F])
label <- mx.nd.array(y[, 1, drop = F])

plot_ly(x = 1:dim(y)[1], y = y[,1], type = "scatter", mode="lines", name = "hist") %>% 
  add_trace(x = dim(y)[1] + 1:length(real), y = real, type = "scatter", mode="lines", name = "real") %>% 
  add_trace(x = dim(y)[1] + 1:length(predict), y = predict, type = "scatter", mode="lines", name = "pred")