This is a minimalistic demo on how to model time-series with RNN, including training and inference.
Rather than relying on the mx.symbol.RNN
operator which provides a convenient and efficient implementation of common RNN cells such as the LSTM and GRU, the following example relies on a manual construction of the RNN cells and provides a structure from which alternative cells can be implemented.
library("readr")
library("dplyr")
library("plotly")
library("mxnet")
In this step, a dataset made of multiple univariate time-series is built. 500 independent times series (n) with each 100 observations (seq_len) are created.
The modeling task will be to predict the next value given the previous ones. The target variable can therefore be obtained by shifting the features by one timestep in the future.
The resulting data is of shape [1, 100, 500] corresponding to [num_features, seq_length, samples]. In multivariate time-series, the dataset would follow the same structure, except that the num_features would be > 1.
# number of timestamps
seq_len = 100
# number of samples
n = 500
# return a random starting point of the time series
set.seed(12)
seeds <- runif(n, min = 0, max = 24)
# generate the time series of seq_length for each starting point
pts <- sapply(seeds, function(x) sin(x + pi/12 * (0:(seq_len))))
# build the features matrix
x <- pts[1:seq_len, ]
x <- array(x, dim = c(1, seq_len, n))
# build the target array - same as feature but one timestep forward
y <- pts[-1, ]
# plot time series from 5 first samples
plot_ly(x = 1:dim(x)[2], y = x[,,1], type = "scatter", mode = "lines") %>%
add_trace(x = 1:dim(x)[2], y = x[,,2], type = "scatter", mode = "lines") %>%
add_trace(x = 1:dim(x)[2], y = x[,,3], type = "scatter", mode = "lines") %>%
add_trace(x = 1:dim(x)[2], y = x[,,4], type = "scatter", mode = "lines") %>%
add_trace(x = 1:dim(x)[2], y = x[,,5], type = "scatter", mode = "lines") %>%
layout(title = "Sample of 5 time series")
# plot first time series and its associated target
plot_ly(x = 1:dim(x)[2], y = x[,,1], type = "scatter", mode = "lines", name = "feature") %>%
add_trace(x = 1:dim(x)[2], y = y[,1], type = "scatter", mode = "lines", name = "target") %>%
layout(title = "Sample of one time series")
The training and evaluation data are obtained by splitting the first 400 and the remaining observations into different mx.io.arrayiter
iterators.
batch.size = 32
# take first 400 samples for train - remaining 100 for evaluation
train_ids <- 1:400
train.data <- mx.io.arrayiter(data = x[,,train_ids, drop = F], label = y[, train_ids],
batch.size = batch.size, shuffle = TRUE)
eval.data <- mx.io.arrayiter(data = x[,,-train_ids, drop = F], label = y[, -train_ids],
batch.size = batch.size, shuffle = FALSE)
R-package provides a helper function, rnn.graph.unroll
, to build the explicit graphs for LSTM and GRU. Here a graph with only 2 time-steps is built so as to have a reasonable size for display.
symbol <- rnn.graph.unroll(seq_len = 2,
num_rnn_layer = 1,
num_hidden = 50,
input_size = NULL,
num_embed = NULL,
num_decode = 1,
masking = F,
loss_output = "linear",
dropout = 0.2,
ignore_label = -1,
cell_type = "lstm",
output_last_state = F,
config = "one-to-one")
graph = graph.viz(symbol, type = "graph", direction = "TD",
shape=list(data = c(1, 2, batch.size),
label = c(2, batch.size)))
Now the complete graph can be built by specifying a sequence length matching the data (seq_len = 50).
symbol <- rnn.graph.unroll(seq_len = seq_len,
num_rnn_layer = 2,
num_hidden = 50,
input_size = NULL,
num_embed = NULL,
num_decode = 1,
masking = F,
loss_output = "linear",
dropout = 0.1,
ignore_label = -1,
cell_type = "lstm",
output_last_state = F,
config = "one-to-one")
Prior to training, a custom metric is defined to handle labels that are in a 2D (matrix) format rather than in a flat vector as in vanilla regression models.
mx.metric.mse.seq <- mx.metric.custom("MSE", function(label, pred) {
label = mx.nd.reshape(label, shape = -1)
pred = mx.nd.reshape(pred, shape = -1)
res <- mx.nd.mean(mx.nd.square(label-pred))
return(as.array(res))
})
ctx <- mx.cpu()
initializer <- mx.init.Xavier(rnd_type = "gaussian",
factor_type = "avg",
magnitude = 2.5)
optimizer <- mx.opt.create("adadelta", rho = 0.9, eps = 1e-5, wd = 0,
clip_gradient = 1, rescale.grad = 1/batch.size)
logger <- mx.metric.logger()
epoch.end.callback <- mx.callback.log.train.metric(period = 10, logger = logger)
system.time(
model <- mx.model.buckets(symbol = symbol,
train.data = train.data,
eval.data = eval.data,
num.round = 60, ctx = ctx, verbose = TRUE,
metric = mx.metric.mse.seq,
initializer = initializer, optimizer = optimizer,
batch.end.callback = NULL,
epoch.end.callback = epoch.end.callback)
)
## user system elapsed
## 428.75 361.55 156.08
plot_ly(x = seq_len(length(logger$train)), y = logger$train, type = "scatter",
mode = "markers+lines", name = "train") %>%
add_trace(y = logger$eval, type = "scatter",
mode = "markers+lines", name = "eval")
Setup inference data. Need to apply preprocessing to inference sequence and convert into a infer data iterator.
Two graphs are built:
ctx <- mx.cpu()
pred_length = 80
data = mx.nd.array(x[, , 1, drop = F])
infer_length_ini <- dim(data)[2]
symbol.infer.ini <- rnn.graph.unroll(seq_len = infer_length_ini,
num_rnn_layer = 2,
num_hidden = 50,
input_size = NULL,
num_embed = NULL,
num_decode = 1,
masking = F,
loss_output = "linear",
dropout = 0,
ignore_label = -1,
cell_type = "lstm",
output_last_state = T,
config = "one-to-one")
symbol.infer <- rnn.graph.unroll(seq_len = 1,
num_rnn_layer = 2,
num_hidden = 50,
input_size = NULL,
num_embed = NULL,
num_decode = 1,
masking = F,
loss_output = "linear",
dropout = 0,
ignore_label = -1,
cell_type = "lstm",
output_last_state = T,
config = "one-to-one")
predict <- numeric()
infer <- mx.infer.rnn.one.unroll(infer.data = data,
symbol = symbol.infer.ini,
num_hidden = 50,
arg.params = model$arg.params,
aux.params = model$aux.params,
init_states = NULL,
ctx = ctx)
pred = mx.nd.array(y[seq_len, 1, drop = F])
real = sin(seeds[1] + pi/12 * (seq_len+1):(seq_len+pred_length))
for (i in 1:pred_length) {
data = mx.nd.reshape(pred, shape = c(1,1,1))
infer <- mx.infer.rnn.one.unroll(infer.data = data,
symbol = symbol.infer,
num_hidden = 50,
arg.params = model$arg.params,
aux.params = model$aux.params,
init_states = infer[-1],
ctx = ctx)
pred <- infer[[1]]
predict <- c(predict, as.numeric(as.array(pred)))
}
data = mx.nd.array(x[, , 1, drop = F])
label = mx.nd.array(y[, 1, drop = F])
plot_ly(x = 1:dim(y)[1], y = y[,1], type = "scatter", mode="lines", name = "hist") %>%
add_trace(x = dim(y)[1] + 1:length(real), y = real, type = "scatter", mode="lines", name = "real") %>%
add_trace(x = dim(y)[1] + 1:length(predict), y = predict, type = "scatter", mode="lines", name = "pred")