This is a minimalistic demo on how to model time-series with RNN, including training and inference. It uses fused RNN using the mx.symbol.RNN
operator. Starting from MXNet 1.3.1, CPU is now supported. On prior versions, needs to be run on GPU.
library("readr")
library("dplyr")
library("plotly")
library("mxnet")
In this step, a dataset made of multiple univariate time-series is built. 500 independent times series (n) with each 100 observations (seq_len) are created.
The modeling task will be to predict the next value given the previous ones. The target variable can therefore be obtained by shifting the features by one timestep in the future.
The resulting data is of shape [1, 100, 500] corresponding to [num_features, seq_length, samples]. In multivariate time-series, the dataset would follow the same structure, except that the num_features would be > 1.
# number of timestamps
seq_len <- 100
# number of samples
n <- 500
# return a random starting point of the time series
set.seed(12)
seeds <- runif(n, min = 0, max = 24)
# generate the time series of seq_length for each starting point
pts <- sapply(seeds, function(x) sin(x + pi/12 * (0:(seq_len))))
# build the features matrix
x <- pts[1:seq_len, ]
x <- array(x, dim = c(1, seq_len, n))
# build the target array - same as feature but one timestep forward
y <- pts[-1, ]
# plot time series from 5 first samples
plot_ly(x = 1:dim(x)[2], y = x[,,1], type = "scatter", mode = "lines") %>%
add_trace(x = 1:dim(x)[2], y = x[,,2], type = "scatter", mode = "lines") %>%
add_trace(x = 1:dim(x)[2], y = x[,,3], type = "scatter", mode = "lines") %>%
add_trace(x = 1:dim(x)[2], y = x[,,4], type = "scatter", mode = "lines") %>%
add_trace(x = 1:dim(x)[2], y = x[,,5], type = "scatter", mode = "lines") %>%
layout(title = "Sample of 5 time series")
# plot first time series and its associated target
plot_ly(x = 1:dim(x)[2], y = x[,,1], type = "scatter", mode = "lines", name = "feature") %>%
add_trace(x = 1:dim(x)[2], y = y[,1], type = "scatter", mode = "lines", name = "target") %>%
layout(title = "Sample of one time series")
The training and evaluation data are obtained by splitting the first 400 and the remaining observations into different mx.io.arrayiter
iterators.
batch.size = 32
# take first 400 samples for train - remaining 100 for evaluation
train_ids <- 1:400
train.data <- mx.io.arrayiter(data = x[,,train_ids, drop = F], label = y[, train_ids],
batch.size = batch.size, shuffle = TRUE)
eval.data <- mx.io.arrayiter(data = x[,,-train_ids, drop = F], label = y[, -train_ids],
batch.size = batch.size, shuffle = FALSE)
data <- mx.symbol.Variable("data")
label <- mx.symbol.Variable("label")
parameters <- mx.symbol.Variable("rnn_weight")
rnn_input_state <- mx.symbol.Variable("rnn_input_state")
rnn_input_state_cell <- mx.symbol.Variable("rnn_input_state_cell")
swap_in <- mx.symbol.swapaxes(data, dim1 = 0, dim2 = 1)
RNN <- mx.symbol.RNN(data = swap_in,
parameters = parameters,
state = rnn_input_state,
state_cell = rnn_input_state_cell,
state_size = 32,
num_layers = 2,
p = 0.1,
bidirectional = F,
state_outputs = T,
mode = "lstm",
name = "RNN")
swap_out <- mx.symbol.swapaxes(data = RNN[[1]], dim1 = 0, dim2 = 1)
decode <- mx.symbol.FullyConnected(data = swap_out, num_hidden = 1, flatten = F)
decode <- mx.symbol.reshape(decode, shape = c(1, -1))
label <- mx.symbol.reshape(label, shape = -1)
loss <- mx.symbol.LinearRegressionOutput(data = decode, label = label, name = "loss")
graph.viz(symbol = loss, type = "graph", direction = "LR",
shape=list(data = c(1, seq_len, batch.size),
label = c(seq_len, batch.size)),
graph.width.px = 800, graph.height.px = 220)
Needs a custom metric to handle labels in a matrix rather than flat format in the iterator.
mx.metric.mse.seq <- mx.metric.custom("MSE", function(label, pred) {
label <- mx.nd.reshape(label, shape = -1)
pred <- mx.nd.reshape(pred, shape = -1)
res <- mx.nd.mean(mx.nd.square(label-pred))
return(as.array(res))
})
ctx <- mx.gpu()
initializer <- mx.init.Xavier(rnd_type = "gaussian",
factor_type = "avg",
magnitude = 2.5)
optimizer <- mx.opt.create("adam", learning.rate = 1e-3, beta1 = 0.9, beta2 = 0.999,
wd = 1e-8, clip_gradient = 1, rescale.grad = 1 / batch.size)
logger <- mx.metric.logger()
epoch.end.callback <- mx.callback.log.train.metric(period = 10, logger = logger)
system.time(
model <- mx.model.buckets(symbol = loss,
train.data = train.data,
eval.data = eval.data,
num.round = 50, ctx = ctx, verbose = TRUE,
metric = mx.metric.mse.seq,
initializer = initializer, optimizer = optimizer,
batch.end.callback = NULL,
epoch.end.callback = epoch.end.callback)
)
## user system elapsed
## 13.93 4.24 18.71
plot_ly(x = seq_len(length(logger$train)), y = logger$train, type = "scatter",
mode = "markers+lines", name = "train") %>%
add_trace(y = logger$eval, type = "scatter",
mode = "markers+lines", name = "eval")
Setup inference data. Need to apply preprocessing to inference sequence and convert into a infer data iterator.
internals <- model$symbol$get.internals()
sym_state <- internals$get.output(which(internals$outputs %in% "RNN_state"))
sym_state_cell <- internals$get.output(which(internals$outputs %in% "RNN_state_cell"))
sym_output <- internals$get.output(which(internals$outputs %in% "loss_output"))
symbol <- mx.symbol.Group(sym_output, sym_state, sym_state_cell)
pred_length = 100
predict <- numeric()
data <- mx.nd.array(x[, , 1, drop = F])
label <- mx.nd.array(y[, 1, drop = F])
pred <- mx.nd.array(y[seq_len, 1, drop = F])
real <- sin(seeds[1] + pi/12 * (seq_len+1):(seq_len+pred_length))
infer.data <- mx.io.arrayiter(data = data, label = label,
batch.size = 1, shuffle = FALSE)
infer <- mx.infer.rnn.one(infer.data = infer.data,
symbol = symbol,
arg.params = model$arg.params,
aux.params = model$aux.params,
input.params = NULL,
ctx = ctx)
for (i in 1:pred_length) {
data <- mx.nd.reshape(pred, shape = c(1,1,1))
label <- pred
infer.data <- mx.io.arrayiter(data = data, label = label,
batch.size = 1, shuffle = FALSE)
infer <- mx.infer.rnn.one(infer.data = infer.data,
symbol = symbol,
ctx = ctx,
arg.params = model$arg.params,
aux.params = model$aux.params,
input.params = list(rnn_input_state = infer[[2]],
rnn_input_state_cell = infer[[3]]))
pred <- infer[[1]]
predict <- c(predict, as.numeric(as.array(pred)))
}
data <- mx.nd.array(x[, , 1, drop = F])
label <- mx.nd.array(y[, 1, drop = F])
plot_ly(x = 1:dim(y)[1], y = y[,1], type = "scatter", mode="lines", name = "hist") %>%
add_trace(x = dim(y)[1] + 1:length(real), y = real, type = "scatter", mode="lines", name = "real") %>%
add_trace(x = dim(y)[1] + 1:length(predict), y = predict, type = "scatter", mode="lines", name = "pred")