
RNN Models

RNN-based models for time series with regularization and DenseNet variants.

RNNDropout

RNNDropout(p: float = 0.5)

Bases: Module

Dropout with probability p whose mask is held constant along the seq_len dimension (variational dropout).

Source code in tsfast/models/rnn.py
def __init__(self, p: float = 0.5):
    super().__init__()
    self.p = p
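
A minimal usage sketch (assuming the [batch, seq_len, features] layout used throughout this page): because the mask is shared along the sequence dimension, a constant input is dropped identically at every timestep.

import torch
from tsfast.models.rnn import RNNDropout

dp = RNNDropout(p=0.3)
dp.train()                              # dropout is only active in training mode
x = torch.ones(8, 50, 16)               # [batch, seq_len, features]
y = dp(x)
assert torch.equal(y[:, 0], y[:, -1])   # same mask at the first and last timestep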

WeightDropout

WeightDropout(module: Module, weight_p: float, layer_names: str | list[str] = 'weight_hh_l0')

Bases: Module

A module that wraps another layer and replaces some of its weights with zeros during training (weight dropout).

Parameters:

module (Module): wrapped RNN module. Required.
weight_p (float): weight dropout probability. Required.
layer_names (str | list[str]): name(s) of the parameters to apply dropout to. Default: 'weight_hh_l0'.
Source code in tsfast/models/rnn.py
def __init__(
    self,
    module: nn.Module,
    weight_p: float,
    layer_names: str | list[str] = "weight_hh_l0",
):
    super().__init__()
    self.module = module
    self.weight_p = weight_p
    self.layer_names = [layer_names] if isinstance(layer_names, str) else list(layer_names)
    for layer in self.layer_names:
        # Makes a copy of the weights of the selected layers.
        w = getattr(self.module, layer)
        delattr(self.module, layer)
        self.register_parameter(f"{layer}_raw", nn.Parameter(w.data))
        setattr(self.module, layer, w.clone())
        if isinstance(self.module, (nn.RNNBase, nn.modules.rnn.RNNBase)):
            self.module.flatten_parameters = self._do_nothing
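
A construction sketch (only the constructor is shown above; the dropout itself is applied in the forward pass): wrapping an nn.GRU registers a raw copy of the selected weights and disables flatten_parameters on the wrapped module.

import torch.nn as nn
from tsfast.models.rnn import WeightDropout

gru = nn.GRU(input_size=16, hidden_size=32, batch_first=True)
wd_gru = WeightDropout(gru, weight_p=0.5, layer_names="weight_hh_l0")
# The trainable copy now lives in wd_gru.weight_hh_l0_raw; during training the wrapped
# module's weight_hh_l0 is recreated as a dropped-out version of it.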

RNN

RNN(input_size: int, hidden_size: int, num_layers: int, hidden_p: float = 0.0, input_p: float = 0.0, weight_p: float = 0.0, rnn_type: str = 'gru', ret_full_hidden: bool = False, normalization: str = '', **kwargs)

Bases: Module

Multi-layer RNN with dropout and normalization, inspired by https://arxiv.org/abs/1708.02182.

Parameters:

input_size (int): number of input features per timestep. Required.
hidden_size (int): number of hidden units per layer. Required.
num_layers (int): number of stacked RNN layers. Required.
hidden_p (float): dropout probability applied between hidden layers. Default: 0.0.
input_p (float): dropout probability applied to the input. Default: 0.0.
weight_p (float): weight dropout probability applied within each RNN cell. Default: 0.0.
rnn_type (str): recurrent cell type, one of 'gru', 'lstm', or 'rnn'. Default: 'gru'.
ret_full_hidden (bool): if True, return stacked hidden outputs from all layers. Default: False.
normalization (str): normalization between layers ('' or 'layernorm'). Default: ''.
**kwargs: additional keyword arguments forwarded to the underlying nn.RNN/nn.GRU/nn.LSTM. Default: {}.
Source code in tsfast/models/rnn.py
def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    hidden_p: float = 0.0,
    input_p: float = 0.0,
    weight_p: float = 0.0,
    rnn_type: str = "gru",
    ret_full_hidden: bool = False,
    normalization: str = "",
    **kwargs,
):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.hidden_p = hidden_p
    self.input_p = input_p
    self.weight_p = weight_p
    self.rnn_type = rnn_type
    self.ret_full_hidden = ret_full_hidden
    self.normalization = normalization

    self.rnns = nn.ModuleList(
        [
            self._one_rnn(input_size if i == 0 else hidden_size, hidden_size, weight_p, rnn_type, **kwargs)
            for i in range(num_layers)
        ]
    )

    self.input_dp = RNNDropout(input_p)
    self.hidden_dps = nn.ModuleList([RNNDropout(hidden_p) for _ in range(num_layers)])

    if normalization == "":
        self.norm_layers = [None] * num_layers
    elif normalization == "layernorm":
        self.norm_layers = nn.ModuleList(
            [nn.LayerNorm(hidden_size, elementwise_affine=False) for _ in range(num_layers)]
        )
    else:
        raise ValueError("Invalid value for normalization")

state_size property

state_size: int

Total flat state dimension needed to initialize this RNN.

unflatten_state

unflatten_state(flat: Tensor) -> list

Convert [batch, state_size] flat tensor to per-layer hidden state list.

Returns:

list: for GRU/RNN, a list of [1, batch, hidden_size] tensors; for LSTM, a list of ([1, batch, hidden_size], [1, batch, hidden_size]) tuples.

Source code in tsfast/models/rnn.py
def unflatten_state(self, flat: Tensor) -> list:
    """Convert ``[batch, state_size]`` flat tensor to per-layer hidden state list.

    Returns:
        For GRU/RNN: ``list[[1, batch, hidden_size]]``.
        For LSTM: ``list[tuple[[1, batch, hidden_size], [1, batch, hidden_size]]]``.
    """
    bs = flat.shape[0]
    if self.rnn_type == "lstm":
        flat = flat.view(bs, self.num_layers, 2, self.hidden_size)
        return [
            (flat[:, i, 0].unsqueeze(0).contiguous(), flat[:, i, 1].unsqueeze(0).contiguous())
            for i in range(self.num_layers)
        ]
    else:
        flat = flat.view(bs, self.num_layers, self.hidden_size)
        return [flat[:, i].unsqueeze(0).contiguous() for i in range(self.num_layers)]
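
A worked example of the flat-state layout implied by the view() calls above: num_layers * hidden_size entries per sample for GRU/RNN, and twice that for LSTM (h and c per layer).

import torch
from tsfast.models.rnn import RNN

rnn = RNN(input_size=8, hidden_size=64, num_layers=3, rnn_type="lstm")
flat = torch.zeros(4, rnn.state_size)    # state_size = 3 * 2 * 64 = 384
states = rnn.unflatten_state(flat)
h0, c0 = states[0]                       # each [1, batch, hidden_size] = [1, 4, 64]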

unflatten_sequence

unflatten_sequence(flat_seq: Tensor) -> Tensor

Convert [batch, seq_len, state_size] to [num_layers, batch, seq_len, hidden_size].

For LSTM, extracts only h-states (matching ret_full_hidden output format).

Source code in tsfast/models/rnn.py
def unflatten_sequence(self, flat_seq: Tensor) -> Tensor:
    """Convert ``[batch, seq_len, state_size]`` to ``[num_layers, batch, seq_len, hidden_size]``.

    For LSTM, extracts only h-states (matching ``ret_full_hidden`` output format).
    """
    bs, seq_len, _ = flat_seq.shape
    if self.rnn_type == "lstm":
        # Layout: [h0, c0, h1, c1, ...] — extract h-states only
        flat_seq = flat_seq.view(bs, seq_len, self.num_layers, 2, self.hidden_size)
        return flat_seq[:, :, :, 0, :].permute(2, 0, 1, 3).contiguous()
    else:
        flat_seq = flat_seq.view(bs, seq_len, self.num_layers, self.hidden_size)
        return flat_seq.permute(2, 0, 1, 3).contiguous()
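
And the per-timestep counterpart, sketched under the same shape assumptions:

import torch
from tsfast.models.rnn import RNN

rnn = RNN(input_size=8, hidden_size=64, num_layers=2, rnn_type="gru")
flat_seq = torch.zeros(4, 100, rnn.state_size)    # [batch, seq_len, state_size]
per_layer = rnn.unflatten_sequence(flat_seq)      # [num_layers, batch, seq_len, hidden_size]
assert per_layer.shape == (2, 4, 100, 64)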

Sequential_RNN

Sequential_RNN(input_size: int, hidden_size: int, num_layers: int, hidden_p: float = 0.0, input_p: float = 0.0, weight_p: float = 0.0, rnn_type: str = 'gru', ret_full_hidden: bool = False, normalization: str = '', **kwargs)

Bases: RNN

RNN variant that returns only the output tensor, discarding hidden state.

Source code in tsfast/models/rnn.py
def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    hidden_p: float = 0.0,
    input_p: float = 0.0,
    weight_p: float = 0.0,
    rnn_type: str = "gru",
    ret_full_hidden: bool = False,
    normalization: str = "",
    **kwargs,
):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.hidden_p = hidden_p
    self.input_p = input_p
    self.weight_p = weight_p
    self.rnn_type = rnn_type
    self.ret_full_hidden = ret_full_hidden
    self.normalization = normalization

    self.rnns = nn.ModuleList(
        [
            self._one_rnn(input_size if i == 0 else hidden_size, hidden_size, weight_p, rnn_type, **kwargs)
            for i in range(num_layers)
        ]
    )

    self.input_dp = RNNDropout(input_p)
    self.hidden_dps = nn.ModuleList([RNNDropout(hidden_p) for _ in range(num_layers)])

    if normalization == "":
        self.norm_layers = [None] * num_layers
    elif normalization == "layernorm":
        self.norm_layers = nn.ModuleList(
            [nn.LayerNorm(hidden_size, elementwise_affine=False) for _ in range(num_layers)]
        )
    else:
        raise ValueError("Invalid value for normalization")

SimpleRNN

SimpleRNN(input_size: int, output_size: int, num_layers: int = 1, hidden_size: int = 100, linear_layers: int = 0, return_state: bool = False, **kwargs)

Bases: Module

Simple RNN with a linear output head.

Parameters:

input_size (int): number of input features per timestep. Required.
output_size (int): number of output features per timestep. Required.
num_layers (int): number of stacked RNN layers. Default: 1.
hidden_size (int): number of hidden units per RNN layer. Default: 100.
linear_layers (int): number of hidden linear layers in the output head. Default: 0.
return_state (bool): if True, return (output, hidden_state) instead of just output. Default: False.
**kwargs: additional keyword arguments forwarded to RNN. Default: {}.
Source code in tsfast/models/rnn.py
def __init__(
    self,
    input_size: int,
    output_size: int,
    num_layers: int = 1,
    hidden_size: int = 100,
    linear_layers: int = 0,
    return_state: bool = False,
    **kwargs,
):
    super().__init__()
    self.return_state = return_state
    self.rnn = RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, **kwargs)
    self.final = SeqLinear(
        hidden_size, output_size, hidden_size=hidden_size, hidden_layer=linear_layers, act=nn.LeakyReLU
    )
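
A minimal usage sketch (shapes assume the batch-first convention used throughout this page):

import torch
from tsfast.models.rnn import SimpleRNN

model = SimpleRNN(input_size=8, output_size=2, num_layers=2, hidden_size=128, linear_layers=1)
y = model(torch.randn(4, 100, 8))    # [batch, seq_len, output_size]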

ResidualBlock_RNN

ResidualBlock_RNN(input_size: int, hidden_size: int, **kwargs)

Bases: Module

Two-layer RNN block with a residual skip connection.

Parameters:

input_size (int): number of input features per timestep. Required.
hidden_size (int): number of hidden units in each RNN layer. Required.
**kwargs: additional keyword arguments forwarded to RNN. Default: {}.
Source code in tsfast/models/rnn.py
def __init__(self, input_size: int, hidden_size: int, **kwargs):
    super().__init__()
    self.rnn1 = RNN(input_size, hidden_size, num_layers=1, **kwargs)
    self.rnn2 = RNN(hidden_size, hidden_size, num_layers=1, **kwargs)
    self.residual = (
        SeqLinear(input_size, hidden_size, hidden_layer=0) if hidden_size != input_size else nn.Identity()
    )
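
As the constructor above shows, the skip path is only a projection when the widths differ; a short sketch:

from tsfast.models.rnn import ResidualBlock_RNN

block = ResidualBlock_RNN(input_size=8, hidden_size=64)    # residual path: SeqLinear(8, 64)
same = ResidualBlock_RNN(input_size=64, hidden_size=64)    # residual path: nn.Identity()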

SimpleResidualRNN

SimpleResidualRNN(input_size: int, output_size: int, num_blocks: int = 1, hidden_size: int = 100, **kwargs)

Bases: Sequential

Sequential stack of residual RNN blocks with a linear output head.

Parameters:

input_size (int): number of input features per timestep. Required.
output_size (int): number of output features per timestep. Required.
num_blocks (int): number of stacked residual RNN blocks. Default: 1.
hidden_size (int): number of hidden units per block. Default: 100.
**kwargs: additional keyword arguments forwarded to ResidualBlock_RNN. Default: {}.
Source code in tsfast/models/rnn.py
def __init__(self, input_size: int, output_size: int, num_blocks: int = 1, hidden_size: int = 100, **kwargs):
    super().__init__()
    for i in range(num_blocks):
        self.add_module(
            "rnn%d" % i, ResidualBlock_RNN(input_size if i == 0 else hidden_size, hidden_size, **kwargs)
        )

    self.add_module("linear", SeqLinear(hidden_size, output_size, hidden_size, hidden_layer=1))

DenseLayer_RNN

DenseLayer_RNN(input_size: int, hidden_size: int, **kwargs)

Bases: Module

Two-layer RNN that concatenates its output with the input (DenseNet-style).

Parameters:

input_size (int): number of input features per timestep. Required.
hidden_size (int): growth rate (number of new features produced). Required.
**kwargs: additional keyword arguments forwarded to RNN. Default: {}.
Source code in tsfast/models/rnn.py
def __init__(self, input_size: int, hidden_size: int, **kwargs):
    super().__init__()
    self.rnn1 = RNN(input_size, hidden_size, num_layers=1, **kwargs)
    self.rnn2 = RNN(hidden_size, hidden_size, num_layers=1, **kwargs)
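
A sketch of the DenseNet-style width growth described above (output width = input width + growth rate, since the new features are concatenated onto the input):

import torch
from tsfast.models.rnn import DenseLayer_RNN

layer = DenseLayer_RNN(input_size=8, hidden_size=16)
y = layer(torch.randn(4, 100, 8))
assert y.shape[-1] == 8 + 16    # input features plus the newly produced ones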

DenseBlock_RNN

DenseBlock_RNN(num_layers: int, num_input_features: int, growth_rate: int, **kwargs)

Bases: Sequential

Sequential block of DenseNet-style RNN layers with feature concatenation.

Parameters:

num_layers (int): number of dense RNN layers in this block. Required.
num_input_features (int): number of input features entering the block. Required.
growth_rate (int): number of new features each dense layer adds. Required.
**kwargs: additional keyword arguments forwarded to DenseLayer_RNN. Default: {}.
Source code in tsfast/models/rnn.py
def __init__(self, num_layers: int, num_input_features: int, growth_rate: int, **kwargs):
    super().__init__()
    for i in range(num_layers):
        self.add_module(
            "denselayer%d" % i, DenseLayer_RNN(num_input_features + i * growth_rate, growth_rate, **kwargs)
        )
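
A worked example of the width arithmetic in the loop above: layer i receives num_input_features + i * growth_rate channels, so the block emits num_input_features + num_layers * growth_rate channels.

from tsfast.models.rnn import DenseBlock_RNN

block = DenseBlock_RNN(num_layers=3, num_input_features=32, growth_rate=16)
# per-layer input widths: 32, 48, 64 -> block output width: 32 + 3 * 16 = 80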

DenseNet_RNN

DenseNet_RNN(input_size: int, output_size: int, growth_rate: int = 32, block_config: tuple = (3, 3), num_init_features: int = 32, **kwargs)

Bases: Sequential

DenseNet architecture using RNN layers with transition layers between blocks.

Parameters:

input_size (int): number of input features per timestep. Required.
output_size (int): number of output features per timestep. Required.
growth_rate (int): number of new features each dense layer adds. Default: 32.
block_config (tuple): tuple specifying the number of layers in each dense block. Default: (3, 3).
num_init_features (int): number of features produced by the initial RNN layer. Default: 32.
**kwargs: additional keyword arguments forwarded to RNN. Default: {}.
Source code in tsfast/models/rnn.py
def __init__(
    self,
    input_size: int,
    output_size: int,
    growth_rate: int = 32,
    block_config: tuple = (3, 3),
    num_init_features: int = 32,
    **kwargs,
):
    super().__init__()
    self.add_module("rnn0", Sequential_RNN(input_size, num_init_features, 1, **kwargs))

    # Each denseblock
    num_features = num_init_features
    for i, num_layers in enumerate(block_config):
        self.add_module(
            "denseblock%d" % i, DenseBlock_RNN(num_layers, num_features, growth_rate=growth_rate, **kwargs)
        )
        num_features = num_features + num_layers * growth_rate

        self.add_module("transition%d" % i, Sequential_RNN(num_features, num_features // 2, 1, **kwargs))
        num_features = num_features // 2
    self.add_module("final", SeqLinear(num_features, output_size, hidden_layer=0))

SeperateRNN

SeperateRNN(input_list: list[list[int]], output_size: int, num_layers: int = 1, hidden_size: int = 100, linear_layers: int = 1, **kwargs)

Bases: Module

RNN that processes input channel groups separately before merging.

Parameters:

input_list (list[list[int]]): list of index lists, each defining a group of input channels. Required.
output_size (int): number of output features per timestep. Required.
num_layers (int): number of stacked RNN layers in the merging RNN. Default: 1.
hidden_size (int): total hidden size (split evenly across per-group RNNs). Default: 100.
linear_layers (int): number of hidden linear layers in the output head. Default: 1.
**kwargs: additional keyword arguments forwarded to RNN. Default: {}.
Source code in tsfast/models/rnn.py
def __init__(
    self,
    input_list: list[list[int]],
    output_size: int,
    num_layers: int = 1,
    hidden_size: int = 100,
    linear_layers: int = 1,
    **kwargs,
):
    super().__init__()
    self.input_list = input_list

    rnn_width = hidden_size // len(input_list)

    self.rnns = nn.ModuleList(
        [RNN(input_size=len(n), hidden_size=rnn_width, num_layers=1, **kwargs) for n in input_list]
    )

    self.rnn = RNN(input_size=rnn_width * len(input_list), hidden_size=hidden_size, num_layers=num_layers, **kwargs)
    self.final = SeqLinear(hidden_size, output_size, hidden_size=hidden_size, hidden_layer=linear_layers)
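
A usage sketch: channels {0, 1} and {2, 3, 4} are handled by separate single-layer RNNs (each hidden_size // 2 = 50 wide here) before the merging RNN and linear head.

import torch
from tsfast.models.rnn import SeperateRNN

model = SeperateRNN(input_list=[[0, 1], [2, 3, 4]], output_size=1, hidden_size=100)
y = model(torch.randn(4, 200, 5))    # [batch, seq_len, 1]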

RNNLearner

RNNLearner(dls, loss_func=nn.L1Loss(), metrics: list | None = None, n_skip: int = 0, num_layers: int = 1, hidden_size: int = 100, sub_seq_len: int | None = None, opt_func=torch.optim.Adam, input_norm: type | None = StandardScaler, output_norm: type | None = None, augmentations: list | None = None, transforms: list | None = None, aux_losses: list | None = None, grad_clip: float | None = None, cuda_graph: bool = False, **kwargs)

Create a Learner with a SimpleRNN model and standard training setup.

Parameters:

dls: DataLoaders providing training and validation data. Required.
loss_func: loss function for training. Default: L1Loss().
metrics (list | None): list of metric functions evaluated during validation. Default: None.
n_skip (int): number of initial timesteps to skip in loss and metric computation. Default: 0.
num_layers (int): number of stacked RNN layers. Default: 1.
hidden_size (int): number of hidden units per RNN layer. Default: 100.
sub_seq_len (int | None): sub-sequence length for TBPTT; enables stateful training when set. Default: None.
opt_func: optimizer constructor. Default: Adam.
input_norm (type | None): scaler class for input normalization, or None to disable. Default: StandardScaler.
output_norm (type | None): scaler class for output denormalization, or None to disable. Default: None.
augmentations (list | None): list of augmentation transforms (train only). Default: None.
transforms (list | None): list of transforms (train + valid). Default: None.
aux_losses (list | None): list of auxiliary loss functions. Default: None.
grad_clip (float | None): max gradient norm for clipping, or None to disable. Default: None.
cuda_graph (bool): if True and sub_seq_len is set, use CudaGraphTbpttLearner for faster training. Default: False.
**kwargs: additional keyword arguments forwarded to SimpleRNN. Default: {}.
Source code in tsfast/models/rnn.py
def RNNLearner(
    dls,
    loss_func=nn.L1Loss(),
    metrics: list | None = None,
    n_skip: int = 0,
    num_layers: int = 1,
    hidden_size: int = 100,
    sub_seq_len: int | None = None,
    opt_func=torch.optim.Adam,
    input_norm: type | None = StandardScaler,
    output_norm: type | None = None,
    augmentations: list | None = None,
    transforms: list | None = None,
    aux_losses: list | None = None,
    grad_clip: float | None = None,
    cuda_graph: bool = False,
    **kwargs,
):
    """Create a Learner with a SimpleRNN model and standard training setup.

    Args:
        dls: DataLoaders providing training and validation data.
        loss_func: loss function for training.
        metrics: list of metric functions evaluated during validation.
        n_skip: number of initial timesteps to skip in loss and metric computation.
        num_layers: number of stacked RNN layers.
        hidden_size: number of hidden units per RNN layer.
        sub_seq_len: sub-sequence length for TBPTT; enables stateful training when set.
        opt_func: optimizer constructor.
        input_norm: scaler class for input normalization, or None to disable.
        output_norm: scaler class for output denormalization, or None to disable.
        augmentations: list of augmentation transforms (train only).
        transforms: list of transforms (train + valid).
        aux_losses: list of auxiliary loss functions.
        grad_clip: max gradient norm for clipping, or None to disable.
        cuda_graph: if True and sub_seq_len is set, use CudaGraphTbpttLearner for faster training.
        **kwargs: additional keyword arguments forwarded to ``SimpleRNN``.
    """
    if metrics is None:
        metrics = [fun_rmse]

    inp, out = get_io_size(dls)
    if sub_seq_len:
        kwargs.setdefault("return_state", True)
    model = SimpleRNN(inp, out, num_layers, hidden_size, **kwargs)
    model = ScaledModel.from_dls(model, dls, input_norm, output_norm)

    if sub_seq_len:
        cls = CudaGraphTbpttLearner if cuda_graph else TbpttLearner
    else:
        cls = Learner
    extra = {"sub_seq_len": sub_seq_len} if sub_seq_len else {}
    return cls(
        model,
        dls,
        loss_func=loss_func,
        metrics=metrics,
        n_skip=n_skip,
        opt_func=opt_func,
        lr=3e-3,
        augmentations=augmentations,
        transforms=transforms,
        aux_losses=aux_losses,
        grad_clip=grad_clip,
        **extra,
    )
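
A usage sketch (dls stands for a DataLoaders object built elsewhere; the training call is an assumption based on the usual fastai Learner API):

from tsfast.models.rnn import RNNLearner

# dls: DataLoaders with [batch, seq_len, channels] inputs and targets, built elsewhere
lrn = RNNLearner(dls, num_layers=2, hidden_size=128, sub_seq_len=100, grad_clip=1.0)
# lrn.fit_one_cycle(10)    # assuming the standard fastai training loop is available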

AR_RNNLearner

AR_RNNLearner(dls, alpha: float = 0, beta: float = 0, metrics: list | None = None, n_skip: int = 0, opt_func=torch.optim.Adam, input_norm: type | None = StandardScaler, **kwargs)

Create a Learner with an autoregressive RNN model.

Parameters:

dls: DataLoaders providing training and validation data. Required.
alpha (float): activation regularization penalty weight. Default: 0.
beta (float): temporal activation regularization penalty weight. Default: 0.
metrics (list | None): metric functions for validation, or None for default RMSE. Default: None.
n_skip (int): number of initial timesteps to skip in metric computation. Default: 0.
opt_func: optimizer constructor. Default: Adam.
input_norm (type | None): scaler class for input normalization, or None to disable. Default: StandardScaler.
**kwargs: additional keyword arguments forwarded to SimpleRNN. Default: {}.
Source code in tsfast/models/rnn.py
def AR_RNNLearner(
    dls,
    alpha: float = 0,
    beta: float = 0,
    metrics: list | None = None,
    n_skip: int = 0,
    opt_func=torch.optim.Adam,
    input_norm: type | None = StandardScaler,
    **kwargs,
):
    """Create a Learner with an autoregressive RNN model.

    Args:
        dls: DataLoaders providing training and validation data.
        alpha: activation regularization penalty weight.
        beta: temporal activation regularization penalty weight.
        metrics: metric functions for validation, or None for default RMSE.
        n_skip: number of initial timesteps to skip in metric computation.
        opt_func: optimizer constructor.
        input_norm: scaler class for input normalization, or None to disable.
        **kwargs: additional keyword arguments forwarded to ``SimpleRNN``.
    """
    if metrics is None:
        metrics = [fun_rmse]

    inp, out = get_io_size(dls)
    ar_model = AR_Model(SimpleRNN(inp + out, out, **kwargs), ar=False)
    rnn_module = ar_model.model.rnn

    model = ScaledModel.from_dls(ar_model, dls, input_norm, autoregressive=True)

    return Learner(
        model,
        dls,
        loss_func=nn.L1Loss(),
        metrics=metrics,
        n_skip=n_skip,
        opt_func=opt_func,
        lr=3e-3,
        transforms=[prediction_concat(t_offset=0)],
        aux_losses=[
            ActivationRegularizer(modules=[rnn_module], alpha=alpha),
            TemporalActivationRegularizer(modules=[rnn_module], beta=beta),
        ],
    )
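
And the autoregressive counterpart, with the AR/TAR penalty weights exposed (again assuming a prebuilt dls):

from tsfast.models.rnn import AR_RNNLearner

# dls: DataLoaders built elsewhere
lrn = AR_RNNLearner(dls, alpha=2.0, beta=1.0, hidden_size=100)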