Skip to content

Data Readers

Signal readers for time series data.

HDF5Signals

HDF5Signals(names: list[str], dataset: str | None = None)

Temporal reader: reads named 1-D datasets from HDF5 files.

Uses np.memmap for contiguous datasets (~2x faster than h5py), falls back to h5py for chunked/compressed datasets.

Parameters:

Name Type Description Default
names list[str]

dataset column names to extract

required
dataset str | None

HDF5 group name containing the datasets, None for root

None
Source code in tsfast/tsdata/readers.py
def __init__(
    self,
    names: list[str],
    dataset: str | None = None,
):
    """Temporal HDF5 reader for the given dataset names.

    Args:
        names: dataset column names to extract.
        dataset: HDF5 group containing the datasets; None means the file root.
    """
    # Per-path caches, filled lazily on first access.
    self._len_cache: dict[str, int] = {}
    self._mmap_cache: dict[str, dict[str, np.ndarray | None]] = {}
    self._dtype: np.dtype | None = None
    self.names = names
    self.dataset = dataset

read

read(path: str, l_slc: int, r_slc: int) -> np.ndarray

Read columns into pre-allocated array -> [seq_len, n_features].

Source code in tsfast/tsdata/readers.py
def read(self, path: str, l_slc: int, r_slc: int) -> np.ndarray:
    """Read the named columns for [l_slc, r_slc) into a [seq_len, n_features] array."""
    self._probe(path)
    maps = self._mmap_cache[path]
    out = np.empty((r_slc - l_slc, len(self.names)), dtype=self._dtype)
    fallback_cols = []
    for col, name in enumerate(self.names):
        memmap = maps[name]
        if memmap is None:
            # Chunked/compressed dataset: no memmap available, read via h5py below.
            fallback_cols.append(col)
        else:
            out[:, col] = memmap[l_slc:r_slc]
    if fallback_cols:
        with h5py.File(path, "r") as f:
            grp = f if self.dataset is None else f[self.dataset]
            for col in fallback_cols:
                out[:, col] = grp[self.names[col]][l_slc:r_slc]
    return out

file_len

file_len(path: str) -> int

Length of first named dataset. Cached per path.

Source code in tsfast/tsdata/readers.py
def file_len(self, path: str) -> int:
    """Length of the first named dataset for *path*; probed once, then cached."""
    cached = self._len_cache.get(path)
    if cached is None:
        # First time we see this path: probe fills the length cache.
        self._probe(path)
        cached = self._len_cache[path]
    return cached

HDF5Attrs

HDF5Attrs(names: list[str], dataset: str | None = None, dtype: dtype = np.float32)

Scalar reader: reads named HDF5 attributes.

Parameters:

Name Type Description Default
names list[str]

attribute names to extract

required
dataset str | None

HDF5 group name containing the attributes, None for root

None
dtype dtype

output data type

float32
Source code in tsfast/tsdata/readers.py
def __init__(
    self,
    names: list[str],
    dataset: str | None = None,
    dtype: np.dtype = np.float32,
):
    """Scalar HDF5 reader for the given attribute names.

    Args:
        names: attribute names to extract.
        dataset: HDF5 group holding the attributes; None means the file root.
        dtype: output data type.
    """
    self.dtype = dtype
    self.dataset = dataset
    self.names = names

Resampled

Resampled(block: HDF5Signals, fs_idx: int | None = None, dt_idx: int | None = None, fast_resample: bool = True)

Wraps a temporal reader, reading in original space and resampling to target rate.

Parameters:

Name Type Description Default
block HDF5Signals

temporal reader with read(path, l_slc, r_slc) and file_len(path)

required
fs_idx int | None

column index of sampling rate, scaled by resampling factor

None
dt_idx int | None

column index of time step, scaled by resampling factor

None
fast_resample bool

use linear interpolation (True) or FFT resampling (False)

True
Source code in tsfast/tsdata/readers.py
def __init__(
    self,
    block: HDF5Signals,
    fs_idx: int | None = None,
    dt_idx: int | None = None,
    fast_resample: bool = True,
):
    """Wrap *block* so windows are read in original space and resampled.

    Args:
        block: temporal reader exposing read(path, l_slc, r_slc) and file_len(path).
        fs_idx: column index of the sampling rate, scaled by the resampling factor.
        dt_idx: column index of the time step, scaled by the resampling factor.
        fast_resample: linear interpolation when True, FFT resampling when False.
    """
    self.fast_resample = fast_resample
    self.dt_idx = dt_idx
    self.fs_idx = fs_idx
    self.block = block

read

read(path: str, l_slc: int, r_slc: int, factor: float) -> np.ndarray

Read and resample a window. l_slc/r_slc are in resampled coordinates.

Source code in tsfast/tsdata/readers.py
def read(self, path: str, l_slc: int, r_slc: int, factor: float) -> np.ndarray:
    """Read and resample a window; l_slc/r_slc are resampled-space indices."""
    if factor == 1.0:
        # No rate change: pass straight through to the wrapped reader.
        return self.block.read(path, l_slc, r_slc)

    target_len = r_slc - l_slc
    # Map the requested window back to original coordinates, padding the right
    # edge so the resampler has support, clamped to the file length.
    left = math.floor(l_slc / factor)
    right = min(math.ceil(r_slc / factor) + 2, self.file_len(path))
    raw = self.block.read(path, left, right)

    if self.fast_resample:
        out = resample_interp(raw, factor)
    else:
        out = fft_resample(raw, int(raw.shape[0] * factor), window=("kaiser", 14.0))

    # Rate/step metadata columns are rescaled, not interpolated.
    if self.fs_idx is not None:
        out[:, self.fs_idx] = raw[0, self.fs_idx] * factor
    if self.dt_idx is not None:
        out[:, self.dt_idx] = raw[0, self.dt_idx] / factor

    return out[:target_len]

file_len

file_len(path: str) -> int

Length in original (un-resampled) coordinates.

Source code in tsfast/tsdata/readers.py
def file_len(self, path: str) -> int:
    """File length in original (un-resampled) coordinates."""
    # Resampling never changes the source file, so just delegate.
    wrapped = self.block
    return wrapped.file_len(path)

CSVSignals

CSVSignals(columns: list[str], delimiter: str = ',')

Temporal reader: reads named columns from CSV files.

Parameters:

Name Type Description Default
columns list[str]

column names to extract

required
delimiter str

CSV delimiter character

','
Source code in tsfast/tsdata/readers.py
def __init__(self, columns: list[str], delimiter: str = ","):
    """CSV temporal reader for the named columns.

    Args:
        columns: column names to extract.
        delimiter: CSV delimiter character.
    """
    self.delimiter = delimiter
    self.columns = columns
    # Full-file data and row counts, cached per path.
    self._len_cache: dict[str, int] = {}
    self._data_cache: dict[str, np.ndarray] = {}

read

read(path: str, l_slc: int, r_slc: int) -> np.ndarray

Read columns and slice -> [seq_len, n_features].

Source code in tsfast/tsdata/readers.py
def read(self, path: str, l_slc: int, r_slc: int) -> np.ndarray:
    """Return rows [l_slc, r_slc) of the named columns -> [seq_len, n_features]."""
    full = self._load(path)
    return full[l_slc:r_slc]

file_len

file_len(path: str) -> int

Row count excluding header. Cached per path.

Source code in tsfast/tsdata/readers.py
def file_len(self, path: str) -> int:
    """Row count excluding header; loading the file populates the cache."""
    cached = self._len_cache.get(path)
    if cached is None:
        # First access: _load fills the length cache as a side effect.
        self._load(path)
        cached = self._len_cache[path]
    return cached

Cached

Cached(block)

Wrapper that caches full file data in memory on first read.

Parameters:

Name Type Description Default
block

any signal reader to wrap

required
Source code in tsfast/tsdata/readers.py
def __init__(self, block):
    """Cache wrapper: keeps each file's full data in memory after first read.

    Args:
        block: any signal reader to wrap.
    """
    # path -> full file contents, populated lazily.
    self._data_cache: dict[str, np.ndarray] = {}
    self.block = block

FilenameScalar

FilenameScalar(pattern: str = '(\\d+\\.?\\d*)')

Scalar reader: extracts numbers from filenames via regex.

Parameters:

Name Type Description Default
pattern str

regex with capture groups to extract from the filename stem

'(\\d+\\.?\\d*)'
Source code in tsfast/tsdata/readers.py
def __init__(self, pattern: str = r"(\d+\.?\d*)"):
    """Compile *pattern* and derive one signal name per capture group."""
    self._pattern = re.compile(pattern)
    self._n_features = self._pattern.groups
    # groupindex maps name -> group number; invert it so named groups keep
    # their name while unnamed groups get a positional "scalar_i" fallback.
    by_number = {num: name for name, num in self._pattern.groupindex.items()}
    self._signal_names = [
        by_number.get(g, f"scalar_{g - 1}") for g in range(1, self._n_features + 1)
    ]

read

read(path: str) -> np.ndarray

Search filename stem and return captured groups as float32 array.

Source code in tsfast/tsdata/readers.py
def read(self, path: str) -> np.ndarray:
    """Extract the captured groups from the filename stem as a float32 vector."""
    stem = Path(path).stem
    match = self._pattern.search(stem)
    if match is None:
        raise ValueError(f"Pattern {self._pattern.pattern!r} did not match filename {stem!r}")
    values = [float(group) for group in match.groups()]
    return np.array(values, dtype=np.float32)