From c7d12b61bb8203089e979d6e081e7537446b1790 Mon Sep 17 00:00:00 2001 From: ChayanBhansali <96097973+ChayanBhansali@users.noreply.github.com> Date: Wed, 7 Aug 2024 20:36:35 +0530 Subject: [PATCH 1/2] incorrect onnxruntime-gpu version , changed from 1.8.1 to 1.18.1 --- .../TimeSeriesPredictionPlatform/triton/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/triton/requirements.txt b/Tools/PyTorch/TimeSeriesPredictionPlatform/triton/requirements.txt index d7a2cd4dd..89f00109a 100644 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/triton/requirements.txt +++ b/Tools/PyTorch/TimeSeriesPredictionPlatform/triton/requirements.txt @@ -16,7 +16,7 @@ natsort>=7.0.0 networkx==2.5 numpy onnx>=1.8.0,<1.9.0 -onnxruntime-gpu==1.8.1 +onnxruntime-gpu==1.18.1 pycuda>=2019.1.2 PyYAML>=5.2 tabulate>=0.8.7 From e40aff80470bf639e6efda7c4f727507cf3cd5eb Mon Sep 17 00:00:00 2001 From: ChayanBhansali <96097973+ChayanBhansali@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:30:57 +0530 Subject: [PATCH 2/2] updated readme file in tspp --- .../TimeSeriesPredictionPlatform/README.md | 2 +- .../models/dcrnn.py | 283 --------------- .../models/deepar.py | 147 -------- .../models/deepar_v2.py | 147 -------- .../models/ensembling.py | 123 ------- .../models/gnn.py | 296 --------------- .../models/interpretability.py | 31 -- .../models/lstm.py | 66 ---- .../models/mtgnn.py | 338 ------------------ .../models/nbeats.py | 154 -------- .../models/nhits.py | 156 -------- .../models/stat_models.py | 72 ---- .../models/tft.py | 123 ------- .../models/trivial_model.py | 53 --- .../models/tspp_xgboost.py | 111 ------ 15 files changed, 1 insertion(+), 2101 deletions(-) delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/dcrnn.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar_v2.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/ensembling.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/gnn.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/interpretability.py delete mode 100755 Tools/PyTorch/TimeSeriesPredictionPlatform/models/lstm.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/mtgnn.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/nbeats.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/nhits.py delete mode 100755 Tools/PyTorch/TimeSeriesPredictionPlatform/models/stat_models.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/tft.py delete mode 100755 Tools/PyTorch/TimeSeriesPredictionPlatform/models/trivial_model.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/tspp_xgboost.py diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md b/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md index aae2800be..34c1ba6b8 100755 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md +++ b/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md @@ -153,7 +153,7 @@ For more information about how to get started with NGC containers, refer to the 2. Enter the Deep Learning Examples TSPP repository: ``` -cd DeeplearningExamples/Tools/TimeSeriesPredictionPlatform +cd DeeplearningExamples/Tools/PyTorch/TimeSeriesPredictionPlatform ``` 3. 
Copy the relevant temporal fusion transformer [code](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Forecasting/TFT/modeling.py) to the TSPP: ``` diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/dcrnn.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/dcrnn.py deleted file mode 100644 index 66433025a..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/dcrnn.py +++ /dev/null @@ -1,283 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch -import torch.nn as nn -import scipy.sparse as sp -from scipy.sparse import linalg -import dgl -import dgl.function as fn -import dgl.ops as ops - -from .tft_pyt.modeling import LazyEmbedding - -def calculate_normalized_laplacian(adj): - """ - # L = D^-1/2 (D-A) D^-1/2 = I - D^-1/2 A D^-1/2 - # D = diag(A 1) - :param adj: - :return: - """ - adj = sp.coo_matrix(adj) - d = np.array(adj.sum(1)) - d_inv_sqrt = np.power(d, -0.5).flatten() - d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. - d_mat_inv_sqrt = sp.diags(d_inv_sqrt) - normalized_laplacian = sp.eye(adj.shape[0]) - adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() - return normalized_laplacian - - -def calculate_random_walk_matrix(adj_mx): - d = np.array(adj_mx.sum(1)) - d_inv = np.power(d, -1).flatten() - d_inv[np.isinf(d_inv)] = 0. - d_mat_inv = np.diag(d_inv) - random_walk_mx = d_mat_inv.dot(adj_mx) - random_walk_mx = torch.from_numpy(random_walk_mx) - return random_walk_mx - - -def calculate_dual_random_walk_matrix(adj_mx): - L0 = calculate_random_walk_matrix(adj_mx).T - L1 = calculate_random_walk_matrix(adj_mx.T).T - return L0, L1 - - -def calculate_scaled_laplacian(adj_mx, lambda_max=2, undirected=True): - if undirected: - adj_mx = np.maximum.reduce([adj_mx, adj_mx.T]) - L = calculate_normalized_laplacian(adj_mx) - if lambda_max is None: - lambda_max, _ = linalg.eigsh(L, 1, which='LM') - lambda_max = lambda_max[0] - L = sp.csr_matrix(L) - M, _ = L.shape - I = sp.identity(M, format='csr', dtype=L.dtype) - L = (2 / lambda_max * L) - I - L = L.astype(np.float32).todense() - return torch.from_numpy(L) - - -class DCGRUCell(torch.nn.Module): - def __init__(self, num_units, max_diffusion_step, nonlinearity='tanh'): - super().__init__() - self._activation = torch.tanh if nonlinearity == 'tanh' else torch.relu - self._num_units = num_units - self.gconv1 = Gconv(self._num_units*2, self._num_units, max_diffusion_step, 0.0) - self.gconv2 = Gconv(self._num_units, self._num_units, max_diffusion_step, 0.0) - - def forward(self, graph, inputs, hx): - """Gated recurrent unit (GRU) with Graph Convolution. 
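-        Two diffusion convolutions produce the reset/update gates and the candidate state.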
- """ - _inputs = torch.cat([inputs, hx], dim=-1) - x = self.gconv1(graph, _inputs) - - value = torch.sigmoid(x) - r, u = value.chunk(2, dim=-1) - - _inputs = torch.cat([inputs, r * hx], dim=-1) - c = self.gconv2(graph, _inputs) - - if self._activation is not None: - c = self._activation(c) - - new_state = u * hx + (1.0 - u) * c - return new_state - - -class Gconv(torch.nn.Module): - def __init__(self, output_size, hidden_size, max_diffusion_step, bias_start=0.0): - assert max_diffusion_step > 0 - super().__init__() - self.output_size = output_size - self.hidden_size = hidden_size - self._max_diffusion_step = max_diffusion_step - - self.num_matrices = 2 * self._max_diffusion_step + 1 - self.lin = torch.nn.LazyLinear(self.output_size) - def _reset_parameters(self): - torch.nn.init.xavier_normal_(self.weight) - torch.nn.init.constant_(self.bias, bias_start) - bound_method = _reset_parameters.__get__(self.lin, self.lin.__class__) - self.lin.reset_parameters = bound_method - - @staticmethod - def calculate_random_walk_matrix(adj_mx): - d = adj_mx.sum(1) - d_inv = d.pow(-1) - d_inv[torch.isinf(d_inv)] = 0. - random_walk_mx = d_inv.unsqueeze(1).mul(adj_mx) - return random_walk_mx - - - def rwLaplacian(self,feat, graph): - rev = graph.reverse() - - # L0 - out_degree = ops.copy_e_sum(rev, graph.edata['w']) #adj_mx.sum(1) - graph.ndata['_h'] = feat[...,0] * out_degree.pow(-1).unsqueeze(-1) - graph.update_all(fn.u_mul_e('_h', 'w', 'm') , fn.sum('m', '_h')) - - # L1 - in_degree = ops.copy_e_sum(graph, graph.edata['w']) #adj_mx.sum(0) - rev.edata['w'] = graph.edata['w'] - rev.ndata['_h'] = feat[...,1] * in_degree.pow(-1).unsqueeze(-1) - rev.update_all(fn.u_mul_e('_h', 'w', 'm') , fn.sum('m', '_h')) - - return torch.stack((graph.ndata.pop('_h'), rev.ndata.pop('_h')), dim=-1) - - def forward(self, graph, inputs): - batch_size = graph.batch_size - - # Caching - # We assume that all graphs are the same in sructure! 
- if not hasattr(self, 'adj_mx'): - with torch.no_grad(): - samples = dgl.unbatch(graph) - adj_mx = torch.sparse_coo_tensor(indices=samples[0].adjacency_matrix().coalesce().indices().to(inputs.device), - values=samples[0].edata['w'].to(inputs.device)).to_dense() - L0 = Gconv.calculate_random_walk_matrix(adj_mx).T - L1 = Gconv.calculate_random_walk_matrix(adj_mx.T).T - self.register_buffer('adj_mx', adj_mx, persistent=False) - self.register_buffer('L0', L0, persistent=False) - self.register_buffer('L1', L1, persistent=False) - if hasattr(self, f'L_{batch_size}'): - L = getattr(self, f'L_{batch_size}') - else: - L = torch.block_diag(*[l for l in (self.L0,self.L1) for _ in range(batch_size)]).to_sparse() - setattr(self, f'L_{batch_size}', L) - - x0 = torch.cat((inputs,inputs), dim=0) - x1 = torch.sparse.mm(L, x0) - dif_outs = [inputs, *x1.chunk(2, dim=0)] - - for k in range(2, self._max_diffusion_step + 1): - x2 = 2 * torch.sparse.mm(L, x1) - x0 - dif_outs += x2.chunk(2, dim=0) - x1, x0 = x2, x1 - - x = torch.stack(dif_outs, dim=-1) - x = x.reshape(graph.num_nodes(), -1) - x = self.lin(x) - return x - - - -class RNNStack(nn.Module): - def __init__(self, num_rnn_layers, max_diffusion_step, rnn_units, nonlinearity='tanh'): - super().__init__() - self.num_rnn_layers = num_rnn_layers - self.rnn_units = rnn_units - self.dcgru_layers = nn.ModuleList([DCGRUCell(rnn_units, max_diffusion_step, nonlinearity=nonlinearity) for _ in range(self.num_rnn_layers)]) - - def forward(self, graph, inputs, hidden_state=None): - if hidden_state is None: - hidden_state = inputs.new_zeros((self.num_rnn_layers, graph.num_nodes(), self.rnn_units)) - - hidden_states = [] - output = inputs - for layer_num, dcgru_layer in enumerate(self.dcgru_layers): - next_hidden_state = dcgru_layer(graph, output, hidden_state[layer_num]) - hidden_states.append(next_hidden_state) - output = next_hidden_state - - return output, torch.stack(hidden_states) # runs in O(num_layers) so not too slow - -class DCRNN(nn.Module): - def __init__(self, config): - super().__init__() - self.config = config - self.max_diffusion_step = int(config.get('max_diffusion_step', 2)) - self.num_nodes = int(config.get('num_nodes', 1)) - self.num_rnn_layers = int(config.get('num_rnn_layers', 1)) - self.rnn_units = int(config.get('rnn_units')) - self.activation = config.get('activation') - self.output_dim = int(config.get('output_dim', 1)) - self.horizon = int(config.get('horizon', 1)) # for the decoder - self.encoder_model = RNNStack(self.num_rnn_layers, self.max_diffusion_step, self.rnn_units, self.activation) - self.projection_layer = nn.Linear(self.rnn_units, self.output_dim) - self.decoder_model = RNNStack(self.num_rnn_layers, self.max_diffusion_step, self.rnn_units, self.activation) - self.cl_decay_steps = int(config.get('cl_decay_steps', 1000)) - self.use_curriculum_learning = bool(config.get('use_curriculum_learning', False)) - self.seq_len = int(config.get('encoder_length')) # for the encoder - self.batches_seen = 0 - - self.use_embedding = config.use_embedding - ### New embedding - if self.use_embedding: - self.config.hidden_size = self.config.input_dim - self.embedding = LazyEmbedding(self.config) - self.include_static_data = config.get('include_static_data', False) - #### - - def _compute_sampling_threshold(self, batches_seen): - return self.cl_decay_steps / ( - self.cl_decay_steps + np.exp(batches_seen / self.cl_decay_steps)) - - def encoder(self, graph): - encoder_hidden_state = None - h = graph.ndata['h'] - for t in range(self.seq_len): - _, 
encoder_hidden_state = self.encoder_model(graph, h[:,t], encoder_hidden_state) - - return encoder_hidden_state - - def decoder(self, graph, encoder_hidden_state, labels=None): - decoder_hidden_state = encoder_hidden_state - decoder_input = encoder_hidden_state.new_zeros((graph.num_nodes(), 1)) - - outputs = [] - - for t in range(self.horizon): - decoder_output, decoder_hidden_state = self.decoder_model(graph, decoder_input, decoder_hidden_state) - decoder_output = self.projection_layer(decoder_output) - decoder_input = decoder_output - outputs.append(decoder_output) - if self.training and self.use_curriculum_learning: - c = np.random.uniform(0, 1) - if c < self._compute_sampling_threshold(self.batches_seen): - decoder_input = labels[:,t].view(-1,1) - outputs = torch.stack(outputs, dim=1) - return outputs - - def forward(self, batch): - if self.use_embedding: - # New embedding - _batch = { - k:v[:, :self.seq_len] - if v is not None and v.numel() else None - for k,v in batch.ndata.items() - if 'ID' not in k and 'id' not in k - } - emb = self.embedding(_batch) - emb = [e.view(*e.shape[:-2], -1) for e in emb if e is not None] - emb[0] = emb[0].unsqueeze(1).expand(emb[0].shape[0], self.seq_len, *emb[0].shape[1:]) - if not self.include_static_data: - emb = emb[1:] - batch.ndata['h'] = torch.cat(emb, dim=-1) - #### - else: - t = batch.ndata['k_cont'][:, :self.seq_len, 2:] - t = torch.einsum('btk,k->bt', t, t.new([1, 0.16])) - batch.ndata['h'] = torch.cat([batch.ndata['target'][:, :self.seq_len], t.unsqueeze(-1)], dim=-1) - - if self.training: - labels = batch.ndata['target'][:, self.seq_len:].view(-1, self.num_nodes, self.horizon).transpose(1,2) - else: - labels = None - - encoder_hidden_state = self.encoder(batch) - outputs = self.decoder(batch, encoder_hidden_state, labels) - self.batches_seen += 1 - return outputs diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar.py deleted file mode 100644 index d46846b97..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import torch
-import torch.nn as nn
-from .tft_pyt.modeling import LazyEmbedding
-
-class DeepAR(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-
-        self.config = config
-        self.encoder_length = config.encoder_length
-        self.register_buffer('quantiles', torch.FloatTensor(config.quantiles), persistent=False)
-        self.use_embedding = self.config.use_embedding
-
-        if self.config.use_embedding:
-            ### New Embedding
-            # DeepAR can't currently work with observed data
-            config.num_historic_vars -= len(config.temporal_observed_categorical_inp_lens)
-            config.num_historic_vars -= config.temporal_observed_continuous_inp_size
-            config.temporal_observed_categorical_inp_lens = []
-            config.temporal_observed_continuous_inp_size = 0
-            _config = config.copy()
-            _config.hidden_size = self.config.embedding_dim
-            self.embedding_v2 = LazyEmbedding(_config)
-            inp_size = (config.num_static_vars + config.num_historic_vars) * config.embedding_dim
-        else:
-            self.embedding = nn.ModuleList([
-                nn.Embedding(n, config.embedding_dim)
-                for n in config.static_categorical_inp_lens + config.temporal_known_categorical_inp_lens
-            ])
-
-            inp_size = config.temporal_known_continuous_inp_size + len(self.embedding) * config.embedding_dim + 1 # +1 for target
-
-        self.lstm = nn.LSTM(input_size=inp_size,
-                            hidden_size=config.hidden_size,
-                            num_layers=config.num_layers,
-                            bias=True,
-                            batch_first=True,
-                            dropout=config.dropout)
-        for names in self.lstm._all_weights:
-            for name in filter(lambda n: "bias" in n, names):
-                bias = getattr(self.lstm, name)
-                n = bias.size(0)
-                start, end = n // 4, n // 2
-                bias.data[start:end].fill_(1.)
-
-        self.relu = nn.ReLU()
-        self.distribution_mu = nn.Linear(config.hidden_size * config.num_layers, 1)
-        self.distribution_presigma = nn.Linear(config.hidden_size * config.num_layers, 1)
-        self.distribution_sigma = nn.Softplus()
-
-    def _roll_data(x):
-        if x is None:
-            return None
-        x = torch.roll(x, 1, 1)
-        x[:,0] = 0
-        return x
-    def forward(self, batch):
-        if self.use_embedding:
-            return self._forward_v2(batch)
-        else:
-            return self._forward_v1(batch)
-
-
-    def _forward_v2(self, batch):
-        batch = batch.copy() # shallow copy to replace observables in this scope
-        batch['target'] = DeepAR._roll_data(batch['target'])
-        batch['weight'] = DeepAR._roll_data(batch['weight'])
-        batch['o_cat'] = None
-        batch['o_cont'] = None
-
-        emb = self.embedding_v2(batch)
-        emb = [x for x in emb if x is not None]
-        emb[0] = emb[0].unsqueeze(1).expand(emb[0].shape[0], emb[1].shape[1], *emb[0].shape[1:])
-        emb = torch.cat(emb, axis=-2)
-        emb = emb.view(*emb.shape[:-2], -1)
-
-        state = None
-        mus = []
-        sigs = []
-        for t in range(emb.shape[1]):
-            zero_index = (batch['target'][:, t, 0] == 0)
-            if t > 0 and torch.sum(zero_index) > 0:
-                _x = torch.matmul(mu[zero_index].unsqueeze(-1), self.embedding_v2.t_tgt_embedding_vectors)
-                _x = _x + self.embedding_v2.t_tgt_embedding_bias
-                emb[zero_index, t, -self.config.embedding_dim:] = _x # target embedding is the last to be concatenated
-            mu, sigma, state = self._forward_ar(emb[:,t].unsqueeze(1), state)
-
-            mus.append(mu)
-            sigs.append(sigma)
-
-        mus = torch.stack(mus, dim=1)
-        sigs = torch.stack(sigs, dim=1)
-
-        return torch.stack((mus, sigs), dim=-1)
-
-    def _forward_v1(self, batch):
-        cat = torch.cat([batch['s_cat'], batch['k_cat']], dim=-1).permute(2,0,1)
-        emb = torch.cat([e(t) for e, t in zip(self.embedding, cat)], dim=-1)
-        target = torch.roll(batch['target'], 1, 1)
-        target[:, 0] = 0
-        x = torch.cat((target, batch['k_cont'], emb), dim=-1)
-
-        state = None
-        mus = []
-
sigs = [] - for t in range(x.shape[1]): - zero_index = (x[:, t, 0] == 0) - if t > 0 and torch.sum(zero_index) > 0: - x[zero_index, t, 0] = mu[zero_index] - - mu, sigma, state = self._forward_ar(x[:, t].unsqueeze(1), state) - - mus.append(mu) - sigs.append(sigma) - - mus = torch.stack(mus, dim=1) - sigs = torch.stack(sigs, dim=1) - - return torch.stack((mus, sigs), dim=-1) - - def _forward_ar(self, x, state): - output, state = self.lstm(x, state) - hidden = state[0] - hidden_permute = hidden.permute(1, 2, 0).contiguous().view(hidden.shape[1], -1) - pre_sigma = self.distribution_presigma(hidden_permute) - mu = self.distribution_mu(hidden_permute) - sigma = self.distribution_sigma(pre_sigma) # softplus to make sure standard deviation is positive - return torch.squeeze(mu), torch.squeeze(sigma), state - - def predict(self, batch): - preds = self.forward(batch) - preds = preds[:, self.encoder_length:, :] - preds = torch.stack([preds[..., 0] + preds[..., 1] * torch.erfinv(2 * q - 1) * 1.4142135623730951 for q in self.quantiles], dim=-1) - return preds diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar_v2.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar_v2.py deleted file mode 100644 index 0ef0ed4e4..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar_v2.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-'''Defines the neural network, loss function and metrics'''
-
-import math
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.autograd import Variable
-from .tft_pyt.modeling import LazyEmbedding
-import torch
-from torch import nn
-
-class AutoregressiveLSTM(nn.Module):
-    def __init__(self, input_size, hidden_size, embed_size, num_layers, dropout, tgt_embed):
-        super(AutoregressiveLSTM, self).__init__()
-        self.hidden_size = hidden_size
-        self.embed_size = embed_size
-        self.num_layers = num_layers
-        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
-        self.tgt_embed = tgt_embed
-
-        # This is a modification to the more general algorithm implemented here that uses all the layer's hidden states to make a final prediction
-        # This is not what is described in the paper but is what the reference implementation did
-        # In this particular case it is used for expected value (mu) estimation
-        self.mu_proj = nn.Linear(hidden_size * num_layers, 1)
-        self.sig_proj = nn.Sequential(
-            nn.Linear(hidden_size * num_layers, 1),
-            nn.Softplus()
-        )
-
-
-    def forward(self, inputs, embedded_labels, hidden=None, mask=None):
-        # Inputs should be all covariate embeddings and embedded labels should be target embeddings
-
-        mus = []
-        sigs = []
-        for i in range(inputs.shape[1]):
-            input = inputs[:,i]
-            if embedded_labels is None or mask is None:
-                mu_embed = self.tgt_embed(mu)
-                input = torch.cat((input, mu_embed), dim=-1)
-            elif i and mask[:,i].any():
-                mu_embed = self.tgt_embed(mu)
-                input = torch.cat((input, torch.where(mask[:, i], mu_embed, embedded_labels[:, i])), dim=-1)
-            else:
-                input = torch.cat((input, embedded_labels[:, i]), dim=-1)
-
-            _, hidden = self.lstm(input.unsqueeze(1), hidden)
-            hidden_permute = hidden[0].permute(1, 2, 0).contiguous().view(hidden[0].shape[1], -1)
-            mu = self.mu_proj(hidden_permute)
-
-            sig = self.sig_proj(hidden_permute)
-
-            mus.append(mu)
-            sigs.append(sig)
-
-        mus = torch.cat(mus, dim=1)
-        sigs = torch.cat(sigs, dim=1)
-        return mus, sigs, hidden
-
-
-class DeepAR(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-
-        self.config = config
-        self.encoder_length = config.encoder_length
-        self.example_length = config.example_length
-        self.register_buffer('quantiles', torch.FloatTensor(config.quantiles), persistent=False)
-        self.use_embedding = self.config.use_embedding
-        self.drop_variance = self.config.get('drop_variance', False)
-
-        _config = config.copy()
-        _config.hidden_size = self.config.embedding_dim
-        _config.num_historic_vars -= len(config.temporal_observed_categorical_inp_lens)
-        _config.num_historic_vars -= config.temporal_observed_continuous_inp_size
-        _config.temporal_observed_categorical_inp_lens = []
-        _config.temporal_observed_continuous_inp_size = 0
-
-        self.embedding_v2 = LazyEmbedding(_config)
-        tgt_embed = lambda x: torch.matmul(x, self.embedding_v2.t_tgt_embedding_vectors) + self.embedding_v2.t_tgt_embedding_bias
-
-        inp_size = (config.num_static_vars + config.num_future_vars + config.temporal_target_size) * config.embedding_dim
-
-        self.encoder = AutoregressiveLSTM(input_size=inp_size,
-                                          hidden_size=config.hidden_size,
-                                          embed_size=config.embedding_dim,
-                                          num_layers=config.num_layers,
-                                          dropout=config.dropout,
-                                          tgt_embed=tgt_embed)
-
-    def _roll_data(x):
-        if x is None:
-            return None
-        x = torch.roll(x, 1, 1)
-        x[:,0] = 0
-        return x
-
-
-    def forward(self, batch, predict=False):
-        batch = batch.copy() # shallow copy to replace observables in this scope
-
batch['target'] = DeepAR._roll_data(batch['target']) - batch['weight'] = DeepAR._roll_data(batch['weight']) - batch['o_cat'] = None - batch['o_cont'] = None - - s_emb, k_emb, _, tgt_emb = self.embedding_v2(batch) - s_emb = s_emb.unsqueeze(1).expand(s_emb.shape[0], tgt_emb.shape[1], *s_emb.shape[1:]) - - feat = torch.cat((s_emb, k_emb) , axis=-2) - feat = feat.view(*feat.shape[:-2], -1) - tgt_emb = tgt_emb.view(*tgt_emb.shape[:-2], -1) - - if batch['weight'] is not None: - mask = batch['weight'] == 0 - else: - mask = batch['target'] == 0 - - if predict: - mask[:, self.encoder_length:] = True - - mus, sigs, _ = self.encoder(feat, embedded_labels=tgt_emb, mask=mask) - - if self.drop_variance: - return mus.unsqueeze(-1) - return torch.stack((mus, sigs), dim=-1) - - def predict(self, batch): - preds = self.forward(batch, predict=True) - preds = preds[:,self.encoder_length:, :] - if self.drop_variance: - return preds - preds = torch.stack([preds[...,0] + preds[...,1] * torch.erfinv(2 * q - 1) * 1.4142135623730951 for q in self.quantiles], dim=-1) - return preds diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/ensembling.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/ensembling.py deleted file mode 100644 index 227368245..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/ensembling.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -import hydra -from omegaconf import OmegaConf -import torch -from torch import nn, Tensor -from typing import Dict -from training.utils import to_device - - -class ModelEnsemble(nn.Module): - def __init__(self, config): - super().__init__() - self.config = config - self.reduction_stategy = config.get('reduction_strategy', 'mean') - self.model_weights = [] - self.model_list = [] - for model_info in self.config.model_list: - self.model_weights.append(model_info.get('weight', 1.0)) - - model_dir = model_info.dir - with open(os.path.join(model_dir, '.hydra/config.yaml'), 'rb') as f: - cfg = OmegaConf.load(f) - model: nn.Module = hydra.utils.instantiate(cfg.model) - if not(cfg.dataset.config.get('xgb', False) or cfg.dataset.config.get('stat', False)): - # reduce gpu memory usage - state_dict = torch.load(os.path.join(model_dir, model_info.get('checkpoint', 'best_checkpoint.zip')), map_location='cpu')['model_state_dict'] - model.load_state_dict(state_dict) - else: - raise ValueError('XGB and stat models are currently not supported by ensembling.') - self.model_list.append(model) - - self.num_devices = min(torch.cuda.device_count(), len(self.model_list)) - model_splits = [self.model_list[i::self.num_devices] for i in range(self.num_devices)] - model_splits = [nn.ModuleList(x).to(f'cuda:{i}') for i, x in enumerate(model_splits)] - self.model_splits = nn.ModuleList(model_splits) - - self.model_weights = [x for y in [self.model_weights[i::self.num_devices] for i in range(self.num_devices)] for x in y] - - @staticmethod - def _reduce_preds(preds, weights, reduction_stategy): - if reduction_stategy not in ['mean', 'sum']: - raise ValueError(f'Unknown reduction strategy: {reduction_stategy}') - - result = sum(p * w for p, w in zip(preds, weights)) - if reduction_stategy == 'mean': - result /= sum(weights) - - return result - - @staticmethod - def _replicate(batch, n): - return [to_device(batch, i) for i in range(n)] - - @torch.no_grad() - def forward(self, x: Dict[str, Tensor]) -> Tensor: - _x = self._replicate(x, self.num_devices) - preds = [[] for _ in range(self.num_devices)] - - for i, (data, split) in enumerate(zip(_x, self.model_splits)): - for model in split: - test_method_name = 'predict' if hasattr(model, 'predict') else '__call__' - test_method = getattr(model, test_method_name) - pred = test_method(data) - # Move all preds to cpu. 
This may hurt performance, but host memory is far more plentiful.
-                preds[i].append(pred.cpu())
-
-        preds = [x for y in preds for x in y]
-        preds = self._reduce_preds(preds, self.model_weights, self.reduction_stategy)
-
-        return preds
-
-class XGBEnsemble:
-    def __init__(self, config):
-        super().__init__()
-        self.config = config
-        self.reduction_stategy = config.get('reduction_strategy', 'mean')
-        self.model_weights = []
-        self.model_list = []
-        for model_info in self.config.model_list:
-            self.model_weights.append(model_info.get('weight', 1.0))
-
-            model_dir = model_info.dir
-            with open(os.path.join(model_dir, '.hydra/config.yaml'), 'rb') as f:
-                cfg = OmegaConf.load(f)
-            model: nn.Module = hydra.utils.instantiate(cfg.model)
-            model.load(model_dir)
-            self.model_list.append(model)
-
-    @staticmethod
-    def _reduce_preds(preds, weights, reduction_stategy):
-        if reduction_stategy not in ['mean', 'sum']:
-            raise ValueError(f'Unknown reduction strategy: {reduction_stategy}')
-
-        result = sum(p * w for p, w in zip(preds, weights))
-        if reduction_stategy == 'mean':
-            result /= sum(weights)
-
-        return result
-
-    def predict(self, x, i):
-        preds = []
-
-        for model in self.model_list:
-            pred = model.predict(x, i)
-            preds.append(pred)
-
-        preds = self._reduce_preds(preds, self.model_weights, self.reduction_stategy)
-
-        return preds
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/gnn.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/gnn.py
deleted file mode 100644
index a8ee7aaf5..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/gnn.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import torch -import torch.nn as nn -from typing import Optional, Tuple, Dict -from torch import Tensor -import dgl -from dgl.nn.pytorch.conv import GraphConv - -import networkx as nx -import numpy as np -from copy import copy - -def list_contract_nodes_(graph, l_nodes): - """ - l_nodes: List[List[Int]]: nodes to merge - Returns node mapping - """ - pooled_feat = [] - _nodes_flat = [x for y in l_nodes for x in y] - - _unmerged_nodes = list(range(graph.num_nodes())) - for n in _nodes_flat: - _unmerged_nodes.remove(n) - - node_mapping = {i:[n] for i,n in enumerate(_unmerged_nodes)} - num_nodes = graph.num_nodes() - i = 0 - while l_nodes: - nodes = l_nodes.pop() - # Add features - ndata = {k:v[nodes].mean() for k, v in graph.ndata.items()} - pooled_feat.append({k: v[nodes].mean(dim=0) for k,v in graph.ndata.items()}) - # Add edges - predecessors = torch.cat([graph.predecessors(n) for n in nodes]) - successors = torch.cat([graph.successors(n) for n in nodes]) - nidx = graph.num_nodes() - graph.add_edges(torch.full_like(predecessors, nidx), predecessors) - graph.add_edges(torch.full_like(successors, nidx), successors) - # Add key to super node mapping - node_mapping[num_nodes - len(_nodes_flat) + i] = nodes - i += 1 - - graph.remove_nodes(_nodes_flat) - - # Insert pooled features - pooled_feat = {k: torch.stack([d[k] for d in pooled_feat], dim=0) for k in graph.ndata.keys()} - for k, v in pooled_feat.items(): - graph.ndata[k][-v.shape[0]:] = v - - return graph, node_mapping - -def coarsen(graph): - g_nx = graph.cpu().to_networkx().to_undirected() - g_nx = nx.Graph(g_nx) - matching = nx.algorithms.matching.max_weight_matching(g_nx) - matching = [list(x) for x in matching] - g, s_node_map = list_contract_nodes_(graph, matching) - return g, s_node_map - -class SpatialPooling(nn.Module): - def __init__(self): - super().__init__() - self.s_node_map = None - self.cached_graph = None - self.ukey = f'feat_{id(self)}' - - def forward(self, graph, feat): - self.cached_graph = graph - _graph = copy(graph) - _graph.ndata[self.ukey] = feat - g, s_node_map = coarsen(_graph) - self.s_node_map = s_node_map - return g, g.ndata[self.ukey] - - def unpool(self, feat): - """ Unpools by copying values""" - _feat = [] - for k,v in self.s_node_map.items(): - for node in v: - _feat.append((node, feat[k])) - u_feat = torch.stack([t[1] for t in sorted(_feat, key=lambda x: x[0])]) - return self.cached_graph, u_feat - -class TFTEmbedding(nn.Module): - def __init__(self, config): - super().__init__() - self.s_cat_inp_lens = config.static_categorical_inp_lens - self.t_cat_k_inp_lens = config.temporal_known_categorical_inp_lens - self.t_cat_o_inp_lens = config.temporal_observed_categorical_inp_lens - self.s_cont_inp_size = config.static_continuous_inp_size - self.t_cont_k_inp_size = config.temporal_known_continuous_inp_size - self.t_cont_o_inp_size = config.temporal_observed_continuous_inp_size - self.t_tgt_size = config.temporal_target_size - - self.hidden_size = config.hidden_size - - # There are 7 types of input: - # 1. Static categorical - # 2. Static continuous - # 3. Temporal known a priori categorical - # 4. Temporal known a priori continuous - # 5. Temporal observed categorical - # 6. Temporal observed continuous - # 7. 
Temporal observed targets (time series observed so far)
-
-        self.s_cat_embed = nn.ModuleList([
-            nn.Embedding(n, self.hidden_size) for n in self.s_cat_inp_lens]) if self.s_cat_inp_lens else None
-        self.t_cat_k_embed = nn.ModuleList([
-            nn.Embedding(n, self.hidden_size) for n in self.t_cat_k_inp_lens]) if self.t_cat_k_inp_lens else None
-        self.t_cat_o_embed = nn.ModuleList([
-            nn.Embedding(n, self.hidden_size) for n in self.t_cat_o_inp_lens]) if self.t_cat_o_inp_lens else None
-
-        self.s_cont_embedding_vectors = nn.Parameter(torch.Tensor(self.s_cont_inp_size, self.hidden_size)) if self.s_cont_inp_size else None
-        self.t_cont_k_embedding_vectors = nn.Parameter(torch.Tensor(self.t_cont_k_inp_size, self.hidden_size)) if self.t_cont_k_inp_size else None
-        self.t_cont_o_embedding_vectors = nn.Parameter(torch.Tensor(self.t_cont_o_inp_size, self.hidden_size)) if self.t_cont_o_inp_size else None
-        self.t_tgt_embedding_vectors = nn.Parameter(torch.Tensor(self.t_tgt_size, self.hidden_size))
-
-        self.s_cont_embedding_bias = nn.Parameter(torch.zeros(self.s_cont_inp_size, self.hidden_size)) if self.s_cont_inp_size else None
-        self.t_cont_k_embedding_bias = nn.Parameter(torch.zeros(self.t_cont_k_inp_size, self.hidden_size)) if self.t_cont_k_inp_size else None
-        self.t_cont_o_embedding_bias = nn.Parameter(torch.zeros(self.t_cont_o_inp_size, self.hidden_size)) if self.t_cont_o_inp_size else None
-        self.t_tgt_embedding_bias = nn.Parameter(torch.zeros(self.t_tgt_size, self.hidden_size))
-
-        if self.s_cont_embedding_vectors is not None:
-            torch.nn.init.xavier_normal_(self.s_cont_embedding_vectors)
-        if self.t_cont_k_embedding_vectors is not None:
-            torch.nn.init.xavier_normal_(self.t_cont_k_embedding_vectors)
-        if self.t_cont_o_embedding_vectors is not None:
-            torch.nn.init.xavier_normal_(self.t_cont_o_embedding_vectors)
-        torch.nn.init.xavier_normal_(self.t_tgt_embedding_vectors)
-
-    def _apply_embedding(self,
-                         cat: Optional[Tensor],
-                         cont: Optional[Tensor],
-                         cat_emb: Optional[nn.ModuleList],
-                         cont_emb: Tensor,
-                         cont_bias: Tensor,
-                         ) -> Tuple[Optional[Tensor], Optional[Tensor]]:
-        e_cat = torch.stack([embed(cat[...,i]) for i, embed in enumerate(cat_emb)], dim=-2) if cat is not None else None
-        if cont is not None:
-            # the line below is equivalent to the following einsums
-            #e_cont = torch.einsum('btf,fh->bthf', cont, cont_emb)
-            #e_cont = torch.einsum('bf,fh->bhf', cont, cont_emb)
-            e_cont = torch.mul(cont.unsqueeze(-1), cont_emb)
-            e_cont = e_cont + cont_bias
-        else:
-            e_cont = None
-
-        if e_cat is not None and e_cont is not None:
-            return torch.cat([e_cat, e_cont], dim=-2)
-        elif e_cat is not None:
-            return e_cat
-        elif e_cont is not None:
-            return e_cont
-        else:
-            return None
-
-    def forward(self, x: Dict[str, Tensor]):
-        # temporal/static categorical/continuous known/observed input
-        x = {k:v for k,v in x.items() if v.numel()}
-        s_cat_inp = x.get('s_cat', None)
-        s_cont_inp = x.get('s_cont', None)
-        t_cat_k_inp = x.get('k_cat', None)
-        t_cont_k_inp = x.get('k_cont', None)
-        t_cat_o_inp = x.get('o_cat', None)
-        t_cont_o_inp = x.get('o_cont', None)
-        t_tgt_obs = x['target'] # Has to be present
-
-        # Static inputs are expected to be equal for all timesteps
-        # For memory efficiency there is no assert statement
-        s_cat_inp = s_cat_inp[:,0,:] if s_cat_inp is not None else None
-        s_cont_inp = s_cont_inp[:,0,:] if s_cont_inp is not None else None
-
-        s_inp = self._apply_embedding(s_cat_inp,
-                                      s_cont_inp,
-                                      self.s_cat_embed,
-                                      self.s_cont_embedding_vectors,
-                                      self.s_cont_embedding_bias)
-        t_known_inp =
self._apply_embedding(t_cat_k_inp, - t_cont_k_inp, - self.t_cat_k_embed, - self.t_cont_k_embedding_vectors, - self.t_cont_k_embedding_bias) - t_observed_inp = self._apply_embedding(t_cat_o_inp, - t_cont_o_inp, - self.t_cat_o_embed, - self.t_cont_o_embedding_vectors, - self.t_cont_o_embedding_bias) - - # Temporal observed targets - # t_observed_tgt = torch.einsum('btf,fh->btfh', t_tgt_obs, self.t_tgt_embedding_vectors) - t_observed_tgt = torch.matmul(t_tgt_obs.unsqueeze(3).unsqueeze(4), self.t_tgt_embedding_vectors.unsqueeze(1)).squeeze(3) - t_observed_tgt = t_observed_tgt + self.t_tgt_embedding_bias - - return s_inp, t_known_inp, t_observed_inp, t_observed_tgt - -class GCGRUCell(nn.Module): - def __init__(self, input_size, hidden_size): - super().__init__() - self.conv_i = GraphConv(input_size, 3 * hidden_size) #According to https://arxiv.org/pdf/1903.05631.pdf - self.conv_h = GraphConv(hidden_size, 3 * hidden_size) # this should be ChebConv - self.hidden_size = hidden_size - self.state = None - def forward(self, graph, feat, hx): - i = self.conv_i(graph, feat) - h = self.conv_h(graph, hx) - i_r, i_z, i_n = torch.chunk(i, 3, dim=-1) - h_r, h_z, h_n = torch.chunk(h, 3, dim=-1) - r = torch.sigmoid(i_r + h_r) - z = torch.sigmoid(i_z + h_z) - n = torch.tanh(i_n + r * h_n) - h = (1-z) * n + z * hx - - return h - -class GCGRU(nn.Module): - def __init__(self, input_size, hidden_size, num_layers): - super().__init__() - self.input_size = input_size - self.hidden_size = hidden_size - self.num_layers = num_layers - cells = [GCGRUCell(input_size, hidden_size)] - cells += [GCGRUCell(hidden_size, hidden_size) for _ in range(num_layers - 1)] - self.cells = nn.ModuleList(cells) - - def forward(self, graph, input, hx=None): - if hx is None: - hx = [torch.zeros(graph.num_nodes(), self.hidden_size, - dtype=input.dtype, device=input.device)] * self.num_layers - - - out = [] - states = [] - intermediate = [input[:,t,...] 
for t in range(input.shape[1])] - - for i, cell in enumerate(self.cells): - inner_out = [] - h = hx[i] - for x in intermediate: - h = cell(graph, x, h) - inner_out.append(h) - out.append(inner_out) - intermediate = inner_out - - output = torch.stack(out[-1], dim=1) - - return output, out[-1] - - -class ToyModel(nn.Module): - def __init__(self, config): - super().__init__() - self.encoder_steps = config.encoder_length - self.num_future_vars = config.num_future_vars - self.num_historic_vars = config.num_historic_vars - self.num_static_vars = config.num_static_vars - self.hidden_size = config.hidden_size - self.embedding = TFTEmbedding(config) - - self.static_proj = nn.Linear(config.hidden_size * self.num_static_vars, config.num_layers * config.hidden_size) - self.history_recurrent = GCGRU(config.hidden_size, config.hidden_size, config.num_layers) - self.future_recurrent = GCGRU(config.hidden_size, config.hidden_size, config.num_layers) - self.history_down_proj = nn.Linear(self.num_historic_vars * config.hidden_size, config.hidden_size) - self.future_down_proj = nn.Linear(self.num_future_vars * config.hidden_size, config.hidden_size) - self.out_proj = nn.Linear(config.hidden_size, 1) - - def forward(self, graph): - s_inp, t_known_inp, t_observed_inp, t_observed_tgt = self.embedding(graph.ndata) - s_inp = s_inp.view(s_inp.shape[0], -1) - init_state = self.static_proj(s_inp) - init_state = init_state.view(init_state.shape[0], -1, self.hidden_size).transpose(0,1) - - feat = torch.cat([t_known_inp, t_observed_inp, t_observed_tgt], dim=2) - historic_feat = feat[:,:self.encoder_steps,:] - historic_feat = historic_feat.view(historic_feat.shape[0], historic_feat.shape[1], -1) - historic_feat = self.history_down_proj(historic_feat) - history, state = self.history_recurrent(graph, historic_feat, hx=init_state) - - future_feat = t_known_inp[:,self.encoder_steps:, :] - future_feat = future_feat.view(future_feat.shape[0], future_feat.shape[1], -1) - future_feat = self.future_down_proj(future_feat) - future, _ = self.future_recurrent(graph, future_feat, hx=state) - out = self.out_proj(future) - - return out diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/interpretability.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/interpretability.py deleted file mode 100644 index b155fcf5d..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/interpretability.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-from abc import ABCMeta, abstractmethod
-
-
-class InterpretableModelBase(object, metaclass=ABCMeta):
-    def __init__(self, *args, **kwargs):
-        self.interpretable = True
-        self.activations = {}
-
-    @abstractmethod
-    def _register_interpretable_hooks(self):
-        return
-
-    def enable_activations_dump(self):
-        self._register_interpretable_hooks()
-
-    @abstractmethod
-    def get_activations(self, sample_number, *args, **kwargs):
-        return
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/lstm.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/lstm.py
deleted file mode 100755
index c843ba056..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/lstm.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torch.nn as nn
-from torch import Tensor
-
-from models.tft_pyt.modeling import *
-
-
-class LSTM(nn.Module):
-    """
-    Implementation from LSTM portion of https://arxiv.org/abs/1912.09363
-    """
-
-    def __init__(self, config):
-        super().__init__()
-
-        self.encoder_steps = config.encoder_length # determines how far into the past the model looks
-
-        self.mask_nans = config.missing_data_strategy == "mask"
-
-        self.embedding = TFTEmbedding(config)
-        self.static_encoder = StaticCovariateEncoder(config)
-
-        self.history_vsn = VariableSelectionNetwork(config, config.num_historic_vars)
-        self.history_encoder = nn.LSTM(config.hidden_size, config.hidden_size, batch_first=True)
-        self.future_vsn = VariableSelectionNetwork(config, config.num_future_vars)
-        self.future_encoder = nn.LSTM(config.hidden_size, config.hidden_size, batch_first=True)
-
-        self.output_proj = nn.Linear(config.hidden_size, 1)
-
-    def forward(self, x: Tensor) -> Tensor:
-        s_inp, t_known_inp, t_observed_inp, t_observed_tgt = self.embedding(x)
-
-        # Static context
-        cs, ce, ch, cc = self.static_encoder(s_inp)
-        ch, cc = ch.unsqueeze(0), cc.unsqueeze(0) # lstm initial states
-
-        # Temporal input
-        _historical_inputs = [t_known_inp[:, : self.encoder_steps, :], t_observed_tgt[:, : self.encoder_steps, :]]
-        if t_observed_inp is not None:
-            _historical_inputs.insert(0, t_observed_inp[:, : self.encoder_steps, :])
-
-        historical_inputs = torch.cat(_historical_inputs, dim=-2)
-        future_inputs = t_known_inp[:, self.encoder_steps:]
-
-        # Encoders
-        historical_features, _ = self.history_vsn(historical_inputs, cs)
-        history, state = self.history_encoder(historical_features, (ch, cc))
-        future_features, _ = self.future_vsn(future_inputs, cs)
-        future, _ = self.future_encoder(future_features, state)
-
-        output = self.output_proj(future)
-        return output
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/mtgnn.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/mtgnn.py
deleted file mode 100644
index 1c810317e..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/mtgnn.py
+++ /dev/null
@@ -1,338 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import torch.nn.functional as F -import numbers -import pickle - -from .tft_pyt.modeling import LazyEmbedding - -# This is copied from torch source and adjusted to take in indices of nodes as well -class LayerNorm(nn.Module): - __constants__ = ['normalized_shape', 'weight', 'bias', 'eps', 'elementwise_affine'] - def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True): - super(LayerNorm, self).__init__() - if isinstance(normalized_shape, numbers.Integral): - normalized_shape = (normalized_shape,) - self.normalized_shape = tuple(normalized_shape) - self.eps = eps - self.elementwise_affine = elementwise_affine - if self.elementwise_affine: - self.weight = nn.Parameter(torch.Tensor(*normalized_shape)) - self.bias = nn.Parameter(torch.Tensor(*normalized_shape)) - else: - self.register_parameter('weight', None) - self.register_parameter('bias', None) - self.reset_parameters() - - def reset_parameters(self): - if self.elementwise_affine: - nn.init.ones_(self.weight) - nn.init.zeros_(self.bias) - - def forward(self, input, idx): - if self.elementwise_affine: - return F.layer_norm(input, tuple(input.shape[1:]), self.weight[:,idx,:], self.bias[:,idx,:], self.eps) - else: - return F.layer_norm(input, tuple(input.shape[1:]), self.weight, self.bias, self.eps) - - def extra_repr(self): - return '{normalized_shape}, eps={eps}, ' \ - 'elementwise_affine={elementwise_affine}'.format(**self.__dict__) - - -class GraphConstructor(nn.Module): - - def __init__(self, nnodes, k, dim, alpha=3, static_feat=None): - super().__init__() - self.nnodes = nnodes - if static_feat is not None: - xd = static_feat.shape[1] - self.lin1 = nn.Linear(xd, dim) - self.lin2 = nn.Linear(xd, dim) - else: - self.emb1 = nn.Embedding(nnodes, dim) - self.emb2 = nn.Embedding(nnodes, dim) - self.lin1 = nn.Linear(dim, dim) - self.lin2 = nn.Linear(dim, dim) - - self.k = k - self.dim = dim - self.alpha = alpha - self.static_feat = static_feat - - def forward(self, idx): - if self.static_feat is None: - nodevec1 = self.emb1(idx) - nodevec2 = self.emb2(idx) - else: - nodevec1 = self.static_feat[idx, :] - nodevec2 = nodevec1 - - nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1)) - nodevec2 = torch.tanh(self.alpha*self.lin2(nodevec2)) - - #a = torch.mm(nodevec1, nodevec2.transpose(1,0))-torch.mm(nodevec2, nodevec1.transpose(1,0)) - # This comes from (AB^T)^T = BA^T - m = torch.mm(nodevec1, nodevec2.transpose(1, 0)) - a = m - m.transpose(1,0) - ##### - adj = F.relu(torch.tanh(self.alpha*a)) - mask = adj.new_zeros((idx.size(0), idx.size(0))) - s1,t1 = (adj + torch.rand_like(adj)*0.01).topk(self.k,1) - mask.scatter_(1, t1, 1) - adj = adj*mask - return adj - -class MixProp(nn.Module): - def __init__(self, c_in, c_out, gdep, alpha): - super().__init__() - self.linear = torch.nn.Conv2d((gdep+1)*c_in, c_out, kernel_size=(1, 1)) - self.gdep = gdep - self.alpha = alpha - - def forward(self, x, adj): - adj = adj + torch.eye(adj.size(0), device=adj.device) - 
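# Row-normalize the self-looped adjacency so each propagation step averages over neighbor features
-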
d = adj.sum(1)
-        a = adj / d.unsqueeze(-1)
-        h = x
-        out = [h]
-        for i in range(self.gdep):
-            h = torch.einsum('ncwl,vw->ncvl', h, a)
-            h = self.alpha * x + (1 - self.alpha) * h
-            out.append(h)
-        ho = torch.cat(out, dim=1)
-        ho = self.linear(ho)
-        return ho
-
-class GCModule(nn.Module):
-
-    def __init__(self, conv_channels, residual_channels, gcn_depth, propalpha):
-        super().__init__()
-        self.gc1 = MixProp(conv_channels, residual_channels, gcn_depth, propalpha)
-        self.gc2 = MixProp(conv_channels, residual_channels, gcn_depth, propalpha)
-
-    def forward(self, x, adj):
-        x1 = self.gc1(x, adj)
-        x2 = self.gc2(x, adj.transpose(1, 0))
-        return x1 + x2
-
-class DilatedInception(nn.Module):
-    def __init__(self, cin, cout, dilation_factor=2):
-        super().__init__()
-        self.kernel_set = [2,3,6,7]
-        cout = int(cout / len(self.kernel_set))
-        self.tconv = nn.ModuleList([nn.Conv2d(cin, cout, (1, k), dilation=(1, dilation_factor)) for k in self.kernel_set])
-
-    def forward(self,input):
-        x = []
-        for conv in self.tconv:
-            x.append(conv(input))
-
-        # This truncation is described in the paper and seemingly drops some information
-        # Information drop is counteracted by padding time dimension with 0.
-        # Ex: for the largest filter of size 7, input is padded by 7 zeros.
-        for i in range(len(self.kernel_set)):
-            x[i] = x[i][...,-x[-1].size(3):]
-        x = torch.cat(x,dim=1)
-        return x
-
-class TCModule(nn.Module):
-    def __init__(self, residual_channels, conv_channels, dilation_factor):
-        super().__init__()
-        self.filter = DilatedInception(residual_channels, conv_channels, dilation_factor)
-        self.gate = DilatedInception(residual_channels, conv_channels, dilation_factor)
-
-    def forward(self, x):
-        f = self.filter(x)
-        f = torch.tanh(f)
-        g = self.gate(x)
-        g = torch.sigmoid(g)
-        x = f * g
-        return x
-
-
-class MTGNNLayer(nn.Module):
-    def __init__(self,
-                 r_channels,
-                 c_channels,
-                 s_channels,
-                 kernel_size,
-                 dilation_factor,
-                 dropout,
-                 num_nodes,
-                 use_gcn,
-                 gcn_depth,
-                 propalpha):
-        super().__init__()
-        self.use_gcn = use_gcn
-        self.tc_module = TCModule(r_channels, c_channels, dilation_factor)
-        self.skip_conv = nn.Conv2d(in_channels=c_channels,
-                                   out_channels=s_channels,
-                                   kernel_size=(1, kernel_size))
-        self.dropout = nn.Dropout(dropout)
-        self.ln = LayerNorm((r_channels, num_nodes, kernel_size),elementwise_affine=True)
-
-        if use_gcn:
-            self.out_module = GCModule(c_channels, r_channels, gcn_depth, propalpha)
-        else:
-            self.out_module = nn.Conv2d(in_channels=c_channels,
-                                        out_channels=r_channels,
-                                        kernel_size=(1, 1))
-    def forward(self, x, idx, adp):
-        residual = x
-        x = self.tc_module(x)
-        x = self.dropout(x)
-        s = x
-        s = self.skip_conv(s)
-        if self.use_gcn:
-            x = self.out_module(x, adp)
-        else:
-            x = self.out_module(x)
-
-        x = x + residual[:, :, :, -x.size(3):]
-        x = self.ln(x,idx)
-
-        return x, s
-
-
-class MTGNN(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-        self.config = config
-        self.use_gcn = config.use_gcn
-        self.gcn_depth = config.gcn_depth
-        self.predefined_adj = config.get('predefined_adj')
-        if self.predefined_adj is not None:
-            A = pickle.load(open(self.predefined_adj, 'rb'))
-            self.register_buffer('predefined_adj', A)
-        self.propalpha = config.propalpha
-        self.tanhalpha = config.tanhalpha
-
-        self.num_nodes = config.num_nodes
-        self.dropout = config.dropout
-        self.in_dim = config.in_dim
-        self.out_dim = config.example_length - config.encoder_length
-        self.residual_channels = config.residual_channels
-        self.conv_channels = config.conv_channels
-        self.skip_channels =
config.skip_channels - self.end_channels = config.end_channels - self.subgraph_size = config.subgraph_size - self.node_dim = config.node_dim - self.dilation_exponential = config.dilation_exponential - self.seq_length = config.encoder_length - self.num_layers = config.num_layers - self.use_embedding = config.use_embedding - - ### New embedding - if self.use_embedding: - self.config.hidden_size = self.config.in_dim - self.embedding = LazyEmbedding(self.config) - - self.include_static_data = config.include_static_data - #### - - - self.layers = nn.ModuleList() - self.start_conv = nn.LazyConv2d(out_channels=self.residual_channels, kernel_size=(1, 1)) - self.gc = GraphConstructor(self.num_nodes, self.subgraph_size, self.node_dim, alpha=self.tanhalpha) - - kernel_size = 7 - - def rf_size(c,q,m): - assert q >= 1 - if q > 1: - return int(1 + (c-1)*(q**m - 1)/(q-1)) - return m*(c-1) + 1 - - self.receptive_field = rf_size(kernel_size, self.dilation_exponential, self.num_layers) - new_dilation = 1 - for j in range(self.num_layers): - rfs = rf_size(kernel_size, self.dilation_exponential, j+1) - kernel_len = max(self.seq_length, self.receptive_field) - rfs + 1 - - self.layers.append(MTGNNLayer(self.residual_channels, self.conv_channels, self.skip_channels, - kernel_len, new_dilation, self.dropout, self.num_nodes, self.use_gcn, - self.gcn_depth, self.propalpha - ) - ) - - new_dilation *= self.dilation_exponential - - self.end_conv_1 = nn.Conv2d(in_channels=self.skip_channels, - out_channels=self.end_channels, - kernel_size=(1, 1)) - self.end_conv_2 = nn.Conv2d(in_channels=self.end_channels, - out_channels=self.out_dim, - kernel_size=(1, 1)) - - if self.seq_length > self.receptive_field: - self.skip0 = nn.LazyConv2d(out_channels=self.skip_channels, kernel_size=(1, self.seq_length)) - self.skipE = nn.Conv2d(in_channels=self.residual_channels, - out_channels=self.skip_channels, - kernel_size=(1, self.seq_length - self.receptive_field + 1) - ) - - else: - self.skip0 = nn.LazyConv2d(out_channels=self.skip_channels, kernel_size=(1, self.receptive_field)) - self.skipE = nn.Conv2d(in_channels=self.residual_channels, out_channels=self.skip_channels, kernel_size=(1, 1)) - - idx = torch.arange(self.num_nodes) - self.register_buffer('idx', idx) - - def forward(self, batch, idx=None): - if self.use_embedding: - batch = {k: v[:, :self.seq_length] if v is not None else None for k, v in batch.items()} - emb = self.embedding(batch) - emb = [e.view(*e.shape[:-2], -1) for e in emb if e is not None] - emb[0] = emb[0].unsqueeze(1).expand(emb[0].shape[0], self.seq_length, *emb[0].shape[1:]) - if not self.include_static_data: - emb = emb[1:] - input = torch.cat(emb, dim=-1).transpose(1, 3) - else: - - # TSPP compatibility code - t = batch['k_cont'][:, :self.seq_length, 0, 2:] - t = torch.einsum('btk,k->bt', t, t.new([1, 0.16])) - t = t.unsqueeze(-1).expand(*t.shape, self.num_nodes) - target = batch['target'][:, :self.seq_length].squeeze(-1) - input = torch.stack((target, t), dim=1).transpose(2, 3) - #### - - seq_len = input.size(3) - assert seq_len == self.seq_length, 'input sequence length not equal to preset sequence length' - if idx is None: - idx = self.idx - - if self.seq_length < self.receptive_field: - input = nn.functional.pad(input, (self.receptive_field - self.seq_length, 0, 0, 0)) - - if self.use_gcn: - if not self.predefined_adj: - adp = self.gc(idx) - else: - adp = self.predefined_adj - - x = self.start_conv(input) # 1x1 conv for upscaling. 
Acts like a linear projection on dim 1
-        skip = self.skip0(F.dropout(input, self.dropout, training=self.training))
-        for layer in self.layers:
-            x, s = layer(x, idx, adp)
-            skip = skip + s
-
-        skip = self.skipE(x) + skip
-        x = F.relu(skip)
-        x = F.relu(self.end_conv_1(x))
-        x = self.end_conv_2(x)
-        return x
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nbeats.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nbeats.py
deleted file mode 100644
index bc5365cf5..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nbeats.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import torch
-from torch import nn
-
-
-class Block(nn.Module):
-
-    def __init__(self, units, thetas_dim, backcast_length, forecast_length):
-        super(Block, self).__init__()
-        self.thetas_dim = thetas_dim
-        self.backcast_length = backcast_length
-        self.forecast_length = forecast_length
-        ff = [nn.Linear(backcast_length, units), nn.ReLU()] + [item for _ in range(3) for item in (nn.Linear(units, units), nn.ReLU())]
-        self.ff = nn.Sequential(*ff)
-
-        if self.thetas_dim: # generic block skips this stage
-            self.theta_b_fc = nn.Linear(units, thetas_dim, bias=False)
-            self.ff.add_module(str(len(self.ff)), self.theta_b_fc)
-
-
-class SeasonalityBlock(Block):
-
-    def __init__(self, units, thetas_dim, backcast_length, forecast_length):
-
-        if not thetas_dim:
-            # Auto determine according to paper: horizon/2 sines, horizon/2 cosines
-            thetas_dim = forecast_length
-
-        super(SeasonalityBlock, self).__init__(units, thetas_dim, backcast_length,
-                                               forecast_length)
-
-        def get_seasonality_basis(num_thetas, linspace):
-            p = num_thetas
-            p1, p2 = (p // 2, p // 2) if p % 2 == 0 else (p // 2, p // 2 + 1)
-            s1 = [np.cos(2 * np.pi * i * linspace) for i in range(p1)]
-            s2 = [np.sin(2 * np.pi * i * linspace) for i in range(p2)]
-            s = np.stack(s1+s2)
-            return torch.FloatTensor(s)
-
-        self.forecast_length = forecast_length
-        linspace = np.concatenate([np.arange(backcast_length) / backcast_length, np.arange(forecast_length) / forecast_length])
-        self.register_buffer('basis', get_seasonality_basis(self.thetas_dim, linspace))
-
-    def forward(self, x):
-        x = squeeze_last_dim(x)
-        x = self.ff(x)
-        x = x.mm(self.basis)
-        backcast, forecast = x[:,:-self.forecast_length], x[:,-self.forecast_length:]
-        return backcast, forecast
-
-
-class TrendBlock(Block):
-
-    def __init__(self, units, thetas_dim, backcast_length, forecast_length):
-        super(TrendBlock, self).__init__(units, thetas_dim, backcast_length,
-                                         forecast_length)
-
-        self.forecast_length = forecast_length
-        linspace = np.concatenate([np.arange(backcast_length) / backcast_length, np.arange(forecast_length) / forecast_length])
-        basis = np.stack([linspace ** i for i in range(thetas_dim)])
-        self.register_buffer('basis', torch.FloatTensor(basis))
-
-    def forward(self, x):
-        x = squeeze_last_dim(x)
-        x = self.ff(x)
-        x = x.mm(self.basis)
-        backcast, forecast = x[:,
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nhits.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nhits.py
deleted file mode 100644
index 739f2cbe8..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nhits.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Tuple
-
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class _IdentityBasis(nn.Module):
-    def __init__(self, backcast_size: int, forecast_size: int, interpolation_mode: str):
-        super().__init__()
-        assert (interpolation_mode in ['linear','nearest']) or ('cubic' in interpolation_mode)
-        self.forecast_size = forecast_size
-        self.backcast_size = backcast_size
-        self.interpolation_mode = interpolation_mode
-
-    def forward(self, theta: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-
-        backcast = theta[:, :self.backcast_size]
-        knots = theta[:, self.backcast_size:]
-
-        if self.interpolation_mode in ['nearest','linear']:
-            knots = knots[:,None,:]
-            forecast = F.interpolate(knots, size=self.forecast_size, mode=self.interpolation_mode)
-            forecast = forecast[:,0,:]
-        elif 'cubic' in self.interpolation_mode:
-            batch_size = len(backcast)
-            knots = knots[:,None,None,:]
-            forecast = torch.zeros((len(knots), self.forecast_size)).to(knots.device)
-            n_batches = int(np.ceil(len(knots)/batch_size))
-            for i in range(n_batches):
-                forecast_i = F.interpolate(knots[i*batch_size:(i+1)*batch_size], size=self.forecast_size, mode='bicubic')
-                forecast[i*batch_size:(i+1)*batch_size] += forecast_i[:,0,0,:]
-
-        return backcast, forecast
-
-ACTIVATIONS = ['ReLU',
-               'Softplus',
-               'Tanh',
-               'SELU',
-               'LeakyReLU',
-               'PReLU',
-               'Sigmoid']
-
-POOLING = ['MaxPool1d',
-           'AvgPool1d']
-
-class NHITSBlock(nn.Module):
-    """
-    N-HiTS block which takes a basis function as an argument.
-    """
-    def __init__(self,
-                 input_size: int,
-                 n_theta: int,
-                 n_mlp_layers: int,
-                 hidden_size: int,
-                 basis: nn.Module,
-                 n_pool_kernel_size: int,
-                 pooling_mode: str,
-                 dropout_prob: float,
-                 activation: str):
-        """
-        """
-        super().__init__()
-
-        n_time_in_pooled = int(np.ceil(input_size/n_pool_kernel_size))
-
-        self.dropout_prob = dropout_prob
-
-        assert activation in ACTIVATIONS, f'{activation} is not in {ACTIVATIONS}'
-        assert pooling_mode in POOLING, f'{pooling_mode} is not in {POOLING}'
-
-        activ = getattr(nn, activation)()
-
-        self.pooling_layer = getattr(nn, pooling_mode)(kernel_size=n_pool_kernel_size,
-                                                       stride=n_pool_kernel_size, ceil_mode=True)
-
-        # Block MLPs
-        mlp = [nn.Linear(n_time_in_pooled, hidden_size)]
-        mlp += [item for _ in range(n_mlp_layers) for item in (nn.Linear(hidden_size, hidden_size), activ)]
-        layers = mlp + [nn.Linear(hidden_size, n_theta)]
-
-        self.layers = nn.Sequential(*layers)
-        self.basis = basis
-
-    def forward(self, insample_y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-
-        # Pooling
-        insample_y = insample_y.unsqueeze(1)
-        insample_y = self.pooling_layer(insample_y)
-        insample_y = insample_y.squeeze(1)
-
-        # Compute local projection weights and projection
-        theta = self.layers(insample_y)
-        backcast, forecast = self.basis(theta)
-        return backcast, forecast
-
-class NHITS(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-        assert len(config.n_pool_kernel_size) == len(config.n_freq_downsample) == len(config.n_blocks)
-
-        self.config = config
-        self.input_size = config.encoder_length
-        self.h = config.example_length - config.encoder_length
-
-        blocks = self.create_stack(config)
-        self.blocks = torch.nn.ModuleList(blocks)
-
-    def create_stack(self, config):
-
-        block_list = []
-        for n, k, f in zip(config.n_blocks, config.n_pool_kernel_size, config.n_freq_downsample):
-            for _ in range(n):
-                n_theta = (self.input_size + max(self.h//f, 1))
-                basis = _IdentityBasis(backcast_size=self.input_size,
-                                       forecast_size=self.h,
-                                       interpolation_mode=config.interpolation_mode)
-                nbeats_block = NHITSBlock(input_size=self.input_size,
-                                          n_theta=n_theta,
-                                          n_mlp_layers=config.n_mlp_layers,
-                                          hidden_size=config.hidden_size,
-                                          n_pool_kernel_size=k,
-                                          pooling_mode=config.pooling_mode,
-                                          basis=basis,
-                                          dropout_prob=config.dropout_prob_theta,
-                                          activation=config.activation)
-                block_list.append(nbeats_block)
-
-        return block_list
-
-    def forward(self, batch):
-        residuals = batch['target'][:, :self.input_size, 0]
-
-        forecast = residuals[:, -1:]  # Level with Naive1
-        block_forecasts = [ forecast.repeat(1, self.h) ]
-
-        for i, block in enumerate(self.blocks):
-            backcast, block_forecast = block(insample_y=residuals)
-            residuals = residuals - backcast
-            forecast = forecast + block_forecast
-
-        return forecast.unsqueeze(2)
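The step that distinguishes nhits.py above from nbeats.py is the identity basis: each block emits a handful of knots that are upsampled to the full horizon. A minimal sketch of that interpolation, assuming a (batch, n_knots) tensor and linear mode; this mirrors _IdentityBasis with interpolation_mode='linear', with illustrative names:

    import torch
    import torch.nn.functional as F

    def interpolate_knots(knots: torch.Tensor, horizon: int) -> torch.Tensor:
        # F.interpolate expects (batch, channels, length), hence the added axis
        return F.interpolate(knots[:, None, :], size=horizon, mode='linear')[:, 0, :]

    knots = torch.randn(32, 4)                       # 4 knots per series
    forecast = interpolate_knots(knots, horizon=24)  # shape (32, 24)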
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/stat_models.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/stat_models.py
deleted file mode 100755
index e10dbac9b..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/stat_models.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# SPDX-License-Identifier: Apache-2.0
-from abc import ABC
-import os
-import pmdarima as pm
-import numpy as np
-import pickle as pkl
-
-class StatModel(ABC):
-    def __init__(self, config):
-        self.horizon = config.example_length - config.encoder_length
-        self.config = config
-
-    def fit(self, label, data):
-        return
-
-    def predict(self, data, i):
-        return
-
-    def save(self):
-        return
-
-    def load(self, path):
-        return
-
-class AutoARIMA(StatModel):
-    def __init__(self, config):
-        super().__init__(config)
-        self.model = None
-
-    def fit(self, example):
-        id, label, data = example['id'], example['endog'], example['exog']
-        data = data if data.shape[-1] != 0 else None
-        model = pm.auto_arima(label, X=data, **self.config)
-        self.model = model
-
-    def predict(self, example):
-        model = self.model
-        if len(example['endog_update']) != 0:
-            model.update(example['endog_update'], X=data if (data := example['exog_update']).shape[-1] != 0 else None)
-        # Issue is related to https://github.com/alkaline-ml/pmdarima/issues/492
-        try:
-            preds = model.predict(self.horizon, X=data if (data := example['exog']).shape[-1] != 0 else None)
-        except ValueError as e:
-            if "Input contains NaN, infinity or a value too large for dtype('float64')." in str(e):
-                print(str(e))
-                preds = np.empty(self.horizon)
-                preds.fill(self.model.arima_res_.data.endog[-1])
-            else:
-                raise
-        return preds
-
-    def save(self):
-        with open('arima.pkl', 'wb') as f:
-            pkl.dump(self.model, f)
-
-    def load(self, path):
-        with open(os.path.join(path, 'arima.pkl'), 'rb') as f:
-            self.model = pkl.load(f)
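For context, AutoARIMA above is a thin wrapper over pmdarima's fit/update/predict cycle. A minimal sketch of that cycle with hypothetical arrays and no exogenous features; self.horizon above corresponds to n_periods here:

    import numpy as np
    import pmdarima as pm

    y_train = np.random.rand(200)        # hypothetical endogenous history
    model = pm.auto_arima(y_train)       # order search, as in AutoARIMA.fit
    model.update(np.random.rand(8))      # roll the model forward, as in AutoARIMA.predict
    preds = model.predict(n_periods=24)  # forecast the horizon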
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tft.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tft.py
deleted file mode 100644
index 70af5a607..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tft.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-from data.data_utils import InputTypes, DataTypes
-from matplotlib import pyplot as plt
-from models.interpretability import InterpretableModelBase
-from models.tft_pyt.modeling import TemporalFusionTransformer
-from mpl_toolkits.axes_grid1 import make_axes_locatable
-
-
-class InterpretableTFTBase(InterpretableModelBase):
-    def __init__(self, *args, **kwargs):
-        super(InterpretableTFTBase, self).__init__(*args, **kwargs)
-
-    @classmethod
-    def _get_future_features(cls, features):
-        future_features = [feature.name for feature in features if feature.feature_type == InputTypes.KNOWN
-                           and feature.feature_embed_type == DataTypes.CATEGORICAL] \
-                          + [feature.name for feature in features if feature.feature_type == InputTypes.KNOWN
-                             and feature.feature_embed_type == DataTypes.CONTINUOUS]
-        return future_features
-
-    @classmethod
-    def _get_history_features(cls, features):
-        history_features = [feature.name for feature in features if feature.feature_type == InputTypes.OBSERVED
-                            and feature.feature_embed_type == DataTypes.CATEGORICAL] \
-                           + [feature.name for feature in features if feature.feature_type == InputTypes.OBSERVED
-                              and feature.feature_embed_type == DataTypes.CONTINUOUS] \
-                           + [feature.name for feature in features if feature.feature_type == InputTypes.KNOWN
-                              and feature.feature_embed_type == DataTypes.CATEGORICAL] \
-                           + [feature.name for feature in features if feature.feature_type == InputTypes.KNOWN
-                              and feature.feature_embed_type == DataTypes.CONTINUOUS] \
-                           + [feature.name for feature in features if feature.feature_type == InputTypes.TARGET]
-
-        return history_features
-
-    @classmethod
-    def _get_heatmap_fig(cls, tensor, features, max_size=16, min_size=4):
-        shape = tensor.shape
-        ratio = max(max(shape) // max_size, 1)
-        fig_size = max(shape[1] / ratio, min_size), max(shape[0] / ratio, min_size)
-        fig = plt.figure(figsize=fig_size)
-        ticks = list(range(shape[0]))
-        plt.yticks(ticks, features)
-        plt.xlabel('Time step')
-        plt.imshow(tensor, cmap='hot', interpolation='nearest')
-        plt.colorbar()
-        return fig
-
-    @classmethod
-    def _get_vsn_fig(cls, activations, sample_number, features):
-        _, sparse_matrix = activations
-        sample_sparse_matrix = sparse_matrix[sample_number]
-        final_tensor = sample_sparse_matrix.permute(1, 0)
-        fig = cls._get_heatmap_fig(final_tensor.detach().cpu(), features)
-        return fig
-
-    @classmethod
-    def _get_attention_heatmap_fig(cls, heads, max_size=16, min_size=4):
-        row_size = max(min_size, max_size / len(heads))
-        fig, axes = plt.subplots(1, len(heads), figsize=(max_size, row_size))
-        for i, (head, ax) in enumerate(zip(heads, axes), 1):
-            im = ax.imshow(head, cmap='hot', interpolation='nearest')
-            if i < len(heads):
-                ax.set_title(f'HEAD {i}')
-            else:
-                ax.set_title('MEAN')
-            divider = make_axes_locatable(ax)
-            cax = divider.append_axes('right', size='5%', pad=0.05)
-            fig.colorbar(im, cax=cax, orientation='vertical')
-        return fig
-
-    @classmethod
-    def _get_attn_heads(cls, activations, sample_number):
-        heads = []
-        _, attn_prob = activations
-        sample_attn_prob = attn_prob[sample_number]
-        n_heads = sample_attn_prob.shape[0]
-        for head_index in range(n_heads):
-            head = sample_attn_prob[head_index]
-            heads.append(head.detach().cpu())
-        mean_head = torch.mean(sample_attn_prob, dim=0)
-        heads.append(mean_head.detach().cpu())
-        fig = cls._get_attention_heatmap_fig(heads)
-        return fig
-
-    def _get_activation(self, name):
-        def hook(model, input, output):
-            self.activations[name] = output
-
-        return hook
-
-    def get_activations(self, sample_number, features):
-        assert self.activations, "There are no activations available"
-        return {
-            "history_vsn": self._get_vsn_fig(self.activations['history_vsn'], sample_number,
-                                             self._get_history_features(features)),
-            "future_vsn": self._get_vsn_fig(self.activations['future_vsn'], sample_number,
                                            self._get_future_features(features)),
-            "attention": self._get_attn_heads(self.activations['attention'], sample_number)
-        }
-
-    def _register_interpretable_hooks(self):
-        self.TFTpart2.history_vsn.register_forward_hook(self._get_activation('history_vsn'))
-        self.TFTpart2.future_vsn.register_forward_hook(self._get_activation('future_vsn'))
-        self.TFTpart2.attention.register_forward_hook(self._get_activation('attention'))
-
-
-class InterpretableTFT(TemporalFusionTransformer, InterpretableTFTBase):
-    def __init__(self, *args, **kwargs):
-        TemporalFusionTransformer.__init__(self, *args, **kwargs)
-        InterpretableTFTBase.__init__(self, *args, **kwargs)
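The interpretability plumbing in tft.py above is built on PyTorch forward hooks. The same capture pattern on a stand-in module; nn.Linear here is only a placeholder for history_vsn/attention, and the dict name is illustrative:

    import torch
    import torch.nn as nn

    activations = {}

    def capture(name):
        def hook(module, inputs, output):
            activations[name] = output  # stash the layer output, as _get_activation does
        return hook

    layer = nn.Linear(8, 4)
    layer.register_forward_hook(capture('history_vsn'))
    layer(torch.randn(2, 8))
    print(activations['history_vsn'].shape)  # torch.Size([2, 4])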
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/trivial_model.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/trivial_model.py
deleted file mode 100755
index 902473dab..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/trivial_model.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# SPDX-License-Identifier: Apache-2.0
-import torch
-import torch.nn as nn
-
-
-class TrivialModel(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-        self.bias = nn.Parameter(torch.zeros(1))
-        self.encoder_length = config.encoder_length
-        self.example_length = config.example_length
-        self.predict_steps = self.example_length - self.encoder_length
-        self.output_dim = len(config.get("quantiles", [""]))
-
-
-    def forward(self, batch):
-        t = next(t for t in batch.values() if t is not None)
-        bs = t.shape[0]
-        return torch.ones([bs, self.example_length - self.encoder_length, self.output_dim]).to(device=t.device) + self.bias
-
-    def predict(self, batch):
-        targets = batch["target"].clone()
-        prev_predictions = targets.roll(1, 1)
-        return prev_predictions[:, -self.predict_steps :, :]
-
-    def test_with_last(self, batch):
-        bs = max([tensor.shape[0] if tensor is not None else 0 for tensor in batch.values()])
-        values = (
-            batch["target_masked"]
-            .clone()[:, -1, :]
-            .reshape((bs, 1, self.output_dim))
-        )
-
-        return torch.cat((self.example_length - self.encoder_length) * [values], dim=1)
-
-    def test_with_previous_window(self, batch):
-        targets = batch["target"].clone()
-        prev_predictions = targets.roll(self.predict_steps, 1)
-        return prev_predictions[:, -self.predict_steps :, :]
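The baseline above reduces to persistence forecasting via torch.roll; test_with_previous_window, for example, repeats the window that precedes the horizon. The same trick stand-alone, with a hypothetical (batch, time, features) tensor and a 3-step horizon:

    import torch

    targets = torch.arange(12.).reshape(1, 12, 1)       # values 0..11 over 12 steps
    horizon = 3
    preds = targets.roll(horizon, 1)[:, -horizon:, :]   # steps 6..8 predict steps 9..11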
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tspp_xgboost.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tspp_xgboost.py
deleted file mode 100644
index 65f4885ea..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tspp_xgboost.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import cudf
-import pandas as pd
-
-import pynvml
-import numpy as np
-import xgboost as xgb
-import os
-import glob
-
-import dask_cudf
-from distributed_utils import create_client
-# Deal with the patience and log_interval. Also objective, cluster
-class TSPPXGBoost():
-    def __init__(self, config):
-        self.config = config
-        self.models = []
-
-    def fit(self, train, label, valid, valid_label, **kwargs):
-        train = train.drop(['_id_', '_timestamp_'], axis=1, errors='ignore')
-        valid = valid.drop(['_id_', '_timestamp_'], axis=1, errors='ignore')
-        X = xgb.DeviceQuantileDMatrix(cudf.from_pandas(train), label=cudf.from_pandas(label))
-        V = xgb.DMatrix(cudf.from_pandas(valid), label=cudf.from_pandas(valid_label))
-        model = xgb.train(params=self.config,
-                          dtrain=X,
-                          num_boost_round=self.config.n_rounds,
-                          evals=[(X, 'train'), (V, 'valid')],
-                          early_stopping_rounds=kwargs.get('patience', 5),
-                          verbose_eval=kwargs.get("log_interval", 25),
-                          )
-        self.models.append(model)
-
-    def predict(self, test, i):
-        test = test.drop(['_id_', '_timestamp_'], axis=1, errors='ignore')
-        model = self.models[i]
-        X = xgb.DMatrix(cudf.from_pandas(test))
-        return model.predict(X)
-
-    def save(self, path):
-        os.makedirs(os.path.join(path, 'checkpoints'), exist_ok=True)
-        for i in range(len(self.models)):
-            model = self.models[i]
-            model.save_model(os.path.join(path, f'checkpoints/xgb_{i+1}.model'))
-
-    def load(self, path):
-        self.models = []
-        for i in range(self.config.example_length - self.config.encoder_length):
-            p = os.path.join(path, f'checkpoints/xgb_{i+1}.model')
-            model = xgb.Booster()
-            model.load_model(p)
-            self.models.append(model)
-
-class TSPPDaskXGBoost():
-    def __init__(self, config):
-        self.config = config
-        self.models = []
-        self.client = create_client(config)
-        self.npartitions = self.config.cluster.npartitions
-
-    def fit(self, train, label, valid, valid_label, **kwargs):
-        X = xgb.dask.DaskDeviceQuantileDMatrix(self.client,
-                                               dask_cudf.from_cudf(cudf.from_pandas(train), npartitions=self.npartitions),
-                                               label=dask_cudf.from_cudf(cudf.from_pandas(label), npartitions=self.npartitions))
-        V = xgb.dask.DaskDMatrix(self.client,
-                                 dask_cudf.from_cudf(cudf.from_pandas(valid), npartitions=self.npartitions),
-                                 label=dask_cudf.from_cudf(cudf.from_pandas(valid_label), npartitions=self.npartitions))
-        model = xgb.dask.train(client=self.client,
-                               params=self.config,
-                               dtrain=X,
-                               num_boost_round=self.config.n_rounds,
-                               evals=[(X, 'train'), (V, 'valid')],
-                               early_stopping_rounds=kwargs.get('patience', 5),
-                               verbose_eval=kwargs.get("log_interval", 25),
-                               )
-        self.models.append(model)
-        self.client.restart()
-
-    def predict(self, test, i):
-        test = test.reset_index(drop=True)
-        model = self.models[i]
-        test = dask_cudf.from_cudf(cudf.from_pandas(test), npartitions=self.npartitions)
-        test = xgb.dask.DaskDMatrix(self.client, test)
-        out = xgb.dask.predict(self.client, model, test)
-        return out.compute()
-
-    def save(self, path):
-        os.makedirs(os.path.join(path, 'checkpoints'), exist_ok=True)
-        for i in range(len(self.models)):
-            model = self.models[i]
-            model['booster'].save_model(os.path.join(path, f'checkpoints/xgb_{i+1}.model'))
-
-    def load(self, path):
-        self.models = []
-        for i in range(self.config.example_length - self.config.encoder_length):
-            p = os.path.join(path, f'checkpoints/xgb_{i+1}.model')
-            model = {'booster': xgb.dask.Booster()}
-            model['booster'].load_model(p)
-            self.models.append(model)
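For reference, the scheme above trains one booster per horizon step and round-trips each through save_model/load_model. A compact sketch of that cycle with synthetic data; this is the single-step, local (non-Dask) case, and the file name is illustrative:

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X, y = rng.random((256, 10)), rng.random(256)   # synthetic features/labels

    booster = xgb.train(params={'max_depth': 3},
                        dtrain=xgb.DMatrix(X, label=y),
                        num_boost_round=20)

    booster.save_model('xgb_1.model')    # one file per horizon step, as in save()
    restored = xgb.Booster()
    restored.load_model('xgb_1.model')   # as in load()
    preds = restored.predict(xgb.DMatrix(X[:5]))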