From c7d12b61bb8203089e979d6e081e7537446b1790 Mon Sep 17 00:00:00 2001 From: ChayanBhansali <96097973+ChayanBhansali@users.noreply.github.com> Date: Wed, 7 Aug 2024 20:36:35 +0530 Subject: [PATCH 1/2] incorrect onnxruntime-gpu version , changed from 1.8.1 to 1.18.1 --- .../TimeSeriesPredictionPlatform/triton/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/triton/requirements.txt b/Tools/PyTorch/TimeSeriesPredictionPlatform/triton/requirements.txt index d7a2cd4dd..89f00109a 100644 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/triton/requirements.txt +++ b/Tools/PyTorch/TimeSeriesPredictionPlatform/triton/requirements.txt @@ -16,7 +16,7 @@ natsort>=7.0.0 networkx==2.5 numpy onnx>=1.8.0,<1.9.0 -onnxruntime-gpu==1.8.1 +onnxruntime-gpu==1.18.1 pycuda>=2019.1.2 PyYAML>=5.2 tabulate>=0.8.7 From e40aff80470bf639e6efda7c4f727507cf3cd5eb Mon Sep 17 00:00:00 2001 From: ChayanBhansali <96097973+ChayanBhansali@users.noreply.github.com> Date: Mon, 12 Aug 2024 19:30:57 +0530 Subject: [PATCH 2/2] updated readme file in tspp --- .../TimeSeriesPredictionPlatform/README.md | 2 +- .../models/dcrnn.py | 283 --------------- .../models/deepar.py | 147 -------- .../models/deepar_v2.py | 147 -------- .../models/ensembling.py | 123 ------- .../models/gnn.py | 296 --------------- .../models/interpretability.py | 31 -- .../models/lstm.py | 66 ---- .../models/mtgnn.py | 338 ------------------ .../models/nbeats.py | 154 -------- .../models/nhits.py | 156 -------- .../models/stat_models.py | 72 ---- .../models/tft.py | 123 ------- .../models/trivial_model.py | 53 --- .../models/tspp_xgboost.py | 111 ------ 15 files changed, 1 insertion(+), 2101 deletions(-) delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/dcrnn.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar_v2.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/ensembling.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/gnn.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/interpretability.py delete mode 100755 Tools/PyTorch/TimeSeriesPredictionPlatform/models/lstm.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/mtgnn.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/nbeats.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/nhits.py delete mode 100755 Tools/PyTorch/TimeSeriesPredictionPlatform/models/stat_models.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/tft.py delete mode 100755 Tools/PyTorch/TimeSeriesPredictionPlatform/models/trivial_model.py delete mode 100644 Tools/PyTorch/TimeSeriesPredictionPlatform/models/tspp_xgboost.py diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md b/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md index aae2800be..34c1ba6b8 100755 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md +++ b/Tools/PyTorch/TimeSeriesPredictionPlatform/README.md @@ -153,7 +153,7 @@ For more information about how to get started with NGC containers, refer to the 2. Enter the Deep Learning Examples TSPP repository: ``` -cd DeeplearningExamples/Tools/TimeSeriesPredictionPlatform +cd DeeplearningExamples/Tools/PyTorch/TimeSeriesPredictionPlatform ``` 3. 
Copy the relevant temporal fusion transformer [code](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Forecasting/TFT/modeling.py) to the TSPP: ``` diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/dcrnn.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/dcrnn.py deleted file mode 100644 index 66433025a..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/dcrnn.py +++ /dev/null @@ -1,283 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import torch -import torch.nn as nn -import scipy.sparse as sp -from scipy.sparse import linalg -import dgl -import dgl.function as fn -import dgl.ops as ops - -from .tft_pyt.modeling import LazyEmbedding - -def calculate_normalized_laplacian(adj): - """ - # L = D^-1/2 (D-A) D^-1/2 = I - D^-1/2 A D^-1/2 - # D = diag(A 1) - :param adj: - :return: - """ - adj = sp.coo_matrix(adj) - d = np.array(adj.sum(1)) - d_inv_sqrt = np.power(d, -0.5).flatten() - d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. - d_mat_inv_sqrt = sp.diags(d_inv_sqrt) - normalized_laplacian = sp.eye(adj.shape[0]) - adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() - return normalized_laplacian - - -def calculate_random_walk_matrix(adj_mx): - d = np.array(adj_mx.sum(1)) - d_inv = np.power(d, -1).flatten() - d_inv[np.isinf(d_inv)] = 0. - d_mat_inv = np.diag(d_inv) - random_walk_mx = d_mat_inv.dot(adj_mx) - random_walk_mx = torch.from_numpy(random_walk_mx) - return random_walk_mx - - -def calculate_dual_random_walk_matrix(adj_mx): - L0 = calculate_random_walk_matrix(adj_mx).T - L1 = calculate_random_walk_matrix(adj_mx.T).T - return L0, L1 - - -def calculate_scaled_laplacian(adj_mx, lambda_max=2, undirected=True): - if undirected: - adj_mx = np.maximum.reduce([adj_mx, adj_mx.T]) - L = calculate_normalized_laplacian(adj_mx) - if lambda_max is None: - lambda_max, _ = linalg.eigsh(L, 1, which='LM') - lambda_max = lambda_max[0] - L = sp.csr_matrix(L) - M, _ = L.shape - I = sp.identity(M, format='csr', dtype=L.dtype) - L = (2 / lambda_max * L) - I - L = L.astype(np.float32).todense() - return torch.from_numpy(L) - - -class DCGRUCell(torch.nn.Module): - def __init__(self, num_units, max_diffusion_step, nonlinearity='tanh'): - super().__init__() - self._activation = torch.tanh if nonlinearity == 'tanh' else torch.relu - self._num_units = num_units - self.gconv1 = Gconv(self._num_units*2, self._num_units, max_diffusion_step, 0.0) - self.gconv2 = Gconv(self._num_units, self._num_units, max_diffusion_step, 0.0) - - def forward(self, graph, inputs, hx): - """Gated recurrent unit (GRU) with Graph Convolution. 
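-        Two diffusion convolutions produce the reset/update gates and the candidate state.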
- """ - _inputs = torch.cat([inputs, hx], dim=-1) - x = self.gconv1(graph, _inputs) - - value = torch.sigmoid(x) - r, u = value.chunk(2, dim=-1) - - _inputs = torch.cat([inputs, r * hx], dim=-1) - c = self.gconv2(graph, _inputs) - - if self._activation is not None: - c = self._activation(c) - - new_state = u * hx + (1.0 - u) * c - return new_state - - -class Gconv(torch.nn.Module): - def __init__(self, output_size, hidden_size, max_diffusion_step, bias_start=0.0): - assert max_diffusion_step > 0 - super().__init__() - self.output_size = output_size - self.hidden_size = hidden_size - self._max_diffusion_step = max_diffusion_step - - self.num_matrices = 2 * self._max_diffusion_step + 1 - self.lin = torch.nn.LazyLinear(self.output_size) - def _reset_parameters(self): - torch.nn.init.xavier_normal_(self.weight) - torch.nn.init.constant_(self.bias, bias_start) - bound_method = _reset_parameters.__get__(self.lin, self.lin.__class__) - self.lin.reset_parameters = bound_method - - @staticmethod - def calculate_random_walk_matrix(adj_mx): - d = adj_mx.sum(1) - d_inv = d.pow(-1) - d_inv[torch.isinf(d_inv)] = 0. - random_walk_mx = d_inv.unsqueeze(1).mul(adj_mx) - return random_walk_mx - - - def rwLaplacian(self,feat, graph): - rev = graph.reverse() - - # L0 - out_degree = ops.copy_e_sum(rev, graph.edata['w']) #adj_mx.sum(1) - graph.ndata['_h'] = feat[...,0] * out_degree.pow(-1).unsqueeze(-1) - graph.update_all(fn.u_mul_e('_h', 'w', 'm') , fn.sum('m', '_h')) - - # L1 - in_degree = ops.copy_e_sum(graph, graph.edata['w']) #adj_mx.sum(0) - rev.edata['w'] = graph.edata['w'] - rev.ndata['_h'] = feat[...,1] * in_degree.pow(-1).unsqueeze(-1) - rev.update_all(fn.u_mul_e('_h', 'w', 'm') , fn.sum('m', '_h')) - - return torch.stack((graph.ndata.pop('_h'), rev.ndata.pop('_h')), dim=-1) - - def forward(self, graph, inputs): - batch_size = graph.batch_size - - # Caching - # We assume that all graphs are the same in sructure! 
- if not hasattr(self, 'adj_mx'): - with torch.no_grad(): - samples = dgl.unbatch(graph) - adj_mx = torch.sparse_coo_tensor(indices=samples[0].adjacency_matrix().coalesce().indices().to(inputs.device), - values=samples[0].edata['w'].to(inputs.device)).to_dense() - L0 = Gconv.calculate_random_walk_matrix(adj_mx).T - L1 = Gconv.calculate_random_walk_matrix(adj_mx.T).T - self.register_buffer('adj_mx', adj_mx, persistent=False) - self.register_buffer('L0', L0, persistent=False) - self.register_buffer('L1', L1, persistent=False) - if hasattr(self, f'L_{batch_size}'): - L = getattr(self, f'L_{batch_size}') - else: - L = torch.block_diag(*[l for l in (self.L0,self.L1) for _ in range(batch_size)]).to_sparse() - setattr(self, f'L_{batch_size}', L) - - x0 = torch.cat((inputs,inputs), dim=0) - x1 = torch.sparse.mm(L, x0) - dif_outs = [inputs, *x1.chunk(2, dim=0)] - - for k in range(2, self._max_diffusion_step + 1): - x2 = 2 * torch.sparse.mm(L, x1) - x0 - dif_outs += x2.chunk(2, dim=0) - x1, x0 = x2, x1 - - x = torch.stack(dif_outs, dim=-1) - x = x.reshape(graph.num_nodes(), -1) - x = self.lin(x) - return x - - - -class RNNStack(nn.Module): - def __init__(self, num_rnn_layers, max_diffusion_step, rnn_units, nonlinearity='tanh'): - super().__init__() - self.num_rnn_layers = num_rnn_layers - self.rnn_units = rnn_units - self.dcgru_layers = nn.ModuleList([DCGRUCell(rnn_units, max_diffusion_step, nonlinearity=nonlinearity) for _ in range(self.num_rnn_layers)]) - - def forward(self, graph, inputs, hidden_state=None): - if hidden_state is None: - hidden_state = inputs.new_zeros((self.num_rnn_layers, graph.num_nodes(), self.rnn_units)) - - hidden_states = [] - output = inputs - for layer_num, dcgru_layer in enumerate(self.dcgru_layers): - next_hidden_state = dcgru_layer(graph, output, hidden_state[layer_num]) - hidden_states.append(next_hidden_state) - output = next_hidden_state - - return output, torch.stack(hidden_states) # runs in O(num_layers) so not too slow - -class DCRNN(nn.Module): - def __init__(self, config): - super().__init__() - self.config = config - self.max_diffusion_step = int(config.get('max_diffusion_step', 2)) - self.num_nodes = int(config.get('num_nodes', 1)) - self.num_rnn_layers = int(config.get('num_rnn_layers', 1)) - self.rnn_units = int(config.get('rnn_units')) - self.activation = config.get('activation') - self.output_dim = int(config.get('output_dim', 1)) - self.horizon = int(config.get('horizon', 1)) # for the decoder - self.encoder_model = RNNStack(self.num_rnn_layers, self.max_diffusion_step, self.rnn_units, self.activation) - self.projection_layer = nn.Linear(self.rnn_units, self.output_dim) - self.decoder_model = RNNStack(self.num_rnn_layers, self.max_diffusion_step, self.rnn_units, self.activation) - self.cl_decay_steps = int(config.get('cl_decay_steps', 1000)) - self.use_curriculum_learning = bool(config.get('use_curriculum_learning', False)) - self.seq_len = int(config.get('encoder_length')) # for the encoder - self.batches_seen = 0 - - self.use_embedding = config.use_embedding - ### New embedding - if self.use_embedding: - self.config.hidden_size = self.config.input_dim - self.embedding = LazyEmbedding(self.config) - self.include_static_data = config.get('include_static_data', False) - #### - - def _compute_sampling_threshold(self, batches_seen): - return self.cl_decay_steps / ( - self.cl_decay_steps + np.exp(batches_seen / self.cl_decay_steps)) - - def encoder(self, graph): - encoder_hidden_state = None - h = graph.ndata['h'] - for t in range(self.seq_len): - _, 
encoder_hidden_state = self.encoder_model(graph, h[:,t], encoder_hidden_state) - - return encoder_hidden_state - - def decoder(self, graph, encoder_hidden_state, labels=None): - decoder_hidden_state = encoder_hidden_state - decoder_input = encoder_hidden_state.new_zeros((graph.num_nodes(), 1)) - - outputs = [] - - for t in range(self.horizon): - decoder_output, decoder_hidden_state = self.decoder_model(graph, decoder_input, decoder_hidden_state) - decoder_output = self.projection_layer(decoder_output) - decoder_input = decoder_output - outputs.append(decoder_output) - if self.training and self.use_curriculum_learning: - c = np.random.uniform(0, 1) - if c < self._compute_sampling_threshold(self.batches_seen): - decoder_input = labels[:,t].view(-1,1) - outputs = torch.stack(outputs, dim=1) - return outputs - - def forward(self, batch): - if self.use_embedding: - # New embedding - _batch = { - k:v[:, :self.seq_len] - if v is not None and v.numel() else None - for k,v in batch.ndata.items() - if 'ID' not in k and 'id' not in k - } - emb = self.embedding(_batch) - emb = [e.view(*e.shape[:-2], -1) for e in emb if e is not None] - emb[0] = emb[0].unsqueeze(1).expand(emb[0].shape[0], self.seq_len, *emb[0].shape[1:]) - if not self.include_static_data: - emb = emb[1:] - batch.ndata['h'] = torch.cat(emb, dim=-1) - #### - else: - t = batch.ndata['k_cont'][:, :self.seq_len, 2:] - t = torch.einsum('btk,k->bt', t, t.new([1, 0.16])) - batch.ndata['h'] = torch.cat([batch.ndata['target'][:, :self.seq_len], t.unsqueeze(-1)], dim=-1) - - if self.training: - labels = batch.ndata['target'][:, self.seq_len:].view(-1, self.num_nodes, self.horizon).transpose(1,2) - else: - labels = None - - encoder_hidden_state = self.encoder(batch) - outputs = self.decoder(batch, encoder_hidden_state, labels) - self.batches_seen += 1 - return outputs diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar.py deleted file mode 100644 index d46846b97..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import torch
-import torch.nn as nn
-from .tft_pyt.modeling import LazyEmbedding
-
-class DeepAR(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-
-        self.config = config
-        self.encoder_length = config.encoder_length
-        self.register_buffer('quantiles', torch.FloatTensor(config.quantiles), persistent=False)
-        self.use_embedding = self.config.use_embedding
-
-        if self.config.use_embedding:
-            ### New Embedding
-            # DeepAR can't currently work with observed data
-            config.num_historic_vars -= len(config.temporal_observed_categorical_inp_lens)
-            config.num_historic_vars -= config.temporal_observed_continuous_inp_size
-            config.temporal_observed_categorical_inp_lens = []
-            config.temporal_observed_continuous_inp_size = 0
-            _config = config.copy()
-            _config.hidden_size = self.config.embedding_dim
-            self.embedding_v2 = LazyEmbedding(_config)
-            inp_size = (config.num_static_vars + config.num_historic_vars) * config.embedding_dim
-        else:
-            self.embedding = nn.ModuleList([
-                nn.Embedding(n, config.embedding_dim)
-                for n in config.static_categorical_inp_lens + config.temporal_known_categorical_inp_lens
-            ])
-
-            inp_size = config.temporal_known_continuous_inp_size + len(self.embedding) * config.embedding_dim + 1 # +1 for target
-
-        self.lstm = nn.LSTM(input_size=inp_size,
-                            hidden_size=config.hidden_size,
-                            num_layers=config.num_layers,
-                            bias=True,
-                            batch_first=True,
-                            dropout=config.dropout)
-        for names in self.lstm._all_weights:
-            for name in filter(lambda n: "bias" in n, names):
-                bias = getattr(self.lstm, name)
-                n = bias.size(0)
-                start, end = n // 4, n // 2
-                bias.data[start:end].fill_(1.)
-
-        self.relu = nn.ReLU()
-        self.distribution_mu = nn.Linear(config.hidden_size * config.num_layers, 1)
-        self.distribution_presigma = nn.Linear(config.hidden_size * config.num_layers, 1)
-        self.distribution_sigma = nn.Softplus()
-
-    def _roll_data(x):
-        if x is None:
-            return None
-        x = torch.roll(x, 1, 1)
-        x[:,0] = 0
-        return x
-    def forward(self, batch):
-        if self.use_embedding:
-            return self._forward_v2(batch)
-        else:
-            return self._forward_v1(batch)
-
-
-    def _forward_v2(self, batch):
-        batch = batch.copy() # shallow copy to replace observables in this scope
-        batch['target'] = DeepAR._roll_data(batch['target'])
-        batch['weight'] = DeepAR._roll_data(batch['weight'])
-        batch['o_cat'] = None
-        batch['o_cont'] = None
-
-        emb = self.embedding_v2(batch)
-        emb = [x for x in emb if x is not None]
-        emb[0] = emb[0].unsqueeze(1).expand(emb[0].shape[0], emb[1].shape[1], *emb[0].shape[1:])
-        emb = torch.cat(emb, axis=-2)
-        emb = emb.view(*emb.shape[:-2], -1)
-
-        state = None
-        mus = []
-        sigs = []
-        for t in range(emb.shape[1]):
-            zero_index = (batch['target'][:, t, 0] == 0)
-            if t > 0 and torch.sum(zero_index) > 0:
-                _x = torch.matmul(mu[zero_index].unsqueeze(-1), self.embedding_v2.t_tgt_embedding_vectors)
-                _x = _x + self.embedding_v2.t_tgt_embedding_bias
-                emb[zero_index, t, -self.config.embedding_dim:] = _x # target embedding is the last to be concatenated
-            mu, sigma, state = self._forward_ar(emb[:,t].unsqueeze(1), state)
-
-            mus.append(mu)
-            sigs.append(sigma)
-
-        mus = torch.stack(mus, dim=1)
-        sigs = torch.stack(sigs, dim=1)
-
-        return torch.stack((mus, sigs), dim=-1)
-
-    def _forward_v1(self, batch):
-        cat = torch.cat([batch['s_cat'], batch['k_cat']], dim=-1).permute(2,0,1)
-        emb = torch.cat([e(t) for e, t in zip(self.embedding, cat)], dim=-1)
-        target = torch.roll(batch['target'], 1, 1)
-        target[:, 0] = 0
-        x = torch.cat((target, batch['k_cont'], emb), dim=-1)
-
-        state = None
-        mus = []
-
sigs = [] - for t in range(x.shape[1]): - zero_index = (x[:, t, 0] == 0) - if t > 0 and torch.sum(zero_index) > 0: - x[zero_index, t, 0] = mu[zero_index] - - mu, sigma, state = self._forward_ar(x[:, t].unsqueeze(1), state) - - mus.append(mu) - sigs.append(sigma) - - mus = torch.stack(mus, dim=1) - sigs = torch.stack(sigs, dim=1) - - return torch.stack((mus, sigs), dim=-1) - - def _forward_ar(self, x, state): - output, state = self.lstm(x, state) - hidden = state[0] - hidden_permute = hidden.permute(1, 2, 0).contiguous().view(hidden.shape[1], -1) - pre_sigma = self.distribution_presigma(hidden_permute) - mu = self.distribution_mu(hidden_permute) - sigma = self.distribution_sigma(pre_sigma) # softplus to make sure standard deviation is positive - return torch.squeeze(mu), torch.squeeze(sigma), state - - def predict(self, batch): - preds = self.forward(batch) - preds = preds[:, self.encoder_length:, :] - preds = torch.stack([preds[..., 0] + preds[..., 1] * torch.erfinv(2 * q - 1) * 1.4142135623730951 for q in self.quantiles], dim=-1) - return preds diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar_v2.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar_v2.py deleted file mode 100644 index 0ef0ed4e4..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/deepar_v2.py +++ /dev/null @@ -1,147 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-'''Defines the neural network, loss function and metrics'''
-
-import math
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.autograd import Variable
-from .tft_pyt.modeling import LazyEmbedding
-import torch
-from torch import nn
-
-class AutoregressiveLSTM(nn.Module):
-    def __init__(self, input_size, hidden_size, embed_size, num_layers, dropout, tgt_embed):
-        super(AutoregressiveLSTM, self).__init__()
-        self.hidden_size = hidden_size
-        self.embed_size = embed_size
-        self.num_layers = num_layers
-        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
-        self.tgt_embed = tgt_embed
-
-        # This is a modification to the more general algorithm implemented here that uses all the layer's hidden states to make a final prediction
-        # This is not what is described in the paper but is what the reference implementation did
-        # In this particular case it is used for expected value (mu) estimation
-        self.mu_proj = nn.Linear(hidden_size * num_layers, 1)
-        self.sig_proj = nn.Sequential(
-            nn.Linear(hidden_size * num_layers, 1),
-            nn.Softplus()
-        )
-
-
-    def forward(self, inputs, embedded_labels, hidden=None, mask=None):
-        # Inputs should be all covariate embeddings and embedded labels should be target embeddings
-
-        mus = []
-        sigs = []
-        for i in range(inputs.shape[1]):
-            input = inputs[:,i]
-            if embedded_labels is None or mask is None:
-                mu_embed = self.tgt_embed(mu)
-                input = torch.cat((input, mu_embed), dim=-1)
-            elif i and mask[:,i].any():
-                mu_embed = self.tgt_embed(mu)
-                input = torch.cat((input, torch.where(mask[:, i], mu_embed, embedded_labels[:, i])), dim=-1)
-            else:
-                input = torch.cat((input, embedded_labels[:, i]), dim=-1)
-
-            _, hidden = self.lstm(input.unsqueeze(1), hidden)
-            hidden_permute = hidden[0].permute(1, 2, 0).contiguous().view(hidden[0].shape[1], -1)
-            mu = self.mu_proj(hidden_permute)
-
-            sig = self.sig_proj(hidden_permute)
-
-            mus.append(mu)
-            sigs.append(sig)
-
-        mus = torch.cat(mus, dim=1)
-        sigs = torch.cat(sigs, dim=1)
-        return mus, sigs, hidden
-
-
-class DeepAR(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-
-        self.config = config
-        self.encoder_length = config.encoder_length
-        self.example_length = config.example_length
-        self.register_buffer('quantiles', torch.FloatTensor(config.quantiles), persistent=False)
-        self.use_embedding = self.config.use_embedding
-        self.drop_variance = self.config.get('drop_variance', False)
-
-        _config = config.copy()
-        _config.hidden_size = self.config.embedding_dim
-        _config.num_historic_vars -= len(config.temporal_observed_categorical_inp_lens)
-        _config.num_historic_vars -= config.temporal_observed_continuous_inp_size
-        _config.temporal_observed_categorical_inp_lens = []
-        _config.temporal_observed_continuous_inp_size = 0
-
-        self.embedding_v2 = LazyEmbedding(_config)
-        tgt_embed = lambda x: torch.matmul(x, self.embedding_v2.t_tgt_embedding_vectors) + self.embedding_v2.t_tgt_embedding_bias
-
-        inp_size = (config.num_static_vars + config.num_future_vars + config.temporal_target_size) * config.embedding_dim
-
-        self.encoder = AutoregressiveLSTM(input_size=inp_size,
-                                          hidden_size=config.hidden_size,
-                                          embed_size=config.embedding_dim,
-                                          num_layers=config.num_layers,
-                                          dropout=config.dropout,
-                                          tgt_embed=tgt_embed)
-
-    def _roll_data(x):
-        if x is None:
-            return None
-        x = torch.roll(x, 1, 1)
-        x[:,0] = 0
-        return x
-
-
-    def forward(self, batch, predict=False):
-        batch = batch.copy() # shallow copy to replace observables in this scope
-
batch['target'] = DeepAR._roll_data(batch['target']) - batch['weight'] = DeepAR._roll_data(batch['weight']) - batch['o_cat'] = None - batch['o_cont'] = None - - s_emb, k_emb, _, tgt_emb = self.embedding_v2(batch) - s_emb = s_emb.unsqueeze(1).expand(s_emb.shape[0], tgt_emb.shape[1], *s_emb.shape[1:]) - - feat = torch.cat((s_emb, k_emb) , axis=-2) - feat = feat.view(*feat.shape[:-2], -1) - tgt_emb = tgt_emb.view(*tgt_emb.shape[:-2], -1) - - if batch['weight'] is not None: - mask = batch['weight'] == 0 - else: - mask = batch['target'] == 0 - - if predict: - mask[:, self.encoder_length:] = True - - mus, sigs, _ = self.encoder(feat, embedded_labels=tgt_emb, mask=mask) - - if self.drop_variance: - return mus.unsqueeze(-1) - return torch.stack((mus, sigs), dim=-1) - - def predict(self, batch): - preds = self.forward(batch, predict=True) - preds = preds[:,self.encoder_length:, :] - if self.drop_variance: - return preds - preds = torch.stack([preds[...,0] + preds[...,1] * torch.erfinv(2 * q - 1) * 1.4142135623730951 for q in self.quantiles], dim=-1) - return preds diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/ensembling.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/ensembling.py deleted file mode 100644 index 227368245..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/ensembling.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os - -import hydra -from omegaconf import OmegaConf -import torch -from torch import nn, Tensor -from typing import Dict -from training.utils import to_device - - -class ModelEnsemble(nn.Module): - def __init__(self, config): - super().__init__() - self.config = config - self.reduction_stategy = config.get('reduction_strategy', 'mean') - self.model_weights = [] - self.model_list = [] - for model_info in self.config.model_list: - self.model_weights.append(model_info.get('weight', 1.0)) - - model_dir = model_info.dir - with open(os.path.join(model_dir, '.hydra/config.yaml'), 'rb') as f: - cfg = OmegaConf.load(f) - model: nn.Module = hydra.utils.instantiate(cfg.model) - if not(cfg.dataset.config.get('xgb', False) or cfg.dataset.config.get('stat', False)): - # reduce gpu memory usage - state_dict = torch.load(os.path.join(model_dir, model_info.get('checkpoint', 'best_checkpoint.zip')), map_location='cpu')['model_state_dict'] - model.load_state_dict(state_dict) - else: - raise ValueError('XGB and stat models are currently not supported by ensembling.') - self.model_list.append(model) - - self.num_devices = min(torch.cuda.device_count(), len(self.model_list)) - model_splits = [self.model_list[i::self.num_devices] for i in range(self.num_devices)] - model_splits = [nn.ModuleList(x).to(f'cuda:{i}') for i, x in enumerate(model_splits)] - self.model_splits = nn.ModuleList(model_splits) - - self.model_weights = [x for y in [self.model_weights[i::self.num_devices] for i in range(self.num_devices)] for x in y] - - @staticmethod - def _reduce_preds(preds, weights, reduction_stategy): - if reduction_stategy not in ['mean', 'sum']: - raise ValueError(f'Unknown reduction strategy: {reduction_stategy}') - - result = sum(p * w for p, w in zip(preds, weights)) - if reduction_stategy == 'mean': - result /= sum(weights) - - return result - - @staticmethod - def _replicate(batch, n): - return [to_device(batch, i) for i in range(n)] - - @torch.no_grad() - def forward(self, x: Dict[str, Tensor]) -> Tensor: - _x = self._replicate(x, self.num_devices) - preds = [[] for _ in range(self.num_devices)] - - for i, (data, split) in enumerate(zip(_x, self.model_splits)): - for model in split: - test_method_name = 'predict' if hasattr(model, 'predict') else '__call__' - test_method = getattr(model, test_method_name) - pred = test_method(data) - # Move all preds to cpu. 
This may hurt performance, but host memory is far more plentiful.
-                preds[i].append(pred.cpu())
-
-        preds = [x for y in preds for x in y]
-        preds = self._reduce_preds(preds, self.model_weights, self.reduction_stategy)
-
-        return preds
-
-class XGBEnsemble:
-    def __init__(self, config):
-        super().__init__()
-        self.config = config
-        self.reduction_stategy = config.get('reduction_strategy', 'mean')
-        self.model_weights = []
-        self.model_list = []
-        for model_info in self.config.model_list:
-            self.model_weights.append(model_info.get('weight', 1.0))
-
-            model_dir = model_info.dir
-            with open(os.path.join(model_dir, '.hydra/config.yaml'), 'rb') as f:
-                cfg = OmegaConf.load(f)
-            model: nn.Module = hydra.utils.instantiate(cfg.model)
-            model.load(model_dir)
-            self.model_list.append(model)
-
-    @staticmethod
-    def _reduce_preds(preds, weights, reduction_stategy):
-        if reduction_stategy not in ['mean', 'sum']:
-            raise ValueError(f'Unknown reduction strategy: {reduction_stategy}')
-
-        result = sum(p * w for p, w in zip(preds, weights))
-        if reduction_stategy == 'mean':
-            result /= sum(weights)
-
-        return result
-
-    def predict(self, x, i):
-        preds = []
-
-        for model in self.model_list:
-            pred = model.predict(x, i)
-            preds.append(pred)
-
-        preds = self._reduce_preds(preds, self.model_weights, self.reduction_stategy)
-
-        return preds
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/gnn.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/gnn.py
deleted file mode 100644
index a8ee7aaf5..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/gnn.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import torch -import torch.nn as nn -from typing import Optional, Tuple, Dict -from torch import Tensor -import dgl -from dgl.nn.pytorch.conv import GraphConv - -import networkx as nx -import numpy as np -from copy import copy - -def list_contract_nodes_(graph, l_nodes): - """ - l_nodes: List[List[Int]]: nodes to merge - Returns node mapping - """ - pooled_feat = [] - _nodes_flat = [x for y in l_nodes for x in y] - - _unmerged_nodes = list(range(graph.num_nodes())) - for n in _nodes_flat: - _unmerged_nodes.remove(n) - - node_mapping = {i:[n] for i,n in enumerate(_unmerged_nodes)} - num_nodes = graph.num_nodes() - i = 0 - while l_nodes: - nodes = l_nodes.pop() - # Add features - ndata = {k:v[nodes].mean() for k, v in graph.ndata.items()} - pooled_feat.append({k: v[nodes].mean(dim=0) for k,v in graph.ndata.items()}) - # Add edges - predecessors = torch.cat([graph.predecessors(n) for n in nodes]) - successors = torch.cat([graph.successors(n) for n in nodes]) - nidx = graph.num_nodes() - graph.add_edges(torch.full_like(predecessors, nidx), predecessors) - graph.add_edges(torch.full_like(successors, nidx), successors) - # Add key to super node mapping - node_mapping[num_nodes - len(_nodes_flat) + i] = nodes - i += 1 - - graph.remove_nodes(_nodes_flat) - - # Insert pooled features - pooled_feat = {k: torch.stack([d[k] for d in pooled_feat], dim=0) for k in graph.ndata.keys()} - for k, v in pooled_feat.items(): - graph.ndata[k][-v.shape[0]:] = v - - return graph, node_mapping - -def coarsen(graph): - g_nx = graph.cpu().to_networkx().to_undirected() - g_nx = nx.Graph(g_nx) - matching = nx.algorithms.matching.max_weight_matching(g_nx) - matching = [list(x) for x in matching] - g, s_node_map = list_contract_nodes_(graph, matching) - return g, s_node_map - -class SpatialPooling(nn.Module): - def __init__(self): - super().__init__() - self.s_node_map = None - self.cached_graph = None - self.ukey = f'feat_{id(self)}' - - def forward(self, graph, feat): - self.cached_graph = graph - _graph = copy(graph) - _graph.ndata[self.ukey] = feat - g, s_node_map = coarsen(_graph) - self.s_node_map = s_node_map - return g, g.ndata[self.ukey] - - def unpool(self, feat): - """ Unpools by copying values""" - _feat = [] - for k,v in self.s_node_map.items(): - for node in v: - _feat.append((node, feat[k])) - u_feat = torch.stack([t[1] for t in sorted(_feat, key=lambda x: x[0])]) - return self.cached_graph, u_feat - -class TFTEmbedding(nn.Module): - def __init__(self, config): - super().__init__() - self.s_cat_inp_lens = config.static_categorical_inp_lens - self.t_cat_k_inp_lens = config.temporal_known_categorical_inp_lens - self.t_cat_o_inp_lens = config.temporal_observed_categorical_inp_lens - self.s_cont_inp_size = config.static_continuous_inp_size - self.t_cont_k_inp_size = config.temporal_known_continuous_inp_size - self.t_cont_o_inp_size = config.temporal_observed_continuous_inp_size - self.t_tgt_size = config.temporal_target_size - - self.hidden_size = config.hidden_size - - # There are 7 types of input: - # 1. Static categorical - # 2. Static continuous - # 3. Temporal known a priori categorical - # 4. Temporal known a priori continuous - # 5. Temporal observed categorical - # 6. Temporal observed continuous - # 7. 
Temporal observed targets (time series observed so far)
-
-        self.s_cat_embed = nn.ModuleList([
-            nn.Embedding(n, self.hidden_size) for n in self.s_cat_inp_lens]) if self.s_cat_inp_lens else None
-        self.t_cat_k_embed = nn.ModuleList([
-            nn.Embedding(n, self.hidden_size) for n in self.t_cat_k_inp_lens]) if self.t_cat_k_inp_lens else None
-        self.t_cat_o_embed = nn.ModuleList([
-            nn.Embedding(n, self.hidden_size) for n in self.t_cat_o_inp_lens]) if self.t_cat_o_inp_lens else None
-
-        self.s_cont_embedding_vectors = nn.Parameter(torch.Tensor(self.s_cont_inp_size, self.hidden_size)) if self.s_cont_inp_size else None
-        self.t_cont_k_embedding_vectors = nn.Parameter(torch.Tensor(self.t_cont_k_inp_size, self.hidden_size)) if self.t_cont_k_inp_size else None
-        self.t_cont_o_embedding_vectors = nn.Parameter(torch.Tensor(self.t_cont_o_inp_size, self.hidden_size)) if self.t_cont_o_inp_size else None
-        self.t_tgt_embedding_vectors = nn.Parameter(torch.Tensor(self.t_tgt_size, self.hidden_size))
-
-        self.s_cont_embedding_bias = nn.Parameter(torch.zeros(self.s_cont_inp_size, self.hidden_size)) if self.s_cont_inp_size else None
-        self.t_cont_k_embedding_bias = nn.Parameter(torch.zeros(self.t_cont_k_inp_size, self.hidden_size)) if self.t_cont_k_inp_size else None
-        self.t_cont_o_embedding_bias = nn.Parameter(torch.zeros(self.t_cont_o_inp_size, self.hidden_size)) if self.t_cont_o_inp_size else None
-        self.t_tgt_embedding_bias = nn.Parameter(torch.zeros(self.t_tgt_size, self.hidden_size))
-
-        if self.s_cont_embedding_vectors is not None:
-            torch.nn.init.xavier_normal_(self.s_cont_embedding_vectors)
-        if self.t_cont_k_embedding_vectors is not None:
-            torch.nn.init.xavier_normal_(self.t_cont_k_embedding_vectors)
-        if self.t_cont_o_embedding_vectors is not None:
-            torch.nn.init.xavier_normal_(self.t_cont_o_embedding_vectors)
-        torch.nn.init.xavier_normal_(self.t_tgt_embedding_vectors)
-
-    def _apply_embedding(self,
-                         cat: Optional[Tensor],
-                         cont: Optional[Tensor],
-                         cat_emb: Optional[nn.ModuleList],
-                         cont_emb: Tensor,
-                         cont_bias: Tensor,
-                         ) -> Tuple[Optional[Tensor], Optional[Tensor]]:
-        e_cat = torch.stack([embed(cat[...,i]) for i, embed in enumerate(cat_emb)], dim=-2) if cat is not None else None
-        if cont is not None:
-            # the line below is equivalent to the following einsums
-            #e_cont = torch.einsum('btf,fh->bthf', cont, cont_emb)
-            #e_cont = torch.einsum('bf,fh->bhf', cont, cont_emb)
-            e_cont = torch.mul(cont.unsqueeze(-1), cont_emb)
-            e_cont = e_cont + cont_bias
-        else:
-            e_cont = None
-
-        if e_cat is not None and e_cont is not None:
-            return torch.cat([e_cat, e_cont], dim=-2)
-        elif e_cat is not None:
-            return e_cat
-        elif e_cont is not None:
-            return e_cont
-        else:
-            return None
-
-    def forward(self, x: Dict[str, Tensor]):
-        # temporal/static categorical/continuous known/observed input
-        x = {k:v for k,v in x.items() if v.numel()}
-        s_cat_inp = x.get('s_cat', None)
-        s_cont_inp = x.get('s_cont', None)
-        t_cat_k_inp = x.get('k_cat', None)
-        t_cont_k_inp = x.get('k_cont', None)
-        t_cat_o_inp = x.get('o_cat', None)
-        t_cont_o_inp = x.get('o_cont', None)
-        t_tgt_obs = x['target'] # Has to be present
-
-        # Static inputs are expected to be equal for all timesteps
-        # For memory efficiency there is no assert statement
-        s_cat_inp = s_cat_inp[:,0,:] if s_cat_inp is not None else None
-        s_cont_inp = s_cont_inp[:,0,:] if s_cont_inp is not None else None
-
-        s_inp = self._apply_embedding(s_cat_inp,
-                                      s_cont_inp,
-                                      self.s_cat_embed,
-                                      self.s_cont_embedding_vectors,
-                                      self.s_cont_embedding_bias)
-        t_known_inp =
self._apply_embedding(t_cat_k_inp, - t_cont_k_inp, - self.t_cat_k_embed, - self.t_cont_k_embedding_vectors, - self.t_cont_k_embedding_bias) - t_observed_inp = self._apply_embedding(t_cat_o_inp, - t_cont_o_inp, - self.t_cat_o_embed, - self.t_cont_o_embedding_vectors, - self.t_cont_o_embedding_bias) - - # Temporal observed targets - # t_observed_tgt = torch.einsum('btf,fh->btfh', t_tgt_obs, self.t_tgt_embedding_vectors) - t_observed_tgt = torch.matmul(t_tgt_obs.unsqueeze(3).unsqueeze(4), self.t_tgt_embedding_vectors.unsqueeze(1)).squeeze(3) - t_observed_tgt = t_observed_tgt + self.t_tgt_embedding_bias - - return s_inp, t_known_inp, t_observed_inp, t_observed_tgt - -class GCGRUCell(nn.Module): - def __init__(self, input_size, hidden_size): - super().__init__() - self.conv_i = GraphConv(input_size, 3 * hidden_size) #According to https://arxiv.org/pdf/1903.05631.pdf - self.conv_h = GraphConv(hidden_size, 3 * hidden_size) # this should be ChebConv - self.hidden_size = hidden_size - self.state = None - def forward(self, graph, feat, hx): - i = self.conv_i(graph, feat) - h = self.conv_h(graph, hx) - i_r, i_z, i_n = torch.chunk(i, 3, dim=-1) - h_r, h_z, h_n = torch.chunk(h, 3, dim=-1) - r = torch.sigmoid(i_r + h_r) - z = torch.sigmoid(i_z + h_z) - n = torch.tanh(i_n + r * h_n) - h = (1-z) * n + z * hx - - return h - -class GCGRU(nn.Module): - def __init__(self, input_size, hidden_size, num_layers): - super().__init__() - self.input_size = input_size - self.hidden_size = hidden_size - self.num_layers = num_layers - cells = [GCGRUCell(input_size, hidden_size)] - cells += [GCGRUCell(hidden_size, hidden_size) for _ in range(num_layers - 1)] - self.cells = nn.ModuleList(cells) - - def forward(self, graph, input, hx=None): - if hx is None: - hx = [torch.zeros(graph.num_nodes(), self.hidden_size, - dtype=input.dtype, device=input.device)] * self.num_layers - - - out = [] - states = [] - intermediate = [input[:,t,...] 
for t in range(input.shape[1])] - - for i, cell in enumerate(self.cells): - inner_out = [] - h = hx[i] - for x in intermediate: - h = cell(graph, x, h) - inner_out.append(h) - out.append(inner_out) - intermediate = inner_out - - output = torch.stack(out[-1], dim=1) - - return output, out[-1] - - -class ToyModel(nn.Module): - def __init__(self, config): - super().__init__() - self.encoder_steps = config.encoder_length - self.num_future_vars = config.num_future_vars - self.num_historic_vars = config.num_historic_vars - self.num_static_vars = config.num_static_vars - self.hidden_size = config.hidden_size - self.embedding = TFTEmbedding(config) - - self.static_proj = nn.Linear(config.hidden_size * self.num_static_vars, config.num_layers * config.hidden_size) - self.history_recurrent = GCGRU(config.hidden_size, config.hidden_size, config.num_layers) - self.future_recurrent = GCGRU(config.hidden_size, config.hidden_size, config.num_layers) - self.history_down_proj = nn.Linear(self.num_historic_vars * config.hidden_size, config.hidden_size) - self.future_down_proj = nn.Linear(self.num_future_vars * config.hidden_size, config.hidden_size) - self.out_proj = nn.Linear(config.hidden_size, 1) - - def forward(self, graph): - s_inp, t_known_inp, t_observed_inp, t_observed_tgt = self.embedding(graph.ndata) - s_inp = s_inp.view(s_inp.shape[0], -1) - init_state = self.static_proj(s_inp) - init_state = init_state.view(init_state.shape[0], -1, self.hidden_size).transpose(0,1) - - feat = torch.cat([t_known_inp, t_observed_inp, t_observed_tgt], dim=2) - historic_feat = feat[:,:self.encoder_steps,:] - historic_feat = historic_feat.view(historic_feat.shape[0], historic_feat.shape[1], -1) - historic_feat = self.history_down_proj(historic_feat) - history, state = self.history_recurrent(graph, historic_feat, hx=init_state) - - future_feat = t_known_inp[:,self.encoder_steps:, :] - future_feat = future_feat.view(future_feat.shape[0], future_feat.shape[1], -1) - future_feat = self.future_down_proj(future_feat) - future, _ = self.future_recurrent(graph, future_feat, hx=state) - out = self.out_proj(future) - - return out diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/interpretability.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/interpretability.py deleted file mode 100644 index b155fcf5d..000000000 --- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/interpretability.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-from abc import ABCMeta, abstractmethod
-
-
-class InterpretableModelBase(object, metaclass=ABCMeta):
-    def __init__(self, *args, **kwargs):
-        self.interpretable = True
-        self.activations = {}
-
-    @abstractmethod
-    def _register_interpretable_hooks(self):
-        return
-
-    def enable_activations_dump(self):
-        self._register_interpretable_hooks()
-
-    @abstractmethod
-    def get_activations(self, sample_number, *args, **kwargs):
-        return
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/lstm.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/lstm.py
deleted file mode 100755
index c843ba056..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/lstm.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-import torch.nn as nn
-from torch import Tensor
-
-from models.tft_pyt.modeling import *
-
-
-class LSTM(nn.Module):
-    """
-    Implementation from LSTM portion of https://arxiv.org/abs/1912.09363
-    """
-
-    def __init__(self, config):
-        super().__init__()
-
-        self.encoder_steps = config.encoder_length # determines how far into the past the model looks
-
-        self.mask_nans = config.missing_data_strategy == "mask"
-
-        self.embedding = TFTEmbedding(config)
-        self.static_encoder = StaticCovariateEncoder(config)
-
-        self.history_vsn = VariableSelectionNetwork(config, config.num_historic_vars)
-        self.history_encoder = nn.LSTM(config.hidden_size, config.hidden_size, batch_first=True)
-        self.future_vsn = VariableSelectionNetwork(config, config.num_future_vars)
-        self.future_encoder = nn.LSTM(config.hidden_size, config.hidden_size, batch_first=True)
-
-        self.output_proj = nn.Linear(config.hidden_size, 1)
-
-    def forward(self, x: Tensor) -> Tensor:
-        s_inp, t_known_inp, t_observed_inp, t_observed_tgt = self.embedding(x)
-
-        # Static context
-        cs, ce, ch, cc = self.static_encoder(s_inp)
-        ch, cc = ch.unsqueeze(0), cc.unsqueeze(0) # lstm initial states
-
-        # Temporal input
-        _historical_inputs = [t_known_inp[:, : self.encoder_steps, :], t_observed_tgt[:, : self.encoder_steps, :]]
-        if t_observed_inp is not None:
-            _historical_inputs.insert(0, t_observed_inp[:, : self.encoder_steps, :])
-
-        historical_inputs = torch.cat(_historical_inputs, dim=-2)
-        future_inputs = t_known_inp[:, self.encoder_steps:]
-
-        # Encoders
-        historical_features, _ = self.history_vsn(historical_inputs, cs)
-        history, state = self.history_encoder(historical_features, (ch, cc))
-        future_features, _ = self.future_vsn(future_inputs, cs)
-        future, _ = self.future_encoder(future_features, state)
-
-        output = self.output_proj(future)
-        return output
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/mtgnn.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/mtgnn.py
deleted file mode 100644
index 1c810317e..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/mtgnn.py
+++ /dev/null
@@ -1,338 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -import torch.nn as nn -import torch.nn.functional as F -import numbers -import pickle - -from .tft_pyt.modeling import LazyEmbedding - -# This is copied from torch source and adjusted to take in indices of nodes as well -class LayerNorm(nn.Module): - __constants__ = ['normalized_shape', 'weight', 'bias', 'eps', 'elementwise_affine'] - def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True): - super(LayerNorm, self).__init__() - if isinstance(normalized_shape, numbers.Integral): - normalized_shape = (normalized_shape,) - self.normalized_shape = tuple(normalized_shape) - self.eps = eps - self.elementwise_affine = elementwise_affine - if self.elementwise_affine: - self.weight = nn.Parameter(torch.Tensor(*normalized_shape)) - self.bias = nn.Parameter(torch.Tensor(*normalized_shape)) - else: - self.register_parameter('weight', None) - self.register_parameter('bias', None) - self.reset_parameters() - - def reset_parameters(self): - if self.elementwise_affine: - nn.init.ones_(self.weight) - nn.init.zeros_(self.bias) - - def forward(self, input, idx): - if self.elementwise_affine: - return F.layer_norm(input, tuple(input.shape[1:]), self.weight[:,idx,:], self.bias[:,idx,:], self.eps) - else: - return F.layer_norm(input, tuple(input.shape[1:]), self.weight, self.bias, self.eps) - - def extra_repr(self): - return '{normalized_shape}, eps={eps}, ' \ - 'elementwise_affine={elementwise_affine}'.format(**self.__dict__) - - -class GraphConstructor(nn.Module): - - def __init__(self, nnodes, k, dim, alpha=3, static_feat=None): - super().__init__() - self.nnodes = nnodes - if static_feat is not None: - xd = static_feat.shape[1] - self.lin1 = nn.Linear(xd, dim) - self.lin2 = nn.Linear(xd, dim) - else: - self.emb1 = nn.Embedding(nnodes, dim) - self.emb2 = nn.Embedding(nnodes, dim) - self.lin1 = nn.Linear(dim, dim) - self.lin2 = nn.Linear(dim, dim) - - self.k = k - self.dim = dim - self.alpha = alpha - self.static_feat = static_feat - - def forward(self, idx): - if self.static_feat is None: - nodevec1 = self.emb1(idx) - nodevec2 = self.emb2(idx) - else: - nodevec1 = self.static_feat[idx, :] - nodevec2 = nodevec1 - - nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1)) - nodevec2 = torch.tanh(self.alpha*self.lin2(nodevec2)) - - #a = torch.mm(nodevec1, nodevec2.transpose(1,0))-torch.mm(nodevec2, nodevec1.transpose(1,0)) - # This comes from (AB^T)^T = BA^T - m = torch.mm(nodevec1, nodevec2.transpose(1, 0)) - a = m - m.transpose(1,0) - ##### - adj = F.relu(torch.tanh(self.alpha*a)) - mask = adj.new_zeros((idx.size(0), idx.size(0))) - s1,t1 = (adj + torch.rand_like(adj)*0.01).topk(self.k,1) - mask.scatter_(1, t1, 1) - adj = adj*mask - return adj - -class MixProp(nn.Module): - def __init__(self, c_in, c_out, gdep, alpha): - super().__init__() - self.linear = torch.nn.Conv2d((gdep+1)*c_in, c_out, kernel_size=(1, 1)) - self.gdep = gdep - self.alpha = alpha - - def forward(self, x, adj): - adj = adj + torch.eye(adj.size(0), device=adj.device) - 
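# Row-normalize the self-looped adjacency so each propagation step averages over neighbor features
-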
d = adj.sum(1)
-        a = adj / d.unsqueeze(-1)
-        h = x
-        out = [h]
-        for i in range(self.gdep):
-            h = torch.einsum('ncwl,vw->ncvl', h, a)
-            h = self.alpha * x + (1 - self.alpha) * h
-            out.append(h)
-        ho = torch.cat(out, dim=1)
-        ho = self.linear(ho)
-        return ho
-
-class GCModule(nn.Module):
-
-    def __init__(self, conv_channels, residual_channels, gcn_depth, propalpha):
-        super().__init__()
-        self.gc1 = MixProp(conv_channels, residual_channels, gcn_depth, propalpha)
-        self.gc2 = MixProp(conv_channels, residual_channels, gcn_depth, propalpha)
-
-    def forward(self, x, adj):
-        x1 = self.gc1(x, adj)
-        x2 = self.gc2(x, adj.transpose(1, 0))
-        return x1 + x2
-
-class DilatedInception(nn.Module):
-    def __init__(self, cin, cout, dilation_factor=2):
-        super().__init__()
-        self.kernel_set = [2,3,6,7]
-        cout = int(cout / len(self.kernel_set))
-        self.tconv = nn.ModuleList([nn.Conv2d(cin, cout, (1, k), dilation=(1, dilation_factor)) for k in self.kernel_set])
-
-    def forward(self,input):
-        x = []
-        for conv in self.tconv:
-            x.append(conv(input))
-
-        # This truncation is described in the paper and seemingly drops some information
-        # Information drop is counteracted by padding time dimension with 0.
-        # Ex: for the largest filter of size 7, input is padded by 7 zeros.
-        for i in range(len(self.kernel_set)):
-            x[i] = x[i][...,-x[-1].size(3):]
-        x = torch.cat(x,dim=1)
-        return x
-
-class TCModule(nn.Module):
-    def __init__(self, residual_channels, conv_channels, dilation_factor):
-        super().__init__()
-        self.filter = DilatedInception(residual_channels, conv_channels, dilation_factor)
-        self.gate = DilatedInception(residual_channels, conv_channels, dilation_factor)
-
-    def forward(self, x):
-        f = self.filter(x)
-        f = torch.tanh(f)
-        g = self.gate(x)
-        g = torch.sigmoid(g)
-        x = f * g
-        return x
-
-
-class MTGNNLayer(nn.Module):
-    def __init__(self,
-                 r_channels,
-                 c_channels,
-                 s_channels,
-                 kernel_size,
-                 dilation_factor,
-                 dropout,
-                 num_nodes,
-                 use_gcn,
-                 gcn_depth,
-                 propalpha):
-        super().__init__()
-        self.use_gcn = use_gcn
-        self.tc_module = TCModule(r_channels, c_channels, dilation_factor)
-        self.skip_conv = nn.Conv2d(in_channels=c_channels,
-                                   out_channels=s_channels,
-                                   kernel_size=(1, kernel_size))
-        self.dropout = nn.Dropout(dropout)
-        self.ln = LayerNorm((r_channels, num_nodes, kernel_size),elementwise_affine=True)
-
-        if use_gcn:
-            self.out_module = GCModule(c_channels, r_channels, gcn_depth, propalpha)
-        else:
-            self.out_module = nn.Conv2d(in_channels=c_channels,
-                                        out_channels=r_channels,
-                                        kernel_size=(1, 1))
-    def forward(self, x, idx, adp):
-        residual = x
-        x = self.tc_module(x)
-        x = self.dropout(x)
-        s = x
-        s = self.skip_conv(s)
-        if self.use_gcn:
-            x = self.out_module(x, adp)
-        else:
-            x = self.out_module(x)
-
-        x = x + residual[:, :, :, -x.size(3):]
-        x = self.ln(x,idx)
-
-        return x, s
-
-
-class MTGNN(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-        self.config = config
-        self.use_gcn = config.use_gcn
-        self.gcn_depth = config.gcn_depth
-        self.predefined_adj = config.get('predefined_adj')
-        if self.predefined_adj is not None:
-            A = pickle.load(open(self.predefined_adj, 'rb'))
-            self.register_buffer('predefined_adj', A)
-        self.propalpha = config.propalpha
-        self.tanhalpha = config.tanhalpha
-
-        self.num_nodes = config.num_nodes
-        self.dropout = config.dropout
-        self.in_dim = config.in_dim
-        self.out_dim = config.example_length - config.encoder_length
-        self.residual_channels = config.residual_channels
-        self.conv_channels = config.conv_channels
-        self.skip_channels =
config.skip_channels - self.end_channels = config.end_channels - self.subgraph_size = config.subgraph_size - self.node_dim = config.node_dim - self.dilation_exponential = config.dilation_exponential - self.seq_length = config.encoder_length - self.num_layers = config.num_layers - self.use_embedding = config.use_embedding - - ### New embedding - if self.use_embedding: - self.config.hidden_size = self.config.in_dim - self.embedding = LazyEmbedding(self.config) - - self.include_static_data = config.include_static_data - #### - - - self.layers = nn.ModuleList() - self.start_conv = nn.LazyConv2d(out_channels=self.residual_channels, kernel_size=(1, 1)) - self.gc = GraphConstructor(self.num_nodes, self.subgraph_size, self.node_dim, alpha=self.tanhalpha) - - kernel_size = 7 - - def rf_size(c,q,m): - assert q >= 1 - if q > 1: - return int(1 + (c-1)*(q**m - 1)/(q-1)) - return m*(c-1) + 1 - - self.receptive_field = rf_size(kernel_size, self.dilation_exponential, self.num_layers) - new_dilation = 1 - for j in range(self.num_layers): - rfs = rf_size(kernel_size, self.dilation_exponential, j+1) - kernel_len = max(self.seq_length, self.receptive_field) - rfs + 1 - - self.layers.append(MTGNNLayer(self.residual_channels, self.conv_channels, self.skip_channels, - kernel_len, new_dilation, self.dropout, self.num_nodes, self.use_gcn, - self.gcn_depth, self.propalpha - ) - ) - - new_dilation *= self.dilation_exponential - - self.end_conv_1 = nn.Conv2d(in_channels=self.skip_channels, - out_channels=self.end_channels, - kernel_size=(1, 1)) - self.end_conv_2 = nn.Conv2d(in_channels=self.end_channels, - out_channels=self.out_dim, - kernel_size=(1, 1)) - - if self.seq_length > self.receptive_field: - self.skip0 = nn.LazyConv2d(out_channels=self.skip_channels, kernel_size=(1, self.seq_length)) - self.skipE = nn.Conv2d(in_channels=self.residual_channels, - out_channels=self.skip_channels, - kernel_size=(1, self.seq_length - self.receptive_field + 1) - ) - - else: - self.skip0 = nn.LazyConv2d(out_channels=self.skip_channels, kernel_size=(1, self.receptive_field)) - self.skipE = nn.Conv2d(in_channels=self.residual_channels, out_channels=self.skip_channels, kernel_size=(1, 1)) - - idx = torch.arange(self.num_nodes) - self.register_buffer('idx', idx) - - def forward(self, batch, idx=None): - if self.use_embedding: - batch = {k: v[:, :self.seq_length] if v is not None else None for k, v in batch.items()} - emb = self.embedding(batch) - emb = [e.view(*e.shape[:-2], -1) for e in emb if e is not None] - emb[0] = emb[0].unsqueeze(1).expand(emb[0].shape[0], self.seq_length, *emb[0].shape[1:]) - if not self.include_static_data: - emb = emb[1:] - input = torch.cat(emb, dim=-1).transpose(1, 3) - else: - - # TSPP compatibility code - t = batch['k_cont'][:, :self.seq_length, 0, 2:] - t = torch.einsum('btk,k->bt', t, t.new([1, 0.16])) - t = t.unsqueeze(-1).expand(*t.shape, self.num_nodes) - target = batch['target'][:, :self.seq_length].squeeze(-1) - input = torch.stack((target, t), dim=1).transpose(2, 3) - #### - - seq_len = input.size(3) - assert seq_len == self.seq_length, 'input sequence length not equal to preset sequence length' - if idx is None: - idx = self.idx - - if self.seq_length < self.receptive_field: - input = nn.functional.pad(input, (self.receptive_field - self.seq_length, 0, 0, 0)) - - if self.use_gcn: - if not self.predefined_adj: - adp = self.gc(idx) - else: - adp = self.predefined_adj - - x = self.start_conv(input) # 1x1 conv for upscaling. 
Acts like a linear projection on dim 1
-        skip = self.skip0(F.dropout(input, self.dropout, training=self.training))
-        for layer in self.layers:
-            x, s = layer(x, idx, adp)
-            skip = skip + s
-
-        skip = self.skipE(x) + skip
-        x = F.relu(skip)
-        x = F.relu(self.end_conv_1(x))
-        x = self.end_conv_2(x)
-        return x
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nbeats.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nbeats.py
deleted file mode 100644
index bc5365cf5..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nbeats.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import torch
-from torch import nn
-
-
-class Block(nn.Module):
-
-    def __init__(self, units, thetas_dim, backcast_length, forecast_length):
-        super(Block, self).__init__()
-        self.thetas_dim = thetas_dim
-        self.backcast_length = backcast_length
-        self.forecast_length = forecast_length
-        ff = [nn.Linear(backcast_length, units), nn.ReLU()] + [item for _ in range(3) for item in (nn.Linear(units, units), nn.ReLU())]
-        self.ff = nn.Sequential(*ff)
-
-        if self.thetas_dim: # generic block skips this stage
-            self.theta_b_fc = nn.Linear(units, thetas_dim, bias=False)
-            self.ff.add_module(str(len(self.ff)), self.theta_b_fc)
-
-
-class SeasonalityBlock(Block):
-
-    def __init__(self, units, thetas_dim, backcast_length, forecast_length):
-
-        if not thetas_dim:
-            # Auto determine according to paper: horizon/2 sines, horizon/2 cosines
-            thetas_dim = forecast_length
-
-        super(SeasonalityBlock, self).__init__(units, thetas_dim, backcast_length,
-                                               forecast_length)
-
-        def get_seasonality_basis(num_thetas, linspace):
-            p = num_thetas
-            p1, p2 = (p // 2, p // 2) if p % 2 == 0 else (p // 2, p // 2 + 1)
-            s1 = [np.cos(2 * np.pi * i * linspace) for i in range(p1)]
-            s2 = [np.sin(2 * np.pi * i * linspace) for i in range(p2)]
-            s = np.stack(s1+s2)
-            return torch.FloatTensor(s)
-
-        self.forecast_length = forecast_length
-        linspace = np.concatenate([np.arange(backcast_length) / backcast_length, np.arange(forecast_length) / forecast_length])
-        self.register_buffer('basis', get_seasonality_basis(self.thetas_dim, linspace))
-
-    def forward(self, x):
-        x = squeeze_last_dim(x)
-        x = self.ff(x)
-        x = x.mm(self.basis)
-        backcast, forecast = x[:,:-self.forecast_length], x[:,-self.forecast_length:]
-        return backcast, forecast
-
-
-class TrendBlock(Block):
-
-    def __init__(self, units, thetas_dim, backcast_length, forecast_length):
-        super(TrendBlock, self).__init__(units, thetas_dim, backcast_length,
-                                         forecast_length)
-
-        self.forecast_length = forecast_length
-        linspace = np.concatenate([np.arange(backcast_length) / backcast_length, np.arange(forecast_length) / forecast_length])
-        basis = np.stack([linspace ** i for i in range(thetas_dim)])
-        self.register_buffer('basis', torch.FloatTensor(basis))
-
-    def forward(self, x):
-        x = squeeze_last_dim(x)
-        x = self.ff(x)
-        x = x.mm(self.basis)
-        backcast, forecast = x[:,
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nhits.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nhits.py
deleted file mode 100644
index 739f2cbe8..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/nhits.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Tuple
-
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class _IdentityBasis(nn.Module):
-    def __init__(self, backcast_size: int, forecast_size: int, interpolation_mode: str):
-        super().__init__()
-        assert (interpolation_mode in ['linear','nearest']) or ('cubic' in interpolation_mode)
-        self.forecast_size = forecast_size
-        self.backcast_size = backcast_size
-        self.interpolation_mode = interpolation_mode
-
-    def forward(self, theta: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-
-        backcast = theta[:, :self.backcast_size]
-        knots = theta[:, self.backcast_size:]
-
-        if self.interpolation_mode in ['nearest','linear']:
-            knots = knots[:,None,:]
-            forecast = F.interpolate(knots, size=self.forecast_size, mode=self.interpolation_mode)
-            forecast = forecast[:,0,:]
-        elif 'cubic' in self.interpolation_mode:
-            batch_size = len(backcast)
-            knots = knots[:,None,None,:]
-            forecast = torch.zeros((len(knots), self.forecast_size)).to(knots.device)
-            n_batches = int(np.ceil(len(knots)/batch_size))
-            for i in range(n_batches):
-                forecast_i = F.interpolate(knots[i*batch_size:(i+1)*batch_size], size=self.forecast_size, mode='bicubic')
-                forecast[i*batch_size:(i+1)*batch_size] += forecast_i[:,0,0,:]
-
-        return backcast, forecast
-
-ACTIVATIONS = ['ReLU',
-               'Softplus',
-               'Tanh',
-               'SELU',
-               'LeakyReLU',
-               'PReLU',
-               'Sigmoid']
-
-POOLING = ['MaxPool1d',
-           'AvgPool1d']
-
-class NHITSBlock(nn.Module):
-    """
-    N-HiTS block which takes a basis function as an argument.
-    """
-    def __init__(self,
-                 input_size: int,
-                 n_theta: int,
-                 n_mlp_layers: int,
-                 hidden_size: int,
-                 basis: nn.Module,
-                 n_pool_kernel_size: int,
-                 pooling_mode: str,
-                 dropout_prob: float,
-                 activation: str):
-        """
-        """
-        super().__init__()
-
-        n_time_in_pooled = int(np.ceil(input_size/n_pool_kernel_size))
-
-        self.dropout_prob = dropout_prob
-
-        assert activation in ACTIVATIONS, f'{activation} is not in {ACTIVATIONS}'
-        assert pooling_mode in POOLING, f'{pooling_mode} is not in {POOLING}'
-
-        activ = getattr(nn, activation)()
-
-        self.pooling_layer = getattr(nn, pooling_mode)(kernel_size=n_pool_kernel_size,
-                                                       stride=n_pool_kernel_size, ceil_mode=True)
-
-        # Block MLPs
-        mlp = [nn.Linear(n_time_in_pooled, hidden_size)]
-        mlp += [item for _ in range(n_mlp_layers) for item in (nn.Linear(hidden_size, hidden_size), activ)]
-        layers = mlp + [nn.Linear(hidden_size, n_theta)]
-
-        self.layers = nn.Sequential(*layers)
-        self.basis = basis
-
-    def forward(self, insample_y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-
-        # Pooling
-        insample_y = insample_y.unsqueeze(1)
-        insample_y = self.pooling_layer(insample_y)
-        insample_y = insample_y.squeeze(1)
-
-        # Compute local projection weights and projection
-        theta = self.layers(insample_y)
-        backcast, forecast = self.basis(theta)
-        return backcast, forecast
-
-class NHITS(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-        assert len(config.n_pool_kernel_size) == len(config.n_freq_downsample) == len(config.n_blocks)
-
-        self.config = config
-        self.input_size = config.encoder_length
-        self.h = config.example_length - config.encoder_length
-
-        blocks = self.create_stack(config)
-        self.blocks = torch.nn.ModuleList(blocks)
-
-    def create_stack(self, config):
-
-        block_list = []
-        for n, k, f in zip(config.n_blocks, config.n_pool_kernel_size, config.n_freq_downsample):
-            for _ in range(n):
-                n_theta = (self.input_size + max(self.h//f, 1))
-                basis = _IdentityBasis(backcast_size=self.input_size,
-                                       forecast_size=self.h,
-                                       interpolation_mode=config.interpolation_mode)
-                nbeats_block = NHITSBlock(input_size=self.input_size,
-                                          n_theta=n_theta,
-                                          n_mlp_layers=config.n_mlp_layers,
-                                          hidden_size=config.hidden_size,
-                                          n_pool_kernel_size=k,
-                                          pooling_mode=config.pooling_mode,
-                                          basis=basis,
-                                          dropout_prob=config.dropout_prob_theta,
-                                          activation=config.activation)
-                block_list.append(nbeats_block)
-
-        return block_list
-
-    def forward(self, batch):
-        residuals = batch['target'][:, :self.input_size, 0]
-
-        forecast = residuals[:, -1:]  # Level with Naive1
-        block_forecasts = [ forecast.repeat(1, self.h) ]
-
-        for i, block in enumerate(self.blocks):
-            backcast, block_forecast = block(insample_y=residuals)
-            residuals = residuals - backcast
-            forecast = forecast + block_forecast
-
-        return forecast.unsqueeze(2)
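The step that distinguishes nhits.py above from nbeats.py is the identity basis: each block emits a handful of knots that are upsampled to the full horizon. A minimal sketch of that interpolation, assuming a (batch, n_knots) tensor and linear mode; this mirrors _IdentityBasis with interpolation_mode='linear', with illustrative names:

    import torch
    import torch.nn.functional as F

    def interpolate_knots(knots: torch.Tensor, horizon: int) -> torch.Tensor:
        # F.interpolate expects (batch, channels, length), hence the added axis
        return F.interpolate(knots[:, None, :], size=horizon, mode='linear')[:, 0, :]

    knots = torch.randn(32, 4)                       # 4 knots per series
    forecast = interpolate_knots(knots, horizon=24)  # shape (32, 24)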
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/stat_models.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/stat_models.py
deleted file mode 100755
index e10dbac9b..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/stat_models.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# SPDX-License-Identifier: Apache-2.0
-from abc import ABC
-import os
-import pmdarima as pm
-import numpy as np
-import pickle as pkl
-
-class StatModel(ABC):
-    def __init__(self, config):
-        self.horizon = config.example_length - config.encoder_length
-        self.config = config
-
-    def fit(self, label, data):
-        return
-
-    def predict(self, data, i):
-        return
-
-    def save(self):
-        return
-
-    def load(self, path):
-        return
-
-class AutoARIMA(StatModel):
-    def __init__(self, config):
-        super().__init__(config)
-        self.model = None
-
-    def fit(self, example):
-        id, label, data = example['id'], example['endog'], example['exog']
-        data = data if data.shape[-1] != 0 else None
-        model = pm.auto_arima(label, X=data, **self.config)
-        self.model = model
-
-    def predict(self, example):
-        model = self.model
-        if len(example['endog_update']) != 0:
-            model.update(example['endog_update'], X=data if (data := example['exog_update']).shape[-1] != 0 else None)
-        # Issue is related to https://github.com/alkaline-ml/pmdarima/issues/492
-        try:
-            preds = model.predict(self.horizon, X=data if (data := example['exog']).shape[-1] != 0 else None)
-        except ValueError as e:
-            if "Input contains NaN, infinity or a value too large for dtype('float64')." in str(e):
-                print(str(e))
-                preds = np.empty(self.horizon)
-                preds.fill(self.model.arima_res_.data.endog[-1])
-            else:
-                raise
-        return preds
-
-    def save(self):
-        with open('arima.pkl', 'wb') as f:
-            pkl.dump(self.model, f)
-
-    def load(self, path):
-        with open(os.path.join(path, 'arima.pkl'), 'rb') as f:
-            self.model = pkl.load(f)
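For context, AutoARIMA above is a thin wrapper over pmdarima's fit/update/predict cycle. A minimal sketch of that cycle with hypothetical arrays and no exogenous features; self.horizon above corresponds to n_periods here:

    import numpy as np
    import pmdarima as pm

    y_train = np.random.rand(200)        # hypothetical endogenous history
    model = pm.auto_arima(y_train)       # order search, as in AutoARIMA.fit
    model.update(np.random.rand(8))      # roll the model forward, as in AutoARIMA.predict
    preds = model.predict(n_periods=24)  # forecast the horizon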
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tft.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tft.py
deleted file mode 100644
index 70af5a607..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tft.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-from data.data_utils import InputTypes, DataTypes
-from matplotlib import pyplot as plt
-from models.interpretability import InterpretableModelBase
-from models.tft_pyt.modeling import TemporalFusionTransformer
-from mpl_toolkits.axes_grid1 import make_axes_locatable
-
-
-class InterpretableTFTBase(InterpretableModelBase):
-    def __init__(self, *args, **kwargs):
-        super(InterpretableTFTBase, self).__init__(*args, **kwargs)
-
-    @classmethod
-    def _get_future_features(cls, features):
-        future_features = [feature.name for feature in features if feature.feature_type == InputTypes.KNOWN
-                           and feature.feature_embed_type == DataTypes.CATEGORICAL] \
-                          + [feature.name for feature in features if feature.feature_type == InputTypes.KNOWN
-                             and feature.feature_embed_type == DataTypes.CONTINUOUS]
-        return future_features
-
-    @classmethod
-    def _get_history_features(cls, features):
-        history_features = [feature.name for feature in features if feature.feature_type == InputTypes.OBSERVED
-                            and feature.feature_embed_type == DataTypes.CATEGORICAL] \
-                           + [feature.name for feature in features if feature.feature_type == InputTypes.OBSERVED
-                              and feature.feature_embed_type == DataTypes.CONTINUOUS] \
-                           + [feature.name for feature in features if feature.feature_type == InputTypes.KNOWN
-                              and feature.feature_embed_type == DataTypes.CATEGORICAL] \
-                           + [feature.name for feature in features if feature.feature_type == InputTypes.KNOWN
-                              and feature.feature_embed_type == DataTypes.CONTINUOUS] \
-                           + [feature.name for feature in features if feature.feature_type == InputTypes.TARGET]
-
-        return history_features
-
-    @classmethod
-    def _get_heatmap_fig(cls, tensor, features, max_size=16, min_size=4):
-        shape = tensor.shape
-        ratio = max(max(shape) // max_size, 1)
-        fig_size = max(shape[1] / ratio, min_size), max(shape[0] / ratio, min_size)
-        fig = plt.figure(figsize=fig_size)
-        ticks = list(range(shape[0]))
-        plt.yticks(ticks, features)
-        plt.xlabel('Time step')
-        plt.imshow(tensor, cmap='hot', interpolation='nearest')
-        plt.colorbar()
-        return fig
-
-    @classmethod
-    def _get_vsn_fig(cls, activations, sample_number, features):
-        _, sparse_matrix = activations
-        sample_sparse_matrix = sparse_matrix[sample_number]
-        final_tensor = sample_sparse_matrix.permute(1, 0)
-        fig = cls._get_heatmap_fig(final_tensor.detach().cpu(), features)
-        return fig
-
-    @classmethod
-    def _get_attention_heatmap_fig(cls, heads, max_size=16, min_size=4):
-        row_size = max(min_size, max_size / len(heads))
-        fig, axes = plt.subplots(1, len(heads), figsize=(max_size, row_size))
-        for i, (head, ax) in enumerate(zip(heads, axes), 1):
-            im = ax.imshow(head, cmap='hot', interpolation='nearest')
-            if i < len(heads):
-                ax.set_title(f'HEAD {i}')
-            else:
-                ax.set_title('MEAN')
-            divider = make_axes_locatable(ax)
-            cax = divider.append_axes('right', size='5%', pad=0.05)
-            fig.colorbar(im, cax=cax, orientation='vertical')
-        return fig
-
-    @classmethod
-    def _get_attn_heads(cls, activations, sample_number):
-        heads = []
-        _, attn_prob = activations
-        sample_attn_prob = attn_prob[sample_number]
-        n_heads = sample_attn_prob.shape[0]
-        for head_index in range(n_heads):
-            head = sample_attn_prob[head_index]
-            heads.append(head.detach().cpu())
-        mean_head = torch.mean(sample_attn_prob, dim=0)
-        heads.append(mean_head.detach().cpu())
-        fig = cls._get_attention_heatmap_fig(heads)
-        return fig
-
-    def _get_activation(self, name):
-        def hook(model, input, output):
-            self.activations[name] = output
-
-        return hook
-
-    def get_activations(self, sample_number, features):
-        assert self.activations, "There are no activations available"
-        return {
-            "history_vsn": self._get_vsn_fig(self.activations['history_vsn'], sample_number,
-                                             self._get_history_features(features)),
-            "future_vsn": self._get_vsn_fig(self.activations['future_vsn'], sample_number,
                                            self._get_future_features(features)),
-            "attention": self._get_attn_heads(self.activations['attention'], sample_number)
-        }
-
-    def _register_interpretable_hooks(self):
-        self.TFTpart2.history_vsn.register_forward_hook(self._get_activation('history_vsn'))
-        self.TFTpart2.future_vsn.register_forward_hook(self._get_activation('future_vsn'))
-        self.TFTpart2.attention.register_forward_hook(self._get_activation('attention'))
-
-
-class InterpretableTFT(TemporalFusionTransformer, InterpretableTFTBase):
-    def __init__(self, *args, **kwargs):
-        TemporalFusionTransformer.__init__(self, *args, **kwargs)
-        InterpretableTFTBase.__init__(self, *args, **kwargs)
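The interpretability plumbing in tft.py above is built on PyTorch forward hooks. The same capture pattern on a stand-in module; nn.Linear here is only a placeholder for history_vsn/attention, and the dict name is illustrative:

    import torch
    import torch.nn as nn

    activations = {}

    def capture(name):
        def hook(module, inputs, output):
            activations[name] = output  # stash the layer output, as _get_activation does
        return hook

    layer = nn.Linear(8, 4)
    layer.register_forward_hook(capture('history_vsn'))
    layer(torch.randn(2, 8))
    print(activations['history_vsn'].shape)  # torch.Size([2, 4])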
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/trivial_model.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/trivial_model.py
deleted file mode 100755
index 902473dab..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/trivial_model.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# SPDX-License-Identifier: Apache-2.0
-import torch
-import torch.nn as nn
-
-
-class TrivialModel(nn.Module):
-    def __init__(self, config):
-        super().__init__()
-        self.bias = nn.Parameter(torch.zeros(1))
-        self.encoder_length = config.encoder_length
-        self.example_length = config.example_length
-        self.predict_steps = self.example_length - self.encoder_length
-        self.output_dim = len(config.get("quantiles", [""]))
-
-
-    def forward(self, batch):
-        t = next(t for t in batch.values() if t is not None)
-        bs = t.shape[0]
-        return torch.ones([bs, self.example_length - self.encoder_length, self.output_dim]).to(device=t.device) + self.bias
-
-    def predict(self, batch):
-        targets = batch["target"].clone()
-        prev_predictions = targets.roll(1, 1)
-        return prev_predictions[:, -self.predict_steps :, :]
-
-    def test_with_last(self, batch):
-        bs = max([tensor.shape[0] if tensor is not None else 0 for tensor in batch.values()])
-        values = (
-            batch["target_masked"]
-            .clone()[:, -1, :]
-            .reshape((bs, 1, self.output_dim))
-        )
-
-        return torch.cat((self.example_length - self.encoder_length) * [values], dim=1)
-
-    def test_with_previous_window(self, batch):
-        targets = batch["target"].clone()
-        prev_predictions = targets.roll(self.predict_steps, 1)
-        return prev_predictions[:, -self.predict_steps :, :]
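The baseline above reduces to persistence forecasting via torch.roll; test_with_previous_window, for example, repeats the window that precedes the horizon. The same trick stand-alone, with a hypothetical (batch, time, features) tensor and a 3-step horizon:

    import torch

    targets = torch.arange(12.).reshape(1, 12, 1)       # values 0..11 over 12 steps
    horizon = 3
    preds = targets.roll(horizon, 1)[:, -horizon:, :]   # steps 6..8 predict steps 9..11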
diff --git a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tspp_xgboost.py b/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tspp_xgboost.py
deleted file mode 100644
index 65f4885ea..000000000
--- a/Tools/PyTorch/TimeSeriesPredictionPlatform/models/tspp_xgboost.py
+++ /dev/null
@@ -1,111 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import cudf
-import pandas as pd
-
-import pynvml
-import numpy as np
-import xgboost as xgb
-import os
-import glob
-
-import dask_cudf
-from distributed_utils import create_client
-# Deal with the patience and log_interval. Also objective, cluster
-class TSPPXGBoost():
-    def __init__(self, config):
-        self.config = config
-        self.models = []
-
-    def fit(self, train, label, valid, valid_label, **kwargs):
-        train = train.drop(['_id_', '_timestamp_'], axis=1, errors='ignore')
-        valid = valid.drop(['_id_', '_timestamp_'], axis=1, errors='ignore')
-        X = xgb.DeviceQuantileDMatrix(cudf.from_pandas(train), label=cudf.from_pandas(label))
-        V = xgb.DMatrix(cudf.from_pandas(valid), label=cudf.from_pandas(valid_label))
-        model = xgb.train(params=self.config,
-                          dtrain=X,
-                          num_boost_round=self.config.n_rounds,
-                          evals=[(X, 'train'), (V, 'valid')],
-                          early_stopping_rounds=kwargs.get('patience', 5),
-                          verbose_eval=kwargs.get("log_interval", 25),
-                          )
-        self.models.append(model)
-
-    def predict(self, test, i):
-        test = test.drop(['_id_', '_timestamp_'], axis=1, errors='ignore')
-        model = self.models[i]
-        X = xgb.DMatrix(cudf.from_pandas(test))
-        return model.predict(X)
-
-    def save(self, path):
-        os.makedirs(os.path.join(path, 'checkpoints'), exist_ok=True)
-        for i in range(len(self.models)):
-            model = self.models[i]
-            model.save_model(os.path.join(path, f'checkpoints/xgb_{i+1}.model'))
-
-    def load(self, path):
-        self.models = []
-        for i in range(self.config.example_length - self.config.encoder_length):
-            p = os.path.join(path, f'checkpoints/xgb_{i+1}.model')
-            model = xgb.Booster()
-            model.load_model(p)
-            self.models.append(model)
-
-class TSPPDaskXGBoost():
-    def __init__(self, config):
-        self.config = config
-        self.models = []
-        self.client = create_client(config)
-        self.npartitions = self.config.cluster.npartitions
-
-    def fit(self, train, label, valid, valid_label, **kwargs):
-        X = xgb.dask.DaskDeviceQuantileDMatrix(self.client,
-                                               dask_cudf.from_cudf(cudf.from_pandas(train), npartitions=self.npartitions),
-                                               label=dask_cudf.from_cudf(cudf.from_pandas(label), npartitions=self.npartitions))
-        V = xgb.dask.DaskDMatrix(self.client,
-                                 dask_cudf.from_cudf(cudf.from_pandas(valid), npartitions=self.npartitions),
-                                 label=dask_cudf.from_cudf(cudf.from_pandas(valid_label), npartitions=self.npartitions))
-        model = xgb.dask.train(client=self.client,
-                               params=self.config,
-                               dtrain=X,
-                               num_boost_round=self.config.n_rounds,
-                               evals=[(X, 'train'), (V, 'valid')],
-                               early_stopping_rounds=kwargs.get('patience', 5),
-                               verbose_eval=kwargs.get("log_interval", 25),
-                               )
-        self.models.append(model)
-        self.client.restart()
-
-    def predict(self, test, i):
-        test = test.reset_index(drop=True)
-        model = self.models[i]
-        test = dask_cudf.from_cudf(cudf.from_pandas(test), npartitions=self.npartitions)
-        test = xgb.dask.DaskDMatrix(self.client, test)
-        out = xgb.dask.predict(self.client, model, test)
-        return out.compute()
-
-    def save(self, path):
-        os.makedirs(os.path.join(path, 'checkpoints'), exist_ok=True)
-        for i in range(len(self.models)):
-            model = self.models[i]
-            model['booster'].save_model(os.path.join(path, f'checkpoints/xgb_{i+1}.model'))
-
-    def load(self, path):
-        self.models = []
-        for i in range(self.config.example_length - self.config.encoder_length):
-            p = os.path.join(path, f'checkpoints/xgb_{i+1}.model')
-            model = {'booster': xgb.dask.Booster()}
-            model['booster'].load_model(p)
-            self.models.append(model)
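For reference, the scheme above trains one booster per horizon step and round-trips each through save_model/load_model. A compact sketch of that cycle with synthetic data; this is the single-step, local (non-Dask) case, and the file name is illustrative:

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X, y = rng.random((256, 10)), rng.random(256)   # synthetic features/labels

    booster = xgb.train(params={'max_depth': 3},
                        dtrain=xgb.DMatrix(X, label=y),
                        num_boost_round=20)

    booster.save_model('xgb_1.model')    # one file per horizon step, as in save()
    restored = xgb.Booster()
    restored.load_model('xgb_1.model')   # as in load()
    preds = restored.predict(xgb.DMatrix(X[:5]))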