Source code for libcity.model.traffic_flow_prediction.DSAN

import numpy as np
import torch
import torch.nn as nn
from libcity.model.abstract_traffic_state_model import AbstractTrafficStateModel
from libcity.model.loss import masked_rmse_torch

def get_angles(pos, l, d):
    """
    Angle term of the spatial positional encoding, equ (5).

    Args:
        pos: row (r) / column (c) coordinate in equ (5)
        l: indices of the encoding dimensions, with shape (1, d)
        d: d dimension in total

    Returns:
        torch.Tensor: ``pos / 10000 ** (2 * (l // 2) / d)``, same shape as ``l``
    """
    # Dimensions 2k and 2k + 1 share one exponent, hence the floor division.
    exponent = (2 * (l // 2)) / np.float32(d)
    scaled_pos = pos * (1 / np.power(10000, exponent))
    return torch.tensor(scaled_pos)
def spatial_posenc(r, c, d, device):
    """
    Build the spatial positional encoding (SPE) of one grid position.

    Even dimensions hold the sine of the row angles, odd dimensions hold the
    cosine of the column angles.

    Args:
        r: row of the spatial position
        c: column of the spatial position
        d: d dimension in total
        device: device on which the encoding tensor is allocated

    Returns:
        torch.Tensor: encoding with shape (1, 1, d)
    """
    dims = np.arange(d)[np.newaxis, :]  # (1, d)
    row_angles = get_angles(pos=r, l=dims, d=d)  # (1, d)
    col_angles = get_angles(pos=c, l=dims, d=d)  # (1, d)

    encoding = torch.zeros(size=row_angles.shape, device=device)  # (1, d)
    encoding[:, 0::2] = torch.sin(row_angles[:, 0::2])  # even dims <- rows
    encoding[:, 1::2] = torch.cos(col_angles[:, 1::2])  # odd dims <- columns
    return encoding.unsqueeze(0)  # (1, 1, d)
def cal_attention(Q, K, V, M, n_h):
    """
    equ (3), scaled dot-product attention applied to already-split heads.

    Args:
        Q: query, shape (..., L_q, d_k)
        K: key, shape (..., L_k, d_k)
        V: value, shape (..., L_k, d_k)
        M: mask or None. When given it must be 4-D: a head axis is inserted
            at dim 2 and the mask is tiled along dim 0 to the batch size of
            the scores. Entries equal to 1 are suppressed.
        n_h: number of attention head

    Returns:
        Attention output with shape (..., L_q, d_k)
    """
    scores = torch.matmul(input=Q, other=K.transpose(-1, -2))  # (..., L_q, L_k)

    # NOTE(review): MSA passes K after split_heads, so K.shape[-1] is already
    # the per-head width; dividing by n_h again scales by sqrt(d_h / n_h)
    # rather than the usual sqrt(d_h). Kept as is -- confirm it is intended.
    scale = np.sqrt(K.shape[-1] / n_h)
    scores = scores / scale

    if M is not None:
        M = M.unsqueeze(2)
        M = M.repeat(scores.shape[0] // M.shape[0], 1, 1, 1, 1)
        # A large negative logit drives the softmax weight to zero.
        scores += M * -1e9

    weights = torch.softmax(input=scores, dim=-1)  # softmax along key axis
    return torch.matmul(weights, V)
def two_layer_ffn(d, num_hid, input_dim):
    """
    Two-layer feed-forward network: Linear -> ReLU -> Linear.

    Args:
        d: d-dimension representations (output width)
        num_hid: hidden layer size
        input_dim: input feature dimension

    Returns:
        nn.Sequential mapping (..., input_dim) to (..., d)
    """
    hidden = nn.Linear(input_dim, num_hid)
    project = nn.Linear(num_hid, d)
    return nn.Sequential(hidden, nn.ReLU(), project)
def ex_encoding(d, num_hid, input_dim):
    """
    Implementation of TPE: Linear -> ReLU -> Linear -> Sigmoid.

    Args:
        d: d-dimension representations (output width)
        num_hid: hidden layer size
        input_dim: input feature dimension

    Returns:
        nn.Sequential mapping (..., input_dim) to (..., d) with values in (0, 1)
    """
    stages = [
        nn.Linear(in_features=input_dim, out_features=num_hid),
        nn.ReLU(),
        nn.Linear(in_features=num_hid, out_features=d),
        nn.Sigmoid(),
    ]
    return nn.Sequential(*stages)
def create_look_ahead_mask(size, device=None):
    """
    Build the (size, size) look-ahead mask.

    Entry (i, j) is 0 when j < i and 1 when j >= i.

    NOTE(review): the diagonal is 1 as well, so under the "1 = suppressed"
    convention a position is also hidden from itself -- confirm intended.

    Args:
        size: side length of the square mask
        device: device for the mask. Defaults to the current CUDA device when
            CUDA is available (the historical behaviour) and to the CPU
            otherwise, so the function no longer fails on CPU-only hosts.

    Returns:
        torch.Tensor: float mask with shape (size, size)
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    strictly_lower = torch.tril(torch.ones((size, size), device=device), diagonal=-1)
    return 1 - strictly_lower
def create_threshold_mask(inp):
    """
    Flag the positions whose features sum to exactly zero.

    Args:
        inp: tensor whose last axis holds the features, e.g.
            [batch_size, input_window, column, row, input_dim]

    Returns:
        torch.Tensor: float mask of shape [inp.shape[0], inp.shape[1], -1];
        1.0 where the feature sum is 0, else 0.0
    """
    feature_sum = inp.sum(dim=-1)
    # Collapse every axis after the first two into a single axis.
    flattened = feature_sum.reshape(feature_sum.shape[0], feature_sum.shape[1], -1)
    return flattened.eq(0).float()
def create_threshold_mask_tar(inp):
    """
    Flag the positions whose features sum to exactly zero, keeping the
    leading axes of ``inp`` unchanged.

    Args:
        inp: tensor whose last axis holds the features

    Returns:
        torch.Tensor: float mask with shape ``inp.shape[:-1]``;
        1.0 where the feature sum is 0, else 0.0
    """
    return inp.sum(dim=-1).eq(0).float()
def create_masks(inp_g, inp_l, tar):
    """
    Build the three attention masks used by the model.

    Args:
        inp_g: global input, 5-D with features on the last axis
            (original note: [batch_size, input_window, column, row, input_dim])
        inp_l: local-block input, 7-D with features on the last axis
            (original note: [batch_size, input_window, column, row, l_d, l_d, input_dim])
        tar: decoder input, 4-D [batch_size, window, N, dim]

    Returns:
        tuple:
            threshold_mask_g: zero-sum mask of ``inp_g``, shape [batch, window, 1, -1]
            threshold_mask: zero-sum mask of ``inp_l`` with axes 2 and 3 folded
                into the batch axis, shape [batch * dim2 * dim3, window, 1, -1]
            combined_mask: element-wise max of the zero-sum mask of ``tar``
                (N folded into the batch axis) and the look-ahead mask,
                shape [batch * N, 1, window, window]
    """
    threshold_mask_g = create_threshold_mask(inp_g).unsqueeze(2)

    # Move the two grid axes next to the batch axis, then fold them into it.
    inp_l = inp_l.permute([0, 2, 3, 1, 4, 5, 6])
    folded_batch = inp_l.shape[0] * inp_l.shape[1] * inp_l.shape[2]
    inp_l = torch.reshape(inp_l, [folded_batch, inp_l.shape[3], inp_l.shape[4],
                                  inp_l.shape[5], inp_l.shape[6]])
    threshold_mask = create_threshold_mask(inp_l).unsqueeze(2)

    # Keep the look-ahead mask on the same device as the target so that the
    # torch.max below never mixes devices.
    look_ahead_mask = create_look_ahead_mask(tar.shape[1]).to(tar.device)

    tar = tar.permute(0, 2, 1, 3)
    tar = torch.reshape(tar, [tar.shape[0] * tar.shape[1], tar.shape[2], tar.shape[3]])
    dec_target_threshold_mask = create_threshold_mask_tar(tar).unsqueeze(1).unsqueeze(2)

    combined_mask = torch.max(dec_target_threshold_mask, look_ahead_mask)
    return threshold_mask_g, threshold_mask, combined_mask
class Convs(nn.Module):
    """
    Conv layers for input, to form a d-dimension representation.

    Every time step owns its own stack of ``n_layer`` 3x3 convolutions, each
    followed by ReLU and dropout.
    """

    def __init__(self, n_layer, n_filter, input_window, input_dim, r_d=0.1):
        """
        Args:
            n_layer: num of conv layers
            n_filter: num of filters
            input_window: input window size
            input_dim: input dimension size
            r_d: dropout rate
        """
        super(Convs, self).__init__()
        self.n_layer = n_layer
        self.input_window = input_window

        def conv_bank(in_channels):
            # One same-padding 3x3 convolution per time step.
            return nn.ModuleList(
                [nn.Conv2d(in_channels=in_channels, out_channels=n_filter,
                           kernel_size=(3, 3), padding=(1, 1))
                 for _ in range(input_window)])

        # convs[layer][step]; only the first layer sees the raw input channels.
        self.convs = nn.ModuleList([conv_bank(input_dim)])
        self.convs.extend([conv_bank(n_filter) for _ in range(n_layer - 1)])

        # dropouts[layer][step]
        self.dropouts = nn.ModuleList(
            [nn.ModuleList([nn.Dropout(r_d) for _ in range(input_window)])
             for _ in range(n_layer)])

    def forward(self, inps):
        """
        Args:
            inps: with shape [batch_size, input_window, row, column, N_d, input_dim]
                or [batch_size, input_window, row, column, input_dim]

        Returns:
            Tensor shaped like ``inps`` with the last axis replaced by n_filter
        """
        is_local = inps.dim() == 6
        # Bring the channel axis in front of the two grid axes; for the 6-D
        # input the N_d axis goes in front of the channels.
        order = [0, 1, 4, 5, 2, 3] if is_local else [0, 1, 4, 2, 3]

        steps = list(torch.split(inps, 1, dim=1))
        for t in range(self.input_window):
            moved = steps[t].permute(order)
            # Fold every leading axis into the conv batch axis: (-1, C, H, W).
            steps[t] = moved.reshape([-1] + list(moved.shape[-3:]))

        for layer in range(self.n_layer):
            conv_bank = self.convs[layer]
            drop_bank = self.dropouts[layer]
            for t in range(self.input_window):
                activated = torch.relu(conv_bank[t](steps[t]))
                steps[t] = drop_bank[t](activated)

        stacked = torch.stack(steps, dim=1)
        if is_local:
            # Unfold N_d from the batch axis, then restore the input layout.
            stacked = stacked.reshape([inps.shape[0], -1] + list(stacked.shape[1:]))
            return stacked.permute([0, 2, 4, 5, 1, 3])
        return stacked.permute([0, 1, 3, 4, 2])
class MSA(nn.Module):
    """
    Multi-space attention
    """

    def __init__(self, d, n_h, self_att=True):
        """
        Args:
            d: d-dimension representations after B-layer CNN/FCN
            n_h: num of head; must divide d
            self_att: whether use self attention (one fused Q/K/V projection)
                or cross attention (separate Q and fused K/V projections)
        """
        super(MSA, self).__init__()
        assert d % n_h == 0
        self.d = d
        self.n_h = n_h
        self.self_att = self_att
        self.d_h = d // n_h  # width of one head

        if self_att:
            self.wx = nn.Linear(in_features=d, out_features=3 * d)
        else:
            self.wq = nn.Linear(in_features=d, out_features=d)
            self.wkv = nn.Linear(in_features=d, out_features=2 * d)
        self.wo = nn.Linear(in_features=d, out_features=d)

    def split_heads(self, x):
        """
        Args:
            x: shape == [batch_size, input_window, N, d]

        Returns:
            shape == [batch_size, input_window, n_h, N, d_h]
        """
        batch, steps, length = x.shape[0], x.shape[1], x.shape[2]
        per_head = x.reshape(batch, steps, length, self.n_h, self.d_h)
        return per_head.permute(0, 1, 3, 2, 4)

    def forward(self, V, K, Q, M):
        """
        Args:
            V: value source; not read (V is always derived from Q in
                self-attention and from K in cross-attention)
            K: key/value source; only read in cross-attention
            Q: query source, shape [batch_size, input_window, N, d]
            M: mask forwarded to cal_attention, or None

        Returns:
            Tensor with the same shape as ``Q``
        """
        # linear projections
        if self.self_att:
            Q, K, V = self.wx(Q).chunk(3, dim=-1)
        else:
            Q = self.wq(Q)
            K, V = self.wkv(K).chunk(2, dim=-1)

        heads_out = cal_attention(Q=self.split_heads(Q),
                                  K=self.split_heads(K),
                                  V=self.split_heads(V),
                                  M=M, n_h=self.n_h)

        # Put the head axis back next to d_h and merge the two into d.
        heads_out = heads_out.permute([0, 1, 3, 2, 4])
        merged = heads_out.reshape(heads_out.shape[0], heads_out.shape[1],
                                   heads_out.shape[2], self.d)
        return self.wo(merged)
class EncoderLayer(nn.Module):
    """
    Enc-G implementation: self-attention and a feed-forward network, each
    followed by dropout, a residual connection and layer normalisation.
    """

    def __init__(self, d, n_h, num_hid, r_d=0.1):
        """
        Args:
            d: d-dimension representations
            n_h: number of heads in Multi-space attention
            num_hid: hidden layer size
            r_d: drop out rate
        """
        super(EncoderLayer, self).__init__()
        # msa
        self.msa = MSA(d=d, n_h=n_h)
        # ffn: it consumes the d-wide output of layernorm1, so its input width
        # must be d (it used to be fixed at 64, which only worked for d == 64).
        self.ffn = two_layer_ffn(d=d, num_hid=num_hid, input_dim=d)
        # normalization
        self.layernorm1 = nn.LayerNorm(normalized_shape=d, eps=1e-6)
        self.layernorm2 = nn.LayerNorm(normalized_shape=d, eps=1e-6)
        # dropout
        self.dropout1 = nn.Dropout(r_d)
        self.dropout2 = nn.Dropout(r_d)

    def forward(self, x, mask):
        """
        Args:
            x: shape == [batch_size, input_window, N, d]
            mask: attention mask forwarded to the MSA block, or None

        Returns:
            Tensor with the same shape as ``x``
        """
        # msa
        attn_output = self.msa(x, x, x, mask)
        attn_output = self.dropout1(attn_output)
        out1 = self.layernorm1(x + attn_output)  # Residual
        # ffn
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        out2 = self.layernorm2(out1 + ffn_output)  # Residual
        return out2
class DecoderLayer(nn.Module):
    """
    Enc-D / Dec-S / Dec-T implementation: self-attention, cross-attention and
    a feed-forward network, each followed by dropout, a residual connection
    and layer normalisation.
    """

    def __init__(self, d, n_h, num_hid, r_d=0.1, revert_q=False):
        """
        Args:
            d: d-dimension representations
            n_h: number of heads in Multi-space attention
            num_hid: hidden layer size
            r_d: drop out rate
            revert_q: when True, axes 1 and 2 of the query are swapped before
                the cross-attention and swapped back afterwards
        """
        super(DecoderLayer, self).__init__()
        self.revert_q = revert_q

        self.msa1 = MSA(d=d, n_h=n_h)                  # self-attention
        self.msa2 = MSA(d=d, n_h=n_h, self_att=False)  # cross-attention
        self.ffn = two_layer_ffn(d=d, num_hid=num_hid, input_dim=d)

        self.layernorm1 = nn.LayerNorm(normalized_shape=[d], eps=1e-6)
        self.layernorm2 = nn.LayerNorm(normalized_shape=[d], eps=1e-6)
        self.layernorm3 = nn.LayerNorm(normalized_shape=[d], eps=1e-6)

        self.dropout1 = nn.Dropout(r_d)
        self.dropout2 = nn.Dropout(r_d)
        self.dropout3 = nn.Dropout(r_d)

    def forward(self, x, kv, look_ahead_mask, threshold_mask):
        """
        Args:
            x: decoder-side input, 4-D with d on the last axis
            kv: tensor the cross-attention derives keys and values from
            look_ahead_mask: mask of the self-attention, or None
            threshold_mask: mask of the cross-attention, or None

        Returns:
            Tensor with the same shape as ``x``
        """
        # first msa: self-attention + residual
        self_att = self.dropout1(self.msa1(x, x, x, look_ahead_mask))
        out1 = self.layernorm1(self_att + x)

        # second msa: cross-attention over kv
        if self.revert_q:
            swapped_q = out1.permute([0, 2, 1, 3])
            cross_att = self.msa2(kv, kv, swapped_q, threshold_mask)
            cross_att = cross_att.permute([0, 2, 1, 3])
        else:
            # Tile kv along the batch axis up to the batch size of the query.
            kv = kv.repeat(out1.shape[0] // kv.shape[0], 1, 1, 1)
            cross_att = self.msa2(kv, kv, out1, threshold_mask)
        cross_att = self.dropout2(cross_att)
        out2 = self.layernorm2(cross_att + out1)

        # ffn + residual
        ffn_out = self.dropout3(self.ffn(out2))
        return self.layernorm3(ffn_out + out2)
class DAE(nn.Module):
    """
    DAE Dynamic Attention Encoder
    """

    def __init__(self, L, d, n_h, num_hid, conv_layer, input_window, input_dim, ext_dim, r_d=0.1):
        """
        Dynamic Attention Encoder

        Args:
            L: num of Enc-G/Enc-D layers
            d: d-dimension representations
            n_h: number of heads in Multi-space attention
            num_hid: hidden layer size
            conv_layer: num of conv layers
            input_window: input window size
            input_dim: input dimension
            ext_dim: external data dimension (input width of the TPE encoders)
            r_d: drop out rate
        """
        super(DAE, self).__init__()
        self.d = d
        self.L = L
        # conv layers to get d-dimension representations
        self.convs_d = Convs(n_layer=conv_layer, n_filter=d, input_window=input_window,
                             input_dim=input_dim, r_d=r_d)
        self.convs_g = Convs(n_layer=conv_layer, n_filter=d, input_window=input_window,
                             input_dim=input_dim, r_d=r_d)
        # get TPE
        self.ex_encoder_d = ex_encoding(d=d, num_hid=num_hid, input_dim=ext_dim)
        self.ex_encoder_g = ex_encoding(d=d, num_hid=num_hid, input_dim=ext_dim)
        # dropout layer
        self.dropout_d = nn.Dropout(p=r_d)
        self.dropout_g = nn.Dropout(p=r_d)
        # encoder stacks
        self.Enc_G = nn.ModuleList(
            [EncoderLayer(d=d, n_h=n_h, num_hid=num_hid, r_d=r_d) for _ in range(L)])
        self.Enc_D = nn.ModuleList(
            [DecoderLayer(d=d, n_h=n_h, num_hid=num_hid, r_d=r_d) for _ in range(L)])

    def forward(self, x_d, x_g, ex, cors_d, cors_g, threshold_mask_d, threshold_mask_g):
        """
        Args:
            x_d: a subset of X that contains the closest neighbors that share strong
                correlations with v_i within a local block (X_d in figure 4),
                6-D [batch, window, row, column, N_d, input_dim]
            x_g: all the training data (X in figure 4),
                5-D [batch, window, row, column, input_dim]
            ex: time-related features for Temporal Positional Encoding,
                5-D with the feature axis last
            cors_d: Spatial Positional Encoding of x_d
            cors_g: Spatial Positional Encoding of x_g
            threshold_mask_d: mask of the Enc-D self-attention
            threshold_mask_g: mask of the Enc-G self-attention and of the
                Enc-D cross-attention

        Returns:
            Output of the last Enc-D layer, shape [batch * N, window, N_d, d]
        """
        shape = x_d.shape

        TPE_d = self.ex_encoder_d(ex)
        TPE_g = self.ex_encoder_g(ex)
        SPE_d = cors_d
        SPE_g = cors_g

        x_d = self.convs_d(inps=x_d) * np.sqrt(self.d)
        x_g = self.convs_g(inps=x_g) * np.sqrt(self.d)

        # Flatten the (row, column) grid into a single node axis N.
        x_d = x_d.reshape([shape[0], shape[1], -1, shape[4], self.d])
        x_g = x_g.reshape([shape[0], shape[1], -1, self.d])
        TPE_d = torch.reshape(input=TPE_d,
                              shape=[TPE_d.shape[0], TPE_d.shape[1],
                                     TPE_d.shape[2] * TPE_d.shape[3], -1, TPE_d.shape[-1]])
        TPE_g = torch.reshape(input=TPE_g,
                              shape=[TPE_g.shape[0], TPE_g.shape[1],
                                     TPE_g.shape[2] * TPE_g.shape[3], TPE_g.shape[4]])

        x_d = self.dropout_d(x_d + TPE_d + SPE_d)
        x_g = self.dropout_g(x_g + TPE_g + SPE_g)

        for enc_g in self.Enc_G:
            x_g = enc_g(x_g, threshold_mask_g)

        # Fold the node axis into the batch axis: [batch * N, window, N_d, d].
        x_d = x_d.permute([0, 2, 1, 3, 4])
        x_d = torch.reshape(x_d, [x_d.shape[0] * x_d.shape[1], x_d.shape[2],
                                  x_d.shape[3], x_d.shape[4]])

        # Chain the Enc-D layers like the Enc-G stack above. Previously every
        # layer received the same un-encoded input and only the last layer's
        # output was kept, so the first L - 1 layers never contributed.
        # NOTE: this changes the numerical output for L > 1.
        for enc_d in self.Enc_D:
            x_d = enc_d(x_d, x_g, threshold_mask_d, threshold_mask_g)
        return x_d
class SAD(nn.Module):
    """
    SAD Switch-Attention Decoder
    """

    def __init__(self, L, d, n_h, num_hid, conv_layer, ext_dim, input_window, output_window,
                 device, r_d=0.1, input_dim=2):
        """
        Args:
            L: num of Dec-S/Dec-T layers
            d: d-dimension representations
            n_h: number of heads in Multi-space attention
            num_hid: hidden layer size
            conv_layer: num of layers of the input projection
            ext_dim: external data dimension
            input_window: input window size (input width of the time projection)
            output_window: output window size
            device: device on which the positional encoding is allocated
            r_d: drop out rate
            input_dim: width of the last axis of ``x`` in ``forward``. Defaults
                to 2, the value that used to be hard-coded.
        """
        super(SAD, self).__init__()
        self.d = d
        self.L = L
        self.pos_enc = spatial_posenc(0, 0, self.d, device)
        self.output_window = output_window
        self.ex_encoder = ex_encoding(d=d, num_hid=num_hid, input_dim=ext_dim)
        self.dropout = nn.Dropout(r_d)

        # Point-wise projection of the raw input to d dimensions.
        self.li_conv = nn.Sequential()
        self.li_conv.add_module("linear", nn.Linear(input_dim, d))
        self.li_conv.add_module("activation_relu", nn.ReLU())
        for i in range(conv_layer - 1):
            self.li_conv.add_module("linear{}".format(i), nn.Linear(d, d))
            self.li_conv.add_module("activation_relu{}".format(i), nn.ReLU())

        self.dec_s = nn.ModuleList(
            [DecoderLayer(d=d, n_h=n_h, num_hid=num_hid, r_d=r_d) for _ in range(L)])
        # Maps the encoder's time axis from input_window to output_window.
        self.linear = nn.Linear(in_features=input_window, out_features=output_window)
        self.dec_t = nn.ModuleList(
            [DecoderLayer(d=d, n_h=n_h, num_hid=num_hid, r_d=r_d, revert_q=True)
             for _ in range(L)])

    def forward(self, x, ex, dae_output, look_ahead_mask):
        """
        Args:
            x: decoder input, [batch, window, N, input_dim]
            ex: time-related features, 5-D with the feature axis last
            dae_output: encoder output, 4-D with the time axis at dim 1
            look_ahead_mask: mask of the decoder self-attention

        Returns:
            Tensor of shape [batch * N, window, d]
        """
        ex_enc = self.ex_encoder(ex)
        # Out-of-place scaling: the in-place form overwrote the output of the
        # final ReLU, which autograd may have saved for the backward pass.
        x = self.li_conv(x) * np.sqrt(self.d)
        ex_enc = torch.reshape(input=ex_enc,
                               shape=[ex_enc.shape[0], ex_enc.shape[1],
                                      ex_enc.shape[2] * ex_enc.shape[3], ex_enc.shape[4]])
        x = self.dropout(x + ex_enc + self.pos_enc)

        # Dec-S input: repeat every position output_window times and fold the
        # node axis into the batch axis.
        x_s = x.unsqueeze(3).expand(-1, -1, -1, self.output_window, -1)
        x_s = x_s.permute([0, 2, 1, 3, 4])
        x_s = torch.reshape(x_s, [x_s.shape[0] * x_s.shape[1], x_s.shape[2],
                                  x_s.shape[3], x_s.shape[4]])

        # linear: project the encoder's time axis onto the output window
        dae_output = dae_output.permute(0, 2, 3, 1)
        dae_output = self.linear(dae_output)
        dae_output = dae_output.permute(0, 3, 1, 2)

        # Chain the Dec-S layers. Previously every layer received the same
        # input and only the last layer's output was kept.
        # NOTE: this changes the numerical output for L > 1.
        for dec_s in self.dec_s:
            x_s = dec_s(x_s, dae_output, look_ahead_mask, None)
        x_s = x_s.permute([0, 2, 1, 3])

        # Dec-T input: fold the node axis into the batch axis and add a
        # singleton axis at dim 1.
        x_t = x.permute([0, 2, 1, 3])
        x_t = torch.reshape(x_t, [x_t.shape[0] * x_t.shape[1], 1, x_t.shape[2], x_t.shape[3]])

        # Chain the Dec-T layers as well (same defect as above).
        for dec_t in self.dec_t:
            x_t = dec_t(x_t, x_s, look_ahead_mask, None)

        return x_t.squeeze(1)
class DsanUse(nn.Module):
    """
    DSAN use: encoder (DAE), decoder (SAD) and the final projection.
    """

    def __init__(self, L, d, n_h, row, column, num_hid, conv_layer, input_window, output_window,
                 input_dim, ext_dim, device, r_d=0.1):
        """
        Args:
            L: num of layers in Enc-G/D / Dec-S/T
            d: d-dimension representations
            n_h: number of heads in Multi-space attention
            row: number of grid rows, used to unflatten the output
            column: number of grid columns, used to unflatten the output
            num_hid: hidden layer size
            conv_layer: num of conv layers
            input_window: input window size
            output_window: output window size
            input_dim: input dimension; also the width of the final projection
            ext_dim: external data dimension
            device: device forwarded to the decoder
            r_d: dropout rate
        """
        super(DsanUse, self).__init__()
        self.row = row
        self.column = column

        # DAE Dynamic Attention Encoder
        self.dae = DAE(L=L, d=d, n_h=n_h, num_hid=num_hid, conv_layer=conv_layer,
                       input_window=input_window, input_dim=input_dim,
                       ext_dim=ext_dim, r_d=r_d)
        # SAD Switch-Attention Decoder
        self.sad = SAD(L=L, d=d, n_h=n_h, num_hid=num_hid, conv_layer=conv_layer,
                       ext_dim=ext_dim, input_window=input_window,
                       output_window=output_window, device=device, r_d=r_d)
        # final layer
        self.final_layer = nn.Linear(d, input_dim)

    def forward(self, dae_inp_g, dae_inp, dae_inp_ex, sad_inp, sad_inp_ex, cors, cors_g,
                threshold_mask, threshold_mask_g, look_ahead_mask):
        """
        Run encoder, decoder and the tanh-activated final projection.

        Returns:
            Tensor of shape [batch, window, row, column, input_dim]
        """
        encoded = self.dae(x_d=dae_inp, x_g=dae_inp_g, ex=dae_inp_ex,
                           cors_d=cors, cors_g=cors_g,
                           threshold_mask_d=threshold_mask,
                           threshold_mask_g=threshold_mask_g)
        decoded = self.sad(x=sad_inp, ex=sad_inp_ex, dae_output=encoded,
                           look_ahead_mask=look_ahead_mask)

        projected = torch.tanh(self.final_layer(decoded))

        # NOTE(review): the folded node axis is unflattened as (column, row)
        # here, while DSAN.generate_x flattens axes 2 and 3 of X, which its
        # docstring names (row, column). Verify the spatial layout, especially
        # for non-square grids.
        unfolded = projected.reshape(-1, self.column, self.row,
                                     projected.shape[-2], projected.shape[-1])
        return unfolded.permute(0, 3, 2, 1, 4)
class DSAN(AbstractTrafficStateModel):
    """
    Traffic-state wrapper around DsanUse: builds the model inputs and masks
    from a batch, runs the network and computes the training loss.
    """

    def __init__(self, config, data_feature):
        super().__init__(config, data_feature)
        # device
        self.device = config.get('device', torch.device('cpu'))
        # data_feature
        self._scaler = self.data_feature.get('scaler')  # used for data normalization
        self.len_row = self.data_feature.get('len_row', 16)  # row
        self.len_column = self.data_feature.get('len_column', 12)  # column
        self.num_nodes = self.data_feature.get('num_nodes', 1)  # len_row * len_column
        self.feature_dim = self.data_feature.get('feature_dim', 1)  # input dimension
        self.ext_dim = self.data_feature.get('ext_dim', 1)  # dimension of external data
        self.output_dim = self.data_feature.get('output_dim', 1)  # b in paper
        # config
        self.input_window = config.get('input_window', 12)  # l in paper
        self.output_window = config.get('output_window', 12)  # F in paper
        self.L = config.get('L', 3)  # num of layers in Enc-G/D / Dec-S/T
        self.d = config.get('d', 64)  # d-dimension representations
        self.n_h = config.get('n_h', 8)  # num of head in Multi-space Attention
        self.num_hid = 4 * self.d  # hidden layer size
        self.B = config.get('B', 3)  # num of layers in conv
        self.l_d = config.get('l_d', 3)  # half length of the local block
        self.r_d = config.get('r_d', 0.1)  # dropout rate
        self.dsan = DsanUse(L=self.L, d=self.d, n_h=self.n_h,
                            row=self.len_row, column=self.len_column,
                            num_hid=self.num_hid, conv_layer=self.B,
                            input_window=self.input_window, output_window=self.output_window,
                            input_dim=self.output_dim, ext_dim=self.ext_dim,
                            device=self.device, r_d=self.r_d)

        # The spatial positional encodings depend only on fixed settings
        # (l_d, len_row, len_column, d), so build them once here instead of
        # re-running the Python double loops for every batch.
        L_d = 2 * self.l_d + 1
        self._cors = self._build_spe_table(L_d, L_d).reshape(1, 1, L_d * L_d, self.d)
        self._cors_g = self._build_spe_table(self.len_row, self.len_column).reshape(
            1, self.len_row * self.len_column, self.d)

    def _build_spe_table(self, n_rows, n_cols):
        """Return the [n_rows, n_cols, d] SPE table with coordinates centred on the grid."""
        table = torch.zeros(size=[n_rows, n_cols, self.d], device=self.device)
        for i in range(n_rows):
            for j in range(n_cols):
                table[i, j, :] = spatial_posenc(i - n_rows // 2, j - n_cols // 2, self.d,
                                                device=self.device)
        return table

    def generate_x(self, batch):
        """
        Build all model inputs from ``batch``.

        Args:
            batch:
                batch['X'].shape == [batch_size, input_window, row, column, feature_dim]
                batch['y'].shape == [batch_size, output_window, row, column, output_dim]

        Returns:
            dae_inp_g: X in figure(2),
                shape == [batch_size, input_window, row, column, output_dim]
            dae_inp: X_d in figure(2),
                shape == [batch_size, input_window, row, column, L_d, L_d, output_dim]
                with L_d = 2 * l_d + 1; cells outside the grid stay zero
            dae_inp_ex: external data for TPE, the features of batch['X'] after
                the first output_dim ones,
                shape == [batch_size, input_window, row, column, feature_dim - output_dim]
            sad_inp: x in figure(2), the last output_window steps of X,
                shape == [batch_size, -1, row * column, output_dim]
            sad_inp_ex: external data for TPE, the last output_window steps of
                dae_inp_ex
            cors: for SPE, shape == [1, 1, L_d * L_d, d]
            cors_g: for SPE, shape == [1, len_row * len_column, d]
            y: batch['y']
        """
        X = batch['X'][:, :, :, :, :self.output_dim]
        X_ext = batch['X'][:, :, :, :, self.output_dim:]
        batch_size, n_steps, n_rows, n_cols, n_feat = X.shape[:5]
        l_d = self.l_d
        L_d = 2 * l_d + 1  # l_d: half length of the block (L_d = 2 * l_d + 1)

        # dae_inp_g
        dae_inp_g = X

        # dae_inp: for every cell (i, j) copy its L_d x L_d neighbourhood,
        # clipping both the source window and the destination window at the
        # borders of the grid.
        dae_inp = torch.zeros(size=[batch_size, n_steps, n_rows, n_cols, L_d, L_d, n_feat],
                              device=self.device)
        for i in range(n_rows):
            row_dst = slice(max(0, l_d - i), min(L_d, n_rows - i + l_d))
            row_src = slice(max(0, i - l_d), min(n_rows, i + l_d + 1))
            for j in range(n_cols):
                col_dst = slice(max(0, l_d - j), min(L_d, n_cols - j + l_d))
                col_src = slice(max(0, j - l_d), min(n_cols, j + l_d + 1))
                dae_inp[:, :, i, j, row_dst, col_dst, :] = X[:, :, row_src, col_src, :]

        # dae_inp_ex
        dae_inp_ex = X_ext
        # sad_inp
        sad_inp = torch.reshape(input=X[:, -self.output_window:, :, :, :self.output_dim],
                                shape=[batch_size, -1, n_rows * n_cols, n_feat])
        # sad_inp_ex
        sad_inp_ex = X_ext[:, -self.output_window:, :, :, :]
        # y
        y = batch['y']
        return dae_inp_g, dae_inp, dae_inp_ex, sad_inp, sad_inp_ex, self._cors, self._cors_g, y

    def predict(self, batch):
        """
        Run the network on ``batch``.

        Returns:
            Output of DsanUse.forward
        """
        # generate x
        dae_inp_g, dae_inp, dae_inp_ex, sad_inp, sad_inp_ex, cors, cors_g, y = \
            self.generate_x(batch=batch)
        # generate mask
        threshold_mask_g, threshold_mask, combined_mask = create_masks(
            dae_inp_g[..., :self.output_dim], dae_inp[..., :self.output_dim], sad_inp)
        # reshape: merge the two block axes (L_d, L_d) into one N_d axis
        dae_inp = torch.reshape(input=dae_inp,
                                shape=[dae_inp.shape[0], dae_inp.shape[1], dae_inp.shape[2],
                                       dae_inp.shape[3], dae_inp.shape[4] * dae_inp.shape[5],
                                       dae_inp.shape[6]])
        res = self.dsan(
            dae_inp_g=dae_inp_g,
            dae_inp=dae_inp,
            dae_inp_ex=dae_inp_ex,
            sad_inp=sad_inp,
            sad_inp_ex=sad_inp_ex,
            cors=cors,
            cors_g=cors_g,
            threshold_mask=threshold_mask,
            threshold_mask_g=threshold_mask_g,
            look_ahead_mask=combined_mask
        )
        return res

    def calculate_loss(self, batch):
        """
        Masked RMSE between the prediction and batch['y'], both mapped back
        through the scaler's inverse transform.
        """
        y_true = batch['y']
        y_pred = self.predict(batch)
        y_true = self._scaler.inverse_transform(y_true[..., :self.output_dim])
        y_predicted = self._scaler.inverse_transform(y_pred[..., :self.output_dim])
        return masked_rmse_torch(y_predicted, y_true, 0)