# Source code for libcity.data.dataset.trajectory_encoder.strnn_encoder

import os
import numpy as np
from libcity.data.dataset.trajectory_encoder.abstract_trajectory_encoder import AbstractTrajectoryEncoder
from libcity.utils import parse_time, cal_basetime, cal_timeoff

# Config keys whose values (when present in the config) are concatenated, in
# this order, to build the encoder's cache file name.
parameter_list = [
    'dataset',
    'min_session_len',
    'min_sessions',
    'traj_encoder',
    'cut_method',
    'window_size',
    'history_type',
]


class StrnnEncoder(AbstractTrajectoryEncoder):
    """Trajectory encoder that produces per-trajectory STRNN inputs.

    For every trajectory the encoder emits the re-indexed location sequence,
    the time codes, the last point as prediction target, the re-indexed user
    id, and the distance from each input point to the target location.
    Location coordinates are loaded once from the dataset's ``.geo`` file.
    """

    def __init__(self, config):
        """Initialise counters, the cache file name and the coordinate table.

        Args:
            config: configuration object; must provide ``config['dataset']``
                and support ``key in config`` / ``config[key]`` lookups.

        Raises:
            OSError: if ``./raw_data/<dataset>/<dataset>.geo`` cannot be opened.
            ValueError: if a data row of the ``.geo`` file cannot be parsed.
        """
        super().__init__(config)
        self.uid = 0
        # Some location ids of the raw dataset are never used, so locations
        # are re-indexed densely in order of first appearance.
        self.location2id = {}
        self.loc_id = 0
        self.tim_max = 0  # largest time code seen so far
        self.feature_dict = {
            'current_loc': 'int',
            'current_tim': 'int',
            'target': 'int',
            'target_tim': 'int',
            'uid': 'int',
            'current_dis': 'float',
        }
        parameters_str = ''.join(
            '_' + str(self.config[key])
            for key in parameter_list
            if key in self.config
        )
        self.cache_file_name = os.path.join(
            './libcity/cache/dataset_cache/',
            'trajectory_{}.json'.format(parameters_str))
        self.geo_coord = {}
        path = "./raw_data/{}/{}.geo".format(config['dataset'], config['dataset'])
        # The context manager guarantees the handle is closed even when a
        # malformed row raises while parsing.
        with open(path) as f_geo:
            next(f_geo, None)  # skip the header row
            for line in f_geo:
                if not line.strip():
                    continue  # tolerate blank lines
                tokens = (line.strip()
                          .replace("\"", "")
                          .replace("[", "")
                          .replace("]", "")
                          .split(','))
                # NOTE(review): coordinates were previously parsed with
                # eval(); float() is used instead so that the content of a
                # data file can never be executed as code.
                loc_id = int(tokens[0])
                loc_longi = float(tokens[2])
                loc_lati = float(tokens[3])
                self.geo_coord[loc_id] = [loc_lati, loc_longi]

    def encode(self, uid, trajectories, negative_sample=None):
        """Encode all trajectories of one user.

        Location ids are re-indexed, and each timestamp is converted to an
        integer offset relative to a base time derived from the trajectory's
        first point (see ``cal_basetime`` / ``cal_timeoff``).

        Args:
            uid: original user id. It is ignored; users are re-indexed with a
                running counter in call order.
            trajectories: iterable of trajectories, each a sequence of points.
                This method reads ``point[2]`` as the time (passed to
                ``parse_time``) and ``point[4]`` as the location id, which
                must be a key of the ``.geo`` coordinate table.
                NOTE(review): an earlier docstring described points as
                ``(location ID, timestamp, timezone_offset)``, which does not
                match the indices used here -- confirm against the caller.
            negative_sample: unused; accepted for interface compatibility.

        Returns:
            list: one entry per non-empty trajectory, laid out as
            ``[current_loc, current_tim, target, target_tim, uid,
            current_dis]`` (same order as ``feature_dict``). ``current_*``
            cover every point but the last; the last point is the target.
        """
        # The user id is simply re-encoded with a running counter.
        uid = self.uid
        self.uid += 1
        encoded_trajectories = []
        for traj in trajectories:
            if not traj:
                # Nothing to encode; indexing traj[0] below would fail.
                continue
            current_loc = []
            current_tim = []
            current_longi = []
            current_lati = []
            current_points = []
            start_time = parse_time(traj[0][2])
            # Midnight of the starting day is the reference for time offsets.
            base_time = cal_basetime(start_time, True)
            for point in traj:
                loc = point[4]
                now_time = parse_time(point[2])
                if loc not in self.location2id:
                    self.location2id[loc] = self.loc_id
                    self.loc_id += 1
                current_points.append(loc)
                current_loc.append(self.location2id[loc])
                lati, longi = self.geo_coord[loc][0], self.geo_coord[loc][1]
                current_lati.append(lati)
                current_longi.append(longi)
                time_code = int(cal_timeoff(now_time, base_time))
                self.tim_max = max(self.tim_max, time_code)
                current_tim.append(time_code)
            # The trajectory is encoded; now assemble the model input.
            # The last point is the target, everything before it is the input.
            target = current_loc[-1]
            target_tim = current_tim[-1]
            current_loc = current_loc[:-1]
            current_tim = current_tim[:-1]
            target_lati, target_longi = self.geo_coord[current_points[-1]][:2]
            # Distance from every input point to the target location;
            # broadcasting replaces the explicit constant-filled arrays.
            delta_lati = target_lati - np.asarray(current_lati[:-1], dtype=float)
            delta_longi = target_longi - np.asarray(current_longi[:-1], dtype=float)
            current_dis = euclidean_dist(delta_lati, delta_longi)
            encoded_trajectories.append(
                [current_loc, current_tim, target, target_tim, uid, current_dis])
        return encoded_trajectories

    def gen_data_feature(self):
        """Populate ``self.pad_item`` and ``self.data_feature``.

        The location pad is the first unused location index and the time pad
        is one past the largest time code seen, so the reported sizes leave
        room for the padding value.
        """
        loc_pad = self.loc_id
        tim_pad = self.tim_max + 1
        dis_pad = 0.0
        self.pad_item = {
            'current_loc': loc_pad,
            'current_tim': tim_pad,
            'current_dis': dis_pad,
        }
        self.data_feature = {
            'loc_size': self.loc_id + 1,
            'tim_size': self.tim_max + 2,
            'uid_size': self.uid,
            'loc_pad': loc_pad,
            'tim_pad': tim_pad,
            'dis_pad': dis_pad,
        }
def euclidean_dist(x, y):
    """Return the element-wise Euclidean norm ``sqrt(x**2 + y**2)``.

    Args:
        x: scalar or array-like of first-axis differences.
        y: scalar or array-like of second-axis differences, broadcastable
            against ``x``.

    Returns:
        A Python float for scalar inputs, otherwise a (nested) list of floats
        with the broadcast shape of ``x`` and ``y``.
    """
    # np.hypot avoids the overflow/underflow that squaring the operands
    # separately can cause for very large or very small magnitudes.
    return np.hypot(x, y).tolist()