Transformer - 时间特征的处理
flyfish
ETTm1.csv 有如下内容:
假如其中有时间字符串 2016/7/1 0:45:00,
如何把这样的时间字符串变成时间特征列表?
from typing import List

import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset


class TimeFeature:
    """Base class for callables that turn timestamps into one numeric feature.

    Subclasses override ``__call__``. The base implementation does nothing
    and returns ``None``.

    NOTE(review): the ``np.ndarray`` return annotation is nominal. Subclasses
    return whatever pandas arithmetic on the accessed attribute yields (a
    plain float when called with a single ``Timestamp``) -- wrap the result in
    ``np.asarray`` if a real ndarray is required.
    """

    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"


class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Days are 1-based, so shift to 0..30 before scaling.
        return (index.day - 1) / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Dividing by 365 (not 364) keeps day 366 of a leap year at +0.5.
        return (index.dayofyear - 1) / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # ISO week numbers run 1..53, hence the divisor of 52.
        return (index.isocalendar().week - 1) / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given
    frequency string.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as
        "12H", "5min", "1D" etc.

    Returns
    -------
    List[TimeFeature]
        Freshly constructed feature objects for the first offset type in the
        table below that the parsed frequency is an instance of.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the supported offset types.
    """
    # Coarser frequencies get fewer features: anything finer than the
    # sampling step would be constant across the series.
    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    offset = to_offset(freq_str)

    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]

    # One entry per line so the list of supported frequencies stays readable.
    supported_freq_msg = (
        f"Unsupported frequency {freq_str}\n"
        "The following frequencies are supported:\n"
        "    Y   - yearly\n"
        "        alias: A\n"
        "    M   - monthly\n"
        "    W   - weekly\n"
        "    D   - daily\n"
        "    B   - business days\n"
        "    H   - hourly\n"
        "    T   - minutely\n"
        "        alias: min\n"
        "    S   - secondly\n"
    )
    raise RuntimeError(supported_freq_msg)
def printf_time_features():
    """Print every time feature selected for freq "h" and its encoded value.

    Uses the fixed sample timestamp "2016/7/1 0:45:00". For each feature it
    prints the feature's repr followed by a blank line, then the encoded value.
    """
    freq = "h"
    dates = pd.to_datetime("2016/7/1 0:45:00")
    for feat in time_features_from_frequency_str(freq):
        print(feat, "\n")
        print(feat(dates))


printf_time_features()
#返回适用于给定频率字符串的时间特征列表
# 频率字符串举例
# Y - yearly
# alias: A
# M - monthly
# W - weekly
# D - daily
# B - business days
# H - hourly
# T - minutely
# alias: min
# S - secondly
输出
freq="h" 时输出4个特征(HourOfDay、DayOfWeek、DayOfMonth、DayOfYear),时间字符串的各个分量被编码为[-0.5, 0.5]之间的值
# HourOfDay()
# -0.5
# DayOfWeek()
# 0.16666666666666663
# DayOfMonth()
# -0.5
# DayOfYear()
# -0.0013698630136986245
batch_x_mark: tensor([[[-0.5000, 0.1667, -0.5000, -0.0014],[-0.5000, 0.1667, -0.5000, -0.0014],[-0.5000, 0.1667, -0.5000, -0.0014],[-0.5000, 0.1667, -0.5000, -0.0014],[-0.4565, 0.1667, -0.5000, -0.0014],[-0.4565, 0.1667, -0.5000, -0.0014],[-0.4565, 0.1667, -0.5000, -0.0014],[-0.4565, 0.1667, -0.5000, -0.0014],[-0.4130, 0.1667, -0.5000, -0.0014],[-0.4130, 0.1667, -0.5000, -0.0014],[-0.4130, 0.1667, -0.5000, -0.0014],[-0.4130, 0.1667, -0.5000, -0.0014],[-0.3696, 0.1667, -0.5000, -0.0014],[-0.3696, 0.1667, -0.5000, -0.0014],[-0.3696, 0.1667, -0.5000, -0.0014],[-0.3696, 0.1667, -0.5000, -0.0014],[-0.3261, 0.1667, -0.5000, -0.0014],[-0.3261, 0.1667, -0.5000, -0.0014],[-0.3261, 0.1667, -0.5000, -0.0014],[-0.3261, 0.1667, -0.5000, -0.0014],[-0.2826, 0.1667, -0.5000, -0.0014],[-0.2826, 0.1667, -0.5000, -0.0014],[-0.2826, 0.1667, -0.5000, -0.0014],[-0.2826, 0.1667, -0.5000, -0.0014]]])
batch_y_mark: tensor([[[-0.3696, 0.1667, -0.5000, -0.0014],[-0.3696, 0.1667, -0.5000, -0.0014],[-0.3696, 0.1667, -0.5000, -0.0014],[-0.3696, 0.1667, -0.5000, -0.0014],[-0.3261, 0.1667, -0.5000, -0.0014],[-0.3261, 0.1667, -0.5000, -0.0014],[-0.3261, 0.1667, -0.5000, -0.0014],[-0.3261, 0.1667, -0.5000, -0.0014],[-0.2826, 0.1667, -0.5000, -0.0014],[-0.2826, 0.1667, -0.5000, -0.0014],[-0.2826, 0.1667, -0.5000, -0.0014],[-0.2826, 0.1667, -0.5000, -0.0014],[-0.2391, 0.1667, -0.5000, -0.0014],[-0.2391, 0.1667, -0.5000, -0.0014],[-0.2391, 0.1667, -0.5000, -0.0014],[-0.2391, 0.1667, -0.5000, -0.0014],[-0.1957, 0.1667, -0.5000, -0.0014],[-0.1957, 0.1667, -0.5000, -0.0014],[-0.1957, 0.1667, -0.5000, -0.0014],[-0.1957, 0.1667, -0.5000, -0.0014],[-0.1522, 0.1667, -0.5000, -0.0014],[-0.1522, 0.1667, -0.5000, -0.0014],[-0.1522, 0.1667, -0.5000, -0.0014],[-0.1522, 0.1667, -0.5000, -0.0014],[-0.1087, 0.1667, -0.5000, -0.0014],[-0.1087, 0.1667, -0.5000, -0.0014],[-0.1087, 0.1667, -0.5000, -0.0014],[-0.1087, 0.1667, -0.5000, -0.0014],[-0.0652, 0.1667, -0.5000, -0.0014],[-0.0652, 0.1667, -0.5000, -0.0014],[-0.0652, 0.1667, -0.5000, -0.0014],[-0.0652, 0.1667, -0.5000, -0.0014],[-0.0217, 0.1667, -0.5000, -0.0014],[-0.0217, 0.1667, -0.5000, -0.0014],[-0.0217, 0.1667, -0.5000, -0.0014],[-0.0217, 0.1667, -0.5000, -0.0014]]])
查看配置
打印属性值
# Dump every instance attribute of `self` as one "name:value" line each.
attribute_lines = []
for attribute in self.__dict__.items():
    attribute_lines.append('%s:%s' % attribute)
print('\n'.join(attribute_lines))
seq_len:24
label_len:12
pred_len:24
set_type:0
features:M
target:OT
scale:True
timeenc:1
freq:h
root_path:./dataset/ETT-small/
data_path:ETTm1.csv
scaler:StandardScaler()
batch_x, batch_y, batch_x_mark, batch_y_mark各自的形状
for i, (batch_x, , , ): torch.Size([1, 24, 7])
for i, (, batch_y, , ): torch.Size([1, 36, 7])
for i, (, , batch_x_mark, ): torch.Size([1, 24, 4])
for i, (, , , batch_y_mark): torch.Size([1, 36, 4])