一、时间序列常用方法
- 对时间做一些移动/滞后、频率转换、采样等相关操作
# 导包
import numpy as np
import pandas as pd
# Daily DatetimeIndex: 365 consecutive days starting on 2024-02-08
index = pd.date_range(start="2024-2-8", periods=365, freq="D")
# One random integer in [0, 500) for every day of the index
ts = pd.Series(
    data=np.random.randint(low=0, high=500, size=len(index)),
    index=index,
)
ts
# 执行结果
2024-02-08 381
2024-02-09 120
2024-02-10 209
2024-02-11 33
2024-02-12 367
...
2025-02-02 56
2025-02-03 395
2025-02-04 129
2025-02-05 129
2025-02-06 71
Freq: D, Length: 365, dtype: int32
- 移动
# Shift the values one step later (periods defaults to 1); the index is unchanged,
# the first slot becomes NaN and the integer data is upcast to float64
ts.shift()
# 执行结果
2024-02-08 NaN
2024-02-09 381.0
2024-02-10 120.0
2024-02-11 209.0
2024-02-12 33.0
...
2025-02-02 12.0
2025-02-03 56.0
2025-02-04 395.0
2025-02-05 129.0
2025-02-06 129.0
Freq: D, Length: 365, dtype: float64
# Shift the values two steps later; the first two slots become NaN
ts.shift(periods=2)
# 执行结果
2024-02-08 NaN
2024-02-09 NaN
2024-02-10 381.0
2024-02-11 120.0
2024-02-12 209.0
...
2025-02-02 349.0
2025-02-03 12.0
2025-02-04 56.0
2025-02-05 395.0
2025-02-06 129.0
Freq: D, Length: 365, dtype: float64
# A negative periods shifts the values earlier; here the last two slots become NaN
ts.shift(periods=-2)
# 执行结果
2024-02-08 209.0
2024-02-09 33.0
2024-02-10 367.0
2024-02-11 154.0
2024-02-12 43.0
...
2025-02-02 129.0
2025-02-03 129.0
2025-02-04 71.0
2025-02-05 NaN
2025-02-06 NaN
Freq: D, Length: 365, dtype: float64
- 频率转换
# Day -> week: keep one observation every 7 days, starting from the first timestamp
# (values are selected, not aggregated)
ts.asfreq(pd.tseries.offsets.Week())
# 执行结果
2024-02-08 381
2024-02-15 68
2024-02-22 432
2024-02-29 191
2024-03-07 490
2024-03-14 297
2024-03-21 244
2024-03-28 405
2024-04-04 129
2024-04-11 492
2024-04-18 351
2024-04-25 226
2024-05-02 460
2024-05-09 375
2024-05-16 296
2024-05-23 428
2024-05-30 379
2024-06-06 431
2024-06-13 296
2024-06-20 274
2024-06-27 321
2024-07-04 242
2024-07-11 110
2024-07-18 298
2024-07-25 236
2024-08-01 281
2024-08-08 81
2024-08-15 415
2024-08-22 489
2024-08-29 14
2024-09-05 80
2024-09-12 197
2024-09-19 245
2024-09-26 261
2024-10-03 240
2024-10-10 324
2024-10-17 474
2024-10-24 283
2024-10-31 44
2024-11-07 98
2024-11-14 380
2024-11-21 13
2024-11-28 42
2024-12-05 292
2024-12-12 95
2024-12-19 147
2024-12-26 348
2025-01-02 379
2025-01-09 54
2025-01-16 235
2025-01-23 8
2025-01-30 479
2025-02-06 71
Freq: W, dtype: int32
# Day -> month: keep only the observation that falls on each month-end date
ts.asfreq(pd.tseries.offsets.MonthEnd())
# 执行结果
2024-02-29 191
2024-03-31 237
2024-04-30 344
2024-05-31 440
2024-06-30 425
2024-07-31 268
2024-08-31 325
2024-09-30 307
2024-10-31 44
2024-11-30 91
2024-12-31 289
2025-01-31 349
Freq: M, dtype: int32
# Day -> hour: upsampling creates new timestamps; fill_value supplies their value
ts.asfreq(pd.tseries.offsets.Hour(),fill_value=0)
# 执行结果
2024-02-08 00:00:00 381
2024-02-08 01:00:00 0
2024-02-08 02:00:00 0
2024-02-08 03:00:00 0
2024-02-08 04:00:00 0
...
2025-02-05 20:00:00 0
2025-02-05 21:00:00 0
2025-02-05 22:00:00 0
2025-02-05 23:00:00 0
2025-02-06 00:00:00 71
Freq: H, Length: 8737, dtype: int32
二、resample:根据日期维度进行数据聚合
- 按照分钟(T)、小时(H)、日(D)、周(W)、月(M)、年(Y)等来作为日期维度(注意:自 pandas 2.2 起,T、H、S、M、Y 等别名已被弃用,对应的新写法为 min、h、s、ME、YE)
# 导包
import numpy as np
import pandas as pd
# Fresh sample data for the resampling examples: one value per day for 365 days
index = pd.date_range(start="2024-2-8", freq="D", periods=365)
# Random integers drawn from [0, 500), one per timestamp
daily_values = np.random.randint(0, 500, size=len(index))
ts = pd.Series(daily_values, index=index)
ts
# 执行结果
2024-02-08 484
2024-02-09 23
2024-02-10 135
2024-02-11 407
2024-02-12 169
...
2025-02-02 358
2025-02-03 52
2025-02-04 140
2025-02-05 305
2025-02-06 227
Freq: D, Length: 365, dtype: int32
# Resampling with resample
ts.resample("D").sum() # aggregate per 1-day bin and sum (same as the input for daily data)
# 执行结果
2024-02-08 484
2024-02-09 23
2024-02-10 135
2024-02-11 407
2024-02-12 169
...
2025-02-02 358
2025-02-03 52
2025-02-04 140
2025-02-05 305
2025-02-06 227
Freq: D, Length: 365, dtype: int32
ts.resample("2D").sum() # aggregate per 2-day bin and sum
# 执行结果
2024-02-08 507
2024-02-10 542
2024-02-12 397
2024-02-14 696
2024-02-16 670
...
2025-01-29 696
2025-01-31 911
2025-02-02 410
2025-02-04 445
2025-02-06 227
Freq: 2D, Length: 183, dtype: int32
ts.resample("2W").sum() # aggregate per 2-week bin (weeks ending on Sunday) and sum
# 执行结果
2024-02-11 1049
2024-02-25 3436
2024-03-10 3220
2024-03-24 3876
2024-04-07 3139
2024-04-21 3295
2024-05-05 3379
2024-05-19 3297
2024-06-02 3369
2024-06-16 2960
2024-06-30 4095
2024-07-14 2866
2024-07-28 2571
2024-08-11 3603
2024-08-25 3739
2024-09-08 3119
2024-09-22 2961
2024-10-06 2966
2024-10-20 3809
2024-11-03 3701
2024-11-17 3496
2024-12-01 4087
2024-12-15 3275
2024-12-29 3301
2025-01-12 3018
2025-01-26 3032
2025-02-09 3260
Freq: 2W-SUN, dtype: int32
# Sum over 3-month bins labelled by their month-end date. The bins are anchored on the
# first month end in the data, so they are not calendar quarters.
# An offset object is used instead of the "3M" string because the "M" alias is
# deprecated since pandas 2.2 (renamed to "ME"); the object form works on every version.
ts.resample(pd.offsets.MonthEnd(3)).sum()
# 执行结果
2024-02-29 5418
2024-05-31 22268
2024-08-31 21430
2024-11-30 22536
2025-02-28 16267
Freq: 3M, dtype: int32
# Sum over 3-month bins, then accumulate the bin totals into a running total.
# The MonthEnd offset object replaces the "3M" string, whose "M" alias is deprecated
# since pandas 2.2 (renamed to "ME").
ts.resample(pd.offsets.MonthEnd(3)).sum().cumsum()
# 执行结果
2024-02-29 5418
2024-05-31 27686
2024-08-31 49116
2024-11-30 71652
2025-02-28 87919
Freq: 3M, dtype: int32
# Sum per 1-hour bin. This upsamples the daily data: hours without an observation sum to 0.
# The Hour offset object replaces the "H" alias, deprecated since pandas 2.2 (now "h").
ts.resample(pd.offsets.Hour()).sum()
# 执行结果
2024-02-08 00:00:00 484
2024-02-08 01:00:00 0
2024-02-08 02:00:00 0
2024-02-08 03:00:00 0
2024-02-08 04:00:00 0
...
2025-02-05 20:00:00 0
2025-02-05 21:00:00 0
2025-02-05 22:00:00 0
2025-02-05 23:00:00 0
2025-02-06 00:00:00 227
Freq: H, Length: 8737, dtype: int32
# Sum per 1-minute bin; minutes without an observation sum to 0.
# The Minute offset object replaces the "T" alias, deprecated since pandas 2.2 (now "min").
ts.resample(pd.offsets.Minute()).sum()
# 执行结果
2024-02-08 00:00:00 484
2024-02-08 00:01:00 0
2024-02-08 00:02:00 0
2024-02-08 00:03:00 0
2024-02-08 00:04:00 0
...
2025-02-05 23:56:00 0
2025-02-05 23:57:00 0
2025-02-05 23:58:00 0
2025-02-05 23:59:00 0
2025-02-06 00:00:00 227
Freq: T, Length: 524161, dtype: int32
# Sum per 1-second bin; seconds without an observation sum to 0.
# NOTE: for one year of data this produces roughly 31 million rows.
# The Second offset object replaces the "S" alias, deprecated since pandas 2.2 (now "s").
ts.resample(pd.offsets.Second()).sum()
# 执行结果
2024-02-08 00:00:00 484
2024-02-08 00:00:01 0
2024-02-08 00:00:02 0
2024-02-08 00:00:03 0
2024-02-08 00:00:04 0
...
2025-02-05 23:59:56 0
2025-02-05 23:59:57 0
2025-02-05 23:59:58 0
2025-02-05 23:59:59 0
2025-02-06 00:00:00 227
Freq: S, Length: 31449601, dtype: int32
- DataFrame重采样
# Resampling a DataFrame on one of its datetime columns
d = {
    "price": [10, 11, 2, 44, 33, 44, 55, 66],
    "score": [40, 30, 20, 50, 60, 70, 80, 10],
    # freq="W" anchors on Sundays, so the first timestamp is 2024-02-11
    "week": pd.date_range("2024-2-8", periods=8, freq="W"),
}
df = pd.DataFrame(d)
df
# Group the rows into month-end bins keyed on the "week" column.
# A MonthEnd offset object is used instead of the "M" string because that alias is
# deprecated since pandas 2.2 (renamed to "ME"); the object form works on every version.
by_month = df.resample(pd.offsets.MonthEnd(), on="week")
# Monthly totals of every remaining column
by_month.sum()
# Same totals through apply. The string name replaces np.sum because passing NumPy
# callables to apply/agg is deprecated since pandas 2.1.
by_month.apply("sum")
# Per-column aggregation by month: mean of price, sum of score
by_month.agg({"price": "mean", "score": "sum"})
三、时区
# 导包
import numpy as np
import pandas as pd
# Three consecutive days starting at midnight on 2024-02-08 (no time zone attached yet)
index = pd.date_range(start="2024-2-8 00:00", periods=3, freq="D")
# One standard-normal sample per timestamp
samples = np.random.randn(len(index))
ts = pd.Series(data=samples, index=index)
ts
# 执行结果
2024-02-08 1.866786
2024-02-09 -0.475396
2024-02-10 -0.723146
Freq: D, dtype: float64
# tz is short for time zone
import pytz
# List of commonly used time zone names
pytz.common_timezones
# Attach a time zone to the naive index (here UTC); the clock times themselves stay the same
ts = ts.tz_localize(tz="UTC")
ts
# 执行结果
2024-02-08 00:00:00+00:00 1.866786
2024-02-09 00:00:00+00:00 -0.475396
2024-02-10 00:00:00+00:00 -0.723146
Freq: D, dtype: float64
# Convert to another time zone: same instants, shown as Shanghai local time (UTC+8)
ts.tz_convert(tz="Asia/Shanghai")
# 执行结果
2024-02-08 08:00:00+08:00 1.866786
2024-02-09 08:00:00+08:00 -0.475396
2024-02-10 08:00:00+08:00 -0.723146
Freq: D, dtype: float64
© 版权声明
文章版权归作者所有,未经允许请勿转载。如内容涉嫌侵权,请在本页底部进入<联系我们>进行举报投诉!
THE END















暂无评论内容