Pythonによる状態空間モデル

In [1]:
# 基本のライブラリを読み込む
import numpy as np
import pandas as pd
from scipy import stats

# グラフ描画
from matplotlib import pylab as plt
import seaborn as sns
%matplotlib inline

# グラフを横長にする
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6

# 統計モデル
import statsmodels.api as sm

データの読み込み

In [2]:
# 日付形式で読み込む
dateparse = lambda dates: pd.datetime.strptime(dates, '%Y-%m')
data = pd.read_csv('AirPassengers.csv', index_col='Month', date_parser=dateparse, dtype='float')
data.head()
Out[2]:
#Passengers
Month
1949-01-01 112.0
1949-02-01 118.0
1949-03-01 132.0
1949-04-01 129.0
1949-05-01 121.0
In [3]:
# 日付形式にする
ts = data['#Passengers'] 
ts.head()
Out[3]:
Month
1949-01-01    112.0
1949-02-01    118.0
1949-03-01    132.0
1949-04-01    129.0
1949-05-01    121.0
Name: #Passengers, dtype: float64
In [4]:
# プロット
plt.plot(ts)
Out[4]:
[<matplotlib.lines.Line2D at 0x2717d96d9e8>]

ローカルレベルモデルの推定

In [5]:
# ローカルレベルモデルの推定
mod_local_level = sm.tsa.UnobservedComponents(ts, 'local level')

# 最尤法によるパラメタの推定
res_local_level = mod_local_level.fit()

# 推定されたパラメタ一覧
print(res_local_level.summary())

# 推定された状態・トレンドの描画
rcParams['figure.figsize'] = 15, 15
fig = res_local_level.plot_components()
                        Unobserved Components Results                         
==============================================================================
Dep. Variable:            #Passengers   No. Observations:                  144
Model:                    local level   Log Likelihood                -705.955
Date:                Mon, 29 May 2017   AIC                           1415.909
Time:                        18:20:05   BIC                           1421.849
Sample:                    01-01-1949   HQIC                          1418.323
                         - 12-01-1960                                         
Covariance Type:                  opg                                         
====================================================================================
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
sigma2.irregular  1.312e-07     85.482   1.54e-09      1.000    -167.542     167.542
sigma2.level      1136.4043    176.695      6.431      0.000     790.088    1482.721
===================================================================================
Ljung-Box (Q):                      470.66   Jarque-Bera (JB):                 4.99
Prob(Q):                              0.00   Prob(JB):                         0.08
Heteroskedasticity (H):               9.22   Skew:                            -0.33
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.64
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

ローカル線形トレンドモデルの推定

In [6]:
# ローカル線形トレンドモデル

mod_trend = sm.tsa.UnobservedComponents(
    ts,
    'local linear trend'
)

# 最尤法によるパラメタの推定
# ワーニングが出たのでBFGS法で最適化する
res_trend = mod_trend.fit(method='bfgs')

# 推定されたパラメタ一覧
print(res_trend.summary())

# 推定された状態・トレンドの描画
rcParams['figure.figsize'] = 15, 20
fig = res_trend.plot_components()
Optimization terminated successfully.
         Current function value: 5.015259
         Iterations: 19
         Function evaluations: 30
         Gradient evaluations: 30
                        Unobserved Components Results                         
==============================================================================
Dep. Variable:            #Passengers   No. Observations:                  144
Model:             local linear trend   Log Likelihood                -722.197
Date:                Mon, 29 May 2017   AIC                           1450.394
Time:                        18:20:06   BIC                           1459.304
Sample:                    01-01-1949   HQIC                          1454.015
                         - 12-01-1960                                         
Covariance Type:                  opg                                         
====================================================================================
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
sigma2.irregular    86.5907    167.523      0.517      0.605    -241.749     414.930
sigma2.level         0.0004    652.086   6.22e-07      1.000   -1278.064    1278.065
sigma2.trend      1086.3825    392.310      2.769      0.006     317.468    1855.297
===================================================================================
Ljung-Box (Q):                      365.39   Jarque-Bera (JB):                 6.81
Prob(Q):                              0.00   Prob(JB):                         0.03
Heteroskedasticity (H):               7.89   Skew:                            -0.21
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.99
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

季節変動の取り込み

In [7]:
# 季節変動ありのローカルレベルモデル

mod_season_local_level = sm.tsa.UnobservedComponents(
    ts,
    'local level',
    seasonal=12)

# まずはNelder-Meadでパラメタを推定し、その結果を初期値としてまた最適化する。2回目はBFGSを使用。
res_season_local_level = mod_season_local_level.fit(
    method='bfgs', 
    maxiter=500, 
    start_params=mod_season_local_level.fit(method='nm', maxiter=500).params,
)

# 推定されたパラメタ一覧
print(res_season_local_level.summary())

# 推定された状態・トレンド・季節の影響の描画
rcParams['figure.figsize'] = 15, 20
fig = res_season_local_level.plot_components()
Optimization terminated successfully.
         Current function value: 3.969241
         Iterations: 148
         Function evaluations: 266
Optimization terminated successfully.
         Current function value: 3.969241
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
                            Unobserved Components Results                            
=====================================================================================
Dep. Variable:                   #Passengers   No. Observations:                  144
Model:                           local level   Log Likelihood                -571.571
                   + stochastic seasonal(12)   AIC                           1149.142
Date:                       Mon, 29 May 2017   BIC                           1158.051
Time:                               18:20:09   HQIC                          1152.762
Sample:                           01-01-1949                                         
                                - 12-01-1960                                         
Covariance Type:                         opg                                         
====================================================================================
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
sigma2.irregular  1.088e-09     19.019   5.72e-11      1.000     -37.276      37.276
sigma2.level       172.0781     33.807      5.090      0.000     105.818     238.339
sigma2.seasonal     17.3675      6.233      2.787      0.005       5.152      29.583
===================================================================================
Ljung-Box (Q):                      507.24   Jarque-Bera (JB):                 3.83
Prob(Q):                              0.00   Prob(JB):                         0.15
Heteroskedasticity (H):               9.10   Skew:                             0.07
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.82
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [8]:
# 季節変動ありのローカル線形トレンドモデル

mod_season_trend = sm.tsa.UnobservedComponents(
    ts,
    'local linear trend',
    seasonal=12)

# まずはNelder-Meadでパラメタを推定し、その結果を初期値としてまた最適化する。2回目はBFGSを使用。
res_season_trend = mod_season_trend.fit(
    method='bfgs', 
    maxiter=500, 
    start_params=mod_season_trend.fit(method='nm', maxiter=500).params,
)

# 推定されたパラメタ一覧
print(res_season_trend.summary())

# 推定された状態・トレンド・季節の影響の描画
rcParams['figure.figsize'] = 15, 20
fig = res_season_trend.plot_components()
Optimization terminated successfully.
         Current function value: 3.930912
         Iterations: 495
         Function evaluations: 827
Optimization terminated successfully.
         Current function value: 3.930912
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
                            Unobserved Components Results                            
=====================================================================================
Dep. Variable:                   #Passengers   No. Observations:                  144
Model:                    local linear trend   Log Likelihood                -566.051
                   + stochastic seasonal(12)   AIC                           1140.103
Date:                       Mon, 29 May 2017   BIC                           1151.982
Time:                               18:20:11   HQIC                          1144.930
Sample:                           01-01-1949                                         
                                - 12-01-1960                                         
Covariance Type:                         opg                                         
====================================================================================
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
sigma2.irregular  5.508e-12     19.503   2.82e-13      1.000     -38.226      38.226
sigma2.level       161.5594     34.663      4.661      0.000      93.622     229.497
sigma2.trend      1.934e-11      0.095   2.03e-10      1.000      -0.187       0.187
sigma2.seasonal     18.8401      6.519      2.890      0.004       6.063      31.617
===================================================================================
Ljung-Box (Q):                      530.11   Jarque-Bera (JB):                 2.81
Prob(Q):                              0.00   Prob(JB):                         0.25
Heteroskedasticity (H):               9.84   Skew:                            -0.01
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.72
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

推定するパラメタの数を減らす

In [9]:
# 詳細は以下の資料を参照してください
# http://www.statsmodels.org/stable/generated/statsmodels.tsa.statespace.structural.UnobservedComponents.html#statsmodels.tsa.statespace.structural.UnobservedComponents

# 季節変動ありのローカル線形トレンドモデル
# ただし、トレンドの分散は無し

mod_season_trend_d = sm.tsa.UnobservedComponents(
    ts,
    'local linear deterministic trend',
    seasonal=12)

# まずはNelder-Meadでパラメタを推定し、その結果を初期値としてまた最適化する。2回目はBFGSを使用。
res_season_trend_d = mod_season_trend_d.fit(
    method='bfgs', 
    maxiter=500, 
    start_params=mod_season_trend_d.fit(method='nm', maxiter=500).params,
)

# 推定されたパラメタ一覧
print(res_season_trend_d.summary())

# 推定された状態・トレンド・季節の影響の描画
rcParams['figure.figsize'] = 15, 20
fig = res_season_trend_d.plot_components()
Optimization terminated successfully.
         Current function value: 3.930912
         Iterations: 115
         Function evaluations: 209
Optimization terminated successfully.
         Current function value: 3.930912
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
                               Unobserved Components Results                                
============================================================================================
Dep. Variable:                          #Passengers   No. Observations:                  144
Model:             local linear deterministic trend   Log Likelihood                -566.051
                          + stochastic seasonal(12)   AIC                           1138.103
Date:                              Mon, 29 May 2017   BIC                           1147.012
Time:                                      18:20:15   HQIC                          1141.723
Sample:                                  01-01-1949                                         
                                       - 12-01-1960                                         
Covariance Type:                                opg                                         
====================================================================================
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
sigma2.irregular  1.571e-09     19.239   8.16e-11      1.000     -37.707      37.707
sigma2.level       161.5592     33.317      4.849      0.000      96.259     226.859
sigma2.seasonal     18.8404      6.469      2.913      0.004       6.162      31.519
===================================================================================
Ljung-Box (Q):                      530.11   Jarque-Bera (JB):                 2.81
Prob(Q):                              0.00   Prob(JB):                         0.25
Heteroskedasticity (H):               9.84   Skew:                            -0.01
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.72
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [10]:
# 詳細は以下の資料を参照してください
# http://www.statsmodels.org/stable/generated/statsmodels.tsa.statespace.structural.UnobservedComponents.html#statsmodels.tsa.statespace.structural.UnobservedComponents

# 季節変動ありのローカル線形トレンドモデル
# ただし、トレンドの分散は無し

mod_season_rw = sm.tsa.UnobservedComponents(
    ts,
    'random walk with drift',
    seasonal=12)

# まずはNelder-Meadでパラメタを推定し、その結果を初期値としてまた最適化する。2回目はBFGSを使用。
res_season_rw = mod_season_rw.fit(
    method='bfgs', 
    maxiter=500, 
    start_params=mod_season_rw.fit(method='nm', maxiter=500).params,
)

# 推定されたパラメタ一覧
print(res_season_rw.summary())

# 推定された状態・トレンド・季節の影響の描画
rcParams['figure.figsize'] = 15, 20
fig = res_season_rw.plot_components()
Optimization terminated successfully.
         Current function value: 3.930912
         Iterations: 66
         Function evaluations: 130
Optimization terminated successfully.
         Current function value: 3.930912
         Iterations: 0
         Function evaluations: 1
         Gradient evaluations: 1
                            Unobserved Components Results                            
=====================================================================================
Dep. Variable:                   #Passengers   No. Observations:                  144
Model:                random walk with drift   Log Likelihood                -566.051
                   + stochastic seasonal(12)   AIC                           1136.103
Date:                       Mon, 29 May 2017   BIC                           1142.042
Time:                               18:20:16   HQIC                          1138.516
Sample:                           01-01-1949                                         
                                - 12-01-1960                                         
Covariance Type:                         opg                                         
===================================================================================
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
sigma2.level      161.5581     31.748      5.089      0.000      99.333     223.783
sigma2.seasonal    18.8405      5.418      3.477      0.001       8.222      29.459
===================================================================================
Ljung-Box (Q):                      530.11   Jarque-Bera (JB):                 2.81
Prob(Q):                              0.00   Prob(JB):                         0.25
Heteroskedasticity (H):               9.84   Skew:                            -0.01
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.72
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

モデルの比較と将来予測

In [11]:
# 今まで計算してきたモデルのAICを格納する
aic_list = pd.DataFrame(index=range(6), columns=["model", "aic"])

aic_list.ix[0]["model"] = "res_local_level"
aic_list.ix[0]["aic"] = res_local_level.aic

aic_list.ix[1]["model"] = "res_trend"
aic_list.ix[1]["aic"] = res_trend.aic

aic_list.ix[2]["model"] = "res_season_local_level"
aic_list.ix[2]["aic"] = res_season_local_level.aic

aic_list.ix[3]["model"] = "res_season_trend"
aic_list.ix[3]["aic"] = res_season_trend.aic

aic_list.ix[4]["model"] = "res_season_trend_d"
aic_list.ix[4]["aic"] = res_season_trend_d.aic

aic_list.ix[5]["model"] = "res_season_rw"
aic_list.ix[5]["aic"] = res_season_rw.aic

# 結果の表示
aic_list
Out[11]:
model aic
0 res_local_level 1415.91
1 res_trend 1450.39
2 res_season_local_level 1149.14
3 res_season_trend 1140.1
4 res_season_trend_d 1138.1
5 res_season_rw 1136.1
In [12]:
# 予測
pred = res_season_rw.predict('1960-01-01', '1961-12-01')

# 実データと予測結果の図示
rcParams['figure.figsize'] = 15, 6
plt.plot(ts)
plt.plot(pred, "r")
Out[12]:
[<matplotlib.lines.Line2D at 0x2717e4d6c88>]
In [ ]: