Pythonによる時系列分析の基礎

In [1]:
# 基本のライブラリを読み込む
import numpy as np
import pandas as pd
from scipy import stats

# グラフ描画
from matplotlib import pylab as plt
import seaborn as sns
%matplotlib inline

# グラフを横長にする
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 6

# 統計モデル
import statsmodels.api as sm

時系列データの読み込み

In [2]:
# 普通にデータを読み込む
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/AirPassengers.html
dataNormal = pd.read_csv('AirPassengers.csv')
dataNormal.head()
Out[2]:
Month #Passengers
0 1949-01 112
1 1949-02 118
2 1949-03 132
3 1949-04 129
4 1949-05 121
In [3]:
# 日付形式で読み込む(dtype=floatで読み込まないと、あとでARIMAモデル推定時にエラーとなる)
dateparse = lambda dates: pd.datetime.strptime(dates, '%Y-%m')
data = pd.read_csv('AirPassengers.csv', index_col='Month', date_parser=dateparse, dtype='float')
data.head()
Out[3]:
#Passengers
Month
1949-01-01 112.0
1949-02-01 118.0
1949-03-01 132.0
1949-04-01 129.0
1949-05-01 121.0
In [4]:
# 日付形式にする
ts = data['#Passengers'] 
ts.head()
Out[4]:
Month
1949-01-01    112.0
1949-02-01    118.0
1949-03-01    132.0
1949-04-01    129.0
1949-05-01    121.0
Name: #Passengers, dtype: float64

時系列データの取り扱い

In [5]:
# プロット
plt.plot(ts)
Out[5]:
[<matplotlib.lines.Line2D at 0x17e8eeae978>]
In [6]:
# データの取得方法その1
ts['1949-01-01']
Out[6]:
112.0
In [7]:
# データの取得方法その2
from datetime import datetime
ts[datetime(1949,1, 1)]
Out[7]:
112.0
In [8]:
# 1949年のデータをすべて取ってくる
ts['1949']
Out[8]:
Month
1949-01-01    112.0
1949-02-01    118.0
1949-03-01    132.0
1949-04-01    129.0
1949-05-01    121.0
1949-06-01    135.0
1949-07-01    148.0
1949-08-01    148.0
1949-09-01    136.0
1949-10-01    119.0
1949-11-01    104.0
1949-12-01    118.0
Name: #Passengers, dtype: float64
In [9]:
# シフト
ts.shift().head()
Out[9]:
Month
1949-01-01      NaN
1949-02-01    112.0
1949-03-01    118.0
1949-04-01    132.0
1949-05-01    129.0
Name: #Passengers, dtype: float64
In [10]:
# 差分系列
diff = ts - ts.shift()
diff.head()
Out[10]:
Month
1949-01-01     NaN
1949-02-01     6.0
1949-03-01    14.0
1949-04-01    -3.0
1949-05-01    -8.0
Name: #Passengers, dtype: float64
In [11]:
# 対数差分系列
logDiff = np.log(ts) - np.log(ts.shift())

# NaNを取り除てから表示
logDiff.dropna().head()
Out[11]:
Month
1949-02-01    0.052186
1949-03-01    0.112117
1949-04-01   -0.022990
1949-05-01   -0.064022
1949-06-01    0.109484
Name: #Passengers, dtype: float64

自己相関係数の推定

In [12]:
# 自己相関を求める
ts_acf = sm.tsa.stattools.acf(ts, nlags=40)
ts_acf
Out[12]:
array([ 1.        ,  0.94804734,  0.87557484,  0.80668116,  0.75262542,
        0.71376997,  0.6817336 ,  0.66290439,  0.65561048,  0.67094833,
        0.70271992,  0.74324019,  0.76039504,  0.71266087,  0.64634228,
        0.58592342,  0.53795519,  0.49974753,  0.46873401,  0.44987066,
        0.4416288 ,  0.45722376,  0.48248203,  0.51712699,  0.53218983,
        0.49397569,  0.43772134,  0.3876029 ,  0.34802503,  0.31498388,
        0.28849682,  0.27080187,  0.26429011,  0.27679934,  0.2985215 ,
        0.32558712,  0.3370236 ,  0.30333486,  0.25397708,  0.21065534,
        0.17217092])
In [13]:
# 偏自己相関
ts_pacf = sm.tsa.stattools.pacf(ts, nlags=40, method='ols')
ts_pacf
Out[13]:
array([ 1.        ,  0.95893198, -0.32983096,  0.2018249 ,  0.14500798,
        0.25848232, -0.02690283,  0.20433019,  0.15607896,  0.56860841,
        0.29256358,  0.8402143 ,  0.61268285, -0.66597616, -0.38463943,
        0.0787466 , -0.02663483, -0.05805221, -0.04350748,  0.27732556,
       -0.04046447,  0.13739883,  0.3859958 ,  0.24203808, -0.04912986,
       -0.19599778, -0.15443575,  0.04484465,  0.18371541, -0.0906113 ,
       -0.06202938,  0.34827092,  0.09899499, -0.08396793,  0.36328898,
       -0.17956662,  0.15839435,  0.06376775, -0.27503705,  0.2707607 ,
        0.32002003])
In [14]:
#  自己相関のグラフ
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(ts, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(ts, lags=40, ax=ax2)

ARIMAモデルの推定

In [15]:
# たぶん和分過程なので、差分をとる
diff = ts - ts.shift()
diff = diff.dropna()
diff.head()
Out[15]:
Month
1949-02-01     6.0
1949-03-01    14.0
1949-04-01    -3.0
1949-05-01    -8.0
1949-06-01    14.0
Name: #Passengers, dtype: float64
In [16]:
# 差分系列のグラフ
plt.plot(diff)
Out[16]:
[<matplotlib.lines.Line2D at 0x17e8f2b25c0>]
In [17]:
# 差分系列への自動ARMA推定関数の実行
resDiff = sm.tsa.arma_order_select_ic(diff, ic='aic', trend='nc')
resDiff
Out[17]:
{'aic':              0            1            2
 0          NaN  1397.257791  1397.093436
 1  1401.852641  1412.615224  1385.496795
 2  1396.587654  1378.338024  1353.175689
 3  1395.021214  1379.614000  1351.138759
 4  1388.216680  1379.616584  1373.560615, 'aic_min_order': (3, 2)}
In [18]:
# P-3, q=2 が最善となったので、それをモデル化
from statsmodels.tsa.arima_model import ARIMA
ARIMA_3_1_2 = ARIMA(ts, order=(3, 1, 2)).fit(dist=False)
ARIMA_3_1_2.params
Out[18]:
const                  2.673500
ar.L1.D.#Passengers    0.261992
ar.L2.D.#Passengers    0.367828
ar.L3.D.#Passengers   -0.363473
ma.L1.D.#Passengers   -0.075050
ma.L2.D.#Passengers   -0.924847
dtype: float64
In [19]:
# 残差のチェック
# SARIMAじゃないので、周期性が残ってしまっている。。。
resid = ARIMA_3_1_2.resid
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)

SARIMAモデルの推定

In [20]:
# SARIMAモデルを「決め打ち」で推定する
import statsmodels.api as sm

SARIMA_3_1_2_111 = sm.tsa.SARIMAX(ts, order=(3,1,2), seasonal_order=(1,1,1,12)).fit()
print(SARIMA_3_1_2_111.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                        #Passengers   No. Observations:                  144
Model:             SARIMAX(3, 1, 2)x(1, 1, 1, 12)   Log Likelihood                -502.990
Date:                            Sun, 28 May 2017   AIC                           1021.980
Time:                                    08:51:11   BIC                           1045.738
Sample:                                01-01-1949   HQIC                          1031.634
                                     - 12-01-1960                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.5383      1.665      0.323      0.747      -2.725       3.802
ar.L2          0.2830      0.969      0.292      0.770      -1.617       2.183
ar.L3         -0.0346      0.407     -0.085      0.932      -0.832       0.762
ma.L1         -0.9231      1.690     -0.546      0.585      -4.236       2.390
ma.L2         -0.0519      1.658     -0.031      0.975      -3.302       3.198
ar.S.L12      -0.8773      0.285     -3.077      0.002      -1.436      -0.318
ma.S.L12       0.7839      0.370      2.116      0.034       0.058       1.510
sigma2       124.6735     14.315      8.709      0.000      96.616     152.731
===================================================================================
Ljung-Box (Q):                       51.03   Jarque-Bera (JB):                13.51
Prob(Q):                              0.11   Prob(JB):                         0.00
Heteroskedasticity (H):               2.60   Skew:                             0.14
Prob(H) (two-sided):                  0.00   Kurtosis:                         4.55
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
In [21]:
# 残差のチェック
residSARIMA = SARIMA_3_1_2_111.resid
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(residSARIMA.values.squeeze(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(residSARIMA, lags=40, ax=ax2)
In [22]:
# 予測
pred = SARIMA_3_1_2_111.predict('1960-01-01', '1961-12-01')
print(pred)
1960-01-01    416.176480
1960-02-01    396.424264
1960-03-01    449.600236
1960-04-01    416.839716
1960-05-01    465.882299
1960-06-01    528.697636
1960-07-01    601.581939
1960-08-01    624.416081
1960-09-01    510.796085
1960-10-01    450.040032
1960-11-01    411.352843
1960-12-01    437.964418
1961-01-01    446.890143
1961-02-01    423.385117
1961-03-01    458.075858
1961-04-01    496.980199
1961-05-01    509.480862
1961-06-01    569.111454
1961-07-01    656.760904
1961-08-01    642.651272
1961-09-01    547.670535
1961-10-01    498.293133
1961-11-01    429.635692
1961-12-01    473.340270
Freq: MS, dtype: float64
In [23]:
# 実データと予測結果の図示
plt.plot(ts)
plt.plot(pred, "r")
Out[23]:
[<matplotlib.lines.Line2D at 0x17e8f8b62b0>]

総当たり法によるSARIMAモデル次数の決定

In [24]:
# 総当たりで、AICが最小となるSARIMAの次数を探す
max_p = 3
max_q = 3
max_d = 1
max_sp = 1
max_sq = 1
max_sd = 1

pattern = max_p*(max_q + 1)*(max_d + 1)*(max_sp + 1)*(max_sq + 1)*(max_sd + 1)

modelSelection = pd.DataFrame(index=range(pattern), columns=["model", "aic"])
pattern
Out[24]:
192
In [25]:
# 自動SARIMA選択
num = 0

for p in range(1, max_p + 1):
    for d in range(0, max_d + 1):
        for q in range(0, max_q + 1):
            for sp in range(0, max_sp + 1):
                for sd in range(0, max_sd + 1):
                    for sq in range(0, max_sq + 1):
                        sarima = sm.tsa.SARIMAX(
                            ts, order=(p,d,q), 
                            seasonal_order=(sp,sd,sq,12), 
                            enforce_stationarity = False, 
                            enforce_invertibility = False
                        ).fit()
                        modelSelection.ix[num]["model"] = "order=(" + str(p) + ","+ str(d) + ","+ str(q) + "), season=("+ str(sp) + ","+ str(sd) + "," + str(sq) + ")"
                        modelSelection.ix[num]["aic"] = sarima.aic
                        num = num + 1
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\base\model.py:496: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
In [26]:
modelSelection
Out[26]:
model aic
0 order=(1,0,0), season=(0,0,0) 1415.91
1 order=(1,0,0), season=(0,0,1) 1205.39
2 order=(1,0,0), season=(0,1,0) 1029.98
3 order=(1,0,0), season=(0,1,1) 944.385
4 order=(1,0,0), season=(1,0,0) 1017.32
5 order=(1,0,0), season=(1,0,1) 1007.03
6 order=(1,0,0), season=(1,1,0) 944.044
7 order=(1,0,0), season=(1,1,1) 945.44
8 order=(1,0,1), season=(0,0,0) 1390.45
9 order=(1,0,1), season=(0,0,1) 1192.29
10 order=(1,0,1), season=(0,1,0) 1014.25
11 order=(1,0,1), season=(0,1,1) 929.433
12 order=(1,0,1), season=(1,0,0) 1009.59
13 order=(1,0,1), season=(1,0,1) 989.176
14 order=(1,0,1), season=(1,1,0) 935.816
15 order=(1,0,1), season=(1,1,1) 935.915
16 order=(1,0,2), season=(0,0,0) 1381.52
17 order=(1,0,2), season=(0,0,1) 1275.48
18 order=(1,0,2), season=(0,1,0) 1009.29
19 order=(1,0,2), season=(0,1,1) 923.304
20 order=(1,0,2), season=(1,0,0) 1010.71
21 order=(1,0,2), season=(1,0,1) 984.278
22 order=(1,0,2), season=(1,1,0) 937.696
23 order=(1,0,2), season=(1,1,1) 929.569
24 order=(1,0,3), season=(0,0,0) 1355.62
25 order=(1,0,3), season=(0,0,1) 1319.41
26 order=(1,0,3), season=(0,1,0) 1000.8
27 order=(1,0,3), season=(0,1,1) 915.052
28 order=(1,0,3), season=(1,0,0) 1011.19
29 order=(1,0,3), season=(1,0,1) 979.4
... ... ...
162 order=(3,1,0), season=(0,1,0) 1003
163 order=(3,1,0), season=(0,1,1) 931.842
164 order=(3,1,0), season=(1,0,0) 997.193
165 order=(3,1,0), season=(1,0,1) 983.289
166 order=(3,1,0), season=(1,1,0) 916.573
167 order=(3,1,0), season=(1,1,1) 916.807
168 order=(3,1,1), season=(0,0,0) 1353.66
169 order=(3,1,1), season=(0,0,1) 1167.2
170 order=(3,1,1), season=(0,1,0) 997.603
171 order=(3,1,1), season=(0,1,1) 918.466
172 order=(3,1,1), season=(1,0,0) 993.436
173 order=(3,1,1), season=(1,0,1) 983.852
174 order=(3,1,1), season=(1,1,0) 911.376
175 order=(3,1,1), season=(1,1,1) 912.343
176 order=(3,1,2), season=(0,0,0) 1352
177 order=(3,1,2), season=(0,0,1) 1142.78
178 order=(3,1,2), season=(0,1,0) 999.602
179 order=(3,1,2), season=(0,1,1) 913.457
180 order=(3,1,2), season=(1,0,0) 997.152
181 order=(3,1,2), season=(1,0,1) 985.78
182 order=(3,1,2), season=(1,1,0) 913.265
183 order=(3,1,2), season=(1,1,1) 914.042
184 order=(3,1,3), season=(0,0,0) 1337.23
185 order=(3,1,3), season=(0,0,1) 1135.09
186 order=(3,1,3), season=(0,1,0) 988.935
187 order=(3,1,3), season=(0,1,1) 898.105
188 order=(3,1,3), season=(1,0,0) 992.115
189 order=(3,1,3), season=(1,0,1) 966.207
190 order=(3,1,3), season=(1,1,0) 910.008
191 order=(3,1,3), season=(1,1,1) 903.239

192 rows × 2 columns

In [27]:
# AIC最小モデル
modelSelection[modelSelection.aic == min(modelSelection.aic)]
Out[27]:
model aic
187 order=(3,1,3), season=(0,1,1) 898.105
In [28]:
# 参考:次数がすべて0だとエラーになる 
sarima = sm.tsa.SARIMAX(ts, order=(0,0,0), seasonal_order=(0,0,0,12), enforce_stationarity = False).fit()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-66e886f31776> in <module>()
      1 # 参考:次数がすべて0だとエラーになる
----> 2 sarima = sm.tsa.SARIMAX(ts, order=(0,0,0), seasonal_order=(0,0,0,12), enforce_stationarity = False).fit()

C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\sarimax.py in __init__(self, endog, exog, order, seasonal_order, trend, measurement_error, time_varying_regression, mle_regression, simple_differencing, enforce_stationarity, enforce_invertibility, hamilton_representation, **kwargs)
    508         # Initialize the statespace
    509         super(SARIMAX, self).__init__(
--> 510             endog, exog=exog, k_states=k_states, k_posdef=k_posdef, **kwargs
    511         )
    512 

C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py in __init__(self, endog, k_states, exog, dates, freq, **kwargs)
     95 
     96         # Initialize the state-space representation
---> 97         self.initialize_statespace(**kwargs)
     98 
     99     def prepare_data(self):

C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\mlemodel.py in initialize_statespace(self, **kwargs)
    128 
    129         # Instantiate the state space object
--> 130         self.ssm = KalmanSmoother(endog.shape[0], self.k_states, **kwargs)
    131         # Bind the data to the model
    132         self.ssm.bind(endog)

C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\kalman_smoother.py in __init__(self, k_endog, k_states, k_posdef, results_class, **kwargs)
    341 
    342         super(KalmanSmoother, self).__init__(
--> 343             k_endog, k_states, k_posdef, results_class=results_class, **kwargs
    344         )
    345 

C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\kalman_filter.py in __init__(self, k_endog, k_states, k_posdef, loglikelihood_burn, tolerance, results_class, **kwargs)
    211                  **kwargs):
    212         super(KalmanFilter, self).__init__(
--> 213             k_endog, k_states, k_posdef, **kwargs
    214         )
    215 

C:\Users\shinya\Anaconda3\lib\site-packages\statsmodels\tsa\statespace\representation.py in __init__(self, k_endog, k_states, k_posdef, initial_variance, nobs, dtype, design, obs_intercept, obs_cov, transition, state_intercept, selection, state_cov, **kwargs)
    276         # Get dimensions from transition equation
    277         if k_states < 1:
--> 278             raise ValueError('Number of states in statespace model must be a'
    279                              ' positive number.')
    280         self.k_states = k_states

ValueError: Number of states in statespace model must be a positive number.
In [29]:
bestSARIMA = sm.tsa.SARIMAX(ts, order=(3,1,3), seasonal_order=(0,1,1,12), enforce_stationarity = False, enforce_invertibility = False).fit()
In [30]:
print(bestSARIMA.summary())
                                 Statespace Model Results                                 
==========================================================================================
Dep. Variable:                        #Passengers   No. Observations:                  144
Model:             SARIMAX(3, 1, 3)x(0, 1, 1, 12)   Log Likelihood                -441.052
Date:                            Sun, 28 May 2017   AIC                            898.105
Time:                                    08:54:11   BIC                            921.863
Sample:                                01-01-1949   HQIC                           907.759
                                     - 12-01-1960                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.2231      0.097     -2.302      0.021      -0.413      -0.033
ar.L2         -0.1642      0.108     -1.515      0.130      -0.377       0.048
ar.L3          0.7244      0.094      7.704      0.000       0.540       0.909
ma.L1         -0.0837    114.702     -0.001      0.999    -224.895     224.728
ma.L2          0.1221    143.630      0.001      0.999    -281.388     281.632
ma.L3         -0.9797    250.827     -0.004      0.997    -492.592     490.633
ma.S.L12      -0.1583      0.118     -1.337      0.181      -0.390       0.074
sigma2       119.6719   3.06e+04      0.004      0.997   -5.99e+04    6.02e+04
===================================================================================
Ljung-Box (Q):                       36.68   Jarque-Bera (JB):                 4.39
Prob(Q):                              0.62   Prob(JB):                         0.11
Heteroskedasticity (H):               1.87   Skew:                             0.16
Prob(H) (two-sided):                  0.06   Kurtosis:                         3.90
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [31]:
# 残差のチェック
residSARIMA = bestSARIMA.resid
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(residSARIMA, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(residSARIMA, lags=40, ax=ax2)
In [32]:
# 予測
bestPred = bestSARIMA.predict('1960-01-01', '1961-12-01')
# 実データと予測結果の図示
plt.plot(ts)
plt.plot(bestPred, "r")
Out[32]:
[<matplotlib.lines.Line2D at 0x17e92ad23c8>]
In [ ]: