# FRED series IDs. "SP500" does not contain dividends; note: "DTWEXM" discontinued as of Jan 2020.
# "_d" / "_r" presumably distinguish price-level vs rate series — TODO confirm against the download code.
factors_d = ["SP500", "DTWEXAFEGS"]
factors_r = ["DGS10", "BAMLH0A0HYM2"]
Price momentum
One-month reversal and 2–12 month momentum are two ends of the spectrum. The general pattern is that positive acceleration tends to precede reversals, while negative acceleration tends to precede rebounds. An unsustainable acceleration that ends in reversal reconciles the one-month reversal with 2–12 month momentum: the key implication is that acceleration is not sustainable.
# "Momentum, Acceleration, and Reversal"
def pnl(x):
    """Compound a window of simple returns into a single cumulative return.

    NaN entries are ignored (np.nanprod treats them as a factor of 1,
    i.e. a zero return). An all-NaN or empty window returns 0.0.
    """
    return np.nanprod(1 + x) - 1
# Skip the most recent `order` observations (the one-month reversal window) and
# compound the returns over the remaining (width - order) observations — the
# classic 2-12 month momentum construction. `returns_df` and `width` are
# assumed to be defined earlier in the file — TODO confirm.
order = 20
momentum_df = returns_df["returns"].shift(order).rolling(width - order, min_periods = 1).apply(pnl, raw = False).dropna()
Time-series score
Suppose we are looking at \(n\) independent and identically distributed random variables, \(X_{1},X_{2},\ldots,X_{n}\). Since they are iid, each random variable \(X_{i}\) has to have the same mean, which we will call \(\mu\), and variance, which we will call \(\sigma^{2}\):
\[ \begin{aligned} \mathrm{E}\left(X_{i}\right)&=\mu\\ \mathrm{Var}\left(X_{i}\right)&=\sigma^{2} \end{aligned} \]
Let’s suppose we want to look at the average value of our \(n\) random variables:
\[ \begin{aligned} \bar{X}=\frac{X_{1}+X_{2}+\cdots+X_{n}}{n}=\left(\frac{1}{n}\right)\left(X_{1}+X_{2}+\cdots+X_{n}\right) \end{aligned} \]
We want to find the expected value and variance of the average, \(\mathrm{E}\left(\bar{X}\right)\) and \(\mathrm{Var}\left(\bar{X}\right)\).
Expected value
\[ \begin{aligned} \mathrm{E}\left(\bar{X}\right)&=\mathrm{E}\left[\left(\frac{1}{n}\right)\left(X_{1}+X_{2}+\cdots+X_{n}\right)\right]\\ &=\left(\frac{1}{n}\right)\mathrm{E}\left(X_{1}+X_{2}+\cdots+X_{n}\right)\\ &=\left(\frac{1}{n}\right)\left(n\mu\right)\\ &=\mu \end{aligned} \]
Variance
\[ \begin{aligned} \mathrm{Var}\left(\bar{X}\right)&=\mathrm{Var}\left[\left(\frac{1}{n}\right)\left(X_{1}+X_{2}+\cdots+X_{n}\right)\right]\\ &=\left(\frac{1}{n}\right)^{2}\mathrm{Var}\left(X_{1}+X_{2}+\cdots+X_{n}\right)\\ &=\left(\frac{1}{n}\right)^{2}\left(n\sigma^{2}\right)\\ &=\frac{\sigma^{2}}{n} \end{aligned} \]
def sd(x):
    """Zero-mean standard deviation of a window, ignoring NaNs.

    This is a root-mean-square about zero (no demeaning) with an (n - 1)
    denominator — the usual volatility estimate for returns under an
    assumed zero mean. Returns NaN when fewer than two non-NaN
    observations are available.
    """
    n_rows = sum(~np.isnan(x))

    if n_rows > 1:
        result = np.sqrt(np.nansum(x ** 2) / (n_rows - 1))
    else:
        result = np.nan

    return result
# volatility scale only: divide momentum by its rolling zero-mean volatility
score_df = (momentum_df / momentum_df.rolling(width, min_periods = 1).apply(sd, raw = False)).dropna()

# overall_df = score_df.mean(axis = 1)
# overall_df = overall_df / overall_df.rolling(width, min_periods = 1).apply(risk, raw = False)
# score_df.insert(loc = 0, column = "Overall", value = overall_df)
# score_df = score_df.dropna()
Outlier detection
import statsmodels.api as sm
Interquartile range
Outliers are defined as the regression residuals that fall below \(Q_{1}−1.5\times IQR\) or above \(Q_{3}+1.5\times IQR\):
- https://stats.stackexchange.com/a/1153
- https://stats.stackexchange.com/a/108951
- https://robjhyndman.com/hyndsight/tsoutliers/
def outliers(z):
    """Flag IQR outliers of per-column regression residuals.

    For each column of `z`, the remaining columns are used as regressors
    (with a linear time trend as the fallback when `z` has a single column).
    Observations whose residual falls below Q1 - 1.5*IQR or above
    Q3 + 1.5*IQR are collected and pivoted into a date-by-symbol DataFrame.
    """
    n_cols = z.shape[1]
    result_ls = []

    for j in range(n_cols):
        y = z.iloc[:, j]

        # Fix: the original tested n_cols == 0, which is unreachable inside
        # this loop (it only runs when n_cols >= 1); the intended fallback
        # for a single-column input is a regression on a time trend.
        if n_cols == 1:
            x = sm.add_constant(range(len(y)))
        else:
            x = sm.add_constant(z.drop(z.columns[j], axis = 1))

        # fit.resid equals y - (const + X @ coef), and works whether
        # sm.add_constant returned a DataFrame or an ndarray.
        fit = sm.WLS(y, x).fit()
        resid = y - fit.predict(x)

        lower = resid.quantile(0.25)
        upper = resid.quantile(0.75)
        iqr = upper - lower

        total = y[(resid < lower - 1.5 * iqr) | (resid > upper + 1.5 * iqr)]
        total = pd.DataFrame({"date": total.index, "symbol": total.name, "values": total})
        result_ls.append(total)

    result = pd.concat(result_ls, ignore_index = True)
    result = result.pivot_table(index = "date", columns = "symbol", values = "values")

    return result
= outliers(score_df) outliers_df
Contour ellipsoid
Granger causality
from scipy.stats import chi2
\[ \begin{aligned} \left(R\hat{\beta}-r\right)^{\mathrm{T}}\left(R\hat{V}R^{\mathrm{T}}\right)^{-1}\left(R\hat{\beta}-r\right)\xrightarrow{d}\chi_{Q}^{2} \end{aligned} \]
- https://github.com/cran/lmtest/blob/master/R/waldtest.R
- https://en.wikipedia.org/wiki/Wald_test#Test(s)_on_multiple_parameters
- https://math.stackexchange.com/a/1591946
def granger_test(x, y, order):
    """Wald test of whether `x` Granger-causes `y` at a single lag `order`.

    Regresses y on a constant, lag(y), and lag(x), then tests the
    restriction that the lag(x) coefficient is zero. Returns a length-one
    array containing the chi-squared p-value (one restriction, one df).
    """
    # compute lagged observations
    lag_x = x.shift(order)
    lag_y = y.shift(order)

    # collect series (renamed design matrix to avoid shadowing parameter x)
    df = pd.DataFrame({"x": x, "y": y, "lag_x": lag_x, "lag_y": lag_y})
    design = sm.add_constant(df[["lag_y", "lag_x"]])

    # fit full model
    fit = sm.WLS(df["y"], design, missing = "drop").fit()

    # restriction matrix: test only the lag_x coefficient (last position)
    R = np.array([0, 0, 1])
    coef = fit.params
    r = 0  # technically a matrix (see Stack Exchange)

    # Wald statistic: (Rb - r)' (R V R')^{-1} (Rb - r)
    matmul = np.dot(R, coef) - r
    matmul_mid = np.linalg.inv(np.atleast_2d(np.dot(R, np.dot(fit.cov_params(), R.T))))
    wald = np.dot(matmul.T, np.dot(matmul_mid, matmul))

    # one restriction => chi-squared with 1 degree of freedom
    result = 1 - chi2.cdf(wald, 1)

    return np.ravel(result)
def roll_lead_lag(x, y, width, order, p_value):
    """Rolling one-directional Granger lead/lag classification.

    For each rolling window of `width` observations, x is flagged as
    leading y when the x->y test is significant (p < p_value) while the
    y->x test is not, and vice versa. Returns a boolean DataFrame with
    one column per series, indexed like x with the first width - 1 rows
    dropped.
    """
    n_rows = len(x)
    x_name = x.name
    y_name = y.name
    x_y_ls = []
    y_x_ls = []

    for i in range(width - 1, n_rows):
        idx = range(max(i - width + 1, 0), i + 1)

        x_y = granger_test(x.iloc[idx], y.iloc[idx], order)
        y_x = granger_test(y.iloc[idx], x.iloc[idx], order)

        # significant in exactly one direction
        x_y_status = (x_y < p_value) and (y_x > p_value)
        y_x_status = (x_y > p_value) and (y_x < p_value)

        x_y_ls.append(x_y_status)
        y_x_ls.append(y_x_status)

    result = pd.DataFrame({x_name: x_y_ls, y_name: y_x_ls}, index = x.index[(width - 1):])

    return result
p_value = 0.05

score_x_df = score_df.loc[:, "SP500"]
score_y_df = score_df.loc[:, "DGS10"]

lead_lag_df = roll_lead_lag(score_x_df, score_y_df, width, order, p_value)