# Daily-level factors. "SP500" does not contain dividends;
# note: "DTWEXM" discontinued as of Jan 2020, replaced here by "DTWEXAFEGS".
factors_d = ["SP500", "DTWEXAFEGS"]
# Rate-level factors (10y Treasury yield, high-yield OAS)
factors_r = ["DGS10", "BAMLH0A0HYM2"]
Decomposition
Underlying returns are structural bets that can be analyzed through dimension reduction techniques such as principal components analysis (PCA). Most empirical studies apply PCA to a covariance matrix (note: for multi-asset portfolios, use the correlation matrix because asset-class variances are on different scales) of equity returns (yield changes) and find that movements in the equity markets (yield curve) can be explained by a subset of principal components. For example, the yield curve can be decomposed into shift, twist, and butterfly components, corresponding to the first three principal components.
\[ \begin{aligned} \boldsymbol{\Sigma}&=\lambda_{1}\mathbf{v}_{1}\mathbf{v}_{1}^\mathrm{T}+\lambda_{2}\mathbf{v}_{2}\mathbf{v}_{2}^\mathrm{T}+\cdots+\lambda_{k}\mathbf{v}_{k}\mathbf{v}_{k}^\mathrm{T}\\ &=V\Lambda V^{\mathrm{T}} \end{aligned} \]
def eigen(x):
    """Eigendecomposition of the sample covariance matrix of x.

    Parameters
    ----------
    x : array-like of shape (n_obs, n_assets)
        Observations in rows, variables (assets) in columns.

    Returns
    -------
    dict
        "values": eigenvalues sorted in descending order;
        "vectors": matching eigenvectors as columns, in the same order.
    """
    # np.cov expects variables in rows, hence the transpose; ddof=1 gives
    # the unbiased sample covariance.
    L, V = np.linalg.eig(np.cov(x.T, ddof = 1))
    # np.linalg.eig returns eigenvalues in no guaranteed order — sort
    # descending and reorder the eigenvector columns to match.
    idx = L.argsort()[::-1]
    L = L[idx]
    V = V[:, idx]
    result = {
        "values": L,
        "vectors": V
    }
    return result
def eigen_decomp(x, comps):
    """Low-rank reconstruction of the covariance matrix of x.

    Keeps the leading `comps` principal components and rebuilds
    sum_{j=1..comps} lambda_j * v_j v_j^T = V Lambda V^T.

    Parameters
    ----------
    x : array-like of shape (n_obs, n_assets)
    comps : int
        Number of leading components to retain.

    Returns
    -------
    ndarray of shape (n_assets, n_assets)
        The rank-`comps` covariance approximation.
    """
    LV = eigen(x)
    L = LV["values"][:comps]
    V = LV["vectors"][:, :comps]
    # V @ diag(L) @ V.T; L[:, np.newaxis] scales each row of V.T by its
    # eigenvalue and broadcasts correctly for any comps (a bare
    # np.multiply(L, V.T) only broadcasts when comps == 1).
    result = np.dot(V, L[:, np.newaxis] * V.T)
    return result
comps = 1
# Rank-1 covariance approximation, annualized by the sampling frequency
# and overlap adjustment (scale and overlap_df defined earlier in the doc).
eigen_decomp(overlap_df, comps) * scale["periods"] * scale["overlap"]
array([[ 2.99142972e-02, -3.64057676e-03, -1.43401778e-04,
2.56192976e-03],
[-3.64057676e-03, 4.43059017e-04, 1.74520289e-05,
-3.11787433e-04],
[-1.43401778e-04, 1.74520289e-05, 6.87432830e-07,
-1.22812607e-05],
[ 2.56192976e-03, -3.11787433e-04, -1.22812607e-05,
2.19409604e-04]])
# np.cov(overlap_df.T) * scale["periods"] * scale["overlap"]
Variance
We often look at the proportion of variance explained by the first \(i\) principal components as an indication of how many components are needed.
\[ \begin{aligned} \frac{\sum_{j=1}^{i}{\lambda_{j}}}{\sum_{j=1}^{k}{\lambda_{j}}} \end{aligned} \]
def variance_explained(x):
    """Cumulative proportion of variance explained by the principal components.

    Parameters
    ----------
    x : array-like of shape (n_obs, n_assets)

    Returns
    -------
    ndarray of shape (n_assets,)
        Entry i is sum_{j<=i} lambda_j / sum_j lambda_j; the last entry is 1.
    """
    LV = eigen(x)
    L = LV["values"]
    # eigenvalues are already sorted descending by eigen()
    result = L.cumsum() / L.sum()
    return result
# Cumulative proportion of variance explained by each successive component
variance_explained(overlap_df)
array([0.87372155, 0.99215925, 0.99826262, 1. ])
Similarity
Also, a challenge of rolling PCA is to try to match the eigenvectors: may need to change the sign and order.
\[ \begin{aligned} \text{similarity}=\frac{\mathbf{v}_{t}\cdot\mathbf{v}_{t-1}}{\|\mathbf{v}_{t}\|\|\mathbf{v}_{t-1}\|} \end{aligned} \]
def similarity(V, V0):
    """Pairwise cosine similarity between the columns of V and of V0.

    Used to match eigenvectors across rolling windows (sign and order of
    eigenvectors are not stable between decompositions).

    Parameters
    ----------
    V : ndarray of shape (n, p)
        Current vectors, one per column.
    V0 : ndarray of shape (n, q)
        Reference vectors, one per column.

    Returns
    -------
    ndarray of shape (p, q)
        result[i, j] = cos(V[:, i], V0[:, j]).
    """
    n_cols_v = V.shape[1]
    n_cols_v0 = V0.shape[1]
    result = np.zeros((n_cols_v, n_cols_v0))
    for i in range(n_cols_v):
        for j in range(n_cols_v0):
            # dot product normalized by the vector norms
            result[i, j] = np.dot(V[:, i], V0[:, j]) / \
                np.sqrt(np.dot(V[:, i], V[:, i]) * np.dot(V0[:, j], V0[:, j]))
    return result
def roll_eigen1(x, width, comp):
    """Rolling eigenvector of the sample covariance (no sign/order matching).

    Parameters
    ----------
    x : pd.DataFrame of shape (n_obs, n_assets)
    width : int
        Rolling window length in rows.
    comp : int
        1-based index of the principal component to extract.

    Returns
    -------
    pd.DataFrame
        One row per window end (starting at row width-1), the `comp`-th
        eigenvector's loadings; columns match x.
    """
    n_rows = len(x)
    result_ls = []
    for i in range(width - 1, n_rows):
        # trailing window [i - width + 1, i]
        idx = range(max(i - width + 1, 0), i + 1)
        LV = eigen(x.iloc[idx])
        V = LV["vectors"]
        # comp is 1-based, columns are 0-based
        result_ls.append(V[:, comp - 1])
    result_df = pd.DataFrame(result_ls, index = x.index[(width - 1):],
                             columns = x.columns)
    return result_df
comp = 1
# First principal component without eigenvector matching across windows
raw_df = roll_eigen1(overlap_df, width, comp)
def roll_eigen2(x, width, comp):
    """Rolling eigenvector with sign/order matching to the previous window.

    Eigenvectors from successive decompositions may flip sign or swap order;
    each window's vectors are aligned to the previous window's by maximum
    absolute cosine similarity, then sign-corrected.

    Parameters
    ----------
    x : pd.DataFrame of shape (n_obs, n_assets)
    width : int
        Rolling window length in rows.
    comp : int
        1-based index of the principal component to extract.

    Returns
    -------
    pd.DataFrame
        One row per window end, the matched `comp`-th eigenvector's
        loadings; columns match x.
    """
    n_rows = len(x)
    V_ls = []
    result_ls = []
    for i in range(width - 1, n_rows):
        idx = range(max(i - width + 1, 0), i + 1)
        LV = eigen(x.iloc[idx])
        V = LV["vectors"]
        # first window (i == width - 1) has nothing to match against
        if i > width - 1:
            # cosine = np.dot(V.T, V_ls[-1])
            cosine = similarity(V.T, V_ls[-1])
            # for each current vector, the previous vector it best matches
            order = np.argmax(np.abs(cosine), axis = 1)
            # reorder columns and flip signs so matched cosines are positive
            V = np.sign(np.diag(cosine[:, order])) * V[:, order]
        V_ls.append(V)
        result_ls.append(V[:, comp - 1])
    result_df = pd.DataFrame(result_ls, index = x.index[(width - 1):],
                             columns = x.columns)
    return result_df
# Matched (sign- and order-corrected) rolling first principal component
clean_df = roll_eigen2(overlap_df, width, comp)
Implied shocks
Product of the \(n\)th eigenvector and square root of the \(n\)th eigenvalue:
def roll_shocks(x, width, comp):
    """Rolling implied shocks: sqrt(lambda_comp) * v_comp per window.

    Eigenvectors are matched to the previous window (order and sign) via
    cosine similarity, and the eigenvalues are reordered accordingly.

    Parameters
    ----------
    x : pd.DataFrame of shape (n_obs, n_assets)
    width : int
        Rolling window length in rows.
    comp : int
        1-based index of the principal component.

    Returns
    -------
    pd.DataFrame
        One row per window end, each row sqrt(lambda) times the matched
        eigenvector; columns match x.
    """
    n_rows = len(x)
    V_ls = []
    result_ls = []
    for i in range(width - 1, n_rows):
        idx = range(max(i - width + 1, 0), i + 1)
        LV = eigen(x.iloc[idx])
        L = LV["values"]
        V = LV["vectors"]
        # NOTE(review): roll_eigen2 aligns from the second window onward
        # (i > width - 1); here matching starts one window later
        # (len(V_ls) > 1) — confirm this difference is intentional.
        if len(V_ls) > 1:
            # cosine = np.dot(V.T, V_ls[-1])
            cosine = similarity(V.T, V_ls[-1])
            order = np.argmax(np.abs(cosine), axis = 1)
            # reorder eigenvalues to follow their matched eigenvectors
            L = L[order]
            V = np.sign(np.diag(cosine[:, order])) * V[:, order]
        # implied shock: eigenvector scaled by the vol of its component
        shocks = np.sqrt(L[comp - 1]) * V[:, comp - 1]
        V_ls.append(V)
        result_ls.append(shocks)
    result_df = pd.DataFrame(result_ls, index = x.index[(width - 1):],
                             columns = x.columns)
    return result_df
# Annualized implied shocks (vol scales with the square root of time)
shocks_df = roll_shocks(overlap_df, width, comp) * np.sqrt(scale["periods"] * scale["overlap"])