<- c("SP500", "DTWEXAFEGS") # "SP500" does not contain dividends; note: "DTWEXM" discontinued as of Jan 2020
factors_r <- c("DGS10", "BAMLH0A0HYM2") factors_d
<- "BAICX" # fund inception date is "2011-11-28" tickers
<- TRUE intercept
Regression analysis
Ordinary least squares
Coefficients
\[ \begin{aligned} \hat{\beta}=(X^\mathrm{T}WX)^{-1}X^\mathrm{T}Wy \end{aligned} \]
<- function(x, y, weights, intercept) {
lm_coef
if (intercept) x <- model.matrix(~ x)
<- solve(crossprod(x, diag(weights)) %*% x) %*% crossprod(x, diag(weights) %*% y)
result
return(result)
}
t(lm_coef(overlap_x_xts, overlap_y_xts, weights, intercept))
(Intercept) xSP500 xDTWEXAFEGS xDGS10 xBAMLH0A0HYM2
BAICX -8.291107e-06 0.1721972 -0.06432012 3.369528 2.04056
if (intercept) {
<- lm(overlap_y_xts ~ overlap_x_xts, weights = weights)
fit else {
} <- lm(overlap_y_xts ~ overlap_x_xts - 1, weights = weights)
fit
}
coef(fit)
(Intercept) overlap_x_xtsSP500 overlap_x_xtsDTWEXAFEGS
-8.291107e-06 1.721972e-01 -6.432012e-02
overlap_x_xtsDGS10 overlap_x_xtsBAMLH0A0HYM2
3.369528e+00 2.040560e+00
R-squared
\[ \begin{aligned} R^{2}=\frac{\hat{\beta}^\mathrm{T}(X^\mathrm{T}WX)\hat{\beta}}{y^\mathrm{T}Wy} \end{aligned} \]
<- function(x, y, weights, intercept) {
lm_rsq
<- lm_coef(x, y, weights, intercept)
coef
if (intercept) {
<- model.matrix(~ x)
x <- sweep(x, 2, apply(x, 2, weighted.mean, w = weights), "-")
x <- sweep(y, 2, apply(y, 2, weighted.mean, w = weights), "-")
y
}
<- (t(coef) %*% (crossprod(x, diag(weights)) %*% x) %*% coef) / (crossprod(y, diag(weights)) %*% y)
result
return(result)
}
lm_rsq(overlap_x_xts, overlap_y_xts, weights, intercept)
BAICX
BAICX 0.8368529
summary(fit)$r.squared
[1] 0.8368529
Standard errors
\[ \begin{aligned} \sigma_{\hat{\beta}}^{2}&=\sigma_{\varepsilon}^{2}(X^\mathrm{T}WX)^{-1}\\ &=\frac{(1-R^{2})}{n-p}(X^\mathrm{T}WX)^{-1}\\ &=\frac{SSE}{df_{E}}(X^\mathrm{T}WX)^{-1}\\ \sigma_{\hat{\alpha}}^{2}&=\sigma_{\varepsilon}^{2}\left(\frac{1}{n}+\mu^\mathrm{T}(X^\mathrm{T}WX)^{-1}\mu\right) \end{aligned} \]
<- function(x, y, weights, intercept) {
lm_se
<- nrow(x)
n_rows <- ncol(x)
n_cols
<- lm_rsq(x, y, weights, intercept)
rsq
if (intercept) {
<- model.matrix(~ x)
x <- sweep(y, 2, apply(y, 2, weighted.mean, w = weights), "-")
y
<- n_rows - n_cols - 1
df_resid
else {
} <- n_rows - n_cols
df_resid
}
<- crossprod(y, diag(weights)) %*% y
var_y <- as.numeric((1 - rsq) * var_y / df_resid)
var_resid
<- sqrt(var_resid * diag(solve(crossprod(x, diag(weights)) %*% x)))
result
return(result)
}
lm_se(overlap_x_xts, overlap_y_xts, weights, intercept)
(Intercept) xSP500 xDTWEXAFEGS xDGS10 xBAMLH0A0HYM2
5.063519e-05 2.032211e-02 3.664813e-02 1.990520e-01 1.726781e-01
coef(summary(fit))[ , "Std. Error"]
(Intercept) overlap_x_xtsSP500 overlap_x_xtsDTWEXAFEGS
5.063519e-05 2.032211e-02 3.664813e-02
overlap_x_xtsDGS10 overlap_x_xtsBAMLH0A0HYM2
1.990520e-01 1.726781e-01
Shapley values
\[ R^{2}_{i}=\sum_{S\subseteq N\setminus\{i\}}{\frac{|S|!\;(n-|S|-1)!}{n!}}(R^{2}(S\cup\{i\})-R^{2}(S)) \]
<- function(x, y, weights, intercept) {
lm_shap
<- nrow(x)
n_rows <- ncol(x)
n_cols <- 2 ^ n_cols
n_combn <- array(0, n_combn)
n_vec <- matrix(0, nrow = n_cols, ncol = n_combn)
ix_mat <- array(0, n_combn)
rsq <- array(0, n_cols)
result
# number of binary combinations
for (k in 1:n_combn) {
<- 0
n <- k - 1
n_size
# find the binary combination
for (j in 1:n_cols) {
if (n_size %% 2 == 0) {
<- n + 1
n
= j - 1 + 1
ix_mat[j, k]
}
<- n_size %/% 2
n_size
}
<- n
n_vec[k]
if (n > 0) {
<- which(ix_mat[ , k] != 0)
ix_subset<- x[ , ix_subset]
x_subset
<- lm_rsq(x_subset, y, weights, intercept)
rsq[k]
}
}
# calculate the exact Shapley value for r-squared
for (j in 1:n_cols) {
<- which(ix_mat[j, ] != 0)
ix_pos <- which(ix_mat[j, ] == 0)
ix_neg <- n_vec[ix_neg]
ix_n <- rsq[ix_pos] - rsq[ix_neg]
rsq_diff
for (k in 1:(n_combn / 2)) {
<- ix_n[k]
s <- factorial(s) * factorial(n_cols - s - 1) / factorial(n_cols)
weight <- result[j] + weight * rsq_diff[k]
result[j]
}
}
return(result)
}
lm_shap(overlap_x_xts, overlap_y_xts, weights, intercept)
[1] 0.2881453 0.1012853 0.2406915 0.2067309
Principal component regression
library(pls)
<- 1 comps
Coefficients
\[ \begin{aligned} W_{k}&=\mathbf{X}V_{k}=[\mathbf{X}\mathbf{v}_{1},\ldots,\mathbf{X}\mathbf{v}_{k}]\\ {\widehat{\gamma}}_{k}&=\left(W_{k}^\mathrm{T}W_{k}\right)^{-1}W_{k}^\mathrm{T}\mathbf{Y}\\ {\widehat{\boldsymbol{\beta}}}_{k}&=V_{k}{\widehat{\gamma}}_{k} \end{aligned} \]
<- function(x, y, comps) {
pcr_coef
<- sweep(x, 2, colMeans(x), "-")
x <- eigen(cov(x))
LV <- LV[["vectors"]]
V
<- x %*% V
W <- solve(crossprod(W)) %*% (crossprod(W, y))
gamma
<- V[ , 1:comps] %*% as.matrix(gamma[1:comps])
result
return(result)
}
<- scale(overlap_x_xts) scale_x_xts
t(pcr_coef(scale_x_xts, overlap_y_xts, comps))
[,1] [,2] [,3] [,4]
[1,] 0.0007589402 -0.0006048524 0.0003589513 0.0005820554
t(pcr_coef(overlap_x_xts, overlap_y_xts, comps))
[,1] [,2] [,3] [,4]
[1,] 0.3894184 -0.07539636 0.00743874 0.03004974
<- pcr(reformulate(termlabels = ".", response = tickers),
fit data = merge(scale_x_xts, overlap_y_xts), ncomp = comps)
coef(fit)[ , , 1]
SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
0.0007589402 -0.0006048524 0.0003589513 0.0005820554
R-squared
<- function(x, y, comps) {
pcr_rsq
<- pcr_coef(x, y, comps)
coef
<- sweep(x, 2, colMeans(x), "-")
x <- sweep(y, 2, colMeans(y), "-")
y
<- (t(coef) %*% crossprod(x) %*% coef) / crossprod(y)
result
return(result)
}
pcr_rsq(scale_x_xts, overlap_y_xts, comps)
BAICX
BAICX 0.7445836
pcr_rsq(overlap_x_xts, overlap_y_xts, comps)
BAICX
BAICX 0.5939657
R2(fit)$val[comps + 1]
[1] 0.7445836
Standard errors
\[ \begin{aligned} \text{Var}({\widehat{\boldsymbol{\beta}}}_{k})&=\sigma^{2}V_{k}(W_{k}^\mathrm{T}W_{k})^{-1}V_{k}^\mathrm{T}\\ &=\sigma^{2}V_{k}\text{diag}\left(\lambda_{1}^{-1},\ldots,\lambda_{k}^{-1}\right)V_{k}^\mathrm{T}\\ &=\sigma^{2}\sum_{j=1}^{k}{\frac{\mathbf{v}_{j}\mathbf{v}_{j}^\mathrm{T}}{\lambda_{j}}} \end{aligned} \]
# unable to verify the result
<- function(x, y, comps) {
pcr_se
<- nrow(x)
n_rows <- ncol(x)
n_cols
<- pcr_rsq(x, y, comps)
rsq
<- sweep(y, 2, colMeans(y), "-")
y
<- n_rows - n_cols - 1
df_resid
<- crossprod(y)
var_y <- as.numeric((1 - rsq) * var_y / df_resid)
var_resid
<- eigen(cov(x))
LV <- LV$values[1:comps] * (n_rows - 1)
L <- LV$vectors[ , 1:comps]
V
<- sqrt(var_resid * diag(V %*% sweep(t(V), 1, 1 / L, "*")))
result
return(result)
}
pcr_se(scale_x_xts, overlap_y_xts, comps)
[1] 2.828308e-05 2.254076e-05 1.337688e-05 2.169119e-05
pcr_se(overlap_x_xts, overlap_y_xts, comps)
[1] 0.0204865387 0.0039664551 0.0003913375 0.0015808580
Partial least squares
Risk decomposition
Standalone risk
\[ \begin{aligned} \text{SAR}_{k}&=\sqrt{w_{k}^{2}\sigma_{k}^{2}}\\ \text{SAR}_{\varepsilon}&=\sqrt{(1-R^{2})\sigma_{y}^{2}} \end{aligned} \]
<- function(x, y, weights, intercept) {
lm_sar
<- lm_coef(x, y, weights, intercept)
coef <- lm_rsq(x, y, weights, intercept)
rsq
if (intercept) x <- model.matrix(~ x)
<- cov.wt(cbind(x, y), wt = weights, center = intercept)$cov
sigma <- coef ^ 2 * diag(sigma[-ncol(sigma), -ncol(sigma)])
sar <- (1 - rsq) * sigma[ncol(sigma), ncol(sigma)]
sar_eps
<- sqrt(c(sigma[ncol(sigma), ncol(sigma)],
result
sar,
sar_eps))
return(result)
}
lm_sar(overlap_x_xts, overlap_y_xts, weights, intercept) * sqrt(scale[["periods"]] * scale[["overlap"]])
[1] 0.066977835 0.000000000 0.021934302 0.003672181 0.037345734 0.030509146
[7] 0.027053335
Risk contribution
\[ \begin{aligned} \text{MCR}&=w^\mathrm{T}\frac{\partial\sigma_{y}}{\partial w}\\ &=w^\mathrm{T}\frac{\Sigma w}{\sigma_{y}}\\ \text{MCR}_{\varepsilon}&=\sigma_{y}-\sum_{k=1}^{n}\text{MCR}_{k} \end{aligned} \]
<- function(x, y, weights, intercept) {
lm_mcr
<- lm_coef(x, y, weights, intercept)
coef <- lm_rsq(x, y, weights, intercept)
rsq
if (intercept) x <- model.matrix(~ x)
<- cov.wt(cbind(x, y), wt = weights, center = intercept)$cov
sigma <- coef * sigma[-ncol(sigma), -ncol(sigma)] %*% coef / sqrt(sigma[ncol(sigma), ncol(sigma)])
mcr <- sqrt(sigma[ncol(sigma), ncol(sigma)]) - sum(mcr)
mcr_eps
<- c(sqrt(sigma[ncol(sigma), ncol(sigma)]),
result
mcr,
mcr_eps)
return(result)
}
lm_mcr(overlap_x_xts, overlap_y_xts, weights, intercept) * sqrt(scale[["periods"]] * scale[["overlap"]])
[1] 0.066977835 0.000000000 0.016484491 0.001881363 0.020521169 0.017163572
[7] 0.010927241
Scenario analysis
Implied shocks
\[ \begin{aligned} \hat{\beta}&=(Z^\mathrm{T}WZ)^{-1}Z^\mathrm{T}WX \end{aligned} \]
<- function(shocks, x, z, weights) {
implied_shocks
<- solve(crossprod(z, diag(weights) %*% z)) %*% crossprod(z, diag(weights) %*% x)
beta
<- shocks %*% beta
result
return(result)
}
<- c(-0.1, 0.1)
shocks <- overlap_x_xts[ , 1:2] overlap_z_xts
implied_shocks(shocks, overlap_x_xts, overlap_z_xts, weights)
SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
[1,] -0.1 0.1 -0.009510651 -0.005161763
Stress P&L
<- function(shocks, x, y, z, weights, intercept) {
pnl_stress
<- lm_coef(x, y, weights, intercept)
coef
if (intercept) x <- model.matrix(~ x)
<- t(coef) * implied_shocks(shocks, x, z, weights)
result
return(result)
}
pnl_stress(shocks, overlap_x_xts, overlap_y_xts, overlap_z_xts, weights, intercept)
(Intercept) xSP500 xDTWEXAFEGS xDGS10 xBAMLH0A0HYM2
BAICX 5.29387e-05 -0.01721972 -0.006432012 -0.03204641 -0.01053289