factors_r <- c("SP500", "DTWEXAFEGS") # "SP500" does not contain dividends; note: "DTWEXM" discontinued as of Jan 2020
factors_d <- c("DGS10", "BAMLH0A0HYM2")tickers <- "BAICX" # fund inception date is "2011-11-28" intercept <- TRUERegression analysis
Ordinary least squares
Coefficients
\[ \begin{aligned} \hat{\beta}=(X^\mathrm{T}WX)^{-1}X^\mathrm{T}Wy \end{aligned} \]
lm_coef <- function(x, y, weights, intercept) {
# cbind.xts() changes names, e.g., "(Intercept)" => "X.Intercept."
if (intercept) x <- cbind("(Intercept)" = 1, as.matrix(x))
result <- solve(crossprod(x, diag(weights)) %*% x) %*% crossprod(x, diag(weights) %*% y)
return(result)
}t(lm_coef(overlap_x_xts, overlap_y_xts, weights, intercept)) (Intercept) SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
BAICX 0.0001536093 0.1779901 -0.0946832 3.050951 1.19191
if (intercept) {
form <- reformulate(termlabels = factors, response = tickers)
} else {
form <- reformulate(termlabels = factors, response = tickers, intercept = FALSE)
}
fit <- lm(form, data = overlap_xts, weights = weights)
coef(fit) (Intercept) SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
0.0001536093 0.1779901032 -0.0946832009 3.0509506321 1.1919095431
R-squared
\[ \begin{aligned} R^{2}=\frac{\hat{\beta}^\mathrm{T}(X^\mathrm{T}WX)\hat{\beta}}{y^\mathrm{T}Wy} \end{aligned} \]
lm_rsq <- function(x, y, weights, intercept) {
coef <- lm_coef(x, y, weights, intercept)
if (intercept) {
x <- cbind(1, x)
x <- sweep(x, 2, apply(x, 2, weighted.mean, w = weights), "-")
y <- sweep(y, 2, apply(y, 2, weighted.mean, w = weights), "-")
}
result <- (t(coef) %*% (crossprod(x, diag(weights)) %*% x) %*% coef) / (crossprod(y, diag(weights)) %*% y)
colnames(result) <- "R-squared"
return(result)
}lm_rsq(overlap_x_xts, overlap_y_xts, weights, intercept) R-squared
BAICX 0.7256021
summary(fit)$r.squared[1] 0.7256021
Standard errors
\[ \begin{aligned} \sigma_{\hat{\beta}}^{2}&=\sigma_{\varepsilon}^{2}(X^\mathrm{T}WX)^{-1}\\ &=\frac{(1-R^{2})}{n-p}(X^\mathrm{T}WX)^{-1}\\ &=\frac{SSE}{df_{E}}(X^\mathrm{T}WX)^{-1}\\ \sigma_{\hat{\alpha}}^{2}&=\sigma_{\varepsilon}^{2}\left(\frac{1}{n}+\mu^\mathrm{T}(X^\mathrm{T}WX)^{-1}\mu\right) \end{aligned} \]
lm_se <- function(x, y, weights, intercept) {
n_rows <- nrow(x)
n_cols <- ncol(x)
rsq <- lm_rsq(x, y, weights, intercept)
if (intercept) {
# cbind.xts() changes names, e.g., "(Intercept)" => "X.Intercept."
x <- cbind("(Intercept)" = 1, as.matrix(x))
y <- sweep(y, 2, apply(y, 2, weighted.mean, w = weights), "-")
df_resid <- n_rows - n_cols - 1
} else {
df_resid <- n_rows - n_cols
}
var_y <- crossprod(y, diag(weights)) %*% y
var_resid <- as.numeric((1 - rsq) * var_y / df_resid)
result <- sqrt(var_resid * diag(solve(crossprod(x, diag(weights)) %*% x)))
return(result)
}lm_se(overlap_x_xts, overlap_y_xts, weights, intercept) (Intercept) SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
6.384386e-05 1.973178e-02 3.373159e-02 2.656458e-01 2.285767e-01
coef(summary(fit))[ , "Std. Error"] (Intercept) SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
6.384386e-05 1.973178e-02 3.373159e-02 2.656458e-01 2.285767e-01
Shapley values
\[ R^{2}_{i}=\sum_{S\subseteq N\setminus\{i\}}{\frac{|S|!\;(n-|S|-1)!}{n!}}(R^{2}(S\cup\{i\})-R^{2}(S)) \]
lm_shap <- function(x, y, weights, intercept) {
n_rows <- nrow(x)
n_cols <- ncol(x)
n_combn <- 2 ^ n_cols
n_vec <- array(0, n_combn)
ix_mat <- matrix(0, nrow = n_cols, ncol = n_combn)
rsq <- array(0, n_combn)
result <- array(0, n_cols)
# number of binary combinations
for (k in 1:n_combn) {
n <- 0
n_size <- k - 1
# find the binary combination
for (j in 1:n_cols) {
if (n_size %% 2 == 0) {
n <- n + 1
ix_mat[j, k] = j - 1 + 1
}
n_size <- n_size %/% 2
}
n_vec[k] <- n
if (n > 0) {
ix_subset<- which(ix_mat[ , k] != 0)
x_subset <- x[ , ix_subset]
rsq[k] <- lm_rsq(x_subset, y, weights, intercept)
}
}
# calculate the exact Shapley value for r-squared
for (j in 1:n_cols) {
ix_pos <- which(ix_mat[j, ] != 0)
ix_neg <- which(ix_mat[j, ] == 0)
ix_n <- n_vec[ix_neg]
rsq_diff <- rsq[ix_pos] - rsq[ix_neg]
for (k in 1:(n_combn / 2)) {
s <- ix_n[k]
weight <- factorial(s) * factorial(n_cols - s - 1) / factorial(n_cols)
result[j] <- result[j] + weight * rsq_diff[k]
}
}
return(result)
}lm_shap(overlap_x_xts, overlap_y_xts, weights, intercept)[1] 0.32792233 0.01160854 0.13432945 0.25174178
Principal component regression
library(pls) # "Error in mvrValstats(object = fit, estimate = 'train'): could not find function 'mvrValstats'"comps <- 1Coefficients
\[ \begin{aligned} W_{k}&=\mathbf{X}V_{k}=[\mathbf{X}\mathbf{v}_{1},\ldots,\mathbf{X}\mathbf{v}_{k}]\\ {\widehat{\gamma}}_{k}&=\left(W_{k}^\mathrm{T}W_{k}\right)^{-1}W_{k}^\mathrm{T}\mathbf{Y}\\ {\widehat{\boldsymbol{\beta}}}_{k}&=V_{k}{\widehat{\gamma}}_{k} \end{aligned} \]
pcr_coef <- function(x, y, comps) {
x <- sweep(x, 2, colMeans(x), "-")
LV <- eigen(cov(x))
V <- LV[["vectors"]]
W <- x %*% V
gamma <- solve(crossprod(W)) %*% (crossprod(W, y))
result <- V[ , 1:comps] %*% as.matrix(gamma[1:comps])
return(result)
}scale_x_xts <- scale(overlap_x_xts)t(pcr_coef(scale_x_xts, overlap_y_xts, comps)) [,1] [,2] [,3] [,4]
[1,] 0.000631489 3.208739e-05 -0.0001550032 0.0006375356
t(pcr_coef(overlap_x_xts, overlap_y_xts, comps)) [,1] [,2] [,3] [,4]
[1,] 0.2516995 0.001967459 -0.0007330177 0.01786966
fit <- pls::pcr(reformulate(termlabels = ".", response = tickers),
data = merge(scale_x_xts, overlap_y_xts), ncomp = comps)
coef(fit)[ , , 1] SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
6.314890e-04 3.208739e-05 -1.550032e-04 6.375356e-04
R-squared
pcr_rsq <- function(x, y, comps) {
coef <- pcr_coef(x, y, comps)
x <- sweep(x, 2, colMeans(x), "-")
y <- sweep(y, 2, colMeans(y), "-")
result <- (t(coef) %*% crossprod(x) %*% coef) / crossprod(y)
colnames(result) <- "R-squared"
return(result)
}pcr_rsq(scale_x_xts, overlap_y_xts, comps) R-squared
BAICX 0.4658215
pcr_rsq(overlap_x_xts, overlap_y_xts, comps) R-squared
BAICX 0.5472496
pls::R2(fit)$val[comps + 1][1] 0.4658215
Standard errors
\[ \begin{aligned} \text{Var}({\widehat{\boldsymbol{\beta}}}_{k})&=\sigma^{2}V_{k}(W_{k}^\mathrm{T}W_{k})^{-1}V_{k}^\mathrm{T}\\ &=\sigma^{2}V_{k}\text{diag}\left(\lambda_{1}^{-1},\ldots,\lambda_{k}^{-1}\right)V_{k}^\mathrm{T}\\ &=\sigma^{2}\sum_{j=1}^{k}{\frac{\mathbf{v}_{j}\mathbf{v}_{j}^\mathrm{T}}{\lambda_{j}}} \end{aligned} \]
# unable to verify the result
pcr_se <- function(x, y, comps) {
n_rows <- nrow(x)
n_cols <- ncol(x)
rsq <- pcr_rsq(x, y, comps)
y <- sweep(y, 2, colMeans(y), "-")
df_resid <- n_rows - n_cols - 1
var_y <- crossprod(y)
var_resid <- as.numeric((1 - rsq) * var_y / df_resid)
LV <- eigen(cov(x))
L <- LV$values[1:comps] * (n_rows - 1)
V <- LV$vectors[ , 1:comps]
result <- sqrt(var_resid * diag(V %*% sweep(t(V), 1, 1 / L, "*")))
return(result)
}pcr_se(scale_x_xts, overlap_y_xts, comps)[1] 4.302796e-05 2.186349e-06 1.056150e-05 4.343996e-05
pcr_se(overlap_x_xts, overlap_y_xts, comps)[1] 1.456701e-02 1.138659e-04 4.242313e-05 1.034200e-03
Partial least squares
Risk decomposition
Standalone risk
\[ \begin{aligned} \text{SAR}_{k}&=\sqrt{w_{k}^{2}\sigma_{k}^{2}}\\ \text{SAR}_{\varepsilon}&=\sqrt{(1-R^{2})\sigma_{y}^{2}} \end{aligned} \]
lm_sar <- function(x, y, weights, intercept) {
coef <- lm_coef(x, y, weights, intercept)
rsq <- lm_rsq(x, y, weights, intercept)
if (intercept) x <- cbind(1, x)
sigma <- cov.wt(cbind(x, y), wt = weights, center = intercept)$cov
sar <- coef ^ 2 * diag(sigma[-ncol(sigma), -ncol(sigma)])
sar_eps <- (1 - rsq) * sigma[ncol(sigma), ncol(sigma)]
result <- sqrt(c(sigma[ncol(sigma), ncol(sigma)],
sar,
sar_eps))
return(result)
}lm_sar(overlap_x_xts, overlap_y_xts, weights, intercept) * sqrt(scale[["periods"]] * scale[["overlap"]])[1] 0.064279340 0.000000000 0.033455561 0.006041665 0.025004902 0.019473573
[7] 0.033671449
Risk contribution
\[ \begin{aligned} \text{MCR}&=w^\mathrm{T}\frac{\partial\sigma_{y}}{\partial w}\\ &=w^\mathrm{T}\frac{\Sigma w}{\sigma_{y}}\\ \text{MCR}_{\varepsilon}&=\sigma_{y}-\sum_{k=1}^{n}\text{MCR}_{k} \end{aligned} \]
lm_mcr <- function(x, y, weights, intercept) {
coef <- lm_coef(x, y, weights, intercept)
rsq <- lm_rsq(x, y, weights, intercept)
if (intercept) x <- cbind(1, x)
sigma <- cov.wt(cbind(x, y), wt = weights, center = intercept)$cov
mcr <- coef * sigma[-ncol(sigma), -ncol(sigma)] %*% coef / sqrt(sigma[ncol(sigma), ncol(sigma)])
mcr_eps <- sqrt(sigma[ncol(sigma), ncol(sigma)]) - sum(mcr)
result <- c(sqrt(sigma[ncol(sigma), ncol(sigma)]),
mcr,
mcr_eps)
return(result)
}lm_mcr(overlap_x_xts, overlap_y_xts, weights, intercept) * sqrt(scale[["periods"]] * scale[["overlap"]])[1] 0.0642793401 0.0000000000 0.0247478666 0.0007043266 0.0080525048
[6] 0.0131365257 0.0176381163
Scenario analysis
Implied shocks
\[ \begin{aligned} \hat{\beta}&=(Z^\mathrm{T}WZ)^{-1}Z^\mathrm{T}WX \end{aligned} \]
implied_shocks <- function(shocks, x, z, weights) {
beta <- solve(crossprod(z, diag(weights) %*% z)) %*% crossprod(z, diag(weights) %*% x)
result <- shocks %*% beta
return(result)
}shocks <- c(-0.1, 0.1)
overlap_z_xts <- overlap_x_xts[ , 1:2]implied_shocks(shocks, overlap_x_xts, overlap_z_xts, weights) SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
[1,] -0.1 0.1 -0.001318463 -0.006789592
Stress P&L
pnl_stress <- function(shocks, x, y, z, weights, intercept) {
coef <- lm_coef(x, y, weights, intercept)
# cbind.xts() changes names, e.g., "(Intercept)" => "X.Intercept."
if (intercept) x <- cbind("(Intercept)" = 1, as.matrix(x))
result <- t(coef) * implied_shocks(shocks, x, z, weights)
return(result)
}pnl_stress(shocks, overlap_x_xts, overlap_y_xts, overlap_z_xts, weights, intercept) (Intercept) SP500 DTWEXAFEGS DGS10 BAMLH0A0HYM2
BAICX -0.002182029 -0.01779901 -0.00946832 -0.004022565 -0.00809258