Risk

analysis
finance
r
Author
Published

November 23, 2025

factors_r <- c("SP500", "DTWEXAFEGS") # "SP500" does not contain dividends; note: "DTWEXM" discontinued as of Jan 2020
factors_d <- c("DGS10", "BAMLH0A0HYM2")
tickers <- "BAICX" # fund inception date is "2011-11-28" 
intercept <- TRUE

Regression analysis

Ordinary least squares

Coefficients

\[ \begin{aligned} \hat{\beta}=(X^\mathrm{T}WX)^{-1}X^\mathrm{T}Wy \end{aligned} \]

lm_coef <- function(x, y, weights, intercept) {
  
  if (intercept) x <- model.matrix(~ x)
  
  result <- solve(crossprod(x, diag(weights)) %*% x) %*% crossprod(x, diag(weights) %*% y)
  
  return(result)
  
}
t(lm_coef(overlap_x_xts, overlap_y_xts, weights, intercept))
          (Intercept)    xSP500 xDTWEXAFEGS   xDGS10 xBAMLH0A0HYM2
adjclose 0.0001148044 0.1835515 -0.09882677 3.127433      1.137756
if (intercept) {
  fit <- lm(overlap_y_xts ~ overlap_x_xts, weights = weights)
} else {
  fit <- lm(overlap_y_xts ~ overlap_x_xts - 1, weights = weights)
}
    
coef(fit)
              (Intercept)        overlap_x_xtsSP500   overlap_x_xtsDTWEXAFEGS 
             0.0001148044              0.1835515084             -0.0988267686 
       overlap_x_xtsDGS10 overlap_x_xtsBAMLH0A0HYM2 
             3.1274329176              1.1377557240 

R-squared

\[ \begin{aligned} R^{2}=\frac{\hat{\beta}^\mathrm{T}(X^\mathrm{T}WX)\hat{\beta}}{y^\mathrm{T}Wy} \end{aligned} \]

lm_rsq <- function(x, y, weights, intercept) {
        
  coef <- lm_coef(x, y, weights, intercept)
  
  if (intercept) {
      
    x <- model.matrix(~ x)
    x <- sweep(x, 2, apply(x, 2, weighted.mean, w = weights), "-")
    y <- sweep(y, 2, apply(y, 2, weighted.mean, w = weights), "-")
      
  }
  
  result <- (t(coef) %*% (crossprod(x, diag(weights)) %*% x) %*% coef) / (crossprod(y, diag(weights)) %*% y)
  
  return(result)
    
}
lm_rsq(overlap_x_xts, overlap_y_xts, weights, intercept)
          adjclose
adjclose 0.7323695
summary(fit)$r.squared
[1] 0.7323695

Standard errors

\[ \begin{aligned} \sigma_{\hat{\beta}}^{2}&=\sigma_{\varepsilon}^{2}(X^\mathrm{T}WX)^{-1}\\ &=\frac{(1-R^{2})}{n-p}(X^\mathrm{T}WX)^{-1}\\ &=\frac{SSE}{df_{E}}(X^\mathrm{T}WX)^{-1}\\ \sigma_{\hat{\alpha}}^{2}&=\sigma_{\varepsilon}^{2}\left(\frac{1}{n}+\mu^\mathrm{T}(X^\mathrm{T}WX)^{-1}\mu\right) \end{aligned} \]

lm_se <- function(x, y, weights, intercept) {
    
  n_rows <- nrow(x)
  n_cols <- ncol(x)
  
  rsq <- lm_rsq(x, y, weights, intercept)
  
  if (intercept) {
      
    x <- model.matrix(~ x)
    y <- sweep(y, 2, apply(y, 2, weighted.mean, w = weights), "-")
    
    df_resid <- n_rows - n_cols - 1
      
  } else {
    df_resid <- n_rows - n_cols
  }
  
  var_y <- crossprod(y, diag(weights)) %*% y
  var_resid <- as.numeric((1 - rsq) * var_y / df_resid)
  
  result <- sqrt(var_resid * diag(solve(crossprod(x, diag(weights)) %*% x)))
  
  return(result)
    
}
lm_se(overlap_x_xts, overlap_y_xts, weights, intercept)
  (Intercept)        xSP500   xDTWEXAFEGS        xDGS10 xBAMLH0A0HYM2 
 6.290672e-05  1.951342e-02  3.409770e-02  2.558955e-01  2.250266e-01 
coef(summary(fit))[ , "Std. Error"]
              (Intercept)        overlap_x_xtsSP500   overlap_x_xtsDTWEXAFEGS 
             6.290672e-05              1.951342e-02              3.409770e-02 
       overlap_x_xtsDGS10 overlap_x_xtsBAMLH0A0HYM2 
             2.558955e-01              2.250266e-01 

Shapley values

\[ R^{2}_{i}=\sum_{S\subseteq N\setminus\{i\}}{\frac{|S|!\;(n-|S|-1)!}{n!}}(R^{2}(S\cup\{i\})-R^{2}(S)) \]

lm_shap <- function(x, y, weights, intercept) {
  
  n_rows <- nrow(x)
  n_cols <- ncol(x)
  n_combn <- 2 ^ n_cols
  n_vec <- array(0, n_combn)
  ix_mat <- matrix(0, nrow = n_cols, ncol = n_combn)
  rsq <- array(0, n_combn)
  result <- array(0, n_cols)
  
  # number of binary combinations
  for (k in 1:n_combn) {
    
    n <- 0
    n_size <- k - 1
    
    # find the binary combination
    for (j in 1:n_cols) {
      
      if (n_size %% 2 == 0) {
        
        n <- n + 1
        
        ix_mat[j, k] = j - 1 + 1
        
      }
      
      n_size <- n_size %/% 2
      
    }
    
    n_vec[k] <- n
    
    if (n > 0) {
      
      ix_subset<- which(ix_mat[ , k] != 0)
      x_subset <- x[ , ix_subset]
      
      rsq[k] <- lm_rsq(x_subset, y, weights, intercept)

    }
    
  }

  # calculate the exact Shapley value for r-squared
  for (j in 1:n_cols) {

    ix_pos <- which(ix_mat[j, ] != 0)
    ix_neg <- which(ix_mat[j, ] == 0)
    ix_n <- n_vec[ix_neg]
    rsq_diff <- rsq[ix_pos] - rsq[ix_neg]

    for (k in 1:(n_combn / 2)) {

      s <- ix_n[k]
      weight <- factorial(s) * factorial(n_cols - s - 1) / factorial(n_cols)
      result[j] <- result[j] + weight * rsq_diff[k]

    }
    
  }

  return(result)
  
}
lm_shap(overlap_x_xts, overlap_y_xts, weights, intercept)
[1] 0.31943714 0.01661091 0.15779886 0.23852257

Principal component regression

library(pls) # "Error in mvrValstats(object = fit, estimate = 'train'): could not find function 'mvrValstats'"
comps <- 1

Coefficients

\[ \begin{aligned} W_{k}&=\mathbf{X}V_{k}=[\mathbf{X}\mathbf{v}_{1},\ldots,\mathbf{X}\mathbf{v}_{k}]\\ {\widehat{\gamma}}_{k}&=\left(W_{k}^\mathrm{T}W_{k}\right)^{-1}W_{k}^\mathrm{T}\mathbf{Y}\\ {\widehat{\boldsymbol{\beta}}}_{k}&=V_{k}{\widehat{\gamma}}_{k} \end{aligned} \]

pcr_coef <- function(x, y, comps) {
  
  x <- sweep(x, 2, colMeans(x), "-")
  LV <- eigen(cov(x))
  V <- LV[["vectors"]]
  
  W <- x %*% V
  gamma <- solve(crossprod(W)) %*% (crossprod(W, y))
  
  result <- V[ , 1:comps] %*% as.matrix(gamma[1:comps])
  
  return(result)
  
}
scale_x_xts <- scale(overlap_x_xts)
t(pcr_coef(scale_x_xts, overlap_y_xts, comps))
             [,1]         [,2]          [,3]         [,4]
[1,] 0.0006366355 5.409393e-05 -0.0001367081 0.0006402239
t(pcr_coef(overlap_x_xts, overlap_y_xts, comps))
          [,1]        [,2]          [,3]       [,4]
[1,] 0.2530657 0.003433602 -0.0006267485 0.01777103
fit <- pls::pcr(reformulate(termlabels = ".", response = tickers),
                data = merge(scale_x_xts, overlap_y_xts), ncomp = comps)
coef(fit)[ , , 1]
        SP500    DTWEXAFEGS         DGS10  BAMLH0A0HYM2 
 6.366355e-04  5.409393e-05 -1.367081e-04  6.402239e-04 

R-squared

pcr_rsq <- function(x, y, comps) {
  
  coef <- pcr_coef(x, y, comps)
  
  x <- sweep(x, 2, colMeans(x), "-")
  y <- sweep(y, 2, colMeans(y), "-")
  
  result <- (t(coef) %*% crossprod(x) %*% coef) / crossprod(y)
  
  return(result)
  
}
pcr_rsq(scale_x_xts, overlap_y_xts, comps)
          adjclose
adjclose 0.4458173
pcr_rsq(overlap_x_xts, overlap_y_xts, comps)
          adjclose
adjclose 0.5256481
pls::R2(fit)$val[comps + 1]
[1] 0.4458173

Standard errors

\[ \begin{aligned} \text{Var}({\widehat{\boldsymbol{\beta}}}_{k})&=\sigma^{2}V_{k}(W_{k}^\mathrm{T}W_{k})^{-1}V_{k}^\mathrm{T}\\ &=\sigma^{2}V_{k}\text{diag}\left(\lambda_{1}^{-1},\ldots,\lambda_{k}^{-1}\right)V_{k}^\mathrm{T}\\ &=\sigma^{2}\sum_{j=1}^{k}{\frac{\mathbf{v}_{j}\mathbf{v}_{j}^\mathrm{T}}{\lambda_{j}}} \end{aligned} \]

# unable to verify the result
pcr_se <- function(x, y, comps) {
  
  n_rows <- nrow(x)
  n_cols <- ncol(x)
  
  rsq <- pcr_rsq(x, y, comps)
  
  y <- sweep(y, 2, colMeans(y), "-")
  
  df_resid <- n_rows - n_cols - 1
  
  var_y <- crossprod(y)
  var_resid <- as.numeric((1 - rsq) * var_y / df_resid)
  
  LV <- eigen(cov(x))
  L <- LV$values[1:comps] * (n_rows - 1)
  V <- LV$vectors[ , 1:comps]
  
  result <- sqrt(var_resid * diag(V %*% sweep(t(V), 1, 1 / L, "*")))
  
  return(result)
  
}
pcr_se(scale_x_xts, overlap_y_xts, comps)
[1] 4.516380e-05 3.837497e-06 9.698262e-06 4.541836e-05
pcr_se(overlap_x_xts, overlap_y_xts, comps)
[1] 1.529634e-02 2.075412e-04 3.788329e-05 1.074155e-03

Partial least squares

Risk decomposition

Standalone risk

\[ \begin{aligned} \text{SAR}_{k}&=\sqrt{w_{k}^{2}\sigma_{k}^{2}}\\ \text{SAR}_{\varepsilon}&=\sqrt{(1-R^{2})\sigma_{y}^{2}} \end{aligned} \]

lm_sar <- function(x, y, weights, intercept) {
  
  coef <- lm_coef(x, y, weights, intercept)
  rsq <- lm_rsq(x, y, weights, intercept)
  
  if (intercept) x <- model.matrix(~ x)
  
  sigma <- cov.wt(cbind(x, y), wt = weights, center = intercept)$cov
  sar <- coef ^ 2 * diag(sigma[-ncol(sigma), -ncol(sigma)])
  sar_eps <- (1 - rsq) * sigma[ncol(sigma), ncol(sigma)]
  
  result <- sqrt(c(sigma[ncol(sigma), ncol(sigma)],
                   sar,
                   sar_eps))
  
  return(result)
  
}
lm_sar(overlap_x_xts, overlap_y_xts, weights, intercept) * sqrt(scale[["periods"]] * scale[["overlap"]])
[1] 0.065659480 0.000000000 0.034352498 0.006374233 0.027040194 0.018548342
[7] 0.033967631

Risk contribution

\[ \begin{aligned} \text{MCR}&=w^\mathrm{T}\frac{\partial\sigma_{y}}{\partial w}\\ &=w^\mathrm{T}\frac{\Sigma w}{\sigma_{y}}\\ \text{MCR}_{\varepsilon}&=\sigma_{y}-\sum_{k=1}^{n}\text{MCR}_{k} \end{aligned} \]

lm_mcr <- function(x, y, weights, intercept) {
  
  coef <- lm_coef(x, y, weights, intercept)
  rsq <- lm_rsq(x, y, weights, intercept)
  
  if (intercept) x <- model.matrix(~ x)
  
  sigma <- cov.wt(cbind(x, y), wt = weights, center = intercept)$cov
  mcr <- coef * sigma[-ncol(sigma), -ncol(sigma)] %*% coef / sqrt(sigma[ncol(sigma), ncol(sigma)])
  mcr_eps <- sqrt(sigma[ncol(sigma), ncol(sigma)]) - sum(mcr)
  
  result <- c(sqrt(sigma[ncol(sigma), ncol(sigma)]),
              mcr,
              mcr_eps)
  
  return(result)
  
}
lm_mcr(overlap_x_xts, overlap_y_xts, weights, intercept) * sqrt(scale[["periods"]] * scale[["overlap"]])
[1] 0.0656594798 0.0000000000 0.0249203161 0.0009503171 0.0100058000
[6] 0.0122105660 0.0175724806

Scenario analysis

Implied shocks

\[ \begin{aligned} \hat{\beta}&=(Z^\mathrm{T}WZ)^{-1}Z^\mathrm{T}WX \end{aligned} \]

implied_shocks <- function(shocks, x, z, weights) {
  
  beta <- solve(crossprod(z, diag(weights) %*% z)) %*% crossprod(z, diag(weights) %*% x)
  
  result <- shocks %*% beta
  
  return(result)
  
}
shocks <- c(-0.1, 0.1)
overlap_z_xts <- overlap_x_xts[ , 1:2]
implied_shocks(shocks, overlap_x_xts, overlap_z_xts, weights)
     SP500 DTWEXAFEGS        DGS10 BAMLH0A0HYM2
[1,]  -0.1        0.1 -0.002162996 -0.007236961

Stress P&L

pnl_stress <- function(shocks, x, y, z, weights, intercept) {
  
  coef <- lm_coef(x, y, weights, intercept)
  
  if (intercept) x <- model.matrix(~ x)
  
  result <- t(coef) * implied_shocks(shocks, x, z, weights)
  
  return(result)    
  
}
pnl_stress(shocks, overlap_x_xts, overlap_y_xts, overlap_z_xts, weights, intercept)
          (Intercept)      xSP500  xDTWEXAFEGS       xDGS10 xBAMLH0A0HYM2
adjclose -0.001187474 -0.01835515 -0.009882677 -0.006764626  -0.008233894