Risk

analysis
finance
python
Author
Published

November 23, 2025

factors_r = ["SP500", "DTWEXAFEGS"] # "SP500" does not contain dividends; note: "DTWEXM" discontinued as of Jan 2020
factors_d = ["DGS10", "BAMLH0A0HYM2"]
tickers = ["BAICX"] # fund inception date is "2011-11-28"
intercept = True

Regression analysis

import math
import statsmodels.api as sm

Ordinary least squares

Coefficients

\[ \begin{aligned} \hat{\beta}=(X^\mathrm{T}WX)^{-1}X^\mathrm{T}Wy \end{aligned} \]

def lm_coef(x, y, weights, intercept):
  
  if (intercept): x = sm.add_constant(x)
      
  result = np.dot(np.linalg.inv(np.dot(x.T, np.multiply(weights, x))),
                  np.dot(x.T, np.multiply(weights, y)))
  
  return np.ravel(result)
lm_coef(overlap_x_df, overlap_y_df, weights, intercept)
array([ 1.14803701e-04,  1.83552059e-01, -9.88266619e-02,  3.12743059e+00,
        1.13774830e+00])
if (intercept): overlap_x_df = sm.add_constant(overlap_x_df)
    
fit = sm.WLS(overlap_y_df, overlap_x_df, weights = weights).fit()

if (intercept): overlap_x_df = overlap_x_df.iloc[:, 1:]

np.array(fit.params)
array([ 1.14803701e-04,  1.83552059e-01, -9.88266619e-02,  3.12743059e+00,
        1.13774830e+00])

R-squared

\[ \begin{aligned} R^{2}=\frac{\hat{\beta}^\mathrm{T}(X^\mathrm{T}WX)\hat{\beta}}{y^\mathrm{T}Wy} \end{aligned} \]

def lm_rsq(x, y, weights, intercept):
          
  coef = np.matrix(lm_coef(x, y, weights, intercept))
  
  if (intercept):
    
    x = sm.add_constant(x)
    x = x - np.average(x, axis = 0, weights = weights.reshape(-1))
    y = y - np.average(y, axis = 0, weights = weights.reshape(-1))
      
  result = np.dot(coef, np.dot(np.dot(x.T, np.multiply(weights, x)), coef.T)) / \
    np.dot(y.T, np.multiply(weights, y))
  
  return result.item()
lm_rsq(overlap_x_df, overlap_y_df, weights, intercept)
0.7323693095420206
fit.rsquared
np.float64(0.7323693095420203)

Standard errors

\[ \begin{aligned} \sigma_{\hat{\beta}}^{2}&=\sigma_{\varepsilon}^{2}(X^\mathrm{T}WX)^{-1}\\ &=\frac{(1-R^{2})}{n-p}(X^\mathrm{T}WX)^{-1}\\ &=\frac{SSE}{df_{E}}(X^\mathrm{T}WX)^{-1}\\ \sigma_{\hat{\alpha}}^{2}&=\sigma_{\varepsilon}^{2}\left(\frac{1}{n}+\mu^\mathrm{T}(X^\mathrm{T}WX)^{-1}\mu\right) \end{aligned} \]

def lm_se(x, y, weights, intercept):
  
  n_rows = x.shape[0]
  n_cols = x.shape[1]
  
  rsq = lm_rsq(x, y, weights, intercept)
  
  if (intercept):
    
    x = sm.add_constant(x)
    y = y - np.average(y, axis = 0, weights = weights.reshape(-1))
    
    df_resid = n_rows - n_cols - 1 
    
  else:
    df_resid = n_rows - n_cols        
  
  var_y = np.dot(y.T, np.multiply(weights, y))
  var_resid = (1 - rsq) * var_y / df_resid
  
  result = np.sqrt(var_resid * np.linalg.inv(np.dot(x.T, np.multiply(weights, x))).diagonal())
  
  return np.ravel(result)
lm_se(overlap_x_df, overlap_y_df, weights, intercept)
array([6.29067301e-05, 1.95134213e-02, 3.40977069e-02, 2.55895496e-01,
       2.25026619e-01])
np.array(fit.bse)
array([6.29067301e-05, 1.95134213e-02, 3.40977069e-02, 2.55895496e-01,
       2.25026619e-01])

Shapley values

\[ R^{2}_{i}=\sum_{S\subseteq N\setminus\{i\}}{\frac{|S|!\;(n-|S|-1)!}{n!}}(R^{2}(S\cup\{i\})-R^{2}(S)) \]

def lm_shap(x, y, weights, intercept):

  n_rows = x.shape[0]
  n_cols = x.shape[1]
  n_combn = 2 ** n_cols
  n_vec = np.zeros(n_combn)
  ix_mat = np.zeros((n_cols, n_combn))
  rsq = np.zeros(n_combn)
  result = np.zeros(n_cols)
  
  # number of binary combinations
  for k in range(n_combn):
    
    n = 0
    n_size = k
    
    # find the binary combination
    for j in range(n_cols):
      
      if n_size % 2 == 0:
        
        n += 1
        
        ix_mat[j, k] = j + 1
          
      n_size //= 2
    
    n_vec[k] = n
    
    if n > 0:
      
      ix_subset = np.where(ix_mat[:, k] != 0)[0]
      x_subset = x.iloc[:, ix_subset]
      
      rsq[k] = lm_rsq(x_subset, y, weights, intercept)

  # calculate the exact Shapley value for r-squared
  for j in range(n_cols):
    
    ix_pos = np.where(ix_mat[j, :] != 0)[0]
    ix_neg = np.where(ix_mat[j, :] == 0)[0]
    ix_n = n_vec[ix_neg]
    rsq_diff = rsq[ix_pos] - rsq[ix_neg]

    for k in range(int(n_combn / 2)):
      
      s = int(ix_n[k])
      weight = math.factorial(s) * math.factorial(n_cols - s - 1) \
        / math.factorial(n_cols)
      result[j] += weight * rsq_diff[k]

  return result
lm_shap(overlap_x_df, overlap_y_df, weights, intercept)
array([0.31943761, 0.01661087, 0.15779872, 0.23852211])

Principal component regression

from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
comps = 1

Coefficients

\[ \begin{aligned} W_{k}&=\mathbf{X}V_{k}=[\mathbf{X}\mathbf{v}_{1},\ldots,\mathbf{X}\mathbf{v}_{k}]\\ {\widehat{\gamma}}_{k}&=\left(W_{k}^\mathrm{T}W_{k}\right)^{-1}W_{k}^\mathrm{T}\mathbf{Y}\\ {\widehat{\boldsymbol{\beta}}}_{k}&=V_{k}{\widehat{\gamma}}_{k} \end{aligned} \]

def pcr_coef(x, y, comps):
  
  x = x - np.average(x, axis = 0)
  L, V = np.linalg.eig(np.cov(x.T, ddof = 1))
  idx = L.argsort()[::-1]
  V = V[:, idx]
  
  W = np.dot(x, V)
  gamma = np.dot(np.dot(np.linalg.inv(np.dot(W.T, W)), W.T), y)
  
  result = np.dot(V[:, :comps], gamma[:comps])
  
  return np.ravel(result)
scale_x_df = (overlap_x_df - np.average(overlap_x_df, axis = 0)) \
  / np.std(overlap_x_df, axis = 0, ddof = 1)
pcr_coef(scale_x_df, overlap_y_df, comps)
array([ 6.36635362e-04,  5.40939127e-05, -1.36708081e-04,  6.40223689e-04])
pcr_coef(overlap_x_df, overlap_y_df, comps)
array([ 0.25306574,  0.0034336 , -0.00062675,  0.01777104])
pca = PCA(n_components = len(factors))
pca_x_df = pca.fit_transform(scale_x_df)

fit = LinearRegression(fit_intercept = False).fit(pca_x_df, overlap_y_df)

gamma = fit.coef_
np.dot(pca.components_.T[:, :comps], gamma.T[:comps]).ravel()
array([ 6.36635362e-04,  5.40939127e-05, -1.36708081e-04,  6.40223689e-04])

R-squared

def pcr_rsq(x, y, comps):
  
  coef = np.matrix(pcr_coef(x, y, comps))
  
  x = x - np.average(x, axis = 0)
  y = y - np.average(y, axis = 0)
  
  result = np.dot(np.dot(coef, np.dot(x.T, x)), coef.T) / np.dot(y.T, y)
  
  return result.item()
pcr_rsq(scale_x_df, overlap_y_df, comps)
0.4458171464775532
pcr_rsq(overlap_x_df, overlap_y_df, comps)
0.5256483726427376

Standard errors

\[ \begin{aligned} \text{Var}({\widehat{\boldsymbol{\beta}}}_{k})&=\sigma^{2}V_{k}(W_{k}^\mathrm{T}W_{k})^{-1}V_{k}^\mathrm{T}\\ &=\sigma^{2}V_{k}\text{diag}\left(\lambda_{1}^{-1},\ldots,\lambda_{k}^{-1}\right)V_{k}^\mathrm{T}\\ &=\sigma^{2}\sum_{j=1}^{k}{\frac{\mathbf{v}_{j}\mathbf{v}_{j}^\mathrm{T}}{\lambda_{j}}} \end{aligned} \]

# unable to verify the result
def pcr_se(x, y, comps):
  
  n_rows = x.shape[0]
  n_cols = x.shape[1]
  
  rsq = pcr_rsq(x, y, comps)
  
  y = y - np.average(y, axis = 0)
  
  df_resid = n_rows - n_cols - 1
  
  var_y = np.dot(y.T, y)   
  var_resid = (1 - rsq) * var_y / df_resid
  
  # uses statsmodels for illustrative purposes
  pca = sm.multivariate.PCA(x, standardize = False, demean = True)
  L = pca.eigenvals[:comps]
  V = pca.eigenvecs.iloc[:, :comps]
  
  result = np.sqrt(var_resid * np.dot(V, np.dot(np.diag(1 / L), V.T)).diagonal())
  
  return np.ravel(result)
pcr_se(scale_x_df, overlap_y_df, comps)
array([4.51637963e-05, 3.83749726e-06, 9.69826105e-06, 4.54183571e-05])
pcr_se(overlap_x_df, overlap_y_df, comps)
array([1.52963357e-02, 2.07541068e-04, 3.78832664e-05, 1.07415462e-03])

Partial least squares

Risk decomposition

Standalone risk

\[ \begin{aligned} \text{SAR}_{k}&=\sqrt{w_{k}^{2}\sigma_{k}^{2}}\\ \text{SAR}_{\varepsilon}&=\sqrt{(1-R^{2})\sigma_{y}^{2}} \end{aligned} \]

def cov_wt(x, weights, center):
  
  sum_w = sum(weights)
  sumsq_w = sum(np.power(weights, 2))
  
  if (center):
  
    x = x - np.average(x, axis = 0, weights = weights.reshape(-1))
  
  result = np.dot(x.T, np.multiply(weights, x)) / (sum_w - sumsq_w / sum_w)
  
  return result
def lm_sar(x, y, weights, intercept):
    
  coef = lm_coef(x, y, weights, intercept)
  rsq = lm_rsq(x, y, weights, intercept)
  
  if (intercept): x = sm.add_constant(x)
  
  # sigma = np.cov(np.concatenate((x, y), axis = 1).T,
  #                aweights = weights.reshape(-1))
  sigma = cov_wt(np.concatenate((x, y), axis = 1), weights, intercept)
  sar = np.multiply(np.power(coef, 2).T, sigma[:-1, :-1].diagonal())
  sar_eps = (1 - rsq) * sigma[-1, -1]
  
  result = np.sqrt(np.concatenate((np.matrix(sigma[-1, -1]),
                                   np.matrix(sar),
                                   np.matrix(sar_eps)), axis = 1))
  
  return np.ravel(result)
lm_sar(overlap_x_df, overlap_y_df, weights, intercept) * np.sqrt(scale["periods"] * scale["overlap"])
array([0.06565947, 0.        , 0.0343526 , 0.00637423, 0.02704017,
       0.01854822, 0.03396764])

Risk contribution

\[ \begin{aligned} \text{MCR}&=w^\mathrm{T}\frac{\partial\sigma_{y}}{\partial w}\\ &=w^\mathrm{T}\frac{\Sigma w}{\sigma_{y}}\\ \text{MCR}_{\varepsilon}&=\sigma_{y}-\sum_{k=1}^{n}\text{MCR}_{k} \end{aligned} \]

def lm_mcr(x, y, weights, intercept):
    
  coef = np.matrix(lm_coef(x, y, weights, intercept)).T
  rsq = lm_rsq(x, y, weights, intercept)
      
  if (intercept): x = sm.add_constant(x)
  
#     sigma = np.cov(np.concatenate((x, y), axis = 1).T,
#                    aweights = weights.reshape(-1))
  sigma = cov_wt(np.concatenate((x, y), axis = 1), weights, intercept)
  mcr = np.multiply(coef, np.dot(sigma[:-1, :-1], coef)) / np.sqrt(sigma[-1, -1])
  mcr_eps = np.sqrt(sigma[-1, -1]) - sum(mcr)
  
  result = np.concatenate((np.sqrt(np.matrix(sigma[-1, -1])),
                           np.matrix(mcr).T,
                           np.matrix(mcr_eps)), axis = 1)
  
  return np.ravel(result)
lm_mcr(overlap_x_df, overlap_y_df, weights, intercept) * np.sqrt(scale["periods"] * scale["overlap"])
array([0.06565947, 0.        , 0.0249204 , 0.00095031, 0.01000579,
       0.01221048, 0.01757249])

Scenario analysis

Implied shocks

\[ \begin{aligned} \hat{\beta}&=(Z^\mathrm{T}WZ)^{-1}Z^\mathrm{T}WX \end{aligned} \]

def implied_shocks(shocks, x, z, weights):

  beta = np.linalg.lstsq(np.multiply(weights, z), np.multiply(weights, x), rcond = None)[0]
                   
  result = np.dot(shocks, beta)
  
  return np.ravel(result)
shocks = np.array([-0.1, 0.1])
overlap_z_df = overlap_x_df.iloc[:, [0, 1]]
implied_shocks(shocks, overlap_x_df, overlap_z_df, weights)
array([-0.1       ,  0.1       , -0.002163  , -0.00723696])

Stress P&L

def pnl_stress(shocks, x, y, z, weights, intercept):
  
  coef = lm_coef(x, y, weights, intercept)
  
  if (intercept): x = sm.add_constant(x)
  
  result = np.multiply(coef.T, implied_shocks(shocks, x, z, weights))
  
  return np.ravel(result)
pnl_stress(shocks, overlap_x_df, overlap_y_df, overlap_z_df, weights, intercept)
array([-0.00118747, -0.01835521, -0.00988267, -0.00676462, -0.00823384])