Statistics

algorithms
r
Author
Published

September 3, 2024

Usage

library(roll)
library(microbenchmark)
options(microbenchmark.unit = "us")
n_vars <- 10
n_obs <- 1000
weights <- 0.9 ^ (n_obs:1)

x <- matrix(rnorm(n_obs * n_vars), nrow = n_obs, ncol = n_vars)
y <- matrix(rnorm(n_obs), nrow = n_obs, ncol = 1)
x_lgl <- x < 0

Rolling any

result <- microbenchmark("125" = roll_any(x_lgl, width = 125, min_obs = 1),
                         "250" = roll_any(x_lgl, width = 250, min_obs = 1),
                         "500" = roll_any(x_lgl, width = 500, min_obs = 1),
                         "1000" = roll_any(x_lgl, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr   min     lq    mean median    uq    max neval
  125 114.7 139.95 150.537 147.15 156.5  228.6   100
  250 116.9 139.75 152.074 145.95 156.6  303.7   100
  500  98.2 139.40 151.976 149.15 157.8  216.6   100
 1000 115.1 135.25 157.961 144.40 154.5 1057.0   100

Rolling all

result <- microbenchmark("125" = roll_all(x_lgl, width = 125, min_obs = 1),
                         "250" = roll_all(x_lgl, width = 250, min_obs = 1),
                         "500" = roll_all(x_lgl, width = 500, min_obs = 1),
                         "1000" = roll_all(x_lgl, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr  min     lq    mean median     uq   max neval
  125 99.4 135.50 150.286 146.10 158.00 267.2   100
  250 98.3 133.35 148.466 144.65 156.75 390.0   100
  500 95.0 130.90 145.896 147.10 160.00 226.3   100
 1000 90.1 133.65 146.455 144.75 158.95 202.1   100

Rolling sums

\[ \begin{aligned} &\text{Expanding window} \\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}-\lambda\times\text{w}_{old}\times\text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_sum(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_sum(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_sum(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_sum(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median    uq   max neval
  125  97.5 112.75 122.683 118.80 127.7 300.1   100
  250  99.1 112.85 124.132 120.65 128.8 226.5   100
  500 102.6 111.90 123.554 118.70 128.4 207.3   100
 1000  98.2 111.95 122.033 117.90 127.0 271.4   100

Rolling products

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{prod}_{w}\leftarrow\text{prod}_{w}\times\text{w}_{new}\\ &\bullet\text{prod}_{x}\leftarrow\text{prod}_{x}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{prod}_{x}\leftarrow\text{prod}_{x}\times\text{x}_{new}/\text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_prod(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_prod(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_prod(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_prod(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 218.5 227.90 251.102 247.35 260.80 345.2   100
  250 217.2 226.70 251.912 246.30 264.25 357.8   100
  500 150.8 164.00 182.066 170.85 185.65 333.8   100
 1000 152.0 161.35 182.836 172.45 191.55 435.4   100

Rolling means

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}-\lambda\times\text{w}_{old}\times \text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_mean(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_mean(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_mean(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_mean(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 101.0 122.85 142.274 137.60 155.40 296.5   100
  250 100.4 120.60 139.205 129.05 156.40 283.1   100
  500  97.0 117.65 133.151 126.20 148.85 257.9   100
 1000  91.6 115.40 134.825 124.55 149.20 275.8   100

Rolling minimums

result <- microbenchmark("125" = roll_min(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_min(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_min(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_min(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125  95.3 112.00 139.049 123.55 151.25 349.4   100
  250 103.3 115.80 132.759 124.10 135.50 266.1   100
  500 105.6 115.45 139.345 127.45 146.55 234.2   100
 1000 109.1 121.10 149.072 130.90 172.95 257.9   100

Rolling maximums

result <- microbenchmark("125" = roll_max(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_max(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_max(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_max(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 103.5 115.20 136.284 125.20 141.10 365.9   100
  250 105.7 118.30 135.880 127.45 141.15 260.9   100
  500 106.3 113.90 131.312 124.20 135.90 257.3   100
 1000 107.0 122.35 137.647 129.25 144.95 271.7   100

Rolling index of minimums

result <- microbenchmark("125" = roll_idxmin(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_idxmin(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_idxmin(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_idxmin(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 102.1 117.70 131.486 122.10 129.70 311.7   100
  250 109.7 119.80 132.141 126.15 132.25 262.4   100
  500  97.9 121.55 135.462 125.95 139.55 255.6   100
 1000 107.1 121.30 139.781 127.45 141.65 266.1   100

Rolling index of maximums

result <- microbenchmark("125" = roll_idxmax(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_idxmax(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_idxmax(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_idxmax(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 104.0 113.85 122.674 118.45 122.30 211.7   100
  250 101.1 112.45 120.736 116.70 124.60 223.6   100
  500 108.0 114.00 122.074 118.25 123.30 207.2   100
 1000 101.8 117.40 127.304 121.35 128.35 327.0   100

Rolling medians

# "'online' is only supported for equal 'weights'"
result <- microbenchmark("125" = roll_median(x, width = 125, min_obs = 1),
                         "250" = roll_median(x, width = 250, min_obs = 1),
                         "500" = roll_median(x, width = 500, min_obs = 1),
                         "1000" = roll_median(x, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr   min      lq     mean  median      uq    max neval
  125 998.5 1269.15 2041.492 1821.25 2367.95 4588.0   100
  250 993.9 1720.35 2090.681 2146.85 2343.20 4581.6   100
  500 911.0 1165.85 1864.731 1997.30 2161.90 4035.5   100
 1000 695.7  915.70 1372.392 1268.20 1598.30 3104.6   100

Rolling quantiles

# "'online' is only supported for equal 'weights'"
result <- microbenchmark("125" = roll_quantile(x, width = 125, min_obs = 1),
                         "250" = roll_quantile(x, width = 250, min_obs = 1),
                         "500" = roll_quantile(x, width = 500, min_obs = 1),
                         "1000" = roll_quantile(x, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq    max neval
  125 1029.4 1304.80 2219.243 1822.75 2960.55 6729.8   100
  250 1048.9 1292.00 2071.168 1560.40 2925.35 4560.8   100
  500  912.3 1159.40 2048.737 1998.65 2722.30 4059.9   100
 1000  689.1  882.15 1503.607 1225.05 2062.85 4249.5   100

Rolling variances

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sumsq}_{w}\leftarrow\text{sumsq}_{w}+\text{w}_{new}^{2}\\ &\bullet\text{sumsq}_{x}\leftarrow\lambda\times\text{sumsq}_{x}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{x}_{new}-\text{mean}_{prev_x})\\ &\text{Rolling window}\\ &\bullet\text{sumsq}_{x}\leftarrow\lambda\times\text{sumsq}_{x}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{x}_{new}-\text{mean}_{prev_x})-\\ &\lambda\times\text{w}_{old}\times (\text{x}_{old}-\text{mean}_{x})(\text{x}_{old}-\text{mean}_{prev_x}) \end{aligned} \]

result <- microbenchmark("125" = roll_var(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_var(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_var(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_var(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 151.2 166.15 187.485 177.10 188.00 381.4   100
  250 145.4 161.75 181.664 171.45 180.45 419.0   100
  500 134.1 162.30 178.867 169.45 179.65 434.3   100
 1000 134.9 151.05 170.475 160.80 174.25 340.6   100

Rolling standard deviations

result <- microbenchmark("125" = roll_sd(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_sd(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_sd(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_sd(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 129.6 151.10 165.905 157.80 172.35 254.0   100
  250 133.3 149.40 165.677 156.15 169.00 303.9   100
  500 127.7 145.60 160.113 152.55 165.80 248.2   100
 1000 123.1 137.85 157.319 147.55 160.55 412.4   100

Rolling scaling and centering

result <- microbenchmark("125" = roll_scale(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_scale(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_scale(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_scale(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 152.6 160.40 183.221 171.05 191.10 412.4   100
  250 147.2 157.95 179.455 168.65 178.10 412.5   100
  500 145.0 156.10 169.814 167.55 176.75 231.3   100
 1000 140.9 149.90 168.838 161.30 174.05 321.9   100

Rolling covariances

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sumsq}_{w}\leftarrow\text{sumsq}_{w}+\text{w}_{new}^{2}\\ &\bullet\text{sumsq}_{xy}\leftarrow\lambda\times\text{sumsq}_{xy}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{y}_{new}-\text{mean}_{prev_y})\\ &\text{Rolling window}\\ &\bullet\text{sumsq}_{xy}\leftarrow\lambda\times\text{sumsq}_{xy}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{y}_{new}-\text{mean}_{prev_y})-\\ &\lambda\times\text{w}_{old}\times (\text{x}_{old}-\text{mean}_{x})(\text{y}_{old}-\text{mean}_{prev_y}) \end{aligned} \]

result <- microbenchmark("125" = roll_cov(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_cov(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_cov(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_cov(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq    max neval
  125 1026.1 1195.75 1352.020 1267.50 1352.35 6577.5   100
  250 1020.7 1190.05 1400.913 1245.65 1452.45 6350.3   100
  500  901.6 1104.55 1237.830 1176.90 1255.95 6050.7   100
 1000  704.7  955.95 1100.995 1005.70 1160.55 5898.7   100

Rolling correlations

result <- microbenchmark("125" = roll_cor(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_cor(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_cor(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_cor(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq     max neval
  125 1170.3 1367.30 1664.142 1422.40 1503.65 13440.0   100
  250 1065.8 1303.30 1527.172 1367.95 1474.65 15934.2   100
  500  964.5 1208.05 1460.821 1303.35 1381.95  7325.5   100
 1000  840.7 1073.25 1145.763 1157.05 1207.10  1473.4   100

Rolling crossproducts

result <- microbenchmark("125" = roll_crossprod(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_crossprod(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_crossprod(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_crossprod(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq     mean median      uq    max neval
  125 702.6 891.10  974.453 962.60 1033.95 1373.0   100
  250 658.8 904.60 1080.126 960.60 1058.80 7107.9   100
  500 639.8 847.25 1010.647 887.85  965.40 6499.5   100
 1000 590.7 775.60  886.246 815.55  908.45 5817.0   100

Rolling linear models

\[ \begin{aligned} &\text{coef}=\text{cov}_{xx}^{-1}\times\text{cov}_{xy}\\ &\text{intercept}=\text{mean}_{y}-\text{coef}\times\text{mean}_{x}\\ &\text{rsq}=\frac{\text{coef}^{T}\times\text{cov}_{xx}\times\text{coef}}{\text{var}_{y}}\\ &\text{var}_{resid}=\frac{(1-\text{rsq})(\text{var}_{y})(\text{sum}_{w}-\text{sumsq}_{w}/\text{sum}_{w})}{\text{n}_{rows}-\text{n}_{cols}}\\ &\text{xx}=\text{cov}_{xx}\times(\text{sum}_{w}-\text{sumsq}_{w}/\text{sum}_{w})\\ &\text{se}_{coef}=\sqrt{\text{var}_{resid}\times\text{diag}(\text{xx}^{-1})}\\ &\text{se}_{intercept}=\sqrt{\text{var}_{resid}\left(1/\text{sum}_{w}+\text{mean}_{x}^{T}\text{xx}^{-1}\text{mean}_{x}\right)} \end{aligned} \]

result <- microbenchmark("125" = roll_lm(x, y, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_lm(x, y, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_lm(x, y, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_lm(x, y, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq     max neval
  125 2439.4 2776.00 5625.571 6399.90 6876.55 21705.2   100
  250 2410.3 2809.45 5735.484 6282.25 6780.80 22622.2   100
  500 2450.6 2655.45 5081.586 5970.20 6337.00 10713.7   100
 1000 2270.4 2445.50 4607.914 5641.30 6039.80  8600.6   100

References