Statistics

algorithms
r
Author
Published

February 6, 2025

Usage

library(roll)
library(microbenchmark)
options(microbenchmark.unit = "us")
n_vars <- 10
n_obs <- 1000
weights <- 0.9 ^ (n_obs:1)

x <- matrix(rnorm(n_obs * n_vars), nrow = n_obs, ncol = n_vars)
y <- matrix(rnorm(n_obs), nrow = n_obs, ncol = 1)
x_lgl <- x < 0

Rolling any

result <- microbenchmark("125" = roll_any(x_lgl, width = 125, min_obs = 1),
                         "250" = roll_any(x_lgl, width = 250, min_obs = 1),
                         "500" = roll_any(x_lgl, width = 500, min_obs = 1),
                         "1000" = roll_any(x_lgl, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq    max neval
  125 109.3 142.20 169.486 155.25 174.45 1061.4   100
  250 106.7 142.40 157.148 152.10 167.35  213.0   100
  500 107.9 142.35 159.212 155.05 170.20  262.7   100
 1000 132.8 139.90 158.307 151.95 167.45  252.9   100

Rolling all

result <- microbenchmark("125" = roll_all(x_lgl, width = 125, min_obs = 1),
                         "250" = roll_all(x_lgl, width = 250, min_obs = 1),
                         "500" = roll_all(x_lgl, width = 500, min_obs = 1),
                         "1000" = roll_all(x_lgl, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr  min     lq    mean median     uq   max neval
  125 80.4  91.45 137.997 142.90 167.60 285.8   100
  250 80.6 102.30 143.115 147.65 170.70 240.3   100
  500 81.3  93.75 135.441 143.70 161.60 223.4   100
 1000 78.3  88.45 132.587 140.40 161.85 206.8   100

Rolling sums

\[ \begin{aligned} &\text{Expanding window} \\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}-\lambda\times\text{w}_{old}\times\text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_sum(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_sum(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_sum(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_sum(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr  min     lq    mean median     uq   max neval
  125 87.9 139.25 170.833 169.75 201.20 267.5   100
  250 95.0 137.55 171.750 172.25 195.35 375.9   100
  500 90.4 132.65 164.327 164.80 191.50 293.1   100
 1000 90.0 134.15 166.771 170.95 196.75 245.1   100

Rolling products

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{prod}_{w}\leftarrow\text{prod}_{w}\times\text{w}_{new}\\ &\bullet\text{prod}_{x}\leftarrow\text{prod}_{x}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{prod}_{x}\leftarrow\text{prod}_{x}\times\text{x}_{new}/\text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_prod(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_prod(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_prod(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_prod(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 213.4 252.90 275.295 265.70 292.55 390.0   100
  250 225.5 254.75 278.385 267.60 298.55 376.7   100
  500 163.9 186.65 205.888 201.85 219.30 286.3   100
 1000 149.2 181.00 203.196 191.90 218.55 430.2   100

Rolling means

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}-\lambda\times\text{w}_{old}\times \text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_mean(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_mean(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_mean(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_mean(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 117.2 149.25 170.305 163.55 176.35 461.9   100
  250 107.2 146.70 169.833 162.05 175.75 379.5   100
  500 106.2 152.35 167.391 163.85 184.30 218.1   100
 1000 112.7 149.00 164.917 161.55 173.90 260.7   100

Rolling minimums

result <- microbenchmark("125" = roll_min(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_min(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_min(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_min(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 112.3 147.05 171.000 159.20 180.60 362.0   100
  250 125.0 147.70 179.350 160.50 188.35 523.6   100
  500 115.1 147.40 170.712 163.50 180.75 323.2   100
 1000 124.7 151.10 173.534 162.75 186.85 292.4   100

Rolling maximums

result <- microbenchmark("125" = roll_max(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_max(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_max(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_max(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 106.1 146.60 171.327 162.10 191.15 261.9   100
  250 103.1 142.40 155.351 147.40 159.05 233.7   100
  500  98.5 143.05 165.113 152.55 187.75 401.0   100
 1000 109.3 142.95 164.423 148.00 192.65 296.4   100

Rolling index of minimums

result <- microbenchmark("125" = roll_idxmin(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_idxmin(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_idxmin(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_idxmin(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr  min     lq    mean median     uq   max neval
  125 94.6 109.50 132.711 117.00 150.90 356.8   100
  250 94.2 107.85 130.772 114.80 149.20 241.7   100
  500 99.6 110.85 135.147 121.80 150.55 253.0   100
 1000 92.8 112.85 136.677 121.65 151.30 238.0   100

Rolling index of maximums

result <- microbenchmark("125" = roll_idxmax(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_idxmax(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_idxmax(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_idxmax(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median    uq   max neval
  125 101.5 110.60 134.009 121.35 142.5 249.9   100
  250  98.2 110.05 140.704 118.40 159.4 356.6   100
  500  97.8 112.35 134.577 121.20 148.7 247.9   100
 1000 104.6 111.65 130.458 118.45 134.9 253.5   100

Rolling medians

# "'online' is only supported for equal 'weights'"
result <- microbenchmark("125" = roll_median(x, width = 125, min_obs = 1),
                         "250" = roll_median(x, width = 250, min_obs = 1),
                         "500" = roll_median(x, width = 500, min_obs = 1),
                         "1000" = roll_median(x, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr   min     lq     mean  median      uq    max neval
  125 769.8 808.30 1293.061  999.10 1500.50 3827.3   100
  250 770.3 853.55 1487.623 1353.60 1733.90 7022.2   100
  500 714.2 800.95 1219.497 1084.15 1339.35 3239.6   100
 1000 549.6 606.20  995.060  848.05 1220.15 2502.5   100

Rolling quantiles

# "'online' is only supported for equal 'weights'"
result <- microbenchmark("125" = roll_quantile(x, width = 125, min_obs = 1),
                         "250" = roll_quantile(x, width = 250, min_obs = 1),
                         "500" = roll_quantile(x, width = 500, min_obs = 1),
                         "1000" = roll_quantile(x, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr   min     lq     mean median      uq    max neval
  125 777.3 929.85 1731.837 2288.7 2346.15 2785.7   100
  250 774.4 924.55 1715.168 2271.1 2324.25 3421.5   100
  500 725.8 906.85 1796.412 2141.8 2205.85 3769.0   100
 1000 562.7 717.40 1346.449 1640.4 1681.05 2413.1   100

Rolling variances

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sumsq}_{w}\leftarrow\text{sumsq}_{w}+\text{w}_{new}^{2}\\ &\bullet\text{sumsq}_{x}\leftarrow\lambda\times\text{sumsq}_{x}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{x}_{new}-\text{mean}_{prev_x})\\ &\text{Rolling window}\\ &\bullet\text{sumsq}_{x}\leftarrow\lambda\times\text{sumsq}_{x}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{x}_{new}-\text{mean}_{prev_x})-\\ &\lambda\times\text{w}_{old}\times (\text{x}_{old}-\text{mean}_{x})(\text{x}_{old}-\text{mean}_{prev_x}) \end{aligned} \]

result <- microbenchmark("125" = roll_var(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_var(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_var(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_var(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 120.0 157.75 171.517 164.80 176.10 280.8   100
  250 123.7 157.90 172.163 165.65 176.55 297.2   100
  500 118.6 147.95 169.459 158.15 174.90 531.2   100
 1000 110.4 144.20 159.941 151.60 168.05 286.6   100

Rolling standard deviations

result <- microbenchmark("125" = roll_sd(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_sd(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_sd(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_sd(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 127.9 160.15 170.004 165.85 174.65 256.1   100
  250 134.1 157.35 169.746 164.25 174.00 268.6   100
  500 130.6 157.10 172.882 162.45 176.50 389.8   100
 1000 116.6 149.65 159.215 154.80 162.90 254.3   100

Rolling scaling and centering

result <- microbenchmark("125" = roll_scale(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_scale(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_scale(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_scale(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median    uq   max neval
  125 151.2 168.60 183.690 176.15 187.8 313.9   100
  250 143.0 167.70 184.715 177.20 187.8 320.0   100
  500 145.0 162.80 176.665 170.95 181.1 302.6   100
 1000 136.8 158.95 174.122 167.10 177.5 400.4   100

Rolling covariances

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sumsq}_{w}\leftarrow\text{sumsq}_{w}+\text{w}_{new}^{2}\\ &\bullet\text{sumsq}_{xy}\leftarrow\lambda\times\text{sumsq}_{xy}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{y}_{new}-\text{mean}_{prev_y})\\ &\text{Rolling window}\\ &\bullet\text{sumsq}_{xy}\leftarrow\lambda\times\text{sumsq}_{xy}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{y}_{new}-\text{mean}_{prev_y})-\\ &\lambda\times\text{w}_{old}\times (\text{x}_{old}-\text{mean}_{x})(\text{y}_{old}-\text{mean}_{prev_y}) \end{aligned} \]

result <- microbenchmark("125" = roll_cov(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_cov(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_cov(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_cov(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq     mean median      uq    max neval
  125 718.6 899.25 1033.401 971.10 1035.00 7392.7   100
  250 705.7 882.00 1105.836 955.40 1014.30 9270.8   100
  500 695.4 849.15  894.267 883.90  927.20 1167.2   100
 1000 615.7 743.25  912.085 776.75  821.65 7346.2   100

Rolling correlations

result <- microbenchmark("125" = roll_cor(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_cor(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_cor(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_cor(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min      lq     mean  median      uq     max neval
  125 895.8 1717.90 2286.340 1973.90 2167.05 19938.1   100
  250 924.9 1219.95 1996.944 1884.70 2047.20 10822.8   100
  500 915.8 1631.45 1789.453 1784.40 1919.90  3615.8   100
 1000 773.2 1285.75 1679.886 1519.55 1621.85 10147.8   100

Rolling crossproducts

result <- microbenchmark("125" = roll_crossprod(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_crossprod(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_crossprod(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_crossprod(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq    max neval
  125 628.7 731.90 816.020 766.95 797.05 6009.4   100
  250 616.3 722.95 815.035 766.50 808.40 5969.4   100
  500 556.0 703.90 739.860 739.55 770.00  935.8   100
 1000 546.3 605.35 824.631 683.30 701.00 5892.1   100

Rolling linear models

\[ \begin{aligned} &\text{coef}=\text{cov}_{xx}^{-1}\times\text{cov}_{xy}\\ &\text{intercept}=\text{mean}_{y}-\text{coef}\times\text{mean}_{x}\\ &\text{rsq}=\frac{\text{coef}^{T}\times\text{cov}_{xx}\times\text{coef}}{\text{var}_{y}}\\ &\text{var}_{resid}=\frac{(1-\text{rsq})(\text{var}_{y})(\text{sum}_{w}-\text{sumsq}_{w}/\text{sum}_{w})}{\text{n}_{rows}-\text{n}_{cols}}\\ &\text{xx}=\text{cov}_{xx}\times(\text{sum}_{w}-\text{sumsq}_{w}/\text{sum}_{w})\\ &\text{se}_{coef}=\sqrt{\text{var}_{resid}\times\text{diag}(\text{xx}^{-1})}\\ &\text{se}_{intercept}=\sqrt{\text{var}_{resid}\left(1/\text{sum}_{w}+\text{mean}_{x}^{T}\text{xx}^{-1}\text{mean}_{x}\right)} \end{aligned} \]

result <- microbenchmark("125" = roll_lm(x, y, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_lm(x, y, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_lm(x, y, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_lm(x, y, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq     max neval
  125 2994.6 5997.95 6690.592 6427.15 6991.40 20588.8   100
  250 5508.5 6092.05 6747.117 6569.30 6989.70 20333.3   100
  500 5302.3 5934.55 6462.299 6199.15 6795.60  9760.8   100
 1000 2634.0 5680.15 6239.151 5976.65 6462.35 24719.7   100

References