Statistics

algorithms
r
Author
Published

April 6, 2025

Usage

library(roll)
library(microbenchmark)
options(microbenchmark.unit = "us")
n_vars <- 10
n_obs <- 1000
weights <- 0.9 ^ (n_obs:1)

x <- matrix(rnorm(n_obs * n_vars), nrow = n_obs, ncol = n_vars)
y <- matrix(rnorm(n_obs), nrow = n_obs, ncol = 1)
x_lgl <- x < 0

Rolling any

result <- microbenchmark("125" = roll_any(x_lgl, width = 125, min_obs = 1),
                         "250" = roll_any(x_lgl, width = 250, min_obs = 1),
                         "500" = roll_any(x_lgl, width = 500, min_obs = 1),
                         "1000" = roll_any(x_lgl, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq    max neval
  125 129.0 181.35 214.252 192.25 218.70 1350.9   100
  250 167.1 178.00 200.785 192.75 214.60  432.0   100
  500 118.0 172.15 192.657 182.85 210.50  264.9   100
 1000  95.5 161.05 178.354 172.15 192.05  298.2   100

Rolling all

result <- microbenchmark("125" = roll_all(x_lgl, width = 125, min_obs = 1),
                         "250" = roll_all(x_lgl, width = 250, min_obs = 1),
                         "500" = roll_all(x_lgl, width = 500, min_obs = 1),
                         "1000" = roll_all(x_lgl, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 106.6 129.05 148.540 136.45 154.95 384.5   100
  250 104.8 127.85 152.501 144.05 170.55 265.4   100
  500 106.4 125.40 150.169 138.55 164.60 247.0   100
 1000  97.7 114.90 135.077 129.90 149.85 227.5   100

Rolling sums

\[ \begin{aligned} &\text{Expanding window} \\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}-\lambda\times\text{w}_{old}\times\text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_sum(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_sum(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_sum(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_sum(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 134.4 147.35 175.617 163.10 179.40 303.3   100
  250 135.6 152.10 175.233 161.85 178.20 300.2   100
  500 131.5 142.75 165.794 158.95 169.50 293.5   100
 1000 125.4 141.85 166.895 159.00 169.05 388.9   100

Rolling products

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{prod}_{w}\leftarrow\text{prod}_{w}\times\text{w}_{new}\\ &\bullet\text{prod}_{x}\leftarrow\text{prod}_{x}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{prod}_{x}\leftarrow\text{prod}_{x}\times\text{x}_{new}/\text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_prod(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_prod(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_prod(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_prod(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq    max neval
  125 347.9 385.75 423.830 414.20 432.80  753.0   100
  250 366.1 391.35 455.279 418.65 460.45 1932.3   100
  500 263.7 291.55 320.111 313.80 332.35  966.8   100
 1000 258.7 288.55 320.861 308.50 331.10 1087.4   100

Rolling means

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}-\lambda\times\text{w}_{old}\times \text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_mean(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_mean(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_mean(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_mean(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min    lq    mean median     uq   max neval
  125 133.3 149.9 178.344 170.70 181.15 420.6   100
  250 135.7 145.0 173.550 169.75 180.85 297.4   100
  500 135.1 147.9 173.448 166.40 186.75 295.0   100
 1000 132.0 142.4 167.098 162.00 179.30 265.6   100

Rolling minimums

result <- microbenchmark("125" = roll_min(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_min(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_min(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_min(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq    max neval
  125 150.6 166.85 211.422 190.40 239.55  515.0   100
  250 149.3 161.70 210.911 186.55 212.35 1266.6   100
  500 152.5 167.95 204.202 188.15 225.30  454.9   100
 1000 151.8 168.55 202.519 193.65 220.55  319.5   100

Rolling maximums

result <- microbenchmark("125" = roll_max(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_max(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_max(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_max(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 148.3 163.40 195.064 182.15 205.85 368.6   100
  250 149.2 159.75 200.023 176.30 220.35 342.3   100
  500 149.2 160.05 198.094 182.95 206.40 433.2   100
 1000 147.2 158.60 200.071 182.15 226.85 361.5   100

Rolling index of minimums

result <- microbenchmark("125" = roll_idxmin(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_idxmin(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_idxmin(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_idxmin(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 137.5 162.35 182.197 174.45 190.95 381.2   100
  250 142.6 165.55 186.158 179.60 200.85 271.9   100
  500 139.1 161.00 176.800 173.05 182.00 264.7   100
 1000 141.0 162.50 185.109 178.05 195.95 466.2   100

Rolling index of maximums

result <- microbenchmark("125" = roll_idxmax(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_idxmax(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_idxmax(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_idxmax(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 146.8 164.70 192.655 171.30 190.50 393.2   100
  250 144.1 158.20 182.576 168.75 184.80 436.7   100
  500 138.8 160.60 190.420 169.35 192.60 363.7   100
 1000 141.0 155.85 182.651 167.85 184.75 343.6   100

Rolling medians

# "'online' is only supported for equal 'weights'"
result <- microbenchmark("125" = roll_median(x, width = 125, min_obs = 1),
                         "250" = roll_median(x, width = 250, min_obs = 1),
                         "500" = roll_median(x, width = 500, min_obs = 1),
                         "1000" = roll_median(x, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq    max neval
  125 1103.0 1246.75 1892.713 2002.90 2035.15 9965.7   100
  250 1111.1 1644.55 1813.722 1983.55 2002.15 2233.4   100
  500 1041.6 1810.35 1714.452 1841.60 1860.05 2301.7   100
 1000  782.2  902.55 1269.107 1410.30 1441.25 1822.8   100

Rolling quantiles

# "'online' is only supported for equal 'weights'"
result <- microbenchmark("125" = roll_quantile(x, width = 125, min_obs = 1),
                         "250" = roll_quantile(x, width = 250, min_obs = 1),
                         "500" = roll_quantile(x, width = 500, min_obs = 1),
                         "1000" = roll_quantile(x, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq    max neval
  125 1077.1 1124.80 1519.669 1401.20 1996.75 4935.5   100
  250 1075.3 1117.95 1463.520 1392.15 1972.25 2134.1   100
  500  991.8 1052.00 1417.220 1305.55 1838.20 1948.4   100
 1000  769.3  828.65 1138.110 1018.80 1423.75 4404.5   100

Rolling variances

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sumsq}_{w}\leftarrow\text{sumsq}_{w}+\text{w}_{new}^{2}\\ &\bullet\text{sumsq}_{x}\leftarrow\lambda\times\text{sumsq}_{x}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{x}_{new}-\text{mean}_{prev_x})\\ &\text{Rolling window}\\ &\bullet\text{sumsq}_{x}\leftarrow\lambda\times\text{sumsq}_{x}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{x}_{new}-\text{mean}_{prev_x})-\\ &\lambda\times\text{w}_{old}\times (\text{x}_{old}-\text{mean}_{x})(\text{x}_{old}-\text{mean}_{prev_x}) \end{aligned} \]

result <- microbenchmark("125" = roll_var(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_var(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_var(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_var(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 167.0 177.00 203.513 187.40 208.10 438.2   100
  250 165.1 175.20 195.914 184.45 211.45 299.8   100
  500 161.7 171.45 199.202 184.15 210.80 341.1   100
 1000 154.1 162.10 186.859 175.85 200.70 303.5   100

Rolling standard deviations

result <- microbenchmark("125" = roll_sd(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_sd(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_sd(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_sd(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median    uq   max neval
  125 163.9 179.10 201.304  185.0 202.9 492.1   100
  250 164.7 176.10 199.809  183.9 205.6 317.4   100
  500 158.9 170.15 187.077  175.2 189.3 286.3   100
 1000 150.4 163.85 181.687  169.7 184.7 284.9   100

Rolling scaling and centering

result <- microbenchmark("125" = roll_scale(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_scale(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_scale(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_scale(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq   max neval
  125 187.1 199.90 226.142 210.20 246.45 331.9   100
  250 187.8 196.20 225.345 207.55 250.65 456.2   100
  500 180.3 194.65 216.011 202.90 231.75 292.2   100
 1000 174.0 182.70 208.997 192.15 215.20 380.3   100

Rolling covariances

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sumsq}_{w}\leftarrow\text{sumsq}_{w}+\text{w}_{new}^{2}\\ &\bullet\text{sumsq}_{xy}\leftarrow\lambda\times\text{sumsq}_{xy}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{y}_{new}-\text{mean}_{prev_y})\\ &\text{Rolling window}\\ &\bullet\text{sumsq}_{xy}\leftarrow\lambda\times\text{sumsq}_{xy}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{y}_{new}-\text{mean}_{prev_y})-\\ &\lambda\times\text{w}_{old}\times (\text{x}_{old}-\text{mean}_{x})(\text{y}_{old}-\text{mean}_{prev_y}) \end{aligned} \]

result <- microbenchmark("125" = roll_cov(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_cov(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_cov(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_cov(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq     max neval
  125 1067.2 1230.20 1642.465 1356.25 1568.05 11902.8   100
  250  968.0 1178.25 1312.100 1240.25 1330.65  2461.4   100
  500  943.5 1144.75 1400.392 1187.75 1272.00  7728.5   100
 1000  796.5 1001.85 1214.537 1062.45 1118.60  7784.9   100

Rolling correlations

result <- microbenchmark("125" = roll_cor(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_cor(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_cor(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_cor(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr    min      lq     mean  median     uq     max neval
  125 1173.4 1455.00 2253.283 1687.75 3020.4 11280.9   100
  250 1145.7 1351.75 2083.530 1548.25 3048.2  3616.3   100
  500 1169.3 1349.00 2129.608 1548.80 2874.2 11464.6   100
 1000  876.1 1174.45 1856.686 1465.45 2447.0  6702.3   100

Rolling crossproducts

result <- microbenchmark("125" = roll_crossprod(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_crossprod(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_crossprod(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_crossprod(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr   min     lq    mean median     uq    max neval
  125 785.3 853.25 918.568 913.75 976.15 1193.7   100
  250 751.3 827.20 897.928 883.05 961.60 1179.6   100
  500 699.6 787.45 953.820 847.50 932.75 5066.9   100
 1000 687.6 733.00 920.345 771.80 859.95 5073.7   100

Rolling linear models

\[ \begin{aligned} &\text{coef}=\text{cov}_{xx}^{-1}\times\text{cov}_{xy}\\ &\text{intercept}=\text{mean}_{y}-\text{coef}\times\text{mean}_{x}\\ &\text{rsq}=\frac{\text{coef}^{T}\times\text{cov}_{xx}\times\text{coef}}{\text{var}_{y}}\\ &\text{var}_{resid}=\frac{(1-\text{rsq})(\text{var}_{y})(\text{sum}_{w}-\text{sumsq}_{w}/\text{sum}_{w})}{\text{n}_{rows}-\text{n}_{cols}}\\ &\text{xx}=\text{cov}_{xx}\times(\text{sum}_{w}-\text{sumsq}_{w}/\text{sum}_{w})\\ &\text{se}_{coef}=\sqrt{\text{var}_{resid}\times\text{diag}(\text{xx}^{-1})}\\ &\text{se}_{intercept}=\sqrt{\text{var}_{resid}\left(1/\text{sum}_{w}+\text{mean}_{x}^{T}\text{xx}^{-1}\text{mean}_{x}\right)} \end{aligned} \]

result <- microbenchmark("125" = roll_lm(x, y, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_lm(x, y, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_lm(x, y, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_lm(x, y, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr    min      lq     mean  median      uq     max neval
  125 3017.9 4526.45 5105.695 4817.10 5478.80 12264.4   100
  250 3023.7 4459.70 5039.013 4690.75 5408.05 10310.0   100
  500 3109.0 4416.25 4803.363 4585.90 4900.70  7667.2   100
 1000 2933.2 4290.65 4725.408 4476.65 4928.90 10803.2   100

References