Statistics

algorithms
r
Author
Published

September 3, 2024

Usage

library(roll)
library(microbenchmark)
options(microbenchmark.unit = "us")
n_vars <- 10
n_obs <- 1000
weights <- 0.9 ^ (n_obs:1)

x <- matrix(rnorm(n_obs * n_vars), nrow = n_obs, ncol = n_vars)
y <- matrix(rnorm(n_obs), nrow = n_obs, ncol = 1)
x_lgl <- x < 0

Rolling any

result <- microbenchmark("125" = roll_any(x_lgl, width = 125, min_obs = 1),
                         "250" = roll_any(x_lgl, width = 250, min_obs = 1),
                         "500" = roll_any(x_lgl, width = 500, min_obs = 1),
                         "1000" = roll_any(x_lgl, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq      max neval
  125 155.401 174.1510 193.1620 191.8515 205.4005  279.501   100
  250 153.601 170.4510 190.5509 184.9010 202.1005  311.801   100
  500 150.301 171.8510 195.1450 186.6005 209.4505  339.901   100
 1000 150.800 167.3505 202.2100 186.2505 203.7020 1364.301   100

Rolling all

result <- microbenchmark("125" = roll_all(x_lgl, width = 125, min_obs = 1),
                         "250" = roll_all(x_lgl, width = 250, min_obs = 1),
                         "500" = roll_all(x_lgl, width = 500, min_obs = 1),
                         "1000" = roll_all(x_lgl, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq     max neval
  125 100.600 127.4505 169.1040 165.8505 192.7010 376.701   100
  250 100.501 122.4010 161.8061 162.9010 181.4015 273.901   100
  500 100.600 119.5510 167.5020 162.2510 194.6010 302.201   100
 1000  97.402 117.2510 164.2310 161.4015 193.9520 298.501   100

Rolling sums

\[ \begin{aligned} &\text{Expanding window} \\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}-\lambda\times\text{w}_{old}\times\text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_sum(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_sum(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_sum(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_sum(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq     max neval
  125 114.501 157.3510 191.7750 180.5015 210.7010 467.002   100
  250 102.202 159.1005 185.4440 187.8005 202.1505 286.601   100
  500 105.001 151.4010 176.9360 172.1010 198.4015 283.700   100
 1000 106.201 159.2015 179.7351 177.5505 194.9505 300.701   100

Rolling products

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{prod}_{w}\leftarrow\text{prod}_{w}\times\text{w}_{new}\\ &\bullet\text{prod}_{x}\leftarrow\text{prod}_{x}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{prod}_{x}\leftarrow\text{prod}_{x}\times\text{x}_{new}/\text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_prod(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_prod(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_prod(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_prod(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq     max neval
  125 250.001 277.9005 318.8670 307.3515 348.5010 568.001   100
  250 246.801 276.6015 325.3111 302.7510 351.7510 711.501   100
  500 178.101 200.5015 232.0280 224.5510 256.8515 419.800   100
 1000 162.101 195.7020 238.0340 221.9515 258.4005 585.301   100

Rolling means

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}\\ &\text{Rolling window}\\ &\bullet\text{sum}_{x}\leftarrow\lambda\times\text{sum}_{x}+\text{w}_{new}\times\text{x}_{new}-\lambda\times\text{w}_{old}\times \text{x}_{old} \end{aligned} \]

result <- microbenchmark("125" = roll_mean(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_mean(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_mean(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_mean(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq      max neval
  125 119.201 161.3015 193.6870 174.8510 195.6505 1588.001   100
  250 112.501 163.2505 190.9079 176.6010 202.6510  880.002   100
  500 120.201 158.6010 180.5450 173.9510 197.3510  359.001   100
 1000 107.501 157.4505 180.6620 177.3515 195.3005  344.401   100

Rolling minimums

result <- microbenchmark("125" = roll_min(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_min(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_min(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_min(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq     max neval
  125 117.300 163.7505 184.9280 174.9515 197.6510 310.901   100
  250 116.202 167.1515 196.0671 185.5015 218.6510 335.800   100
  500 119.301 167.2510 194.2339 178.1505 198.8015 499.501   100
 1000 121.701 171.1015 195.1470 182.7010 202.8010 377.801   100

Rolling maximums

result <- microbenchmark("125" = roll_max(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_max(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_max(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_max(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq    mean   median      uq     max neval
  125 111.701 146.5005 172.609 164.3010 187.751 310.202   100
  250 114.400 150.3005 172.499 165.9005 184.351 437.100   100
  500 111.602 132.9010 161.899 163.1515 180.701 306.901   100
 1000 107.201 135.2010 174.435 164.9010 196.351 319.001   100

Rolling index of minimums

result <- microbenchmark("125" = roll_idxmin(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_idxmin(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_idxmin(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_idxmin(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean  median       uq     max neval
  125 115.400 160.5015 184.9019 178.701 205.6510 336.401   100
  250 116.501 166.8005 194.3909 179.301 201.1505 646.801   100
  500 113.801 165.0010 188.3600 180.401 201.8505 452.201   100
 1000 109.601 163.4010 187.0079 177.501 198.0010 412.501   100

Rolling index of maximums

result <- microbenchmark("125" = roll_idxmax(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_idxmax(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_idxmax(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_idxmax(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq     max neval
  125 115.301 146.5010 173.3580 168.4015 191.3005 307.402   100
  250 111.401 159.2005 182.0070 173.1010 194.6015 427.000   100
  500 107.001 155.6005 173.9911 170.4510 186.1515 311.602   100
 1000 112.501 154.7005 170.8700 168.4010 187.3010 265.602   100

Rolling medians

# "'online' is only supported for equal 'weights'"
result <- microbenchmark("125" = roll_median(x, width = 125, min_obs = 1),
                         "250" = roll_median(x, width = 250, min_obs = 1),
                         "500" = roll_median(x, width = 500, min_obs = 1),
                         "1000" = roll_median(x, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr     min        lq     mean   median       uq      max neval
  125 854.102 1073.5510 1311.442 1150.900 1377.451 3425.801   100
  250 911.101 1076.6510 1300.980 1136.451 1256.801 5033.201   100
  500 779.502  976.5510 1149.344 1045.301 1131.801 3926.901   100
 1000 606.201  764.1015  993.120  810.301 1004.552 5328.200   100

Rolling quantiles

# "'online' is only supported for equal 'weights'"
result <- microbenchmark("125" = roll_quantile(x, width = 125, min_obs = 1),
                         "250" = roll_quantile(x, width = 250, min_obs = 1),
                         "500" = roll_quantile(x, width = 500, min_obs = 1),
                         "1000" = roll_quantile(x, width = 1000, min_obs = 1))
print(result)
Unit: microseconds
 expr     min        lq      mean   median       uq      max neval
  125 811.101 1026.3510 1060.9119 1069.051 1099.151 1369.401   100
  250 811.002 1017.5010 1066.3910 1055.901 1093.052 1555.701   100
  500 749.901  898.6005  973.4019  977.451 1008.251 1511.501   100
 1000 572.302  698.5510  745.4090  744.501  782.301 1108.901   100

Rolling variances

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sumsq}_{w}\leftarrow\text{sumsq}_{w}+\text{w}_{new}^{2}\\ &\bullet\text{sumsq}_{x}\leftarrow\lambda\times\text{sumsq}_{x}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{x}_{new}-\text{mean}_{prev_x})\\ &\text{Rolling window}\\ &\bullet\text{sumsq}_{x}\leftarrow\lambda\times\text{sumsq}_{x}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{x}_{new}-\text{mean}_{prev_x})-\\ &\lambda\times\text{w}_{old}\times (\text{x}_{old}-\text{mean}_{x})(\text{x}_{old}-\text{mean}_{prev_x}) \end{aligned} \]

result <- microbenchmark("125" = roll_var(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_var(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_var(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_var(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq    mean   median       uq     max neval
  125 170.201 202.0510 233.549 227.5015 248.6005 423.400   100
  250 151.802 201.4010 226.829 220.1005 245.7020 381.301   100
  500 162.802 200.1515 223.476 217.6510 237.1510 331.201   100
 1000 147.801 189.2510 213.937 205.2010 226.7510 382.201   100

Rolling standard deviations

result <- microbenchmark("125" = roll_sd(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_sd(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_sd(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_sd(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq      max neval
  125 160.900 201.1505 224.0679 218.0015 236.5510  468.001   100
  250 156.001 190.8015 224.9599 214.6005 229.3505 1116.601   100
  500 150.601 187.8010 215.3350 206.0510 221.8010  588.801   100
 1000 143.601 180.5505 198.2171 194.1010 216.4015  290.000   100

Rolling scaling and centering

result <- microbenchmark("125" = roll_scale(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_scale(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_scale(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_scale(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean  median       uq     max neval
  125 176.602 210.5010 252.3620 228.700 290.3515 478.501   100
  250 169.202 215.0005 251.8601 239.601 280.3510 475.501   100
  500 165.201 209.7510 244.4901 229.351 267.5505 551.601   100
 1000 156.602 198.0510 231.0959 221.351 256.6510 351.201   100

Rolling covariances

\[ \begin{aligned} &\text{Expanding window}\\ &\bullet\text{sum}_{w}\leftarrow\text{sum}_{w}+\text{w}_{new}\\ &\bullet\text{sumsq}_{w}\leftarrow\text{sumsq}_{w}+\text{w}_{new}^{2}\\ &\bullet\text{sumsq}_{xy}\leftarrow\lambda\times\text{sumsq}_{xy}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{y}_{new}-\text{mean}_{prev_y})\\ &\text{Rolling window}\\ &\bullet\text{sumsq}_{xy}\leftarrow\lambda\times\text{sumsq}_{xy}+\text{w}_{new}\times (\text{x}_{new}-\text{mean}_{x})(\text{y}_{new}-\text{mean}_{prev_y})-\\ &\lambda\times\text{w}_{old}\times (\text{x}_{old}-\text{mean}_{x})(\text{y}_{old}-\text{mean}_{prev_y}) \end{aligned} \]

result <- microbenchmark("125" = roll_cov(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_cov(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_cov(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_cov(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq     mean   median       uq       max neval
  125 998.701 1113.801 1377.078 1226.601 1356.052  8082.701   100
  250 961.501 1100.802 1209.891 1171.201 1277.351  2014.701   100
  500 894.501 1069.101 1312.217 1149.051 1239.251  7821.201   100
 1000 888.701 1018.201 1250.754 1090.302 1205.251 13341.800   100

Rolling correlations

result <- microbenchmark("125" = roll_cor(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_cor(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_cor(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_cor(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr      min       lq     mean   median       uq      max neval
  125 1048.800 1294.651 1522.949 1453.451 1553.451 6897.701   100
  250 1014.001 1270.002 1462.462 1391.451 1505.152 7390.501   100
  500  874.800 1179.051 1378.010 1293.951 1460.501 6539.200   100
 1000  951.600 1072.700 1326.362 1186.252 1281.551 7988.001   100

Rolling crossproducts

result <- microbenchmark("125" = roll_crossprod(x, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_crossprod(x, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_crossprod(x, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_crossprod(x, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr     min       lq      mean   median        uq      max neval
  125 718.501 873.5015  974.6990 938.4515 1028.1005 1836.601   100
  250 696.601 823.2510  994.4279 915.5015  987.9010 7366.400   100
  500 679.101 807.3510 1105.9610 888.7515  972.7515 6518.101   100
 1000 574.601 713.6010  873.5160 766.6505  887.7510 7200.601   100

Rolling linear models

\[ \begin{aligned} &\text{coef}=\text{cov}_{xx}^{-1}\times\text{cov}_{xy}\\ &\text{intercept}=\text{mean}_{y}-\text{coef}\times\text{mean}_{x}\\ &\text{rsq}=\frac{\text{coef}^{T}\times\text{cov}_{xx}\times\text{coef}}{\text{var}_{y}}\\ &\text{var}_{resid}=\frac{(1-\text{rsq})(\text{var}_{y})(\text{sum}_{w}-\text{sumsq}_{w}/\text{sum}_{w})}{\text{n}_{rows}-\text{n}_{cols}}\\ &\text{xx}=\text{cov}_{xx}\times(\text{sum}_{w}-\text{sumsq}_{w}/\text{sum}_{w})\\ &\text{se}_{coef}=\sqrt{\text{var}_{resid}\times\text{diag}(\text{xx}^{-1})}\\ &\text{se}_{intercept}=\sqrt{\text{var}_{resid}\left(1/\text{sum}_{w}+\text{mean}_{x}^{T}\text{xx}^{-1}\text{mean}_{x}\right)} \end{aligned} \]

result <- microbenchmark("125" = roll_lm(x, y, width = 125, min_obs = 1, weights = weights),
                         "250" = roll_lm(x, y, width = 250, min_obs = 1, weights = weights),
                         "500" = roll_lm(x, y, width = 500, min_obs = 1, weights = weights),
                         "1000" = roll_lm(x, y, width = 1000, min_obs = 1, weights = weights))
print(result)
Unit: microseconds
 expr      min       lq     mean   median       uq     max neval
  125 2529.402 3426.251 5368.861 4212.900 7079.602 26056.1   100
  250 2319.401 3386.551 5159.040 4197.551 7001.951 12917.6   100
  500 2298.701 3299.950 4767.789 4101.251 6175.251 10306.1   100
 1000 2472.002 3242.301 4733.533 4084.001 6480.751 10329.1   100

References