---
title: "Examples of using runstats package"
author: "Marta Karas"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteIndexEntry{Examples of using runstats package}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  # fig.path = "man/figures/README-",
  out.width = "100%"
)
```

# runstats

Package `runstats` provides methods for fast computation of running sample statistics for time series. The methods utilize the Convolution Theorem to compute convolutions via the Fast Fourier Transform (FFT). Implemented running statistics include:

1. mean,
2. standard deviation,
3. variance,
4. covariance,
5. correlation,
6. Euclidean distance.

### Installation

```{r, eval = FALSE}
# devtools::install_github("martakarass/runstats")
install.packages("runstats")
```

### Usage

```{r, eval = FALSE}
library(runstats)

## Example: running correlation
x0 <- sin(seq(0, 2 * pi * 5, length.out = 1000))
x <- x0 + rnorm(1000, sd = 0.1)
pattern <- x0[1:100]
out1 <- RunningCor(x, pattern)
out2 <- RunningCor(x, pattern, circular = TRUE)

## Example: running mean
x <- cumsum(rnorm(1000))
out1 <- RunningMean(x, W = 100)
out2 <- RunningMean(x, W = 100, circular = TRUE)
```

### Running statistics

To better explain the details of running statistics, the package's function `runstats.demo(func.name)` visualizes how the output of each running statistics method is generated. To run the demo, set `func.name` to one of the method names:

1. `"RunningMean"`,
2. `"RunningSd"`,
3. `"RunningVar"`,
4. `"RunningCov"`,
5. `"RunningCor"`,
6. `"RunningL2Norm"`.

```{r, eval = FALSE}
## Example: demo for running correlation method
runstats.demo("RunningCor")
```

![](https://i.imgur.com/JQObAu7.gif)

```{r, eval = FALSE}
## Example: demo for running mean method
runstats.demo("RunningMean")
```

![](https://i.imgur.com/FWah3aG.gif)

### Performance

We use `rbenchmark` to measure the elapsed time of `RunningCov` execution for different lengths of the time series `x` and a fixed length of the shorter pattern `y`.

```{r, echo=FALSE, include=FALSE}
library(rbenchmark)
library(ggplot2)
```

```{r, eval = FALSE}
library(rbenchmark)

set.seed (20181010)
x.N.seq <- 10^(3:7)
x.list <- lapply(x.N.seq, function(N) runif(N))
y <- runif(100)

## Benchmark execution time of RunningCov
out.df <- data.frame()
for (x.tmp in x.list){
  out.df.tmp <- benchmark("runstats" = runstats::RunningCov(x.tmp, y),
                          replications = 10,
                          columns = c("test", "replications", "elapsed",
                                      "relative", "user.self", "sys.self"))
  out.df.tmp$x_length <- length(x.tmp)
  out.df.tmp$pattern_length <- length(y)
  out.df <- rbind(out.df, out.df.tmp)
}
```

```{r, echo = FALSE, eval = FALSE}
out.df.path <- "../inst/benchmark_results/2019-11-14-performance_rbenchmark_suite1.csv"
write.csv(out.df, out.df.path, row.names = FALSE, quote = FALSE)
```

```{r, echo = FALSE, eval = TRUE}
out.df.path <- "../inst/benchmark_results/2019-11-14-performance_rbenchmark_suite1.csv"
out.df <- read.csv(out.df.path)
options("scipen" = 10, "digits" = 4)
```

```{r}
knitr::kable(out.df)
```

##### Compare with a conventional method

To compare `runstats` performance with the "conventional" loop-based way of computing running covariance in `R`, we use the `rbenchmark` package to measure the elapsed time of `runstats::RunningCov` and of a running covariance implemented with an `sapply` loop, for different lengths of the time series `x` and a fixed length of the shorter time series `y`.

```{r, eval = FALSE}
## Conventional approach: running covariance computed with an sapply loop.
##
## x: vector holding the longer series.
## y: vector holding the shorter pattern; must not be longer than x.
## Returns one covariance per window of x of length(y), i.e. a vector of
## length(x) - length(y) + 1 values, where element i is cov() of y with the
## window of x that starts at index i.
RunningCov.sapply <- function(x, y){
  l_x <- length(x)
  l_y <- length(y)
  ## Fail early with a clear message. Without this check a too-short x made
  ## the window index sequence count down and cov() fail on mismatched lengths.
  if (l_x < l_y){
    stop("x must be at least as long as y", call. = FALSE)
  }
  ## seq_len() never counts down, unlike 1:n. The guard above guarantees at
  ## least one window, so sapply() always simplifies to a numeric vector.
  sapply(seq_len(l_x - l_y + 1), function(i){
    cov(x[i:(i + l_y - 1)], y)
  })
}

set.seed (20181010)

## x.list and y are created in the earlier benchmark chunk. Only the first
## four elements of x.list are used for this comparison.
out.df2 <- data.frame()
for (x.tmp in x.list[c(1,2,3,4)]){
  out.df.tmp <- benchmark("conventional" = RunningCov.sapply(x.tmp, y),
                          "runstats" = runstats::RunningCov(x.tmp, y),
                          replications = 10,
                          columns = c("test", "replications", "elapsed",
                                      "relative", "user.self", "sys.self"))
  out.df.tmp$x_length <- length(x.tmp)
  out.df2 <- rbind(out.df2, out.df.tmp)
}
```

```{r, echo = FALSE, eval = FALSE}
out.df.path <-"../inst/benchmark_results/2019-11-14-performance_rbenchmark_suite2.csv"
write.csv(out.df2, out.df.path, row.names = FALSE, quote = FALSE)
```

```{r, echo = FALSE, eval = TRUE}
out.df.path <- "../inst/benchmark_results/2019-11-14-performance_rbenchmark_suite2.csv"
out.df2 <- read.csv(out.df.path)
```

Benchmark results

```{r, fig.width=10, fig.height=5}
library(ggplot2)

## Panel A: elapsed time on a linear y axis (x axis is log10-scaled).
plt1 <-
  ggplot(out.df2, aes(x = x_length, y = elapsed, color = test)) +
  geom_line() + geom_point(size = 3) + scale_x_log10() +
  theme_minimal(base_size = 14) +
  labs(x = "Vector length of x", y = "Elapsed [s]", color = "Method",
       title = "Running covariance rbenchmark") +
  theme(legend.position = "bottom")

## Panel B: same plot with the y axis log10-scaled as well.
plt2 <-
  plt1 +
  scale_y_log10() +
  labs(y = "Log of elapsed [s]")

cowplot::plot_grid(plt1, plt2, nrow = 1, labels = c('A', 'B'))
```

Platform information

```{r}
sessioninfo::platform_info()
```