Title: | 'Memoisation' of Functions |
---|---|
Description: | Cache the results of a function so that when you call it again with the same arguments it returns the previously computed value. |
Authors: | Hadley Wickham [aut], Jim Hester [aut], Winston Chang [aut, cre], Kirill Müller [aut], Daniel Cook [aut], Mark Edmondson [ctb] |
Maintainer: | Winston Chang <[email protected]> |
License: | MIT + file LICENSE |
Version: | 2.0.1.9000 |
Built: | 2024-11-11 06:00:41 UTC |
Source: | https://github.com/r-lib/memoise |
Use a cache on the local filesystem that will persist between R sessions.
cache_filesystem(path, algo = "xxhash64", compress = FALSE)
cache_filesystem(path, algo = "xxhash64", compress = FALSE)
path |
Directory in which to store cached items. |
algo |
The hashing algorithm used for the cache, see `digest::digest()` for available algorithms. |
compress |
Argument passed to `saveRDS()`. One of FALSE, "gzip", "bzip2" or "xz". Default: FALSE. |
## Not run: # Use with Dropbox db <- cache_filesystem("~/Dropbox/.rcache") mem_runif <- memoise(runif, cache = db) # Use with Google Drive gd <- cache_filesystem("~/Google Drive/.rcache") mem_runif <- memoise(runif, cache = gd) ## End(Not run)
## Not run: # Use with Dropbox db <- cache_filesystem("~/Dropbox/.rcache") mem_runif <- memoise(runif, cache = db) # Use with Google Drive gd <- cache_filesystem("~/Google Drive/.rcache") mem_runif <- memoise(runif, cache = gd) ## End(Not run)
Google Cloud Storage Cache
Google Cloud Storage backed cache, for remote caching.
cache_gcs( cache_name = googleCloudStorageR::gcs_get_global_bucket(), algo = "sha512", compress = FALSE )
cache_gcs( cache_name = googleCloudStorageR::gcs_get_global_bucket(), algo = "sha512", compress = FALSE )
cache_name |
Bucket name for storing cache files. |
algo |
The hashing algorithm used for the cache, see `digest::digest()` for available algorithms. |
compress |
Argument passed to `saveRDS()`. One of FALSE, "gzip", "bzip2" or "xz". Default: FALSE. |
## Not run: library(googleCloudStorageR) # Set GCS credentials. Sys.setenv("GCS_AUTH_FILE"="<google-service-json>", "GCS_DEFAULT_BUCKET"="unique-bucket-name") gcs <- cache_gcs("unique-bucket-name") mem_runif <- memoise(runif, cache = gcs) ## End(Not run)
## Not run: library(googleCloudStorageR) # Set GCS credentials. Sys.setenv("GCS_AUTH_FILE"="<google-service-json>", "GCS_DEFAULT_BUCKET"="unique-bucket-name") gcs <- cache_gcs("unique-bucket-name") mem_runif <- memoise(runif, cache = gcs) ## End(Not run)
A cache in memory, that lasts only in the current R session.
cache_memory(algo = "sha512")
cache_memory(algo = "sha512")
algo |
The hashing algorithm used for the cache, see `digest::digest()` for available algorithms. |
Amazon Web Services S3 Cache
Amazon Web Services S3 backed cache, for remote caching.
cache_s3(cache_name, algo = "sha512", compress = FALSE)
cache_s3(cache_name, algo = "sha512", compress = FALSE)
cache_name |
Bucket name for storing cache files. |
algo |
The hashing algorithm used for the cache, see `digest::digest()` for available algorithms. |
compress |
Argument passed to `saveRDS()`. One of FALSE, "gzip", "bzip2" or "xz". Default: FALSE. |
## Not run: # Set AWS credentials. Sys.setenv("AWS_ACCESS_KEY_ID" = "<access key>", "AWS_SECRET_ACCESS_KEY" = "<access secret>") # Set up a unique bucket name. s3 <- cache_s3("unique-bucket-name") mem_runif <- memoise(runif, cache = s3) ## End(Not run)
## Not run: # Set AWS credentials. Sys.setenv("AWS_ACCESS_KEY_ID" = "<access key>", "AWS_SECRET_ACCESS_KEY" = "<access secret>") # Set up a unique bucket name. s3 <- cache_s3("unique-bucket-name") mem_runif <- memoise(runif, cache = s3) ## End(Not run)
Drops the cache of a memoised function for particular arguments.
drop_cache(f)
drop_cache(f)
f |
Memoised function. |
A function, with the same arguments as `f`, that can be called to drop the cached results of `f`.
mem_sum <- memoise(sum) mem_sum(1, 2, 3) mem_sum(2, 3, 4) has_cache(mem_sum)(1, 2, 3) # TRUE has_cache(mem_sum)(2, 3, 4) # TRUE drop_cache(mem_sum)(1, 2, 3) # TRUE has_cache(mem_sum)(1, 2, 3) # FALSE has_cache(mem_sum)(2, 3, 4) # TRUE
mem_sum <- memoise(sum) mem_sum(1, 2, 3) mem_sum(2, 3, 4) has_cache(mem_sum)(1, 2, 3) # TRUE has_cache(mem_sum)(2, 3, 4) # TRUE drop_cache(mem_sum)(1, 2, 3) # TRUE has_cache(mem_sum)(1, 2, 3) # FALSE has_cache(mem_sum)(2, 3, 4) # TRUE
Forget past results.
Resets the cache of a memoised function. Use `drop_cache` to reset the cache only for particular arguments.
forget(f)
forget(f)
f |
Memoised function. |
See also: `memoise`, `is.memoised`, `drop_cache`
memX <- memoise(function() { Sys.sleep(1); runif(1) }) # The forget() function system.time(print(memX())) system.time(print(memX())) forget(memX) system.time(print(memX()))
memX <- memoise(function() { Sys.sleep(1); runif(1) }) # The forget() function system.time(print(memX())) system.time(print(memX())) forget(memX) system.time(print(memX()))
Test whether a memoised function has been cached for particular arguments.
has_cache(f)
has_cache(f)
f |
Function to test. |
A function, with the same arguments as `f`, that can be called to test if `f` has cached results.
See also: `is.memoised`, `memoise`, `drop_cache`
mem_sum <- memoise(sum) has_cache(mem_sum)(1, 2, 3) # FALSE mem_sum(1, 2, 3) has_cache(mem_sum)(1, 2, 3) # TRUE
mem_sum <- memoise(sum) has_cache(mem_sum)(1, 2, 3) # FALSE mem_sum(1, 2, 3) has_cache(mem_sum)(1, 2, 3) # TRUE
Test whether a function is a memoised copy.
Memoised copies of functions carry an attribute `memoised = TRUE`, which is what `is.memoised()` tests for.
is.memoised(f)
is.memoised(f)
f |
Function to test. |
mem_lm <- memoise(lm) is.memoised(lm) # FALSE is.memoised(mem_lm) # TRUE
mem_lm <- memoise(lm) is.memoised(lm) # FALSE is.memoised(mem_lm) # TRUE
`mf <- memoise(f)` creates `mf`, a memoised copy of `f`. A memoised copy is basically a
lazier version of the same function: it saves the answers of
new invocations, and re-uses the answers of old ones. Under the right
circumstances, this can provide a very nice speedup indeed.
memoise( f, ..., envir = environment(f), cache = cachem::cache_mem(max_size = 1024 * 1024^2), omit_args = c(), hash = function(x) rlang::hash(x) )
memoise( f, ..., envir = environment(f), cache = cachem::cache_mem(max_size = 1024 * 1024^2), omit_args = c(), hash = function(x) rlang::hash(x) )
f |
Function of which to create a memoised copy. |
... |
Optional variables to use as additional restrictions on caching, specified as one-sided formulas (no LHS). See Examples for usage. |
envir |
Environment of the returned function. |
cache |
Cache object. The default is a `cachem::cache_mem()` with a max size of 1024 MB. |
omit_args |
Names of arguments to ignore when calculating hash. |
hash |
A function which takes an R object as input and returns a string which is used as a cache key. |
There are two main ways to use the `memoise` function. Say that
you wish to memoise `glm`, which is in the `stats` package; then you
could use `mem_glm <- memoise(glm)`, or you could use
`glm <- memoise(stats::glm)`.
The first form has the advantage that you still have easy access to
both the memoised and the original function. The latter is especially
useful to bring the benefits of memoisation to an existing block
of R code.
Two example situations where `memoise` could be of use:
- You're evaluating a function repeatedly over the rows (or larger chunks) of a dataset, and expect to regularly get the same input.
- You're debugging or developing something, which involves
  a lot of re-running the code. If there are a few expensive calls
  in there, memoising them can make life a lot more pleasant.
If the code is in a script file that you're `source()`ing,
take care that you don't just put `glm <- memoise(stats::glm)`
at the top of your file: that would reinitialise the memoised
function every time the file was sourced. Wrap it in
`if (!is.memoised(glm))`, or do the memoisation call
once at the R prompt, or put it somewhere else where it won't get
repeated.
It is recommended that functions in a package are not memoised at build-time,
but when the package is loaded. The simplest way to do this is within
`.onLoad()` with, for example:
```r
# file.R
fun <- function() {
  some_expensive_process()
}

# zzz.R
.onLoad <- function(libname, pkgname) {
  fun <<- memoise::memoise(fun)
}
```
See also: `forget`, `is.memoised`, `timeout`, https://en.wikipedia.org/wiki/Memoization, `drop_cache`
# a() is evaluated anew each time. memA() is only re-evaluated # when you call it with a new set of parameters. a <- function(n) { runif(n) } memA <- memoise(a) replicate(5, a(2)) replicate(5, memA(2)) # Caching is done based on parameters' value, so same-name-but- # changed-value correctly produces two different outcomes... N <- 4; memA(N) N <- 5; memA(N) # ... and same-value-but-different-name correctly produces # the same cached outcome. N <- 4; memA(N) N2 <- 4; memA(N2) # memoise() knows about default parameters. b <- function(n, dummy="a") { runif(n) } memB <- memoise(b) memB(2) memB(2, dummy="a") # This works, because the interface of the memoised function is the same as # that of the original function. formals(b) formals(memB) # However, it doesn't know about parameter relevance. # Different call means different caching, no matter # that the outcome is the same. memB(2, dummy="b") # You can create multiple memoisations of the same function, # and they'll be independent. memA(2) memA2 <- memoise(a) memA(2) # Still the same outcome memA2(2) # Different cache, different outcome # Multiple memoized functions can share a cache. cm <- cachem::cache_mem(max_size = 50 * 1024^2) memA <- memoise(a, cache = cm) memB <- memoise(b, cache = cm) # Don't do the same memoisation assignment twice: a brand-new # memoised function also means a brand-new cache, and *that* # you could as easily and more legibly achieve using forget(). # (If you're not sure whether you already memoised something, # use is.memoised() to check.) memA(2) memA <- memoise(a) memA(2) # Make a memoized result automatically time out after 10 seconds. memA3 <- memoise(a, cache = cachem::cache_mem(max_age = 10)) memA3(2)
# a() is evaluated anew each time. memA() is only re-evaluated # when you call it with a new set of parameters. a <- function(n) { runif(n) } memA <- memoise(a) replicate(5, a(2)) replicate(5, memA(2)) # Caching is done based on parameters' value, so same-name-but- # changed-value correctly produces two different outcomes... N <- 4; memA(N) N <- 5; memA(N) # ... and same-value-but-different-name correctly produces # the same cached outcome. N <- 4; memA(N) N2 <- 4; memA(N2) # memoise() knows about default parameters. b <- function(n, dummy="a") { runif(n) } memB <- memoise(b) memB(2) memB(2, dummy="a") # This works, because the interface of the memoised function is the same as # that of the original function. formals(b) formals(memB) # However, it doesn't know about parameter relevance. # Different call means different caching, no matter # that the outcome is the same. memB(2, dummy="b") # You can create multiple memoisations of the same function, # and they'll be independent. memA(2) memA2 <- memoise(a) memA(2) # Still the same outcome memA2(2) # Different cache, different outcome # Multiple memoized functions can share a cache. cm <- cachem::cache_mem(max_size = 50 * 1024^2) memA <- memoise(a, cache = cm) memB <- memoise(b, cache = cm) # Don't do the same memoisation assignment twice: a brand-new # memoised function also means a brand-new cache, and *that* # you could as easily and more legibly achieve using forget(). # (If you're not sure whether you already memoised something, # use is.memoised() to check.) memA(2) memA <- memoise(a) memA(2) # Make a memoized result automatically time out after 10 seconds. memA3 <- memoise(a, cache = cachem::cache_mem(max_age = 10)) memA3(2)
This function will return a number corresponding to the system time and remain stable until a given number of seconds have elapsed, after which it will update to the current time. This makes it useful as a way to timeout and invalidate a memoised cache after a certain period of time.
timeout(seconds, current = as.numeric(Sys.time()))
timeout(seconds, current = as.numeric(Sys.time()))
seconds |
Number of seconds after which to timeout. |
current |
The current time as a numeric. |
A numeric that will remain constant until the seconds have elapsed.
a <- function(n) { runif(n) } memA <- memoise(a, ~timeout(10)) memA(2)
a <- function(n) { runif(n) } memA <- memoise(a, ~timeout(10)) memA(2)