breaking change: new API for cache, based on values (no more functors)

This commit is contained in:
Simon Cruanes 2014-11-17 02:52:25 +01:00
parent 2420df32f3
commit 05ba0e5bba
2 changed files with 319 additions and 391 deletions

View file

@ -25,356 +25,258 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(** {1 Memoization caches} *) (** {1 Memoization caches} *)
module type EQ = sig type 'a equal = 'a -> 'a -> bool
type t type 'a hash = 'a -> int
val equal : t -> t -> bool
let default_eq_ = Pervasives.(=)
let default_hash_ = Hashtbl.hash
(** {2 Value interface} *)
type ('a,'b) t = {
set : 'a -> 'b -> unit;
get : 'a -> 'b; (* or raise Not_found *)
clear : unit -> unit;
}
let clear c = c.clear ()
let with_cache c f x =
try
c.get x
with Not_found ->
let y = f x in
c.set x y;
y
let with_cache_rec c f =
let rec f' x = with_cache c (f f') x in
f'
let dummy = {
set=(fun _ _ -> ());
get=(fun _ -> raise Not_found);
clear=(fun _ -> ());
}
module Linear = struct
type ('a,'b) bucket =
| Empty
| Pair of 'a * 'b
type ('a,'b) t = {
eq : 'a equal;
arr : ('a,'b) bucket array;
mutable i : int; (* index for next assertion, cycles through *)
}
let make eq size =
assert (size>0);
{arr=Array.make size Empty; eq; i=0; }
let clear c =
Array.fill c.arr 0 (Array.length c.arr) Empty;
c.i <- 0
(* linear lookup *)
let rec search_ c i x =
if i=Array.length c.arr then raise Not_found;
match c.arr.(i) with
| Pair (x', y) when c.eq x x' -> y
| Pair _
| Empty -> search_ c (i+1) x
let get c x = search_ c 0 x
let set c x y =
c.arr.(c.i) <- Pair (x,y);
c.i <- (c.i + 1) mod Array.length c.arr
end end
let linear ?(eq=default_eq_) size =
let size = max size 1 in
let arr = Linear.make eq size in
{ get=(fun x -> Linear.get arr x);
set=(fun x y -> Linear.set arr x y);
clear=(fun () -> Linear.clear arr);
}
module Replacing = struct
type ('a,'b) bucket =
| Empty
| Pair of 'a * 'b
type ('a,'b) t = {
eq : 'a equal;
hash : 'a hash;
arr : ('a,'b) bucket array;
}
let make eq hash size =
assert (size>0);
{arr=Array.make size Empty; eq; hash }
let clear c =
Array.fill c.arr 0 (Array.length c.arr) Empty
let get c x =
let i = c.hash x mod Array.length c.arr in
match c.arr.(i) with
| Pair (x', y) when c.eq x x' -> y
| Pair _
| Empty -> raise Not_found
let set c x y =
let i = c.hash x mod Array.length c.arr in
c.arr.(i) <- Pair (x,y)
end
let replacing ?(eq=default_eq_) ?(hash=default_hash_) size =
let c = Replacing.make eq hash size in
{ get=(fun x -> Replacing.get c x);
set=(fun x y -> Replacing.set c x y);
clear=(fun () -> Replacing.clear c);
}
module type HASH = sig module type HASH = sig
include EQ type t
val hash : t -> int val equal : t equal
val hash : t hash
end end
(** Signature of a cache for values *) module LRU(X:HASH) = struct
module type S = sig
type 'a t
type key
val create : int -> 'a t
(** Create a new cache of the given size. *)
val clear : 'a t -> unit
(** Clear content of the cache *)
val with_cache : 'a t -> (key -> 'a) -> key -> 'a
(** Wrap the function with the cache. This means that
[with_cache cache f x] always returns the same value as
[f x], if [f x] returns, or raise the same exception.
However, [f] may not be called if [x] is in the cache. *)
val with_cache_rec : 'a t -> ((key -> 'a) -> key -> 'a) -> key -> 'a
(** Partially apply the given function with a cached version of itself.
It returns the specialized function.
[with_cache_rec cache f] applies [f] to a cached version of [f],
called [f'], so that [f' x = f f' x]. *)
end
(** Signature of a cache for pairs of values *)
module type S2 = sig
type 'a t
type key1
type key2
val create : int -> 'a t
(** Create a new cache of the given size. *)
val clear : 'a t -> unit
(** Clear content of the cache *)
val with_cache : 'a t -> (key1 -> key2 -> 'a) -> key1 -> key2 -> 'a
(** Wrap the function with the cache *)
end
(** {2 Dummy cache (no caching) *)
module Dummy(X : sig type t end) = struct
type 'a t = unit
and key = X.t
let create size = ()
let clear () = ()
let with_cache () f x = f x
let with_cache_rec () f x =
let rec f' x = f f' x in
f' x
end
module Dummy2(X : sig type t end)(Y : sig type t end) = struct
type 'a t = unit
and key1 = X.t
and key2 = Y.t
let create size = ()
let clear () = ()
let with_cache () f x1 x2 = f x1 x2
end
(** {2 Small linear cache} *)
(** This cache stores (key,value) pairs in an array, that is traversed
linearily. It is therefore only reasonable for small sizes (like 5). *)
module Linear(X : EQ) = struct
type 'a t = 'a bucket array
and 'a bucket = Empty | Pair of key * 'a | PairRaise of key * exn
and key = X.t
let create size =
assert (size >= 1);
Array.make size Empty
let clear cache =
Array.fill cache 0 (Array.length cache) Empty
(** Insert the bucket [b] into the cache *)
let insert cache b =
let n = Array.length cache in
(* shift other values toward the end *)
Array.blit cache 0 cache 1 (n-1);
cache.(0) <- b
(** Try to find [f x] in the cache, otherwise compute it
and cache the result *)
let with_cache cache f x =
let n = Array.length cache in
let rec search i =
(* function that performs the lookup *)
if i = n then begin
(* cache miss *)
try
let y = f x in
insert cache (Pair (x, y));
y
with e ->
insert cache (PairRaise (x, e));
raise e
end else match cache.(i) with
| Pair (x',y) when X.equal x x' -> y
| PairRaise (x', e) when X.equal x x' -> raise e
| _ -> search (i+1)
in
search 0
let with_cache_rec cache f x =
(* make a recursive version of [f] that uses the cache *)
let rec f' x = with_cache cache (fun x -> f f' x) x in
f' x
end
module Linear2(X : EQ)(Y : EQ) = struct
type 'a t = 'a bucket array
and 'a bucket = Empty | Assoc of key1 * key2 * 'a | AssocRaise of key1 * key2 * exn
and key1 = X.t
and key2 = Y.t
let create size =
assert (size >= 1);
Array.make size Empty
let clear cache =
Array.fill cache 0 (Array.length cache) Empty
(** Insert the binding [b] into the cache *)
let insert cache b =
let n = Array.length cache in
(* shift other values toward the end *)
Array.blit cache 0 cache 1 (n-1);
cache.(0) <- b
(** Try to find [f x] in the cache, otherwise compute it
and cache the result *)
let with_cache cache f x1 x2 =
let n = Array.length cache in
let rec search i =
(* function that performs the lookup *)
if i = n then begin
(* cache miss *)
try
let y = f x1 x2 in
insert cache (Assoc (x1, x2, y));
y
with e ->
insert cache (AssocRaise (x1, x2, e));
raise e
end else match cache.(i) with
| Assoc (x1',x2',y) when X.equal x1 x1' && Y.equal x2 x2' -> y
| AssocRaise (x1',x2',e) when X.equal x1 x1' && Y.equal x2 x2' -> raise e
| _ -> search (i+1)
in
search 0
end
(** {2 An imperative cache of fixed size for memoization of pairs} *)
module Replacing(X : HASH) = struct
type key = X.t
(** A slot of the array contains a (key, value, true)
if key->value is stored there (at index hash(key) % length),
(null, null, false) otherwise.
The first slot in the array contains the function
used to produce the value upon a cache miss. *)
type 'a t = 'a bucket array
and 'a bucket = Empty | Assoc of key * 'a | AssocRaise of key * exn
let create size =
Array.make size Empty
let clear c =
Array.fill c 0 (Array.length c) Empty
(** Try to find [f x] in the cache, otherwise compute it
and cache the result *)
let with_cache c f x =
let i = (X.hash x) mod (Array.length c) in
match c.(i) with
| Assoc (x', y) when X.equal x x' ->
y (* cache hit *)
| AssocRaise (x', e) when X.equal x x' ->
raise e (* cache hit *)
| _ -> (* cache miss *)
try
let y = f x in
c.(i) <- Assoc (x, y);
y
with e ->
c.(i) <- AssocRaise (x, e);
raise e
let with_cache_rec cache f x =
(* make a recursive version of [f] that uses the cache *)
let rec f' x = with_cache cache (fun x -> f f' x) x in
f' x
end
module Replacing2(X : HASH)(Y : HASH) = struct
(** A slot of the array contains a (key, value, true)
if key->value is stored there (at index hash(key) % length),
(null, null, false) otherwise.
The first slot in the array contains the function
used to produce the value upon a cache miss. *)
type 'a t = 'a bucket array
and 'a bucket = Empty | Assoc of key1 * key2 * 'a | AssocRaise of key1 * key2 * exn
and key1 = X.t
and key2 = Y.t
let create size =
Array.make size Empty
let clear c =
Array.fill c 0 (Array.length c) Empty
let with_cache c f x1 x2 =
let i = (((X.hash x1 + 17) lxor Y.hash x2) mod Array.length c) in
match c.(i) with
| Assoc (x1', x2', y) when X.equal x1 x1' && Y.equal x2 x2' ->
y (* cache hit *)
| AssocRaise (x1', x2', e) when X.equal x1 x1' && Y.equal x2 x2' ->
raise e (* cache hit *)
| _ -> (* cache miss *)
try
let y = f x1 x2 in
c.(i) <- Assoc (x1, x2, y);
y
with e ->
c.(i) <- AssocRaise (x1, x2, e);
raise e
end
(** {2 Hashtables with Least Recently Used eviction policy *)
(* TODO: handle exceptions *)
module LRU(X : HASH) = struct
type key = X.t type key = X.t
module H = Hashtbl.Make(X) module H = Hashtbl.Make(X)
type 'a t = { type 'a t = {
table : 'a node H.t; (* hashtable key -> node *) table : 'a node H.t; (* hashtable key -> node *)
first : 'a node; (* dummy node for the entry of the list *) mutable first : 'a node option;
mutable len : int; (* number of entries *) mutable last : 'a node option;
size : int; (* max size *) size : int; (* max size *)
} }
and 'a node = { and 'a node = {
mutable key : key; mutable key : key;
mutable value : 'a; mutable value : 'a;
mutable next : 'a node; mutable next : 'a node option;
mutable prev : 'a node; } (** Meta data for the value, making a chained list *)
} (** Meta data for the value *)
let create size = let make size =
let rec first = assert (size > 0);
{ key = Obj.magic 0; value = Obj.magic 0; next=first; prev=first; }
in
{ table = H.create size; { table = H.create size;
len = 0;
size; size;
first; first=None;
last=None;
} }
(** Clear the content of the cache *)
let clear c = let clear c =
c.len <- 0;
H.clear c.table; H.clear c.table;
c.first.next <- c.first; c.first <- None;
c.first.prev <- c.first; c.last <- None;
() ()
(** Find an element, or raise Not_found *) let get c x = (H.find c.table x).value
let find c x =
let n = H.find c.table x in
assert (X.equal n.key x);
n.value
(** Replace least recently used element of [c] by x->y *) let get_opt = function
let replace c x y = | None -> assert false
let n = c.first.next in | Some x -> x
(* remove old element *)
(* reverse the list *)
let rec reverse_ prev = function
| None -> prev
| Some n as node ->
let next = n.next in
n.next <- prev;
reverse_ node next
(* take first from queue *)
let take_ c =
match c.first with
| Some n ->
c.first <- n.next;
n
| None ->
(* re-fill front list *)
match reverse_ None c.last with
| None -> assert false
| Some n ->
c.first <- n.next;
n
let push_ c n =
n.next <- c.last;
c.last <- Some n
(* Replace least recently used element of [c] by x->y *)
let replace_ c x y =
(* remove old *)
let n = take_ c in
H.remove c.table n.key; H.remove c.table n.key;
(* insertion in hashtable *) (* add x->y *)
H.add c.table x n; H.add c.table x n;
(* re-use the node for x,y *)
n.key <- x; n.key <- x;
n.value <- y; n.value <- y;
(* remove from front of queue *) (* push at back of queue *)
n.next.prev <- c.first; push_ c n;
c.first.next <- n.next;
(* insert at back of queue *)
let last = c.first.prev in
last.next <- n;
c.first.prev <- n;
n.next <- c.first;
n.prev <- last;
() ()
(** Insert x->y in the cache, increasing its entry count *) (* Insert x->y in the cache, increasing its entry count *)
let insert c x y = let insert_ c x y =
c.len <- c.len + 1;
let n = { let n = {
key = x; key = x;
value = y; value = y;
next = c.first; next = c.last;
prev = c.first.prev;
} in } in
(* insertion in hashtable *)
H.add c.table x n; H.add c.table x n;
(* insertion at back of queue *) c.last <- Some n;
c.first.prev.next <- n;
c.first.prev <- n;
() ()
(** Try to find [f x] in the cache, otherwise compute it let set c x y =
and cache the result *) let len = H.length c.table in
let with_cache c f x = assert (len <= c.size);
try if len = c.size
find c x then replace_ c x y
with Not_found -> else insert_ c x y
let y = f x in
(if c.len = c.size
then replace c x y
else insert c x y);
y
let with_cache_rec cache f x =
(* make a recursive version of [f] that uses the cache *)
let rec f' x = with_cache cache (fun x -> f f' x) x in
f' x
end end
let lru (type a) ?(eq=default_eq_) ?(hash=default_hash_) size =
let module L = LRU(struct
type t = a
let equal = eq
let hash = hash
end) in
let c = L.make size in
{ get=(fun x -> L.get c x);
set=(fun x y -> L.set c x y);
clear=(fun () -> L.clear c);
}
module UNBOUNDED(X:HASH) = struct
type key = X.t
module H = Hashtbl.Make(X)
type 'a t = 'a H.t
let make size =
assert (size > 0);
H.create size
let clear c = H.clear c
let get c x = H.find c x
let set c x y = H.replace c x y
end
let unbounded (type a) ?(eq=default_eq_) ?(hash=default_hash_) size =
let module C = UNBOUNDED(struct
type t = a
let equal = eq
let hash = hash
end) in
let c = C.make size in
{ get=(fun x -> C.get c x);
set=(fun x y -> C.set c x y);
clear=(fun () -> C.clear c);
}

View file

@ -25,83 +25,109 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(** {1 Memoization caches} *) (** {1 Memoization caches} *)
(** {2 Signatures} *) type 'a equal = 'a -> 'a -> bool
type 'a hash = 'a -> int
module type EQ = sig (** {2 Value interface}
type t
val equal : t -> t -> bool
end
module type HASH = sig Typical use case: one wants to memoize a function [f : 'a -> 'b]. Code sample:
include EQ {[
val hash : t -> int let f x =
end print_endline "call f";
x + 1;;
(** Signature of a cache for values *) let f' = with_cache (lru 256) f;;
module type S = sig f' 0;; (* prints *)
type 'a t f' 1;; (* prints *)
type key f' 0;; (* doesn't print, returns cached value *)
]}
val create : int -> 'a t @since NEXT_RELEASE *)
(** Create a new cache of the given size. *)
val clear : 'a t -> unit type ('a, 'b) t
(** Clear content of the cache *)
val with_cache : 'a t -> (key -> 'a) -> key -> 'a val clear : (_,_) t -> unit
(** Wrap the function with the cache. This means that (** Clear the content of the cache *)
[with_cache cache f x] always returns the same value as
val with_cache : ('a, 'b) t -> ('a -> 'b) -> 'a -> 'b
(** [with_cache c f] behaves like [f], but caches calls to [f] in the
cache [c]. It always returns the same value as
[f x], if [f x] returns, or raise the same exception. [f x], if [f x] returns, or raise the same exception.
However, [f] may not be called if [x] is in the cache. *) However, [f] may not be called if [x] is in the cache. *)
val with_cache_rec : 'a t -> ((key -> 'a) -> key -> 'a) -> key -> 'a val with_cache_rec : ('a,'b) t -> (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b
(** Partially apply the given function with a cached version of itself. (** [with_cache_rec c f] is a function that first, applies [f] to
It returns the specialized function. some [f' = fix f], such that recursive calls to [f'] are cached in [c].
[with_cache_rec cache f] applies [f] to a cached version of [f], It is similar to {!with_cache} but with a function that takes as
called [f'], so that [f' x = f f' x]. *) first argument its own recursive version.
Examples (memoized Fibonacci function):
{[
let fib = with_cache_rec (lru 256)
(fun fib' n -> match n with
| 1 | 2 -> 1
| _ -> fib' (n-1) + fib' (n-2)
);;
fib 70;;
]}
*)
val dummy : ('a,'b) t
(** dummy cache, never stores any value *)
val linear : ?eq:'a equal -> int -> ('a, 'b) t
(** Linear cache with the given size. It stores key/value pairs in
an array and does linear search at every call, so it should only be used
with small size.
@param eq optional equality predicate for keys *)
val replacing : ?eq:'a equal -> ?hash:'a hash ->
int -> ('a,'b) t
(** Replacing cache of the given size. Equality and hash functions can be
parametrized. It's a hash table that handles collisions by replacing
the old value with the new (so a cache entry is evicted when another
entry with the same hash (modulo size) is added).
Never grows wider than the given size. *)
val lru : ?eq:'a equal -> ?hash:'a hash ->
int -> ('a,'b) t
(** LRU cache of the given size ("Least Recently Used": keys that have not been
used recently are deleted first). Never grows wider. *)
val unbounded : ?eq:'a equal -> ?hash:'a hash ->
int -> ('a,'b) t
(** Unbounded cache, backed by a Hash table. Will grow forever
unless {!clear} is called manually. *)
(** {2 Binary Caches}
TODO
module C2 : sig
type ('a, 'b, 'c) t
val clear : (_,_,_) t -> unit
val with_cache : ('a, 'b, 'c) t -> ('a -> 'b -> 'c) -> 'a -> 'b -> 'c
val with_cache_rec : ('a,'b,'c) t ->
(('a -> 'b -> 'c) -> 'a -> 'b -> 'c) ->
'a -> 'b -> 'c
val dummy : ('a,'b,'c) t
val linear : ?eq1:('a -> 'a -> bool) -> ?eq2:('b -> 'b -> bool) ->
int -> ('a, 'b, 'c) t
val replacing : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) ->
?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) ->
int -> ('a,'b,'c) t
val lru : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) ->
?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) ->
int -> ('a,'b,'c) t
val unbounded : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) ->
?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) ->
int -> ('a,'b,'c) t
end end
*)
(** Signature of a cache for pairs of values *)
module type S2 = sig
type 'a t
type key1
type key2
val create : int -> 'a t
(** Create a new cache of the given size. *)
val clear : 'a t -> unit
(** Clear content of the cache *)
val with_cache : 'a t -> (key1 -> key2 -> 'a) -> key1 -> key2 -> 'a
(** Wrap the function with the cache *)
end
(** {2 Dummy cache (no caching)} *)
module Dummy(X : sig type t end) : S with type key = X.t
module Dummy2(X : sig type t end)(Y : sig type t end) : S2 with type key1 = X.t and type key2 = Y.t
(** {2 Small linear cache} *)
(** This cache stores (key,value) pairs in an array, that is traversed
linearily. It is therefore only reasonable for small sizes (like 5). *)
module Linear(X : EQ) : S with type key = X.t
module Linear2(X : EQ)(Y : EQ) : S2 with type key1 = X.t and type key2 = Y.t
(** {2 Hashtables that resolve collisions by replacing} *)
module Replacing(X : HASH) : S with type key = X.t
module Replacing2(X : HASH)(Y : HASH) : S2 with type key1 = X.t and type key2 = Y.t
(** {2 Hashtables with Least Recently Used eviction policy} *)
module LRU(X : HASH) : S with type key = X.t
(* TODO exception handling in LRU *)
(* TODO LRU2 *)