From 4ced583f338754b54b3e894b12a5c69273c3a9e5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 12 Nov 2014 00:42:28 +0100 Subject: [PATCH 01/39] merge back from stable --- Makefile | 2 +- _oasis | 4 +--- core/CCError.mli | 6 +++--- core/CCFun.mli | 2 +- core/CCInt.mli | 2 +- core/CCList.mli | 2 +- core/CCMap.mli | 2 +- core/CCOpt.mli | 4 ++-- core/CCSequence.mli | 4 ++-- core/CCSexp.mli | 12 ++++++------ pervasives/CCPervasives.ml | 2 +- 11 files changed, 20 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index bc534202..89e68738 100644 --- a/Makefile +++ b/Makefile @@ -116,4 +116,4 @@ update_next_tag: udpate_sequence: git subtree pull --prefix sequence sequence stable --squash -.PHONY: examples push_doc tags qtest update_sequence +.PHONY: examples push_doc tags qtest update_sequence update_next_tag diff --git a/_oasis b/_oasis index 1bf48bc4..17d82e36 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: containers -Version: 0.4 +Version: dev Homepage: https://github.com/c-cube/ocaml-containers Authors: Simon Cruanes License: BSD-2-clause @@ -50,8 +50,6 @@ Library "containers" CCRandom, CCKTree, CCTrie, CCString, CCHashtbl, CCFlatHashtbl, CCSexp, CCMap BuildDepends: bytes - XMETARequires: cppo - FindlibName: containers Library "containers_string" Path: string diff --git a/core/CCError.mli b/core/CCError.mli index 17297bb6..95929888 100644 --- a/core/CCError.mli +++ b/core/CCError.mli @@ -26,7 +26,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Error Monad} -The variant is polymorphic in the error type since NEXT_RELEASE *) +The variant is polymorphic in the error type since 0.5 *) type 'a sequence = ('a -> unit) -> unit type 'a equal = 'a -> 'a -> bool @@ -60,7 +60,7 @@ val map : ('a -> 'b) -> ('a, 'err) t -> ('b, 'err) t val map_err : ('err1 -> 'err2) -> ('a, 'err1) t -> ('a, 'err2) t (** Map on error. - @since NEXT_RELEASE *) + @since 0.5 *) val map2 : ('a -> 'b) -> ('err -> 'err) -> ('a, 'err) t -> ('b, 'err) t (** Same as {!map}, but also with a function that can transform @@ -92,7 +92,7 @@ val fold : success:('a -> 'b) -> failure:('err -> 'b) -> ('a, 'err) t -> 'b (** {2 Wrappers} The functions {!guard}, {!wrap1}, {!wrap2} and {!wrap3} now return -exceptions in case of failure, @since NEXT_RELEASE *) +exceptions in case of failure, @since 0.5 *) val guard : (unit -> 'a) -> ('a, exn) t (** [guard f] runs [f ()] and returns its result wrapped in [`Ok]. If diff --git a/core/CCFun.mli b/core/CCFun.mli index 59af0e40..6ac21173 100644 --- a/core/CCFun.mli +++ b/core/CCFun.mli @@ -37,7 +37,7 @@ val (%>) : ('a -> 'b) -> ('b -> 'c) -> 'a -> 'c val (@@) : ('a -> 'b) -> 'a -> 'b (** [f @@ x] is the same as [f x], but right-associative. - @since NEXT_RELEASE *) + @since 0.5 *) val id : 'a -> 'a (** Identity function *) diff --git a/core/CCInt.mli b/core/CCInt.mli index 3fcd33ac..12a9040c 100644 --- a/core/CCInt.mli +++ b/core/CCInt.mli @@ -39,7 +39,7 @@ val sign : t -> int val neg : t -> t (** [neg i = - i] - @since NEXT_RELEASE *) + @since 0.5 *) type 'a printer = Buffer.t -> 'a -> unit type 'a formatter = Format.formatter -> 'a -> unit diff --git a/core/CCList.mli b/core/CCList.mli index 939888c4..160348a5 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -35,7 +35,7 @@ val map : ('a -> 'b) -> 'a t -> 'b t val (>|=) : 'a t -> ('a -> 'b) -> 'b t (** Infix version of [map] with reversed arguments - @since NEXT_RELEASE *) + @since 0.5 *) val append : 'a t -> 'a t -> 'a t (** Safe version of append *) diff --git a/core/CCMap.mli b/core/CCMap.mli index 385c714a..2ff1d310 100644 --- a/core/CCMap.mli +++ b/core/CCMap.mli @@ -27,7 +27,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Extensions of Standard Map} Provide useful functions and iterators on [Map.S] -@since NEXT_RELEASE *) +@since 0.5 *) type 'a sequence = ('a -> unit) -> unit type 'a printer = Buffer.t -> 'a -> unit diff --git a/core/CCOpt.mli b/core/CCOpt.mli index 838f04e7..2e810568 100644 --- a/core/CCOpt.mli +++ b/core/CCOpt.mli @@ -63,11 +63,11 @@ val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a val filter : ('a -> bool) -> 'a t -> 'a t (** Filter on 0 or 1 elements - @since NEXT_RELEASE *) + @since 0.5 *) val get : 'a -> 'a t -> 'a (** [get default x] unwraps [x], but if [x = None] it returns [default] instead. - @since NEXT_RELEASE *) + @since 0.5 *) val get_exn : 'a t -> 'a (** Open the option, possibly failing if it is [None] diff --git a/core/CCSequence.mli b/core/CCSequence.mli index 7e3b6a63..cd4084b3 100644 --- a/core/CCSequence.mli +++ b/core/CCSequence.mli @@ -606,7 +606,7 @@ module IO : sig val write_bytes_to : ?mode:int -> ?flags:open_flag list -> string -> Bytes.t t -> unit - (** @since NEXT_RELEASE *) + (** @since 0.5 *) val write_lines : ?mode:int -> ?flags:open_flag list -> string -> string t -> unit @@ -614,5 +614,5 @@ module IO : sig val write_bytes_lines : ?mode:int -> ?flags:open_flag list -> string -> Bytes.t t -> unit - (** @since NEXT_RELEASE *) + (** @since 0.5 *) end diff --git a/core/CCSexp.mli b/core/CCSexp.mli index 152b4b8e..596a08be 100644 --- a/core/CCSexp.mli +++ b/core/CCSexp.mli @@ -252,11 +252,11 @@ Sexp.Traverse.list_all pt_of_sexp sexp;; module Traverse : sig type 'a conv = t -> 'a option (** A converter from S-expressions to 'a is a function [sexp -> 'a option]. - @since NEXT_RELEASE *) + @since 0.5 *) val map_opt : ('a -> 'b option) -> 'a list -> 'b list option (** Map over a list, failing as soon as the function fails on any element - @since NEXT_RELEASE *) + @since 0.5 *) val list_any : 'a conv -> t -> 'a option (** [list_any f (List l)] tries [f x] for every element [x] in [List l], @@ -284,19 +284,19 @@ module Traverse : sig val to_list_with : (t -> 'a option) -> 'a list conv (** Expect a list, applies [f] to all the elements of the list, and succeeds only if [f] succeeded on every element - @since NEXT_RELEASE *) + @since 0.5 *) val to_pair : (t * t) conv (** Expect a list of two elements *) val to_pair_with : 'a conv -> 'b conv -> ('a * 'b) conv (** Same as {!to_pair} but applies conversion functions - @since NEXT_RELEASE *) + @since 0.5 *) val to_triple : (t * t * t) conv val to_triple_with : 'a conv -> 'b conv -> 'c conv -> ('a * 'b * 'c) conv - (* @since NEXT_RELEASE *) + (* @since 0.5 *) val get_field : string -> t conv (** [get_field name e], when [e = List [(n1,x1); (n2,x2) ... ]], extracts @@ -314,7 +314,7 @@ module Traverse : sig (** [field_list name f "(... (name a b c d) ...record)"] will look for a field based on the given [name], and expect it to have a list of arguments dealt with by [f] (here, "a b c d"). - @since NEXT_RELEASE *) + @since 0.5 *) val (>>=) : 'a option -> ('a -> 'b option) -> 'b option diff --git a/pervasives/CCPervasives.ml b/pervasives/CCPervasives.ml index b87db046..5e38bcd4 100644 --- a/pervasives/CCPervasives.ml +++ b/pervasives/CCPervasives.ml @@ -39,7 +39,7 @@ This module is meant to be opened if one doesn't want to use both, say, Changed [Opt] to [Option] to better reflect that this module is about the ['a option] type, with [module Option = CCOpt]. -@since NEXT_RELEASE +@since 0.5 *) module Array = struct include Array include CCArray end From 07f608009aa05605bdae4abc087394d1520a171a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 12 Nov 2014 11:02:22 +0100 Subject: [PATCH 02/39] fix Makefile: if qtest not installed, should still be able to build --- Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 89e68738..baeea3fc 100644 --- a/Makefile +++ b/Makefile @@ -82,9 +82,12 @@ QTEST_PREAMBLE='open CCFun;; ' qtest-gen: qtest-clean @mkdir -p qtest - @qtest extract --preamble $(QTEST_PREAMBLE) \ - -o qtest/run_qtest.cppo.ml \ - $(QTESTABLE) 2> /dev/null + @if which qtest ; then \ + qtest extract --preamble $(QTEST_PREAMBLE) \ + -o qtest/run_qtest.cppo.ml \ + $(QTESTABLE) 2> /dev/null ; \ + else touch qtest/run_qtest.cppo.ml ; \ + fi push-stable: git checkout stable From aa86a5454bb155afb01476d30ef75ff8adec7842 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 11:44:59 +0100 Subject: [PATCH 03/39] new benchmark, benchs.ml; drop use of "bench" package --- _oasis | 11 +- benchs/bench_batch.ml | 6 +- benchs/bench_tbl.ml | 422 ++++++++++++++++++++++++++++++++++++++++ benchs/benchs.ml | 437 +++--------------------------------------- 4 files changed, 463 insertions(+), 413 deletions(-) create mode 100644 benchs/bench_tbl.ml diff --git a/_oasis b/_oasis index 17d82e36..1d02eee1 100644 --- a/_oasis +++ b/_oasis @@ -155,7 +155,16 @@ Executable benchs CompiledObject: native Build$: flag(bench) MainIs: benchs.ml - BuildDepends: containers,containers.string,containers.misc,bench,containers.advanced + BuildDepends: containers,benchmark + +Executable bench_tbl + Path: benchs/ + Install: false + CompiledObject: native + Build$: flag(bench) + MainIs: bench_tbl.ml + BuildDepends: containers, containers.string, containers.misc, + benchmark, containers.advanced Executable bench_conv Path: benchs/ diff --git a/benchs/bench_batch.ml b/benchs/bench_batch.ml index 0e478e28..9846d851 100644 --- a/benchs/bench_batch.ml +++ b/benchs/bench_batch.ml @@ -55,9 +55,9 @@ module Make(C : COLL) = struct Benchmark.tabulate res let bench () = - bench_for 1 100; - bench_for 4 100_000; - bench_for 4 1_000_000; + bench_for ~time:1 100; + bench_for ~time:4 100_000; + bench_for ~time:4 1_000_000; () end diff --git a/benchs/bench_tbl.ml b/benchs/bench_tbl.ml new file mode 100644 index 00000000..94787857 --- /dev/null +++ b/benchs/bench_tbl.ml @@ -0,0 +1,422 @@ + +(** Benchmarking *) +(** {2 hashtables} *) + +module IHashtbl = Hashtbl.Make(struct + type t = int + let equal i j = i = j + let hash i = i +end) + +module IFlatHashtbl = FlatHashtbl.Make(struct + type t = int + let equal i j = i = j + let hash i = i +end) + +module IFHashtbl = FHashtbl.Tree(struct + type t = int + let equal i j = i = j + let hash i = i +end) + +module IPersistentHashtbl = CCPersistentHashtbl.Make(struct + type t = int + let equal i j = i = j + let hash i = i +end) + +module IMap = Map.Make(struct + type t = int + let compare i j = i - j +end) + +module ICCHashtbl = CCFlatHashtbl.Make(struct + type t = int + let equal i j = i = j + let hash i = i +end) + +let phashtbl_add n = + let h = PHashtbl.create 50 in + for i = n downto 0 do + PHashtbl.add h i i; + done; + h + +let hashtbl_add n = + let h = Hashtbl.create 50 in + for i = n downto 0 do + Hashtbl.add h i i; + done; + h + +let ihashtbl_add n = + let h = IHashtbl.create 50 in + for i = n downto 0 do + IHashtbl.add h i i; + done; + h + +let iflathashtbl_add n = + let h = IFlatHashtbl.create 50 in + for i = n downto 0 do + IFlatHashtbl.replace h i i; + done; + h + +let ifhashtbl_add n = + let h = ref (IFHashtbl.empty 32) in + for i = n downto 0 do + h := IFHashtbl.replace !h i i; + done; + !h + +let skiplist_add n = + let l = SkipList.create compare in + for i = n downto 0 do + SkipList.add l i i; + done; + l + +let ipersistenthashtbl_add n = + let h = ref (IPersistentHashtbl.create 32) in + for i = n downto 0 do + h := IPersistentHashtbl.replace !h i i; + done; + !h + +let imap_add n = + let h = ref IMap.empty in + for i = n downto 0 do + h := IMap.add i i !h; + done; + !h + +let icchashtbl_add n = + let h = ICCHashtbl.create 50 in + for i = n downto 0 do + ICCHashtbl.add h i i; + done; + h + +let bench_maps1 () = + Format.printf "----------------------------------------@."; + let n = 100 in + let res = Benchmark.throughputN 3 + ["phashtbl_add", (fun n -> ignore (phashtbl_add n)), n; + "hashtbl_add", (fun n -> ignore (hashtbl_add n)), n; + "ihashtbl_add", (fun n -> ignore (ihashtbl_add n)), n; + "iflathashtbl_add", (fun n -> ignore (iflathashtbl_add n)), n; + "ifhashtbl_add", (fun n -> ignore (ifhashtbl_add n)), n; + "ipersistenthashtbl_add", (fun n -> ignore (ipersistenthashtbl_add n)), n; + "skiplist_add", (fun n -> ignore (skiplist_add n)), n; + "imap_add", (fun n -> ignore (imap_add n)), n; + "ccflathashtbl_add", (fun n -> ignore (icchashtbl_add n)), n; + ] + in + Benchmark.tabulate res + +let phashtbl_replace n = + let h = PHashtbl.create 50 in + for i = 0 to n do + PHashtbl.replace h i i; + done; + for i = n downto 0 do + PHashtbl.replace h i i; + done; + h + +let hashtbl_replace n = + let h = Hashtbl.create 50 in + for i = 0 to n do + Hashtbl.replace h i i; + done; + for i = n downto 0 do + Hashtbl.replace h i i; + done; + h + +let ihashtbl_replace n = + let h = IHashtbl.create 50 in + for i = 0 to n do + IHashtbl.replace h i i; + done; + for i = n downto 0 do + IHashtbl.replace h i i; + done; + h + +let iflathashtbl_replace n = + let h = IFlatHashtbl.create 50 in + for i = 0 to n do + IFlatHashtbl.replace h i i; + done; + for i = n downto 0 do + IFlatHashtbl.replace h i i; + done; + h + +let ifhashtbl_replace n = + let h = ref (IFHashtbl.empty 32) in + for i = 0 to n do + h := IFHashtbl.replace !h i i; + done; + for i = n downto 0 do + h := IFHashtbl.replace !h i i; + done; + !h + +let ipersistenthashtbl_replace n = + let h = ref (IPersistentHashtbl.create 32) in + for i = 0 to n do + h := IPersistentHashtbl.replace !h i i; + done; + for i = n downto 0 do + h := IPersistentHashtbl.replace !h i i; + done; + !h + +let skiplist_replace n = + let l = SkipList.create compare in + for i = 0 to n do + SkipList.add l i i; + done; + for i = n downto 0 do + SkipList.add l i i; + done; + l + +let imap_replace n = + let h = ref IMap.empty in + for i = 0 to n do + h := IMap.add i i !h; + done; + for i = n downto 0 do + h := IMap.add i i !h; + done; + !h + +let icchashtbl_replace n = + let h = ICCHashtbl.create 50 in + for i = 0 to n do + ICCHashtbl.add h i i; + done; + for i = n downto 0 do + ICCHashtbl.add h i i; + done; + h + +let bench_maps2 () = + Format.printf "----------------------------------------@."; + let n = 100 in + let res = Benchmark.throughputN 3 + ["phashtbl_replace", (fun n -> ignore (phashtbl_replace n)), n; + "hashtbl_replace", (fun n -> ignore (hashtbl_replace n)), n; + "ihashtbl_replace", (fun n -> ignore (ihashtbl_replace n)), n; + "iflathashtbl_replace", (fun n -> ignore (iflathashtbl_replace n)), n; + "ifhashtbl_replace", (fun n -> ignore (ifhashtbl_replace n)), n; + "ipersistenthashtbl_replace", (fun n -> ignore (ipersistenthashtbl_replace n)), n; + "skiplist_replace", (fun n -> ignore (skiplist_replace n)), n; + "imap_replace", (fun n -> ignore (imap_replace n)), n; + "ccflathashtbl_replace", (fun n -> ignore (icchashtbl_replace n)), n; + ] + in + Benchmark.tabulate res + +let my_len = 250 + +let phashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (PHashtbl.find h i); + done + +let hashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (Hashtbl.find h i); + done + +let ihashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (IHashtbl.find h i); + done + +let iflathashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (IFlatHashtbl.find h i); + done + +let ifhashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (IFHashtbl.find h i); + done + +let ipersistenthashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (IPersistentHashtbl.find h i); + done + +let skiplist_find l = + fun n -> + for i = 0 to n-1 do + ignore (SkipList.find l i); + done + +let array_find a = + fun n -> + for i = 0 to n-1 do + ignore (Array.get a i); + done + +let imap_find m = + fun n -> + for i = 0 to n-1 do + ignore (IMap.find i m); + done + +let icchashtbl_find m = + fun n -> + for i = 0 to n-1 do + ignore (ICCHashtbl.get_exn i m); + done + +let bench_maps3 () = + List.iter + (fun len -> + let h = phashtbl_add len in + let h' = hashtbl_add len in + let h'' = ihashtbl_add len in + let h''' = iflathashtbl_add len in + let h'''' = ifhashtbl_add len in + let h''''' = ipersistenthashtbl_add len in + let l = skiplist_add len in + let a = Array.init len (fun i -> string_of_int i) in + let m = imap_add len in + let h'''''' = icchashtbl_add len in + Format.printf "----------------------------------------@."; + Format.printf "try on size %d@.@.@." len; + let res = Benchmark.throughputN 3 [ + "phashtbl_find", (fun () -> phashtbl_find h len), (); + "hashtbl_find", (fun () -> hashtbl_find h' len), (); + "ihashtbl_find", (fun () -> ihashtbl_find h'' len), (); + "iflathashtbl_find", (fun () -> iflathashtbl_find h''' len), (); + "ifhashtbl_find", (fun () -> ifhashtbl_find h'''' len), (); + "ipersistenthashtbl_find", (fun () -> ipersistenthashtbl_find h''''' len), (); + "skiplist_find", (fun () -> skiplist_find l len), (); + "array_find", (fun () -> array_find a len), (); + "imap_find", (fun () -> imap_find m len), (); + "cchashtbl_find", (fun () -> icchashtbl_find h'''''' len), (); + ] in + Benchmark.tabulate res) + [10;20;100;1000;10000] + +let bench_maps() = + bench_maps1 (); + bench_maps2 (); + bench_maps3 (); + () + +(** {2 Sequence/Gen} *) + +let bench_enum () = + let n = 1_000_000 in + let seq () = CCSequence.fold (+) 0 (CCSequence.int_range ~start:0 ~stop:n) in + let enum () = CCGen.fold (+) 0 (CCGen.int_range 0 n) in + let res = Benchmark.throughputN 3 + [ "sequence.fold", seq, (); + "gen.fold", enum, (); + ] in + Benchmark.tabulate res; + + let n = 100_000 in + let seq () = + let open CCSequence in + let seq = int_range ~start:0 ~stop:n in + let seq = flatMap (fun x -> int_range ~start:x ~stop:(x+10)) seq in + fold (+) 0 seq in + let enum () = + let open CCGen in + let seq = int_range 0 n in + let seq = flat_map (fun x -> int_range x (x+10)) seq in + fold (+) 0 seq in + let res = Benchmark.throughputN 3 + [ "sequence.flatMap", seq, (); + "gen.flatMap", enum, (); + ] in + Benchmark.tabulate res + +(** {2 Cache} *) + +(** Cached fibonacci function *) +module Fibo(C : Cache.S with type key = int) = struct + let fib ~size = + let fib fib' n = + match n with + | 0 -> 0 + | 1 -> 1 + | 2 -> 1 + | n -> + fib' (n-1) + fib' (n-2) + in + let cache = C.create size in + let cached_fib x = C.with_cache_rec cache fib x in + cached_fib +end + +module LinearIntCache = Cache.Linear(struct + type t = int + let equal i j = i = j +end) + +module ReplacingIntCache = Cache.Replacing(struct + type t = int + let equal i j = i = j + let hash i = i +end) + +module LRUIntCache = Cache.LRU(struct + type t = int + let equal i j = i = j + let hash i = i +end) + +module DummyIntCache = Cache.Dummy(struct type t = int end) + +let bench_cache () = + (* Fibonacci for those caching implementations *) + let module LinearFibo = Fibo(LinearIntCache) in + let module ReplacingFibo = Fibo(ReplacingIntCache) in + let module LRUFibo= Fibo(LRUIntCache) in + let module DummyFibo = Fibo(DummyIntCache) in + (* benchmark caches with fibo function *) + let bench_fib fib () = + ignore (List.map fib [5;10;20;30;35]); + () + in + let res = Benchmark.throughputN 3 + [ "linear_fib", bench_fib (LinearFibo.fib ~size:5), (); + "replacing_fib", bench_fib (ReplacingFibo.fib ~size:256), (); + "LRU_fib", bench_fib (LRUFibo.fib ~size:256), (); + "dummy_fib", bench_fib (DummyFibo.fib ~size:5), (); + ] in + Benchmark.tabulate res; + () + +let _ = + match Sys.argv with + | [| _; "maps" |] -> bench_maps () + | [| _; "enum" |] -> bench_enum () + | [| _; "cache" |] -> bench_cache () + | [| _; ("-help" | "--help") |] -> print_endline "./benchs [maps|enum|cache]" + | [| _ |] -> + bench_enum (); + bench_maps (); + bench_cache (); + () + | _ -> failwith "unknown argument (-help)" diff --git a/benchs/benchs.ml b/benchs/benchs.ml index 3a871049..63d26ef9 100644 --- a/benchs/benchs.ml +++ b/benchs/benchs.ml @@ -1,419 +1,38 @@ +(** Generic benchs *) -(** Benchmarking *) -(** {2 hashtables} *) +let draw_line () = + output_string stdout (CCString.repeat "*" 80); + output_char stdout '\n' -module IHashtbl = Hashtbl.Make(struct - type t = int - let equal i j = i = j - let hash i = i -end) +module L = struct -module IFlatHashtbl = FlatHashtbl.Make(struct - type t = int - let equal i j = i = j - let hash i = i -end) + let f_ x = + if x mod 10 = 0 then [] + else if x mod 5 = 1 then [x;x+1] + else [x;x+1;x+2;x+3] -module IFHashtbl = FHashtbl.Tree(struct - type t = int - let equal i j = i = j - let hash i = i -end) + let bench_flatmap ?(time=2) n = + draw_line (); + Printf.printf "flat_map for %d elements\n" n; + let l = CCList.(1 -- n) in + let res = Benchmark.throughputN time + [ "flat_map", CCList.flat_map f_, l + ; "flatten o CCList.map", (fun l -> List.flatten (CCList.map f_ l)), l + ; "flatten o map", (fun l -> List.flatten (List.map f_ l)), l + ] in + Benchmark.tabulate res -module IPersistentHashtbl = CCPersistentHashtbl.Make(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module IMap = Map.Make(struct - type t = int - let compare i j = i - j -end) - -module ICCHashtbl = CCFlatHashtbl.Make(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -let phashtbl_add n = - let h = PHashtbl.create 50 in - for i = n downto 0 do - PHashtbl.add h i i; - done; - h - -let hashtbl_add n = - let h = Hashtbl.create 50 in - for i = n downto 0 do - Hashtbl.add h i i; - done; - h - -let ihashtbl_add n = - let h = IHashtbl.create 50 in - for i = n downto 0 do - IHashtbl.add h i i; - done; - h - -let iflathashtbl_add n = - let h = IFlatHashtbl.create 50 in - for i = n downto 0 do - IFlatHashtbl.replace h i i; - done; - h - -let ifhashtbl_add n = - let h = ref (IFHashtbl.empty 32) in - for i = n downto 0 do - h := IFHashtbl.replace !h i i; - done; - !h - -let skiplist_add n = - let l = SkipList.create compare in - for i = n downto 0 do - SkipList.add l i i; - done; - l - -let ipersistenthashtbl_add n = - let h = ref (IPersistentHashtbl.create 32) in - for i = n downto 0 do - h := IPersistentHashtbl.replace !h i i; - done; - !h - -let imap_add n = - let h = ref IMap.empty in - for i = n downto 0 do - h := IMap.add i i !h; - done; - !h - -let icchashtbl_add n = - let h = ICCHashtbl.create 50 in - for i = n downto 0 do - ICCHashtbl.add h i i; - done; - h - -let bench_maps1 () = - Format.printf "----------------------------------------@."; - let res = Bench.bench_n - ["phashtbl_add", (fun n -> ignore (phashtbl_add n)); - "hashtbl_add", (fun n -> ignore (hashtbl_add n)); - "ihashtbl_add", (fun n -> ignore (ihashtbl_add n)); - "iflathashtbl_add", (fun n -> ignore (iflathashtbl_add n)); - "ifhashtbl_add", (fun n -> ignore (ifhashtbl_add n)); - "ipersistenthashtbl_add", (fun n -> ignore (ipersistenthashtbl_add n)); - "skiplist_add", (fun n -> ignore (skiplist_add n)); - "imap_add", (fun n -> ignore (imap_add n)); - "ccflathashtbl_add", (fun n -> ignore (icchashtbl_add n)) - ] - in - Bench.summarize 1. res - -let phashtbl_replace n = - let h = PHashtbl.create 50 in - for i = 0 to n do - PHashtbl.replace h i i; - done; - for i = n downto 0 do - PHashtbl.replace h i i; - done; - h - -let hashtbl_replace n = - let h = Hashtbl.create 50 in - for i = 0 to n do - Hashtbl.replace h i i; - done; - for i = n downto 0 do - Hashtbl.replace h i i; - done; - h - -let ihashtbl_replace n = - let h = IHashtbl.create 50 in - for i = 0 to n do - IHashtbl.replace h i i; - done; - for i = n downto 0 do - IHashtbl.replace h i i; - done; - h - -let iflathashtbl_replace n = - let h = IFlatHashtbl.create 50 in - for i = 0 to n do - IFlatHashtbl.replace h i i; - done; - for i = n downto 0 do - IFlatHashtbl.replace h i i; - done; - h - -let ifhashtbl_replace n = - let h = ref (IFHashtbl.empty 32) in - for i = 0 to n do - h := IFHashtbl.replace !h i i; - done; - for i = n downto 0 do - h := IFHashtbl.replace !h i i; - done; - !h - -let ipersistenthashtbl_replace n = - let h = ref (IPersistentHashtbl.create 32) in - for i = 0 to n do - h := IPersistentHashtbl.replace !h i i; - done; - for i = n downto 0 do - h := IPersistentHashtbl.replace !h i i; - done; - !h - -let skiplist_replace n = - let l = SkipList.create compare in - for i = 0 to n do - SkipList.add l i i; - done; - for i = n downto 0 do - SkipList.add l i i; - done; - l - -let imap_replace n = - let h = ref IMap.empty in - for i = 0 to n do - h := IMap.add i i !h; - done; - for i = n downto 0 do - h := IMap.add i i !h; - done; - !h - -let icchashtbl_replace n = - let h = ICCHashtbl.create 50 in - for i = 0 to n do - ICCHashtbl.add h i i; - done; - for i = n downto 0 do - ICCHashtbl.add h i i; - done; - h - -let bench_maps2 () = - Format.printf "----------------------------------------@."; - let res = Bench.bench_n - ["phashtbl_replace", (fun n -> ignore (phashtbl_replace n)); - "hashtbl_replace", (fun n -> ignore (hashtbl_replace n)); - "ihashtbl_replace", (fun n -> ignore (ihashtbl_replace n)); - "iflathashtbl_replace", (fun n -> ignore (iflathashtbl_replace n)); - "ifhashtbl_replace", (fun n -> ignore (ifhashtbl_replace n)); - "ipersistenthashtbl_replace", (fun n -> ignore (ipersistenthashtbl_replace n)); - "skiplist_replace", (fun n -> ignore (skiplist_replace n)); - "imap_replace", (fun n -> ignore (imap_replace n)); - "ccflathashtbl_replace", (fun n -> ignore (icchashtbl_replace n)); - ] - in - Bench.summarize 1. res - -let my_len = 250 - -let phashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (PHashtbl.find h i); - done - -let hashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (Hashtbl.find h i); - done - -let ihashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (IHashtbl.find h i); - done - -let iflathashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (IFlatHashtbl.find h i); - done - -let ifhashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (IFHashtbl.find h i); - done - -let ipersistenthashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (IPersistentHashtbl.find h i); - done - -let skiplist_find l = - fun n -> - for i = 0 to n-1 do - ignore (SkipList.find l i); - done - -let array_find a = - fun n -> - for i = 0 to n-1 do - ignore (Array.get a i); - done - -let imap_find m = - fun n -> - for i = 0 to n-1 do - ignore (IMap.find i m); - done - -let icchashtbl_find m = - fun n -> - for i = 0 to n-1 do - ignore (ICCHashtbl.get_exn i m); - done - -let bench_maps3 () = - List.iter - (fun len -> - let h = phashtbl_add len in - let h' = hashtbl_add len in - let h'' = ihashtbl_add len in - let h''' = iflathashtbl_add len in - let h'''' = ifhashtbl_add len in - let h''''' = ipersistenthashtbl_add len in - let l = skiplist_add len in - let a = Array.init len (fun i -> string_of_int i) in - let m = imap_add len in - let h'''''' = icchashtbl_add len in - Format.printf "----------------------------------------@."; - Format.printf "try on size %d@.@.@." len; - Bench.bench [ - "phashtbl_find", (fun () -> phashtbl_find h len); - "hashtbl_find", (fun () -> hashtbl_find h' len); - "ihashtbl_find", (fun () -> ihashtbl_find h'' len); - "iflathashtbl_find", (fun () -> iflathashtbl_find h''' len); - "ifhashtbl_find", (fun () -> ifhashtbl_find h'''' len); - "ipersistenthashtbl_find", (fun () -> ipersistenthashtbl_find h''''' len); - "skiplist_find", (fun () -> skiplist_find l len); - "array_find", (fun () -> array_find a len); - "imap_find", (fun () -> imap_find m len); - "cchashtbl_find", (fun () -> icchashtbl_find h'''''' len); - ]) - [10;20;100;1000;10000] - -let bench_maps() = - bench_maps1 (); - bench_maps2 (); - bench_maps3 (); - () - -(** {2 Sequence/Gen} *) - -let bench_enum () = - let n = 1_000_000 in - let seq () = CCSequence.fold (+) 0 (CCSequence.int_range ~start:0 ~stop:n) in - let enum () = CCGen.fold (+) 0 (CCGen.int_range 0 n) in - Bench.bench - [ "sequence.fold", seq; - "gen.fold", enum; - ]; - - let n = 100_000 in - let seq () = - let open CCSequence in - let seq = int_range ~start:0 ~stop:n in - let seq = flatMap (fun x -> int_range ~start:x ~stop:(x+10)) seq in - fold (+) 0 seq in - let enum () = - let open CCGen in - let seq = int_range 0 n in - let seq = flat_map (fun x -> int_range x (x+10)) seq in - fold (+) 0 seq in - Bench.bench - [ "sequence.flatMap", seq; - "gen.flatMap", enum; - ] - -(** {2 Cache} *) - -(** Cached fibonacci function *) -module Fibo(C : Cache.S with type key = int) = struct - let fib ~size = - let rec fib fib' n = - match n with - | 0 -> 0 - | 1 -> 1 - | 2 -> 1 - | n -> - fib' (n-1) + fib' (n-2) - in - let cache = C.create size in - let cached_fib x = C.with_cache_rec cache fib x in - cached_fib + let run() = + bench_flatmap 100; + bench_flatmap 10_000; + bench_flatmap ~time:4 100_000; + () end -module LinearIntCache = Cache.Linear(struct - type t = int - let equal i j = i = j -end) +(* TODO *) -module ReplacingIntCache = Cache.Replacing(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module LRUIntCache = Cache.LRU(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module DummyIntCache = Cache.Dummy(struct type t = int end) - -let bench_cache () = - (* Fibonacci for those caching implementations *) - let module LinearFibo = Fibo(LinearIntCache) in - let module ReplacingFibo = Fibo(ReplacingIntCache) in - let module LRUFibo= Fibo(LRUIntCache) in - let module DummyFibo = Fibo(DummyIntCache) in - (* benchmark caches with fibo function *) - let bench_fib fib () = - ignore (List.map fib [5;10;20;30;35]); - () - in - let conf = Bench.config in - conf.Bench.samples <- 100; - Bench.bench - [ "linear_fib", bench_fib (LinearFibo.fib ~size:5); - "replacing_fib", bench_fib (ReplacingFibo.fib ~size:256); - "LRU_fib", bench_fib (LRUFibo.fib ~size:256); - "dummy_fib", bench_fib (DummyFibo.fib ~size:5); - ]; - conf.Bench.samples <- 1000; +let () = + L.run (); () -let _ = - match Sys.argv with - | [| _; "maps" |] -> bench_maps () - | [| _; "enum" |] -> bench_enum () - | [| _; "cache" |] -> bench_cache () - | [| _; ("-help" | "--help") |] -> print_endline "./benchs [maps|enum|cache]" - | [| _ |] -> - bench_enum (); - bench_maps (); - bench_cache (); - () - | _ -> failwith "unknown argument (-help)" + From 0ad73a2cffd44654559536391c860d74ba6d5b21 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 13:06:34 +0100 Subject: [PATCH 04/39] more bechmarks on lists --- benchs/benchs.ml | 69 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/benchs/benchs.ml b/benchs/benchs.ml index 63d26ef9..022e3004 100644 --- a/benchs/benchs.ml +++ b/benchs/benchs.ml @@ -6,6 +6,8 @@ let draw_line () = module L = struct + (* FLAT MAP *) + let f_ x = if x mod 10 = 0 then [] else if x mod 5 = 1 then [x;x+1] @@ -22,17 +24,82 @@ module L = struct ] in Benchmark.tabulate res + (* APPEND *) + + let append_ f (l1, l2, l3) = + ignore (f (f l1 l2) l3) + + let bench_append ?(time=2) n = + draw_line (); + Printf.printf "append for %d elements\n" n; + let l1 = CCList.(1 -- n) in + let l2 = CCList.(n+1 -- 2*n) in + let l3 = CCList.(2*n+1 -- 3*n) in + let arg = l1, l2, l3 in + let res = Benchmark.throughputN time + [ "CCList.append", append_ CCList.append, arg + ; "List.append", append_ List.append, arg + ] in + Benchmark.tabulate res + + (* FLATTEN *) + + let bench_flatten ?(time=2) n = + draw_line (); + Printf.printf "flatten for %d elements\n" n; + let l = CCList.Idx.mapi (fun i x -> CCList.(x -- (x+ min i 100))) CCList.(1 -- n) in + let res = Benchmark.throughputN time + [ "CCList.flatten", CCList.flatten, l + ; "List.flatten", List.flatten, l + ; "fold_right append", (fun l -> List.fold_right List.append l []), l + ; "CCList.(fold_right append)", (fun l->CCList.fold_right CCList.append l []), l + ] in + Benchmark.tabulate res + + + (* MAIN *) + let run() = + bench_flatten 100; + bench_flatten 10_000; + bench_flatten ~time:4 100_000; bench_flatmap 100; bench_flatmap 10_000; bench_flatmap ~time:4 100_000; + bench_append 100; + bench_append 10_000; + bench_append ~time:4 100_000; () end (* TODO *) +let tbl_ = + [ "list", L.run + ] + +let bench_all () = + List.iter (fun (name, run) -> + draw_line (); + Printf.printf "run tests for %s...\n" name; + run() + ) tbl_ + +let which_ = ref ("all", bench_all) +let set_which s = + if s = "all" then which_ := s, bench_all + else try + let run = List.assoc s tbl_ in + which_ := s, run + with Not_found -> + failwith ("unknown test " ^ s) +let options = [] + let () = - L.run (); + Arg.parse options set_which "benchs [which]"; + let name, run = !which_ in + Printf.printf "run test %s\n" name; + run (); () From 0c1e7c30e8517c8e9cc4a77e1d4b4ac7ec4a25a5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 13:06:52 +0100 Subject: [PATCH 05/39] more efficient versions of CCList.{flatten,append,flat_map} --- core/CCList.ml | 52 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/core/CCList.ml b/core/CCList.ml index 961d1fd8..80d31608 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -31,7 +31,7 @@ type 'a t = 'a list let empty = [] (* max depth for direct recursion *) -let _direct_depth = 500 +let direct_depth_default_ = 1000 let map f l = let rec direct f i l = match l with @@ -43,7 +43,7 @@ let map f l = and safe f l = List.rev (List.rev_map f l) in - direct f _direct_depth l + direct f direct_depth_default_ l (*$Q (Q.list Q.small_int) (fun l -> \ @@ -53,6 +53,8 @@ let map f l = let (>|=) l f = map f l +let direct_depth_append_ = 10_000 + let append l1 l2 = let rec direct i l1 l2 = match l1 with | [] -> l2 @@ -61,10 +63,17 @@ let append l1 l2 = and safe l1 l2 = List.rev_append (List.rev l1) l2 in - direct _direct_depth l1 l2 + direct direct_depth_append_ l1 l2 let (@) = append +(*$T + [1;2;3] @ [4;5;6] = [1;2;3;4;5;6] + (1-- 10_000) @ (10_001 -- 20_000) = 1 -- 20_000 +*) + +let direct_depth_filter_ = 10_000 + let filter p l = let rec direct i p l = match l with | [] -> [] @@ -76,7 +85,7 @@ let filter p l = | x::l' when not (p x) -> safe p l' acc | x::l' -> safe p l' (x::acc) in - direct _direct_depth p l + direct direct_depth_filter_ p l let fold_right f l acc = let rec direct i f l acc = match l with @@ -91,7 +100,7 @@ let fold_right f l acc = let acc = f x acc in safe f l' acc in - direct _direct_depth f l acc + direct direct_depth_default_ f l acc (*$T fold_right (+) (1 -- 1_000_000) 0 = \ @@ -116,25 +125,30 @@ let rec equal f l1 l2 = match l1, l2 with | [], _ | _, [] -> false | x1::l1', x2::l2' -> f x1 x2 && equal f l1' l2' -(* append difference lists *) -let _d_append f1 f2 = - fun l -> f1 (f2 l) - let flat_map f l = - let rec aux prefix f l = match l with - | [] -> prefix [] + let rec aux f l kont = match l with + | [] -> kont [] | x::l' -> - let sublist = append (f x) in - let prefix = _d_append prefix sublist in - aux prefix f l' + let y = f x in + let kont' tail = match y with + | [] -> kont tail + | [x] -> kont (x :: tail) + | [x;y] -> kont (x::y::tail) + | l -> kont (append l tail) + in + aux f l' kont' in - aux (fun l->l) f l + aux f l (fun l->l) (*$T flat_map (fun x -> [x+1; x*2]) [10;100] = [11;20;101;200] *) -let flatten l = flat_map (fun l -> l) l +let flatten l = fold_right append l [] + +(*$T + flatten [[1]; [2;3;4]; []; []; [5;6]] = 1--6 + *) let product f l1 l2 = flat_map (fun x -> map (fun y -> f x y) l2) l1 @@ -210,7 +224,7 @@ let take n l = | _ when n=0 -> List.rev acc | x::l' -> safe (n-1) (x::acc) l' in - direct _direct_depth n l + direct direct_depth_default_ n l (*$T take 2 [1;2;3;4;5] = [1;2] @@ -639,7 +653,7 @@ let of_gen g = | None -> List.rev acc | Some x -> safe (x::acc) g in - direct _direct_depth g + direct direct_depth_default_ g let to_klist l = let rec make l () = match l with @@ -657,7 +671,7 @@ let of_klist l = | `Nil -> List.rev acc | `Cons (x,l') -> safe (x::acc) l' in - direct _direct_depth l + direct direct_depth_default_ l (** {2 IO} *) From 26068335189a1f3a720b78d03f490cde28d2d53d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 15:41:10 +0100 Subject: [PATCH 06/39] add benchs/CCBench helper module (tree of benchmarks) --- benchs/CCBench.ml | 251 +++++++++++++++++++++++++++++++++++++++++++++ benchs/CCBench.mli | 115 +++++++++++++++++++++ 2 files changed, 366 insertions(+) create mode 100644 benchs/CCBench.ml create mode 100644 benchs/CCBench.mli diff --git a/benchs/CCBench.ml b/benchs/CCBench.ml new file mode 100644 index 00000000..85e4e516 --- /dev/null +++ b/benchs/CCBench.ml @@ -0,0 +1,251 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +this software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are +disclaimed. in no event shall the copyright holder or contributors be liable +for any direct, indirect, incidental, special, exemplary, or consequential +damages (including, but not limited to, procurement of substitute goods or +services; loss of use, data, or profits; or business interruption) however +caused and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of the use +of this software, even if advised of the possibility of such damage. +*) + +(** {1 helpers for benchmarks} *) + +let print_line_ fmt () = + Format.pp_print_string fmt (CCString.repeat "*" 80); + Format.pp_print_newline fmt () + +let print_list_ ?(sep=", ") pp_item fmt l = + let rec print fmt l = match l with + | x::((_::_) as l) -> + pp_item fmt x; + Format.pp_print_string fmt sep; + Format.pp_print_cut fmt (); + print fmt l + | x::[] -> pp_item fmt x + | [] -> () + in + print fmt l + +(** {2 Bench Tree} *) + +module SMap = Map.Make(String) + +type single_bench = unit -> Benchmark.samples +type bench = + | Multiple of bench list * bench SMap.t + | Bench of single_bench + | WithInt of ((int -> bench) * int) list + +let is_multiple = function + | Multiple _ -> true + | _ -> false + +let rec merge_ t1 t2 = match t1, t2 with + | Multiple (l, map), ((Bench _ | WithInt _) as x) -> + Multiple (x :: l, map) + | Multiple (l1, m1), Multiple (l2, m2) -> + let m = SMap.merge + (fun _ o1 o2 -> merge_opt_ o1 o2) + m1 m2 + in + Multiple (l1 @ l2, m) + | (Bench _ | WithInt _), Multiple _ -> merge_ t2 t1 + | Bench _, _ + | WithInt _, _ -> + Multiple ([t1; t2], SMap.empty) (* composite *) +and merge_opt_ o1 o2 = match o1, o2 with + | None, None -> None + | Some o, None + | None, Some o -> Some o + | Some o1, Some o2 -> Some (merge_ o1 o2) + +let mk_list = function + | [] -> invalid_arg "mk_list" + | x :: tail -> List.fold_left merge_ x tail + +let raw f = Bench f + +let throughput1 ?min_count ?style ?fwidth ?fdigits ?repeat time ?name f x = + Bench (fun () -> + Benchmark.throughput1 ?min_count ?style ?fwidth ?fdigits ?repeat time ?name f x) + +let throughputN ?style ?fwidth ?fdigits ?repeat time f = + Bench (fun () -> + Benchmark.throughputN ?style ?fwidth ?fdigits ?repeat time f) + +let (>::) n t = + if n = "" then invalid_arg ">::"; + Multiple ([], SMap.singleton n t) + +let (>:::) n l = + if n = "" then invalid_arg ">:::"; + Multiple ([], SMap.singleton n (mk_list l)) + +let with_int f = function + | [] -> invalid_arg "with_int: empty list" + | l -> WithInt (List.map (fun n -> f, n) l) + +let map_int l = + if l = [] then invalid_arg "map_int"; + WithInt l + +(* print the structure of the tree *) +let rec print fmt = function + | Multiple (l, m) -> + Format.fprintf fmt "@[%a%a@]" + (print_list_ ~sep:"," print) l + print_map m + | WithInt l -> + Format.fprintf fmt "@[[%a]@]" + (print_list_ print_pair) + (List.map (fun (f, n) -> n, f n) l) + | Bench _ -> Format.fprintf fmt "<>" +and print_pair fmt (n,t) = + Format.fprintf fmt "@[%d: %a@]" n print t +and print_map fmt m = + Format.pp_open_hvbox fmt 0; + SMap.iter (fun n t -> Format.fprintf fmt "@[%s.%a@]" n print t) m; + Format.pp_close_box fmt () + +(** {2 Path} *) + +type path = string list + +let print_path fmt path = + Format.fprintf fmt "@[%a@]" + (print_list_ ~sep:"." Format.pp_print_string) path + +let str_split_ ~by s = + let len_by = String.length by in + assert (len_by > 0); + let l = ref [] in + let n = String.length s in + let rec search prev i = + if i >= n + then ( + if i>prev then l := String.sub s prev (n-prev) :: !l ; + List.rev !l + ) + else if is_prefix i 0 + then begin + l := (String.sub s prev (i-prev)) :: !l; (* save substring *) + search (i+len_by) (i+len_by) + end + else search prev (i+1) + and is_prefix i j = + if j = len_by + then true + else if i = n + then false + else s.[i] = by.[j] && is_prefix (i+1) (j+1) + in search 0 0 + +let parse_path s = str_split_ ~by:"." s + +let () = + assert (parse_path "foo.bar" = ["foo";"bar"]); + assert (parse_path "foo" = ["foo"]); + assert (parse_path "" = []); + () + +let prefix path t = List.fold_right (fun s t -> s >:: t) path t + +(** {2 Run} *) + +(* run one atomic single_bench *) +let run_single_bench_ fmt path f = + print_line_ fmt (); + Format.fprintf fmt "run single_bench %a@." print_path (List.rev path); + let res = f () in + Benchmark.tabulate res + +(* run all benchs *) +let rec run_all fmt path t = match t with + | Bench f -> run_single_bench_ fmt path f + | Multiple (l, m) -> + List.iter (run_all fmt path) l; + SMap.iter + (fun n t' -> + let path = n :: path in + run_all fmt path t' + ) m + | WithInt l -> + List.iter (fun (f, n) -> run_all fmt path (f n)) l + +let run fmt t = run_all fmt [] t + +let sprintf_ format = + let b = Buffer.create 32 in + let fmt = Format.formatter_of_buffer b in + Format.kfprintf + (fun fmt -> Format.pp_print_flush fmt (); Buffer.contents b) fmt format + +(* run all within a path *) +let rec run_path_rec_ fmt path remaining t = match t, remaining with + | _, [] -> run_all fmt path t + | Multiple (_, m), s :: remaining' -> + begin try + let t' = SMap.find s m in + run_path_rec_ fmt (s::path) remaining' t' + with Not_found -> + let msg = sprintf_ "could not find %s under path %a" + s print_path (List.rev path) in + failwith msg + end + | WithInt l, _ -> + List.iter (fun (f, n) -> run_path_rec_ fmt path remaining (f n)) l + | Bench _, _::_ -> () + +let run_path fmt t path = run_path_rec_ fmt [] path t + +let run_main ?(argv=Sys.argv) ?(out=Format.std_formatter) t = + let path = ref [] in + let do_print_tree = ref false in + let set_path_ s = path := parse_path s in + let options = + [ "-p", Arg.String set_path_, "only apply to subpath" + ; "-tree", Arg.Set do_print_tree, "print bench tree" + ] in + try + Arg.parse_argv argv options (fun _ -> ()) "run benchmarks [options]"; + if !do_print_tree + then Format.fprintf out "@[%a@]@." print t + else ( + Format.printf "run on path %a@." print_path !path; + run_path out t !path (* regular path *) + ) + with Arg.Help msg -> + Format.pp_print_string out msg + +(** {2 Global Registration} *) + +module Glob = struct + let tree_ = ref (Multiple ([], SMap.empty)) + + let get () = !tree_ + + let register ?(path=[]) new_t = + tree_ := merge_ !tree_ (prefix path new_t) + + let register' ~path new_t = + register ~path:(parse_path path) new_t + + let run_main ?argv ?out () = + run_main ?argv ?out !tree_ +end diff --git a/benchs/CCBench.mli b/benchs/CCBench.mli new file mode 100644 index 00000000..73eb005e --- /dev/null +++ b/benchs/CCBench.mli @@ -0,0 +1,115 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +this software is provided by the copyright holders and contributors "as is" and +any express or implied warranties, including, but not limited to, the implied +warranties of merchantability and fitness for a particular purpose are +disclaimed. in no event shall the copyright holder or contributors be liable +for any direct, indirect, incidental, special, exemplary, or consequential +damages (including, but not limited to, procurement of substitute goods or +services; loss of use, data, or profits; or business interruption) however +caused and on any theory of liability, whether in contract, strict liability, +or tort (including negligence or otherwise) arising in any way out of the use +of this software, even if advised of the possibility of such damage. +*) + +(** {1 helpers for benchmarks} *) + +(** {2 Benchmark Tree} + +Naming benchmark within a hierarchy that allows to filter them *) + +type bench + +val throughput1 : + ?min_count:Int64.t -> + ?style:Benchmark.style -> + ?fwidth:int -> + ?fdigits:int -> + ?repeat:int -> int -> ?name:string -> ('a -> 'b) -> 'a -> bench + +val throughputN : + ?style:Benchmark.style -> + ?fwidth:int -> + ?fdigits:int -> + ?repeat:int -> int -> (string * ('a -> 'b) * 'a) list -> bench + +val raw : (unit -> Benchmark.samples) -> bench +(** Give control to the user to produce her samples *) + +val (>::) : string -> bench -> bench + +val mk_list : bench list -> bench + +val (>:::) : string -> bench list -> bench + +val with_int : (int -> bench) -> int list -> bench +(** Parametrize a bench with several values *) + +val map_int : ((int -> bench) * int) list -> bench +(** One function for each integer. + @raise Invalid_argument if the two lists don't have the same length + or are empty *) + +val print : Format.formatter -> bench -> unit +(** Print the tree of benchmarks *) + +(** {2 Path} + +A path in a benchmark tree *) + +type path = string list + +val print_path : Format.formatter -> path -> unit + +val parse_path : string -> path +(** split a string into a path at the "." separators *) + +val prefix : path -> bench -> bench +(** Add the path as a prefix to the tree *) + +(** {2 Running} *) + +val run : Format.formatter -> bench -> unit +(** [run fmt t] runs all benchmarks of [t] and print the results to [fmt] *) + +val run_path : Format.formatter -> bench -> path -> unit +(** Run only a sub-tree of the benchmarks *) + +val run_main : + ?argv:string array -> + ?out:Format.formatter -> + bench -> unit +(** Main function: parses the command line arguments and runs benchmarks + accordingly *) + + +(** {2 Global Registration} *) + +module Glob : sig + val register : ?path:path -> bench -> unit + (** Register a benchmark to the global register of benchmarks (a global tree) *) + + val register' : path:string -> bench -> unit + (** Same as {!register} but applies {!parse_path} first to its argument *) + + val get : unit -> bench + (** Global bench tree *) + + val run_main : + ?argv:string array -> + ?out:Format.formatter -> + unit -> unit + (** Same as {!run_main} but on the global tree of benchmarks *) +end From 55e18bbb0f3d563cf382ffec08ef57b9be20418a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 15:41:30 +0100 Subject: [PATCH 07/39] use CCBench in benchs/benchs.ml to hierarchize benchmarks --- .merlin | 2 ++ benchs/benchs.ml | 89 ++++++++++++++++-------------------------------- 2 files changed, 31 insertions(+), 60 deletions(-) diff --git a/.merlin b/.merlin index 933075fd..33392d20 100644 --- a/.merlin +++ b/.merlin @@ -4,12 +4,14 @@ S string S pervasives S tests S examples +S benchs B _build/core B _build/misc B _build/string B _build/pervasives B _build/tests B _build/examples +B _build/benchs/ PKG oUnit PKG benchmark PKG threads diff --git a/benchs/benchs.ml b/benchs/benchs.ml index 022e3004..ad0a6064 100644 --- a/benchs/benchs.ml +++ b/benchs/benchs.ml @@ -1,9 +1,5 @@ (** Generic benchs *) -let draw_line () = - output_string stdout (CCString.repeat "*" 80); - output_char stdout '\n' - module L = struct (* FLAT MAP *) @@ -13,16 +9,13 @@ module L = struct else if x mod 5 = 1 then [x;x+1] else [x;x+1;x+2;x+3] - let bench_flatmap ?(time=2) n = - draw_line (); - Printf.printf "flat_map for %d elements\n" n; + let bench_flat_map ?(time=2) n = let l = CCList.(1 -- n) in - let res = Benchmark.throughputN time + CCBench.throughputN time [ "flat_map", CCList.flat_map f_, l ; "flatten o CCList.map", (fun l -> List.flatten (CCList.map f_ l)), l ; "flatten o map", (fun l -> List.flatten (List.map f_ l)), l - ] in - Benchmark.tabulate res + ] (* APPEND *) @@ -30,76 +23,52 @@ module L = struct ignore (f (f l1 l2) l3) let bench_append ?(time=2) n = - draw_line (); - Printf.printf "append for %d elements\n" n; let l1 = CCList.(1 -- n) in let l2 = CCList.(n+1 -- 2*n) in let l3 = CCList.(2*n+1 -- 3*n) in let arg = l1, l2, l3 in - let res = Benchmark.throughputN time + CCBench.throughputN time [ "CCList.append", append_ CCList.append, arg ; "List.append", append_ List.append, arg - ] in - Benchmark.tabulate res + ] (* FLATTEN *) let bench_flatten ?(time=2) n = - draw_line (); - Printf.printf "flatten for %d elements\n" n; let l = CCList.Idx.mapi (fun i x -> CCList.(x -- (x+ min i 100))) CCList.(1 -- n) in - let res = Benchmark.throughputN time + CCBench.throughputN time [ "CCList.flatten", CCList.flatten, l ; "List.flatten", List.flatten, l ; "fold_right append", (fun l -> List.fold_right List.append l []), l ; "CCList.(fold_right append)", (fun l->CCList.fold_right CCList.append l []), l - ] in - Benchmark.tabulate res - + ] (* MAIN *) - let run() = - bench_flatten 100; - bench_flatten 10_000; - bench_flatten ~time:4 100_000; - bench_flatmap 100; - bench_flatmap 10_000; - bench_flatmap ~time:4 100_000; - bench_append 100; - bench_append 10_000; - bench_append ~time:4 100_000; - () + let bench = CCBench.( + "list" >::: + [ "flat_map" >:: + map_int + [ bench_flat_map ~time:2, 100 + ; bench_flat_map ~time:2, 10_000 + ; bench_flat_map ~time:4, 100_000] + ; "flatten" >:: + map_int + [ bench_flatten ~time:2, 100 + ; bench_flatten ~time:2, 10_000 + ; bench_flatten ~time:4, 100_000] + ; "append" >:: + map_int + [ bench_append ~time:2, 100 + ; bench_append ~time:2, 10_000 + ; bench_append ~time:4, 100_000] + ] + ) + + let () = CCBench.Glob.register bench end -(* TODO *) - -let tbl_ = - [ "list", L.run - ] - -let bench_all () = - List.iter (fun (name, run) -> - draw_line (); - Printf.printf "run tests for %s...\n" name; - run() - ) tbl_ - -let which_ = ref ("all", bench_all) -let set_which s = - if s = "all" then which_ := s, bench_all - else try - let run = List.assoc s tbl_ in - which_ := s, run - with Not_found -> - failwith ("unknown test " ^ s) -let options = [] - let () = - Arg.parse options set_which "benchs [which]"; - let name, run = !which_ in - Printf.printf "run test %s\n" name; - run (); - () + CCBench.Glob.run_main () From ece324f4c2f259a60acf96ce68687dfb59c47b95 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 20:32:41 +0100 Subject: [PATCH 08/39] CCList.init added --- core/CCList.ml | 29 +++++++++++++++++++++++++++-- core/CCList.mli | 4 ++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/core/CCList.ml b/core/CCList.ml index 80d31608..06ab1500 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -63,7 +63,11 @@ let append l1 l2 = and safe l1 l2 = List.rev_append (List.rev l1) l2 in - direct direct_depth_append_ l1 l2 + match l1 with + | [] -> l2 + | [x] -> x::l2 + | [x;y] -> x::y::l2 + | _ -> direct direct_depth_append_ l1 l2 let (@) = append @@ -112,6 +116,21 @@ let fold_right f l acc = l = fold_right (fun x y->x::y) l []) *) +let init len f = + let rec init_rec acc i f = + if i=0 then f i :: acc + else init_rec (f i :: acc) (i-1) f + in + if len<0 then invalid_arg "init" + else if len=0 then [] + else init_rec [] (len-1) f + +(*$T + init 0 (fun _ -> 0) = [] + init 1 (fun x->x) = [0] + init 1000 (fun x->x) = 0--999 +*) + let rec compare f l1 l2 = match l1, l2 with | [], [] -> 0 | _, [] -> 1 @@ -125,6 +144,10 @@ let rec equal f l1 l2 = match l1, l2 with | [], _ | _, [] -> false | x1::l1', x2::l2' -> f x1 x2 && equal f l1' l2' +(*$T + equal CCInt.equal (1--1_000_000) (1--1_000_000) +*) + let flat_map f l = let rec aux f l kont = match l with | [] -> kont [] @@ -142,13 +165,15 @@ let flat_map f l = (*$T flat_map (fun x -> [x+1; x*2]) [10;100] = [11;20;101;200] + List.length (flat_map (fun x->[x]) (1--300_000)) = 300_000 *) let flatten l = fold_right append l [] (*$T flatten [[1]; [2;3;4]; []; []; [5;6]] = 1--6 - *) + flatten (init 300_001 (fun x->[x])) = 0--300_000 +*) let product f l1 l2 = flat_map (fun x -> map (fun y -> f x y) l2) l1 diff --git a/core/CCList.mli b/core/CCList.mli index 160348a5..f00c2af3 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -48,6 +48,10 @@ val filter : ('a -> bool) -> 'a t -> 'a t val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b (** Safe version of [fold_right] *) +val init : int -> (int -> 'a) -> 'a t +(** Same as [Array.init] + @since NEXT_RELEASE *) + val compare : ('a -> 'a -> int) -> 'a t -> 'a t -> int val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool From 8d875994f500eac77e4d90ea0ecea4e6346b15d7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 20:32:59 +0100 Subject: [PATCH 09/39] slightly better printing in CCbench --- benchs/CCBench.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchs/CCBench.ml b/benchs/CCBench.ml index 85e4e516..b0926940 100644 --- a/benchs/CCBench.ml +++ b/benchs/CCBench.ml @@ -171,7 +171,7 @@ let prefix path t = List.fold_right (fun s t -> s >:: t) path t (* run one atomic single_bench *) let run_single_bench_ fmt path f = print_line_ fmt (); - Format.fprintf fmt "run single_bench %a@." print_path (List.rev path); + Format.fprintf fmt "run bench %a@." print_path (List.rev path); let res = f () in Benchmark.tabulate res @@ -186,7 +186,7 @@ let rec run_all fmt path t = match t with run_all fmt path t' ) m | WithInt l -> - List.iter (fun (f, n) -> run_all fmt path (f n)) l + List.iter (fun (f, n) -> run_all fmt (string_of_int n::path) (f n)) l let run fmt t = run_all fmt [] t @@ -209,7 +209,7 @@ let rec run_path_rec_ fmt path remaining t = match t, remaining with failwith msg end | WithInt l, _ -> - List.iter (fun (f, n) -> run_path_rec_ fmt path remaining (f n)) l + List.iter (fun (f, n) -> run_path_rec_ fmt (string_of_int n::path) remaining (f n)) l | Bench _, _::_ -> () let run_path fmt t path = run_path_rec_ fmt [] path t From ea6a07ed310f3ae52ddb380ee06d255395365b5a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 20:57:14 +0100 Subject: [PATCH 10/39] tests for CCList; made some functions tailrec --- core/CCList.ml | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/core/CCList.ml b/core/CCList.ml index 06ab1500..451e73c4 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -255,6 +255,7 @@ let take n l = take 2 [1;2;3;4;5] = [1;2] take 10_000 (range 0 100_000) |> List.length = 10_000 take 10_000 (range 0 2_000) = range 0 2_000 + take 300_000 (1 -- 400_000) = 1 -- 300_000 *) let rec drop n l = match l with @@ -313,20 +314,38 @@ module Set = struct (fun t -> mem ~eq t l2) l1 - let rec uniq ?(eq=(=)) l = match l with - | [] -> [] - | x::xs when List.exists (eq x) xs -> uniq ~eq xs - | x::xs -> x :: uniq ~eq xs + let uniq ?(eq=(=)) l = + let rec uniq eq acc l = match l with + | [] -> List.rev acc + | x::xs when List.exists (eq x) xs -> uniq eq acc xs + | x::xs -> uniq eq (x::acc) xs + in uniq eq [] l - let rec union ?(eq=(=)) l1 l2 = match l1 with - | [] -> l2 - | x::xs when mem ~eq x l2 -> union ~eq xs l2 - | x::xs -> x::(union ~eq xs l2) + (*$T + Set.uniq [1;1;2;2;3;4;4;2;4;1;5] |> List.sort Pervasives.compare = [1;2;3;4;5] + *) - let rec inter ?(eq=(=)) l1 l2 = match l1 with - | [] -> [] - | x::xs when mem ~eq x l2 -> x::(inter ~eq xs l2) - | _::xs -> inter ~eq xs l2 + let union ?(eq=(=)) l1 l2 = + let rec union eq acc l1 l2 = match l1 with + | [] -> List.rev_append acc l2 + | x::xs when mem ~eq x l2 -> union eq acc xs l2 + | x::xs -> union eq (x::acc) xs l2 + in union eq [] l1 l2 + + (*$T + Set.union [1;2;4] [2;3;4;5] = [1;2;3;4;5] + *) + + let inter ?(eq=(=)) l1 l2 = + let rec inter eq acc l1 l2 = match l1 with + | [] -> List.rev acc + | x::xs when mem ~eq x l2 -> inter eq (x::acc) xs l2 + | _::xs -> inter eq acc xs l2 + in inter eq [] l1 l2 + + (*$T + Set.inter [1;2;4] [2;3;4;5] = [2;4] + *) end module Idx = struct @@ -630,7 +649,7 @@ type 'a formatter = Format.formatter -> 'a -> unit type 'a random_gen = Random.State.t -> 'a let random_len len g st = - map (fun _ -> g st) (range' 0 len) + init len (fun _ -> g st) (*$T random_len 10 CCInt.random_small (Random.State.make [||]) |> List.length = 10 From c6072b6b2f06bab6334ac33f8895b9e7664807f6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 21:39:11 +0100 Subject: [PATCH 11/39] better printing in CCBench --- benchs/CCBench.ml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/benchs/CCBench.ml b/benchs/CCBench.ml index b0926940..3a987b42 100644 --- a/benchs/CCBench.ml +++ b/benchs/CCBench.ml @@ -109,8 +109,8 @@ let map_int l = let rec print fmt = function | Multiple (l, m) -> Format.fprintf fmt "@[%a%a@]" - (print_list_ ~sep:"," print) l print_map m + (print_list_ ~sep:"," print) l | WithInt l -> Format.fprintf fmt "@[[%a]@]" (print_list_ print_pair) @@ -119,8 +119,11 @@ let rec print fmt = function and print_pair fmt (n,t) = Format.fprintf fmt "@[%d: %a@]" n print t and print_map fmt m = - Format.pp_open_hvbox fmt 0; - SMap.iter (fun n t -> Format.fprintf fmt "@[%s.%a@]" n print t) m; + let first = ref true in + Format.pp_open_vbox fmt 0; + SMap.iter (fun n t -> + if !first then first := false else Format.pp_print_cut fmt (); + Format.fprintf fmt "@[%s.%a@]" n print t) m; Format.pp_close_box fmt () (** {2 Path} *) From bd8392dfe56d7cf1c232848c7dd7f59b18b6a7cf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 21:40:17 +0100 Subject: [PATCH 12/39] rename benchs.ml into run_benchs.ml --- _oasis | 4 ++-- benchs/{benchs.ml => run_benchs.ml} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename benchs/{benchs.ml => run_benchs.ml} (100%) diff --git a/_oasis b/_oasis index 1d02eee1..7fe06994 100644 --- a/_oasis +++ b/_oasis @@ -149,12 +149,12 @@ Document containers_advanced XOCamlbuildPath: . XOCamlbuildLibraries: containers.advanced -Executable benchs +Executable run_benchs Path: benchs/ Install: false CompiledObject: native Build$: flag(bench) - MainIs: benchs.ml + MainIs: run_benchs.ml BuildDepends: containers,benchmark Executable bench_tbl diff --git a/benchs/benchs.ml b/benchs/run_benchs.ml similarity index 100% rename from benchs/benchs.ml rename to benchs/run_benchs.ml From 1a20df93934d4fc032df25dbcf932bc3805d0701 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 22:20:58 +0100 Subject: [PATCH 13/39] benchmarks for CCVector --- benchs/run_benchs.ml | 48 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index ad0a6064..f0033617 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -68,6 +68,54 @@ module L = struct let () = CCBench.Glob.register bench end +module Vec = struct + let f x = x+1 + + let map_push_ f v = + let v' = CCVector.create () in + CCVector.iter (fun x -> CCVector.push v' (f x)) v; + v' + + let map_push_size_ f v = + let v' = CCVector.create_with ~capacity:(CCVector.length v) 0 in + CCVector.iter (fun x -> CCVector.push v' (f x)) v; + v' + + let bench_map n = + let v = CCVector.init n (fun x->x) in + CCBench.throughputN 2 + [ "map", CCVector.map f, v + ; "map_push", map_push_ f, v + ; "map_push_cap", map_push_size_ f, v + ] + + let try_append_ app n v2 () = + let v1 = CCVector.init n (fun x->x) in + app v1 v2; + assert (CCVector.length v1 = 2*n); + () + + let append_naive_ v1 v2 = + CCVector.iter (fun x -> CCVector.push v1 x) v2 + + let bench_append n = + let v2 = CCVector.init n (fun x->n+x) in + CCBench.throughputN 2 + [ "append", try_append_ CCVector.append n v2, () + ; "append_naive", try_append_ append_naive_ n v2, () + ] + + let bench = CCBench.( + "vector" >::: + [ "map" >:: with_int bench_map [100; 10_000; 100_000] + ; "append" >:: with_int bench_append [100; 10_000; 50_000] + ] + ) + + let () = + CCBench.Glob.register bench +end + let () = CCBench.Glob.run_main () From b8d84de4dcdc4ab3d2f89cd1e522933addc94542 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 22:21:09 +0100 Subject: [PATCH 14/39] some optimizations in CCVector --- core/CCVector.ml | 72 +++++++++++++++++++++++++++++++---------------- core/CCVector.mli | 3 +- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/core/CCVector.ml b/core/CCVector.ml index 8f948dc3..b236ee2b 100644 --- a/core/CCVector.ml +++ b/core/CCVector.ml @@ -92,36 +92,48 @@ let _resize v newcapacity = () (*$T - (let v = create_with ~capacity:10 1 in ensure v 200; capacity v >= 200) + let v = create_with ~capacity:10 1 in \ + ensure v 200; capacity v >= 200 *) (* grow the array, using [x] as a filler if required *) let _grow v x = if _empty_array v then v.vec <- Array.make 32 x - else + else ( let n = Array.length v.vec in - let size = min (n + n/2 + 10) Sys.max_array_length in + let size = min (2 * n + 10) Sys.max_array_length in + if size = n then failwith "vec: can't grow any further"; _resize v size + ) +(* resize so that capacity is at least size. Use a doubling-size + strategy so that calling many times [ensure] will + behave well *) let ensure v size = if Array.length v.vec = 0 then () - else if v.size < size - then - let size' = min size Sys.max_array_length in - _resize v size' + else if size > Sys.max_array_length + then failwith "vec.ensure: size too big" + else ( + let n = ref (max 16 (Array.length v.vec)) in + while !n < size do n := min Sys.max_array_length (2* !n) done; + _resize v !n + ) let clear v = v.size <- 0 let is_empty v = v.size = 0 +let push_unsafe v x = + Array.unsafe_set v.vec v.size x; + v.size <- v.size + 1 + let push v x = if v.size = Array.length v.vec then _grow v x; - Array.unsafe_set v.vec v.size x; - v.size <- v.size + 1 + push_unsafe v x (** add all elements of b to a *) let append a b = @@ -164,8 +176,10 @@ let append_seq a seq = seq (fun x -> push a x) let append_array a b = - ensure a (a.size + Array.length b); - Array.iter (push a) b + let len_b = Array.length b in + ensure a (a.size + len_b); + Array.blit b 0 a.vec a.size len_b; + a.size <- a.size + len_b (*$T let v1 = init 5 (fun i->i) and v2 = Array.init 5 (fun i->i+5) in \ @@ -274,10 +288,7 @@ let map f v = then create () else ( let vec = Array.init v.size (fun i -> f (Array.unsafe_get v.vec i)) in - { - size=v.size; - vec; - } + { size=v.size; vec; } ) (*$T @@ -286,17 +297,23 @@ let map f v = *) let filter' p v = - let i = ref (v.size - 1) in - while !i >= 0 do - if not (p v.vec.(! i)) - (* remove i-th item! *) - then remove v !i; - decr i - done + let i = ref 0 in (* cur element *) + let j = ref 0 in (* cur insertion point *) + let n = v.size in + while !i < n do + if p v.vec.(! i) then ( + (* move element i at the first empty slot. + invariant: i >= j*) + if !i > !j then v.vec.(!j) <- v.vec.(!i); + incr i; + incr j + ) else incr i + done; + v.size <- !j (*$T let v = 1 -- 10 in filter' (fun x->x<4) v; \ - to_list v |> List.sort Pervasives.compare = [1;2;3] + to_list v = [1;2;3] *) let filter p v = @@ -305,13 +322,14 @@ let filter p v = else ( let v' = create_with ~capacity:v.size v.vec.(0) in Array.iter - (fun x -> if p x then push v' x) + (fun x -> if p x then push_unsafe v' x) v.vec; v' ) (*$T filter (fun x-> x mod 2=0) (of_list [1;2;3;4;5]) |> to_list = [2;4] + filter (fun x-> x mod 2=0) (1 -- 1_000_000) |> length = 500_000 *) let fold f acc v = @@ -463,9 +481,13 @@ let of_list l = match l with | [] -> create() | x::_ -> let v = create_with ~capacity:(List.length l + 5) x in - List.iter (push v) l; + List.iter (push_unsafe v) l; v +(*$T + of_list CCList.(1--300_000) |> to_list = CCList.(1--300_000) +*) + let to_array v = Array.sub v.vec 0 v.size diff --git a/core/CCVector.mli b/core/CCVector.mli index 37cb0677..a032eb88 100644 --- a/core/CCVector.mli +++ b/core/CCVector.mli @@ -134,8 +134,7 @@ val filter : ('a -> bool) -> ('a,_) t -> ('a, 'mut) t returns a new vector that only contains elements of [v] satisfying [p]. *) val filter' : ('a -> bool) -> ('a, rw) t -> unit -(** Filter elements in place. Does {b NOT} preserve the order - of the elements. *) +(** Filter elements in place. *) val fold : ('b -> 'a -> 'b) -> 'b -> ('a,_) t -> 'b (** fold on elements of the vector *) From ac0f18d3c7034bab0bc24e2ea5641ec69ca35f2c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 13 Nov 2014 22:33:15 +0100 Subject: [PATCH 15/39] simpler interface for CCbench global registration --- benchs/CCBench.ml | 18 ++++++++---------- benchs/CCBench.mli | 24 +++++++++++------------- benchs/run_benchs.ml | 11 +++-------- 3 files changed, 22 insertions(+), 31 deletions(-) diff --git a/benchs/CCBench.ml b/benchs/CCBench.ml index 3a987b42..73145714 100644 --- a/benchs/CCBench.ml +++ b/benchs/CCBench.ml @@ -238,17 +238,15 @@ let run_main ?(argv=Sys.argv) ?(out=Format.std_formatter) t = (** {2 Global Registration} *) -module Glob = struct - let tree_ = ref (Multiple ([], SMap.empty)) +let tree_ = ref (Multiple ([], SMap.empty)) - let get () = !tree_ +let global_bench () = !tree_ - let register ?(path=[]) new_t = - tree_ := merge_ !tree_ (prefix path new_t) +let register ?(path=[]) new_t = + tree_ := merge_ !tree_ (prefix path new_t) - let register' ~path new_t = - register ~path:(parse_path path) new_t +let register' ~path new_t = + register ~path:(parse_path path) new_t - let run_main ?argv ?out () = - run_main ?argv ?out !tree_ -end +let run_main ?argv ?out () = + run_main ?argv ?out !tree_ diff --git a/benchs/CCBench.mli b/benchs/CCBench.mli index 73eb005e..e5000df7 100644 --- a/benchs/CCBench.mli +++ b/benchs/CCBench.mli @@ -97,19 +97,17 @@ val run_main : (** {2 Global Registration} *) -module Glob : sig - val register : ?path:path -> bench -> unit - (** Register a benchmark to the global register of benchmarks (a global tree) *) +val register : ?path:path -> bench -> unit +(** Register a benchmark to the global register of benchmarks (a global tree) *) - val register' : path:string -> bench -> unit - (** Same as {!register} but applies {!parse_path} first to its argument *) +val register' : path:string -> bench -> unit +(** Same as {!register} but applies {!parse_path} first to its argument *) - val get : unit -> bench - (** Global bench tree *) +val global_bench : unit -> bench +(** Global bench tree, built from calls to {!register} *) - val run_main : - ?argv:string array -> - ?out:Format.formatter -> - unit -> unit - (** Same as {!run_main} but on the global tree of benchmarks *) -end +val run_main : + ?argv:string array -> + ?out:Format.formatter -> + unit -> unit + (** Same as {!run_main} but on the global tree of benchmarks *) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index f0033617..abf7d076 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -45,7 +45,7 @@ module L = struct (* MAIN *) - let bench = CCBench.( + let () = CCBench.register CCBench.( "list" >::: [ "flat_map" >:: map_int @@ -64,8 +64,6 @@ module L = struct ; bench_append ~time:4, 100_000] ] ) - - let () = CCBench.Glob.register bench end module Vec = struct @@ -105,18 +103,15 @@ module Vec = struct ; "append_naive", try_append_ append_naive_ n v2, () ] - let bench = CCBench.( + let () = CCBench.register CCBench.( "vector" >::: [ "map" >:: with_int bench_map [100; 10_000; 100_000] ; "append" >:: with_int bench_append [100; 10_000; 50_000] ] ) - - let () = - CCBench.Glob.register bench end let () = - CCBench.Glob.run_main () + CCBench.run_main () From 0da13496a4475daf7cdd55a5797716ff429271fe Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 16 Nov 2014 23:54:37 +0100 Subject: [PATCH 16/39] comments --- core/CCIO.ml | 2 +- core/CCIO.mli | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/CCIO.ml b/core/CCIO.ml index 26645d5c..961880e5 100644 --- a/core/CCIO.ml +++ b/core/CCIO.ml @@ -128,7 +128,7 @@ and _sequence_map : type a b. (a -> b t) -> a list -> b list -> b list | a::tail -> let x = _run (f a) in _sequence_map f tail (x::acc) - + let _printers = ref [ (* default printer *) diff --git a/core/CCIO.mli b/core/CCIO.mli index d7950385..03c4216d 100644 --- a/core/CCIO.mli +++ b/core/CCIO.mli @@ -112,9 +112,9 @@ val fail : string -> 'a t (** {2 Finalizers} *) val (>>>=) : 'a with_finalizer -> ('a -> 'b t) -> 'b t -(** Alternative to {!(>>=)} that also takes a [unit t] value, that is a - finalizer. This action will run in any case (even failure). - Other than the finalizer, this behaves like {!(>>=)} *) +(** Same as {!(>>=)}, but taking the finalizer into account. Once this + IO value is done executing, the finalizer is executed and the resource, + fred. *) (** {2 Running} *) @@ -281,7 +281,7 @@ How to list recursively files in a directory: File.read_dir ~recurse:true (File.make "/tmp") >>= Seq.output ~sep:"\n" stdout ) |> CCIO.run_exn ;; - + ]} See {!File.walk} if you also need to list directories. From 2420df32f32702283aef4708101ceaec7388cbba Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 17 Nov 2014 01:09:13 +0100 Subject: [PATCH 17/39] moved some benchmarks to benchs/run_benchs.ml, under the centralized bench tree --- _oasis | 38 +-- benchs/bench_batch.ml | 91 ------- benchs/bench_tbl.ml | 422 -------------------------------- benchs/run_benchs.ml | 549 ++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 540 insertions(+), 560 deletions(-) delete mode 100644 benchs/bench_batch.ml delete mode 100644 benchs/bench_tbl.ml diff --git a/_oasis b/_oasis index 7fe06994..be3dba05 100644 --- a/_oasis +++ b/_oasis @@ -153,34 +153,10 @@ Executable run_benchs Path: benchs/ Install: false CompiledObject: native - Build$: flag(bench) + Build$: flag(bench) && flag(misc) MainIs: run_benchs.ml - BuildDepends: containers,benchmark - -Executable bench_tbl - Path: benchs/ - Install: false - CompiledObject: native - Build$: flag(bench) - MainIs: bench_tbl.ml - BuildDepends: containers, containers.string, containers.misc, - benchmark, containers.advanced - -Executable bench_conv - Path: benchs/ - Install: false - CompiledObject: native - Build$: flag(bench) - MainIs: bench_conv.ml - BuildDepends: containers,benchmark - -Executable bench_batch - Path: benchs/ - Install: false - CompiledObject: native - Build$: flag(bench) - MainIs: bench_batch.ml - BuildDepends: containers,benchmark + BuildDepends: containers, containers.misc, containers.advanced, + containers.string, benchmark Executable bench_hash Path: benchs/ @@ -190,6 +166,14 @@ Executable bench_hash MainIs: bench_hash.ml BuildDepends: containers,containers.misc +Executable bench_conv + Path: benchs/ + Install: false + CompiledObject: native + Build$: flag(bench) + MainIs: bench_conv.ml + BuildDepends: containers,benchmark + Executable test_levenshtein Path: tests/ Install: false diff --git a/benchs/bench_batch.ml b/benchs/bench_batch.ml deleted file mode 100644 index 9846d851..00000000 --- a/benchs/bench_batch.ml +++ /dev/null @@ -1,91 +0,0 @@ -(** benchmark CCBatch *) - -open Containers_advanced - -module type COLL = sig - val name : string - include CCBatch.COLLECTION - val doubleton : 'a -> 'a -> 'a t - val (--) : int -> int -> int t - val equal : int t -> int t -> bool -end - -module Make(C : COLL) = struct - let f1 x = x mod 2 = 0 - let f2 x = -x - let f3 x = C.doubleton x (x+1) - let f4 x = -x - let collect a = C.fold (+) 0 a - - let naive a = - let a = C.filter f1 a in - let a = C.flat_map f3 a in - let a = C.filter f1 a in - let a = C.map f2 a in - let a = C.flat_map f3 a in - let a = C.map f4 a in - ignore (collect a); - a - - module BA = CCBatch.Make(C) - - let ops = - BA.(filter f1 >>> flat_map f3 >>> filter f1 >>> map f2 >>> flat_map f3 >>> map f4) - - let batch a = - let a = BA.apply ops a in - ignore (collect a); - a - - let bench_for ~time n = - Printf.printf "\n\nbenchmark for %s of len %d\n" C.name n; - flush stdout; - let a = C.(0 -- n) in - (* debug - CCPrint.printf "naive: %a\n" (CCArray.pp CCInt.pp) (naive a); - CCPrint.printf "simple: %a\n" (CCArray.pp CCInt.pp) (batch_simple a); - CCPrint.printf "batch: %a\n" (CCArray.pp CCInt.pp) (batch a); - *) - assert (C.equal (batch a) (naive a)); - let res = Benchmark.throughputN time - [ C.name ^ "_naive", naive, a - ; C.name ^ "_batch", batch, a - ] - in - Benchmark.tabulate res - - let bench () = - bench_for ~time:1 100; - bench_for ~time:4 100_000; - bench_for ~time:4 1_000_000; - () -end - -module BenchArray = Make(struct - include CCArray - let name = "array" - let equal a b = a=b - let doubleton x y = [| x; y |] - let fold = Array.fold_left -end) - -module BenchList = Make(struct - include CCList - let name = "list" - let equal a b = a=b - let doubleton x y = [ x; y ] - let fold = List.fold_left -end) - -module BenchKList = Make(struct - include CCKList - let name = "klist" - let equal a b = equal (=) a b - let doubleton x y = CCKList.of_list [ x; y ] -end) - -let () = - BenchArray.bench(); - BenchList.bench(); - BenchKList.bench (); - () diff --git a/benchs/bench_tbl.ml b/benchs/bench_tbl.ml deleted file mode 100644 index 94787857..00000000 --- a/benchs/bench_tbl.ml +++ /dev/null @@ -1,422 +0,0 @@ - -(** Benchmarking *) -(** {2 hashtables} *) - -module IHashtbl = Hashtbl.Make(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module IFlatHashtbl = FlatHashtbl.Make(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module IFHashtbl = FHashtbl.Tree(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module IPersistentHashtbl = CCPersistentHashtbl.Make(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module IMap = Map.Make(struct - type t = int - let compare i j = i - j -end) - -module ICCHashtbl = CCFlatHashtbl.Make(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -let phashtbl_add n = - let h = PHashtbl.create 50 in - for i = n downto 0 do - PHashtbl.add h i i; - done; - h - -let hashtbl_add n = - let h = Hashtbl.create 50 in - for i = n downto 0 do - Hashtbl.add h i i; - done; - h - -let ihashtbl_add n = - let h = IHashtbl.create 50 in - for i = n downto 0 do - IHashtbl.add h i i; - done; - h - -let iflathashtbl_add n = - let h = IFlatHashtbl.create 50 in - for i = n downto 0 do - IFlatHashtbl.replace h i i; - done; - h - -let ifhashtbl_add n = - let h = ref (IFHashtbl.empty 32) in - for i = n downto 0 do - h := IFHashtbl.replace !h i i; - done; - !h - -let skiplist_add n = - let l = SkipList.create compare in - for i = n downto 0 do - SkipList.add l i i; - done; - l - -let ipersistenthashtbl_add n = - let h = ref (IPersistentHashtbl.create 32) in - for i = n downto 0 do - h := IPersistentHashtbl.replace !h i i; - done; - !h - -let imap_add n = - let h = ref IMap.empty in - for i = n downto 0 do - h := IMap.add i i !h; - done; - !h - -let icchashtbl_add n = - let h = ICCHashtbl.create 50 in - for i = n downto 0 do - ICCHashtbl.add h i i; - done; - h - -let bench_maps1 () = - Format.printf "----------------------------------------@."; - let n = 100 in - let res = Benchmark.throughputN 3 - ["phashtbl_add", (fun n -> ignore (phashtbl_add n)), n; - "hashtbl_add", (fun n -> ignore (hashtbl_add n)), n; - "ihashtbl_add", (fun n -> ignore (ihashtbl_add n)), n; - "iflathashtbl_add", (fun n -> ignore (iflathashtbl_add n)), n; - "ifhashtbl_add", (fun n -> ignore (ifhashtbl_add n)), n; - "ipersistenthashtbl_add", (fun n -> ignore (ipersistenthashtbl_add n)), n; - "skiplist_add", (fun n -> ignore (skiplist_add n)), n; - "imap_add", (fun n -> ignore (imap_add n)), n; - "ccflathashtbl_add", (fun n -> ignore (icchashtbl_add n)), n; - ] - in - Benchmark.tabulate res - -let phashtbl_replace n = - let h = PHashtbl.create 50 in - for i = 0 to n do - PHashtbl.replace h i i; - done; - for i = n downto 0 do - PHashtbl.replace h i i; - done; - h - -let hashtbl_replace n = - let h = Hashtbl.create 50 in - for i = 0 to n do - Hashtbl.replace h i i; - done; - for i = n downto 0 do - Hashtbl.replace h i i; - done; - h - -let ihashtbl_replace n = - let h = IHashtbl.create 50 in - for i = 0 to n do - IHashtbl.replace h i i; - done; - for i = n downto 0 do - IHashtbl.replace h i i; - done; - h - -let iflathashtbl_replace n = - let h = IFlatHashtbl.create 50 in - for i = 0 to n do - IFlatHashtbl.replace h i i; - done; - for i = n downto 0 do - IFlatHashtbl.replace h i i; - done; - h - -let ifhashtbl_replace n = - let h = ref (IFHashtbl.empty 32) in - for i = 0 to n do - h := IFHashtbl.replace !h i i; - done; - for i = n downto 0 do - h := IFHashtbl.replace !h i i; - done; - !h - -let ipersistenthashtbl_replace n = - let h = ref (IPersistentHashtbl.create 32) in - for i = 0 to n do - h := IPersistentHashtbl.replace !h i i; - done; - for i = n downto 0 do - h := IPersistentHashtbl.replace !h i i; - done; - !h - -let skiplist_replace n = - let l = SkipList.create compare in - for i = 0 to n do - SkipList.add l i i; - done; - for i = n downto 0 do - SkipList.add l i i; - done; - l - -let imap_replace n = - let h = ref IMap.empty in - for i = 0 to n do - h := IMap.add i i !h; - done; - for i = n downto 0 do - h := IMap.add i i !h; - done; - !h - -let icchashtbl_replace n = - let h = ICCHashtbl.create 50 in - for i = 0 to n do - ICCHashtbl.add h i i; - done; - for i = n downto 0 do - ICCHashtbl.add h i i; - done; - h - -let bench_maps2 () = - Format.printf "----------------------------------------@."; - let n = 100 in - let res = Benchmark.throughputN 3 - ["phashtbl_replace", (fun n -> ignore (phashtbl_replace n)), n; - "hashtbl_replace", (fun n -> ignore (hashtbl_replace n)), n; - "ihashtbl_replace", (fun n -> ignore (ihashtbl_replace n)), n; - "iflathashtbl_replace", (fun n -> ignore (iflathashtbl_replace n)), n; - "ifhashtbl_replace", (fun n -> ignore (ifhashtbl_replace n)), n; - "ipersistenthashtbl_replace", (fun n -> ignore (ipersistenthashtbl_replace n)), n; - "skiplist_replace", (fun n -> ignore (skiplist_replace n)), n; - "imap_replace", (fun n -> ignore (imap_replace n)), n; - "ccflathashtbl_replace", (fun n -> ignore (icchashtbl_replace n)), n; - ] - in - Benchmark.tabulate res - -let my_len = 250 - -let phashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (PHashtbl.find h i); - done - -let hashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (Hashtbl.find h i); - done - -let ihashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (IHashtbl.find h i); - done - -let iflathashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (IFlatHashtbl.find h i); - done - -let ifhashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (IFHashtbl.find h i); - done - -let ipersistenthashtbl_find h = - fun n -> - for i = 0 to n-1 do - ignore (IPersistentHashtbl.find h i); - done - -let skiplist_find l = - fun n -> - for i = 0 to n-1 do - ignore (SkipList.find l i); - done - -let array_find a = - fun n -> - for i = 0 to n-1 do - ignore (Array.get a i); - done - -let imap_find m = - fun n -> - for i = 0 to n-1 do - ignore (IMap.find i m); - done - -let icchashtbl_find m = - fun n -> - for i = 0 to n-1 do - ignore (ICCHashtbl.get_exn i m); - done - -let bench_maps3 () = - List.iter - (fun len -> - let h = phashtbl_add len in - let h' = hashtbl_add len in - let h'' = ihashtbl_add len in - let h''' = iflathashtbl_add len in - let h'''' = ifhashtbl_add len in - let h''''' = ipersistenthashtbl_add len in - let l = skiplist_add len in - let a = Array.init len (fun i -> string_of_int i) in - let m = imap_add len in - let h'''''' = icchashtbl_add len in - Format.printf "----------------------------------------@."; - Format.printf "try on size %d@.@.@." len; - let res = Benchmark.throughputN 3 [ - "phashtbl_find", (fun () -> phashtbl_find h len), (); - "hashtbl_find", (fun () -> hashtbl_find h' len), (); - "ihashtbl_find", (fun () -> ihashtbl_find h'' len), (); - "iflathashtbl_find", (fun () -> iflathashtbl_find h''' len), (); - "ifhashtbl_find", (fun () -> ifhashtbl_find h'''' len), (); - "ipersistenthashtbl_find", (fun () -> ipersistenthashtbl_find h''''' len), (); - "skiplist_find", (fun () -> skiplist_find l len), (); - "array_find", (fun () -> array_find a len), (); - "imap_find", (fun () -> imap_find m len), (); - "cchashtbl_find", (fun () -> icchashtbl_find h'''''' len), (); - ] in - Benchmark.tabulate res) - [10;20;100;1000;10000] - -let bench_maps() = - bench_maps1 (); - bench_maps2 (); - bench_maps3 (); - () - -(** {2 Sequence/Gen} *) - -let bench_enum () = - let n = 1_000_000 in - let seq () = CCSequence.fold (+) 0 (CCSequence.int_range ~start:0 ~stop:n) in - let enum () = CCGen.fold (+) 0 (CCGen.int_range 0 n) in - let res = Benchmark.throughputN 3 - [ "sequence.fold", seq, (); - "gen.fold", enum, (); - ] in - Benchmark.tabulate res; - - let n = 100_000 in - let seq () = - let open CCSequence in - let seq = int_range ~start:0 ~stop:n in - let seq = flatMap (fun x -> int_range ~start:x ~stop:(x+10)) seq in - fold (+) 0 seq in - let enum () = - let open CCGen in - let seq = int_range 0 n in - let seq = flat_map (fun x -> int_range x (x+10)) seq in - fold (+) 0 seq in - let res = Benchmark.throughputN 3 - [ "sequence.flatMap", seq, (); - "gen.flatMap", enum, (); - ] in - Benchmark.tabulate res - -(** {2 Cache} *) - -(** Cached fibonacci function *) -module Fibo(C : Cache.S with type key = int) = struct - let fib ~size = - let fib fib' n = - match n with - | 0 -> 0 - | 1 -> 1 - | 2 -> 1 - | n -> - fib' (n-1) + fib' (n-2) - in - let cache = C.create size in - let cached_fib x = C.with_cache_rec cache fib x in - cached_fib -end - -module LinearIntCache = Cache.Linear(struct - type t = int - let equal i j = i = j -end) - -module ReplacingIntCache = Cache.Replacing(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module LRUIntCache = Cache.LRU(struct - type t = int - let equal i j = i = j - let hash i = i -end) - -module DummyIntCache = Cache.Dummy(struct type t = int end) - -let bench_cache () = - (* Fibonacci for those caching implementations *) - let module LinearFibo = Fibo(LinearIntCache) in - let module ReplacingFibo = Fibo(ReplacingIntCache) in - let module LRUFibo= Fibo(LRUIntCache) in - let module DummyFibo = Fibo(DummyIntCache) in - (* benchmark caches with fibo function *) - let bench_fib fib () = - ignore (List.map fib [5;10;20;30;35]); - () - in - let res = Benchmark.throughputN 3 - [ "linear_fib", bench_fib (LinearFibo.fib ~size:5), (); - "replacing_fib", bench_fib (ReplacingFibo.fib ~size:256), (); - "LRU_fib", bench_fib (LRUFibo.fib ~size:256), (); - "dummy_fib", bench_fib (DummyFibo.fib ~size:5), (); - ] in - Benchmark.tabulate res; - () - -let _ = - match Sys.argv with - | [| _; "maps" |] -> bench_maps () - | [| _; "enum" |] -> bench_enum () - | [| _; "cache" |] -> bench_cache () - | [| _; ("-help" | "--help") |] -> print_endline "./benchs [maps|enum|cache]" - | [| _ |] -> - bench_enum (); - bench_maps (); - bench_cache (); - () - | _ -> failwith "unknown argument (-help)" diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index abf7d076..04c67756 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -1,7 +1,11 @@ (** Generic benchs *) -module L = struct +(* composition *) +let (%%) f g x = f (g x) +(* FIXME: find out why -tree takes so long *) + +module L = struct (* FLAT MAP *) let f_ x = @@ -10,22 +14,24 @@ module L = struct else [x;x+1;x+2;x+3] let bench_flat_map ?(time=2) n = - let l = CCList.(1 -- n) in + let l = lazy CCList.(1 -- n) in + let flatten_map_ l = List.flatten (CCList.map f_ l) + and flatten_ccmap_ l = List.flatten (List.map f_ l) in CCBench.throughputN time - [ "flat_map", CCList.flat_map f_, l - ; "flatten o CCList.map", (fun l -> List.flatten (CCList.map f_ l)), l - ; "flatten o map", (fun l -> List.flatten (List.map f_ l)), l + [ "flat_map", CCList.flat_map f_ %% Lazy.force, l + ; "flatten o CCList.map", flatten_ccmap_ %% Lazy.force, l + ; "flatten o map", flatten_map_ %% Lazy.force, l ] (* APPEND *) - let append_ f (l1, l2, l3) = + let append_ f (lazy l1, lazy l2, lazy l3) = ignore (f (f l1 l2) l3) let bench_append ?(time=2) n = - let l1 = CCList.(1 -- n) in - let l2 = CCList.(n+1 -- 2*n) in - let l3 = CCList.(2*n+1 -- 3*n) in + let l1 = lazy CCList.(1 -- n) in + let l2 = lazy CCList.(n+1 -- 2*n) in + let l3 = lazy CCList.(2*n+1 -- 3*n) in let arg = l1, l2, l3 in CCBench.throughputN time [ "CCList.append", append_ CCList.append, arg @@ -35,12 +41,21 @@ module L = struct (* FLATTEN *) let bench_flatten ?(time=2) n = - let l = CCList.Idx.mapi (fun i x -> CCList.(x -- (x+ min i 100))) CCList.(1 -- n) in + let fold_right_append_ l = + List.fold_right List.append l [] + and cc_fold_right_append_ l = + CCList.fold_right CCList.append l [] + in + let l = lazy ( + CCList.Idx.mapi + (fun i x -> CCList.(x -- (x+ min i 100))) + CCList.(1 -- n)) + in CCBench.throughputN time - [ "CCList.flatten", CCList.flatten, l - ; "List.flatten", List.flatten, l - ; "fold_right append", (fun l -> List.fold_right List.append l []), l - ; "CCList.(fold_right append)", (fun l->CCList.fold_right CCList.append l []), l + [ "CCList.flatten", CCList.flatten %% Lazy.force, l + ; "List.flatten", List.flatten %% Lazy.force, l + ; "fold_right append", fold_right_append_ %% Lazy.force, l + ; "CCList.(fold_right append)", cc_fold_right_append_ %% Lazy.force, l ] (* MAIN *) @@ -80,16 +95,16 @@ module Vec = struct v' let bench_map n = - let v = CCVector.init n (fun x->x) in + let v = lazy (CCVector.init n (fun x->x)) in CCBench.throughputN 2 - [ "map", CCVector.map f, v - ; "map_push", map_push_ f, v - ; "map_push_cap", map_push_size_ f, v + [ "map", CCVector.map f %% Lazy.force, v + ; "map_push", map_push_ f %% Lazy.force, v + ; "map_push_cap", map_push_size_ f %% Lazy.force, v ] let try_append_ app n v2 () = let v1 = CCVector.init n (fun x->x) in - app v1 v2; + app v1 (Lazy.force v2); assert (CCVector.length v1 = 2*n); () @@ -97,7 +112,7 @@ module Vec = struct CCVector.iter (fun x -> CCVector.push v1 x) v2 let bench_append n = - let v2 = CCVector.init n (fun x->n+x) in + let v2 = lazy (CCVector.init n (fun x->n+x)) in CCBench.throughputN 2 [ "append", try_append_ CCVector.append n v2, () ; "append_naive", try_append_ append_naive_ n v2, () @@ -111,6 +126,500 @@ module Vec = struct ) end +module Cache = struct + module Fibo(C : Cache.S with type key = int) = struct + let fib ~size = + let fib fib' n = + match n with + | 0 -> 0 + | 1 -> 1 + | 2 -> 1 + | n -> + fib' (n-1) + fib' (n-2) + in + let cache = C.create size in + let cached_fib x = C.with_cache_rec cache fib x in + cached_fib + end + + module LinearIntCache = Cache.Linear(struct + type t = int + let equal i j = i = j + end) + + module ReplacingIntCache = Cache.Replacing(struct + type t = int + let equal i j = i = j + let hash i = i + end) + + module LRUIntCache = Cache.LRU(struct + type t = int + let equal i j = i = j + let hash i = i + end) + + module DummyIntCache = Cache.Dummy(struct type t = int end) + + module LinearFibo = Fibo(LinearIntCache) + module ReplacingFibo = Fibo(ReplacingIntCache) + module LRUFibo= Fibo(LRUIntCache) + module DummyFibo = Fibo(DummyIntCache) + + let bench_fib n = + CCBench.throughputN 3 + [ "linear_fib", LinearFibo.fib ~size:5, n; + "replacing_fib", ReplacingFibo.fib ~size:256, n; + "LRU_fib", LRUFibo.fib ~size:256, n; + "dummy_fib", DummyFibo.fib ~size:5, n; + ] + + let () = CCBench.register CCBench.( + "cache" >::: + [ "fib" >:: with_int bench_fib [10; 100] + ] + ) +end + +module Tbl = struct + module IHashtbl = Hashtbl.Make(struct + type t = int + let equal i j = i = j + let hash i = i + end) + + module IFlatHashtbl = FlatHashtbl.Make(struct + type t = int + let equal i j = i = j + let hash i = i + end) + + module IFHashtbl = FHashtbl.Tree(struct + type t = int + let equal i j = i = j + let hash i = i + end) + + module IPersistentHashtbl = CCPersistentHashtbl.Make(struct + type t = int + let equal i j = i = j + let hash i = i + end) + + module IMap = Map.Make(struct + type t = int + let compare i j = i - j + end) + + module ICCHashtbl = CCFlatHashtbl.Make(struct + type t = int + let equal i j = i = j + let hash i = i + end) + + let phashtbl_add n = + let h = PHashtbl.create 50 in + for i = n downto 0 do + PHashtbl.add h i i; + done; + h + + let hashtbl_add n = + let h = Hashtbl.create 50 in + for i = n downto 0 do + Hashtbl.add h i i; + done; + h + + let ihashtbl_add n = + let h = IHashtbl.create 50 in + for i = n downto 0 do + IHashtbl.add h i i; + done; + h + + let iflathashtbl_add n = + let h = IFlatHashtbl.create 50 in + for i = n downto 0 do + IFlatHashtbl.replace h i i; + done; + h + + let ifhashtbl_add n = + let h = ref (IFHashtbl.empty 32) in + for i = n downto 0 do + h := IFHashtbl.replace !h i i; + done; + !h + + let skiplist_add n = + let l = SkipList.create compare in + for i = n downto 0 do + SkipList.add l i i; + done; + l + + let ipersistenthashtbl_add n = + let h = ref (IPersistentHashtbl.create 32) in + for i = n downto 0 do + h := IPersistentHashtbl.replace !h i i; + done; + !h + + let imap_add n = + let h = ref IMap.empty in + for i = n downto 0 do + h := IMap.add i i !h; + done; + !h + + let icchashtbl_add n = + let h = ICCHashtbl.create 50 in + for i = n downto 0 do + ICCHashtbl.add h i i; + done; + h + + let bench_maps1 n = + CCBench.throughputN 3 + ["phashtbl_add", (fun n -> ignore (phashtbl_add n)), n; + "hashtbl_add", (fun n -> ignore (hashtbl_add n)), n; + "ihashtbl_add", (fun n -> ignore (ihashtbl_add n)), n; + "iflathashtbl_add", (fun n -> ignore (iflathashtbl_add n)), n; + "ifhashtbl_add", (fun n -> ignore (ifhashtbl_add n)), n; + "ipersistenthashtbl_add", (fun n -> ignore (ipersistenthashtbl_add n)), n; + "skiplist_add", (fun n -> ignore (skiplist_add n)), n; + "imap_add", (fun n -> ignore (imap_add n)), n; + "ccflathashtbl_add", (fun n -> ignore (icchashtbl_add n)), n; + ] + + let phashtbl_replace n = + let h = PHashtbl.create 50 in + for i = 0 to n do + PHashtbl.replace h i i; + done; + for i = n downto 0 do + PHashtbl.replace h i i; + done; + h + + let hashtbl_replace n = + let h = Hashtbl.create 50 in + for i = 0 to n do + Hashtbl.replace h i i; + done; + for i = n downto 0 do + Hashtbl.replace h i i; + done; + h + + let ihashtbl_replace n = + let h = IHashtbl.create 50 in + for i = 0 to n do + IHashtbl.replace h i i; + done; + for i = n downto 0 do + IHashtbl.replace h i i; + done; + h + + let iflathashtbl_replace n = + let h = IFlatHashtbl.create 50 in + for i = 0 to n do + IFlatHashtbl.replace h i i; + done; + for i = n downto 0 do + IFlatHashtbl.replace h i i; + done; + h + + let ifhashtbl_replace n = + let h = ref (IFHashtbl.empty 32) in + for i = 0 to n do + h := IFHashtbl.replace !h i i; + done; + for i = n downto 0 do + h := IFHashtbl.replace !h i i; + done; + !h + + let ipersistenthashtbl_replace n = + let h = ref (IPersistentHashtbl.create 32) in + for i = 0 to n do + h := IPersistentHashtbl.replace !h i i; + done; + for i = n downto 0 do + h := IPersistentHashtbl.replace !h i i; + done; + !h + + let skiplist_replace n = + let l = SkipList.create compare in + for i = 0 to n do + SkipList.add l i i; + done; + for i = n downto 0 do + SkipList.add l i i; + done; + l + + let imap_replace n = + let h = ref IMap.empty in + for i = 0 to n do + h := IMap.add i i !h; + done; + for i = n downto 0 do + h := IMap.add i i !h; + done; + !h + + let icchashtbl_replace n = + let h = ICCHashtbl.create 50 in + for i = 0 to n do + ICCHashtbl.add h i i; + done; + for i = n downto 0 do + ICCHashtbl.add h i i; + done; + h + + let bench_maps2 n = + CCBench.throughputN 3 + ["phashtbl_replace", (fun n -> ignore (phashtbl_replace n)), n; + "hashtbl_replace", (fun n -> ignore (hashtbl_replace n)), n; + "ihashtbl_replace", (fun n -> ignore (ihashtbl_replace n)), n; + "iflathashtbl_replace", (fun n -> ignore (iflathashtbl_replace n)), n; + "ifhashtbl_replace", (fun n -> ignore (ifhashtbl_replace n)), n; + "ipersistenthashtbl_replace", (fun n -> ignore (ipersistenthashtbl_replace n)), n; + "skiplist_replace", (fun n -> ignore (skiplist_replace n)), n; + "imap_replace", (fun n -> ignore (imap_replace n)), n; + "ccflathashtbl_replace", (fun n -> ignore (icchashtbl_replace n)), n; + ] + + let my_len = 250 + + let phashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (PHashtbl.find h i); + done + + let hashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (Hashtbl.find h i); + done + + let ihashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (IHashtbl.find h i); + done + + let iflathashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (IFlatHashtbl.find h i); + done + + let ifhashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (IFHashtbl.find h i); + done + + let ipersistenthashtbl_find h = + fun n -> + for i = 0 to n-1 do + ignore (IPersistentHashtbl.find h i); + done + + let skiplist_find l = + fun n -> + for i = 0 to n-1 do + ignore (SkipList.find l i); + done + + let array_find a = + fun n -> + for i = 0 to n-1 do + ignore (Array.get a i); + done + + let imap_find m = + fun n -> + for i = 0 to n-1 do + ignore (IMap.find i m); + done + + let icchashtbl_find m = + fun n -> + for i = 0 to n-1 do + ignore (ICCHashtbl.get_exn i m); + done + + let bench_maps3 n = + let h = phashtbl_add n in + let h' = hashtbl_add n in + let h'' = ihashtbl_add n in + let h''' = iflathashtbl_add n in + let h'''' = ifhashtbl_add n in + let h''''' = ipersistenthashtbl_add n in + let l = skiplist_add n in + let a = Array.init n (fun i -> string_of_int i) in + let m = imap_add n in + let h'''''' = icchashtbl_add n in + CCBench.throughputN 3 [ + "phashtbl_find", (fun () -> phashtbl_find h n), (); + "hashtbl_find", (fun () -> hashtbl_find h' n), (); + "ihashtbl_find", (fun () -> ihashtbl_find h'' n), (); + "iflathashtbl_find", (fun () -> iflathashtbl_find h''' n), (); + "ifhashtbl_find", (fun () -> ifhashtbl_find h'''' n), (); + "ipersistenthashtbl_find", (fun () -> ipersistenthashtbl_find h''''' n), (); + "skiplist_find", (fun () -> skiplist_find l n), (); + "array_find", (fun () -> array_find a n), (); + "imap_find", (fun () -> imap_find m n), (); + "cchashtbl_find", (fun () -> icchashtbl_find h'''''' n), (); + ] + + let () = CCBench.register CCBench.( + "tbl" >::: + [ "add" >:: with_int bench_maps1 [10; 100; 1_000; 10_000;] + ; "replace" >:: with_int bench_maps2 [10; 100; 1_000; 10_000] + ; "find" >:: with_int bench_maps3 [10; 20; 100; 1_000; 10_000] + ]) +end + +module Iter = struct + (** {2 Sequence/Gen} *) + + let bench_fold n = + let seq () = CCSequence.fold (+) 0 CCSequence.(0 --n) in + let gen () = CCGen.fold (+) 0 CCGen.(0 -- n) in + let klist () = CCKList.fold (+) 0 CCKList.(0 -- n) in + CCBench.throughputN 3 + [ "sequence.fold", seq, (); + "gen.fold", gen, (); + "klist.fold", klist, (); + ] + + let bench_flat_map n = + let seq () = CCSequence.( + 0 -- n |> flat_map (fun x -> x-- (x+10)) |> fold (+) 0 + ) + and gen () = CCGen.( + 0 -- n |> flat_map (fun x -> x-- (x+10)) |> fold (+) 0 + ) + and klist () = CCKList.( + 0 -- n |> flat_map (fun x -> x-- (x+10)) |> fold (+) 0 + ) + in + CCBench.throughputN 3 + [ "sequence.flat_map", seq, (); + "gen.flat_map", gen, (); + "klist.flat_map", klist, (); + ] + + let () = CCBench.register CCBench.( + "iter" >::: + [ "fold" >:: with_int bench_fold [100; 1_000; 10_000; 1_000_000] + ; "flat_map" >:: with_int bench_flat_map [1_000; 10_000] + ]) +end + +module Batch = struct + (** benchmark CCBatch *) + + open Containers_advanced + + module type COLL = sig + val name : string + include CCBatch.COLLECTION + val doubleton : 'a -> 'a -> 'a t + val (--) : int -> int -> int t + val equal : int t -> int t -> bool + end + + module Make(C : COLL) = struct + let f1 x = x mod 2 = 0 + let f2 x = -x + let f3 x = C.doubleton x (x+1) + let f4 x = -x + let collect a = C.fold (+) 0 a + + let naive a = + let a = C.filter f1 a in + let a = C.flat_map f3 a in + let a = C.filter f1 a in + let a = C.map f2 a in + let a = C.flat_map f3 a in + let a = C.map f4 a in + ignore (collect a); + a + + module BA = CCBatch.Make(C) + + let ops = + BA.(filter f1 >>> flat_map f3 >>> filter f1 >>> + map f2 >>> flat_map f3 >>> map f4) + + let batch a = + let a = BA.apply ops a in + ignore (collect a); + a + + let bench_for ~time n = + let a = C.(0 -- n) in + (* debug + CCPrint.printf "naive: %a\n" (CCArray.pp CCInt.pp) (naive a); + CCPrint.printf "simple: %a\n" (CCArray.pp CCInt.pp) (batch_simple a); + CCPrint.printf "batch: %a\n" (CCArray.pp CCInt.pp) (batch a); + *) + assert (C.equal (batch a) (naive a)); + CCBench.throughputN time + [ C.name ^ "_naive", naive, a + ; C.name ^ "_batch", batch, a + ] + + let bench = CCBench.( + C.name >:: map_int + [ bench_for ~time:1, 100 + ; bench_for ~time:4, 100_000 + ; bench_for ~time:4, 1_000_000 + ]) + end + + module BenchArray = Make(struct + include CCArray + let name = "array" + let equal a b = a=b + let doubleton x y = [| x; y |] + let fold = Array.fold_left + end) + + module BenchList = Make(struct + include CCList + let name = "list" + let equal a b = a=b + let doubleton x y = [ x; y ] + let fold = List.fold_left + end) + + module BenchKList = Make(struct + include CCKList + let name = "klist" + let equal a b = equal (=) a b + let doubleton x y = CCKList.of_list [ x; y ] + end) + + let () = CCBench.register CCBench.( + "batch" >:: mk_list + [ BenchKList.bench + ; BenchArray.bench + ; BenchList.bench + ]) +end + let () = CCBench.run_main () From 05ba0e5bba75dce57963dc29e6523fa31b002ac7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 17 Nov 2014 02:52:25 +0100 Subject: [PATCH 18/39] breaking change: new API for cache, based on values (no more functors) --- misc/cache.ml | 528 ++++++++++++++++++++----------------------------- misc/cache.mli | 182 +++++++++-------- 2 files changed, 319 insertions(+), 391 deletions(-) diff --git a/misc/cache.ml b/misc/cache.ml index bbf59d3c..ff79a43e 100644 --- a/misc/cache.ml +++ b/misc/cache.ml @@ -25,356 +25,258 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Memoization caches} *) -module type EQ = sig - type t - val equal : t -> t -> bool +type 'a equal = 'a -> 'a -> bool +type 'a hash = 'a -> int + +let default_eq_ = Pervasives.(=) +let default_hash_ = Hashtbl.hash + +(** {2 Value interface} *) + +type ('a,'b) t = { + set : 'a -> 'b -> unit; + get : 'a -> 'b; (* or raise Not_found *) + clear : unit -> unit; +} + +let clear c = c.clear () + +let with_cache c f x = + try + c.get x + with Not_found -> + let y = f x in + c.set x y; + y + +let with_cache_rec c f = + let rec f' x = with_cache c (f f') x in + f' + +let dummy = { + set=(fun _ _ -> ()); + get=(fun _ -> raise Not_found); + clear=(fun _ -> ()); +} + +module Linear = struct + type ('a,'b) bucket = + | Empty + | Pair of 'a * 'b + + type ('a,'b) t = { + eq : 'a equal; + arr : ('a,'b) bucket array; + mutable i : int; (* index for next assertion, cycles through *) + } + + let make eq size = + assert (size>0); + {arr=Array.make size Empty; eq; i=0; } + + let clear c = + Array.fill c.arr 0 (Array.length c.arr) Empty; + c.i <- 0 + + (* linear lookup *) + let rec search_ c i x = + if i=Array.length c.arr then raise Not_found; + match c.arr.(i) with + | Pair (x', y) when c.eq x x' -> y + | Pair _ + | Empty -> search_ c (i+1) x + + let get c x = search_ c 0 x + + let set c x y = + c.arr.(c.i) <- Pair (x,y); + c.i <- (c.i + 1) mod Array.length c.arr end +let linear ?(eq=default_eq_) size = + let size = max size 1 in + let arr = Linear.make eq size in + { get=(fun x -> Linear.get arr x); + set=(fun x y -> Linear.set arr x y); + clear=(fun () -> Linear.clear arr); + } + +module Replacing = struct + type ('a,'b) bucket = + | Empty + | Pair of 'a * 'b + + type ('a,'b) t = { + eq : 'a equal; + hash : 'a hash; + arr : ('a,'b) bucket array; + } + + let make eq hash size = + assert (size>0); + {arr=Array.make size Empty; eq; hash } + + let clear c = + Array.fill c.arr 0 (Array.length c.arr) Empty + + let get c x = + let i = c.hash x mod Array.length c.arr in + match c.arr.(i) with + | Pair (x', y) when c.eq x x' -> y + | Pair _ + | Empty -> raise Not_found + + let set c x y = + let i = c.hash x mod Array.length c.arr in + c.arr.(i) <- Pair (x,y) +end + +let replacing ?(eq=default_eq_) ?(hash=default_hash_) size = + let c = Replacing.make eq hash size in + { get=(fun x -> Replacing.get c x); + set=(fun x y -> Replacing.set c x y); + clear=(fun () -> Replacing.clear c); + } + module type HASH = sig - include EQ - val hash : t -> int + type t + val equal : t equal + val hash : t hash end -(** Signature of a cache for values *) -module type S = sig - type 'a t - type key - - val create : int -> 'a t - (** Create a new cache of the given size. *) - - val clear : 'a t -> unit - (** Clear content of the cache *) - - val with_cache : 'a t -> (key -> 'a) -> key -> 'a - (** Wrap the function with the cache. This means that - [with_cache cache f x] always returns the same value as - [f x], if [f x] returns, or raise the same exception. - However, [f] may not be called if [x] is in the cache. *) - - val with_cache_rec : 'a t -> ((key -> 'a) -> key -> 'a) -> key -> 'a - (** Partially apply the given function with a cached version of itself. - It returns the specialized function. - [with_cache_rec cache f] applies [f] to a cached version of [f], - called [f'], so that [f' x = f f' x]. *) -end - -(** Signature of a cache for pairs of values *) -module type S2 = sig - type 'a t - type key1 - type key2 - - val create : int -> 'a t - (** Create a new cache of the given size. *) - - val clear : 'a t -> unit - (** Clear content of the cache *) - - val with_cache : 'a t -> (key1 -> key2 -> 'a) -> key1 -> key2 -> 'a - (** Wrap the function with the cache *) -end - -(** {2 Dummy cache (no caching) *) - -module Dummy(X : sig type t end) = struct - type 'a t = unit - and key = X.t - - let create size = () - - let clear () = () - - let with_cache () f x = f x - - let with_cache_rec () f x = - let rec f' x = f f' x in - f' x -end - -module Dummy2(X : sig type t end)(Y : sig type t end) = struct - type 'a t = unit - and key1 = X.t - and key2 = Y.t - - let create size = () - - let clear () = () - - let with_cache () f x1 x2 = f x1 x2 -end - -(** {2 Small linear cache} *) - -(** This cache stores (key,value) pairs in an array, that is traversed - linearily. It is therefore only reasonable for small sizes (like 5). *) - -module Linear(X : EQ) = struct - type 'a t = 'a bucket array - and 'a bucket = Empty | Pair of key * 'a | PairRaise of key * exn - and key = X.t - - let create size = - assert (size >= 1); - Array.make size Empty - - let clear cache = - Array.fill cache 0 (Array.length cache) Empty - - (** Insert the bucket [b] into the cache *) - let insert cache b = - let n = Array.length cache in - (* shift other values toward the end *) - Array.blit cache 0 cache 1 (n-1); - cache.(0) <- b - - (** Try to find [f x] in the cache, otherwise compute it - and cache the result *) - let with_cache cache f x = - let n = Array.length cache in - let rec search i = - (* function that performs the lookup *) - if i = n then begin - (* cache miss *) - try - let y = f x in - insert cache (Pair (x, y)); - y - with e -> - insert cache (PairRaise (x, e)); - raise e - end else match cache.(i) with - | Pair (x',y) when X.equal x x' -> y - | PairRaise (x', e) when X.equal x x' -> raise e - | _ -> search (i+1) - in - search 0 - - let with_cache_rec cache f x = - (* make a recursive version of [f] that uses the cache *) - let rec f' x = with_cache cache (fun x -> f f' x) x in - f' x -end - -module Linear2(X : EQ)(Y : EQ) = struct - type 'a t = 'a bucket array - and 'a bucket = Empty | Assoc of key1 * key2 * 'a | AssocRaise of key1 * key2 * exn - and key1 = X.t - and key2 = Y.t - - let create size = - assert (size >= 1); - Array.make size Empty - - let clear cache = - Array.fill cache 0 (Array.length cache) Empty - - (** Insert the binding [b] into the cache *) - let insert cache b = - let n = Array.length cache in - (* shift other values toward the end *) - Array.blit cache 0 cache 1 (n-1); - cache.(0) <- b - - (** Try to find [f x] in the cache, otherwise compute it - and cache the result *) - let with_cache cache f x1 x2 = - let n = Array.length cache in - let rec search i = - (* function that performs the lookup *) - if i = n then begin - (* cache miss *) - try - let y = f x1 x2 in - insert cache (Assoc (x1, x2, y)); - y - with e -> - insert cache (AssocRaise (x1, x2, e)); - raise e - end else match cache.(i) with - | Assoc (x1',x2',y) when X.equal x1 x1' && Y.equal x2 x2' -> y - | AssocRaise (x1',x2',e) when X.equal x1 x1' && Y.equal x2 x2' -> raise e - | _ -> search (i+1) - in - search 0 -end - -(** {2 An imperative cache of fixed size for memoization of pairs} *) - -module Replacing(X : HASH) = struct - type key = X.t - - (** A slot of the array contains a (key, value, true) - if key->value is stored there (at index hash(key) % length), - (null, null, false) otherwise. - - The first slot in the array contains the function - used to produce the value upon a cache miss. *) - type 'a t = 'a bucket array - and 'a bucket = Empty | Assoc of key * 'a | AssocRaise of key * exn - - let create size = - Array.make size Empty - - let clear c = - Array.fill c 0 (Array.length c) Empty - - (** Try to find [f x] in the cache, otherwise compute it - and cache the result *) - let with_cache c f x = - let i = (X.hash x) mod (Array.length c) in - match c.(i) with - | Assoc (x', y) when X.equal x x' -> - y (* cache hit *) - | AssocRaise (x', e) when X.equal x x' -> - raise e (* cache hit *) - | _ -> (* cache miss *) - try - let y = f x in - c.(i) <- Assoc (x, y); - y - with e -> - c.(i) <- AssocRaise (x, e); - raise e - - let with_cache_rec cache f x = - (* make a recursive version of [f] that uses the cache *) - let rec f' x = with_cache cache (fun x -> f f' x) x in - f' x -end - -module Replacing2(X : HASH)(Y : HASH) = struct - (** A slot of the array contains a (key, value, true) - if key->value is stored there (at index hash(key) % length), - (null, null, false) otherwise. - - The first slot in the array contains the function - used to produce the value upon a cache miss. *) - type 'a t = 'a bucket array - and 'a bucket = Empty | Assoc of key1 * key2 * 'a | AssocRaise of key1 * key2 * exn - and key1 = X.t - and key2 = Y.t - - let create size = - Array.make size Empty - - let clear c = - Array.fill c 0 (Array.length c) Empty - - let with_cache c f x1 x2 = - let i = (((X.hash x1 + 17) lxor Y.hash x2) mod Array.length c) in - match c.(i) with - | Assoc (x1', x2', y) when X.equal x1 x1' && Y.equal x2 x2' -> - y (* cache hit *) - | AssocRaise (x1', x2', e) when X.equal x1 x1' && Y.equal x2 x2' -> - raise e (* cache hit *) - | _ -> (* cache miss *) - try - let y = f x1 x2 in - c.(i) <- Assoc (x1, x2, y); - y - with e -> - c.(i) <- AssocRaise (x1, x2, e); - raise e -end - -(** {2 Hashtables with Least Recently Used eviction policy *) - -(* TODO: handle exceptions *) - -module LRU(X : HASH) = struct +module LRU(X:HASH) = struct type key = X.t module H = Hashtbl.Make(X) type 'a t = { table : 'a node H.t; (* hashtable key -> node *) - first : 'a node; (* dummy node for the entry of the list *) - mutable len : int; (* number of entries *) + mutable first : 'a node option; + mutable last : 'a node option; size : int; (* max size *) } and 'a node = { mutable key : key; mutable value : 'a; - mutable next : 'a node; - mutable prev : 'a node; - } (** Meta data for the value *) + mutable next : 'a node option; + } (** Meta data for the value, making a chained list *) - let create size = - let rec first = - { key = Obj.magic 0; value = Obj.magic 0; next=first; prev=first; } - in + let make size = + assert (size > 0); { table = H.create size; - len = 0; size; - first; + first=None; + last=None; } - (** Clear the content of the cache *) let clear c = - c.len <- 0; H.clear c.table; - c.first.next <- c.first; - c.first.prev <- c.first; + c.first <- None; + c.last <- None; () - (** Find an element, or raise Not_found *) - let find c x = - let n = H.find c.table x in - assert (X.equal n.key x); - n.value + let get c x = (H.find c.table x).value - (** Replace least recently used element of [c] by x->y *) - let replace c x y = - let n = c.first.next in - (* remove old element *) + let get_opt = function + | None -> assert false + | Some x -> x + + (* reverse the list *) + let rec reverse_ prev = function + | None -> prev + | Some n as node -> + let next = n.next in + n.next <- prev; + reverse_ node next + + (* take first from queue *) + let take_ c = + match c.first with + | Some n -> + c.first <- n.next; + n + | None -> + (* re-fill front list *) + match reverse_ None c.last with + | None -> assert false + | Some n -> + c.first <- n.next; + n + + let push_ c n = + n.next <- c.last; + c.last <- Some n + + (* Replace least recently used element of [c] by x->y *) + let replace_ c x y = + (* remove old *) + let n = take_ c in H.remove c.table n.key; - (* insertion in hashtable *) + (* add x->y *) H.add c.table x n; - (* re-use the node for x,y *) n.key <- x; n.value <- y; - (* remove from front of queue *) - n.next.prev <- c.first; - c.first.next <- n.next; - (* insert at back of queue *) - let last = c.first.prev in - last.next <- n; - c.first.prev <- n; - n.next <- c.first; - n.prev <- last; + (* push at back of queue *) + push_ c n; () - (** Insert x->y in the cache, increasing its entry count *) - let insert c x y = - c.len <- c.len + 1; + (* Insert x->y in the cache, increasing its entry count *) + let insert_ c x y = let n = { key = x; value = y; - next = c.first; - prev = c.first.prev; + next = c.last; } in - (* insertion in hashtable *) H.add c.table x n; - (* insertion at back of queue *) - c.first.prev.next <- n; - c.first.prev <- n; + c.last <- Some n; () - (** Try to find [f x] in the cache, otherwise compute it - and cache the result *) - let with_cache c f x = - try - find c x - with Not_found -> - let y = f x in - (if c.len = c.size - then replace c x y - else insert c x y); - y - - let with_cache_rec cache f x = - (* make a recursive version of [f] that uses the cache *) - let rec f' x = with_cache cache (fun x -> f f' x) x in - f' x + let set c x y = + let len = H.length c.table in + assert (len <= c.size); + if len = c.size + then replace_ c x y + else insert_ c x y end + +let lru (type a) ?(eq=default_eq_) ?(hash=default_hash_) size = + let module L = LRU(struct + type t = a + let equal = eq + let hash = hash + end) in + let c = L.make size in + { get=(fun x -> L.get c x); + set=(fun x y -> L.set c x y); + clear=(fun () -> L.clear c); + } + +module UNBOUNDED(X:HASH) = struct + type key = X.t + + module H = Hashtbl.Make(X) + + type 'a t = 'a H.t + + let make size = + assert (size > 0); + H.create size + + let clear c = H.clear c + + let get c x = H.find c x + + let set c x y = H.replace c x y +end + +let unbounded (type a) ?(eq=default_eq_) ?(hash=default_hash_) size = + let module C = UNBOUNDED(struct + type t = a + let equal = eq + let hash = hash + end) in + let c = C.make size in + { get=(fun x -> C.get c x); + set=(fun x y -> C.set c x y); + clear=(fun () -> C.clear c); + } diff --git a/misc/cache.mli b/misc/cache.mli index 63637a44..d548bbfd 100644 --- a/misc/cache.mli +++ b/misc/cache.mli @@ -25,83 +25,109 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Memoization caches} *) -(** {2 Signatures} *) +type 'a equal = 'a -> 'a -> bool +type 'a hash = 'a -> int -module type EQ = sig - type t - val equal : t -> t -> bool +(** {2 Value interface} + +Typical use case: one wants to memoize a function [f : 'a -> 'b]. Code sample: +{[ +let f x = + print_endline "call f"; + x + 1;; + +let f' = with_cache (lru 256) f;; +f' 0;; (* prints *) +f' 1;; (* prints *) +f' 0;; (* doesn't print, returns cached value *) +]} + +@since NEXT_RELEASE *) + +type ('a, 'b) t + +val clear : (_,_) t -> unit +(** Clear the content of the cache *) + +val with_cache : ('a, 'b) t -> ('a -> 'b) -> 'a -> 'b +(** [with_cache c f] behaves like [f], but caches calls to [f] in the + cache [c]. It always returns the same value as + [f x], if [f x] returns, or raise the same exception. + However, [f] may not be called if [x] is in the cache. *) + +val with_cache_rec : ('a,'b) t -> (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b +(** [with_cache_rec c f] is a function that first, applies [f] to + some [f' = fix f], such that recursive calls to [f'] are cached in [c]. + It is similar to {!with_cache} but with a function that takes as + first argument its own recursive version. + Examples (memoized Fibonacci function): +{[ +let fib = with_cache_rec (lru 256) + (fun fib' n -> match n with + | 1 | 2 -> 1 + | _ -> fib' (n-1) + fib' (n-2) + );; + +fib 70;; +]} +*) + +val dummy : ('a,'b) t +(** dummy cache, never stores any value *) + +val linear : ?eq:'a equal -> int -> ('a, 'b) t +(** Linear cache with the given size. It stores key/value pairs in + an array and does linear search at every call, so it should only be used + with small size. + @param eq optional equality predicate for keys *) + +val replacing : ?eq:'a equal -> ?hash:'a hash -> + int -> ('a,'b) t +(** Replacing cache of the given size. Equality and hash functions can be + parametrized. It's a hash table that handles collisions by replacing + the old value with the new (so a cache entry is evicted when another + entry with the same hash (modulo size) is added). + Never grows wider than the given size. *) + +val lru : ?eq:'a equal -> ?hash:'a hash -> + int -> ('a,'b) t +(** LRU cache of the given size ("Least Recently Used": keys that have not been + used recently are deleted first). Never grows wider. *) + +val unbounded : ?eq:'a equal -> ?hash:'a hash -> + int -> ('a,'b) t +(** Unbounded cache, backed by a Hash table. Will grow forever + unless {!clear} is called manually. *) + +(** {2 Binary Caches} +TODO + +module C2 : sig + type ('a, 'b, 'c) t + + val clear : (_,_,_) t -> unit + + val with_cache : ('a, 'b, 'c) t -> ('a -> 'b -> 'c) -> 'a -> 'b -> 'c + + val with_cache_rec : ('a,'b,'c) t -> + (('a -> 'b -> 'c) -> 'a -> 'b -> 'c) -> + 'a -> 'b -> 'c + + val dummy : ('a,'b,'c) t + + val linear : ?eq1:('a -> 'a -> bool) -> ?eq2:('b -> 'b -> bool) -> + int -> ('a, 'b, 'c) t + + val replacing : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) -> + ?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) -> + int -> ('a,'b,'c) t + + val lru : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) -> + ?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) -> + int -> ('a,'b,'c) t + + val unbounded : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) -> + ?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) -> + int -> ('a,'b,'c) t end - -module type HASH = sig - include EQ - val hash : t -> int -end - -(** Signature of a cache for values *) -module type S = sig - type 'a t - type key - - val create : int -> 'a t - (** Create a new cache of the given size. *) - - val clear : 'a t -> unit - (** Clear content of the cache *) - - val with_cache : 'a t -> (key -> 'a) -> key -> 'a - (** Wrap the function with the cache. This means that - [with_cache cache f x] always returns the same value as - [f x], if [f x] returns, or raise the same exception. - However, [f] may not be called if [x] is in the cache. *) - - val with_cache_rec : 'a t -> ((key -> 'a) -> key -> 'a) -> key -> 'a - (** Partially apply the given function with a cached version of itself. - It returns the specialized function. - [with_cache_rec cache f] applies [f] to a cached version of [f], - called [f'], so that [f' x = f f' x]. *) -end - -(** Signature of a cache for pairs of values *) -module type S2 = sig - type 'a t - type key1 - type key2 - - val create : int -> 'a t - (** Create a new cache of the given size. *) - - val clear : 'a t -> unit - (** Clear content of the cache *) - - val with_cache : 'a t -> (key1 -> key2 -> 'a) -> key1 -> key2 -> 'a - (** Wrap the function with the cache *) -end - -(** {2 Dummy cache (no caching)} *) - -module Dummy(X : sig type t end) : S with type key = X.t - -module Dummy2(X : sig type t end)(Y : sig type t end) : S2 with type key1 = X.t and type key2 = Y.t - -(** {2 Small linear cache} *) - -(** This cache stores (key,value) pairs in an array, that is traversed - linearily. It is therefore only reasonable for small sizes (like 5). *) - -module Linear(X : EQ) : S with type key = X.t - -module Linear2(X : EQ)(Y : EQ) : S2 with type key1 = X.t and type key2 = Y.t - -(** {2 Hashtables that resolve collisions by replacing} *) - -module Replacing(X : HASH) : S with type key = X.t - -module Replacing2(X : HASH)(Y : HASH) : S2 with type key1 = X.t and type key2 = Y.t - -(** {2 Hashtables with Least Recently Used eviction policy} *) - -module LRU(X : HASH) : S with type key = X.t - -(* TODO exception handling in LRU *) -(* TODO LRU2 *) - +*) From fbc278907ae5436db8ac2891cf43c700b78418f1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 17 Nov 2014 02:52:46 +0100 Subject: [PATCH 19/39] updated benchmarks for Cache, to use new API and fix a stupid issue --- benchs/run_benchs.ml | 63 ++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 04c67756..d8cfd8e4 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -127,56 +127,37 @@ module Vec = struct end module Cache = struct - module Fibo(C : Cache.S with type key = int) = struct - let fib ~size = - let fib fib' n = - match n with + let make_fib c = + let f = Cache.with_cache_rec c + (fun fib n -> match n with | 0 -> 0 | 1 -> 1 | 2 -> 1 - | n -> - fib' (n-1) + fib' (n-2) - in - let cache = C.create size in - let cached_fib x = C.with_cache_rec cache fib x in - cached_fib - end - - module LinearIntCache = Cache.Linear(struct - type t = int - let equal i j = i = j - end) - - module ReplacingIntCache = Cache.Replacing(struct - type t = int - let equal i j = i = j - let hash i = i - end) - - module LRUIntCache = Cache.LRU(struct - type t = int - let equal i j = i = j - let hash i = i - end) - - module DummyIntCache = Cache.Dummy(struct type t = int end) - - module LinearFibo = Fibo(LinearIntCache) - module ReplacingFibo = Fibo(ReplacingIntCache) - module LRUFibo= Fibo(LRUIntCache) - module DummyFibo = Fibo(DummyIntCache) + | n -> fib (n-1) + fib (n-2) + ) + in + fun x -> + Cache.clear c; + f x let bench_fib n = - CCBench.throughputN 3 - [ "linear_fib", LinearFibo.fib ~size:5, n; - "replacing_fib", ReplacingFibo.fib ~size:256, n; - "LRU_fib", LRUFibo.fib ~size:256, n; - "dummy_fib", DummyFibo.fib ~size:5, n; + let l = + [ "replacing_fib", make_fib (Cache.replacing 256), n + ; "LRU_fib", make_fib (Cache.lru 256), n ] + in + let l = if n <= 20 + then [ "linear_fib (5)", make_fib (Cache.linear 5), n + ; "linear_fib (32)", make_fib (Cache.linear 32), n + ; "dummy_fib", make_fib Cache.dummy, n + ] @ l + else l + in + CCBench.throughputN 3 l let () = CCBench.register CCBench.( "cache" >::: - [ "fib" >:: with_int bench_fib [10; 100] + [ "fib" >:: with_int bench_fib [10; 20; 100; 200; 1_000;] ] ) end From 62135fc9b7d7add13cd3fee956df6c6d95d40430 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 17 Nov 2014 09:16:07 +0100 Subject: [PATCH 20/39] fix the LRU algorithm to really be "least recently used" --- misc/cache.ml | 69 +++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/misc/cache.ml b/misc/cache.ml index ff79a43e..b5a657ff 100644 --- a/misc/cache.ml +++ b/misc/cache.ml @@ -152,13 +152,13 @@ module LRU(X:HASH) = struct type 'a t = { table : 'a node H.t; (* hashtable key -> node *) mutable first : 'a node option; - mutable last : 'a node option; size : int; (* max size *) } and 'a node = { mutable key : key; mutable value : 'a; - mutable next : 'a node option; + mutable next : 'a node; + mutable prev : 'a node; } (** Meta data for the value, making a chained list *) let make size = @@ -166,71 +166,82 @@ module LRU(X:HASH) = struct { table = H.create size; size; first=None; - last=None; } let clear c = H.clear c.table; c.first <- None; - c.last <- None; () - let get c x = (H.find c.table x).value - let get_opt = function | None -> assert false | Some x -> x - (* reverse the list *) - let rec reverse_ prev = function - | None -> prev - | Some n as node -> - let next = n.next in - n.next <- prev; - reverse_ node next - (* take first from queue *) let take_ c = match c.first with + | Some n when n.next == n -> + (* last element *) + c.first <- None; + n | Some n -> - c.first <- n.next; + c.first <- Some n.next; + n.prev.next <- n.next; + n.next.prev <- n.prev; n | None -> - (* re-fill front list *) - match reverse_ None c.last with - | None -> assert false - | Some n -> - c.first <- n.next; - n + failwith "LRU: empty queue" + (* push at back of queue *) let push_ c n = - n.next <- c.last; - c.last <- Some n + match c.first with + | None -> + n.next <- n; + n.prev <- n; + c.first <- Some n + | Some n1 when n1==n -> () + | Some n1 -> + n.prev <- n1.prev; + n.next <- n1; + n1.prev.next <- n; + n1.prev <- n + + (* remove from queue *) + let remove_ n = + n.prev.next <- n.next; + n.next.prev <- n.prev (* Replace least recently used element of [c] by x->y *) let replace_ c x y = (* remove old *) let n = take_ c in H.remove c.table n.key; - (* add x->y *) - H.add c.table x n; + (* add x->y, at the back of the queue *) n.key <- x; n.value <- y; - (* push at back of queue *) + H.add c.table x n; push_ c n; () (* Insert x->y in the cache, increasing its entry count *) let insert_ c x y = - let n = { + let rec n = { key = x; value = y; - next = c.last; + next = n; + prev = n; } in H.add c.table x n; - c.last <- Some n; + push_ c n; () + let get c x = + let n = H.find c.table x in + (* put n at the back of the queue *) + remove_ n; + push_ c n; + n.value + let set c x y = let len = H.length c.table in assert (len <= c.size); From a2617fd83c2c71aca272685eaa1669f53be02f95 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 17 Nov 2014 09:20:04 +0100 Subject: [PATCH 21/39] more precise benchmarks for caches --- benchs/run_benchs.ml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index d8cfd8e4..2443cfbb 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -142,8 +142,11 @@ module Cache = struct let bench_fib n = let l = - [ "replacing_fib", make_fib (Cache.replacing 256), n - ; "LRU_fib", make_fib (Cache.lru 256), n + [ "replacing_fib (128)", make_fib (Cache.replacing 128), n + ; "LRU_fib (128)", make_fib (Cache.lru 128), n + ; "replacing_fib (16)", make_fib (Cache.replacing 16), n + ; "LRU_fib (16)", make_fib (Cache.lru 16), n + ; "unbounded", make_fib (Cache.unbounded 32), n ] in let l = if n <= 20 From fbf24ea7c7ef8199dbb09cec690086e8667904c8 Mon Sep 17 00:00:00 2001 From: "Hezekiah M. Carty" Date: Tue, 18 Nov 2014 11:09:54 -0500 Subject: [PATCH 22/39] CCError.map2 could map to a new error type With the recent change from `_ CCError.t` to `(_, _) CCError.t` it's reasonable to allow `map2` to take an argument transforming the error to a new type. --- core/CCError.mli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/CCError.mli b/core/CCError.mli index 95929888..7ab6bef2 100644 --- a/core/CCError.mli +++ b/core/CCError.mli @@ -62,7 +62,7 @@ val map_err : ('err1 -> 'err2) -> ('a, 'err1) t -> ('a, 'err2) t (** Map on error. @since 0.5 *) -val map2 : ('a -> 'b) -> ('err -> 'err) -> ('a, 'err) t -> ('b, 'err) t +val map2 : ('a -> 'b) -> ('err1 -> 'err2) -> ('a, 'err1) t -> ('b, 'err2) t (** Same as {!map}, but also with a function that can transform the error message in case of failure *) From af850a88c163dbe644e89422430277e799e8fb8e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 19 Nov 2014 14:49:51 +0100 Subject: [PATCH 23/39] CCFun.on_top_of (binary composition) --- core/CCFun.cppo.ml | 2 ++ core/CCFun.mli | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/core/CCFun.cppo.ml b/core/CCFun.cppo.ml index 49e2fada..e9568b35 100644 --- a/core/CCFun.cppo.ml +++ b/core/CCFun.cppo.ml @@ -40,6 +40,8 @@ let (@@) f x = f x let compose f g x = g (f x) +let compose_binop f g x y = g (f x) (f y) + let flip f x y = f y x let curry f x y = f (x,y) diff --git a/core/CCFun.mli b/core/CCFun.mli index 6ac21173..2d7ab372 100644 --- a/core/CCFun.mli +++ b/core/CCFun.mli @@ -32,6 +32,12 @@ val (|>) : 'a -> ('a -> 'b) -> 'b val compose : ('a -> 'b) -> ('b -> 'c) -> 'a -> 'c (** Composition *) +val compose_binop : ('a -> 'b) -> ('b -> 'b -> 'c) -> 'a -> 'a -> 'c +(** [compose_binop f g] is [fun x y -> g (f x) (f y)] + Example (partial order): + [List.sort (compose_binop fst CCInt.compare) [1, true; 2, false; 1, false]] + @since NEXT_RELEASE*) + val (%>) : ('a -> 'b) -> ('b -> 'c) -> 'a -> 'c (** Alias to [compose] *) @@ -68,9 +74,9 @@ val lexicographic : ('a -> 'a -> int) -> ('a -> 'a -> int) -> 'a -> 'a -> int (** Lexicographic combination of comparison functions *) val finally : h:(unit -> unit) -> f:(unit -> 'a) -> 'a - (** [finally h f] calls [f ()] and returns its result. If it raises, the - same exception is raised; in {b any} case, [h ()] is called after - [f ()] terminates. *) +(** [finally h f] calls [f ()] and returns its result. If it raises, the + same exception is raised; in {b any} case, [h ()] is called after + [f ()] terminates. *) (** {2 Monad} From a47bd108ecd2b09cc48819612a1eaeaad9a18b31 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 19 Nov 2014 16:39:52 +0100 Subject: [PATCH 24/39] CCVector.{top,top_exn} --- core/CCVector.ml | 13 +++++++++++++ core/CCVector.mli | 9 +++++++++ 2 files changed, 22 insertions(+) diff --git a/core/CCVector.ml b/core/CCVector.ml index b236ee2b..143a5a55 100644 --- a/core/CCVector.ml +++ b/core/CCVector.ml @@ -215,6 +215,19 @@ let pop v = try Some (pop_exn v) with Failure _ -> None +let top v = + if v.size = 0 then None else Some v.vec.(v.size-1) + +let top_exn v = + if v.size = 0 then failwith "Vector.top"; + v.vec.(v.size-1) + +(*$T + 1 -- 10 |> top = Some 10 + create () |> top = None + 1 -- 10 |> top_exn = 10 + *) + let copy v = { size = v.size; vec = Array.sub v.vec 0 v.size; diff --git a/core/CCVector.mli b/core/CCVector.mli index a032eb88..94a312fb 100644 --- a/core/CCVector.mli +++ b/core/CCVector.mli @@ -99,6 +99,15 @@ val pop_exn : ('a, rw) t -> 'a (** remove last element, or raise a Failure if empty @raise Failure on an empty vector *) +val top : ('a, _) t -> 'a option +(** Top element, if present + @since NEXT_RELEASE *) + +val top_exn : ('a, _) t -> 'a +(** Top element, if present + @raise Failure on an empty vector + @since NEXT_RELEASE *) + val copy : ('a,_) t -> ('a,'mut) t (** Shallow copy (may give an immutable or mutable vector) *) From cad578840eb6bb1e315ccadb99da362cbc7a7c2a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 19 Nov 2014 17:16:57 +0100 Subject: [PATCH 25/39] import Mixtbl from its repository, into misc/ --- _oasis | 2 +- misc/mixtbl.ml | 91 ++++++++++++++++++++++++++++++++++ misc/mixtbl.mli | 113 +++++++++++++++++++++++++++++++++++++++++++ tests/run_tests.ml | 1 + tests/test_mixtbl.ml | 94 +++++++++++++++++++++++++++++++++++ 5 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 misc/mixtbl.ml create mode 100644 misc/mixtbl.mli create mode 100644 tests/test_mixtbl.ml diff --git a/_oasis b/_oasis index be3dba05..36303bfe 100644 --- a/_oasis +++ b/_oasis @@ -81,7 +81,7 @@ Library "containers_misc" PHashtbl, SkipList, SplayTree, SplayMap, Univ, Bij, PiCalculus, RAL, UnionFind, SmallSet, AbsSet, CSM, TTree, PrintBox, HGraph, Automaton, Conv, Bidir, Iteratee, - BTree, Ty, Cause, AVL, ParseReact + BTree, Ty, Cause, AVL, ParseReact, Mixtbl BuildDepends: unix,containers FindlibName: misc FindlibParent: containers diff --git a/misc/mixtbl.ml b/misc/mixtbl.ml new file mode 100644 index 00000000..95d3413b --- /dev/null +++ b/misc/mixtbl.ml @@ -0,0 +1,91 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Hash Table with Heterogeneous Keys} *) + +type 'a t = ('a, (unit -> unit)) Hashtbl.t + +type ('a, 'b) injection = { + getter : 'a t -> 'a -> 'b option; + setter : 'a t -> 'a -> 'b -> unit; +} + +let create n = Hashtbl.create n + +let access () = + let r = ref None in + let getter tbl k = + r := None; (* reset state in case last operation was not a get *) + try + (Hashtbl.find tbl k) (); + let result = !r in + r := None; (* clean up here in order to avoid memory leak *) + result + with Not_found -> None + in + let setter tbl k v = + let v_opt = Some v in + Hashtbl.replace tbl k (fun () -> r := v_opt) + in + { getter; setter; } + +let get ~inj tbl x = inj.getter tbl x + +let set ~inj tbl x y = inj.setter tbl x y + +let length tbl = Hashtbl.length tbl + +let clear tbl = Hashtbl.clear tbl + +let remove tbl x = Hashtbl.remove tbl x + +let copy tbl = Hashtbl.copy tbl + +let mem ~inj tbl x = + match inj.getter tbl x with + | None -> false + | Some _ -> true + +let find ~inj tbl x = + match inj.getter tbl x with + | None -> raise Not_found + | Some y -> y + +let iter_keys tbl f = + Hashtbl.iter (fun x _ -> f x) tbl + +let fold_keys tbl acc f = + Hashtbl.fold (fun x _ acc -> f acc x) tbl acc + +let keys tbl = + Hashtbl.fold (fun x _ acc -> x :: acc) tbl [] + +let bindings ~inj tbl = + fold_keys tbl [] + (fun acc k -> + match inj.getter tbl k with + | None -> acc + | Some v -> (k, v) :: acc) diff --git a/misc/mixtbl.mli b/misc/mixtbl.mli new file mode 100644 index 00000000..4681c1b9 --- /dev/null +++ b/misc/mixtbl.mli @@ -0,0 +1,113 @@ +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Hash Table with Heterogeneous Keys} + +From https://github.com/mjambon/mixtbl , thanks to him. +Example: + +{[ +let inj_int = Mixtbl.access () ;; + +let tbl = Mixtbl.create 10 ;; + +OUnit.assert_equal None (Mixtbl.get ~inj:inj_int tbl "a");; + +Mixtbl.set inj_int tbl "a" 1;; + +OUnit.assert_equal (Some 1) (Mixtbl.get ~inj:inj_int tbl "a");; + +let inj_string = Mixtbl.access () ;; + +Mixtbl.set inj_string tbl "b" "Hello"; + +OUnit.assert_equal (Some "Hello") (Mixtbl.get inj_string tbl "b");; +OUnit.assert_equal None (Mixtbl.get inj_string tbl "a");; +OUnit.assert_equal (Some 1) (Mixtbl.get inj_int tbl "a");; +Mixtbl.set inj_string tbl "a" "Bye";; + +OUnit.assert_equal None (Mixtbl.get inj_int tbl "a");; +OUnit.assert_equal (Some "Bye") (Mixtbl.get inj_string tbl "a");; +]} + +@since NEXT_RELEASE *) + +type 'a t +(** A hash table containing values of different types. + The type parameter ['a] represents the type of the keys. *) + +type ('a, 'b) injection +(** An accessor for values of type 'b in the table. Values put + in the table using an injection can only be retrieved using this + very same injection. *) + +val create : int -> 'a t +(** [create n] creates a hash table of initial size [n]. *) + +val access : unit -> ('a, 'b) injection +(** Return a value that works for a given type of values. This function is + normally called once for each type of value. Several injections may be + created for the same type, but a value set with a given setter can only be + retrieved with the matching getter. The same injection can be reused + across multiple tables (although not in a thread-safe way). *) + +val get : inj:('a, 'b) injection -> 'a t -> 'a -> 'b option +(** Get the value corresponding to this key, if it exists and + belongs to the same injection *) + +val set : inj:('a, 'b) injection -> 'a t -> 'a -> 'b -> unit +(** Bind the key to the value, using [inj] *) + +val length : 'a t -> int +(** Number of bindings *) + +val clear : 'a t -> unit +(** Clear content of the hashtable *) + +val remove : 'a t -> 'a -> unit +(** Remove the binding for this key *) + +val copy : 'a t -> 'a t +(** Copy of the table *) + +val mem : inj:('a, _) injection -> 'a t -> 'a -> bool +(** Is the given key in the table, with the right type? *) + +val find : inj:('a, 'b) injection -> 'a t -> 'a -> 'b +(** Find the value for the given key, which must be of the right type. + raises Not_found if either the key is not found, or if its value + doesn't belong to the right type *) + +val iter_keys : 'a t -> ('a -> unit) -> unit +(** Iterate on the keys of this table *) + +val fold_keys : 'a t -> 'b -> ('b -> 'a -> 'b) -> 'b +(** Fold over the keys *) + +val keys : 'a t -> 'a list +(** List of the keys *) + +val bindings : inj:('a, 'b) injection -> 'a t -> ('a * 'b) list +(** All the bindings that come from the corresponding injection *) diff --git a/tests/run_tests.ml b/tests/run_tests.ml index 631379e5..cf4787ac 100644 --- a/tests/run_tests.ml +++ b/tests/run_tests.ml @@ -21,6 +21,7 @@ let suite = Test_heap.suite; Test_graph.suite; Test_univ.suite; + Test_mixtbl.suite; ] let props = diff --git a/tests/test_mixtbl.ml b/tests/test_mixtbl.ml new file mode 100644 index 00000000..6e517417 --- /dev/null +++ b/tests/test_mixtbl.ml @@ -0,0 +1,94 @@ + +open OUnit +open Containers_misc + +let example () = + let inj_int = Mixtbl.access () in + let tbl = Mixtbl.create 10 in + OUnit.assert_equal None (Mixtbl.get ~inj:inj_int tbl "a"); + Mixtbl.set inj_int tbl "a" 1; + OUnit.assert_equal (Some 1) (Mixtbl.get ~inj:inj_int tbl "a"); + let inj_string = Mixtbl.access () in + Mixtbl.set inj_string tbl "b" "Hello"; + OUnit.assert_equal (Some "Hello") (Mixtbl.get inj_string tbl "b"); + OUnit.assert_equal None (Mixtbl.get inj_string tbl "a"); + OUnit.assert_equal (Some 1) (Mixtbl.get inj_int tbl "a"); + Mixtbl.set inj_string tbl "a" "Bye"; + OUnit.assert_equal None (Mixtbl.get inj_int tbl "a"); + OUnit.assert_equal (Some "Bye") (Mixtbl.get inj_string tbl "a"); + () + +let test_length () = + let inj_int = Mixtbl.access () in + let tbl = Mixtbl.create 5 in + Mixtbl.set ~inj:inj_int tbl "foo" 1; + Mixtbl.set ~inj:inj_int tbl "bar" 2; + OUnit.assert_equal 2 (Mixtbl.length tbl); + OUnit.assert_equal 2 (Mixtbl.find ~inj:inj_int tbl "bar"); + Mixtbl.set ~inj:inj_int tbl "foo" 42; + OUnit.assert_equal 2 (Mixtbl.length tbl); + Mixtbl.remove tbl "bar"; + OUnit.assert_equal 1 (Mixtbl.length tbl); + () + +let test_clear () = + let inj_int = Mixtbl.access () in + let inj_str = Mixtbl.access () in + let tbl = Mixtbl.create 5 in + Mixtbl.set ~inj:inj_int tbl "foo" 1; + Mixtbl.set ~inj:inj_int tbl "bar" 2; + Mixtbl.set ~inj:inj_str tbl "baaz" "hello"; + OUnit.assert_equal 3 (Mixtbl.length tbl); + Mixtbl.clear tbl; + OUnit.assert_equal 0 (Mixtbl.length tbl); + () + +let test_mem () = + let inj_int = Mixtbl.access () in + let inj_str = Mixtbl.access () in + let tbl = Mixtbl.create 5 in + Mixtbl.set ~inj:inj_int tbl "foo" 1; + Mixtbl.set ~inj:inj_int tbl "bar" 2; + Mixtbl.set ~inj:inj_str tbl "baaz" "hello"; + OUnit.assert_bool "mem foo int" (Mixtbl.mem ~inj:inj_int tbl "foo"); + OUnit.assert_bool "mem bar int" (Mixtbl.mem ~inj:inj_int tbl "bar"); + OUnit.assert_bool "not mem baaz int" (not (Mixtbl.mem ~inj:inj_int tbl "baaz")); + OUnit.assert_bool "not mem foo str" (not (Mixtbl.mem ~inj:inj_str tbl "foo")); + OUnit.assert_bool "not mem bar str" (not (Mixtbl.mem ~inj:inj_str tbl "bar")); + OUnit.assert_bool "mem baaz str" (Mixtbl.mem ~inj:inj_str tbl "baaz"); + () + +let test_keys () = + let inj_int = Mixtbl.access () in + let inj_str = Mixtbl.access () in + let tbl = Mixtbl.create 5 in + Mixtbl.set ~inj:inj_int tbl "foo" 1; + Mixtbl.set ~inj:inj_int tbl "bar" 2; + Mixtbl.set ~inj:inj_str tbl "baaz" "hello"; + let l = Mixtbl.keys tbl in + OUnit.assert_equal ["baaz"; "bar"; "foo"] (List.sort compare l); + () + +let test_bindings () = + let inj_int = Mixtbl.access () in + let inj_str = Mixtbl.access () in + let tbl = Mixtbl.create 5 in + Mixtbl.set ~inj:inj_int tbl "foo" 1; + Mixtbl.set ~inj:inj_int tbl "bar" 2; + Mixtbl.set ~inj:inj_str tbl "baaz" "hello"; + Mixtbl.set ~inj:inj_str tbl "str" "rts"; + let l_int = Mixtbl.bindings tbl ~inj:inj_int in + OUnit.assert_equal ["bar", 2; "foo", 1] (List.sort compare l_int); + let l_str = Mixtbl.bindings tbl ~inj:inj_str in + OUnit.assert_equal ["baaz", "hello"; "str", "rts"] (List.sort compare l_str); + () + +let suite = + "mixtbl" >::: + [ "example" >:: example; + "length" >:: test_length; + "clear" >:: test_clear; + "mem" >:: test_mem; + "bindings" >:: test_bindings; + ] + From e74c85e3d2e6260448d67d43c23909e4f268ec1d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 19 Nov 2014 17:44:55 +0100 Subject: [PATCH 26/39] more modern interface to Mixtbl; added a way to iterate on all bindings --- misc/mixtbl.ml | 82 +++++++++++++++++++++++++------------------- misc/mixtbl.mli | 48 ++++++++++++++++---------- tests/test_mixtbl.ml | 29 ++++++++-------- 3 files changed, 91 insertions(+), 68 deletions(-) diff --git a/misc/mixtbl.ml b/misc/mixtbl.ml index 95d3413b..d89e6e67 100644 --- a/misc/mixtbl.ml +++ b/misc/mixtbl.ml @@ -26,35 +26,32 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Hash Table with Heterogeneous Keys} *) -type 'a t = ('a, (unit -> unit)) Hashtbl.t - -type ('a, 'b) injection = { - getter : 'a t -> 'a -> 'b option; - setter : 'a t -> 'a -> 'b -> unit; +type 'b injection = { + get : (unit -> unit) -> 'b option; + set : 'b -> (unit -> unit); } +type 'a t = ('a, unit -> unit) Hashtbl.t + let create n = Hashtbl.create n -let access () = +let create_inj () = let r = ref None in - let getter tbl k = - r := None; (* reset state in case last operation was not a get *) - try - (Hashtbl.find tbl k) (); - let result = !r in - r := None; (* clean up here in order to avoid memory leak *) - result - with Not_found -> None + let get f = + r := None; + f (); + !r + and set v = + (fun () -> r := Some v) in - let setter tbl k v = - let v_opt = Some v in - Hashtbl.replace tbl k (fun () -> r := v_opt) - in - { getter; setter; } + {get;set} -let get ~inj tbl x = inj.getter tbl x +let get ~inj tbl x = + try inj.get (Hashtbl.find tbl x) + with Not_found -> None -let set ~inj tbl x y = inj.setter tbl x y +let set ~inj tbl x y = + Hashtbl.replace tbl x (inj.set y) let length tbl = Hashtbl.length tbl @@ -65,14 +62,14 @@ let remove tbl x = Hashtbl.remove tbl x let copy tbl = Hashtbl.copy tbl let mem ~inj tbl x = - match inj.getter tbl x with - | None -> false - | Some _ -> true + try + inj.get (Hashtbl.find tbl x) <> None + with Not_found -> false let find ~inj tbl x = - match inj.getter tbl x with - | None -> raise Not_found - | Some y -> y + match inj.get (Hashtbl.find tbl x) with + | None -> raise Not_found + | Some v -> v let iter_keys tbl f = Hashtbl.iter (fun x _ -> f x) tbl @@ -80,12 +77,27 @@ let iter_keys tbl f = let fold_keys tbl acc f = Hashtbl.fold (fun x _ acc -> f acc x) tbl acc -let keys tbl = - Hashtbl.fold (fun x _ acc -> x :: acc) tbl [] +(** {2 Iterators} *) -let bindings ~inj tbl = - fold_keys tbl [] - (fun acc k -> - match inj.getter tbl k with - | None -> acc - | Some v -> (k, v) :: acc) +type 'a sequence = ('a -> unit) -> unit + +let keys_seq tbl yield = + Hashtbl.iter + (fun x _ -> yield x) + tbl + +let bindings_of ~inj tbl yield = + Hashtbl.iter + (fun k value -> + match inj.get value with + | None -> () + | Some v -> yield (k, v) + ) tbl + +type value = + | Value : ('b injection -> 'b option) -> value + +let bindings tbl yield = + Hashtbl.iter + (fun x y -> yield (x, Value (fun inj -> inj.get y))) + tbl diff --git a/misc/mixtbl.mli b/misc/mixtbl.mli index 4681c1b9..6e714c64 100644 --- a/misc/mixtbl.mli +++ b/misc/mixtbl.mli @@ -58,28 +58,33 @@ type 'a t (** A hash table containing values of different types. The type parameter ['a] represents the type of the keys. *) -type ('a, 'b) injection -(** An accessor for values of type 'b in the table. Values put - in the table using an injection can only be retrieved using this - very same injection. *) +type 'b injection +(** An accessor for values of type 'b in any table. Values put + in the table using an key can only be retrieved using this + very same key. *) val create : int -> 'a t (** [create n] creates a hash table of initial size [n]. *) -val access : unit -> ('a, 'b) injection +val create_inj : unit -> 'b injection (** Return a value that works for a given type of values. This function is - normally called once for each type of value. Several injections may be + normally called once for each type of value. Several keys may be created for the same type, but a value set with a given setter can only be - retrieved with the matching getter. The same injection can be reused + retrieved with the matching getter. The same key can be reused across multiple tables (although not in a thread-safe way). *) -val get : inj:('a, 'b) injection -> 'a t -> 'a -> 'b option +val get : inj:'b injection -> 'a t -> 'a -> 'b option (** Get the value corresponding to this key, if it exists and - belongs to the same injection *) + belongs to the same key *) -val set : inj:('a, 'b) injection -> 'a t -> 'a -> 'b -> unit +val set : inj:'b injection -> 'a t -> 'a -> 'b -> unit (** Bind the key to the value, using [inj] *) +val find : inj:'b injection -> 'a t -> 'a -> 'b +(** Find the value for the given key, which must be of the right type. + raises Not_found if either the key is not found, or if its value + doesn't belong to the right type *) + val length : 'a t -> int (** Number of bindings *) @@ -92,22 +97,27 @@ val remove : 'a t -> 'a -> unit val copy : 'a t -> 'a t (** Copy of the table *) -val mem : inj:('a, _) injection -> 'a t -> 'a -> bool +val mem : inj:_ injection-> 'a t -> 'a -> bool (** Is the given key in the table, with the right type? *) -val find : inj:('a, 'b) injection -> 'a t -> 'a -> 'b -(** Find the value for the given key, which must be of the right type. - raises Not_found if either the key is not found, or if its value - doesn't belong to the right type *) - val iter_keys : 'a t -> ('a -> unit) -> unit (** Iterate on the keys of this table *) val fold_keys : 'a t -> 'b -> ('b -> 'a -> 'b) -> 'b (** Fold over the keys *) -val keys : 'a t -> 'a list -(** List of the keys *) +(** {2 Iterators} *) -val bindings : inj:('a, 'b) injection -> 'a t -> ('a * 'b) list +type 'a sequence = ('a -> unit) -> unit + +val keys_seq : 'a t -> 'a sequence +(** All the keys *) + +val bindings_of : inj:'b injection -> 'a t -> ('a * 'b) sequence (** All the bindings that come from the corresponding injection *) + +type value = + | Value : ('b injection -> 'b option) -> value + +val bindings : 'a t -> ('a * value) sequence +(** Iterate on all bindings *) diff --git a/tests/test_mixtbl.ml b/tests/test_mixtbl.ml index 6e517417..bbb5b28f 100644 --- a/tests/test_mixtbl.ml +++ b/tests/test_mixtbl.ml @@ -1,14 +1,15 @@ open OUnit open Containers_misc +open CCFun let example () = - let inj_int = Mixtbl.access () in + let inj_int = Mixtbl.create_inj () in let tbl = Mixtbl.create 10 in OUnit.assert_equal None (Mixtbl.get ~inj:inj_int tbl "a"); Mixtbl.set inj_int tbl "a" 1; OUnit.assert_equal (Some 1) (Mixtbl.get ~inj:inj_int tbl "a"); - let inj_string = Mixtbl.access () in + let inj_string = Mixtbl.create_inj () in Mixtbl.set inj_string tbl "b" "Hello"; OUnit.assert_equal (Some "Hello") (Mixtbl.get inj_string tbl "b"); OUnit.assert_equal None (Mixtbl.get inj_string tbl "a"); @@ -19,7 +20,7 @@ let example () = () let test_length () = - let inj_int = Mixtbl.access () in + let inj_int = Mixtbl.create_inj () in let tbl = Mixtbl.create 5 in Mixtbl.set ~inj:inj_int tbl "foo" 1; Mixtbl.set ~inj:inj_int tbl "bar" 2; @@ -32,8 +33,8 @@ let test_length () = () let test_clear () = - let inj_int = Mixtbl.access () in - let inj_str = Mixtbl.access () in + let inj_int = Mixtbl.create_inj () in + let inj_str = Mixtbl.create_inj () in let tbl = Mixtbl.create 5 in Mixtbl.set ~inj:inj_int tbl "foo" 1; Mixtbl.set ~inj:inj_int tbl "bar" 2; @@ -44,8 +45,8 @@ let test_clear () = () let test_mem () = - let inj_int = Mixtbl.access () in - let inj_str = Mixtbl.access () in + let inj_int = Mixtbl.create_inj () in + let inj_str = Mixtbl.create_inj () in let tbl = Mixtbl.create 5 in Mixtbl.set ~inj:inj_int tbl "foo" 1; Mixtbl.set ~inj:inj_int tbl "bar" 2; @@ -59,27 +60,27 @@ let test_mem () = () let test_keys () = - let inj_int = Mixtbl.access () in - let inj_str = Mixtbl.access () in + let inj_int = Mixtbl.create_inj () in + let inj_str = Mixtbl.create_inj () in let tbl = Mixtbl.create 5 in Mixtbl.set ~inj:inj_int tbl "foo" 1; Mixtbl.set ~inj:inj_int tbl "bar" 2; Mixtbl.set ~inj:inj_str tbl "baaz" "hello"; - let l = Mixtbl.keys tbl in + let l = Mixtbl.keys_seq tbl |> CCSequence.to_list in OUnit.assert_equal ["baaz"; "bar"; "foo"] (List.sort compare l); () let test_bindings () = - let inj_int = Mixtbl.access () in - let inj_str = Mixtbl.access () in + let inj_int = Mixtbl.create_inj () in + let inj_str = Mixtbl.create_inj () in let tbl = Mixtbl.create 5 in Mixtbl.set ~inj:inj_int tbl "foo" 1; Mixtbl.set ~inj:inj_int tbl "bar" 2; Mixtbl.set ~inj:inj_str tbl "baaz" "hello"; Mixtbl.set ~inj:inj_str tbl "str" "rts"; - let l_int = Mixtbl.bindings tbl ~inj:inj_int in + let l_int = Mixtbl.bindings_of tbl ~inj:inj_int |> CCSequence.to_list in OUnit.assert_equal ["bar", 2; "foo", 1] (List.sort compare l_int); - let l_str = Mixtbl.bindings tbl ~inj:inj_str in + let l_str = Mixtbl.bindings_of tbl ~inj:inj_str |> CCSequence.to_list in OUnit.assert_equal ["baaz", "hello"; "str", "rts"] (List.sort compare l_str); () From e9760976b42500e8308d0b609eaa0abcce198de6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 19 Nov 2014 18:02:14 +0100 Subject: [PATCH 27/39] fix warnings in test --- tests/test_mixtbl.ml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_mixtbl.ml b/tests/test_mixtbl.ml index bbb5b28f..f58fc2bb 100644 --- a/tests/test_mixtbl.ml +++ b/tests/test_mixtbl.ml @@ -7,16 +7,16 @@ let example () = let inj_int = Mixtbl.create_inj () in let tbl = Mixtbl.create 10 in OUnit.assert_equal None (Mixtbl.get ~inj:inj_int tbl "a"); - Mixtbl.set inj_int tbl "a" 1; + Mixtbl.set ~inj:inj_int tbl "a" 1; OUnit.assert_equal (Some 1) (Mixtbl.get ~inj:inj_int tbl "a"); let inj_string = Mixtbl.create_inj () in - Mixtbl.set inj_string tbl "b" "Hello"; - OUnit.assert_equal (Some "Hello") (Mixtbl.get inj_string tbl "b"); - OUnit.assert_equal None (Mixtbl.get inj_string tbl "a"); - OUnit.assert_equal (Some 1) (Mixtbl.get inj_int tbl "a"); - Mixtbl.set inj_string tbl "a" "Bye"; - OUnit.assert_equal None (Mixtbl.get inj_int tbl "a"); - OUnit.assert_equal (Some "Bye") (Mixtbl.get inj_string tbl "a"); + Mixtbl.set ~inj:inj_string tbl "b" "Hello"; + OUnit.assert_equal (Some "Hello") (Mixtbl.get ~inj:inj_string tbl "b"); + OUnit.assert_equal None (Mixtbl.get ~inj:inj_string tbl "a"); + OUnit.assert_equal (Some 1) (Mixtbl.get ~inj:inj_int tbl "a"); + Mixtbl.set ~inj:inj_string tbl "a" "Bye"; + OUnit.assert_equal None (Mixtbl.get ~inj:inj_int tbl "a"); + OUnit.assert_equal (Some "Bye") (Mixtbl.get ~inj:inj_string tbl "a"); () let test_length () = From 2346e833df0530160460d92d405c685360dfa9eb Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 20 Nov 2014 00:17:47 +0100 Subject: [PATCH 28/39] opam file --- opam | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 opam diff --git a/opam b/opam new file mode 100644 index 00000000..2e55f8ea --- /dev/null +++ b/opam @@ -0,0 +1,32 @@ +opam-version: "1.2" +author: "Simon Cruanes" +maintainer: "simon.cruanes@inria.fr" +build: [ + ["./configure" "--prefix" prefix "--disable-thread" "--disable-bench" + "--disable-tests" "--disable-cgi" "--%{lwt:enable}%-lwt" + "--enable-docs" "--enable-misc"] + [make "build"] +] +install: [ + [make "install"] +] +build-doc: [ make "doc" ] +build-test: [ make "test" ] +remove: [ + ["ocamlfind" "remove" "containers"] +] +post-messages: [ + "in containers, modules start with 'CC' (stands for 'core containers')" +] +depends: [ + "ocamlfind" {build} + "base-bytes" + "cppo" {build} +] +depopts: [ "lwt" ] +tags: [ "stdlib" "containers" "iterators" "list" "heap" "queue" ] +homepage: "https://github.com/c-cube/ocaml-containers/" +doc: "http://cedeela.fr/~simon/software/containers/" +available: [ocaml-version >= "4.00.0"] +dev-repo: "https://github.com/c-cube/ocaml-containers.git" +bug-reports: "https://github.com/c-cube/ocaml-containers/issues/" From 510f63f92175279e09606d399e4d877c54427078 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 20 Nov 2014 01:04:28 +0100 Subject: [PATCH 29/39] firx quick tests; remove misc/ratTerm (already removed from _oasis) --- _oasis | 3 +- lwt/lwt_automaton.ml | 2 + lwt/lwt_automaton.mli | 2 + misc/ratTerm.ml | 340 -------------------------------- misc/ratTerm.mli | 105 ---------- tests/quick/.common.ml | 5 +- tests/quick/levenshtein_dict.ml | 17 +- tests/quick/ratTerm.ml | 17 -- 8 files changed, 18 insertions(+), 473 deletions(-) delete mode 100644 misc/ratTerm.ml delete mode 100644 misc/ratTerm.mli delete mode 100755 tests/quick/ratTerm.ml diff --git a/_oasis b/_oasis index 36303bfe..56f9fda5 100644 --- a/_oasis +++ b/_oasis @@ -104,8 +104,7 @@ Library "containers_lwt" FindlibParent: containers Build$: flag(lwt) && flag(misc) Install$: flag(lwt) && flag(misc) - BuildDepends: containers,lwt,lwt.unix,containers.misc - XMETARequires: containers,lwt,lwt.unix,containers.misc + BuildDepends: containers, lwt, lwt.unix, containers.misc Library "containers_cgi" Path: cgi diff --git a/lwt/lwt_automaton.ml b/lwt/lwt_automaton.ml index cdd03e50..2f8d98f1 100644 --- a/lwt/lwt_automaton.ml +++ b/lwt/lwt_automaton.ml @@ -26,6 +26,8 @@ of this software, even if advised of the possibility of such damage. (** {1 interface lwt-automaton} *) +open Containers_misc + module I = struct let send f i = Lwt.on_success f (Automaton.I.send i) diff --git a/lwt/lwt_automaton.mli b/lwt/lwt_automaton.mli index 79dda242..daa03517 100644 --- a/lwt/lwt_automaton.mli +++ b/lwt/lwt_automaton.mli @@ -26,6 +26,8 @@ of this software, even if advised of the possibility of such damage. (** {1 interface lwt-automaton} *) +open Containers_misc + module I : sig val send : 'a Lwt.t -> 'a Automaton.I.t -> unit (** Feed the content of the Lwt value into the automaton input, as soon as diff --git a/misc/ratTerm.ml b/misc/ratTerm.ml deleted file mode 100644 index 722e7fbb..00000000 --- a/misc/ratTerm.ml +++ /dev/null @@ -1,340 +0,0 @@ - -(* -copyright (c) 2013, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 Rational Terms} *) - -module type SYMBOL = sig - type t - val compare : t -> t -> int - val to_string : t -> string -end - -module type S = sig - module Symbol : SYMBOL - - type t = private - | Var of int - | Ref of int - | App of Symbol.t * t list - - type term = t - - type 'a env = 'a RAL.t - - (** Structural equality and comparisons. Two terms being different - for {!eq} may still be equal, but with distinct representations. - For instance [r:f(f(r))] and [r:f(r)] are the same term but they - are not equal structurally. *) - - val eq : t -> t -> bool - val cmp : t -> t -> int - - val eq_set : t -> t -> bool - (** Proper equality on terms. This returns [true] if the two terms represent - the same infinite tree, not only if they have the same shape. *) - - val var : unit -> t - (** free variable, with a fresh name *) - - val mk_ref : int -> t - (** Back-ref of [n] levels down (see De Bruijn indices) *) - - val app : Symbol.t -> t list -> t - (** Application of a symbol to a list, possibly with a unique label *) - - val const : Symbol.t -> t - (** Shortcut for [app s []] *) - - val pp : Buffer.t -> t -> unit - val fmt : Format.formatter -> t -> unit - val to_string : t -> string - - val rename : t -> t - (** Rename all variables and references to fresh ones *) - - module Subst : sig - type t - val empty : t - val bind : t -> int -> term -> t - val deref : t -> term -> term - val apply : ?depth:int -> t -> term -> term - - val pp : Buffer.t -> t -> unit - val fmt : Format.formatter -> t -> unit - val to_string : t -> string - end - - val matching : ?subst:Subst.t -> term -> term -> Subst.t option - val unify : ?subst:Subst.t -> term -> term -> Subst.t option -end - -module Make(Symbol : SYMBOL) = struct - module Symbol = Symbol - - type t = - | Var of int - | Ref of int - | App of Symbol.t * t list - - type term = t - - module IMap = Map.Make(struct - type t = int - let compare i j = i-j - end) - module IHTbl = Hashtbl.Make(struct - type t = int - let equal i j = i=j - let hash i = i land max_int - end) - - type 'a env = 'a RAL.t - (** Environment for De Bruijn variables: a random-access list. *) - - let _to_int = function - | Var _ -> 1 - | Ref _ -> 2 - | App _ -> 3 - - let rec cmp t1 t2 = match t1, t2 with - | Var i1, Var i2 -> i1 - i2 - | Ref i1, Ref i2 -> i1 - i2 - | App (f1, l1), App (f2, l2) -> - let c = Symbol.compare f1 f2 in - if c <> 0 then c - else cmp_list l1 l2 - | _ -> _to_int t1 - _to_int t2 - and cmp_list l1 l2 = match l1, l2 with - | [], [] -> 0 - | [], _ -> -1 - | _, [] -> 1 - | t1::l1', t2::l2' -> - let c = cmp t1 t2 in - if c <> 0 then c else cmp_list l1' l2' - - let eq t1 t2 = cmp t1 t2 = 0 - - module Set2T = Set.Make(struct - type t = term*term - let compare (l1,r1)(l2,r2) = - let c = cmp l1 l2 in - if c <> 0 then c else cmp r1 r2 - end) - - let eq_set t1 t2 = - let cycle = ref Set2T.empty in - let rec eq env t1 t2 = match t1, t2 with - | Ref i, _ -> eq env (RAL.get env i) t2 - | _, Ref j -> eq env t1 (RAL.get env j) - | Var i, Var j -> i=j - | _ when Set2T.mem (t1,t2) !cycle -> true - | App(f1,l1), App(f2,l2) when Symbol.compare f1 f2 = 0 -> - (* if the subterms are equal, and we try to solve again t1=t2, - then we shouldn't cycle. Hence we protect ourself. *) - cycle := Set2T.add (t1, t2) !cycle; - let env = RAL.cons t1 env in - begin try - List.for_all2 (eq env) l1 l2 - with Invalid_argument _ -> false - end - | _ -> false - in - eq RAL.empty t1 t2 - - let _count = ref 0 - - let var () = - let v = Var !_count in - incr _count; - v - - let mk_ref i = Ref i - - let app s l = App (s, l) - - let const s = App (s, []) - - let rec pp buf t = match t with - | Var i -> Printf.bprintf buf "X%d" i - | Ref i -> Printf.bprintf buf "*%d" i - | App (s, []) -> - Buffer.add_string buf (Symbol.to_string s) - | App (s, l) -> - Printf.bprintf buf "%s(%a)" (Symbol.to_string s) pp_list l - and pp_list buf l = match l with - | [] -> () - | [x] -> pp buf x - | x::((_::_) as l') -> - pp buf x; Buffer.add_string buf ", "; pp_list buf l' - - let to_string t = - let b = Buffer.create 16 in - pp b t; - Buffer.contents b - - let fmt fmt t = Format.pp_print_string fmt (to_string t) - - let rename t = - let names = IHTbl.create 16 in - let rec rename t = match t with - | Var i -> - begin try IHTbl.find names i - with Not_found -> - (* rename variable into a fresh one *) - let v = var() in - IHTbl.add names i v; - v - end - | Ref _ -> t (* no need to rename *) - | App (s, l) -> - app s (List.map rename l) - in rename t - - module Subst = struct - type t = term IMap.t - - let empty = IMap.empty - - let bind s i t = - match t with - | _ when IMap.mem i s -> failwith "Subst.bind" - | Var j when i=j -> s (* id *) - | _ -> IMap.add i t s - - let rec deref s t = match t with - | Var i -> - begin try deref s (IMap.find i s) - with Not_found -> t - end - | Ref _ - | App _ -> t - - (* does the variable [v] occur in [subst(t)]? *) - let rec _occur subst ~var t = - match deref subst t with - | Var _ -> eq var t - | Ref _ - | App (_, []) -> false - | App (_, l) -> List.exists (_occur subst ~var) l - - let apply ?(depth=0) subst t = - (* [depth]: current depth w.r.t root, [back]: map from var to - the depth of the term they are bound to *) - let rec apply depth back subst t = match t with - | Ref _ -> t - | Var i -> - let t' = deref subst t in - (* interesting case. Either [t] is bound to a term [t'] - that contains it, which makes a cyclic term, or it's - not in which case it's easy. *) - begin match t' with - | Ref _ -> t - | App (s, l) -> - if _occur subst ~var:t t' - then - (* in any case we are possibly going to modify [r'] - by replacing [x] by a backref. *) - let back = IMap.add i depth back in - let subst = IMap.remove i subst in - app s (List.map (apply (depth+1) back subst) l) - else - (* simply keep t'->s(l) *) - app s (List.map (apply (depth+1) back subst) l) - | Var j -> - assert (not (IMap.mem j subst)); - begin try - let k = IMap.find j back in - (* the variable is actually bound to a superterm, - which is at depth [k]. The depth difference is - therefore [depth-k]. *) - Ref (depth-k) - with Not_found -> - t' (* truly unbound variable. *) - end - end - | App (s, l) -> - app s (List.map (apply (depth+1) back subst) l) - in apply depth IMap.empty subst t - - let pp buf subst = - Buffer.add_string buf "{"; - let first = ref true in - IMap.iter - (fun i t -> - if !first then first:= false else Buffer.add_string buf ", "; - Printf.bprintf buf "X%d → %a" i pp t) - subst; - Buffer.add_string buf "}"; - () - - let to_string t = - let b = Buffer.create 16 in - pp b t; - Buffer.contents b - - let fmt fmt t = Format.pp_print_string fmt (to_string t) - end - - exception Fail - - let matching ?(subst=Subst.empty) t1 t2 = - assert false (* TODO (need to gather variables of [t2]... *) - - let unify ?(subst=Subst.empty) t1 t2 = - (* pairs of terms already unified *) - let cycle = ref Set2T.empty in - (* [env] contains references to superterms *) - let rec unif env subst t1 t2 = - match Subst.deref subst t1, Subst.deref subst t2 with - | Ref i1, _ -> unif env subst (RAL.get env i1) t2 - | _, Ref i2 -> unif env subst t1 (RAL.get env i2) - | Var i, Var j when i=j -> subst - | Var i, _ -> Subst.bind subst i t2 - | _, Var j -> Subst.bind subst j t1 - | t1, t2 when Set2T.mem (t1,t2) !cycle -> - subst (* t1,t2 already being unified, avoid cycling forever *) - | App (f1, l1) as t1, (App (f2, l2) as t2) -> - if Symbol.compare f1 f2 <> 0 then raise Fail; - (* remember we are unifying those terms *) - cycle := Set2T.add (t1, t2) !cycle; - (* now we can assume [t1 = t2] if unification succeeds, so - we just push [t1] into the env *) - let env = RAL.cons t1 env in - try - List.fold_left2 (unif env) subst l1 l2 - with Invalid_argument _ -> raise Fail - in - try Some (unif RAL.empty subst t1 t2) - with Fail -> None -end - -module Str = struct - type t = string - let compare = String.compare - let to_string s = s -end - -module Default = Make(Str) diff --git a/misc/ratTerm.mli b/misc/ratTerm.mli deleted file mode 100644 index b84a72d6..00000000 --- a/misc/ratTerm.mli +++ /dev/null @@ -1,105 +0,0 @@ - -(* -copyright (c) 2013, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 Rational Terms} *) - -module type SYMBOL = sig - type t - val compare : t -> t -> int - val to_string : t -> string -end - -module type S = sig - module Symbol : SYMBOL - - type t = private - | Var of int - | Ref of int - | App of Symbol.t * t list - - type term = t - - type 'a env = 'a RAL.t - - (** Structural equality and comparisons. Two terms being different - for {!eq} may still be equal, but with distinct representations. - For instance [r:f(f(r))] and [r:f(r)] are the same term but they - are not equal structurally. *) - - val eq : t -> t -> bool - val cmp : t -> t -> int - - val eq_set : t -> t -> bool - (** Proper equality on terms. This returns [true] if the two terms represent - the same infinite tree, not only if they have the same shape. *) - - val var : unit -> t - (** free variable, with a fresh name *) - - val mk_ref : int -> t - (** Back-ref of [n] levels down (see De Bruijn indices) *) - - val app : Symbol.t -> t list -> t - (** Application of a symbol to a list, possibly with a unique label *) - - val const : Symbol.t -> t - (** Shortcut for [app s []] *) - - val pp : Buffer.t -> t -> unit - val fmt : Format.formatter -> t -> unit - val to_string : t -> string - - val rename : t -> t - (** Rename all variables and references to fresh ones *) - - module Subst : sig - type t - val empty : t - val bind : t -> int -> term -> t - val deref : t -> term -> term - val apply : ?depth:int -> t -> term -> term - - val pp : Buffer.t -> t -> unit - val fmt : Format.formatter -> t -> unit - val to_string : t -> string - end - - val matching : ?subst:Subst.t -> term -> term -> Subst.t option - val unify : ?subst:Subst.t -> term -> term -> Subst.t option -end - -module Make(Sym : SYMBOL) : S with module Symbol = Sym - -module Str : SYMBOL with type t = string - -module Default : sig - include S with module Symbol = Str - - (* TODO - val of_string : string -> t option - val of_string_exn : string -> t (** @raise Failure possibly *) - *) -end diff --git a/tests/quick/.common.ml b/tests/quick/.common.ml index 1bb57abf..9ee90649 100644 --- a/tests/quick/.common.ml +++ b/tests/quick/.common.ml @@ -1,5 +1,8 @@ #use "topfind";; -#directory "_build/";; +#directory "_build/core/";; +#directory "_build/string";; +#directory "_build/misc";; +#directory "_build/lwt";; #require "unix";; diff --git a/tests/quick/levenshtein_dict.ml b/tests/quick/levenshtein_dict.ml index 6a785351..8700c4fa 100755 --- a/tests/quick/levenshtein_dict.ml +++ b/tests/quick/levenshtein_dict.ml @@ -1,18 +1,19 @@ #!/usr/bin/env ocaml #use "tests/quick/.common.ml";; #load "containers.cma";; -open Containers;; +#load "containers_string.cma";; -#require "batteries";; -open Batteries;; +open Containers_string -let words = File.with_file_in "/usr/share/dict/cracklib-small" - (fun i -> IO.read_all i |> String.nsplit ~by:"\\n");; +let words = CCIO.( + (with_in "/usr/share/dict/cracklib-small" >>>= read_lines) + |> run_exn + ) let idx = List.fold_left - (fun idx s -> Levenshtein.StrIndex.add_string idx s s) - Levenshtein.StrIndex.empty words;; + (fun idx s -> Levenshtein.Index.add idx s s) + Levenshtein.Index.empty words;; -Levenshtein.StrIndex.retrieve_string ~limit:1 idx "hell" +Levenshtein.Index.retrieve ~limit:1 idx "hell" |> Levenshtein.klist_to_list |> List.iter print_endline;; diff --git a/tests/quick/ratTerm.ml b/tests/quick/ratTerm.ml deleted file mode 100755 index 2eb9d93f..00000000 --- a/tests/quick/ratTerm.ml +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env ocaml -#use "tests/quick/.common.ml";; -#load "containers.cma";; -open Containers;; - -module T = RatTerm.Default;; -#install_printer T.fmt;; -#install_printer T.Subst.fmt;; - -let t = T.(app "f" [const "a"; app "f" [mk_ref 1; const "b"]]);; -let t2 = T.(app "f" [var (); app "f" [mk_ref 1; var ()]]);; -let t3 = T.(app "f" [var (); app "f" [var (); const "b"]]);; -let subst2 = match T.unify t t3 with Some s -> s | None -> assert false;; -let t3' = T.Subst.apply subst2 t3;; -T.eq_set t t3';; - -ok();; From 9c9a78c7a64f3af622919bff6c9962d04d42483b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 20 Nov 2014 00:17:35 +0100 Subject: [PATCH 30/39] lwt/Lwt_actor stub, for erlang-style concurrency (albeit much much more naive) --- _oasis | 2 +- lwt/lwt_actor.ml | 181 ++++++++++++++++++++++++++++++++++++++++++++++ lwt/lwt_actor.mli | 75 +++++++++++++++++++ 3 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 lwt/lwt_actor.ml create mode 100644 lwt/lwt_actor.mli diff --git a/_oasis b/_oasis index 56f9fda5..aa4e6762 100644 --- a/_oasis +++ b/_oasis @@ -98,7 +98,7 @@ Library "containers_thread" Library "containers_lwt" Path: lwt - Modules: Behavior, Lwt_automaton + Modules: Behavior, Lwt_automaton, Lwt_actor Pack: true FindlibName: lwt FindlibParent: containers diff --git a/lwt/lwt_actor.ml b/lwt/lwt_actor.ml new file mode 100644 index 00000000..f5686b3d --- /dev/null +++ b/lwt/lwt_actor.ml @@ -0,0 +1,181 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Small Actor system for Lwt} *) + +module ITbl = Hashtbl.Make(struct + type t = int + let equal (i:int) j = i=j + let hash i = i land max_int +end) + +(** {2 Actors Basics} *) + +let (>>=) = Lwt.(>>=) + +type 'a t = { + mutable inbox : 'a Queue.t; + cond : unit Lwt_condition.t; + act : 'a t -> 'a -> unit Lwt.t; + setup : unit -> unit Lwt.t; + pid : int; + mutable links : any_actor list; + mutable monitors : monitor list; + mutable thread : unit Lwt.t option; (* running thread *) +} +(* invariant: thead=Some t means that t is running, and the + actor is alive *) + +and any_actor = + | AnyActor : _ t -> any_actor +and monitor = + | Monitor : [> `Died of any_actor] t -> monitor + +(* send message *) +let send m x = + Queue.push x m.inbox; + Lwt_condition.signal m.cond (); + Lwt.return_unit + +(* [a] just died, now kill its friends *) +let propagate_dead a = + let traversed = ITbl.create 16 in + (* depth-first traversal of the clique of linked actors *) + let rec traverse stack = match stack with + | [] -> () + | AnyActor a :: stack' when ITbl.mem traversed a.pid -> + traverse stack' + | (AnyActor a) as any_a :: stack' -> + ITbl.add traversed a.pid (); + begin match a.thread with + | None -> () + | Some t -> + Lwt.cancel t; + a.thread <- None; + end; + (* notify monitors that [a] died *) + let monitors = a.monitors in + Lwt.async + (fun () -> + Lwt_list.iter_p + (function Monitor m -> send m (`Died any_a) + ) monitors + ); + (* follow links to other actors to kill *) + let stack' = List.rev_append a.links stack' in + traverse stack' + in + traverse [AnyActor a] + +(* number of active actors *) +let num_active = ref 0 +let on_num_active_0 = Lwt_condition.create() + +let decr_num_active () = + decr num_active; + assert (!num_active >= 0); + if !num_active = 0 then Lwt_condition.broadcast on_num_active_0 () + +(* how to start an actor *) +let start_ a = + (* main loop of the actor *) + let rec loop () = + Lwt_condition.wait a.cond >>= fun () -> + let x = Queue.pop a.inbox in + a.act a x >>= fun () -> + loop () + and exn_handler e = + Lwt_log.ign_info_f ~exn:e "error in thread %d" a.pid; + propagate_dead a; + Lwt.return_unit + in + match a.thread with + | Some _ -> failwith "start: actor already running"; + | None -> + (* start the thread *) + let thread = Lwt.catch (fun () -> a.setup () >>= loop) exn_handler in + (* maintain [num_active] *) + incr num_active; + Lwt.on_termination thread decr_num_active; + a.thread <- Some thread; + () + +let kill a = propagate_dead a + +let no_setup_ () = Lwt.return_unit + +let pid a = a.pid + +let cur_pid = ref 0 + +let monitor m a = + a.monitors <- Monitor m :: a.monitors + +let link a b = + if a.thread = None + then kill b + else if b.thread = None + then kill a; + a.links <- AnyActor b :: a.links; + b.links <- AnyActor a :: b.links; + () + +let spawn ?(links=[]) ?(setup=no_setup_) act = + let pid = !cur_pid in + incr cur_pid; + let a = { + inbox=Queue.create (); + cond = Lwt_condition.create(); + act; + setup; + pid; + links=[]; + monitors=[]; + thread=None; + } in + start_ a; + (* link now *) + List.iter (function AnyActor b -> link a b) links; + a + +let cur_timeout_id = ref 0 + +let timeout a f = + if f <= 0. then invalid_arg "timeout"; + let i = !cur_timeout_id in + incr cur_timeout_id; + let _ = Lwt_engine.on_timer f false + (fun _ -> Lwt.async (fun () -> send a (`Timeout i))) + in + i + +(* wait until num_active=0 *) +let rec wait_all () = + if !num_active = 0 + then Lwt.return_unit + else + Lwt_condition.wait on_num_active_0 >>= fun () -> + wait_all () diff --git a/lwt/lwt_actor.mli b/lwt/lwt_actor.mli new file mode 100644 index 00000000..6eca78c8 --- /dev/null +++ b/lwt/lwt_actor.mli @@ -0,0 +1,75 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Small Actor system for Lwt} + +Let's draw inspiration from Erlang. Just a tiny bit. +{b NOTE}: this module is not thread-safe at all. +*) + +(** {2 Actors Basics} *) + +type 'a t +(** An actor that can receive messages of type 'a. In practice, 'a will + often be a variant or a polymorphic variant. *) + +type any_actor = + | AnyActor : _ t -> any_actor + +val spawn : ?links:any_actor list -> + ?setup:(unit -> unit Lwt.t) -> + ('a t -> 'a -> unit Lwt.t) -> 'a t +(** Spawn a new actor with the given loop function. The function will + be called repeatedly with [(self, message)] where [self] is the actor + itself, and [msg] some incoming message.. + @param setup function that is called when the actor (re)starts + @param links list of other actors that are linked to immediately *) + +val send : 'a t -> 'a -> unit Lwt.t +(** Send a message to an actor's inbox *) + +val pid : _ t -> int +(** Pid of an actor *) + +val timeout : [> `Timeout of int ] t -> float -> int +(** [timeout a f] returns some unique integer ticket [i], + and, [f] seconds later, sends [`Timeout i] to [a] *) + +val link : _ t -> _ t -> unit +(** [link a b] links the two actors together, so that if one dies, the + other dies too. The linking relationship is transitive and symmetric. *) + +val kill : _ t -> unit +(** Kill the actor, and all its linked actors *) + +val monitor : [> `Died of any_actor] t -> _ t -> unit +(** [monitor m a] adds [a] to the list of actors monitored by [m]. If [a] + dies for any reason, [m] is sent [`Died a] and can react consequently. *) + +val wait_all : unit -> unit Lwt.t +(** Wait for all actors to finish. Typically used directly in {!Lwt_main.run} *) + +(* TODO: some basic patterns: monitor strategies, pub/sub... *) From 5b9a7a26891bc94a6d64860e16fcdb460bd2e01b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 20 Nov 2014 01:04:11 +0100 Subject: [PATCH 31/39] quick test for Lwt_actors --- tests/quick/actors.ml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 tests/quick/actors.ml diff --git a/tests/quick/actors.ml b/tests/quick/actors.ml new file mode 100755 index 00000000..ef10daf7 --- /dev/null +++ b/tests/quick/actors.ml @@ -0,0 +1,33 @@ +#!/usr/bin/env ocaml +#use "tests/quick/.common.ml";; +#load "containers.cma";; +#require "lwt.unix";; +#load "containers_misc.cma";; +#load "containers_lwt.cma";; + +let (>>=) = Lwt.(>>=) + +module A = Containers_lwt.Lwt_actor + +let a = A.spawn + (fun _ (`Ping sender) -> + Lwt_io.printl "ping!" >>= fun () -> + Lwt_unix.sleep 1. >>= fun () -> + A.send sender `Pong + ) + +let b = A.spawn + (fun self -> function + | `Pong + | `Start -> + Lwt_io.printl "pong!" >>= fun () -> + Lwt_unix.sleep 1. >>= fun () -> + A.send a (`Ping self) + ) + +let () = Lwt_main.run ( + Lwt_io.printl "start" >>= fun () -> + A.send b `Start >>= fun () -> + A.wait_all () +) + From 4d0d988f4c90014f7dfc773d950766e98fba455b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 20 Nov 2014 15:35:00 +0100 Subject: [PATCH 32/39] readme --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 3d5af8d4..6ca90613 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,11 @@ Some of the modules have been moved to their own repository (e.g. `sequence`, [![Build Status](http://ci.cedeela.fr/buildStatus/icon?job=containers)](http://ci.cedeela.fr/job/containers/) +## Finding help + +- the [github wiki](https://github.com/c-cube/ocaml-containers/wiki) +- the IRC channel (`##ocaml-containers` on Freenode) + ## Use You can either build and install the library (see `Build`), or just copy From 13862b51331225395d003b8d98fee504658b9be3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 23 Nov 2014 11:54:37 +0100 Subject: [PATCH 33/39] CCMultiSet.{add_mult,remove_mult,update} --- core/CCMultiSet.ml | 44 +++++++++++++++++++++++++++++++++++++++----- core/CCMultiSet.mli | 17 +++++++++++++++++ 2 files changed, 56 insertions(+), 5 deletions(-) diff --git a/core/CCMultiSet.ml b/core/CCMultiSet.ml index 8e840586..3fa4c8c1 100644 --- a/core/CCMultiSet.ml +++ b/core/CCMultiSet.ml @@ -45,6 +45,12 @@ module type S = sig val remove : t -> elt -> t + val add_mult : t -> elt -> int -> t + + val remove_mult : t -> elt -> int -> t + + val update : t -> elt -> (int -> int) -> t + val min : t -> elt val max : t -> elt @@ -102,12 +108,31 @@ module Make(O : Set.OrderedType) = struct let n = count ms x in M.add x (n+1) ms - let remove ms x = + let add_mult ms x n = + if n < 0 then invalid_arg "CCMultiSet.add_mult"; + if n=0 + then ms + else M.add x (count ms x + n) ms + + let remove_mult ms x n = + if n < 0 then invalid_arg "CCMultiSet.remove_mult"; + let cur_n = count ms x in + let new_n = cur_n - n in + if new_n <= 0 + then M.remove x ms + else M.add x new_n ms + + let remove ms x = remove_mult ms x 1 + + let update ms x f = let n = count ms x in - match n with - | 0 -> ms - | 1 -> M.remove x ms - | _ -> M.add x (n-1) ms + match f n with + | 0 -> + if n=0 then ms else M.remove x ms + | n' -> + if n' < 0 + then invalid_arg "CCMultiSet.udpate" + else M.add x n' ms let min ms = fst (M.min_binding ms) @@ -197,3 +222,12 @@ module Make(O : Set.OrderedType) = struct seq (fun x -> m := add !m x); !m end + +(*$T + let module S = CCMultiSet.Make(String) in \ + S.count (S.add_mult S.empty "a" 5) "a" = 5 + let module S = CCMultiSet.Make(String) in \ + S.count (S.remove_mult (S.add_mult S.empty "a" 5) "a" 3) "a" = 2 + let module S = CCMultiSet.Make(String) in \ + S.count (S.remove_mult (S.add_mult S.empty "a" 4) "a" 6) "a" = 0 +*) diff --git a/core/CCMultiSet.mli b/core/CCMultiSet.mli index 89d32f83..826f394b 100644 --- a/core/CCMultiSet.mli +++ b/core/CCMultiSet.mli @@ -45,6 +45,23 @@ module type S = sig val remove : t -> elt -> t + val add_mult : t -> elt -> int -> t + (** [add_mult set x n] adds [n] occurrences of [x] to [set] + @raise Invalid_argument if [n < 0] + @since NEXT_RELEASE *) + + val remove_mult : t -> elt -> int -> t + (** [remove_mult set x n] removes at most [n] occurrences of [x] from [set] + @raise Invalid_argument if [n < 0] + @since NEXT_RELEASE *) + + val update : t -> elt -> (int -> int) -> t + (** [update set x f] calls [f n] where [n] is the current multiplicity + of [x] in [set] ([0] to indicate its absence); the result of [f n] + is the new multiplicity of [x]. + @raise Invalid_argument if [f n < 0] + @since NEXT_RELEASE *) + val min : t -> elt (** Minimal element w.r.t the total ordering on elements *) From cb311bf764344540eb2953ce96bdd09ea4ca727b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 23 Nov 2014 12:54:28 +0100 Subject: [PATCH 34/39] breaking change: renamed CCIO to advanced.CCMonadIO; new CCIO module, much simpler --- _oasis | 2 +- advanced/CCMonadIO.ml | 519 +++++++++++++++++++++++++++++++++++++ advanced/CCMonadIO.mli | 323 +++++++++++++++++++++++ core/CCIO.ml | 571 ++++++++++------------------------------- core/CCIO.mli | 290 +++++---------------- 5 files changed, 1038 insertions(+), 667 deletions(-) create mode 100644 advanced/CCMonadIO.ml create mode 100644 advanced/CCMonadIO.mli diff --git a/_oasis b/_oasis index aa4e6762..e95bd4f5 100644 --- a/_oasis +++ b/_oasis @@ -61,7 +61,7 @@ Library "containers_string" Library "containers_advanced" Path: advanced Pack: true - Modules: CCLinq, CCBatch, CCCat + Modules: CCLinq, CCBatch, CCCat, CCMonadIO FindlibName: advanced FindlibParent: containers BuildDepends: containers diff --git a/advanced/CCMonadIO.ml b/advanced/CCMonadIO.ml new file mode 100644 index 00000000..961880e5 --- /dev/null +++ b/advanced/CCMonadIO.ml @@ -0,0 +1,519 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 IO Monad} *) + +type _ t = + | Return : 'a -> 'a t + | Fail : string -> 'a t + | Map : ('a -> 'b) * 'a t -> 'b t + | Bind : ('a -> 'b t) * 'a t -> 'b t + | WithGuard: unit t * 'a t -> 'a t (* run guard in any case *) + | Star : ('a -> 'b) t * 'a t -> 'b t + | Repeat : int * 'a t -> 'a list t + | RepeatIgnore : int * 'a t -> unit t + | Wrap : (unit -> 'a) -> 'a t + | SequenceMap : ('a -> 'b t) * 'a list -> 'b list t + +type 'a io = 'a t +type 'a with_finalizer = ('a t * unit t) t +type 'a or_error = [ `Ok of 'a | `Error of string ] + +let (>>=) x f = Bind(f,x) + +let bind ?finalize f a = match finalize with + | None -> Bind(f,a) + | Some b -> WithGuard (b, Bind (f,a)) + +let map f x = Map(f, x) + +let (>|=) x f = Map(f, x) + +let return x = Return x +let pure = return + +let fail msg = Fail msg + +let (<*>) f a = Star (f, a) + +let lift = map + +let lift2 f a b = + a >>= fun x -> map (f x) b + +let lift3 f a b c = + a >>= fun x -> + b >>= fun y -> map (f x y) c + +let sequence_map f l = + SequenceMap (f,l) + +let sequence l = + let _id x = x in + SequenceMap(_id, l) + +let repeat i a = + if i <= 0 then Return [] else Repeat (i,a) + +let repeat' i a = + if i <= 0 then Return () else RepeatIgnore (i,a) + +(** {2 Finalizers} *) + +let (>>>=) a f = + a >>= function + | x, finalizer -> WithGuard (finalizer, x >>= f) + +(** {2 Running} *) + +exception IOFailure of string + +let rec _run : type a. a t -> a = function + | Return x -> x + | Fail msg -> raise (IOFailure msg) + | Map (f, a) -> f (_run a) + | Bind (f, a) -> _run (f (_run a)) + | WithGuard (g, a) -> + begin try + let res = _run a in + _run g; + res + with e -> + _run g; + raise e + end + | Star (f, a) -> _run f (_run a) + | Repeat (i,a) -> _repeat [] i a + | RepeatIgnore (i,a) -> _repeat_ignore i a + | Wrap f -> f() + | SequenceMap (f, l) -> _sequence_map f l [] +and _repeat : type a. a list -> int -> a t -> a list + = fun acc i a -> match i with + | 0 -> List.rev acc + | _ -> + let x = _run a in + _repeat (x::acc) (i-1) a +and _repeat_ignore : type a. int -> a t -> unit + = fun i a -> match i with + | 0 -> () + | _ -> + let _ = _run a in + _repeat_ignore (i-1) a +and _sequence_map : type a b. (a -> b t) -> a list -> b list -> b list + = fun f l acc -> match l with + | [] -> List.rev acc + | a::tail -> + let x = _run (f a) in + _sequence_map f tail (x::acc) + +let _printers = + ref [ + (* default printer *) + ( function IOFailure msg + | Sys_error msg -> Some msg + | Exit -> Some "exit" + | _ -> None + ) + ] + +exception PrinterResult of string + +let _print_exn e = + try + List.iter + (fun p -> match p e with + | None -> () + | Some msg -> raise (PrinterResult msg) + ) !_printers; + Printexc.to_string e + with PrinterResult s -> s + +let run x = + try `Ok (_run x) + with e -> `Error (_print_exn e) + +exception IO_error of string + +let run_exn x = + try _run x + with e -> raise (IO_error (_print_exn e)) + +let register_printer p = _printers := p :: !_printers + +(** {2 Standard Wrappers} *) + +let _open_in mode flags filename () = + open_in_gen flags mode filename +let _close_in ic () = close_in ic + +let with_in ?(mode=0o644) ?(flags=[]) filename = + Wrap (_open_in mode flags filename) + >>= fun ic -> + Return (Return ic, Wrap (_close_in ic)) + +let _read ic s i len () = input ic s i len +let read ic s i len = Wrap (_read ic s i len) + +let _read_line ic () = + try Some (Pervasives.input_line ic) + with End_of_file -> None +let read_line ic = Wrap(_read_line ic) + +let rec _read_lines ic acc = + read_line ic + >>= function + | None -> return (List.rev acc) + | Some l -> _read_lines ic (l::acc) + +let read_lines ic = _read_lines ic [] + +let _read_all ic () = + let buf = Buffer.create 128 in + try + while true do + Buffer.add_channel buf ic 1024 + done; + "" (* never returned *) + with End_of_file -> Buffer.contents buf + +let read_all ic = Wrap(_read_all ic) + +let _open_out mode flags filename () = + open_out_gen flags mode filename +let _close_out oc () = close_out oc + +let with_out ?(mode=0o644) ?(flags=[]) filename = + Wrap(_open_out mode (Open_wronly::flags) filename) + >>= fun oc -> + Return(Return oc, Wrap(_close_out oc)) + +let with_out_a ?mode ?(flags=[]) filename = + with_out ?mode ~flags:(Open_creat::Open_append::flags) filename + +let _write oc s i len () = output oc s i len +let write oc s i len = Wrap (_write oc s i len) + +let _write_str oc s () = output oc s 0 (String.length s) +let write_str oc s = Wrap (_write_str oc s) + +let _write_line oc l () = + output_string oc l; + output_char oc '\n' +let write_line oc l = Wrap (_write_line oc l) + +let _write_buf oc buf () = Buffer.output_buffer oc buf +let write_buf oc buf = Wrap (_write_buf oc buf) + +let flush oc = Wrap (fun () -> Pervasives.flush oc) + +(** {2 Seq} *) + +module Seq = struct + type 'a step_result = + | Yield of 'a + | Stop + + type 'a gen = unit -> 'a step_result io + + type 'a t = 'a gen + + let _stop () = return Stop + let _yield x = return (Yield x) + + let map_pure f gen () = + gen() >>= function + | Stop -> _stop () + | Yield x -> _yield (f x) + + let map f g () = + g() >>= function + | Stop -> _stop () + | Yield x -> f x >>= _yield + + let rec filter_map f g () = + g() >>= function + | Stop -> _stop() + | Yield x -> + match f x with + | None -> filter_map f g() + | Some y -> _yield y + + let rec filter f g () = + g() >>= function + | Stop -> _stop() + | Yield x -> + if f x then _yield x else filter f g() + + let rec flat_map f g () = + g() >>= function + | Stop -> _stop () + | Yield x -> + f x >>= fun g' -> _flat_map_aux f g g' () + and _flat_map_aux f g g' () = + g'() >>= function + | Stop -> flat_map f g () + | Yield x -> _yield x + + let general_iter f acc g = + let acc = ref acc in + let rec _next () = + g() >>= function + | Stop -> _stop() + | Yield x -> + f !acc x >>= function + | `Stop -> _stop() + | `Continue (acc', ret) -> + acc := acc'; + match ret with + | None -> _next() + | Some y -> _yield y + in + _next + + let take n seq = + general_iter + (fun n x -> if n<=0 + then return `Stop + else return (`Continue (n-1, Some x)) + ) n seq + + let drop n seq = + general_iter + (fun n x -> if n<=0 + then return (`Continue (n, Some x)) + else return (`Continue (n-1, None)) + ) n seq + + let take_while p seq = + general_iter + (fun () x -> + p x >|= function + | true -> `Continue ((), Some x) + | false -> `Stop + ) () seq + + let drop_while p seq = + general_iter + (fun dropping x -> + if dropping + then p x >|= function + | true -> `Continue (true, None) + | false -> `Continue (false, Some x) + else return (`Continue (false, Some x)) + ) true seq + + (* apply all actions from [l] to [x] *) + let rec _apply_all_to x l = match l with + | [] -> return () + | f::tail -> f x >>= fun () -> _apply_all_to x tail + + let _tee funs g () = + g() >>= function + | Stop -> _stop() + | Yield x -> + _apply_all_to x funs >>= fun () -> + _yield x + + let tee funs g = match funs with + | [] -> g + | _::_ -> _tee funs g + + (** {6 Consume} *) + + let rec fold_pure f acc g = + g() >>= function + | Stop -> return acc + | Yield x -> fold_pure f (f acc x) g + + let length g = fold_pure (fun acc _ -> acc+1) 0 g + + let rec fold f acc g = + g() >>= function + | Stop -> return acc + | Yield x -> + f acc x >>= fun acc' -> fold f acc' g + + let rec iter f g = + g() >>= function + | Stop -> return () + | Yield x -> f x >>= fun _ -> iter f g + + let of_fun g = g + + let empty () = _stop() + + let singleton x = + let first = ref true in + fun () -> + if !first then (first := false; _yield x) else _stop() + + let cons x g = + let first = ref true in + fun () -> + if !first then (first := false; _yield x) else g() + + let of_list l = + let l = ref l in + fun () -> match !l with + | [] -> _stop() + | x::tail -> l:= tail; _yield x + + let of_array a = + let i = ref 0 in + fun () -> + if !i = Array.length a + then _stop() + else ( + let x = a.(!i) in + incr i; + _yield x + ) + + (* TODO: wrapper around with_in? using bind ~finalize:... ? *) + + let chunks ~size ic = + let buf = Buffer.create size in + let eof = ref false in + let next() = + if !eof then _stop() + else try + Buffer.add_channel buf ic size; + let s = Buffer.contents buf in + Buffer.clear buf; + _yield s + with End_of_file -> + let s = Buffer.contents buf in + eof := true; + if s="" then _stop() else _yield s + in + next + + let lines ic () = + try _yield (input_line ic) + with End_of_file -> _stop() + + let words _g = + failwith "words: not implemented yet" + (* TODO: state machine that goes: + - 0: read input chunk + - switch to "search for ' '", and yield word + - goto 0 if no ' ' found + - yield leftover when g returns Stop + let buf = Buffer.create 32 in + let next() = + g() >>= function + | Stop -> _stop + | Yield s -> + Buffer.add_string buf s; + search_ + in + next + *) + + let output ?sep oc seq = + let first = ref true in + iter + (fun s -> + (* print separator *) + ( if !first + then (first:=false; return ()) + else match sep with + | None -> return () + | Some sep -> write_str oc sep + ) >>= fun () -> + write_str oc s + ) seq + >>= fun () -> flush oc +end + +(** {6 File and file names} *) + +module File = struct + type t = string + + let to_string f = f + + let make f = + if Filename.is_relative f + then Filename.concat (Sys.getcwd()) f + else f + + let exists f = Wrap (fun () -> Sys.file_exists f) + + let is_directory f = Wrap (fun () -> Sys.is_directory f) + + let remove f = Wrap (fun () -> Sys.remove f) + + let _read_dir d () = + if Sys.is_directory d + then + let arr = Sys.readdir d in + Seq.map_pure make (Seq.of_array arr) + else Seq.empty + + let rec _walk d () = + if Sys.is_directory d + then + let arr = Sys.readdir d in + let tail = Seq.of_array arr in + let tail = Seq.flat_map + (fun s -> return (_walk (Filename.concat d s) ())) + tail + in Seq.cons (`Dir,d) tail + else Seq.singleton (`File, d) + + let walk t = Wrap (_walk t) + + let read_dir ?(recurse=false) d = + if recurse + then walk d + >|= Seq.filter_map + (function + | `File, f -> Some f + | `Dir, _ -> None + ) + else Wrap (_read_dir d) + + let rec _read_dir_rec d () = + if Sys.is_directory d + then + let arr = Sys.readdir d in + let arr = Seq.of_array arr in + let arr = Seq.map_pure (fun s -> Filename.concat d s) arr in + Seq.flat_map + (fun s -> + if Sys.is_directory s + then return (_read_dir_rec s ()) + else return (Seq.singleton s) + ) arr + else Seq.empty +end + +(** {2 Raw} *) + +module Raw = struct + let wrap f = Wrap f +end diff --git a/advanced/CCMonadIO.mli b/advanced/CCMonadIO.mli new file mode 100644 index 00000000..03c4216d --- /dev/null +++ b/advanced/CCMonadIO.mli @@ -0,0 +1,323 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 IO Monad} + +A simple abstraction over blocking IO, with strict evaluation. This is in +no way an alternative to Lwt/Async if you need concurrency. + +@since 0.3.3 +*) + +(** +Examples: + +- obtain the list of lines of a file: + +{[ +let l = CCIO.((with_in "/tmp/some_file" >>>= read_lines) |> run_exn);; +]} + +- transfer one file into another: + +{[ +# let a = CCIO.( + with_in "input" >>>= fun ic -> + with_out ~flags:[Open_creat] "output" >>>= fun oc -> + Seq.chunks 512 ic + |> Seq.output oc +) ;; + +# run a;; +]} +*) + +type 'a t +type 'a io = 'a t + +type 'a with_finalizer +(** A value of type ['a with_finalizer] is similar to a value ['a t] but + also contains a finalizer that must be run to cleanup. + See {!(>>>=)} to get rid of it. *) + +type 'a or_error = [ `Ok of 'a | `Error of string ] + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +(** wait for the result of an action, then use a function to build a + new action and execute it *) + +val return : 'a -> 'a t +(** Just return a value *) + +val repeat : int -> 'a t -> 'a list t +(** Repeat an IO action as many times as required *) + +val repeat' : int -> 'a t -> unit t +(** Same as {!repeat}, but ignores the result *) + +val map : ('a -> 'b) -> 'a t -> 'b t +(** Map values *) + +val (>|=) : 'a t -> ('a -> 'b) -> 'b t + +val bind : ?finalize:(unit t) -> ('a -> 'b t) -> 'a t -> 'b t +(** [bind f a] runs the action [a] and applies [f] to its result + to obtain a new action. It then behaves exactly like this new + action. + @param finalize an optional action that is always run after evaluating + the whole action *) + +val pure : 'a -> 'a t +val (<*>) : ('a -> 'b) t -> 'a t -> 'b t + +val lift : ('a -> 'b) -> 'a t -> 'b t +(** Synonym to {!map} *) + +val lift2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t +val lift3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t + +val sequence : 'a t list -> 'a list t +(** Runs operations one by one and gather their results *) + +val sequence_map : ('a -> 'b t) -> 'a list -> 'b list t +(** Generalization of {!sequence} *) + +val fail : string -> 'a t +(** [fail msg] fails with the given message. Running the IO value will + return an [`Error] variant *) + +(** {2 Finalizers} *) + +val (>>>=) : 'a with_finalizer -> ('a -> 'b t) -> 'b t +(** Same as {!(>>=)}, but taking the finalizer into account. Once this + IO value is done executing, the finalizer is executed and the resource, + fred. *) + +(** {2 Running} *) + +val run : 'a t -> 'a or_error +(** Run an IO action. + @return either [`Ok x] when [x] is the successful result of the + computation, or some [`Error "message"] *) + +exception IO_error of string + +val run_exn : 'a t -> 'a +(** Unsafe version of {!run}. It assumes non-failure. + @raise IO_error if the execution didn't go well *) + +val register_printer : (exn -> string option) -> unit +(** [register_printer p] register [p] as a possible failure printer. + If [run a] raises an exception [e], [p e] is evaluated. If [p e = Some msg] + then the error message will be [msg], otherwise other printers will + be tried *) + +(** {2 Standard Wrappers} *) + +(** {6 Input} *) + +val with_in : ?mode:int -> ?flags:open_flag list -> + string -> in_channel with_finalizer +(** Open an input file with the given optional flag list. + It yields a [in_channel] with a finalizer attached. See {!(>>>=)} to + use it. *) + +val read : in_channel -> string -> int -> int -> int t +(** Read a chunk into the given string *) + +val read_line : in_channel -> string option t +(** Read a line from the channel. Returns [None] if the input is terminated. *) + +val read_lines : in_channel -> string list t +(** Read all lines eagerly *) + +val read_all : in_channel -> string t +(** Read the whole channel into a buffer, then converted into a string *) + +(** {6 Output} *) + +val with_out : ?mode:int -> ?flags:open_flag list -> + string -> out_channel with_finalizer +(** Same as {!with_in} but for an output channel *) + +val with_out_a : ?mode:int -> ?flags:open_flag list -> + string -> out_channel with_finalizer +(** Similar to {!with_out} but with the [Open_append] and [Open_creat] + flags activated *) + +val write : out_channel -> string -> int -> int -> unit t + +val write_str : out_channel -> string -> unit t + +val write_buf : out_channel -> Buffer.t -> unit t + +val write_line : out_channel -> string -> unit t + +val flush : out_channel -> unit t + +(* TODO: printf/fprintf wrappers *) + +(** {2 Streams} + +Iterators on chunks of bytes, or lines, or any other value using combinators. +Those iterators are usable only once, because their source might +be usable only once (think of a socket) *) + +module Seq : sig + type 'a t + (** An IO stream of values of type 'a, consumable (iterable only once) *) + + val map : ('a -> 'b io) -> 'a t -> 'b t + (** Map values with actions *) + + val map_pure : ('a -> 'b) -> 'a t -> 'b t + (** Map values with a pure function *) + + val filter_map : ('a -> 'b option) -> 'a t -> 'b t + + val filter : ('a -> bool) -> 'a t -> 'a t + + val flat_map : ('a -> 'b t io) -> 'a t -> 'b t + (** Map each value to a sub sequence of values *) + + val take : int -> 'a t -> 'a t + + val drop : int -> 'a t -> 'a t + + val take_while : ('a -> bool io) -> 'a t -> 'a t + + val drop_while : ('a -> bool io) -> 'a t -> 'a t + + val general_iter : ('b -> 'a -> [`Stop | `Continue of ('b * 'c option)] io) -> + 'b -> 'a t -> 'c t + (** [general_iter f acc seq] performs a [filter_map] over [seq], + using [f]. [f] is given a state and the current value, and + can either return [`Stop] to indicate it stops traversing, + or [`Continue (st, c)] where [st] is the new state and + [c] an optional output value. + The result is the stream of values output by [f] *) + + val tee : ('a -> unit io) list -> 'a t -> 'a t + (** [tee funs seq] behaves like [seq], but each element is given to + every function [f] in [funs]. This function [f] returns an action that + is eagerly executed. *) + + (** {6 Consume} *) + + val iter : ('a -> _ io) -> 'a t -> unit io + (** Iterate on the stream, with an action for each element *) + + val length : _ t -> int io + (** Length of the stream *) + + val fold : ('b -> 'a -> 'b io) -> 'b -> 'a t -> 'b io + (** [fold f acc seq] folds over [seq], consuming it. Every call to [f] + has the right to return an IO value. *) + + val fold_pure : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b io + (** [fold f acc seq] folds over [seq], consuming it. [f] is pure. *) + + (** {6 Standard Wrappers} *) + + type 'a step_result = + | Yield of 'a + | Stop + + type 'a gen = unit -> 'a step_result io + + val of_fun : 'a gen -> 'a t + (** Create a stream from a function that yields an element or stops *) + + val empty : 'a t + val singleton : 'a -> 'a t + val cons : 'a -> 'a t -> 'a t + val of_list : 'a list -> 'a t + val of_array : 'a array -> 'a t + + val chunks : size:int -> in_channel -> string t + (** Read the channel's content into chunks of size [size] *) + + val lines : in_channel -> string t + (** Lines of an input channel *) + + val words : string t -> string t + (** Split strings into words at " " boundaries. + {b NOT IMPLEMENTED} *) + + val output : ?sep:string -> out_channel -> string t -> unit io + (** [output oc seq] outputs every value of [seq] into [oc], separated + with the optional argument [sep] (default: None). + It blocks until all values of [seq] are produced and written to [oc]. *) +end + +(** {6 File and file names} + +How to list recursively files in a directory: +{[ + CCIO.( + File.read_dir ~recurse:true (File.make "/tmp") + >>= Seq.output ~sep:"\n" stdout + ) |> CCIO.run_exn ;; + + ]} + +See {!File.walk} if you also need to list directories. +*) + +module File : sig + type t = string + (** A file is always represented by its absolute path *) + + val to_string : t -> string + + val make : string -> t + (** Build a file representation from a path (absolute or relative) *) + + val exists : t -> bool io + + val is_directory : t -> bool io + + val remove : t -> unit io + + val read_dir : ?recurse:bool -> t -> t Seq.t io + (** [read_dir d] returns a sequence of files and directory contained + in the directory [d] (or an empty stream if [d] is not a directory) + @param recurse if true (default [false]), sub-directories are also + explored *) + + val walk : t -> ([`File | `Dir] * t) Seq.t io + (** similar to {!read_dir} (with [recurse=true]), this function walks + a directory recursively and yields either files or directories. + Is a file anything that doesn't satisfy {!is_directory} (including + symlinks, etc.) *) +end + +(** {2 Low level access} *) +module Raw : sig + val wrap : (unit -> 'a) -> 'a t + (** [wrap f] is the IO action that, when executed, returns [f ()]. + [f] should be callable as many times as required *) +end diff --git a/core/CCIO.ml b/core/CCIO.ml index 961880e5..5f8a6116 100644 --- a/core/CCIO.ml +++ b/core/CCIO.ml @@ -24,432 +24,127 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 IO Monad} *) +(** {1 IO Utils} *) -type _ t = - | Return : 'a -> 'a t - | Fail : string -> 'a t - | Map : ('a -> 'b) * 'a t -> 'b t - | Bind : ('a -> 'b t) * 'a t -> 'b t - | WithGuard: unit t * 'a t -> 'a t (* run guard in any case *) - | Star : ('a -> 'b) t * 'a t -> 'b t - | Repeat : int * 'a t -> 'a list t - | RepeatIgnore : int * 'a t -> unit t - | Wrap : (unit -> 'a) -> 'a t - | SequenceMap : ('a -> 'b t) * 'a list -> 'b list t +type 'a or_error = [ `Ok of 'a | `Error of string ] (** See {!CCError} *) +type 'a gen = unit -> 'a option (** See {!CCGen} *) -type 'a io = 'a t -type 'a with_finalizer = ('a t * unit t) t -type 'a or_error = [ `Ok of 'a | `Error of string ] - -let (>>=) x f = Bind(f,x) - -let bind ?finalize f a = match finalize with - | None -> Bind(f,a) - | Some b -> WithGuard (b, Bind (f,a)) - -let map f x = Map(f, x) - -let (>|=) x f = Map(f, x) - -let return x = Return x -let pure = return - -let fail msg = Fail msg - -let (<*>) f a = Star (f, a) - -let lift = map - -let lift2 f a b = - a >>= fun x -> map (f x) b - -let lift3 f a b c = - a >>= fun x -> - b >>= fun y -> map (f x y) c - -let sequence_map f l = - SequenceMap (f,l) - -let sequence l = - let _id x = x in - SequenceMap(_id, l) - -let repeat i a = - if i <= 0 then Return [] else Repeat (i,a) - -let repeat' i a = - if i <= 0 then Return () else RepeatIgnore (i,a) - -(** {2 Finalizers} *) - -let (>>>=) a f = - a >>= function - | x, finalizer -> WithGuard (finalizer, x >>= f) - -(** {2 Running} *) - -exception IOFailure of string - -let rec _run : type a. a t -> a = function - | Return x -> x - | Fail msg -> raise (IOFailure msg) - | Map (f, a) -> f (_run a) - | Bind (f, a) -> _run (f (_run a)) - | WithGuard (g, a) -> - begin try - let res = _run a in - _run g; - res - with e -> - _run g; - raise e - end - | Star (f, a) -> _run f (_run a) - | Repeat (i,a) -> _repeat [] i a - | RepeatIgnore (i,a) -> _repeat_ignore i a - | Wrap f -> f() - | SequenceMap (f, l) -> _sequence_map f l [] -and _repeat : type a. a list -> int -> a t -> a list - = fun acc i a -> match i with - | 0 -> List.rev acc - | _ -> - let x = _run a in - _repeat (x::acc) (i-1) a -and _repeat_ignore : type a. int -> a t -> unit - = fun i a -> match i with - | 0 -> () - | _ -> - let _ = _run a in - _repeat_ignore (i-1) a -and _sequence_map : type a b. (a -> b t) -> a list -> b list -> b list - = fun f l acc -> match l with - | [] -> List.rev acc - | a::tail -> - let x = _run (f a) in - _sequence_map f tail (x::acc) - -let _printers = - ref [ - (* default printer *) - ( function IOFailure msg - | Sys_error msg -> Some msg - | Exit -> Some "exit" - | _ -> None - ) - ] - -exception PrinterResult of string - -let _print_exn e = +let with_in ?(mode=0o644) ?(flags=[]) filename f = + let ic = open_in_gen flags mode filename in try - List.iter - (fun p -> match p e with - | None -> () - | Some msg -> raise (PrinterResult msg) - ) !_printers; - Printexc.to_string e - with PrinterResult s -> s + let x = f ic in + close_in ic; + x + with e -> + close_in ic; + raise e -let run x = - try `Ok (_run x) - with e -> `Error (_print_exn e) +let read_chunks ?(size=256) ic = + let buf = Buffer.create size in + let eof = ref false in + let next() = + if !eof then None + else try + Buffer.add_channel buf ic size; + let s = Buffer.contents buf in + Buffer.clear buf; + Some s + with End_of_file -> + let s = Buffer.contents buf in + eof := true; + if s="" then None else Some s + in + next -exception IO_error of string - -let run_exn x = - try _run x - with e -> raise (IO_error (_print_exn e)) - -let register_printer p = _printers := p :: !_printers - -(** {2 Standard Wrappers} *) - -let _open_in mode flags filename () = - open_in_gen flags mode filename -let _close_in ic () = close_in ic - -let with_in ?(mode=0o644) ?(flags=[]) filename = - Wrap (_open_in mode flags filename) - >>= fun ic -> - Return (Return ic, Wrap (_close_in ic)) - -let _read ic s i len () = input ic s i len -let read ic s i len = Wrap (_read ic s i len) - -let _read_line ic () = - try Some (Pervasives.input_line ic) +let read_line ic = + try Some (input_line ic) with End_of_file -> None -let read_line ic = Wrap(_read_line ic) -let rec _read_lines ic acc = - read_line ic - >>= function - | None -> return (List.rev acc) - | Some l -> _read_lines ic (l::acc) +let read_lines ic = + let stop = ref false in + fun () -> + if !stop then None + else try Some (input_line ic) + with End_of_file -> (stop:=true; None) -let read_lines ic = _read_lines ic [] +let read_lines_l ic = + let l = ref [] in + try + while true do + l := input_line ic :: !l + done; + assert false + with End_of_file -> + List.rev !l -let _read_all ic () = - let buf = Buffer.create 128 in +let read_all ic = + let buf = Buffer.create 256 in try while true do Buffer.add_channel buf ic 1024 done; - "" (* never returned *) - with End_of_file -> Buffer.contents buf + assert false (* never reached*) + with End_of_file -> + Buffer.contents buf -let read_all ic = Wrap(_read_all ic) +let with_out ?(mode=0o644) ?(flags=[]) filename f = + let oc = open_out_gen flags mode filename in + try + let x = f oc in + close_out oc; + x + with e -> + close_out oc; + raise e -let _open_out mode flags filename () = - open_out_gen flags mode filename -let _close_out oc () = close_out oc +let with_out_a ?mode ?(flags=[]) filename f = + with_out ?mode ~flags:(Open_creat::Open_append::flags) filename f -let with_out ?(mode=0o644) ?(flags=[]) filename = - Wrap(_open_out mode (Open_wronly::flags) filename) - >>= fun oc -> - Return(Return oc, Wrap(_close_out oc)) - -let with_out_a ?mode ?(flags=[]) filename = - with_out ?mode ~flags:(Open_creat::Open_append::flags) filename - -let _write oc s i len () = output oc s i len -let write oc s i len = Wrap (_write oc s i len) - -let _write_str oc s () = output oc s 0 (String.length s) -let write_str oc s = Wrap (_write_str oc s) - -let _write_line oc l () = - output_string oc l; +let write_line oc s = + output_string oc s; output_char oc '\n' -let write_line oc l = Wrap (_write_line oc l) -let _write_buf oc buf () = Buffer.output_buffer oc buf -let write_buf oc buf = Wrap (_write_buf oc buf) +let write_gen ?(sep="") oc g = + let rec recurse () = match g() with + | None -> () + | Some s -> + output_string oc sep; + output_string oc s; + recurse () + in match g() with + | None -> () + | Some s -> + output_string oc s; + recurse () -let flush oc = Wrap (fun () -> Pervasives.flush oc) +let rec write_lines oc g = match g () with + | None -> () + | Some l -> + write_line oc l; + write_lines oc g -(** {2 Seq} *) +let write_lines_l oc l = + List.iter (write_line oc) l -module Seq = struct - type 'a step_result = - | Yield of 'a - | Stop +let tee funs g () = match g() with + | None -> None + | Some x as res -> + List.iter + (fun f -> + try f x + with _ -> () + ) funs; + res - type 'a gen = unit -> 'a step_result io +(* TODO: lines/unlines: string gen -> string gen *) - type 'a t = 'a gen - - let _stop () = return Stop - let _yield x = return (Yield x) - - let map_pure f gen () = - gen() >>= function - | Stop -> _stop () - | Yield x -> _yield (f x) - - let map f g () = - g() >>= function - | Stop -> _stop () - | Yield x -> f x >>= _yield - - let rec filter_map f g () = - g() >>= function - | Stop -> _stop() - | Yield x -> - match f x with - | None -> filter_map f g() - | Some y -> _yield y - - let rec filter f g () = - g() >>= function - | Stop -> _stop() - | Yield x -> - if f x then _yield x else filter f g() - - let rec flat_map f g () = - g() >>= function - | Stop -> _stop () - | Yield x -> - f x >>= fun g' -> _flat_map_aux f g g' () - and _flat_map_aux f g g' () = - g'() >>= function - | Stop -> flat_map f g () - | Yield x -> _yield x - - let general_iter f acc g = - let acc = ref acc in - let rec _next () = - g() >>= function - | Stop -> _stop() - | Yield x -> - f !acc x >>= function - | `Stop -> _stop() - | `Continue (acc', ret) -> - acc := acc'; - match ret with - | None -> _next() - | Some y -> _yield y - in - _next - - let take n seq = - general_iter - (fun n x -> if n<=0 - then return `Stop - else return (`Continue (n-1, Some x)) - ) n seq - - let drop n seq = - general_iter - (fun n x -> if n<=0 - then return (`Continue (n, Some x)) - else return (`Continue (n-1, None)) - ) n seq - - let take_while p seq = - general_iter - (fun () x -> - p x >|= function - | true -> `Continue ((), Some x) - | false -> `Stop - ) () seq - - let drop_while p seq = - general_iter - (fun dropping x -> - if dropping - then p x >|= function - | true -> `Continue (true, None) - | false -> `Continue (false, Some x) - else return (`Continue (false, Some x)) - ) true seq - - (* apply all actions from [l] to [x] *) - let rec _apply_all_to x l = match l with - | [] -> return () - | f::tail -> f x >>= fun () -> _apply_all_to x tail - - let _tee funs g () = - g() >>= function - | Stop -> _stop() - | Yield x -> - _apply_all_to x funs >>= fun () -> - _yield x - - let tee funs g = match funs with - | [] -> g - | _::_ -> _tee funs g - - (** {6 Consume} *) - - let rec fold_pure f acc g = - g() >>= function - | Stop -> return acc - | Yield x -> fold_pure f (f acc x) g - - let length g = fold_pure (fun acc _ -> acc+1) 0 g - - let rec fold f acc g = - g() >>= function - | Stop -> return acc - | Yield x -> - f acc x >>= fun acc' -> fold f acc' g - - let rec iter f g = - g() >>= function - | Stop -> return () - | Yield x -> f x >>= fun _ -> iter f g - - let of_fun g = g - - let empty () = _stop() - - let singleton x = - let first = ref true in - fun () -> - if !first then (first := false; _yield x) else _stop() - - let cons x g = - let first = ref true in - fun () -> - if !first then (first := false; _yield x) else g() - - let of_list l = - let l = ref l in - fun () -> match !l with - | [] -> _stop() - | x::tail -> l:= tail; _yield x - - let of_array a = - let i = ref 0 in - fun () -> - if !i = Array.length a - then _stop() - else ( - let x = a.(!i) in - incr i; - _yield x - ) - - (* TODO: wrapper around with_in? using bind ~finalize:... ? *) - - let chunks ~size ic = - let buf = Buffer.create size in - let eof = ref false in - let next() = - if !eof then _stop() - else try - Buffer.add_channel buf ic size; - let s = Buffer.contents buf in - Buffer.clear buf; - _yield s - with End_of_file -> - let s = Buffer.contents buf in - eof := true; - if s="" then _stop() else _yield s - in - next - - let lines ic () = - try _yield (input_line ic) - with End_of_file -> _stop() - - let words _g = - failwith "words: not implemented yet" - (* TODO: state machine that goes: - - 0: read input chunk - - switch to "search for ' '", and yield word - - goto 0 if no ' ' found - - yield leftover when g returns Stop - let buf = Buffer.create 32 in - let next() = - g() >>= function - | Stop -> _stop - | Yield s -> - Buffer.add_string buf s; - search_ - in - next - *) - - let output ?sep oc seq = - let first = ref true in - iter - (fun s -> - (* print separator *) - ( if !first - then (first:=false; return ()) - else match sep with - | None -> return () - | Some sep -> write_str oc sep - ) >>= fun () -> - write_str oc s - ) seq - >>= fun () -> flush oc -end - -(** {6 File and file names} *) +(* TODO: words: string gen -> string gen, + with a state machine that goes: + - 0: read input chunk + - switch to "search for ' '", and yield word + - goto 0 if no ' ' found + - yield leftover when g returns Stop +*) module File = struct type t = string @@ -461,59 +156,53 @@ module File = struct then Filename.concat (Sys.getcwd()) f else f - let exists f = Wrap (fun () -> Sys.file_exists f) + let exists f = Sys.file_exists f - let is_directory f = Wrap (fun () -> Sys.is_directory f) + let is_directory f = Sys.is_directory f - let remove f = Wrap (fun () -> Sys.remove f) + let remove f = Sys.remove f - let _read_dir d () = + let read_dir_base d = if Sys.is_directory d then let arr = Sys.readdir d in - Seq.map_pure make (Seq.of_array arr) - else Seq.empty + CCGen.of_array arr + else CCGen.empty - let rec _walk d () = + let cons_ x tl = + let first=ref true in + fun () -> + if !first then ( + first := false; + Some x + ) else tl () + + let rec walk d = if Sys.is_directory d then let arr = Sys.readdir d in - let tail = Seq.of_array arr in - let tail = Seq.flat_map - (fun s -> return (_walk (Filename.concat d s) ())) + let tail = CCGen.of_array arr in + let tail = CCGen.flat_map + (fun s -> walk (Filename.concat d s)) tail - in Seq.cons (`Dir,d) tail - else Seq.singleton (`File, d) + in cons_ (`Dir,d) tail + else CCGen.singleton (`File, d) - let walk t = Wrap (_walk t) + type walk_item = [`File | `Dir] * t let read_dir ?(recurse=false) d = if recurse - then walk d - >|= Seq.filter_map - (function - | `File, f -> Some f - | `Dir, _ -> None - ) - else Wrap (_read_dir d) - - let rec _read_dir_rec d () = - if Sys.is_directory d then - let arr = Sys.readdir d in - let arr = Seq.of_array arr in - let arr = Seq.map_pure (fun s -> Filename.concat d s) arr in - Seq.flat_map - (fun s -> - if Sys.is_directory s - then return (_read_dir_rec s ()) - else return (Seq.singleton s) - ) arr - else Seq.empty -end + CCGen.filter_map + (function + | `File, f -> Some f + | `Dir, _ -> None + ) (walk d) + else read_dir_base d -(** {2 Raw} *) - -module Raw = struct - let wrap f = Wrap f + let show_walk_item (i,f) = + (match i with + | `File -> "file:" + | `Dir -> "dir: " + ) ^ f end diff --git a/core/CCIO.mli b/core/CCIO.mli index 03c4216d..20bdd622 100644 --- a/core/CCIO.mli +++ b/core/CCIO.mli @@ -24,267 +24,110 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 IO Monad} +(** {1 IO Utils} -A simple abstraction over blocking IO, with strict evaluation. This is in -no way an alternative to Lwt/Async if you need concurrency. +Simple utilities to deal with basic Input/Output tasks in a resource-safe +way. For advanced IO tasks, the user is advised to use something +like Lwt or Async, that are far more comprehensive. +This module depends on {!CCGen}. -@since 0.3.3 -*) +@since NEXT_RELEASE + +{b NOTE} this was formerly a monadic IO module. The old module is now +in [containers.advanced] under the name [CCMonadIO]. -(** Examples: - obtain the list of lines of a file: {[ -let l = CCIO.((with_in "/tmp/some_file" >>>= read_lines) |> run_exn);; +# let l = CCIO.(with_in "/tmp/some_file" read_lines);; ]} - transfer one file into another: {[ -# let a = CCIO.( - with_in "input" >>>= fun ic -> - with_out ~flags:[Open_creat] "output" >>>= fun oc -> - Seq.chunks 512 ic - |> Seq.output oc +# CCIO.( + with_in "/tmp/input" + (fun ic -> + with_out ~flags:[Open_creat] ~mode:0o644 "/tmp/output" + (fun oc -> + Seq.chunks 512 ic |> Seq.to_output oc + ) + ) ) ;; - -# run a;; ]} *) -type 'a t -type 'a io = 'a t +type 'a or_error = [ `Ok of 'a | `Error of string ] (** See {!CCError} *) +type 'a gen = unit -> 'a option (** See {!CCGen} *) -type 'a with_finalizer -(** A value of type ['a with_finalizer] is similar to a value ['a t] but - also contains a finalizer that must be run to cleanup. - See {!(>>>=)} to get rid of it. *) - -type 'a or_error = [ `Ok of 'a | `Error of string ] - -val (>>=) : 'a t -> ('a -> 'b t) -> 'b t -(** wait for the result of an action, then use a function to build a - new action and execute it *) - -val return : 'a -> 'a t -(** Just return a value *) - -val repeat : int -> 'a t -> 'a list t -(** Repeat an IO action as many times as required *) - -val repeat' : int -> 'a t -> unit t -(** Same as {!repeat}, but ignores the result *) - -val map : ('a -> 'b) -> 'a t -> 'b t -(** Map values *) - -val (>|=) : 'a t -> ('a -> 'b) -> 'b t - -val bind : ?finalize:(unit t) -> ('a -> 'b t) -> 'a t -> 'b t -(** [bind f a] runs the action [a] and applies [f] to its result - to obtain a new action. It then behaves exactly like this new - action. - @param finalize an optional action that is always run after evaluating - the whole action *) - -val pure : 'a -> 'a t -val (<*>) : ('a -> 'b) t -> 'a t -> 'b t - -val lift : ('a -> 'b) -> 'a t -> 'b t -(** Synonym to {!map} *) - -val lift2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t -val lift3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t - -val sequence : 'a t list -> 'a list t -(** Runs operations one by one and gather their results *) - -val sequence_map : ('a -> 'b t) -> 'a list -> 'b list t -(** Generalization of {!sequence} *) - -val fail : string -> 'a t -(** [fail msg] fails with the given message. Running the IO value will - return an [`Error] variant *) - -(** {2 Finalizers} *) - -val (>>>=) : 'a with_finalizer -> ('a -> 'b t) -> 'b t -(** Same as {!(>>=)}, but taking the finalizer into account. Once this - IO value is done executing, the finalizer is executed and the resource, - fred. *) - -(** {2 Running} *) - -val run : 'a t -> 'a or_error -(** Run an IO action. - @return either [`Ok x] when [x] is the successful result of the - computation, or some [`Error "message"] *) - -exception IO_error of string - -val run_exn : 'a t -> 'a -(** Unsafe version of {!run}. It assumes non-failure. - @raise IO_error if the execution didn't go well *) - -val register_printer : (exn -> string option) -> unit -(** [register_printer p] register [p] as a possible failure printer. - If [run a] raises an exception [e], [p e] is evaluated. If [p e = Some msg] - then the error message will be [msg], otherwise other printers will - be tried *) - -(** {2 Standard Wrappers} *) - -(** {6 Input} *) +(** {2 Input} *) val with_in : ?mode:int -> ?flags:open_flag list -> - string -> in_channel with_finalizer -(** Open an input file with the given optional flag list. - It yields a [in_channel] with a finalizer attached. See {!(>>>=)} to - use it. *) + string -> (in_channel -> 'a) -> 'a +(** Open an input file with the given optional flag list, calls the function + on the input channel. When the function raises or returns, the + channel is closed. *) -val read : in_channel -> string -> int -> int -> int t -(** Read a chunk into the given string *) +val read_chunks : ?size:int -> in_channel -> string gen +(** Read the channel's content into chunks of size [size] *) -val read_line : in_channel -> string option t -(** Read a line from the channel. Returns [None] if the input is terminated. *) +val read_line : in_channel -> string option +(** Read a line from the channel. Returns [None] if the input is terminated. + The "\n" is removed from the line. *) -val read_lines : in_channel -> string list t -(** Read all lines eagerly *) +val read_lines : in_channel -> string gen +(** Read all lines. The generator should be traversed only once. *) -val read_all : in_channel -> string t +val read_lines_l : in_channel -> string list +(** Read all lines into a list *) + +val read_all : in_channel -> string (** Read the whole channel into a buffer, then converted into a string *) (** {6 Output} *) val with_out : ?mode:int -> ?flags:open_flag list -> - string -> out_channel with_finalizer + string -> (out_channel -> 'a) -> 'a (** Same as {!with_in} but for an output channel *) val with_out_a : ?mode:int -> ?flags:open_flag list -> - string -> out_channel with_finalizer + string -> (out_channel -> 'a) -> 'a (** Similar to {!with_out} but with the [Open_append] and [Open_creat] flags activated *) -val write : out_channel -> string -> int -> int -> unit t +val write_line : out_channel -> string -> unit +(** Write the given string on the channel, followed by "\n" *) -val write_str : out_channel -> string -> unit t +val write_gen : ?sep:string -> out_channel -> string gen -> unit +(** Write the given strings on the output. If provided, add [sep] between + every two string (but not at the end) *) -val write_buf : out_channel -> Buffer.t -> unit t +val write_lines : out_channel -> string gen -> unit +(** Write every string on the output, followed by "\n". *) -val write_line : out_channel -> string -> unit t +val write_lines_l : out_channel -> string list -> unit -val flush : out_channel -> unit t +(** {2 Misc for Generators} *) -(* TODO: printf/fprintf wrappers *) - -(** {2 Streams} - -Iterators on chunks of bytes, or lines, or any other value using combinators. -Those iterators are usable only once, because their source might -be usable only once (think of a socket) *) - -module Seq : sig - type 'a t - (** An IO stream of values of type 'a, consumable (iterable only once) *) - - val map : ('a -> 'b io) -> 'a t -> 'b t - (** Map values with actions *) - - val map_pure : ('a -> 'b) -> 'a t -> 'b t - (** Map values with a pure function *) - - val filter_map : ('a -> 'b option) -> 'a t -> 'b t - - val filter : ('a -> bool) -> 'a t -> 'a t - - val flat_map : ('a -> 'b t io) -> 'a t -> 'b t - (** Map each value to a sub sequence of values *) - - val take : int -> 'a t -> 'a t - - val drop : int -> 'a t -> 'a t - - val take_while : ('a -> bool io) -> 'a t -> 'a t - - val drop_while : ('a -> bool io) -> 'a t -> 'a t - - val general_iter : ('b -> 'a -> [`Stop | `Continue of ('b * 'c option)] io) -> - 'b -> 'a t -> 'c t - (** [general_iter f acc seq] performs a [filter_map] over [seq], - using [f]. [f] is given a state and the current value, and - can either return [`Stop] to indicate it stops traversing, - or [`Continue (st, c)] where [st] is the new state and - [c] an optional output value. - The result is the stream of values output by [f] *) - - val tee : ('a -> unit io) list -> 'a t -> 'a t - (** [tee funs seq] behaves like [seq], but each element is given to - every function [f] in [funs]. This function [f] returns an action that - is eagerly executed. *) - - (** {6 Consume} *) - - val iter : ('a -> _ io) -> 'a t -> unit io - (** Iterate on the stream, with an action for each element *) - - val length : _ t -> int io - (** Length of the stream *) - - val fold : ('b -> 'a -> 'b io) -> 'b -> 'a t -> 'b io - (** [fold f acc seq] folds over [seq], consuming it. Every call to [f] - has the right to return an IO value. *) - - val fold_pure : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b io - (** [fold f acc seq] folds over [seq], consuming it. [f] is pure. *) - - (** {6 Standard Wrappers} *) - - type 'a step_result = - | Yield of 'a - | Stop - - type 'a gen = unit -> 'a step_result io - - val of_fun : 'a gen -> 'a t - (** Create a stream from a function that yields an element or stops *) - - val empty : 'a t - val singleton : 'a -> 'a t - val cons : 'a -> 'a t -> 'a t - val of_list : 'a list -> 'a t - val of_array : 'a array -> 'a t - - val chunks : size:int -> in_channel -> string t - (** Read the channel's content into chunks of size [size] *) - - val lines : in_channel -> string t - (** Lines of an input channel *) - - val words : string t -> string t - (** Split strings into words at " " boundaries. - {b NOT IMPLEMENTED} *) - - val output : ?sep:string -> out_channel -> string t -> unit io - (** [output oc seq] outputs every value of [seq] into [oc], separated - with the optional argument [sep] (default: None). - It blocks until all values of [seq] are produced and written to [oc]. *) -end +val tee : ('a -> unit) list -> 'a gen -> 'a gen +(** [tee funs gen] behaves like [gen], but each element is given to + every function [f] in [funs] at the time the element is produced. *) (** {6 File and file names} How to list recursively files in a directory: {[ - CCIO.( - File.read_dir ~recurse:true (File.make "/tmp") - >>= Seq.output ~sep:"\n" stdout - ) |> CCIO.run_exn ;; +# let files = CCIO.File.read_dir ~recurse:true (CCIO.File.make "/tmp");; +# CCIO.write_lines stdout files;; +]} - ]} +See {!File.walk} if you also need to list directories: -See {!File.walk} if you also need to list directories. +{[ +# let content = CCIO.File.walk (CCIO.File.make "/tmp");; +# CCGen.map CCIO.File.show_walk_item content |> CCIO.write_lines stdout;; *) module File : sig @@ -296,28 +139,25 @@ module File : sig val make : string -> t (** Build a file representation from a path (absolute or relative) *) - val exists : t -> bool io + val exists : t -> bool - val is_directory : t -> bool io + val is_directory : t -> bool - val remove : t -> unit io + val remove : t -> unit - val read_dir : ?recurse:bool -> t -> t Seq.t io + val read_dir : ?recurse:bool -> t -> t gen (** [read_dir d] returns a sequence of files and directory contained in the directory [d] (or an empty stream if [d] is not a directory) @param recurse if true (default [false]), sub-directories are also explored *) - val walk : t -> ([`File | `Dir] * t) Seq.t io + type walk_item = [`File | `Dir] * t + + val walk : t -> walk_item gen (** similar to {!read_dir} (with [recurse=true]), this function walks a directory recursively and yields either files or directories. Is a file anything that doesn't satisfy {!is_directory} (including symlinks, etc.) *) -end -(** {2 Low level access} *) -module Raw : sig - val wrap : (unit -> 'a) -> 'a t - (** [wrap f] is the IO action that, when executed, returns [f ()]. - [f] should be callable as many times as required *) + val show_walk_item : walk_item -> string end From f010bc6ebc58d35d01a0e8fbcc2d0255f20c1e0f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 23 Nov 2014 13:48:50 +0100 Subject: [PATCH 35/39] move misc/Cache to core/CCCache --- _oasis | 4 ++-- benchs/run_benchs.ml | 22 ++++++++++++---------- misc/cache.ml => core/CCCache.ml | 0 misc/cache.mli => core/CCCache.mli | 0 4 files changed, 14 insertions(+), 12 deletions(-) rename misc/cache.ml => core/CCCache.ml (100%) rename misc/cache.mli => core/CCCache.mli (100%) diff --git a/_oasis b/_oasis index e95bd4f5..f76776a2 100644 --- a/_oasis +++ b/_oasis @@ -48,7 +48,7 @@ Library "containers" CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCKList, CCInt, CCBool, CCArray, CCOrd, CCIO, CCRandom, CCKTree, CCTrie, CCString, CCHashtbl, - CCFlatHashtbl, CCSexp, CCMap + CCFlatHashtbl, CCSexp, CCMap, CCCache BuildDepends: bytes Library "containers_string" @@ -76,7 +76,7 @@ Library "containers_pervasives" Library "containers_misc" Path: misc Pack: true - Modules: Cache, FHashtbl, FlatHashtbl, Hashset, + Modules: FHashtbl, FlatHashtbl, Hashset, Heap, LazyGraph, PersistentGraph, PHashtbl, SkipList, SplayTree, SplayMap, Univ, Bij, PiCalculus, RAL, UnionFind, SmallSet, AbsSet, CSM, diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 2443cfbb..925ae450 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -127,8 +127,10 @@ module Vec = struct end module Cache = struct + module C = CCCache + let make_fib c = - let f = Cache.with_cache_rec c + let f = C.with_cache_rec c (fun fib n -> match n with | 0 -> 0 | 1 -> 1 @@ -137,22 +139,22 @@ module Cache = struct ) in fun x -> - Cache.clear c; + C.clear c; f x let bench_fib n = let l = - [ "replacing_fib (128)", make_fib (Cache.replacing 128), n - ; "LRU_fib (128)", make_fib (Cache.lru 128), n - ; "replacing_fib (16)", make_fib (Cache.replacing 16), n - ; "LRU_fib (16)", make_fib (Cache.lru 16), n - ; "unbounded", make_fib (Cache.unbounded 32), n + [ "replacing_fib (128)", make_fib (C.replacing 128), n + ; "LRU_fib (128)", make_fib (C.lru 128), n + ; "replacing_fib (16)", make_fib (C.replacing 16), n + ; "LRU_fib (16)", make_fib (C.lru 16), n + ; "unbounded", make_fib (C.unbounded 32), n ] in let l = if n <= 20 - then [ "linear_fib (5)", make_fib (Cache.linear 5), n - ; "linear_fib (32)", make_fib (Cache.linear 32), n - ; "dummy_fib", make_fib Cache.dummy, n + then [ "linear_fib (5)", make_fib (C.linear 5), n + ; "linear_fib (32)", make_fib (C.linear 32), n + ; "dummy_fib", make_fib C.dummy, n ] @ l else l in diff --git a/misc/cache.ml b/core/CCCache.ml similarity index 100% rename from misc/cache.ml rename to core/CCCache.ml diff --git a/misc/cache.mli b/core/CCCache.mli similarity index 100% rename from misc/cache.mli rename to core/CCCache.mli From 6e3b0f534a9ae182a9945b42dbc98b4a3e186ffd Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 23 Nov 2014 13:49:04 +0100 Subject: [PATCH 36/39] CCache.{size,iter} --- core/CCCache.ml | 53 +++++++++++++++++++++++++++++++++++++++--------- core/CCCache.mli | 48 ++++++++++++------------------------------- 2 files changed, 56 insertions(+), 45 deletions(-) diff --git a/core/CCCache.ml b/core/CCCache.ml index b5a657ff..e65b2e2f 100644 --- a/core/CCCache.ml +++ b/core/CCCache.ml @@ -23,7 +23,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 Memoization caches} *) +(** {1 Caches} *) type 'a equal = 'a -> 'a -> bool type 'a hash = 'a -> int @@ -36,6 +36,8 @@ let default_hash_ = Hashtbl.hash type ('a,'b) t = { set : 'a -> 'b -> unit; get : 'a -> 'b; (* or raise Not_found *) + size : unit -> int; + iter : ('a -> 'b -> unit) -> unit; clear : unit -> unit; } @@ -53,10 +55,16 @@ let with_cache_rec c f = let rec f' x = with_cache c (f f') x in f' +let size c = c.size () + +let iter c f = c.iter f + let dummy = { set=(fun _ _ -> ()); get=(fun _ -> raise Not_found); clear=(fun _ -> ()); + size=(fun _ -> 0); + iter=(fun _ -> ()); } module Linear = struct @@ -91,6 +99,14 @@ module Linear = struct let set c x y = c.arr.(c.i) <- Pair (x,y); c.i <- (c.i + 1) mod Array.length c.arr + + let iter c f = + Array.iter (function Pair (x,y) -> f x y | Empty -> ()) c.arr + + let size c () = + let r = ref 0 in + iter c (fun _ _ -> incr r); + !r end let linear ?(eq=default_eq_) size = @@ -99,6 +115,8 @@ let linear ?(eq=default_eq_) size = { get=(fun x -> Linear.get arr x); set=(fun x y -> Linear.set arr x y); clear=(fun () -> Linear.clear arr); + size=Linear.size arr; + iter=Linear.iter arr; } module Replacing = struct @@ -110,13 +128,15 @@ module Replacing = struct eq : 'a equal; hash : 'a hash; arr : ('a,'b) bucket array; + mutable c_size : int; } let make eq hash size = assert (size>0); - {arr=Array.make size Empty; eq; hash } + {arr=Array.make size Empty; eq; hash; c_size=0 } let clear c = + c.c_size <- 0; Array.fill c.arr 0 (Array.length c.arr) Empty let get c x = @@ -128,7 +148,13 @@ module Replacing = struct let set c x y = let i = c.hash x mod Array.length c.arr in + if c.arr.(i) = Empty then c.c_size <- c.c_size + 1; c.arr.(i) <- Pair (x,y) + + let iter c f = + Array.iter (function Empty -> () | Pair (x,y) -> f x y) c.arr + + let size c () = c.c_size end let replacing ?(eq=default_eq_) ?(hash=default_hash_) size = @@ -136,6 +162,8 @@ let replacing ?(eq=default_eq_) ?(hash=default_hash_) size = { get=(fun x -> Replacing.get c x); set=(fun x y -> Replacing.set c x y); clear=(fun () -> Replacing.clear c); + size=Replacing.size c; + iter=Replacing.iter c; } module type HASH = sig @@ -173,10 +201,6 @@ module LRU(X:HASH) = struct c.first <- None; () - let get_opt = function - | None -> assert false - | Some x -> x - (* take first from queue *) let take_ c = match c.first with @@ -248,6 +272,11 @@ module LRU(X:HASH) = struct if len = c.size then replace_ c x y else insert_ c x y + + let size c () = H.length c.table + + let iter c f = + H.iter (fun x node -> f x node.value) c.table end let lru (type a) ?(eq=default_eq_) ?(hash=default_hash_) size = @@ -260,15 +289,13 @@ let lru (type a) ?(eq=default_eq_) ?(hash=default_hash_) size = { get=(fun x -> L.get c x); set=(fun x y -> L.set c x y); clear=(fun () -> L.clear c); + size=L.size c; + iter=L.iter c; } module UNBOUNDED(X:HASH) = struct - type key = X.t - module H = Hashtbl.Make(X) - type 'a t = 'a H.t - let make size = assert (size > 0); H.create size @@ -278,6 +305,10 @@ module UNBOUNDED(X:HASH) = struct let get c x = H.find c x let set c x y = H.replace c x y + + let size c () = H.length c + + let iter c f = H.iter f c end let unbounded (type a) ?(eq=default_eq_) ?(hash=default_hash_) size = @@ -290,4 +321,6 @@ let unbounded (type a) ?(eq=default_eq_) ?(hash=default_hash_) size = { get=(fun x -> C.get c x); set=(fun x y -> C.set c x y); clear=(fun () -> C.clear c); + iter=C.iter c; + size=C.size c; } diff --git a/core/CCCache.mli b/core/CCCache.mli index d548bbfd..a50b8ee6 100644 --- a/core/CCCache.mli +++ b/core/CCCache.mli @@ -23,7 +23,11 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 Memoization caches} *) +(** {1 Caches} + +Particularly useful for memoization. See {!with_cache} and {!with_cache_rec} +for more details. +@since NEXT_RELEASE *) type 'a equal = 'a -> 'a -> bool type 'a hash = 'a -> int @@ -72,6 +76,13 @@ fib 70;; ]} *) +val size : (_,_) t -> int +(** Size of the cache (number of entries). At most linear in the number + of entries. *) + +val iter : ('a,'b) t -> ('a -> 'b -> unit) -> unit +(** Iterate on cached values. Should yield [size cache] pairs. *) + val dummy : ('a,'b) t (** dummy cache, never stores any value *) @@ -92,42 +103,9 @@ val replacing : ?eq:'a equal -> ?hash:'a hash -> val lru : ?eq:'a equal -> ?hash:'a hash -> int -> ('a,'b) t (** LRU cache of the given size ("Least Recently Used": keys that have not been - used recently are deleted first). Never grows wider. *) + used recently are deleted first). Never grows wider than the given size. *) val unbounded : ?eq:'a equal -> ?hash:'a hash -> int -> ('a,'b) t (** Unbounded cache, backed by a Hash table. Will grow forever unless {!clear} is called manually. *) - -(** {2 Binary Caches} -TODO - -module C2 : sig - type ('a, 'b, 'c) t - - val clear : (_,_,_) t -> unit - - val with_cache : ('a, 'b, 'c) t -> ('a -> 'b -> 'c) -> 'a -> 'b -> 'c - - val with_cache_rec : ('a,'b,'c) t -> - (('a -> 'b -> 'c) -> 'a -> 'b -> 'c) -> - 'a -> 'b -> 'c - - val dummy : ('a,'b,'c) t - - val linear : ?eq1:('a -> 'a -> bool) -> ?eq2:('b -> 'b -> bool) -> - int -> ('a, 'b, 'c) t - - val replacing : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) -> - ?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) -> - int -> ('a,'b,'c) t - - val lru : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) -> - ?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) -> - int -> ('a,'b,'c) t - - val unbounded : ?eq1:('a -> 'a -> bool) -> ?hash1:('a -> int) -> - ?eq2:('b -> 'b -> bool) -> ?hash2:('b -> int) -> - int -> ('a,'b,'c) t -end -*) From a7657883e46901319d9dd9aa9d1aa57d22eec6f5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 23 Nov 2014 14:08:48 +0100 Subject: [PATCH 37/39] fix doc --- core/CCIO.ml | 1 - core/CCIO.mli | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/core/CCIO.ml b/core/CCIO.ml index 5f8a6116..1ebe7064 100644 --- a/core/CCIO.ml +++ b/core/CCIO.ml @@ -26,7 +26,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 IO Utils} *) -type 'a or_error = [ `Ok of 'a | `Error of string ] (** See {!CCError} *) type 'a gen = unit -> 'a option (** See {!CCGen} *) let with_in ?(mode=0o644) ?(flags=[]) filename f = diff --git a/core/CCIO.mli b/core/CCIO.mli index 20bdd622..3e18f3e9 100644 --- a/core/CCIO.mli +++ b/core/CCIO.mli @@ -59,7 +59,6 @@ Examples: ]} *) -type 'a or_error = [ `Ok of 'a | `Error of string ] (** See {!CCError} *) type 'a gen = unit -> 'a option (** See {!CCGen} *) (** {2 Input} *) @@ -128,6 +127,7 @@ See {!File.walk} if you also need to list directories: {[ # let content = CCIO.File.walk (CCIO.File.make "/tmp");; # CCGen.map CCIO.File.show_walk_item content |> CCIO.write_lines stdout;; +]} *) module File : sig From bbcb04b5a459010bd2f543c7d8a8d72864574dff Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 23 Nov 2014 14:14:38 +0100 Subject: [PATCH 38/39] update howto.mds procedure to release --- HOWTO.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/HOWTO.md b/HOWTO.md index 116ca78d..6dfa347e 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -2,10 +2,11 @@ ## Make a release 1. `make test-all` -2. merge into `stable` (from now on, proceed on branch `stable`) -3. update version in `_oasis` -4. `make update_next_tag` (to update `@since` comments) -5. update `CHANGELOG.md` (see its end to find the right git command) -6. commit, tag, and push both to github -7. new opam package +2. update version in `_oasis` +3. `make update_next_tag` (to update `@since` comments) +4. `git checkout stable` +5. `git merge master` +6. update `CHANGELOG.md` (see its end to find the right git command) +7. commit, tag, and push both to github +8. new opam package From 16f160678d3f4697a4d61b43195064a9a41be121 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 23 Nov 2014 14:14:46 +0100 Subject: [PATCH 39/39] prepare for 0.6 --- _oasis | 2 +- core/CCCache.mli | 4 ++-- core/CCFun.mli | 2 +- core/CCIO.mli | 2 +- core/CCList.mli | 2 +- core/CCMultiSet.mli | 6 +++--- core/CCVector.mli | 4 ++-- misc/mixtbl.mli | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/_oasis b/_oasis index f76776a2..0ba9e705 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: containers -Version: dev +Version: 0.6 Homepage: https://github.com/c-cube/ocaml-containers Authors: Simon Cruanes License: BSD-2-clause diff --git a/core/CCCache.mli b/core/CCCache.mli index a50b8ee6..33533aa1 100644 --- a/core/CCCache.mli +++ b/core/CCCache.mli @@ -27,7 +27,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Particularly useful for memoization. See {!with_cache} and {!with_cache_rec} for more details. -@since NEXT_RELEASE *) +@since 0.6 *) type 'a equal = 'a -> 'a -> bool type 'a hash = 'a -> int @@ -46,7 +46,7 @@ f' 1;; (* prints *) f' 0;; (* doesn't print, returns cached value *) ]} -@since NEXT_RELEASE *) +@since 0.6 *) type ('a, 'b) t diff --git a/core/CCFun.mli b/core/CCFun.mli index 2d7ab372..1a371705 100644 --- a/core/CCFun.mli +++ b/core/CCFun.mli @@ -36,7 +36,7 @@ val compose_binop : ('a -> 'b) -> ('b -> 'b -> 'c) -> 'a -> 'a -> 'c (** [compose_binop f g] is [fun x y -> g (f x) (f y)] Example (partial order): [List.sort (compose_binop fst CCInt.compare) [1, true; 2, false; 1, false]] - @since NEXT_RELEASE*) + @since 0.6*) val (%>) : ('a -> 'b) -> ('b -> 'c) -> 'a -> 'c (** Alias to [compose] *) diff --git a/core/CCIO.mli b/core/CCIO.mli index 3e18f3e9..1e29f134 100644 --- a/core/CCIO.mli +++ b/core/CCIO.mli @@ -31,7 +31,7 @@ way. For advanced IO tasks, the user is advised to use something like Lwt or Async, that are far more comprehensive. This module depends on {!CCGen}. -@since NEXT_RELEASE +@since 0.6 {b NOTE} this was formerly a monadic IO module. The old module is now in [containers.advanced] under the name [CCMonadIO]. diff --git a/core/CCList.mli b/core/CCList.mli index f00c2af3..21e1f63a 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -50,7 +50,7 @@ val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b val init : int -> (int -> 'a) -> 'a t (** Same as [Array.init] - @since NEXT_RELEASE *) + @since 0.6 *) val compare : ('a -> 'a -> int) -> 'a t -> 'a t -> int diff --git a/core/CCMultiSet.mli b/core/CCMultiSet.mli index 826f394b..a6574d4a 100644 --- a/core/CCMultiSet.mli +++ b/core/CCMultiSet.mli @@ -48,19 +48,19 @@ module type S = sig val add_mult : t -> elt -> int -> t (** [add_mult set x n] adds [n] occurrences of [x] to [set] @raise Invalid_argument if [n < 0] - @since NEXT_RELEASE *) + @since 0.6 *) val remove_mult : t -> elt -> int -> t (** [remove_mult set x n] removes at most [n] occurrences of [x] from [set] @raise Invalid_argument if [n < 0] - @since NEXT_RELEASE *) + @since 0.6 *) val update : t -> elt -> (int -> int) -> t (** [update set x f] calls [f n] where [n] is the current multiplicity of [x] in [set] ([0] to indicate its absence); the result of [f n] is the new multiplicity of [x]. @raise Invalid_argument if [f n < 0] - @since NEXT_RELEASE *) + @since 0.6 *) val min : t -> elt (** Minimal element w.r.t the total ordering on elements *) diff --git a/core/CCVector.mli b/core/CCVector.mli index 94a312fb..e9362f75 100644 --- a/core/CCVector.mli +++ b/core/CCVector.mli @@ -101,12 +101,12 @@ val pop_exn : ('a, rw) t -> 'a val top : ('a, _) t -> 'a option (** Top element, if present - @since NEXT_RELEASE *) + @since 0.6 *) val top_exn : ('a, _) t -> 'a (** Top element, if present @raise Failure on an empty vector - @since NEXT_RELEASE *) + @since 0.6 *) val copy : ('a,_) t -> ('a,'mut) t (** Shallow copy (may give an immutable or mutable vector) *) diff --git a/misc/mixtbl.mli b/misc/mixtbl.mli index 6e714c64..650c0ab6 100644 --- a/misc/mixtbl.mli +++ b/misc/mixtbl.mli @@ -52,7 +52,7 @@ OUnit.assert_equal None (Mixtbl.get inj_int tbl "a");; OUnit.assert_equal (Some "Bye") (Mixtbl.get inj_string tbl "a");; ]} -@since NEXT_RELEASE *) +@since 0.6 *) type 'a t (** A hash table containing values of different types.