From 2b15a21570c859376f0c176d175c3d08310ba1d1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Jun 2014 16:24:19 +0200 Subject: [PATCH] refactored CCrandom (hide fuel, too complicated, but provide a fix operator); bench_hash to compare hash combinators to the default hash function --- .merlin | 2 +- _oasis | 8 ++ core/CCRandom.ml | 191 ++++++++++++++++++++++++-------------------- core/CCRandom.mli | 120 +++++++++++----------------- misc/.merlin | 6 ++ tests/.merlin | 3 + tests/bench_hash.ml | 84 +++++++++++++++++++ 7 files changed, 253 insertions(+), 161 deletions(-) create mode 100644 misc/.merlin create mode 100644 tests/.merlin create mode 100644 tests/bench_hash.ml diff --git a/.merlin b/.merlin index 7598e01b..8d5ebfe5 100644 --- a/.merlin +++ b/.merlin @@ -9,7 +9,7 @@ B _build/string B _build/tests B _build/examples PKG oUnit -PKG bench +PKG benchmark PKG threads PKG threads.posix PKG lwt diff --git a/_oasis b/_oasis index d32eef86..bc8324f0 100644 --- a/_oasis +++ b/_oasis @@ -146,6 +146,14 @@ Executable bench_batch MainIs: bench_batch.ml BuildDepends: containers,benchmark +Executable bench_hash + Path: tests/ + Install: false + CompiledObject: native + Build$: flag(bench) && flag(misc) + MainIs: bench_hash.ml + BuildDepends: containers,containers.misc + Executable test_levenshtein Path: tests/ Install: false diff --git a/core/CCRandom.ml b/core/CCRandom.ml index 9385542e..86b1e6db 100644 --- a/core/CCRandom.ml +++ b/core/CCRandom.ml @@ -41,112 +41,129 @@ let map f g st = f (g st) let (>|=) g f st = map f g st -let choose_array a st = +let _choose_array a st = if Array.length a = 0 then invalid_arg "CCRandom.choose_array"; - a.(Random.State.int st (Array.length a)) st + a.(Random.State.int st (Array.length a)) -let choose l = choose_array (Array.of_list l) +let choose_array a st = + try Some (_choose_array a st st) with Invalid_argument _ -> None -let choose_return l = choose_array (Array.of_list (List.map return l)) +let choose l = + let a = Array.of_list l in + choose_array a -(** {2 Fuel and Backtracking} *) +let choose_exn l = + let a = Array.of_list l in + fun st -> _choose_array a st st -module Fuel = struct - type fuel = int +let choose_return l = _choose_array (Array.of_list l) - exception Backtrack +let int i st = Random.State.int st i - (* consume [d] units of fuel and return [x] if it works *) - let _consume d fuel x = - if fuel >= d then Some (fuel-d,x) else None +let small_int = int 100 - let _split i st = - if i < 2 then raise Backtrack +let int_range i j st = i + Random.State.int st (j-i+1) + +let replicate n g st = + let rec aux acc n = + if n = 0 then acc else aux (g st :: acc) (n-1) + in aux [] n + +exception SplitFail + +let _split i st = + if i < 2 then raise SplitFail + else + let j = 1 + Random.State.int st (i-1) in + (j, i-j) + +let split i st = try Some (_split i st) with SplitFail -> None + +(* partition of an int into [len] integers. We divide-and-conquer on + the expected length, until it reaches 1. *) +let split_list i ~len st = + let rec aux i ~len acc = + if i < len then raise SplitFail + else if len = 1 then i::acc else - let j = 1 + Random.State.int st (i-1) in - (j, i-j) - - let split i st = try Some (_split i st) with Backtrack -> None - - (* partition of an int into [len] integers. We divide-and-conquer on - the expected length, until it reaches 1. *) - let split_list i ~len st = - let rec aux i ~len acc = - if i < len then raise Backtrack - else if len = 1 then i::acc + (* split somewhere in the middle *) + let len1, len2 = _split len st in + assert (len = len1+len2); + if i = len + then aux len1 ~len:len1 (aux len2 ~len:len2 acc) else - (* split somewhere in the middle *) - let len1, len2 = _split len st in - if i = len - then aux len1 ~len:len1 (aux len2 ~len:len2 acc) - else - let i1, i2 = _split (i-len1-len2) st in - aux i1 ~len:len1 (aux i2 ~len:len2 acc) - in - try Some (aux i ~len []) with Backtrack -> None + let i1, i2 = _split (i-len) st in + aux (i1+len1) ~len:len1 (aux (i2+len2) ~len:len2 acc) + in + try Some (aux i ~len []) with SplitFail -> None - (** {6 Fueled Generators} *) - - type 'a t = fuel -> state -> (fuel * 'a) option - - let return x fuel _st = _consume 1 fuel x - - let return' fuel x fuel' _st = _consume fuel fuel' x - - let flat_map f g fuel st = - match g fuel st with - | None -> None - | Some (fuel, x) -> f x fuel st - - let (>>=) g f = flat_map f g - - let map f g fuel st = - match g fuel st with - | None -> None - | Some (fuel, x) -> Some (fuel, f x) - - let (>|=) g f = map f g - - let consume fuel _st = _consume 1 fuel () - - let consume' fuel fuel' _st = _consume fuel fuel' () - - let fail _fuel _st = None - - let retry ?(max=10) g fuel st = - let rec aux n = - match g fuel st with +let retry ?(max=10) g st = + let rec aux n = + match g st with | None when n=0 -> None | None -> aux (n-1) (* retry *) | Some _ as res -> res - in - aux max + in + aux max - let rec try_successively l fuel st = match l with - | [] -> None - | g :: l' -> - begin match g fuel st with - | None -> try_successively l' fuel st - | Some _ as res -> res - end +let rec try_successively l st = match l with + | [] -> None + | g :: l' -> + begin match g st with + | None -> try_successively l' st + | Some _ as res -> res + end - let () a b = try_successively [a;b] +let () a b = try_successively [a;b] - let rec fix f fuel st = f (fix f) fuel st +exception Backtrack - let lift g fuel st = _consume 1 fuel (g st) +let _choose_array_call a f st = + try + f (_choose_array a st) + with Invalid_argument _ -> raise Backtrack - let lift' d g fuel st = _consume d fuel (g st) +let fix ?(sub1=[]) ?(sub2=[]) ?(subn=[]) ~base fuel st = + let sub1 = Array.of_list sub1 + and sub2 = Array.of_list sub2 + and subn = Array.of_list subn in + (* recursive function with fuel *) + let rec make fuel st = + if fuel=0 then raise Backtrack + else if fuel=1 then base st + else + _try_otherwise 0 + [| _choose_array_call sub1 (fun f -> f (make (fuel-1)) st) + ; _choose_array_call sub2 + (fun f -> + match split fuel st with + | None -> raise Backtrack + | Some (i,j) -> f (make i) (make j) st + ) + ; _choose_array_call subn + (fun (len,f) -> + let len = len st in + match split_list fuel ~len st with + | None -> raise Backtrack + | Some l' -> + f (fun st -> List.map (fun x -> make x st) l') st + ) + ; base (* base case then *) + |] + and _try_otherwise i a = + if i=Array.length a then raise Backtrack + else try + a.(i) st + with Backtrack -> + _try_otherwise (i+1) a + in + make (fuel st) st - let run ?(fuel=fun st -> Random.State.int st 40) f st = - match f (fuel st) st with - | None -> None - | Some (_fuel, x) -> Some x +let pure x _st = x - exception GenFailure +let (<*>) f g st = f st (g st) + +let __default_state = Random.State.make_self_init () + +let run ?(st=__default_state) g = g st - let run_exn ?fuel f st = - match run ?fuel f st with - | None -> raise GenFailure - | Some x -> x -end diff --git a/core/CCRandom.mli b/core/CCRandom.mli index 22b48ca5..fcf00d42 100644 --- a/core/CCRandom.mli +++ b/core/CCRandom.mli @@ -45,98 +45,72 @@ val map : ('a -> 'b) -> 'a t -> 'b t val (>|=) : 'a t -> ('a -> 'b) -> 'b t -val choose : 'a t list -> 'a t -(** Choose a generator within the list. +val choose : 'a t list -> 'a option t +(** Choose a generator within the list. *) + +val choose_exn : 'a t list -> 'a t +(** Same as {!choose} but without option. @raise Invalid_argument if the list is empty *) -val choose_array : 'a t array -> 'a t +val choose_array : 'a t array -> 'a option t val choose_return : 'a list -> 'a t (** Choose among the list @raise Invalid_argument if the list is empty *) -(** {2 Fuel and Backtracking} *) +val replicate : int -> 'a t -> 'a list t -module Fuel : sig - type fuel = int - (** The fuel is a value that represents some "resource" used by the - random generator. *) +val small_int : int t - val split : fuel -> (fuel * fuel) option t - (** Split a (fuel) value [n] into [n1,n2] where [n = n1 + n2]. - @return [None] if the value is too small *) +val int : int -> int t - val split_list : fuel -> len:int -> fuel list option t - (** Split a (fuel) value [n] into a list of values whose sum is [n] - and whose length is [length]. - @return [None] if the value is too small *) +val int_range : int -> int -> int t +(** Inclusive range *) - (** {6 Fueled Generators} *) +val split : int -> (int * int) option t +(** Split a positive value [n] into [n1,n2] where [n = n1 + n2]. + @return [None] if the value is too small *) - type 'a t = fuel -> state -> (fuel * 'a) option - (** Fueled generators use some fuel to generate a value. - Can fail by lack of fuel. *) +val split_list : int -> len:int -> int list option t +(** Split a value [n] into a list of values whose sum is [n] + and whose length is [length]. + @return [None] if the value is too small *) - val return : 'a -> 'a t - (** [return x] is the generator that always returns [x], and consumes one - fuel doing it. *) +val retry : ?max:int -> 'a option t -> 'a option t +(** [retry g] calls [g] until it returns some value, or until the maximum + number of retries was reached. If [g] fails, + then it counts for one iteration, and the generator retries. + @param max: maximum number of retries. Default [10] *) - val return' : fuel -> 'a -> 'a t - (** [return' f x] returns [x] but also consumes [fuel]. *) +val try_successively : 'a option t list -> 'a option t +(** [try_successively l] tries each generator of [l], one after the other. + If some generator succeeds its result is returned, else the + next generator is tried *) - val flat_map : ('a -> 'b t) -> 'a t -> 'b t +val () : 'a option t -> 'a option t -> 'a option t +(** [a b] is a choice operator. It first tries [a], and returns its + result if successful. If [a] fails, then [b] is returned. *) - val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +val fix : + ?sub1:('a t -> 'a t) list -> + ?sub2:('a t -> 'a t -> 'a t) list -> + ?subn:(int t * ('a list t -> 'a t)) list -> + base:'a t -> int t -> 'a t +(** Recursion combinators, for building recursive values. + The integer generator is used to provide fuel. The [sub_] generators + should use their arguments only once! + @param sub1 cases that recurse on one value + @param sub2 cases that use the recursive gen twice + @param subn cases that use a list of recursive cases *) - val map : ('a -> 'b) -> 'a t -> 'b t +(** {6 Applicative} *) - val (>|=) : 'a t -> ('a -> 'b) -> 'b t +val pure : 'a -> 'a t - val consume : unit t - (** Consume one fuel value *) +val (<*>) : ('a -> 'b) t -> 'a t -> 'b t - val consume' : fuel -> unit t - (** Consume the given amount of fuel *) +(** {6 Run a generator} *) - val fail : 'a t - (** Always fails. *) +val run : ?st:state -> 'a t -> 'a +(** Using a random state (possibly the one in argument) run a generator *) - val retry : ?max:int -> 'a t -> 'a t - (** [retry g] calls [g] until it returns some value, or until the maximum - number of retries was reached. If [g] fails, - then it counts for one iteration, and the generator retries. - @param max: maximum number of retries. Default [10] *) - - val try_successively : 'a t list -> 'a t - (** [try_successively l] tries each generator of [l], one after the other. - If some generator succeeds its result is returned, else the - next generator is tried *) - - val () : 'a t -> 'a t -> 'a t - (** [a b] is a choice operator. It first tries [a], and returns its - result if successful. If [a] fails, then [b] is returned. *) - - val fix : ('a t -> 'a t) -> 'a t - (** Recursion combinators, for building (fueled) recursive values *) - - val lift : 'a random_gen -> 'a t - (** lifts a regular random generator into a fueled one, that consumes - one fuel unit *) - - val lift' : fuel -> 'a random_gen -> 'a t - (** lifts a regular random generator into a fueled one, that consumes - one fuel unit *) - - (** {6 Running} *) - - val run : ?fuel:fuel random_gen -> 'a t -> 'a option random_gen - (** Run the given fueled generator with an amount of fuel - given by the [fuel] generator. - @return None if the *) - - exception GenFailure - - val run_exn : ?fuel:fuel random_gen -> 'a t -> 'a random_gen - (** Same as {!run}, but in case of failure it raises an exception. - @raise GenFailure in case the generator fails *) -end diff --git a/misc/.merlin b/misc/.merlin new file mode 100644 index 00000000..cc64b0c4 --- /dev/null +++ b/misc/.merlin @@ -0,0 +1,6 @@ +REC +S ../core +S . +B ../_build/core/ +B ../_build/misc/ +PKG core diff --git a/tests/.merlin b/tests/.merlin new file mode 100644 index 00000000..c8fb82a3 --- /dev/null +++ b/tests/.merlin @@ -0,0 +1,3 @@ +S . +B ../_build/tests/ +REC diff --git a/tests/bench_hash.ml b/tests/bench_hash.ml new file mode 100644 index 00000000..c17f3969 --- /dev/null +++ b/tests/bench_hash.ml @@ -0,0 +1,84 @@ +(** Test hash functions *) + +type tree = + | Empty + | Node of int * tree list + +let mk_node i l = Node (i,l) + +let random_tree = + CCRandom.(fix + ~base:(return Empty) + ~subn:[int 10, (fun sublist -> pure mk_node <*> small_int <*> sublist)] + (int_range 15 150) + ) + +let random_list = + CCRandom.( + int 5 >>= fun len -> + CCList.random_len len random_tree + ) + +let rec eq t1 t2 = match t1, t2 with + | Empty, Empty -> true + | Node(i1,l1), Node (i2,l2) -> i1=i2 && CCList.equal eq l1 l2 + | Node _, _ + | _, Node _ -> false + +let rec hash_tree t h = match t with + | Empty -> CCHash.string_ "empty" h + | Node (i, l) -> + h |> CCHash.string_ "node" |> CCHash.int_ i |> CCHash.list_ hash_tree l + +module Box = Containers_misc.PrintBox + +let tree2box = Box.mk_tree + (function + | Empty -> Box.empty, [] + | Node (i,l) -> Box.line (CCPrint.sprintf "node %d" i), l + ) + +let l = CCRandom.(run (CCList.random random_list)) + +let pp_list buf l = + let box = Box.(frame (vlist ~bars:true (List.map tree2box l))) in + CCPrint.string buf (Box.to_string box) + +(* print some terms *) +let () = + List.iter + (fun l -> CCPrint.printf "%a\n" pp_list l) l + + +module H = Hashtbl.Make(struct + type t = tree + let equal = eq + let hash = CCHash.apply hash_tree +end) + +let print_hashcons_stats st = + let open Hashtbl in + CCPrint.printf + "tbl stats: %d elements, num buckets: %d, max bucket: %d\n" + st.num_bindings st.num_buckets st.max_bucket_length; + Array.iteri + (fun i n -> CCPrint.printf " %d\t buckets have length %d\n" n i) + st.bucket_histogram + +let () = + let st = Random.State.make_self_init () in + let n = 50_000 in + CCPrint.printf "generate %d elements...\n" n; + let l = CCRandom.run ~st (CCList.random_len n random_tree) in + (* with custom hashtable *) + CCPrint.printf "### custom hashtable\n"; + let tbl = H.create 256 in + List.iter (fun t -> H.replace tbl t ()) l; + print_hashcons_stats (H.stats tbl); + (* with default hashtable *) + CCPrint.printf "### default hashtable\n"; + let tbl' = Hashtbl.create 256 in + List.iter (fun t -> Hashtbl.replace tbl' t ()) l; + print_hashcons_stats (Hashtbl.stats tbl'); + () +