From b2450a771b65f2a2b781fa2499798215523bd7a0 Mon Sep 17 00:00:00 2001 From: Nicolas Braud-Santoni Date: Fri, 23 May 2014 10:20:52 +0200 Subject: [PATCH 01/52] AVL: Minor edit There was a let that was uselessly rec --- misc/AVL.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/AVL.ml b/misc/AVL.ml index 15f3f8ed..8fcc0136 100644 --- a/misc/AVL.ml +++ b/misc/AVL.ml @@ -355,7 +355,7 @@ module Gen = struct let _gen stack = let stack = ref stack in - let rec next () = + let next () = match !stack with | [] -> None | l -> From 12beb9051421202359bb6099a4d6141093e3b8da Mon Sep 17 00:00:00 2001 From: Nicolas Braud-Santoni Date: Fri, 23 May 2014 10:38:15 +0200 Subject: [PATCH 02/52] AVL: Change API not to carry around comparison functions --- misc/AVL.ml | 148 +++++++++++++++++++++++++++++++-------------------- misc/AVL.mli | 36 +++++++------ 2 files changed, 109 insertions(+), 75 deletions(-) diff --git a/misc/AVL.ml b/misc/AVL.ml index 8fcc0136..97e53436 100644 --- a/misc/AVL.ml +++ b/misc/AVL.ml @@ -28,13 +28,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. See https://en.wikipedia.org/wiki/AVL_tree *) -type ('a,'b) t = - | Empty - | Node of ('a,'b) t * 'a * 'b * ('a,'b) t * int - type 'a comparator = 'a -> 'a -> int -let empty = Empty +type ('a,'b) tree = + | Empty + | Node of ('a,'b) tree * 'a * 'b * ('a,'b) tree * int + +type ('a,'b) t = + { cmp: 'a comparator; + t: ('a,'b) tree } + +let empty ~cmp = { cmp; t=Empty } let _height = function | Empty -> 0 @@ -46,7 +50,8 @@ let _balance l r = _height l - _height r let _make l x y r = Node (l, x, y, r, 1 + max (_height l) (_height r)) -let singleton k v = _make empty k v empty +let _singleton k v = _make Empty k v Empty +let singleton ~cmp k v = { cmp; t = _singleton k v } (* balance tree [t] *) let _rebalance t = match t with @@ -93,32 +98,40 @@ let _rebalance t = match t with let _make_balance l k v r = _rebalance (_make l k v r) -let rec fold f acc t = match t with +let rec _fold f acc t = match t with | Empty -> acc | Node (l, x, y, r, _) -> - let acc = fold f acc l in + let acc = _fold f acc l in let acc = f acc x y in - fold f acc r + _fold f acc r -let rec for_all p t = match t with +let fold f acc {t; _} = _fold f acc t + +let rec _for_all p t = match t with | Empty -> true | Node (l, x, y, r, _) -> - p x y && for_all p l && for_all p r + p x y && _for_all p l && _for_all p r -let rec exists p t = match t with +let for_all p {t; _} = _for_all p t + +let rec _exists p t = match t with | Empty -> false | Node (l, x, y, r, _) -> - p x y || exists p l || exists p r + p x y || _exists p l || _exists p r -let rec insert ~cmp t k v = match t with - | Empty -> _make empty k v empty +let exists p {t; _} = _exists p t + +let rec _insert ~cmp t k v = match t with + | Empty -> _make Empty k v Empty | Node (l, k1, v1, r, _) -> let c = cmp k k1 in if c < 0 - then _make_balance (insert ~cmp l k v) k1 v1 r + then _make_balance (_insert ~cmp l k v) k1 v1 r else if c = 0 then _make l k v r - else _make_balance l k1 v1 (insert ~cmp r k v) + else _make_balance l k1 v1 (_insert ~cmp r k v) + +let insert {cmp; t} k v = {cmp; t=_insert ~cmp t k v} (* remove the maximal value in the given tree (the only which only has a left child), and return its key/value pair *) @@ -132,7 +145,7 @@ let rec _remove_max t = match t with exception NoSuchElement -let remove ~cmp t key = +let _remove ~cmp t key = let rec _remove t = match t with | Empty -> raise NoSuchElement | Node (l, k, v, r, _) -> @@ -153,30 +166,34 @@ let remove ~cmp t key = try _remove t with NoSuchElement -> t (* element not found *) -let update ~cmp t key f = failwith "update: not implemented" +let remove {cmp; t} k = {cmp; t=_remove ~cmp t k} -let rec find_exn ~cmp t key = match t with +let _update ~cmp t key f = failwith "update: not implemented" +let update {cmp; t} = _update ~cmp t + +let rec _find_exn ~cmp t key = match t with | Empty -> raise Not_found | Node (l, k, v, r, _) -> let c = cmp key k in - if c < 0 then find_exn ~cmp l key - else if c > 0 then find_exn ~cmp r key + if c < 0 then _find_exn ~cmp l key + else if c > 0 then _find_exn ~cmp r key else v +let find_exn {cmp; t} = _find_exn ~cmp t -let find ~cmp t key = - try Some (find_exn ~cmp t key) +let find t key = + try Some (find_exn t key) with Not_found -> None (* add k,v as strictly maximal element to t. [t] must not contain any key >= k *) let rec _add_max k v t = match t with - | Empty -> singleton k v + | Empty -> _singleton k v | Node (l, k', v', r, _) -> _make_balance l k' v' (_add_max k v r) -and + (* same for minimal value *) -_add_min k v t = match t with - | Empty -> singleton k v +let rec _add_min k v t = match t with + | Empty -> _singleton k v | Node (l, k', v', r, _) -> _make_balance (_add_min k v l) k' v' r @@ -204,42 +221,51 @@ let _concat t1 t2 = match t1, t2 with let t1', k, v = _remove_max t1 in _join t1' k v t2 -let rec split ~cmp t key = match t with - | Empty -> empty, None, empty +let rec _split ~cmp t key = match t with + | Empty -> Empty, None, Empty | Node (l, k, v, r, _) -> let c = cmp key k in if c < 0 then - let ll, result, lr = split ~cmp l key in + let ll, result, lr = _split ~cmp l key in ll, result, _join lr k v r else if c > 0 then - let rl, result, rr = split ~cmp r key in + let rl, result, rr = _split ~cmp r key in _join l k v rl, result, rr else l, Some v, r +let split {cmp; t} k = + let (t,b,t') = _split ~cmp t k in + {cmp; t}, b, {cmp; t=t'} + (* if k = Some v, join l k v r, else concat l v *) let _concat_or_join l k result r = match result with | None -> _concat l r | Some v -> _join l k v r -let rec merge ~cmp f t1 t2 = match t1, t2 with - | Empty, Empty -> empty +let rec _merge ~cmp f t1 t2 = match t1, t2 with + | Empty, Empty -> Empty | Node (l1, k1, v1, r1, h1), _ when h1 >= _height t2 -> - let l2, result2, r2 = split ~cmp t2 k1 in + let l2, result2, r2 = _split ~cmp t2 k1 in let result = f k1 (Some v1) result2 in - let l = merge ~cmp f l1 l2 in - let r = merge ~cmp f r1 r2 in + let l = _merge ~cmp f l1 l2 in + let r = _merge ~cmp f r1 r2 in _concat_or_join l k1 result r | _, Node (l2, k2, v2, r2, _) -> - let l1, result1, r1 = split ~cmp t1 k2 in + let l1, result1, r1 = _split ~cmp t1 k2 in let result = f k2 result1 (Some v2) in - let l = merge ~cmp f l1 l2 in - let r = merge ~cmp f r1 r2 in + let l = _merge ~cmp f l1 l2 in + let r = _merge ~cmp f r1 r2 in _concat_or_join l k2 result r | _, Empty -> assert false (* h1 < heigth h2?? *) +let merge f {cmp; t} {cmp=cmp'; t=t'} = + if(cmp != cmp') then invalid_arg "AVL.merge: trees wit different + comparison function"; + {cmp; t = _merge ~cmp f t t'} + (* invariant: balanced *) let rec invariant_balance t = match t with | Empty -> true @@ -253,13 +279,13 @@ let rec invariant_search ~cmp t = match t with | Node (l, x, _, r, _) -> invariant_search ~cmp l && invariant_search ~cmp r && - for_all (fun x' _ -> cmp x' x < 0) l && - for_all (fun x' _ -> cmp x' x > 0) r + _for_all (fun x' _ -> cmp x' x < 0) l && + _for_all (fun x' _ -> cmp x' x > 0) r let of_list ~cmp l = - List.fold_left (fun acc (x,y) -> insert ~cmp acc x y) empty l + {cmp; t = List.fold_left (fun acc (x,y) -> _insert ~cmp acc x y) Empty l} -let to_list t = +let to_list {t; _} = let rec aux acc t = match t with | Empty -> acc | Node (l, k, v, r, _) -> @@ -273,15 +299,15 @@ let to_list t = module type ITERATOR = sig type 'a iter - val after : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a * 'b) iter - val before : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a * 'b) iter - val iter : ('a,'b) t -> ('a * 'b) iter - val add : cmp:'a comparator -> ('a,'b) t -> ('a * 'b) iter -> ('a,'b) t + val after : ('a,'b) t -> 'a -> ('a * 'b) iter + val before : ('a,'b) t -> 'a -> ('a * 'b) iter + val iter : ('a,'b) t -> ('a * 'b) iter + val add : ('a,'b) t -> ('a * 'b) iter -> ('a,'b) t end type ('a,'b) explore = | Yield of 'a * 'b - | Explore of ('a, 'b) t + | Explore of ('a, 'b) tree exception EndOfIter @@ -338,16 +364,18 @@ module KList = struct let k, v, l' = _pop l in `Cons ((k,v), _next l') - let iter t = _next (_push t []) () + let iter {t; _} = _next (_push t []) () - let rec add ~cmp t (l:'a t) = match l with + let rec _add ~cmp t (l:'a t) = match l with | `Nil -> t | `Cons ((k,v), l') -> - add ~cmp (insert ~cmp t k v) (l' ()) + _add ~cmp (_insert ~cmp t k v) (l' ()) - let after ~cmp t key = _next (_after ~cmp [] t key) () + let add {cmp; t} l = {cmp; t=_add ~cmp t l} - let before ~cmp t key = _next (_before ~cmp [] t key) () + let after {cmp; t} key = _next (_after ~cmp [] t key) () + + let before {cmp; t} key = _next (_before ~cmp [] t key) () end module Gen = struct @@ -364,13 +392,15 @@ module Gen = struct Some (k, v) in next - let iter t = _gen (_push t []) + let iter {t; _} = _gen (_push t []) - let rec add ~cmp t gen = + let rec _add ~cmp t gen = match gen() with | None -> t - | Some (k,v) -> add ~cmp (insert ~cmp t k v) gen + | Some (k,v) -> _add ~cmp (_insert ~cmp t k v) gen - let after ~cmp t key = _gen (_after ~cmp [] t key) - let before ~cmp t key = _gen (_before ~cmp [] t key) + let add {cmp; t} l = {cmp; t=_add ~cmp t l} + + let after {cmp; t} key = _gen (_after ~cmp [] t key) + let before {cmp; t} key = _gen (_before ~cmp [] t key) end diff --git a/misc/AVL.mli b/misc/AVL.mli index 926eefa1..788f2aa1 100644 --- a/misc/AVL.mli +++ b/misc/AVL.mli @@ -26,16 +26,20 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 AVL trees} *) -type ('a,'b) t = private - | Empty - | Node of ('a,'b) t * 'a * 'b * ('a,'b) t * int - type 'a comparator = 'a -> 'a -> int -val empty : ('a,'b) t +type ('a,'b) tree = private + | Empty + | Node of ('a,'b) tree * 'a * 'b * ('a,'b) tree * int + +type ('a,'b) t = private + { cmp: 'a comparator; + t: ('a,'b) tree } + +val empty : cmp:'a comparator -> ('a,'b) t (** Empty tree *) -val singleton : 'a -> 'b -> ('a,'b) t +val singleton : cmp:'a comparator -> 'a -> 'b -> ('a,'b) t (** Tree with a single node *) val fold : ('c -> 'a -> 'b -> 'c) -> 'c -> ('a,'b) t -> 'c @@ -44,29 +48,29 @@ val fold : ('c -> 'a -> 'b -> 'c) -> 'c -> ('a,'b) t -> 'c val for_all : ('a -> 'b -> bool) -> ('a,'b) t -> bool val exists : ('a -> 'b -> bool) -> ('a,'b) t -> bool -val find : cmp:'a comparator -> ('a,'b) t -> 'a -> 'b option +val find : ('a,'b) t -> 'a -> 'b option (** Find the value associated to the key, if any *) -val find_exn : cmp:'a comparator -> ('a,'b) t -> 'a -> 'b +val find_exn : ('a,'b) t -> 'a -> 'b (** @raise Not_found if the key is not present *) -val insert : cmp:'a comparator -> ('a,'b) t -> 'a -> 'b -> ('a,'b) t +val insert : ('a,'b) t -> 'a -> 'b -> ('a,'b) t (** Insertion in the tree *) -val remove : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a,'b) t +val remove : ('a,'b) t -> 'a -> ('a,'b) t (** Removal from the tree *) -val update : cmp:'a comparator -> ('a,'b) t -> 'a -> +val update : ('a,'b) t -> 'a -> ('b option -> ('a * 'b) option) -> ('a,'b) t (** Update of the given key binding (subsumes [insert] and [remove]) *) -val split : cmp:'a comparator -> ('a,'b) t -> 'a -> +val split : ('a,'b) t -> 'a -> ('a,'b) t * 'b option * ('a,'b) t (** [split ~cmp t k] splits [t] into a left part that is smaller than [k], the possible binding of [k], and a part bigger than [k]. *) -val merge : cmp:'a comparator -> +val merge : ('a -> 'b option -> 'c option -> 'd option) -> ('a,'b) t -> ('a,'c) t -> ('a,'d) t (** Merge two trees together, with the given function *) @@ -82,10 +86,10 @@ val to_list : ('a,'b) t -> ('a * 'b) list module type ITERATOR = sig type 'a iter - val after : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a * 'b) iter - val before : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a * 'b) iter + val after : ('a,'b) t -> 'a -> ('a * 'b) iter + val before : ('a,'b) t -> 'a -> ('a * 'b) iter val iter : ('a,'b) t -> ('a * 'b) iter - val add : cmp:'a comparator -> ('a,'b) t -> ('a * 'b) iter -> ('a,'b) t + val add : ('a,'b) t -> ('a * 'b) iter -> ('a,'b) t end module KList : sig From 8ba39cb32d55e3ae99d9bc705e04bb6b001a56b1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 26 May 2014 19:09:06 +0200 Subject: [PATCH 03/52] update readme --- README.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d1703755..4aac41da 100644 --- a/README.md +++ b/README.md @@ -52,13 +52,15 @@ structures comprise (some modules in `misc/`, some other in `core/`): - `CCLeftistheap`, a polymorphic heap structure. - `CCFQueue`, a purely functional queue structure - `CCBV`, mutable bitvectors -- `CCPersistentHashtbl`, a semi-persistent hashtable (similar - to [persistent arrays](https://www.lri.fr/~filliatr/ftp/ocaml/ds/parray.ml.html)) +- `CCPersistentHashtbl`, a semi-persistent hashtable (similar to [persistent arrays](https://www.lri.fr/~filliatr/ftp/ocaml/ds/parray.ml.html)) - `CCVector`, a growable array (pure OCaml, no C) - -- `CCGen` and `CCSequence`, generic iterators structures (with structural types - so they can be defined in several places). Now also in their own - repository and opam packages (`gen` and `sequence`). +- `CCGen` and `CCSequence`, generic iterators structures (with structural types so they can be defined in several places). Now also in their own repository and opam packages (`gen` and `sequence`). +- `CCKlist`, another iterator structure +- `CCList`, functions and lists including tail-recursive implementations of `map` and `append` +- `CCArray`, utilities on arrays +- `CCInt`, `CCPair`, `CCOpt`, `CCFun`, `CCBool`, utilities on basic types +- `CCPrint`, printing combinators +- `CCHash`, hashing combinators ### Misc From ffcdfa8625028530269e9ae02203e0418f43931c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 26 May 2014 21:26:45 +0200 Subject: [PATCH 04/52] udpate the type of CCKlist --- core/CCKList.ml | 103 +++++++++++++++++++++++++++-------------------- core/CCKList.mli | 8 ++-- 2 files changed, 64 insertions(+), 47 deletions(-) diff --git a/core/CCKList.ml b/core/CCKList.ml index 337310f4..05656a9c 100644 --- a/core/CCKList.ml +++ b/core/CCKList.ml @@ -25,31 +25,29 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Continuation List} *) - -type + 'a t = +type + 'a t = unit -> [ `Nil - | `Cons of 'a * (unit -> 'a t) + | `Cons of 'a * 'a t ] -let nil = `Nil -let _nil () = nil -let cons a b = `Cons (a,b) +let nil () = `Nil +let cons a b () = `Cons (a,b) let empty = nil -let singleton x = `Cons (x, fun () -> `Nil) +let singleton x () = `Cons (x, nil) -let is_empty = function +let is_empty l = match l () with | `Nil -> true | `Cons _ -> false let to_list l = - let rec direct i (l:'a t) = match l with + let rec direct i (l:'a t) = match l () with | `Nil -> [] | _ when i=0 -> safe [] l - | `Cons (x, f) -> x :: direct (i-1) (f ()) - and safe acc l = match l with + | `Cons (x, f) -> x :: direct (i-1) f + and safe acc l = match l () with | `Nil -> List.rev acc - | `Cons (x,l') -> safe (x::acc) (l' ()) + | `Cons (x,l') -> safe (x::acc) l' in direct 200 l @@ -57,87 +55,104 @@ let of_list l = let rec aux l () = match l with | [] -> `Nil | x::l' -> `Cons (x, aux l') - in aux l () + in aux l type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -let rec to_seq res k = match res with +let rec to_seq res k = match res () with | `Nil -> () - | `Cons (s, f) -> k s; to_seq (f ()) k + | `Cons (s, f) -> k s; to_seq f k let to_gen l = let l = ref l in fun () -> - match !l with + match !l () with | `Nil -> None | `Cons (x,l') -> - l := l' (); + l := l'; Some x -let rec fold f acc res = match res with +let rec fold f acc res = match res () with | `Nil -> acc - | `Cons (s, cont) -> fold f (f acc s) (cont ()) + | `Cons (s, cont) -> fold f (f acc s) cont -let rec iter f l = match l with +let rec iter f l = match l () with | `Nil -> () - | `Cons (x, l') -> f x; iter f (l' ()) + | `Cons (x, l') -> f x; iter f l' let length l = fold (fun acc _ -> acc+1) 0 l -let rec take n (l:'a t):'a t = match l with +let rec take n (l:'a t) () = match l () with | _ when n=0 -> `Nil | `Nil -> `Nil - | `Cons (x,l') -> `Cons (x, fun () -> take (n-1) (l' ())) + | `Cons (x,l') -> `Cons (x, take (n-1) l') -let rec drop n (l:'a t) = match l with - | _ when n=0 -> l +let rec drop n (l:'a t) () = match l () with + | l' when n=0 -> l' | `Nil -> `Nil - | `Cons (_,l') -> drop (n-1) (l'()) + | `Cons (_,l') -> drop (n-1) l' () -let rec map f l = match l with +(*$Q + (Q.pair (Q.list Q.small_int) Q.small_int) (fun (l,n) -> \ + let s = of_list l in let s1, s2 = take n s, drop n s in \ + append s1 s2 |> to_list = l ) +*) + +let rec map f l () = match l () with | `Nil -> `Nil - | `Cons (x, l') -> `Cons (f x, fun () -> map f (l' ())) + | `Cons (x, l') -> `Cons (f x, map f l') -let rec fmap f (l:'a t):'b t = match l with +(*$T + (map ((+) 1) (1 -- 5) |> to_list) = (2 -- 6 |> to_list) +*) + +let rec fmap f (l:'a t) () = match l() with | `Nil -> `Nil | `Cons (x, l') -> begin match f x with - | None -> fmap f (l' ()) - | Some y -> `Cons (y, fun () -> fmap f (l' ())) + | None -> fmap f l' () + | Some y -> `Cons (y, fmap f l') end -let rec filter p l = match l with +(*$T + fmap (fun x -> if x mod 2=0 then Some (x*3) else None) (1--10) |> to_list \ + = [6;12;18;24;30] +*) + +let rec filter p l () = match l () with | `Nil -> `Nil | `Cons (x, l') -> if p x - then `Cons (x, fun () -> filter p (l' ())) - else filter p (l' ()) + then `Cons (x, filter p l') + else filter p l' () -let rec append l1 l2 = match l1 with - | `Nil -> l2 - | `Cons (x, l1') -> `Cons (x, fun () -> append (l1' ()) l2) +let rec append l1 l2 () = match l1 () with + | `Nil -> l2 () + | `Cons (x, l1') -> `Cons (x, append l1' l2) -let rec flat_map f l = match l with +let rec flat_map f l () = match l () with | `Nil -> `Nil | `Cons (x, l') -> - _flat_map_app f (f x) (l' ()) () -and _flat_map_app f l l' () = match l with - | `Nil -> flat_map f l' + _flat_map_app f (f x) l' () +and _flat_map_app f l l' () = match l () with + | `Nil -> flat_map f l' () | `Cons (x, tl) -> - `Cons (x, _flat_map_app f (tl ()) l') + `Cons (x, _flat_map_app f tl l') let flatten l = flat_map (fun x->x) l let range i j = let rec aux i j () = - if i=j then cons i _nil + if i=j then `Cons(i, nil) else if i to_list = [0;1;2;3;4;5] range 0 0 |> to_list = [0] range 5 2 |> to_list = [5;4;3;2] *) + +let (--) = range diff --git a/core/CCKList.mli b/core/CCKList.mli index 0975a24c..ec3a8ec9 100644 --- a/core/CCKList.mli +++ b/core/CCKList.mli @@ -25,16 +25,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Continuation List} *) -type + 'a t = +type + 'a t = unit -> [ `Nil - | `Cons of 'a * (unit -> 'a t) + | `Cons of 'a * 'a t ] val nil : 'a t val empty : 'a t -val cons : 'a -> (unit -> 'a t) -> 'a t +val cons : 'a -> 'a t -> 'a t val singleton : 'a -> 'a t @@ -75,3 +75,5 @@ val flat_map : ('a -> 'b t) -> 'a t -> 'b t val flatten : 'a t t -> 'a t val range : int -> int -> int t + +val (--) : int -> int -> int t From 9a10d477eea0a80675b2ef6a7bb869fd62443989 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 26 May 2014 21:55:29 +0200 Subject: [PATCH 05/52] update the klist type in other modules --- core/CCHash.ml | 7 +++---- core/CCHash.mli | 2 +- core/CCList.ml | 12 ++++++------ core/CCList.mli | 2 +- misc/AVL.ml | 14 +++++++------- misc/AVL.mli | 2 +- misc/CSM.ml | 18 ++++++++++++++++-- misc/CSM.mli | 15 +++++++++++---- string/levenshtein.ml | 23 +++++++++++++++-------- string/levenshtein.mli | 6 +++--- 10 files changed, 64 insertions(+), 37 deletions(-) diff --git a/core/CCHash.ml b/core/CCHash.ml index af0b9a3b..e485d228 100644 --- a/core/CCHash.ml +++ b/core/CCHash.ml @@ -58,7 +58,7 @@ let hash_triple h1 h2 h3 (x,y,z) = (h1 x) <<>> (h2 y) <<>> (h3 z) type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -type 'a klist = [`Nil | `Cons of 'a * (unit -> 'a klist)] +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] let hash_seq f h seq = let h = ref h in @@ -70,7 +70,6 @@ let rec hash_gen f h g = match g () with | Some x -> hash_gen f (h <<>> f x) g -let rec hash_klist f h l = match l with +let rec hash_klist f h l = match l () with | `Nil -> h - | `Cons (x,l') -> hash_klist f (h <<>> f x) (l' ()) - + | `Cons (x,l') -> hash_klist f (h <<>> f x) l' diff --git a/core/CCHash.mli b/core/CCHash.mli index 2233e240..e250ed10 100644 --- a/core/CCHash.mli +++ b/core/CCHash.mli @@ -57,7 +57,7 @@ val hash_triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) ha type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -type 'a klist = [`Nil | `Cons of 'a * (unit -> 'a klist)] +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] val hash_seq : 'a hash_fun -> t -> 'a sequence hash_fun val hash_gen : 'a hash_fun -> t -> 'a gen hash_fun diff --git a/core/CCList.ml b/core/CCList.ml index 0d446ed8..a60428db 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -388,7 +388,7 @@ end type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -type 'a klist = [`Nil | `Cons of 'a * (unit -> 'a klist)] +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a printer = Buffer.t -> 'a -> unit type 'a formatter = Format.formatter -> 'a -> unit @@ -422,17 +422,17 @@ let to_klist l = let rec make l () = match l with | [] -> `Nil | x::l' -> `Cons (x, make l') - in make l () + in make l let of_klist l = let rec direct i g = if i = 0 then safe [] g - else match l with + else match l () with | `Nil -> [] - | `Cons (x,l') -> x :: direct (i-1) (l' ()) - and safe acc l = match l with + | `Cons (x,l') -> x :: direct (i-1) l' + and safe acc l = match l () with | `Nil -> List.rev acc - | `Cons (x,l') -> safe (x::acc) (l' ()) + | `Cons (x,l') -> safe (x::acc) l' in direct _direct_depth l diff --git a/core/CCList.mli b/core/CCList.mli index 1691ed75..6f9adf77 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -171,7 +171,7 @@ end type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -type 'a klist = [`Nil | `Cons of 'a * (unit -> 'a klist)] +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a printer = Buffer.t -> 'a -> unit type 'a formatter = Format.formatter -> 'a -> unit diff --git a/misc/AVL.ml b/misc/AVL.ml index 97e53436..592113a1 100644 --- a/misc/AVL.ml +++ b/misc/AVL.ml @@ -356,26 +356,26 @@ let rec _before~cmp stack t key = match t with else _yield k v (_push_swap l stack) module KList = struct - type 'a t = [ `Nil | `Cons of 'a * (unit -> 'a t) ] + type 'a t = unit -> [ `Nil | `Cons of 'a * 'a t ] - let rec _next (l:('a,'b) explore list) () : ('a*'b) t = match l with + let rec _next (l:('a,'b) explore list) () = match l with | [] -> `Nil | _::_ -> let k, v, l' = _pop l in `Cons ((k,v), _next l') - let iter {t; _} = _next (_push t []) () + let iter {t; _} = _next (_push t []) - let rec _add ~cmp t (l:'a t) = match l with + let rec _add ~cmp t (l:'a t) = match l () with | `Nil -> t | `Cons ((k,v), l') -> - _add ~cmp (_insert ~cmp t k v) (l' ()) + _add ~cmp (_insert ~cmp t k v) l' let add {cmp; t} l = {cmp; t=_add ~cmp t l} - let after {cmp; t} key = _next (_after ~cmp [] t key) () + let after {cmp; t} key = _next (_after ~cmp [] t key) - let before {cmp; t} key = _next (_before ~cmp [] t key) () + let before {cmp; t} key = _next (_before ~cmp [] t key) end module Gen = struct diff --git a/misc/AVL.mli b/misc/AVL.mli index 788f2aa1..5ff71ffa 100644 --- a/misc/AVL.mli +++ b/misc/AVL.mli @@ -93,7 +93,7 @@ module type ITERATOR = sig end module KList : sig - type 'a t = [ `Nil | `Cons of 'a * (unit -> 'a t) ] + type 'a t = unit -> [ `Nil | `Cons of 'a * 'a t ] include ITERATOR with type 'a iter := 'a t end diff --git a/misc/CSM.ml b/misc/CSM.ml index 158f5b3a..6d72cd7b 100644 --- a/misc/CSM.ml +++ b/misc/CSM.ml @@ -210,7 +210,7 @@ module List = struct let build state x = Some (x::state, x::state) end -module CCGen = struct +module Gen = struct type 'a gen = unit -> 'a option let map a state gen = @@ -227,7 +227,7 @@ module CCGen = struct end end -module CCSequence = struct +module Sequence = struct type 'a sequence = ('a -> unit) -> unit exception ExitSeq @@ -244,6 +244,20 @@ module CCSequence = struct with ExitSeq -> () end +module KList = struct + type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] + + let rec map f state (l:'a klist) () = + match l () with + | `Nil -> `Nil + | `Cons (x, l') -> + begin match f state x with + | None -> `Nil + | Some (y, state') -> + `Cons (y, map f state' l') + end +end + (** {2 Mutable Interface} *) module Mut = struct diff --git a/misc/CSM.mli b/misc/CSM.mli index abd59af1..0f60cfca 100644 --- a/misc/CSM.mli +++ b/misc/CSM.mli @@ -26,10 +26,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Composable State Machines} This module defines state machines that should help design applications -with a more explicit control of state (e.g. for networking applications. *) +with a more explicit control of state (e.g. for networking applications). *) type ('a, 's, 'b) t = 's -> 'a -> ('b * 's) option -(** transition function that fully describes an automaton *) +(** transition function that fully describes an automaton. It returns + [None] to indicate that it stops. *) type ('a, 's, 'b) automaton = ('a, 's, 'b) t @@ -140,18 +141,24 @@ module List : sig (** build a list from its inputs *) end -module CCGen : sig +module Gen : sig type 'a gen = unit -> 'a option val map : ('a, 's, 'b) t -> 's -> 'a gen -> 'b gen end -module CCSequence : sig +module Sequence : sig type 'a sequence = ('a -> unit) -> unit val map : ('a, 's, 'b) t -> 's -> 'a sequence -> 'b sequence end +module KList : sig + type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] + + val map : ('a, 's, 'b) t -> 's -> 'a klist -> 'b klist +end + (** {2 Mutable Interface} *) module Mut : sig diff --git a/string/levenshtein.ml b/string/levenshtein.ml index 9aec05c8..cf9b4f9d 100644 --- a/string/levenshtein.ml +++ b/string/levenshtein.ml @@ -37,15 +37,15 @@ module type STRING = sig end (** Continuation list *) -type 'a klist = +type 'a klist = unit -> [ | `Nil - | `Cons of 'a * (unit -> 'a klist) + | `Cons of 'a * 'a klist ] -let rec klist_to_list = function +let rec klist_to_list l = match l () with | `Nil -> [] - | `Cons (x,k) -> x :: klist_to_list (k ()) + | `Cons (x,k) -> x :: klist_to_list k module type S = sig type char_ @@ -568,7 +568,7 @@ module Make(Str : STRING) = struct let dfa = of_string ~limit s in (* traverse at index i in automaton, with [fk] the failure continuation *) - let rec traverse node i ~(fk:unit->'a klist) = + let rec traverse node i ~(fk:'a klist) () = match node with | Node (opt, m) -> (* all alternatives: continue exploring [m], or call [fk] *) @@ -577,7 +577,7 @@ module Make(Str : STRING) = struct (fun c node' fk -> try let next = __transition dfa i c in - (fun () -> traverse node' next ~fk) + traverse node' next ~fk with Not_found -> fk) m fk in @@ -617,13 +617,13 @@ module Make(Str : STRING) = struct fold (fun acc str v -> (str,v) :: acc) [] idx let to_klist idx = - let rec traverse node trail ~(fk:unit->(string_*'a) klist) = + let rec traverse node trail ~(fk:(string_*'a) klist) () = match node with | Node (opt, m) -> (* all alternatives: continue exploring [m], or call [fk] *) let fk = M.fold - (fun c node' fk () -> traverse node' (c::trail) ~fk) + (fun c node' fk -> traverse node' (c::trail) ~fk) m fk in match opt with @@ -650,6 +650,13 @@ end) let debug_print = debug_print output_char +(*$T + edit_distance "foo" "fo0" = 1 + edit_distance "foob" "foo" = 1 + edit_distance "yolo" "yoyo" = 1 + edit_distance "aaaaaaab" "aaaa" = 4 +*) + (* open Batteries;; let words = File.with_file_in "/usr/share/dict/cracklib-small" (fun i -> IO.read_all i |> String.nsplit ~by:"\\n");; diff --git a/string/levenshtein.mli b/string/levenshtein.mli index d571415e..9affac48 100644 --- a/string/levenshtein.mli +++ b/string/levenshtein.mli @@ -58,13 +58,13 @@ strings, we return a continuation list so that, even if there are many results, only those actually asked for are evaluated. *) type 'a klist = - [ + unit -> [ | `Nil - | `Cons of 'a * (unit -> 'a klist) + | `Cons of 'a * 'a klist ] val klist_to_list : 'a klist -> 'a list - (** Helper. *) + (** Helper for short lists. *) (** {2 Signature} From 3127f046def904862ece9f850d3550c359550e3b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 27 May 2014 13:25:15 +0200 Subject: [PATCH 06/52] remove ugly tabs --- misc/AVL.ml | 7 ++++--- misc/AVL.mli | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/misc/AVL.ml b/misc/AVL.ml index 592113a1..b28a4b8f 100644 --- a/misc/AVL.ml +++ b/misc/AVL.ml @@ -34,9 +34,10 @@ type ('a,'b) tree = | Empty | Node of ('a,'b) tree * 'a * 'b * ('a,'b) tree * int -type ('a,'b) t = - { cmp: 'a comparator; - t: ('a,'b) tree } +type ('a,'b) t = { + cmp: 'a comparator; + t: ('a,'b) tree +} let empty ~cmp = { cmp; t=Empty } diff --git a/misc/AVL.mli b/misc/AVL.mli index 5ff71ffa..094ace1e 100644 --- a/misc/AVL.mli +++ b/misc/AVL.mli @@ -32,9 +32,10 @@ type ('a,'b) tree = private | Empty | Node of ('a,'b) tree * 'a * 'b * ('a,'b) tree * int -type ('a,'b) t = private - { cmp: 'a comparator; - t: ('a,'b) tree } +type ('a,'b) t = private { + cmp: 'a comparator; + t: ('a,'b) tree +} val empty : cmp:'a comparator -> ('a,'b) t (** Empty tree *) From fd88f102169a276f6379fbbc1a2baa68aa3b2621 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 31 May 2014 12:36:15 +0200 Subject: [PATCH 07/52] safe version of fold_right --- core/CCList.ml | 20 ++++++++++++++++++++ core/CCList.mli | 3 +++ 2 files changed, 23 insertions(+) diff --git a/core/CCList.ml b/core/CCList.ml index a60428db..45a31ccc 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -61,6 +61,26 @@ let append l1 l2 = let (@) = append +let fold_right f l acc = + let rec direct i f l acc = match l with + | [] -> acc + | _ when i=0 -> safe f (List.rev l) acc + | x::l' -> + let acc = direct (i-1) f l' acc in + f x acc + and safe f l acc = match l with + | [] -> acc + | x::l' -> + let acc = f x acc in + safe f l' acc + in + direct _direct_depth f l acc + +(*$T + fold_right (+) (1 -- 1_000_000) 0 = \ + List.fold_left (+) 0 (1 -- 1_000_000) +*) + let rec compare f l1 l2 = match l1, l2 with | [], [] -> 0 | _, [] -> 1 diff --git a/core/CCList.mli b/core/CCList.mli index 6f9adf77..f06284f1 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -36,6 +36,9 @@ val append : 'a t -> 'a t -> 'a t val (@) : 'a t -> 'a t -> 'a t +val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b +(** Safe version of [fold_right] *) + val compare : ('a -> 'a -> int) -> 'a t -> 'a t -> int val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool From 945325054f107ad3ed6d50cf2f51ae774b63cb6b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 3 Jun 2014 17:03:25 +0200 Subject: [PATCH 08/52] more functions for CCOpt --- core/CCOpt.ml | 16 ++++++++++++++++ core/CCOpt.mli | 13 +++++++++++++ 2 files changed, 29 insertions(+) diff --git a/core/CCOpt.ml b/core/CCOpt.ml index 7a979b62..f5e8f753 100644 --- a/core/CCOpt.ml +++ b/core/CCOpt.ml @@ -58,6 +58,10 @@ let (>>=) o f = match o with | None -> None | Some x -> f x +let flat_map f o = match o with + | None -> None + | Some x -> f x + let (<*>) f x = match f, x with | None, _ | _, None -> None @@ -70,10 +74,22 @@ let map2 f o1 o2 = match o1, o2 with | _, None -> None | Some x, Some y -> Some (f x y) +let iter f o = match o with + | None -> () + | Some x -> f x + +let fold f acc o = match o with + | None -> acc + | Some x -> f acc x + let to_list o = match o with | None -> [] | Some x -> [x] +let of_list = function + | x::_ -> Some x + | [] -> None + type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option type 'a printer = Buffer.t -> 'a -> unit diff --git a/core/CCOpt.mli b/core/CCOpt.mli index 16d96f3b..c81f0081 100644 --- a/core/CCOpt.mli +++ b/core/CCOpt.mli @@ -29,6 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. type 'a t = 'a option val map : ('a -> 'b) -> 'a t -> 'b t +(** Transform the element inside, if any *) val maybe : ('a -> 'b) -> 'b -> 'a t -> 'b (** [maybe f x o] is [x] if [o] is [None], otherwise it's [f y] if [o = Some y] *) @@ -45,16 +46,28 @@ val return : 'a -> 'a t val (>>=) : 'a t -> ('a -> 'b t) -> 'b t (** Monadic bind *) +val flat_map : ('a -> 'b t) -> 'a t -> 'b t +(** Flip version of {!>>=} *) + val (<*>) : ('a -> 'b) t -> 'a t -> 'b t val (<$>) : ('a -> 'b) -> 'a t -> 'b t val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t +val iter : ('a -> unit) -> 'a t -> unit +(** Iterate on 0 or 1 elements *) + +val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a +(** Fold on 0 or 1 elements *) + (** {2 Conversion and IO} *) val to_list : 'a t -> 'a list +val of_list : 'a list -> 'a t +(** Head of list, or [None] *) + type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option type 'a printer = Buffer.t -> 'a -> unit From b5fb3f0ff0f538cfe4fdee7496fd972ff4bb3764 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 3 Jun 2014 17:33:11 +0200 Subject: [PATCH 09/52] printing error in LazyGraph --- misc/lazyGraph.ml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/misc/lazyGraph.ml b/misc/lazyGraph.ml index 82a664d8..3a2f893a 100644 --- a/misc/lazyGraph.ml +++ b/misc/lazyGraph.ml @@ -577,13 +577,13 @@ module Dot = struct CCSequence.iter (function | Full.EnterVertex (v, attrs, _, _) -> - Format.fprintf formatter " @[%a [%a];@]@." pp_vertex v - (CCList.print ~sep:"," print_attribute) attrs + Format.fprintf formatter " @[%a %a;@]@." pp_vertex v + (CCList.print ~start:"[" ~stop:"]" ~sep:"," print_attribute) attrs | Full.ExitVertex _ -> () | Full.MeetEdge (v2, attrs, v1, _) -> - Format.fprintf formatter " @[%a -> %a [%a];@]@." + Format.fprintf formatter " @[%a -> %a %a;@]@." pp_vertex v1 pp_vertex v2 - (CCList.print ~sep:"," print_attribute) + (CCList.print ~start:"[" ~stop:"]" ~sep:"," print_attribute) attrs) events; (* close *) From 9ca1f76bd76754dc4bd29adf847d0274b816bbe0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 9 Jun 2014 18:10:53 +0200 Subject: [PATCH 10/52] more explicit type declaration --- core/CCList.ml | 5 +++++ misc/CSM.mli | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/core/CCList.ml b/core/CCList.ml index 45a31ccc..f6eac495 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -81,6 +81,11 @@ let fold_right f l acc = List.fold_left (+) 0 (1 -- 1_000_000) *) +(*$Q + (Q.list Q.small_int) (fun l -> \ + l = fold_right (fun x y->x::y) l []) +*) + let rec compare f l1 l2 = match l1, l2 with | [], [] -> 0 | _, [] -> 1 diff --git a/misc/CSM.mli b/misc/CSM.mli index 0f60cfca..476ef889 100644 --- a/misc/CSM.mli +++ b/misc/CSM.mli @@ -28,7 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. This module defines state machines that should help design applications with a more explicit control of state (e.g. for networking applications). *) -type ('a, 's, 'b) t = 's -> 'a -> ('b * 's) option +type ('input, 'state, 'output) t = 'state -> 'input -> ('output * 'state) option (** transition function that fully describes an automaton. It returns [None] to indicate that it stops. *) From 39f5e135bd43961c85690354378568ab1065ce2e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 00:47:42 +0200 Subject: [PATCH 11/52] crazy input-driven parser combinators --- _oasis | 2 +- misc/parseReact.ml | 235 ++++++++++++++++++++++++++++++++++++++++++++ misc/parseReact.mli | 113 +++++++++++++++++++++ 3 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 misc/parseReact.ml create mode 100644 misc/parseReact.mli diff --git a/_oasis b/_oasis index 2e97f303..d878e97e 100644 --- a/_oasis +++ b/_oasis @@ -61,7 +61,7 @@ Library "containers_misc" UnionFind, SmallSet, AbsSet, CSM, ActionMan, QCheck, BencodeOnDisk, TTree, HGraph, Automaton, Conv, Bidir, Iteratee, - Ty, Tell, BencodeStream, RatTerm, Cause, AVL + Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact BuildDepends: unix,containers FindlibName: misc FindlibParent: containers diff --git a/misc/parseReact.ml b/misc/parseReact.ml new file mode 100644 index 00000000..1549db83 --- /dev/null +++ b/misc/parseReact.ml @@ -0,0 +1,235 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Parser combinators driven by the input} *) + +type ('a, 'b) t = + | Return : 'b -> ('a,'b) t + | Delay : (unit -> ('a, 'b) t) -> ('a, 'b) t + | One : ('a, 'a) t + | Stop : ('a, unit) t + | Bind : ('a, 'b) t * ('b -> ('a, 'c) t) -> ('a, 'c) t + | Choice : ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t + | Map : ('a, 'b) t * ('b -> 'c) -> ('a, 'c) t + | Guard : ('a, 'b) t * ('b -> bool) -> ('a, 'b) t + | Skip : ('a, unit) t + | IfThenElse: ('a -> bool) * ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t + | Fail : ('a, 'b) t + +let stop = Stop + +let return x = Return x + +let delay f = Delay f + +let return' f = Delay (fun () -> return (f ())) + +let fail = Fail + +let one = One + +let skip = Skip + +let bind f p = Bind (p, f) + +let (>>=) p f = bind f p + +let exact ?(eq=(=)) x = + one + >>= fun y -> + if eq x y then Return () else Fail + +let guard f p = Guard (p, f) + +let (>>) p1 p2 = p1 >>= fun _ -> p2 + +let map f p = Map (p, f) + +let (>>|) p f = Map (p, f) + +let (<|>) p1 p2 = Choice (p1, p2) + +let pair p1 p2 = + p1 >>= fun x1 -> + p2 >>= fun x2 -> + return (x1, x2) + +let triple p1 p2 p3 = + p1 >>= fun x1 -> + p2 >>= fun x2 -> + p3 >>= fun x3 -> + return (x1, x2, x3) + +let if_then_else p a b = IfThenElse (p, a, b) + +(** {6 Utils} *) + +let take_while pred = + let rec next acc = + if_then_else pred + (one >>= fun x -> next (x::acc)) + (return' (fun () -> List.rev acc)) + in + next [] + +let take_n n = + let rec next acc n = + if n = 0 + then return (List.rev acc) + else one >>= fun x -> next (x::acc) (n-1) + in + next [] n + +let skip_spaces = + let rec next () = + if_then_else + (fun c -> c = ' ' || c = '\t' || c = '\n') + (skip >> delay next) + (return ()) + in next () + +let ident = + let accept = function + | c when Char.code c >= Char.code 'a' && Char.code c <= Char.code 'z' -> true + | c when Char.code c >= Char.code 'A' && Char.code c <= Char.code 'Z' -> true + | c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' -> true + | _ -> false + in + let rec aggregate buf = + if_then_else + accept + (one >>= fun c -> Buffer.add_char buf c; aggregate buf) + (return (Buffer.contents buf)) + in + (* create buffer on demand, to avoid sharing it *) + delay (fun () -> aggregate (Buffer.create 32)) + +let many ~sep p = + let rec next acc = + (return (List.rev acc)) + <|> (p >>= fun x -> sep >> next (x::acc)) + in + next [] + +let many1 ~sep p = + let rec next acc = + p >>= fun x -> + let acc = x :: acc in + (return (List.rev acc)) + <|> (sep >> next acc) + in + next [] + +(** {6 Run} *) + +type 'a sequence = ('a -> unit) -> unit + +let _fold_seq f acc seq = + let acc = ref acc in + seq (fun x -> acc := f !acc x); + !acc + +(** Partial state during parsing: a tree of continuations *) +type (_, _) state = + | STBottom : 'b -> ('a, 'b) state + | STPush : ('a, 'c) t * ('c -> ('a, 'b) state list) -> ('a, 'b) state + +let (>>>) p cont = STPush (p, cont) + +let run p seq = + (* normalize the stack (do not let a "return" on top) *) + let rec reduce : type a b. (a,b)state -> (a,b) state list + = fun stack -> match stack with + | STPush (Return x, cont) -> CCList.flat_map reduce (cont x) + | STPush (Delay f, cont) -> reduce (f () >>> cont) + | STPush (Bind (p, f), cont) -> + let stack' = p >>> fun x -> [f x >>> cont] in + reduce stack' + | STPush (Choice (a, b), cont) -> + (* fork into sub-stacks *) + CCList.append (reduce (a >>> cont)) (reduce (b >>> cont)) + | STPush (Map (p, f), cont) -> + let stack' = p >>> fun x -> cont (f x) in + reduce stack' + | STPush (Guard (p, f), cont) -> + let stack' = p >>> fun x -> if f x then cont x else [] in + reduce stack' + | _ -> [stack] + in + (* consume one input token *) + let rec consume_one : type a b. (a,b) state -> a -> (a,b) state list + = fun stack x -> match stack with + | STBottom _ -> [] (* fail *) + | STPush (Stop, _) -> [] (* fail *) + | STPush (Fail, _) -> [] (* fail *) + | STPush (One, cont) -> CCList.flat_map reduce (cont x) + | STPush (Skip, cont) -> CCList.flat_map reduce (cont ()) + | STPush (IfThenElse (p, yay, nay), cont) -> + let l = if p x + then reduce (yay >>> cont) + else reduce (nay >>> cont) + in + CCList.flat_map (fun stack -> consume_one stack x) l + | STPush (Return _, _) -> assert false + | STPush (Delay _, _) -> assert false + | STPush (Bind _, _) -> assert false + | STPush (Choice _, _) -> assert false + | STPush (Map _, _) -> assert false + | STPush (Guard _, _) -> assert false + in + (* to be called at the end of input *) + let finish : type a b. (a,b) state -> (a,b) state list + = fun stack -> match stack with + | STPush (Stop, cont) -> CCList.flat_map reduce (cont ()) + | STPush (Fail, _) -> [] + | _ -> [stack] + in + (* how to parse the input: step by step, starting with [p] as initial parser *) + let step l x = CCList.flat_map (fun p -> consume_one p x) l in + let initial_state = p >>> fun x -> [STBottom x] in + let res = _fold_seq step [initial_state] seq in + (* signal "end of input" *) + let res = CCList.flat_map finish res in + (* recover results *) + CCList.filter_map + (function + | STBottom x -> Some x + | _ -> None + ) res + +(*$R + type sexp = Atom of string | List of sexp list \ + let atom i = Atom i \ + let list_ i = List i \ + + let rec p () = + (skip_spaces >> ident >>= atom) + <|> (skip_spaces >> exact '(' >> many1 ~sep:(exact ' ') (delay p) >>= fun l + >> skip_spaces >> exact ')' >> return (list_ l)) + in + run (p ()) (CCSequence.of_string "(a b (c d))") = + [list_ [atom "a"; atom "b"; list_ [atom "c"; atom "d"]]] +*) diff --git a/misc/parseReact.mli b/misc/parseReact.mli new file mode 100644 index 00000000..da823495 --- /dev/null +++ b/misc/parseReact.mli @@ -0,0 +1,113 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Parser combinators driven by the input} *) + +type ('input, 'result) t +(** parser that takes some type as input and outputs a value of type 'result +when it's done *) + +(** {6 Basic Building Blocs} *) + +val stop : ('a, unit) t +(** Succeed exactly at the end of input *) + +val return : 'b -> ('a, 'b) t +(** Return a value *) + +val return' : (unit -> 'b) -> ('a, 'b) t +(** Suspended version of {!return}, does not evaluate yet *) + +val delay : (unit -> ('a, 'b) t) -> ('a, 'b) t +(** Delay evaluation of the parser *) + +val fail : ('a, 'b) t +(** Failure *) + +val one : ('a, 'a) t +(** Parse one value exactly *) + +val skip : ('a, unit) t +(** Ignore the next value *) + +val exact : ?eq:('a -> 'a -> bool) -> 'a -> ('a, unit) t +(** Accept one value as input exactly *) + +val guard : ('b -> bool) -> ('a, 'b) t -> ('a, 'b) t +(** Ensure the return value of the given parser satisfies the predicate. + [guard f p] will be the same as [p] if [p] returns + some [x] with [f x = true]. If [not (f x)], then [guard f p] fails. *) + +val bind : ('b -> ('a, 'c) t) -> ('a, 'b) t -> ('a, 'c) t + +val (>>=) : ('a, 'b) t -> ('b -> ('a, 'c) t) -> ('a, 'c) t + +val (>>) : ('a, 'b) t -> ('a, 'c) t -> ('a, 'c) t +(** Wait for the first parser to succeed, then switch to the second one *) + +val map : ('b -> 'c) -> ('a, 'b) t -> ('a, 'c) t +(** Map outputs *) + +val (>>|) : ('a, 'b) t -> ('b -> 'c) -> ('a, 'c) t +(** Infix version of {!map} *) + +val (<|>) : ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t +(** Non-deterministic choice. Both branches are evaluated in parallel *) + +val pair : ('a,'b) t -> ('a, 'c) t -> ('a, ('b * 'c)) t +val triple : ('a,'b) t -> ('a, 'c) t -> ('a, 'd) t -> ('a, ('b * 'c * 'd)) t + +val if_then_else : ('a -> bool) -> ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t +(** Test the next input, and choose the parser based on it. Does not consume + the input token for the test *) + +(** {6 Utils} *) + +val take_while : ('a -> bool) -> ('a, 'a list) t +(** Take input while it satisfies the given predicate *) + +val take_n : int -> ('a, 'a list) t +(** Take n input elements *) + +val skip_spaces : (char, unit) t +(** Skip whitespace (space,tab,newline) *) + +val ident : (char, string) t +(** Parse identifiers (stops on whitespaces) *) + +val many : sep:('a,_) t -> ('a, 'b) t -> ('a, 'b list) t +(** [many ~sep p] parses as many [p] as possible, separated by [sep]. *) + +val many1 : sep:('a,_) t -> ('a, 'b) t -> ('a, 'b list) t + +(** {6 Run} *) + +type 'a sequence = ('a -> unit) -> unit + +val run : ('a,'b) t -> 'a sequence -> 'b list +(** List of results. Each element of the list comes from a successful + series of choices [<|>]. If no choice operator was used, the list + contains 0 or 1 elements *) From 021508968cec7d3ff21f091daa2eeb203bf4c196 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 21:53:47 +0200 Subject: [PATCH 12/52] updated test in parseReact; fixed a bug --- misc/parseReact.ml | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/misc/parseReact.ml b/misc/parseReact.ml index 1549db83..d1eee788 100644 --- a/misc/parseReact.ml +++ b/misc/parseReact.ml @@ -210,7 +210,7 @@ let run p seq = (* how to parse the input: step by step, starting with [p] as initial parser *) let step l x = CCList.flat_map (fun p -> consume_one p x) l in let initial_state = p >>> fun x -> [STBottom x] in - let res = _fold_seq step [initial_state] seq in + let res = _fold_seq step (reduce initial_state) seq in (* signal "end of input" *) let res = CCList.flat_map finish res in (* recover results *) @@ -220,16 +220,18 @@ let run p seq = | _ -> None ) res -(*$R - type sexp = Atom of string | List of sexp list \ - let atom i = Atom i \ - let list_ i = List i \ +(*$R + let module S = struct type t = Atom of string | List of t list end in + let open S in + let (%) f g x = f (g x) in + let atom i = Atom i in + let list_ i = List i in let rec p () = - (skip_spaces >> ident >>= atom) - <|> (skip_spaces >> exact '(' >> many1 ~sep:(exact ' ') (delay p) >>= fun l - >> skip_spaces >> exact ')' >> return (list_ l)) + (skip_spaces >> ident >>= (return % atom)) + <|> (skip_spaces >> exact '(' >> many1 ~sep:(exact ' ') (delay p) >>= fun l -> + skip_spaces >> exact ')' >> return (list_ l)) in - run (p ()) (CCSequence.of_string "(a b (c d))") = - [list_ [atom "a"; atom "b"; list_ [atom "c"; atom "d"]]] + let res = run (p ()) (CCSequence.of_str "(a b (c d))") in + assert_equal res [list_ [atom "a"; atom "b"; list_ [atom "c"; atom "d"]]] *) From bc4089316632cb11f0ae7a07559c92b94cf16e75 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 21:54:01 +0200 Subject: [PATCH 13/52] CCList.filter --- core/CCList.ml | 13 +++++++++++++ core/CCList.mli | 3 +++ 2 files changed, 16 insertions(+) diff --git a/core/CCList.ml b/core/CCList.ml index f6eac495..1eb68c32 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -61,6 +61,19 @@ let append l1 l2 = let (@) = append +let filter p l = + let rec direct i p l = match l with + | [] -> [] + | _ when i=0 -> safe p l [] + | x::l' when not (p x) -> direct i p l' + | x::l' -> x :: direct (i-1) p l' + and safe p l acc = match l with + | [] -> List.rev acc + | x::l' when not (p x) -> safe p l' acc + | x::l' -> safe p l' (x::acc) + in + direct _direct_depth p l + let fold_right f l acc = let rec direct i f l acc = match l with | [] -> acc diff --git a/core/CCList.mli b/core/CCList.mli index f06284f1..0d0901c5 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -36,6 +36,9 @@ val append : 'a t -> 'a t -> 'a t val (@) : 'a t -> 'a t -> 'a t +val filter : ('a -> bool) -> 'a t -> 'a t +(** Safe version of {!List.filter} *) + val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b (** Safe version of [fold_right] *) From d7909200f61ac1ae85262a416d750c9dc377c752 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 21:54:20 +0200 Subject: [PATCH 14/52] operations on arrays (filter, filter_map, flat_map) --- core/CCArray.ml | 72 +++++++++++++++++++++++++++++++++++++++++++++++- core/CCArray.mli | 16 +++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/core/CCArray.ml b/core/CCArray.ml index 9f52f5f3..363e0301 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -32,6 +32,77 @@ let foldi f acc a = if i = Array.length a then acc else recurse (f acc i a.(i)) (i+1) in recurse acc 0 +let reverse_in_place a = + let n = Array.length a in + for i = 0 to (n-1)/2 do + let t = a.(i) in + a.(i) <- a.(n-i-1); + a.(n-i-1) <- t; + done + +(*$T + let a = [| 1; 2; 3; 4; 5 |] in \ + reverse_in_place a; \ + a = [| 5;4;3;2;1 |] +*) + +(*$T + let a = [| 1; 2; 3; 4; 5; 6 |] in \ + reverse_in_place a; \ + a = [| 6;5;4;3;2;1 |] +*) + +let filter_map f a = + let rec aux acc i = + if i = Array.length a + then ( + let a' = Array.of_list acc in + reverse_in_place a'; + a' + ) else match f a.(i) with + | None -> aux acc (i+1) + | Some x -> aux (x::acc) (i+1) + in aux [] 0 + +(*$T + filter_map (fun x -> if x mod 2 = 0 then Some (string_of_int x) else None) \ + [| 1; 2; 3; 4 |] = [| "2"; "4" |] + filter_map (fun x -> if x mod 2 = 0 then Some (string_of_int x) else None) \ + [| 1; 2; 3; 4; 5; 6 |] \ + = [| "2"; "4"; "6" |] +*) + +let filter p a = + filter_map (fun x -> if p x then Some x else None) a + +(* append [rev a] in front of [acc] *) +let rec __rev_append_list a acc i = + if i = Array.length a + then acc + else + __rev_append_list a (a.(i) :: acc) (i+1) + +let flat_map f a = + let rec aux acc i = + if i = Array.length a + then ( + let a' = Array.of_list acc in + reverse_in_place a'; + a' + ) + else + let a' = f a.(i) in + aux (__rev_append_list a' acc 0) (i+1) + in aux [] 0 + +(*$T + let a = [| 1; 3; 5 |] in \ + let a' = flat_map (fun x -> [| x; x+1 |]) a in \ + a' = [| 1; 2; 3; 4; 5; 6 |] +*) + +let (>>=) a f = flat_map f a + let for_all p a = let rec check i = i = Array.length a || (p a.(i) && check (i+1)) @@ -79,4 +150,3 @@ let pp_i ?(sep=", ") pp_item buf a = (if i > 0 then Buffer.add_string buf sep); pp_item buf i a.(i) done - diff --git a/core/CCArray.mli b/core/CCArray.mli index b520d221..beae918e 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -30,6 +30,22 @@ type 'a t = 'a array val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b (** fold left on array, with index *) +val filter : ('a -> bool) -> 'a t -> 'a t +(** Filter elements out of the array. Only the elements satisfying + the given predicate will be kept. *) + +val reverse_in_place : 'a t -> unit +(** Reverse the array in place *) + +val filter_map : ('a -> 'b option) -> 'a t -> 'b t +(** Map each element into another value, or discard it *) + +val flat_map : ('a -> 'b t) -> 'a t -> 'b t +(** transform each element into an array, then flatten *) + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +(** Infix version of {!flat_map} *) + val for_all : ('a -> bool) -> 'a t -> bool val for_all2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool From 5567b12b797ade22b463ad332a832caeb6ea39f9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 21:57:54 +0200 Subject: [PATCH 15/52] CCBatch: batch operations on collections, with some fusion optimizations to reduce the numbre of intermediate collections --- _oasis | 2 +- core/CCArray.ml | 4 ++ core/CCArray.mli | 4 ++ core/CCBatch.ml | 141 +++++++++++++++++++++++++++++++++++++++++++++++ core/CCBatch.mli | 69 +++++++++++++++++++++++ core/CCList.ml | 2 + core/CCList.mli | 2 + 7 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 core/CCBatch.ml create mode 100644 core/CCBatch.mli diff --git a/_oasis b/_oasis index d878e97e..03e0d8b9 100644 --- a/_oasis +++ b/_oasis @@ -41,7 +41,7 @@ Library "containers" Modules: CCVector, CCDeque, CCGen, CCSequence, CCFQueue, CCMultiMap, CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCLeftistheap, CCList, CCOpt, CCPair, CCFun, CCHash, - CCKList, CCInt, CCBool, CCArray + CCKList, CCInt, CCBool, CCArray, CCBatch FindlibName: containers Library "containers_string" diff --git a/core/CCArray.ml b/core/CCArray.ml index 363e0301..4dc07f13 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -27,6 +27,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. type 'a t = 'a array +let empty = [| |] + +let map = Array.map + let foldi f acc a = let rec recurse acc i = if i = Array.length a then acc else recurse (f acc i a.(i)) (i+1) diff --git a/core/CCArray.mli b/core/CCArray.mli index beae918e..b63baed5 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -27,6 +27,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. type 'a t = 'a array +val empty : 'a t + +val map : ('a -> 'b) -> 'a t -> 'b t + val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b (** fold left on array, with index *) diff --git a/core/CCBatch.ml b/core/CCBatch.ml new file mode 100644 index 00000000..ce9677a6 --- /dev/null +++ b/core/CCBatch.ml @@ -0,0 +1,141 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Batch Operations on Collections} *) + +module type COLLECTION = sig + type 'a t + + val empty : 'a t + val map : ('a -> 'b) -> 'a t -> 'b t + val filter : ('a -> bool) -> 'a t -> 'a t + val filter_map : ('a -> 'b option) -> 'a t -> 'b t + val flat_map : ('a -> 'b t) -> 'a t -> 'b t +end + +module type S = sig + type 'a t + + type ('a,'b) op + (** Operation that converts an ['a t] into a ['b t] *) + + val apply : ('a,'b) op -> 'a t -> 'b t + val apply' : 'a t -> ('a,'b) op -> 'b t + + (** {6 Combinators} *) + + val map : ('a -> 'b) -> ('a, 'b) op + + val filter : ('a -> bool) -> ('a,'a) op + + val filter_map : ('a -> 'b option) -> ('a,'b) op + + val flat_map : ('a -> 'b t) -> ('a,'b) op + + val compose : ('b,'c) op -> ('a,'b) op -> ('a,'c) op + val (>>>) : ('a,'b) op -> ('b,'c) op -> ('a,'c) op +end + +module Make(C : COLLECTION) = struct + type 'a t = 'a C.t + type (_,_) op = + | Map : ('a -> 'b) -> ('a, 'b) op + | Filter : ('a -> bool) -> ('a, 'a) op + | FilterMap : ('a -> 'b option) -> ('a,'b) op + | FlatMap : ('a -> 'b t) -> ('a,'b) op + | Compose : ('a,'b) op * ('b, 'c) op -> ('a, 'c) op + + (* right-associativity *) + let _compose f g = match f with + | Compose (f1, f2) -> Compose (f1, Compose (f2, g)) + | _ -> Compose (f, g) + + let compose f g = _compose g f + let (>>>) f g = _compose f g + + (* function composition *) + let _compose_fun f g = fun x -> g (f x) + + (* result of one step of optimization, indicates whether the object did + change or not *) + type 'a optim_result = + | Same of 'a + | New of 'a + + let _new_compose a b = New (Compose (a,b)) + + (* optimize a batch operation by fusion *) + let rec _optimize : type a b. (a,b) op -> (a,b) op + = fun op -> match op with + | Compose (a, b) -> + let a' = _optimize a + and b' = _optimize b in + _optimize_rec (Compose (a', b')) + | op -> op + (* repeat optimization until a fixpoint is reached *) + and _optimize_rec : type a b. (a,b) op -> (a,b) op + = fun op -> match _optimize_head op with + | Same _ -> op + | New op' -> _optimize_rec op' + and _optimize_head : type a b. (a,b) op -> (a,b) op optim_result + = function + | Compose (Map f, Compose (Map g, cont)) -> + _new_compose (Map (fun x -> g (f x))) cont + | Compose (Filter p, Compose (Map g, cont)) -> + _new_compose + (FilterMap (fun x -> if p x then Some (g x) else None)) cont + | Compose (Filter p, Compose (Filter p', cont)) -> + _new_compose (Filter (fun x -> p x && p' x)) cont + | Compose (Filter p, Compose (FlatMap f, cont)) -> + _new_compose (FlatMap (fun x -> if p x then f x else C.empty)) cont + | op -> + Same op (* cannot optimize *) + + let apply op a = + let rec _apply : type a b. (a,b) op -> a t -> b t + = fun op a -> match op with + | Compose (op1, op2) -> + let a' = _apply op1 a in + _apply op2 a' + | Map f -> C.map f a + | Filter p -> C.filter p a + | FlatMap f -> C.flat_map f a + | FilterMap f -> C.filter_map f a + in + (* optimize and run *) + let op' = _optimize op in + _apply op' a + + let apply' a op = apply op a + + (** {6 Combinators} *) + + let map f = Map f + let filter p = Filter p + let filter_map f = FilterMap f + let flat_map f = FlatMap f +end + diff --git a/core/CCBatch.mli b/core/CCBatch.mli new file mode 100644 index 00000000..10a88634 --- /dev/null +++ b/core/CCBatch.mli @@ -0,0 +1,69 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Batch Operations on Collections} +Define and combine operations on a collection of elements, then +run the composition of those operations on some collection. The +composition is optimized to minimize the number of intermediate +collections *) + +(** {2 Definition of a Collection} *) +module type COLLECTION = sig + type 'a t + + val empty : 'a t + val map : ('a -> 'b) -> 'a t -> 'b t + val filter : ('a -> bool) -> 'a t -> 'a t + val filter_map : ('a -> 'b option) -> 'a t -> 'b t + val flat_map : ('a -> 'b t) -> 'a t -> 'b t +end + +(** {2 Definition of a Batch operations} *) +module type S = sig + type 'a t + + type ('a,'b) op + (** Operation that converts an ['a t] into a ['b t] *) + + val apply : ('a,'b) op -> 'a t -> 'b t + val apply' : 'a t -> ('a,'b) op -> 'b t + + (** {6 Combinators} *) + + val map : ('a -> 'b) -> ('a, 'b) op + + val filter : ('a -> bool) -> ('a,'a) op + + val filter_map : ('a -> 'b option) -> ('a,'b) op + + val flat_map : ('a -> 'b t) -> ('a,'b) op + + val compose : ('b,'c) op -> ('a,'b) op -> ('a,'c) op + val (>>>) : ('a,'b) op -> ('b,'c) op -> ('a,'c) op +end + +(** {2 Functor} *) +module Make(C : COLLECTION) : S with type 'a t = 'a C.t diff --git a/core/CCList.ml b/core/CCList.ml index 1eb68c32..8a240539 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -28,6 +28,8 @@ of this software, even if advised of the possibility of such damage. type 'a t = 'a list +let empty = [] + (* max depth for direct recursion *) let _direct_depth = 500 diff --git a/core/CCList.mli b/core/CCList.mli index 0d0901c5..7ccc71a7 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -28,6 +28,8 @@ of this software, even if advised of the possibility of such damage. type 'a t = 'a list +val empty : 'a t + val map : ('a -> 'b) -> 'a t -> 'b t (** Safe version of map *) From 40f8955b34b0eb88739d9c031ac333c16c3de679 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 22:21:29 +0200 Subject: [PATCH 16/52] updated CCBatch (more optimizations and type-safety, enforcing some structural constraints with GADT) --- core/CCBatch.ml | 103 ++++++++++++++++++++++++++++++++++------------- core/CCBatch.mli | 2 + 2 files changed, 77 insertions(+), 28 deletions(-) diff --git a/core/CCBatch.ml b/core/CCBatch.ml index ce9677a6..036860cc 100644 --- a/core/CCBatch.ml +++ b/core/CCBatch.ml @@ -47,6 +47,8 @@ module type S = sig (** {6 Combinators} *) + val id : ('a, 'a) op + val map : ('a -> 'b) -> ('a, 'b) op val filter : ('a -> bool) -> ('a,'a) op @@ -62,22 +64,21 @@ end module Make(C : COLLECTION) = struct type 'a t = 'a C.t type (_,_) op = - | Map : ('a -> 'b) -> ('a, 'b) op - | Filter : ('a -> bool) -> ('a, 'a) op - | FilterMap : ('a -> 'b option) -> ('a,'b) op - | FlatMap : ('a -> 'b t) -> ('a,'b) op - | Compose : ('a,'b) op * ('b, 'c) op -> ('a, 'c) op + | Id : ('a,'a) op + | Compose : ('a,'b) base_op * ('b, 'c) op -> ('a, 'c) op + and (_,_) base_op = + | Map : ('a -> 'b) -> ('a, 'b) base_op + | Filter : ('a -> bool) -> ('a, 'a) base_op + | FilterMap : ('a -> 'b option) -> ('a,'b) base_op + | FlatMap : ('a -> 'b t) -> ('a,'b) base_op - (* right-associativity *) - let _compose f g = match f with - | Compose (f1, f2) -> Compose (f1, Compose (f2, g)) - | _ -> Compose (f, g) - let compose f g = _compose g f - let (>>>) f g = _compose f g - - (* function composition *) - let _compose_fun f g = fun x -> g (f x) + (* associativity: put parenthesis on the right *) + let rec _compose : type a b c. (a,b) op -> (b,c) op -> (a,c) op + = fun f g -> match f with + | Compose (f1, Id) -> Compose (f1, g) + | Compose (f1, f2) -> Compose (f1, _compose f2 g) + | Id -> g (* result of one step of optimization, indicates whether the object did change or not *) @@ -91,10 +92,9 @@ module Make(C : COLLECTION) = struct let rec _optimize : type a b. (a,b) op -> (a,b) op = fun op -> match op with | Compose (a, b) -> - let a' = _optimize a - and b' = _optimize b in - _optimize_rec (Compose (a', b')) - | op -> op + let b' = _optimize b in + _optimize_rec (Compose (a, b')) + | Id -> Id (* repeat optimization until a fixpoint is reached *) and _optimize_rec : type a b. (a,b) op -> (a,b) op = fun op -> match _optimize_head op with @@ -102,24 +102,67 @@ module Make(C : COLLECTION) = struct | New op' -> _optimize_rec op' and _optimize_head : type a b. (a,b) op -> (a,b) op optim_result = function + | Id -> Same Id | Compose (Map f, Compose (Map g, cont)) -> _new_compose (Map (fun x -> g (f x))) cont + | Compose (Map f, Compose (Filter p, cont)) -> + _new_compose + (FilterMap (fun x -> let y = f x in if p y then Some y else None)) cont + | Compose (Map f, Compose (FilterMap f', cont)) -> + _new_compose + (FilterMap (fun x -> f' (f x))) cont + | Compose (Map f, Compose (FlatMap f', cont)) -> + _new_compose + (FlatMap (fun x -> f' (f x))) cont + | Compose (Filter p, Compose (Filter p', cont)) -> + _new_compose (Filter (fun x -> p x && p' x)) cont | Compose (Filter p, Compose (Map g, cont)) -> _new_compose (FilterMap (fun x -> if p x then Some (g x) else None)) cont - | Compose (Filter p, Compose (Filter p', cont)) -> - _new_compose (Filter (fun x -> p x && p' x)) cont - | Compose (Filter p, Compose (FlatMap f, cont)) -> - _new_compose (FlatMap (fun x -> if p x then f x else C.empty)) cont - | op -> + | Compose (Filter p, Compose (FilterMap f', cont)) -> + _new_compose + (FilterMap (fun x -> if p x then f' x else None)) cont + | Compose (Filter p, Compose (FlatMap f', cont)) -> + _new_compose + (FlatMap (fun x -> if p x then f' x else C.empty)) cont + | Compose (FilterMap f, Compose (FilterMap f', cont)) -> + _new_compose + (FilterMap + (fun x -> match f x with None -> None | Some y -> f' y)) + cont + | Compose (FilterMap f, Compose (Filter p, cont)) -> + _new_compose + (FilterMap + (fun x -> match f x with + | (Some y) as res when p y -> res + | _ -> None)) + cont + | Compose (FilterMap f, Compose (Map f', cont)) -> + _new_compose + (FilterMap + (fun x -> match f x with + | None -> None + | Some y -> Some (f' y))) + cont + | Compose (FilterMap f, Compose (FlatMap f', cont)) -> + _new_compose + (FlatMap + (fun x -> match f x with + | None -> C.empty + | Some y -> f' y)) + cont + | (Compose _) as op -> Same op (* cannot optimize *) let apply op a = let rec _apply : type a b. (a,b) op -> a t -> b t = fun op a -> match op with | Compose (op1, op2) -> - let a' = _apply op1 a in + let a' = _apply_base op1 a in _apply op2 a' + | Id -> a + and _apply_base : type a b. (a,b) base_op -> a t -> b t + = fun op a -> match op with | Map f -> C.map f a | Filter p -> C.filter p a | FlatMap f -> C.flat_map f a @@ -133,9 +176,13 @@ module Make(C : COLLECTION) = struct (** {6 Combinators} *) - let map f = Map f - let filter p = Filter p - let filter_map f = FilterMap f - let flat_map f = FlatMap f + let id = Id + let map f = Compose (Map f, Id) + let filter p = Compose (Filter p, Id) + let filter_map f = Compose (FilterMap f, Id) + let flat_map f = Compose (FlatMap f, Id) + + let compose f g = _compose g f + let (>>>) f g = _compose f g end diff --git a/core/CCBatch.mli b/core/CCBatch.mli index 10a88634..9931929f 100644 --- a/core/CCBatch.mli +++ b/core/CCBatch.mli @@ -53,6 +53,8 @@ module type S = sig (** {6 Combinators} *) + val id : ('a, 'a) op + val map : ('a -> 'b) -> ('a, 'b) op val filter : ('a -> bool) -> ('a,'a) op From 80522a4959bfcc3e56cf8140b6d7c12731d35999 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 22:25:59 +0200 Subject: [PATCH 17/52] expose the optimization to the user in CCBatch --- core/CCBatch.ml | 13 +++++++++++++ core/CCBatch.mli | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/core/CCBatch.ml b/core/CCBatch.ml index 036860cc..0102f163 100644 --- a/core/CCBatch.ml +++ b/core/CCBatch.ml @@ -45,6 +45,12 @@ module type S = sig val apply : ('a,'b) op -> 'a t -> 'b t val apply' : 'a t -> ('a,'b) op -> 'b t + val length : (_,_) op -> int + (** Number of intermediate structures needed to compute this operation *) + + val optimize : ('a,'b) op -> ('a,'b) op + (** Try to minimize the length of the operation *) + (** {6 Combinators} *) val id : ('a, 'a) op @@ -154,6 +160,13 @@ module Make(C : COLLECTION) = struct | (Compose _) as op -> Same op (* cannot optimize *) + let rec length : type a b. (a,b) op -> int = function + | Id -> 0 + | Compose (_, Id) -> 0 + | Compose (_, cont) -> 1 + length cont + + let optimize = _optimize + let apply op a = let rec _apply : type a b. (a,b) op -> a t -> b t = fun op a -> match op with diff --git a/core/CCBatch.mli b/core/CCBatch.mli index 9931929f..8f7c0b12 100644 --- a/core/CCBatch.mli +++ b/core/CCBatch.mli @@ -51,6 +51,12 @@ module type S = sig val apply : ('a,'b) op -> 'a t -> 'b t val apply' : 'a t -> ('a,'b) op -> 'b t + val length : (_,_) op -> int + (** Number of intermediate structures needed to compute this operation *) + + val optimize : ('a,'b) op -> ('a,'b) op + (** Try to minimize the length of the operation *) + (** {6 Combinators} *) val id : ('a, 'a) op From ee729348646cc0fa60fbdbe223bbc9a9b5513857 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 23:27:21 +0200 Subject: [PATCH 18/52] more operations on collections; optimization level as a parameter in CCBatch --- core/CCArray.ml | 7 ++++++ core/CCArray.mli | 3 +++ core/CCBatch.ml | 55 ++++++++++++++++++++++++++++++++++-------------- core/CCBatch.mli | 17 +++++++++++---- core/CCKList.ml | 17 +++++++++++++++ core/CCKList.mli | 4 ++++ 6 files changed, 83 insertions(+), 20 deletions(-) diff --git a/core/CCArray.ml b/core/CCArray.ml index 4dc07f13..16fab34d 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -125,6 +125,13 @@ let exists p a = i < Array.length a && (p a.(i) || check (i+1)) in check 0 +let (--) i j = + if i<=j + then + Array.init (j-i+1) (fun k -> i+k) + else + Array.init (i-j+1) (fun k -> i-k) + (** all the elements of a, but the i-th, into a list *) let except_idx a i = foldi diff --git a/core/CCArray.mli b/core/CCArray.mli index b63baed5..047ef761 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -58,6 +58,9 @@ val for_all2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool val exists : ('a -> bool) -> 'a t -> bool +val (--) : int -> int -> int t +(** Range array *) + val except_idx : 'a t -> int -> 'a list (** Remove given index *) diff --git a/core/CCBatch.ml b/core/CCBatch.ml index 0102f163..95ecab8a 100644 --- a/core/CCBatch.ml +++ b/core/CCBatch.ml @@ -42,15 +42,24 @@ module type S = sig type ('a,'b) op (** Operation that converts an ['a t] into a ['b t] *) - val apply : ('a,'b) op -> 'a t -> 'b t - val apply' : 'a t -> ('a,'b) op -> 'b t - val length : (_,_) op -> int (** Number of intermediate structures needed to compute this operation *) - val optimize : ('a,'b) op -> ('a,'b) op + type optimization_level = + | OptimNone + | OptimBase + | OptimMergeFlatMap + + val optimize : ?level:optimization_level -> ('a,'b) op -> ('a,'b) op (** Try to minimize the length of the operation *) + val apply : ?level:optimization_level -> ('a,'b) op -> 'a t -> 'b t + (** Apply the operation to the collection. + @param level the optimization level, default is [OptimBase] *) + + val apply' : 'a t -> ('a,'b) op -> 'b t + (** Flip of {!apply} *) + (** {6 Combinators} *) val id : ('a, 'a) op @@ -92,22 +101,29 @@ module Make(C : COLLECTION) = struct | Same of 'a | New of 'a + type optimization_level = + | OptimNone + | OptimBase + | OptimMergeFlatMap + let _new_compose a b = New (Compose (a,b)) (* optimize a batch operation by fusion *) - let rec _optimize : type a b. (a,b) op -> (a,b) op - = fun op -> match op with + let rec _optimize : type a b. level:optimization_level -> (a,b) op -> (a,b) op + = fun ~level op -> match op with + | _ when level = OptimNone -> op | Compose (a, b) -> - let b' = _optimize b in - _optimize_rec (Compose (a, b')) + let b' = _optimize ~level b in + _optimize_rec ~level (Compose (a, b')) | Id -> Id (* repeat optimization until a fixpoint is reached *) - and _optimize_rec : type a b. (a,b) op -> (a,b) op - = fun op -> match _optimize_head op with + and _optimize_rec : type a b. level:optimization_level -> (a,b) op -> (a,b) op + = fun ~level op -> match _optimize_head ~level op with | Same _ -> op - | New op' -> _optimize_rec op' - and _optimize_head : type a b. (a,b) op -> (a,b) op optim_result - = function + | New op' -> _optimize_rec ~level op' + and _optimize_head + : type a b. level:optimization_level -> (a,b) op -> (a,b) op optim_result + = fun ~level op -> match op with | Id -> Same Id | Compose (Map f, Compose (Map g, cont)) -> _new_compose (Map (fun x -> g (f x))) cont @@ -157,6 +173,13 @@ module Make(C : COLLECTION) = struct | None -> C.empty | Some y -> f' y)) cont + | Compose (FlatMap f, Compose (FlatMap f', cont)) -> + _new_compose + (FlatMap + (fun x -> + let a = f x in + C.flat_map f' a)) + cont | (Compose _) as op -> Same op (* cannot optimize *) @@ -165,9 +188,9 @@ module Make(C : COLLECTION) = struct | Compose (_, Id) -> 0 | Compose (_, cont) -> 1 + length cont - let optimize = _optimize + let optimize ?(level=OptimBase) = _optimize ~level - let apply op a = + let apply ?level op a = let rec _apply : type a b. (a,b) op -> a t -> b t = fun op a -> match op with | Compose (op1, op2) -> @@ -182,7 +205,7 @@ module Make(C : COLLECTION) = struct | FilterMap f -> C.filter_map f a in (* optimize and run *) - let op' = _optimize op in + let op' = optimize ?level op in _apply op' a let apply' a op = apply op a diff --git a/core/CCBatch.mli b/core/CCBatch.mli index 8f7c0b12..bbd8f921 100644 --- a/core/CCBatch.mli +++ b/core/CCBatch.mli @@ -48,15 +48,24 @@ module type S = sig type ('a,'b) op (** Operation that converts an ['a t] into a ['b t] *) - val apply : ('a,'b) op -> 'a t -> 'b t - val apply' : 'a t -> ('a,'b) op -> 'b t - val length : (_,_) op -> int (** Number of intermediate structures needed to compute this operation *) - val optimize : ('a,'b) op -> ('a,'b) op + type optimization_level = + | OptimNone + | OptimBase + | OptimMergeFlatMap + + val optimize : ?level:optimization_level -> ('a,'b) op -> ('a,'b) op (** Try to minimize the length of the operation *) + val apply : ?level:optimization_level -> ('a,'b) op -> 'a t -> 'b t + (** Apply the operation to the collection. + @param level the optimization level, default is [OptimBase] *) + + val apply' : 'a t -> ('a,'b) op -> 'b t + (** Flip of {!apply} *) + (** {6 Combinators} *) val id : ('a, 'a) op diff --git a/core/CCKList.ml b/core/CCKList.ml index 05656a9c..6bce244b 100644 --- a/core/CCKList.ml +++ b/core/CCKList.ml @@ -57,6 +57,15 @@ let of_list l = | x::l' -> `Cons (x, aux l') in aux l +let equal ?(eq=(=)) l1 l2 = + let rec aux l1 l2 = match l1(), l2() with + | `Nil, `Nil -> true + | `Nil, _ + | _, `Nil -> false + | `Cons (x1,l1'), `Cons (x2,l2') -> + eq x1 x2 && aux l1' l2' + in aux l1 l2 + type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option @@ -140,6 +149,14 @@ and _flat_map_app f l l' () = match l () with | `Cons (x, tl) -> `Cons (x, _flat_map_app f tl l') +let rec filter_map f l () = match l() with + | `Nil -> `Nil + | `Cons (x, l') -> + begin match f x with + | None -> filter_map f l' () + | Some y -> `Cons (y, filter_map f l') + end + let flatten l = flat_map (fun x->x) l let range i j = diff --git a/core/CCKList.mli b/core/CCKList.mli index ec3a8ec9..20cce78f 100644 --- a/core/CCKList.mli +++ b/core/CCKList.mli @@ -45,6 +45,8 @@ val of_list : 'a list -> 'a t val to_list : 'a t -> 'a list (** Gather all values into a list *) +val equal : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t -> bool + type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option @@ -72,6 +74,8 @@ val append : 'a t -> 'a t -> 'a t val flat_map : ('a -> 'b t) -> 'a t -> 'b t +val filter_map : ('a -> 'b option) -> 'a t -> 'b t + val flatten : 'a t t -> 'a t val range : int -> int -> int t From fc3b6040f6c228eec911545c48bd67ca8d33b9ca Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Jun 2014 23:27:37 +0200 Subject: [PATCH 19/52] benchmark for CCBatch on list, array, and klist --- _oasis | 8 ++++ tests/bench_batch.ml | 105 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 tests/bench_batch.ml diff --git a/_oasis b/_oasis index 03e0d8b9..3ae0d45c 100644 --- a/_oasis +++ b/_oasis @@ -129,6 +129,14 @@ Executable bench_conv MainIs: bench_conv.ml BuildDepends: containers,benchmark +Executable bench_batch + Path: tests/ + Install: false + CompiledObject: native + Build$: flag(bench) + MainIs: bench_batch.ml + BuildDepends: containers,benchmark + Executable test_levenshtein Path: tests/ Install: false diff --git a/tests/bench_batch.ml b/tests/bench_batch.ml new file mode 100644 index 00000000..90489ec3 --- /dev/null +++ b/tests/bench_batch.ml @@ -0,0 +1,105 @@ +(** benchmark CCBatch *) + +module type COLL = sig + val name : string + include CCBatch.COLLECTION + val doubleton : 'a -> 'a -> 'a t + val (--) : int -> int -> int t + val equal : int t -> int t -> bool + val fold : (int -> int -> int) -> int -> int t -> int +end + +module Make(C : COLL) = struct + let f1 x = x mod 2 = 0 + let f2 x = -x + let f3 x = C.doubleton x (x+1) + let f4 x = -x + let collect a = C.fold (+) 0 a + + let naive a = + let a = C.filter f1 a in + let a = C.flat_map f3 a in + let a = C.filter f1 a in + let a = C.map f2 a in + let a = C.flat_map f3 a in + let a = C.map f4 a in + ignore (collect a); + a + + module BA = CCBatch.Make(C) + + let ops = + BA.(filter f1 >>> flat_map f3 >>> filter f1 >>> map f2 >>> flat_map f3 >>> map f4) + + let batch_simple a = + let a = BA.apply ~level:BA.OptimNone ops a in + ignore (collect a); + a + + let batch a = + let a = BA.apply ~level:BA.OptimBase ops a in + ignore (collect a); + a + + let batch2 a = + let a = BA.apply ~level:BA.OptimMergeFlatMap ops a in + ignore (collect a); + a + + let bench_for ~time n = + Printf.printf "\n\nbenchmark for %s of len %d\n" C.name n; + Printf.printf "optimization: from %d to %d\n" + (BA.length ops) (BA.length (BA.optimize ops)); + flush stdout; + let a = C.(0 -- n) in + (* debug + CCPrint.printf "naive: %a\n" (CCArray.pp CCInt.pp) (naive a); + CCPrint.printf "simple: %a\n" (CCArray.pp CCInt.pp) (batch_simple a); + CCPrint.printf "batch: %a\n" (CCArray.pp CCInt.pp) (batch a); + *) + assert (C.equal (batch_simple a) (naive a)); + assert (C.equal (batch_simple a) (batch a)); + let res = Benchmark.throughputN time + [ C.name ^ "_naive", naive, a + ; C.name ^ "_batch_simple", batch_simple, a + ; C.name ^ "_batch", batch, a + ; C.name ^ "_batch_merge", batch2, a + ] + in + Benchmark.tabulate res + + let bench () = + bench_for 1 100; + bench_for 2 100_000; + bench_for 2 1_000_000; + () +end + +module BenchArray = Make(struct + include CCArray + let name = "array" + let equal a b = a=b + let doubleton x y = [| x; y |] + let fold = Array.fold_left +end) + +module BenchList = Make(struct + include CCList + let name = "list" + let equal a b = a=b + let doubleton x y = [ x; y ] + let fold = List.fold_left +end) + +module BenchKList = Make(struct + include CCKList + let name = "klist" + let equal a b = equal a b + let doubleton x y = CCKList.of_list [ x; y ] +end) + +let () = + BenchArray.bench(); + BenchList.bench(); + BenchKList.bench (); + () From 7f3a33a78dc621375e3b4130044d5c11b8b242ee Mon Sep 17 00:00:00 2001 From: Drup Date: Thu, 12 Jun 2014 02:58:43 +0200 Subject: [PATCH 20/52] Change the representation of optimized list of operation and add folding. --- core/CCBatch.ml | 191 +++++++++++++++++++++++------------------------ core/CCBatch.mli | 14 +--- 2 files changed, 97 insertions(+), 108 deletions(-) diff --git a/core/CCBatch.ml b/core/CCBatch.ml index 95ecab8a..d27de6ad 100644 --- a/core/CCBatch.ml +++ b/core/CCBatch.ml @@ -30,6 +30,7 @@ module type COLLECTION = sig type 'a t val empty : 'a t + val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a val map : ('a -> 'b) -> 'a t -> 'b t val filter : ('a -> bool) -> 'a t -> 'a t val filter_map : ('a -> 'b option) -> 'a t -> 'b t @@ -45,15 +46,7 @@ module type S = sig val length : (_,_) op -> int (** Number of intermediate structures needed to compute this operation *) - type optimization_level = - | OptimNone - | OptimBase - | OptimMergeFlatMap - - val optimize : ?level:optimization_level -> ('a,'b) op -> ('a,'b) op - (** Try to minimize the length of the operation *) - - val apply : ?level:optimization_level -> ('a,'b) op -> 'a t -> 'b t + val apply : ('a,'b) op -> 'a t -> 'b t (** Apply the operation to the collection. @param level the optimization level, default is [OptimBase] *) @@ -79,9 +72,10 @@ end module Make(C : COLLECTION) = struct type 'a t = 'a C.t type (_,_) op = - | Id : ('a,'a) op + | Nil : ('a,'a) op | Compose : ('a,'b) base_op * ('b, 'c) op -> ('a, 'c) op and (_,_) base_op = + | Id : ('a, 'a) base_op | Map : ('a -> 'b) -> ('a, 'b) base_op | Filter : ('a -> bool) -> ('a, 'a) base_op | FilterMap : ('a -> 'b option) -> ('a,'b) base_op @@ -91,134 +85,135 @@ module Make(C : COLLECTION) = struct (* associativity: put parenthesis on the right *) let rec _compose : type a b c. (a,b) op -> (b,c) op -> (a,c) op = fun f g -> match f with - | Compose (f1, Id) -> Compose (f1, g) + | Compose (f1, Nil) -> Compose (f1, g) | Compose (f1, f2) -> Compose (f1, _compose f2 g) - | Id -> g + | Nil -> g - (* result of one step of optimization, indicates whether the object did - change or not *) - type 'a optim_result = - | Same of 'a - | New of 'a - type optimization_level = - | OptimNone - | OptimBase - | OptimMergeFlatMap + (* After optimization, the op is a list of flatmaps, with maybe something else at the end *) + type (_,_) optimized_op = + | Base : ('a,'b) base_op -> ('a,'b) optimized_op + | FlatMapPlus : ('a -> 'b t) * ('b, 'c) optimized_op -> ('a, 'c) optimized_op - let _new_compose a b = New (Compose (a,b)) - (* optimize a batch operation by fusion *) - let rec _optimize : type a b. level:optimization_level -> (a,b) op -> (a,b) op - = fun ~level op -> match op with - | _ when level = OptimNone -> op - | Compose (a, b) -> - let b' = _optimize ~level b in - _optimize_rec ~level (Compose (a, b')) - | Id -> Id - (* repeat optimization until a fixpoint is reached *) - and _optimize_rec : type a b. level:optimization_level -> (a,b) op -> (a,b) op - = fun ~level op -> match _optimize_head ~level op with - | Same _ -> op - | New op' -> _optimize_rec ~level op' - and _optimize_head - : type a b. level:optimization_level -> (a,b) op -> (a,b) op optim_result - = fun ~level op -> match op with - | Id -> Same Id - | Compose (Map f, Compose (Map g, cont)) -> - _new_compose (Map (fun x -> g (f x))) cont - | Compose (Map f, Compose (Filter p, cont)) -> - _new_compose + (* As compose, but optimize recursively on the way. *) + let rec optimize_compose + : type a b c. (a,b) base_op -> (b,c) op -> (a,c) optimized_op + = fun base_op op -> match base_op, op with + | f, Nil -> Base f + | Id, Compose (f, cont) -> optimize_compose f cont + | f, Compose (Id, cont) -> optimize_compose f cont + | Map f, Compose (Map g, cont) -> + optimize_compose (Map (fun x -> g (f x))) cont + | Map f, Compose (Filter p, cont) -> + optimize_compose (FilterMap (fun x -> let y = f x in if p y then Some y else None)) cont - | Compose (Map f, Compose (FilterMap f', cont)) -> - _new_compose + | Map f, Compose (FilterMap f', cont) -> + optimize_compose (FilterMap (fun x -> f' (f x))) cont - | Compose (Map f, Compose (FlatMap f', cont)) -> - _new_compose + | Map f, Compose (FlatMap f', cont) -> + optimize_compose (FlatMap (fun x -> f' (f x))) cont - | Compose (Filter p, Compose (Filter p', cont)) -> - _new_compose (Filter (fun x -> p x && p' x)) cont - | Compose (Filter p, Compose (Map g, cont)) -> - _new_compose + | Filter p, Compose (Filter p', cont) -> + optimize_compose (Filter (fun x -> p x && p' x)) cont + | Filter p, Compose (Map g, cont) -> + optimize_compose (FilterMap (fun x -> if p x then Some (g x) else None)) cont - | Compose (Filter p, Compose (FilterMap f', cont)) -> - _new_compose + | Filter p, Compose (FilterMap f', cont) -> + optimize_compose (FilterMap (fun x -> if p x then f' x else None)) cont - | Compose (Filter p, Compose (FlatMap f', cont)) -> - _new_compose + | Filter p, Compose (FlatMap f', cont) -> + optimize_compose (FlatMap (fun x -> if p x then f' x else C.empty)) cont - | Compose (FilterMap f, Compose (FilterMap f', cont)) -> - _new_compose + | FilterMap f, Compose (FilterMap f', cont) -> + optimize_compose (FilterMap (fun x -> match f x with None -> None | Some y -> f' y)) cont - | Compose (FilterMap f, Compose (Filter p, cont)) -> - _new_compose + | FilterMap f, Compose (Filter p, cont) -> + optimize_compose (FilterMap (fun x -> match f x with | (Some y) as res when p y -> res | _ -> None)) cont - | Compose (FilterMap f, Compose (Map f', cont)) -> - _new_compose + | FilterMap f, Compose (Map f', cont) -> + optimize_compose (FilterMap (fun x -> match f x with | None -> None | Some y -> Some (f' y))) cont - | Compose (FilterMap f, Compose (FlatMap f', cont)) -> - _new_compose + | FilterMap f, Compose (FlatMap f', cont) -> + optimize_compose (FlatMap (fun x -> match f x with | None -> C.empty | Some y -> f' y)) cont - | Compose (FlatMap f, Compose (FlatMap f', cont)) -> - _new_compose - (FlatMap - (fun x -> - let a = f x in - C.flat_map f' a)) - cont - | (Compose _) as op -> - Same op (* cannot optimize *) + + (* flatmap doesn't compose with anything *) + | FlatMap f, Compose (f', cont) -> + FlatMapPlus (f, optimize_compose f' cont) + let rec length : type a b. (a,b) op -> int = function - | Id -> 0 - | Compose (_, Id) -> 0 + | Nil -> 0 | Compose (_, cont) -> 1 + length cont - let optimize ?(level=OptimBase) = _optimize ~level - let apply ?level op a = - let rec _apply : type a b. (a,b) op -> a t -> b t - = fun op a -> match op with - | Compose (op1, op2) -> - let a' = _apply_base op1 a in - _apply op2 a' - | Id -> a - and _apply_base : type a b. (a,b) base_op -> a t -> b t - = fun op a -> match op with - | Map f -> C.map f a - | Filter p -> C.filter p a - | FlatMap f -> C.flat_map f a - | FilterMap f -> C.filter_map f a - in - (* optimize and run *) - let op' = optimize ?level op in - _apply op' a + (* optimize a batch operation by fusion *) + let optimize : type a b. (a,b) op -> (a,b) optimized_op + = fun op -> match op with + | Compose (a, b) -> optimize_compose a b + | Nil -> Base Id + + let rec apply_optimized : type a b. (a,b) optimized_op -> a t -> b t + = fun op a -> match op with + | Base f -> apply_base f a + | FlatMapPlus (f,c) -> apply_optimized c @@ C.flat_map f a + and apply_base : type a b. (a,b) base_op -> a t -> b t + = fun op a -> match op with + | Map f -> C.map f a + | Filter p -> C.filter p a + | FlatMap f -> C.flat_map f a + | FilterMap f -> C.filter_map f a + | Id -> a + + let fusion_fold : type a b c. (a,b) base_op -> (c -> b -> c) -> c -> a -> c + = fun op f' -> match op with + | Map f -> (fun z x -> f' z (f x)) + | Filter p -> (fun z x -> if p x then f' z x else z) + | FlatMap f -> (fun z x -> C.fold f' z (f x)) + | FilterMap f -> (fun z x -> match f x with Some x' -> f' z x' | None -> z) + | Id -> f' + + let rec apply_optimized_with_fold : type a b c. (a,b) optimized_op -> (c -> b -> c) -> c -> a t -> c + = fun op fold z a -> match op with + | Base f -> C.fold (fusion_fold f fold) z a + | FlatMapPlus (f,c) -> apply_optimized_with_fold c fold z @@ C.flat_map f a + + + + (* optimize and run *) + let apply op a = + let op' = optimize op in + apply_optimized op' a + + let apply_fold op fold z a = + let op' = optimize op in + apply_optimized_with_fold op' fold z a let apply' a op = apply op a (** {6 Combinators} *) - let id = Id - let map f = Compose (Map f, Id) - let filter p = Compose (Filter p, Id) - let filter_map f = Compose (FilterMap f, Id) - let flat_map f = Compose (FlatMap f, Id) + let id = Nil + let map f = Compose (Map f, Nil) + let filter p = Compose (Filter p, Nil) + let filter_map f = Compose (FilterMap f, Nil) + let flat_map f = Compose (FlatMap f, Nil) let compose f g = _compose g f let (>>>) f g = _compose f g end - diff --git a/core/CCBatch.mli b/core/CCBatch.mli index bbd8f921..df10080a 100644 --- a/core/CCBatch.mli +++ b/core/CCBatch.mli @@ -51,17 +51,11 @@ module type S = sig val length : (_,_) op -> int (** Number of intermediate structures needed to compute this operation *) - type optimization_level = - | OptimNone - | OptimBase - | OptimMergeFlatMap + val apply : ('a,'b) op -> 'a t -> 'b t + (** Apply the operation to the collection. *) - val optimize : ?level:optimization_level -> ('a,'b) op -> ('a,'b) op - (** Try to minimize the length of the operation *) - - val apply : ?level:optimization_level -> ('a,'b) op -> 'a t -> 'b t - (** Apply the operation to the collection. - @param level the optimization level, default is [OptimBase] *) + val apply_width_fold : ('a, 'b) op -> ('c -> 'b -> 'c) -> 'c -> 'a t -> 'c + (** Apply the operation plus a fold to the collection. *) val apply' : 'a t -> ('a,'b) op -> 'b t (** Flip of {!apply} *) From 8b2f5deb147f302b940589202bdef328a3e7e17c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 12 Jun 2014 11:56:13 +0200 Subject: [PATCH 21/52] bugfix in CCArray --- core/CCArray.ml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/core/CCArray.ml b/core/CCArray.ml index 16fab34d..69d6f445 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -37,20 +37,21 @@ let foldi f acc a = in recurse acc 0 let reverse_in_place a = - let n = Array.length a in - for i = 0 to (n-1)/2 do - let t = a.(i) in - a.(i) <- a.(n-i-1); - a.(n-i-1) <- t; - done + if a = [| |] then () + else + let n = Array.length a in + for i = 0 to (n-1)/2 do + let t = a.(i) in + a.(i) <- a.(n-i-1); + a.(n-i-1) <- t; + done (*$T + reverse_in_place [| |]; true + reverse_in_place [| 1 |]; true let a = [| 1; 2; 3; 4; 5 |] in \ reverse_in_place a; \ a = [| 5;4;3;2;1 |] -*) - -(*$T let a = [| 1; 2; 3; 4; 5; 6 |] in \ reverse_in_place a; \ a = [| 6;5;4;3;2;1 |] From 730f99de7033cc5d9c8eb9c6d92d95ef6efc11fc Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 12 Jun 2014 11:56:57 +0200 Subject: [PATCH 22/52] fix benchmark broken by Drup; fix some small issues; add CCBatch.extern combinator for arbitrary computations (that wont be optimized) --- core/CCBatch.ml | 89 +++++++++++++++++++++++++++----------------- core/CCBatch.mli | 9 +++-- tests/bench_batch.ml | 24 ++---------- 3 files changed, 63 insertions(+), 59 deletions(-) diff --git a/core/CCBatch.ml b/core/CCBatch.ml index d27de6ad..760982e6 100644 --- a/core/CCBatch.ml +++ b/core/CCBatch.ml @@ -1,6 +1,6 @@ (* -copyright (c) 2013-2014, simon cruanes +copyright (c) 2013-2014, Simon Cruanes, Gabriel Radanne all rights reserved. redistribution and use in source and binary forms, with or without @@ -43,12 +43,11 @@ module type S = sig type ('a,'b) op (** Operation that converts an ['a t] into a ['b t] *) - val length : (_,_) op -> int - (** Number of intermediate structures needed to compute this operation *) - val apply : ('a,'b) op -> 'a t -> 'b t - (** Apply the operation to the collection. - @param level the optimization level, default is [OptimBase] *) + (** Apply the operation to the collection. *) + + val apply_fold : ('a, 'b) op -> ('c -> 'b -> 'c) -> 'c -> 'a t -> 'c + (** Apply the operation plus a fold to the collection. *) val apply' : 'a t -> ('a,'b) op -> 'b t (** Flip of {!apply} *) @@ -65,6 +64,8 @@ module type S = sig val flat_map : ('a -> 'b t) -> ('a,'b) op + val extern : ('a t -> 'b t) -> ('a,'b) op + val compose : ('b,'c) op -> ('a,'b) op -> ('a,'c) op val (>>>) : ('a,'b) op -> ('b,'c) op -> ('a,'c) op end @@ -75,12 +76,11 @@ module Make(C : COLLECTION) = struct | Nil : ('a,'a) op | Compose : ('a,'b) base_op * ('b, 'c) op -> ('a, 'c) op and (_,_) base_op = - | Id : ('a, 'a) base_op | Map : ('a -> 'b) -> ('a, 'b) base_op | Filter : ('a -> bool) -> ('a, 'a) base_op | FilterMap : ('a -> 'b option) -> ('a,'b) base_op | FlatMap : ('a -> 'b t) -> ('a,'b) base_op - + | Extern : ('a t -> 'b t) -> ('a,'b) base_op (* associativity: put parenthesis on the right *) let rec _compose : type a b c. (a,b) op -> (b,c) op -> (a,c) op @@ -89,20 +89,19 @@ module Make(C : COLLECTION) = struct | Compose (f1, f2) -> Compose (f1, _compose f2 g) | Nil -> g - - (* After optimization, the op is a list of flatmaps, with maybe something else at the end *) + (* After optimization, the op is a list of flatmaps and external operations, + with maybe something else at the end *) type (_,_) optimized_op = - | Base : ('a,'b) base_op -> ('a,'b) optimized_op - | FlatMapPlus : ('a -> 'b t) * ('b, 'c) optimized_op -> ('a, 'c) optimized_op - + | OptNil : ('a, 'a) optimized_op + | OptBase : ('a,'b) base_op * ('b, 'c) optimized_op -> ('a,'c) optimized_op + | OptFlatMap : ('a -> 'b t) * ('b, 'c) optimized_op -> ('a, 'c) optimized_op + | OptExtern : ('a t -> 'b t) * ('b, 'c) optimized_op -> ('a, 'c) optimized_op (* As compose, but optimize recursively on the way. *) let rec optimize_compose : type a b c. (a,b) base_op -> (b,c) op -> (a,c) optimized_op = fun base_op op -> match base_op, op with - | f, Nil -> Base f - | Id, Compose (f, cont) -> optimize_compose f cont - | f, Compose (Id, cont) -> optimize_compose f cont + | f, Nil -> OptBase (f, OptNil) | Map f, Compose (Map g, cont) -> optimize_compose (Map (fun x -> g (f x))) cont | Map f, Compose (Filter p, cont) -> @@ -151,34 +150,45 @@ module Make(C : COLLECTION) = struct | None -> C.empty | Some y -> f' y)) cont + | FlatMap f, Compose (f', tail) -> + merge_flat_map f (optimize_compose f' tail) + | Extern f, Compose (f', tail) -> + OptExtern (f, optimize_compose f' tail) + | op, Compose (Extern f', cont) -> + OptBase (op, optimize_compose (Extern f') cont) - (* flatmap doesn't compose with anything *) - | FlatMap f, Compose (f', cont) -> - FlatMapPlus (f, optimize_compose f' cont) - - - let rec length : type a b. (a,b) op -> int = function - | Nil -> 0 - | Compose (_, cont) -> 1 + length cont - + and merge_flat_map + : type a b c. (a -> b C.t) -> (b,c) optimized_op -> (a,c) optimized_op = + fun f op -> match op with + | OptNil -> OptFlatMap (f, op) + | OptFlatMap (f', cont) -> + merge_flat_map + (fun x -> + let a = f x in + C.flat_map f' a) + cont + | OptExtern _ -> OptFlatMap (f, op) + | OptBase _ -> OptFlatMap (f, op) (* optimize a batch operation by fusion *) let optimize : type a b. (a,b) op -> (a,b) optimized_op = fun op -> match op with | Compose (a, b) -> optimize_compose a b - | Nil -> Base Id + | Nil -> OptNil let rec apply_optimized : type a b. (a,b) optimized_op -> a t -> b t = fun op a -> match op with - | Base f -> apply_base f a - | FlatMapPlus (f,c) -> apply_optimized c @@ C.flat_map f a + | OptNil -> a + | OptBase (f,c) -> apply_optimized c (apply_base f a) + | OptFlatMap (f,c) -> apply_optimized c (C.flat_map f a) + | OptExtern (f,c) -> apply_optimized c (f a) and apply_base : type a b. (a,b) base_op -> a t -> b t = fun op a -> match op with | Map f -> C.map f a | Filter p -> C.filter p a | FlatMap f -> C.flat_map f a | FilterMap f -> C.filter_map f a - | Id -> a + | Extern f -> f a let fusion_fold : type a b c. (a,b) base_op -> (c -> b -> c) -> c -> a -> c = fun op f' -> match op with @@ -186,14 +196,22 @@ module Make(C : COLLECTION) = struct | Filter p -> (fun z x -> if p x then f' z x else z) | FlatMap f -> (fun z x -> C.fold f' z (f x)) | FilterMap f -> (fun z x -> match f x with Some x' -> f' z x' | None -> z) - | Id -> f' + | Extern _ -> assert false - let rec apply_optimized_with_fold : type a b c. (a,b) optimized_op -> (c -> b -> c) -> c -> a t -> c + let rec apply_optimized_with_fold + : type a b c. (a,b) optimized_op -> (c -> b -> c) -> c -> a t -> c = fun op fold z a -> match op with - | Base f -> C.fold (fusion_fold f fold) z a - | FlatMapPlus (f,c) -> apply_optimized_with_fold c fold z @@ C.flat_map f a - - + | OptNil -> C.fold fold z a + | OptBase (Extern f, OptNil) -> + C.fold fold z (f a) + | OptBase (f,OptNil) -> + (* terminal fold *) + C.fold (fusion_fold f fold) z a + | OptBase (f,c) -> + (* make intermediate collection and continue *) + apply_optimized_with_fold c fold z (apply_base f a) + | OptExtern (f,c) -> apply_optimized_with_fold c fold z (f a) + | OptFlatMap (f,c) -> apply_optimized_with_fold c fold z (C.flat_map f a) (* optimize and run *) let apply op a = @@ -213,6 +231,7 @@ module Make(C : COLLECTION) = struct let filter p = Compose (Filter p, Nil) let filter_map f = Compose (FilterMap f, Nil) let flat_map f = Compose (FlatMap f, Nil) + let extern f = Compose (Extern f, Nil) let compose f g = _compose g f let (>>>) f g = _compose f g diff --git a/core/CCBatch.mli b/core/CCBatch.mli index df10080a..7b04b692 100644 --- a/core/CCBatch.mli +++ b/core/CCBatch.mli @@ -35,6 +35,7 @@ module type COLLECTION = sig type 'a t val empty : 'a t + val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a val map : ('a -> 'b) -> 'a t -> 'b t val filter : ('a -> bool) -> 'a t -> 'a t val filter_map : ('a -> 'b option) -> 'a t -> 'b t @@ -48,13 +49,10 @@ module type S = sig type ('a,'b) op (** Operation that converts an ['a t] into a ['b t] *) - val length : (_,_) op -> int - (** Number of intermediate structures needed to compute this operation *) - val apply : ('a,'b) op -> 'a t -> 'b t (** Apply the operation to the collection. *) - val apply_width_fold : ('a, 'b) op -> ('c -> 'b -> 'c) -> 'c -> 'a t -> 'c + val apply_fold : ('a, 'b) op -> ('c -> 'b -> 'c) -> 'c -> 'a t -> 'c (** Apply the operation plus a fold to the collection. *) val apply' : 'a t -> ('a,'b) op -> 'b t @@ -72,6 +70,9 @@ module type S = sig val flat_map : ('a -> 'b t) -> ('a,'b) op + val extern : ('a t -> 'b t) -> ('a,'b) op + (** Use a specific function that won't be optimized *) + val compose : ('b,'c) op -> ('a,'b) op -> ('a,'c) op val (>>>) : ('a,'b) op -> ('b,'c) op -> ('a,'c) op end diff --git a/tests/bench_batch.ml b/tests/bench_batch.ml index 90489ec3..daac17f6 100644 --- a/tests/bench_batch.ml +++ b/tests/bench_batch.ml @@ -6,7 +6,6 @@ module type COLL = sig val doubleton : 'a -> 'a -> 'a t val (--) : int -> int -> int t val equal : int t -> int t -> bool - val fold : (int -> int -> int) -> int -> int t -> int end module Make(C : COLL) = struct @@ -31,25 +30,13 @@ module Make(C : COLL) = struct let ops = BA.(filter f1 >>> flat_map f3 >>> filter f1 >>> map f2 >>> flat_map f3 >>> map f4) - let batch_simple a = - let a = BA.apply ~level:BA.OptimNone ops a in - ignore (collect a); - a - let batch a = - let a = BA.apply ~level:BA.OptimBase ops a in - ignore (collect a); - a - - let batch2 a = - let a = BA.apply ~level:BA.OptimMergeFlatMap ops a in + let a = BA.apply ops a in ignore (collect a); a let bench_for ~time n = Printf.printf "\n\nbenchmark for %s of len %d\n" C.name n; - Printf.printf "optimization: from %d to %d\n" - (BA.length ops) (BA.length (BA.optimize ops)); flush stdout; let a = C.(0 -- n) in (* debug @@ -57,21 +44,18 @@ module Make(C : COLL) = struct CCPrint.printf "simple: %a\n" (CCArray.pp CCInt.pp) (batch_simple a); CCPrint.printf "batch: %a\n" (CCArray.pp CCInt.pp) (batch a); *) - assert (C.equal (batch_simple a) (naive a)); - assert (C.equal (batch_simple a) (batch a)); + assert (C.equal (batch a) (naive a)); let res = Benchmark.throughputN time [ C.name ^ "_naive", naive, a - ; C.name ^ "_batch_simple", batch_simple, a ; C.name ^ "_batch", batch, a - ; C.name ^ "_batch_merge", batch2, a ] in Benchmark.tabulate res let bench () = bench_for 1 100; - bench_for 2 100_000; - bench_for 2 1_000_000; + bench_for 4 100_000; + bench_for 4 1_000_000; () end From 17930cf119417019f8b7323de13feca0e61d2d0f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 12 Jun 2014 21:03:22 +0200 Subject: [PATCH 23/52] CCLinq: powerful interface for querying containers (work in progress) --- _oasis | 2 +- core/CCLinq.ml | 499 ++++++++++++++++++++++++++++++++++++++++++++++++ core/CCLinq.mli | 268 ++++++++++++++++++++++++++ 3 files changed, 768 insertions(+), 1 deletion(-) create mode 100644 core/CCLinq.ml create mode 100644 core/CCLinq.mli diff --git a/_oasis b/_oasis index 3ae0d45c..1f182e98 100644 --- a/_oasis +++ b/_oasis @@ -60,7 +60,7 @@ Library "containers_misc" Bij, PiCalculus, Bencode, Sexp, RAL, UnionFind, SmallSet, AbsSet, CSM, ActionMan, QCheck, BencodeOnDisk, TTree, - HGraph, Automaton, Conv, Bidir, Iteratee, + HGraph, Automaton, Conv, Bidir, Iteratee, Linq, Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact BuildDepends: unix,containers FindlibName: misc diff --git a/core/CCLinq.ml b/core/CCLinq.ml new file mode 100644 index 00000000..a2cda146 --- /dev/null +++ b/core/CCLinq.ml @@ -0,0 +1,499 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 LINQ-like operations on collections} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a hash = 'a -> int +type 'a klist = unit -> [ `Nil | `Cons of 'a * 'a klist ] + +let _id x = x + +module Coll = struct + type 'a t = + | Seq of 'a sequence + | List of 'a list + + let of_seq s = Seq s + let of_list l = List l + let of_array a = Seq (CCSequence.of_array a) + let empty = List [] + + let to_seq = function + | Seq s -> s + | List l -> (fun k -> List.iter k l) + + let to_list = function + | Seq s -> CCSequence.to_list s + | List l -> l + + let _fmap ~lst ~seq c = match c with + | List l -> List (lst l) + | Seq s -> Seq (seq s) + + let _fold ~lst ~seq acc c = match c with + | List l -> List.fold_left lst acc l + | Seq s -> CCSequence.fold seq acc s + + let iter f c = match c with + | List l -> List.iter f l + | Seq s -> s f + + let map f c = + _fmap ~lst:(List.map f) ~seq:(CCSequence.map f) c + + let filter p c = + _fmap ~lst:(List.filter p) ~seq:(CCSequence.filter p) c + + let flat_map f c = + let c' = to_seq c in + Seq (CCSequence.flatMap f c') + + let filter_map f c = + _fmap ~lst:(CCList.filter_map f) ~seq:(CCSequence.fmap f) c + + let size = function + | List l -> List.length l + | Seq s -> CCSequence.length s + + let fold f acc c = _fold ~lst:f ~seq:f acc c +end + +type 'a collection = 'a Coll.t + +module Map = struct + type ('a, 'b) t = { + is_empty : unit -> bool; + size : unit -> int; (** Number of keys *) + get : 'a -> 'b option; + fold : 'c. ('c -> 'a -> 'b -> 'c) -> 'c -> 'c; + to_seq : ('a * 'b) sequence; + } + + let make_hash (type key) ?(eq=(=)) ?(hash=Hashtbl.hash) seq = + let module H = Hashtbl.Make(struct + type t = key + let equal = eq + let hash = hash + end) in + (* build table *) + let tbl = H.create 32 in + seq + (fun (k,v) -> + let l = try H.find tbl k with Not_found -> [] in + H.replace tbl k (v::l) + ); + (* provide the multimap interface *) + let to_seq cont = H.iter (fun k v -> cont (k, Coll.of_list v)) tbl + in + { + is_empty = (fun () -> H.length tbl = 0); + size = (fun () -> H.length tbl); + get = (fun k -> + try Some (Coll.of_list (H.find tbl k)) + with Not_found -> None); + fold = (fun f acc -> H.fold (fun k v acc -> f acc k (Coll.of_list v)) tbl acc); + to_seq; + } + + let make_map (type key) (type value) + ?(cmp_key=Pervasives.compare) ?(cmp_val=Pervasives.compare) seq = + let module M = CCSequence.Map.Make(struct + type t = key + let compare = cmp_key + end) in + let module S = CCSequence.Set.Make(struct + type t = value + let compare = cmp_val + end) in + let _map_set set = Coll.of_seq (S.to_seq set) in + let map = CCSequence.fold + (fun map (k,v) -> + let set = try M.find k map with Not_found -> S.empty in + M.add k (S.add v set) map + ) M.empty seq + in + let to_seq = + M.to_seq map |> CCSequence.map (fun (k,v) -> k, _map_set v) + in + { + is_empty = (fun () -> M.is_empty map); + size = (fun () -> M.cardinal map); + get = (fun k -> + try Some (_map_set (M.find k map)) + with Not_found -> None); + fold = (fun f acc -> + M.fold + (fun key set acc -> f acc key (_map_set set)) map acc + ); + to_seq; + } + + let get m x = m.get x + + let get_exn m x = + match m.get x with + | None -> raise Not_found + | Some x -> x + + let size m = m.size () + + let to_seq m = m.to_seq + + type 'a key_info = { + eq : 'a equal option; + cmp : 'a ord option; + hash : 'a hash option; + } +end + +(** {2 Query operators} *) + +type safe = Safe +type unsafe = Unsafe +type (_,_) safety = + | Safe : ('a, 'a option) safety + | Unsafe : ('a, 'a) safety + +type 'a search_result = + | SearchContinue + | SearchStop of 'a + +type (_, _) unary = + | Map : ('a -> 'b) -> ('a collection, 'b collection) unary + | GeneralMap : ('a -> 'b) -> ('a, 'b) unary + | Filter : ('a -> bool) -> ('a collection, 'a collection) unary + | Fold : ('b -> 'a -> 'b) * 'b -> ('a collection, 'b) unary + | Reduce : ('c, 'd) safety * ('a -> 'b) * ('a -> 'b -> 'b) * ('b -> 'c) + -> ('a collection, 'd) unary + | Size : ('a collection, int) unary + | Choose : ('a,'b) safety -> ('a collection, 'b) unary + | FilterMap : ('a -> 'b option) -> ('a collection, 'b collection) unary + | FlatMap : ('a -> 'b collection) -> ('a collection, 'b collection) unary + | Take : int -> ('a collection, 'a collection) unary + | TakeWhile : ('a -> bool) -> ('a collection, 'a collection) unary + | Sort : 'a ord -> ('a collection, 'a collection) unary + | Distinct : 'a ord option * 'a equal option * 'a hash option + -> ('a collection, 'a collection) unary + | Search : + < check: ('a -> 'b search_result); + failure : 'b; + > -> ('a collection, 'b) unary + | Get : ('b,'c) safety * 'a -> (('a,'b) Map.t, 'c) unary + | GroupBy : 'b ord * 'a ord * ('a -> 'b) + -> ('a collection, ('b,'a collection) Map.t) unary + | Count : 'a ord -> ('a collection, ('a, int) Map.t) unary + +type ('a,'b,'key,'c) join_descr = { + join_key1 : 'a -> 'key; + join_key2 : 'b -> 'key; + join_merge : 'key -> 'a -> 'b -> 'c; + join_key : 'key Map.key_info; +} + +type ('a,'b) group_join_descr = { + gjoin_proj : 'b -> 'a; + gjoin_key : 'a Map.key_info; +} + +type set_op = + | Union + | Inter + | Diff + +type (_, _, _) binary = + | Join : ('a, 'b, 'key, 'c) join_descr + -> ('a collection, 'b collection, 'c collection) binary + | GroupJoin : ('a, 'b) group_join_descr + -> ('a collection, 'b collection, ('a, 'b collection) Map.t) binary + | Product : ('a collection, 'b collection, ('a*'b) collection) binary + | Append : ('a collection, 'a collection, 'a collection) binary + | SetOp : set_op * 'a ord -> ('a collection, 'a collection, 'a collection) binary + | Inter : 'a ord -> ('a collection, 'a collection, 'a collection) binary + +(* type of queries that return a 'a *) +and 'a t = + | Start : 'a -> 'a t + | Unary : ('a, 'b) unary * 'a t -> 'b t + | Binary : ('a, 'b, 'c) binary * 'a t * 'b t -> 'c t + | QueryMap : ('a -> 'b) * 'a t -> 'b t + | Bind : ('a -> 'b t) * 'a t -> 'b t + +let start x = Start x + +let start_list l = + Start (Coll.of_list l) + +let start_array a = + Start (Coll.of_array a) + +let start_hashtbl h = + Start (Coll.of_seq (CCSequence.of_hashtbl h)) + +let start_seq seq = + Start (Coll.of_seq seq) + +(** {6 Composition} *) + +let apply u q = Unary (u, q) + +let (>>>) = apply + +(** {6 Execution} *) + +let rec _optimize : type a. a t -> a t + = fun q -> match q with + | Start _ -> q + | Unary (u, q) -> + _optimize_unary u (_optimize q) + | Binary (b, q1, q2) -> + _optimize_binary b (_optimize q1) (_optimize q2) + | QueryMap (f, q) -> QueryMap (f, _optimize q) +and _optimize_unary : type a b. (a,b) unary -> a t -> b t + = fun u q -> match u, q with + | Map f, Unary (Map g, q') -> + _optimize_unary (Map (fun x -> f (g x))) q' + | _ -> Unary (u,q) + (* TODO *) +and _optimize_binary : type a b c. (a,b,c) binary -> a t -> b t -> c t + = fun b q1 q2 -> match b, q1, q2 with + | _ -> Binary (b, q1, q2) (* TODO *) + + +(* apply a unary operator on a collection *) +let _do_unary : type a b. (a,b) unary -> a -> b += fun u c -> match u with + | Map f -> Coll.map f c + | Filter p -> Coll.filter p c + | Fold (f, acc) -> Coll.fold f acc c (* TODO: optimize *) + +(* TODO: join of two collections *) +let _do_join ~join c1 c2 = + assert false + +let _do_product c1 c2 = + let s1 = Coll.to_seq c1 and s2 = Coll.to_seq c2 in + Coll.of_seq (CCSequence.product s1 s2) + +let _do_binary : type a b c. (a, b, c) binary -> a -> b -> c += fun b c1 c2 -> match b with + | Join join -> _do_join ~join c1 c2 + | Product -> _do_product c1 c2 + +let rec _run : type a. opt:bool -> a t -> a + = fun ~opt q -> match q with + | Start c -> c + | Unary (u, q') -> _do_unary u (_run ~opt q') + | Binary (b, q1, q2) -> _do_binary b (_run ~opt q1) (_run ~opt q2) + | QueryMap (f, q') -> f (_run ~opt q') + | Bind (f, q') -> + let x = _run ~opt q' in + let q'' = f x in + let q'' = if opt then _optimize q'' else q'' in + _run ~opt q'' + +let run q = _run ~opt:true (_optimize q) +let run_no_opt q = _run ~opt:false q + +(** {6 Basics on Collections} *) + +let map f q = Unary (Map f, q) + +let filter p q = Unary (Filter p, q) + +let size q = Unary (Size, q) + +let choose q = Unary (Choose Safe, q) + +let choose_exn q = Unary (Choose Unsafe, q) + +let filter_map f q = Unary (FilterMap f, q) + +let flat_map f q = Unary (FlatMap f, q) + +let flat_map_seq f q = + let f' x = Coll.of_seq (f x) in + Unary (FlatMap f', q) + +let take n q = Unary (Take n, q) + +let take_while p q = Unary (TakeWhile p, q) + +let sort ~cmp q = Unary (Sort cmp, q) + +let distinct ?cmp ?eq ?hash () q = + Unary (Distinct (cmp,eq,hash), q) + +let get key q = + Unary (Get (Safe, key), q) + +let get_exn key q = + Unary (Get (Unsafe, key), q) + +let map_to_seq q = + Unary (GeneralMap (fun m -> Coll.of_seq m.Map.to_seq), q) + +let map_to_seq_flatten q = + let f m = m.Map.to_seq + |> CCSequence.flatMap + (fun (k,v) -> Coll.to_seq v |> CCSequence.map (fun v' -> k,v')) + |> Coll.of_seq + in + Unary (GeneralMap f, q) + +let group_by ?(cmp_key=Pervasives.compare) ?(cmp_val=Pervasives.compare) f q = + Unary (GroupBy (cmp_key,cmp_val,f), q) + +let count ?(cmp=Pervasives.compare) () q = + Unary (Count cmp, q) + +let fold f acc q = + Unary (Fold (f, acc), q) + +let size q = Unary (Size, q) + +let sum q = Unary (Fold ((+), 0), q) + +let reduce start mix stop q = + Unary (Reduce (Safe, start,mix,stop), q) + +let reduce_exn start mix stop q = + Unary (Reduce (Unsafe, start,mix,stop), q) + +let _avg_start x = (x,1) +let _avg_mix x (y,n) = (x+y,n+1) +let _avg_stop (x,n) = x/n + +let _lift_some f x y = match y with + | None -> Some x + | Some y -> Some (f x y) + +let max q = Unary (Reduce (Safe, _id, Pervasives.max, _id), q) +let min q = Unary (Reduce (Safe, _id, Pervasives.min, _id), q) +let average q = Unary (Reduce (Safe, _avg_start, _avg_mix, _avg_stop), q) + +let max_exn q = Unary (Reduce (Unsafe, _id, Pervasives.max, _id), q) +let min_exn q = Unary (Reduce (Unsafe, _id, Pervasives.min, _id), q) +let average_exn q = Unary (Reduce (Unsafe, _avg_start, _avg_mix, _avg_stop), q) + +let for_all p q = + Unary (Search (object + method check x = if p x then SearchContinue else SearchStop false + method failure = true + end), q) + +let exists p q = + Unary (Search (object + method check x = if p x then SearchStop true else SearchContinue + method failure = false + end), q) + +let find p q = + Unary (Search (object + method check x = if p x then SearchStop (Some x) else SearchContinue + method failure = None + end), q) + +let find_map f q = + Unary (Search (object + method check x = match f x with + | Some y -> SearchStop (Some y) + | None -> SearchContinue + method failure = None + end), q) + +(** {6 Binary Operators} *) + +let join ?cmp ?eq ?hash join_key1 join_key2 ~merge q1 q2 = + let j = { + join_key1; + join_key2; + join_merge=merge; + join_key = Map.({ eq; cmp; hash; }); + } in + Binary (Join j, q1, q2) + +let group_join ?cmp ?eq ?hash gjoin_proj q1 q2 = + let j = { + gjoin_proj; + gjoin_key = Map.({ eq; cmp; hash; }); + } in + Binary (GroupJoin j, q1, q2) + +let product q1 q2 = Binary (Product, q1, q2) + +let append q1 q2 = Binary (Append, q1, q2) + +let inter ?(cmp=Pervasives.compare) () q1 q2 = + Binary (SetOp (Inter, cmp), q1, q2) + +let union ?(cmp=Pervasives.compare) () q1 q2 = + Binary (SetOp (Union, cmp), q1, q2) + +let diff ?(cmp=Pervasives.compare) () q1 q2 = + Binary (SetOp (Diff, cmp), q1, q2) + +let fst q = map fst q +let snd q = map snd q + +let flatten_opt q = filter_map _id q + +let opt_get_exn q = + QueryMap ((function + | Some x -> x + | None -> invalid_arg "opt_get_exn"), q) + +(** {6 Monadic stuff} *) + +let return x = Start x + +let bind f q = Bind (f,q) + +let (>>=) x f = Bind (f, x) + +let query_map f q = QueryMap (f, q) + +(** {6 Output containers} *) + +let to_list q = + QueryMap (Coll.to_list, q) + +let to_array q = + QueryMap ((fun c -> Array.of_list (Coll.to_list c)), q) + +let to_seq q = + QueryMap ((fun c -> Coll.to_seq c |> CCSequence.persistent), q) + +let to_hashtbl q = + QueryMap ((fun c -> CCSequence.to_hashtbl (Coll.to_seq c)), q) + +let to_queue q = + QueryMap ((fun c q -> CCSequence.to_queue q (Coll.to_seq c)), q) + +let to_stack q = + QueryMap ((fun c s -> CCSequence.to_stack s (Coll.to_seq c)), q) diff --git a/core/CCLinq.mli b/core/CCLinq.mli new file mode 100644 index 00000000..b9915cc3 --- /dev/null +++ b/core/CCLinq.mli @@ -0,0 +1,268 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 LINQ-like operations on collections} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a hash = 'a -> int + +type 'a collection +(** Abstract type of collections of objects of type 'a. Those cannot + be used directly, they are to be processed using a query (type {!'a t}) + and converted to some list/sequence/array *) + +(** {2 A polymorphic map} *) +module Map : sig + type ('a, 'b) t + + val get : ('a,'b) t -> 'a -> 'b option + + val get_exn : ('a,'b) t -> 'a -> 'b + (** Unsafe version of {!get}. + @raise Not_found if the element is not present *) + + val size : (_,_) t -> int + + val to_seq : ('a, 'b) t -> ('a * 'b) sequence +end + +(** {2 Query operators} *) + +type 'a t +(** Type of a query that returns some value of type 'a *) + +(** {6 Initial values} *) + +val start : 'a -> 'a t +(** Start with a single value *) + +val start_list : 'a list -> 'a collection t +(** Query that just returns the elements of the list *) + +val start_array : 'a array -> 'a collection t + +val start_hashtbl : ('a,'b) Hashtbl.t -> ('a * 'b) collection t + +val start_seq : 'a sequence -> 'a collection t +(** Query that returns the elements of the given sequence. *) + +(** {6 Execution} *) + +val run : 'a t -> 'a +(** Execute the actual query *) + +val run_no_opt : 'a t -> 'a +(** Execute the query, without optimizing it at all *) + +(** {6 Basics on Collections} *) + +val map : ('a -> 'b) -> 'a collection t -> 'b collection t + +val filter : ('a -> bool) -> 'a collection t -> 'a collection t + +val size : _ collection t -> int t + +val choose : 'a collection t -> 'a option t +(** Choose one element (if any) in the collection *) + +val choose_exn : 'a collection t -> 'a t +(** Choose one element or fail. + @raise Invalid_argument if the collection is empty *) + +val filter_map : ('a -> 'b option) -> 'a collection t -> 'b collection t +(** Filter and map elements at once *) + +val flat_map : ('a -> 'b collection) -> 'a collection t -> 'b collection t +(** Monadic "bind", maps each element to a collection + and flatten the result *) + +val flat_map_seq : ('a -> 'b sequence) -> 'a collection t -> 'b collection t +(** Same as {!flat_map} but using sequences *) + +val take : int -> 'a collection t -> 'a collection t +(** take at most [n] elements *) + +val take_while : ('a -> bool) -> 'a collection t -> 'a collection t +(** take elements while they satisfy a predicate *) + +val sort : cmp:'a ord -> 'a collection t -> 'a collection t +(** Sort items by the given comparison function *) + +val distinct : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> + unit -> 'a collection t -> 'a collection t +(** Remove duplicate elements from the input collection. + All elements in the result are distinct. *) + +(** {6 Maps} *) + +val get : 'a -> ('a, 'b) Map.t t -> 'b option t +(** Select a key from a map *) + +val get_exn : 'a -> ('a, 'b) Map.t t -> 'b t +(** Unsafe version of {!get}. + @raise Not_found if the key is not present. *) + +val map_to_seq : ('a,'b) Map.t t -> ('a*'b) collection t +(** View a multimap as a proper collection *) + +val map_to_seq_flatten : ('a,'b collection) Map.t t -> ('a*'b) collection t +(** View a multimap as a collection of individual key/value pairs *) + +(** {6 Aggregation} *) + +val group_by : ?cmp_key:'b ord -> ?cmp_val:'a ord -> + ('a -> 'b) -> 'a collection t -> ('b,'a collection) Map.t t +(** [group_by f] takes a collection [c] as input, and returns + a multimap [m] such that for each [x] in [c], + [x] occurs in [m] under the key [f x]. In other words, [f] is used + to obtain a key from [x], and [x] is added to the multimap using this key. *) + +val count : ?cmp:'a ord -> unit -> 'a collection t -> ('a, int) Map.t t +(** [count c] returns a map from elements of [c] to the number + of time those elements occur. *) + +val fold : ('b -> 'a -> 'b) -> 'b -> 'a collection t -> 'b t +(** Fold over the collection *) + +val size : _ collection t -> int t +(** Count how many elements the collection contains *) + +val reduce : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> 'a collection t -> 'c option t +(** [reduce start mix stop q] uses [start] on the first element of [q], + and combine the result with following elements using [mix]. The final + value is transformed using [stop]. This returns [None] if the collection + is empty *) + +val reduce_exn : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> + 'a collection t -> 'c t +(** Same as {!reduce} but fails on empty collections. + @raise Invalid_argument if the collection is empty *) + +val sum : int collection t -> int t + +val average : int collection t -> int option t +val max : int collection t -> int option t +val min : int collection t -> int option t + +val average_exn : int collection t -> int t +val max_exn : int collection t -> int t +val min_exn : int collection t -> int t + +val for_all : ('a -> bool) -> 'a collection t -> bool t +val exists : ('a -> bool) -> 'a collection t -> bool t +val find : ('a -> bool) -> 'a collection t -> 'a option t +val find_map : ('a -> 'b option) -> 'a collection t -> 'b option t + +(** {6 Binary Operators} *) + +val join : ?cmp:'key ord -> ?eq:'key equal -> ?hash:'key hash -> + ('a -> 'key) -> ('b -> 'key) -> + merge:('key -> 'a -> 'b -> 'c) -> + 'a collection t -> 'b collection t -> 'c collection t +(** [join key1 key2 ~merge] is a binary operation + that takes two collections [a] and [b], projects their + elements resp. with [key1] and [key2], and combine + values [(x,y)] from [(a,b)] with the same [key] + using [merge]. *) + +val group_join : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> + ('b -> 'a) -> 'a collection t -> 'b collection t -> + ('a, 'b collection) Map.t t +(** [group_join key2] associates to every element [x] of + the first collection, all the elements [y] of the second + collection such that [eq x (key y)] *) + +val product : 'a collection t -> 'b collection t -> ('a * 'b) collection t +(** Cartesian product *) + +val append : 'a collection t -> 'a collection t -> 'a collection t +(** Append two collections together *) + +val inter : ?cmp:'a ord -> unit -> + 'a collection t -> 'a collection t -> 'a collection t +(** Intersection of two collections. Each element will occur at most once + in the result *) + +val union : ?cmp:'a ord -> unit -> + 'a collection t -> 'a collection t -> 'a collection t +(** Union of two collections. Each element will occur at most once + in the result *) + +val diff : ?cmp:'a ord -> unit -> + 'a collection t -> 'a collection t -> 'a collection t +(** Set difference *) + +(** {6 Tuple and Options} *) + +(** Specialized projection operators *) + +val fst : ('a * 'b) collection t -> 'a collection t +val snd : ('a * 'b) collection t -> 'b collection t + +val flatten_opt : 'a option collection t -> 'a collection t +(** Flatten the collection by removing options *) + +val opt_get_exn : 'a option t -> 'a t +(** unwrap an option type. + @raise Invalid_argument if the option value is [None] *) + +(** {6 Monad} + +Careful, those operators do not allow any optimization before running the +query, they might therefore be pretty slow. *) + +val bind : ('a -> 'b t) -> 'a t -> 'b t +(** Use the result of a query to build another query and imediately run it. *) + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +(** Infix version of {!bind} *) + +val return : 'a -> 'a t +(** Synonym to {!start} *) + +val query_map : ('a -> 'b) -> 'a t -> 'b t +(** Map results directly, rather than collections of elements *) + +(** {6 Output Containers} *) + +val to_list : 'a collection t -> 'a list t +(** Build a list of results *) + +val to_array : 'a collection t -> 'a array t +(** Build an array of results *) + +val to_seq : 'a collection t -> 'a sequence t +(** Build a (re-usable) sequence of elements, which can then be + converted into other structures *) + +val to_hashtbl : ('a * 'b) collection t -> ('a, 'b) Hashtbl.t t +(** Build a hashtable from the collection *) + +val to_queue : 'a collection t -> ('a Queue.t -> unit) t + +val to_stack : 'a collection t -> ('a Stack.t -> unit) t From 0d39bfdbf4230c4989f68ef03d2b7a58a662942e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 12 Jun 2014 23:55:26 +0200 Subject: [PATCH 24/52] more optimizations, and a big chunk of query evaluation in CCLinq --- _oasis | 4 +- core/CCLinq.ml | 377 +++++++++++++++++++++++++++++++++++++----------- core/CCLinq.mli | 14 +- 3 files changed, 303 insertions(+), 92 deletions(-) diff --git a/_oasis b/_oasis index 1f182e98..c1421116 100644 --- a/_oasis +++ b/_oasis @@ -41,7 +41,7 @@ Library "containers" Modules: CCVector, CCDeque, CCGen, CCSequence, CCFQueue, CCMultiMap, CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCLeftistheap, CCList, CCOpt, CCPair, CCFun, CCHash, - CCKList, CCInt, CCBool, CCArray, CCBatch + CCKList, CCInt, CCBool, CCArray, CCBatch, CCLinq FindlibName: containers Library "containers_string" @@ -60,7 +60,7 @@ Library "containers_misc" Bij, PiCalculus, Bencode, Sexp, RAL, UnionFind, SmallSet, AbsSet, CSM, ActionMan, QCheck, BencodeOnDisk, TTree, - HGraph, Automaton, Conv, Bidir, Iteratee, Linq, + HGraph, Automaton, Conv, Bidir, Iteratee, Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact BuildDepends: unix,containers FindlibName: misc diff --git a/core/CCLinq.ml b/core/CCLinq.ml index a2cda146..9728d414 100644 --- a/core/CCLinq.ml +++ b/core/CCLinq.ml @@ -30,39 +30,61 @@ type 'a sequence = ('a -> unit) -> unit type 'a equal = 'a -> 'a -> bool type 'a ord = 'a -> 'a -> int type 'a hash = 'a -> int -type 'a klist = unit -> [ `Nil | `Cons of 'a * 'a klist ] let _id x = x +type 'a search_result = + | SearchContinue + | SearchStop of 'a + module Coll = struct type 'a t = - | Seq of 'a sequence - | List of 'a list + | Seq : 'a sequence -> 'a t + | List : 'a list -> 'a t + | Set : (module CCSequence.Set.S + with type elt = 'a and type t = 'b) * 'b -> 'a t let of_seq s = Seq s let of_list l = List l let of_array a = Seq (CCSequence.of_array a) - let empty = List [] - let to_seq = function + let set_of_seq (type elt) ?(cmp=Pervasives.compare) seq = + let module S = CCSequence.Set.Make(struct + type t = elt + let compare = cmp + end) in + let set = S.of_seq seq in + Set ((module S), set) + + let to_seq (type elt) = function | Seq s -> s | List l -> (fun k -> List.iter k l) + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + S.to_seq set - let to_list = function + let to_list (type elt) = function | Seq s -> CCSequence.to_list s | List l -> l + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + S.elements set let _fmap ~lst ~seq c = match c with | List l -> List (lst l) | Seq s -> Seq (seq s) + | Set _ -> + List (lst (to_list c)) - let _fold ~lst ~seq acc c = match c with - | List l -> List.fold_left lst acc l - | Seq s -> CCSequence.fold seq acc s - - let iter f c = match c with - | List l -> List.iter f l - | Seq s -> s f + let fold (type elt) f acc c = match c with + | List l -> List.fold_left f acc l + | Seq s -> CCSequence.fold f acc s + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + S.fold (fun x acc -> f acc x) set acc let map f c = _fmap ~lst:(List.map f) ~seq:(CCSequence.map f) c @@ -72,16 +94,79 @@ module Coll = struct let flat_map f c = let c' = to_seq c in - Seq (CCSequence.flatMap f c') + Seq (CCSequence.flatMap (fun x -> to_seq (f x)) c') let filter_map f c = _fmap ~lst:(CCList.filter_map f) ~seq:(CCSequence.fmap f) c - let size = function + let size (type elt) = function | List l -> List.length l | Seq s -> CCSequence.length s + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + S.cardinal set - let fold f acc c = _fold ~lst:f ~seq:f acc c + let choose (type elt) = function + | List [] -> None + | List (x::_) -> Some x + | Seq s -> + begin match CCSequence.take 1 s |> CCSequence.to_list with + | [x] -> Some x + | _ -> None + end + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + try Some (S.choose set) with Not_found -> None + + let take n c = + _fmap ~lst:(CCList.take n) ~seq:(CCSequence.take n) c + + exception MySurpriseExit + + let _seq_take_while p seq k = + try + seq (fun x -> if not (p x) then k x else raise MySurpriseExit) + with MySurpriseExit -> () + + let take_while p c = + to_seq c |> _seq_take_while p |> of_seq + + let distinct ~cmp c = set_of_seq ~cmp (to_seq c) + + let sort cmp c = match c with + | List l -> List (List.sort cmp l) + | _ -> + to_seq c |> set_of_seq ~cmp + + let search obj c = + let _search_seq obj seq = + let ret = ref None in + begin try + seq (fun x -> match obj#check x with + | SearchContinue -> () + | SearchStop y -> ret := Some y; raise MySurpriseExit); + with MySurpriseExit -> () + end; + match !ret with + | None -> obj#failure + | Some x -> x + in + to_seq c |> _search_seq obj + + let contains (type elt) ~eq x c = match c with + | List l -> List.exists (eq x) l + | Seq s -> CCSequence.exists (eq x) s + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + (* XXX: here we don't use the equality relation *) + try + let y = S.find x set in + assert (eq x y); + true + with Not_found -> false end type 'a collection = 'a Coll.t @@ -95,7 +180,14 @@ module Map = struct to_seq : ('a * 'b) sequence; } - let make_hash (type key) ?(eq=(=)) ?(hash=Hashtbl.hash) seq = + type ('a, 'b) build = { + mutable cur : ('a, 'b) t; + add : 'a -> 'b -> unit; + update : 'a -> ('b option -> 'b option) -> unit; + } + + (* careful to use this map linearly *) + let make_hash (type key) ?(eq=(=)) ?(hash=Hashtbl.hash) () = let module H = Hashtbl.Make(struct type t = key let equal = eq @@ -103,57 +195,99 @@ module Map = struct end) in (* build table *) let tbl = H.create 32 in - seq - (fun (k,v) -> - let l = try H.find tbl k with Not_found -> [] in - H.replace tbl k (v::l) - ); - (* provide the multimap interface *) - let to_seq cont = H.iter (fun k v -> cont (k, Coll.of_list v)) tbl - in - { + let cur = { is_empty = (fun () -> H.length tbl = 0); size = (fun () -> H.length tbl); get = (fun k -> - try Some (Coll.of_list (H.find tbl k)) + try Some (H.find tbl k) with Not_found -> None); - fold = (fun f acc -> H.fold (fun k v acc -> f acc k (Coll.of_list v)) tbl acc); - to_seq; + fold = (fun f acc -> H.fold (fun k v acc -> f acc k v) tbl acc); + to_seq = (fun k -> H.iter (fun key v -> k (key,v)) tbl); + } in + { cur; + add = (fun k v -> H.replace tbl k v); + update = (fun k f -> + match (try f (Some (H.find tbl k)) with Not_found -> f None) with + | None -> H.remove tbl k + | Some v' -> H.replace tbl k v'); } - let make_map (type key) (type value) - ?(cmp_key=Pervasives.compare) ?(cmp_val=Pervasives.compare) seq = + let make_cmp (type key) ?(cmp=Pervasives.compare) () = let module M = CCSequence.Map.Make(struct type t = key - let compare = cmp_key + let compare = cmp end) in - let module S = CCSequence.Set.Make(struct - type t = value - let compare = cmp_val - end) in - let _map_set set = Coll.of_seq (S.to_seq set) in - let map = CCSequence.fold - (fun map (k,v) -> - let set = try M.find k map with Not_found -> S.empty in - M.add k (S.add v set) map - ) M.empty seq - in - let to_seq = - M.to_seq map |> CCSequence.map (fun (k,v) -> k, _map_set v) - in - { - is_empty = (fun () -> M.is_empty map); - size = (fun () -> M.cardinal map); + let map = ref M.empty in + let cur = { + is_empty = (fun () -> M.is_empty !map); + size = (fun () -> M.cardinal !map); get = (fun k -> - try Some (_map_set (M.find k map)) + try Some (M.find k !map) with Not_found -> None); fold = (fun f acc -> M.fold - (fun key set acc -> f acc key (_map_set set)) map acc + (fun key set acc -> f acc key set) !map acc ); - to_seq; + to_seq = M.to_seq !map; + } in + { + cur; + add = (fun k v -> map := M.add k v !map); + update = (fun k f -> + match (try f (Some (M.find k !map)) with Not_found -> f None) with + | None -> map := M.remove k !map + | Some v' -> map := M.add k v' !map); } + type 'a key_info = { + eq : 'a equal option; + cmp : 'a ord option; + hash : 'a hash option; + } + + let default_key_info = { + eq=None; cmp=None; hash=None; + } + + let make_info info = + match info with + | { hash=None; _} + | { eq=None; _} -> + begin match info.cmp with + | None -> make_cmp () + | Some cmp -> make_cmp ~cmp () + end + | {eq=Some eq; hash=Some hash; _} -> make_hash ~eq ~hash () + + let multiset build seq = + seq (fun (k,v) -> + build.update k (function + | None -> Some [v] + | Some l -> Some (v::l))); + { is_empty = build.cur.is_empty; + size = build.cur.size; + get = (fun k -> match build.cur.get k with + | None -> None + | Some v -> Some (Coll.of_list v)); + fold = (fun f acc -> + build.cur.fold (fun acc k v -> f acc k (Coll.of_list v)) acc); + to_seq = build.cur.to_seq + |> CCSequence.map (fun (k,v) -> k,Coll.of_list v); + } + + let multimap_cmp ?cmp seq = + let build = make_cmp ?cmp () in + multiset build seq + + let count build seq = + seq (fun x -> + let n = match build.cur.get x with + | None -> 1 + | Some n -> n+1 + in + build.add x n); + build.cur + let get m x = m.get x let get_exn m x = @@ -164,26 +298,14 @@ module Map = struct let size m = m.size () let to_seq m = m.to_seq - - type 'a key_info = { - eq : 'a equal option; - cmp : 'a ord option; - hash : 'a hash option; - } end (** {2 Query operators} *) -type safe = Safe -type unsafe = Unsafe type (_,_) safety = | Safe : ('a, 'a option) safety | Unsafe : ('a, 'a) safety -type 'a search_result = - | SearchContinue - | SearchStop of 'a - type (_, _) unary = | Map : ('a -> 'b) -> ('a collection, 'b collection) unary | GeneralMap : ('a -> 'b) -> ('a, 'b) unary @@ -198,21 +320,21 @@ type (_, _) unary = | Take : int -> ('a collection, 'a collection) unary | TakeWhile : ('a -> bool) -> ('a collection, 'a collection) unary | Sort : 'a ord -> ('a collection, 'a collection) unary - | Distinct : 'a ord option * 'a equal option * 'a hash option - -> ('a collection, 'a collection) unary + | Distinct : 'a ord -> ('a collection, 'a collection) unary | Search : < check: ('a -> 'b search_result); failure : 'b; > -> ('a collection, 'b) unary + | Contains : 'a equal * 'a -> ('a collection, bool) unary | Get : ('b,'c) safety * 'a -> (('a,'b) Map.t, 'c) unary - | GroupBy : 'b ord * 'a ord * ('a -> 'b) + | GroupBy : 'b ord * ('a -> 'b) -> ('a collection, ('b,'a collection) Map.t) unary | Count : 'a ord -> ('a collection, ('a, int) Map.t) unary type ('a,'b,'key,'c) join_descr = { join_key1 : 'a -> 'key; join_key2 : 'b -> 'key; - join_merge : 'key -> 'a -> 'b -> 'c; + join_merge : 'key -> 'a -> 'b -> 'c option; join_key : 'key Map.key_info; } @@ -234,7 +356,6 @@ type (_, _, _) binary = | Product : ('a collection, 'b collection, ('a*'b) collection) binary | Append : ('a collection, 'a collection, 'a collection) binary | SetOp : set_op * 'a ord -> ('a collection, 'a collection, 'a collection) binary - | Inter : 'a ord -> ('a collection, 'a collection, 'a collection) binary (* type of queries that return a 'a *) and 'a t = @@ -258,12 +379,6 @@ let start_hashtbl h = let start_seq seq = Start (Coll.of_seq seq) -(** {6 Composition} *) - -let apply u q = Unary (u, q) - -let (>>>) = apply - (** {6 Execution} *) let rec _optimize : type a. a t -> a t @@ -274,26 +389,98 @@ let rec _optimize : type a. a t -> a t | Binary (b, q1, q2) -> _optimize_binary b (_optimize q1) (_optimize q2) | QueryMap (f, q) -> QueryMap (f, _optimize q) + | Bind _ -> q (* cannot optimize before execution *) and _optimize_unary : type a b. (a,b) unary -> a t -> b t = fun u q -> match u, q with | Map f, Unary (Map g, q') -> _optimize_unary (Map (fun x -> f (g x))) q' + | Filter p, Unary (Map f, cont) -> + _optimize_unary + (FilterMap (fun x -> let y = f x in if p y then Some y else None)) + cont + | Map f, Unary (Filter p, cont) -> + _optimize_unary + (FilterMap (fun x -> if p x then Some (f x) else None)) + cont + | Map f, Binary (Append, q1, q2) -> + _optimize_binary Append (Unary (u, q1)) (Unary (u, q2)) + | Filter p, Binary (Append, q1, q2) -> + _optimize_binary Append (Unary (u, q1)) (Unary (u, q2)) + | Fold (f,acc), Unary (Map f', cont) -> + _optimize_unary + (Fold ((fun acc x -> f acc (f' x)), acc)) + cont + | Reduce (safety, start, mix, stop), Unary (Map f, cont) -> + _optimize_unary + (Reduce (safety, + (fun x -> start (f x)), + (fun x acc -> mix (f x) acc), + stop)) + cont + | Size, Unary (Map _, cont) -> + _optimize_unary Size cont (* ignore the map! *) + | Size, Unary (Sort _, cont) -> + _optimize_unary Size cont | _ -> Unary (u,q) - (* TODO *) + (* TODO: other cases *) and _optimize_binary : type a b c. (a,b,c) binary -> a t -> b t -> c t = fun b q1 q2 -> match b, q1, q2 with - | _ -> Binary (b, q1, q2) (* TODO *) - + | _ -> Binary (b, q1, q2) (* TODO *) (* apply a unary operator on a collection *) let _do_unary : type a b. (a,b) unary -> a -> b = fun u c -> match u with | Map f -> Coll.map f c + | GeneralMap f -> f c | Filter p -> Coll.filter p c - | Fold (f, acc) -> Coll.fold f acc c (* TODO: optimize *) + | Fold (f, acc) -> Coll.fold f acc c + | Reduce (safety, start, mix, stop) -> + let acc = Coll.to_seq c + |> CCSequence.fold + (fun acc x -> match acc with + | None -> Some (start x) + | Some acc -> Some (mix x acc) + ) None + in + begin match acc, safety with + | Some x, Safe -> Some (stop x) + | None, Safe -> None + | Some x, Unsafe -> stop x + | None, Unsafe -> invalid_arg "reduce: empty collection" + end + | Size -> Coll.size c + | Choose Safe -> Coll.choose c + | Choose Unsafe -> + begin match Coll.choose c with + | Some x -> x + | None -> invalid_arg "choose: empty collection" + end + | FilterMap f -> Coll.filter_map f c + | FlatMap f -> Coll.flat_map f c + | Take n -> Coll.take n c + | TakeWhile p -> Coll.take_while p c + | Sort cmp -> Coll.sort cmp c + | Distinct cmp -> Coll.distinct ~cmp c + | Search obj -> Coll.search obj c + | Get (Safe, k) -> Map.get c k + | Get (Unsafe, k) -> Map.get_exn c k + | GroupBy (cmp,f) -> + Coll.to_seq c + |> CCSequence.map (fun x -> f x, x) + |> Map.multimap_cmp ~cmp + | Contains (eq, x) -> Coll.contains ~eq x c + | Count cmp -> + Coll.to_seq c + |> Map.count (Map.make_cmp ~cmp ()) + (* TODO: join of two collections *) let _do_join ~join c1 c2 = + let _build = Map.make_info join.join_key in + assert false + +(* TODO *) +let _do_group_join ~gjoin c1 c2 = assert false let _do_product c1 c2 = @@ -303,7 +490,20 @@ let _do_product c1 c2 = let _do_binary : type a b c. (a, b, c) binary -> a -> b -> c = fun b c1 c2 -> match b with | Join join -> _do_join ~join c1 c2 + | GroupJoin gjoin -> _do_group_join ~gjoin c1 c2 | Product -> _do_product c1 c2 + | Append -> + Coll.of_seq (CCSequence.append (Coll.to_seq c1) (Coll.to_seq c2)) + | SetOp (Inter,cmp) -> + (* use a join *) + _do_join ~join:{ + join_key1=_id; + join_key2=_id; + join_merge=(fun _ a b -> Some a); + join_key=Map.({default_key_info with cmp=Some cmp; }) + } c1 c2 + | SetOp (Union,cmp) -> failwith "union: not implemented" (* TODO *) + | SetOp (Diff,cmp) -> failwith "diff: not implemented" (* TODO *) let rec _run : type a. opt:bool -> a t -> a = fun ~opt q -> match q with @@ -326,8 +526,6 @@ let map f q = Unary (Map f, q) let filter p q = Unary (Filter p, q) -let size q = Unary (Size, q) - let choose q = Unary (Choose Safe, q) let choose_exn q = Unary (Choose Unsafe, q) @@ -346,8 +544,8 @@ let take_while p q = Unary (TakeWhile p, q) let sort ~cmp q = Unary (Sort cmp, q) -let distinct ?cmp ?eq ?hash () q = - Unary (Distinct (cmp,eq,hash), q) +let distinct ?(cmp=Pervasives.compare) () q = + Unary (Distinct cmp, q) let get key q = Unary (Get (Safe, key), q) @@ -366,8 +564,8 @@ let map_to_seq_flatten q = in Unary (GeneralMap f, q) -let group_by ?(cmp_key=Pervasives.compare) ?(cmp_val=Pervasives.compare) f q = - Unary (GroupBy (cmp_key,cmp_val,f), q) +let group_by ?(cmp=Pervasives.compare) f q = + Unary (GroupBy (cmp,f), q) let count ?(cmp=Pervasives.compare) () q = Unary (Count cmp, q) @@ -401,6 +599,15 @@ let max_exn q = Unary (Reduce (Unsafe, _id, Pervasives.max, _id), q) let min_exn q = Unary (Reduce (Unsafe, _id, Pervasives.min, _id), q) let average_exn q = Unary (Reduce (Unsafe, _avg_start, _avg_mix, _avg_stop), q) +let is_empty q = + Unary (Search (object + method check _ = SearchStop false (* stop in case there is an element *) + method failure = true + end), q) + +let contains ?(eq=(=)) x q = + Unary (Contains (eq, x), q) + let for_all p q = Unary (Search (object method check x = if p x then SearchContinue else SearchStop false diff --git a/core/CCLinq.mli b/core/CCLinq.mli index b9915cc3..885c8840 100644 --- a/core/CCLinq.mli +++ b/core/CCLinq.mli @@ -113,8 +113,7 @@ val take_while : ('a -> bool) -> 'a collection t -> 'a collection t val sort : cmp:'a ord -> 'a collection t -> 'a collection t (** Sort items by the given comparison function *) -val distinct : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> - unit -> 'a collection t -> 'a collection t +val distinct : ?cmp:'a ord -> unit -> 'a collection t -> 'a collection t (** Remove duplicate elements from the input collection. All elements in the result are distinct. *) @@ -135,7 +134,7 @@ val map_to_seq_flatten : ('a,'b collection) Map.t t -> ('a*'b) collection t (** {6 Aggregation} *) -val group_by : ?cmp_key:'b ord -> ?cmp_val:'a ord -> +val group_by : ?cmp:'b ord -> ('a -> 'b) -> 'a collection t -> ('b,'a collection) Map.t t (** [group_by f] takes a collection [c] as input, and returns a multimap [m] such that for each [x] in [c], @@ -163,8 +162,12 @@ val reduce_exn : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> (** Same as {!reduce} but fails on empty collections. @raise Invalid_argument if the collection is empty *) +val is_empty : 'a collection t -> bool t + val sum : int collection t -> int t +val contains : ?eq:'a equal -> 'a -> 'a collection t -> bool t + val average : int collection t -> int option t val max : int collection t -> int option t val min : int collection t -> int option t @@ -182,13 +185,14 @@ val find_map : ('a -> 'b option) -> 'a collection t -> 'b option t val join : ?cmp:'key ord -> ?eq:'key equal -> ?hash:'key hash -> ('a -> 'key) -> ('b -> 'key) -> - merge:('key -> 'a -> 'b -> 'c) -> + merge:('key -> 'a -> 'b -> 'c option) -> 'a collection t -> 'b collection t -> 'c collection t (** [join key1 key2 ~merge] is a binary operation that takes two collections [a] and [b], projects their elements resp. with [key1] and [key2], and combine values [(x,y)] from [(a,b)] with the same [key] - using [merge]. *) + using [merge]. If [merge] returns [None], the combination + of values is discarded. *) val group_join : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> ('b -> 'a) -> 'a collection t -> 'b collection t -> From 6ec499799c6d6732c771e8c821a5da4105b9b3b6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 00:21:53 +0200 Subject: [PATCH 25/52] a few helpers + 1 example --- core/CCLinq.ml | 31 ++++++++++++++++++++++--------- core/CCLinq.mli | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/core/CCLinq.ml b/core/CCLinq.ml index 9728d414..c7803f00 100644 --- a/core/CCLinq.ml +++ b/core/CCLinq.ml @@ -137,6 +137,7 @@ module Coll = struct let sort cmp c = match c with | List l -> List (List.sort cmp l) + | Seq s -> List (List.sort cmp (CCSequence.to_rev_list s)) | _ -> to_seq c |> set_of_seq ~cmp @@ -228,7 +229,7 @@ module Map = struct M.fold (fun key set acc -> f acc key set) !map acc ); - to_seq = M.to_seq !map; + to_seq = (fun k -> M.to_seq !map k); } in { cur; @@ -281,11 +282,10 @@ module Map = struct let count build seq = seq (fun x -> - let n = match build.cur.get x with - | None -> 1 - | Some n -> n+1 - in - build.add x n); + build.update x + (function + | None -> Some 1 + | Some n -> Some (n+1))); build.cur let get m x = m.get x @@ -298,6 +298,8 @@ module Map = struct let size m = m.size () let to_seq m = m.to_seq + + let to_list m = m.to_seq |> CCSequence.to_rev_list end (** {2 Query operators} *) @@ -538,11 +540,15 @@ let flat_map_seq f q = let f' x = Coll.of_seq (f x) in Unary (FlatMap f', q) +let flat_map_list f q = + let f' x = Coll.of_list (f x) in + Unary (FlatMap f', q) + let take n q = Unary (Take n, q) let take_while p q = Unary (TakeWhile p, q) -let sort ~cmp q = Unary (Sort cmp, q) +let sort ?(cmp=Pervasives.compare) () q = Unary (Sort cmp, q) let distinct ?(cmp=Pervasives.compare) () q = Unary (Distinct cmp, q) @@ -553,10 +559,10 @@ let get key q = let get_exn key q = Unary (Get (Unsafe, key), q) -let map_to_seq q = +let map_iter q = Unary (GeneralMap (fun m -> Coll.of_seq m.Map.to_seq), q) -let map_to_seq_flatten q = +let map_iter_flatten q = let f m = m.Map.to_seq |> CCSequence.flatMap (fun (k,v) -> Coll.to_seq v |> CCSequence.map (fun v' -> k,v')) @@ -564,6 +570,9 @@ let map_to_seq_flatten q = in Unary (GeneralMap f, q) +let map_to_list q = + Unary (GeneralMap Map.to_list, q) + let group_by ?(cmp=Pervasives.compare) f q = Unary (GroupBy (cmp,f), q) @@ -704,3 +713,7 @@ let to_queue q = let to_stack q = QueryMap ((fun c s -> CCSequence.to_stack s (Coll.to_seq c)), q) + +(** {6 Misc} *) + +let run_list q = run (q |> to_list) diff --git a/core/CCLinq.mli b/core/CCLinq.mli index 885c8840..fec9860c 100644 --- a/core/CCLinq.mli +++ b/core/CCLinq.mli @@ -24,7 +24,25 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 LINQ-like operations on collections} *) +(** {1 LINQ-like operations on collections} + +The purpose it to provide powerful combinators to express iteration, +transformation and combination of collections of items. + +{[ + +CCLinq.( + start_list [1;2;3] + |> flat_map_list (fun x -> CCList.(x -- (x+10))) + |> sort () + |> count () + |> map_to_list |> run +);; +- : (int * int) list = [(13, 1); (12, 2); (11, 3); (10, 3); (9, 3); + (8, 3); (7, 3); (6, 3); (5, 3); (4, 3); (3, 3); (2, 2); (1, 1)] +]} + +*) type 'a sequence = ('a -> unit) -> unit type 'a equal = 'a -> 'a -> bool @@ -49,6 +67,8 @@ module Map : sig val size : (_,_) t -> int val to_seq : ('a, 'b) t -> ('a * 'b) sequence + + val to_list : ('a, 'b) t -> ('a * 'b) list end (** {2 Query operators} *) @@ -79,6 +99,9 @@ val run : 'a t -> 'a val run_no_opt : 'a t -> 'a (** Execute the query, without optimizing it at all *) +val run_list : 'a collection t -> 'a list +(** Shortcut to obtain a list *) + (** {6 Basics on Collections} *) val map : ('a -> 'b) -> 'a collection t -> 'b collection t @@ -104,13 +127,15 @@ val flat_map : ('a -> 'b collection) -> 'a collection t -> 'b collection t val flat_map_seq : ('a -> 'b sequence) -> 'a collection t -> 'b collection t (** Same as {!flat_map} but using sequences *) +val flat_map_list : ('a -> 'b list) -> 'a collection t -> 'b collection t + val take : int -> 'a collection t -> 'a collection t (** take at most [n] elements *) val take_while : ('a -> bool) -> 'a collection t -> 'a collection t (** take elements while they satisfy a predicate *) -val sort : cmp:'a ord -> 'a collection t -> 'a collection t +val sort : ?cmp:'a ord -> unit -> 'a collection t -> 'a collection t (** Sort items by the given comparison function *) val distinct : ?cmp:'a ord -> unit -> 'a collection t -> 'a collection t @@ -126,12 +151,14 @@ val get_exn : 'a -> ('a, 'b) Map.t t -> 'b t (** Unsafe version of {!get}. @raise Not_found if the key is not present. *) -val map_to_seq : ('a,'b) Map.t t -> ('a*'b) collection t +val map_iter : ('a,'b) Map.t t -> ('a*'b) collection t (** View a multimap as a proper collection *) -val map_to_seq_flatten : ('a,'b collection) Map.t t -> ('a*'b) collection t +val map_iter_flatten : ('a,'b collection) Map.t t -> ('a*'b) collection t (** View a multimap as a collection of individual key/value pairs *) +val map_to_list : ('a,'b) Map.t t -> ('a*'b) list t + (** {6 Aggregation} *) val group_by : ?cmp:'b ord -> From 86cd5c0e8df60caeb3793d06471d47f022b53970 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 00:54:51 +0200 Subject: [PATCH 26/52] group_by now uses lists; a few more utils --- core/CCLinq.ml | 26 +++++++++++++------------- core/CCLinq.mli | 12 +++++++++++- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/core/CCLinq.ml b/core/CCLinq.ml index c7803f00..9cf3a918 100644 --- a/core/CCLinq.ml +++ b/core/CCLinq.ml @@ -260,25 +260,16 @@ module Map = struct end | {eq=Some eq; hash=Some hash; _} -> make_hash ~eq ~hash () - let multiset build seq = + let multimap build seq = seq (fun (k,v) -> build.update k (function | None -> Some [v] | Some l -> Some (v::l))); - { is_empty = build.cur.is_empty; - size = build.cur.size; - get = (fun k -> match build.cur.get k with - | None -> None - | Some v -> Some (Coll.of_list v)); - fold = (fun f acc -> - build.cur.fold (fun acc k v -> f acc k (Coll.of_list v)) acc); - to_seq = build.cur.to_seq - |> CCSequence.map (fun (k,v) -> k,Coll.of_list v); - } + build.cur let multimap_cmp ?cmp seq = let build = make_cmp ?cmp () in - multiset build seq + multimap build seq let count build seq = seq (fun x -> @@ -330,7 +321,7 @@ type (_, _) unary = | Contains : 'a equal * 'a -> ('a collection, bool) unary | Get : ('b,'c) safety * 'a -> (('a,'b) Map.t, 'c) unary | GroupBy : 'b ord * ('a -> 'b) - -> ('a collection, ('b,'a collection) Map.t) unary + -> ('a collection, ('b,'a list) Map.t) unary | Count : 'a ord -> ('a collection, ('a, int) Map.t) unary type ('a,'b,'key,'c) join_descr = { @@ -576,9 +567,15 @@ let map_to_list q = let group_by ?(cmp=Pervasives.compare) f q = Unary (GroupBy (cmp,f), q) +let group_by' ?cmp f q = + map_iter (group_by ?cmp f q) + let count ?(cmp=Pervasives.compare) () q = Unary (Count cmp, q) +let count' ?cmp () q = + map_iter (count ?cmp () q) + let fold f acc q = Unary (Fold (f, acc), q) @@ -677,6 +674,9 @@ let diff ?(cmp=Pervasives.compare) () q1 q2 = let fst q = map fst q let snd q = map snd q +let map1 f q = map (fun (x,y) -> f x, y) q +let map2 f q = map (fun (x,y) -> x, f y) q + let flatten_opt q = filter_map _id q let opt_get_exn q = diff --git a/core/CCLinq.mli b/core/CCLinq.mli index fec9860c..96d1a939 100644 --- a/core/CCLinq.mli +++ b/core/CCLinq.mli @@ -162,16 +162,21 @@ val map_to_list : ('a,'b) Map.t t -> ('a*'b) list t (** {6 Aggregation} *) val group_by : ?cmp:'b ord -> - ('a -> 'b) -> 'a collection t -> ('b,'a collection) Map.t t + ('a -> 'b) -> 'a collection t -> ('b,'a list) Map.t t (** [group_by f] takes a collection [c] as input, and returns a multimap [m] such that for each [x] in [c], [x] occurs in [m] under the key [f x]. In other words, [f] is used to obtain a key from [x], and [x] is added to the multimap using this key. *) +val group_by' : ?cmp:'b ord -> + ('a -> 'b) -> 'a collection t -> ('b * 'a list) collection t + val count : ?cmp:'a ord -> unit -> 'a collection t -> ('a, int) Map.t t (** [count c] returns a map from elements of [c] to the number of time those elements occur. *) +val count' : ?cmp:'a ord -> unit -> 'a collection t -> ('a * int) collection t + val fold : ('b -> 'a -> 'b) -> 'b -> 'a collection t -> 'b t (** Fold over the collection *) @@ -253,8 +258,13 @@ val diff : ?cmp:'a ord -> unit -> (** Specialized projection operators *) val fst : ('a * 'b) collection t -> 'a collection t + val snd : ('a * 'b) collection t -> 'b collection t +val map1 : ('a -> 'b) -> ('a * 'c) collection t -> ('b * 'c) collection t + +val map2 : ('a -> 'b) -> ('c * 'a) collection t -> ('c * 'b) collection t + val flatten_opt : 'a option collection t -> 'a collection t (** Flatten the collection by removing options *) From e9e861479c1f5442c08b7acc1a5ae3ce34b6d9d2 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 15:08:27 +0200 Subject: [PATCH 27/52] basic combinators for ordering (including lexico combination) --- _oasis | 2 +- core/CCOrd.ml | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ core/CCOrd.mli | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 core/CCOrd.ml create mode 100644 core/CCOrd.mli diff --git a/_oasis b/_oasis index c1421116..7eafd834 100644 --- a/_oasis +++ b/_oasis @@ -41,7 +41,7 @@ Library "containers" Modules: CCVector, CCDeque, CCGen, CCSequence, CCFQueue, CCMultiMap, CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCLeftistheap, CCList, CCOpt, CCPair, CCFun, CCHash, - CCKList, CCInt, CCBool, CCArray, CCBatch, CCLinq + CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd, CCLinq FindlibName: containers Library "containers_string" diff --git a/core/CCOrd.ml b/core/CCOrd.ml new file mode 100644 index 00000000..30fa2598 --- /dev/null +++ b/core/CCOrd.ml @@ -0,0 +1,55 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Comparisons} *) + +type 'a t = 'a -> 'a -> int +(** Comparison (total ordering) between two elements, that returns an int *) + +let compare = Pervasives.compare + +let int_ (x:int) y = Pervasives.compare x y +let string_ (x:string) y = Pervasives.compare x y +let bool_ (x:bool) y = Pervasives.compare x y +let float_ (x:float) y = Pervasives.compare x y + +(** {2 Lexicographic Combination} *) + +let () c (ord,x,y) = + if c = 0 + then ord x y + else c + +let rec list_ ord l1 l2 = match l1, l2 with + | [], [] -> 0 + | [], _ -> -1 + | _, [] -> 1 + | x1::l1', x2::l2' -> + let c = ord x1 x2 in + if c = 0 + then list_ ord l1' l2' + else c + diff --git a/core/CCOrd.mli b/core/CCOrd.mli new file mode 100644 index 00000000..11b22398 --- /dev/null +++ b/core/CCOrd.mli @@ -0,0 +1,54 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Comparisons} *) + +type 'a t = 'a -> 'a -> int +(** Comparison (total ordering) between two elements, that returns an int *) + +val compare : 'a t +(** Polymorphic "magic" comparison *) + +val int_ : int t +val string_ : string t +val bool_ : bool t +val float_ : float t + +(** {2 Lexicographic Combination} *) + +val () : int -> ('a t * 'a * 'a) -> int +(** [c1 @@? (ord, x, y)] returns the same as [c1] if [c1] is not [0]; + otherwise it uses [ord] to compare the two values [x] and [y], + of type ['a]. + + Example: + {[CCInt.compare 1 3 + (String.compare, "a", "b") + (CCBool.compare, true, false)]} +*) + +val list_ : 'a t -> 'a list t +(** Lexicographic combination on lists *) From 933638244d2d7c37b30099435394de84315d8b0f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 15:56:28 +0200 Subject: [PATCH 28/52] more combinators for comparisons --- core/CCOrd.ml | 29 +++++++++++++++++++++++++++++ core/CCOrd.mli | 6 ++++++ 2 files changed, 35 insertions(+) diff --git a/core/CCOrd.ml b/core/CCOrd.ml index 30fa2598..fe393055 100644 --- a/core/CCOrd.ml +++ b/core/CCOrd.ml @@ -43,6 +43,22 @@ let () c (ord,x,y) = then ord x y else c +let pair o_x o_y (x1,y1) (x2,y2) = + let c = o_x x1 x2 in + if c = 0 + then o_y y1 y2 + else c + +let triple o_x o_y o_z (x1,y1,z1) (x2,y2,z2) = + let c = o_x x1 x2 in + if c = 0 + then + let c' = o_y y1 y2 in + if c' = 0 + then o_z z1 z2 + else c' + else c + let rec list_ ord l1 l2 = match l1, l2 with | [], [] -> 0 | [], _ -> -1 @@ -53,3 +69,16 @@ let rec list_ ord l1 l2 = match l1, l2 with then list_ ord l1' l2' else c +let array_ ord a1 a2 = + let rec aux i = + if i = Array.length a1 + then if Array.length a1 = Array.length a2 then 0 + else -1 + else if i = Array.length a2 + then 1 + else + let c = ord a1.(i) a2.(i) in + if c = 0 + then aux (i+1) else c + in + aux 0 diff --git a/core/CCOrd.mli b/core/CCOrd.mli index 11b22398..80a38d6d 100644 --- a/core/CCOrd.mli +++ b/core/CCOrd.mli @@ -50,5 +50,11 @@ val () : int -> ('a t * 'a * 'a) -> int (CCBool.compare, true, false)]} *) +val pair : 'a t -> 'b t -> ('a * 'b) t + +val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t + val list_ : 'a t -> 'a list t (** Lexicographic combination on lists *) + +val array_ : 'a t -> 'a array t From d368931f703d2a4809ac3ad997ba5c4ec98a2306 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 17:56:32 +0200 Subject: [PATCH 29/52] CCOpt infix map --- core/CCOpt.ml | 2 ++ core/CCOpt.mli | 3 +++ 2 files changed, 5 insertions(+) diff --git a/core/CCOpt.ml b/core/CCOpt.ml index f5e8f753..3c8f0b50 100644 --- a/core/CCOpt.ml +++ b/core/CCOpt.ml @@ -54,6 +54,8 @@ let equal f o1 o2 = match o1, o2 with let return x = Some x +let (>|=) x f = map f x + let (>>=) o f = match o with | None -> None | Some x -> f x diff --git a/core/CCOpt.mli b/core/CCOpt.mli index c81f0081..7c6e4b6d 100644 --- a/core/CCOpt.mli +++ b/core/CCOpt.mli @@ -43,6 +43,9 @@ val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool val return : 'a -> 'a t (** Monadic return *) +val (>|=) : 'a t -> ('a -> 'b) -> 'b t +(** Infix version of {!map} *) + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t (** Monadic bind *) From 89f4500fc2efea70b45749012bc4a791c1c8b08e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 21:51:33 +0200 Subject: [PATCH 30/52] updated CCLefistheap (more functions) --- core/CCLeftistheap.ml | 40 +++++++++++++++++++++++++++++++++++++--- core/CCLeftistheap.mli | 35 +++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 13 deletions(-) diff --git a/core/CCLeftistheap.ml b/core/CCLeftistheap.ml index 5c6f9def..b0e5fe9f 100644 --- a/core/CCLeftistheap.ml +++ b/core/CCLeftistheap.ml @@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** Polymorphic implementation, following Okasaki *) type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a t = { tree : 'a tree; @@ -81,6 +82,8 @@ let insert heap x = let tree = merge_tree heap.leq (Node (1, x, Empty, Empty)) heap.tree in { heap with tree; } +let add = insert + let filter heap p = let rec filter tree p = match tree with | Empty -> Empty @@ -104,7 +107,14 @@ let extract_min heap = let heap' = { heap with tree; } in heap', x -let iter heap f = +let take heap = match heap.tree with + | Empty -> None + | Node (_, x, a, b) -> + let tree = merge_tree heap.leq a b in + let heap' = { heap with tree; } in + Some (x, heap') + +let iter f heap = let rec iter t = match t with | Empty -> () | Node (_, x, a, b) -> @@ -113,9 +123,18 @@ let iter heap f = iter b; in iter heap.tree +let fold f acc h = + let rec fold acc h = match h with + | Empty -> acc + | Node (_, x, a, b) -> + let acc = f acc x in + let acc = fold acc a in + fold acc b + in fold acc h.tree + let size heap = let r = ref 0 in - iter heap (fun _ -> incr r); + iter (fun _ -> incr r) heap; !r let of_seq heap seq = @@ -123,4 +142,19 @@ let of_seq heap seq = seq (fun x -> h := insert !h x); !h -let to_seq = iter +let to_seq h k = iter k h + +let rec of_klist h l = match l() with + | `Nil -> h + | `Cons (x, l') -> + let h' = add h x in + of_klist h' l' + +let to_klist h = + let rec next stack () = match stack with + | [] -> `Nil + | Empty :: stack' -> next stack' () + | Node (_, x, a, b) :: stack' -> + `Cons (x, next (a :: b :: stack')) + in + next [h.tree] diff --git a/core/CCLeftistheap.mli b/core/CCLeftistheap.mli index 701df006..aa99a61f 100644 --- a/core/CCLeftistheap.mli +++ b/core/CCLeftistheap.mli @@ -23,11 +23,11 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 Leftist Heaps} *) - -(** Polymorphic implementation, following Okasaki *) +(** {1 Leftist Heaps} +Polymorphic implementation, following Okasaki *) type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a t (** Heap containing values of type 'a *) @@ -37,7 +37,7 @@ val empty_with : leq:('a -> 'a -> bool) -> 'a t smaller than the second. *) val empty : 'a t - (** Empty heap using Pervasives.compare *) + (** Empty heap using [Pervasives.compare] *) val is_empty : _ t -> bool (** Is the heap empty? *) @@ -48,21 +48,36 @@ val merge : 'a t -> 'a t -> 'a t val insert : 'a t -> 'a -> 'a t (** Insert a value in the heap *) +val add : 'a t -> 'a -> 'a t + (** Synonym to {!insert} *) + val filter : 'a t -> ('a -> bool) -> 'a t - (** Filter values, only retaining the ones that satisfy the predicate *) + (** Filter values, only retaining the ones that satisfy the predicate. + Linear time at least. *) val find_min : 'a t -> 'a - (** Find minimal element, or raise Not_found *) + (** Find minimal element, or fails + @raise Not_found if the heap is empty *) val extract_min : 'a t -> 'a t * 'a - (** Extract and returns the minimal element, or raise Not_found *) + (** Extract and returns the minimal element, or + raise Not_found if the heap is empty *) -val iter : 'a t -> ('a -> unit) -> unit +val take : 'a t -> ('a * 'a t) option + (** Extract and return the minimum element, and the new heap (without + this element), or [None] if the heap is empty *) + +val iter : ('a -> unit) -> 'a t -> unit (** Iterate on elements *) +val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b + (** Fold on all values *) + val size : _ t -> int - (** Number of elements (linear) *) + (** Number of elements (linear complexity) *) val of_seq : 'a t -> 'a sequence -> 'a t - val to_seq : 'a t -> 'a sequence + +val of_klist : 'a t -> 'a klist -> 'a t +val to_klist : 'a t -> 'a klist From 71bdc7667d3fedb9f97a5173406ea68ece0c7708 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 21:57:47 +0200 Subject: [PATCH 31/52] gen interface in leftistheap --- core/CCLeftistheap.ml | 20 ++++++++++++++++++++ core/CCLeftistheap.mli | 4 ++++ 2 files changed, 24 insertions(+) diff --git a/core/CCLeftistheap.ml b/core/CCLeftistheap.ml index b0e5fe9f..1a73853c 100644 --- a/core/CCLeftistheap.ml +++ b/core/CCLeftistheap.ml @@ -29,6 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. type 'a sequence = ('a -> unit) -> unit type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option type 'a t = { tree : 'a tree; @@ -158,3 +159,22 @@ let to_klist h = `Cons (x, next (a :: b :: stack')) in next [h.tree] + +let rec of_gen h g = match g () with + | None -> h + | Some x -> + of_gen (add h x) g + +let to_gen h = + let stack = Stack.create () in + Stack.push h.tree stack; + let rec next () = + if Stack.is_empty stack + then None + else match Stack.pop stack with + | Empty -> next() + | Node (_, x, a, b) -> + Stack.push a stack; + Stack.push b stack; + Some x + in next diff --git a/core/CCLeftistheap.mli b/core/CCLeftistheap.mli index aa99a61f..9836ce9a 100644 --- a/core/CCLeftistheap.mli +++ b/core/CCLeftistheap.mli @@ -28,6 +28,7 @@ Polymorphic implementation, following Okasaki *) type 'a sequence = ('a -> unit) -> unit type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option type 'a t (** Heap containing values of type 'a *) @@ -81,3 +82,6 @@ val to_seq : 'a t -> 'a sequence val of_klist : 'a t -> 'a klist -> 'a t val to_klist : 'a t -> 'a klist + +val of_gen : 'a t -> 'a gen -> 'a t +val to_gen : 'a t -> 'a gen From 56fe3b087391c9bf8b7c600e84054ecd565ca2e5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 22:30:41 +0200 Subject: [PATCH 32/52] remove Qcheck (it has had its own repo for a long time) --- _oasis | 2 +- misc/qCheck.ml | 335 ------------------------------------------------ misc/qCheck.mli | 267 -------------------------------------- 3 files changed, 1 insertion(+), 603 deletions(-) delete mode 100644 misc/qCheck.ml delete mode 100644 misc/qCheck.mli diff --git a/_oasis b/_oasis index 7eafd834..6251d22a 100644 --- a/_oasis +++ b/_oasis @@ -59,7 +59,7 @@ Library "containers_misc" PHashtbl, SkipList, SplayTree, SplayMap, Univ, Bij, PiCalculus, Bencode, Sexp, RAL, UnionFind, SmallSet, AbsSet, CSM, - ActionMan, QCheck, BencodeOnDisk, TTree, + ActionMan, BencodeOnDisk, TTree, HGraph, Automaton, Conv, Bidir, Iteratee, Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact BuildDepends: unix,containers diff --git a/misc/qCheck.ml b/misc/qCheck.ml deleted file mode 100644 index b971ad7f..00000000 --- a/misc/qCheck.ml +++ /dev/null @@ -1,335 +0,0 @@ - -(* -copyright (c) 2013, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {6 Quickcheck inspired property-based testing} *) - -module Arbitrary = struct - type 'a t = Random.State.t -> 'a - - let return x st = x - - let int n st = Random.State.int st n - - let int_range ~start ~stop st = - let n = stop - start in - if n <= 0 - then 0 - else start + Random.State.int st n - - let (--) start stop = int_range ~start ~stop - - let small_int = int 100 - - let split_int gen st = - let n = gen st in - if n > 0 - then let i = Random.State.int st (n+1) in i, n-i - else 0, 0 - - let bool = Random.State.bool - - let float f st = Random.State.float st f - - let char st = Char.chr (Random.State.int st 128) - - let alpha st = - Char.chr (Char.code 'a' + Random.State.int st (Char.code 'z' - Char.code 'a')) - - let string_len len st = - let n = len st in - assert (n>=0); - let s = String.create n in - for i = 0 to n-1 do - s.[i] <- alpha st - done; - s - - let string st = string_len (int 10) st - - let map ar f st = f (ar st) - - let rec _make_list ar st acc n = - if n = 0 then acc else - let x = ar st in - _make_list ar st (x::acc) (n-1) - - let list ?(len=int 10) ar st = - let n = len st in - _make_list ar st [] n - - let opt ar st = - if Random.State.bool st - then Some (ar st) - else None - - let list_repeat len ar st = - _make_list ar st [] len - - let array ?(len=int 10) ar st = - let n = len st in - Array.init n (fun _ -> ar st) - - let array_repeat n ar st = - Array.init n (fun _ -> ar st) - - let among_array a st = - if Array.length a < 1 - then failwith "Arbitrary.among: cannot choose in empty array "; - let i = Random.State.int st (Array.length a) in - a.(i) - - let among l = - if List.length l < 1 - then failwith "Arbitrary.among: cannot choose in empty list"; - among_array (Array.of_list l) - - let choose l = match l with - | [] -> failwith "cannot choose from empty list" - | [x] -> x - | _ -> - let a = Array.of_list l in - fun st -> - let i = Random.State.int st (Array.length a) in - a.(i) st - - let fix ?(max=15) ~base f = - let rec ar = lazy - (fun depth st -> - if depth >= max || Random.State.int st max < depth - then base st (* base case. THe deeper, the more likely. *) - else (* recurse *) - let ar' = Lazy.force ar (depth+1) in - f ar' st) - in - Lazy.force ar 0 - - let fix_depth ~depth ~base f st = - let max = depth st in - fix ~max ~base f st - - let rec retry gen st = match gen st with - | None -> retry gen st - | Some x -> x - - let lift f a st = f (a st) - - let lift2 f a b st = f (a st) (b st) - - let lift3 f a b c st = f (a st) (b st) (c st) - - let lift4 f a b c d st = f (a st) (b st) (c st) (d st) - - let pair a b = lift2 (fun x y -> x,y) a b - - let triple a b c = lift3 (fun x y z -> x,y,z) a b c - - let quad a b c d = lift4 (fun x y z w -> x,y,z,w) a b c d - - let (>>=) a f st = - let x = a st in - f x st - - let generate ?(n=100) ?(rand=Random.State.make_self_init()) gen = - let l = ref [] in - for i = 0 to n-1 do - l := (gen rand) :: !l - done; - !l -end - -(** {2 Pretty printing} *) - -module PP = struct - type 'a t = 'a -> string - - let int = string_of_int - let bool = string_of_bool - let float = string_of_float - let string s = s - let char c = - let s = "_" in - s.[0] <- c; - s - - let pair a b (x,y) = Printf.sprintf "(%s, %s)" (a x) (b y) - let triple a b c (x,y,z) = Printf.sprintf "(%s, %s, %s)" (a x) (b y) (c z) - let quad a b c d (x,y,z,w) = - Printf.sprintf "(%s, %s, %s, %s)" (a x) (b y) (c z) (d w) - - let list pp l = - let b = Buffer.create 25 in - Buffer.add_char b '['; - List.iteri (fun i x -> - if i > 0 then Buffer.add_string b ", "; - Buffer.add_string b (pp x)) - l; - Buffer.add_char b ']'; - Buffer.contents b - - let array pp a = - let b = Buffer.create 25 in - Buffer.add_string b "[|"; - Array.iteri (fun i x -> - if i > 0 then Buffer.add_string b ", "; - Buffer.add_string b (pp x)) - a; - Buffer.add_string b "|]"; - Buffer.contents b -end - -(** {2 Testing} *) - -module Prop = struct - type 'a t = 'a -> bool - - exception PrecondFail - - let assume p = - if not p then raise PrecondFail - - let assume_lazy (lazy p) = - if not p then raise PrecondFail - - let (==>) a b = - fun x -> - assume (a x); - b x - - let (&&&) a b x = a x && b x - - let (|||) a b x = a x || b x - - let (!!!) a x = not (a x) -end - -type 'a result = - | Ok of int * int (* total number / precond failed *) - | Failed of 'a list - | Error of 'a option * exn - -(* random seed, for repeatability of tests *) -let __seed = [| 89809344; 994326685; 290180182 |] - -let check ?(rand=Random.State.make __seed) ?(n=100) gen prop = - let precond_failed = ref 0 in - let failures = ref [] in - let inst = ref None in - try - for i = 0 to n - 1 do - let x = gen rand in - inst := Some x; - try - if not (prop x) - then failures := x :: !failures - with Prop.PrecondFail -> - incr precond_failed - done; - match !failures with - | [] -> Ok (n, !precond_failed) - | _ -> Failed (!failures) - with e -> - Error (!inst, e) - -(** {2 Main} *) - -type 'a test_cell = { - n : int; - pp : 'a PP.t option; - prop : 'a Prop.t; - gen : 'a Arbitrary.t; - name : string; - limit : int; - size : ('a -> int) option; -} -type test = - | Test : 'a test_cell -> test - (** GADT needed for the existential type *) - -let mk_test ?(n=100) ?pp ?(name="") ?size ?(limit=10) gen prop = - if limit < 0 then failwith "QCheck: limit needs be >= 0"; - if n <= 0 then failwith "QCheck: n needs be >= 0"; - Test { prop; gen; name; n; pp; size; limit; } - -(* tail call version of take, that returns (at most) [n] elements of [l] *) -let rec _list_take acc l n = match l, n with - | _, 0 - | [], _ -> List.rev acc - | x::l', _ -> _list_take (x::acc) l' (n-1) - -let run ?(out=stdout) ?(rand=Random.State.make __seed) (Test test) = - Printf.fprintf out "testing property %s...\n" test.name; - match check ~rand ~n:test.n test.gen test.prop with - | Ok (n, prefail) -> - Printf.fprintf out " [✔] passed %d tests (%d preconditions failed)\n" n prefail; - true - | Failed l -> - begin match test.pp with - | None -> Printf.fprintf out " [×] %d failures over %d\n" (List.length l) test.n - | Some pp -> - Printf.fprintf out " [×] %d failures over %d (print at most %d):\n" - (List.length l) test.n test.limit; - let to_print = match test.size with - | None -> l - | Some size -> - (* sort by increasing size *) - let l = List.map (fun x -> x, size x) l in - let l = List.sort (fun (x,sx) (y,sy) -> sx - sy) l in - List.map fst l - in - (* only keep [limit] counter examples *) - let to_print = _list_take [] to_print test.limit in - (* print the counter examples *) - List.iter - (fun x -> Printf.fprintf out " %s\n" (pp x)) - to_print - end; - false - | Error (inst, e) -> - begin match inst, test.pp with - | _, None - | None, _ -> Printf.fprintf out " [×] error: %s\n" (Printexc.to_string e); - | Some x, Some pp -> - (* print instance on which the error occurred *) - Printf.fprintf out " [×] error on instance %s: %s\n" - (pp x) (Printexc.to_string e); - end; - false - -type suite = test list - -let flatten = List.flatten - -let run_tests ?(out=stdout) ?(rand=Random.State.make __seed) l = - let start = Unix.gettimeofday () in - let n = List.length l in - let failed = ref 0 in - Printf.fprintf out "check %d properties...\n" (List.length l); - List.iter (fun test -> if not (run ~out ~rand test) then incr failed) l; - Printf.fprintf out "tests run in %.2fs\n" (Unix.gettimeofday() -. start); - if !failed = 0 - then Printf.fprintf out "[✔] Success! (passed %d tests)\n" n - else Printf.fprintf out "[×] Failure. (%d tests failed over %d)\n" !failed n; - !failed = 0 diff --git a/misc/qCheck.mli b/misc/qCheck.mli deleted file mode 100644 index 7c3c006e..00000000 --- a/misc/qCheck.mli +++ /dev/null @@ -1,267 +0,0 @@ - -(* -copyright (c) 2013, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 Quickcheck inspired property-based testing} *) - -(** The library takes inspiration from Haskell's QuickCheck library. The -rough idea is that the programer describes invariants that values of -a certain type need to satisfy ("properties"), as functions from this type -to bool. She also needs to desribe how to generate random values of the type, -so that the property is tried and checked on a number of random instances. - -This explains the organization of this module: - -- {! Arbitrary} is used to describe how to generate random values. An - ['a Arbitrary.t] is a random generator of values of type 'a. -- {! Prop} is used to describe and combine properties. Especially interesting - is [Prop.(==>)], that is such that [a ==> b] only checks the property [b] - on a value [x] if [a x] holds (precondition). -- {! PP} describes a few combinators to print values. This is used when a - property fails on some instances, to print the failing instances. - -Then, a few functions are provided to test properties. Optional parameters -allow to specify the random generator, the printer for failing cases, the -number of instances to generate and test... - - -Examples: - - - List.rev is involutive: - -{[ -let test = QCheck.mk_test ~n:1000 QCheck.Arbitrary.(list alpha) - (fun l -> List.rev (List.rev l) = l);; -QCheck.run test;; -]} - - Not all lists are sorted (false property that will fail. The 15 smallest - counter-example lists will be printed): - -{[ -let test = QCheck.( - mk_test - ~n:10_000 ~size:List.length ~limit:15 ~pp:QCheck.PP.(list int) - QCheck.Arbitrary.(list small_int) - (fun l -> l = List.sort compare l));; -QCheck.run test;; -]} - - - - generate 20 random trees using {! Arbitrary.fix} : - -{[type tree = Int of int | Node of tree list;; - - let ar = QCheck.Arbitrary.(fix ~max:10 - ~base:(map small_int (fun i -> Int i)) - (fun t st -> Node (list t st)));; - - Arbitrary.generate ~n:20 ar;; - ]} -*) - -(** {2 Description of how to generate arbitrary values for some type} *) - -module Arbitrary : sig - type 'a t = Random.State.t -> 'a - (** A generator of arbitrary values of type 'a *) - - val return : 'a -> 'a t - (** Return always the same value (e.g. [4]) *) - - val int : int -> int t - (** Any integer between 0 (inclusive) and the given higher bound (exclusive) *) - - val int_range : start:int -> stop:int -> int t - (* Integer range start .. stop-1 *) - - val (--) : int -> int -> int t - (** Infix synonym for {!int_range} *) - - val small_int : int t - (** Ints lower than 100 *) - - val split_int : int t -> (int * int) t - (** [split_int gen] generates a number [n] from [gen], and - returns [i, j] where [i + j = n] *) - - val bool : bool t - (** Arbitrary boolean *) - - val char : char t - (** A (printable) char *) - - val alpha : char t - (** Alphabetic char *) - - val float : float -> float t - (** Random float *) - - val string : string t - (** Random strings of small length *) - - val string_len : int t -> string t - (** String of random length *) - - val map : 'a t -> ('a -> 'b) -> 'b t - (** Transform an arbitrary into another *) - - val list : ?len:int t -> 'a t -> 'a list t - (** List of arbitrary length. Default [len] is between 0 and 10. *) - - val opt : 'a t -> 'a option t - (** May return a value, or None *) - - val pair : 'a t -> 'b t -> ('a * 'b) t - - val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t - - val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a * 'b * 'c * 'd) t - - val list_repeat : int -> 'a t -> 'a list t - (** Lists of given length exactly *) - - val array : ?len:int t -> 'a t -> 'a array t - (** Random array of random length *) - - val array_repeat : int -> 'a t -> 'a array t - (** Random array of given length *) - - val among : 'a list -> 'a t - (** Choose an element among those of the list *) - - val among_array : 'a array -> 'a t - (** Choose in the array *) - - val choose : 'a t list -> 'a t - (** Choice among combinations *) - - val fix : ?max:int -> base:'a t -> ('a t -> 'a t) -> 'a t - (** Recursive arbitrary values. The optional value [max] defines - the maximal depth, if needed (default 15). [base] is the base case. *) - - val fix_depth : depth:int t -> base:'a t -> ('a t -> 'a t) -> 'a t - (** Recursive values of at most given random depth *) - - val lift : ('a -> 'b) -> 'a t -> 'b t - val lift2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t - val lift3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t - val lift4 : ('a -> 'b -> 'c -> 'd -> 'e) -> 'a t -> 'b t -> 'c t -> 'd t -> 'e t - - val (>>=) : 'a t -> ('a -> 'b t) -> 'b t - (** Monadic bind *) - - val retry : 'a option t -> 'a t - (** CCGenerate until a Some value is returned *) - - val generate : ?n:int -> ?rand:Random.State.t -> 'a t -> 'a list - (** CCGenerate [n] random values of the given type *) -end - -(** {2 Pretty printing} *) - -module PP : sig - type 'a t = 'a -> string - - val int : int t - val bool : bool t - val float : float t - val char : char t - val string : string t - - val pair : 'a t -> 'b t -> ('a*'b) t - val triple : 'a t -> 'b t -> 'c t -> ('a*'b*'c) t - val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a*'b*'c*'d) t - - val list : 'a t -> 'a list t - val array : 'a t -> 'a array t -end - -(** {2 Testing} *) - -module Prop : sig - type 'a t = 'a -> bool - - val (==>) : ('a -> bool) -> 'a t -> 'a t - (** Precondition for a test *) - - val assume : bool -> unit - (** Assume the given precondition holds. A test won't fail if the - precondition (the boolean argument) is false, but it will be - discarded. Running tests counts how many instances were - discarded for not satisfying preconditions. *) - - val assume_lazy : bool lazy_t -> unit - (** Assume the given (lazy) precondition holds. See {!assume}. *) - - val (&&&) : 'a t -> 'a t -> 'a t - (** Logical 'and' on tests *) - - val (|||) : 'a t -> 'a t -> 'a t - (** Logical 'or' on tests *) - - val (!!!) : 'a t -> 'a t - (** Logical 'not' on tests *) -end - -type 'a result = - | Ok of int * int (** total number of tests / number of failed preconditions *) - | Failed of 'a list (** Failed instances *) - | Error of 'a option * exn (** Error, and possibly instance that triggered it *) - -val check : ?rand:Random.State.t -> ?n:int -> - 'a Arbitrary.t -> 'a Prop.t -> 'a result - (** Check that the property [prop] holds on [n] random instances of the type - 'a, as generated by the arbitrary instance [gen] *) - -(** {2 Main} *) - -type test - (** A single property test *) - -val mk_test : ?n:int -> ?pp:'a PP.t -> ?name:string -> - ?size:('a -> int) -> ?limit:int -> - 'a Arbitrary.t -> 'a Prop.t -> test - (** Construct a test. Optional parameters are the same as for {!run}. - @param name is the name of the property that is checked - @param pp is a pretty printer for failing instances - @out is the channel to print results onto - @n is the number of tests (default 100) - @rand is the random generator to use - @size is a size function on values on which tests are performed. If - the test fails and a size function is given, the smallest - counter-examples with respect to [size] will be printed in priority. - @limit maximal number of counter-examples that will get printed. - Default is [10]. *) - -val run : ?out:out_channel -> ?rand:Random.State.t -> test -> bool - (** Run a test and print results *) - -type suite = test list - (** A test suite is a list of tests *) - -val flatten : suite list -> suite - -val run_tests : ?out:out_channel -> ?rand:Random.State.t -> suite -> bool - (** Run a suite of tests, and print its results *) From e7dbdeff2ec5c86edb9b28c2a2cbe5b651bcb3da Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 22:33:17 +0200 Subject: [PATCH 33/52] added zippers to CCList --- core/CCList.ml | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ core/CCList.mli | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/core/CCList.ml b/core/CCList.ml index 8a240539..34d04392 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -424,6 +424,54 @@ module Assoc = struct *) end +(** {2 Zipper} *) + +module Zipper = struct + type 'a t = 'a list * 'a list + + let empty = [], [] + + let is_empty = function + | _, [] -> true + | _, _::_ -> false + + let to_list (l,r) = + let rec append l acc = match l with + | [] -> acc + | x::l' -> append l' (x::acc) + in append l r + + let make l = [], l + + let left = function + | x::l, r -> l, x::r + | [], r -> [], r + + let right = function + | l, x::r -> x::l, r + | l, [] -> l, [] + + let modify f z = match z with + | l, [] -> + begin match f None with + | None -> z + | Some x -> l, [x] + end + | l, x::r -> + begin match f (Some x) with + | None -> l,r + | Some x' -> l, x::r + end + + let focused = function + | _, x::_ -> Some x + | _, [] -> None + + let focused_exn = function + | _, x::_ -> x + | _, [] -> raise Not_found +end + (** {2 Conversions} *) type 'a sequence = ('a -> unit) -> unit diff --git a/core/CCList.mli b/core/CCList.mli index 7ccc71a7..54338a68 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -175,6 +175,42 @@ module Assoc : sig (** Add the binding into the list (erase it if already present) *) end +(** {2 Zipper} *) + +module Zipper : sig + type 'a t = 'a list * 'a list + + val empty : 'a t + (** Empty zipper *) + + val is_empty : _ t -> bool + (** Empty zipper, or at the end of the zipper? *) + + val to_list : 'a t -> 'a list + (** Convert the zipper back to a list *) + + val make : 'a list -> 'a t + (** Create a zipper pointing at the first element of the list *) + + val left : 'a t -> 'a t + (** Go to the left, or do nothing if the zipper is already at leftmost pos *) + + val right : 'a t -> 'a t + (** Go to the right, or do nothing if the zipper is already at rightmost pos *) + + val modify : ('a option -> 'a option) -> 'a t -> 'a t + (** Modify the current element, if any, by returning a new element, or + returning [None] if the element is to be deleted *) + + val focused : 'a t -> 'a option + (** Returns the focused element, if any. [focused zip = Some _] iff + [empty zip = false] *) + + val focused_exn : 'a t -> 'a + (** Returns the focused element, or + @raise Not_found if the zipper is at an end *) +end + (** {2 Conversions} *) type 'a sequence = ('a -> unit) -> unit From 043003cf3b9cd2214ffcc0f5ca94303a245ab0ad Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 13 Jun 2014 23:51:34 +0200 Subject: [PATCH 34/52] big refactoring of CCVector, to fit the recent coding style; safe functions with _exn versions, iterators, printers, comparison and other monadic operators --- core/CCArray.ml | 7 + core/CCArray.mli | 8 +- core/CCVector.ml | 417 ++++++++++++++++++++++++++++++++++------------ core/CCVector.mli | 123 ++++++++++---- 4 files changed, 415 insertions(+), 140 deletions(-) diff --git a/core/CCArray.ml b/core/CCArray.ml index 69d6f445..4f7ad5c6 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -162,3 +162,10 @@ let pp_i ?(sep=", ") pp_item buf a = (if i > 0 then Buffer.add_string buf sep); pp_item buf i a.(i) done + +let print ?(sep=", ") pp_item fmt a = + Array.iteri + (fun i x -> + if i > 0 then Format.pp_print_string fmt sep; + pp_item fmt x + ) a diff --git a/core/CCArray.mli b/core/CCArray.mli index 047ef761..555a77c8 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -68,9 +68,13 @@ val shuffle : 'a t -> unit (** shuffle randomly the array, in place *) val pp: ?sep:string -> (Buffer.t -> 'a -> unit) - -> Buffer.t -> 'a array -> unit + -> Buffer.t -> 'a t -> unit (** print an array of items with printing function *) val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) - -> Buffer.t -> 'a array -> unit + -> Buffer.t -> 'a t -> unit (** print an array, giving the printing function both index and item *) + +val print : ?sep:string -> (Format.formatter -> 'a -> unit) + -> Format.formatter -> 'a t -> unit +(** print an array of items with printing function *) diff --git a/core/CCVector.ml b/core/CCVector.ml index 0ea564e7..cf5de68a 100644 --- a/core/CCVector.ml +++ b/core/CCVector.ml @@ -25,30 +25,76 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Growable, mutable vector} *) +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + (** a vector of 'a. *) type 'a t = { mutable size : int; mutable vec : 'a array; } -let create i = - let i = max i 3 in - { size = 0; - vec = Array.create i (Obj.magic None); - } +let create () = { + size = 0; + vec = [| |]; +} -let resize v newcapacity = +let create_with ?(capacity=128) x = { + size = 0; + vec = Array.make capacity x; +} + +(*$T + (create_with ~capacity:200 1 |> capacity) >= 200 +*) + +let make n x = { + size=n; + vec=Array.make n x; +} + +let init n f = { + size=n; + vec=Array.init n f; +} + +(* is the underlying empty? *) +let _empty_array v = + Array.length v.vec = 0 + +(* assuming the underlying array isn't empty, resize it *) +let _resize v newcapacity = assert (newcapacity >= v.size); - let new_vec = Array.create newcapacity (Obj.magic None) in + assert (not (_empty_array v)); + let new_vec = Array.create newcapacity v.vec.(0) in Array.blit v.vec 0 new_vec 0 v.size; v.vec <- new_vec; () +(*$T + (let v = create_with ~capacity:10 1 in ensure v 200; capacity v >= 200) +*) + +(* grow the array, using [x] as a filler if required *) +let _grow v x = + if _empty_array v + then v.vec <- Array.make 32 x + else + let size = min (2 * Array.length v.vec + 10) Sys.max_array_length in + _resize v size + let ensure v size = - if v.size < size + if Array.length v.vec = 0 + then () + else if v.size < size then - let size' = min (2 * v.size) Sys.max_array_length in - resize v size' + let size' = min size Sys.max_array_length in + _resize v size' let clear v = v.size <- 0 @@ -56,59 +102,92 @@ let clear v = let is_empty v = v.size = 0 let push v x = - (if v.size = Array.length v.vec then resize v (2 * v.size)); + if v.size = Array.length v.vec + then _grow v x; Array.unsafe_set v.vec v.size x; v.size <- v.size + 1 (** add all elements of b to a *) let append a b = - (if Array.length a.vec < a.size + b.size - then resize a (2 * (a.size + b.size))); - Array.blit b.vec 0 a.vec a.size b.size; - a.size <- a.size + b.size + if _empty_array a + then if _empty_array b + then () + else ( + a.vec <- Array.copy b.vec; + a.size <- b.size + ) + else ( + ensure a (a.size + b.size); + assert (Array.length a.vec >= a.size + b.size); + Array.blit b.vec 0 a.vec a.size b.size; + a.size <- a.size + b.size + ) -let append_array a b = - (if Array.length a.vec < a.size + Array.length b - then resize a (2 * (a.size + Array.length b))); - Array.blit b 0 a.vec a.size (Array.length b); - a.size <- a.size + Array.length b +let get v i = + if i < 0 || i >= v.size then failwith "Vector.get"; + Array.unsafe_get v.vec i + +let set v i x = + if i < 0 || i >= v.size then failwith "Vector.set"; + Array.unsafe_set v.vec i x let append_seq a seq = seq (fun x -> push a x) -let pop v = - (if v.size = 0 then failwith "Vector.pop on empty vector"); +let append_array a b = + Array.iter (push a) b + +let equal eq v1 v2 = + let n = min v1.size v2.size in + let rec check i = + if i = n + then v1.size = v2.size + else eq (get v1 i) (get v2 i) && check (i+1) + in check 0 + +let compare cmp v1 v2 = + let n = min v1.size v2.size in + let rec check i = + if i = n + then Pervasives.compare v1.size v2.size + else + let c = cmp (get v1 i) (get v2 i) in + if c = 0 then check (i+1) else c + in check 0 + +let pop_exn v = + if v.size = 0 + then failwith "Vector.pop on empty vector"; v.size <- v.size - 1; let x = v.vec.(v.size) in x -let copy v = - let v' = create v.size in - Array.blit v.vec 0 v'.vec 0 v.size; - v'.size <- v.size; - v' +let pop v = + try Some (pop_exn v) + with Failure _ -> None + +let copy v = { + size = v.size; + vec = Array.sub v.vec 0 v.size; +} + +(*$T + (let v = of_list [1;2;3] in let v' = copy v in \ + to_list v' = [1;2;3]) + create () |> copy |> is_empty +*) let shrink v n = - if n > v.size - then failwith "cannot shrink to bigger size" - else v.size <- n + if n < v.size then v.size <- n -let member ?(eq=(=)) v x = - let n = v.size in - let rec check i = - if i = n then false - else if eq x v.vec.(i) then true - else check (i+1) - in check 0 - -let sort ?(cmp=compare) v = +let sort cmp v = (* copy array (to avoid junk in it), then sort the array *) let a = Array.sub v.vec 0 v.size in Array.fast_sort cmp a; v.vec <- a -let uniq_sort ?(cmp=compare) v = - sort ~cmp v; +let uniq_sort cmp v = + sort cmp v; let n = v.size in (* traverse to remove duplicates. i= current index, j=current append index, j<=i. new_size is the size @@ -116,64 +195,83 @@ let uniq_sort ?(cmp=compare) v = let rec traverse prev i j = if i >= n then () (* done traversing *) else if cmp prev v.vec.(i) = 0 - then (v.size <- v.size - 1; traverse prev (i+1) j) (* duplicate, remove it *) - else (v.vec.(j) <- v.vec.(i); traverse v.vec.(i) (i+1) (j+1)) (* keep it *) + then ( + v.size <- v.size - 1; + traverse prev (i+1) j + ) (* duplicate, remove it *) + else ( + v.vec.(j) <- v.vec.(i); + traverse v.vec.(i) (i+1) (j+1) + ) (* keep it *) in if v.size > 0 - then traverse v.vec.(0) 1 1 (* start at 1, to get the first element in hand *) + then traverse v.vec.(0) 1 1 + (* start at 1, to get the first element in hand *) -let iter v k = +let iter k v = for i = 0 to v.size -1 do k (Array.unsafe_get v.vec i) done -let iteri v k = +let iteri k v = for i = 0 to v.size -1 do k i (Array.unsafe_get v.vec i) done -let map v f = - let v' = create v.size in - for i = 0 to v.size - 1 do - let x = f (Array.unsafe_get v.vec i) in - Array.unsafe_set v'.vec i x - done; - v'.size <- v.size; - v' +let map f v = + if _empty_array v + then create () + else { + size=v.size; + vec=Array.map f v.vec + } -let filter v f = - let v' = create v.size in - for i = 0 to v.size - 1 do - let x = Array.unsafe_get v.vec i in - if f x then push v' x; - done; - v' +let filter p v = + if _empty_array v + then create () + else ( + let v' = create_with ~capacity:v.size v.vec.(0) in + Array.iter + (fun x -> if p x then push v' x) + v.vec; + v' + ) -let fold v acc f = - let acc = ref acc in - for i = 0 to v.size - 1 do - let x = Array.unsafe_get v.vec i in - acc := f !acc x; - done; - !acc +(*$T + filter (fun x-> x mod 2=0) (of_list [1;2;3;4;5]) |> to_list = [2;4] +*) -let exists v p = +let fold f acc v = + let rec fold acc i = + if i = v.size then acc + else + let x = Array.unsafe_get v.vec i in + fold (f acc x) (i+1) + in fold acc 0 + +(*$T + fold (+) 0 (of_list [1;2;3;4;5]) = 15 + fold (+) 0 (create ()) = 0 +*) + +let exists p v = let n = v.size in let rec check i = if i = n then false - else if p v.vec.(i) then true - else check (i+1) + else p v.vec.(i) || check (i+1) in check 0 -let for_all v p = +let for_all p v = let n = v.size in let rec check i = if i = n then true - else if not (p v.vec.(i)) then false - else check (i+1) + else p v.vec.(i) && check (i+1) in check 0 -let find v p = +let member ?(eq=(=)) x v = + exists (eq x) v + +let find_exn p v = let n = v.size in let rec check i = if i = n then raise Not_found @@ -181,65 +279,166 @@ let find v p = else check (i+1) in check 0 -let get v i = - (if i < 0 || i >= v.size then failwith "Vector.get"); - Array.unsafe_get v.vec i +let find p v = + try Some (find_exn p v) + with Not_found -> None -let set v i x = - (if i < 0 || i >= v.size then failwith "Vector.set"); - Array.unsafe_set v.vec i x +let filter_map f v = + let v' = create () in + iter + (fun x -> match f x with + | None -> () + | Some y -> push v' y + ) v; + v' + +let flat_map f v = + let v' = create () in + iter (fun x -> iter (push v') (f x)) v; + v' + +let flat_map' f v = + let v' = create () in + iter + (fun x -> + let seq = f x in + seq (fun y -> push v' y) + ) v; + v' + +let (>>=) x f = flat_map f x + +let (>|=) x f = map f x + +let rev' v = + if v.size > 0 + then ( + let n = v.size in + let vec = v.vec in + for i = 0 to (n-1)/2 do + let x = Array.unsafe_get vec i in + let y = Array.unsafe_get vec (n-i-1) in + Array.unsafe_set vec i y; + Array.unsafe_set vec (n-i-1) x; + done + ) let rev v = - let n = v.size in - let vec = v.vec in - for i = 0 to (n-1)/2 do - let x = Array.unsafe_get vec i in - let y = Array.unsafe_get vec (n-i-1) in - Array.unsafe_set vec i y; - Array.unsafe_set vec (n-i-1) x; - done + let v' = copy v in + rev' v'; + v' + +(*$T + rev (of_list [1;2;3;4]) |> to_list = [4;3;2;1] + rev (of_list [1;2;3;4;5]) |> to_list = [5;4;3;2;1] + rev (create ()) |> to_list = [] +*) let size v = v.size let length v = v.size +let capacity v = Array.length v.vec + let unsafe_get_array v = v.vec -type 'a sequence = ('a -> unit) -> unit - -let of_seq ?(init=create 10) seq = +let of_seq ?(init=create ()) seq = append_seq init seq; init -let to_seq = iter +(*$T + of_seq CCSequence.(1 -- 10) |> to_list = CCList.(1 -- 10) +*) + +let to_seq v k = iter k v let slice v start len = assert (start >= 0 && len >= 0); fun k -> assert (start+len < v.size); - for i = start to start+len do + for i = start to start+len-1 do let x = Array.unsafe_get v.vec i in k x done -let from_array a = - let c = Array.length a in - let v = create c in - Array.blit a 0 v.vec 0 c; - v.size <- c; - v +(*$T + slice (of_list [0;1;2;3;4]) 1 3 |> CCList.of_seq = [1;2;3] +*) -let from_list l = - let v = create 10 in - List.iter (push v) l; - v +let (--) i j = + if i>j + then init (i-j+1) (fun k -> i-k) + else init (j-i+1) (fun k -> i+k) + +(*$T + (1 -- 4) |> to_list = [1;2;3;4] + (4 -- 1) |> to_list = [4;3;2;1] + (0 -- 0) |> to_list = [0] +*) + +let of_array a = + if Array.length a = 0 + then create () + else { + size=Array.length a; + vec=Array.copy a; + } + +let of_list l = match l with + | [] -> create() + | x::l' -> + let v = create_with ~capacity:(List.length l + 5) x in + List.iter (push v) l; + v let to_array v = Array.sub v.vec 0 v.size let to_list v = - let l = ref [] in - for i = 0 to v.size - 1 do - l := get v i :: !l; - done; - List.rev !l + List.rev (fold (fun acc x -> x::acc) [] v) + +let of_gen ?(init=create ()) g = + let rec aux g = match g() with + | None -> init + | Some x -> push init x; aux g + in aux g + +let to_gen v = + let i = ref 0 in + fun () -> + if !i < v.size + then ( + let x = v.vec.( !i ) in + incr i; + Some x + ) else None + +let of_klist ?(init=create ()) l = + let rec aux l = match l() with + | `Nil -> init + | `Cons (x,l') -> push init x; aux l' + in aux l + +let to_klist v = + let rec aux i () = + if i=v.size then `Nil + else `Cons (v.vec.(i), aux (i+1)) + in aux 0 + +let pp ?(start="[") ?(stop="]") ?(sep=", ") pp_item buf v = + Buffer.add_string buf start; + iteri + (fun i x -> + if i > 0 then Buffer.add_string buf sep; + pp_item buf x + ) v; + Buffer.add_string buf stop + +let print ?(start="[") ?(stop="]") ?(sep=", ") pp_item fmt v = + Format.fprintf fmt "@[%s" start; + iteri + (fun i x -> + if i > 0 then Format.pp_print_string fmt sep; + pp_item fmt x + ) v; + Format.fprintf fmt "%s@]" stop diff --git a/core/CCVector.mli b/core/CCVector.mli index 697c601e..6981935c 100644 --- a/core/CCVector.mli +++ b/core/CCVector.mli @@ -29,16 +29,33 @@ type 'a t (** the type of a vector of 'a *) type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit -val create : int -> 'a t -(** create a vector of given initial capacity *) +val create : unit -> 'a t +(** Create a new, empty vector *) + +val create_with : ?capacity:int -> 'a -> 'a t +(** Create a new vector, using the given value as a filler. + @param capacity the size of the underlying array + {b caution}: the value will likely not be GC'd before the vector is. *) + +val make : int -> 'a -> 'a t +(** [make n x] makes a vector of size [n], filled with [x] *) + +val init : int -> (int -> 'a) -> 'a t +(** Init the vector with the given function and size *) val clear : 'a t -> unit (** clear the content of the vector *) val ensure : 'a t -> int -> unit -(** Ensure that the vector can contain that much elements, resizing it - if required *) +(** Hint to the vector that it should have at least the given capacity. + Just a hint, will not be enforced if the vector is empty. *) val is_empty : 'a t -> bool (** is the vector empty? *) @@ -55,56 +72,88 @@ val append_array : 'a t -> 'a array -> unit val append_seq : 'a t -> 'a sequence -> unit (** Append content of sequence *) -val pop : 'a t -> 'a -(** remove last element, or raise a Failure if empty *) +val equal : 'a equal -> 'a t equal + +val compare : 'a ord -> 'a t ord +(** Lexicographic comparison *) + +val pop : 'a t -> 'a option +(** Remove last element, or [None] *) + +val pop_exn : 'a t -> 'a +(** remove last element, or raise a Failure if empty + @raise Failure on an empty vector *) val copy : 'a t -> 'a t (** shallow copy *) val shrink : 'a t -> int -> unit -(** shrink to the given size (remove elements above this size) *) +(** shrink to the given size (remove elements above this size). + Does nothing if the parameter is bigger than the current size. *) -val member : ?eq:('a -> 'a -> bool) -> 'a t -> 'a -> bool +val member : ?eq:('a -> 'a -> bool) -> 'a -> 'a t -> bool (** is the element a member of the vector? *) -val sort : ?cmp:('a -> 'a -> int) -> 'a t -> unit +val sort : ('a -> 'a -> int) -> 'a t -> unit (** sort the array in place*) -val uniq_sort : ?cmp:('a -> 'a -> int) -> 'a t -> unit +val uniq_sort : ('a -> 'a -> int) -> 'a t -> unit (** sort the array and remove duplicates in place*) -val iter : 'a t -> ('a -> unit) -> unit +val iter : ('a -> unit) -> 'a t -> unit (** iterate on the vector *) -val iteri : 'a t -> (int -> 'a -> unit) -> unit +val iteri : (int -> 'a -> unit) -> 'a t -> unit (** iterate on the vector with indexes *) -val map : 'a t -> ('a -> 'b) -> 'b t +val map : ('a -> 'b) -> 'a t -> 'b t (** map elements of the vector *) -val filter : 'a t -> ('a -> bool) -> 'a t +val filter : ('a -> bool) -> 'a t -> 'a t (** filter elements from vector *) -val fold : 'a t -> 'b -> ('b -> 'a -> 'b) -> 'b +val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b (** fold on elements of the vector *) -val exists : 'a t -> ('a -> bool) -> bool +val exists : ('a -> bool) -> 'a t -> bool (** existential test *) -val for_all : 'a t -> ('a -> bool) -> bool +val for_all : ('a -> bool) -> 'a t -> bool (** universal test *) -val find : 'a t -> ('a -> bool) -> 'a -(** find an element that satisfies the predicate, or Not_found *) +val find : ('a -> bool) -> 'a t -> 'a option +(** Find an element that satisfies the predicate *) + +val find_exn : ('a -> bool) -> 'a t -> 'a +(** find an element that satisfies the predicate, or + @raise Not_found if no element does *) + +val filter_map : ('a -> 'b option) -> 'a t -> 'b t +(** Map elements with a function, possibly filtering some of them out *) + +val flat_map : ('a -> 'b t) -> 'a t -> 'b t +(** Map each element to a sub-vector *) + +val flat_map' : ('a -> 'b sequence) -> 'a t -> 'b t +(** Like {!flat_map}, but using {!sequence} for intermediate collections *) + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t + +val (>|=) : 'a t -> ('a -> 'b) -> 'b t val get : 'a t -> int -> 'a -(** access element, or Failure if bad index *) +(** access element, or + @raise Failure if bad index *) val set : 'a t -> int -> 'a -> unit -(** access element, or Failure if bad index *) +(** access element, or + @raise Failure if bad index *) -val rev : 'a t -> unit -(** Reverse array in place *) +val rev : 'a t -> 'a t +(** Reverse the vector *) + +val rev' : 'a t -> unit +(** Reverse the vector in place *) val size : 'a t -> int (** number of elements in vector *) @@ -112,21 +161,37 @@ val size : 'a t -> int val length : _ t -> int (** Synonym for {! size} *) +val capacity : _ t -> int +(** Number of elements the vector can contain without being resized *) + val unsafe_get_array : 'a t -> 'a array -(** Access the underlying *shared* array (do not modify!). +(** Access the underlying {b shared} array (do not modify!). [unsafe_get_array v] is longer than [size v], but elements at higher index than [size v] are undefined (do not access!). *) +val (--) : int -> int -> int t +(** Range of integers (both included) *) + +val of_array : 'a array -> 'a t +val of_list : 'a list -> 'a t +val to_array : 'a t -> 'a array +val to_list : 'a t -> 'a list + val of_seq : ?init:'a t -> 'a sequence -> 'a t val to_seq : 'a t -> 'a sequence val slice : 'a t -> int -> int -> 'a sequence (** [slice v start len] is the sequence of elements from [v.(start)] - to [v.(start+len)] included. *) + to [v.(start+len-1)]. *) -val from_array : 'a array -> 'a t -val from_list : 'a list -> 'a t -val to_array : 'a t -> 'a array -val to_list : 'a t -> 'a list +val of_klist : ?init:'a t -> 'a klist -> 'a t +val to_klist : 'a t -> 'a klist +val of_gen : ?init:'a t -> 'a gen -> 'a t +val to_gen : 'a t -> 'a gen +val pp : ?start:string -> ?stop:string -> ?sep:string -> + 'a printer -> 'a t printer + +val print : ?start:string -> ?stop:string -> ?sep:string -> + 'a formatter -> 'a t formatter From 65aceba85e886ab02335904e353ba1200f4a6837 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 00:11:16 +0200 Subject: [PATCH 35/52] phantom types for CCVector, for read-write and read-only vectors --- core/CCVector.ml | 34 ++++++++++-- core/CCVector.mli | 135 +++++++++++++++++++++++++--------------------- 2 files changed, 104 insertions(+), 65 deletions(-) diff --git a/core/CCVector.ml b/core/CCVector.ml index cf5de68a..115c1777 100644 --- a/core/CCVector.ml +++ b/core/CCVector.ml @@ -25,6 +25,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Growable, mutable vector} *) +type rw = [`RW] +type ro = [`RO] + type 'a sequence = ('a -> unit) -> unit type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a gen = unit -> 'a option @@ -34,11 +37,21 @@ type 'a printer = Buffer.t -> 'a -> unit type 'a formatter = Format.formatter -> 'a -> unit (** a vector of 'a. *) -type 'a t = { +type ('a,'mut) t = { mutable size : int; mutable vec : 'a array; } +let freeze v = { + size=v.size; + vec=v.vec; +} + +let freeze_copy v = { + size=v.size; + vec=Array.sub v.vec 0 v.size; +} + let create () = { size = 0; vec = [| |]; @@ -180,14 +193,25 @@ let copy v = { let shrink v n = if n < v.size then v.size <- n -let sort cmp v = - (* copy array (to avoid junk in it), then sort the array *) - let a = Array.sub v.vec 0 v.size in +let sort' cmp v = + (* possibly copy array (to avoid junk at its end), then sort the array *) + let a = + if Array.length v.vec = v.size then v.vec + else Array.sub v.vec 0 v.size + in Array.fast_sort cmp a; v.vec <- a +let sort cmp v = + let v' = { + size=v.size; + vec=Array.sub v.vec 0 v.size; + } in + Array.sort cmp v'.vec; + v' + let uniq_sort cmp v = - sort cmp v; + sort' cmp v; let n = v.size in (* traverse to remove duplicates. i= current index, j=current append index, j<=i. new_size is the size diff --git a/core/CCVector.mli b/core/CCVector.mli index 6981935c..4d8cd997 100644 --- a/core/CCVector.mli +++ b/core/CCVector.mli @@ -25,8 +25,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Growable, mutable vector} *) -type 'a t -(** the type of a vector of 'a *) +type ro +type rw + +(** Mutability is [rw] (read-write) or [ro] (read-only) *) + +type ('a, 'mut) t +(** the type of a vector of elements of type ['a], with + a mutability flat ['mut] *) type 'a sequence = ('a -> unit) -> unit type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] @@ -36,162 +42,171 @@ type 'a ord = 'a -> 'a -> int type 'a printer = Buffer.t -> 'a -> unit type 'a formatter = Format.formatter -> 'a -> unit -val create : unit -> 'a t +val freeze : ('a, _) t -> ('a, ro) t +(** Make an immutable vector (no copy! Don't use the old version)*) + +val freeze_copy : ('a, _) t -> ('a, ro) t +(** Copy the vector into an immutable version *) + +val create : unit -> ('a, rw) t (** Create a new, empty vector *) -val create_with : ?capacity:int -> 'a -> 'a t +val create_with : ?capacity:int -> 'a -> ('a, rw) t (** Create a new vector, using the given value as a filler. @param capacity the size of the underlying array {b caution}: the value will likely not be GC'd before the vector is. *) -val make : int -> 'a -> 'a t +val make : int -> 'a -> ('a, 'mut) t (** [make n x] makes a vector of size [n], filled with [x] *) -val init : int -> (int -> 'a) -> 'a t +val init : int -> (int -> 'a) -> ('a, 'mut) t (** Init the vector with the given function and size *) -val clear : 'a t -> unit +val clear : ('a, rw) t -> unit (** clear the content of the vector *) -val ensure : 'a t -> int -> unit +val ensure : ('a, rw) t -> int -> unit (** Hint to the vector that it should have at least the given capacity. Just a hint, will not be enforced if the vector is empty. *) -val is_empty : 'a t -> bool +val is_empty : ('a, _) t -> bool (** is the vector empty? *) -val push : 'a t -> 'a -> unit +val push : ('a, rw) t -> 'a -> unit (** add an element at the end of the vector *) -val append : 'a t -> 'a t -> unit +val append : ('a, rw) t -> ('a, _) t -> unit (** [append a b] adds all elements of b to a *) -val append_array : 'a t -> 'a array -> unit +val append_array : ('a, rw) t -> 'a array -> unit (** same as append, with an array *) -val append_seq : 'a t -> 'a sequence -> unit +val append_seq : ('a, rw) t -> 'a sequence -> unit (** Append content of sequence *) -val equal : 'a equal -> 'a t equal +val equal : 'a equal -> ('a,_) t equal -val compare : 'a ord -> 'a t ord +val compare : 'a ord -> ('a,_) t ord (** Lexicographic comparison *) -val pop : 'a t -> 'a option +val pop : ('a, rw) t -> 'a option (** Remove last element, or [None] *) -val pop_exn : 'a t -> 'a +val pop_exn : ('a, rw) t -> 'a (** remove last element, or raise a Failure if empty @raise Failure on an empty vector *) -val copy : 'a t -> 'a t -(** shallow copy *) +val copy : ('a,_) t -> ('a,'mut) t +(** Shallow copy (may give an immutable or mutable vector) *) -val shrink : 'a t -> int -> unit +val shrink : ('a, rw) t -> int -> unit (** shrink to the given size (remove elements above this size). Does nothing if the parameter is bigger than the current size. *) -val member : ?eq:('a -> 'a -> bool) -> 'a -> 'a t -> bool +val member : ?eq:('a -> 'a -> bool) -> 'a -> ('a, _) t -> bool (** is the element a member of the vector? *) -val sort : ('a -> 'a -> int) -> 'a t -> unit -(** sort the array in place*) +val sort : ('a -> 'a -> int) -> ('a, _) t -> ('a, 'mut) t +(** Sort the vector *) -val uniq_sort : ('a -> 'a -> int) -> 'a t -> unit +val sort' : ('a -> 'a -> int) -> ('a, rw) t -> unit +(** sort the vector in place*) + +val uniq_sort : ('a -> 'a -> int) -> ('a, rw) t -> unit (** sort the array and remove duplicates in place*) -val iter : ('a -> unit) -> 'a t -> unit +val iter : ('a -> unit) -> ('a,_) t -> unit (** iterate on the vector *) -val iteri : (int -> 'a -> unit) -> 'a t -> unit +val iteri : (int -> 'a -> unit) -> ('a,_) t -> unit (** iterate on the vector with indexes *) -val map : ('a -> 'b) -> 'a t -> 'b t +val map : ('a -> 'b) -> ('a,_) t -> ('b, 'mut) t (** map elements of the vector *) -val filter : ('a -> bool) -> 'a t -> 'a t +val filter : ('a -> bool) -> ('a,_) t -> ('a, 'mut) t (** filter elements from vector *) -val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b +val fold : ('b -> 'a -> 'b) -> 'b -> ('a,_) t -> 'b (** fold on elements of the vector *) -val exists : ('a -> bool) -> 'a t -> bool +val exists : ('a -> bool) -> ('a,_) t -> bool (** existential test *) -val for_all : ('a -> bool) -> 'a t -> bool +val for_all : ('a -> bool) -> ('a,_) t -> bool (** universal test *) -val find : ('a -> bool) -> 'a t -> 'a option +val find : ('a -> bool) -> ('a,_) t -> 'a option (** Find an element that satisfies the predicate *) -val find_exn : ('a -> bool) -> 'a t -> 'a +val find_exn : ('a -> bool) -> ('a,_) t -> 'a (** find an element that satisfies the predicate, or @raise Not_found if no element does *) -val filter_map : ('a -> 'b option) -> 'a t -> 'b t +val filter_map : ('a -> 'b option) -> ('a,_) t -> ('b, 'mut) t (** Map elements with a function, possibly filtering some of them out *) -val flat_map : ('a -> 'b t) -> 'a t -> 'b t +val flat_map : ('a -> ('b,_) t) -> ('a,_) t -> ('b, 'mut) t (** Map each element to a sub-vector *) -val flat_map' : ('a -> 'b sequence) -> 'a t -> 'b t +val flat_map' : ('a -> 'b sequence) -> ('a,_) t -> ('b, 'mut) t (** Like {!flat_map}, but using {!sequence} for intermediate collections *) -val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +val (>>=) : ('a,_) t -> ('a -> ('b,_) t) -> ('b, 'mut) t -val (>|=) : 'a t -> ('a -> 'b) -> 'b t +val (>|=) : ('a,_) t -> ('a -> 'b) -> ('b, 'mut) t -val get : 'a t -> int -> 'a +val get : ('a,_) t -> int -> 'a (** access element, or @raise Failure if bad index *) -val set : 'a t -> int -> 'a -> unit +val set : ('a, rw) t -> int -> 'a -> unit (** access element, or @raise Failure if bad index *) -val rev : 'a t -> 'a t +val rev : ('a,_) t -> ('a, 'mut) t (** Reverse the vector *) -val rev' : 'a t -> unit +val rev' : ('a, rw) t -> unit (** Reverse the vector in place *) -val size : 'a t -> int +val size : ('a,_) t -> int (** number of elements in vector *) -val length : _ t -> int +val length : (_,_) t -> int (** Synonym for {! size} *) -val capacity : _ t -> int +val capacity : (_,_) t -> int (** Number of elements the vector can contain without being resized *) -val unsafe_get_array : 'a t -> 'a array +val unsafe_get_array : ('a, rw) t -> 'a array (** Access the underlying {b shared} array (do not modify!). [unsafe_get_array v] is longer than [size v], but elements at higher index than [size v] are undefined (do not access!). *) -val (--) : int -> int -> int t +val (--) : int -> int -> (int, 'mut) t (** Range of integers (both included) *) -val of_array : 'a array -> 'a t -val of_list : 'a list -> 'a t -val to_array : 'a t -> 'a array -val to_list : 'a t -> 'a list +val of_array : 'a array -> ('a, 'mut) t +val of_list : 'a list -> ('a, 'mut) t +val to_array : ('a,_) t -> 'a array +val to_list : ('a,_) t -> 'a list -val of_seq : ?init:'a t -> 'a sequence -> 'a t +val of_seq : ?init:('a,rw) t -> 'a sequence -> ('a, rw) t -val to_seq : 'a t -> 'a sequence +val to_seq : ('a,_) t -> 'a sequence -val slice : 'a t -> int -> int -> 'a sequence +val slice : ('a,_) t -> int -> int -> 'a sequence (** [slice v start len] is the sequence of elements from [v.(start)] to [v.(start+len-1)]. *) -val of_klist : ?init:'a t -> 'a klist -> 'a t -val to_klist : 'a t -> 'a klist -val of_gen : ?init:'a t -> 'a gen -> 'a t -val to_gen : 'a t -> 'a gen +val of_klist : ?init:('a, rw) t -> 'a klist -> ('a, rw) t +val to_klist : ('a,_) t -> 'a klist +val of_gen : ?init:('a, rw) t -> 'a gen -> ('a, rw) t +val to_gen : ('a,_) t -> 'a gen val pp : ?start:string -> ?stop:string -> ?sep:string -> - 'a printer -> 'a t printer + 'a printer -> ('a,_) t printer val print : ?start:string -> ?stop:string -> ?sep:string -> - 'a formatter -> 'a t formatter + 'a formatter -> ('a,_) t formatter From 4579213e6323757ce2471d729841295cc5a8f47d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 00:23:09 +0200 Subject: [PATCH 36/52] some small additions --- core/CCArray.ml | 8 ++++++-- core/CCArray.mli | 3 +++ core/CCFun.ml | 2 ++ core/CCFun.mli | 9 +++++++++ core/CCVector.mli | 4 ++-- 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/core/CCArray.ml b/core/CCArray.ml index 4f7ad5c6..2838c7cc 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -141,14 +141,18 @@ let except_idx a i = (* Randomly shuffle the array, in place. See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) -let shuffle a = +let _shuffle _rand_int a = for i = 1 to Array.length a - 1 do - let j = Random.int i in + let j = _rand_int i in let tmp = a.(i) in a.(i) <- a.(j); a.(j) <- tmp; done +let shuffle a = _shuffle Random.int a + +let shuffle_with st a = _shuffle (Random.State.int st) a + (** print an array of items using the printing function *) let pp ?(sep=", ") pp_item buf a = for i = 0 to Array.length a - 1 do diff --git a/core/CCArray.mli b/core/CCArray.mli index 555a77c8..e31ce665 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -67,6 +67,9 @@ val except_idx : 'a t -> int -> 'a list val shuffle : 'a t -> unit (** shuffle randomly the array, in place *) +val shuffle_with : Random.State.t -> 'a t -> unit +(** Like shuffle but using a specialized random state *) + val pp: ?sep:string -> (Buffer.t -> 'a -> unit) -> Buffer.t -> 'a t -> unit (** print an array of items with printing function *) diff --git a/core/CCFun.ml b/core/CCFun.ml index dbedcf9b..fa4eadb6 100644 --- a/core/CCFun.ml +++ b/core/CCFun.ml @@ -40,6 +40,8 @@ let const x _ = x let uncurry f (x,y) = f x y +let tap f x = ignore (f x); x + let (%>) = compose let (%) f g x = f (g x) diff --git a/core/CCFun.mli b/core/CCFun.mli index d2a02f3e..81048ae0 100644 --- a/core/CCFun.mli +++ b/core/CCFun.mli @@ -48,6 +48,15 @@ val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c val uncurry : ('a -> 'b -> 'c) -> ('a * 'b) -> 'c +val tap : ('a -> 'b) -> 'a -> 'a +(** [tap f x] evaluates [f x], discards it, then returns [x]. Useful + in a pipeline, for instance: + {[CCArray.(1 -- 10) + |> tap CCArray.shuffle + |> tap CCArray.sort Pervasives.compare + ]} +*) + val (%) : ('b -> 'c) -> ('a -> 'b) -> 'a -> 'c (** Mathematical composition *) diff --git a/core/CCVector.mli b/core/CCVector.mli index 4d8cd997..e981f6f5 100644 --- a/core/CCVector.mli +++ b/core/CCVector.mli @@ -25,8 +25,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Growable, mutable vector} *) -type ro -type rw +type ro = [`RO] +type rw = [`RW] (** Mutability is [rw] (read-write) or [ro] (read-only) *) From 174957e6049fe96603d18d4b4bc8ba760f361f99 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 01:01:22 +0200 Subject: [PATCH 37/52] more reasonable increase in size in CCVector --- core/CCVector.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/CCVector.ml b/core/CCVector.ml index 115c1777..28613205 100644 --- a/core/CCVector.ml +++ b/core/CCVector.ml @@ -98,7 +98,8 @@ let _grow v x = if _empty_array v then v.vec <- Array.make 32 x else - let size = min (2 * Array.length v.vec + 10) Sys.max_array_length in + let n = Array.length v.vec in + let size = min (n + n/2 + 10) Sys.max_array_length in _resize v size let ensure v size = From e3bde405986bd8339175204f9dd181d16b0faef8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 01:01:40 +0200 Subject: [PATCH 38/52] more operators in CCKlist --- core/CCKList.ml | 163 ++++++++++++++++++++++++++++++++----------- core/CCKList.mli | 69 ++++++++++++++---- tests/bench_batch.ml | 2 +- 3 files changed, 181 insertions(+), 53 deletions(-) diff --git a/core/CCKList.ml b/core/CCKList.ml index 6bce244b..d9a1112e 100644 --- a/core/CCKList.ml +++ b/core/CCKList.ml @@ -25,6 +25,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Continuation List} *) +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + type + 'a t = unit -> [ `Nil | `Cons of 'a * 'a t @@ -40,47 +47,20 @@ let is_empty l = match l () with | `Nil -> true | `Cons _ -> false -let to_list l = - let rec direct i (l:'a t) = match l () with - | `Nil -> [] - | _ when i=0 -> safe [] l - | `Cons (x, f) -> x :: direct (i-1) f - and safe acc l = match l () with - | `Nil -> List.rev acc - | `Cons (x,l') -> safe (x::acc) l' - in - direct 200 l +let rec equal eq l1 l2 = match l1(), l2() with + | `Nil, `Nil -> true + | `Nil, _ + | _, `Nil -> false + | `Cons (x1,l1'), `Cons (x2,l2') -> + eq x1 x2 && equal eq l1' l2' -let of_list l = - let rec aux l () = match l with - | [] -> `Nil - | x::l' -> `Cons (x, aux l') - in aux l - -let equal ?(eq=(=)) l1 l2 = - let rec aux l1 l2 = match l1(), l2() with - | `Nil, `Nil -> true - | `Nil, _ - | _, `Nil -> false - | `Cons (x1,l1'), `Cons (x2,l2') -> - eq x1 x2 && aux l1' l2' - in aux l1 l2 - -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -let rec to_seq res k = match res () with - | `Nil -> () - | `Cons (s, f) -> k s; to_seq f k - -let to_gen l = - let l = ref l in - fun () -> - match !l () with - | `Nil -> None - | `Cons (x,l') -> - l := l'; - Some x +let rec compare cmp l1 l2 = match l1(), l2() with + | `Nil, `Nil -> 0 + | `Nil, _ -> -1 + | _, `Nil -> 1 + | `Cons (x1,l1'), `Cons (x2,l2') -> + let c = cmp x1 x2 in + if c = 0 then compare cmp l1' l2' else c let rec fold f acc res = match res () with | `Nil -> acc @@ -97,11 +77,21 @@ let rec take n (l:'a t) () = match l () with | `Nil -> `Nil | `Cons (x,l') -> `Cons (x, take (n-1) l') +let rec take_while p l () = match l () with + | `Nil -> `Nil + | `Cons (x,l') when p x -> `Cons (x, take_while p l') + | `Cons (_,l') -> take_while p l' () + let rec drop n (l:'a t) () = match l () with | l' when n=0 -> l' | `Nil -> `Nil | `Cons (_,l') -> drop (n-1) l' () +let rec drop_while p l () = match l() with + | `Nil -> `Nil + | `Cons (x,l') when p x -> drop_while p l' () + | `Cons _ as res -> res + (*$Q (Q.pair (Q.list Q.small_int) Q.small_int) (fun (l,n) -> \ let s = of_list l in let s1, s2 = take n s, drop n s in \ @@ -173,3 +163,96 @@ let range i j = *) let (--) = range + +let rec fold2 f acc l1 l2 = match l1(), l2() with + | `Nil, _ + | _, `Nil -> acc + | `Cons(x1,l1'), `Cons(x2,l2') -> + fold2 f (f acc x1 x2) l1' l2' + +let rec map2 f l1 l2 () = match l1(), l2() with + | `Nil, _ + | _, `Nil -> `Nil + | `Cons(x1,l1'), `Cons(x2,l2') -> + `Cons (f x1 x2, map2 f l1' l2') + +let rec iter2 f l1 l2 = match l1(), l2() with + | `Nil, _ + | _, `Nil -> () + | `Cons(x1,l1'), `Cons(x2,l2') -> + f x1 x2; iter2 f l1' l2' + +let rec for_all2 f l1 l2 = match l1(), l2() with + | `Nil, _ + | _, `Nil -> true + | `Cons(x1,l1'), `Cons(x2,l2') -> + f x1 x2 && for_all2 f l1' l2' + +let rec exists2 f l1 l2 = match l1(), l2() with + | `Nil, _ + | _, `Nil -> false + | `Cons(x1,l1'), `Cons(x2,l2') -> + f x1 x2 || exists2 f l1' l2' + +let rec merge cmp l1 l2 () = match l1(), l2() with + | `Nil, tl2 -> tl2 + | tl1, `Nil -> tl1 + | `Cons(x1,l1'), `Cons(x2,l2') -> + if cmp x1 x2 < 0 + then `Cons (x1, merge cmp l1' l2) + else `Cons (x2, merge cmp l1 l2') + +(** {2 Conversions} *) + +let rec _to_rev_list acc l = match l() with + | `Nil -> acc + | `Cons (x,l') -> _to_rev_list (x::acc) l' + +let to_rev_list l = _to_rev_list [] l + +let to_list l = + let rec direct i (l:'a t) = match l () with + | `Nil -> [] + | _ when i=0 -> List.rev (_to_rev_list [] l) + | `Cons (x, f) -> x :: direct (i-1) f + in + direct 200 l + +let of_list l = + let rec aux l () = match l with + | [] -> `Nil + | x::l' -> `Cons (x, aux l') + in aux l + +let rec to_seq res k = match res () with + | `Nil -> () + | `Cons (s, f) -> k s; to_seq f k + +let to_gen l = + let l = ref l in + fun () -> + match !l () with + | `Nil -> None + | `Cons (x,l') -> + l := l'; + Some x + +(** {2 IO} *) + +let pp ?(sep=",") pp_item buf l = + let rec pp buf l = match l() with + | `Nil -> () + | `Cons (x,l') -> Buffer.add_string buf sep; pp_item buf x; pp buf l' + in + match l() with + | `Nil -> () + | `Cons (x,l') -> pp_item buf x; pp buf l' + +let print ?(sep=",") pp_item fmt l = + let rec pp fmt l = match l() with + | `Nil -> () + | `Cons (x,l') -> Format.pp_print_string fmt sep; pp_item fmt x; pp fmt l' + in + match l() with + | `Nil -> () + | `Cons (x,l') -> pp_item fmt x; pp fmt l' diff --git a/core/CCKList.mli b/core/CCKList.mli index 20cce78f..ddb808bb 100644 --- a/core/CCKList.mli +++ b/core/CCKList.mli @@ -25,6 +25,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Continuation List} *) +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + +(** {2 Basics} *) + type + 'a t = unit -> [ `Nil | `Cons of 'a * 'a t @@ -40,30 +49,27 @@ val singleton : 'a -> 'a t val is_empty : 'a t -> bool -val of_list : 'a list -> 'a t +val equal : 'a equal -> 'a t equal +(** Equality step by step. Eager. *) -val to_list : 'a t -> 'a list -(** Gather all values into a list *) - -val equal : ?eq:('a -> 'a -> bool) -> 'a t -> 'a t -> bool - -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -val to_seq : 'a t -> 'a sequence -val to_gen : 'a t -> 'a gen +val compare : 'a ord -> 'a t ord +(** Lexicographic comparison. Eager. *) val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a (** Fold on values *) val iter : ('a -> unit) -> 'a t -> unit -val length : 'a t -> int +val length : _ t -> int val take : int -> 'a t -> 'a t +val take_while : ('a -> bool) -> 'a t -> 'a t + val drop : int -> 'a t -> 'a t +val drop_while : ('a -> bool) -> 'a t -> 'a t + val map : ('a -> 'b) -> 'a t -> 'b t val fmap : ('a -> 'b option) -> 'a t -> 'b t @@ -81,3 +87,42 @@ val flatten : 'a t t -> 'a t val range : int -> int -> int t val (--) : int -> int -> int t + +(** {2 Operations on two Collections} *) + +val fold2 : ('acc -> 'a -> 'b -> 'acc) -> 'acc -> 'a t -> 'b t -> 'acc +(** Fold on two collections at once. Stop at soon as one of them ends *) + +val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t +(** Map on two collections at once. Stop as soon as one of the + arguments is exhausted *) + +val iter2 : ('a -> 'b -> unit) -> 'a t -> 'b t -> unit +(** Iterate on two collections at once. Stop as soon as one of them ends *) + +val for_all2 : ('a -> 'b -> bool) -> 'a t -> 'b t -> bool +val exists2 : ('a -> 'b -> bool) -> 'a t -> 'b t -> bool + +val merge : 'a ord -> 'a t -> 'a t -> 'a t +(** Merge two sorted iterators into a sorted iterator *) + +(** {2 Conversions} *) + +val of_list : 'a list -> 'a t + +val to_list : 'a t -> 'a list +(** Gather all values into a list *) + +val to_rev_list : 'a t -> 'a list +(** Convert to a list, in reverse order. More efficient than {!to_list} *) + +val to_seq : 'a t -> 'a sequence + +val to_gen : 'a t -> 'a gen + + +(** {2 IO} *) + +val pp : ?sep:string -> 'a printer -> 'a t printer + +val print : ?sep:string -> 'a formatter -> 'a t formatter diff --git a/tests/bench_batch.ml b/tests/bench_batch.ml index daac17f6..1608cfb2 100644 --- a/tests/bench_batch.ml +++ b/tests/bench_batch.ml @@ -78,7 +78,7 @@ end) module BenchKList = Make(struct include CCKList let name = "klist" - let equal a b = equal a b + let equal a b = equal (=) a b let doubleton x y = CCKList.of_list [ x; y ] end) From 2492ee48a6579baf4da4a46c0833dd9c9a7ee2d5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 02:16:14 +0200 Subject: [PATCH 39/52] small change in CCOrd --- core/CCOrd.ml | 7 +++++++ core/CCOrd.mli | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/core/CCOrd.ml b/core/CCOrd.ml index fe393055..8474e12e 100644 --- a/core/CCOrd.ml +++ b/core/CCOrd.ml @@ -31,6 +31,13 @@ type 'a t = 'a -> 'a -> int let compare = Pervasives.compare +let opp f x y = - (f x y) + +let equiv i j = + if i<0 then j<0 + else if i>0 then j>0 + else j=0 + let int_ (x:int) y = Pervasives.compare x y let string_ (x:string) y = Pervasives.compare x y let bool_ (x:bool) y = Pervasives.compare x y diff --git a/core/CCOrd.mli b/core/CCOrd.mli index 80a38d6d..90e929d8 100644 --- a/core/CCOrd.mli +++ b/core/CCOrd.mli @@ -32,6 +32,12 @@ type 'a t = 'a -> 'a -> int val compare : 'a t (** Polymorphic "magic" comparison *) +val opp : 'a t -> 'a t +(** Opposite order *) + +val equiv : int -> int -> bool +(** Returns [true] iff the two comparison results are the same *) + val int_ : int t val string_ : string t val bool_ : bool t From 4550a1c2c2b2d6f13b002ae7bc7f1cdf4200dfef Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 02:16:49 +0200 Subject: [PATCH 40/52] cleaner system to specify hash/eq/cmp for operations in CCLinq; use Map to implement most binary operations, including join --- core/CCLinq.ml | 432 ++++++++++++++++++++++++++++-------------------- core/CCLinq.mli | 22 ++- 2 files changed, 265 insertions(+), 189 deletions(-) diff --git a/core/CCLinq.ml b/core/CCLinq.ml index 9cf3a918..489740ee 100644 --- a/core/CCLinq.ml +++ b/core/CCLinq.ml @@ -31,12 +31,138 @@ type 'a equal = 'a -> 'a -> bool type 'a ord = 'a -> 'a -> int type 'a hash = 'a -> int +(* TODO: add CCVector as a collection *) + let _id x = x +module Map = struct + type ('a, 'b) t = { + is_empty : unit -> bool; + size : unit -> int; (** Number of keys *) + get : 'a -> 'b option; + fold : 'c. ('c -> 'a -> 'b -> 'c) -> 'c -> 'c; + to_seq : ('a * 'b) sequence; + } + + let get m x = m.get x + let mem m x = match m.get x with + | None -> false + | Some _ -> true + let to_seq m = m.to_seq + let size m = m.size () + + type ('a, 'b) build = { + mutable cur : ('a, 'b) t; + add : 'a -> 'b -> unit; + update : 'a -> ('b option -> 'b option) -> unit; + } + + let build_get b = b.cur + let add b x y = b.add x y + let update b f = b.update f + + (* careful to use this map linearly *) + let make_hash (type key) ?(eq=(=)) ?(hash=Hashtbl.hash) () = + let module H = Hashtbl.Make(struct + type t = key + let equal = eq + let hash = hash + end) in + (* build table *) + let tbl = H.create 32 in + let cur = { + is_empty = (fun () -> H.length tbl = 0); + size = (fun () -> H.length tbl); + get = (fun k -> + try Some (H.find tbl k) + with Not_found -> None); + fold = (fun f acc -> H.fold (fun k v acc -> f acc k v) tbl acc); + to_seq = (fun k -> H.iter (fun key v -> k (key,v)) tbl); + } in + { cur; + add = (fun k v -> H.replace tbl k v); + update = (fun k f -> + match (try f (Some (H.find tbl k)) with Not_found -> f None) with + | None -> H.remove tbl k + | Some v' -> H.replace tbl k v'); + } + + let make_cmp (type key) ?(cmp=Pervasives.compare) () = + let module M = CCSequence.Map.Make(struct + type t = key + let compare = cmp + end) in + let map = ref M.empty in + let cur = { + is_empty = (fun () -> M.is_empty !map); + size = (fun () -> M.cardinal !map); + get = (fun k -> + try Some (M.find k !map) + with Not_found -> None); + fold = (fun f acc -> + M.fold + (fun key set acc -> f acc key set) !map acc + ); + to_seq = (fun k -> M.to_seq !map k); + } in + { + cur; + add = (fun k v -> map := M.add k v !map); + update = (fun k f -> + match (try f (Some (M.find k !map)) with Not_found -> f None) with + | None -> map := M.remove k !map + | Some v' -> map := M.add k v' !map); + } + + type 'a build_method = + | FromCmp of 'a ord + | FromHash of 'a equal * 'a hash + | Default + + let make ?(build=Default) () = match build with + | Default -> make_hash () + | FromCmp cmp -> make_cmp ~cmp () + | FromHash (eq,hash) -> make_hash ~eq ~hash () + + let multimap_of_seq ?(build=make ()) seq = + seq (fun (k,v) -> + build.update k (function + | None -> Some [v] + | Some l -> Some (v::l))); + build.cur + + let count_of_seq ?(build=make ()) seq = + seq (fun x -> + build.update x + (function + | None -> Some 1 + | Some n -> Some (n+1))); + build.cur + + let get_exn m x = + match m.get x with + | None -> raise Not_found + | Some x -> x + + let to_list m = m.to_seq |> CCSequence.to_rev_list +end + type 'a search_result = | SearchContinue | SearchStop of 'a +type ('a,'b,'key,'c) join_descr = { + join_key1 : 'a -> 'key; + join_key2 : 'b -> 'key; + join_merge : 'key -> 'a -> 'b -> 'c option; + join_build : 'key Map.build_method; +} + +type ('a,'b) group_join_descr = { + gjoin_proj : 'b -> 'a; + gjoin_build : 'a Map.build_method; +} + module Coll = struct type 'a t = | Seq : 'a sequence -> 'a t @@ -168,131 +294,87 @@ module Coll = struct assert (eq x y); true with Not_found -> false + + let do_join ~join c1 c2 = + let build1 = + to_seq c1 + |> CCSequence.map (fun x -> join.join_key1 x, x) + |> Map.multimap_of_seq ~build:(Map.make ~build:join.join_build ()) + in + let l = CCSequence.fold + (fun acc y -> + let key = join.join_key2 y in + match Map.get build1 key with + | None -> acc + | Some l1 -> + List.fold_left + (fun acc x -> match join.join_merge key x y with + | None -> acc + | Some res -> res::acc + ) acc l1 + ) [] (to_seq c2) + in + of_list l + + let do_group_join ~gjoin c1 c2 = + let build = Map.make ~build:gjoin.gjoin_build () in + to_seq c1 (fun x -> Map.add build x []); + to_seq c2 + (fun y -> + (* project [y] into some element of [c1] *) + let x = gjoin.gjoin_proj y in + Map.update build x + (function + | None -> None (* [x] not present, ignore! *) + | Some l -> Some (y::l) + ) + ); + Map.build_get build + + let do_product c1 c2 = + let s1 = to_seq c1 and s2 = to_seq c2 in + of_seq (CCSequence.product s1 s2) + + let do_union ~build c1 c2 = + let build = Map.make ~build () in + to_seq c1 (fun x -> Map.add build x ()); + to_seq c2 (fun x -> Map.add build x ()); + Map.to_seq (Map.build_get build) + |> CCSequence.map fst + |> of_seq + + type inter_status = + | InterLeft + | InterDone (* already output *) + + let do_inter ~build c1 c2 = + let build = Map.make ~build () in + let l = ref [] in + to_seq c1 (fun x -> Map.add build x InterLeft); + to_seq c2 (fun x -> + Map.update build x + (function + | None -> Some InterDone + | Some InterDone as foo -> foo + | Some InterLeft -> + l := x :: !l; + Some InterDone + ) + ); + of_list !l + + let do_diff ~build c1 c2 = + let build = Map.make ~build () in + to_seq c2 (fun x -> Map.add build x ()); + let map = Map.build_get build in + (* output elements of [c1] not in [map] *) + to_seq c1 + |> CCSequence.filter (fun x -> not (Map.mem map x)) + |> of_seq end type 'a collection = 'a Coll.t -module Map = struct - type ('a, 'b) t = { - is_empty : unit -> bool; - size : unit -> int; (** Number of keys *) - get : 'a -> 'b option; - fold : 'c. ('c -> 'a -> 'b -> 'c) -> 'c -> 'c; - to_seq : ('a * 'b) sequence; - } - - type ('a, 'b) build = { - mutable cur : ('a, 'b) t; - add : 'a -> 'b -> unit; - update : 'a -> ('b option -> 'b option) -> unit; - } - - (* careful to use this map linearly *) - let make_hash (type key) ?(eq=(=)) ?(hash=Hashtbl.hash) () = - let module H = Hashtbl.Make(struct - type t = key - let equal = eq - let hash = hash - end) in - (* build table *) - let tbl = H.create 32 in - let cur = { - is_empty = (fun () -> H.length tbl = 0); - size = (fun () -> H.length tbl); - get = (fun k -> - try Some (H.find tbl k) - with Not_found -> None); - fold = (fun f acc -> H.fold (fun k v acc -> f acc k v) tbl acc); - to_seq = (fun k -> H.iter (fun key v -> k (key,v)) tbl); - } in - { cur; - add = (fun k v -> H.replace tbl k v); - update = (fun k f -> - match (try f (Some (H.find tbl k)) with Not_found -> f None) with - | None -> H.remove tbl k - | Some v' -> H.replace tbl k v'); - } - - let make_cmp (type key) ?(cmp=Pervasives.compare) () = - let module M = CCSequence.Map.Make(struct - type t = key - let compare = cmp - end) in - let map = ref M.empty in - let cur = { - is_empty = (fun () -> M.is_empty !map); - size = (fun () -> M.cardinal !map); - get = (fun k -> - try Some (M.find k !map) - with Not_found -> None); - fold = (fun f acc -> - M.fold - (fun key set acc -> f acc key set) !map acc - ); - to_seq = (fun k -> M.to_seq !map k); - } in - { - cur; - add = (fun k v -> map := M.add k v !map); - update = (fun k f -> - match (try f (Some (M.find k !map)) with Not_found -> f None) with - | None -> map := M.remove k !map - | Some v' -> map := M.add k v' !map); - } - - type 'a key_info = { - eq : 'a equal option; - cmp : 'a ord option; - hash : 'a hash option; - } - - let default_key_info = { - eq=None; cmp=None; hash=None; - } - - let make_info info = - match info with - | { hash=None; _} - | { eq=None; _} -> - begin match info.cmp with - | None -> make_cmp () - | Some cmp -> make_cmp ~cmp () - end - | {eq=Some eq; hash=Some hash; _} -> make_hash ~eq ~hash () - - let multimap build seq = - seq (fun (k,v) -> - build.update k (function - | None -> Some [v] - | Some l -> Some (v::l))); - build.cur - - let multimap_cmp ?cmp seq = - let build = make_cmp ?cmp () in - multimap build seq - - let count build seq = - seq (fun x -> - build.update x - (function - | None -> Some 1 - | Some n -> Some (n+1))); - build.cur - - let get m x = m.get x - - let get_exn m x = - match m.get x with - | None -> raise Not_found - | Some x -> x - - let size m = m.size () - - let to_seq m = m.to_seq - - let to_list m = m.to_seq |> CCSequence.to_rev_list -end - (** {2 Query operators} *) type (_,_) safety = @@ -320,21 +402,9 @@ type (_, _) unary = > -> ('a collection, 'b) unary | Contains : 'a equal * 'a -> ('a collection, bool) unary | Get : ('b,'c) safety * 'a -> (('a,'b) Map.t, 'c) unary - | GroupBy : 'b ord * ('a -> 'b) + | GroupBy : 'b Map.build_method * ('a -> 'b) -> ('a collection, ('b,'a list) Map.t) unary - | Count : 'a ord -> ('a collection, ('a, int) Map.t) unary - -type ('a,'b,'key,'c) join_descr = { - join_key1 : 'a -> 'key; - join_key2 : 'b -> 'key; - join_merge : 'key -> 'a -> 'b -> 'c option; - join_key : 'key Map.key_info; -} - -type ('a,'b) group_join_descr = { - gjoin_proj : 'b -> 'a; - gjoin_key : 'a Map.key_info; -} + | Count : 'a Map.build_method -> ('a collection, ('a, int) Map.t) unary type set_op = | Union @@ -345,10 +415,11 @@ type (_, _, _) binary = | Join : ('a, 'b, 'key, 'c) join_descr -> ('a collection, 'b collection, 'c collection) binary | GroupJoin : ('a, 'b) group_join_descr - -> ('a collection, 'b collection, ('a, 'b collection) Map.t) binary + -> ('a collection, 'b collection, ('a, 'b list) Map.t) binary | Product : ('a collection, 'b collection, ('a*'b) collection) binary | Append : ('a collection, 'a collection, 'a collection) binary - | SetOp : set_op * 'a ord -> ('a collection, 'a collection, 'a collection) binary + | SetOp : set_op * 'a Map.build_method + -> ('a collection, 'a collection, 'a collection) binary (* type of queries that return a 'a *) and 'a t = @@ -457,46 +528,25 @@ let _do_unary : type a b. (a,b) unary -> a -> b | Search obj -> Coll.search obj c | Get (Safe, k) -> Map.get c k | Get (Unsafe, k) -> Map.get_exn c k - | GroupBy (cmp,f) -> + | GroupBy (build,f) -> Coll.to_seq c |> CCSequence.map (fun x -> f x, x) - |> Map.multimap_cmp ~cmp + |> Map.multimap_of_seq ~build:(Map.make ~build ()) | Contains (eq, x) -> Coll.contains ~eq x c - | Count cmp -> + | Count build -> Coll.to_seq c - |> Map.count (Map.make_cmp ~cmp ()) - - -(* TODO: join of two collections *) -let _do_join ~join c1 c2 = - let _build = Map.make_info join.join_key in - assert false - -(* TODO *) -let _do_group_join ~gjoin c1 c2 = - assert false - -let _do_product c1 c2 = - let s1 = Coll.to_seq c1 and s2 = Coll.to_seq c2 in - Coll.of_seq (CCSequence.product s1 s2) + |> Map.count_of_seq ~build:(Map.make ~build ()) let _do_binary : type a b c. (a, b, c) binary -> a -> b -> c = fun b c1 c2 -> match b with - | Join join -> _do_join ~join c1 c2 - | GroupJoin gjoin -> _do_group_join ~gjoin c1 c2 - | Product -> _do_product c1 c2 + | Join join -> Coll.do_join ~join c1 c2 + | GroupJoin gjoin -> Coll.do_group_join ~gjoin c1 c2 + | Product -> Coll.do_product c1 c2 | Append -> Coll.of_seq (CCSequence.append (Coll.to_seq c1) (Coll.to_seq c2)) - | SetOp (Inter,cmp) -> - (* use a join *) - _do_join ~join:{ - join_key1=_id; - join_key2=_id; - join_merge=(fun _ a b -> Some a); - join_key=Map.({default_key_info with cmp=Some cmp; }) - } c1 c2 - | SetOp (Union,cmp) -> failwith "union: not implemented" (* TODO *) - | SetOp (Diff,cmp) -> failwith "diff: not implemented" (* TODO *) + | SetOp (Inter,build) -> Coll.do_inter ~build c1 c2 + | SetOp (Union,build) -> Coll.do_union ~build c1 c2 + | SetOp (Diff,build) -> Coll.do_diff ~build c1 c2 let rec _run : type a. opt:bool -> a t -> a = fun ~opt q -> match q with @@ -564,14 +614,29 @@ let map_iter_flatten q = let map_to_list q = Unary (GeneralMap Map.to_list, q) -let group_by ?(cmp=Pervasives.compare) f q = - Unary (GroupBy (cmp,f), q) +(* choose a build method from the optional arguments *) +let _make_build ?cmp ?eq ?hash () = + let _maybe default o = match o with + | Some x -> x + | None -> default + in + match eq, hash with + | Some _, _ + | _, Some _ -> + Map.FromHash ( _maybe (=) eq, _maybe Hashtbl.hash hash) + | _ -> + match cmp with + | Some f -> Map.FromCmp f + | _ -> Map.Default -let group_by' ?cmp f q = +let group_by ?cmp ?eq ?hash f q = + Unary (GroupBy (_make_build ?cmp ?eq ?hash (),f), q) + +let group_by' ?cmp ?eq ?hash f q = map_iter (group_by ?cmp f q) -let count ?(cmp=Pervasives.compare) () q = - Unary (Count cmp, q) +let count ?cmp ?eq ?hash () q = + Unary (Count (_make_build ?cmp ?eq ?hash ()), q) let count' ?cmp () q = map_iter (count ?cmp () q) @@ -643,18 +708,20 @@ let find_map f q = (** {6 Binary Operators} *) let join ?cmp ?eq ?hash join_key1 join_key2 ~merge q1 q2 = + let join_build = _make_build ?eq ?hash ?cmp () in let j = { join_key1; join_key2; join_merge=merge; - join_key = Map.({ eq; cmp; hash; }); + join_build; } in Binary (Join j, q1, q2) let group_join ?cmp ?eq ?hash gjoin_proj q1 q2 = + let gjoin_build = _make_build ?eq ?hash ?cmp () in let j = { gjoin_proj; - gjoin_key = Map.({ eq; cmp; hash; }); + gjoin_build; } in Binary (GroupJoin j, q1, q2) @@ -662,14 +729,17 @@ let product q1 q2 = Binary (Product, q1, q2) let append q1 q2 = Binary (Append, q1, q2) -let inter ?(cmp=Pervasives.compare) () q1 q2 = - Binary (SetOp (Inter, cmp), q1, q2) +let inter ?cmp ?eq ?hash () q1 q2 = + let build = _make_build ?cmp ?eq ?hash () in + Binary (SetOp (Inter, build), q1, q2) -let union ?(cmp=Pervasives.compare) () q1 q2 = - Binary (SetOp (Union, cmp), q1, q2) +let union ?cmp ?eq ?hash () q1 q2 = + let build = _make_build ?cmp ?eq ?hash () in + Binary (SetOp (Union, build), q1, q2) -let diff ?(cmp=Pervasives.compare) () q1 q2 = - Binary (SetOp (Diff, cmp), q1, q2) +let diff ?cmp ?eq ?hash () q1 q2 = + let build = _make_build ?cmp ?eq ?hash () in + Binary (SetOp (Diff, build), q1, q2) let fst q = map fst q let snd q = map snd q diff --git a/core/CCLinq.mli b/core/CCLinq.mli index 96d1a939..347f5051 100644 --- a/core/CCLinq.mli +++ b/core/CCLinq.mli @@ -27,7 +27,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 LINQ-like operations on collections} The purpose it to provide powerful combinators to express iteration, -transformation and combination of collections of items. +transformation and combination of collections of items. This module depends +on several other modules, including {!CCList} and {!CCSequence}. + +Functions and operations are assumed to be referentially transparent, i.e. +they should not rely on external side effects, they should not rely on +the order of execution. {[ @@ -161,17 +166,18 @@ val map_to_list : ('a,'b) Map.t t -> ('a*'b) list t (** {6 Aggregation} *) -val group_by : ?cmp:'b ord -> +val group_by : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> ('a -> 'b) -> 'a collection t -> ('b,'a list) Map.t t (** [group_by f] takes a collection [c] as input, and returns a multimap [m] such that for each [x] in [c], [x] occurs in [m] under the key [f x]. In other words, [f] is used to obtain a key from [x], and [x] is added to the multimap using this key. *) -val group_by' : ?cmp:'b ord -> +val group_by' : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> ('a -> 'b) -> 'a collection t -> ('b * 'a list) collection t -val count : ?cmp:'a ord -> unit -> 'a collection t -> ('a, int) Map.t t +val count : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> + unit -> 'a collection t -> ('a, int) Map.t t (** [count c] returns a map from elements of [c] to the number of time those elements occur. *) @@ -228,7 +234,7 @@ val join : ?cmp:'key ord -> ?eq:'key equal -> ?hash:'key hash -> val group_join : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> ('b -> 'a) -> 'a collection t -> 'b collection t -> - ('a, 'b collection) Map.t t + ('a, 'b list) Map.t t (** [group_join key2] associates to every element [x] of the first collection, all the elements [y] of the second collection such that [eq x (key y)] *) @@ -239,17 +245,17 @@ val product : 'a collection t -> 'b collection t -> ('a * 'b) collection t val append : 'a collection t -> 'a collection t -> 'a collection t (** Append two collections together *) -val inter : ?cmp:'a ord -> unit -> +val inter : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> unit -> 'a collection t -> 'a collection t -> 'a collection t (** Intersection of two collections. Each element will occur at most once in the result *) -val union : ?cmp:'a ord -> unit -> +val union : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> unit -> 'a collection t -> 'a collection t -> 'a collection t (** Union of two collections. Each element will occur at most once in the result *) -val diff : ?cmp:'a ord -> unit -> +val diff : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> unit -> 'a collection t -> 'a collection t -> 'a collection t (** Set difference *) From c021a2b3104581c1b888e48fe2906f50606efdb2 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 12:09:07 +0200 Subject: [PATCH 41/52] map, set, and IO/string adapters for CCLinq; also a lazy constructor --- core/CCLinq.ml | 331 ++++++++++++++++++++++++++++++++++++------------ core/CCLinq.mli | 122 ++++++++++++++---- 2 files changed, 349 insertions(+), 104 deletions(-) diff --git a/core/CCLinq.ml b/core/CCLinq.ml index 489740ee..4d916d9b 100644 --- a/core/CCLinq.ml +++ b/core/CCLinq.ml @@ -35,7 +35,13 @@ type 'a hash = 'a -> int let _id x = x -module Map = struct +type 'a collection = + | Seq : 'a sequence -> 'a collection + | List : 'a list -> 'a collection + | Set : (module CCSequence.Set.S + with type elt = 'a and type t = 'b) * 'b -> 'a collection + +module PMap = struct type ('a, 'b) t = { is_empty : unit -> bool; size : unit -> int; (** Number of keys *) @@ -49,6 +55,7 @@ module Map = struct | None -> false | Some _ -> true let to_seq m = m.to_seq + let fold f acc m = m.fold f acc let size m = m.size () type ('a, 'b) build = { @@ -144,7 +151,39 @@ module Map = struct | None -> raise Not_found | Some x -> x + (* map values *) + let map f m = { + is_empty = m.is_empty; + size = m.size; + get = (fun k -> match m.get k with + | None -> None + | Some v -> Some (f v) + ); + to_seq = CCSequence.map (fun (x,y) -> x, f y) m.to_seq; + fold = (fun f' acc -> + m.fold (fun acc x y -> f' acc x (f y)) acc + ); + } + let to_list m = m.to_seq |> CCSequence.to_rev_list + + let to_coll m = Seq m.to_seq + + let reverse ~build m = + let build = make ~build () in + to_seq m + |> CCSequence.map (fun (x,y) -> y,x) + |> multimap_of_seq ~build + + let reverse_multimap ~build m = + let build = make ~build () in + to_seq m + |> CCSequence.flatMap + (fun (x,l) -> + CCSequence.of_list l + |> CCSequence.map (fun y -> y,x) + ) + |> multimap_of_seq ~build end type 'a search_result = @@ -155,21 +194,15 @@ type ('a,'b,'key,'c) join_descr = { join_key1 : 'a -> 'key; join_key2 : 'b -> 'key; join_merge : 'key -> 'a -> 'b -> 'c option; - join_build : 'key Map.build_method; + join_build : 'key PMap.build_method; } type ('a,'b) group_join_descr = { gjoin_proj : 'b -> 'a; - gjoin_build : 'a Map.build_method; + gjoin_build : 'a PMap.build_method; } module Coll = struct - type 'a t = - | Seq : 'a sequence -> 'a t - | List : 'a list -> 'a t - | Set : (module CCSequence.Set.S - with type elt = 'a and type t = 'b) * 'b -> 'a t - let of_seq s = Seq s let of_list l = List l let of_array a = Seq (CCSequence.of_array a) @@ -299,12 +332,12 @@ module Coll = struct let build1 = to_seq c1 |> CCSequence.map (fun x -> join.join_key1 x, x) - |> Map.multimap_of_seq ~build:(Map.make ~build:join.join_build ()) + |> PMap.multimap_of_seq ~build:(PMap.make ~build:join.join_build ()) in let l = CCSequence.fold (fun acc y -> let key = join.join_key2 y in - match Map.get build1 key with + match PMap.get build1 key with | None -> acc | Some l1 -> List.fold_left @@ -317,29 +350,29 @@ module Coll = struct of_list l let do_group_join ~gjoin c1 c2 = - let build = Map.make ~build:gjoin.gjoin_build () in - to_seq c1 (fun x -> Map.add build x []); + let build = PMap.make ~build:gjoin.gjoin_build () in + to_seq c1 (fun x -> PMap.add build x []); to_seq c2 (fun y -> (* project [y] into some element of [c1] *) let x = gjoin.gjoin_proj y in - Map.update build x + PMap.update build x (function | None -> None (* [x] not present, ignore! *) | Some l -> Some (y::l) ) ); - Map.build_get build + PMap.build_get build let do_product c1 c2 = let s1 = to_seq c1 and s2 = to_seq c2 in of_seq (CCSequence.product s1 s2) let do_union ~build c1 c2 = - let build = Map.make ~build () in - to_seq c1 (fun x -> Map.add build x ()); - to_seq c2 (fun x -> Map.add build x ()); - Map.to_seq (Map.build_get build) + let build = PMap.make ~build () in + to_seq c1 (fun x -> PMap.add build x ()); + to_seq c2 (fun x -> PMap.add build x ()); + PMap.to_seq (PMap.build_get build) |> CCSequence.map fst |> of_seq @@ -348,11 +381,11 @@ module Coll = struct | InterDone (* already output *) let do_inter ~build c1 c2 = - let build = Map.make ~build () in + let build = PMap.make ~build () in let l = ref [] in - to_seq c1 (fun x -> Map.add build x InterLeft); + to_seq c1 (fun x -> PMap.add build x InterLeft); to_seq c2 (fun x -> - Map.update build x + PMap.update build x (function | None -> Some InterDone | Some InterDone as foo -> foo @@ -364,17 +397,15 @@ module Coll = struct of_list !l let do_diff ~build c1 c2 = - let build = Map.make ~build () in - to_seq c2 (fun x -> Map.add build x ()); - let map = Map.build_get build in + let build = PMap.make ~build () in + to_seq c2 (fun x -> PMap.add build x ()); + let map = PMap.build_get build in (* output elements of [c1] not in [map] *) to_seq c1 - |> CCSequence.filter (fun x -> not (Map.mem map x)) + |> CCSequence.filter (fun x -> not (PMap.mem map x)) |> of_seq end -type 'a collection = 'a Coll.t - (** {2 Query operators} *) type (_,_) safety = @@ -382,10 +413,12 @@ type (_,_) safety = | Unsafe : ('a, 'a) safety type (_, _) unary = - | Map : ('a -> 'b) -> ('a collection, 'b collection) unary + | PMap : ('a -> 'b) -> ('a collection, 'b collection) unary | GeneralMap : ('a -> 'b) -> ('a, 'b) unary | Filter : ('a -> bool) -> ('a collection, 'a collection) unary | Fold : ('b -> 'a -> 'b) * 'b -> ('a collection, 'b) unary + | FoldMap : ('acc -> 'a -> 'b -> 'acc) * 'acc + -> (('a,'b) PMap.t, 'acc) unary | Reduce : ('c, 'd) safety * ('a -> 'b) * ('a -> 'b -> 'b) * ('b -> 'c) -> ('a collection, 'd) unary | Size : ('a collection, int) unary @@ -401,10 +434,11 @@ type (_, _) unary = failure : 'b; > -> ('a collection, 'b) unary | Contains : 'a equal * 'a -> ('a collection, bool) unary - | Get : ('b,'c) safety * 'a -> (('a,'b) Map.t, 'c) unary - | GroupBy : 'b Map.build_method * ('a -> 'b) - -> ('a collection, ('b,'a list) Map.t) unary - | Count : 'a Map.build_method -> ('a collection, ('a, int) Map.t) unary + | Get : ('b,'c) safety * 'a -> (('a,'b) PMap.t, 'c) unary + | GroupBy : 'b PMap.build_method * ('a -> 'b) + -> ('a collection, ('b,'a list) PMap.t) unary + | Count : 'a PMap.build_method -> ('a collection, ('a, int) PMap.t) unary + | Lazy : ('a lazy_t, 'a) unary type set_op = | Union @@ -415,10 +449,10 @@ type (_, _, _) binary = | Join : ('a, 'b, 'key, 'c) join_descr -> ('a collection, 'b collection, 'c collection) binary | GroupJoin : ('a, 'b) group_join_descr - -> ('a collection, 'b collection, ('a, 'b list) Map.t) binary + -> ('a collection, 'b collection, ('a, 'b list) PMap.t) binary | Product : ('a collection, 'b collection, ('a*'b) collection) binary | Append : ('a collection, 'a collection, 'a collection) binary - | SetOp : set_op * 'a Map.build_method + | SetOp : set_op * 'a PMap.build_method -> ('a collection, 'a collection, 'a collection) binary (* type of queries that return a 'a *) @@ -431,18 +465,30 @@ and 'a t = let start x = Start x -let start_list l = +let of_list l = Start (Coll.of_list l) -let start_array a = +let of_array a = Start (Coll.of_array a) -let start_hashtbl h = +let of_array_i a = + Start (CCSequence.of_array_i a |> Coll.of_seq) + +let of_hashtbl h = Start (Coll.of_seq (CCSequence.of_hashtbl h)) -let start_seq seq = +let of_seq seq = Start (Coll.of_seq seq) +let of_queue q = + Start (CCSequence.of_queue q |> Coll.of_seq) + +let of_stack s = + Start (CCSequence.of_stack s |> Coll.of_seq) + +let of_string s = + Start (CCSequence.of_str s |> Coll.of_seq) + (** {6 Execution} *) let rec _optimize : type a. a t -> a t @@ -456,32 +502,32 @@ let rec _optimize : type a. a t -> a t | Bind _ -> q (* cannot optimize before execution *) and _optimize_unary : type a b. (a,b) unary -> a t -> b t = fun u q -> match u, q with - | Map f, Unary (Map g, q') -> - _optimize_unary (Map (fun x -> f (g x))) q' - | Filter p, Unary (Map f, cont) -> + | PMap f, Unary (PMap g, q') -> + _optimize_unary (PMap (fun x -> f (g x))) q' + | Filter p, Unary (PMap f, cont) -> _optimize_unary (FilterMap (fun x -> let y = f x in if p y then Some y else None)) cont - | Map f, Unary (Filter p, cont) -> + | PMap f, Unary (Filter p, cont) -> _optimize_unary (FilterMap (fun x -> if p x then Some (f x) else None)) cont - | Map f, Binary (Append, q1, q2) -> + | PMap f, Binary (Append, q1, q2) -> _optimize_binary Append (Unary (u, q1)) (Unary (u, q2)) | Filter p, Binary (Append, q1, q2) -> _optimize_binary Append (Unary (u, q1)) (Unary (u, q2)) - | Fold (f,acc), Unary (Map f', cont) -> + | Fold (f,acc), Unary (PMap f', cont) -> _optimize_unary (Fold ((fun acc x -> f acc (f' x)), acc)) cont - | Reduce (safety, start, mix, stop), Unary (Map f, cont) -> + | Reduce (safety, start, mix, stop), Unary (PMap f, cont) -> _optimize_unary (Reduce (safety, (fun x -> start (f x)), (fun x acc -> mix (f x) acc), stop)) cont - | Size, Unary (Map _, cont) -> + | Size, Unary (PMap _, cont) -> _optimize_unary Size cont (* ignore the map! *) | Size, Unary (Sort _, cont) -> _optimize_unary Size cont @@ -494,10 +540,11 @@ and _optimize_binary : type a b c. (a,b,c) binary -> a t -> b t -> c t (* apply a unary operator on a collection *) let _do_unary : type a b. (a,b) unary -> a -> b = fun u c -> match u with - | Map f -> Coll.map f c + | PMap f -> Coll.map f c | GeneralMap f -> f c | Filter p -> Coll.filter p c | Fold (f, acc) -> Coll.fold f acc c + | FoldMap (f, acc) -> PMap.fold f acc c | Reduce (safety, start, mix, stop) -> let acc = Coll.to_seq c |> CCSequence.fold @@ -526,16 +573,17 @@ let _do_unary : type a b. (a,b) unary -> a -> b | Sort cmp -> Coll.sort cmp c | Distinct cmp -> Coll.distinct ~cmp c | Search obj -> Coll.search obj c - | Get (Safe, k) -> Map.get c k - | Get (Unsafe, k) -> Map.get_exn c k + | Get (Safe, k) -> PMap.get c k + | Get (Unsafe, k) -> PMap.get_exn c k | GroupBy (build,f) -> Coll.to_seq c |> CCSequence.map (fun x -> f x, x) - |> Map.multimap_of_seq ~build:(Map.make ~build ()) + |> PMap.multimap_of_seq ~build:(PMap.make ~build ()) | Contains (eq, x) -> Coll.contains ~eq x c | Count build -> Coll.to_seq c - |> Map.count_of_seq ~build:(Map.make ~build ()) + |> PMap.count_of_seq ~build:(PMap.make ~build ()) + | Lazy -> Lazy.force c let _do_binary : type a b c. (a, b, c) binary -> a -> b -> c = fun b c1 c2 -> match b with @@ -565,7 +613,7 @@ let run_no_opt q = _run ~opt:false q (** {6 Basics on Collections} *) -let map f q = Unary (Map f, q) +let map f q = Unary (PMap f, q) let filter p q = Unary (Filter p, q) @@ -581,10 +629,14 @@ let flat_map_seq f q = let f' x = Coll.of_seq (f x) in Unary (FlatMap f', q) -let flat_map_list f q = +let flat_map_l f q = let f' x = Coll.of_list (f x) in Unary (FlatMap f', q) +let flatten q = Unary (FlatMap (fun x->x), q) + +let flatten_l q = Unary (FlatMap Coll.of_list, q) + let take n q = Unary (Take n, q) let take_while p q = Unary (TakeWhile p, q) @@ -594,26 +646,6 @@ let sort ?(cmp=Pervasives.compare) () q = Unary (Sort cmp, q) let distinct ?(cmp=Pervasives.compare) () q = Unary (Distinct cmp, q) -let get key q = - Unary (Get (Safe, key), q) - -let get_exn key q = - Unary (Get (Unsafe, key), q) - -let map_iter q = - Unary (GeneralMap (fun m -> Coll.of_seq m.Map.to_seq), q) - -let map_iter_flatten q = - let f m = m.Map.to_seq - |> CCSequence.flatMap - (fun (k,v) -> Coll.to_seq v |> CCSequence.map (fun v' -> k,v')) - |> Coll.of_seq - in - Unary (GeneralMap f, q) - -let map_to_list q = - Unary (GeneralMap Map.to_list, q) - (* choose a build method from the optional arguments *) let _make_build ?cmp ?eq ?hash () = let _maybe default o = match o with @@ -623,23 +655,75 @@ let _make_build ?cmp ?eq ?hash () = match eq, hash with | Some _, _ | _, Some _ -> - Map.FromHash ( _maybe (=) eq, _maybe Hashtbl.hash hash) + PMap.FromHash ( _maybe (=) eq, _maybe Hashtbl.hash hash) | _ -> match cmp with - | Some f -> Map.FromCmp f - | _ -> Map.Default + | Some f -> PMap.FromCmp f + | _ -> PMap.Default + +(** {6 Queries on PMaps} *) + +module M = struct + let get key q = + Unary (Get (Safe, key), q) + + let get_exn key q = + Unary (Get (Unsafe, key), q) + + let iter q = + Unary (GeneralMap (fun m -> Coll.of_seq m.PMap.to_seq), q) + + let flatten q = + let f m = m.PMap.to_seq + |> CCSequence.flatMap + (fun (k,v) -> Coll.to_seq v |> CCSequence.map (fun v' -> k,v')) + |> Coll.of_seq + in + Unary (GeneralMap f, q) + + let flatten' q = + let f m = m.PMap.to_seq + |> CCSequence.flatMap + (fun (k,v) -> CCSequence.of_list v |> CCSequence.map (fun v' -> k,v')) + |> Coll.of_seq + in + Unary (GeneralMap f, q) + + let map f q = + Unary (GeneralMap (PMap.map f), q) + + let to_list q = + Unary (GeneralMap PMap.to_list, q) + + let reverse ?cmp ?eq ?hash () q = + let build = _make_build ?cmp ?eq ?hash () in + Unary (GeneralMap (PMap.reverse ~build), q) + + let reverse_multimap ?cmp ?eq ?hash () q = + let build = _make_build ?cmp ?eq ?hash () in + Unary (GeneralMap (PMap.reverse_multimap ~build), q) + + let fold f acc q = + Unary (FoldMap (f, acc), q) + + let fold_multimap f acc q = + let f' acc x l = + List.fold_left (fun acc y -> f acc x y) acc l + in + Unary (FoldMap (f', acc), q) +end let group_by ?cmp ?eq ?hash f q = Unary (GroupBy (_make_build ?cmp ?eq ?hash (),f), q) let group_by' ?cmp ?eq ?hash f q = - map_iter (group_by ?cmp f q) + M.iter (group_by ?cmp f q) let count ?cmp ?eq ?hash () q = Unary (Count (_make_build ?cmp ?eq ?hash ()), q) let count' ?cmp () q = - map_iter (count ?cmp () q) + M.iter (count ?cmp () q) let fold f acc q = Unary (Fold (f, acc), q) @@ -764,6 +848,10 @@ let (>>=) x f = Bind (f, x) let query_map f q = QueryMap (f, q) +(** {6 Misc} *) + +let lazy_ q = Unary (Lazy, q) + (** {6 Output containers} *) let to_list q = @@ -787,3 +875,88 @@ let to_stack q = (** {6 Misc} *) let run_list q = run (q |> to_list) + +(** {6 Adapters} *) + +module AdaptSet(S : Set.S) = struct + let of_set set = + return (Coll.of_seq (fun k -> S.iter k set)) + + let to_set q = + let f c = + Coll.to_seq c |> CCSequence.fold (fun set x -> S.add x set) S.empty + in + query_map f q + + let run q = run (q |> to_set) +end + +module AdaptMap(M : Map.S) = struct + let _to_seq m k = M.iter (fun x y -> k (x,y)) m + + let of_map map = + return (Coll.of_seq (_to_seq map)) + + let to_pmap m = { + PMap.get = (fun x -> try Some (M.find x m) with Not_found -> None); + PMap.size = (fun () -> M.cardinal m); + PMap.is_empty = (fun () -> M.is_empty m); + PMap.fold = (fun f acc -> M.fold (fun x y acc -> f acc x y) m acc); + PMap.to_seq = _to_seq m; + } + + let to_map q = + let f c = + Coll.to_seq c + |> CCSequence.fold (fun m (x,y) -> M.add x y m) M.empty + in + query_map f q + + let run q = run (q |> to_map) +end + +module IO = struct + let slurp ic = + let l = lazy ( + let buf_size = 256 in + let content = Buffer.create 120 + and buf = String.make buf_size 'a' in + let rec next () = + let num = input ic buf 0 buf_size in + if num = 0 + then Buffer.contents content (* EOF *) + else (Buffer.add_substring content buf 0 num; next ()) + in next () + ) in + lazy_ (return l) + + (* find [c] in [s], starting at offset [i] *) + let rec _find s c i = + if i >= String.length s then None + else if s.[i] = c then Some i + else _find s c (i+1) + + let rec _lines s i k = match _find s '\n' i with + | None -> () + | Some j -> + let s' = String.sub s i (j-i) in + k s'; + _lines s (j+1) k + + let lines q = + (* sequence of lines *) + let f s = _lines s 0 |> Coll.of_seq in + query_map f q + + let lines' q = + let f s = lazy (_lines s 0 |> CCSequence.to_list) in + lazy_ (query_map f q) + + let out oc q = + run q |> output_string oc + + let out_lines oc q = + run q + |> Coll.to_seq + |> CCSequence.iter (fun l -> output_string oc l; output_char oc '\n') +end diff --git a/core/CCLinq.mli b/core/CCLinq.mli index 347f5051..04e5266a 100644 --- a/core/CCLinq.mli +++ b/core/CCLinq.mli @@ -38,10 +38,10 @@ the order of execution. CCLinq.( start_list [1;2;3] - |> flat_map_list (fun x -> CCList.(x -- (x+10))) + |> flat_map_l (fun x -> CCList.(x -- (x+10))) |> sort () |> count () - |> map_to_list |> run + |> M.to_list |> run );; - : (int * int) list = [(13, 1); (12, 2); (11, 3); (10, 3); (9, 3); (8, 3); (7, 3); (6, 3); (5, 3); (4, 3); (3, 3); (2, 2); (1, 1)] @@ -59,8 +59,8 @@ type 'a collection be used directly, they are to be processed using a query (type {!'a t}) and converted to some list/sequence/array *) -(** {2 A polymorphic map} *) -module Map : sig +(** {2 Polymorphic Maps} *) +module PMap : sig type ('a, 'b) t val get : ('a,'b) t -> 'a -> 'b option @@ -74,6 +74,8 @@ module Map : sig val to_seq : ('a, 'b) t -> ('a * 'b) sequence val to_list : ('a, 'b) t -> ('a * 'b) list + + val to_coll : ('a, 'b) t -> ('a * 'b) collection end (** {2 Query operators} *) @@ -86,16 +88,24 @@ type 'a t val start : 'a -> 'a t (** Start with a single value *) -val start_list : 'a list -> 'a collection t +val of_list : 'a list -> 'a collection t (** Query that just returns the elements of the list *) -val start_array : 'a array -> 'a collection t +val of_array : 'a array -> 'a collection t +val of_array_i : 'a array -> (int * 'a) collection t -val start_hashtbl : ('a,'b) Hashtbl.t -> ('a * 'b) collection t +val of_hashtbl : ('a,'b) Hashtbl.t -> ('a * 'b) collection t -val start_seq : 'a sequence -> 'a collection t +val of_seq : 'a sequence -> 'a collection t (** Query that returns the elements of the given sequence. *) +val of_queue : 'a Queue.t -> 'a collection t + +val of_stack : 'a Stack.t -> 'a collection t + +val of_string : string -> char collection t +(** Traverse the characters of the string *) + (** {6 Execution} *) val run : 'a t -> 'a @@ -132,7 +142,11 @@ val flat_map : ('a -> 'b collection) -> 'a collection t -> 'b collection t val flat_map_seq : ('a -> 'b sequence) -> 'a collection t -> 'b collection t (** Same as {!flat_map} but using sequences *) -val flat_map_list : ('a -> 'b list) -> 'a collection t -> 'b collection t +val flat_map_l : ('a -> 'b list) -> 'a collection t -> 'b collection t + +val flatten : 'a collection collection t -> 'a collection t + +val flatten_l : 'a list collection t -> 'a collection t val take : int -> 'a collection t -> 'a collection t (** take at most [n] elements *) @@ -147,27 +161,50 @@ val distinct : ?cmp:'a ord -> unit -> 'a collection t -> 'a collection t (** Remove duplicate elements from the input collection. All elements in the result are distinct. *) -(** {6 Maps} *) +(** {6 Queries on Maps} *) -val get : 'a -> ('a, 'b) Map.t t -> 'b option t -(** Select a key from a map *) +module M : sig + val get : 'a -> ('a, 'b) PMap.t t -> 'b option t + (** Select a key from a map *) -val get_exn : 'a -> ('a, 'b) Map.t t -> 'b t -(** Unsafe version of {!get}. - @raise Not_found if the key is not present. *) + val get_exn : 'a -> ('a, 'b) PMap.t t -> 'b t + (** Unsafe version of {!get}. + @raise Not_found if the key is not present. *) -val map_iter : ('a,'b) Map.t t -> ('a*'b) collection t -(** View a multimap as a proper collection *) + val iter : ('a,'b) PMap.t t -> ('a*'b) collection t + (** View a multimap as a proper collection *) -val map_iter_flatten : ('a,'b collection) Map.t t -> ('a*'b) collection t -(** View a multimap as a collection of individual key/value pairs *) + val flatten : ('a,'b collection) PMap.t t -> ('a*'b) collection t + (** View a multimap as a collection of individual key/value pairs *) -val map_to_list : ('a,'b) Map.t t -> ('a*'b) list t + val flatten' : ('a,'b list) PMap.t t -> ('a*'b) collection t + (** View a multimap as a collection of individual key/value pairs *) + + val map : ('b -> 'c) -> ('a, 'b) PMap.t t -> ('a, 'c) PMap.t t + (** Transform values *) + + val to_list : ('a,'b) PMap.t t -> ('a*'b) list t + + val reverse : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> unit -> + ('a,'b) PMap.t t -> ('b,'a list) PMap.t t + (** Reverse relation of the map, as a multimap *) + + val reverse_multimap : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> unit -> + ('a,'b list) PMap.t t -> ('b,'a list) PMap.t t + (** Reverse relation of the multimap *) + + val fold : ('acc -> 'a -> 'b -> 'acc) -> 'acc -> ('a,'b) PMap.t t -> 'acc t + (** Fold on the items of the map *) + + val fold_multimap : ('acc -> 'a -> 'b -> 'acc) -> 'acc -> + ('a,'b list) PMap.t t -> 'acc t + (** Fold on the items of the multimap *) +end (** {6 Aggregation} *) val group_by : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> - ('a -> 'b) -> 'a collection t -> ('b,'a list) Map.t t + ('a -> 'b) -> 'a collection t -> ('b,'a list) PMap.t t (** [group_by f] takes a collection [c] as input, and returns a multimap [m] such that for each [x] in [c], [x] occurs in [m] under the key [f x]. In other words, [f] is used @@ -177,7 +214,7 @@ val group_by' : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> ('a -> 'b) -> 'a collection t -> ('b * 'a list) collection t val count : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> - unit -> 'a collection t -> ('a, int) Map.t t + unit -> 'a collection t -> ('a, int) PMap.t t (** [count c] returns a map from elements of [c] to the number of time those elements occur. *) @@ -189,7 +226,8 @@ val fold : ('b -> 'a -> 'b) -> 'b -> 'a collection t -> 'b t val size : _ collection t -> int t (** Count how many elements the collection contains *) -val reduce : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> 'a collection t -> 'c option t +val reduce : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> + 'a collection t -> 'c option t (** [reduce start mix stop q] uses [start] on the first element of [q], and combine the result with following elements using [mix]. The final value is transformed using [stop]. This returns [None] if the collection @@ -234,7 +272,7 @@ val join : ?cmp:'key ord -> ?eq:'key equal -> ?hash:'key hash -> val group_join : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> ('b -> 'a) -> 'a collection t -> 'b collection t -> - ('a, 'b list) Map.t t + ('a, 'b list) PMap.t t (** [group_join key2] associates to every element [x] of the first collection, all the elements [y] of the second collection such that [eq x (key y)] *) @@ -293,7 +331,11 @@ val return : 'a -> 'a t (** Synonym to {!start} *) val query_map : ('a -> 'b) -> 'a t -> 'b t -(** Map results directly, rather than collections of elements *) +(** PMap results directly, rather than collections of elements *) + +(** {6 Misc} *) + +val lazy_ : 'a lazy_t t -> 'a t (** {6 Output Containers} *) @@ -313,3 +355,33 @@ val to_hashtbl : ('a * 'b) collection t -> ('a, 'b) Hashtbl.t t val to_queue : 'a collection t -> ('a Queue.t -> unit) t val to_stack : 'a collection t -> ('a Stack.t -> unit) t + +(** {6 Adapters} *) + +module AdaptSet(S : Set.S) : sig + val of_set : S.t -> S.elt collection t + val to_set : S.elt collection t -> S.t t + val run : S.elt collection t -> S.t +end + +module AdaptMap(M : Map.S) : sig + val of_map : 'a M.t -> (M.key * 'a) collection t + val to_pmap : 'a M.t -> (M.key, 'a) PMap.t + val to_map : (M.key * 'a) collection t -> 'a M.t t + val run : (M.key * 'a) collection t -> 'a M.t +end + +module IO : sig + val slurp : in_channel -> string t + (** Slurp the whole channel in (blocking), returning the corresponding string *) + + val lines : string t -> string collection t + (** Convert a string into a collection of lines *) + + val lines' : string t -> string list t + (** Convert a string into a list of lines *) + + val out : out_channel -> string t -> unit + val out_lines : out_channel -> string collection t -> unit + (** Evaluate the query and print it line by line on the output *) +end From bb12ed932cdb23d0d6c59b844dfb91741c744b9a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 13:51:50 +0200 Subject: [PATCH 42/52] much better error handling in CCLinq --- core/CCLinq.ml | 225 +++++++++++++++++++++++++++++++++++------------- core/CCLinq.mli | 120 +++++++++++++++++--------- 2 files changed, 243 insertions(+), 102 deletions(-) diff --git a/core/CCLinq.ml b/core/CCLinq.ml index 4d916d9b..e2060191 100644 --- a/core/CCLinq.ml +++ b/core/CCLinq.ml @@ -30,11 +30,16 @@ type 'a sequence = ('a -> unit) -> unit type 'a equal = 'a -> 'a -> bool type 'a ord = 'a -> 'a -> int type 'a hash = 'a -> int +type 'a with_err = [`Ok of 'a | `Error of string ] (* TODO: add CCVector as a collection *) let _id x = x +exception ExitWithError of string +let _exit_with_error s = raise (ExitWithError s) +let _error_of_exn f = try `Ok (f ()) with ExitWithError s -> `Error + type 'a collection = | Seq : 'a sequence -> 'a collection | List : 'a list -> 'a collection @@ -57,6 +62,9 @@ module PMap = struct let to_seq m = m.to_seq let fold f acc m = m.fold f acc let size m = m.size () + let get_err m x = match m.get x with + | Some y -> `Ok y + | None -> `Error "PMap.get: lookup error" type ('a, 'b) build = { mutable cur : ('a, 'b) t; @@ -266,18 +274,24 @@ module Coll = struct with type elt = elt and type t = 'b) in S.cardinal set - let choose (type elt) = function - | List [] -> None - | List (x::_) -> Some x + let choose_exn (type elt) c = + let fail () = _exit_with_error "choose: empty collection" in + match c with + | List [] -> fail () + | List (x::_) -> x | Seq s -> begin match CCSequence.take 1 s |> CCSequence.to_list with - | [x] -> Some x - | _ -> None + | [x] -> x + | _ -> fail () end | Set (m, set) -> let module S = (val m : CCSequence.Set.S with type elt = elt and type t = 'b) in - try Some (S.choose set) with Not_found -> None + try S.choose set with Not_found -> fail () + + let choose_err c = + try `Ok (choose_exn c) + with ExitWithError s -> `Error s let take n c = _fmap ~lst:(CCList.take n) ~seq:(CCSequence.take n) c @@ -409,8 +423,8 @@ end (** {2 Query operators} *) type (_,_) safety = - | Safe : ('a, 'a option) safety - | Unsafe : ('a, 'a) safety + | Explicit : ('a, 'a with_err) safety + | Implicit : ('a, 'a) safety type (_, _) unary = | PMap : ('a -> 'b) -> ('a collection, 'b collection) unary @@ -458,6 +472,7 @@ type (_, _, _) binary = (* type of queries that return a 'a *) and 'a t = | Start : 'a -> 'a t + | Catch : 'a with_err t -> 'a t | Unary : ('a, 'b) unary * 'a t -> 'b t | Binary : ('a, 'b, 'c) binary * 'a t * 'b t -> 'c t | QueryMap : ('a -> 'b) * 'a t -> 'b t @@ -494,6 +509,7 @@ let of_string s = let rec _optimize : type a. a t -> a t = fun q -> match q with | Start _ -> q + | Catch q' -> Catch (_optimize q') | Unary (u, q) -> _optimize_unary u (_optimize q) | Binary (b, q1, q2) -> @@ -554,18 +570,14 @@ let _do_unary : type a b. (a,b) unary -> a -> b ) None in begin match acc, safety with - | Some x, Safe -> Some (stop x) - | None, Safe -> None - | Some x, Unsafe -> stop x - | None, Unsafe -> invalid_arg "reduce: empty collection" + | Some x, Implicit -> stop x + | None, Implicit -> _exit_with_error "reduce: empty collection" + | Some x, Explicit -> `Ok (stop x) + | None, Explicit -> `Error "reduce: empty collection" end | Size -> Coll.size c - | Choose Safe -> Coll.choose c - | Choose Unsafe -> - begin match Coll.choose c with - | Some x -> x - | None -> invalid_arg "choose: empty collection" - end + | Choose Implicit -> Coll.choose_exn c + | Choose Explicit -> Coll.choose_err c | FilterMap f -> Coll.filter_map f c | FlatMap f -> Coll.flat_map f c | Take n -> Coll.take n c @@ -573,8 +585,8 @@ let _do_unary : type a b. (a,b) unary -> a -> b | Sort cmp -> Coll.sort cmp c | Distinct cmp -> Coll.distinct ~cmp c | Search obj -> Coll.search obj c - | Get (Safe, k) -> PMap.get c k - | Get (Unsafe, k) -> PMap.get_exn c k + | Get (Implicit, k) -> PMap.get_exn c k + | Get (Explicit, k) -> PMap.get_err c k | GroupBy (build,f) -> Coll.to_seq c |> CCSequence.map (fun x -> f x, x) @@ -597,8 +609,13 @@ let _do_binary : type a b c. (a, b, c) binary -> a -> b -> c | SetOp (Diff,build) -> Coll.do_diff ~build c1 c2 let rec _run : type a. opt:bool -> a t -> a - = fun ~opt q -> match q with += fun ~opt q -> match q with | Start c -> c + | Catch q' -> + begin match _run ~opt q' with + | `Ok x -> x + | `Error s -> _exit_with_error s + end | Unary (u, q') -> _do_unary u (_run ~opt q') | Binary (b, q1, q2) -> _do_binary b (_run ~opt q1) (_run ~opt q2) | QueryMap (f, q') -> f (_run ~opt q') @@ -608,8 +625,23 @@ let rec _run : type a. opt:bool -> a t -> a let q'' = if opt then _optimize q'' else q'' in _run ~opt q'' -let run q = _run ~opt:true (_optimize q) -let run_no_opt q = _run ~opt:false q +(* safe execution *) +let run q = + try `Ok (_run ~opt:true (_optimize q)) + with + | ExitWithError s -> `Error s + | e -> `Error (Printexc.to_string e) + +let run_exn q = + match run q with + | `Ok x -> x + | `Error s -> failwith s + +let run_no_optim q = + try `Ok (_run ~opt:false q) + with + | ExitWithError s -> `Error s + | e -> `Error (Printexc.to_string e) (** {6 Basics on Collections} *) @@ -617,9 +649,9 @@ let map f q = Unary (PMap f, q) let filter p q = Unary (Filter p, q) -let choose q = Unary (Choose Safe, q) +let choose q = Unary (Choose Implicit, q) -let choose_exn q = Unary (Choose Unsafe, q) +let choose_err q = Unary (Choose Explicit, q) let filter_map f q = Unary (FilterMap f, q) @@ -665,10 +697,10 @@ let _make_build ?cmp ?eq ?hash () = module M = struct let get key q = - Unary (Get (Safe, key), q) + Unary (Get (Implicit, key), q) - let get_exn key q = - Unary (Get (Unsafe, key), q) + let get_err key q = + Unary (Get (Explicit, key), q) let iter q = Unary (GeneralMap (fun m -> Coll.of_seq m.PMap.to_seq), q) @@ -733,10 +765,10 @@ let size q = Unary (Size, q) let sum q = Unary (Fold ((+), 0), q) let reduce start mix stop q = - Unary (Reduce (Safe, start,mix,stop), q) + Unary (Reduce (Implicit, start,mix,stop), q) -let reduce_exn start mix stop q = - Unary (Reduce (Unsafe, start,mix,stop), q) +let reduce_err start mix stop q = + Unary (Reduce (Explicit, start,mix,stop), q) let _avg_start x = (x,1) let _avg_mix x (y,n) = (x+y,n+1) @@ -746,13 +778,13 @@ let _lift_some f x y = match y with | None -> Some x | Some y -> Some (f x y) -let max q = Unary (Reduce (Safe, _id, Pervasives.max, _id), q) -let min q = Unary (Reduce (Safe, _id, Pervasives.min, _id), q) -let average q = Unary (Reduce (Safe, _avg_start, _avg_mix, _avg_stop), q) +let max q = Unary (Reduce (Implicit, _id, Pervasives.max, _id), q) +let min q = Unary (Reduce (Implicit, _id, Pervasives.min, _id), q) +let average q = Unary (Reduce (Implicit, _avg_start, _avg_mix, _avg_stop), q) -let max_exn q = Unary (Reduce (Unsafe, _id, Pervasives.max, _id), q) -let min_exn q = Unary (Reduce (Unsafe, _id, Pervasives.min, _id), q) -let average_exn q = Unary (Reduce (Unsafe, _avg_start, _avg_mix, _avg_stop), q) +let max_err q = Unary (Reduce (Explicit, _id, Pervasives.max, _id), q) +let min_err q = Unary (Reduce (Explicit, _id, Pervasives.min, _id), q) +let average_err q = Unary (Reduce (Explicit, _avg_start, _avg_mix, _avg_stop), q) let is_empty q = Unary (Search (object @@ -833,10 +865,15 @@ let map2 f q = map (fun (x,y) -> x, f y) q let flatten_opt q = filter_map _id q -let opt_get_exn q = +let opt_unwrap q = QueryMap ((function | Some x -> x - | None -> invalid_arg "opt_get_exn"), q) + | None -> _exit_with_error "opt_unwrap"), q) + +let catch q = + QueryMap ((function + | `Ok x -> x + | `Error s -> _exit_with_error s), q) (** {6 Monadic stuff} *) @@ -852,10 +889,7 @@ let query_map f q = QueryMap (f, q) let lazy_ q = Unary (Lazy, q) -(** {6 Output containers} *) - -let to_list q = - QueryMap (Coll.to_list, q) +(** {6 Adapters} *) let to_array q = QueryMap ((fun c -> Array.of_list (Coll.to_list c)), q) @@ -872,11 +906,13 @@ let to_queue q = let to_stack q = QueryMap ((fun c s -> CCSequence.to_stack s (Coll.to_seq c)), q) -(** {6 Misc} *) - -let run_list q = run (q |> to_list) - -(** {6 Adapters} *) +module L = struct + let of_list l = Start (Coll.of_list l) + let to_list q = + QueryMap (Coll.to_list, q) + let run q = run (to_list q) + let run_exn q = run_exn (to_list q) +end module AdaptSet(S : Set.S) = struct let of_set set = @@ -888,7 +924,8 @@ module AdaptSet(S : Set.S) = struct in query_map f q - let run q = run (q |> to_set) + let run q = run (to_set q) + let run_exn q = run_exn (to_set q) end module AdaptMap(M : Map.S) = struct @@ -913,23 +950,57 @@ module AdaptMap(M : Map.S) = struct query_map f q let run q = run (q |> to_map) + let run_exn q = run_exn (q |> to_map) end module IO = struct - let slurp ic = + let _slurp with_input = let l = lazy ( - let buf_size = 256 in - let content = Buffer.create 120 - and buf = String.make buf_size 'a' in - let rec next () = - let num = input ic buf 0 buf_size in - if num = 0 - then Buffer.contents content (* EOF *) - else (Buffer.add_substring content buf 0 num; next ()) - in next () + with_input + (fun ic -> + let buf_size = 256 in + let content = Buffer.create 120 + and buf = String.make buf_size 'a' in + let rec next () = + let num = input ic buf 0 buf_size in + if num = 0 + then Buffer.contents content (* EOF *) + else (Buffer.add_substring content buf 0 num; next ()) + in next () + ) ) in lazy_ (return l) + let slurp ic = _slurp (fun f -> f ic) + + let _with_file_in filename f = + try + let ic = open_in filename in + try + let x = f ic in + close_in ic; + x + with e -> + close_in ic; + _exit_with_error (Printexc.to_string e) + with e -> + _exit_with_error (Printexc.to_string e) + + let _with_file_out filename f = + try + let oc = open_out filename in + try + let x = f oc in + close_out oc; + x + with e -> + close_out oc; + _exit_with_error (Printexc.to_string e) + with e -> + _exit_with_error (Printexc.to_string e) + + let slurp_file filename = _slurp (_with_file_in filename) + (* find [c] in [s], starting at offset [i] *) let rec _find s c i = if i >= String.length s then None @@ -952,11 +1023,43 @@ module IO = struct let f s = lazy (_lines s 0 |> CCSequence.to_list) in lazy_ (query_map f q) + let _join ~sep ?(stop="") l = + let buf = Buffer.create 128 in + Coll.to_seq l + |> CCSequence.iteri + (fun i x -> + if i>0 then Buffer.add_string buf sep; + Buffer.add_string buf x); + Buffer.add_string buf stop; + Buffer.contents buf + + let unlines q = + let f l = lazy (_join ~sep:"\n" ~stop:"\n" l) in + lazy_ (query_map f q) + + let join sep q = + let f l = lazy (_join ~sep l) in + lazy_ (query_map f q) + let out oc q = - run q |> output_string oc + run_exn q |> output_string oc let out_lines oc q = - run q + run_exn q |> Coll.to_seq |> CCSequence.iter (fun l -> output_string oc l; output_char oc '\n') + + let to_file_exn filename q = + _with_file_out filename (fun oc -> out oc q) + + let to_file filename q = + try `Ok (_with_file_out filename (fun oc -> out oc q)) + with Failure s -> `Error s + + let to_file_lines_exn filename q = + _with_file_out filename (fun oc -> out_lines oc q) + + let to_file_lines filename q = + try `Ok (_with_file_out filename (fun oc -> out_lines oc q)) + with Failure s -> `Error s end diff --git a/core/CCLinq.mli b/core/CCLinq.mli index 04e5266a..3195427b 100644 --- a/core/CCLinq.mli +++ b/core/CCLinq.mli @@ -37,14 +37,24 @@ the order of execution. {[ CCLinq.( - start_list [1;2;3] + of_list [1;2;3] |> flat_map_l (fun x -> CCList.(x -- (x+10))) |> sort () |> count () - |> M.to_list |> run + |> M.to_list + |> run_exn );; - : (int * int) list = [(13, 1); (12, 2); (11, 3); (10, 3); (9, 3); (8, 3); (7, 3); (6, 3); (5, 3); (4, 3); (3, 3); (2, 2); (1, 1)] + + +CCLinq.( + IO.slurp_file "/tmp/foo" + |> IO.lines + |> sort () + |> IO.to_file_lines "/tmp/bar" +);; +- : `Ok () ]} *) @@ -53,6 +63,7 @@ type 'a sequence = ('a -> unit) -> unit type 'a equal = 'a -> 'a -> bool type 'a ord = 'a -> 'a -> int type 'a hash = 'a -> int +type 'a with_err = [`Ok of 'a | `Error of string ] type 'a collection (** Abstract type of collections of objects of type 'a. Those cannot @@ -108,14 +119,16 @@ val of_string : string -> char collection t (** {6 Execution} *) -val run : 'a t -> 'a -(** Execute the actual query *) +val run : 'a t -> 'a with_err +(** Execute the query, possibly returning an error if things go wrong *) -val run_no_opt : 'a t -> 'a -(** Execute the query, without optimizing it at all *) +val run_exn : 'a t -> 'a +(** Execute the query, ignoring errors. Can raise an exception + if some execution step does. + @raise Failure if the query fails (or returns [`Error s]) *) -val run_list : 'a collection t -> 'a list -(** Shortcut to obtain a list *) +val run_no_optim : 'a t -> 'a with_err +(** Run without any optimization *) (** {6 Basics on Collections} *) @@ -125,12 +138,12 @@ val filter : ('a -> bool) -> 'a collection t -> 'a collection t val size : _ collection t -> int t -val choose : 'a collection t -> 'a option t -(** Choose one element (if any) in the collection *) +val choose : 'a collection t -> 'a t +(** Choose one element (if any) in the collection. Fails + if the collections is empty *) -val choose_exn : 'a collection t -> 'a t -(** Choose one element or fail. - @raise Invalid_argument if the collection is empty *) +val choose_err : 'a collection t -> 'a with_err t +(** Choose one element or fail explicitely *) val filter_map : ('a -> 'b option) -> 'a collection t -> 'b collection t (** Filter and map elements at once *) @@ -164,12 +177,11 @@ val distinct : ?cmp:'a ord -> unit -> 'a collection t -> 'a collection t (** {6 Queries on Maps} *) module M : sig - val get : 'a -> ('a, 'b) PMap.t t -> 'b option t + val get : 'a -> ('a, 'b) PMap.t t -> 'b t (** Select a key from a map *) - val get_exn : 'a -> ('a, 'b) PMap.t t -> 'b t - (** Unsafe version of {!get}. - @raise Not_found if the key is not present. *) + val get_err : 'a -> ('a, 'b) PMap.t t -> 'b with_err t + (** Explicit version of {!get}, with [`Error] if the key is not present *) val iter : ('a,'b) PMap.t t -> ('a*'b) collection t (** View a multimap as a proper collection *) @@ -227,16 +239,14 @@ val size : _ collection t -> int t (** Count how many elements the collection contains *) val reduce : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> - 'a collection t -> 'c option t + 'a collection t -> 'c t (** [reduce start mix stop q] uses [start] on the first element of [q], and combine the result with following elements using [mix]. The final - value is transformed using [stop]. This returns [None] if the collection - is empty *) + value is transformed using [stop]. *) -val reduce_exn : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> - 'a collection t -> 'c t -(** Same as {!reduce} but fails on empty collections. - @raise Invalid_argument if the collection is empty *) +val reduce_err : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> + 'a collection t -> 'c with_err t +(** Same as {!reduce} but fails explicitely on empty collections. *) val is_empty : 'a collection t -> bool t @@ -244,13 +254,13 @@ val sum : int collection t -> int t val contains : ?eq:'a equal -> 'a -> 'a collection t -> bool t -val average : int collection t -> int option t -val max : int collection t -> int option t -val min : int collection t -> int option t +val average : int collection t -> int t +val max : int collection t -> int t +val min : int collection t -> int t -val average_exn : int collection t -> int t -val max_exn : int collection t -> int t -val min_exn : int collection t -> int t +val average_err : int collection t -> int with_err t +val max_err : int collection t -> int with_err t +val min_err : int collection t -> int with_err t val for_all : ('a -> bool) -> 'a collection t -> bool t val exists : ('a -> bool) -> 'a collection t -> bool t @@ -312,9 +322,8 @@ val map2 : ('a -> 'b) -> ('c * 'a) collection t -> ('c * 'b) collection t val flatten_opt : 'a option collection t -> 'a collection t (** Flatten the collection by removing options *) -val opt_get_exn : 'a option t -> 'a t -(** unwrap an option type. - @raise Invalid_argument if the option value is [None] *) +val opt_unwrap : 'a option t -> 'a t +(** unwrap an option type. Fails if the option value is [None] *) (** {6 Monad} @@ -335,12 +344,14 @@ val query_map : ('a -> 'b) -> 'a t -> 'b t (** {6 Misc} *) +val catch : 'a with_err t -> 'a t +(** Catch errors within the execution itself. In other words, [run (catch q)] + with succeed with [x] if [q] succeeds with [`Ok x], and fail if [q] + succeeds with [`Error s] or if [q] fails *) + val lazy_ : 'a lazy_t t -> 'a t -(** {6 Output Containers} *) - -val to_list : 'a collection t -> 'a list t -(** Build a list of results *) +(** {6 Adapters} *) val to_array : 'a collection t -> 'a array t (** Build an array of results *) @@ -356,24 +367,38 @@ val to_queue : 'a collection t -> ('a Queue.t -> unit) t val to_stack : 'a collection t -> ('a Stack.t -> unit) t -(** {6 Adapters} *) +module L : sig + val of_list : 'a list -> 'a collection t + val to_list : 'a collection t -> 'a list t + val run : 'a collection t -> 'a list with_err + val run_exn : 'a collection t -> 'a list +end module AdaptSet(S : Set.S) : sig val of_set : S.t -> S.elt collection t val to_set : S.elt collection t -> S.t t - val run : S.elt collection t -> S.t + val run : S.elt collection t -> S.t with_err + val run_exn : S.elt collection t -> S.t end module AdaptMap(M : Map.S) : sig val of_map : 'a M.t -> (M.key * 'a) collection t val to_pmap : 'a M.t -> (M.key, 'a) PMap.t val to_map : (M.key * 'a) collection t -> 'a M.t t - val run : (M.key * 'a) collection t -> 'a M.t + val run : (M.key * 'a) collection t -> 'a M.t with_err + val run_exn : (M.key * 'a) collection t -> 'a M.t end module IO : sig val slurp : in_channel -> string t - (** Slurp the whole channel in (blocking), returning the corresponding string *) + (** Slurp the whole channel in (blocking), returning the + corresponding string. The channel will be read at most once + during execution, and its content cached; however the channel + might never get read because evaluation is lazy. *) + + val slurp_file : string -> string t + (** Read a whole file (given by name) and return its content as + a string *) val lines : string t -> string collection t (** Convert a string into a collection of lines *) @@ -381,7 +406,20 @@ module IO : sig val lines' : string t -> string list t (** Convert a string into a list of lines *) + val join : string -> string collection t -> string t + + val unlines : string collection t -> string t + (** Join lines together *) + val out : out_channel -> string t -> unit val out_lines : out_channel -> string collection t -> unit (** Evaluate the query and print it line by line on the output *) + + (** {8 Run methods} *) + + val to_file : string -> string t -> unit with_err + val to_file_exn : string -> string t -> unit + + val to_file_lines : string -> string collection t -> unit with_err + val to_file_lines_exn : string -> string collection t -> unit end From 4c69e8216c321554417d25ece3ac663714580576 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 13:52:50 +0200 Subject: [PATCH 43/52] CCError for functional error handling --- _oasis | 2 +- core/CCError.ml | 124 +++++++++++++++++++++++++++++++++++++++++++++++ core/CCError.mli | 82 +++++++++++++++++++++++++++++++ 3 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 core/CCError.ml create mode 100644 core/CCError.mli diff --git a/_oasis b/_oasis index 6251d22a..da7316d4 100644 --- a/_oasis +++ b/_oasis @@ -39,7 +39,7 @@ Flag "bench" Library "containers" Path: core Modules: CCVector, CCDeque, CCGen, CCSequence, CCFQueue, CCMultiMap, - CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, + CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError, CCLeftistheap, CCList, CCOpt, CCPair, CCFun, CCHash, CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd, CCLinq FindlibName: containers diff --git a/core/CCError.ml b/core/CCError.ml new file mode 100644 index 00000000..6d92b531 --- /dev/null +++ b/core/CCError.ml @@ -0,0 +1,124 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Error Monad} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + +(** {2 Basics} *) + +type +'a t = + [ `Ok of 'a + | `Error of string + ] + +let return x = `Ok x + +let fail s = `Error s + +let of_exn e = `Error (Printexc.to_string e) + +let map f e = match e with + | `Ok x -> `Ok (f x) + | `Error s -> `Error s + +let flat_map f e = match e with + | `Ok x -> f x + | `Error s -> `Error s + +let guard f = + try + return (f ()) + with e -> of_exn e + +let (>|=) e f = map f e + +let (>>=) e f = flat_map f e + +let equal eq a b = match a, b with + | `Ok x, `Ok y -> eq x y + | `Error s, `Error s' -> s = s' + | _ -> false + +let compare cmp a b = match a, b with + | `Ok x, `Ok y -> cmp x y + | `Ok _, _ -> 1 + | _, `Ok _ -> -1 + | `Error s, `Error s' -> String.compare s s' + +(** {2 Collections} *) + +let map_l f l = + let rec map acc l = match l with + | [] -> `Ok (List.rev acc) + | x::l' -> + match f x with + | `Error s -> `Error s + | `Ok y -> map (y::acc) l' + in map [] l + +exception LocalExit of string + +let fold_seq f acc seq = + try + let acc = ref acc in + seq + (fun x -> match f !acc x with + | `Error s -> raise (LocalExit s) + | `Ok y -> acc := y + ); + `Ok !acc + with LocalExit s -> `Error s + +let fold_l f acc l = fold_seq f acc (fun k -> List.iter k l) + +(** {2 Conversions} *) + +let to_opt = function + | `Ok x -> Some x + | `Error _ -> None + +let of_opt = function + | None -> `Error "of_opt" + | Some x -> `Ok x + +let to_seq e k = match e with + | `Ok x -> k x + | `Error _ -> () + +(** {2 IO} *) + +let pp pp_x buf e = match e with + | `Ok x -> Printf.bprintf buf "ok(%a)" pp_x x + | `Error s -> Printf.bprintf buf "error(%s)" s + +let print pp_x fmt e = match e with + | `Ok x -> Format.fprintf fmt "ok(%a)" pp_x x + | `Error s -> Format.fprintf fmt "error(%s)" s diff --git a/core/CCError.mli b/core/CCError.mli new file mode 100644 index 00000000..76a49065 --- /dev/null +++ b/core/CCError.mli @@ -0,0 +1,82 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Error Monad} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + +(** {2 Basics} *) + +type +'a t = + [ `Ok of 'a + | `Error of string + ] + +val return : 'a -> 'a t + +val fail : string -> 'a t + +val of_exn : exn -> 'a t + +val map : ('a -> 'b) -> 'a t -> 'b t + +val flat_map : ('a -> 'b t) -> 'a t -> 'b t + +val guard : (unit -> 'a) -> 'a t + +val (>|=) : 'a t -> ('a -> 'b) -> 'b t + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t + +val equal : 'a equal -> 'a t equal + +val compare : 'a ord -> 'a t ord + +(** {2 Collections} *) + +val map_l : ('a -> 'b t) -> 'a list -> 'b list t + +val fold_l : ('acc -> 'a -> 'acc t) -> 'acc -> 'a list -> 'acc t + +val fold_seq : ('acc -> 'a -> 'acc t) -> 'acc -> 'a sequence -> 'acc t + +(** {2 Conversions} *) + +val to_opt : 'a t -> 'a option + +val of_opt : 'a option -> 'a t + +val to_seq : 'a t -> 'a sequence + +(** {2 IO} *) + +val pp : 'a printer -> 'a t printer + +val print : 'a formatter -> 'a t formatter From e2bb0e93cbfad0b2b216603c932fd770fc8e037e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 14 Jun 2014 14:30:15 +0200 Subject: [PATCH 44/52] CCArray.find --- core/CCArray.ml | 8 ++++++++ core/CCArray.mli | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/core/CCArray.ml b/core/CCArray.ml index 2838c7cc..7d2d6dfa 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -57,6 +57,14 @@ let reverse_in_place a = a = [| 6;5;4;3;2;1 |] *) +let find f a = + let rec find i = + if i = Array.length a then None + else match f a.(i) with + | Some _ as res -> res + | None -> find (i+1) + in find 0 + let filter_map f a = let rec aux acc i = if i = Array.length a diff --git a/core/CCArray.mli b/core/CCArray.mli index e31ce665..5ed08de1 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -41,6 +41,10 @@ val filter : ('a -> bool) -> 'a t -> 'a t val reverse_in_place : 'a t -> unit (** Reverse the array in place *) +val find : ('a -> 'b option) -> 'a t -> 'b option +(** [find f a] returns [Some y] if there is an element [x] such + that [f x = Some y], else it returns [None] *) + val filter_map : ('a -> 'b option) -> 'a t -> 'b t (** Map each element into another value, or discard it *) From 9a9954c42041b273c224743e4627c803f90fb52b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Jun 2014 18:58:32 +0200 Subject: [PATCH 45/52] Array.Sub: slice implementation --- core/CCArray.ml | 359 +++++++++++++++++++++++++++++++++++++++-------- core/CCArray.mli | 143 ++++++++++++++----- 2 files changed, 406 insertions(+), 96 deletions(-) diff --git a/core/CCArray.ml b/core/CCArray.ml index 7d2d6dfa..10fe36e6 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -25,26 +25,197 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Array utils} *) +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int + +module type S = sig + type 'a t + (** Array, or sub-array, containing elements of type ['a] *) + + val empty : 'a t + + val equal : 'a equal -> 'a t equal + + val compare : 'a ord -> 'a t ord + + val get : 'a t -> int -> 'a + + val set : 'a t -> int -> 'a -> unit + + val length : _ t -> int + + val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b + + val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b + (** fold left on array, with index *) + + val iter : ('a -> unit) -> 'a t -> unit + + val iteri : (int -> 'a -> unit) -> 'a t -> unit + + val reverse_in_place : 'a t -> unit + (** Reverse the array in place *) + + val find : ('a -> 'b option) -> 'a t -> 'b option + (** [find f a] returns [Some y] if there is an element [x] such + that [f x = Some y], else it returns [None] *) + + val for_all : ('a -> bool) -> 'a t -> bool + + val for_all2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + (** Forall on pairs of arrays. + @raise Invalid_argument if they have distinct lengths *) + + val exists : ('a -> bool) -> 'a t -> bool + + val exists2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + (** Exists on pairs of arrays. + @raise Invalid_argument if they have distinct lengths *) + + val shuffle : 'a t -> unit + (** shuffle randomly the array, in place *) + + val shuffle_with : Random.State.t -> 'a t -> unit + (** Like shuffle but using a specialized random state *) + + val to_seq : 'a t -> 'a sequence + val to_gen : 'a t -> 'a gen + val to_klist : 'a t -> 'a klist + + (** {2 IO} *) + + val pp: ?sep:string -> (Buffer.t -> 'a -> unit) + -> Buffer.t -> 'a t -> unit + (** print an array of items with printing function *) + + val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) + -> Buffer.t -> 'a t -> unit + (** print an array, giving the printing function both index and item *) + + val print : ?sep:string -> (Format.formatter -> 'a -> unit) + -> Format.formatter -> 'a t -> unit + (** print an array of items with printing function *) +end + +(** {2 General Implementation} +Most of those functions that a range [(i,j)] with +[i] included and [j] excluded *) + +let rec _foldi f acc a i j = + if i = j then acc else _foldi f (f acc i a.(i)) a (i+1) j + +let _reverse_in_place a i j = + if i=j then () + else + for k = i to j/2 do + let t = a.(k) in + a.(k) <- a.(j-k); + a.(j-k) <- t; + done + +let rec _equal eq a1 i1 j1 a2 i2 j2 = + if i1 = j1 || i2 = j2 + then (assert (i1=j1 && i2=j2); true) + else + eq a1.(i1) a2.(i2) && _equal eq a1 (i1+1) j1 a2 (i2+2) j2 + +let rec _compare cmp a1 i1 j1 a2 i2 j2 = + if i1 = j1 + then if i2=j2 then 0 else -1 + else if i2=j2 + then 1 + else + let c = cmp a1.(i1) a2.(i2) in + if c = 0 + then _compare cmp a1 (i1+1) j1 a2 (i2+2) j2 + else c + +let rec _find f a i j = + if i = j then None + else match f a.(i) with + | Some _ as res -> res + | None -> _find f a (i+1) j + +let rec _for_all p a i j = + i = j || (p a.(i) && _for_all p a (i+1) j) + +let rec _exists p a i j = + i <> j && (p a.(i) || _exists p a (i+1) j) + +let rec _for_all2 p a1 a2 i1 i2 j1 = + i1 = j1 || (p a1.(i1) a2.(i2) && _for_all2 p a1 a2 (i1+1) (i2+1) j1) + +let rec _exists2 p a1 a2 i1 i2 j1 = + i1 <> j1 && (p a1.(i1) a2.(i2) || _exists2 p a1 a2 (i1+1) (i2+1) j1) + +(* shuffle a[i...j[ using the given int random generator + See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) +let _shuffle _rand_int a i j = + for k = i to j do + let l = _rand_int k in + let tmp = a.(l) in + a.(l) <- a.(k); + a.(l) <- tmp; + done + +let _pp ~sep pp_item buf a i j = + for k = i to j - 1 do + if k > i then Buffer.add_string buf sep; + pp_item buf a.(k) + done + +let _pp_i ~sep pp_item buf a i j = + for k = i to j - 1 do + if k > i then Buffer.add_string buf sep; + pp_item buf k a.(k) + done + +let _print ~sep pp_item fmt a i j = + for k = i to j - 1 do + if k > i then Format.pp_print_string fmt sep; + pp_item fmt a.(k) + done + +let _to_gen a i j = + let k = ref i in + fun () -> + if !k < j + then ( + let x = a.(!k) in + incr k; + Some x + ) else None + +let rec _to_klist a i j () = + if i=j then `Nil else `Cons (a.(i), _to_klist a (i+1) j) + +(** {2 Arrays} *) + type 'a t = 'a array let empty = [| |] let map = Array.map -let foldi f acc a = - let rec recurse acc i = - if i = Array.length a then acc else recurse (f acc i a.(i)) (i+1) - in recurse acc 0 +let length = Array.length + +let get = Array.get + +let set = Array.set + +let fold = Array.fold_left + +let foldi f acc a = _foldi f acc a 0 (Array.length a) + +let iter = Array.iter + +let iteri = Array.iteri let reverse_in_place a = - if a = [| |] then () - else - let n = Array.length a in - for i = 0 to (n-1)/2 do - let t = a.(i) in - a.(i) <- a.(n-i-1); - a.(n-i-1) <- t; - done + _reverse_in_place a 0 (Array.length a) (*$T reverse_in_place [| |]; true @@ -58,12 +229,7 @@ let reverse_in_place a = *) let find f a = - let rec find i = - if i = Array.length a then None - else match f a.(i) with - | Some _ as res -> res - | None -> find (i+1) - in find 0 + _find f a 0 (Array.length a) let filter_map f a = let rec aux acc i = @@ -116,23 +282,19 @@ let flat_map f a = let (>>=) a f = flat_map f a -let for_all p a = - let rec check i = - i = Array.length a || (p a.(i) && check (i+1)) - in check 0 +let for_all p a = _for_all p a 0 (Array.length a) -let for_all2 p a1 a2 = - let rec check i = - i = Array.length a1 || (p a1.(i) a2.(i) && check (i+1)) - in - if Array.length a1 <> Array.length a2 - then raise (Invalid_argument "forall2") - else check 0 +let exists p a = _exists p a 0 (Array.length a) -let exists p a = - let rec check i = - i < Array.length a && (p a.(i) || check (i+1)) - in check 0 +let for_all2 p a b = + Array.length a = Array.length b + && + _for_all2 p a b 0 0 (Array.length a) + +let exists2 p a b = + Array.length a = Array.length b + && + _exists2 p a b 0 0 (Array.length a) let (--) i j = if i<=j @@ -147,37 +309,110 @@ let except_idx a i = (fun acc j elt -> if i = j then acc else elt::acc) [] a -(* Randomly shuffle the array, in place. - See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) -let _shuffle _rand_int a = - for i = 1 to Array.length a - 1 do - let j = _rand_int i in - let tmp = a.(i) in - a.(i) <- a.(j); - a.(j) <- tmp; - done +let equal eq a b = + Array.length a = Array.length b + && + _equal eq a 0 (Array.length a) b 0 (Array.length b) -let shuffle a = _shuffle Random.int a +let compare cmp a b = + _compare cmp a 0 (Array.length a) b 0 (Array.length b) -let shuffle_with st a = _shuffle (Random.State.int st) a +let shuffle a = _shuffle Random.int a 0 (Array.length a) -(** print an array of items using the printing function *) -let pp ?(sep=", ") pp_item buf a = - for i = 0 to Array.length a - 1 do - (if i > 0 then Buffer.add_string buf sep); - pp_item buf a.(i) - done +let shuffle_with st a = _shuffle (Random.State.int st) a 0 (Array.length a) -(** print an array of items using the printing function *) -let pp_i ?(sep=", ") pp_item buf a = - for i = 0 to Array.length a - 1 do - (if i > 0 then Buffer.add_string buf sep); - pp_item buf i a.(i) - done +let pp ?(sep=", ") pp_item buf a = _pp ~sep pp_item buf a 0 (Array.length a) -let print ?(sep=", ") pp_item fmt a = - Array.iteri - (fun i x -> - if i > 0 then Format.pp_print_string fmt sep; - pp_item fmt x - ) a +let pp_i ?(sep=", ") pp_item buf a = _pp_i ~sep pp_item buf a 0 (Array.length a) + +let print ?(sep=", ") pp_item fmt a = _print ~sep pp_item fmt a 0 (Array.length a) + +let to_seq a k = iter k a + +let to_gen a = _to_gen a 0 (Array.length a) + +let to_klist a = _to_klist a 0 (Array.length a) + +module Sub = struct + type 'a t = { + arr : 'a array; + i : int; (** Start index (included) *) + j : int; (** Stop index (excluded) *) + } + + let empty = { + arr = [||]; + i = 0; + j = 0; + } + + let make arr i ~len = + if i+len > Array.length arr then invalid_arg "Array.Sub.make"; + { arr; i; j=i+len; } + + let full arr = { arr; i=0; j=Array.length arr; } + + let underlying a = a.arr + + let length a = a.j - a.i + + let copy a = Array.sub a.arr a.i (length a) + + let sub a i len = make a.arr (a.i + i) len + + let equal eq a b = + length a = length b && _equal eq a.arr a.i a.j b.arr b.i b.j + + let compare cmp a b = + _compare cmp a.arr a.i a.j b.arr b.i b.j + + let fold f acc a = + let rec _fold acc i j = + if i=j then acc + else _fold (f acc a.arr.(i)) (i+1) j + in _fold acc a.i a.j + + let foldi f acc a = _foldi f acc a.arr a.i a.j + + let get a i = a.arr.(a.i+i) + + let set a i x = a.arr.(a.i+i) <- x + + let iter f a = + for k=a.i to a.j-1 do f a.arr.(k) done + + let iteri f a = + for k=0 to length a-1 do f k a.arr.(a.i + k) done + + let reverse_in_place a = _reverse_in_place a.arr a.i a.j + + let find f a = _find f a.arr a.i a.j + + let for_all p a = _for_all p a.arr a.i a.j + + let exists p a = _exists p a.arr a.i a.j + + let for_all2 p a b = + length a = length b && _for_all2 p a.arr b.arr a.i b.i b.j + + let exists2 p a b = + length a = length b && _exists2 p a.arr b.arr a.i b.i a.j + + let shuffle a = + _shuffle Random.int a.arr a.i a.j + + let shuffle_with st a = + _shuffle (Random.State.int st) a.arr a.i a.j + + let pp ?(sep=", ") pp_item buf a = _pp ~sep pp_item buf a.arr a.i a.j + + let pp_i ?(sep=", ") pp_item buf a = _pp_i ~sep pp_item buf a.arr a.i a.j + + let print ?(sep=", ") pp_item fmt a = _print ~sep pp_item fmt a.arr a.i a.j + + let to_seq a k = iter k a + + let to_gen a = _to_gen a.arr a.i a.j + + let to_klist a = _to_klist a.arr a.i a.j +end diff --git a/core/CCArray.mli b/core/CCArray.mli index 5ed08de1..7f4bd8e8 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -25,63 +25,138 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Array utils} *) +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int + +(** {2 Abstract Signature} *) + +module type S = sig + type 'a t + (** Array, or sub-array, containing elements of type ['a] *) + + val empty : 'a t + + val equal : 'a equal -> 'a t equal + + val compare : 'a ord -> 'a t ord + + val get : 'a t -> int -> 'a + + val set : 'a t -> int -> 'a -> unit + + val length : _ t -> int + + val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b + + val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b + (** fold left on array, with index *) + + val iter : ('a -> unit) -> 'a t -> unit + + val iteri : (int -> 'a -> unit) -> 'a t -> unit + + val reverse_in_place : 'a t -> unit + (** Reverse the array in place *) + + val find : ('a -> 'b option) -> 'a t -> 'b option + (** [find f a] returns [Some y] if there is an element [x] such + that [f x = Some y], else it returns [None] *) + + val for_all : ('a -> bool) -> 'a t -> bool + + val for_all2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + (** Forall on pairs of arrays. + @raise Invalid_argument if they have distinct lengths *) + + val exists : ('a -> bool) -> 'a t -> bool + + val exists2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + (** Exists on pairs of arrays. + @raise Invalid_argument if they have distinct lengths *) + + val shuffle : 'a t -> unit + (** shuffle randomly the array, in place *) + + val shuffle_with : Random.State.t -> 'a t -> unit + (** Like shuffle but using a specialized random state *) + + val to_seq : 'a t -> 'a sequence + val to_gen : 'a t -> 'a gen + val to_klist : 'a t -> 'a klist + + (** {2 IO} *) + + val pp: ?sep:string -> (Buffer.t -> 'a -> unit) + -> Buffer.t -> 'a t -> unit + (** print an array of items with printing function *) + + val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) + -> Buffer.t -> 'a t -> unit + (** print an array, giving the printing function both index and item *) + + val print : ?sep:string -> (Format.formatter -> 'a -> unit) + -> Format.formatter -> 'a t -> unit + (** print an array of items with printing function *) +end + +(** {2 Arrays} *) + type 'a t = 'a array -val empty : 'a t +include S with type 'a t := 'a t val map : ('a -> 'b) -> 'a t -> 'b t -val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b -(** fold left on array, with index *) - val filter : ('a -> bool) -> 'a t -> 'a t (** Filter elements out of the array. Only the elements satisfying the given predicate will be kept. *) -val reverse_in_place : 'a t -> unit -(** Reverse the array in place *) - -val find : ('a -> 'b option) -> 'a t -> 'b option -(** [find f a] returns [Some y] if there is an element [x] such - that [f x = Some y], else it returns [None] *) - val filter_map : ('a -> 'b option) -> 'a t -> 'b t (** Map each element into another value, or discard it *) -val flat_map : ('a -> 'b t) -> 'a t -> 'b t +val flat_map : ('a -> 'b t) -> 'a t -> 'b array (** transform each element into an array, then flatten *) val (>>=) : 'a t -> ('a -> 'b t) -> 'b t (** Infix version of {!flat_map} *) -val for_all : ('a -> bool) -> 'a t -> bool - -val for_all2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool -(** Forall on pairs of arrays. - @raise Invalid_argument if they have distinct lengths *) - -val exists : ('a -> bool) -> 'a t -> bool +val except_idx : 'a t -> int -> 'a list +(** Remove given index, obtaining the list of the other elements *) val (--) : int -> int -> int t (** Range array *) -val except_idx : 'a t -> int -> 'a list -(** Remove given index *) +(** {2 Slices} +A slice is a part of an array, that requires no copying and shares +its storage with the original array. -val shuffle : 'a t -> unit -(** shuffle randomly the array, in place *) +All indexing in a slice is relative to the beginning of a slice, not +to the underlying array (meaning a slice is effectively like +a regular array) *) -val shuffle_with : Random.State.t -> 'a t -> unit -(** Like shuffle but using a specialized random state *) +module Sub : sig + type 'a t + (** A slice is an array, an offset, and a length *) -val pp: ?sep:string -> (Buffer.t -> 'a -> unit) - -> Buffer.t -> 'a t -> unit -(** print an array of items with printing function *) + val make : 'a array -> int -> len:int -> 'a t + (** Create a slice. + @raise Invalid_argument if the slice isn't valid *) -val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) - -> Buffer.t -> 'a t -> unit -(** print an array, giving the printing function both index and item *) + val full : 'a array -> 'a t + (** Slice that covers the full array *) + + val underlying : 'a t -> 'a array + (** Underlying array (shared). Modifying this array will modify the slice *) + + val copy : 'a t -> 'a array + (** Copy into a new array *) + + val sub : 'a t -> int -> int -> 'a t + (** Sub-slice *) + + include S with type 'a t := 'a t +end -val print : ?sep:string -> (Format.formatter -> 'a -> unit) - -> Format.formatter -> 'a t -> unit -(** print an array of items with printing function *) From 4d2fa4ea4c3a7a447a24f48b8c4f100aa298aaa8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Jun 2014 19:13:42 +0200 Subject: [PATCH 46/52] small bugfix in CCArray.Sub: bound checking --- core/CCArray.ml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/CCArray.ml b/core/CCArray.ml index 10fe36e6..b63f8671 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -374,9 +374,15 @@ module Sub = struct let foldi f acc a = _foldi f acc a.arr a.i a.j - let get a i = a.arr.(a.i+i) + let get a i = + let j = a.i + i in + if i<0 || j>=a.j then invalid_arg "Array.Sub.get"; + a.arr.(j) - let set a i x = a.arr.(a.i+i) <- x + let set a i x = + let j = a.i + i in + if i<0 || j>=a.j then invalid_arg "Array.Sub.get"; + a.arr.(j) <- x let iter f a = for k=a.i to a.j-1 do f a.arr.(k) done From 90239f2c3eeef9a308ddc6375b216d0d6539ba7a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Jun 2014 19:35:47 +0200 Subject: [PATCH 47/52] CCArray.Sub.of_slice; bugfix in reverse_in_place --- core/CCArray.ml | 8 +++++--- core/CCArray.mli | 5 +++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/core/CCArray.ml b/core/CCArray.ml index b63f8671..7de43cfe 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -110,10 +110,10 @@ let rec _foldi f acc a i j = let _reverse_in_place a i j = if i=j then () else - for k = i to j/2 do + for k = i to (j-1)/2 do let t = a.(k) in - a.(k) <- a.(j-k); - a.(j-k) <- t; + a.(k) <- a.(j-1-k); + a.(j-1-k) <- t; done let rec _equal eq a1 i1 j1 a2 i2 j2 = @@ -350,6 +350,8 @@ module Sub = struct if i+len > Array.length arr then invalid_arg "Array.Sub.make"; { arr; i; j=i+len; } + let of_slice (arr,i,len) = make arr i ~len + let full arr = { arr; i=0; j=Array.length arr; } let underlying a = a.arr diff --git a/core/CCArray.mli b/core/CCArray.mli index 7f4bd8e8..a961bd38 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -145,6 +145,11 @@ module Sub : sig (** Create a slice. @raise Invalid_argument if the slice isn't valid *) + val of_slice : ('a array * int * int) -> 'a t + (** Make a sub-array from a triple [(arr, i, len)] where [arr] is the array, + [i] the offset in [arr], and [len] the number of elements of the slice. + @raise Invalid_argument if the slice isn't valid (See {!make}) *) + val full : 'a array -> 'a t (** Slice that covers the full array *) From d0393cd9b65bafa905bfa214fae128b7463fdfd0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Jun 2014 19:36:15 +0200 Subject: [PATCH 48/52] a few more functions in CCVector, including in-place filtering and efficient non-order-preserving removal --- core/CCVector.ml | 30 ++++++++++++++++++++++++++++-- core/CCVector.mli | 23 +++++++++++++++++++---- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/core/CCVector.ml b/core/CCVector.ml index 28613205..d4809056 100644 --- a/core/CCVector.ml +++ b/core/CCVector.ml @@ -42,6 +42,8 @@ type ('a,'mut) t = { mutable vec : 'a array; } +type 'a vector = ('a, rw) t + let freeze v = { size=v.size; vec=v.vec; @@ -145,6 +147,14 @@ let set v i x = if i < 0 || i >= v.size then failwith "Vector.set"; Array.unsafe_set v.vec i x +let remove v i = + if i < 0 || i >= v.size then failwith "Vector.remove"; + (* if v.(i) not the last element, then put last element at index i *) + if i < v.size - 1 + then v.vec.(i) <- v.vec.(v.size - 1); + (* remove one element *) + v.size <- v.size - 1 + let append_seq a seq = seq (fun x -> push a x) @@ -251,6 +261,20 @@ let map f v = vec=Array.map f v.vec } +let filter' p v = + let i = ref (v.size - 1) in + while !i >= 0 do + if not (p v.vec.(! i)) + (* remove i-th item! *) + then remove v !i; + decr i + done + +(*$T + let v = 1 -- 10 in filter' (fun x->x<4) v; \ + to_list v |> List.sort Pervasives.compare = [1;2;3] +*) + let filter p v = if _empty_array v then create () @@ -377,7 +401,7 @@ let of_seq ?(init=create ()) seq = let to_seq v k = iter k v -let slice v start len = +let slice_seq v start len = assert (start >= 0 && len >= 0); fun k -> assert (start+len < v.size); @@ -387,9 +411,11 @@ let slice v start len = done (*$T - slice (of_list [0;1;2;3;4]) 1 3 |> CCList.of_seq = [1;2;3] + slice_seq (of_list [0;1;2;3;4]) 1 3 |> CCList.of_seq = [1;2;3] *) +let slice v = (v.vec, 0, v.size) + let (--) i j = if i>j then init (i-j+1) (fun k -> i-k) diff --git a/core/CCVector.mli b/core/CCVector.mli index e981f6f5..26ba90c3 100644 --- a/core/CCVector.mli +++ b/core/CCVector.mli @@ -34,6 +34,9 @@ type ('a, 'mut) t (** the type of a vector of elements of type ['a], with a mutability flat ['mut] *) +type 'a vector = ('a, rw) t +(** Type synonym: a ['a vector] is mutable. *) + type 'a sequence = ('a -> unit) -> unit type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a gen = unit -> 'a option @@ -110,10 +113,10 @@ val sort : ('a -> 'a -> int) -> ('a, _) t -> ('a, 'mut) t (** Sort the vector *) val sort' : ('a -> 'a -> int) -> ('a, rw) t -> unit -(** sort the vector in place*) +(** Sort the vector in place *) val uniq_sort : ('a -> 'a -> int) -> ('a, rw) t -> unit -(** sort the array and remove duplicates in place*) +(** Sort the array and remove duplicates, in place*) val iter : ('a -> unit) -> ('a,_) t -> unit (** iterate on the vector *) @@ -127,6 +130,10 @@ val map : ('a -> 'b) -> ('a,_) t -> ('b, 'mut) t val filter : ('a -> bool) -> ('a,_) t -> ('a, 'mut) t (** filter elements from vector *) +val filter' : ('a -> bool) -> ('a, rw) t -> unit +(** Filter elements in place. Does {b NOT} preserve the order + of the elements. *) + val fold : ('b -> 'a -> 'b) -> 'b -> ('a,_) t -> 'b (** fold on elements of the vector *) @@ -164,6 +171,10 @@ val set : ('a, rw) t -> int -> 'a -> unit (** access element, or @raise Failure if bad index *) +val remove : ('a, rw) t -> int -> unit +(** Remove the [n-th] element of the vector. Does {b NOT} preserve the order + of the elements (might swap with the last element) *) + val rev : ('a,_) t -> ('a, 'mut) t (** Reverse the vector *) @@ -196,8 +207,12 @@ val of_seq : ?init:('a,rw) t -> 'a sequence -> ('a, rw) t val to_seq : ('a,_) t -> 'a sequence -val slice : ('a,_) t -> int -> int -> 'a sequence -(** [slice v start len] is the sequence of elements from [v.(start)] +val slice : ('a,rw) t -> ('a array * int * int) +(** Vector as an array slice. By doing it we expose the internal array, so + be careful! *) + +val slice_seq : ('a,_) t -> int -> int -> 'a sequence +(** [slice_seq v start len] is the sequence of elements from [v.(start)] to [v.(start+len-1)]. *) val of_klist : ?init:('a, rw) t -> 'a klist -> ('a, rw) t From 74856b578aca7b8d27960d91ed2479d0c1c8df56 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Jun 2014 22:34:09 +0200 Subject: [PATCH 49/52] misc/PrintBox: printing of nested box into ascii --- .ocamlinit | 2 +- _oasis | 2 +- misc/printBox.ml | 329 ++++++++++++++++++++++++++++++++++++++++++++++ misc/printBox.mli | 100 ++++++++++++++ 4 files changed, 431 insertions(+), 2 deletions(-) create mode 100644 misc/printBox.ml create mode 100644 misc/printBox.mli diff --git a/.ocamlinit b/.ocamlinit index f477d71c..7711334d 100644 --- a/.ocamlinit +++ b/.ocamlinit @@ -9,7 +9,7 @@ #load "containers_misc.cma";; #require "threads";; #load "containers_thread.cma";; -open Containers;; +open Containers_misc;; #install_printer Bencode.pretty;; #install_printer HGraph.Default.fmt;; #require "CamlGI";; diff --git a/_oasis b/_oasis index da7316d4..ff9b3700 100644 --- a/_oasis +++ b/_oasis @@ -59,7 +59,7 @@ Library "containers_misc" PHashtbl, SkipList, SplayTree, SplayMap, Univ, Bij, PiCalculus, Bencode, Sexp, RAL, UnionFind, SmallSet, AbsSet, CSM, - ActionMan, BencodeOnDisk, TTree, + ActionMan, BencodeOnDisk, TTree, PrintBox, HGraph, Automaton, Conv, Bidir, Iteratee, Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact BuildDepends: unix,containers diff --git a/misc/printBox.ml b/misc/printBox.ml new file mode 100644 index 00000000..4d8f5eca --- /dev/null +++ b/misc/printBox.ml @@ -0,0 +1,329 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Pretty-Printing of Boxes} *) + +type position = { x:int ; y: int } + +let origin = {x=0; y=0;} + +let _move pos x y = {x=pos.x + x; y=pos.y + y} +let _add pos1 pos2 = _move pos1 pos2.x pos2.y +let _move_x pos x = _move pos x 0 +let _move_y pos y = _move pos 0 y + +(** {2 Output: where to print to} *) + +module Output = struct + type t = { + put_char : position -> char -> unit; + put_string : position -> string -> unit; + put_sub_string : position -> string -> int -> int -> unit; + flush : unit -> unit; + } + + let put_char out pos c = out.put_char pos c + let put_string out pos s = out.put_string pos s + let put_sub_string out pos s s_i s_len = out.put_sub_string pos s s_i s_len + + (** An internal buffer, suitable for writing efficiently, then + convertable into a list of lines *) + type buffer = { + mutable buf_lines : buf_line array; + mutable buf_len : int; + } + and buf_line = { + mutable bl_str : string; + mutable bl_len : int; + } + + let _make_line _ = {bl_str=""; bl_len=0} + + let _ensure_lines buf i = + if i >= Array.length buf.buf_lines + then ( + let lines' = Array.init (2 * i + 5) _make_line in + Array.blit buf.buf_lines 0 lines' 0 buf.buf_len; + buf.buf_lines <- lines'; + ) + + let _ensure_line line i = + if i >= String.length line.bl_str + then ( + let str' = String.make (2 * i + 5) ' ' in + String.blit line.bl_str 0 str' 0 line.bl_len; + line.bl_str <- str'; + ) + + let _buf_put_char buf pos c = + _ensure_lines buf pos.y; + _ensure_line buf.buf_lines.(pos.y) pos.x; + buf.buf_len <- max buf.buf_len (pos.y+1); + let line = buf.buf_lines.(pos.y) in + line.bl_str.[pos.x] <- c; + line.bl_len <- max line.bl_len (pos.x+1) + + let _buf_put_sub_string buf pos s s_i s_len = + _ensure_lines buf pos.y; + _ensure_line buf.buf_lines.(pos.y) (pos.x + s_len); + buf.buf_len <- max buf.buf_len (pos.y+1); + let line = buf.buf_lines.(pos.y) in + String.blit s s_i line.bl_str pos.x s_len; + line.bl_len <- max line.bl_len (pos.x+s_len) + + let _buf_put_string buf pos s = + _buf_put_sub_string buf pos s 0 (String.length s) + + (* create a new buffer *) + let make_buffer () = + let buf = { + buf_lines = Array.init 16 _make_line; + buf_len = 0; + } in + let buf_out = { + put_char = _buf_put_char buf; + put_sub_string = _buf_put_sub_string buf; + put_string = _buf_put_string buf; + flush = (fun () -> ()); + } in + buf, buf_out + + let buf_to_lines ?(indent=0) buf = + let buffer = Buffer.create (5 + buf.buf_len * 32) in + for i = 0 to buf.buf_len - 1 do + for k = 1 to indent do Buffer.add_char buffer ' ' done; + let line = buf.buf_lines.(i) in + Buffer.add_substring buffer line.bl_str 0 line.bl_len; + Buffer.add_char buffer '\n'; + done; + Buffer.contents buffer + + let buf_output ?(indent=0) oc buf = + for i = 0 to buf.buf_len - 1 do + for k = 1 to indent do output_char oc ' '; done; + let line = buf.buf_lines.(i) in + output oc line.bl_str 0 line.bl_len; + output_char oc '\n'; + done +end + +(* find [c] in [s], starting at offset [i] *) +let rec _find s c i = + if i >= String.length s then None + else if s.[i] = c then Some i + else _find s c (i+1) + +let rec _lines s i k = match _find s '\n' i with + | None -> () + | Some j -> + let s' = String.sub s i (j-i) in + k s'; + _lines s (j+1) k + +module Box = struct + type grid_shape = + | GridBase + | GridFramed + + type 'a shape = + | Line of string + | Text of string list (* in a box *) + | Frame of 'a + | Grid of grid_shape * 'a array array + + type t = { + shape : t shape; + size : position lazy_t; + } + + let size box = Lazy.force box.size + + let shape b = b.shape + + let _array_foldi f acc a = + let acc = ref acc in + Array.iteri (fun i x -> acc := f !acc i x) a; + !acc + + let _dim_matrix m = + if Array.length m = 0 then {x=0;y=0} + else {y=Array.length m; x=Array.length m.(0); } + + (* height of a line composed of boxes *) + let _height_line a = + _array_foldi + (fun h i box -> + let s = size box in + max h s.y + ) 0 a + + (* how large is the [i]-th column of [m]? *) + let _width_column m i = + let acc = ref 0 in + for j = 0 to Array.length m - 1 do + acc := max !acc (size m.(j).(i)).x + done; + !acc + + (* from a matrix [m] (line,column), return two arrays [lines] and [columns], + with [col.(i)] being the start offset of column [i] and + [lines.(j)] being the start offset of line [j]. + Those arrays have one more slot to indicate the end position. *) + let _size_matrix m = + let dim = _dim_matrix m in + (* columns *) + let columns = Array.make (dim.x + 1) 0 in + for i = 0 to dim.x - 1 do + (* +1 is for keeping room for the vertical/horizontal line/column *) + columns.(i+1) <- columns.(i) + 1 + (_width_column m i) + done; + (* lines *) + let lines = Array.make (dim.y + 1) 0 in + for j = 1 to dim.y do + lines.(j) <- lines.(j-1) + 1 + (_height_line m.(j-1)) + done; + (* no trailing bars, adjust *) + columns.(dim.x) <- columns.(dim.x) - 1; + lines.(dim.y) <- lines.(dim.y) - 1; + lines, columns + + let _size = function + | Line s -> { x=String.length s; y=1 } + | Text l -> + let width = List.fold_left + (fun acc line -> max acc (String.length line)) 0 l + in + { x=width; y=List.length l; } + | Frame t -> + let {x;y} = size t in + { x=x+2; y=y+2; } + | Grid (_,m) -> + let dim = _dim_matrix m in + let lines, columns = _size_matrix m in + { y=lines.(dim.y); x=columns.(dim.x)} + + let _make shape = + { shape; size=(lazy (_size shape)); } + + let line s = + assert (_find s '\n' 0 = None); + _make (Line s) + + let text s = + let acc = ref [] in + _lines s 0 (fun x -> acc := x :: !acc); + _make (Text (List.rev !acc)) + + let lines l = + assert (List.for_all (fun s -> _find s '\n' 0 = None) l); + _make (Text l) + + let frame b = _make (Frame b) + + let grid ?(framed=true) m = + _make (Grid ((if framed then GridFramed else GridBase), m)) + + let init_grid ?framed ~line ~col f = + let m = Array.init line (fun j-> Array.init col (fun i -> f ~line:j ~col:i)) in + grid ?framed m + + let vlist ?framed l = + let a = Array.of_list l in + grid ?framed (Array.map (fun line -> [| line |]) a) + + let hlist ?framed l = + grid ?framed [| Array.of_list l |] +end + +let _write_vline ~out pos n = + for j=0 to n-1 do + Output.put_char out (_move_y pos j) '|' + done + +let _write_hline ~out pos n = + for i=0 to n-1 do + Output.put_char out (_move_x pos i) '-' + done + +(* render given box on the output, starting with upper left corner + at the given position. *) +let rec _render ~out b pos = + match Box.shape b with + | Box.Line s -> Output.put_string out pos s + | Box.Text l -> + List.iteri + (fun i line -> + Output.put_string out (_move_y pos i) line + ) l + | Box.Frame b' -> + let {x;y} = Box.size b' in + Output.put_char out pos '+'; + Output.put_char out (_move pos (x+1) (y+1)) '+'; + Output.put_char out (_move pos 0 (y+1)) '+'; + Output.put_char out (_move pos (x+1) 0) '+'; + _write_hline out (_move_x pos 1) x; + _write_hline out (_move pos 1 (y+1)) x; + _write_vline out (_move_y pos 1) y; + _write_vline out (_move pos (x+1) 1) y; + _render ~out b' (_move pos 1 1) + | Box.Grid (grid_shape,m) -> + let dim = Box._dim_matrix m in + let lines, columns = Box._size_matrix m in + + (* write boxes *) + for j = 0 to dim.y - 1 do + for i = 0 to dim.x - 1 do + let pos' = _move pos (columns.(i)) (lines.(j)) in + _render ~out m.(j).(i) pos' + done; + done; + + (* write frame if needed *) + begin match grid_shape with + | Box.GridBase -> () + | Box.GridFramed -> + let size = Box.size b in + for j=1 to dim.y - 1 do + _write_hline ~out {pos with y=lines.(j)} size.x + done; + for i=1 to dim.x - 1 do + _write_vline ~out {pos with x=columns.(i)} size.y + done; + end + +let render out b = + _render ~out b origin + +let to_string b = + let buf, out = Output.make_buffer () in + render out b; + Output.buf_to_lines buf + +let output ?(indent=0) oc b = + let buf, out = Output.make_buffer () in + render out b; + Output.buf_output ~indent oc buf; + flush oc diff --git a/misc/printBox.mli b/misc/printBox.mli new file mode 100644 index 00000000..25c34474 --- /dev/null +++ b/misc/printBox.mli @@ -0,0 +1,100 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Pretty-Printing of Boxes} *) + +type position = { x:int ; y: int } +(** Positions are relative to the upper-left corner, that is, +when [x] increases we go toward the right, and when [y] increases +we go toward the bottom (same order as a printer) *) + +val origin : position +(** Initial position *) + +module Output : sig + type t = { + put_char : position -> char -> unit; + put_string : position -> string -> unit; + put_sub_string : position -> string -> int -> int -> unit; + flush : unit -> unit; + } + + (** {6 Default Instance: a buffer} *) + + type buffer + + val make_buffer : unit -> buffer * t + (** New buffer, and the corresponding output (buffers are mutable) *) + + val buf_to_lines : ?indent:int -> buffer -> string + (** Print the content of the buffer into a string. + @param indent number of spaces to insert in front of the lines *) + + val buf_output : ?indent:int -> out_channel -> buffer -> unit + (** Print the buffer on the given channel *) +end + +module Box : sig + type t + + val size : t -> position + (** Size needed to print the box *) + + val line : string -> t + (** Make a single-line box. + @raise Invalid_argument if the string contains ['\n'] *) + + val text : string -> t + (** Any text, possibly with several lines *) + + val lines : string list -> t + (** Shortcut for {!text}, with a list of lines *) + + val frame : t -> t + (** Put a single frame around the box *) + + val grid : ?framed:bool -> t array array -> t + (** Grid of boxes (no frame between boxes). The matrix is indexed + with lines first, then columns. + @param framed if [true], each item of the grid will be framed. + default value is [true] *) + + val init_grid : ?framed:bool -> + line:int -> col:int -> (line:int -> col:int -> t) -> t + (** Same as {!grid} but takes the matrix as a function *) + + val vlist : ?framed:bool -> t list -> t + (** Vertical list of boxes *) + + val hlist : ?framed:bool -> t list -> t + (** Horizontal list of boxes *) +end + +val render : Output.t -> Box.t -> unit + +val to_string : Box.t -> string + +val output : ?indent:int -> out_channel -> Box.t -> unit From ebb8310f847b9034d24680b5bac58bc0690b0836 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Jun 2014 23:10:16 +0200 Subject: [PATCH 50/52] bugfix --- core/CCLinq.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/CCLinq.ml b/core/CCLinq.ml index e2060191..5650d4c6 100644 --- a/core/CCLinq.ml +++ b/core/CCLinq.ml @@ -1008,7 +1008,8 @@ module IO = struct else _find s c (i+1) let rec _lines s i k = match _find s '\n' i with - | None -> () + | None -> + if i let s' = String.sub s i (j-i) in k s'; From 6ef51a5717e1b678713e0ed8374574852e061ab6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Jun 2014 23:10:51 +0200 Subject: [PATCH 51/52] moved box constructors into PrintBoxs toplevel; more convenience constructors; more accurate printing of nested boxs bars --- misc/printBox.ml | 103 ++++++++++++++++++++++++++++++---------------- misc/printBox.mli | 91 ++++++++++++++++++++++++++-------------- 2 files changed, 128 insertions(+), 66 deletions(-) diff --git a/misc/printBox.ml b/misc/printBox.ml index 4d8f5eca..aed324da 100644 --- a/misc/printBox.ml +++ b/misc/printBox.ml @@ -137,7 +137,8 @@ let rec _find s c i = else _find s c (i+1) let rec _lines s i k = match _find s '\n' i with - | None -> () + | None -> + if i let s' = String.sub s i (j-i) in k s'; @@ -227,37 +228,53 @@ module Box = struct let _make shape = { shape; size=(lazy (_size shape)); } - - let line s = - assert (_find s '\n' 0 = None); - _make (Line s) - - let text s = - let acc = ref [] in - _lines s 0 (fun x -> acc := x :: !acc); - _make (Text (List.rev !acc)) - - let lines l = - assert (List.for_all (fun s -> _find s '\n' 0 = None) l); - _make (Text l) - - let frame b = _make (Frame b) - - let grid ?(framed=true) m = - _make (Grid ((if framed then GridFramed else GridBase), m)) - - let init_grid ?framed ~line ~col f = - let m = Array.init line (fun j-> Array.init col (fun i -> f ~line:j ~col:i)) in - grid ?framed m - - let vlist ?framed l = - let a = Array.of_list l in - grid ?framed (Array.map (fun line -> [| line |]) a) - - let hlist ?framed l = - grid ?framed [| Array.of_list l |] end +let line s = + assert (_find s '\n' 0 = None); + Box._make (Box.Line s) + +let text s = + let acc = ref [] in + _lines s 0 (fun x -> acc := x :: !acc); + Box._make (Box.Text (List.rev !acc)) + +let lines l = + assert (List.for_all (fun s -> _find s '\n' 0 = None) l); + Box._make (Box.Text l) + +let int_ x = line (string_of_int x) +let float_ x = line (string_of_float x) +let bool_ x = line (string_of_bool x) + +let frame b = + Box._make (Box.Frame b) + +let grid ?(framed=true) m = + Box._make (Box.Grid ((if framed then Box.GridFramed else Box.GridBase), m)) + +let init_grid ?framed ~line ~col f = + let m = Array.init line (fun j-> Array.init col (fun i -> f ~line:j ~col:i)) in + grid ?framed m + +let vlist ?framed l = + let a = Array.of_list l in + grid ?framed (Array.map (fun line -> [| line |]) a) + +let hlist ?framed l = + grid ?framed [| Array.of_list l |] + +let hlist_map ?framed f l = hlist ?framed (List.map f l) +let vlist_map ?framed f l = vlist ?framed (List.map f l) +let grid_map ?framed f m = grid ?framed (Array.map (Array.map f) m) + +let grid_text ?framed m = grid_map ?framed text m + +let transpose m = + let dim = Box._dim_matrix m in + Array.init dim.x + (fun i -> Array.init dim.y (fun j -> m.(j).(i))) + let _write_vline ~out pos n = for j=0 to n-1 do Output.put_char out (_move_y pos j) '|' @@ -269,8 +286,9 @@ let _write_hline ~out pos n = done (* render given box on the output, starting with upper left corner - at the given position. *) -let rec _render ~out b pos = + at the given position. [expected_size] is the size of the + available surrounding space *) +let rec _render ?expected_size ~out b pos = match Box.shape b with | Box.Line s -> Output.put_string out pos s | Box.Text l -> @@ -296,22 +314,35 @@ let rec _render ~out b pos = (* write boxes *) for j = 0 to dim.y - 1 do for i = 0 to dim.x - 1 do + let expected_size = { + x=columns.(i+1)-columns.(i); + y=lines.(j+1)-lines.(j); + } in let pos' = _move pos (columns.(i)) (lines.(j)) in - _render ~out m.(j).(i) pos' + _render ~expected_size ~out m.(j).(i) pos' done; done; + let len_hlines, len_vlines = match expected_size with + | None -> columns.(dim.x), lines.(dim.y) + | Some {x;y} -> x,y + in + (* write frame if needed *) begin match grid_shape with | Box.GridBase -> () | Box.GridFramed -> - let size = Box.size b in for j=1 to dim.y - 1 do - _write_hline ~out {pos with y=lines.(j)} size.x + _write_hline ~out (_move_y pos (lines.(j)-1)) len_hlines done; for i=1 to dim.x - 1 do - _write_vline ~out {pos with x=columns.(i)} size.y + _write_vline ~out (_move_x pos (columns.(i)-1)) len_vlines done; + for j=1 to dim.y - 1 do + for i=1 to dim.x - 1 do + Output.put_char out (_move pos (columns.(i)-1) (lines.(j)-1)) '+' + done + done end let render out b = diff --git a/misc/printBox.mli b/misc/printBox.mli index 25c34474..5bc32e27 100644 --- a/misc/printBox.mli +++ b/misc/printBox.mli @@ -24,7 +24,19 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 Pretty-Printing of Boxes} *) +(** {1 Pretty-Printing of nested Boxes} + +Allows to print nested boxes, lists, arrays, tables in a nice way +on any monospaced support. + +{[ +let b = PrintBox.( + + + +]} + +*) type position = { x:int ; y: int } (** Positions are relative to the upper-left corner, that is, @@ -62,37 +74,56 @@ module Box : sig val size : t -> position (** Size needed to print the box *) - - val line : string -> t - (** Make a single-line box. - @raise Invalid_argument if the string contains ['\n'] *) - - val text : string -> t - (** Any text, possibly with several lines *) - - val lines : string list -> t - (** Shortcut for {!text}, with a list of lines *) - - val frame : t -> t - (** Put a single frame around the box *) - - val grid : ?framed:bool -> t array array -> t - (** Grid of boxes (no frame between boxes). The matrix is indexed - with lines first, then columns. - @param framed if [true], each item of the grid will be framed. - default value is [true] *) - - val init_grid : ?framed:bool -> - line:int -> col:int -> (line:int -> col:int -> t) -> t - (** Same as {!grid} but takes the matrix as a function *) - - val vlist : ?framed:bool -> t list -> t - (** Vertical list of boxes *) - - val hlist : ?framed:bool -> t list -> t - (** Horizontal list of boxes *) end +val line : string -> Box.t +(** Make a single-line box. + @raise Invalid_argument if the string contains ['\n'] *) + +val text : string -> Box.t +(** Any text, possibly with several lines *) + +val lines : string list -> Box.t +(** Shortcut for {!text}, with a list of lines *) + +val int_ : int -> Box.t + +val bool_ : bool -> Box.t + +val float_ : float -> Box.t + +val frame : Box.t -> Box.t +(** Put a single frame around the box *) + +val grid : ?framed:bool -> Box.t array array -> Box.t +(** Grid of boxes (no frame between boxes). The matrix is indexed + with lines first, then columns. The array must be a proper matrix, + that is, all lines must have the same number of columns! + @param framed if [true], each item of the grid will be framed. + default value is [true] *) + +val grid_text : ?framed:bool -> string array array -> Box.t +(** Same as {!grid}, but wraps every cell into a {!text} box *) + +val transpose : 'a array array -> 'a array array +(** Transpose a matrix *) + +val init_grid : ?framed:bool -> + line:int -> col:int -> (line:int -> col:int -> Box.t) -> Box.t +(** Same as {!grid} but takes the matrix as a function *) + +val vlist : ?framed:bool -> Box.t list -> Box.t +(** Vertical list of boxes *) + +val hlist : ?framed:bool -> Box.t list -> Box.t +(** Horizontal list of boxes *) + +val grid_map : ?framed:bool -> ('a -> Box.t) -> 'a array array -> Box.t + +val vlist_map : ?framed:bool -> ('a -> Box.t) -> 'a list -> Box.t + +val hlist_map : ?framed:bool -> ('a -> Box.t) -> 'a list -> Box.t + val render : Output.t -> Box.t -> unit val to_string : Box.t -> string From 10029acd428dadfc28d858f866885d0b7c00ed39 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Jun 2014 23:25:57 +0200 Subject: [PATCH 52/52] some examples for PrintBox --- misc/printBox.mli | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/misc/printBox.mli b/misc/printBox.mli index 5bc32e27..b6286254 100644 --- a/misc/printBox.mli +++ b/misc/printBox.mli @@ -30,9 +30,35 @@ Allows to print nested boxes, lists, arrays, tables in a nice way on any monospaced support. {[ -let b = PrintBox.( - + # let b = PrintBox.( + frame + (vlist [ line "hello"; + hlist [line "world"; line "yolo"]]) + );; +val b : Box.t = +# PrintBox.output ~indent:2 stdout b;; + +----------+ + |hello | + |----------| + |world|yolo| + +----------+ +- : unit = () +# let b2 = PrintBox.( + frame + (hlist [ text "I love\nto\npress\nenter"; + grid_text [| [|"a"; "bbb"|]; + [|"c"; "hello world"|] |]]) + );; +val b2 : PrintBox.Box.t = +# PrintBox.output stdout b2;; ++--------------------+ +|I love|a|bbb | +|to |-+-----------| +|press |c|hello world| +|enter | | | ++--------------------+ +- : unit = () ]}