diff --git a/.ocamlinit b/.ocamlinit index f477d71c..7711334d 100644 --- a/.ocamlinit +++ b/.ocamlinit @@ -9,7 +9,7 @@ #load "containers_misc.cma";; #require "threads";; #load "containers_thread.cma";; -open Containers;; +open Containers_misc;; #install_printer Bencode.pretty;; #install_printer HGraph.Default.fmt;; #require "CamlGI";; diff --git a/README.md b/README.md index d1703755..4aac41da 100644 --- a/README.md +++ b/README.md @@ -52,13 +52,15 @@ structures comprise (some modules in `misc/`, some other in `core/`): - `CCLeftistheap`, a polymorphic heap structure. - `CCFQueue`, a purely functional queue structure - `CCBV`, mutable bitvectors -- `CCPersistentHashtbl`, a semi-persistent hashtable (similar - to [persistent arrays](https://www.lri.fr/~filliatr/ftp/ocaml/ds/parray.ml.html)) +- `CCPersistentHashtbl`, a semi-persistent hashtable (similar to [persistent arrays](https://www.lri.fr/~filliatr/ftp/ocaml/ds/parray.ml.html)) - `CCVector`, a growable array (pure OCaml, no C) - -- `CCGen` and `CCSequence`, generic iterators structures (with structural types - so they can be defined in several places). Now also in their own - repository and opam packages (`gen` and `sequence`). +- `CCGen` and `CCSequence`, generic iterators structures (with structural types so they can be defined in several places). Now also in their own repository and opam packages (`gen` and `sequence`). +- `CCKlist`, another iterator structure +- `CCList`, functions and lists including tail-recursive implementations of `map` and `append` +- `CCArray`, utilities on arrays +- `CCInt`, `CCPair`, `CCOpt`, `CCFun`, `CCBool`, utilities on basic types +- `CCPrint`, printing combinators +- `CCHash`, hashing combinators ### Misc diff --git a/_oasis b/_oasis index 2e97f303..ff9b3700 100644 --- a/_oasis +++ b/_oasis @@ -39,9 +39,9 @@ Flag "bench" Library "containers" Path: core Modules: CCVector, CCDeque, CCGen, CCSequence, CCFQueue, CCMultiMap, - CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, + CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError, CCLeftistheap, CCList, CCOpt, CCPair, CCFun, CCHash, - CCKList, CCInt, CCBool, CCArray + CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd, CCLinq FindlibName: containers Library "containers_string" @@ -59,9 +59,9 @@ Library "containers_misc" PHashtbl, SkipList, SplayTree, SplayMap, Univ, Bij, PiCalculus, Bencode, Sexp, RAL, UnionFind, SmallSet, AbsSet, CSM, - ActionMan, QCheck, BencodeOnDisk, TTree, + ActionMan, BencodeOnDisk, TTree, PrintBox, HGraph, Automaton, Conv, Bidir, Iteratee, - Ty, Tell, BencodeStream, RatTerm, Cause, AVL + Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact BuildDepends: unix,containers FindlibName: misc FindlibParent: containers @@ -129,6 +129,14 @@ Executable bench_conv MainIs: bench_conv.ml BuildDepends: containers,benchmark +Executable bench_batch + Path: tests/ + Install: false + CompiledObject: native + Build$: flag(bench) + MainIs: bench_batch.ml + BuildDepends: containers,benchmark + Executable test_levenshtein Path: tests/ Install: false diff --git a/core/CCArray.ml b/core/CCArray.ml index 9f52f5f3..7de43cfe 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -25,30 +25,283 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Array utils} *) +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int + +module type S = sig + type 'a t + (** Array, or sub-array, containing elements of type ['a] *) + + val empty : 'a t + + val equal : 'a equal -> 'a t equal + + val compare : 'a ord -> 'a t ord + + val get : 'a t -> int -> 'a + + val set : 'a t -> int -> 'a -> unit + + val length : _ t -> int + + val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b + + val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b + (** fold left on array, with index *) + + val iter : ('a -> unit) -> 'a t -> unit + + val iteri : (int -> 'a -> unit) -> 'a t -> unit + + val reverse_in_place : 'a t -> unit + (** Reverse the array in place *) + + val find : ('a -> 'b option) -> 'a t -> 'b option + (** [find f a] returns [Some y] if there is an element [x] such + that [f x = Some y], else it returns [None] *) + + val for_all : ('a -> bool) -> 'a t -> bool + + val for_all2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + (** Forall on pairs of arrays. + @raise Invalid_argument if they have distinct lengths *) + + val exists : ('a -> bool) -> 'a t -> bool + + val exists2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + (** Exists on pairs of arrays. + @raise Invalid_argument if they have distinct lengths *) + + val shuffle : 'a t -> unit + (** shuffle randomly the array, in place *) + + val shuffle_with : Random.State.t -> 'a t -> unit + (** Like shuffle but using a specialized random state *) + + val to_seq : 'a t -> 'a sequence + val to_gen : 'a t -> 'a gen + val to_klist : 'a t -> 'a klist + + (** {2 IO} *) + + val pp: ?sep:string -> (Buffer.t -> 'a -> unit) + -> Buffer.t -> 'a t -> unit + (** print an array of items with printing function *) + + val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) + -> Buffer.t -> 'a t -> unit + (** print an array, giving the printing function both index and item *) + + val print : ?sep:string -> (Format.formatter -> 'a -> unit) + -> Format.formatter -> 'a t -> unit + (** print an array of items with printing function *) +end + +(** {2 General Implementation} +Most of those functions that a range [(i,j)] with +[i] included and [j] excluded *) + +let rec _foldi f acc a i j = + if i = j then acc else _foldi f (f acc i a.(i)) a (i+1) j + +let _reverse_in_place a i j = + if i=j then () + else + for k = i to (j-1)/2 do + let t = a.(k) in + a.(k) <- a.(j-1-k); + a.(j-1-k) <- t; + done + +let rec _equal eq a1 i1 j1 a2 i2 j2 = + if i1 = j1 || i2 = j2 + then (assert (i1=j1 && i2=j2); true) + else + eq a1.(i1) a2.(i2) && _equal eq a1 (i1+1) j1 a2 (i2+2) j2 + +let rec _compare cmp a1 i1 j1 a2 i2 j2 = + if i1 = j1 + then if i2=j2 then 0 else -1 + else if i2=j2 + then 1 + else + let c = cmp a1.(i1) a2.(i2) in + if c = 0 + then _compare cmp a1 (i1+1) j1 a2 (i2+2) j2 + else c + +let rec _find f a i j = + if i = j then None + else match f a.(i) with + | Some _ as res -> res + | None -> _find f a (i+1) j + +let rec _for_all p a i j = + i = j || (p a.(i) && _for_all p a (i+1) j) + +let rec _exists p a i j = + i <> j && (p a.(i) || _exists p a (i+1) j) + +let rec _for_all2 p a1 a2 i1 i2 j1 = + i1 = j1 || (p a1.(i1) a2.(i2) && _for_all2 p a1 a2 (i1+1) (i2+1) j1) + +let rec _exists2 p a1 a2 i1 i2 j1 = + i1 <> j1 && (p a1.(i1) a2.(i2) || _exists2 p a1 a2 (i1+1) (i2+1) j1) + +(* shuffle a[i...j[ using the given int random generator + See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) +let _shuffle _rand_int a i j = + for k = i to j do + let l = _rand_int k in + let tmp = a.(l) in + a.(l) <- a.(k); + a.(l) <- tmp; + done + +let _pp ~sep pp_item buf a i j = + for k = i to j - 1 do + if k > i then Buffer.add_string buf sep; + pp_item buf a.(k) + done + +let _pp_i ~sep pp_item buf a i j = + for k = i to j - 1 do + if k > i then Buffer.add_string buf sep; + pp_item buf k a.(k) + done + +let _print ~sep pp_item fmt a i j = + for k = i to j - 1 do + if k > i then Format.pp_print_string fmt sep; + pp_item fmt a.(k) + done + +let _to_gen a i j = + let k = ref i in + fun () -> + if !k < j + then ( + let x = a.(!k) in + incr k; + Some x + ) else None + +let rec _to_klist a i j () = + if i=j then `Nil else `Cons (a.(i), _to_klist a (i+1) j) + +(** {2 Arrays} *) + type 'a t = 'a array -let foldi f acc a = - let rec recurse acc i = - if i = Array.length a then acc else recurse (f acc i a.(i)) (i+1) - in recurse acc 0 +let empty = [| |] -let for_all p a = - let rec check i = - i = Array.length a || (p a.(i) && check (i+1)) - in check 0 +let map = Array.map -let for_all2 p a1 a2 = - let rec check i = - i = Array.length a1 || (p a1.(i) a2.(i) && check (i+1)) - in - if Array.length a1 <> Array.length a2 - then raise (Invalid_argument "forall2") - else check 0 +let length = Array.length -let exists p a = - let rec check i = - i < Array.length a && (p a.(i) || check (i+1)) - in check 0 +let get = Array.get + +let set = Array.set + +let fold = Array.fold_left + +let foldi f acc a = _foldi f acc a 0 (Array.length a) + +let iter = Array.iter + +let iteri = Array.iteri + +let reverse_in_place a = + _reverse_in_place a 0 (Array.length a) + +(*$T + reverse_in_place [| |]; true + reverse_in_place [| 1 |]; true + let a = [| 1; 2; 3; 4; 5 |] in \ + reverse_in_place a; \ + a = [| 5;4;3;2;1 |] + let a = [| 1; 2; 3; 4; 5; 6 |] in \ + reverse_in_place a; \ + a = [| 6;5;4;3;2;1 |] +*) + +let find f a = + _find f a 0 (Array.length a) + +let filter_map f a = + let rec aux acc i = + if i = Array.length a + then ( + let a' = Array.of_list acc in + reverse_in_place a'; + a' + ) else match f a.(i) with + | None -> aux acc (i+1) + | Some x -> aux (x::acc) (i+1) + in aux [] 0 + +(*$T + filter_map (fun x -> if x mod 2 = 0 then Some (string_of_int x) else None) \ + [| 1; 2; 3; 4 |] = [| "2"; "4" |] + filter_map (fun x -> if x mod 2 = 0 then Some (string_of_int x) else None) \ + [| 1; 2; 3; 4; 5; 6 |] \ + = [| "2"; "4"; "6" |] +*) + +let filter p a = + filter_map (fun x -> if p x then Some x else None) a + +(* append [rev a] in front of [acc] *) +let rec __rev_append_list a acc i = + if i = Array.length a + then acc + else + __rev_append_list a (a.(i) :: acc) (i+1) + +let flat_map f a = + let rec aux acc i = + if i = Array.length a + then ( + let a' = Array.of_list acc in + reverse_in_place a'; + a' + ) + else + let a' = f a.(i) in + aux (__rev_append_list a' acc 0) (i+1) + in aux [] 0 + +(*$T + let a = [| 1; 3; 5 |] in \ + let a' = flat_map (fun x -> [| x; x+1 |]) a in \ + a' = [| 1; 2; 3; 4; 5; 6 |] +*) + +let (>>=) a f = flat_map f a + +let for_all p a = _for_all p a 0 (Array.length a) + +let exists p a = _exists p a 0 (Array.length a) + +let for_all2 p a b = + Array.length a = Array.length b + && + _for_all2 p a b 0 0 (Array.length a) + +let exists2 p a b = + Array.length a = Array.length b + && + _exists2 p a b 0 0 (Array.length a) + +let (--) i j = + if i<=j + then + Array.init (j-i+1) (fun k -> i+k) + else + Array.init (i-j+1) (fun k -> i-k) (** all the elements of a, but the i-th, into a list *) let except_idx a i = @@ -56,27 +309,118 @@ let except_idx a i = (fun acc j elt -> if i = j then acc else elt::acc) [] a -(* Randomly shuffle the array, in place. - See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) -let shuffle a = - for i = 1 to Array.length a - 1 do - let j = Random.int i in - let tmp = a.(i) in - a.(i) <- a.(j); - a.(j) <- tmp; - done +let equal eq a b = + Array.length a = Array.length b + && + _equal eq a 0 (Array.length a) b 0 (Array.length b) -(** print an array of items using the printing function *) -let pp ?(sep=", ") pp_item buf a = - for i = 0 to Array.length a - 1 do - (if i > 0 then Buffer.add_string buf sep); - pp_item buf a.(i) - done +let compare cmp a b = + _compare cmp a 0 (Array.length a) b 0 (Array.length b) -(** print an array of items using the printing function *) -let pp_i ?(sep=", ") pp_item buf a = - for i = 0 to Array.length a - 1 do - (if i > 0 then Buffer.add_string buf sep); - pp_item buf i a.(i) - done +let shuffle a = _shuffle Random.int a 0 (Array.length a) +let shuffle_with st a = _shuffle (Random.State.int st) a 0 (Array.length a) + +let pp ?(sep=", ") pp_item buf a = _pp ~sep pp_item buf a 0 (Array.length a) + +let pp_i ?(sep=", ") pp_item buf a = _pp_i ~sep pp_item buf a 0 (Array.length a) + +let print ?(sep=", ") pp_item fmt a = _print ~sep pp_item fmt a 0 (Array.length a) + +let to_seq a k = iter k a + +let to_gen a = _to_gen a 0 (Array.length a) + +let to_klist a = _to_klist a 0 (Array.length a) + +module Sub = struct + type 'a t = { + arr : 'a array; + i : int; (** Start index (included) *) + j : int; (** Stop index (excluded) *) + } + + let empty = { + arr = [||]; + i = 0; + j = 0; + } + + let make arr i ~len = + if i+len > Array.length arr then invalid_arg "Array.Sub.make"; + { arr; i; j=i+len; } + + let of_slice (arr,i,len) = make arr i ~len + + let full arr = { arr; i=0; j=Array.length arr; } + + let underlying a = a.arr + + let length a = a.j - a.i + + let copy a = Array.sub a.arr a.i (length a) + + let sub a i len = make a.arr (a.i + i) len + + let equal eq a b = + length a = length b && _equal eq a.arr a.i a.j b.arr b.i b.j + + let compare cmp a b = + _compare cmp a.arr a.i a.j b.arr b.i b.j + + let fold f acc a = + let rec _fold acc i j = + if i=j then acc + else _fold (f acc a.arr.(i)) (i+1) j + in _fold acc a.i a.j + + let foldi f acc a = _foldi f acc a.arr a.i a.j + + let get a i = + let j = a.i + i in + if i<0 || j>=a.j then invalid_arg "Array.Sub.get"; + a.arr.(j) + + let set a i x = + let j = a.i + i in + if i<0 || j>=a.j then invalid_arg "Array.Sub.get"; + a.arr.(j) <- x + + let iter f a = + for k=a.i to a.j-1 do f a.arr.(k) done + + let iteri f a = + for k=0 to length a-1 do f k a.arr.(a.i + k) done + + let reverse_in_place a = _reverse_in_place a.arr a.i a.j + + let find f a = _find f a.arr a.i a.j + + let for_all p a = _for_all p a.arr a.i a.j + + let exists p a = _exists p a.arr a.i a.j + + let for_all2 p a b = + length a = length b && _for_all2 p a.arr b.arr a.i b.i b.j + + let exists2 p a b = + length a = length b && _exists2 p a.arr b.arr a.i b.i a.j + + let shuffle a = + _shuffle Random.int a.arr a.i a.j + + let shuffle_with st a = + _shuffle (Random.State.int st) a.arr a.i a.j + + let pp ?(sep=", ") pp_item buf a = _pp ~sep pp_item buf a.arr a.i a.j + + let pp_i ?(sep=", ") pp_item buf a = _pp_i ~sep pp_item buf a.arr a.i a.j + + let print ?(sep=", ") pp_item fmt a = _print ~sep pp_item fmt a.arr a.i a.j + + let to_seq a k = iter k a + + let to_gen a = _to_gen a.arr a.i a.j + + let to_klist a = _to_klist a.arr a.i a.j +end diff --git a/core/CCArray.mli b/core/CCArray.mli index b520d221..a961bd38 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -25,29 +25,143 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Array utils} *) +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int + +(** {2 Abstract Signature} *) + +module type S = sig + type 'a t + (** Array, or sub-array, containing elements of type ['a] *) + + val empty : 'a t + + val equal : 'a equal -> 'a t equal + + val compare : 'a ord -> 'a t ord + + val get : 'a t -> int -> 'a + + val set : 'a t -> int -> 'a -> unit + + val length : _ t -> int + + val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b + + val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b + (** fold left on array, with index *) + + val iter : ('a -> unit) -> 'a t -> unit + + val iteri : (int -> 'a -> unit) -> 'a t -> unit + + val reverse_in_place : 'a t -> unit + (** Reverse the array in place *) + + val find : ('a -> 'b option) -> 'a t -> 'b option + (** [find f a] returns [Some y] if there is an element [x] such + that [f x = Some y], else it returns [None] *) + + val for_all : ('a -> bool) -> 'a t -> bool + + val for_all2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + (** Forall on pairs of arrays. + @raise Invalid_argument if they have distinct lengths *) + + val exists : ('a -> bool) -> 'a t -> bool + + val exists2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + (** Exists on pairs of arrays. + @raise Invalid_argument if they have distinct lengths *) + + val shuffle : 'a t -> unit + (** shuffle randomly the array, in place *) + + val shuffle_with : Random.State.t -> 'a t -> unit + (** Like shuffle but using a specialized random state *) + + val to_seq : 'a t -> 'a sequence + val to_gen : 'a t -> 'a gen + val to_klist : 'a t -> 'a klist + + (** {2 IO} *) + + val pp: ?sep:string -> (Buffer.t -> 'a -> unit) + -> Buffer.t -> 'a t -> unit + (** print an array of items with printing function *) + + val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) + -> Buffer.t -> 'a t -> unit + (** print an array, giving the printing function both index and item *) + + val print : ?sep:string -> (Format.formatter -> 'a -> unit) + -> Format.formatter -> 'a t -> unit + (** print an array of items with printing function *) +end + +(** {2 Arrays} *) + type 'a t = 'a array -val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b -(** fold left on array, with index *) +include S with type 'a t := 'a t -val for_all : ('a -> bool) -> 'a t -> bool +val map : ('a -> 'b) -> 'a t -> 'b t -val for_all2 : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool -(** Forall on pairs of arrays. - @raise Invalid_argument if they have distinct lengths *) +val filter : ('a -> bool) -> 'a t -> 'a t +(** Filter elements out of the array. Only the elements satisfying + the given predicate will be kept. *) -val exists : ('a -> bool) -> 'a t -> bool +val filter_map : ('a -> 'b option) -> 'a t -> 'b t +(** Map each element into another value, or discard it *) + +val flat_map : ('a -> 'b t) -> 'a t -> 'b array +(** transform each element into an array, then flatten *) + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +(** Infix version of {!flat_map} *) val except_idx : 'a t -> int -> 'a list -(** Remove given index *) +(** Remove given index, obtaining the list of the other elements *) -val shuffle : 'a t -> unit -(** shuffle randomly the array, in place *) +val (--) : int -> int -> int t +(** Range array *) -val pp: ?sep:string -> (Buffer.t -> 'a -> unit) - -> Buffer.t -> 'a array -> unit -(** print an array of items with printing function *) +(** {2 Slices} +A slice is a part of an array, that requires no copying and shares +its storage with the original array. + +All indexing in a slice is relative to the beginning of a slice, not +to the underlying array (meaning a slice is effectively like +a regular array) *) + +module Sub : sig + type 'a t + (** A slice is an array, an offset, and a length *) + + val make : 'a array -> int -> len:int -> 'a t + (** Create a slice. + @raise Invalid_argument if the slice isn't valid *) + + val of_slice : ('a array * int * int) -> 'a t + (** Make a sub-array from a triple [(arr, i, len)] where [arr] is the array, + [i] the offset in [arr], and [len] the number of elements of the slice. + @raise Invalid_argument if the slice isn't valid (See {!make}) *) + + val full : 'a array -> 'a t + (** Slice that covers the full array *) + + val underlying : 'a t -> 'a array + (** Underlying array (shared). Modifying this array will modify the slice *) + + val copy : 'a t -> 'a array + (** Copy into a new array *) + + val sub : 'a t -> int -> int -> 'a t + (** Sub-slice *) + + include S with type 'a t := 'a t +end -val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) - -> Buffer.t -> 'a array -> unit -(** print an array, giving the printing function both index and item *) diff --git a/core/CCBatch.ml b/core/CCBatch.ml new file mode 100644 index 00000000..760982e6 --- /dev/null +++ b/core/CCBatch.ml @@ -0,0 +1,238 @@ + +(* +copyright (c) 2013-2014, Simon Cruanes, Gabriel Radanne +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Batch Operations on Collections} *) + +module type COLLECTION = sig + type 'a t + + val empty : 'a t + val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a + val map : ('a -> 'b) -> 'a t -> 'b t + val filter : ('a -> bool) -> 'a t -> 'a t + val filter_map : ('a -> 'b option) -> 'a t -> 'b t + val flat_map : ('a -> 'b t) -> 'a t -> 'b t +end + +module type S = sig + type 'a t + + type ('a,'b) op + (** Operation that converts an ['a t] into a ['b t] *) + + val apply : ('a,'b) op -> 'a t -> 'b t + (** Apply the operation to the collection. *) + + val apply_fold : ('a, 'b) op -> ('c -> 'b -> 'c) -> 'c -> 'a t -> 'c + (** Apply the operation plus a fold to the collection. *) + + val apply' : 'a t -> ('a,'b) op -> 'b t + (** Flip of {!apply} *) + + (** {6 Combinators} *) + + val id : ('a, 'a) op + + val map : ('a -> 'b) -> ('a, 'b) op + + val filter : ('a -> bool) -> ('a,'a) op + + val filter_map : ('a -> 'b option) -> ('a,'b) op + + val flat_map : ('a -> 'b t) -> ('a,'b) op + + val extern : ('a t -> 'b t) -> ('a,'b) op + + val compose : ('b,'c) op -> ('a,'b) op -> ('a,'c) op + val (>>>) : ('a,'b) op -> ('b,'c) op -> ('a,'c) op +end + +module Make(C : COLLECTION) = struct + type 'a t = 'a C.t + type (_,_) op = + | Nil : ('a,'a) op + | Compose : ('a,'b) base_op * ('b, 'c) op -> ('a, 'c) op + and (_,_) base_op = + | Map : ('a -> 'b) -> ('a, 'b) base_op + | Filter : ('a -> bool) -> ('a, 'a) base_op + | FilterMap : ('a -> 'b option) -> ('a,'b) base_op + | FlatMap : ('a -> 'b t) -> ('a,'b) base_op + | Extern : ('a t -> 'b t) -> ('a,'b) base_op + + (* associativity: put parenthesis on the right *) + let rec _compose : type a b c. (a,b) op -> (b,c) op -> (a,c) op + = fun f g -> match f with + | Compose (f1, Nil) -> Compose (f1, g) + | Compose (f1, f2) -> Compose (f1, _compose f2 g) + | Nil -> g + + (* After optimization, the op is a list of flatmaps and external operations, + with maybe something else at the end *) + type (_,_) optimized_op = + | OptNil : ('a, 'a) optimized_op + | OptBase : ('a,'b) base_op * ('b, 'c) optimized_op -> ('a,'c) optimized_op + | OptFlatMap : ('a -> 'b t) * ('b, 'c) optimized_op -> ('a, 'c) optimized_op + | OptExtern : ('a t -> 'b t) * ('b, 'c) optimized_op -> ('a, 'c) optimized_op + + (* As compose, but optimize recursively on the way. *) + let rec optimize_compose + : type a b c. (a,b) base_op -> (b,c) op -> (a,c) optimized_op + = fun base_op op -> match base_op, op with + | f, Nil -> OptBase (f, OptNil) + | Map f, Compose (Map g, cont) -> + optimize_compose (Map (fun x -> g (f x))) cont + | Map f, Compose (Filter p, cont) -> + optimize_compose + (FilterMap (fun x -> let y = f x in if p y then Some y else None)) cont + | Map f, Compose (FilterMap f', cont) -> + optimize_compose + (FilterMap (fun x -> f' (f x))) cont + | Map f, Compose (FlatMap f', cont) -> + optimize_compose + (FlatMap (fun x -> f' (f x))) cont + | Filter p, Compose (Filter p', cont) -> + optimize_compose (Filter (fun x -> p x && p' x)) cont + | Filter p, Compose (Map g, cont) -> + optimize_compose + (FilterMap (fun x -> if p x then Some (g x) else None)) cont + | Filter p, Compose (FilterMap f', cont) -> + optimize_compose + (FilterMap (fun x -> if p x then f' x else None)) cont + | Filter p, Compose (FlatMap f', cont) -> + optimize_compose + (FlatMap (fun x -> if p x then f' x else C.empty)) cont + | FilterMap f, Compose (FilterMap f', cont) -> + optimize_compose + (FilterMap + (fun x -> match f x with None -> None | Some y -> f' y)) + cont + | FilterMap f, Compose (Filter p, cont) -> + optimize_compose + (FilterMap + (fun x -> match f x with + | (Some y) as res when p y -> res + | _ -> None)) + cont + | FilterMap f, Compose (Map f', cont) -> + optimize_compose + (FilterMap + (fun x -> match f x with + | None -> None + | Some y -> Some (f' y))) + cont + | FilterMap f, Compose (FlatMap f', cont) -> + optimize_compose + (FlatMap + (fun x -> match f x with + | None -> C.empty + | Some y -> f' y)) + cont + | FlatMap f, Compose (f', tail) -> + merge_flat_map f (optimize_compose f' tail) + | Extern f, Compose (f', tail) -> + OptExtern (f, optimize_compose f' tail) + | op, Compose (Extern f', cont) -> + OptBase (op, optimize_compose (Extern f') cont) + + and merge_flat_map + : type a b c. (a -> b C.t) -> (b,c) optimized_op -> (a,c) optimized_op = + fun f op -> match op with + | OptNil -> OptFlatMap (f, op) + | OptFlatMap (f', cont) -> + merge_flat_map + (fun x -> + let a = f x in + C.flat_map f' a) + cont + | OptExtern _ -> OptFlatMap (f, op) + | OptBase _ -> OptFlatMap (f, op) + + (* optimize a batch operation by fusion *) + let optimize : type a b. (a,b) op -> (a,b) optimized_op + = fun op -> match op with + | Compose (a, b) -> optimize_compose a b + | Nil -> OptNil + + let rec apply_optimized : type a b. (a,b) optimized_op -> a t -> b t + = fun op a -> match op with + | OptNil -> a + | OptBase (f,c) -> apply_optimized c (apply_base f a) + | OptFlatMap (f,c) -> apply_optimized c (C.flat_map f a) + | OptExtern (f,c) -> apply_optimized c (f a) + and apply_base : type a b. (a,b) base_op -> a t -> b t + = fun op a -> match op with + | Map f -> C.map f a + | Filter p -> C.filter p a + | FlatMap f -> C.flat_map f a + | FilterMap f -> C.filter_map f a + | Extern f -> f a + + let fusion_fold : type a b c. (a,b) base_op -> (c -> b -> c) -> c -> a -> c + = fun op f' -> match op with + | Map f -> (fun z x -> f' z (f x)) + | Filter p -> (fun z x -> if p x then f' z x else z) + | FlatMap f -> (fun z x -> C.fold f' z (f x)) + | FilterMap f -> (fun z x -> match f x with Some x' -> f' z x' | None -> z) + | Extern _ -> assert false + + let rec apply_optimized_with_fold + : type a b c. (a,b) optimized_op -> (c -> b -> c) -> c -> a t -> c + = fun op fold z a -> match op with + | OptNil -> C.fold fold z a + | OptBase (Extern f, OptNil) -> + C.fold fold z (f a) + | OptBase (f,OptNil) -> + (* terminal fold *) + C.fold (fusion_fold f fold) z a + | OptBase (f,c) -> + (* make intermediate collection and continue *) + apply_optimized_with_fold c fold z (apply_base f a) + | OptExtern (f,c) -> apply_optimized_with_fold c fold z (f a) + | OptFlatMap (f,c) -> apply_optimized_with_fold c fold z (C.flat_map f a) + + (* optimize and run *) + let apply op a = + let op' = optimize op in + apply_optimized op' a + + let apply_fold op fold z a = + let op' = optimize op in + apply_optimized_with_fold op' fold z a + + let apply' a op = apply op a + + (** {6 Combinators} *) + + let id = Nil + let map f = Compose (Map f, Nil) + let filter p = Compose (Filter p, Nil) + let filter_map f = Compose (FilterMap f, Nil) + let flat_map f = Compose (FlatMap f, Nil) + let extern f = Compose (Extern f, Nil) + + let compose f g = _compose g f + let (>>>) f g = _compose f g +end diff --git a/core/CCBatch.mli b/core/CCBatch.mli new file mode 100644 index 00000000..7b04b692 --- /dev/null +++ b/core/CCBatch.mli @@ -0,0 +1,81 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Batch Operations on Collections} +Define and combine operations on a collection of elements, then +run the composition of those operations on some collection. The +composition is optimized to minimize the number of intermediate +collections *) + +(** {2 Definition of a Collection} *) +module type COLLECTION = sig + type 'a t + + val empty : 'a t + val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a + val map : ('a -> 'b) -> 'a t -> 'b t + val filter : ('a -> bool) -> 'a t -> 'a t + val filter_map : ('a -> 'b option) -> 'a t -> 'b t + val flat_map : ('a -> 'b t) -> 'a t -> 'b t +end + +(** {2 Definition of a Batch operations} *) +module type S = sig + type 'a t + + type ('a,'b) op + (** Operation that converts an ['a t] into a ['b t] *) + + val apply : ('a,'b) op -> 'a t -> 'b t + (** Apply the operation to the collection. *) + + val apply_fold : ('a, 'b) op -> ('c -> 'b -> 'c) -> 'c -> 'a t -> 'c + (** Apply the operation plus a fold to the collection. *) + + val apply' : 'a t -> ('a,'b) op -> 'b t + (** Flip of {!apply} *) + + (** {6 Combinators} *) + + val id : ('a, 'a) op + + val map : ('a -> 'b) -> ('a, 'b) op + + val filter : ('a -> bool) -> ('a,'a) op + + val filter_map : ('a -> 'b option) -> ('a,'b) op + + val flat_map : ('a -> 'b t) -> ('a,'b) op + + val extern : ('a t -> 'b t) -> ('a,'b) op + (** Use a specific function that won't be optimized *) + + val compose : ('b,'c) op -> ('a,'b) op -> ('a,'c) op + val (>>>) : ('a,'b) op -> ('b,'c) op -> ('a,'c) op +end + +(** {2 Functor} *) +module Make(C : COLLECTION) : S with type 'a t = 'a C.t diff --git a/core/CCError.ml b/core/CCError.ml new file mode 100644 index 00000000..6d92b531 --- /dev/null +++ b/core/CCError.ml @@ -0,0 +1,124 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Error Monad} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + +(** {2 Basics} *) + +type +'a t = + [ `Ok of 'a + | `Error of string + ] + +let return x = `Ok x + +let fail s = `Error s + +let of_exn e = `Error (Printexc.to_string e) + +let map f e = match e with + | `Ok x -> `Ok (f x) + | `Error s -> `Error s + +let flat_map f e = match e with + | `Ok x -> f x + | `Error s -> `Error s + +let guard f = + try + return (f ()) + with e -> of_exn e + +let (>|=) e f = map f e + +let (>>=) e f = flat_map f e + +let equal eq a b = match a, b with + | `Ok x, `Ok y -> eq x y + | `Error s, `Error s' -> s = s' + | _ -> false + +let compare cmp a b = match a, b with + | `Ok x, `Ok y -> cmp x y + | `Ok _, _ -> 1 + | _, `Ok _ -> -1 + | `Error s, `Error s' -> String.compare s s' + +(** {2 Collections} *) + +let map_l f l = + let rec map acc l = match l with + | [] -> `Ok (List.rev acc) + | x::l' -> + match f x with + | `Error s -> `Error s + | `Ok y -> map (y::acc) l' + in map [] l + +exception LocalExit of string + +let fold_seq f acc seq = + try + let acc = ref acc in + seq + (fun x -> match f !acc x with + | `Error s -> raise (LocalExit s) + | `Ok y -> acc := y + ); + `Ok !acc + with LocalExit s -> `Error s + +let fold_l f acc l = fold_seq f acc (fun k -> List.iter k l) + +(** {2 Conversions} *) + +let to_opt = function + | `Ok x -> Some x + | `Error _ -> None + +let of_opt = function + | None -> `Error "of_opt" + | Some x -> `Ok x + +let to_seq e k = match e with + | `Ok x -> k x + | `Error _ -> () + +(** {2 IO} *) + +let pp pp_x buf e = match e with + | `Ok x -> Printf.bprintf buf "ok(%a)" pp_x x + | `Error s -> Printf.bprintf buf "error(%s)" s + +let print pp_x fmt e = match e with + | `Ok x -> Format.fprintf fmt "ok(%a)" pp_x x + | `Error s -> Format.fprintf fmt "error(%s)" s diff --git a/core/CCError.mli b/core/CCError.mli new file mode 100644 index 00000000..76a49065 --- /dev/null +++ b/core/CCError.mli @@ -0,0 +1,82 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Error Monad} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + +(** {2 Basics} *) + +type +'a t = + [ `Ok of 'a + | `Error of string + ] + +val return : 'a -> 'a t + +val fail : string -> 'a t + +val of_exn : exn -> 'a t + +val map : ('a -> 'b) -> 'a t -> 'b t + +val flat_map : ('a -> 'b t) -> 'a t -> 'b t + +val guard : (unit -> 'a) -> 'a t + +val (>|=) : 'a t -> ('a -> 'b) -> 'b t + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t + +val equal : 'a equal -> 'a t equal + +val compare : 'a ord -> 'a t ord + +(** {2 Collections} *) + +val map_l : ('a -> 'b t) -> 'a list -> 'b list t + +val fold_l : ('acc -> 'a -> 'acc t) -> 'acc -> 'a list -> 'acc t + +val fold_seq : ('acc -> 'a -> 'acc t) -> 'acc -> 'a sequence -> 'acc t + +(** {2 Conversions} *) + +val to_opt : 'a t -> 'a option + +val of_opt : 'a option -> 'a t + +val to_seq : 'a t -> 'a sequence + +(** {2 IO} *) + +val pp : 'a printer -> 'a t printer + +val print : 'a formatter -> 'a t formatter diff --git a/core/CCFun.ml b/core/CCFun.ml index dbedcf9b..fa4eadb6 100644 --- a/core/CCFun.ml +++ b/core/CCFun.ml @@ -40,6 +40,8 @@ let const x _ = x let uncurry f (x,y) = f x y +let tap f x = ignore (f x); x + let (%>) = compose let (%) f g x = f (g x) diff --git a/core/CCFun.mli b/core/CCFun.mli index d2a02f3e..81048ae0 100644 --- a/core/CCFun.mli +++ b/core/CCFun.mli @@ -48,6 +48,15 @@ val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c val uncurry : ('a -> 'b -> 'c) -> ('a * 'b) -> 'c +val tap : ('a -> 'b) -> 'a -> 'a +(** [tap f x] evaluates [f x], discards it, then returns [x]. Useful + in a pipeline, for instance: + {[CCArray.(1 -- 10) + |> tap CCArray.shuffle + |> tap CCArray.sort Pervasives.compare + ]} +*) + val (%) : ('b -> 'c) -> ('a -> 'b) -> 'a -> 'c (** Mathematical composition *) diff --git a/core/CCHash.ml b/core/CCHash.ml index af0b9a3b..e485d228 100644 --- a/core/CCHash.ml +++ b/core/CCHash.ml @@ -58,7 +58,7 @@ let hash_triple h1 h2 h3 (x,y,z) = (h1 x) <<>> (h2 y) <<>> (h3 z) type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -type 'a klist = [`Nil | `Cons of 'a * (unit -> 'a klist)] +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] let hash_seq f h seq = let h = ref h in @@ -70,7 +70,6 @@ let rec hash_gen f h g = match g () with | Some x -> hash_gen f (h <<>> f x) g -let rec hash_klist f h l = match l with +let rec hash_klist f h l = match l () with | `Nil -> h - | `Cons (x,l') -> hash_klist f (h <<>> f x) (l' ()) - + | `Cons (x,l') -> hash_klist f (h <<>> f x) l' diff --git a/core/CCHash.mli b/core/CCHash.mli index 2233e240..e250ed10 100644 --- a/core/CCHash.mli +++ b/core/CCHash.mli @@ -57,7 +57,7 @@ val hash_triple : 'a hash_fun -> 'b hash_fun -> 'c hash_fun -> ('a * 'b * 'c) ha type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -type 'a klist = [`Nil | `Cons of 'a * (unit -> 'a klist)] +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] val hash_seq : 'a hash_fun -> t -> 'a sequence hash_fun val hash_gen : 'a hash_fun -> t -> 'a gen hash_fun diff --git a/core/CCKList.ml b/core/CCKList.ml index 337310f4..d9a1112e 100644 --- a/core/CCKList.ml +++ b/core/CCKList.ml @@ -25,31 +25,196 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Continuation List} *) +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit -type + 'a t = +type + 'a t = unit -> [ `Nil - | `Cons of 'a * (unit -> 'a t) + | `Cons of 'a * 'a t ] -let nil = `Nil -let _nil () = nil -let cons a b = `Cons (a,b) +let nil () = `Nil +let cons a b () = `Cons (a,b) let empty = nil -let singleton x = `Cons (x, fun () -> `Nil) +let singleton x () = `Cons (x, nil) -let is_empty = function +let is_empty l = match l () with | `Nil -> true | `Cons _ -> false +let rec equal eq l1 l2 = match l1(), l2() with + | `Nil, `Nil -> true + | `Nil, _ + | _, `Nil -> false + | `Cons (x1,l1'), `Cons (x2,l2') -> + eq x1 x2 && equal eq l1' l2' + +let rec compare cmp l1 l2 = match l1(), l2() with + | `Nil, `Nil -> 0 + | `Nil, _ -> -1 + | _, `Nil -> 1 + | `Cons (x1,l1'), `Cons (x2,l2') -> + let c = cmp x1 x2 in + if c = 0 then compare cmp l1' l2' else c + +let rec fold f acc res = match res () with + | `Nil -> acc + | `Cons (s, cont) -> fold f (f acc s) cont + +let rec iter f l = match l () with + | `Nil -> () + | `Cons (x, l') -> f x; iter f l' + +let length l = fold (fun acc _ -> acc+1) 0 l + +let rec take n (l:'a t) () = match l () with + | _ when n=0 -> `Nil + | `Nil -> `Nil + | `Cons (x,l') -> `Cons (x, take (n-1) l') + +let rec take_while p l () = match l () with + | `Nil -> `Nil + | `Cons (x,l') when p x -> `Cons (x, take_while p l') + | `Cons (_,l') -> take_while p l' () + +let rec drop n (l:'a t) () = match l () with + | l' when n=0 -> l' + | `Nil -> `Nil + | `Cons (_,l') -> drop (n-1) l' () + +let rec drop_while p l () = match l() with + | `Nil -> `Nil + | `Cons (x,l') when p x -> drop_while p l' () + | `Cons _ as res -> res + +(*$Q + (Q.pair (Q.list Q.small_int) Q.small_int) (fun (l,n) -> \ + let s = of_list l in let s1, s2 = take n s, drop n s in \ + append s1 s2 |> to_list = l ) +*) + +let rec map f l () = match l () with + | `Nil -> `Nil + | `Cons (x, l') -> `Cons (f x, map f l') + +(*$T + (map ((+) 1) (1 -- 5) |> to_list) = (2 -- 6 |> to_list) +*) + +let rec fmap f (l:'a t) () = match l() with + | `Nil -> `Nil + | `Cons (x, l') -> + begin match f x with + | None -> fmap f l' () + | Some y -> `Cons (y, fmap f l') + end + +(*$T + fmap (fun x -> if x mod 2=0 then Some (x*3) else None) (1--10) |> to_list \ + = [6;12;18;24;30] +*) + +let rec filter p l () = match l () with + | `Nil -> `Nil + | `Cons (x, l') -> + if p x + then `Cons (x, filter p l') + else filter p l' () + +let rec append l1 l2 () = match l1 () with + | `Nil -> l2 () + | `Cons (x, l1') -> `Cons (x, append l1' l2) + +let rec flat_map f l () = match l () with + | `Nil -> `Nil + | `Cons (x, l') -> + _flat_map_app f (f x) l' () +and _flat_map_app f l l' () = match l () with + | `Nil -> flat_map f l' () + | `Cons (x, tl) -> + `Cons (x, _flat_map_app f tl l') + +let rec filter_map f l () = match l() with + | `Nil -> `Nil + | `Cons (x, l') -> + begin match f x with + | None -> filter_map f l' () + | Some y -> `Cons (y, filter_map f l') + end + +let flatten l = flat_map (fun x->x) l + +let range i j = + let rec aux i j () = + if i=j then `Cons(i, nil) + else if i to_list = [0;1;2;3;4;5] + range 0 0 |> to_list = [0] + range 5 2 |> to_list = [5;4;3;2] +*) + +let (--) = range + +let rec fold2 f acc l1 l2 = match l1(), l2() with + | `Nil, _ + | _, `Nil -> acc + | `Cons(x1,l1'), `Cons(x2,l2') -> + fold2 f (f acc x1 x2) l1' l2' + +let rec map2 f l1 l2 () = match l1(), l2() with + | `Nil, _ + | _, `Nil -> `Nil + | `Cons(x1,l1'), `Cons(x2,l2') -> + `Cons (f x1 x2, map2 f l1' l2') + +let rec iter2 f l1 l2 = match l1(), l2() with + | `Nil, _ + | _, `Nil -> () + | `Cons(x1,l1'), `Cons(x2,l2') -> + f x1 x2; iter2 f l1' l2' + +let rec for_all2 f l1 l2 = match l1(), l2() with + | `Nil, _ + | _, `Nil -> true + | `Cons(x1,l1'), `Cons(x2,l2') -> + f x1 x2 && for_all2 f l1' l2' + +let rec exists2 f l1 l2 = match l1(), l2() with + | `Nil, _ + | _, `Nil -> false + | `Cons(x1,l1'), `Cons(x2,l2') -> + f x1 x2 || exists2 f l1' l2' + +let rec merge cmp l1 l2 () = match l1(), l2() with + | `Nil, tl2 -> tl2 + | tl1, `Nil -> tl1 + | `Cons(x1,l1'), `Cons(x2,l2') -> + if cmp x1 x2 < 0 + then `Cons (x1, merge cmp l1' l2) + else `Cons (x2, merge cmp l1 l2') + +(** {2 Conversions} *) + +let rec _to_rev_list acc l = match l() with + | `Nil -> acc + | `Cons (x,l') -> _to_rev_list (x::acc) l' + +let to_rev_list l = _to_rev_list [] l + let to_list l = - let rec direct i (l:'a t) = match l with + let rec direct i (l:'a t) = match l () with | `Nil -> [] - | _ when i=0 -> safe [] l - | `Cons (x, f) -> x :: direct (i-1) (f ()) - and safe acc l = match l with - | `Nil -> List.rev acc - | `Cons (x,l') -> safe (x::acc) (l' ()) + | _ when i=0 -> List.rev (_to_rev_list [] l) + | `Cons (x, f) -> x :: direct (i-1) f in direct 200 l @@ -57,87 +222,37 @@ let of_list l = let rec aux l () = match l with | [] -> `Nil | x::l' -> `Cons (x, aux l') - in aux l () + in aux l -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -let rec to_seq res k = match res with +let rec to_seq res k = match res () with | `Nil -> () - | `Cons (s, f) -> k s; to_seq (f ()) k + | `Cons (s, f) -> k s; to_seq f k let to_gen l = let l = ref l in fun () -> - match !l with + match !l () with | `Nil -> None | `Cons (x,l') -> - l := l' (); + l := l'; Some x -let rec fold f acc res = match res with - | `Nil -> acc - | `Cons (s, cont) -> fold f (f acc s) (cont ()) +(** {2 IO} *) -let rec iter f l = match l with - | `Nil -> () - | `Cons (x, l') -> f x; iter f (l' ()) +let pp ?(sep=",") pp_item buf l = + let rec pp buf l = match l() with + | `Nil -> () + | `Cons (x,l') -> Buffer.add_string buf sep; pp_item buf x; pp buf l' + in + match l() with + | `Nil -> () + | `Cons (x,l') -> pp_item buf x; pp buf l' -let length l = fold (fun acc _ -> acc+1) 0 l - -let rec take n (l:'a t):'a t = match l with - | _ when n=0 -> `Nil - | `Nil -> `Nil - | `Cons (x,l') -> `Cons (x, fun () -> take (n-1) (l' ())) - -let rec drop n (l:'a t) = match l with - | _ when n=0 -> l - | `Nil -> `Nil - | `Cons (_,l') -> drop (n-1) (l'()) - -let rec map f l = match l with - | `Nil -> `Nil - | `Cons (x, l') -> `Cons (f x, fun () -> map f (l' ())) - -let rec fmap f (l:'a t):'b t = match l with - | `Nil -> `Nil - | `Cons (x, l') -> - begin match f x with - | None -> fmap f (l' ()) - | Some y -> `Cons (y, fun () -> fmap f (l' ())) - end - -let rec filter p l = match l with - | `Nil -> `Nil - | `Cons (x, l') -> - if p x - then `Cons (x, fun () -> filter p (l' ())) - else filter p (l' ()) - -let rec append l1 l2 = match l1 with - | `Nil -> l2 - | `Cons (x, l1') -> `Cons (x, fun () -> append (l1' ()) l2) - -let rec flat_map f l = match l with - | `Nil -> `Nil - | `Cons (x, l') -> - _flat_map_app f (f x) (l' ()) () -and _flat_map_app f l l' () = match l with - | `Nil -> flat_map f l' - | `Cons (x, tl) -> - `Cons (x, _flat_map_app f (tl ()) l') - -let flatten l = flat_map (fun x->x) l - -let range i j = - let rec aux i j () = - if i=j then cons i _nil - else if i to_list = [0;1;2;3;4;5] - range 0 0 |> to_list = [0] - range 5 2 |> to_list = [5;4;3;2] -*) +let print ?(sep=",") pp_item fmt l = + let rec pp fmt l = match l() with + | `Nil -> () + | `Cons (x,l') -> Format.pp_print_string fmt sep; pp_item fmt x; pp fmt l' + in + match l() with + | `Nil -> () + | `Cons (x,l') -> pp_item fmt x; pp fmt l' diff --git a/core/CCKList.mli b/core/CCKList.mli index 0975a24c..ddb808bb 100644 --- a/core/CCKList.mli +++ b/core/CCKList.mli @@ -25,43 +25,51 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Continuation List} *) -type + 'a t = +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + +(** {2 Basics} *) + +type + 'a t = unit -> [ `Nil - | `Cons of 'a * (unit -> 'a t) + | `Cons of 'a * 'a t ] val nil : 'a t val empty : 'a t -val cons : 'a -> (unit -> 'a t) -> 'a t +val cons : 'a -> 'a t -> 'a t val singleton : 'a -> 'a t val is_empty : 'a t -> bool -val of_list : 'a list -> 'a t +val equal : 'a equal -> 'a t equal +(** Equality step by step. Eager. *) -val to_list : 'a t -> 'a list -(** Gather all values into a list *) - -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -val to_seq : 'a t -> 'a sequence -val to_gen : 'a t -> 'a gen +val compare : 'a ord -> 'a t ord +(** Lexicographic comparison. Eager. *) val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a (** Fold on values *) val iter : ('a -> unit) -> 'a t -> unit -val length : 'a t -> int +val length : _ t -> int val take : int -> 'a t -> 'a t +val take_while : ('a -> bool) -> 'a t -> 'a t + val drop : int -> 'a t -> 'a t +val drop_while : ('a -> bool) -> 'a t -> 'a t + val map : ('a -> 'b) -> 'a t -> 'b t val fmap : ('a -> 'b option) -> 'a t -> 'b t @@ -72,6 +80,49 @@ val append : 'a t -> 'a t -> 'a t val flat_map : ('a -> 'b t) -> 'a t -> 'b t +val filter_map : ('a -> 'b option) -> 'a t -> 'b t + val flatten : 'a t t -> 'a t val range : int -> int -> int t + +val (--) : int -> int -> int t + +(** {2 Operations on two Collections} *) + +val fold2 : ('acc -> 'a -> 'b -> 'acc) -> 'acc -> 'a t -> 'b t -> 'acc +(** Fold on two collections at once. Stop at soon as one of them ends *) + +val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t +(** Map on two collections at once. Stop as soon as one of the + arguments is exhausted *) + +val iter2 : ('a -> 'b -> unit) -> 'a t -> 'b t -> unit +(** Iterate on two collections at once. Stop as soon as one of them ends *) + +val for_all2 : ('a -> 'b -> bool) -> 'a t -> 'b t -> bool +val exists2 : ('a -> 'b -> bool) -> 'a t -> 'b t -> bool + +val merge : 'a ord -> 'a t -> 'a t -> 'a t +(** Merge two sorted iterators into a sorted iterator *) + +(** {2 Conversions} *) + +val of_list : 'a list -> 'a t + +val to_list : 'a t -> 'a list +(** Gather all values into a list *) + +val to_rev_list : 'a t -> 'a list +(** Convert to a list, in reverse order. More efficient than {!to_list} *) + +val to_seq : 'a t -> 'a sequence + +val to_gen : 'a t -> 'a gen + + +(** {2 IO} *) + +val pp : ?sep:string -> 'a printer -> 'a t printer + +val print : ?sep:string -> 'a formatter -> 'a t formatter diff --git a/core/CCLeftistheap.ml b/core/CCLeftistheap.ml index 5c6f9def..1a73853c 100644 --- a/core/CCLeftistheap.ml +++ b/core/CCLeftistheap.ml @@ -28,6 +28,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** Polymorphic implementation, following Okasaki *) type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option type 'a t = { tree : 'a tree; @@ -81,6 +83,8 @@ let insert heap x = let tree = merge_tree heap.leq (Node (1, x, Empty, Empty)) heap.tree in { heap with tree; } +let add = insert + let filter heap p = let rec filter tree p = match tree with | Empty -> Empty @@ -104,7 +108,14 @@ let extract_min heap = let heap' = { heap with tree; } in heap', x -let iter heap f = +let take heap = match heap.tree with + | Empty -> None + | Node (_, x, a, b) -> + let tree = merge_tree heap.leq a b in + let heap' = { heap with tree; } in + Some (x, heap') + +let iter f heap = let rec iter t = match t with | Empty -> () | Node (_, x, a, b) -> @@ -113,9 +124,18 @@ let iter heap f = iter b; in iter heap.tree +let fold f acc h = + let rec fold acc h = match h with + | Empty -> acc + | Node (_, x, a, b) -> + let acc = f acc x in + let acc = fold acc a in + fold acc b + in fold acc h.tree + let size heap = let r = ref 0 in - iter heap (fun _ -> incr r); + iter (fun _ -> incr r) heap; !r let of_seq heap seq = @@ -123,4 +143,38 @@ let of_seq heap seq = seq (fun x -> h := insert !h x); !h -let to_seq = iter +let to_seq h k = iter k h + +let rec of_klist h l = match l() with + | `Nil -> h + | `Cons (x, l') -> + let h' = add h x in + of_klist h' l' + +let to_klist h = + let rec next stack () = match stack with + | [] -> `Nil + | Empty :: stack' -> next stack' () + | Node (_, x, a, b) :: stack' -> + `Cons (x, next (a :: b :: stack')) + in + next [h.tree] + +let rec of_gen h g = match g () with + | None -> h + | Some x -> + of_gen (add h x) g + +let to_gen h = + let stack = Stack.create () in + Stack.push h.tree stack; + let rec next () = + if Stack.is_empty stack + then None + else match Stack.pop stack with + | Empty -> next() + | Node (_, x, a, b) -> + Stack.push a stack; + Stack.push b stack; + Some x + in next diff --git a/core/CCLeftistheap.mli b/core/CCLeftistheap.mli index 701df006..9836ce9a 100644 --- a/core/CCLeftistheap.mli +++ b/core/CCLeftistheap.mli @@ -23,11 +23,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 Leftist Heaps} *) - -(** Polymorphic implementation, following Okasaki *) +(** {1 Leftist Heaps} +Polymorphic implementation, following Okasaki *) type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option type 'a t (** Heap containing values of type 'a *) @@ -37,7 +38,7 @@ val empty_with : leq:('a -> 'a -> bool) -> 'a t smaller than the second. *) val empty : 'a t - (** Empty heap using Pervasives.compare *) + (** Empty heap using [Pervasives.compare] *) val is_empty : _ t -> bool (** Is the heap empty? *) @@ -48,21 +49,39 @@ val merge : 'a t -> 'a t -> 'a t val insert : 'a t -> 'a -> 'a t (** Insert a value in the heap *) +val add : 'a t -> 'a -> 'a t + (** Synonym to {!insert} *) + val filter : 'a t -> ('a -> bool) -> 'a t - (** Filter values, only retaining the ones that satisfy the predicate *) + (** Filter values, only retaining the ones that satisfy the predicate. + Linear time at least. *) val find_min : 'a t -> 'a - (** Find minimal element, or raise Not_found *) + (** Find minimal element, or fails + @raise Not_found if the heap is empty *) val extract_min : 'a t -> 'a t * 'a - (** Extract and returns the minimal element, or raise Not_found *) + (** Extract and returns the minimal element, or + raise Not_found if the heap is empty *) -val iter : 'a t -> ('a -> unit) -> unit +val take : 'a t -> ('a * 'a t) option + (** Extract and return the minimum element, and the new heap (without + this element), or [None] if the heap is empty *) + +val iter : ('a -> unit) -> 'a t -> unit (** Iterate on elements *) +val fold : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b + (** Fold on all values *) + val size : _ t -> int - (** Number of elements (linear) *) + (** Number of elements (linear complexity) *) val of_seq : 'a t -> 'a sequence -> 'a t - val to_seq : 'a t -> 'a sequence + +val of_klist : 'a t -> 'a klist -> 'a t +val to_klist : 'a t -> 'a klist + +val of_gen : 'a t -> 'a gen -> 'a t +val to_gen : 'a t -> 'a gen diff --git a/core/CCLinq.ml b/core/CCLinq.ml new file mode 100644 index 00000000..5650d4c6 --- /dev/null +++ b/core/CCLinq.ml @@ -0,0 +1,1066 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 LINQ-like operations on collections} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a hash = 'a -> int +type 'a with_err = [`Ok of 'a | `Error of string ] + +(* TODO: add CCVector as a collection *) + +let _id x = x + +exception ExitWithError of string +let _exit_with_error s = raise (ExitWithError s) +let _error_of_exn f = try `Ok (f ()) with ExitWithError s -> `Error + +type 'a collection = + | Seq : 'a sequence -> 'a collection + | List : 'a list -> 'a collection + | Set : (module CCSequence.Set.S + with type elt = 'a and type t = 'b) * 'b -> 'a collection + +module PMap = struct + type ('a, 'b) t = { + is_empty : unit -> bool; + size : unit -> int; (** Number of keys *) + get : 'a -> 'b option; + fold : 'c. ('c -> 'a -> 'b -> 'c) -> 'c -> 'c; + to_seq : ('a * 'b) sequence; + } + + let get m x = m.get x + let mem m x = match m.get x with + | None -> false + | Some _ -> true + let to_seq m = m.to_seq + let fold f acc m = m.fold f acc + let size m = m.size () + let get_err m x = match m.get x with + | Some y -> `Ok y + | None -> `Error "PMap.get: lookup error" + + type ('a, 'b) build = { + mutable cur : ('a, 'b) t; + add : 'a -> 'b -> unit; + update : 'a -> ('b option -> 'b option) -> unit; + } + + let build_get b = b.cur + let add b x y = b.add x y + let update b f = b.update f + + (* careful to use this map linearly *) + let make_hash (type key) ?(eq=(=)) ?(hash=Hashtbl.hash) () = + let module H = Hashtbl.Make(struct + type t = key + let equal = eq + let hash = hash + end) in + (* build table *) + let tbl = H.create 32 in + let cur = { + is_empty = (fun () -> H.length tbl = 0); + size = (fun () -> H.length tbl); + get = (fun k -> + try Some (H.find tbl k) + with Not_found -> None); + fold = (fun f acc -> H.fold (fun k v acc -> f acc k v) tbl acc); + to_seq = (fun k -> H.iter (fun key v -> k (key,v)) tbl); + } in + { cur; + add = (fun k v -> H.replace tbl k v); + update = (fun k f -> + match (try f (Some (H.find tbl k)) with Not_found -> f None) with + | None -> H.remove tbl k + | Some v' -> H.replace tbl k v'); + } + + let make_cmp (type key) ?(cmp=Pervasives.compare) () = + let module M = CCSequence.Map.Make(struct + type t = key + let compare = cmp + end) in + let map = ref M.empty in + let cur = { + is_empty = (fun () -> M.is_empty !map); + size = (fun () -> M.cardinal !map); + get = (fun k -> + try Some (M.find k !map) + with Not_found -> None); + fold = (fun f acc -> + M.fold + (fun key set acc -> f acc key set) !map acc + ); + to_seq = (fun k -> M.to_seq !map k); + } in + { + cur; + add = (fun k v -> map := M.add k v !map); + update = (fun k f -> + match (try f (Some (M.find k !map)) with Not_found -> f None) with + | None -> map := M.remove k !map + | Some v' -> map := M.add k v' !map); + } + + type 'a build_method = + | FromCmp of 'a ord + | FromHash of 'a equal * 'a hash + | Default + + let make ?(build=Default) () = match build with + | Default -> make_hash () + | FromCmp cmp -> make_cmp ~cmp () + | FromHash (eq,hash) -> make_hash ~eq ~hash () + + let multimap_of_seq ?(build=make ()) seq = + seq (fun (k,v) -> + build.update k (function + | None -> Some [v] + | Some l -> Some (v::l))); + build.cur + + let count_of_seq ?(build=make ()) seq = + seq (fun x -> + build.update x + (function + | None -> Some 1 + | Some n -> Some (n+1))); + build.cur + + let get_exn m x = + match m.get x with + | None -> raise Not_found + | Some x -> x + + (* map values *) + let map f m = { + is_empty = m.is_empty; + size = m.size; + get = (fun k -> match m.get k with + | None -> None + | Some v -> Some (f v) + ); + to_seq = CCSequence.map (fun (x,y) -> x, f y) m.to_seq; + fold = (fun f' acc -> + m.fold (fun acc x y -> f' acc x (f y)) acc + ); + } + + let to_list m = m.to_seq |> CCSequence.to_rev_list + + let to_coll m = Seq m.to_seq + + let reverse ~build m = + let build = make ~build () in + to_seq m + |> CCSequence.map (fun (x,y) -> y,x) + |> multimap_of_seq ~build + + let reverse_multimap ~build m = + let build = make ~build () in + to_seq m + |> CCSequence.flatMap + (fun (x,l) -> + CCSequence.of_list l + |> CCSequence.map (fun y -> y,x) + ) + |> multimap_of_seq ~build +end + +type 'a search_result = + | SearchContinue + | SearchStop of 'a + +type ('a,'b,'key,'c) join_descr = { + join_key1 : 'a -> 'key; + join_key2 : 'b -> 'key; + join_merge : 'key -> 'a -> 'b -> 'c option; + join_build : 'key PMap.build_method; +} + +type ('a,'b) group_join_descr = { + gjoin_proj : 'b -> 'a; + gjoin_build : 'a PMap.build_method; +} + +module Coll = struct + let of_seq s = Seq s + let of_list l = List l + let of_array a = Seq (CCSequence.of_array a) + + let set_of_seq (type elt) ?(cmp=Pervasives.compare) seq = + let module S = CCSequence.Set.Make(struct + type t = elt + let compare = cmp + end) in + let set = S.of_seq seq in + Set ((module S), set) + + let to_seq (type elt) = function + | Seq s -> s + | List l -> (fun k -> List.iter k l) + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + S.to_seq set + + let to_list (type elt) = function + | Seq s -> CCSequence.to_list s + | List l -> l + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + S.elements set + + let _fmap ~lst ~seq c = match c with + | List l -> List (lst l) + | Seq s -> Seq (seq s) + | Set _ -> + List (lst (to_list c)) + + let fold (type elt) f acc c = match c with + | List l -> List.fold_left f acc l + | Seq s -> CCSequence.fold f acc s + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + S.fold (fun x acc -> f acc x) set acc + + let map f c = + _fmap ~lst:(List.map f) ~seq:(CCSequence.map f) c + + let filter p c = + _fmap ~lst:(List.filter p) ~seq:(CCSequence.filter p) c + + let flat_map f c = + let c' = to_seq c in + Seq (CCSequence.flatMap (fun x -> to_seq (f x)) c') + + let filter_map f c = + _fmap ~lst:(CCList.filter_map f) ~seq:(CCSequence.fmap f) c + + let size (type elt) = function + | List l -> List.length l + | Seq s -> CCSequence.length s + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + S.cardinal set + + let choose_exn (type elt) c = + let fail () = _exit_with_error "choose: empty collection" in + match c with + | List [] -> fail () + | List (x::_) -> x + | Seq s -> + begin match CCSequence.take 1 s |> CCSequence.to_list with + | [x] -> x + | _ -> fail () + end + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + try S.choose set with Not_found -> fail () + + let choose_err c = + try `Ok (choose_exn c) + with ExitWithError s -> `Error s + + let take n c = + _fmap ~lst:(CCList.take n) ~seq:(CCSequence.take n) c + + exception MySurpriseExit + + let _seq_take_while p seq k = + try + seq (fun x -> if not (p x) then k x else raise MySurpriseExit) + with MySurpriseExit -> () + + let take_while p c = + to_seq c |> _seq_take_while p |> of_seq + + let distinct ~cmp c = set_of_seq ~cmp (to_seq c) + + let sort cmp c = match c with + | List l -> List (List.sort cmp l) + | Seq s -> List (List.sort cmp (CCSequence.to_rev_list s)) + | _ -> + to_seq c |> set_of_seq ~cmp + + let search obj c = + let _search_seq obj seq = + let ret = ref None in + begin try + seq (fun x -> match obj#check x with + | SearchContinue -> () + | SearchStop y -> ret := Some y; raise MySurpriseExit); + with MySurpriseExit -> () + end; + match !ret with + | None -> obj#failure + | Some x -> x + in + to_seq c |> _search_seq obj + + let contains (type elt) ~eq x c = match c with + | List l -> List.exists (eq x) l + | Seq s -> CCSequence.exists (eq x) s + | Set (m, set) -> + let module S = (val m : CCSequence.Set.S + with type elt = elt and type t = 'b) in + (* XXX: here we don't use the equality relation *) + try + let y = S.find x set in + assert (eq x y); + true + with Not_found -> false + + let do_join ~join c1 c2 = + let build1 = + to_seq c1 + |> CCSequence.map (fun x -> join.join_key1 x, x) + |> PMap.multimap_of_seq ~build:(PMap.make ~build:join.join_build ()) + in + let l = CCSequence.fold + (fun acc y -> + let key = join.join_key2 y in + match PMap.get build1 key with + | None -> acc + | Some l1 -> + List.fold_left + (fun acc x -> match join.join_merge key x y with + | None -> acc + | Some res -> res::acc + ) acc l1 + ) [] (to_seq c2) + in + of_list l + + let do_group_join ~gjoin c1 c2 = + let build = PMap.make ~build:gjoin.gjoin_build () in + to_seq c1 (fun x -> PMap.add build x []); + to_seq c2 + (fun y -> + (* project [y] into some element of [c1] *) + let x = gjoin.gjoin_proj y in + PMap.update build x + (function + | None -> None (* [x] not present, ignore! *) + | Some l -> Some (y::l) + ) + ); + PMap.build_get build + + let do_product c1 c2 = + let s1 = to_seq c1 and s2 = to_seq c2 in + of_seq (CCSequence.product s1 s2) + + let do_union ~build c1 c2 = + let build = PMap.make ~build () in + to_seq c1 (fun x -> PMap.add build x ()); + to_seq c2 (fun x -> PMap.add build x ()); + PMap.to_seq (PMap.build_get build) + |> CCSequence.map fst + |> of_seq + + type inter_status = + | InterLeft + | InterDone (* already output *) + + let do_inter ~build c1 c2 = + let build = PMap.make ~build () in + let l = ref [] in + to_seq c1 (fun x -> PMap.add build x InterLeft); + to_seq c2 (fun x -> + PMap.update build x + (function + | None -> Some InterDone + | Some InterDone as foo -> foo + | Some InterLeft -> + l := x :: !l; + Some InterDone + ) + ); + of_list !l + + let do_diff ~build c1 c2 = + let build = PMap.make ~build () in + to_seq c2 (fun x -> PMap.add build x ()); + let map = PMap.build_get build in + (* output elements of [c1] not in [map] *) + to_seq c1 + |> CCSequence.filter (fun x -> not (PMap.mem map x)) + |> of_seq +end + +(** {2 Query operators} *) + +type (_,_) safety = + | Explicit : ('a, 'a with_err) safety + | Implicit : ('a, 'a) safety + +type (_, _) unary = + | PMap : ('a -> 'b) -> ('a collection, 'b collection) unary + | GeneralMap : ('a -> 'b) -> ('a, 'b) unary + | Filter : ('a -> bool) -> ('a collection, 'a collection) unary + | Fold : ('b -> 'a -> 'b) * 'b -> ('a collection, 'b) unary + | FoldMap : ('acc -> 'a -> 'b -> 'acc) * 'acc + -> (('a,'b) PMap.t, 'acc) unary + | Reduce : ('c, 'd) safety * ('a -> 'b) * ('a -> 'b -> 'b) * ('b -> 'c) + -> ('a collection, 'd) unary + | Size : ('a collection, int) unary + | Choose : ('a,'b) safety -> ('a collection, 'b) unary + | FilterMap : ('a -> 'b option) -> ('a collection, 'b collection) unary + | FlatMap : ('a -> 'b collection) -> ('a collection, 'b collection) unary + | Take : int -> ('a collection, 'a collection) unary + | TakeWhile : ('a -> bool) -> ('a collection, 'a collection) unary + | Sort : 'a ord -> ('a collection, 'a collection) unary + | Distinct : 'a ord -> ('a collection, 'a collection) unary + | Search : + < check: ('a -> 'b search_result); + failure : 'b; + > -> ('a collection, 'b) unary + | Contains : 'a equal * 'a -> ('a collection, bool) unary + | Get : ('b,'c) safety * 'a -> (('a,'b) PMap.t, 'c) unary + | GroupBy : 'b PMap.build_method * ('a -> 'b) + -> ('a collection, ('b,'a list) PMap.t) unary + | Count : 'a PMap.build_method -> ('a collection, ('a, int) PMap.t) unary + | Lazy : ('a lazy_t, 'a) unary + +type set_op = + | Union + | Inter + | Diff + +type (_, _, _) binary = + | Join : ('a, 'b, 'key, 'c) join_descr + -> ('a collection, 'b collection, 'c collection) binary + | GroupJoin : ('a, 'b) group_join_descr + -> ('a collection, 'b collection, ('a, 'b list) PMap.t) binary + | Product : ('a collection, 'b collection, ('a*'b) collection) binary + | Append : ('a collection, 'a collection, 'a collection) binary + | SetOp : set_op * 'a PMap.build_method + -> ('a collection, 'a collection, 'a collection) binary + +(* type of queries that return a 'a *) +and 'a t = + | Start : 'a -> 'a t + | Catch : 'a with_err t -> 'a t + | Unary : ('a, 'b) unary * 'a t -> 'b t + | Binary : ('a, 'b, 'c) binary * 'a t * 'b t -> 'c t + | QueryMap : ('a -> 'b) * 'a t -> 'b t + | Bind : ('a -> 'b t) * 'a t -> 'b t + +let start x = Start x + +let of_list l = + Start (Coll.of_list l) + +let of_array a = + Start (Coll.of_array a) + +let of_array_i a = + Start (CCSequence.of_array_i a |> Coll.of_seq) + +let of_hashtbl h = + Start (Coll.of_seq (CCSequence.of_hashtbl h)) + +let of_seq seq = + Start (Coll.of_seq seq) + +let of_queue q = + Start (CCSequence.of_queue q |> Coll.of_seq) + +let of_stack s = + Start (CCSequence.of_stack s |> Coll.of_seq) + +let of_string s = + Start (CCSequence.of_str s |> Coll.of_seq) + +(** {6 Execution} *) + +let rec _optimize : type a. a t -> a t + = fun q -> match q with + | Start _ -> q + | Catch q' -> Catch (_optimize q') + | Unary (u, q) -> + _optimize_unary u (_optimize q) + | Binary (b, q1, q2) -> + _optimize_binary b (_optimize q1) (_optimize q2) + | QueryMap (f, q) -> QueryMap (f, _optimize q) + | Bind _ -> q (* cannot optimize before execution *) +and _optimize_unary : type a b. (a,b) unary -> a t -> b t + = fun u q -> match u, q with + | PMap f, Unary (PMap g, q') -> + _optimize_unary (PMap (fun x -> f (g x))) q' + | Filter p, Unary (PMap f, cont) -> + _optimize_unary + (FilterMap (fun x -> let y = f x in if p y then Some y else None)) + cont + | PMap f, Unary (Filter p, cont) -> + _optimize_unary + (FilterMap (fun x -> if p x then Some (f x) else None)) + cont + | PMap f, Binary (Append, q1, q2) -> + _optimize_binary Append (Unary (u, q1)) (Unary (u, q2)) + | Filter p, Binary (Append, q1, q2) -> + _optimize_binary Append (Unary (u, q1)) (Unary (u, q2)) + | Fold (f,acc), Unary (PMap f', cont) -> + _optimize_unary + (Fold ((fun acc x -> f acc (f' x)), acc)) + cont + | Reduce (safety, start, mix, stop), Unary (PMap f, cont) -> + _optimize_unary + (Reduce (safety, + (fun x -> start (f x)), + (fun x acc -> mix (f x) acc), + stop)) + cont + | Size, Unary (PMap _, cont) -> + _optimize_unary Size cont (* ignore the map! *) + | Size, Unary (Sort _, cont) -> + _optimize_unary Size cont + | _ -> Unary (u,q) + (* TODO: other cases *) +and _optimize_binary : type a b c. (a,b,c) binary -> a t -> b t -> c t + = fun b q1 q2 -> match b, q1, q2 with + | _ -> Binary (b, q1, q2) (* TODO *) + +(* apply a unary operator on a collection *) +let _do_unary : type a b. (a,b) unary -> a -> b += fun u c -> match u with + | PMap f -> Coll.map f c + | GeneralMap f -> f c + | Filter p -> Coll.filter p c + | Fold (f, acc) -> Coll.fold f acc c + | FoldMap (f, acc) -> PMap.fold f acc c + | Reduce (safety, start, mix, stop) -> + let acc = Coll.to_seq c + |> CCSequence.fold + (fun acc x -> match acc with + | None -> Some (start x) + | Some acc -> Some (mix x acc) + ) None + in + begin match acc, safety with + | Some x, Implicit -> stop x + | None, Implicit -> _exit_with_error "reduce: empty collection" + | Some x, Explicit -> `Ok (stop x) + | None, Explicit -> `Error "reduce: empty collection" + end + | Size -> Coll.size c + | Choose Implicit -> Coll.choose_exn c + | Choose Explicit -> Coll.choose_err c + | FilterMap f -> Coll.filter_map f c + | FlatMap f -> Coll.flat_map f c + | Take n -> Coll.take n c + | TakeWhile p -> Coll.take_while p c + | Sort cmp -> Coll.sort cmp c + | Distinct cmp -> Coll.distinct ~cmp c + | Search obj -> Coll.search obj c + | Get (Implicit, k) -> PMap.get_exn c k + | Get (Explicit, k) -> PMap.get_err c k + | GroupBy (build,f) -> + Coll.to_seq c + |> CCSequence.map (fun x -> f x, x) + |> PMap.multimap_of_seq ~build:(PMap.make ~build ()) + | Contains (eq, x) -> Coll.contains ~eq x c + | Count build -> + Coll.to_seq c + |> PMap.count_of_seq ~build:(PMap.make ~build ()) + | Lazy -> Lazy.force c + +let _do_binary : type a b c. (a, b, c) binary -> a -> b -> c += fun b c1 c2 -> match b with + | Join join -> Coll.do_join ~join c1 c2 + | GroupJoin gjoin -> Coll.do_group_join ~gjoin c1 c2 + | Product -> Coll.do_product c1 c2 + | Append -> + Coll.of_seq (CCSequence.append (Coll.to_seq c1) (Coll.to_seq c2)) + | SetOp (Inter,build) -> Coll.do_inter ~build c1 c2 + | SetOp (Union,build) -> Coll.do_union ~build c1 c2 + | SetOp (Diff,build) -> Coll.do_diff ~build c1 c2 + +let rec _run : type a. opt:bool -> a t -> a += fun ~opt q -> match q with + | Start c -> c + | Catch q' -> + begin match _run ~opt q' with + | `Ok x -> x + | `Error s -> _exit_with_error s + end + | Unary (u, q') -> _do_unary u (_run ~opt q') + | Binary (b, q1, q2) -> _do_binary b (_run ~opt q1) (_run ~opt q2) + | QueryMap (f, q') -> f (_run ~opt q') + | Bind (f, q') -> + let x = _run ~opt q' in + let q'' = f x in + let q'' = if opt then _optimize q'' else q'' in + _run ~opt q'' + +(* safe execution *) +let run q = + try `Ok (_run ~opt:true (_optimize q)) + with + | ExitWithError s -> `Error s + | e -> `Error (Printexc.to_string e) + +let run_exn q = + match run q with + | `Ok x -> x + | `Error s -> failwith s + +let run_no_optim q = + try `Ok (_run ~opt:false q) + with + | ExitWithError s -> `Error s + | e -> `Error (Printexc.to_string e) + +(** {6 Basics on Collections} *) + +let map f q = Unary (PMap f, q) + +let filter p q = Unary (Filter p, q) + +let choose q = Unary (Choose Implicit, q) + +let choose_err q = Unary (Choose Explicit, q) + +let filter_map f q = Unary (FilterMap f, q) + +let flat_map f q = Unary (FlatMap f, q) + +let flat_map_seq f q = + let f' x = Coll.of_seq (f x) in + Unary (FlatMap f', q) + +let flat_map_l f q = + let f' x = Coll.of_list (f x) in + Unary (FlatMap f', q) + +let flatten q = Unary (FlatMap (fun x->x), q) + +let flatten_l q = Unary (FlatMap Coll.of_list, q) + +let take n q = Unary (Take n, q) + +let take_while p q = Unary (TakeWhile p, q) + +let sort ?(cmp=Pervasives.compare) () q = Unary (Sort cmp, q) + +let distinct ?(cmp=Pervasives.compare) () q = + Unary (Distinct cmp, q) + +(* choose a build method from the optional arguments *) +let _make_build ?cmp ?eq ?hash () = + let _maybe default o = match o with + | Some x -> x + | None -> default + in + match eq, hash with + | Some _, _ + | _, Some _ -> + PMap.FromHash ( _maybe (=) eq, _maybe Hashtbl.hash hash) + | _ -> + match cmp with + | Some f -> PMap.FromCmp f + | _ -> PMap.Default + +(** {6 Queries on PMaps} *) + +module M = struct + let get key q = + Unary (Get (Implicit, key), q) + + let get_err key q = + Unary (Get (Explicit, key), q) + + let iter q = + Unary (GeneralMap (fun m -> Coll.of_seq m.PMap.to_seq), q) + + let flatten q = + let f m = m.PMap.to_seq + |> CCSequence.flatMap + (fun (k,v) -> Coll.to_seq v |> CCSequence.map (fun v' -> k,v')) + |> Coll.of_seq + in + Unary (GeneralMap f, q) + + let flatten' q = + let f m = m.PMap.to_seq + |> CCSequence.flatMap + (fun (k,v) -> CCSequence.of_list v |> CCSequence.map (fun v' -> k,v')) + |> Coll.of_seq + in + Unary (GeneralMap f, q) + + let map f q = + Unary (GeneralMap (PMap.map f), q) + + let to_list q = + Unary (GeneralMap PMap.to_list, q) + + let reverse ?cmp ?eq ?hash () q = + let build = _make_build ?cmp ?eq ?hash () in + Unary (GeneralMap (PMap.reverse ~build), q) + + let reverse_multimap ?cmp ?eq ?hash () q = + let build = _make_build ?cmp ?eq ?hash () in + Unary (GeneralMap (PMap.reverse_multimap ~build), q) + + let fold f acc q = + Unary (FoldMap (f, acc), q) + + let fold_multimap f acc q = + let f' acc x l = + List.fold_left (fun acc y -> f acc x y) acc l + in + Unary (FoldMap (f', acc), q) +end + +let group_by ?cmp ?eq ?hash f q = + Unary (GroupBy (_make_build ?cmp ?eq ?hash (),f), q) + +let group_by' ?cmp ?eq ?hash f q = + M.iter (group_by ?cmp f q) + +let count ?cmp ?eq ?hash () q = + Unary (Count (_make_build ?cmp ?eq ?hash ()), q) + +let count' ?cmp () q = + M.iter (count ?cmp () q) + +let fold f acc q = + Unary (Fold (f, acc), q) + +let size q = Unary (Size, q) + +let sum q = Unary (Fold ((+), 0), q) + +let reduce start mix stop q = + Unary (Reduce (Implicit, start,mix,stop), q) + +let reduce_err start mix stop q = + Unary (Reduce (Explicit, start,mix,stop), q) + +let _avg_start x = (x,1) +let _avg_mix x (y,n) = (x+y,n+1) +let _avg_stop (x,n) = x/n + +let _lift_some f x y = match y with + | None -> Some x + | Some y -> Some (f x y) + +let max q = Unary (Reduce (Implicit, _id, Pervasives.max, _id), q) +let min q = Unary (Reduce (Implicit, _id, Pervasives.min, _id), q) +let average q = Unary (Reduce (Implicit, _avg_start, _avg_mix, _avg_stop), q) + +let max_err q = Unary (Reduce (Explicit, _id, Pervasives.max, _id), q) +let min_err q = Unary (Reduce (Explicit, _id, Pervasives.min, _id), q) +let average_err q = Unary (Reduce (Explicit, _avg_start, _avg_mix, _avg_stop), q) + +let is_empty q = + Unary (Search (object + method check _ = SearchStop false (* stop in case there is an element *) + method failure = true + end), q) + +let contains ?(eq=(=)) x q = + Unary (Contains (eq, x), q) + +let for_all p q = + Unary (Search (object + method check x = if p x then SearchContinue else SearchStop false + method failure = true + end), q) + +let exists p q = + Unary (Search (object + method check x = if p x then SearchStop true else SearchContinue + method failure = false + end), q) + +let find p q = + Unary (Search (object + method check x = if p x then SearchStop (Some x) else SearchContinue + method failure = None + end), q) + +let find_map f q = + Unary (Search (object + method check x = match f x with + | Some y -> SearchStop (Some y) + | None -> SearchContinue + method failure = None + end), q) + +(** {6 Binary Operators} *) + +let join ?cmp ?eq ?hash join_key1 join_key2 ~merge q1 q2 = + let join_build = _make_build ?eq ?hash ?cmp () in + let j = { + join_key1; + join_key2; + join_merge=merge; + join_build; + } in + Binary (Join j, q1, q2) + +let group_join ?cmp ?eq ?hash gjoin_proj q1 q2 = + let gjoin_build = _make_build ?eq ?hash ?cmp () in + let j = { + gjoin_proj; + gjoin_build; + } in + Binary (GroupJoin j, q1, q2) + +let product q1 q2 = Binary (Product, q1, q2) + +let append q1 q2 = Binary (Append, q1, q2) + +let inter ?cmp ?eq ?hash () q1 q2 = + let build = _make_build ?cmp ?eq ?hash () in + Binary (SetOp (Inter, build), q1, q2) + +let union ?cmp ?eq ?hash () q1 q2 = + let build = _make_build ?cmp ?eq ?hash () in + Binary (SetOp (Union, build), q1, q2) + +let diff ?cmp ?eq ?hash () q1 q2 = + let build = _make_build ?cmp ?eq ?hash () in + Binary (SetOp (Diff, build), q1, q2) + +let fst q = map fst q +let snd q = map snd q + +let map1 f q = map (fun (x,y) -> f x, y) q +let map2 f q = map (fun (x,y) -> x, f y) q + +let flatten_opt q = filter_map _id q + +let opt_unwrap q = + QueryMap ((function + | Some x -> x + | None -> _exit_with_error "opt_unwrap"), q) + +let catch q = + QueryMap ((function + | `Ok x -> x + | `Error s -> _exit_with_error s), q) + +(** {6 Monadic stuff} *) + +let return x = Start x + +let bind f q = Bind (f,q) + +let (>>=) x f = Bind (f, x) + +let query_map f q = QueryMap (f, q) + +(** {6 Misc} *) + +let lazy_ q = Unary (Lazy, q) + +(** {6 Adapters} *) + +let to_array q = + QueryMap ((fun c -> Array.of_list (Coll.to_list c)), q) + +let to_seq q = + QueryMap ((fun c -> Coll.to_seq c |> CCSequence.persistent), q) + +let to_hashtbl q = + QueryMap ((fun c -> CCSequence.to_hashtbl (Coll.to_seq c)), q) + +let to_queue q = + QueryMap ((fun c q -> CCSequence.to_queue q (Coll.to_seq c)), q) + +let to_stack q = + QueryMap ((fun c s -> CCSequence.to_stack s (Coll.to_seq c)), q) + +module L = struct + let of_list l = Start (Coll.of_list l) + let to_list q = + QueryMap (Coll.to_list, q) + let run q = run (to_list q) + let run_exn q = run_exn (to_list q) +end + +module AdaptSet(S : Set.S) = struct + let of_set set = + return (Coll.of_seq (fun k -> S.iter k set)) + + let to_set q = + let f c = + Coll.to_seq c |> CCSequence.fold (fun set x -> S.add x set) S.empty + in + query_map f q + + let run q = run (to_set q) + let run_exn q = run_exn (to_set q) +end + +module AdaptMap(M : Map.S) = struct + let _to_seq m k = M.iter (fun x y -> k (x,y)) m + + let of_map map = + return (Coll.of_seq (_to_seq map)) + + let to_pmap m = { + PMap.get = (fun x -> try Some (M.find x m) with Not_found -> None); + PMap.size = (fun () -> M.cardinal m); + PMap.is_empty = (fun () -> M.is_empty m); + PMap.fold = (fun f acc -> M.fold (fun x y acc -> f acc x y) m acc); + PMap.to_seq = _to_seq m; + } + + let to_map q = + let f c = + Coll.to_seq c + |> CCSequence.fold (fun m (x,y) -> M.add x y m) M.empty + in + query_map f q + + let run q = run (q |> to_map) + let run_exn q = run_exn (q |> to_map) +end + +module IO = struct + let _slurp with_input = + let l = lazy ( + with_input + (fun ic -> + let buf_size = 256 in + let content = Buffer.create 120 + and buf = String.make buf_size 'a' in + let rec next () = + let num = input ic buf 0 buf_size in + if num = 0 + then Buffer.contents content (* EOF *) + else (Buffer.add_substring content buf 0 num; next ()) + in next () + ) + ) in + lazy_ (return l) + + let slurp ic = _slurp (fun f -> f ic) + + let _with_file_in filename f = + try + let ic = open_in filename in + try + let x = f ic in + close_in ic; + x + with e -> + close_in ic; + _exit_with_error (Printexc.to_string e) + with e -> + _exit_with_error (Printexc.to_string e) + + let _with_file_out filename f = + try + let oc = open_out filename in + try + let x = f oc in + close_out oc; + x + with e -> + close_out oc; + _exit_with_error (Printexc.to_string e) + with e -> + _exit_with_error (Printexc.to_string e) + + let slurp_file filename = _slurp (_with_file_in filename) + + (* find [c] in [s], starting at offset [i] *) + let rec _find s c i = + if i >= String.length s then None + else if s.[i] = c then Some i + else _find s c (i+1) + + let rec _lines s i k = match _find s '\n' i with + | None -> + if i + let s' = String.sub s i (j-i) in + k s'; + _lines s (j+1) k + + let lines q = + (* sequence of lines *) + let f s = _lines s 0 |> Coll.of_seq in + query_map f q + + let lines' q = + let f s = lazy (_lines s 0 |> CCSequence.to_list) in + lazy_ (query_map f q) + + let _join ~sep ?(stop="") l = + let buf = Buffer.create 128 in + Coll.to_seq l + |> CCSequence.iteri + (fun i x -> + if i>0 then Buffer.add_string buf sep; + Buffer.add_string buf x); + Buffer.add_string buf stop; + Buffer.contents buf + + let unlines q = + let f l = lazy (_join ~sep:"\n" ~stop:"\n" l) in + lazy_ (query_map f q) + + let join sep q = + let f l = lazy (_join ~sep l) in + lazy_ (query_map f q) + + let out oc q = + run_exn q |> output_string oc + + let out_lines oc q = + run_exn q + |> Coll.to_seq + |> CCSequence.iter (fun l -> output_string oc l; output_char oc '\n') + + let to_file_exn filename q = + _with_file_out filename (fun oc -> out oc q) + + let to_file filename q = + try `Ok (_with_file_out filename (fun oc -> out oc q)) + with Failure s -> `Error s + + let to_file_lines_exn filename q = + _with_file_out filename (fun oc -> out_lines oc q) + + let to_file_lines filename q = + try `Ok (_with_file_out filename (fun oc -> out_lines oc q)) + with Failure s -> `Error s +end diff --git a/core/CCLinq.mli b/core/CCLinq.mli new file mode 100644 index 00000000..3195427b --- /dev/null +++ b/core/CCLinq.mli @@ -0,0 +1,425 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 LINQ-like operations on collections} + +The purpose it to provide powerful combinators to express iteration, +transformation and combination of collections of items. This module depends +on several other modules, including {!CCList} and {!CCSequence}. + +Functions and operations are assumed to be referentially transparent, i.e. +they should not rely on external side effects, they should not rely on +the order of execution. + +{[ + +CCLinq.( + of_list [1;2;3] + |> flat_map_l (fun x -> CCList.(x -- (x+10))) + |> sort () + |> count () + |> M.to_list + |> run_exn +);; +- : (int * int) list = [(13, 1); (12, 2); (11, 3); (10, 3); (9, 3); + (8, 3); (7, 3); (6, 3); (5, 3); (4, 3); (3, 3); (2, 2); (1, 1)] + + +CCLinq.( + IO.slurp_file "/tmp/foo" + |> IO.lines + |> sort () + |> IO.to_file_lines "/tmp/bar" +);; +- : `Ok () +]} + +*) + +type 'a sequence = ('a -> unit) -> unit +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a hash = 'a -> int +type 'a with_err = [`Ok of 'a | `Error of string ] + +type 'a collection +(** Abstract type of collections of objects of type 'a. Those cannot + be used directly, they are to be processed using a query (type {!'a t}) + and converted to some list/sequence/array *) + +(** {2 Polymorphic Maps} *) +module PMap : sig + type ('a, 'b) t + + val get : ('a,'b) t -> 'a -> 'b option + + val get_exn : ('a,'b) t -> 'a -> 'b + (** Unsafe version of {!get}. + @raise Not_found if the element is not present *) + + val size : (_,_) t -> int + + val to_seq : ('a, 'b) t -> ('a * 'b) sequence + + val to_list : ('a, 'b) t -> ('a * 'b) list + + val to_coll : ('a, 'b) t -> ('a * 'b) collection +end + +(** {2 Query operators} *) + +type 'a t +(** Type of a query that returns some value of type 'a *) + +(** {6 Initial values} *) + +val start : 'a -> 'a t +(** Start with a single value *) + +val of_list : 'a list -> 'a collection t +(** Query that just returns the elements of the list *) + +val of_array : 'a array -> 'a collection t +val of_array_i : 'a array -> (int * 'a) collection t + +val of_hashtbl : ('a,'b) Hashtbl.t -> ('a * 'b) collection t + +val of_seq : 'a sequence -> 'a collection t +(** Query that returns the elements of the given sequence. *) + +val of_queue : 'a Queue.t -> 'a collection t + +val of_stack : 'a Stack.t -> 'a collection t + +val of_string : string -> char collection t +(** Traverse the characters of the string *) + +(** {6 Execution} *) + +val run : 'a t -> 'a with_err +(** Execute the query, possibly returning an error if things go wrong *) + +val run_exn : 'a t -> 'a +(** Execute the query, ignoring errors. Can raise an exception + if some execution step does. + @raise Failure if the query fails (or returns [`Error s]) *) + +val run_no_optim : 'a t -> 'a with_err +(** Run without any optimization *) + +(** {6 Basics on Collections} *) + +val map : ('a -> 'b) -> 'a collection t -> 'b collection t + +val filter : ('a -> bool) -> 'a collection t -> 'a collection t + +val size : _ collection t -> int t + +val choose : 'a collection t -> 'a t +(** Choose one element (if any) in the collection. Fails + if the collections is empty *) + +val choose_err : 'a collection t -> 'a with_err t +(** Choose one element or fail explicitely *) + +val filter_map : ('a -> 'b option) -> 'a collection t -> 'b collection t +(** Filter and map elements at once *) + +val flat_map : ('a -> 'b collection) -> 'a collection t -> 'b collection t +(** Monadic "bind", maps each element to a collection + and flatten the result *) + +val flat_map_seq : ('a -> 'b sequence) -> 'a collection t -> 'b collection t +(** Same as {!flat_map} but using sequences *) + +val flat_map_l : ('a -> 'b list) -> 'a collection t -> 'b collection t + +val flatten : 'a collection collection t -> 'a collection t + +val flatten_l : 'a list collection t -> 'a collection t + +val take : int -> 'a collection t -> 'a collection t +(** take at most [n] elements *) + +val take_while : ('a -> bool) -> 'a collection t -> 'a collection t +(** take elements while they satisfy a predicate *) + +val sort : ?cmp:'a ord -> unit -> 'a collection t -> 'a collection t +(** Sort items by the given comparison function *) + +val distinct : ?cmp:'a ord -> unit -> 'a collection t -> 'a collection t +(** Remove duplicate elements from the input collection. + All elements in the result are distinct. *) + +(** {6 Queries on Maps} *) + +module M : sig + val get : 'a -> ('a, 'b) PMap.t t -> 'b t + (** Select a key from a map *) + + val get_err : 'a -> ('a, 'b) PMap.t t -> 'b with_err t + (** Explicit version of {!get}, with [`Error] if the key is not present *) + + val iter : ('a,'b) PMap.t t -> ('a*'b) collection t + (** View a multimap as a proper collection *) + + val flatten : ('a,'b collection) PMap.t t -> ('a*'b) collection t + (** View a multimap as a collection of individual key/value pairs *) + + val flatten' : ('a,'b list) PMap.t t -> ('a*'b) collection t + (** View a multimap as a collection of individual key/value pairs *) + + val map : ('b -> 'c) -> ('a, 'b) PMap.t t -> ('a, 'c) PMap.t t + (** Transform values *) + + val to_list : ('a,'b) PMap.t t -> ('a*'b) list t + + val reverse : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> unit -> + ('a,'b) PMap.t t -> ('b,'a list) PMap.t t + (** Reverse relation of the map, as a multimap *) + + val reverse_multimap : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> unit -> + ('a,'b list) PMap.t t -> ('b,'a list) PMap.t t + (** Reverse relation of the multimap *) + + val fold : ('acc -> 'a -> 'b -> 'acc) -> 'acc -> ('a,'b) PMap.t t -> 'acc t + (** Fold on the items of the map *) + + val fold_multimap : ('acc -> 'a -> 'b -> 'acc) -> 'acc -> + ('a,'b list) PMap.t t -> 'acc t + (** Fold on the items of the multimap *) +end + +(** {6 Aggregation} *) + +val group_by : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> + ('a -> 'b) -> 'a collection t -> ('b,'a list) PMap.t t +(** [group_by f] takes a collection [c] as input, and returns + a multimap [m] such that for each [x] in [c], + [x] occurs in [m] under the key [f x]. In other words, [f] is used + to obtain a key from [x], and [x] is added to the multimap using this key. *) + +val group_by' : ?cmp:'b ord -> ?eq:'b equal -> ?hash:'b hash -> + ('a -> 'b) -> 'a collection t -> ('b * 'a list) collection t + +val count : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> + unit -> 'a collection t -> ('a, int) PMap.t t +(** [count c] returns a map from elements of [c] to the number + of time those elements occur. *) + +val count' : ?cmp:'a ord -> unit -> 'a collection t -> ('a * int) collection t + +val fold : ('b -> 'a -> 'b) -> 'b -> 'a collection t -> 'b t +(** Fold over the collection *) + +val size : _ collection t -> int t +(** Count how many elements the collection contains *) + +val reduce : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> + 'a collection t -> 'c t +(** [reduce start mix stop q] uses [start] on the first element of [q], + and combine the result with following elements using [mix]. The final + value is transformed using [stop]. *) + +val reduce_err : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) -> + 'a collection t -> 'c with_err t +(** Same as {!reduce} but fails explicitely on empty collections. *) + +val is_empty : 'a collection t -> bool t + +val sum : int collection t -> int t + +val contains : ?eq:'a equal -> 'a -> 'a collection t -> bool t + +val average : int collection t -> int t +val max : int collection t -> int t +val min : int collection t -> int t + +val average_err : int collection t -> int with_err t +val max_err : int collection t -> int with_err t +val min_err : int collection t -> int with_err t + +val for_all : ('a -> bool) -> 'a collection t -> bool t +val exists : ('a -> bool) -> 'a collection t -> bool t +val find : ('a -> bool) -> 'a collection t -> 'a option t +val find_map : ('a -> 'b option) -> 'a collection t -> 'b option t + +(** {6 Binary Operators} *) + +val join : ?cmp:'key ord -> ?eq:'key equal -> ?hash:'key hash -> + ('a -> 'key) -> ('b -> 'key) -> + merge:('key -> 'a -> 'b -> 'c option) -> + 'a collection t -> 'b collection t -> 'c collection t +(** [join key1 key2 ~merge] is a binary operation + that takes two collections [a] and [b], projects their + elements resp. with [key1] and [key2], and combine + values [(x,y)] from [(a,b)] with the same [key] + using [merge]. If [merge] returns [None], the combination + of values is discarded. *) + +val group_join : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> + ('b -> 'a) -> 'a collection t -> 'b collection t -> + ('a, 'b list) PMap.t t +(** [group_join key2] associates to every element [x] of + the first collection, all the elements [y] of the second + collection such that [eq x (key y)] *) + +val product : 'a collection t -> 'b collection t -> ('a * 'b) collection t +(** Cartesian product *) + +val append : 'a collection t -> 'a collection t -> 'a collection t +(** Append two collections together *) + +val inter : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> unit -> + 'a collection t -> 'a collection t -> 'a collection t +(** Intersection of two collections. Each element will occur at most once + in the result *) + +val union : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> unit -> + 'a collection t -> 'a collection t -> 'a collection t +(** Union of two collections. Each element will occur at most once + in the result *) + +val diff : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash -> unit -> + 'a collection t -> 'a collection t -> 'a collection t +(** Set difference *) + +(** {6 Tuple and Options} *) + +(** Specialized projection operators *) + +val fst : ('a * 'b) collection t -> 'a collection t + +val snd : ('a * 'b) collection t -> 'b collection t + +val map1 : ('a -> 'b) -> ('a * 'c) collection t -> ('b * 'c) collection t + +val map2 : ('a -> 'b) -> ('c * 'a) collection t -> ('c * 'b) collection t + +val flatten_opt : 'a option collection t -> 'a collection t +(** Flatten the collection by removing options *) + +val opt_unwrap : 'a option t -> 'a t +(** unwrap an option type. Fails if the option value is [None] *) + +(** {6 Monad} + +Careful, those operators do not allow any optimization before running the +query, they might therefore be pretty slow. *) + +val bind : ('a -> 'b t) -> 'a t -> 'b t +(** Use the result of a query to build another query and imediately run it. *) + +val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +(** Infix version of {!bind} *) + +val return : 'a -> 'a t +(** Synonym to {!start} *) + +val query_map : ('a -> 'b) -> 'a t -> 'b t +(** PMap results directly, rather than collections of elements *) + +(** {6 Misc} *) + +val catch : 'a with_err t -> 'a t +(** Catch errors within the execution itself. In other words, [run (catch q)] + with succeed with [x] if [q] succeeds with [`Ok x], and fail if [q] + succeeds with [`Error s] or if [q] fails *) + +val lazy_ : 'a lazy_t t -> 'a t + +(** {6 Adapters} *) + +val to_array : 'a collection t -> 'a array t +(** Build an array of results *) + +val to_seq : 'a collection t -> 'a sequence t +(** Build a (re-usable) sequence of elements, which can then be + converted into other structures *) + +val to_hashtbl : ('a * 'b) collection t -> ('a, 'b) Hashtbl.t t +(** Build a hashtable from the collection *) + +val to_queue : 'a collection t -> ('a Queue.t -> unit) t + +val to_stack : 'a collection t -> ('a Stack.t -> unit) t + +module L : sig + val of_list : 'a list -> 'a collection t + val to_list : 'a collection t -> 'a list t + val run : 'a collection t -> 'a list with_err + val run_exn : 'a collection t -> 'a list +end + +module AdaptSet(S : Set.S) : sig + val of_set : S.t -> S.elt collection t + val to_set : S.elt collection t -> S.t t + val run : S.elt collection t -> S.t with_err + val run_exn : S.elt collection t -> S.t +end + +module AdaptMap(M : Map.S) : sig + val of_map : 'a M.t -> (M.key * 'a) collection t + val to_pmap : 'a M.t -> (M.key, 'a) PMap.t + val to_map : (M.key * 'a) collection t -> 'a M.t t + val run : (M.key * 'a) collection t -> 'a M.t with_err + val run_exn : (M.key * 'a) collection t -> 'a M.t +end + +module IO : sig + val slurp : in_channel -> string t + (** Slurp the whole channel in (blocking), returning the + corresponding string. The channel will be read at most once + during execution, and its content cached; however the channel + might never get read because evaluation is lazy. *) + + val slurp_file : string -> string t + (** Read a whole file (given by name) and return its content as + a string *) + + val lines : string t -> string collection t + (** Convert a string into a collection of lines *) + + val lines' : string t -> string list t + (** Convert a string into a list of lines *) + + val join : string -> string collection t -> string t + + val unlines : string collection t -> string t + (** Join lines together *) + + val out : out_channel -> string t -> unit + val out_lines : out_channel -> string collection t -> unit + (** Evaluate the query and print it line by line on the output *) + + (** {8 Run methods} *) + + val to_file : string -> string t -> unit with_err + val to_file_exn : string -> string t -> unit + + val to_file_lines : string -> string collection t -> unit with_err + val to_file_lines_exn : string -> string collection t -> unit +end diff --git a/core/CCList.ml b/core/CCList.ml index 0d446ed8..34d04392 100644 --- a/core/CCList.ml +++ b/core/CCList.ml @@ -28,6 +28,8 @@ of this software, even if advised of the possibility of such damage. type 'a t = 'a list +let empty = [] + (* max depth for direct recursion *) let _direct_depth = 500 @@ -61,6 +63,44 @@ let append l1 l2 = let (@) = append +let filter p l = + let rec direct i p l = match l with + | [] -> [] + | _ when i=0 -> safe p l [] + | x::l' when not (p x) -> direct i p l' + | x::l' -> x :: direct (i-1) p l' + and safe p l acc = match l with + | [] -> List.rev acc + | x::l' when not (p x) -> safe p l' acc + | x::l' -> safe p l' (x::acc) + in + direct _direct_depth p l + +let fold_right f l acc = + let rec direct i f l acc = match l with + | [] -> acc + | _ when i=0 -> safe f (List.rev l) acc + | x::l' -> + let acc = direct (i-1) f l' acc in + f x acc + and safe f l acc = match l with + | [] -> acc + | x::l' -> + let acc = f x acc in + safe f l' acc + in + direct _direct_depth f l acc + +(*$T + fold_right (+) (1 -- 1_000_000) 0 = \ + List.fold_left (+) 0 (1 -- 1_000_000) +*) + +(*$Q + (Q.list Q.small_int) (fun l -> \ + l = fold_right (fun x y->x::y) l []) +*) + let rec compare f l1 l2 = match l1, l2 with | [], [] -> 0 | _, [] -> 1 @@ -384,11 +424,59 @@ module Assoc = struct *) end +(** {2 Zipper} *) + +module Zipper = struct + type 'a t = 'a list * 'a list + + let empty = [], [] + + let is_empty = function + | _, [] -> true + | _, _::_ -> false + + let to_list (l,r) = + let rec append l acc = match l with + | [] -> acc + | x::l' -> append l' (x::acc) + in append l r + + let make l = [], l + + let left = function + | x::l, r -> l, x::r + | [], r -> [], r + + let right = function + | l, x::r -> x::l, r + | l, [] -> l, [] + + let modify f z = match z with + | l, [] -> + begin match f None with + | None -> z + | Some x -> l, [x] + end + | l, x::r -> + begin match f (Some x) with + | None -> l,r + | Some x' -> l, x::r + end + + let focused = function + | _, x::_ -> Some x + | _, [] -> None + + let focused_exn = function + | _, x::_ -> x + | _, [] -> raise Not_found +end + (** {2 Conversions} *) type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -type 'a klist = [`Nil | `Cons of 'a * (unit -> 'a klist)] +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a printer = Buffer.t -> 'a -> unit type 'a formatter = Format.formatter -> 'a -> unit @@ -422,17 +510,17 @@ let to_klist l = let rec make l () = match l with | [] -> `Nil | x::l' -> `Cons (x, make l') - in make l () + in make l let of_klist l = let rec direct i g = if i = 0 then safe [] g - else match l with + else match l () with | `Nil -> [] - | `Cons (x,l') -> x :: direct (i-1) (l' ()) - and safe acc l = match l with + | `Cons (x,l') -> x :: direct (i-1) l' + and safe acc l = match l () with | `Nil -> List.rev acc - | `Cons (x,l') -> safe (x::acc) (l' ()) + | `Cons (x,l') -> safe (x::acc) l' in direct _direct_depth l diff --git a/core/CCList.mli b/core/CCList.mli index 1691ed75..54338a68 100644 --- a/core/CCList.mli +++ b/core/CCList.mli @@ -28,6 +28,8 @@ of this software, even if advised of the possibility of such damage. type 'a t = 'a list +val empty : 'a t + val map : ('a -> 'b) -> 'a t -> 'b t (** Safe version of map *) @@ -36,6 +38,12 @@ val append : 'a t -> 'a t -> 'a t val (@) : 'a t -> 'a t -> 'a t +val filter : ('a -> bool) -> 'a t -> 'a t +(** Safe version of {!List.filter} *) + +val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b +(** Safe version of [fold_right] *) + val compare : ('a -> 'a -> int) -> 'a t -> 'a t -> int val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool @@ -167,11 +175,47 @@ module Assoc : sig (** Add the binding into the list (erase it if already present) *) end +(** {2 Zipper} *) + +module Zipper : sig + type 'a t = 'a list * 'a list + + val empty : 'a t + (** Empty zipper *) + + val is_empty : _ t -> bool + (** Empty zipper, or at the end of the zipper? *) + + val to_list : 'a t -> 'a list + (** Convert the zipper back to a list *) + + val make : 'a list -> 'a t + (** Create a zipper pointing at the first element of the list *) + + val left : 'a t -> 'a t + (** Go to the left, or do nothing if the zipper is already at leftmost pos *) + + val right : 'a t -> 'a t + (** Go to the right, or do nothing if the zipper is already at rightmost pos *) + + val modify : ('a option -> 'a option) -> 'a t -> 'a t + (** Modify the current element, if any, by returning a new element, or + returning [None] if the element is to be deleted *) + + val focused : 'a t -> 'a option + (** Returns the focused element, if any. [focused zip = Some _] iff + [empty zip = false] *) + + val focused_exn : 'a t -> 'a + (** Returns the focused element, or + @raise Not_found if the zipper is at an end *) +end + (** {2 Conversions} *) type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option -type 'a klist = [`Nil | `Cons of 'a * (unit -> 'a klist)] +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a printer = Buffer.t -> 'a -> unit type 'a formatter = Format.formatter -> 'a -> unit diff --git a/core/CCOpt.ml b/core/CCOpt.ml index 7a979b62..3c8f0b50 100644 --- a/core/CCOpt.ml +++ b/core/CCOpt.ml @@ -54,10 +54,16 @@ let equal f o1 o2 = match o1, o2 with let return x = Some x +let (>|=) x f = map f x + let (>>=) o f = match o with | None -> None | Some x -> f x +let flat_map f o = match o with + | None -> None + | Some x -> f x + let (<*>) f x = match f, x with | None, _ | _, None -> None @@ -70,10 +76,22 @@ let map2 f o1 o2 = match o1, o2 with | _, None -> None | Some x, Some y -> Some (f x y) +let iter f o = match o with + | None -> () + | Some x -> f x + +let fold f acc o = match o with + | None -> acc + | Some x -> f acc x + let to_list o = match o with | None -> [] | Some x -> [x] +let of_list = function + | x::_ -> Some x + | [] -> None + type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option type 'a printer = Buffer.t -> 'a -> unit diff --git a/core/CCOpt.mli b/core/CCOpt.mli index 16d96f3b..7c6e4b6d 100644 --- a/core/CCOpt.mli +++ b/core/CCOpt.mli @@ -29,6 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. type 'a t = 'a option val map : ('a -> 'b) -> 'a t -> 'b t +(** Transform the element inside, if any *) val maybe : ('a -> 'b) -> 'b -> 'a t -> 'b (** [maybe f x o] is [x] if [o] is [None], otherwise it's [f y] if [o = Some y] *) @@ -42,19 +43,34 @@ val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool val return : 'a -> 'a t (** Monadic return *) +val (>|=) : 'a t -> ('a -> 'b) -> 'b t +(** Infix version of {!map} *) + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t (** Monadic bind *) +val flat_map : ('a -> 'b t) -> 'a t -> 'b t +(** Flip version of {!>>=} *) + val (<*>) : ('a -> 'b) t -> 'a t -> 'b t val (<$>) : ('a -> 'b) -> 'a t -> 'b t val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t +val iter : ('a -> unit) -> 'a t -> unit +(** Iterate on 0 or 1 elements *) + +val fold : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a +(** Fold on 0 or 1 elements *) + (** {2 Conversion and IO} *) val to_list : 'a t -> 'a list +val of_list : 'a list -> 'a t +(** Head of list, or [None] *) + type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option type 'a printer = Buffer.t -> 'a -> unit diff --git a/core/CCOrd.ml b/core/CCOrd.ml new file mode 100644 index 00000000..8474e12e --- /dev/null +++ b/core/CCOrd.ml @@ -0,0 +1,91 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Comparisons} *) + +type 'a t = 'a -> 'a -> int +(** Comparison (total ordering) between two elements, that returns an int *) + +let compare = Pervasives.compare + +let opp f x y = - (f x y) + +let equiv i j = + if i<0 then j<0 + else if i>0 then j>0 + else j=0 + +let int_ (x:int) y = Pervasives.compare x y +let string_ (x:string) y = Pervasives.compare x y +let bool_ (x:bool) y = Pervasives.compare x y +let float_ (x:float) y = Pervasives.compare x y + +(** {2 Lexicographic Combination} *) + +let () c (ord,x,y) = + if c = 0 + then ord x y + else c + +let pair o_x o_y (x1,y1) (x2,y2) = + let c = o_x x1 x2 in + if c = 0 + then o_y y1 y2 + else c + +let triple o_x o_y o_z (x1,y1,z1) (x2,y2,z2) = + let c = o_x x1 x2 in + if c = 0 + then + let c' = o_y y1 y2 in + if c' = 0 + then o_z z1 z2 + else c' + else c + +let rec list_ ord l1 l2 = match l1, l2 with + | [], [] -> 0 + | [], _ -> -1 + | _, [] -> 1 + | x1::l1', x2::l2' -> + let c = ord x1 x2 in + if c = 0 + then list_ ord l1' l2' + else c + +let array_ ord a1 a2 = + let rec aux i = + if i = Array.length a1 + then if Array.length a1 = Array.length a2 then 0 + else -1 + else if i = Array.length a2 + then 1 + else + let c = ord a1.(i) a2.(i) in + if c = 0 + then aux (i+1) else c + in + aux 0 diff --git a/core/CCOrd.mli b/core/CCOrd.mli new file mode 100644 index 00000000..90e929d8 --- /dev/null +++ b/core/CCOrd.mli @@ -0,0 +1,66 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Comparisons} *) + +type 'a t = 'a -> 'a -> int +(** Comparison (total ordering) between two elements, that returns an int *) + +val compare : 'a t +(** Polymorphic "magic" comparison *) + +val opp : 'a t -> 'a t +(** Opposite order *) + +val equiv : int -> int -> bool +(** Returns [true] iff the two comparison results are the same *) + +val int_ : int t +val string_ : string t +val bool_ : bool t +val float_ : float t + +(** {2 Lexicographic Combination} *) + +val () : int -> ('a t * 'a * 'a) -> int +(** [c1 @@? (ord, x, y)] returns the same as [c1] if [c1] is not [0]; + otherwise it uses [ord] to compare the two values [x] and [y], + of type ['a]. + + Example: + {[CCInt.compare 1 3 + (String.compare, "a", "b") + (CCBool.compare, true, false)]} +*) + +val pair : 'a t -> 'b t -> ('a * 'b) t + +val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t + +val list_ : 'a t -> 'a list t +(** Lexicographic combination on lists *) + +val array_ : 'a t -> 'a array t diff --git a/core/CCVector.ml b/core/CCVector.ml index 0ea564e7..d4809056 100644 --- a/core/CCVector.ml +++ b/core/CCVector.ml @@ -25,30 +25,92 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Growable, mutable vector} *) +type rw = [`RW] +type ro = [`RO] + +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit + (** a vector of 'a. *) -type 'a t = { +type ('a,'mut) t = { mutable size : int; mutable vec : 'a array; } -let create i = - let i = max i 3 in - { size = 0; - vec = Array.create i (Obj.magic None); - } +type 'a vector = ('a, rw) t -let resize v newcapacity = +let freeze v = { + size=v.size; + vec=v.vec; +} + +let freeze_copy v = { + size=v.size; + vec=Array.sub v.vec 0 v.size; +} + +let create () = { + size = 0; + vec = [| |]; +} + +let create_with ?(capacity=128) x = { + size = 0; + vec = Array.make capacity x; +} + +(*$T + (create_with ~capacity:200 1 |> capacity) >= 200 +*) + +let make n x = { + size=n; + vec=Array.make n x; +} + +let init n f = { + size=n; + vec=Array.init n f; +} + +(* is the underlying empty? *) +let _empty_array v = + Array.length v.vec = 0 + +(* assuming the underlying array isn't empty, resize it *) +let _resize v newcapacity = assert (newcapacity >= v.size); - let new_vec = Array.create newcapacity (Obj.magic None) in + assert (not (_empty_array v)); + let new_vec = Array.create newcapacity v.vec.(0) in Array.blit v.vec 0 new_vec 0 v.size; v.vec <- new_vec; () +(*$T + (let v = create_with ~capacity:10 1 in ensure v 200; capacity v >= 200) +*) + +(* grow the array, using [x] as a filler if required *) +let _grow v x = + if _empty_array v + then v.vec <- Array.make 32 x + else + let n = Array.length v.vec in + let size = min (n + n/2 + 10) Sys.max_array_length in + _resize v size + let ensure v size = - if v.size < size + if Array.length v.vec = 0 + then () + else if v.size < size then - let size' = min (2 * v.size) Sys.max_array_length in - resize v size' + let size' = min size Sys.max_array_length in + _resize v size' let clear v = v.size <- 0 @@ -56,59 +118,111 @@ let clear v = let is_empty v = v.size = 0 let push v x = - (if v.size = Array.length v.vec then resize v (2 * v.size)); + if v.size = Array.length v.vec + then _grow v x; Array.unsafe_set v.vec v.size x; v.size <- v.size + 1 (** add all elements of b to a *) let append a b = - (if Array.length a.vec < a.size + b.size - then resize a (2 * (a.size + b.size))); - Array.blit b.vec 0 a.vec a.size b.size; - a.size <- a.size + b.size + if _empty_array a + then if _empty_array b + then () + else ( + a.vec <- Array.copy b.vec; + a.size <- b.size + ) + else ( + ensure a (a.size + b.size); + assert (Array.length a.vec >= a.size + b.size); + Array.blit b.vec 0 a.vec a.size b.size; + a.size <- a.size + b.size + ) -let append_array a b = - (if Array.length a.vec < a.size + Array.length b - then resize a (2 * (a.size + Array.length b))); - Array.blit b 0 a.vec a.size (Array.length b); - a.size <- a.size + Array.length b +let get v i = + if i < 0 || i >= v.size then failwith "Vector.get"; + Array.unsafe_get v.vec i + +let set v i x = + if i < 0 || i >= v.size then failwith "Vector.set"; + Array.unsafe_set v.vec i x + +let remove v i = + if i < 0 || i >= v.size then failwith "Vector.remove"; + (* if v.(i) not the last element, then put last element at index i *) + if i < v.size - 1 + then v.vec.(i) <- v.vec.(v.size - 1); + (* remove one element *) + v.size <- v.size - 1 let append_seq a seq = seq (fun x -> push a x) -let pop v = - (if v.size = 0 then failwith "Vector.pop on empty vector"); +let append_array a b = + Array.iter (push a) b + +let equal eq v1 v2 = + let n = min v1.size v2.size in + let rec check i = + if i = n + then v1.size = v2.size + else eq (get v1 i) (get v2 i) && check (i+1) + in check 0 + +let compare cmp v1 v2 = + let n = min v1.size v2.size in + let rec check i = + if i = n + then Pervasives.compare v1.size v2.size + else + let c = cmp (get v1 i) (get v2 i) in + if c = 0 then check (i+1) else c + in check 0 + +let pop_exn v = + if v.size = 0 + then failwith "Vector.pop on empty vector"; v.size <- v.size - 1; let x = v.vec.(v.size) in x -let copy v = - let v' = create v.size in - Array.blit v.vec 0 v'.vec 0 v.size; - v'.size <- v.size; - v' +let pop v = + try Some (pop_exn v) + with Failure _ -> None + +let copy v = { + size = v.size; + vec = Array.sub v.vec 0 v.size; +} + +(*$T + (let v = of_list [1;2;3] in let v' = copy v in \ + to_list v' = [1;2;3]) + create () |> copy |> is_empty +*) let shrink v n = - if n > v.size - then failwith "cannot shrink to bigger size" - else v.size <- n + if n < v.size then v.size <- n -let member ?(eq=(=)) v x = - let n = v.size in - let rec check i = - if i = n then false - else if eq x v.vec.(i) then true - else check (i+1) - in check 0 - -let sort ?(cmp=compare) v = - (* copy array (to avoid junk in it), then sort the array *) - let a = Array.sub v.vec 0 v.size in +let sort' cmp v = + (* possibly copy array (to avoid junk at its end), then sort the array *) + let a = + if Array.length v.vec = v.size then v.vec + else Array.sub v.vec 0 v.size + in Array.fast_sort cmp a; v.vec <- a -let uniq_sort ?(cmp=compare) v = - sort ~cmp v; +let sort cmp v = + let v' = { + size=v.size; + vec=Array.sub v.vec 0 v.size; + } in + Array.sort cmp v'.vec; + v' + +let uniq_sort cmp v = + sort' cmp v; let n = v.size in (* traverse to remove duplicates. i= current index, j=current append index, j<=i. new_size is the size @@ -116,64 +230,97 @@ let uniq_sort ?(cmp=compare) v = let rec traverse prev i j = if i >= n then () (* done traversing *) else if cmp prev v.vec.(i) = 0 - then (v.size <- v.size - 1; traverse prev (i+1) j) (* duplicate, remove it *) - else (v.vec.(j) <- v.vec.(i); traverse v.vec.(i) (i+1) (j+1)) (* keep it *) + then ( + v.size <- v.size - 1; + traverse prev (i+1) j + ) (* duplicate, remove it *) + else ( + v.vec.(j) <- v.vec.(i); + traverse v.vec.(i) (i+1) (j+1) + ) (* keep it *) in if v.size > 0 - then traverse v.vec.(0) 1 1 (* start at 1, to get the first element in hand *) + then traverse v.vec.(0) 1 1 + (* start at 1, to get the first element in hand *) -let iter v k = +let iter k v = for i = 0 to v.size -1 do k (Array.unsafe_get v.vec i) done -let iteri v k = +let iteri k v = for i = 0 to v.size -1 do k i (Array.unsafe_get v.vec i) done -let map v f = - let v' = create v.size in - for i = 0 to v.size - 1 do - let x = f (Array.unsafe_get v.vec i) in - Array.unsafe_set v'.vec i x - done; - v'.size <- v.size; - v' +let map f v = + if _empty_array v + then create () + else { + size=v.size; + vec=Array.map f v.vec + } -let filter v f = - let v' = create v.size in - for i = 0 to v.size - 1 do - let x = Array.unsafe_get v.vec i in - if f x then push v' x; - done; - v' +let filter' p v = + let i = ref (v.size - 1) in + while !i >= 0 do + if not (p v.vec.(! i)) + (* remove i-th item! *) + then remove v !i; + decr i + done -let fold v acc f = - let acc = ref acc in - for i = 0 to v.size - 1 do - let x = Array.unsafe_get v.vec i in - acc := f !acc x; - done; - !acc +(*$T + let v = 1 -- 10 in filter' (fun x->x<4) v; \ + to_list v |> List.sort Pervasives.compare = [1;2;3] +*) -let exists v p = +let filter p v = + if _empty_array v + then create () + else ( + let v' = create_with ~capacity:v.size v.vec.(0) in + Array.iter + (fun x -> if p x then push v' x) + v.vec; + v' + ) + +(*$T + filter (fun x-> x mod 2=0) (of_list [1;2;3;4;5]) |> to_list = [2;4] +*) + +let fold f acc v = + let rec fold acc i = + if i = v.size then acc + else + let x = Array.unsafe_get v.vec i in + fold (f acc x) (i+1) + in fold acc 0 + +(*$T + fold (+) 0 (of_list [1;2;3;4;5]) = 15 + fold (+) 0 (create ()) = 0 +*) + +let exists p v = let n = v.size in let rec check i = if i = n then false - else if p v.vec.(i) then true - else check (i+1) + else p v.vec.(i) || check (i+1) in check 0 -let for_all v p = +let for_all p v = let n = v.size in let rec check i = if i = n then true - else if not (p v.vec.(i)) then false - else check (i+1) + else p v.vec.(i) && check (i+1) in check 0 -let find v p = +let member ?(eq=(=)) x v = + exists (eq x) v + +let find_exn p v = let n = v.size in let rec check i = if i = n then raise Not_found @@ -181,65 +328,168 @@ let find v p = else check (i+1) in check 0 -let get v i = - (if i < 0 || i >= v.size then failwith "Vector.get"); - Array.unsafe_get v.vec i +let find p v = + try Some (find_exn p v) + with Not_found -> None -let set v i x = - (if i < 0 || i >= v.size then failwith "Vector.set"); - Array.unsafe_set v.vec i x +let filter_map f v = + let v' = create () in + iter + (fun x -> match f x with + | None -> () + | Some y -> push v' y + ) v; + v' + +let flat_map f v = + let v' = create () in + iter (fun x -> iter (push v') (f x)) v; + v' + +let flat_map' f v = + let v' = create () in + iter + (fun x -> + let seq = f x in + seq (fun y -> push v' y) + ) v; + v' + +let (>>=) x f = flat_map f x + +let (>|=) x f = map f x + +let rev' v = + if v.size > 0 + then ( + let n = v.size in + let vec = v.vec in + for i = 0 to (n-1)/2 do + let x = Array.unsafe_get vec i in + let y = Array.unsafe_get vec (n-i-1) in + Array.unsafe_set vec i y; + Array.unsafe_set vec (n-i-1) x; + done + ) let rev v = - let n = v.size in - let vec = v.vec in - for i = 0 to (n-1)/2 do - let x = Array.unsafe_get vec i in - let y = Array.unsafe_get vec (n-i-1) in - Array.unsafe_set vec i y; - Array.unsafe_set vec (n-i-1) x; - done + let v' = copy v in + rev' v'; + v' + +(*$T + rev (of_list [1;2;3;4]) |> to_list = [4;3;2;1] + rev (of_list [1;2;3;4;5]) |> to_list = [5;4;3;2;1] + rev (create ()) |> to_list = [] +*) let size v = v.size let length v = v.size +let capacity v = Array.length v.vec + let unsafe_get_array v = v.vec -type 'a sequence = ('a -> unit) -> unit - -let of_seq ?(init=create 10) seq = +let of_seq ?(init=create ()) seq = append_seq init seq; init -let to_seq = iter +(*$T + of_seq CCSequence.(1 -- 10) |> to_list = CCList.(1 -- 10) +*) -let slice v start len = +let to_seq v k = iter k v + +let slice_seq v start len = assert (start >= 0 && len >= 0); fun k -> assert (start+len < v.size); - for i = start to start+len do + for i = start to start+len-1 do let x = Array.unsafe_get v.vec i in k x done -let from_array a = - let c = Array.length a in - let v = create c in - Array.blit a 0 v.vec 0 c; - v.size <- c; - v +(*$T + slice_seq (of_list [0;1;2;3;4]) 1 3 |> CCList.of_seq = [1;2;3] +*) -let from_list l = - let v = create 10 in - List.iter (push v) l; - v +let slice v = (v.vec, 0, v.size) + +let (--) i j = + if i>j + then init (i-j+1) (fun k -> i-k) + else init (j-i+1) (fun k -> i+k) + +(*$T + (1 -- 4) |> to_list = [1;2;3;4] + (4 -- 1) |> to_list = [4;3;2;1] + (0 -- 0) |> to_list = [0] +*) + +let of_array a = + if Array.length a = 0 + then create () + else { + size=Array.length a; + vec=Array.copy a; + } + +let of_list l = match l with + | [] -> create() + | x::l' -> + let v = create_with ~capacity:(List.length l + 5) x in + List.iter (push v) l; + v let to_array v = Array.sub v.vec 0 v.size let to_list v = - let l = ref [] in - for i = 0 to v.size - 1 do - l := get v i :: !l; - done; - List.rev !l + List.rev (fold (fun acc x -> x::acc) [] v) + +let of_gen ?(init=create ()) g = + let rec aux g = match g() with + | None -> init + | Some x -> push init x; aux g + in aux g + +let to_gen v = + let i = ref 0 in + fun () -> + if !i < v.size + then ( + let x = v.vec.( !i ) in + incr i; + Some x + ) else None + +let of_klist ?(init=create ()) l = + let rec aux l = match l() with + | `Nil -> init + | `Cons (x,l') -> push init x; aux l' + in aux l + +let to_klist v = + let rec aux i () = + if i=v.size then `Nil + else `Cons (v.vec.(i), aux (i+1)) + in aux 0 + +let pp ?(start="[") ?(stop="]") ?(sep=", ") pp_item buf v = + Buffer.add_string buf start; + iteri + (fun i x -> + if i > 0 then Buffer.add_string buf sep; + pp_item buf x + ) v; + Buffer.add_string buf stop + +let print ?(start="[") ?(stop="]") ?(sep=", ") pp_item fmt v = + Format.fprintf fmt "@[%s" start; + iteri + (fun i x -> + if i > 0 then Format.pp_print_string fmt sep; + pp_item fmt x + ) v; + Format.fprintf fmt "%s@]" stop diff --git a/core/CCVector.mli b/core/CCVector.mli index 697c601e..26ba90c3 100644 --- a/core/CCVector.mli +++ b/core/CCVector.mli @@ -25,108 +25,203 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Growable, mutable vector} *) -type 'a t -(** the type of a vector of 'a *) +type ro = [`RO] +type rw = [`RW] + +(** Mutability is [rw] (read-write) or [ro] (read-only) *) + +type ('a, 'mut) t +(** the type of a vector of elements of type ['a], with + a mutability flat ['mut] *) + +type 'a vector = ('a, rw) t +(** Type synonym: a ['a vector] is mutable. *) type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] +type 'a gen = unit -> 'a option +type 'a equal = 'a -> 'a -> bool +type 'a ord = 'a -> 'a -> int +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit -val create : int -> 'a t -(** create a vector of given initial capacity *) +val freeze : ('a, _) t -> ('a, ro) t +(** Make an immutable vector (no copy! Don't use the old version)*) -val clear : 'a t -> unit +val freeze_copy : ('a, _) t -> ('a, ro) t +(** Copy the vector into an immutable version *) + +val create : unit -> ('a, rw) t +(** Create a new, empty vector *) + +val create_with : ?capacity:int -> 'a -> ('a, rw) t +(** Create a new vector, using the given value as a filler. + @param capacity the size of the underlying array + {b caution}: the value will likely not be GC'd before the vector is. *) + +val make : int -> 'a -> ('a, 'mut) t +(** [make n x] makes a vector of size [n], filled with [x] *) + +val init : int -> (int -> 'a) -> ('a, 'mut) t +(** Init the vector with the given function and size *) + +val clear : ('a, rw) t -> unit (** clear the content of the vector *) -val ensure : 'a t -> int -> unit -(** Ensure that the vector can contain that much elements, resizing it - if required *) +val ensure : ('a, rw) t -> int -> unit +(** Hint to the vector that it should have at least the given capacity. + Just a hint, will not be enforced if the vector is empty. *) -val is_empty : 'a t -> bool +val is_empty : ('a, _) t -> bool (** is the vector empty? *) -val push : 'a t -> 'a -> unit +val push : ('a, rw) t -> 'a -> unit (** add an element at the end of the vector *) -val append : 'a t -> 'a t -> unit +val append : ('a, rw) t -> ('a, _) t -> unit (** [append a b] adds all elements of b to a *) -val append_array : 'a t -> 'a array -> unit +val append_array : ('a, rw) t -> 'a array -> unit (** same as append, with an array *) -val append_seq : 'a t -> 'a sequence -> unit +val append_seq : ('a, rw) t -> 'a sequence -> unit (** Append content of sequence *) -val pop : 'a t -> 'a -(** remove last element, or raise a Failure if empty *) +val equal : 'a equal -> ('a,_) t equal -val copy : 'a t -> 'a t -(** shallow copy *) +val compare : 'a ord -> ('a,_) t ord +(** Lexicographic comparison *) -val shrink : 'a t -> int -> unit -(** shrink to the given size (remove elements above this size) *) +val pop : ('a, rw) t -> 'a option +(** Remove last element, or [None] *) -val member : ?eq:('a -> 'a -> bool) -> 'a t -> 'a -> bool +val pop_exn : ('a, rw) t -> 'a +(** remove last element, or raise a Failure if empty + @raise Failure on an empty vector *) + +val copy : ('a,_) t -> ('a,'mut) t +(** Shallow copy (may give an immutable or mutable vector) *) + +val shrink : ('a, rw) t -> int -> unit +(** shrink to the given size (remove elements above this size). + Does nothing if the parameter is bigger than the current size. *) + +val member : ?eq:('a -> 'a -> bool) -> 'a -> ('a, _) t -> bool (** is the element a member of the vector? *) -val sort : ?cmp:('a -> 'a -> int) -> 'a t -> unit -(** sort the array in place*) +val sort : ('a -> 'a -> int) -> ('a, _) t -> ('a, 'mut) t +(** Sort the vector *) -val uniq_sort : ?cmp:('a -> 'a -> int) -> 'a t -> unit -(** sort the array and remove duplicates in place*) +val sort' : ('a -> 'a -> int) -> ('a, rw) t -> unit +(** Sort the vector in place *) -val iter : 'a t -> ('a -> unit) -> unit +val uniq_sort : ('a -> 'a -> int) -> ('a, rw) t -> unit +(** Sort the array and remove duplicates, in place*) + +val iter : ('a -> unit) -> ('a,_) t -> unit (** iterate on the vector *) -val iteri : 'a t -> (int -> 'a -> unit) -> unit +val iteri : (int -> 'a -> unit) -> ('a,_) t -> unit (** iterate on the vector with indexes *) -val map : 'a t -> ('a -> 'b) -> 'b t +val map : ('a -> 'b) -> ('a,_) t -> ('b, 'mut) t (** map elements of the vector *) -val filter : 'a t -> ('a -> bool) -> 'a t +val filter : ('a -> bool) -> ('a,_) t -> ('a, 'mut) t (** filter elements from vector *) -val fold : 'a t -> 'b -> ('b -> 'a -> 'b) -> 'b +val filter' : ('a -> bool) -> ('a, rw) t -> unit +(** Filter elements in place. Does {b NOT} preserve the order + of the elements. *) + +val fold : ('b -> 'a -> 'b) -> 'b -> ('a,_) t -> 'b (** fold on elements of the vector *) -val exists : 'a t -> ('a -> bool) -> bool +val exists : ('a -> bool) -> ('a,_) t -> bool (** existential test *) -val for_all : 'a t -> ('a -> bool) -> bool +val for_all : ('a -> bool) -> ('a,_) t -> bool (** universal test *) -val find : 'a t -> ('a -> bool) -> 'a -(** find an element that satisfies the predicate, or Not_found *) +val find : ('a -> bool) -> ('a,_) t -> 'a option +(** Find an element that satisfies the predicate *) -val get : 'a t -> int -> 'a -(** access element, or Failure if bad index *) +val find_exn : ('a -> bool) -> ('a,_) t -> 'a +(** find an element that satisfies the predicate, or + @raise Not_found if no element does *) -val set : 'a t -> int -> 'a -> unit -(** access element, or Failure if bad index *) +val filter_map : ('a -> 'b option) -> ('a,_) t -> ('b, 'mut) t +(** Map elements with a function, possibly filtering some of them out *) -val rev : 'a t -> unit -(** Reverse array in place *) +val flat_map : ('a -> ('b,_) t) -> ('a,_) t -> ('b, 'mut) t +(** Map each element to a sub-vector *) -val size : 'a t -> int +val flat_map' : ('a -> 'b sequence) -> ('a,_) t -> ('b, 'mut) t +(** Like {!flat_map}, but using {!sequence} for intermediate collections *) + +val (>>=) : ('a,_) t -> ('a -> ('b,_) t) -> ('b, 'mut) t + +val (>|=) : ('a,_) t -> ('a -> 'b) -> ('b, 'mut) t + +val get : ('a,_) t -> int -> 'a +(** access element, or + @raise Failure if bad index *) + +val set : ('a, rw) t -> int -> 'a -> unit +(** access element, or + @raise Failure if bad index *) + +val remove : ('a, rw) t -> int -> unit +(** Remove the [n-th] element of the vector. Does {b NOT} preserve the order + of the elements (might swap with the last element) *) + +val rev : ('a,_) t -> ('a, 'mut) t +(** Reverse the vector *) + +val rev' : ('a, rw) t -> unit +(** Reverse the vector in place *) + +val size : ('a,_) t -> int (** number of elements in vector *) -val length : _ t -> int +val length : (_,_) t -> int (** Synonym for {! size} *) -val unsafe_get_array : 'a t -> 'a array -(** Access the underlying *shared* array (do not modify!). +val capacity : (_,_) t -> int +(** Number of elements the vector can contain without being resized *) + +val unsafe_get_array : ('a, rw) t -> 'a array +(** Access the underlying {b shared} array (do not modify!). [unsafe_get_array v] is longer than [size v], but elements at higher index than [size v] are undefined (do not access!). *) -val of_seq : ?init:'a t -> 'a sequence -> 'a t +val (--) : int -> int -> (int, 'mut) t +(** Range of integers (both included) *) -val to_seq : 'a t -> 'a sequence +val of_array : 'a array -> ('a, 'mut) t +val of_list : 'a list -> ('a, 'mut) t +val to_array : ('a,_) t -> 'a array +val to_list : ('a,_) t -> 'a list -val slice : 'a t -> int -> int -> 'a sequence -(** [slice v start len] is the sequence of elements from [v.(start)] - to [v.(start+len)] included. *) +val of_seq : ?init:('a,rw) t -> 'a sequence -> ('a, rw) t -val from_array : 'a array -> 'a t -val from_list : 'a list -> 'a t -val to_array : 'a t -> 'a array -val to_list : 'a t -> 'a list +val to_seq : ('a,_) t -> 'a sequence +val slice : ('a,rw) t -> ('a array * int * int) +(** Vector as an array slice. By doing it we expose the internal array, so + be careful! *) + +val slice_seq : ('a,_) t -> int -> int -> 'a sequence +(** [slice_seq v start len] is the sequence of elements from [v.(start)] + to [v.(start+len-1)]. *) + +val of_klist : ?init:('a, rw) t -> 'a klist -> ('a, rw) t +val to_klist : ('a,_) t -> 'a klist +val of_gen : ?init:('a, rw) t -> 'a gen -> ('a, rw) t +val to_gen : ('a,_) t -> 'a gen + +val pp : ?start:string -> ?stop:string -> ?sep:string -> + 'a printer -> ('a,_) t printer + +val print : ?start:string -> ?stop:string -> ?sep:string -> + 'a formatter -> ('a,_) t formatter diff --git a/misc/AVL.ml b/misc/AVL.ml index 15f3f8ed..b28a4b8f 100644 --- a/misc/AVL.ml +++ b/misc/AVL.ml @@ -28,13 +28,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. See https://en.wikipedia.org/wiki/AVL_tree *) -type ('a,'b) t = - | Empty - | Node of ('a,'b) t * 'a * 'b * ('a,'b) t * int - type 'a comparator = 'a -> 'a -> int -let empty = Empty +type ('a,'b) tree = + | Empty + | Node of ('a,'b) tree * 'a * 'b * ('a,'b) tree * int + +type ('a,'b) t = { + cmp: 'a comparator; + t: ('a,'b) tree +} + +let empty ~cmp = { cmp; t=Empty } let _height = function | Empty -> 0 @@ -46,7 +51,8 @@ let _balance l r = _height l - _height r let _make l x y r = Node (l, x, y, r, 1 + max (_height l) (_height r)) -let singleton k v = _make empty k v empty +let _singleton k v = _make Empty k v Empty +let singleton ~cmp k v = { cmp; t = _singleton k v } (* balance tree [t] *) let _rebalance t = match t with @@ -93,32 +99,40 @@ let _rebalance t = match t with let _make_balance l k v r = _rebalance (_make l k v r) -let rec fold f acc t = match t with +let rec _fold f acc t = match t with | Empty -> acc | Node (l, x, y, r, _) -> - let acc = fold f acc l in + let acc = _fold f acc l in let acc = f acc x y in - fold f acc r + _fold f acc r -let rec for_all p t = match t with +let fold f acc {t; _} = _fold f acc t + +let rec _for_all p t = match t with | Empty -> true | Node (l, x, y, r, _) -> - p x y && for_all p l && for_all p r + p x y && _for_all p l && _for_all p r -let rec exists p t = match t with +let for_all p {t; _} = _for_all p t + +let rec _exists p t = match t with | Empty -> false | Node (l, x, y, r, _) -> - p x y || exists p l || exists p r + p x y || _exists p l || _exists p r -let rec insert ~cmp t k v = match t with - | Empty -> _make empty k v empty +let exists p {t; _} = _exists p t + +let rec _insert ~cmp t k v = match t with + | Empty -> _make Empty k v Empty | Node (l, k1, v1, r, _) -> let c = cmp k k1 in if c < 0 - then _make_balance (insert ~cmp l k v) k1 v1 r + then _make_balance (_insert ~cmp l k v) k1 v1 r else if c = 0 then _make l k v r - else _make_balance l k1 v1 (insert ~cmp r k v) + else _make_balance l k1 v1 (_insert ~cmp r k v) + +let insert {cmp; t} k v = {cmp; t=_insert ~cmp t k v} (* remove the maximal value in the given tree (the only which only has a left child), and return its key/value pair *) @@ -132,7 +146,7 @@ let rec _remove_max t = match t with exception NoSuchElement -let remove ~cmp t key = +let _remove ~cmp t key = let rec _remove t = match t with | Empty -> raise NoSuchElement | Node (l, k, v, r, _) -> @@ -153,30 +167,34 @@ let remove ~cmp t key = try _remove t with NoSuchElement -> t (* element not found *) -let update ~cmp t key f = failwith "update: not implemented" +let remove {cmp; t} k = {cmp; t=_remove ~cmp t k} -let rec find_exn ~cmp t key = match t with +let _update ~cmp t key f = failwith "update: not implemented" +let update {cmp; t} = _update ~cmp t + +let rec _find_exn ~cmp t key = match t with | Empty -> raise Not_found | Node (l, k, v, r, _) -> let c = cmp key k in - if c < 0 then find_exn ~cmp l key - else if c > 0 then find_exn ~cmp r key + if c < 0 then _find_exn ~cmp l key + else if c > 0 then _find_exn ~cmp r key else v +let find_exn {cmp; t} = _find_exn ~cmp t -let find ~cmp t key = - try Some (find_exn ~cmp t key) +let find t key = + try Some (find_exn t key) with Not_found -> None (* add k,v as strictly maximal element to t. [t] must not contain any key >= k *) let rec _add_max k v t = match t with - | Empty -> singleton k v + | Empty -> _singleton k v | Node (l, k', v', r, _) -> _make_balance l k' v' (_add_max k v r) -and + (* same for minimal value *) -_add_min k v t = match t with - | Empty -> singleton k v +let rec _add_min k v t = match t with + | Empty -> _singleton k v | Node (l, k', v', r, _) -> _make_balance (_add_min k v l) k' v' r @@ -204,42 +222,51 @@ let _concat t1 t2 = match t1, t2 with let t1', k, v = _remove_max t1 in _join t1' k v t2 -let rec split ~cmp t key = match t with - | Empty -> empty, None, empty +let rec _split ~cmp t key = match t with + | Empty -> Empty, None, Empty | Node (l, k, v, r, _) -> let c = cmp key k in if c < 0 then - let ll, result, lr = split ~cmp l key in + let ll, result, lr = _split ~cmp l key in ll, result, _join lr k v r else if c > 0 then - let rl, result, rr = split ~cmp r key in + let rl, result, rr = _split ~cmp r key in _join l k v rl, result, rr else l, Some v, r +let split {cmp; t} k = + let (t,b,t') = _split ~cmp t k in + {cmp; t}, b, {cmp; t=t'} + (* if k = Some v, join l k v r, else concat l v *) let _concat_or_join l k result r = match result with | None -> _concat l r | Some v -> _join l k v r -let rec merge ~cmp f t1 t2 = match t1, t2 with - | Empty, Empty -> empty +let rec _merge ~cmp f t1 t2 = match t1, t2 with + | Empty, Empty -> Empty | Node (l1, k1, v1, r1, h1), _ when h1 >= _height t2 -> - let l2, result2, r2 = split ~cmp t2 k1 in + let l2, result2, r2 = _split ~cmp t2 k1 in let result = f k1 (Some v1) result2 in - let l = merge ~cmp f l1 l2 in - let r = merge ~cmp f r1 r2 in + let l = _merge ~cmp f l1 l2 in + let r = _merge ~cmp f r1 r2 in _concat_or_join l k1 result r | _, Node (l2, k2, v2, r2, _) -> - let l1, result1, r1 = split ~cmp t1 k2 in + let l1, result1, r1 = _split ~cmp t1 k2 in let result = f k2 result1 (Some v2) in - let l = merge ~cmp f l1 l2 in - let r = merge ~cmp f r1 r2 in + let l = _merge ~cmp f l1 l2 in + let r = _merge ~cmp f r1 r2 in _concat_or_join l k2 result r | _, Empty -> assert false (* h1 < heigth h2?? *) +let merge f {cmp; t} {cmp=cmp'; t=t'} = + if(cmp != cmp') then invalid_arg "AVL.merge: trees wit different + comparison function"; + {cmp; t = _merge ~cmp f t t'} + (* invariant: balanced *) let rec invariant_balance t = match t with | Empty -> true @@ -253,13 +280,13 @@ let rec invariant_search ~cmp t = match t with | Node (l, x, _, r, _) -> invariant_search ~cmp l && invariant_search ~cmp r && - for_all (fun x' _ -> cmp x' x < 0) l && - for_all (fun x' _ -> cmp x' x > 0) r + _for_all (fun x' _ -> cmp x' x < 0) l && + _for_all (fun x' _ -> cmp x' x > 0) r let of_list ~cmp l = - List.fold_left (fun acc (x,y) -> insert ~cmp acc x y) empty l + {cmp; t = List.fold_left (fun acc (x,y) -> _insert ~cmp acc x y) Empty l} -let to_list t = +let to_list {t; _} = let rec aux acc t = match t with | Empty -> acc | Node (l, k, v, r, _) -> @@ -273,15 +300,15 @@ let to_list t = module type ITERATOR = sig type 'a iter - val after : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a * 'b) iter - val before : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a * 'b) iter - val iter : ('a,'b) t -> ('a * 'b) iter - val add : cmp:'a comparator -> ('a,'b) t -> ('a * 'b) iter -> ('a,'b) t + val after : ('a,'b) t -> 'a -> ('a * 'b) iter + val before : ('a,'b) t -> 'a -> ('a * 'b) iter + val iter : ('a,'b) t -> ('a * 'b) iter + val add : ('a,'b) t -> ('a * 'b) iter -> ('a,'b) t end type ('a,'b) explore = | Yield of 'a * 'b - | Explore of ('a, 'b) t + | Explore of ('a, 'b) tree exception EndOfIter @@ -330,24 +357,26 @@ let rec _before~cmp stack t key = match t with else _yield k v (_push_swap l stack) module KList = struct - type 'a t = [ `Nil | `Cons of 'a * (unit -> 'a t) ] + type 'a t = unit -> [ `Nil | `Cons of 'a * 'a t ] - let rec _next (l:('a,'b) explore list) () : ('a*'b) t = match l with + let rec _next (l:('a,'b) explore list) () = match l with | [] -> `Nil | _::_ -> let k, v, l' = _pop l in `Cons ((k,v), _next l') - let iter t = _next (_push t []) () + let iter {t; _} = _next (_push t []) - let rec add ~cmp t (l:'a t) = match l with + let rec _add ~cmp t (l:'a t) = match l () with | `Nil -> t | `Cons ((k,v), l') -> - add ~cmp (insert ~cmp t k v) (l' ()) + _add ~cmp (_insert ~cmp t k v) l' - let after ~cmp t key = _next (_after ~cmp [] t key) () + let add {cmp; t} l = {cmp; t=_add ~cmp t l} - let before ~cmp t key = _next (_before ~cmp [] t key) () + let after {cmp; t} key = _next (_after ~cmp [] t key) + + let before {cmp; t} key = _next (_before ~cmp [] t key) end module Gen = struct @@ -355,7 +384,7 @@ module Gen = struct let _gen stack = let stack = ref stack in - let rec next () = + let next () = match !stack with | [] -> None | l -> @@ -364,13 +393,15 @@ module Gen = struct Some (k, v) in next - let iter t = _gen (_push t []) + let iter {t; _} = _gen (_push t []) - let rec add ~cmp t gen = + let rec _add ~cmp t gen = match gen() with | None -> t - | Some (k,v) -> add ~cmp (insert ~cmp t k v) gen + | Some (k,v) -> _add ~cmp (_insert ~cmp t k v) gen - let after ~cmp t key = _gen (_after ~cmp [] t key) - let before ~cmp t key = _gen (_before ~cmp [] t key) + let add {cmp; t} l = {cmp; t=_add ~cmp t l} + + let after {cmp; t} key = _gen (_after ~cmp [] t key) + let before {cmp; t} key = _gen (_before ~cmp [] t key) end diff --git a/misc/AVL.mli b/misc/AVL.mli index 926eefa1..094ace1e 100644 --- a/misc/AVL.mli +++ b/misc/AVL.mli @@ -26,16 +26,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 AVL trees} *) -type ('a,'b) t = private - | Empty - | Node of ('a,'b) t * 'a * 'b * ('a,'b) t * int - type 'a comparator = 'a -> 'a -> int -val empty : ('a,'b) t +type ('a,'b) tree = private + | Empty + | Node of ('a,'b) tree * 'a * 'b * ('a,'b) tree * int + +type ('a,'b) t = private { + cmp: 'a comparator; + t: ('a,'b) tree +} + +val empty : cmp:'a comparator -> ('a,'b) t (** Empty tree *) -val singleton : 'a -> 'b -> ('a,'b) t +val singleton : cmp:'a comparator -> 'a -> 'b -> ('a,'b) t (** Tree with a single node *) val fold : ('c -> 'a -> 'b -> 'c) -> 'c -> ('a,'b) t -> 'c @@ -44,29 +49,29 @@ val fold : ('c -> 'a -> 'b -> 'c) -> 'c -> ('a,'b) t -> 'c val for_all : ('a -> 'b -> bool) -> ('a,'b) t -> bool val exists : ('a -> 'b -> bool) -> ('a,'b) t -> bool -val find : cmp:'a comparator -> ('a,'b) t -> 'a -> 'b option +val find : ('a,'b) t -> 'a -> 'b option (** Find the value associated to the key, if any *) -val find_exn : cmp:'a comparator -> ('a,'b) t -> 'a -> 'b +val find_exn : ('a,'b) t -> 'a -> 'b (** @raise Not_found if the key is not present *) -val insert : cmp:'a comparator -> ('a,'b) t -> 'a -> 'b -> ('a,'b) t +val insert : ('a,'b) t -> 'a -> 'b -> ('a,'b) t (** Insertion in the tree *) -val remove : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a,'b) t +val remove : ('a,'b) t -> 'a -> ('a,'b) t (** Removal from the tree *) -val update : cmp:'a comparator -> ('a,'b) t -> 'a -> +val update : ('a,'b) t -> 'a -> ('b option -> ('a * 'b) option) -> ('a,'b) t (** Update of the given key binding (subsumes [insert] and [remove]) *) -val split : cmp:'a comparator -> ('a,'b) t -> 'a -> +val split : ('a,'b) t -> 'a -> ('a,'b) t * 'b option * ('a,'b) t (** [split ~cmp t k] splits [t] into a left part that is smaller than [k], the possible binding of [k], and a part bigger than [k]. *) -val merge : cmp:'a comparator -> +val merge : ('a -> 'b option -> 'c option -> 'd option) -> ('a,'b) t -> ('a,'c) t -> ('a,'d) t (** Merge two trees together, with the given function *) @@ -82,14 +87,14 @@ val to_list : ('a,'b) t -> ('a * 'b) list module type ITERATOR = sig type 'a iter - val after : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a * 'b) iter - val before : cmp:'a comparator -> ('a,'b) t -> 'a -> ('a * 'b) iter + val after : ('a,'b) t -> 'a -> ('a * 'b) iter + val before : ('a,'b) t -> 'a -> ('a * 'b) iter val iter : ('a,'b) t -> ('a * 'b) iter - val add : cmp:'a comparator -> ('a,'b) t -> ('a * 'b) iter -> ('a,'b) t + val add : ('a,'b) t -> ('a * 'b) iter -> ('a,'b) t end module KList : sig - type 'a t = [ `Nil | `Cons of 'a * (unit -> 'a t) ] + type 'a t = unit -> [ `Nil | `Cons of 'a * 'a t ] include ITERATOR with type 'a iter := 'a t end diff --git a/misc/CSM.ml b/misc/CSM.ml index 158f5b3a..6d72cd7b 100644 --- a/misc/CSM.ml +++ b/misc/CSM.ml @@ -210,7 +210,7 @@ module List = struct let build state x = Some (x::state, x::state) end -module CCGen = struct +module Gen = struct type 'a gen = unit -> 'a option let map a state gen = @@ -227,7 +227,7 @@ module CCGen = struct end end -module CCSequence = struct +module Sequence = struct type 'a sequence = ('a -> unit) -> unit exception ExitSeq @@ -244,6 +244,20 @@ module CCSequence = struct with ExitSeq -> () end +module KList = struct + type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] + + let rec map f state (l:'a klist) () = + match l () with + | `Nil -> `Nil + | `Cons (x, l') -> + begin match f state x with + | None -> `Nil + | Some (y, state') -> + `Cons (y, map f state' l') + end +end + (** {2 Mutable Interface} *) module Mut = struct diff --git a/misc/CSM.mli b/misc/CSM.mli index abd59af1..476ef889 100644 --- a/misc/CSM.mli +++ b/misc/CSM.mli @@ -26,10 +26,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Composable State Machines} This module defines state machines that should help design applications -with a more explicit control of state (e.g. for networking applications. *) +with a more explicit control of state (e.g. for networking applications). *) -type ('a, 's, 'b) t = 's -> 'a -> ('b * 's) option -(** transition function that fully describes an automaton *) +type ('input, 'state, 'output) t = 'state -> 'input -> ('output * 'state) option +(** transition function that fully describes an automaton. It returns + [None] to indicate that it stops. *) type ('a, 's, 'b) automaton = ('a, 's, 'b) t @@ -140,18 +141,24 @@ module List : sig (** build a list from its inputs *) end -module CCGen : sig +module Gen : sig type 'a gen = unit -> 'a option val map : ('a, 's, 'b) t -> 's -> 'a gen -> 'b gen end -module CCSequence : sig +module Sequence : sig type 'a sequence = ('a -> unit) -> unit val map : ('a, 's, 'b) t -> 's -> 'a sequence -> 'b sequence end +module KList : sig + type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] + + val map : ('a, 's, 'b) t -> 's -> 'a klist -> 'b klist +end + (** {2 Mutable Interface} *) module Mut : sig diff --git a/misc/lazyGraph.ml b/misc/lazyGraph.ml index 82a664d8..3a2f893a 100644 --- a/misc/lazyGraph.ml +++ b/misc/lazyGraph.ml @@ -577,13 +577,13 @@ module Dot = struct CCSequence.iter (function | Full.EnterVertex (v, attrs, _, _) -> - Format.fprintf formatter " @[%a [%a];@]@." pp_vertex v - (CCList.print ~sep:"," print_attribute) attrs + Format.fprintf formatter " @[%a %a;@]@." pp_vertex v + (CCList.print ~start:"[" ~stop:"]" ~sep:"," print_attribute) attrs | Full.ExitVertex _ -> () | Full.MeetEdge (v2, attrs, v1, _) -> - Format.fprintf formatter " @[%a -> %a [%a];@]@." + Format.fprintf formatter " @[%a -> %a %a;@]@." pp_vertex v1 pp_vertex v2 - (CCList.print ~sep:"," print_attribute) + (CCList.print ~start:"[" ~stop:"]" ~sep:"," print_attribute) attrs) events; (* close *) diff --git a/misc/parseReact.ml b/misc/parseReact.ml new file mode 100644 index 00000000..d1eee788 --- /dev/null +++ b/misc/parseReact.ml @@ -0,0 +1,237 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Parser combinators driven by the input} *) + +type ('a, 'b) t = + | Return : 'b -> ('a,'b) t + | Delay : (unit -> ('a, 'b) t) -> ('a, 'b) t + | One : ('a, 'a) t + | Stop : ('a, unit) t + | Bind : ('a, 'b) t * ('b -> ('a, 'c) t) -> ('a, 'c) t + | Choice : ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t + | Map : ('a, 'b) t * ('b -> 'c) -> ('a, 'c) t + | Guard : ('a, 'b) t * ('b -> bool) -> ('a, 'b) t + | Skip : ('a, unit) t + | IfThenElse: ('a -> bool) * ('a, 'b) t * ('a, 'b) t -> ('a, 'b) t + | Fail : ('a, 'b) t + +let stop = Stop + +let return x = Return x + +let delay f = Delay f + +let return' f = Delay (fun () -> return (f ())) + +let fail = Fail + +let one = One + +let skip = Skip + +let bind f p = Bind (p, f) + +let (>>=) p f = bind f p + +let exact ?(eq=(=)) x = + one + >>= fun y -> + if eq x y then Return () else Fail + +let guard f p = Guard (p, f) + +let (>>) p1 p2 = p1 >>= fun _ -> p2 + +let map f p = Map (p, f) + +let (>>|) p f = Map (p, f) + +let (<|>) p1 p2 = Choice (p1, p2) + +let pair p1 p2 = + p1 >>= fun x1 -> + p2 >>= fun x2 -> + return (x1, x2) + +let triple p1 p2 p3 = + p1 >>= fun x1 -> + p2 >>= fun x2 -> + p3 >>= fun x3 -> + return (x1, x2, x3) + +let if_then_else p a b = IfThenElse (p, a, b) + +(** {6 Utils} *) + +let take_while pred = + let rec next acc = + if_then_else pred + (one >>= fun x -> next (x::acc)) + (return' (fun () -> List.rev acc)) + in + next [] + +let take_n n = + let rec next acc n = + if n = 0 + then return (List.rev acc) + else one >>= fun x -> next (x::acc) (n-1) + in + next [] n + +let skip_spaces = + let rec next () = + if_then_else + (fun c -> c = ' ' || c = '\t' || c = '\n') + (skip >> delay next) + (return ()) + in next () + +let ident = + let accept = function + | c when Char.code c >= Char.code 'a' && Char.code c <= Char.code 'z' -> true + | c when Char.code c >= Char.code 'A' && Char.code c <= Char.code 'Z' -> true + | c when Char.code c >= Char.code '0' && Char.code c <= Char.code '9' -> true + | _ -> false + in + let rec aggregate buf = + if_then_else + accept + (one >>= fun c -> Buffer.add_char buf c; aggregate buf) + (return (Buffer.contents buf)) + in + (* create buffer on demand, to avoid sharing it *) + delay (fun () -> aggregate (Buffer.create 32)) + +let many ~sep p = + let rec next acc = + (return (List.rev acc)) + <|> (p >>= fun x -> sep >> next (x::acc)) + in + next [] + +let many1 ~sep p = + let rec next acc = + p >>= fun x -> + let acc = x :: acc in + (return (List.rev acc)) + <|> (sep >> next acc) + in + next [] + +(** {6 Run} *) + +type 'a sequence = ('a -> unit) -> unit + +let _fold_seq f acc seq = + let acc = ref acc in + seq (fun x -> acc := f !acc x); + !acc + +(** Partial state during parsing: a tree of continuations *) +type (_, _) state = + | STBottom : 'b -> ('a, 'b) state + | STPush : ('a, 'c) t * ('c -> ('a, 'b) state list) -> ('a, 'b) state + +let (>>>) p cont = STPush (p, cont) + +let run p seq = + (* normalize the stack (do not let a "return" on top) *) + let rec reduce : type a b. (a,b)state -> (a,b) state list + = fun stack -> match stack with + | STPush (Return x, cont) -> CCList.flat_map reduce (cont x) + | STPush (Delay f, cont) -> reduce (f () >>> cont) + | STPush (Bind (p, f), cont) -> + let stack' = p >>> fun x -> [f x >>> cont] in + reduce stack' + | STPush (Choice (a, b), cont) -> + (* fork into sub-stacks *) + CCList.append (reduce (a >>> cont)) (reduce (b >>> cont)) + | STPush (Map (p, f), cont) -> + let stack' = p >>> fun x -> cont (f x) in + reduce stack' + | STPush (Guard (p, f), cont) -> + let stack' = p >>> fun x -> if f x then cont x else [] in + reduce stack' + | _ -> [stack] + in + (* consume one input token *) + let rec consume_one : type a b. (a,b) state -> a -> (a,b) state list + = fun stack x -> match stack with + | STBottom _ -> [] (* fail *) + | STPush (Stop, _) -> [] (* fail *) + | STPush (Fail, _) -> [] (* fail *) + | STPush (One, cont) -> CCList.flat_map reduce (cont x) + | STPush (Skip, cont) -> CCList.flat_map reduce (cont ()) + | STPush (IfThenElse (p, yay, nay), cont) -> + let l = if p x + then reduce (yay >>> cont) + else reduce (nay >>> cont) + in + CCList.flat_map (fun stack -> consume_one stack x) l + | STPush (Return _, _) -> assert false + | STPush (Delay _, _) -> assert false + | STPush (Bind _, _) -> assert false + | STPush (Choice _, _) -> assert false + | STPush (Map _, _) -> assert false + | STPush (Guard _, _) -> assert false + in + (* to be called at the end of input *) + let finish : type a b. (a,b) state -> (a,b) state list + = fun stack -> match stack with + | STPush (Stop, cont) -> CCList.flat_map reduce (cont ()) + | STPush (Fail, _) -> [] + | _ -> [stack] + in + (* how to parse the input: step by step, starting with [p] as initial parser *) + let step l x = CCList.flat_map (fun p -> consume_one p x) l in + let initial_state = p >>> fun x -> [STBottom x] in + let res = _fold_seq step (reduce initial_state) seq in + (* signal "end of input" *) + let res = CCList.flat_map finish res in + (* recover results *) + CCList.filter_map + (function + | STBottom x -> Some x + | _ -> None + ) res + + +(*$R + let module S = struct type t = Atom of string | List of t list end in + let open S in + let (%) f g x = f (g x) in + let atom i = Atom i in + let list_ i = List i in + let rec p () = + (skip_spaces >> ident >>= (return % atom)) + <|> (skip_spaces >> exact '(' >> many1 ~sep:(exact ' ') (delay p) >>= fun l -> + skip_spaces >> exact ')' >> return (list_ l)) + in + let res = run (p ()) (CCSequence.of_str "(a b (c d))") in + assert_equal res [list_ [atom "a"; atom "b"; list_ [atom "c"; atom "d"]]] +*) diff --git a/misc/parseReact.mli b/misc/parseReact.mli new file mode 100644 index 00000000..da823495 --- /dev/null +++ b/misc/parseReact.mli @@ -0,0 +1,113 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Parser combinators driven by the input} *) + +type ('input, 'result) t +(** parser that takes some type as input and outputs a value of type 'result +when it's done *) + +(** {6 Basic Building Blocs} *) + +val stop : ('a, unit) t +(** Succeed exactly at the end of input *) + +val return : 'b -> ('a, 'b) t +(** Return a value *) + +val return' : (unit -> 'b) -> ('a, 'b) t +(** Suspended version of {!return}, does not evaluate yet *) + +val delay : (unit -> ('a, 'b) t) -> ('a, 'b) t +(** Delay evaluation of the parser *) + +val fail : ('a, 'b) t +(** Failure *) + +val one : ('a, 'a) t +(** Parse one value exactly *) + +val skip : ('a, unit) t +(** Ignore the next value *) + +val exact : ?eq:('a -> 'a -> bool) -> 'a -> ('a, unit) t +(** Accept one value as input exactly *) + +val guard : ('b -> bool) -> ('a, 'b) t -> ('a, 'b) t +(** Ensure the return value of the given parser satisfies the predicate. + [guard f p] will be the same as [p] if [p] returns + some [x] with [f x = true]. If [not (f x)], then [guard f p] fails. *) + +val bind : ('b -> ('a, 'c) t) -> ('a, 'b) t -> ('a, 'c) t + +val (>>=) : ('a, 'b) t -> ('b -> ('a, 'c) t) -> ('a, 'c) t + +val (>>) : ('a, 'b) t -> ('a, 'c) t -> ('a, 'c) t +(** Wait for the first parser to succeed, then switch to the second one *) + +val map : ('b -> 'c) -> ('a, 'b) t -> ('a, 'c) t +(** Map outputs *) + +val (>>|) : ('a, 'b) t -> ('b -> 'c) -> ('a, 'c) t +(** Infix version of {!map} *) + +val (<|>) : ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t +(** Non-deterministic choice. Both branches are evaluated in parallel *) + +val pair : ('a,'b) t -> ('a, 'c) t -> ('a, ('b * 'c)) t +val triple : ('a,'b) t -> ('a, 'c) t -> ('a, 'd) t -> ('a, ('b * 'c * 'd)) t + +val if_then_else : ('a -> bool) -> ('a, 'b) t -> ('a, 'b) t -> ('a, 'b) t +(** Test the next input, and choose the parser based on it. Does not consume + the input token for the test *) + +(** {6 Utils} *) + +val take_while : ('a -> bool) -> ('a, 'a list) t +(** Take input while it satisfies the given predicate *) + +val take_n : int -> ('a, 'a list) t +(** Take n input elements *) + +val skip_spaces : (char, unit) t +(** Skip whitespace (space,tab,newline) *) + +val ident : (char, string) t +(** Parse identifiers (stops on whitespaces) *) + +val many : sep:('a,_) t -> ('a, 'b) t -> ('a, 'b list) t +(** [many ~sep p] parses as many [p] as possible, separated by [sep]. *) + +val many1 : sep:('a,_) t -> ('a, 'b) t -> ('a, 'b list) t + +(** {6 Run} *) + +type 'a sequence = ('a -> unit) -> unit + +val run : ('a,'b) t -> 'a sequence -> 'b list +(** List of results. Each element of the list comes from a successful + series of choices [<|>]. If no choice operator was used, the list + contains 0 or 1 elements *) diff --git a/misc/printBox.ml b/misc/printBox.ml new file mode 100644 index 00000000..aed324da --- /dev/null +++ b/misc/printBox.ml @@ -0,0 +1,360 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Pretty-Printing of Boxes} *) + +type position = { x:int ; y: int } + +let origin = {x=0; y=0;} + +let _move pos x y = {x=pos.x + x; y=pos.y + y} +let _add pos1 pos2 = _move pos1 pos2.x pos2.y +let _move_x pos x = _move pos x 0 +let _move_y pos y = _move pos 0 y + +(** {2 Output: where to print to} *) + +module Output = struct + type t = { + put_char : position -> char -> unit; + put_string : position -> string -> unit; + put_sub_string : position -> string -> int -> int -> unit; + flush : unit -> unit; + } + + let put_char out pos c = out.put_char pos c + let put_string out pos s = out.put_string pos s + let put_sub_string out pos s s_i s_len = out.put_sub_string pos s s_i s_len + + (** An internal buffer, suitable for writing efficiently, then + convertable into a list of lines *) + type buffer = { + mutable buf_lines : buf_line array; + mutable buf_len : int; + } + and buf_line = { + mutable bl_str : string; + mutable bl_len : int; + } + + let _make_line _ = {bl_str=""; bl_len=0} + + let _ensure_lines buf i = + if i >= Array.length buf.buf_lines + then ( + let lines' = Array.init (2 * i + 5) _make_line in + Array.blit buf.buf_lines 0 lines' 0 buf.buf_len; + buf.buf_lines <- lines'; + ) + + let _ensure_line line i = + if i >= String.length line.bl_str + then ( + let str' = String.make (2 * i + 5) ' ' in + String.blit line.bl_str 0 str' 0 line.bl_len; + line.bl_str <- str'; + ) + + let _buf_put_char buf pos c = + _ensure_lines buf pos.y; + _ensure_line buf.buf_lines.(pos.y) pos.x; + buf.buf_len <- max buf.buf_len (pos.y+1); + let line = buf.buf_lines.(pos.y) in + line.bl_str.[pos.x] <- c; + line.bl_len <- max line.bl_len (pos.x+1) + + let _buf_put_sub_string buf pos s s_i s_len = + _ensure_lines buf pos.y; + _ensure_line buf.buf_lines.(pos.y) (pos.x + s_len); + buf.buf_len <- max buf.buf_len (pos.y+1); + let line = buf.buf_lines.(pos.y) in + String.blit s s_i line.bl_str pos.x s_len; + line.bl_len <- max line.bl_len (pos.x+s_len) + + let _buf_put_string buf pos s = + _buf_put_sub_string buf pos s 0 (String.length s) + + (* create a new buffer *) + let make_buffer () = + let buf = { + buf_lines = Array.init 16 _make_line; + buf_len = 0; + } in + let buf_out = { + put_char = _buf_put_char buf; + put_sub_string = _buf_put_sub_string buf; + put_string = _buf_put_string buf; + flush = (fun () -> ()); + } in + buf, buf_out + + let buf_to_lines ?(indent=0) buf = + let buffer = Buffer.create (5 + buf.buf_len * 32) in + for i = 0 to buf.buf_len - 1 do + for k = 1 to indent do Buffer.add_char buffer ' ' done; + let line = buf.buf_lines.(i) in + Buffer.add_substring buffer line.bl_str 0 line.bl_len; + Buffer.add_char buffer '\n'; + done; + Buffer.contents buffer + + let buf_output ?(indent=0) oc buf = + for i = 0 to buf.buf_len - 1 do + for k = 1 to indent do output_char oc ' '; done; + let line = buf.buf_lines.(i) in + output oc line.bl_str 0 line.bl_len; + output_char oc '\n'; + done +end + +(* find [c] in [s], starting at offset [i] *) +let rec _find s c i = + if i >= String.length s then None + else if s.[i] = c then Some i + else _find s c (i+1) + +let rec _lines s i k = match _find s '\n' i with + | None -> + if i + let s' = String.sub s i (j-i) in + k s'; + _lines s (j+1) k + +module Box = struct + type grid_shape = + | GridBase + | GridFramed + + type 'a shape = + | Line of string + | Text of string list (* in a box *) + | Frame of 'a + | Grid of grid_shape * 'a array array + + type t = { + shape : t shape; + size : position lazy_t; + } + + let size box = Lazy.force box.size + + let shape b = b.shape + + let _array_foldi f acc a = + let acc = ref acc in + Array.iteri (fun i x -> acc := f !acc i x) a; + !acc + + let _dim_matrix m = + if Array.length m = 0 then {x=0;y=0} + else {y=Array.length m; x=Array.length m.(0); } + + (* height of a line composed of boxes *) + let _height_line a = + _array_foldi + (fun h i box -> + let s = size box in + max h s.y + ) 0 a + + (* how large is the [i]-th column of [m]? *) + let _width_column m i = + let acc = ref 0 in + for j = 0 to Array.length m - 1 do + acc := max !acc (size m.(j).(i)).x + done; + !acc + + (* from a matrix [m] (line,column), return two arrays [lines] and [columns], + with [col.(i)] being the start offset of column [i] and + [lines.(j)] being the start offset of line [j]. + Those arrays have one more slot to indicate the end position. *) + let _size_matrix m = + let dim = _dim_matrix m in + (* columns *) + let columns = Array.make (dim.x + 1) 0 in + for i = 0 to dim.x - 1 do + (* +1 is for keeping room for the vertical/horizontal line/column *) + columns.(i+1) <- columns.(i) + 1 + (_width_column m i) + done; + (* lines *) + let lines = Array.make (dim.y + 1) 0 in + for j = 1 to dim.y do + lines.(j) <- lines.(j-1) + 1 + (_height_line m.(j-1)) + done; + (* no trailing bars, adjust *) + columns.(dim.x) <- columns.(dim.x) - 1; + lines.(dim.y) <- lines.(dim.y) - 1; + lines, columns + + let _size = function + | Line s -> { x=String.length s; y=1 } + | Text l -> + let width = List.fold_left + (fun acc line -> max acc (String.length line)) 0 l + in + { x=width; y=List.length l; } + | Frame t -> + let {x;y} = size t in + { x=x+2; y=y+2; } + | Grid (_,m) -> + let dim = _dim_matrix m in + let lines, columns = _size_matrix m in + { y=lines.(dim.y); x=columns.(dim.x)} + + let _make shape = + { shape; size=(lazy (_size shape)); } +end + +let line s = + assert (_find s '\n' 0 = None); + Box._make (Box.Line s) + +let text s = + let acc = ref [] in + _lines s 0 (fun x -> acc := x :: !acc); + Box._make (Box.Text (List.rev !acc)) + +let lines l = + assert (List.for_all (fun s -> _find s '\n' 0 = None) l); + Box._make (Box.Text l) + +let int_ x = line (string_of_int x) +let float_ x = line (string_of_float x) +let bool_ x = line (string_of_bool x) + +let frame b = + Box._make (Box.Frame b) + +let grid ?(framed=true) m = + Box._make (Box.Grid ((if framed then Box.GridFramed else Box.GridBase), m)) + +let init_grid ?framed ~line ~col f = + let m = Array.init line (fun j-> Array.init col (fun i -> f ~line:j ~col:i)) in + grid ?framed m + +let vlist ?framed l = + let a = Array.of_list l in + grid ?framed (Array.map (fun line -> [| line |]) a) + +let hlist ?framed l = + grid ?framed [| Array.of_list l |] + +let hlist_map ?framed f l = hlist ?framed (List.map f l) +let vlist_map ?framed f l = vlist ?framed (List.map f l) +let grid_map ?framed f m = grid ?framed (Array.map (Array.map f) m) + +let grid_text ?framed m = grid_map ?framed text m + +let transpose m = + let dim = Box._dim_matrix m in + Array.init dim.x + (fun i -> Array.init dim.y (fun j -> m.(j).(i))) + +let _write_vline ~out pos n = + for j=0 to n-1 do + Output.put_char out (_move_y pos j) '|' + done + +let _write_hline ~out pos n = + for i=0 to n-1 do + Output.put_char out (_move_x pos i) '-' + done + +(* render given box on the output, starting with upper left corner + at the given position. [expected_size] is the size of the + available surrounding space *) +let rec _render ?expected_size ~out b pos = + match Box.shape b with + | Box.Line s -> Output.put_string out pos s + | Box.Text l -> + List.iteri + (fun i line -> + Output.put_string out (_move_y pos i) line + ) l + | Box.Frame b' -> + let {x;y} = Box.size b' in + Output.put_char out pos '+'; + Output.put_char out (_move pos (x+1) (y+1)) '+'; + Output.put_char out (_move pos 0 (y+1)) '+'; + Output.put_char out (_move pos (x+1) 0) '+'; + _write_hline out (_move_x pos 1) x; + _write_hline out (_move pos 1 (y+1)) x; + _write_vline out (_move_y pos 1) y; + _write_vline out (_move pos (x+1) 1) y; + _render ~out b' (_move pos 1 1) + | Box.Grid (grid_shape,m) -> + let dim = Box._dim_matrix m in + let lines, columns = Box._size_matrix m in + + (* write boxes *) + for j = 0 to dim.y - 1 do + for i = 0 to dim.x - 1 do + let expected_size = { + x=columns.(i+1)-columns.(i); + y=lines.(j+1)-lines.(j); + } in + let pos' = _move pos (columns.(i)) (lines.(j)) in + _render ~expected_size ~out m.(j).(i) pos' + done; + done; + + let len_hlines, len_vlines = match expected_size with + | None -> columns.(dim.x), lines.(dim.y) + | Some {x;y} -> x,y + in + + (* write frame if needed *) + begin match grid_shape with + | Box.GridBase -> () + | Box.GridFramed -> + for j=1 to dim.y - 1 do + _write_hline ~out (_move_y pos (lines.(j)-1)) len_hlines + done; + for i=1 to dim.x - 1 do + _write_vline ~out (_move_x pos (columns.(i)-1)) len_vlines + done; + for j=1 to dim.y - 1 do + for i=1 to dim.x - 1 do + Output.put_char out (_move pos (columns.(i)-1) (lines.(j)-1)) '+' + done + done + end + +let render out b = + _render ~out b origin + +let to_string b = + let buf, out = Output.make_buffer () in + render out b; + Output.buf_to_lines buf + +let output ?(indent=0) oc b = + let buf, out = Output.make_buffer () in + render out b; + Output.buf_output ~indent oc buf; + flush oc diff --git a/misc/printBox.mli b/misc/printBox.mli new file mode 100644 index 00000000..b6286254 --- /dev/null +++ b/misc/printBox.mli @@ -0,0 +1,157 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Pretty-Printing of nested Boxes} + +Allows to print nested boxes, lists, arrays, tables in a nice way +on any monospaced support. + +{[ + # let b = PrintBox.( + frame + (vlist [ line "hello"; + hlist [line "world"; line "yolo"]]) + );; +val b : Box.t = +# PrintBox.output ~indent:2 stdout b;; + +----------+ + |hello | + |----------| + |world|yolo| + +----------+ +- : unit = () +# let b2 = PrintBox.( + frame + (hlist [ text "I love\nto\npress\nenter"; + grid_text [| [|"a"; "bbb"|]; + [|"c"; "hello world"|] |]]) + );; +val b2 : PrintBox.Box.t = +# PrintBox.output stdout b2;; ++--------------------+ +|I love|a|bbb | +|to |-+-----------| +|press |c|hello world| +|enter | | | ++--------------------+ + +- : unit = () + +]} + +*) + +type position = { x:int ; y: int } +(** Positions are relative to the upper-left corner, that is, +when [x] increases we go toward the right, and when [y] increases +we go toward the bottom (same order as a printer) *) + +val origin : position +(** Initial position *) + +module Output : sig + type t = { + put_char : position -> char -> unit; + put_string : position -> string -> unit; + put_sub_string : position -> string -> int -> int -> unit; + flush : unit -> unit; + } + + (** {6 Default Instance: a buffer} *) + + type buffer + + val make_buffer : unit -> buffer * t + (** New buffer, and the corresponding output (buffers are mutable) *) + + val buf_to_lines : ?indent:int -> buffer -> string + (** Print the content of the buffer into a string. + @param indent number of spaces to insert in front of the lines *) + + val buf_output : ?indent:int -> out_channel -> buffer -> unit + (** Print the buffer on the given channel *) +end + +module Box : sig + type t + + val size : t -> position + (** Size needed to print the box *) +end + +val line : string -> Box.t +(** Make a single-line box. + @raise Invalid_argument if the string contains ['\n'] *) + +val text : string -> Box.t +(** Any text, possibly with several lines *) + +val lines : string list -> Box.t +(** Shortcut for {!text}, with a list of lines *) + +val int_ : int -> Box.t + +val bool_ : bool -> Box.t + +val float_ : float -> Box.t + +val frame : Box.t -> Box.t +(** Put a single frame around the box *) + +val grid : ?framed:bool -> Box.t array array -> Box.t +(** Grid of boxes (no frame between boxes). The matrix is indexed + with lines first, then columns. The array must be a proper matrix, + that is, all lines must have the same number of columns! + @param framed if [true], each item of the grid will be framed. + default value is [true] *) + +val grid_text : ?framed:bool -> string array array -> Box.t +(** Same as {!grid}, but wraps every cell into a {!text} box *) + +val transpose : 'a array array -> 'a array array +(** Transpose a matrix *) + +val init_grid : ?framed:bool -> + line:int -> col:int -> (line:int -> col:int -> Box.t) -> Box.t +(** Same as {!grid} but takes the matrix as a function *) + +val vlist : ?framed:bool -> Box.t list -> Box.t +(** Vertical list of boxes *) + +val hlist : ?framed:bool -> Box.t list -> Box.t +(** Horizontal list of boxes *) + +val grid_map : ?framed:bool -> ('a -> Box.t) -> 'a array array -> Box.t + +val vlist_map : ?framed:bool -> ('a -> Box.t) -> 'a list -> Box.t + +val hlist_map : ?framed:bool -> ('a -> Box.t) -> 'a list -> Box.t + +val render : Output.t -> Box.t -> unit + +val to_string : Box.t -> string + +val output : ?indent:int -> out_channel -> Box.t -> unit diff --git a/misc/qCheck.ml b/misc/qCheck.ml deleted file mode 100644 index b971ad7f..00000000 --- a/misc/qCheck.ml +++ /dev/null @@ -1,335 +0,0 @@ - -(* -copyright (c) 2013, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {6 Quickcheck inspired property-based testing} *) - -module Arbitrary = struct - type 'a t = Random.State.t -> 'a - - let return x st = x - - let int n st = Random.State.int st n - - let int_range ~start ~stop st = - let n = stop - start in - if n <= 0 - then 0 - else start + Random.State.int st n - - let (--) start stop = int_range ~start ~stop - - let small_int = int 100 - - let split_int gen st = - let n = gen st in - if n > 0 - then let i = Random.State.int st (n+1) in i, n-i - else 0, 0 - - let bool = Random.State.bool - - let float f st = Random.State.float st f - - let char st = Char.chr (Random.State.int st 128) - - let alpha st = - Char.chr (Char.code 'a' + Random.State.int st (Char.code 'z' - Char.code 'a')) - - let string_len len st = - let n = len st in - assert (n>=0); - let s = String.create n in - for i = 0 to n-1 do - s.[i] <- alpha st - done; - s - - let string st = string_len (int 10) st - - let map ar f st = f (ar st) - - let rec _make_list ar st acc n = - if n = 0 then acc else - let x = ar st in - _make_list ar st (x::acc) (n-1) - - let list ?(len=int 10) ar st = - let n = len st in - _make_list ar st [] n - - let opt ar st = - if Random.State.bool st - then Some (ar st) - else None - - let list_repeat len ar st = - _make_list ar st [] len - - let array ?(len=int 10) ar st = - let n = len st in - Array.init n (fun _ -> ar st) - - let array_repeat n ar st = - Array.init n (fun _ -> ar st) - - let among_array a st = - if Array.length a < 1 - then failwith "Arbitrary.among: cannot choose in empty array "; - let i = Random.State.int st (Array.length a) in - a.(i) - - let among l = - if List.length l < 1 - then failwith "Arbitrary.among: cannot choose in empty list"; - among_array (Array.of_list l) - - let choose l = match l with - | [] -> failwith "cannot choose from empty list" - | [x] -> x - | _ -> - let a = Array.of_list l in - fun st -> - let i = Random.State.int st (Array.length a) in - a.(i) st - - let fix ?(max=15) ~base f = - let rec ar = lazy - (fun depth st -> - if depth >= max || Random.State.int st max < depth - then base st (* base case. THe deeper, the more likely. *) - else (* recurse *) - let ar' = Lazy.force ar (depth+1) in - f ar' st) - in - Lazy.force ar 0 - - let fix_depth ~depth ~base f st = - let max = depth st in - fix ~max ~base f st - - let rec retry gen st = match gen st with - | None -> retry gen st - | Some x -> x - - let lift f a st = f (a st) - - let lift2 f a b st = f (a st) (b st) - - let lift3 f a b c st = f (a st) (b st) (c st) - - let lift4 f a b c d st = f (a st) (b st) (c st) (d st) - - let pair a b = lift2 (fun x y -> x,y) a b - - let triple a b c = lift3 (fun x y z -> x,y,z) a b c - - let quad a b c d = lift4 (fun x y z w -> x,y,z,w) a b c d - - let (>>=) a f st = - let x = a st in - f x st - - let generate ?(n=100) ?(rand=Random.State.make_self_init()) gen = - let l = ref [] in - for i = 0 to n-1 do - l := (gen rand) :: !l - done; - !l -end - -(** {2 Pretty printing} *) - -module PP = struct - type 'a t = 'a -> string - - let int = string_of_int - let bool = string_of_bool - let float = string_of_float - let string s = s - let char c = - let s = "_" in - s.[0] <- c; - s - - let pair a b (x,y) = Printf.sprintf "(%s, %s)" (a x) (b y) - let triple a b c (x,y,z) = Printf.sprintf "(%s, %s, %s)" (a x) (b y) (c z) - let quad a b c d (x,y,z,w) = - Printf.sprintf "(%s, %s, %s, %s)" (a x) (b y) (c z) (d w) - - let list pp l = - let b = Buffer.create 25 in - Buffer.add_char b '['; - List.iteri (fun i x -> - if i > 0 then Buffer.add_string b ", "; - Buffer.add_string b (pp x)) - l; - Buffer.add_char b ']'; - Buffer.contents b - - let array pp a = - let b = Buffer.create 25 in - Buffer.add_string b "[|"; - Array.iteri (fun i x -> - if i > 0 then Buffer.add_string b ", "; - Buffer.add_string b (pp x)) - a; - Buffer.add_string b "|]"; - Buffer.contents b -end - -(** {2 Testing} *) - -module Prop = struct - type 'a t = 'a -> bool - - exception PrecondFail - - let assume p = - if not p then raise PrecondFail - - let assume_lazy (lazy p) = - if not p then raise PrecondFail - - let (==>) a b = - fun x -> - assume (a x); - b x - - let (&&&) a b x = a x && b x - - let (|||) a b x = a x || b x - - let (!!!) a x = not (a x) -end - -type 'a result = - | Ok of int * int (* total number / precond failed *) - | Failed of 'a list - | Error of 'a option * exn - -(* random seed, for repeatability of tests *) -let __seed = [| 89809344; 994326685; 290180182 |] - -let check ?(rand=Random.State.make __seed) ?(n=100) gen prop = - let precond_failed = ref 0 in - let failures = ref [] in - let inst = ref None in - try - for i = 0 to n - 1 do - let x = gen rand in - inst := Some x; - try - if not (prop x) - then failures := x :: !failures - with Prop.PrecondFail -> - incr precond_failed - done; - match !failures with - | [] -> Ok (n, !precond_failed) - | _ -> Failed (!failures) - with e -> - Error (!inst, e) - -(** {2 Main} *) - -type 'a test_cell = { - n : int; - pp : 'a PP.t option; - prop : 'a Prop.t; - gen : 'a Arbitrary.t; - name : string; - limit : int; - size : ('a -> int) option; -} -type test = - | Test : 'a test_cell -> test - (** GADT needed for the existential type *) - -let mk_test ?(n=100) ?pp ?(name="") ?size ?(limit=10) gen prop = - if limit < 0 then failwith "QCheck: limit needs be >= 0"; - if n <= 0 then failwith "QCheck: n needs be >= 0"; - Test { prop; gen; name; n; pp; size; limit; } - -(* tail call version of take, that returns (at most) [n] elements of [l] *) -let rec _list_take acc l n = match l, n with - | _, 0 - | [], _ -> List.rev acc - | x::l', _ -> _list_take (x::acc) l' (n-1) - -let run ?(out=stdout) ?(rand=Random.State.make __seed) (Test test) = - Printf.fprintf out "testing property %s...\n" test.name; - match check ~rand ~n:test.n test.gen test.prop with - | Ok (n, prefail) -> - Printf.fprintf out " [✔] passed %d tests (%d preconditions failed)\n" n prefail; - true - | Failed l -> - begin match test.pp with - | None -> Printf.fprintf out " [×] %d failures over %d\n" (List.length l) test.n - | Some pp -> - Printf.fprintf out " [×] %d failures over %d (print at most %d):\n" - (List.length l) test.n test.limit; - let to_print = match test.size with - | None -> l - | Some size -> - (* sort by increasing size *) - let l = List.map (fun x -> x, size x) l in - let l = List.sort (fun (x,sx) (y,sy) -> sx - sy) l in - List.map fst l - in - (* only keep [limit] counter examples *) - let to_print = _list_take [] to_print test.limit in - (* print the counter examples *) - List.iter - (fun x -> Printf.fprintf out " %s\n" (pp x)) - to_print - end; - false - | Error (inst, e) -> - begin match inst, test.pp with - | _, None - | None, _ -> Printf.fprintf out " [×] error: %s\n" (Printexc.to_string e); - | Some x, Some pp -> - (* print instance on which the error occurred *) - Printf.fprintf out " [×] error on instance %s: %s\n" - (pp x) (Printexc.to_string e); - end; - false - -type suite = test list - -let flatten = List.flatten - -let run_tests ?(out=stdout) ?(rand=Random.State.make __seed) l = - let start = Unix.gettimeofday () in - let n = List.length l in - let failed = ref 0 in - Printf.fprintf out "check %d properties...\n" (List.length l); - List.iter (fun test -> if not (run ~out ~rand test) then incr failed) l; - Printf.fprintf out "tests run in %.2fs\n" (Unix.gettimeofday() -. start); - if !failed = 0 - then Printf.fprintf out "[✔] Success! (passed %d tests)\n" n - else Printf.fprintf out "[×] Failure. (%d tests failed over %d)\n" !failed n; - !failed = 0 diff --git a/misc/qCheck.mli b/misc/qCheck.mli deleted file mode 100644 index 7c3c006e..00000000 --- a/misc/qCheck.mli +++ /dev/null @@ -1,267 +0,0 @@ - -(* -copyright (c) 2013, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 Quickcheck inspired property-based testing} *) - -(** The library takes inspiration from Haskell's QuickCheck library. The -rough idea is that the programer describes invariants that values of -a certain type need to satisfy ("properties"), as functions from this type -to bool. She also needs to desribe how to generate random values of the type, -so that the property is tried and checked on a number of random instances. - -This explains the organization of this module: - -- {! Arbitrary} is used to describe how to generate random values. An - ['a Arbitrary.t] is a random generator of values of type 'a. -- {! Prop} is used to describe and combine properties. Especially interesting - is [Prop.(==>)], that is such that [a ==> b] only checks the property [b] - on a value [x] if [a x] holds (precondition). -- {! PP} describes a few combinators to print values. This is used when a - property fails on some instances, to print the failing instances. - -Then, a few functions are provided to test properties. Optional parameters -allow to specify the random generator, the printer for failing cases, the -number of instances to generate and test... - - -Examples: - - - List.rev is involutive: - -{[ -let test = QCheck.mk_test ~n:1000 QCheck.Arbitrary.(list alpha) - (fun l -> List.rev (List.rev l) = l);; -QCheck.run test;; -]} - - Not all lists are sorted (false property that will fail. The 15 smallest - counter-example lists will be printed): - -{[ -let test = QCheck.( - mk_test - ~n:10_000 ~size:List.length ~limit:15 ~pp:QCheck.PP.(list int) - QCheck.Arbitrary.(list small_int) - (fun l -> l = List.sort compare l));; -QCheck.run test;; -]} - - - - generate 20 random trees using {! Arbitrary.fix} : - -{[type tree = Int of int | Node of tree list;; - - let ar = QCheck.Arbitrary.(fix ~max:10 - ~base:(map small_int (fun i -> Int i)) - (fun t st -> Node (list t st)));; - - Arbitrary.generate ~n:20 ar;; - ]} -*) - -(** {2 Description of how to generate arbitrary values for some type} *) - -module Arbitrary : sig - type 'a t = Random.State.t -> 'a - (** A generator of arbitrary values of type 'a *) - - val return : 'a -> 'a t - (** Return always the same value (e.g. [4]) *) - - val int : int -> int t - (** Any integer between 0 (inclusive) and the given higher bound (exclusive) *) - - val int_range : start:int -> stop:int -> int t - (* Integer range start .. stop-1 *) - - val (--) : int -> int -> int t - (** Infix synonym for {!int_range} *) - - val small_int : int t - (** Ints lower than 100 *) - - val split_int : int t -> (int * int) t - (** [split_int gen] generates a number [n] from [gen], and - returns [i, j] where [i + j = n] *) - - val bool : bool t - (** Arbitrary boolean *) - - val char : char t - (** A (printable) char *) - - val alpha : char t - (** Alphabetic char *) - - val float : float -> float t - (** Random float *) - - val string : string t - (** Random strings of small length *) - - val string_len : int t -> string t - (** String of random length *) - - val map : 'a t -> ('a -> 'b) -> 'b t - (** Transform an arbitrary into another *) - - val list : ?len:int t -> 'a t -> 'a list t - (** List of arbitrary length. Default [len] is between 0 and 10. *) - - val opt : 'a t -> 'a option t - (** May return a value, or None *) - - val pair : 'a t -> 'b t -> ('a * 'b) t - - val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t - - val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a * 'b * 'c * 'd) t - - val list_repeat : int -> 'a t -> 'a list t - (** Lists of given length exactly *) - - val array : ?len:int t -> 'a t -> 'a array t - (** Random array of random length *) - - val array_repeat : int -> 'a t -> 'a array t - (** Random array of given length *) - - val among : 'a list -> 'a t - (** Choose an element among those of the list *) - - val among_array : 'a array -> 'a t - (** Choose in the array *) - - val choose : 'a t list -> 'a t - (** Choice among combinations *) - - val fix : ?max:int -> base:'a t -> ('a t -> 'a t) -> 'a t - (** Recursive arbitrary values. The optional value [max] defines - the maximal depth, if needed (default 15). [base] is the base case. *) - - val fix_depth : depth:int t -> base:'a t -> ('a t -> 'a t) -> 'a t - (** Recursive values of at most given random depth *) - - val lift : ('a -> 'b) -> 'a t -> 'b t - val lift2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t - val lift3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t - val lift4 : ('a -> 'b -> 'c -> 'd -> 'e) -> 'a t -> 'b t -> 'c t -> 'd t -> 'e t - - val (>>=) : 'a t -> ('a -> 'b t) -> 'b t - (** Monadic bind *) - - val retry : 'a option t -> 'a t - (** CCGenerate until a Some value is returned *) - - val generate : ?n:int -> ?rand:Random.State.t -> 'a t -> 'a list - (** CCGenerate [n] random values of the given type *) -end - -(** {2 Pretty printing} *) - -module PP : sig - type 'a t = 'a -> string - - val int : int t - val bool : bool t - val float : float t - val char : char t - val string : string t - - val pair : 'a t -> 'b t -> ('a*'b) t - val triple : 'a t -> 'b t -> 'c t -> ('a*'b*'c) t - val quad : 'a t -> 'b t -> 'c t -> 'd t -> ('a*'b*'c*'d) t - - val list : 'a t -> 'a list t - val array : 'a t -> 'a array t -end - -(** {2 Testing} *) - -module Prop : sig - type 'a t = 'a -> bool - - val (==>) : ('a -> bool) -> 'a t -> 'a t - (** Precondition for a test *) - - val assume : bool -> unit - (** Assume the given precondition holds. A test won't fail if the - precondition (the boolean argument) is false, but it will be - discarded. Running tests counts how many instances were - discarded for not satisfying preconditions. *) - - val assume_lazy : bool lazy_t -> unit - (** Assume the given (lazy) precondition holds. See {!assume}. *) - - val (&&&) : 'a t -> 'a t -> 'a t - (** Logical 'and' on tests *) - - val (|||) : 'a t -> 'a t -> 'a t - (** Logical 'or' on tests *) - - val (!!!) : 'a t -> 'a t - (** Logical 'not' on tests *) -end - -type 'a result = - | Ok of int * int (** total number of tests / number of failed preconditions *) - | Failed of 'a list (** Failed instances *) - | Error of 'a option * exn (** Error, and possibly instance that triggered it *) - -val check : ?rand:Random.State.t -> ?n:int -> - 'a Arbitrary.t -> 'a Prop.t -> 'a result - (** Check that the property [prop] holds on [n] random instances of the type - 'a, as generated by the arbitrary instance [gen] *) - -(** {2 Main} *) - -type test - (** A single property test *) - -val mk_test : ?n:int -> ?pp:'a PP.t -> ?name:string -> - ?size:('a -> int) -> ?limit:int -> - 'a Arbitrary.t -> 'a Prop.t -> test - (** Construct a test. Optional parameters are the same as for {!run}. - @param name is the name of the property that is checked - @param pp is a pretty printer for failing instances - @out is the channel to print results onto - @n is the number of tests (default 100) - @rand is the random generator to use - @size is a size function on values on which tests are performed. If - the test fails and a size function is given, the smallest - counter-examples with respect to [size] will be printed in priority. - @limit maximal number of counter-examples that will get printed. - Default is [10]. *) - -val run : ?out:out_channel -> ?rand:Random.State.t -> test -> bool - (** Run a test and print results *) - -type suite = test list - (** A test suite is a list of tests *) - -val flatten : suite list -> suite - -val run_tests : ?out:out_channel -> ?rand:Random.State.t -> suite -> bool - (** Run a suite of tests, and print its results *) diff --git a/string/levenshtein.ml b/string/levenshtein.ml index 9aec05c8..cf9b4f9d 100644 --- a/string/levenshtein.ml +++ b/string/levenshtein.ml @@ -37,15 +37,15 @@ module type STRING = sig end (** Continuation list *) -type 'a klist = +type 'a klist = unit -> [ | `Nil - | `Cons of 'a * (unit -> 'a klist) + | `Cons of 'a * 'a klist ] -let rec klist_to_list = function +let rec klist_to_list l = match l () with | `Nil -> [] - | `Cons (x,k) -> x :: klist_to_list (k ()) + | `Cons (x,k) -> x :: klist_to_list k module type S = sig type char_ @@ -568,7 +568,7 @@ module Make(Str : STRING) = struct let dfa = of_string ~limit s in (* traverse at index i in automaton, with [fk] the failure continuation *) - let rec traverse node i ~(fk:unit->'a klist) = + let rec traverse node i ~(fk:'a klist) () = match node with | Node (opt, m) -> (* all alternatives: continue exploring [m], or call [fk] *) @@ -577,7 +577,7 @@ module Make(Str : STRING) = struct (fun c node' fk -> try let next = __transition dfa i c in - (fun () -> traverse node' next ~fk) + traverse node' next ~fk with Not_found -> fk) m fk in @@ -617,13 +617,13 @@ module Make(Str : STRING) = struct fold (fun acc str v -> (str,v) :: acc) [] idx let to_klist idx = - let rec traverse node trail ~(fk:unit->(string_*'a) klist) = + let rec traverse node trail ~(fk:(string_*'a) klist) () = match node with | Node (opt, m) -> (* all alternatives: continue exploring [m], or call [fk] *) let fk = M.fold - (fun c node' fk () -> traverse node' (c::trail) ~fk) + (fun c node' fk -> traverse node' (c::trail) ~fk) m fk in match opt with @@ -650,6 +650,13 @@ end) let debug_print = debug_print output_char +(*$T + edit_distance "foo" "fo0" = 1 + edit_distance "foob" "foo" = 1 + edit_distance "yolo" "yoyo" = 1 + edit_distance "aaaaaaab" "aaaa" = 4 +*) + (* open Batteries;; let words = File.with_file_in "/usr/share/dict/cracklib-small" (fun i -> IO.read_all i |> String.nsplit ~by:"\\n");; diff --git a/string/levenshtein.mli b/string/levenshtein.mli index d571415e..9affac48 100644 --- a/string/levenshtein.mli +++ b/string/levenshtein.mli @@ -58,13 +58,13 @@ strings, we return a continuation list so that, even if there are many results, only those actually asked for are evaluated. *) type 'a klist = - [ + unit -> [ | `Nil - | `Cons of 'a * (unit -> 'a klist) + | `Cons of 'a * 'a klist ] val klist_to_list : 'a klist -> 'a list - (** Helper. *) + (** Helper for short lists. *) (** {2 Signature} diff --git a/tests/bench_batch.ml b/tests/bench_batch.ml new file mode 100644 index 00000000..1608cfb2 --- /dev/null +++ b/tests/bench_batch.ml @@ -0,0 +1,89 @@ +(** benchmark CCBatch *) + +module type COLL = sig + val name : string + include CCBatch.COLLECTION + val doubleton : 'a -> 'a -> 'a t + val (--) : int -> int -> int t + val equal : int t -> int t -> bool +end + +module Make(C : COLL) = struct + let f1 x = x mod 2 = 0 + let f2 x = -x + let f3 x = C.doubleton x (x+1) + let f4 x = -x + let collect a = C.fold (+) 0 a + + let naive a = + let a = C.filter f1 a in + let a = C.flat_map f3 a in + let a = C.filter f1 a in + let a = C.map f2 a in + let a = C.flat_map f3 a in + let a = C.map f4 a in + ignore (collect a); + a + + module BA = CCBatch.Make(C) + + let ops = + BA.(filter f1 >>> flat_map f3 >>> filter f1 >>> map f2 >>> flat_map f3 >>> map f4) + + let batch a = + let a = BA.apply ops a in + ignore (collect a); + a + + let bench_for ~time n = + Printf.printf "\n\nbenchmark for %s of len %d\n" C.name n; + flush stdout; + let a = C.(0 -- n) in + (* debug + CCPrint.printf "naive: %a\n" (CCArray.pp CCInt.pp) (naive a); + CCPrint.printf "simple: %a\n" (CCArray.pp CCInt.pp) (batch_simple a); + CCPrint.printf "batch: %a\n" (CCArray.pp CCInt.pp) (batch a); + *) + assert (C.equal (batch a) (naive a)); + let res = Benchmark.throughputN time + [ C.name ^ "_naive", naive, a + ; C.name ^ "_batch", batch, a + ] + in + Benchmark.tabulate res + + let bench () = + bench_for 1 100; + bench_for 4 100_000; + bench_for 4 1_000_000; + () +end + +module BenchArray = Make(struct + include CCArray + let name = "array" + let equal a b = a=b + let doubleton x y = [| x; y |] + let fold = Array.fold_left +end) + +module BenchList = Make(struct + include CCList + let name = "list" + let equal a b = a=b + let doubleton x y = [ x; y ] + let fold = List.fold_left +end) + +module BenchKList = Make(struct + include CCKList + let name = "klist" + let equal a b = equal (=) a b + let doubleton x y = CCKList.of_list [ x; y ] +end) + +let () = + BenchArray.bench(); + BenchList.bench(); + BenchKList.bench (); + ()