diff --git a/.ocamlinit b/.ocamlinit index 3a2564f1..e97f1fcf 100644 --- a/.ocamlinit +++ b/.ocamlinit @@ -27,11 +27,9 @@ #load "containers_string.cma";; #load "containers_pervasives.cma";; #load "containers_bigarray.cma";; -#load "containers_misc.cma";; #load "containers_top.cma";; #thread;; #load "containers_thread.cma";; -open Containers_misc;; #install_printer CCSexp.print;; (* vim:syntax=ocaml: *) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 8611b834..43e557f2 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -1,5 +1,60 @@ = Changelog +== 0.14 + +=== breaking changes + +- change the type `'a CCParse.t` with continuations +- add labels on `CCParse.parse_*` functions +- change semantics of `CCList.Zipper.is_empty` + +=== other changes + +- deprecate `CCVector.rev'`, renamed into `CCVector.rev_in_place` +- deprecate `CCVector.flat_map'`, renamed `flat_map_seq` + +- add `CCMap.add_{list,seq}` +- add `CCSet.add_{list,seq}` +- fix small uglyness in `Map.print` and `Set.print` +- add `CCFormat.{ksprintf,string_quoted}` +- add `CCArray.sort_generic` for sorting over array-like structures in place +- add `CCHashtbl.add` mimicking the stdlib `Hashtbl.add` +- add `CCString.replace` and tests +- add `CCPersistentHashtbl.stats` +- reimplementation of `CCPersistentHashtbl` +- add `make watch` target +- add `CCVector.rev_iter` +- add `CCVector.append_list` +- add `CCVector.ensure_with` +- add `CCVector.return` +- add `CCVector.find_map` +- add `CCVector.flat_map_list` +- add `Containers.Hashtbl` with most combinators of `CCHashtbl` +- many more functions in `CCList.Zipper` +- large update of `CCList.Zipper` +- add `CCHashtbl.update` +- improve `CCHashtbl.MakeCounter` +- add `CCList.fold_flat_map` +- add module `CCChar` +- add functions in `CCFormat` +- add `CCPrint.char` +- add `CCVector.to_seq_rev` +- doc and tests for `CCLevenshtein` +- expose blocking decoder in `CCSexpM` +- add `CCList.fold_map` +- add `CCError.guard_str_trace` +- add `CCError.of_exn_trace` +- add `CCKlist.memoize` for costly computations +- add `CCLevenshtein.Index.{of,to}_{gen,seq}` and `cardinal` + +- small bugfix in `CCSexpM.print` +- fix broken link to changelog (fix #51) +- fix doc generation for `containers.string` +- bugfix in `CCString.find` +- raise exception in `CCString.replace` if `sub=""` +- bugfix in hashtable printing +- bugfix in `CCKList.take`, it was slightly too eager + == 0.13 === Breaking changes diff --git a/Makefile b/Makefile index 25a86594..66d1cb5e 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ examples: all ocamlbuild $(OPTIONS) -package unix -I . $(EXAMPLES) push_doc: doc - scp -r containers.docdir/* cedeela.fr:~/simon/root/software/containers/ + rsync -tavu containers.docdir/* cedeela.fr:~/simon/root/software/containers/ DONTTEST=myocamlbuild.ml setup.ml $(wildcard src/**/*.cppo.*) QTESTABLE=$(filter-out $(DONTTEST), \ @@ -123,4 +123,10 @@ devel: --enable-bigarray --enable-thread --enable-advanced make all +watch: + while find src/ -print0 | xargs -0 inotifywait -e delete_self -e modify ; do \ + echo "============ at `date` ==========" ; \ + make ; \ + done + .PHONY: examples push_doc tags qtest-gen qtest-clean devel update_next_tag diff --git a/README.adoc b/README.adoc index 336a1d7c..8689a4a3 100644 --- a/README.adoc +++ b/README.adoc @@ -37,13 +37,13 @@ What is _containers_? Some of the modules have been moved to their own repository (e.g. `sequence`, `gen`, `qcheck`) and are on opam for great fun and profit. -image:http://ci.cedeela.fr/buildStatus/icon?job=containers[alt="Build Status", link="http://ci.cedeela.fr/job/containers/"] +image:https://ci.cedeela.fr/buildStatus/icon?job=containers[alt="Build Status", link="http://ci.cedeela.fr/job/containers/"] toc::[] == Change Log -See link:CHANGELOG.md[this file]. +See link:CHANGELOG.adoc[this file]. == Finding help @@ -89,6 +89,24 @@ The library contains a <> that mostly extends the stdlib and adds a few very common structures (heap, vector), and sub-libraries that deal with either more specific things, or require additional dependencies. +Some structural types are used throughout the library: + +gen:: `'a gen = unit -> 'a option` is an iterator type. Many combinators + are defined in the opam library https://github.com/c-cube/gen[gen] +sequence:: `'a sequence = (unit -> 'a) -> unit` is also an iterator type. + It is easier to define on data structures than `gen`, but it a bit less + powerful. The opam library https://github.com/c-cube/sequence[sequence] + can be used to consume and produce values of this type. +error:: `'a or_error = [`Error of string | `Ok of 'a]` is a error type + that is used in other libraries, too. The reference module in containers + is `CCError`. +klist:: `'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]` is a lazy list + without memoization, used as a persistent iterator. The reference + module is `CCKList` (in `containers.iter`). +printer:: `'a printer = Format.formatter -> 'a -> unit` is a pretty-printer + to be used with the standard module `Format`. In particular, in many cases, + `"foo: %a" Foo.print foo` will type-check. + [[core]] === Core Modules (extension of the standard library) @@ -117,6 +135,8 @@ Documentation http://cedeela.fr/~simon/software/containers[here]. - `CCError` (monadic error handling, very useful) - `CCIO`, basic utilities for IO (channels, files) - `CCInt64,` utils for `int64` +- `CCChar`, utils for `char` +- `CCFormat`, pretty-printing utils around `Format` === Containers.data diff --git a/_oasis b/_oasis index 82e584fd..0aa8b651 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: containers -Version: 0.13 +Version: 0.14 Homepage: https://github.com/c-cube/ocaml-containers Authors: Simon Cruanes License: BSD-2-clause @@ -45,8 +45,8 @@ Library "containers" Path: src/core Modules: CCVector, CCPrint, CCError, CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCInt, CCBool, CCFloat, CCArray, CCRef, CCSet, - CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat, CCIO, CCInt64, - Containers + CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat, CCIO, + CCInt64, CCChar, Containers BuildDepends: bytes # BuildDepends: bytes, bisect_ppx @@ -142,7 +142,7 @@ Document containers "-docflags '-colorize-code -short-functors -charset utf-8'" XOCamlbuildLibraries: containers, containers.iter, containers.data, - containers.string, containers.bigarray, + containers.string, containers.bigarray, containers.thread, containers.advanced, containers.io, containers.unix, containers.sexp Executable run_benchs diff --git a/_tags b/_tags index a99a01e6..805fc84c 100644 --- a/_tags +++ b/_tags @@ -148,6 +148,6 @@ true: annot, bin_annot : thread : thread : inline(25) - or : inline(15) + or or : inline(15) and not : warn_A, warn(-4), warn(-44) true: no_alias_deps, safe_string diff --git a/benchs/ref_impl.ml b/benchs/ref_impl.ml new file mode 100644 index 00000000..04fca7d2 --- /dev/null +++ b/benchs/ref_impl.ml @@ -0,0 +1,346 @@ + +(* reference implementations for some structures, for comparison purpose *) + +module PersistentHashtbl(H : Hashtbl.HashedType) = struct + module Table = Hashtbl.Make(H) + (** Imperative hashtable *) + + type key = H.t + type 'a t = 'a zipper ref + and 'a zipper = + | Table of 'a Table.t (** Concrete table *) + | Add of key * 'a * 'a t (** Add key *) + | Replace of key * 'a * 'a t (** Replace key by value *) + | Remove of key * 'a t (** As the table, but without given key *) + + let create i = + ref (Table (Table.create i)) + + let empty () = create 11 + + (* pass continuation to get a tailrec rerooting *) + let rec _reroot t k = match !t with + | Table tbl -> k tbl (* done *) + | Add (key, v, t') -> + _reroot t' + (fun tbl -> + t' := Remove (key, t); + Table.add tbl key v; + t := Table tbl; + k tbl) + | Replace (key, v, t') -> + _reroot t' + (fun tbl -> + let v' = Table.find tbl key in + t' := Replace (key, v', t); + t := Table tbl; + Table.replace tbl key v; + k tbl) + | Remove (key, t') -> + _reroot t' + (fun tbl -> + let v = Table.find tbl key in + t' := Add (key, v, t); + t := Table tbl; + Table.remove tbl key; + k tbl) + + (* Reroot: modify the zipper so that the current node is a proper + hashtable, and return the hashtable *) + let reroot t = match !t with + | Table tbl -> tbl + | _ -> _reroot t (fun x -> x) + + let is_empty t = Table.length (reroot t) = 0 + + let find t k = Table.find (reroot t) k + + (*$R + let h = H.of_seq my_seq in + OUnit.assert_equal "a" (H.find h 1); + OUnit.assert_raises Not_found (fun () -> H.find h 5); + let h' = H.replace h 5 "e" in + OUnit.assert_equal "a" (H.find h' 1); + OUnit.assert_equal "e" (H.find h' 5); + OUnit.assert_equal "a" (H.find h 1); + OUnit.assert_raises Not_found (fun () -> H.find h 5); + *) + + (*$R + let n = 10000 in + let seq = Sequence.map (fun i -> i, string_of_int i) Sequence.(0--n) in + let h = H.of_seq seq in + Sequence.iter + (fun (k,v) -> + OUnit.assert_equal ~printer:(fun x -> x) v (H.find h k)) + seq; + OUnit.assert_raises Not_found (fun () -> H.find h (n+1)); + *) + + (*$QR + _list_int_int + (fun l -> + let h = H.of_list l in + List.for_all + (fun (k,v) -> + try + H.find h k = v + with Not_found -> false) + l + ) + *) + + let get_exn k t = find t k + + let get k t = + try Some (find t k) + with Not_found -> None + + let mem t k = Table.mem (reroot t) k + + let length t = Table.length (reroot t) + + (*$R + let h = H.of_seq + Sequence.(map (fun i -> i, string_of_int i) + (0 -- 200)) in + OUnit.assert_equal 201 (H.length h); + *) + + (*$QR + _list_int_int (fun l -> + let h = H.of_list l in + H.length h = List.length l + ) + *) + + let replace t k v = + let tbl = reroot t in + (* create the new hashtable *) + let t' = ref (Table tbl) in + (* update [t] to point to the new hashtable *) + (try + let v' = Table.find tbl k in + t := Replace (k, v', t') + with Not_found -> + t := Remove (k, t') + ); + (* modify the underlying hashtable *) + Table.replace tbl k v; + t' + + let remove t k = + let tbl = reroot t in + try + let v' = Table.find tbl k in + (* value present, make a new hashtable without this value *) + let t' = ref (Table tbl) in + t := Add (k, v', t'); + Table.remove tbl k; + t' + with Not_found -> + (* not member, nothing to do *) + t + + (*$R + let h = H.of_seq my_seq in + OUnit.assert_equal (H.find h 2) "b"; + OUnit.assert_equal (H.find h 3) "c"; + OUnit.assert_equal (H.find h 4) "d"; + OUnit.assert_equal (H.length h) 4; + let h = H.remove h 2 in + OUnit.assert_equal (H.find h 3) "c"; + OUnit.assert_equal (H.length h) 3; + OUnit.assert_raises Not_found (fun () -> H.find h 2) + *) + + (*$R + let open Sequence.Infix in + let n = 10000 in + let seq = Sequence.map (fun i -> i, string_of_int i) (0 -- n) in + let h = H.of_seq seq in + OUnit.assert_equal (n+1) (H.length h); + let h = Sequence.fold (fun h i -> H.remove h i) h (0 -- 500) in + OUnit.assert_equal (n-500) (H.length h); + OUnit.assert_bool "is_empty" (H.is_empty (H.create 16)); + *) + + (*$QR + _list_int_int (fun l -> + let h = H.of_list l in + let h = List.fold_left (fun h (k,_) -> H.remove h k) h l in + H.is_empty h) + *) + + let update t k f = + let v = get k t in + match v, f v with + | None, None -> t (* no change *) + | Some _, None -> remove t k + | _, Some v' -> replace t k v' + + let copy t = + let tbl = reroot t in + (* no one will point to the new [t] *) + let t = ref (Table (Table.copy tbl)) in + t + + let iter t f = + let tbl = reroot t in + Table.iter f tbl + + let fold f acc t = + let tbl = reroot t in + Table.fold (fun k v acc -> f acc k v) tbl acc + + let map f t = + let tbl = reroot t in + let res = Table.create (Table.length tbl) in + Table.iter (fun k v -> Table.replace res k (f k v)) tbl; + ref (Table res) + + let filter p t = + let tbl = reroot t in + let res = Table.create (Table.length tbl) in + Table.iter (fun k v -> if p k v then Table.replace res k v) tbl; + ref (Table res) + + let filter_map f t = + let tbl = reroot t in + let res = Table.create (Table.length tbl) in + Table.iter + (fun k v -> match f k v with + | None -> () + | Some v' -> Table.replace res k v' + ) tbl; + ref (Table res) + + exception ExitPTbl + + let for_all p t = + try + iter t (fun k v -> if not (p k v) then raise ExitPTbl); + true + with ExitPTbl -> false + + let exists p t = + try + iter t (fun k v -> if p k v then raise ExitPTbl); + false + with ExitPTbl -> true + + let merge f t1 t2 = + let tbl = Table.create (max (length t1) (length t2)) in + iter t1 + (fun k v1 -> + let v2 = try Some (find t2 k) with Not_found -> None in + match f k (Some v1) v2 with + | None -> () + | Some v' -> Table.replace tbl k v'); + iter t2 + (fun k v2 -> + if not (mem t1 k) then match f k None (Some v2) with + | None -> () + | Some _ -> Table.replace tbl k v2); + ref (Table tbl) + + (*$R + let t1 = H.of_list [1, "a"; 2, "b1"] in + let t2 = H.of_list [2, "b2"; 3, "c"] in + let t = H.merge + (fun _ v1 v2 -> match v1, v2 with + | None, _ -> v2 + | _ , None -> v1 + | Some s1, Some s2 -> if s1 < s2 then Some s1 else Some s2) + t1 t2 + in + OUnit.assert_equal ~printer:string_of_int 3 (H.length t); + OUnit.assert_equal "a" (H.find t 1); + OUnit.assert_equal "b1" (H.find t 2); + OUnit.assert_equal "c" (H.find t 3); + *) + + let add_seq init seq = + let tbl = ref init in + seq (fun (k,v) -> tbl := replace !tbl k v); + !tbl + + let of_seq seq = add_seq (empty ()) seq + + let add_list init l = + add_seq init (fun k -> List.iter k l) + + (*$QR + _list_int_int (fun l -> + let l1, l2 = List.partition (fun (x,_) -> x mod 2 = 0) l in + let h1 = H.of_list l1 in + let h2 = H.add_list h1 l2 in + List.for_all + (fun (k,v) -> H.find h2 k = v) + l + && + List.for_all + (fun (k,v) -> H.find h1 k = v) + l1 + && + List.length l1 = H.length h1 + && + List.length l = H.length h2 + ) + *) + + let of_list l = add_list (empty ()) l + + let to_list t = + let tbl = reroot t in + let bindings = Table.fold (fun k v acc -> (k,v)::acc) tbl [] in + bindings + + (*$R + let h = H.of_seq my_seq in + let l = Sequence.to_list (H.to_seq h) in + OUnit.assert_equal my_list (List.sort compare l) + *) + + let to_seq t = + fun k -> + let tbl = reroot t in + Table.iter (fun x y -> k (x,y)) tbl + + (*$R + let h = H.of_seq my_seq in + OUnit.assert_equal "b" (H.find h 2); + OUnit.assert_equal "a" (H.find h 1); + OUnit.assert_raises Not_found (fun () -> H.find h 42); + *) + + let equal eq t1 t2 = + length t1 = length t2 + && + for_all + (fun k v -> match get k t2 with + | None -> false + | Some v' -> eq v v' + ) t1 + + let pp pp_k pp_v buf t = + Buffer.add_string buf "{"; + let first = ref true in + iter t + (fun k v -> + if !first then first:=false else Buffer.add_string buf ", "; + Printf.bprintf buf "%a -> %a" pp_k k pp_v v + ); + Buffer.add_string buf "}" + + let print pp_k pp_v fmt t = + Format.pp_print_string fmt "{"; + let first = ref true in + iter t + (fun k v -> + if !first then first:=false + else (Format.pp_print_string fmt ", "; Format.pp_print_cut fmt ()); + Format.fprintf fmt "%a -> %a" pp_k k pp_v v + ); + Format.pp_print_string fmt "}" +end diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 23fa0be3..17bcc401 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -26,7 +26,7 @@ module L = struct let map_naive () = ignore (try List.map f_ l with Stack_overflow -> []) and map_tailrec () = ignore (List.rev (List.rev_map f_ l)) and ccmap () = ignore (CCList.map f_ l) - and ralmap () = ignore (CCRAL.map f_ ral) + and ralmap () = ignore (CCRAL.map ~f:f_ ral) in B.throughputN time ~repeat [ "List.map", map_naive, () @@ -116,6 +116,50 @@ module L = struct ) end +module Arr = struct + let rand = Random.State.make [| 1;2;3;4 |] + + let mk_arr n = + Array.init n (fun _ -> Random.State.int rand 5_000) + + module IntArr = struct + type elt=int + type t = int array + let get = Array.get + let set = Array.set + let length = Array.length + end + + let sort_ccarray a = + CCArray.sort_generic (module IntArr) ~cmp:CCInt.compare a + + let sort_std a = Array.sort CCInt.compare a + + (* helper, to apply a sort function over a list of arrays *) + let app_list sort l = + List.iter + (fun a -> + let a = Array.copy a in + sort a + ) l + + let bench_sort ?(time=2) n = + let a1 = mk_arr n in + let a2 = mk_arr n in + let a3 = mk_arr n in + B.throughputN time ~repeat + [ "std", app_list sort_std, [a1;a2;a3] + ; "ccarray.sort_gen", app_list sort_ccarray, [a1;a2;a3] + ] + + let () = + B.Tree.register ("array" @>>> + [ "sort" @>> + app_ints (bench_sort ?time:None) [100; 1000; 10_000; 50_000; 100_000; 500_000] + ] + ) +end + module Vec = struct let f x = x+1 @@ -263,23 +307,40 @@ module Tbl = struct = fun key -> let (module Key), name = arg_make key in let module T = struct - let name = sprintf "hashtbl.make(%s)" name + let name = sprintf "hashtbl(%s)" name include Hashtbl.Make(Key) end in (module T) - let persistent_hashtbl = - let module T = CCPersistentHashtbl.Make(CCInt) in + let persistent_hashtbl_ref : type a. a key_type -> (module MUT with type key = a) + = fun key -> + let (module Key), name = arg_make key in + let module T = Ref_impl.PersistentHashtbl(Key) in let module U = struct - type key = int + type key = a type 'a t = 'a T.t ref - let name = "ccpersistent_hashtbl" + let name = sprintf "persistent_tbl_old(%s)" name let create _ = ref (T.empty ()) let find m k = T.find !m k let add m k v = m := T.replace !m k v let replace = add end in - (module U : INT_MUT) + (module U) + + let persistent_hashtbl : type a. a key_type -> (module MUT with type key = a) + = fun key -> + let (module Key), name = arg_make key in + let module T = CCPersistentHashtbl.Make(Key) in + let module U = struct + type key = a + type 'a t = 'a T.t ref + let name = sprintf "persistent_tbl(%s)" name + let create _ = ref (T.empty ()) + let find m k = T.find !m k + let add m k v = m := T.replace !m k v + let replace = add + end in + (module U) let hashtbl = let module T = struct @@ -376,7 +437,7 @@ module Tbl = struct let modules_int = [ hashtbl_make Int ; hashtbl - ; persistent_hashtbl + ; persistent_hashtbl Int (* ; poly_hashtbl *) ; map Int ; wbt Int @@ -391,11 +452,12 @@ module Tbl = struct ; map Str ; wbt Str ; hashtrie Str + ; persistent_hashtbl Str ; hamt Str ; trie ] - let bench_add n = + let bench_add_to which n = let make (module T : INT_MUT) = let run() = let t = T.create 50 in @@ -405,9 +467,11 @@ module Tbl = struct in T.name, run, () in - B.throughputN 3 ~repeat (List.map make modules_int) + B.throughputN 3 ~repeat (List.map make which) - let bench_add_string n = + let bench_add = bench_add_to modules_int + + let bench_add_string_to l n = let keys = CCList.( 1 -- n |> map (fun i->string_of_int i,i)) in let make (module T : STRING_MUT) = let run() = @@ -418,7 +482,9 @@ module Tbl = struct in T.name, run, () in - B.throughputN 3 ~repeat (List.map make modules_string) + B.throughputN 3 ~repeat (List.map make l) + + let bench_add_string = bench_add_string_to modules_string let bench_replace n = let make (module T : INT_MUT) = @@ -477,7 +543,7 @@ module Tbl = struct ; persistent_array ] @ List.map find_of_mut modules_int - let bench_find n = + let bench_find_to which n = let make (module T : INT_FIND) = let m = T.init n (fun i -> i) in let run() = @@ -487,9 +553,11 @@ module Tbl = struct in T.name, run, () in - Benchmark.throughputN 3 ~repeat (List.map make modules_int_find) + Benchmark.throughputN 3 ~repeat (List.map make which) - let bench_find_string n = + let bench_find = bench_find_to modules_int_find + + let bench_find_string_to l n = let keys = CCList.( 1 -- n |> map (fun i->string_of_int i,i)) in let make (module T : STRING_MUT) = let m = T.create n in @@ -501,16 +569,31 @@ module Tbl = struct in T.name, run, () in - Benchmark.throughputN 3 ~repeat (List.map make modules_string) + Benchmark.throughputN 3 ~repeat (List.map make l) - let () = B.Tree.register ( - "tbl" @>>> + let bench_find_string = bench_find_string_to modules_string + + let () = + B.Tree.register ("tbl" @>>> [ "add_int" @>> app_ints bench_add [10; 100; 1_000; 10_000;] ; "add_string" @>> app_ints bench_add_string [10; 100; 1_000; 10_000;] ; "replace" @>> app_ints bench_replace [10; 100; 1_000; 10_000] ; "find" @>> app_ints bench_find [10; 20; 100; 1_000; 10_000] ; "find_string" @>> app_ints bench_find_string [10; 20; 100; 1_000; 10_000] - ]) + ]); + B.Tree.register ("tbl_persistent" @>>> + let l_int = [persistent_hashtbl Int; persistent_hashtbl_ref Int] in + let l_str = [persistent_hashtbl Str; persistent_hashtbl_ref Str] in + [ "add_int" @>> app_ints (bench_add_to l_int) [10; 100; 1_000; 10_000;] + ; "find_int" @>> app_ints + (bench_find_to (List.map find_of_mut l_int)) + [10; 20; 100; 1_000; 10_000] + ; "add_string" @>> app_ints + (bench_add_string_to l_str) [10; 100; 1_000; 10_000;] + ; "find_string" @>> app_ints + (bench_find_string_to l_str) [10; 20; 100; 1_000; 10_000] + ]); + () end module Iter = struct @@ -935,6 +1018,7 @@ module Thread = struct [100; 1_000] ) [ 2, 3, 3 ; 5, 3, 3 + ; 1, 5, 5 ; 2, 10, 10 ; 5, 10, 10 ; 20, 10, 10 @@ -949,4 +1033,5 @@ module Thread = struct end let () = - B.Tree.run_global () + try B.Tree.run_global () + with Arg.Help msg -> print_endline msg diff --git a/containers.odocl b/containers.odocl index 733f63e0..45828a4a 100644 --- a/containers.odocl +++ b/containers.odocl @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: a900d68fa0b4b050dbefd78b29de4a01) +# DO NOT EDIT (digest: a679876a4dd37916033589f8650bb4b2) src/core/CCVector src/core/CCPrint src/core/CCError @@ -23,6 +23,7 @@ src/core/CCMap src/core/CCFormat src/core/CCIO src/core/CCInt64 +src/core/CCChar src/core/Containers src/iter/CCKTree src/iter/CCKList @@ -56,6 +57,10 @@ src/string/CCApp_parse src/string/CCParse src/bigarray/CCBigstring src/bigarray/CCArray1 +src/threads/CCFuture +src/threads/CCLock +src/threads/CCSemaphore +src/threads/CCThread src/advanced/Containers_advanced src/advanced/CCLinq src/advanced/CCBatch diff --git a/doc/intro.txt b/doc/intro.txt index 05b4cd88..fc23b41c 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -2,7 +2,7 @@ {2 Change Log} -See {{: https://github.com/c-cube/ocaml-containers/blob/master/CHANGELOG.md } this file} +See {{: https://github.com/c-cube/ocaml-containers/blob/master/CHANGELOG.adoc } this file} {2 License} @@ -25,6 +25,7 @@ by ocamlfind). {!modules: CCArray CCBool +CCChar CCError CCFloat CCFun @@ -72,6 +73,7 @@ CCFQueue CCFlatHashtbl CCHashSet CCHashTrie +CCImmutArray CCIntMap CCMixmap CCMixset @@ -111,7 +113,12 @@ Iterators: {4 String} -{!modules: Levenshtein KMP} +{!modules: +CCApp_parse +CCKMP +CCLevenshtein +CCParse +} {4 Bigarrays} @@ -128,33 +135,11 @@ requires {{:https://github.com/c-cube/sequence} Sequence}. {4 Misc} -This list is not necessarily up-to-date. - -{!modules: -AbsSet -Automaton -Bij -CSM -Hashset -LazyGraph -PHashtbl -PrintBox -RAL -RoseTree -SmallSet -UnionFind -Univ -} +Moved to its own repository. {4 Lwt} -Utils for Lwt (including experimental stuff) - -{!modules: -Lwt_actor -Lwt_klist -Lwt_pipe -} +Moved to its own repository {4 Others} @@ -162,6 +147,7 @@ Lwt_pipe CCFuture CCLock CCSemaphore +CCThread } diff --git a/setup.ml b/setup.ml index 77d4307d..cea46de1 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.4.4 *) (* OASIS_START *) -(* DO NOT EDIT (digest: c6d7f2a2c3e523530c9ff6c358014560) *) +(* DO NOT EDIT (digest: dd2796010195c6abda33b5bf5ecc73ea) *) (* Regenerated by OASIS v0.4.5 Visit http://oasis.forge.ocamlcore.org for more information and @@ -6875,7 +6875,7 @@ let setup_t = alpha_features = ["ocamlbuild_more_args"]; beta_features = []; name = "containers"; - version = "0.13"; + version = "0.14"; license = OASISLicense.DEP5License (OASISLicense.DEP5Unit @@ -7038,6 +7038,7 @@ let setup_t = "CCFormat"; "CCIO"; "CCInt64"; + "CCChar"; "Containers" ]; lib_pack = false; @@ -7728,7 +7729,7 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.4.5"; - oasis_digest = Some "\148\186w\011\191\130\218%\234}-\170\178\161I\r"; + oasis_digest = Some "\016\224&\n\229K}\248\171\001\211\206\025\164lj"; oasis_exec = None; oasis_setup_args = []; setup_update = false @@ -7736,6 +7737,6 @@ let setup_t = let setup () = BaseSetup.setup setup_t;; -# 7740 "setup.ml" +# 7741 "setup.ml" (* OASIS_STOP *) let () = setup ();; diff --git a/src/core/CCArray.ml b/src/core/CCArray.ml index 09d3938e..17b351d5 100644 --- a/src/core/CCArray.ml +++ b/src/core/CCArray.ml @@ -641,3 +641,137 @@ module Sub = struct let to_klist a = _to_klist a.arr a.i a.j end + +(** {2 Generic Functions} *) + +module type MONO_ARRAY = sig + type elt + type t + + val length : t -> int + + val get : t -> int -> elt + + val set : t -> int -> elt -> unit +end + +(* Dual Pivot Quicksort (YaroslavSkiy) + from "average case analysis of Java 7's Dual Pivot Quicksort" *) +module SortGeneric(A : MONO_ARRAY) = struct + module Rand = Random.State + + let seed_ = [|123456|] + + type state = { + mutable l: int; (* left pointer *) + mutable g: int; (* right pointer *) + mutable k: int; + } + + let rand_idx_ rand i j = i + Rand.int rand (j-i) + + let swap_ a i j = + if i=j then () + else ( + let tmp = A.get a i in + A.set a i (A.get a j); + A.set a j tmp + ) + + let sort ~cmp a = + let rec insert_ a i k = + if k 0 then ( + swap_ a k (k+1); + insert_ a i (k-1) + ) + in + (* recursive part of insertion sort *) + let rec sort_insertion_rec a i j k = + if k 1 then sort_insertion_rec a i j (i+1) + in + let rand = Rand.make seed_ in + (* sort slice. + There is a chance that the two pivots are equal, but it's unlikely. *) + let rec sort_slice_ ~st a i j = + if j-i>10 then ( + st.l <- i; + st.g <- j-1; + st.k <- i; + (* choose pivots *) + let p = A.get a (rand_idx_ rand i j) in + let q = A.get a (rand_idx_ rand i j) in + (* invariant: st.p <= st.q, swap them otherwise *) + let p, q = if cmp p q > 0 then q, p else p, q in + while st.k <= st.g do + let cur = A.get a st.k in + if cmp cur p < 0 then ( + (* insert in leftmost band *) + if st.k <> st.l then swap_ a st.k st.l; + st.l <- st.l + 1 + ) else if cmp cur q > 0 then ( + (* insert in rightmost band *) + while st.k < st.g && cmp (A.get a st.g) q > 0 do + st.g <- st.g - 1 + done; + swap_ a st.k st.g; + st.g <- st.g - 1; + (* the element swapped from the right might be in the first situation. + that is, < p (we know it's <= q already) *) + if cmp (A.get a st.k) p < 0 then ( + if st.k <> st.l then swap_ a st.k st.l; + st.l <- st.l + 1 + ) + ); + st.k <- st.k + 1 + done; + (* save values before recursing *) + let l = st.l and g = st.g and sort_middle = cmp p q < 0 in + sort_slice_ ~st a i l; + if sort_middle then sort_slice_ ~st a l (g+1); + sort_slice_ ~st a (g+1) j; + ) else sort_insertion a i j + in + if A.length a > 0 then ( + let st = { l=0; g=A.length a; k=0; } in + sort_slice_ ~st a 0 (A.length a) + ) +end + + +let sort_generic (type arr)(type elt) +(module A : MONO_ARRAY with type t = arr and type elt = elt) +?(cmp=Pervasives.compare) a += + let module S = SortGeneric(A) in + S.sort ~cmp a + +(*$inject + module IA = struct + type elt = int + type t = int array + include Array + end + + let gen_arr = Q.Gen.(array_size (1--100) small_int) + let arr_arbitrary = Q.make + ~print:Q.Print.(array int) + ~small:Array.length + ~shrink:Q.Shrink.(array ?shrink:None) + gen_arr +*) + +(*$Q & ~count:300 + arr_arbitrary (fun a -> \ + let a1 = Array.copy a and a2 = Array.copy a in \ + Array.sort CCInt.compare a1; sort_generic ~cmp:CCInt.compare (module IA) a2; \ + a1 = a2 ) +*) + diff --git a/src/core/CCArray.mli b/src/core/CCArray.mli index 2b1256c4..2a84b628 100644 --- a/src/core/CCArray.mli +++ b/src/core/CCArray.mli @@ -232,3 +232,23 @@ module Sub : sig include S with type 'a t := 'a t end +(** {2 Generic Functions} *) + +module type MONO_ARRAY = sig + type elt + type t + + val length : t -> int + + val get : t -> int -> elt + + val set : t -> int -> elt -> unit +end + +val sort_generic : + (module MONO_ARRAY with type t = 'arr and type elt = 'elt) -> + ?cmp:('elt -> 'elt -> int) -> 'arr -> unit +(** Sort the array, without allocating (eats stack space though). Performance + might be lower than {!Array.sort}. + @since 0.14 *) + diff --git a/src/core/CCChar.ml b/src/core/CCChar.ml new file mode 100644 index 00000000..8f4db0d6 --- /dev/null +++ b/src/core/CCChar.ml @@ -0,0 +1,15 @@ +(* This file is free software, part of containers. See file "license" for more details. *) + +(** {1 Utils around char} + + @since 0.14 *) + +type t = char + +let equal (a:char) b = a=b +let compare = Char.compare + +let pp = Buffer.add_char +let print = Format.pp_print_char + + diff --git a/src/core/CCChar.mli b/src/core/CCChar.mli new file mode 100644 index 00000000..3f12e666 --- /dev/null +++ b/src/core/CCChar.mli @@ -0,0 +1,15 @@ + +(* This file is free software, part of containers. See file "license" for more details. *) + +(** {1 Utils around char} + + @since 0.14 *) + +type t = char + +val equal : t -> t -> bool +val compare : t -> t -> int + +val pp : Buffer.t -> t -> unit +val print : Format.formatter -> t -> unit + diff --git a/src/core/CCError.ml b/src/core/CCError.ml index 47498964..20746abd 100644 --- a/src/core/CCError.ml +++ b/src/core/CCError.ml @@ -59,7 +59,7 @@ let register_printer p = _printers := p :: !_printers (* FIXME: just use {!Printexc.register_printer} instead? *) let of_exn e = - let buf = Buffer.create 15 in + let buf = Buffer.create 32 in let rec try_printers l = match l with | [] -> Buffer.add_string buf (Printexc.to_string e) | p :: l' -> @@ -69,6 +69,19 @@ let of_exn e = try_printers !_printers; `Error (Buffer.contents buf) +let of_exn_trace e = + let buf = Buffer.create 128 in + let rec try_printers l = match l with + | [] -> Buffer.add_string buf (Printexc.to_string e) + | p :: l' -> + try p buf e + with _ -> try_printers l' + in + try_printers !_printers; + Buffer.add_char buf '\n'; + Buffer.add_string buf (Printexc.get_backtrace ()); + `Error (Buffer.contents buf) + let map f e = match e with | `Ok x -> `Ok (f x) | `Error s -> `Error s @@ -126,6 +139,10 @@ let guard_str f = try `Ok (f()) with e -> of_exn e +let guard_str_trace f = + try `Ok (f()) + with e -> of_exn_trace e + let wrap1 f x = try return (f x) with e -> `Error e diff --git a/src/core/CCError.mli b/src/core/CCError.mli index 072ecc96..b01af19f 100644 --- a/src/core/CCError.mli +++ b/src/core/CCError.mli @@ -50,6 +50,14 @@ val fail : 'err -> ('a,'err) t val of_exn : exn -> ('a, string) t (** [of_exn e] uses {!Printexc} to print the exception as a string *) +val of_exn_trace : exn -> ('a, string) t +(** [of_exn_trace e] is similar to [of_exn e], but it adds the stacktrace + to the error message. + + Remember to call [Printexc.record_backtrace true] and compile with the + debug flag for this to work. + @since 0.14 *) + val fail_printf : ('a, Buffer.t, unit, ('a,string) t) format4 -> 'a (** [fail_printf format] uses [format] to obtain an error message and then returns [`Error msg] @@ -110,6 +118,11 @@ val guard_str : (unit -> 'a) -> ('a, string) t (** Same as {!guard} but uses {!of_exn} to print the exception. See {!register_printer} *) +val guard_str_trace : (unit -> 'a) -> ('a, string) t +(** Same as {!guard_str} but uses {!of_exn_trace} instead of {!of_exn} so + that the stack trace is printed. + @since 0.14 *) + val wrap1 : ('a -> 'b) -> 'a -> ('b, exn) t (** Same as {!guard} but gives the function one argument. *) @@ -205,3 +218,5 @@ This way a printer that doesn't know how to deal with an exception will let other printers do it. *) val register_printer : exn printer -> unit + +(* TODO: deprecate, should use {!Printexc} *) diff --git a/src/core/CCFormat.ml b/src/core/CCFormat.ml index f20ae774..c294d91a 100644 --- a/src/core/CCFormat.ml +++ b/src/core/CCFormat.ml @@ -37,11 +37,17 @@ let silent _fmt _ = () let unit fmt () = Format.pp_print_string fmt "()" let int fmt i = Format.pp_print_string fmt (string_of_int i) -let string fmt s = Format.pp_print_string fmt s -let bool fmt b = Format.fprintf fmt "%B" b +let string = Format.pp_print_string +let bool = Format.pp_print_bool let float3 fmt f = Format.fprintf fmt "%.3f" f let float fmt f = Format.pp_print_string fmt (string_of_float f) +let char = Format.pp_print_char +let int32 fmt n = Format.fprintf fmt "%ld" n +let int64 fmt n = Format.fprintf fmt "%Ld" n +let nativeint fmt n = Format.fprintf fmt "%nd" n +let string_quoted fmt s = Format.fprintf fmt "\"%s\"" s + let list ?(start="[") ?(stop="]") ?(sep=", ") pp fmt l = let rec pp_list l = match l with | x::((_::_) as l) -> @@ -125,6 +131,16 @@ let sprintf format = fmt format +let fprintf = Format.fprintf + + +let ksprintf ~f fmt = + let buf = Buffer.create 32 in + let out = Format.formatter_of_buffer buf in + Format.kfprintf + (fun _ -> Format.pp_print_flush out (); f (Buffer.contents buf)) + out fmt + let stdout = Format.std_formatter let stderr = Format.err_formatter diff --git a/src/core/CCFormat.mli b/src/core/CCFormat.mli index 6a4c46f6..c27e0eb8 100644 --- a/src/core/CCFormat.mli +++ b/src/core/CCFormat.mli @@ -44,6 +44,15 @@ val bool : bool printer val float3 : float printer (* 3 digits after . *) val float : float printer +val char : char printer (** @since 0.14 *) +val int32 : int32 printer (** @since 0.14 *) +val int64 : int64 printer (** @since 0.14 *) +val nativeint : nativeint printer (** @since 0.14 *) + +val string_quoted : string printer +(** Similar to {!CCString.print}. + @since 0.14 *) + val list : ?start:string -> ?stop:string -> ?sep:string -> 'a printer -> 'a list printer val array : ?start:string -> ?stop:string -> ?sep:string -> 'a printer -> 'a array printer val arrayi : ?start:string -> ?stop:string -> ?sep:string -> @@ -67,7 +76,25 @@ val stdout : t val stderr : t val sprintf : ('a, t, unit, string) format4 -> 'a - (** print into a string *) +(** Print into a string any format string that would usually be compatible + with {!fprintf}. Similar to {!Format.asprintf}. *) + +val fprintf : t -> ('a, t, unit ) format -> 'a +(** Alias to {!Format.fprintf} + @since 0.14 *) + +val ksprintf : + f:(string -> 'b) -> + ('a, Format.formatter, unit, 'b) format4 -> + 'a +(** [ksprintf fmt ~f] formats using [fmt], in a way similar to {!sprintf}, + and then calls [f] on the resulting string. + @since 0.14 *) + +(*$= & ~printer:CCFormat.(to_string (opt string)) + (Some "hello world") \ + (ksprintf "hello %a" CCFormat.string "world" ~f:(fun s -> Some s)) +*) val to_file : string -> ('a, t, unit, unit) format4 -> 'a - (** Print to the given file *) +(** Print to the given file *) diff --git a/src/core/CCHashtbl.ml b/src/core/CCHashtbl.ml index 761a005f..58bf360d 100644 --- a/src/core/CCHashtbl.ml +++ b/src/core/CCHashtbl.ml @@ -71,6 +71,25 @@ let of_list l = List.iter (fun (k,v) -> Hashtbl.add tbl k v) l; tbl +let update tbl ~f ~k = + let v = get tbl k in + match v, f k v with + | None, None -> () + | None, Some v' -> Hashtbl.add tbl k v' + | Some _, Some v' -> Hashtbl.replace tbl k v' + | Some _, None -> Hashtbl.remove tbl k + +(*$R + let tbl = Hashtbl.create 32 in + update tbl ~k:1 ~f:(fun _ _ -> Some "1"); + assert_equal (Some "1") (get tbl 1); + update tbl ~k:2 ~f:(fun _ v->match v with Some _ -> assert false | None -> Some "2"); + assert_equal (Some "2") (get tbl 2); + assert_equal 2 (Hashtbl.length tbl); + update tbl ~k:1 ~f:(fun _ _ -> None); + assert_equal None (get tbl 1); +*) + let print pp_k pp_v fmt m = Format.fprintf fmt "@[tbl {@,"; let first = ref true in @@ -121,10 +140,22 @@ module type S = sig val of_list : (key * 'a) list -> 'a t (** From the given list of bindings, added in order *) + val update : 'a t -> f:(key -> 'a option -> 'a option) -> k:key -> unit + (** [update tbl ~f ~k] updates key [k] by calling [f k (Some v)] if + [k] was mapped to [v], or [f k None] otherwise; if the call + returns [None] then [k] is removed/stays removed, if the call + returns [Some v'] then the binding [k -> v'] is inserted + using {!Hashtbl.replace} + @since 0.14 *) + val print : key printer -> 'a printer -> 'a t printer + (** Printer for tables + @since 0.13 *) end -module Make(X : Hashtbl.HashedType) = struct +module Make(X : Hashtbl.HashedType) + : S with type key = X.t and type 'a t = 'a Hashtbl.Make(X).t += struct include Hashtbl.Make(X) let get tbl x = @@ -143,6 +174,14 @@ module Make(X : Hashtbl.HashedType) = struct (fun x y acc -> f x y :: acc) h [] + let update tbl ~f ~k = + let v = get tbl k in + match v, f k v with + | None, None -> () + | None, Some v' -> add tbl k v' + | Some _, Some v' -> replace tbl k v' + | Some _, None -> remove tbl k + let to_seq tbl k = iter (fun key v -> k (key,v)) tbl let of_seq seq = @@ -161,7 +200,7 @@ module Make(X : Hashtbl.HashedType) = struct tbl let print pp_k pp_v fmt m = - Format.pp_print_string fmt "@[tbl {@,"; + Format.fprintf fmt "@[tbl {@,"; let first = ref true in iter (fun k v -> @@ -171,7 +210,7 @@ module Make(X : Hashtbl.HashedType) = struct pp_v fmt v; Format.pp_print_cut fmt () ) m; - Format.pp_print_string fmt "}@]" + Format.fprintf fmt "}@]" end (** {2 Default Table} *) @@ -249,19 +288,48 @@ module type COUNTER = sig (** Increment the counter for the given element *) val incr_by : t -> int -> elt -> unit - (** Add several occurrences at once *) + (** Add or remove several occurrences at once. [incr_by c x n] + will add [n] occurrences of [x] if [n>0], + and remove [abs n] occurrences if [n<0]. *) val get : t -> elt -> int (** Number of occurrences for this element *) + val decr : t -> elt -> unit + (** Remove one occurrence of the element + @since 0.14 *) + + val length : t -> int + (** Number of distinct elements + @since 0.14 *) + val add_seq : t -> elt sequence -> unit (** Increment each element of the sequence *) val of_seq : elt sequence -> t (** [of_seq s] is the same as [add_seq (create ())] *) + + val to_seq : t -> (elt * int) sequence + (** [to_seq tbl] returns elements of [tbl] along with their multiplicity + @since 0.14 *) + + val add_list : t -> (elt * int) list -> unit + (** Similar to {!add_seq} + @since 0.14 *) + + val of_list : (elt * int) list -> t + (** Similar to {!of_seq} + @since 0.14 *) + + val to_list : t -> (elt * int) list + (** @since 0.14 *) end -module MakeCounter(X : Hashtbl.HashedType) = struct +module MakeCounter(X : Hashtbl.HashedType) + : COUNTER + with type elt = X.t + and type t = int Hashtbl.Make(X).t += struct type elt = X.t module T = Hashtbl.Make(X) @@ -272,6 +340,8 @@ module MakeCounter(X : Hashtbl.HashedType) = struct let get tbl x = try T.find tbl x with Not_found -> 0 + let length = T.length + let incr tbl x = let n = get tbl x in T.replace tbl x (n+1) @@ -282,10 +352,46 @@ module MakeCounter(X : Hashtbl.HashedType) = struct then T.remove tbl x else T.replace tbl x (n+n') + let decr tbl x = incr_by tbl 1 x + let add_seq tbl seq = seq (incr tbl) let of_seq seq = let tbl = create 32 in add_seq tbl seq; tbl + + let to_seq tbl yield = T.iter (fun x i -> yield (x,i)) tbl + + let add_list tbl l = + List.iter (fun (x,i) -> incr_by tbl i x) l + + let of_list l = + let tbl = create 32 in + add_list tbl l; + tbl + + let to_list tbl = + T.fold (fun x i acc -> (x,i) :: acc) tbl [] end + +(*$inject + module C = MakeCounter(CCInt) + + let list_int = Q.(make + ~print:Print.(list (pair int int)) + ~small:List.length + ~shrink:Shrink.(list ?shrink:None) + Gen.(list small_int >|= List.map (fun i->i,1)) + ) + + *) + +(*$Q + list_int (fun l -> \ + l |> C.of_list |> C.to_list |> List.length = \ + (l |> CCList.sort_uniq |> List.length)) + list_int (fun l -> \ + l |> C.of_list |> C.to_seq |> Sequence.fold (fun n(_,i)->i+n) 0 = \ + List.fold_left (fun n (_,_) ->n+1) 0 l) +*) diff --git a/src/core/CCHashtbl.mli b/src/core/CCHashtbl.mli index 32b4c6f4..d4079c5a 100644 --- a/src/core/CCHashtbl.mli +++ b/src/core/CCHashtbl.mli @@ -68,6 +68,14 @@ val to_list : ('a,'b) Hashtbl.t -> ('a * 'b) list val of_list : ('a * 'b) list -> ('a,'b) Hashtbl.t (** From the given list of bindings, added in order *) +val update : ('a, 'b) Hashtbl.t -> f:('a -> 'b option -> 'b option) -> k:'a -> unit +(** [update tbl ~f ~k] updates key [k] by calling [f k (Some v)] if + [k] was mapped to [v], or [f k None] otherwise; if the call + returns [None] then [k] is removed/stays removed, if the call + returns [Some v'] then the binding [k -> v'] is inserted + using {!Hashtbl.replace} + @since 0.14 *) + val print : 'a printer -> 'b printer -> ('a, 'b) Hashtbl.t printer (** Printer for table @since 0.13 *) @@ -109,6 +117,14 @@ module type S = sig val of_list : (key * 'a) list -> 'a t (** From the given list of bindings, added in order *) + val update : 'a t -> f:(key -> 'a option -> 'a option) -> k:key -> unit + (** [update tbl ~f ~k] updates key [k] by calling [f k (Some v)] if + [k] was mapped to [v], or [f k None] otherwise; if the call + returns [None] then [k] is removed/stays removed, if the call + returns [Some v'] then the binding [k -> v'] is inserted + using {!Hashtbl.replace} + @since 0.14 *) + val print : key printer -> 'a printer -> 'a t printer (** Printer for tables @since 0.13 *) @@ -169,16 +185,46 @@ module type COUNTER = sig (** Increment the counter for the given element *) val incr_by : t -> int -> elt -> unit - (** Add several occurrences at once *) + (** Add or remove several occurrences at once. [incr_by c x n] + will add [n] occurrences of [x] if [n>0], + and remove [abs n] occurrences if [n<0]. *) val get : t -> elt -> int (** Number of occurrences for this element *) + val decr : t -> elt -> unit + (** Remove one occurrence of the element + @since 0.14 *) + + val length : t -> int + (** Number of distinct elements + @since 0.14 *) + val add_seq : t -> elt sequence -> unit (** Increment each element of the sequence *) val of_seq : elt sequence -> t (** [of_seq s] is the same as [add_seq (create ())] *) + + val to_seq : t -> (elt * int) sequence + (** [to_seq tbl] returns elements of [tbl] along with their multiplicity + @since 0.14 *) + + val add_list : t -> (elt * int) list -> unit + (** Similar to {!add_seq} + @since 0.14 *) + + val of_list : (elt * int) list -> t + (** Similar to {!of_seq} + @since 0.14 *) + + val to_list : t -> (elt * int) list + (** @since 0.14 *) end -module MakeCounter(X : Hashtbl.HashedType) : COUNTER with type elt = X.t +module MakeCounter(X : Hashtbl.HashedType) + : COUNTER + with type elt = X.t + and type t = int Hashtbl.Make(X).t +(** Create a new counter type + The type [t] is exposed @since 0.14 *) diff --git a/src/core/CCHeap.ml b/src/core/CCHeap.ml index 8aaf953b..97bccb00 100644 --- a/src/core/CCHeap.ml +++ b/src/core/CCHeap.ml @@ -76,10 +76,10 @@ end *) (*$QR & ~count:30 - Q.(list_of_size Gen.(return 10_000) int) (fun l -> + Q.(list_of_size Gen.(return 1_000) int) (fun l -> (* put elements into a heap *) let h = H.of_seq H.empty (Sequence.of_list l) in - OUnit.assert_equal 10_000 (H.size h); + OUnit.assert_equal 1_000 (H.size h); let l' = extract_list h in is_sorted l' ) diff --git a/src/core/CCIO.mli b/src/core/CCIO.mli index e1c0d6e8..44e8e257 100644 --- a/src/core/CCIO.mli +++ b/src/core/CCIO.mli @@ -152,7 +152,8 @@ See {!File.walk} if you also need to list directories: module File : sig type 'a or_error = [`Ok of 'a | `Error of string] type t = string - (** A file is always represented by its absolute path *) + (** A file should be represented by its absolute path, but currently + this is not enforced. *) val to_string : t -> string diff --git a/src/core/CCList.ml b/src/core/CCList.ml index 2c6daaae..f6239ecd 100644 --- a/src/core/CCList.ml +++ b/src/core/CCList.ml @@ -152,6 +152,46 @@ let rec fold_while f acc = function fold_while (fun acc b -> if b then acc+1, `Continue else acc, `Stop) 0 [true;true;false;true] = 2 *) +let fold_map f acc l = + let rec aux f acc map_acc l = match l with + | [] -> acc, List.rev map_acc + | x :: l' -> + let acc, y = f acc x in + aux f acc (y :: map_acc) l' + in + aux f acc [] l + +(*$= + (6, ["1"; "2"; "3"]) \ + (fold_map (fun acc x->acc+x, string_of_int x) 0 [1;2;3]) +*) + +(*$Q + Q.(list int) (fun l -> \ + fold_map (fun acc x -> x::acc, x) [] l = (List.rev l, l)) +*) + +let fold_flat_map f acc l = + let rec aux f acc map_acc l = match l with + | [] -> acc, List.rev map_acc + | x :: l' -> + let acc, y = f acc x in + aux f acc (List.rev_append y map_acc) l' + in + aux f acc [] l + +(*$= + (6, ["1"; "a1"; "2"; "a2"; "3"; "a3"]) \ + (let pf = Printf.sprintf in \ + fold_flat_map (fun acc x->acc+x, [pf "%d" x; pf "a%d" x]) 0 [1;2;3]) +*) + +(*$Q + Q.(list int) (fun l -> \ + fold_flat_map (fun acc x -> x::acc, [x;x+10]) [] l = \ + (List.rev l, flat_map (fun x->[x;x+10]) l) ) +*) + let init len f = let rec init_rec acc i f = if i=0 then f i :: acc @@ -775,14 +815,17 @@ module Zipper = struct let empty = [], [] let is_empty = function - | _, [] -> true - | _, _::_ -> false + | [], [] -> true + | _ -> false - let to_list (l,r) = - let rec append l acc = match l with - | [] -> acc - | x::l' -> append l' (x::acc) - in append l r + let to_list (l,r) = List.rev_append l r + + let to_rev_list (l,r) = List.rev_append r l + + (*$Q + Q.(pair (list small_int)(list small_int)) (fun z -> \ + Zipper.to_list z = List.rev (Zipper.to_rev_list z)) + *) let make l = [], l @@ -790,10 +833,18 @@ module Zipper = struct | x::l, r -> l, x::r | [], r -> [], r + let left_exn = function + | x::l, r -> l, x::r + | [], _ -> invalid_arg "zipper.left_exn" + let right = function | l, x::r -> x::l, r | l, [] -> l, [] + let right_exn = function + | l, x::r -> x::l, r + | _, [] -> invalid_arg "zipper.right_exn" + let modify f z = match z with | l, [] -> begin match f None with @@ -806,6 +857,10 @@ module Zipper = struct | Some _ -> l, x::r end + let is_focused = function + | _, [] -> true + | _ -> false + let focused = function | _, x::_ -> Some x | _, [] -> None @@ -813,6 +868,25 @@ module Zipper = struct let focused_exn = function | _, x::_ -> x | _, [] -> raise Not_found + + let insert x (l,r) = l, x::r + + let remove (l,r) = match r with + | [] -> l, [] + | _ :: r' -> l, r' + + (*$Q + Q.(triple int (list small_int)(list small_int)) (fun (x,l,r) -> \ + Zipper.insert x (l,r) |> Zipper.remove = (l,r)) + *) + + let drop_before (_, r) = [], r + + let drop_after (l, r) = match r with + | [] -> l, [] + | x :: _ -> l, [x] + + let drop_after_and_focused (l, _) = l, [] end (** {2 References on Lists} *) diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 02d8c1ce..5158730e 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -66,6 +66,16 @@ val fold_while : ('a -> 'b -> 'a * [`Stop | `Continue]) -> 'a -> 'b t -> 'a indicated by the accumulator @since 0.8 *) +val fold_map : ('acc -> 'a -> 'acc * 'b) -> 'acc -> 'a list -> 'acc * 'b list +(** [fold_map f acc l] is a [fold_left]-like function, but it also maps the + list to another list. + @since 0.14 *) + +val fold_flat_map : ('acc -> 'a -> 'acc * 'b list) -> 'acc -> 'a list -> 'acc * 'b list +(** [fold_map f acc l] is a [fold_left]-like function, but it also maps the + list to a list of list that is then [flatten]'d.. + @since 0.14 *) + val init : int -> (int -> 'a) -> 'a t (** Similar to {!Array.init} @since 0.6 *) @@ -292,15 +302,28 @@ end module Zipper : sig type 'a t = 'a list * 'a list + (** The pair [l, r] represents the list [List.rev_append l r], but + with the focus on [r]. *) val empty : 'a t (** Empty zipper *) val is_empty : _ t -> bool - (** Empty zipper, or at the end of the zipper? *) + (** Empty zipper? Returns true iff the two lists are empty. *) + + (*$T + Zipper.(is_empty empty) + not ([42] |> Zipper.make |> Zipper.right |> Zipper.is_empty) + *) val to_list : 'a t -> 'a list - (** Convert the zipper back to a list *) + (** Convert the zipper back to a list. + [to_list (l,r)] is [List.rev_append l r] *) + + val to_rev_list : 'a t -> 'a list + (** Convert the zipper back to a {i reversed} list. + In other words, [to_list (l,r)] is [List.rev_append r l] + @since 0.14 *) val make : 'a list -> 'a t (** Create a zipper pointing at the first element of the list *) @@ -308,13 +331,37 @@ module Zipper : sig val left : 'a t -> 'a t (** Go to the left, or do nothing if the zipper is already at leftmost pos *) + val left_exn : 'a t -> 'a t + (** Go to the left, or + @raise Invalid_argument if the zipper is already at leftmost pos + @since 0.14 *) + val right : 'a t -> 'a t (** Go to the right, or do nothing if the zipper is already at rightmost pos *) + val right_exn : 'a t -> 'a t + (** Go to the right, or + @raise Invalid_argument if the zipper is already at rightmost position + @since 0.14 *) + val modify : ('a option -> 'a option) -> 'a t -> 'a t (** Modify the current element, if any, by returning a new element, or returning [None] if the element is to be deleted *) + val insert : 'a -> 'a t -> 'a t + (** Insert an element at the current position. If an element was focused, + [insert x l] adds [x] just before it, and focuses on [x] + @since 0.14 *) + + val remove : 'a t -> 'a t + (** [remove l] removes the current element, if any. + @since 0.14 *) + + val is_focused : _ t -> bool + (** Is the zipper focused on some element? That is, will {!focused} + return a [Some v]? + @since 0.14 *) + val focused : 'a t -> 'a option (** Returns the focused element, if any. [focused zip = Some _] iff [empty zip = false] *) @@ -322,6 +369,26 @@ module Zipper : sig val focused_exn : 'a t -> 'a (** Returns the focused element, or @raise Not_found if the zipper is at an end *) + + val drop_before : 'a t -> 'a t + (** Drop every element on the "left" (calling {!left} then will do nothing). + @since 0.14 *) + + val drop_after : 'a t -> 'a t + (** Drop every element on the "right" (calling {!right} then will do nothing), + keeping the focused element, if any. + @since 0.14 *) + + val drop_after_and_focused : 'a t -> 'a t + (** Drop every element on the "right" (calling {!right} then will do nothing), + {i including} the focused element if it is present. + @since 0.14 *) + + (*$= + ([1], [2]) (Zipper.drop_after ([1], [2;3])) + ([1], []) (Zipper.drop_after ([1], [])) + ([1], []) (Zipper.drop_after_and_focused ([1], [2;3])) + *) end (** {2 References on Lists} diff --git a/src/core/CCMap.ml b/src/core/CCMap.ml index 0ae3cb3f..ec9c6d0e 100644 --- a/src/core/CCMap.ml +++ b/src/core/CCMap.ml @@ -44,10 +44,16 @@ module type S = sig val of_seq : (key * 'a) sequence -> 'a t + val add_seq : 'a t -> (key * 'a) sequence -> 'a t + (** @since 0.14 *) + val to_seq : 'a t -> (key * 'a) sequence val of_list : (key * 'a) list -> 'a t + val add_list : 'a t -> (key * 'a) list -> 'a t + (** @since 0.14 *) + val to_list : 'a t -> (key * 'a) list val pp : ?start:string -> ?stop:string -> ?arrow:string -> ?sep:string -> @@ -73,17 +79,19 @@ module Make(O : Map.OrderedType) = struct | None -> remove k m | Some v' -> add k v' m - let of_seq s = - let m = ref empty in + let add_seq m s = + let m = ref m in s (fun (k,v) -> m := add k v !m); !m + let of_seq s = add_seq empty s + let to_seq m yield = iter (fun k v -> yield (k,v)) m - let of_list l = - List.fold_left - (fun m (k,v) -> add k v m) empty l + let add_list m l = List.fold_left (fun m (k,v) -> add k v m) m l + + let of_list l = add_list empty l let to_list m = fold (fun k v acc -> (k,v)::acc) m [] @@ -105,11 +113,13 @@ module Make(O : Map.OrderedType) = struct let first = ref true in iter (fun k v -> - if !first then first := false else Format.pp_print_string fmt sep; + if !first then first := false else ( + Format.pp_print_string fmt sep; + Format.pp_print_cut fmt () + ); pp_k fmt k; Format.pp_print_string fmt arrow; pp_v fmt v; - Format.pp_print_cut fmt () ) m; Format.pp_print_string fmt stop end diff --git a/src/core/CCMap.mli b/src/core/CCMap.mli index 2ff1d310..b386ad63 100644 --- a/src/core/CCMap.mli +++ b/src/core/CCMap.mli @@ -47,10 +47,16 @@ module type S = sig val of_seq : (key * 'a) sequence -> 'a t + val add_seq : 'a t -> (key * 'a) sequence -> 'a t + (** @since 0.14 *) + val to_seq : 'a t -> (key * 'a) sequence val of_list : (key * 'a) list -> 'a t + val add_list : 'a t -> (key * 'a) list -> 'a t + (** @since 0.14 *) + val to_list : 'a t -> (key * 'a) list val pp : ?start:string -> ?stop:string -> ?arrow:string -> ?sep:string -> diff --git a/src/core/CCPrint.ml b/src/core/CCPrint.ml index c73dc2ce..9afcf7f9 100644 --- a/src/core/CCPrint.ml +++ b/src/core/CCPrint.ml @@ -46,6 +46,7 @@ let string buf s = Buffer.add_string buf s let bool buf b = Printf.bprintf buf "%B" b let float3 buf f = Printf.bprintf buf "%.3f" f let float buf f = Buffer.add_string buf (string_of_float f) +let char buf c = Buffer.add_char buf c let list ?(start="[") ?(stop="]") ?(sep=", ") pp buf l = let rec pp_list l = match l with @@ -148,6 +149,7 @@ let to_file filename format = module type MONAD_IO = sig type 'a t (** the IO monad *) + type output (** Output channels *) val (>>=) : 'a t -> ('a -> 'b t) -> 'b t diff --git a/src/core/CCPrint.mli b/src/core/CCPrint.mli index a54f3cb8..64eb5d24 100644 --- a/src/core/CCPrint.mli +++ b/src/core/CCPrint.mli @@ -69,6 +69,8 @@ val string : string t val bool : bool t val float3 : float t (* 3 digits after . *) val float : float t +val char : char t +(** @since 0.14 *) val list : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a list t val array : ?start:string -> ?stop:string -> ?sep:string -> 'a t -> 'a array t diff --git a/src/core/CCSet.ml b/src/core/CCSet.ml index 5abed74a..2ebe62ab 100644 --- a/src/core/CCSet.ml +++ b/src/core/CCSet.ml @@ -35,10 +35,16 @@ module type S = sig val of_seq : elt sequence -> t + val add_seq : t -> elt sequence -> t + (** @since 0.14 *) + val to_seq : t -> elt sequence val of_list : elt list -> t + val add_list : t -> elt list -> t + (** @since 0.14 *) + val to_list : t -> elt list val pp : ?start:string -> ?stop:string -> ?sep:string -> @@ -51,14 +57,18 @@ end module Make(O : Map.OrderedType) = struct include Set.Make(O) - let of_seq s = - let set = ref empty in - s (fun x -> set := add x !set); + let add_seq set seq = + let set = ref set in + seq (fun x -> set := add x !set); !set + let of_seq s = add_seq empty s + let to_seq s yield = iter yield s - let of_list l = List.fold_left (fun set x -> add x set) empty l + let add_list = List.fold_left (fun set x -> add x set) + + let of_list l = add_list empty l let to_list = elements @@ -77,9 +87,11 @@ module Make(O : Map.OrderedType) = struct let first = ref true in iter (fun x -> - if !first then first := false else Format.pp_print_string fmt sep; + if !first then first := false else ( + Format.pp_print_string fmt sep; + Format.pp_print_cut fmt () + ); pp_x fmt x; - Format.pp_print_cut fmt () ) m; Format.pp_print_string fmt stop end diff --git a/src/core/CCSet.mli b/src/core/CCSet.mli index a9b1912a..5ec3fe62 100644 --- a/src/core/CCSet.mli +++ b/src/core/CCSet.mli @@ -37,10 +37,16 @@ module type S = sig val of_seq : elt sequence -> t + val add_seq : t -> elt sequence -> t + (** @since 0.14 *) + val to_seq : t -> elt sequence val of_list : elt list -> t + val add_list : t -> elt list -> t + (** @since 0.14 *) + val to_list : t -> elt list val pp : ?start:string -> ?stop:string -> ?sep:string -> diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 7fd6e70e..bf48b58a 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -81,7 +81,7 @@ let _is_sub ~sub i s j ~len = let rec check k = if k = len then true - else sub.[i + k] = s.[j+k] && check (k+1) + else sub.[i+k] = s.[j+k] && check (k+1) in j+len <= String.length s && check 0 @@ -94,7 +94,7 @@ let find ?(start=0) ~sub s = let n = String.length sub in let i = ref start in try - while !i + n < String.length s do + while !i + n <= String.length s do if _is_sub ~sub 0 s !i ~len:n then raise Exit; incr i done; @@ -116,6 +116,41 @@ let rfind ~sub s = with Exit -> !i +(* replace substring [s.[pos]....s.[pos+len-1]] by [by] in [s] *) +let replace_at_ ~pos ~len ~by s = + let b = Buffer.create (length s + length by - len) in + Buffer.add_substring b s 0 pos; + Buffer.add_string b by; + Buffer.add_substring b s (pos+len) (String.length s - pos - len); + Buffer.contents b + +let replace ?(which=`All) ~sub ~by s = + if sub="" then invalid_arg "CCstring.replace"; + match which with + | `Left -> + let i = find ~sub s in + if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s + | `Right -> + let i = rfind ~sub s in + if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s + | `All -> + let b = Buffer.create (String.length s) in + let start = ref 0 in + while !start < String.length s do + let i = find ~start:!start ~sub s in + if i>=0 then ( + (* between last and cur occurrences *) + Buffer.add_substring b s !start (i- !start); + Buffer.add_string b by; + start := i + String.length sub + ) else ( + (* add remainder *) + Buffer.add_substring b s !start (String.length s - !start); + start := String.length s (* stop *) + ) + done; + Buffer.contents b + module Split = struct type split_state = | SplitStop diff --git a/src/core/CCString.mli b/src/core/CCString.mli index e6b86ff1..5173e0be 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -66,6 +66,7 @@ module type S = sig val pp : Buffer.t -> t -> unit val print : Format.formatter -> t -> unit + (** Print the string within quotes *) end (** {2 Strings} *) @@ -102,10 +103,11 @@ val find : ?start:int -> sub:string -> string -> int (** Find [sub] in string, returns its first index or [-1]. Should only be used with very small [sub] *) -(*$T - find ~sub:"bc" "abcd" = 1 - find ~sub:"bc" "abd" = ~-1 - find ~sub:"a" "_a_a_a_" = 1 +(*$= & ~printer:string_of_int + (find ~sub:"bc" "abcd") 1 + (find ~sub:"bc" "abd") ~-1 + (find ~sub:"a" "_a_a_a_") 1 + (find ~sub:"a" ~start:5 "a1a234a") 6 *) val mem : ?start:int -> sub:string -> string -> bool @@ -122,16 +124,39 @@ val rfind : sub:string -> string -> int Should only be used with very small [sub] @since 0.12 *) -(*$T - rfind ~sub:"bc" "abcd" = 1 - rfind ~sub:"bc" "abd" = ~-1 - rfind ~sub:"a" "_a_a_a_" = 5 - rfind ~sub:"bc" "abcdbcd" = 4 +(*$= & ~printer:string_of_int + (rfind ~sub:"bc" "abcd") 1 + (rfind ~sub:"bc" "abd") ~-1 + (rfind ~sub:"a" "_a_a_a_") 5 + (rfind ~sub:"bc" "abcdbcd") 4 + (rfind ~sub:"a" "a1a234a") 6 +*) + +val replace : ?which:[`Left|`Right|`All] -> sub:string -> by:string -> string -> string +(** [replace ~sub ~by s] replaces some occurrences of [sub] by [by] in [s] + @param which decides whether the occurrences to replace are: + {ul + {- [`Left] first occurrence from the left (beginning)} + {- [`Right] first occurrence from the right (end)} + {- [`All] all occurrences (default)} + } + @raise Invalid_argument if [sub = ""] + @since 0.14 *) + +(*$= & ~printer:CCFun.id + (replace ~which:`All ~sub:"a" ~by:"b" "abcdabcd") "bbcdbbcd" + (replace ~which:`Left ~sub:"a" ~by:"b" "abcdabcd") "bbcdabcd" + (replace ~which:`Right ~sub:"a" ~by:"b" "abcdabcd") "abcdbbcd" + (replace ~which:`All ~sub:"ab" ~by:"hello" " abab cdabb a") \ + " hellohello cdhellob a" + (replace ~which:`Left ~sub:"ab" ~by:"nope" " a b c d ") " a b c d " + (replace ~sub:"a" ~by:"b" "1aa234a") "1bb234b" *) val is_sub : sub:string -> int -> string -> int -> len:int -> bool (** [is_sub ~sub i s j ~len] returns [true] iff the substring of - [sub] starting at position [i] and of length [len] *) + [sub] starting at position [i] and of length [len] is a substring + of [s] starting at position [j] *) val repeat : string -> int -> string (** The same string, repeated n times *) @@ -177,6 +202,7 @@ val unlines_gen : string gen -> string (*$Q Q.printable_string (fun s -> unlines (lines s) = s) + Q.printable_string (fun s -> unlines_gen (lines_gen s) = s) *) val set : string -> int -> char -> string @@ -355,4 +381,9 @@ module Sub : sig Sub.make "abcde" 1 3 |> Sub.copy = "bcd" Sub.full "abcde" |> Sub.copy = "abcde" *) + + (*$T + let sub = Sub.make " abc " 1 ~len:3 in \ + "\"abc\"" = (CCFormat.to_string Sub.print sub) + *) end diff --git a/src/core/CCVector.ml b/src/core/CCVector.ml index 60f68286..088b40e6 100644 --- a/src/core/CCVector.ml +++ b/src/core/CCVector.ml @@ -68,6 +68,16 @@ let create_with ?(capacity=128) x = { (create_with ~capacity:200 1 |> capacity) >= 200 *) +let return x = { + size=1; + vec= [| x |]; +} + +(*$T + return 42 |> to_list = [42] + return 42 |> length = 1 +*) + let make n x = { size=n; vec=Array.make n x; @@ -107,13 +117,12 @@ let _grow v x = _resize v size ) -(* resize so that capacity is at least size. Use a doubling-size - strategy so that calling many times [ensure] will +(* v is not empty; ensure it has at least [size] slots. + + Use a doubling-size strategy so that calling many times [ensure] will behave well *) -let ensure v size = - if Array.length v.vec = 0 - then () - else if size > Sys.max_array_length +let ensure_not_empty_ v size = + if size > Sys.max_array_length then failwith "vec.ensure: size too big" else ( let n = ref (max 16 (Array.length v.vec)) in @@ -121,6 +130,16 @@ let ensure v size = _resize v !n ) +let ensure_with ~init v size = + if Array.length v.vec = 0 + then v.vec <- Array.make size init + else ensure_not_empty_ v size + +let ensure v size = + if Array.length v.vec = 0 + then () + else ensure_not_empty_ v size + let clear v = v.size <- 0 @@ -134,14 +153,19 @@ let clear v = let is_empty v = v.size = 0 -let push_unsafe v x = +let push_unsafe_ v x = Array.unsafe_set v.vec v.size x; v.size <- v.size + 1 let push v x = if v.size = Array.length v.vec then _grow v x; - push_unsafe v x + push_unsafe_ v x + +(*$T + let v = create () in push v 1; to_list v = [1] + let v = of_list [1;2;3] in push v 4; to_list v = [1;2;3;4] +*) (** add all elements of b to a *) let append a b = @@ -203,6 +227,25 @@ let append_array a b = append_array v1 v2; to_list v1 = CCList.(0--9) *) +let append_list a b = match b with + | [] -> () + | x :: _ -> + (* need to push at least one elem *) + let len_a = a.size in + let len_b = List.length b in + ensure_with ~init:x a (len_a + len_b); + List.iter (push_unsafe_ a) b; + () + +(*$Q + Q.(pair (list int)(list int)) (fun (l1,l2) -> \ + let v = of_list l1 in append_list v l2; \ + to_list v = (l1 @ l2)) + Q.(pair (list int)(list int)) (fun (l1,l2) -> \ + let v = of_list l1 in append_list v l2; \ + length v = List.length l1 + List.length l2) +*) + (*$inject let gen x = let small = length in @@ -410,7 +453,7 @@ let filter p v = else ( let v' = create_with ~capacity:v.size v.vec.(0) in Array.iter - (fun x -> if p x then push_unsafe v' x) + (fun x -> if p x then push_unsafe_ v' x) v.vec; v' ) @@ -454,7 +497,9 @@ let find_exn p v = let n = v.size in let rec check i = if i = n then raise Not_found - else if p v.vec.(i) then v.vec.(i) + else + let x = v.vec.(i) in + if p x then x else check (i+1) in check 0 @@ -462,6 +507,23 @@ let find p v = try Some (find_exn p v) with Not_found -> None +let find_map f v = + let n = v.size in + let rec search i = + if i=n then None + else match f v.vec.(i) with + | None -> search (i+1) + | Some _ as res -> res + in + search 0 + +(*$Q + Q.(list small_int) (fun l -> \ + let v = of_list l in \ + let f x = x>30 && x < 35 in \ + find_map (fun x -> if f x then Some x else None) v = find f v) +*) + let filter_map f v = let v' = create () in iter @@ -476,20 +538,31 @@ let flat_map f v = iter (fun x -> iter (push v') (f x)) v; v' -let flat_map' f v = +let flat_map_seq f v = let v' = create () in iter (fun x -> let seq = f x in - seq (fun y -> push v' y) + append_seq v' seq; ) v; v' +let flat_map_list f v = + let v' = create () in + iter + (fun x -> + let l = f x in + append_list v' l; + ) v; + v' + +let flat_map' = flat_map_seq + let (>>=) x f = flat_map f x let (>|=) x f = map f x -let rev' v = +let rev_in_place v = if v.size > 0 then ( let n = v.size in @@ -502,9 +575,11 @@ let rev' v = done ) +let rev' = rev_in_place + let rev v = let v' = copy v in - rev' v'; + rev_in_place v'; v' (*$T @@ -513,6 +588,21 @@ let rev v = rev (create ()) |> to_list = [] *) +let rev_iter f v = + for i = v.size-1 downto 0 do + f v.vec.(i) + done + +(*$T + let v = of_list [1;2;3] in (fun f->rev_iter f v) |> Sequence.to_list = [3;2;1] +*) + +(*$Q + Q.(list int) (fun l -> \ + let v = of_list l in \ + (fun f->rev_iter f v) |> Sequence.to_list = List.rev l) +*) + let size v = v.size let length v = v.size @@ -531,6 +621,16 @@ let of_seq ?(init=create ()) seq = let to_seq v k = iter k v +let to_seq_rev v k = + for i = v.size - 1 downto 0 do + k (Array.unsafe_get v.vec i) + done + +(*$Q + Q.(list int) (fun l -> \ + let v= of_list l in v |> to_seq_rev |> Sequence.to_rev_list = l) +*) + let slice_seq v start len = assert (start >= 0 && len >= 0); fun k -> @@ -569,7 +669,7 @@ let of_list l = match l with | [] -> create() | x::_ -> let v = create_with ~capacity:(List.length l + 5) x in - List.iter (push_unsafe v) l; + List.iter (push_unsafe_ v) l; v (*$T diff --git a/src/core/CCVector.mli b/src/core/CCVector.mli index 79cc9798..12268b75 100644 --- a/src/core/CCVector.mli +++ b/src/core/CCVector.mli @@ -59,6 +59,10 @@ val create_with : ?capacity:int -> 'a -> ('a, rw) t @param capacity the size of the underlying array {b caution}: the value will likely not be GC'd before the vector is. *) +val return : 'a -> ('a, 'mut) t +(** Singleton vector + @since 0.14 *) + val make : int -> 'a -> ('a, 'mut) t (** [make n x] makes a vector of size [n], filled with [x] *) @@ -68,9 +72,16 @@ val init : int -> (int -> 'a) -> ('a, 'mut) t val clear : ('a, rw) t -> unit (** clear the content of the vector *) +val ensure_with : init:'a -> ('a, rw) t -> int -> unit +(** Hint to the vector that it should have at least the given capacity. + @param init if [capacity v = 0], used as a filler + element for the underlying array (see {!create_with}) + @since 0.14 *) + val ensure : ('a, rw) t -> int -> unit (** Hint to the vector that it should have at least the given capacity. - Just a hint, will not be enforced if the vector is empty. *) + Just a hint, will not be enforced if the vector is empty and [init] + is not provided. *) val is_empty : ('a, _) t -> bool (** is the vector empty? *) @@ -87,6 +98,10 @@ val append_array : ('a, rw) t -> 'a array -> unit val append_seq : ('a, rw) t -> 'a sequence -> unit (** Append content of sequence *) +val append_list : ('a, rw) t -> 'a list -> unit +(** Append content of list + @since 0.14 *) + val equal : 'a equal -> ('a,_) t equal val compare : 'a ord -> ('a,_) t ord @@ -164,14 +179,30 @@ val find_exn : ('a -> bool) -> ('a,_) t -> 'a (** find an element that satisfies the predicate, or @raise Not_found if no element does *) +val find_map : ('a -> 'b option) -> ('a,_) t -> 'b option +(** [find_map f v] returns the first [Some y = f x] for [x] in [v], + or [None] if [f x = None] for each [x] in [v] + @since 0.14 *) + val filter_map : ('a -> 'b option) -> ('a,_) t -> ('b, 'mut) t (** Map elements with a function, possibly filtering some of them out *) val flat_map : ('a -> ('b,_) t) -> ('a,_) t -> ('b, 'mut) t (** Map each element to a sub-vector *) +val flat_map_seq : ('a -> 'b sequence) -> ('a,_) t -> ('b, 'mut) t +(** Like {!flat_map}, but using {!sequence} for + intermediate collections. + @since 0.14 *) + +val flat_map_list : ('a -> 'b list) -> ('a,_) t -> ('b, 'mut) t +(** Like {!flat_map}, but using {!list} for + intermediate collections. + @since 0.14 *) + val flat_map' : ('a -> 'b sequence) -> ('a,_) t -> ('b, 'mut) t -(** Like {!flat_map}, but using {!sequence} for intermediate collections *) +(** Alias to {!flat_map_seq} + @deprecated since 0.14 , use {!flat_map_seq} *) val (>>=) : ('a,_) t -> ('a -> ('b,_) t) -> ('b, 'mut) t (** Infix version of {!flat_map} *) @@ -194,8 +225,16 @@ val remove : ('a, rw) t -> int -> unit val rev : ('a,_) t -> ('a, 'mut) t (** Reverse the vector *) +val rev_in_place : ('a, rw) t -> unit +(** Reverse the vector in place + @since 0.14 *) + val rev' : ('a, rw) t -> unit -(** Reverse the vector in place *) +(** @deprecated since 0.14 old name for {!rev_in_place} *) + +val rev_iter : ('a -> unit) -> ('a,_) t -> unit +(** [rev_iter f a] is the same as [iter f (rev a)], only more efficient. + @since 0.14 *) val size : ('a,_) t -> int (** number of elements in vector *) @@ -225,6 +264,11 @@ val of_seq : ?init:('a,rw) t -> 'a sequence -> ('a, rw) t val to_seq : ('a,_) t -> 'a sequence +val to_seq_rev : ('a, _) t -> 'a sequence +(** [to_seq_rev v] returns the sequence of elements of [v] in reverse order, + that is, the last elements of [v] are iterated on first. + @since 0.14 *) + val slice : ('a,rw) t -> ('a array * int * int) (** Vector as an array slice. By doing it we expose the internal array, so be careful! *) diff --git a/src/core/META b/src/core/META index 13797b92..1083a731 100644 --- a/src/core/META +++ b/src/core/META @@ -1,6 +1,6 @@ # OASIS_START -# DO NOT EDIT (digest: e9cfa451e1c6a3adde9cecf89bbcbff5) -version = "0.13" +# DO NOT EDIT (digest: ca67b641b68531561920de2255f04ea0) +version = "0.14" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers.cma" @@ -9,7 +9,7 @@ archive(native) = "containers.cmxa" archive(native, plugin) = "containers.cmxs" exists_if = "containers.cma" package "unix" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "bytes unix" archive(byte) = "containers_unix.cma" @@ -20,7 +20,7 @@ package "unix" ( ) package "top" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "compiler-libs.common containers containers.data containers.bigarray containers.string containers.unix containers.sexp containers.iter" @@ -32,7 +32,7 @@ package "top" ( ) package "thread" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "containers threads" archive(byte) = "containers_thread.cma" @@ -43,7 +43,7 @@ package "thread" ( ) package "string" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_string.cma" @@ -54,7 +54,7 @@ package "string" ( ) package "sexp" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_sexp.cma" @@ -65,7 +65,7 @@ package "sexp" ( ) package "iter" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." archive(byte) = "containers_iter.cma" archive(byte, plugin) = "containers_iter.cma" @@ -75,7 +75,7 @@ package "iter" ( ) package "io" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_io.cma" @@ -86,7 +86,7 @@ package "io" ( ) package "data" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_data.cma" @@ -97,7 +97,7 @@ package "data" ( ) package "bigarray" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "containers bigarray bytes" archive(byte) = "containers_bigarray.cma" @@ -108,7 +108,7 @@ package "bigarray" ( ) package "advanced" ( - version = "0.13" + version = "0.14" description = "A modular standard library focused on data structures." requires = "containers sequence" archive(byte) = "containers_advanced.cma" diff --git a/src/core/containers.ml b/src/core/containers.ml index 4ee3802b..b31539cc 100644 --- a/src/core/containers.ml +++ b/src/core/containers.ml @@ -61,17 +61,21 @@ end module Fun = CCFun module Hash = CCHash module Int = CCInt -(* FIXME + +(** @since 0.14 *) module Hashtbl = struct include (Hashtbl : module type of Hashtbl with type statistics = Hashtbl.statistics - and module Make := Hashtbl.Make - and module type S := Hashtbl.S + and module Make = Hashtbl.Make and type ('a,'b) t := ('a,'b) Hashtbl.t ) - include CCHashtbl + (* still unable to include CCHashtbl itself, for the polymorphic functions *) + module type S' = CCHashtbl.S + module Make' = CCHashtbl.Make + module Counter = CCHashtbl.MakeCounter + module MakeDefault = CCHashtbl.MakeDefault end -*) + module List = struct include List include CCList diff --git a/src/core/containers.mldylib b/src/core/containers.mldylib index cf3c2569..712a5e15 100644 --- a/src/core/containers.mldylib +++ b/src/core/containers.mldylib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: a6f789ec344733a3ef2952d3113379dc) +# DO NOT EDIT (digest: be2123bb1eb73a2b66dfe501caffd4a2) CCVector CCPrint CCError @@ -23,5 +23,6 @@ CCMap CCFormat CCIO CCInt64 +CCChar Containers # OASIS_STOP diff --git a/src/core/containers.mllib b/src/core/containers.mllib index cf3c2569..712a5e15 100644 --- a/src/core/containers.mllib +++ b/src/core/containers.mllib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: a6f789ec344733a3ef2952d3113379dc) +# DO NOT EDIT (digest: be2123bb1eb73a2b66dfe501caffd4a2) CCVector CCPrint CCError @@ -23,5 +23,6 @@ CCMap CCFormat CCIO CCInt64 +CCChar Containers # OASIS_STOP diff --git a/src/data/CCGraph.ml b/src/data/CCGraph.ml index 72781d08..7b323a2e 100644 --- a/src/data/CCGraph.ml +++ b/src/data/CCGraph.ml @@ -339,6 +339,12 @@ let topo_sort ?eq ?rev ?(tbl=mk_table 128) ~graph seq = let idx_j = CCList.find_idx ((=)j) l |> CCOpt.get_exn |> fst in \ idx_i < idx_j) \ [ 42, 21; 14, 2; 3, 1; 21, 7; 42, 3] + let l = topo_sort ~rev:true ~graph:divisors_graph (Seq.return 42) in \ + List.for_all (fun (i,j) -> \ + let idx_i = CCList.find_idx ((=)i) l |> CCOpt.get_exn |> fst in \ + let idx_j = CCList.find_idx ((=)j) l |> CCOpt.get_exn |> fst in \ + idx_i > idx_j) \ + [ 42, 21; 14, 2; 3, 1; 21, 7; 42, 3] *) (** {2 Lazy Spanning Tree} *) diff --git a/src/data/CCGraph.mli b/src/data/CCGraph.mli index e7f75193..340b312c 100644 --- a/src/data/CCGraph.mli +++ b/src/data/CCGraph.mli @@ -235,7 +235,7 @@ val topo_sort_tag : ?eq:('v -> 'v -> bool) -> graph:('v, 'e) t -> 'v sequence -> 'v list -(** Same as {!topo_sort} *) +(** Same as {!topo_sort} but uses an explicit tag set *) (** {2 Lazy Spanning Tree} *) diff --git a/src/data/CCHashconsedSet.ml b/src/data/CCHashconsedSet.ml index a6533adc..5775316e 100644 --- a/src/data/CCHashconsedSet.ml +++ b/src/data/CCHashconsedSet.ml @@ -263,7 +263,7 @@ module Make(E : ELT) : S with type elt = E.t = struct let add x t = add_rec_ (E.hash x) x t (*$Q & ~count:20 - Q.(list int) (fun l -> \ + Q.(list_of_size Gen.(0 -- 300) int) (fun l -> \ let module S = Make(CCInt) in \ let m = S.of_list l in \ List.for_all (fun x -> S.mem x m) l) @@ -396,7 +396,7 @@ module Make(E : ELT) : S with type elt = E.t = struct else empty (*$Q - Q.(list int) (fun l -> \ + Q.(list_of_size Gen.(0 -- 300) int) (fun l -> \ let module S = Make(CCInt) in \ let s = S.of_list l in S.equal s (S.inter s s)) *) diff --git a/src/data/CCPersistentHashtbl.ml b/src/data/CCPersistentHashtbl.ml index 338d9826..798b04f1 100644 --- a/src/data/CCPersistentHashtbl.ml +++ b/src/data/CCPersistentHashtbl.ml @@ -67,6 +67,12 @@ module type S = sig val length : _ t -> int (** Number of bindings *) + val add : 'a t -> key -> 'a -> 'a t + (** Add the binding to the table, returning a new table. The old binding + for this key, if it exists, is shadowed and will be restored upon + [remove tbl k]. + @since 0.14 *) + val replace : 'a t -> key -> 'a -> 'a t (** Add the binding to the table, returning a new table. This erases the current binding for [key], if any. *) @@ -129,6 +135,10 @@ module type S = sig val pp : key printer -> 'a printer -> 'a t printer val print : key formatter -> 'a formatter -> 'a t formatter + + val stats : _ t -> Hashtbl.statistics + (** Statistics on the internal table. + @since 0.14 *) end (*$inject @@ -155,58 +165,85 @@ end (** {2 Implementation} *) module Make(H : HashedType) : S with type key = H.t = struct - module Table = Hashtbl.Make(H) - (** Imperative hashtable *) - type key = H.t - type 'a t = 'a zipper ref - and 'a zipper = - | Table of 'a Table.t (** Concrete table *) - | Add of key * 'a * 'a t (** Add key *) - | Replace of key * 'a * 'a t (** Replace key by value *) - | Remove of key * 'a t (** As the table, but without given key *) + + (* main hashtable *) + type 'a t = { + mutable arr: 'a p_array; (* invariant: length is a power of 2 *) + length: int; + } + + (* piece of a persistent array *) + and 'a p_array = + | Arr of 'a bucket array + | Set of int * 'a bucket * 'a t + + (* bucket of the hashtbl *) + and 'a bucket = + | Nil + | Cons of key * 'a * 'a bucket + + (* first power of two that is bigger than [than], starting from [n] *) + let rec power_two_larger ~than n = + if n>= than then n else power_two_larger ~than (2*n) let create i = - ref (Table (Table.create i)) + let i = power_two_larger ~than:i 16 in + { length=0; + arr=Arr (Array.make i Nil) + } - let empty () = create 11 + let empty () = create 16 - (* pass continuation to get a tailrec rerooting *) - let rec _reroot t k = match !t with - | Table tbl -> k tbl (* done *) - | Add (key, v, t') -> - _reroot t' - (fun tbl -> - t' := Remove (key, t); - Table.add tbl key v; - t := Table tbl; - k tbl) - | Replace (key, v, t') -> - _reroot t' - (fun tbl -> - let v' = Table.find tbl key in - t' := Replace (key, v', t); - t := Table tbl; - Table.replace tbl key v; - k tbl) - | Remove (key, t') -> - _reroot t' - (fun tbl -> - let v = Table.find tbl key in - t' := Add (key, v, t); - t := Table tbl; - Table.remove tbl key; - k tbl) + let rec reroot_rec_ t k = match t.arr with + | Arr a -> k a + | Set (i, v, t') -> + reroot_rec_ t' (fun a -> + let v' = a.(i) in + a.(i) <- v; + t.arr <- Arr a; + t'.arr <- Set (i, v', t); + k a + ) - (* Reroot: modify the zipper so that the current node is a proper - hashtable, and return the hashtable *) - let reroot t = match !t with - | Table tbl -> tbl - | _ -> _reroot t (fun x -> x) + (* obtain the array *) + let reroot_ t = match t.arr with + | Arr a -> a + | _ -> reroot_rec_ t (fun x -> x) - let is_empty t = Table.length (reroot t) = 0 + let is_empty t = t.length = 0 - let find t k = Table.find (reroot t) k + let length t = t.length + + (* find index of [h] in [a] *) + let find_idx_ a ~h = + (* bitmask 00001111 if length(a) = 10000 *) + h land (Array.length a - 1) + + let rec find_rec_ k l = match l with + | Nil -> raise Not_found + | Cons (k', v', l') -> + if H.equal k k' then v' else find_rec_ k l' + + let find t k = + let a = reroot_ t in + (* unroll like crazy *) + match a.(find_idx_ ~h:(H.hash k) a) with + | Nil -> raise Not_found + | Cons (k1, v1, l1) -> + if H.equal k k1 then v1 + else match l1 with + | Nil -> raise Not_found + | Cons (k2,v2,l2) -> + if H.equal k k2 then v2 + else match l2 with + | Nil -> raise Not_found + | Cons (k3,v3,l3) -> + if H.equal k k3 then v3 + else match l3 with + | Nil -> raise Not_found + | Cons (k4,v4,l4) -> + if H.equal k k4 then v4 else find_rec_ k l4 (*$R let h = H.of_seq my_seq in @@ -249,9 +286,9 @@ module Make(H : HashedType) : S with type key = H.t = struct try Some (find t k) with Not_found -> None - let mem t k = Table.mem (reroot t) k - - let length t = Table.length (reroot t) + let mem t k = + try ignore (find t k); true + with Not_found -> false (*$R let h = H.of_seq @@ -267,33 +304,137 @@ module Make(H : HashedType) : S with type key = H.t = struct ) *) + let rec buck_rev_iter_ ~f l = match l with + | Nil -> () + | Cons (k,v,l') -> buck_rev_iter_ ~f l'; f k v + + (* resize [a] so it has capacity [new_size], and insert [k,v] in it *) + let resize_ k v h a new_size = + assert (new_size > Array.length a); + let a' = Array.make new_size Nil in + (* preserve order of elements by iterating on each bucket in rev order *) + Array.iter + (buck_rev_iter_ + ~f:(fun k v -> + let i = find_idx_ ~h:(H.hash k) a' in + a'.(i) <- Cons (k,v,a'.(i)) + ) + ) + a; + let i = find_idx_ ~h a' in + a'.(i) <- Cons (k,v,a'.(i)); + a' + + (* insert [k,v] in [l] and returns new list and boolean flag indicating + whether it's a new element *) + let rec replace_rec_ k v l = match l with + | Nil -> Cons (k,v,Nil), true + | Cons (k',v',l') -> + if H.equal k k' + then Cons (k,v,l'), false + else + let l', is_new = replace_rec_ k v l' in + Cons (k',v',l'), is_new + let replace t k v = - let tbl = reroot t in - (* create the new hashtable *) - let t' = ref (Table tbl) in - (* update [t] to point to the new hashtable *) - (try - let v' = Table.find tbl k in - t := Replace (k, v', t') - with Not_found -> - t := Remove (k, t') - ); - (* modify the underlying hashtable *) - Table.replace tbl k v; - t' + let a = reroot_ t in + let h = H.hash k in + let i = find_idx_ ~h a in + match a.(i) with + | Nil -> + if t.length > (Array.length a) lsl 1 + then ( + (* resize *) + let new_size = min (2 * (Array.length a)) Sys.max_array_length in + let a = resize_ k v h a new_size in + {length=t.length+1; arr=Arr a} + ) else ( + a.(i) <- Cons (k, v, Nil); + let t' = {length=t.length + 1; arr=Arr a} in + t.arr <- Set (i,Nil,t'); + t' + ) + | Cons _ as l -> + let l', is_new = replace_rec_ k v l in + if is_new && t.length > (Array.length a) lsl 1 + then ( + (* resize and insert [k,v] (again, it's new anyway) *) + let new_size = min (2 * (Array.length a)) Sys.max_array_length in + let a = resize_ k v h a new_size in + {length=t.length+1; arr=Arr a} + ) else ( + (* no resize *) + a.(i) <- l'; + let t' = { + length=if is_new then t.length+1 else t.length; + arr=Arr a; + } in + t.arr <- Set (i,l,t'); + t' + ) + + let add t k v = + let a = reroot_ t in + let h = H.hash k in + let i = find_idx_ ~h a in + if t.length > (Array.length a) lsl 1 + then ( + (* resize *) + let new_size = min (2 * (Array.length a)) Sys.max_array_length in + let a = resize_ k v h a new_size in + {length=t.length+1; arr=Arr a} + ) else ( + (* prepend *) + let old = a.(i) in + a.(i) <- Cons (k, v, old); + let t' = {length=t.length + 1; arr=Arr a} in + t.arr <- Set (i,old,t'); + t' + ) + + (*$R + let h = H.of_seq my_seq in + OUnit.assert_equal "a" (H.find h 1); + OUnit.assert_raises Not_found (fun () -> H.find h 5); + let h1 = H.add h 5 "e" in + OUnit.assert_equal "a" (H.find h1 1); + OUnit.assert_equal "e" (H.find h1 5); + OUnit.assert_equal "a" (H.find h 1); + let h2 = H.add h1 5 "ee" in + OUnit.assert_equal "ee" (H.find h2 5); + OUnit.assert_raises Not_found (fun () -> H.find h 5); + let h3 = H.remove h2 1 in + OUnit.assert_equal "ee" (H.find h3 5); + OUnit.assert_raises Not_found (fun () -> H.find h3 1); + let h4 = H.remove h3 5 in + OUnit.assert_equal "e" (H.find h4 5); + OUnit.assert_equal "ee" (H.find h3 5); + *) + + + (* return [Some l'] if [l] changed into [l'] by removing [k] *) + let rec remove_rec_ k l = match l with + | Nil -> None + | Cons (k', v', l') -> + if H.equal k k' + then Some l' + else match remove_rec_ k l' with + | None -> None + | Some l' -> Some (Cons (k', v', l')) let remove t k = - let tbl = reroot t in - try - let v' = Table.find tbl k in - (* value present, make a new hashtable without this value *) - let t' = ref (Table tbl) in - t := Add (k, v', t'); - Table.remove tbl k; - t' - with Not_found -> - (* not member, nothing to do *) - t + let a = reroot_ t in + let i = find_idx_ ~h:(H.hash k) a in + match a.(i) with + | Nil -> t + | Cons _ as l -> + match remove_rec_ k l with + | None -> t + | Some l' -> + a.(i) <- l'; + let t' = {length=t.length-1; arr=Arr a} in + t.arr <- Set (i,l,t'); + t' (*$R let h = H.of_seq my_seq in @@ -333,40 +474,78 @@ module Make(H : HashedType) : S with type key = H.t = struct | _, Some v' -> replace t k v' let copy t = - let tbl = reroot t in - (* no one will point to the new [t] *) - let t = ref (Table (Table.copy tbl)) in - t + let a = Array.copy (reroot_ t) in + {t with arr=Arr a} + + let rec buck_iter_ ~f l = match l with + | Nil -> () + | Cons (k,v,l') -> f k v; buck_iter_ ~f l' let iter t f = - let tbl = reroot t in - Table.iter f tbl + let a = reroot_ t in + Array.iter (buck_iter_ ~f) a + + let rec buck_fold_ f acc l = match l with + | Nil -> acc + | Cons (k,v,l') -> + let acc = f acc k v in + buck_fold_ f acc l' let fold f acc t = - let tbl = reroot t in - Table.fold (fun k v acc -> f acc k v) tbl acc + let a = reroot_ t in + Array.fold_left (buck_fold_ f) acc a let map f t = - let tbl = reroot t in - let res = Table.create (Table.length tbl) in - Table.iter (fun k v -> Table.replace res k (f k v)) tbl; - ref (Table res) + let rec buck_map_ f l = match l with + | Nil -> Nil + | Cons (k,v,l') -> + let v' = f k v in + Cons (k,v', buck_map_ f l') + in + let a = reroot_ t in + let a' = Array.map (buck_map_ f) a in + {length=t.length; arr=Arr a'} + + let rec buck_filter_ ~f l = match l with + | Nil -> Nil + | Cons (k,v,l') -> + let l' = buck_filter_ ~f l' in + if f k v then Cons (k,v,l') else l' + + let buck_length_ b = buck_fold_ (fun n _ _ -> n+1) 0 b let filter p t = - let tbl = reroot t in - let res = Table.create (Table.length tbl) in - Table.iter (fun k v -> if p k v then Table.replace res k v) tbl; - ref (Table res) + let a = reroot_ t in + let length = ref 0 in + let a' = Array.map + (fun b -> + let b' = buck_filter_ ~f:p b in + length := !length + (buck_length_ b'); + b' + ) a + in + {length= !length; arr=Arr a'} + + let rec buck_filter_map_ ~f l = match l with + | Nil -> Nil + | Cons (k,v,l') -> + let l' = buck_filter_map_ ~f l' in + match f k v with + | None -> l' + | Some v' -> + Cons (k,v',l') let filter_map f t = - let tbl = reroot t in - let res = Table.create (Table.length tbl) in - Table.iter - (fun k v -> match f k v with - | None -> () - | Some v' -> Table.replace res k v' - ) tbl; - ref (Table res) + let a = reroot_ t in + let length = ref 0 in + let a' = Array.map + (fun b -> + let b' = buck_filter_map_ ~f b in + length := !length + (buck_length_ b'); + b' + ) a + in + {length= !length; arr=Arr a'} exception ExitPTbl @@ -383,19 +562,22 @@ module Make(H : HashedType) : S with type key = H.t = struct with ExitPTbl -> true let merge f t1 t2 = - let tbl = Table.create (max (length t1) (length t2)) in - iter t1 - (fun k v1 -> + let tbl = create (max (length t1) (length t2)) in + let tbl = fold + (fun tbl k v1 -> let v2 = try Some (find t2 k) with Not_found -> None in match f k (Some v1) v2 with - | None -> () - | Some v' -> Table.replace tbl k v'); - iter t2 - (fun k v2 -> - if not (mem t1 k) then match f k None (Some v2) with - | None -> () - | Some _ -> Table.replace tbl k v2); - ref (Table tbl) + | None -> tbl + | Some v' -> replace tbl k v') + tbl t1 + in + fold + (fun tbl k v2 -> + if mem t1 k then tbl + else match f k None (Some v2) with + | None -> tbl + | Some _ -> replace tbl k v2 + ) tbl t2 (*$R let t1 = H.of_list [1, "a"; 2, "b1"] in @@ -444,10 +626,7 @@ module Make(H : HashedType) : S with type key = H.t = struct let of_list l = add_list (empty ()) l - let to_list t = - let tbl = reroot t in - let bindings = Table.fold (fun k v acc -> (k,v)::acc) tbl [] in - bindings + let to_list t = fold (fun acc k v -> (k,v)::acc) [] t (*$R let h = H.of_seq my_seq in @@ -457,8 +636,7 @@ module Make(H : HashedType) : S with type key = H.t = struct let to_seq t = fun k -> - let tbl = reroot t in - Table.iter (fun x y -> k (x,y)) tbl + iter t (fun x y -> k (x,y)) (*$R let h = H.of_seq my_seq in @@ -496,5 +674,22 @@ module Make(H : HashedType) : S with type key = H.t = struct Format.fprintf fmt "%a -> %a" pp_k k pp_v v ); Format.pp_print_string fmt "}" + + let stats t = + let a = reroot_ t in + let max_bucket_length = + Array.fold_left (fun n b -> max n (buck_length_ b)) 0 a in + let bucket_histogram = Array.make (max_bucket_length+1) 0 in + Array.iter + (fun b -> + let l = buck_length_ b in + bucket_histogram.(l) <- bucket_histogram.(l) + 1 + ) a; + {Hashtbl. + num_bindings=t.length; + num_buckets=Array.length a; + max_bucket_length; + bucket_histogram; + } end diff --git a/src/data/CCPersistentHashtbl.mli b/src/data/CCPersistentHashtbl.mli index 30de0f03..cc1438a4 100644 --- a/src/data/CCPersistentHashtbl.mli +++ b/src/data/CCPersistentHashtbl.mli @@ -74,6 +74,12 @@ module type S = sig val length : _ t -> int (** Number of bindings *) + val add : 'a t -> key -> 'a -> 'a t + (** Add the binding to the table, returning a new table. The old binding + for this key, if it exists, is shadowed and will be restored upon + [remove tbl k]. + @since 0.14 *) + val replace : 'a t -> key -> 'a -> 'a t (** Add the binding to the table, returning a new table. This erases the current binding for [key], if any. *) @@ -136,6 +142,10 @@ module type S = sig val pp : key printer -> 'a printer -> 'a t printer val print : key formatter -> 'a formatter -> 'a t formatter + + val stats : _ t -> Hashtbl.statistics + (** Statistics on the internal table. + @since 0.14 *) end (** {2 Implementation} *) diff --git a/src/iter/CCKList.ml b/src/iter/CCKList.ml index 6adf9d1d..b09d4dde 100644 --- a/src/iter/CCKList.ml +++ b/src/iter/CCKList.ml @@ -101,10 +101,11 @@ let iteri f l = let length l = fold (fun acc _ -> acc+1) 0 l -let rec take n (l:'a t) () = match l () with - | _ when n=0 -> `Nil - | `Nil -> `Nil - | `Cons (x,l') -> `Cons (x, take (n-1) l') +let rec take n (l:'a t) () = + if n=0 then `Nil + else match l () with + | `Nil -> `Nil + | `Cons (x,l') -> `Cons (x, take (n-1) l') let rec take_while p l () = match l () with | `Nil -> `Nil @@ -440,6 +441,36 @@ let sort_uniq ?(cmp=Pervasives.compare) l = let l = to_list l in uniq (fun x y -> cmp x y = 0) (of_list (List.sort cmp l)) +type 'a memoize = + | MemoThunk + | MemoSave of [`Nil | `Cons of 'a * 'a t] + +let rec memoize f = + let r = ref MemoThunk in + fun () -> match !r with + | MemoSave l -> l + | MemoThunk -> + let l = match f() with + | `Nil -> `Nil + | `Cons (x, tail) -> `Cons (x, memoize tail) + in + r := MemoSave l; + l + +(*$R + let printer = Q.Print.(list int) in + let gen () = + let rec l = let r = ref 0 in fun () -> incr r; `Cons (!r, l) in l + in + let l1 = gen () in + assert_equal ~printer [1;2;3;4] (take 4 l1 |> to_list); + assert_equal ~printer [5;6;7;8] (take 4 l1 |> to_list); + let l2 = gen () |> memoize in + assert_equal ~printer [1;2;3;4] (take 4 l2 |> to_list); + assert_equal ~printer [1;2;3;4] (take 4 l2 |> to_list); +*) + + (** {2 Fair Combinations} *) let rec interleave a b () = match a() with diff --git a/src/iter/CCKList.mli b/src/iter/CCKList.mli index ef3ee73b..2620181e 100644 --- a/src/iter/CCKList.mli +++ b/src/iter/CCKList.mli @@ -191,6 +191,10 @@ val sort_uniq : ?cmp:'a ord -> 'a t -> 'a t finite. O(n ln(n)) time and space. @since 0.3.3 *) +val memoize : 'a t -> 'a t +(** Avoid recomputations by caching intermediate results + @since 0.14 *) + (** {2 Fair Combinations} *) val interleave : 'a t -> 'a t -> 'a t diff --git a/src/sexp/CCSexpM.ml b/src/sexp/CCSexpM.ml index 2dd9e49c..167db917 100644 --- a/src/sexp/CCSexpM.ml +++ b/src/sexp/CCSexpM.ml @@ -93,13 +93,11 @@ let rec print fmt t = match t with | `List [] -> Format.pp_print_string fmt "()" | `List [x] -> Format.fprintf fmt "@[(%a)@]" print x | `List l -> - Format.open_hovbox 2; - Format.pp_print_char fmt '('; + Format.fprintf fmt "@[("; List.iteri (fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; print fmt t')) l; - Format.pp_print_char fmt ')'; - Format.close_box () + Format.fprintf fmt ")@]" let rec print_noindent fmt t = match t with | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) @@ -309,11 +307,13 @@ module MakeDecode(M : MONAD) = struct expr_or_end (fun _ x -> M.return (`Ok x)) t end -module D = MakeDecode(struct +module ID_MONAD = struct type 'a t = 'a let return x = x let (>>=) x f = f x -end) +end + +module D = MakeDecode(ID_MONAD) let parse_string s : t or_error = let n = String.length s in diff --git a/src/sexp/CCSexpM.mli b/src/sexp/CCSexpM.mli index 5507fc20..66186e75 100644 --- a/src/sexp/CCSexpM.mli +++ b/src/sexp/CCSexpM.mli @@ -86,6 +86,14 @@ module MakeDecode(M : MONAD) : sig long enough or isn't a proper S-expression *) end +module ID_MONAD : MONAD +(** The monad that just uses blocking calls as bind + @since 0.14 *) + +module D : module type of MakeDecode(ID_MONAD) +(** Decoder that just blocks when input is not available + @since 0.14 *) + val parse_string : string -> t or_error (** Parse a string *) diff --git a/src/string/CCLevenshtein.ml b/src/string/CCLevenshtein.ml index 7ccbb495..8fe8dee8 100644 --- a/src/string/CCLevenshtein.ml +++ b/src/string/CCLevenshtein.ml @@ -26,6 +26,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Levenshtein distance} *) +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + module type STRING = sig type char_ type t @@ -50,6 +53,15 @@ let rec klist_to_list l = match l () with (*$inject open CCFun + let list_uniq_ = Q.( + let gen = Gen.(list_size (0 -- 100) (string_size ~gen:printable (1 -- 10)) + >|= CCList.sort_uniq ~cmp:String.compare + >|= List.map (fun s->s,s) + ) in + let print = Print.(list (pair string string)) in + let shrink = Shrink.(list ~shrink:(pair string string)) in + make ~small:List.length ~print ~shrink gen + ) *) (*$Q @@ -93,7 +105,7 @@ let rec klist_to_list l = match l () with l, Index.of_list l' in let gen = Q.Gen.( - list_size (3 -- 15) (string_size (0 -- 10)) >|= mklist + list_size (3 -- 15) (string_size (1 -- 10)) >|= mklist ) in let small (l,_) = List.length l in let print (l,_) = Q.Print.(list string) l in @@ -106,12 +118,23 @@ let rec klist_to_list l = match l () with let retrieved = Index.retrieve ~limit:2 idx s |> klist_to_list in List.for_all - (fun s' -> edit_distance s s' <= 2) retrieved + (fun s' -> edit_distance s s' <= 2) retrieved && + List.for_all + (fun s' -> not (edit_distance s s' <= 2) || List.mem s' retrieved) + l ) l ) *) +(*$R +let idx = Index.of_list ["aa", "aa"; "ab", "ab"; "cd", "cd"; "a'c", "a'c"] in + assert_equal ~printer:Q.Print.(list string) + ["a'c"; "aa"; "ab"] + (Index.retrieve ~limit:1 idx "ac" |> CCKList.to_list + |> List.sort Pervasives.compare) +*) + module type S = sig type char_ type string_ @@ -119,74 +142,96 @@ module type S = sig (** {6 Edit Distance} *) val edit_distance : string_ -> string_ -> int - (** Edition distance between two strings. This satisfies the classical - distance axioms: it is always positive, symmetric, and satisfies - the formula [distance a b + distance b c >= distance a c] *) + (** Edition distance between two strings. This satisfies the classical + distance axioms: it is always positive, symmetric, and satisfies + the formula [distance a b + distance b c >= distance a c] *) (** {6 Automaton} An automaton, built from a string [s] and a limit [n], that accepts every string that is at distance at most [n] from [s]. *) type automaton - (** Levenshtein automaton *) + (** Levenshtein automaton *) val of_string : limit:int -> string_ -> automaton - (** Build an automaton from a string, with a maximal distance [limit]. - The automaton will accept strings whose {!edit_distance} to the - parameter is at most [limit]. *) + (** Build an automaton from a string, with a maximal distance [limit]. + The automaton will accept strings whose {!edit_distance} to the + parameter is at most [limit]. *) val of_list : limit:int -> char_ list -> automaton - (** Build an automaton from a list, with a maximal distance [limit] *) + (** Build an automaton from a list, with a maximal distance [limit] *) val debug_print : (out_channel -> char_ -> unit) -> out_channel -> automaton -> unit - (** Output the automaton's structure on the given channel. *) + (** Output the automaton's structure on the given channel. *) val match_with : automaton -> string_ -> bool - (** [match_with a s] matches the string [s] against [a], and returns - [true] if the distance from [s] to the word represented by [a] is smaller - than the limit used to build [a] *) + (** [match_with a s] matches the string [s] against [a], and returns + [true] if the distance from [s] to the word represented by [a] is smaller + than the limit used to build [a] *) (** {6 Index for one-to-many matching} *) module Index : sig type 'b t - (** Index that maps strings to values of type 'b. Internally it is - based on a trie. A string can only map to one value. *) + (** Index that maps strings to values of type 'b. Internally it is + based on a trie. A string can only map to one value. *) val empty : 'b t - (** Empty index *) + (** Empty index *) val is_empty : _ t -> bool val add : 'b t -> string_ -> 'b -> 'b t - (** Add a pair string/value to the index. If a value was already present - for this string it is replaced. *) + (** Add a pair string/value to the index. If a value was already present + for this string it is replaced. *) + + val cardinal : _ t -> int + (** Number of bindings *) val remove : 'b t -> string_ -> 'b t - (** Remove a string (and its associated value, if any) from the index. *) + (** Remove a string (and its associated value, if any) from the index. *) val retrieve : limit:int -> 'b t -> string_ -> 'b klist - (** Lazy list of objects associated to strings close to the query string *) + (** Lazy list of objects associated to strings close to the query string *) val of_list : (string_ * 'b) list -> 'b t - (** Build an index from a list of pairs of strings and values *) + (** Build an index from a list of pairs of strings and values *) val to_list : 'b t -> (string_ * 'b) list - (** Extract a list of pairs from an index *) + (** Extract a list of pairs from an index *) + + val add_seq : 'a t -> (string_ * 'a) sequence -> 'a t + (** @since 0.14 *) + + val of_seq : (string_ * 'a) sequence -> 'a t + (** @since 0.14 *) + + val to_seq : 'a t -> (string_ * 'a) sequence + (** @since 0.14 *) + + val add_gen : 'a t -> (string_ * 'a) gen -> 'a t + (** @since 0.14 *) + + val of_gen : (string_ * 'a) gen -> 'a t + (** @since 0.14 *) + + val to_gen : 'a t -> (string_ * 'a) gen + (** @since 0.14 *) val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a - (** Fold over the stored pairs string/value *) + (** Fold over the stored pairs string/value *) val iter : (string_ -> 'b -> unit) -> 'b t -> unit - (** Iterate on the pairs *) + (** Iterate on the pairs *) val to_klist : 'b t -> (string_ * 'b) klist - (** Conversion to an iterator *) + (** Conversion to an iterator *) end end -module Make(Str : STRING) = struct +module Make(Str : STRING) +: S with type char_ = Str.char_ and type string_ = Str.t = struct type string_ = Str.t type char_ = Str.char_ @@ -678,24 +723,73 @@ module Make(Str : STRING) = struct let iter f idx = fold (fun () str v -> f str v) () idx + let cardinal idx = fold (fun n _ _ -> n+1) 0 idx + let to_list idx = fold (fun acc str v -> (str,v) :: acc) [] idx + let add_seq i s = + let i = ref i in + s (fun (arr,v) -> i := add !i arr v); + !i + + let of_seq s = add_seq empty s + + let to_seq i yield = iter (fun x y -> yield (x,y)) i + + (*$Q + list_uniq_ (fun l -> \ + Sequence.of_list l |> Index.of_seq |> Index.to_seq \ + |> Sequence.to_list |> List.sort Pervasives.compare \ + = List.sort Pervasives.compare l) + *) + + let rec add_gen i g = match g() with + | None -> i + | Some (arr,v) -> add_gen (add i arr v) g + + let of_gen g = add_gen empty g + + let to_gen s = + let st = Stack.create () in + Stack.push ([],s) st; + let rec next () = + if Stack.is_empty st then None + else + let trail, Node (opt, m) = Stack.pop st in + (* explore children *) + M.iter + (fun c node' -> Stack.push (c::trail, node') st) + m; + match opt with + | None -> next() + | Some v -> + let str = Str.of_list (List.rev trail) in + Some (str,v) + in + next + + (*$Q + list_uniq_ (fun l -> \ + Gen.of_list l |> Index.of_gen |> Index.to_gen \ + |> Gen.to_list |> List.sort Pervasives.compare \ + = List.sort Pervasives.compare l) + *) + let to_klist idx = let rec traverse node trail ~(fk:(string_*'a) klist) () = - match node with - | Node (opt, m) -> - (* all alternatives: continue exploring [m], or call [fk] *) - let fk = - M.fold - (fun c node' fk -> traverse node' (c::trail) ~fk) - m fk - in - match opt with - | Some v -> - let str = Str.of_list (List.rev trail) in - `Cons ((str,v), fk) - | _ -> fk () (* fail... or explore subtrees *) + let Node (opt, m) = node in + (* all alternatives: continue exploring [m], or call [fk] *) + let fk = + M.fold + (fun c node' fk -> traverse node' (c::trail) ~fk) + m fk + in + match opt with + | Some v -> + let str = Str.of_list (List.rev trail) in + `Cons ((str,v), fk) + | _ -> fk () (* fail... or explore subtrees *) in traverse idx [] ~fk:(fun () -> `Nil) end diff --git a/src/string/CCLevenshtein.mli b/src/string/CCLevenshtein.mli index a22bbdeb..d99ef49b 100644 --- a/src/string/CCLevenshtein.mli +++ b/src/string/CCLevenshtein.mli @@ -31,6 +31,9 @@ We take inspiration from http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata for the main algorithm and ideas. However some parts are adapted *) +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + (** {2 Abstraction over Strings} Due to the existence of several encodings and string representations we abstract over the type of strings. A string is a finite array of characters @@ -79,15 +82,14 @@ The signature for a given string representation provides 3 main things: A possible use of the index could be: {[ -open Batteries;; -let words = File.with_file_in "/usr/share/dict/english" - (fun i -> IO.read_all i |> String.nsplit ~by:"\\n");; +let words = CCIO.with_in "/usr/share/dict/words" + (fun i -> CCIO.read_all i |> CCString.Split.list_cpy ~by:"\n");; let words = List.map (fun s->s,s) words;; -let idx = Levenshtein.Index.of_list words;; +let idx = CCLevenshtein.Index.of_list words;; -Levenshtein.Index.retrieve ~limit:1 idx "hell" |> Levenshtein.klist_to_list;; +CCLevenshtein.Index.retrieve ~limit:1 idx "hell" |> CCLevenshtein.klist_to_list;; ]} *) @@ -98,70 +100,91 @@ module type S = sig (** {6 Edit Distance} *) val edit_distance : string_ -> string_ -> int - (** Edition distance between two strings. This satisfies the classical - distance axioms: it is always positive, symmetric, and satisfies - the formula [distance a b + distance b c >= distance a c] *) + (** Edition distance between two strings. This satisfies the classical + distance axioms: it is always positive, symmetric, and satisfies + the formula [distance a b + distance b c >= distance a c] *) (** {6 Automaton} An automaton, built from a string [s] and a limit [n], that accepts every string that is at distance at most [n] from [s]. *) type automaton - (** Levenshtein automaton *) + (** Levenshtein automaton *) val of_string : limit:int -> string_ -> automaton - (** Build an automaton from a string, with a maximal distance [limit]. - The automaton will accept strings whose {!edit_distance} to the - parameter is at most [limit]. *) + (** Build an automaton from a string, with a maximal distance [limit]. + The automaton will accept strings whose {!edit_distance} to the + parameter is at most [limit]. *) val of_list : limit:int -> char_ list -> automaton - (** Build an automaton from a list, with a maximal distance [limit] *) + (** Build an automaton from a list, with a maximal distance [limit] *) val debug_print : (out_channel -> char_ -> unit) -> out_channel -> automaton -> unit - (** Output the automaton's structure on the given channel. *) + (** Output the automaton's structure on the given channel. *) val match_with : automaton -> string_ -> bool - (** [match_with a s] matches the string [s] against [a], and returns - [true] if the distance from [s] to the word represented by [a] is smaller - than the limit used to build [a] *) + (** [match_with a s] matches the string [s] against [a], and returns + [true] if the distance from [s] to the word represented by [a] is smaller + than the limit used to build [a] *) (** {6 Index for one-to-many matching} *) module Index : sig type 'b t - (** Index that maps strings to values of type 'b. Internally it is - based on a trie. A string can only map to one value. *) + (** Index that maps strings to values of type 'b. Internally it is + based on a trie. A string can only map to one value. *) val empty : 'b t - (** Empty index *) + (** Empty index *) val is_empty : _ t -> bool val add : 'b t -> string_ -> 'b -> 'b t - (** Add a pair string/value to the index. If a value was already present - for this string it is replaced. *) + (** Add a pair string/value to the index. If a value was already present + for this string it is replaced. *) + + val cardinal : _ t -> int + (** Number of bindings *) val remove : 'b t -> string_ -> 'b t - (** Remove a string (and its associated value, if any) from the index. *) + (** Remove a string (and its associated value, if any) from the index. *) val retrieve : limit:int -> 'b t -> string_ -> 'b klist - (** Lazy list of objects associated to strings close to the query string *) + (** Lazy list of objects associated to strings close to the query string *) val of_list : (string_ * 'b) list -> 'b t - (** Build an index from a list of pairs of strings and values *) + (** Build an index from a list of pairs of strings and values *) val to_list : 'b t -> (string_ * 'b) list - (** Extract a list of pairs from an index *) + (** Extract a list of pairs from an index *) + + val add_seq : 'a t -> (string_ * 'a) sequence -> 'a t + (** @since 0.14 *) + + val of_seq : (string_ * 'a) sequence -> 'a t + (** @since 0.14 *) + + val to_seq : 'a t -> (string_ * 'a) sequence + (** @since 0.14 *) + + val add_gen : 'a t -> (string_ * 'a) gen -> 'a t + (** @since 0.14 *) + + val of_gen : (string_ * 'a) gen -> 'a t + (** @since 0.14 *) + + val to_gen : 'a t -> (string_ * 'a) gen + (** @since 0.14 *) val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a - (** Fold over the stored pairs string/value *) + (** Fold over the stored pairs string/value *) val iter : (string_ -> 'b -> unit) -> 'b t -> unit - (** Iterate on the pairs *) + (** Iterate on the pairs *) val to_klist : 'b t -> (string_ * 'b) klist - (** Conversion to an iterator *) + (** Conversion to an iterator *) end end diff --git a/src/string/CCParse.ml b/src/string/CCParse.ml index 9edc928e..680d82cf 100644 --- a/src/string/CCParse.ml +++ b/src/string/CCParse.ml @@ -85,13 +85,13 @@ exception ParseError of line_num * col_num * (unit -> string) (*$= & ~printer:errpptree (`Ok (N (L 1, N (L 2, L 3)))) \ - (parse_string "(1 (2 3))" ptree) + (parse_string ~p:ptree "(1 (2 3))" ) (`Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \ - (parse_string "((1 2) (3 (4 5)))" ptree) + (parse_string ~p:ptree "((1 2) (3 (4 5)))" ) (`Ok (N (L 1, N (L 2, L 3)))) \ - (parse_string "(1 (2 3))" ptree' ) + (parse_string ~p:ptree' "(1 (2 3))" ) (`Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \ - (parse_string "((1 2) (3 (4 5)))" ptree' ) + (parse_string ~p:ptree' "((1 2) (3 (4 5)))" ) *) (*$R @@ -102,9 +102,26 @@ exception ParseError of line_num * col_num * (unit -> string) in assert_equal ~printer (`Ok ["abc"; "de"; "hello"; "world"]) - (parse_string "[abc , de, hello ,world ]" p); + (parse_string ~p "[abc , de, hello ,world ]"); *) +(*$R + let test n = + let p = CCParse.(U.list ~sep:"," U.int) in + + let l = CCList.(1 -- n) in + let l_printed = + CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l in + + let l' = CCParse.parse_string_exn ~p l_printed in + + assert_equal ~printer:Q.Print.(list int) l l' + in + test 100_000; + test 400_000; + +*) + let const_ x () = x let input_of_string s = @@ -179,59 +196,62 @@ let input_of_chan ?(size=1024) ic = sub=(fun j len -> assert (j + len <= !i); Bytes.sub_string !b j len); } -type 'a t = input -> 'a +type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit -let return x _ = x +let return : 'a -> 'a t = fun x _st ~ok ~err:_ -> ok x let pure = return -let (>|=) p f st = f (p st) -let (>>=) p f st = - let x = p st in - f x st -let (<*>) x y st = - let f = x st in - let g = y st in - f g -let (<* ) x y st = - let res = x st in - let _ = y st in - res -let ( *>) x y st = - let _ = x st in - let res = y st in - res +let (>|=) : 'a t -> ('a -> 'b) -> 'b t + = fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> ok (f x)) +let (>>=) : 'a t -> ('a -> 'b t) -> 'b t + = fun p f st ~ok ~err -> p st ~err ~ok:(fun x -> f x st ~err ~ok) +let (<*>) : ('a -> 'b) t -> 'a t -> 'b t + = fun f x st ~ok ~err -> + f st ~err ~ok:(fun f' -> x st ~err ~ok:(fun x' -> ok (f' x'))) +let (<* ) : 'a t -> _ t -> 'a t + = fun x y st ~ok ~err -> + x st ~err ~ok:(fun res -> y st ~err ~ok:(fun _ -> ok res)) +let ( *>) : _ t -> 'a t -> 'a t + = fun x y st ~ok ~err -> + x st ~err ~ok:(fun _ -> y st ~err ~ok) let junk_ st = ignore (st.next ()) let pf = Printf.sprintf -let fail_ st msg = raise (ParseError (st.lnum(), st.cnum(), msg)) +let fail_ ~err st msg = err (ParseError (st.lnum(), st.cnum(), msg)) -let eoi st = if st.is_done() then () else fail_ st (const_ "expected EOI") -let fail msg st = fail_ st (const_ msg) -let nop _ = () +let eoi st ~ok ~err = + if st.is_done() + then ok () + else fail_ ~err st (const_ "expected EOI") + +let fail msg st ~ok:_ ~err = fail_ ~err st (const_ msg) +let nop _ ~ok ~err:_ = ok() let char c = let msg = pf "expected '%c'" c in - fun st -> if st.next () = c then c else fail_ st (const_ msg) + fun st ~ok ~err -> if st.next () = c then ok c else fail_ ~err st (const_ msg) -let char_if p st = +let char_if p st ~ok ~err = let c = st.next () in - if p c then c else fail_ st (fun () -> pf "unexpected char '%c'" c) + if p c then ok c else fail_ ~err st (fun () -> pf "unexpected char '%c'" c) -let chars_if p st = +let chars_if p st ~ok ~err:_ = let i = st.pos () in let len = ref 0 in while not (st.is_done ()) && p (st.cur ()) do junk_ st; incr len done; - st.sub i !len + ok (st.sub i !len) -let chars1_if p st = - let s = chars_if p st in - if s = "" then fail_ st (const_ "unexpected sequence of chars"); - s +let chars1_if p st ~ok ~err = + chars_if p st ~err + ~ok:(fun s -> + if s = "" then fail_ ~err st (const_ "unexpected sequence of chars"); + ok s + ) -let rec skip_chars p st = +let rec skip_chars p st ~ok ~err = if not (st.is_done ()) && p (st.cur ()) then ( junk_ st; - skip_chars p st - ) + skip_chars p st ~ok ~err + ) else ok() let is_alpha = function | 'a' .. 'z' | 'A' .. 'Z' -> true @@ -255,48 +275,50 @@ let skip_white = skip_chars is_white (* XXX: combine errors? *) -let (<|>) x y st = - let i = st.pos () in - try - x st - with ParseError _ -> - st.backtrack i; (* restore pos *) - y st +let (<|>) : 'a t -> 'a t -> 'a t + = fun x y st ~ok ~err -> + let i = st.pos () in + x st ~ok + ~err:(fun _ -> + st.backtrack i; (* restore pos *) + y st ~ok ~err + ) -let string s st = +let string s st ~ok ~err = let rec check i = i = String.length s || (s.[i] = st.next () && check (i+1)) in - if check 0 then s else fail_ st (fun () -> pf "expected \"%s\"" s) + if check 0 then ok s else fail_ ~err st (fun () -> pf "expected \"%s\"" s) -let rec many_rec p st acc = - if st.is_done () then List.rev acc +let rec many_rec : 'a t -> 'a list -> 'a list t = fun p acc st ~ok ~err -> + if st.is_done () then ok(List.rev acc) else let i = st.pos () in - try - let x = p st in - many_rec p st (x :: acc) - with ParseError _ -> - st.backtrack i; - List.rev acc + p st ~err + ~ok:(fun x -> + many_rec p (x :: acc) st ~ok + ~err:(fun _ -> + st.backtrack i; + ok(List.rev acc) + ) + ) -let many p st = many_rec p st [] +let many : 'a t -> 'a list t + = fun p st ~ok ~err -> many_rec p [] st ~ok ~err -let many1 p st = - let x = p st in - many_rec p st [x] +let many1 : 'a t -> 'a list t = + fun p st ~ok ~err -> + p st ~err ~ok:(fun x -> many_rec p [x] st ~err ~ok) -let rec skip p st = +let rec skip p st ~ok ~err = let i = st.pos () in - let matched = - try - let _ = p st in - true - with ParseError _ -> - false - in - if matched then skip p st else st.backtrack i + p st + ~ok:(fun _ -> skip p st ~ok ~err) + ~err:(fun _ -> + st.backtrack i; + ok() + ) let rec sep1 ~by p = p >>= fun x -> @@ -320,14 +342,14 @@ module MemoTbl = struct end let fix f = - let rec p st = f p st in + let rec p st ~ok ~err = f p st ~ok ~err in p -let memo p = +let memo (type a) (p:a t):a t = let id = !MemoTbl.id_ in incr MemoTbl.id_; let r = ref None in (* used for universal encoding *) - fun input -> + fun input ~ok ~err -> let i = input.pos () in let (lazy tbl) = input.memo in try @@ -337,50 +359,57 @@ let memo p = f (); begin match !r with | None -> assert false - | Some (MemoTbl.Ok x) -> x - | Some (MemoTbl.Fail e) -> raise e + | Some (MemoTbl.Ok x) -> ok x + | Some (MemoTbl.Fail e) -> err e end with Not_found -> (* parse, and save *) - try - let x = p input in - H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Ok x)); - x - with (ParseError _) as e -> - H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Fail e)); - raise e + p input + ~err:(fun e -> + H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Fail e)); + err e + ) + ~ok:(fun x -> + H.replace tbl (i,id) (fun () -> r := Some (MemoTbl.Ok x)); + ok x + ) let fix_memo f = let rec p = let p' = lazy (memo p) in - fun st -> f (Lazy.force p') st + fun st ~ok ~err -> f (Lazy.force p') st ~ok ~err in p -let parse_exn ~input p = p input +let parse_exn ~input ~p = + let res = ref None in + p input ~ok:(fun x -> res := Some x) ~err:(fun e -> raise e); + match !res with + | None -> failwith "no input returned by parser" + | Some x -> x -let parse ~input p = - try `Ok (parse_exn ~input p) +let parse ~input ~p = + try `Ok (parse_exn ~input ~p) with ParseError (lnum, cnum, msg) -> `Error (Printf.sprintf "at line %d, column %d: error, %s" lnum cnum (msg ())) -let parse_string s p = parse ~input:(input_of_string s) p -let parse_string_exn s p = parse_exn ~input:(input_of_string s) p +let parse_string s ~p = parse ~input:(input_of_string s) ~p +let parse_string_exn s ~p = parse_exn ~input:(input_of_string s) ~p -let parse_file_exn ?size ~file p = +let parse_file_exn ?size ~file ~p = let ic = open_in file in let input = input_of_chan ?size ic in try - let res = parse_exn ~input p in + let res = parse_exn ~input ~p in close_in ic; res with e -> close_in ic; raise e -let parse_file ?size ~file p = +let parse_file ?size ~file ~p = try - `Ok (parse_file_exn ?size ~file p) + `Ok (parse_file_exn ?size ~file ~p) with | ParseError (lnum, cnum, msg) -> `Error (Printf.sprintf "at line %d, column %d: error, %s" lnum cnum (msg ())) @@ -409,4 +438,21 @@ module U = struct let word = map2 prepend_str (char_if is_alpha) (chars_if is_alpha_num) + + let pair ?(start="(") ?(stop=")") ?(sep=",") p1 p2 = + string start *> skip_white *> + p1 >>= fun x1 -> + skip_white *> string sep *> skip_white *> + p2 >>= fun x2 -> + string stop *> return (x1,x2) + + let triple ?(start="(") ?(stop=")") ?(sep=",") p1 p2 p3 = + string start *> skip_white *> + p1 >>= fun x1 -> + skip_white *> string sep *> skip_white *> + p2 >>= fun x2 -> + skip_white *> string sep *> skip_white *> + p3 >>= fun x3 -> + string stop *> return (x1,x2,x3) + end diff --git a/src/string/CCParse.mli b/src/string/CCParse.mli index da4383ec..29be44d8 100644 --- a/src/string/CCParse.mli +++ b/src/string/CCParse.mli @@ -27,6 +27,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Very Simple Parser Combinators} +{b status} still a bit unstable, the type {!'a t} might still change. + Examples: {6 parse recursive structures} @@ -59,6 +61,21 @@ let p = U.list ~sep:"," U.word;; parse_string_exn "[abc , de, hello ,world ]" p;; ]} +{6 Stress Test} +This makes a list of 100_000 integers, prints it and parses it back. + +{[ +let p = CCParse.(U.list ~sep:"," U.int);; + +let l = CCList.(1 -- 100_000);; +let l_printed = + CCFormat.to_string (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l;; + +let l' = CCParse.parse_string_exn ~p l_printed;; + +assert (l=l');; +]} + @since 0.11 *) @@ -109,8 +126,14 @@ val input_of_chan : ?size:int -> in_channel -> input (** {2 Combinators} *) -type 'a t = input -> 'a -(** @raise ParseError in case of failure *) +type 'a t = input -> ok:('a -> unit) -> err:(exn -> unit) -> unit +(** Takes the input and two continuations: + {ul + {- [ok] to call with the result when it's done} + {- [err] to call when the parser met an error} + } + The type definition changed since 0.14 to avoid stack overflows + @raise ParseError in case of failure *) val return : 'a -> 'a t (** Always succeeds, without consuming its input *) @@ -238,28 +261,31 @@ val fix_memo : ('a t -> 'a t) -> 'a t (** Same as {!fix}, but the fixpoint is memoized. @since 0.13 *) -(** {2 Parse} *) +(** {2 Parse} -val parse : input:input -> 'a t -> 'a or_error + Those functions have a label [~p] on the parser, since 0.14. +*) + +val parse : input:input -> p:'a t -> 'a or_error (** [parse ~input p] applies [p] on the input, and returns [`Ok x] if [p] succeeds with [x], or [`Error s] otherwise *) -val parse_exn : input:input -> 'a t -> 'a +val parse_exn : input:input -> p:'a t -> 'a (** @raise ParseError if it fails *) -val parse_string : string -> 'a t -> 'a or_error +val parse_string : string -> p:'a t -> 'a or_error (** Specialization of {!parse} for string inputs *) -val parse_string_exn : string -> 'a t -> 'a +val parse_string_exn : string -> p:'a t -> 'a (** @raise ParseError if it fails *) -val parse_file : ?size:int -> file:string -> 'a t -> 'a or_error +val parse_file : ?size:int -> file:string -> p:'a t -> 'a or_error (** [parse_file ~file p] parses [file] with [p] by opening the file and using {!input_of_chan}. @param size size of chunks read from file @since 0.13 *) -val parse_file_exn : ?size:int -> file:string -> 'a t -> 'a +val parse_file_exn : ?size:int -> file:string -> p:'a t -> 'a (** Unsafe version of {!parse_file} @since 0.13 *) @@ -281,4 +307,16 @@ module U : sig val map2 : ('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t val map3 : ('a -> 'b -> 'c -> 'd) -> 'a t -> 'b t -> 'c t -> 'd t + + val pair : ?start:string -> ?stop:string -> ?sep:string -> + 'a t -> 'b t -> ('a * 'b) t + (** Parse a pair using OCaml whitespace conventions. + The default is "(a, b)". + @since 0.14 *) + + val triple : ?start:string -> ?stop:string -> ?sep:string -> + 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t + (** Parse a triple using OCaml whitespace conventions. + The default is "(a, b, c)". + @since 0.14 *) end diff --git a/src/threads/CCFuture.mli b/src/threads/CCFuture.mli index ff4691a5..c42a5785 100644 --- a/src/threads/CCFuture.mli +++ b/src/threads/CCFuture.mli @@ -55,7 +55,7 @@ val make2 : ('a -> 'b -> 'c) -> 'a -> 'b -> 'c t val get : 'a t -> 'a (** Blocking get: wait for the future to be evaluated, and get the value, or the exception that failed the future is returned. - @raise e if the exception failed with e *) + raise e if the future failed with e *) val state : 'a t -> 'a state (** State of the future *) diff --git a/src/threads/CCThread.ml b/src/threads/CCThread.ml index a482b030..3e1b68ed 100644 --- a/src/threads/CCThread.ml +++ b/src/threads/CCThread.ml @@ -53,7 +53,7 @@ module Barrier = struct with_lock_ b (fun () -> while not b.activated do - Condition.wait b.cond b.lock + Condition.wait b.cond b.lock done )