diff --git a/CHANGELOG.md b/CHANGELOG.md index 50fa1a62..21485845 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,39 @@ # Changelog +## 0.12 + +### breaking + +- change type of `CCString.blit` so it writes into `Bytes.t` +- better default opening flags for `CCIO.with_{in, out}` + +### non-breaking + +note: use of `containers.io` is deprecated (its only module has moved to `containers`) + +- add `CCString.mem` +- add `CCString.set` for updating immutable strings +- add `CCList.cons` function +- enable `-safe-string` on the project; fix `-safe-string` issues +- move `CCIO` from `containers.io` to `containers`, add dummy module in `containers.io` +- add `CCIO.read_all_bytes`, reading a whole file into a `Bytes.t` +- add `CCIO.with_in_out` to read and write a file +- add `CCArray1` in containers.bigarray, a module on 1-dim bigarrays (experimental) +- add module `CCGraph` in `containers.data`, a simple graph abstraction similar to `LazyGraph` +- add a lot of string functions in `CCString` +- add `CCError.catch`, in prevision of the future standard `Result.t` type +- add `CCError.Infix` module +- add `CCHashconsedSet` in `containers.data` (set with maximal struct sharing) + +- fix: use the proper array module in `CCRingBuffer` +- bugfix: `CCRandom.float_range` + ## 0.11 - add `CCList.{remove,is_empty}` - add `CCOpt.is_none` - remove packs for `containers_string` and `containers_advanced` - add `Containers_string.Parse`, very simple monadic parser combinators -- remove warning from `.merlin` -- attempts of bugfix in PrintBox for unicode text (wip) - add `CCList.{find_pred,find_pred_exn}` - bugfix in `CCUnix.escape_str` - add methods and accessors to `CCUnix` @@ -84,7 +110,7 @@ ## 0.7 -### breaking +#### breaking - remove `cgi`/ - removed useless Lwt-related module diff --git a/README.md b/README.md index 6b8dab58..b583602d 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ What is _containers_? - Several small additional libraries that complement it: * `containers.data` with additional data structures that don't have an equivalent in the standard library; - * `containers.io` with utils to handle files and I/O streams; + * `containers.io` (deprecated) * `containers.iter` with list-like and tree-like iterators; * `containers.string` (in directory `string`) with a few packed modules that deal with strings (Levenshtein distance, @@ -26,7 +26,7 @@ What is _containers_? be able to deal with your favorite unicode library). - A sub-library with complicated abstractions, `containers.advanced` (with a LINQ-like query module, batch operations using GADTs, and others). -- Utilities aroud the `unix` library in `containers.unix` (mainly to spawn +- Utilities around the `unix` library in `containers.unix` (mainly to spawn sub-processes) - A bigstring module using `bigarray` in `containers.bigarray` - A lightweight S-expression printer and streaming parser in `containers.sexp` @@ -50,6 +50,7 @@ See [this file](https://github.com/c-cube/ocaml-containers/blob/master/CHANGELOG - the [github wiki](https://github.com/c-cube/ocaml-containers/wiki) - on IRC, ask `companion_cube` on `#ocaml` +- [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/c-cube/ocaml-containers?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) (experimental, might not exist forever) ## Use @@ -98,6 +99,7 @@ Documentation [here](http://cedeela.fr/~simon/software/containers). - `CCPrint` (printing combinators) - `CCHash` (hashing combinators) - `CCError` (monadic error handling, very useful) +- `CCIO`, basic utilities for IO (channels, files) ### Containers.data @@ -107,12 +109,20 @@ Documentation [here](http://cedeela.fr/~simon/software/containers). - `CCMultimap` and `CCMultiset`, functors defining persistent structures - `CCFQueue`, a purely functional double-ended queue structure - `CCBV`, mutable bitvectors -- `CCPersistentHashtbl`, a semi-persistent hashtable (similar to [persistent arrays](https://www.lri.fr/~filliatr/ftp/ocaml/ds/parray.ml.html)) +- `CCPersistentHashtbl` and `CCPersistentArray`, a semi-persistent array and hashtable + (similar to [persistent arrays](https://www.lri.fr/~filliatr/ftp/ocaml/ds/parray.ml.html)) - `CCMixmap`, `CCMixtbl`, `CCMixset`, containers of universal types (heterogenous containers) +- `CCRingBuffer`, a double-ended queue on top of an array-like structure, + with batch operations +- `CCIntMap`, map specialized for integer keys based on Patricia Trees, + with fast merges +- `CCHashconsedSet`, a set structure with sharing of sub-structures +- `CCGraph`, a small collection of graph algorithms ### Containers.io -- `CCIO`, basic utilities for IO +*deprecated*, `CCIO` is now a core module. You can still install it and +depend on it but it contains no useful module. ### Containers.unix diff --git a/_oasis b/_oasis index 983c8869..aca6dae2 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: containers -Version: 0.11 +Version: 0.12 Homepage: https://github.com/c-cube/ocaml-containers Authors: Simon Cruanes License: BSD-2-clause @@ -18,8 +18,9 @@ Description: extend the stdlib (e.g. CCList provides safe map/fold_right/append, and additional functions on lists). - It also features an optional library for dealing with strings, and a `misc` - library full of experimental ideas (not stable, not necessarily usable). + It also features optional libraries for dealing with strings, helpers for unix, + threads, lwt and a `misc` library full of experimental ideas (not stable, not + necessarily usable). Flag "misc" Description: Build the misc library, with experimental modules still susceptible to change @@ -53,13 +54,13 @@ Library "containers" Path: src/core Modules: CCVector, CCPrint, CCError, CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCInt, CCBool, CCFloat, CCArray, CCRef, CCSet, - CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat, + CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat, CCIO, Containers BuildDepends: bytes Library "containers_io" Path: src/io - Modules: CCIO + Modules: Containers_io_is_deprecated BuildDepends: bytes FindlibParent: containers FindlibName: io @@ -83,7 +84,7 @@ Library "containers_data" Modules: CCMultiMap, CCMultiSet, CCTrie, CCFlatHashtbl, CCCache, CCPersistentHashtbl, CCDeque, CCFQueue, CCBV, CCMixtbl, CCMixmap, CCRingBuffer, CCIntMap, CCPersistentArray, - CCMixset + CCMixset, CCHashconsedSet, CCGraph BuildDepends: bytes FindlibParent: containers FindlibName: data @@ -112,7 +113,7 @@ Library "containers_advanced" Library "containers_bigarray" Path: src/bigarray - Modules: CCBigstring + Modules: CCBigstring, CCArray1 FindlibName: bigarray FindlibParent: containers BuildDepends: containers, bigarray, bytes diff --git a/_tags b/_tags index 2299aed8..ce790f28 100644 --- a/_tags +++ b/_tags @@ -218,4 +218,4 @@ true: annot, bin_annot : thread : inline(25) and not : warn_A, warn(-4), warn(-44) -true: no_alias_deps +true: no_alias_deps, safe_string diff --git a/containers.odocl b/containers.odocl index 29bf76e6..e07bbb80 100644 --- a/containers.odocl +++ b/containers.odocl @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 7f7259458c1636ee0279e4fb677f4e2b) +# DO NOT EDIT (digest: f6b14d1de025e74a6698a2eae3486204) src/core/CCVector src/core/CCPrint src/core/CCError @@ -21,6 +21,7 @@ src/core/CCString src/core/CCHashtbl src/core/CCMap src/core/CCFormat +src/core/CCIO src/core/Containers src/misc/AbsSet src/misc/Automaton @@ -54,18 +55,21 @@ src/data/CCRingBuffer src/data/CCIntMap src/data/CCPersistentArray src/data/CCMixset +src/data/CCHashconsedSet +src/data/CCGraph src/string/Containers_string src/string/CCKMP src/string/CCLevenshtein src/string/CCApp_parse src/string/CCParse src/bigarray/CCBigstring +src/bigarray/CCArray1 src/advanced/Containers_advanced src/advanced/CCLinq src/advanced/CCBatch src/advanced/CCCat src/advanced/CCMonadIO -src/io/CCIO +src/io/Containers_io_is_deprecated src/unix/CCUnix src/sexp/CCSexp src/sexp/CCSexpStream diff --git a/doc/intro.txt b/doc/intro.txt index 5b68eb1f..0b692889 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -33,6 +33,7 @@ CCHash CCHashtbl CCHeap CCInt +CCIO CCList CCMap CCOpt @@ -80,9 +81,7 @@ CCTrie {4 Containers.io} -Helpers to perform simple IO (mostly on files) and iterate on channels. - -{!modules: CCIO} +{b deprecated} use {!CCIO} directly from the set of core modules. {4 Containers.unix} @@ -111,7 +110,7 @@ Iterators: Use bigarrays to hold large strings and map files directly into memory. -{!modules: CCBigstring} +{!modules: CCBigstring CCArray1} {4 Advanced} diff --git a/setup.ml b/setup.ml index 035ecf6f..975f6a79 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.4.4 *) (* OASIS_START *) -(* DO NOT EDIT (digest: ee9a9724a7939bfbe9c154b61dba7eeb) *) +(* DO NOT EDIT (digest: 1593403dc85a9c643213aaeadef20340) *) (* Regenerated by OASIS v0.4.5 Visit http://oasis.forge.ocamlcore.org for more information and @@ -6965,7 +6965,7 @@ let setup_t = alpha_features = ["ocamlbuild_more_args"]; beta_features = []; name = "containers"; - version = "0.11"; + version = "0.12"; license = OASISLicense.DEP5License (OASISLicense.DEP5Unit @@ -6984,7 +6984,7 @@ let setup_t = Some [ OASISText.Para - "Containers is a standard library (BSD license) focused on data structures, combinators and iterators, without dependencies on unix. Every module is independent and is prefixed with 'CC' in the global namespace. Some modules extend the stdlib (e.g. CCList provides safe map/fold_right/append, and additional functions on lists). It also features an optional library for dealing with strings, and a `misc` library full of experimental ideas (not stable, not necessarily usable)." + "Containers is a standard library (BSD license) focused on data structures, combinators and iterators, without dependencies on unix. Every module is independent and is prefixed with 'CC' in the global namespace. Some modules extend the stdlib (e.g. CCList provides safe map/fold_right/append, and additional functions on lists). It also features optional libraries for dealing with strings, helpers for unix, threads, lwt and a `misc` library full of experimental ideas (not stable, not necessarily usable)." ]; categories = []; conf_type = (`Configure, "internal", Some "0.4"); @@ -7154,6 +7154,7 @@ let setup_t = "CCHashtbl"; "CCMap"; "CCFormat"; + "CCIO"; "Containers" ]; lib_pack = false; @@ -7185,7 +7186,7 @@ let setup_t = bs_nativeopt = [(OASISExpr.EBool true, [])] }, { - lib_modules = ["CCIO"]; + lib_modules = ["Containers_io_is_deprecated"]; lib_pack = false; lib_internal_modules = []; lib_findlib_parent = Some "containers"; @@ -7295,7 +7296,9 @@ let setup_t = "CCRingBuffer"; "CCIntMap"; "CCPersistentArray"; - "CCMixset" + "CCMixset"; + "CCHashconsedSet"; + "CCGraph" ]; lib_pack = false; lib_internal_modules = []; @@ -7447,7 +7450,7 @@ let setup_t = bs_nativeopt = [(OASISExpr.EBool true, [])] }, { - lib_modules = ["CCBigstring"]; + lib_modules = ["CCBigstring"; "CCArray1"]; lib_pack = false; lib_internal_modules = []; lib_findlib_parent = Some "containers"; @@ -8114,7 +8117,7 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.4.5"; - oasis_digest = Some "\005\024\210\198~B\127\141$\2177\196Z573"; + oasis_digest = Some "\207\136r\164\234\165|\201u\238E6\144\155n\202"; oasis_exec = None; oasis_setup_args = []; setup_update = false @@ -8122,6 +8125,6 @@ let setup_t = let setup () = BaseSetup.setup setup_t;; -# 8126 "setup.ml" +# 8129 "setup.ml" (* OASIS_STOP *) let () = setup ();; diff --git a/src/advanced/CCLinq.ml b/src/advanced/CCLinq.ml index 6bbccd6f..7da7ccda 100644 --- a/src/advanced/CCLinq.ml +++ b/src/advanced/CCLinq.ml @@ -942,20 +942,7 @@ end module IO = struct let _slurp with_input = - let l = lazy ( - with_input - (fun ic -> - let buf_size = 256 in - let content = Buffer.create 120 - and buf = String.make buf_size 'a' in - let rec next () = - let num = input ic buf 0 buf_size in - if num = 0 - then Buffer.contents content (* EOF *) - else (Buffer.add_substring content buf 0 num; next ()) - in next () - ) - ) in + let l = lazy (with_input (fun ic -> CCIO.read_all ic)) in lazy_ (return l) let slurp ic = _slurp (fun f -> f ic) diff --git a/src/advanced/CCMonadIO.ml b/src/advanced/CCMonadIO.cppo.ml similarity index 97% rename from src/advanced/CCMonadIO.ml rename to src/advanced/CCMonadIO.cppo.ml index 961880e5..b4658b15 100644 --- a/src/advanced/CCMonadIO.ml +++ b/src/advanced/CCMonadIO.cppo.ml @@ -190,16 +190,7 @@ let rec _read_lines ic acc = let read_lines ic = _read_lines ic [] -let _read_all ic () = - let buf = Buffer.create 128 in - try - while true do - Buffer.add_channel buf ic 1024 - done; - "" (* never returned *) - with End_of_file -> Buffer.contents buf - -let read_all ic = Wrap(_read_all ic) +let read_all ic = Wrap(fun () -> CCIO.read_all ic) let _open_out mode flags filename () = open_out_gen flags mode filename @@ -213,10 +204,20 @@ let with_out ?(mode=0o644) ?(flags=[]) filename = let with_out_a ?mode ?(flags=[]) filename = with_out ?mode ~flags:(Open_creat::Open_append::flags) filename -let _write oc s i len () = output oc s i len +#if OCAML_MAJOR >= 4 && OCAML_MINOR >= 2 + +let output_str_ = Pervasives.output_substring + +#else + +let output_str_ = Pervasives.output + +#endif + +let _write oc s i len () = output_str_ oc s i len let write oc s i len = Wrap (_write oc s i len) -let _write_str oc s () = output oc s 0 (String.length s) +let _write_str oc s () = output_str_ oc s 0 (String.length s) let write_str oc s = Wrap (_write_str oc s) let _write_line oc l () = @@ -517,3 +518,5 @@ end module Raw = struct let wrap f = Wrap f end + +(* vim:ft=ocaml: *) diff --git a/src/advanced/CCMonadIO.mli b/src/advanced/CCMonadIO.mli index 03c4216d..36ef97fb 100644 --- a/src/advanced/CCMonadIO.mli +++ b/src/advanced/CCMonadIO.mli @@ -145,7 +145,7 @@ val with_in : ?mode:int -> ?flags:open_flag list -> It yields a [in_channel] with a finalizer attached. See {!(>>>=)} to use it. *) -val read : in_channel -> string -> int -> int -> int t +val read : in_channel -> Bytes.t -> int -> int -> int t (** Read a chunk into the given string *) val read_line : in_channel -> string option t diff --git a/src/bigarray/CCArray1.ml b/src/bigarray/CCArray1.ml new file mode 100644 index 00000000..140e2792 --- /dev/null +++ b/src/bigarray/CCArray1.ml @@ -0,0 +1,755 @@ + + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Bigarrays of dimension 1 *) + +module A = Bigarray.Array1 + +type 'a printer = Format.formatter -> 'a -> unit +type 'a sequence = ('a -> unit) -> unit +type 'a or_error = [`Ok of 'a | `Error of string] +type random = Random.State.t + +type json = [ `Assoc of (string * json) list + | `Bool of bool + | `Float of float + | `Int of int + | `List of json list + | `Null + | `String of string ] +type 'a to_json = 'a -> json +type 'a of_json = json -> 'a or_error + +type ('a, 'b, 'perm) t = + ('a, 'b, Bigarray.c_layout) Bigarray.Array1.t + constraint 'perm = [< `R | `W] + +type ('a, 'b, 'perm) array_ = ('a, 'b, 'perm) t + +exception WrongDimension + +let make ?x ~kind n = + let a = A.create kind Bigarray.c_layout n in + begin match x with + | None -> () + | Some x -> A.fill a x + end; + a + +let make_int n = make ~kind:Bigarray.int n +let make_char n = make ~kind:Bigarray.char n +let make_int8s n = make ~kind:Bigarray.int8_signed n +let make_int8u n = make ~kind:Bigarray.int8_unsigned n +let make_int16s n = make ~kind:Bigarray.int16_signed n +let make_int16u n = make ~kind:Bigarray.int16_unsigned n +let make_int32 n = make ~kind:Bigarray.int32 n +let make_int64 n = make ~kind:Bigarray.int64 n +let make_native n = make ~kind:Bigarray.nativeint n +let make_float32 n = make ~kind:Bigarray.float32 n +let make_float64 n = make ~kind:Bigarray.float64 n +let make_complex32 n = make ~kind:Bigarray.complex32 n +let make_complex64 n = make ~kind:Bigarray.complex64 n + +let init ~kind ~f n = + let a = A.create kind Bigarray.c_layout n in + for i = 0 to n-1 do + A.unsafe_set a i (f i) + done; + a + +(*$T + let a = init ~kind:Bigarray.int 10 ~f:(fun x->x) in \ + CCList.(0 -- 9) |> List.for_all (fun i -> get a i = i) +*) + +let of_bigarray a = a +let to_bigarray a = a + +let ro (t : ('a,'b,[>`R]) t) : ('a,'b,[`R]) t = t +let wo (t : ('a,'b,[>`W]) t) : ('a,'b,[`W]) t = t + +let fill = A.fill + +let copy a = + let b = make ~kind:(A.kind a) (A.dim a) in + A.blit a b; + b + +let length a = A.dim a + +(*$T + length (make_int 42) = 42 +*) + +let set = A.set + +let get = A.get + +let blit = A.blit + +let sub = A.sub + +let iter ~f a = + for i = 0 to A.dim a - 1 do + f (A.unsafe_get a i) + done + +exception LocalExit + +let for_all ~f a = + try + for i = 0 to A.dim a - 1 do + if not (f (A.unsafe_get a i)) then raise LocalExit + done; + true + with LocalExit -> false + +let exists ~f a = + try + for i = 0 to A.dim a - 1 do + if f (A.unsafe_get a i) then raise LocalExit + done; + false + with LocalExit -> true + +(*$T + init ~kind:Bigarray.int 10 ~f:(fun x->x) |> for_all ~f:(fun x -> x<10) + init ~kind:Bigarray.int 10 ~f:(fun x->x) |> exists ~f:(fun x -> x=5) +*) + +let iteri ~f a = + for i = 0 to A.dim a - 1 do + f i (A.unsafe_get a i) + done + +let foldi f acc a = + let rec fold' f acc a i = + if i = A.dim a then acc + else + let acc = f acc i (A.unsafe_get a i) in + fold' f acc a (i+1) + in + fold' f acc a 0 + +let pp pp_x out a = + Format.pp_print_char out '['; + iteri a + ~f:(fun i x -> + if i > 0 then Format.fprintf out ",@ "; + pp_x out x + ); + Format.pp_print_char out ']'; + () + +module Bool = struct + type ('a, 'perm) t = (int, 'a, 'perm) array_ + + let set a i x = A.set a i (if x then 1 else 0) + + let get a i = A.get a i <> 0 + + let zeroes n = make ~x:0 ~kind:Bigarray.int8_unsigned n + let ones n = make ~x:1 ~kind:Bigarray.int8_unsigned n + + let iter_zeroes ~f a = + for i = 0 to A.dim a - 1 do + if A.unsafe_get a i = 0 then f i + done + + let iter_ones ~f a = + for i = 0 to A.dim a - 1 do + if A.unsafe_get a i > 0 then f i + done + + let cardinal a = + let rec fold a i acc = + if i = A.dim a then acc + else + let acc = if A.get a i <> 0 then acc+1 else acc in + fold a (i+1) acc + in + fold a 0 0 + + let or_ ?res a b = + let res = match res with + | Some r -> + if A.dim r <> max (A.dim a) (A.dim b) then raise WrongDimension; + A.fill r 0; + r + | None -> make ~x:0 ~kind:(A.kind a) (max (A.dim a) (A.dim b)) + in + (* ensure [a] is no longer than [b] *) + let a, b = if A.dim a < A.dim b then a, b else b, a in + for i = 0 to A.dim a - 1 do + if A.unsafe_get a i > 0 || A.unsafe_get b i > 0 + then set b i true + done; + res + + let and_ ?res a b = + let res = match res with + | Some r -> + if A.dim r <> max (A.dim a) (A.dim b) then raise WrongDimension; + A.fill r 0; + r + | None -> make ~x:0 ~kind:(A.kind a) (max (A.dim a) (A.dim b)) + in + (* ensure [a] is no longer than [b] *) + let a, b = if A.dim a < A.dim b then a, b else b, a in + for i=0 to A.dim a - 1 do + if A.unsafe_get a i > 0 && A.unsafe_get b i > 0 + then set res i true + done; + res + + let not_ ?res a = + let res = match res with + | Some r -> + if A.dim r <> A.dim a then raise WrongDimension; + A.fill r 0; + r + | None -> make ~x:0 ~kind:(A.kind a) (A.dim a) + in + for i=0 to A.dim a - 1 do + if A.unsafe_get a i = 0 then set res i true + done; + res + + (* assumes dimensions are ok and [A.dim a >= A.dim b] *) + let mix_ a b ~res = + let na = A.dim a + and nb = A.dim b in + assert (nb <= na); + (* a has more bits, and we group them in successive chunks of size [d] *) + let step = 1 + (na + nb) / nb in + for i = 0 to na + nb - 1 do + let q, r = i / step, i mod step in + if r = 0 + then set res i (get b q) + else set res i (get a (q + r - 1)) + done + + let mix ?res a b = + let res = match res with + | Some r -> + if A.dim a + A.dim b <> A.dim r then raise WrongDimension; + r + | None -> make ~kind:(A.kind a) (A.dim a + A.dim b) + in + if A.dim a < A.dim b then mix_ b a ~res else mix_ a b ~res; + res + + let rec big_or_ a b i j acc = + if j = A.dim b then acc + else (* acc xor (a[i+j] and b[j]) *) + let acc = acc <> (get a ((i+j) mod A.dim a) && get b j) in + big_or_ a b i (j+1) acc + + (* [into[i] = big_or_{j in [0...nb-1]} (a[i+j-1 mod na] and b[j]) *) + let convolution ?res a ~by:b = + let res = match res with + | Some r -> + if A.dim a < A.dim b || A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim res - 1 do + if big_or_ a b i 0 false then set res i true + done; + res + + let pp out a = pp + (fun oc b -> + Format.pp_print_char oc (if b>0 then '1' else '0') + ) out a +end + +let append ?res a b = + let res = match res with + | Some r -> + if A.dim a + A.dim b <> A.dim r then raise WrongDimension; + r + | None -> make ~kind:(A.kind a) (A.dim a + A.dim b) + in + let n = A.dim a in + A.blit a (A.sub res 0 n); + A.blit b (A.sub res n (A.dim b)); + res + +let map ?res ~f a = + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~kind:(A.kind a) (A.dim a) + in + for i=0 to A.dim a - 1 do + A.set res i (f (A.unsafe_get a i)) + done; + res + +let map2 ?res ~f a b = + if A.dim a <> A.dim b then raise WrongDimension; + let res = match res with + | Some r -> + if A.dim r <> A.dim a then raise WrongDimension; + r + | None -> make ~kind:(A.kind a) (A.dim a) + in + for i=0 to A.dim a - 1 do + A.set res i (f (A.unsafe_get a i) (A.unsafe_get b i)) + done; + res + +let filter ?res ~f a = + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0 ~kind:Bigarray.int8_unsigned (A.dim a) + in + for i=0 to A.dim a - 1 do + if f (A.unsafe_get a i) + then Bool.set res i true + done; + res + +module type S = sig + type elt + type ('a, 'perm) t = (elt, 'a, 'perm) array_ + + val add : + ?res:('a, [>`W] as 'perm) t -> + ('a, [>`R]) t -> + ('a, [>`R]) t -> + ('a, 'perm) t + (** Elementwise sum + @raise WrongDimension if dimensions do not fit *) + + val mult : + ?res:('a, [>`W] as 'perm) t -> + ('a, [>`R]) t -> + ('a, [>`R]) t -> + ('a, 'perm) t + (** Elementwise product *) + + val scalar_add : + ?res:('a, [>`W] as 'perm) t -> + ('a, [>`R]) t -> + x:elt -> + ('a, 'perm) t + (** @raise WrongDimension if dimensions do not fit *) + + val scalar_mult : + ?res:('a, [>`W] as 'perm) t -> + ('a, [>`R]) t -> + x:elt -> + ('a, 'perm) t + (** @raise WrongDimension if dimensions do not fit *) + + val sum_elt : (_, [>`R]) t -> elt + (** Efficient sum of elements *) + + val product_elt : (_, [>`R]) t -> elt + (** Efficient product of elements *) + + val dot_product : (_, [>`R]) t -> (_, [>`R]) t -> elt + (** [dot_product a b] returns [sum_i a(i)*b(i)] with the given + sum and product, on [elt]. + [dot_product a b = sum_elt (product a b)] + @raise WrongDimension if [a] and [b] do not have the same size *) + + module Infix : sig + val ( * ) : ('a, [>`R]) t -> ('a, [>`R]) t -> ('a, 'perm) t + (** Alias to {!mult} *) + + val ( + ) : ('a, [>`R]) t -> (_, [>`R]) t -> ('a, 'perm) t + (** Alias to {!add} *) + + val ( *! ) : ('a, [>`R]) t -> elt -> ('a, 'perm) t + (** Alias to {!scalar_mult} *) + + val ( +! ) : ('a, [>`R]) t -> elt -> ('a, 'perm) t + (** Alias to {!scalar_add} *) + end + + include module type of Infix +end + +module Int = struct + type elt = int + type ('a, 'perm) t = (elt, 'a, 'perm) array_ + + let add ?res a b = + if A.dim a <> A.dim b then raise WrongDimension; + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0 ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim a - 1 do + A.set res i (A.unsafe_get a i + A.unsafe_get b i) + done; + res + + let mult ?res a b = + if A.dim a <> A.dim b then raise WrongDimension; + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0 ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim a - 1 do + A.set res i (A.unsafe_get a i * A.unsafe_get b i) + done; + res + + let scalar_add ?res a ~x = + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0 ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim a - 1 do + A.set res i (A.unsafe_get a i + x) + done; + res + + let scalar_mult ?res a ~x = + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0 ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim a - 1 do + A.set res i (A.unsafe_get a i * x) + done; + res + + let dot_product a b = + if A.dim a <> A.dim b then raise WrongDimension; + let r = ref 0 in + for i = 0 to A.dim a - 1 do + r := !r + (A.unsafe_get a i * A.unsafe_get b i) + done; + !r + + let sum_elt a = + let r = ref 0 in + for i = 0 to A.dim a - 1 do + r := !r + A.unsafe_get a i + done; + !r + + let product_elt a = + let r = ref 1 in + for i = 0 to A.dim a - 1 do + r := !r * A.unsafe_get a i + done; + !r + + module Infix = struct + let ( + ) a b = add a b + let ( * ) a b = mult a b + + let ( +! ) a x = scalar_add a ~x + let ( *! ) a x = scalar_mult a ~x + end + + include Infix +end + +module Float = struct + type elt = float + type ('a, 'perm) t = (elt, 'a, 'perm) array_ + + let add ?res a b = + if A.dim a <> A.dim b then raise WrongDimension; + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0. ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim a - 1 do + A.set res i (A.unsafe_get a i +. A.unsafe_get b i) + done; + res + + let mult ?res a b = + if A.dim a <> A.dim b then raise WrongDimension; + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0. ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim a - 1 do + A.set res i (A.unsafe_get a i *. A.unsafe_get b i) + done; + res + + let scalar_add ?res a ~x = + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0. ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim a - 1 do + A.set res i (A.unsafe_get a i +. x) + done; + res + + let scalar_mult ?res a ~x = + let res = match res with + | Some r -> + if A.dim a <> A.dim r then raise WrongDimension; + r + | None -> make ~x:0. ~kind:(A.kind a) (A.dim a) + in + for i = 0 to A.dim a - 1 do + A.set res i (A.unsafe_get a i *. x) + done; + res + + let dot_product a b = + if A.dim a <> A.dim b then raise WrongDimension; + let r = ref 0. in + for i = 0 to A.dim a - 1 do + r := !r +. (A.unsafe_get a i *. A.unsafe_get b i) + done; + !r + + let sum_elt a = + let r = ref 0. in + for i = 0 to A.dim a - 1 do + r := !r +. A.unsafe_get a i + done; + !r + + let product_elt a = + let r = ref 1. in + for i = 0 to A.dim a - 1 do + r := !r *. A.unsafe_get a i + done; + !r + + module Infix = struct + let ( + ) a b = add a b + let ( * ) a b = mult a b + + let ( +! ) a x = scalar_add a ~x + let ( *! ) a x = scalar_mult a ~x + end + + include Infix +end + +let to_list a = + let l = foldi (fun acc _ x -> x::acc) [] a in + List.rev l + +let to_array a = + if A.dim a = 0 then [||] + else ( + let b = Array.make (A.dim a) (A.get a 0) in + for i = 1 to A.dim a - 1 do + Array.unsafe_set b i (A.unsafe_get a i) + done; + b + ) + +let to_seq a yield = iter a ~f:yield + +let of_array ~kind a = A.of_array kind Bigarray.c_layout a + +exception OfYojsonError of string + +let to_yojson (f:'a -> json) a : json = + let l = foldi (fun l _ x -> f x :: l) [] a in + `List (List.rev l) + +let int_to_yojson i = `Int i +let int_of_yojson = function + | `Int i -> `Ok i + | `Float f -> `Ok (int_of_float f) + | `String s -> (try `Ok (int_of_string s) with _ -> `Error "expected int") + | _ -> `Error "expected int" + +let float_to_yojson f = `Float f +let float_of_yojson = function + | `Float f -> `Ok f + | `Int i -> `Ok (float_of_int i) + | _ -> `Error "expected float" + +let of_yojson + ~(kind:('a,'b) Bigarray.kind) + (f: json -> 'a or_error) + (j : json) : ('a,'b,'perm) t or_error += + let unwrap_ = function + | `Ok x -> x + | `Error msg -> raise (OfYojsonError msg) + in + let map_l l = List.map (fun x -> unwrap_ (f x)) l + and of_list l = + let a = make ~kind (List.length l) in + List.iteri (fun i b -> set a i b) l; + a + in + try + match j with + | `List l -> `Ok (of_list (map_l l)) + | _ -> raise (OfYojsonError "invalid json (expected list)") + with OfYojsonError msg -> + `Error msg + + +module View = struct + type 'a t = { + len : int; + view : 'a view + } + and _ view = + | Arr : ('a, _, _) array_ -> 'a view + | Map : ('a -> 'b) * 'a t -> 'b view + | Map2 : ('a -> 'b -> 'c) * 'a t * 'b t -> 'c view + | Select : (int, _, _) array_ * 'a t -> 'a view + | SelectA : int array * 'a t -> 'a view + | SelectV : int t * 'a t -> 'a view + | Raw : + ('a, 'b, [>`R]) array_ * + (('a, 'b, [>`R]) array_ -> int) * + (('a, 'b, [>`R]) array_ -> int -> 'a) -> + 'a view + + let length t = t.len + + let rec get + : type a. a t -> int -> a + = fun v i -> match v.view with + | Arr a -> A.get a i + | Map (f, a) -> f (get a i) + | Map2 (f, a1, a2) -> f (get a1 i) (get a2 i) + | Select (idx, a) -> get a (A.get idx i) + | SelectA (idx, a) -> get a (Array.get idx i) + | SelectV (idx, a) -> get a (get idx i) + | Raw (a, _, f) -> f a i + + let rec iteri + : type a. f:(int -> a -> unit) -> a t -> unit + = fun ~f v -> match v.view with + | Arr a -> + for i = 0 to A.dim a - 1 do + f i (A.unsafe_get a i) + done + | Map (g, a') -> + iteri a' ~f:(fun i x -> f i (g x)) + | Map2 (g, a1, a2) -> + iteri a1 ~f:(fun i x -> let y = get a2 i in f i (g x y)) + | Select (idx, a) -> + for i = 0 to A.dim idx - 1 do + let j = A.unsafe_get idx i in + f i (get a j) + done + | SelectA (idx, a) -> + Array.iteri (fun i j -> f i (get a j)) idx + | SelectV (idx, a) -> + for i=0 to length idx - 1 do + let j = get idx i in + f i (get a j) + done + | Raw (a, len, g) -> + for i=0 to len a - 1 do + f i (g a i) + done + + let of_array a = {len=A.dim a; view=Arr a} + + let map ~f a = {len=length a; view=Map(f, a)} + let map2 ~f a b = + if length a <> length b then raise WrongDimension; + {len=length a; view=Map2(f, a, b)} + + let select ~idx a = {len=A.dim idx; view=Select(idx,a)} + let select_a ~idx a = {len=Array.length idx; view=SelectA(idx,a)} + let select_view ~idx a = {len=length idx; view=SelectV(idx,a)} + + let foldi f acc a = + let acc = ref acc in + iteri a ~f:(fun i x -> acc := f !acc i x); + !acc + + let raw ~length ~get a = {len=length a; view=Raw (a, length, get) } + + module type S = sig + type elt + val mult : elt t -> elt t -> elt t + val add : elt t -> elt t -> elt t + val sum : elt t -> elt + val prod : elt t -> elt + val add_scalar : elt t -> x:elt -> elt t + val mult_scalar : elt t -> x:elt -> elt t + end + + module Int = struct + type elt = int + let add a b = map2 ~f:(+) a b + let mult a b = map2 ~f:( * ) a b + let sum a = foldi (fun acc _ x -> acc+x) 0 a + let prod a = foldi (fun acc _ x -> acc*x) 1 a + let add_scalar a ~x = map ~f:(fun y -> x+y) a + let mult_scalar a ~x = map ~f:(fun y -> x*y) a + end + + module Float = struct + type elt = float + let add a b = map2 ~f:(+.) a b + let mult a b = map2 ~f:( *. ) a b + let sum a = foldi (fun acc _ x -> acc+.x) 0. a + let prod a = foldi (fun acc _ x -> acc*.x) 1. a + let add_scalar a ~x = map ~f:(fun y -> x+.y) a + let mult_scalar a ~x = map ~f:(fun y -> x*.y) a + end + + let to_array ?res ?kind a = + let res = match res, kind with + | Some r, None -> + if A.dim r <> length a then raise WrongDimension; + r + | None, Some kind -> A.create kind Bigarray.c_layout (length a) + | None, None + | Some _, Some _ -> invalid_arg "View.to_array" + in + iteri a ~f:(fun i x -> A.unsafe_set res i x); + res +end diff --git a/src/bigarray/CCArray1.mli b/src/bigarray/CCArray1.mli new file mode 100644 index 00000000..1a6dab57 --- /dev/null +++ b/src/bigarray/CCArray1.mli @@ -0,0 +1,371 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Bigarrays of dimension 1} + + {b status: experimental} + @since 0.12 *) + +(** {2 used types} *) + +type 'a printer = Format.formatter -> 'a -> unit +type 'a sequence = ('a -> unit) -> unit +type 'a or_error = [`Ok of 'a | `Error of string] +type random = Random.State.t + +type json = [ `Assoc of (string * json) list + | `Bool of bool + | `Float of float + | `Int of int + | `List of json list + | `Null + | `String of string ] +type 'a to_json = 'a -> json +type 'a of_json = json -> 'a or_error + +(** {2 Type Declarations} *) + +type ('a, 'b, 'perm) t constraint 'perm = [< `R | `W] +(** Array of OCaml values of type ['a] with C representation of type [b'] + with permissions ['perm] *) + +type ('a, 'b, 'perm) array_ = ('a, 'b, 'perm) t + +exception WrongDimension +(** Raised when arrays do not have expected length *) + +(** {2 Basic Operations} *) + +val make : ?x:'a -> kind:('a,'b) Bigarray.kind -> int -> ('a, 'b, 'perm) t +(** New array with undefined elements + @param kind the kind of bigarray + @param x optional element to fill every slot + @param n the number of elements *) + +val make_int : int -> (int, Bigarray.int_elt, 'perm) t +val make_char : int -> (char, Bigarray.int8_unsigned_elt, 'perm) t +val make_int8s : int -> (int, Bigarray.int8_signed_elt, 'perm) t +val make_int8u : int -> (int, Bigarray.int8_unsigned_elt, 'perm) t +val make_int16s : int -> (int, Bigarray.int16_signed_elt, 'perm) t +val make_int16u : int -> (int, Bigarray.int16_unsigned_elt, 'perm) t +val make_int32 : int -> (int32, Bigarray.int32_elt, 'perm) t +val make_int64 : int -> (int64, Bigarray.int64_elt, 'perm) t +val make_native : int -> (nativeint, Bigarray.nativeint_elt, 'perm) t +val make_float32 : int -> (float, Bigarray.float32_elt, 'perm) t +val make_float64 : int -> (float, Bigarray.float64_elt, 'perm) t +val make_complex32 : int -> (Complex.t, Bigarray.complex32_elt, 'perm) t +val make_complex64 : int -> (Complex.t, Bigarray.complex64_elt, 'perm) t + +val init : kind:('a, 'b) Bigarray.kind -> f:(int -> 'a) -> int -> ('a, 'b, 'perm) t +(** Initialize with given size and initialization function *) + +val of_bigarray : ('a, 'b, Bigarray.c_layout) Bigarray.Array1.t -> ('a, 'b, 'perm) t +(** Convert from a big array *) + +val to_bigarray : ('a, 'b, [`R | `W]) t -> ('a, 'b, Bigarray.c_layout) Bigarray.Array1.t +(** Obtain the underlying array *) + +val ro : ('a, 'b, [>`R]) t -> ('a, 'b, [`R]) t +(** Change permission (old reference to array might still be mutable!) *) + +val wo : ('a, 'b, [>`W]) t -> ('a, 'b, [`W]) t +(** Change permission *) + +val length : (_, _, [>`R]) t -> int +(** Number of elements *) + +val set : ('a, _, [>`W]) t -> int -> 'a -> unit +(** set n-th element *) + +val get : ('a, _, [>`R]) t -> int -> 'a +(** get n-th element *) + +val fill : ('a, _, [>`W]) t -> 'a -> unit +(** [fill a x] fills [a] with [x] *) + +val sub : ('a, 'b, 'perm) t -> int -> int -> ('a, 'b, 'perm) t +(** [sub a i len] takes the slice of length [len] starting at offset [i] *) + +val blit : ('a, 'b, [>`R]) t -> ('a, 'b, [>`W]) t -> unit +(** blit the first array to the second *) + +val copy : ('a, 'b, [>`R]) t -> ('a, 'b, 'perm) t +(** Fresh copy *) + +val iter : f:('a -> unit) -> ('a, _, [>`R]) t -> unit +(** [iter a ~f] calls [f v] where [get a i = v] for each [i < length a]. + It iterates on all bits in increasing order *) + +val iteri : f:(int -> 'a -> unit) -> ('a, _, [>`R]) t -> unit +(** [iteri a ~f] calls [f i v] where [get a i = v] for each [i < length a]. + It iterates on all elements in increasing order *) + +val foldi : ('b -> int -> 'a -> 'b) -> 'b -> ('a, _, [>`R]) t -> 'b + +val for_all : f:('a -> bool) -> ('a, _, [>`R]) t -> bool + +val exists : f:('a -> bool) -> ('a, _, [>`R]) t -> bool + +val pp : 'a printer -> ('a, _, [>`R]) t printer +(** Print the SDR nicely *) + +(** {2 Boolean Vectors} *) + +module Bool : sig + type ('b, 'perm) t = (int, 'b, 'perm) array_ + (** A simple bitvector based on some integral type ['b] *) + + val get : (_, [>`R]) t -> int -> bool + + val set : (_, [>`W]) t -> int -> bool -> unit + + val zeroes : int -> (Bigarray.int8_unsigned_elt, 'perm) t + val ones : int -> (Bigarray.int8_unsigned_elt, 'perm) t + + val iter_zeroes : f:(int -> unit) -> (_, [>`R]) t -> unit + (** [iter_ones ~f a] calls [f i] for every index [i] such that [get a i = false] *) + + val iter_ones : f:(int -> unit) -> (_, [>`R]) t -> unit + (** [iter_ones ~f a] calls [f i] for every index [i] such that [get a i = true] *) + + val cardinal : (_, [>`R]) t -> int + (** Number of ones *) + + val pp : (_,[>`R]) t printer + (** Print the bitvector nicely *) + + (** {6 Operations} *) + + val or_ : ?res:('b, [>`W] as 'perm) t -> ('b, [>`R]) t -> ('b, [>`R]) t -> ('b, 'perm) t + (** [or_ a b ~into] puts the boolean "or" of [a] and [b] in [into] + expects [length into = max (length a) (length b)] + @raise WrongDimension if dimensions do not match *) + + val and_ : ?res:('b, [>`W] as 'perm) t -> ('b, [>`R]) t -> ('b, [>`R]) t -> ('b, 'perm) t + (** Boolean conjunction. See {!or} for the parameters *) + + val not_ : ?res:('b, [>`W] as 'perm) t -> ('b, [>`R]) t -> ('b, 'perm) t + (** Boolean negation (negation of a 0 becomes a 1) *) + + val mix : ?res:('b, [>`W] as 'perm) t -> ('b, [>`R]) t -> ('b, [>`R]) t -> ('b, 'perm) t + (** [mix a b ~into] assumes [length a + length b = length into] and + mixes (interleaves) bits of [a] and [b] in [into]. + @raise WrongDimension if dimensions do not match *) + + val convolution : ?res:('b, [>`W] as 'perm) t -> ('b,[>`R]) t -> by:('b, [>`R]) t -> ('b,'perm) t + (** [convolution a ~by:b ~into] assumes [length into = length a >= length b] + and computes the boolean convolution of [a] by [b] + @raise WrongDimension if dimensions do not match *) +end + +(** {2 Operations} *) + +val map : + ?res:('a, 'b, ([>`W] as 'perm)) t -> + f:('a -> 'a) -> + ('a, 'b, [>`R]) t -> + ('a, 'b, 'perm) t + +val map2 : + ?res:('a, 'b, ([>`W] as 'perm)) t -> + f:('a -> 'a2 -> 'a) -> + ('a, 'b, [>`R]) t -> + ('a2, _, [>`R]) t -> + ('a, 'b, 'perm) t + +val append : + ?res:('a, 'b, ([>`W] as 'perm)) t -> + ('a, 'b, [>`R]) t -> + ('a, 'b, [>`R]) t -> + ('a, 'b, 'perm) t +(** [append a b ~into] assumes [length a + length b = length into] and + copies [a] and [b] side by side in [into] + @raise WrongDimension if dimensions do not match *) + +val filter : + ?res:(Bigarray.int8_unsigned_elt, [>`W] as 'perm) Bool.t -> + f:('a -> bool) -> + ('a, 'b, [>`R]) t -> + (Bigarray.int8_unsigned_elt, 'perm) Bool.t + +module type S = sig + type elt + type ('a, 'perm) t = (elt, 'a, 'perm) array_ + + val add : + ?res:('a, [>`W] as 'perm) t -> + ('a, [>`R]) t -> + ('a, [>`R]) t -> + ('a, 'perm) t + (** Elementwise sum + @raise WrongDimension if dimensions do not fit *) + + val mult : + ?res:('a, [>`W] as 'perm) t -> + ('a, [>`R]) t -> + ('a, [>`R]) t -> + ('a, 'perm) t + (** Elementwise product *) + + val scalar_add : + ?res:('a, [>`W] as 'perm) t -> + ('a, [>`R]) t -> + x:elt -> + ('a, 'perm) t + (** @raise WrongDimension if dimensions do not fit *) + + val scalar_mult : + ?res:('a, [>`W] as 'perm) t -> + ('a, [>`R]) t -> + x:elt -> + ('a, 'perm) t + (** @raise WrongDimension if dimensions do not fit *) + + val sum_elt : (_, [>`R]) t -> elt + (** Efficient sum of elements *) + + val product_elt : (_, [>`R]) t -> elt + (** Efficient product of elements *) + + val dot_product : (_, [>`R]) t -> (_, [>`R]) t -> elt + (** [dot_product a b] returns [sum_i a(i)*b(i)] with the given + sum and product, on [elt]. + [dot_product a b = sum_elt (product a b)] + @raise WrongDimension if [a] and [b] do not have the same size *) + + module Infix : sig + val ( * ) : ('a, [>`R]) t -> ('a, [>`R]) t -> ('a, 'perm) t + (** Alias to {!mult} *) + + val ( + ) : ('a, [>`R]) t -> (_, [>`R]) t -> ('a, 'perm) t + (** Alias to {!add} *) + + val ( *! ) : ('a, [>`R]) t -> elt -> ('a, 'perm) t + (** Alias to {!scalar_mult} *) + + val ( +! ) : ('a, [>`R]) t -> elt -> ('a, 'perm) t + (** Alias to {!scalar_add} *) + end + + include module type of Infix +end + +module Int : S with type elt = int + +module Float : S with type elt = float + +(** {2 Conversions} *) + +val to_list : ('a, _, [>`R]) t -> 'a list +val to_array : ('a, _, [>`R]) t -> 'a array +val to_seq : ('a, _, [>`R]) t -> 'a sequence + +val of_array : kind:('a, 'b) Bigarray.kind -> 'a array -> ('a, 'b, 'perm) t + +(** {2 Serialization} *) + +val to_yojson : 'a to_json -> ('a, _, [>`R]) t to_json +val of_yojson : kind:('a, 'b) Bigarray.kind -> 'a of_json -> ('a, 'b, 'perm) t of_json + +val int_to_yojson : int to_json +val int_of_yojson : int of_json +val float_to_yojson : float to_json +val float_of_yojson : float of_json + +(** {2 Views} *) + +module View : sig + type 'a t + (** A view on an array or part of an array *) + + val of_array : ('a, _, [>`R]) array_ -> 'a t + + val get : 'a t -> int -> 'a + (** [get v i] returns the [i]-th element of [v]. Caution, this is not + as cheap as a regular array indexing, and it might involve recursion. + @raise Invalid_argument if index out of bounds *) + + val length : _ t -> int + (** [length v] is the number of elements of [v] *) + + val map : f:('a -> 'b) -> 'a t -> 'b t + (** Map values *) + + val map2 : f:('a -> 'b -> 'c) -> 'a t -> 'b t -> 'c t + (** Map values + @raise WrongDimension if lengths do not fit *) + + val select : idx:(int, _, [>`R]) array_ -> 'a t -> 'a t + (** [select ~idx v] is the view that has length [length idx] + and such that [get (select ~idx a) i = get a (get idx i)] *) + + val select_a : idx:int array -> 'a t -> 'a t + (** See {!select} *) + + val select_view : idx:int t -> 'a t -> 'a t + (** See {!select} *) + + val foldi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b + (** fold on values with their index *) + + val iteri : f:(int -> 'a -> unit) -> 'a t -> unit + (** [iteri ~f v] iterates on elements of [v] with their index *) + + module type S = sig + type elt + val mult : elt t -> elt t -> elt t + val add : elt t -> elt t -> elt t + val sum : elt t -> elt + val prod : elt t -> elt + val add_scalar : elt t -> x:elt -> elt t + val mult_scalar : elt t -> x:elt -> elt t + end + + module Int : sig + include S with type elt = int + end + + module Float : sig + include S with type elt = float + (* TODO: more, like trigo functions *) + end + + val raw : + length:(('a, 'b, [>`R]) array_ -> int) -> + get:(('a, 'b, [>`R]) array_ -> int -> 'a) -> + ('a, 'b, [>`R]) array_ -> + 'a t + + val to_array : + ?res:('a, 'b, [>`W] as 'perm) array_ -> + ?kind:('a, 'b) Bigarray.kind -> + 'a t -> + ('a, 'b, 'perm) array_ + (** [to_array v] returns a fresh copy of the content of [v]. + Exactly one of [res] and [kind] must be provided *) +end + + diff --git a/src/bigarray/containers_bigarray.mldylib b/src/bigarray/containers_bigarray.mldylib index 10f13676..6077c8cc 100644 --- a/src/bigarray/containers_bigarray.mldylib +++ b/src/bigarray/containers_bigarray.mldylib @@ -1,4 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 6398fca785a51b3ad28defb36820d456) +# DO NOT EDIT (digest: 4901abd33a2dfcf115ddeffb93e1186e) CCBigstring +CCArray1 # OASIS_STOP diff --git a/src/bigarray/containers_bigarray.mllib b/src/bigarray/containers_bigarray.mllib index 10f13676..6077c8cc 100644 --- a/src/bigarray/containers_bigarray.mllib +++ b/src/bigarray/containers_bigarray.mllib @@ -1,4 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 6398fca785a51b3ad28defb36820d456) +# DO NOT EDIT (digest: 4901abd33a2dfcf115ddeffb93e1186e) CCBigstring +CCArray1 # OASIS_STOP diff --git a/src/core/CCError.ml b/src/core/CCError.ml index 053de05d..47498964 100644 --- a/src/core/CCError.ml +++ b/src/core/CCError.ml @@ -43,16 +43,21 @@ let return x = `Ok x let fail s = `Error s +(* TODO: optional argument for printing stacktrace? *) let fail_printf format = let buf = Buffer.create 16 in Printf.kbprintf (fun buf -> fail (Buffer.contents buf)) buf format +(* TODO: easy ways to print backtrace/stack *) + let _printers = ref [] let register_printer p = _printers := p :: !_printers +(* FIXME: just use {!Printexc.register_printer} instead? *) + let of_exn e = let buf = Buffer.create 15 in let rec try_printers l = match l with @@ -84,6 +89,10 @@ let get_exn = function | `Ok x -> x | `Error _ -> raise (Invalid_argument "CCError.get_exn") +let catch e ~ok ~err = match e with + | `Ok x -> ok x + | `Error y -> err y + let flat_map f e = match e with | `Ok x -> f x | `Error s -> `Error s @@ -187,6 +196,14 @@ let retry n f = | `Error e -> retry (n-1) (e::acc) in retry n [] +(** {2 Infix} *) + +module Infix = struct + let (>>=) = (>>=) + let (>|=) = (>|=) + let (<*>) = (<*>) +end + (** {2 Monadic Operations} *) module type MONAD = sig diff --git a/src/core/CCError.mli b/src/core/CCError.mli index 57dc714a..072ecc96 100644 --- a/src/core/CCError.mli +++ b/src/core/CCError.mli @@ -75,6 +75,14 @@ val get_exn : ('a, _) t -> 'a whenever possible. @raise Invalid_argument if the value is an error. *) +val catch : ('a, 'err) t -> ok:('a -> 'b) -> err:('err -> 'b) -> 'b +(** [catch e ~ok ~err] calls either [ok] or [err] depending on + the value of [e]. + This is useful for code that does not want to depend on the exact + definition of [('a, 'b) t] used, for instance once OCaml gets a + standard [Result.t] type. + @since 0.12 *) + val flat_map : ('a -> ('b, 'err) t) -> ('a, 'err) t -> ('b, 'err) t val (>|=) : ('a, 'err) t -> ('a -> 'b) -> ('b, 'err) t @@ -120,6 +128,16 @@ val (<*>) : ('a -> 'b, 'err) t -> ('a, 'err) t -> ('b, 'err) t [`Ok (a b)]. Otherwise, it fails, and the error of [a] is chosen over the error of [b] if both fail *) +(** {2 Infix} + + @since 0.12 *) + +module Infix : sig + val (>|=) : ('a, 'err) t -> ('a -> 'b) -> ('b, 'err) t + val (>>=) : ('a, 'err) t -> ('a -> ('b, 'err) t) -> ('b, 'err) t + val (<*>) : ('a -> 'b, 'err) t -> ('a, 'err) t -> ('b, 'err) t +end + (** {2 Collections} *) val map_l : ('a -> ('b, 'err) t) -> 'a list -> ('b list, 'err) t diff --git a/src/io/CCIO.ml b/src/core/CCIO.ml similarity index 83% rename from src/io/CCIO.ml rename to src/core/CCIO.ml index 5f2916c8..afb0c5e9 100644 --- a/src/io/CCIO.ml +++ b/src/core/CCIO.ml @@ -26,7 +26,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 IO Utils} *) -type 'a gen = unit -> 'a option (** See {!CCGen} *) +type 'a gen = unit -> 'a option let gen_singleton x = let done_ = ref false in @@ -72,8 +72,8 @@ let gen_flat_map f next_elem = in next -let with_in ?(mode=0o644) ?(flags=[]) filename f = - let ic = open_in_gen flags mode filename in +let with_in ?(mode=0o644) ?(flags=[Open_text]) filename f = + let ic = open_in_gen (Open_rdonly::flags) mode filename in try let x = f ic in close_in ic; @@ -116,7 +116,14 @@ let read_lines_l ic = with End_of_file -> List.rev !l -let read_all ?(size=1024) ic = +(* thanks to nicoo for this trick *) +type _ ret_type = + | Ret_string : string ret_type + | Ret_bytes : Bytes.t ret_type + +let read_all_ +: type a. op:a ret_type -> size:int -> in_channel -> a += fun ~op ~size ic -> let buf = ref (Bytes.create size) in let len = ref 0 in try @@ -132,10 +139,16 @@ let read_all ?(size=1024) ic = done; assert false (* never reached*) with Exit -> - Bytes.sub_string !buf 0 !len + match op with + | Ret_string -> Bytes.sub_string !buf 0 !len + | Ret_bytes -> Bytes.sub !buf 0 !len -let with_out ?(mode=0o644) ?(flags=[]) filename f = - let oc = open_out_gen flags mode filename in +let read_all_bytes ?(size=1024) ic = read_all_ ~op:Ret_bytes ~size ic + +let read_all ?(size=1024) ic = read_all_ ~op:Ret_string ~size ic + +let with_out ?(mode=0o644) ?(flags=[Open_creat; Open_trunc; Open_text]) filename f = + let oc = open_out_gen (Open_wronly::flags) mode filename in try let x = f oc in close_out oc; @@ -145,7 +158,7 @@ let with_out ?(mode=0o644) ?(flags=[]) filename f = raise e let with_out_a ?mode ?(flags=[]) filename f = - with_out ?mode ~flags:(Open_creat::Open_append::flags) filename f + with_out ?mode ~flags:(Open_wronly::Open_creat::Open_append::flags) filename f let write_line oc s = output_string oc s; @@ -173,6 +186,19 @@ let rec write_lines oc g = match g () with let write_lines_l oc l = List.iter (write_line oc) l +let with_in_out ?(mode=0o644) ?(flags=[Open_creat]) filename f = + let ic = open_in_gen (Open_rdonly::flags) mode filename in + let oc = open_out_gen (Open_wronly::flags) mode filename in + try + let x = f ic oc in + close_out oc; (* must be first?! *) + close_in ic; + x + with e -> + close_out_noerr oc; + close_in_noerr ic; + raise e + let tee funs g () = match g() with | None -> None | Some x as res -> diff --git a/src/io/CCIO.mli b/src/core/CCIO.mli similarity index 83% rename from src/io/CCIO.mli rename to src/core/CCIO.mli index e338ef16..79a8ce64 100644 --- a/src/io/CCIO.mli +++ b/src/core/CCIO.mli @@ -30,9 +30,6 @@ Simple utilities to deal with basic Input/Output tasks in a resource-safe way. For advanced IO tasks, the user is advised to use something like Lwt or Async, that are far more comprehensive. -{b NOTE} this was formerly a monadic IO module. The old module is now -in [containers.advanced] under the name [CCMonadIO]. - Examples: - obtain the list of lines of a file: @@ -48,7 +45,7 @@ Examples: with_in "/tmp/input" (fun ic -> let chunks = read_chunks ic in - with_out ~flags:[Open_creat; Open_wronly] ~mode:0o644 "/tmp/output" + with_out ~flags:[Open_binary] ~mode:0o644 "/tmp/output" (fun oc -> write_gen oc chunks ) @@ -58,10 +55,12 @@ Examples: @since 0.6 +@before 0.12 was in 'containers.io', now moved into 'containers' + *) -type 'a gen = unit -> 'a option (** See {!Gen} *) +type 'a gen = unit -> 'a option (** See {!Gen} in the gen library *) (** {2 Input} *) @@ -69,7 +68,8 @@ val with_in : ?mode:int -> ?flags:open_flag list -> string -> (in_channel -> 'a) -> 'a (** Open an input file with the given optional flag list, calls the function on the input channel. When the function raises or returns, the - channel is closed. *) + channel is closed. + @param flags opening flags (default [[Open_text]]). [Open_rdonly] is used in any cases *) val read_chunks : ?size:int -> in_channel -> string gen (** Read the channel's content into chunks of size [size] *) @@ -86,18 +86,26 @@ val read_lines_l : in_channel -> string list val read_all : ?size:int -> in_channel -> string (** Read the whole channel into a buffer, then converted into a string. - @param size the internal buffer size @since 0.7 *) + @param size the internal buffer size + @since 0.7 *) + +val read_all_bytes : ?size:int -> in_channel -> Bytes.t +(** Read the whole channel into a mutable byte array + @param size the internal buffer size + @since 0.12 *) (** {6 Output} *) val with_out : ?mode:int -> ?flags:open_flag list -> string -> (out_channel -> 'a) -> 'a -(** Same as {!with_in} but for an output channel *) +(** Same as {!with_in} but for an output channel + @param flags opening flags (default [[Open_creat; Open_trunc; Open_text]]). + [Open_wronly] is used in any cases *) val with_out_a : ?mode:int -> ?flags:open_flag list -> string -> (out_channel -> 'a) -> 'a -(** Similar to {!with_out} but with the [Open_append] and [Open_creat] - flags activated *) +(** Similar to {!with_out} but with the [[Open_append; Open_creat; Open_wronly]] + flags activated, to append to the file *) val write_line : out_channel -> string -> unit (** Write the given string on the channel, followed by "\n" *) @@ -111,6 +119,14 @@ val write_lines : out_channel -> string gen -> unit val write_lines_l : out_channel -> string list -> unit +(** {2 Both} *) + +val with_in_out : ?mode:int -> ?flags:open_flag list -> + string -> (in_channel -> out_channel -> 'a) -> 'a +(** Combines {!with_in} and {!with_out}. + @param flags opening flags (default [[Open_creat]]) + @since 0.12 *) + (** {2 Misc for Generators} *) val tee : ('a -> unit) list -> 'a gen -> 'a gen diff --git a/src/core/CCList.ml b/src/core/CCList.ml index f8a82c3b..d2704c10 100644 --- a/src/core/CCList.ml +++ b/src/core/CCList.ml @@ -59,6 +59,8 @@ let (>|=) l f = map f l let direct_depth_append_ = 10_000 +let cons x l = x::l + let append l1 l2 = let rec direct i l1 l2 = match l1 with | [] -> l2 diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 201a2112..06cb20db 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -41,6 +41,10 @@ val (>|=) : 'a t -> ('a -> 'b) -> 'b t (** Infix version of [map] with reversed arguments @since 0.5 *) +val cons : 'a -> 'a t -> 'a t +(** [cons x l] is [x::l] + @since 0.12 *) + val append : 'a t -> 'a t -> 'a t (** Safe version of append *) diff --git a/src/core/CCRandom.ml b/src/core/CCRandom.ml index b4d75504..9a0f597f 100644 --- a/src/core/CCRandom.ml +++ b/src/core/CCRandom.ml @@ -70,8 +70,9 @@ let float f st = Random.State.float st f let small_float = float 100.0 -let float_range i j st = i +. Random.State.float st (j-.i+.1.) +let float_range i j st = i +. Random.State.float st (j-.i) +(* TODO: sample functions *) let replicate n g st = let rec aux acc n = diff --git a/src/core/CCRandom.mli b/src/core/CCRandom.mli index 506fca86..05b05c8c 100644 --- a/src/core/CCRandom.mli +++ b/src/core/CCRandom.mli @@ -97,10 +97,9 @@ val float : float -> float t @since 0.6.1 *) val float_range : float -> float -> float t -(** Inclusive range +(** Inclusive range. [float_range a b] assumes [a < b]. @since 0.6.1 *) - val split : int -> (int * int) option t (** Split a positive value [n] into [n1,n2] where [n = n1 + n2]. @return [None] if the value is too small *) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 36ed8936..03053161 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -35,8 +35,10 @@ module type S = sig val length : t -> int - val blit : t -> int -> t -> int -> int -> unit - (** See {!String.blit} *) + val blit : t -> int -> Bytes.t -> int -> int -> unit + (** Similar to {!String.blit}. + Compatible with the [-safe-string] option. + @raise Invalid_argument if indices are not valid *) val fold : ('a -> char -> 'a) -> 'a -> t -> 'a @@ -87,6 +89,32 @@ let is_sub ~sub i s j ~len = if i+len > String.length sub then invalid_arg "String.is_sub"; _is_sub ~sub i s j ~len +(* note: inefficient *) +let find ?(start=0) ~sub s = + let n = String.length sub in + let i = ref start in + try + while !i + n < String.length s do + if _is_sub ~sub 0 s !i ~len:n then raise Exit; + incr i + done; + -1 + with Exit -> + !i + +let mem ?start ~sub s = find ?start ~sub s >= 0 + +let rfind ~sub s = + let n = String.length sub in + let i = ref (String.length s - n) in + try + while !i >= 0 do + if _is_sub ~sub 0 s !i ~len:n then raise Exit; + decr i + done; + ~-1 + with Exit -> + !i module Split = struct type split_state = @@ -158,20 +186,17 @@ module Split = struct let seq ~by s = _mkseq ~by s _tuple3 let seq_cpy ~by s = _mkseq ~by s String.sub -end -(* note: inefficient *) -let find ?(start=0) ~sub s = - let n = String.length sub in - let i = ref start in - try - while !i + n < String.length s do - if _is_sub ~sub 0 s !i ~len:n then raise Exit; - incr i - done; - -1 - with Exit -> - !i + let left ~by s = + let i = find ~sub:by s in + if i = ~-1 then None + else Some (String.sub s 0 i, String.sub s (i+1) (String.length s - i - 1)) + + let right ~by s = + let i = rfind ~sub:by s in + if i = ~-1 then None + else Some (String.sub s 0 i, String.sub s (i+1) (String.length s - i - 1)) +end let repeat s n = assert (n>=0); @@ -252,11 +277,6 @@ let of_list l = List.iter (Buffer.add_char buf) l; Buffer.contents buf -(*$T - of_list ['a'; 'b'; 'c'] = "abc" - of_list [] = "" -*) - let of_array a = init (Array.length a) (fun i -> a.(i)) @@ -281,6 +301,93 @@ let unlines l = String.concat "\n" l let unlines_gen g = concat_gen ~sep:"\n" g +let set s i c = + if i<0 || i>= String.length s then invalid_arg "CCString.set"; + init (String.length s) (fun j -> if i=j then c else s.[j]) + +let iter = String.iter + +#if OCAML_MAJOR >= 4 + +let map = String.map +let iteri = String.iteri + +#else + +let map f s = init (length s) (fun i -> f s.[i]) + +let iteri f s = + for i = 0 to String.length s - 1 do + f i s.[i] + done + +#endif + +#if OCAML_MAJOR >= 4 && OCAML_MINOR >= 2 + +let mapi = String.mapi + +#else + +let mapi f s = init (length s) (fun i -> f i s.[i]) + +#endif + +let flat_map ?sep f s = + let buf = Buffer.create (String.length s) in + iteri + (fun i c -> + begin match sep with + | Some _ when i=0 -> () + | None -> () + | Some sep -> Buffer.add_string buf sep + end; + Buffer.add_string buf (f c) + ) s; + Buffer.contents buf + +exception MyExit + +let for_all p s = + try iter (fun c -> if not (p c) then raise MyExit) s; true + with MyExit -> false + +let exists p s = + try iter (fun c -> if p c then raise MyExit) s; false + with MyExit -> true + +let map2 f s1 s2 = + if length s1 <> length s2 then invalid_arg "String.map2"; + init (String.length s1) (fun i -> f s1.[i] s2.[i]) + +let iter2 f s1 s2 = + if length s1 <> length s2 then invalid_arg "String.iter2"; + for i = 0 to String.length s1 - 1 do + f s1.[i] s2.[i] + done + +let iteri2 f s1 s2 = + if length s1 <> length s2 then invalid_arg "String.iteri2"; + for i = 0 to String.length s1 - 1 do + f i s1.[i] s2.[i] + done + +let fold2 f acc s1 s2 = + if length s1 <> length s2 then invalid_arg "String.fold2"; + let rec fold' acc s1 s2 i = + if i = String.length s1 then acc + else fold' (f acc s1.[i] s2.[i]) s1 s2 (i+1) + in + fold' acc s1 s2 0 + +let for_all2 p s1 s2 = + try iter2 (fun c1 c2 -> if not (p c1 c2) then raise MyExit) s1 s2; true + with MyExit -> false + +let exists2 p s1 s2 = + try iter2 (fun c1 c2 -> if p c1 c2 then raise MyExit) s1 s2; false + with MyExit -> true + let pp buf s = Buffer.add_char buf '"'; Buffer.add_string buf s; @@ -308,9 +415,9 @@ module Sub = struct let length (_,_,l) = l - let blit (a1,i1,len1) o1 (a2,i2,len2) o2 len = - if o1+len>len1 || o2+len>len2 then invalid_arg "CCString.Sub.blit"; - String.blit a1 (i1+o1) a2 (i2+o2) len + let blit (a1,i1,len1) o1 a2 o2 len = + if o1+len>len1 then invalid_arg "CCString.Sub.blit"; + blit a1 (i1+o1) a2 o2 len let fold f acc (s,i,len) = let rec fold_rec f acc s i j = diff --git a/src/core/CCString.mli b/src/core/CCString.mli index bf03f5e0..e4954971 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -40,8 +40,18 @@ module type S = sig val length : t -> int - val blit : t -> int -> t -> int -> int -> unit - (** See {!String.blit} *) + val blit : t -> int -> Bytes.t -> int -> int -> unit + (** Similar to {!String.blit}. + Compatible with the [-safe-string] option. + @raise Invalid_argument if indices are not valid *) + + (* + val blit_immut : t -> int -> t -> int -> int -> string + (** Immutable version of {!blit}, returning a new string. + [blit a i b j len] is the same as [b], but in which + the range [j, ..., j+len] is replaced by [a.[i], ..., a.[i + len]]. + @raise Invalid_argument if indices are not valid *) + *) val fold : ('a -> char -> 'a) -> 'a -> t -> 'a (** Fold on chars by increasing index. @@ -81,12 +91,44 @@ val of_klist : char klist -> string val of_list : char list -> string val of_array : char array -> string +(*$T + of_list ['a'; 'b'; 'c'] = "abc" + of_list [] = "" +*) + val to_array : string -> char array val find : ?start:int -> sub:string -> string -> int (** Find [sub] in string, returns its first index or [-1]. Should only be used with very small [sub] *) +(*$T + find ~sub:"bc" "abcd" = 1 + find ~sub:"bc" "abd" = ~-1 + find ~sub:"a" "_a_a_a_" = 1 +*) + +val mem : ?start:int -> sub:string -> string -> bool +(** [mem ~sub s] is true iff [sub] is a substring of [s] + @since 0.12 *) + +(*$T + mem ~sub:"bc" "abcd" + not (mem ~sub:"a b" "abcd") +*) + +val rfind : sub:string -> string -> int +(** Find [sub] in string from the right, returns its first index or [-1]. + Should only be used with very small [sub] + @since 0.12 *) + +(*$T + rfind ~sub:"bc" "abcd" = 1 + rfind ~sub:"bc" "abd" = ~-1 + rfind ~sub:"a" "_a_a_a_" = 5 + rfind ~sub:"bc" "abcdbcd" = 4 +*) + val is_sub : sub:string -> int -> string -> int -> len:int -> bool (** [is_sub ~sub i s j ~len] returns [true] iff the substring of [sub] starting at position [i] and of length [len] *) @@ -137,8 +179,81 @@ val unlines_gen : string gen -> string Q.printable_string (fun s -> unlines (lines s) = s) *) +val set : string -> int -> char -> string +(** [set s i c] creates a new string which is a copy of [s], except + for index [i], which becomes [c]. + @raise Invalid_argument if [i] is an invalid index + @since 0.12 *) + +(*$T + set "abcd" 1 '_' = "a_cd" + set "abcd" 0 '-' = "-bcd" + (try ignore (set "abc" 5 '_'); false with Invalid_argument _ -> true) +*) + +val iter : (char -> unit) -> string -> unit +(** Alias to {!String.iter} + @since 0.12 *) + +val iteri : (int -> char -> unit) -> string -> unit +(** iter on chars with their index + @since 0.12 *) + +val map : (char -> char) -> string -> string +(** map chars + @since 0.12 *) + +val mapi : (int -> char -> char) -> string -> string +(** map chars with their index + @since 0.12 *) + +val flat_map : ?sep:string -> (char -> string) -> string -> string +(** map each chars to a string, then concatenates them all + @param sep optional separator between each generated string + @since 0.12 *) + +val for_all : (char -> bool) -> string -> bool +(** true for all chars? + @since 0.12 *) + +val exists : (char -> bool) -> string -> bool +(** true for some char? + @since 0.12 *) + include S with type t := string +(** {2 Operations on 2 strings} *) + +val map2 : (char -> char -> char) -> string -> string -> string +(** map pairs of chars + @raises Invalid_argument if the strings have not the same length + @since 0.12 *) + +val iter2: (char -> char -> unit) -> string -> string -> unit +(** iterate on pairs of chars + @raises Invalid_argument if the strings have not the same length + @since 0.12 *) + +val iteri2: (int -> char -> char -> unit) -> string -> string -> unit +(** iterate on pairs of chars with their index + @raises Invalid_argument if the strings have not the same length + @since 0.12 *) + +val fold2: ('a -> char -> char -> 'a) -> 'a -> string -> string -> 'a +(** fold on pairs of chars + @raises Invalid_argument if the strings have not the same length + @since 0.12 *) + +val for_all2 : (char -> char -> bool) -> string -> string -> bool +(** all pair of chars respect the predicate? + @raises Invalid_argument if the strings have not the same length + @since 0.12 *) + +val exists2 : (char -> char -> bool) -> string -> string -> bool +(** exists a pair of chars? + @raises Invalid_argument if the strings have not the same length + @since 0.12 *) + (** {2 Splitting} *) module Split : sig @@ -175,6 +290,26 @@ module Split : sig val seq_cpy : by:string -> string -> string sequence val klist_cpy : by:string -> string -> string klist + + val left : by:string -> string -> (string * string) option + (** Split on the first occurrence of [by] from the left-most part of + the string + @since 0.12 *) + + (*$T + Split.left ~by:" " "ab cde f g " = Some ("ab", "cde f g ") + Split.left ~by:"_" "abcde" = None + *) + + val right : by:string -> string -> (string * string) option + (** Split on the first occurrence of [by] from the rightmost part of + the string + @since 0.12 *) + + (*$T + Split.right ~by:" " "ab cde f g" = Some ("ab cde f", "g") + Split.right ~by:"_" "abcde" = None + *) end (** {2 Slices} A contiguous part of a string *) diff --git a/src/core/META b/src/core/META index 25c822ad..35cf4073 100644 --- a/src/core/META +++ b/src/core/META @@ -1,6 +1,6 @@ # OASIS_START -# DO NOT EDIT (digest: 21a795d293af857176fa2c97f6316578) -version = "0.11" +# DO NOT EDIT (digest: 829086f96d06e762e96acbd3a2cea082) +version = "0.12" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers.cma" @@ -9,7 +9,7 @@ archive(native) = "containers.cmxa" archive(native, plugin) = "containers.cmxs" exists_if = "containers.cma" package "unix" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "bytes unix" archive(byte) = "containers_unix.cma" @@ -20,7 +20,7 @@ package "unix" ( ) package "thread" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "containers threads" archive(byte) = "containers_thread.cma" @@ -31,7 +31,7 @@ package "thread" ( ) package "string" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_string.cma" @@ -42,7 +42,7 @@ package "string" ( ) package "sexp" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_sexp.cma" @@ -53,7 +53,7 @@ package "sexp" ( ) package "misc" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "containers containers.data" archive(byte) = "containers_misc.cma" @@ -64,7 +64,7 @@ package "misc" ( ) package "lwt" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "containers lwt containers.misc" archive(byte) = "containers_lwt.cma" @@ -75,7 +75,7 @@ package "lwt" ( ) package "iter" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." archive(byte) = "containers_iter.cma" archive(byte, plugin) = "containers_iter.cma" @@ -85,7 +85,7 @@ package "iter" ( ) package "io" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_io.cma" @@ -96,7 +96,7 @@ package "io" ( ) package "data" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_data.cma" @@ -107,7 +107,7 @@ package "data" ( ) package "bigarray" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "containers bigarray bytes" archive(byte) = "containers_bigarray.cma" @@ -118,7 +118,7 @@ package "bigarray" ( ) package "advanced" ( - version = "0.11" + version = "0.12" description = "A modular standard library focused on data structures." requires = "containers sequence" archive(byte) = "containers_advanced.cma" diff --git a/src/core/containers.mldylib b/src/core/containers.mldylib index 3ac48971..fe1c5d84 100644 --- a/src/core/containers.mldylib +++ b/src/core/containers.mldylib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: b1fae2373cf2a628a9465ba233f7c127) +# DO NOT EDIT (digest: 724b9ea68be5bbd410c45a66cd7b6b97) CCVector CCPrint CCError @@ -21,5 +21,6 @@ CCString CCHashtbl CCMap CCFormat +CCIO Containers # OASIS_STOP diff --git a/src/core/containers.mllib b/src/core/containers.mllib index 3ac48971..fe1c5d84 100644 --- a/src/core/containers.mllib +++ b/src/core/containers.mllib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: b1fae2373cf2a628a9465ba233f7c127) +# DO NOT EDIT (digest: 724b9ea68be5bbd410c45a66cd7b6b97) CCVector CCPrint CCError @@ -21,5 +21,6 @@ CCString CCHashtbl CCMap CCFormat +CCIO Containers # OASIS_STOP diff --git a/src/data/CCDeque.ml b/src/data/CCDeque.ml index cc83f425..48d05e4d 100644 --- a/src/data/CCDeque.ml +++ b/src/data/CCDeque.ml @@ -30,6 +30,7 @@ type 'a elt = { mutable prev : 'a elt; mutable next : 'a elt; } (** A cell holding a single element *) + and 'a t = 'a elt option ref (** The deque, a double linked list of cells *) diff --git a/src/data/CCGraph.ml b/src/data/CCGraph.ml new file mode 100644 index 00000000..72781d08 --- /dev/null +++ b/src/data/CCGraph.ml @@ -0,0 +1,789 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +type 'a sequence = ('a -> unit) -> unit + +type 'a sequence_once = 'a sequence + +exception Sequence_once + +let (|>) x f = f x + +module Seq = struct + type 'a t = 'a sequence + let return x k = k x + let (>>=) a f k = a (fun x -> f x k) + let map f a k = a (fun x -> k (f x)) + let filter_map f a k = a (fun x -> match f x with None -> () | Some y -> k y) + let iter f a = a f + let fold f acc a = + let acc = ref acc in + a (fun x -> acc := f !acc x); + !acc + let to_list seq = fold (fun acc x->x::acc) [] seq |> List.rev +end + +(** {2 Interfaces for graphs} *) + +(** Directed graph with vertices of type ['v] and edges of type [e'] *) +type ('v, 'e) t = { + children: 'v -> 'e sequence; + origin: 'e -> 'v; + dest: 'e -> 'v; +} + +type ('v, 'e) graph = ('v, 'e) t + +(** Mutable bitset for values of type ['v] *) +type 'v tag_set = { + get_tag: 'v -> bool; + set_tag: 'v -> unit; (** Set tag for the given element *) +} + +(** Mutable table with keys ['k] and values ['a] *) +type ('k, 'a) table = { + mem: 'k -> bool; + find: 'k -> 'a; (** @raise Not_found *) + add: 'k -> 'a -> unit; (** Erases previous binding *) +} + +(** Mutable set *) +type 'a set = ('a, unit) table + +let mk_table (type k) ?(eq=(=)) ?(hash=Hashtbl.hash) size = + let module H = Hashtbl.Make(struct + type t = k + let equal = eq + let hash = hash + end) in + let tbl = H.create size in + { mem=(fun k -> H.mem tbl k) + ; find=(fun k -> H.find tbl k) + ; add=(fun k v -> H.replace tbl k v) + } + +let mk_map (type k) ?(cmp=Pervasives.compare) () = + let module M = Map.Make(struct + type t = k + let compare = cmp + end) in + let tbl = ref M.empty in + { mem=(fun k -> M.mem k !tbl) + ; find=(fun k -> M.find k !tbl) + ; add=(fun k v -> tbl := M.add k v !tbl) + } + +(** {2 Bags} *) + +type 'a bag = { + push: 'a -> unit; + is_empty: unit -> bool; + pop: unit -> 'a; (** raises some exception is empty *) +} + +let mk_queue () = + let q = Queue.create() in + { push=(fun x -> Queue.push x q) + ; is_empty=(fun () -> Queue.is_empty q) + ; pop=(fun () -> Queue.pop q); + } + +let mk_stack() = + let s = Stack.create() in + { push=(fun x -> Stack.push x s) + ; is_empty=(fun () -> Stack.is_empty s) + ; pop=(fun () -> Stack.pop s); + } + +(** Implementation from http://en.wikipedia.org/wiki/Skew_heap *) +module Heap = struct + type 'a t = + | E + | N of 'a * 'a t * 'a t + + let is_empty = function + | E -> true + | N _ -> false + + let rec union ~leq t1 t2 = match t1, t2 with + | E, _ -> t2 + | _, E -> t1 + | N (x1, l1, r1), N (x2, l2, r2) -> + if leq x1 x2 + then N (x1, union ~leq t2 r1, l1) + else N (x2, union ~leq t1 r2, l2) + + let insert ~leq h x = union ~leq (N (x, E, E)) h + + let pop ~leq h = match h with + | E -> raise Not_found + | N (x, l, r) -> + x, union ~leq l r +end + +let mk_heap ~leq = + let t = ref Heap.E in + { push=(fun x -> t := Heap.insert ~leq !t x) + ; is_empty=(fun () -> Heap.is_empty !t) + ; pop=(fun () -> + let x, h = Heap.pop ~leq !t in + t := h; + x + ) + } + +(** {2 Traversals} *) + +module Traverse = struct + type 'e path = 'e list + + let generic_tag ~tags ~bag ~graph seq = + let first = ref true in + fun k -> + (* ensure linearity *) + if !first then first := false else raise Sequence_once; + Seq.iter bag.push seq; + while not (bag.is_empty ()) do + let x = bag.pop () in + if not (tags.get_tag x) then ( + k x; + tags.set_tag x; + Seq.iter + (fun e -> bag.push (graph.dest e)) + (graph.children x) + ) + done + + let generic ?(tbl=mk_table 128) ~bag ~graph seq = + let tags = { + get_tag=tbl.mem; + set_tag=(fun v -> tbl.add v ()); + } in + generic_tag ~tags ~bag ~graph seq + + let bfs ?tbl ~graph seq = + generic ?tbl ~bag:(mk_queue ()) ~graph seq + + let bfs_tag ~tags ~graph seq = + generic_tag ~tags ~bag:(mk_queue()) ~graph seq + + let dijkstra_tag ?(dist=fun _ -> 1) ~tags ~graph seq = + let tags' = { + get_tag=(fun (v,_,_) -> tags.get_tag v); + set_tag=(fun (v,_,_) -> tags.set_tag v); + } + and seq' = Seq.map (fun v -> v, 0, []) seq + and graph' = { + children=(fun (v,d,p) -> Seq.map (fun e -> e, d, p) (graph.children v)); + origin=(fun (e, d, p) -> graph.origin e, d, p); + dest=(fun (e, d, p) -> graph.dest e, d + dist e, e :: p); + } in + let bag = mk_heap ~leq:(fun (_,d1,_) (_,d2,_) -> d1 <= d2) in + generic_tag ~tags:tags' ~bag ~graph:graph' seq' + + let dijkstra ?(tbl=mk_table 128) ?dist ~graph seq = + let tags = { + get_tag=tbl.mem; + set_tag=(fun v -> tbl.add v ()); + } in + dijkstra_tag ~tags ?dist ~graph seq + + let dfs ?tbl ~graph seq = + generic ?tbl ~bag:(mk_stack ()) ~graph seq + + let dfs_tag ~tags ~graph seq = + generic_tag ~tags ~bag:(mk_stack()) ~graph seq + + module Event = struct + type edge_kind = [`Forward | `Back | `Cross ] + + (** A traversal is a sequence of such events *) + type ('v,'e) t = + [ `Enter of 'v * int * 'e path (* unique index in traversal, path from start *) + | `Exit of 'v + | `Edge of 'e * edge_kind + ] + + let get_vertex = function + | `Enter (v, _, _) -> Some (v, `Enter) + | `Exit v -> Some (v, `Exit) + | `Edge _ -> None + + let get_enter = function + | `Enter (v, _, _) -> Some v + | `Exit _ + | `Edge _ -> None + + let get_exit = function + | `Exit v -> Some v + | `Enter _ + | `Edge _ -> None + + let get_edge = function + | `Edge (e, _) -> Some e + | `Enter _ + | `Exit _ -> None + + let get_edge_kind = function + | `Edge (e, k) -> Some (e, k) + | `Enter _ + | `Exit _ -> None + + (* is [v] the origin of some edge in [path]? *) + let rec list_mem_ ~eq ~graph v path = match path with + | [] -> false + | e :: path' -> + eq v (graph.origin e) || list_mem_ ~eq ~graph v path' + + let dfs_tag ?(eq=(=)) ~tags ~graph seq = + let first = ref true in + fun k -> + if !first then first := false else raise Sequence_once; + let bag = mk_stack() in + let n = ref 0 in + Seq.iter + (fun v -> + (* start DFS from this vertex *) + bag.push (`Enter (v, [])); + while not (bag.is_empty ()) do + match bag.pop () with + | `Enter (x, path) -> + if not (tags.get_tag x) then ( + let num = !n in + incr n; + tags.set_tag x; + k (`Enter (x, num, path)); + bag.push (`Exit x); + Seq.iter + (fun e -> bag.push (`Edge (e, e :: path))) + (graph.children x); + ) + | `Exit x -> k (`Exit x) + | `Edge (e, path) -> + let v = graph.dest e in + let edge_kind = + if tags.get_tag v + then if list_mem_ ~eq ~graph v path + then `Back + else `Cross + else ( + bag.push (`Enter (v, path)); + `Forward + ) in + k (`Edge (e, edge_kind)) + done + ) seq + + let dfs ?(tbl=mk_table 128) ?eq ~graph seq = + let tags = { + set_tag=(fun v -> tbl.add v ()); + get_tag=tbl.mem; + } in + dfs_tag ?eq ~tags ~graph seq + end +end + +(** {2 Topological Sort} *) + +exception Has_cycle + +let topo_sort_tag ?(eq=(=)) ?(rev=false) ~tags ~graph seq = + (* use DFS *) + let l = + Traverse.Event.dfs_tag ~eq ~tags ~graph seq + |> Seq.filter_map + (function + | `Exit v -> Some v + | `Edge (_, `Back) -> raise Has_cycle + | `Enter _ + | `Edge _ -> None + ) + |> Seq.fold (fun acc x -> x::acc) [] + in + if rev then List.rev l else l + +let topo_sort ?eq ?rev ?(tbl=mk_table 128) ~graph seq = + let tags = { + get_tag=tbl.mem; + set_tag=(fun v -> tbl.add v ()); + } in + topo_sort_tag ?eq ?rev ~tags ~graph seq + +(*$T + let l = topo_sort ~graph:divisors_graph (Seq.return 42) in \ + List.for_all (fun (i,j) -> \ + let idx_i = CCList.find_idx ((=)i) l |> CCOpt.get_exn |> fst in \ + let idx_j = CCList.find_idx ((=)j) l |> CCOpt.get_exn |> fst in \ + idx_i < idx_j) \ + [ 42, 21; 14, 2; 3, 1; 21, 7; 42, 3] +*) + +(** {2 Lazy Spanning Tree} *) + +module LazyTree = struct + type ('v, 'e) t = + | Vertex of 'v * ('e * ('v, 'e) t) list Lazy.t + + let rec map_v f (Vertex (v, l)) = + let l' = lazy (List.map (fun (e, child) -> e, map_v f child) (Lazy.force l)) in + Vertex (f v, l') + + let rec fold_v f acc t = match t with + | Vertex (v, l) -> + let acc = f acc v in + List.fold_left + (fun acc (_, t') -> fold_v f acc t') + acc + (Lazy.force l) +end + +let spanning_tree_tag ~tags ~graph v = + let rec mk_node v = + let children = lazy ( + Seq.fold + (fun acc e -> + let v' = graph.dest e in + if tags.get_tag v' + then acc + else ( + tags.set_tag v'; + (e, mk_node v') :: acc + ) + ) [] (graph.children v) + ) + in + LazyTree.Vertex (v, children) + in + mk_node v + +let spanning_tree ?(tbl=mk_table 128) ~graph v = + let tags = { + get_tag=tbl.mem; + set_tag=(fun v -> tbl.add v ()); + } in + spanning_tree_tag ~tags ~graph v + +(** {2 Strongly Connected Components} *) + +module SCC = struct + type 'v state = { + mutable min_id: int; (* min ID of the vertex' scc *) + id: int; (* ID of the vertex *) + mutable on_stack: bool; + mutable vertex: 'v; + } + + let mk_cell v n = { + min_id=n; + id=n; + on_stack=false; + vertex=v; + } + + (* pop elements of [stack] until we reach node with given [id] *) + let rec pop_down_to ~id acc stack = + assert (not(Stack.is_empty stack)); + let cell = Stack.pop stack in + cell.on_stack <- false; + if cell.id = id then ( + assert (cell.id = cell.min_id); + cell.vertex :: acc (* return SCC *) + ) else pop_down_to ~id (cell.vertex::acc) stack + + let explore ~tbl ~graph seq = + let first = ref true in + fun k -> + if !first then first := false else raise Sequence_once; + (* stack of nodes being explored, for the DFS *) + let to_explore = Stack.create() in + (* stack for Tarjan's algorithm itself *) + let stack = Stack.create () in + (* unique ID *) + let n = ref 0 in + (* exploration *) + Seq.iter + (fun v -> + Stack.push (`Enter v) to_explore; + while not (Stack.is_empty to_explore) do + match Stack.pop to_explore with + | `Enter v -> + if not (tbl.mem v) then ( + (* remember unique ID for [v] *) + let id = !n in + incr n; + let cell = mk_cell v id in + cell.on_stack <- true; + tbl.add v cell; + Stack.push cell stack; + Stack.push (`Exit (v, cell)) to_explore; + (* explore children *) + Seq.iter + (fun e -> Stack.push (`Enter (graph.dest e)) to_explore) + (graph.children v) + ) + | `Exit (v, cell) -> + (* update [min_id] *) + assert cell.on_stack; + Seq.iter + (fun e -> + let dest = graph.dest e in + (* must not fail, [dest] already explored *) + let dest_cell = tbl.find dest in + (* same SCC? yes if [dest] points to [cell.v] *) + if dest_cell.on_stack + then cell.min_id <- min cell.min_id dest_cell.min_id + ) (graph.children v); + (* pop from stack if SCC found *) + if cell.id = cell.min_id then ( + let scc = pop_down_to ~id:cell.id [] stack in + k scc + ) + done + ) seq; + assert (Stack.is_empty stack); + () +end + +type 'v scc_state = 'v SCC.state + +let scc ?(tbl=mk_table 128) ~graph seq = SCC.explore ~tbl ~graph seq + +(* example from https://en.wikipedia.org/wiki/Strongly_connected_component *) +(*$R + let set_eq ?(eq=(=)) l1 l2 = CCList.Set.subset ~eq l1 l2 && CCList.Set.subset ~eq l2 l1 in + let graph = of_list + [ "a", "b" + ; "b", "e" + ; "e", "a" + ; "b", "f" + ; "e", "f" + ; "f", "g" + ; "g", "f" + ; "b", "c" + ; "c", "g" + ; "c", "d" + ; "d", "c" + ; "d", "h" + ; "h", "d" + ; "h", "g" + ] in + let res = scc ~graph (Seq.return "a") |> Seq.to_list in + assert_bool "scc" + (set_eq ~eq:(set_eq ?eq:None) res + [ [ "a"; "b"; "e" ] + ; [ "f"; "g" ] + ; [ "c"; "d"; "h" ] + ] + ) +*) + +(** {2 Pretty printing in the DOT (graphviz) format} *) + +module Dot = struct + type attribute = [ + | `Color of string + | `Shape of string + | `Weight of int + | `Style of string + | `Label of string + | `Other of string * string + ] (** Dot attribute *) + + let pp_list pp_x out l = + Format.pp_print_string out "["; + List.iteri (fun i x -> + if i > 0 then Format.fprintf out ",@;"; + pp_x out x + ) l; + Format.pp_print_string out "]" + + type vertex_state = { + mutable explored : bool; + id : int; + } + + (** Print an enum of Full.traverse_event *) + let pp_seq + ?(tbl=mk_table 128) + ?(attrs_v=fun _ -> []) + ?(attrs_e=fun _ -> []) + ?(name="graph") + ~graph out seq = + (* print an attribute *) + let pp_attr out attr = match attr with + | `Color c -> Format.fprintf out "color=%s" c + | `Shape s -> Format.fprintf out "shape=%s" s + | `Weight w -> Format.fprintf out "weight=%d" w + | `Style s -> Format.fprintf out "style=%s" s + | `Label l -> Format.fprintf out "label=\"%s\"" l + | `Other (name, value) -> Format.fprintf out "%s=\"%s\"" name value + (* map from vertices to integers *) + and get_node = + let count = ref 0 in + fun v -> + try tbl.find v + with Not_found -> + let node = {id= !count; explored=false} in + incr count; + tbl.add v node; + node + and vertex_explored v = + try (tbl.find v).explored + with Not_found -> false + in + let set_explored v = (get_node v).explored <- true + and get_id v = (get_node v).id in + (* the unique name of a vertex *) + let pp_vertex out v = Format.fprintf out "vertex_%d" (get_id v) in + (* print preamble *) + Format.fprintf out "@[digraph \"%s\" {@;" name; + (* traverse *) + let tags = { + get_tag=vertex_explored; + set_tag=set_explored; (* allocate new ID *) + } in + let events = Traverse.Event.dfs_tag ~tags ~graph seq in + Seq.iter + (function + | `Enter (v, _n, _path) -> + let attrs = attrs_v v in + Format.fprintf out " @[%a %a;@]@." pp_vertex v (pp_list pp_attr) attrs + | `Exit _ -> () + | `Edge (e, _) -> + let v1 = graph.origin e in + let v2 = graph.dest e in + let attrs = attrs_e e in + Format.fprintf out " @[%a -> %a %a;@]@." + pp_vertex v1 pp_vertex v2 + (pp_list pp_attr) + attrs + ) events; + (* close *) + Format.fprintf out "}@]@;@?"; + () + + let pp ?tbl ?attrs_v ?attrs_e ?name ~graph fmt v = + pp_seq ?tbl ?attrs_v ?attrs_e ?name ~graph fmt (Seq.return v) + + let with_out filename f = + let oc = open_out filename in + try + let fmt = Format.formatter_of_out_channel oc in + let x = f fmt in + Format.pp_print_flush fmt (); + close_out oc; + x + with e -> + close_out oc; + raise e +end + +(** {2 Mutable Graph} *) + +type ('v, 'e) mut_graph = < + graph: ('v, 'e) t; + add_edge: 'e -> unit; + remove : 'v -> unit; +> + +let mk_mut_tbl (type k) ?(eq=(=)) ?(hash=Hashtbl.hash) size = + let module Tbl = Hashtbl.Make(struct + type t = k + let hash = hash + let equal = eq + end) in + let tbl = Tbl.create size in + object + method graph = { + origin=(fun (x,_,_) -> x); + dest=(fun (_,_,x) -> x); + children=(fun v k -> + try List.iter k (Tbl.find tbl v) + with Not_found -> () + ); + } + method add_edge (v1,e,v2) = + let l = try Tbl.find tbl v1 with Not_found -> [] in + Tbl.replace tbl v1 ((v1,e,v2)::l) + method remove v = Tbl.remove tbl v + end + +(** {2 Immutable Graph} *) + +module type MAP = sig + type vertex + type t + + val as_graph : t -> (vertex, (vertex * vertex)) graph + (** Graph view of the map *) + + val empty : t + + val add_edge : vertex -> vertex -> t -> t + + val remove_edge : vertex -> vertex -> t -> t + + val add : vertex -> t -> t + (** Add a vertex, possibly with no outgoing edge *) + + val remove : vertex -> t -> t + (** Remove the vertex and all its outgoing edges. + Edges that point to the vertex are {b NOT} removed, they must be + manually removed with {!remove_edge} *) + + val union : t -> t -> t + + val vertices : t -> vertex sequence + + val vertices_l : t -> vertex list + + val of_list : (vertex * vertex) list -> t + + val add_list : (vertex * vertex) list -> t -> t + + val to_list : t -> (vertex * vertex) list + + val of_seq : (vertex * vertex) sequence -> t + + val add_seq : (vertex * vertex) sequence -> t -> t + + val to_seq : t -> (vertex * vertex) sequence +end + +module Map(O : Map.OrderedType) = struct + module M = Map.Make(O) + module S = Set.Make(O) + + type vertex = O.t + type t = { + edges: S.t M.t; + vertices: S.t; + } + + let as_graph m = { + origin=fst; + dest=snd; + children=(fun v yield -> + try + let set = M.find v m.edges in + S.iter (fun v' -> yield (v, v')) set + with Not_found -> () + ); + } + + let empty = {edges=M.empty; vertices=S.empty} + + let add_edge v1 v2 m = + let set = try M.find v1 m.edges with Not_found -> S.empty in + let edges = M.add v1 (S.add v2 set) m.edges in + let vertices = S.add v1 (S.add v2 m.vertices) in + { edges; vertices; } + + let remove_edge v1 v2 m = + try + let set = S.remove v2 (M.find v1 m.edges) in + if S.is_empty set + then {m with edges=M.remove v1 m.edges} + else {m with edges=M.add v1 set m.edges} + with Not_found -> m + + let add v m = { m with vertices=S.add v m.vertices } + + let remove v m = + { edges=M.remove v m.edges; vertices=S.remove v m.vertices } + + let union m1 m2 = + {edges=M.merge + (fun _ s1 s2 -> match s1, s2 with + | Some s, None + | None, Some s -> Some s + | None, None -> assert false + | Some s1, Some s2 -> Some (S.union s1 s2) + ) m1.edges m2.edges; + vertices=S.union m1.vertices m2.vertices + } + + let vertices m yield = S.iter yield m.vertices + + let vertices_l m = S.fold (fun v acc -> v::acc) m.vertices [] + + let add_list l m = List.fold_left (fun m (v1,v2) -> add_edge v1 v2 m) m l + + let of_list l = add_list l empty + + let to_list m = + M.fold + (fun v set acc -> S.fold (fun v' acc -> (v,v')::acc) set acc) + m.edges [] + + let add_seq seq m = Seq.fold (fun m (v1,v2) -> add_edge v1 v2 m) m seq + + let of_seq seq = add_seq seq empty + + let to_seq m k = M.iter (fun v set -> S.iter (fun v' -> k(v,v')) set) m.edges +end + +(** {2 Misc} *) + +let of_list ?(eq=(=)) l = { + origin=fst; + dest=snd; + children=(fun v yield -> List.iter (fun (a,b) -> if eq a v then yield (a,b)) l) +} + +let of_fun f = { + origin=fst; + dest=snd; + children=(fun v yield -> + let l = f v in + List.iter (fun v' -> yield (v,v')) l + ); +} + +let of_hashtbl tbl = { + origin=fst; + dest=snd; + children=(fun v yield -> + try List.iter (fun b -> yield (v, b)) (Hashtbl.find tbl v) + with Not_found -> () + ) +} + +let divisors_graph = { + origin=fst; + dest=snd; + children=(fun i -> + (* divisors of [i] that are [>= j] *) + let rec divisors j i yield = + if j < i + then ( + if (i mod j = 0) then yield (i,j); + divisors (j+1) i yield + ) + in + divisors 1 i + ); +} diff --git a/src/data/CCGraph.mli b/src/data/CCGraph.mli new file mode 100644 index 00000000..e7f75193 --- /dev/null +++ b/src/data/CCGraph.mli @@ -0,0 +1,412 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Simple Graph Interface} + + A collections of algorithms on (mostly read-only) graph structures. + The user provides her own graph structure as a [('v, 'e) CCGraph.t], + where ['v] is the type of vertices and ['e] the type of edges + (for instance, ['e = ('v * 'v)] is perfectly fine in many cases). + + Such a [('v, 'e) CCGraph.t] structure is a record containing + three functions: two relate edges to their origin and destination, + and one maps vertices to their outgoing edges. + This abstract notion of graph makes it possible to run the algorithms + on any user-specific type that happens to have a graph structure. + + Many graph algorithms here take a sequence of vertices as input. + If the user only has a single vertex (e.g., for a topological sort + from a given vertex), she can use [Seq.return x] to build a sequence + of one element. + + {b status: unstable} + + @since 0.12 *) + +type 'a sequence = ('a -> unit) -> unit +(** A sequence of items of type ['a], possibly infinite *) + +type 'a sequence_once = 'a sequence +(** Sequence that should be used only once *) + +exception Sequence_once +(** raised when a sequence meant to be used once is used several times *) + +module Seq : sig + type 'a t = 'a sequence + val return : 'a -> 'a sequence + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t + val map : ('a -> 'b) -> 'a t -> 'b t + val filter_map : ('a -> 'b option) -> 'a t -> 'b t + val iter : ('a -> unit) -> 'a t -> unit + val fold: ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b + val to_list : 'a t -> 'a list +end + +(** {2 Interfaces for graphs} *) + +(** Directed graph with vertices of type ['v] and edges of type [e'] *) +type ('v, 'e) t = { + children: 'v -> 'e sequence; + origin: 'e -> 'v; + dest: 'e -> 'v; +} + +type ('v, 'e) graph = ('v, 'e) t + +(** Mutable tags from values of type ['v] to tags of type [bool] *) +type 'v tag_set = { + get_tag: 'v -> bool; + set_tag: 'v -> unit; (** Set tag for the given element *) +} + +(** Mutable table with keys ['k] and values ['a] *) +type ('k, 'a) table = { + mem: 'k -> bool; + find: 'k -> 'a; (** @raise Not_found if element not added before *) + add: 'k -> 'a -> unit; (** Erases previous binding *) +} + +(** Mutable set *) +type 'a set = ('a, unit) table + +val mk_table: ?eq:('k -> 'k -> bool) -> ?hash:('k -> int) -> int -> ('k, 'a) table +(** Default implementation for {!table}: a {!Hashtbl.t} *) + +val mk_map: ?cmp:('k -> 'k -> int) -> unit -> ('k, 'a) table +(** Use a {!Map.S} underneath *) + +(** {2 Bags of vertices} *) + +(** Bag of elements of type ['a] *) +type 'a bag = { + push: 'a -> unit; + is_empty: unit -> bool; + pop: unit -> 'a; (** raises some exception is empty *) +} + +val mk_queue: unit -> 'a bag +val mk_stack: unit -> 'a bag + +val mk_heap: leq:('a -> 'a -> bool) -> 'a bag +(** [mk_heap ~leq] makes a priority queue where [leq x y = true] means that + [x] is smaller than [y] and should be prioritary *) + +(** {2 Traversals} *) + +module Traverse : sig + type 'e path = 'e list + + val generic: ?tbl:'v set -> + bag:'v bag -> + graph:('v, 'e) t -> + 'v sequence -> + 'v sequence_once + (** Traversal of the given graph, starting from a sequence + of vertices, using the given bag to choose the next vertex to + explore. Each vertex is visited at most once. *) + + val generic_tag: tags:'v tag_set -> + bag:'v bag -> + graph:('v, 'e) t -> + 'v sequence -> + 'v sequence_once + (** One-shot traversal of the graph using a tag set and the given bag *) + + val dfs: ?tbl:'v set -> + graph:('v, 'e) t -> + 'v sequence -> + 'v sequence_once + + val dfs_tag: tags:'v tag_set -> + graph:('v, 'e) t -> + 'v sequence -> + 'v sequence_once + + val bfs: ?tbl:'v set -> + graph:('v, 'e) t -> + 'v sequence -> + 'v sequence_once + + val bfs_tag: tags:'v tag_set -> + graph:('v, 'e) t -> + 'v sequence -> + 'v sequence_once + + val dijkstra : ?tbl:'v set -> + ?dist:('e -> int) -> + graph:('v, 'e) t -> + 'v sequence -> + ('v * int * 'e path) sequence_once + (** Dijkstra algorithm, traverses a graph in increasing distance order. + Yields each vertex paired with its distance to the set of initial vertices + (the smallest distance needed to reach the node from the initial vertices) + @param dist distance from origin of the edge to destination, + must be strictly positive. Default is 1 for every edge *) + + val dijkstra_tag : ?dist:('e -> int) -> + tags:'v tag_set -> + graph:('v, 'e) t -> + 'v sequence -> + ('v * int * 'e path) sequence_once + + (** {2 More detailed interface} *) + module Event : sig + type edge_kind = [`Forward | `Back | `Cross ] + + (** A traversal is a sequence of such events *) + type ('v,'e) t = + [ `Enter of 'v * int * 'e path (* unique index in traversal, path from start *) + | `Exit of 'v + | `Edge of 'e * edge_kind + ] + + val get_vertex : ('v, 'e) t -> ('v * [`Enter | `Exit]) option + val get_enter : ('v, 'e) t -> 'v option + val get_exit : ('v, 'e) t -> 'v option + val get_edge : ('v, 'e) t -> 'e option + val get_edge_kind : ('v, 'e) t -> ('e * edge_kind) option + + val dfs: ?tbl:'v set -> + ?eq:('v -> 'v -> bool) -> + graph:('v, 'e) graph -> + 'v sequence -> + ('v,'e) t sequence_once + (** Full version of DFS. + @param eq equality predicate on vertices *) + + val dfs_tag: ?eq:('v -> 'v -> bool) -> + tags:'v tag_set -> + graph:('v, 'e) graph -> + 'v sequence -> + ('v,'e) t sequence_once + (** Full version of DFS using integer tags + @param eq equality predicate on vertices *) + end +end + +(** {2 Topological Sort} *) + +exception Has_cycle + +val topo_sort : ?eq:('v -> 'v -> bool) -> + ?rev:bool -> + ?tbl:'v set -> + graph:('v, 'e) t -> + 'v sequence -> + 'v list +(** [topo_sort ~graph seq] returns a list of vertices [l] where each + element of [l] is reachable from [seq]. + The list is sorted in a way such that if [v -> v'] in the graph, then + [v] comes before [v'] in the list (i.e. has a smaller index). + Basically [v -> v'] means that [v] is smaller than [v'] + see {{: https://en.wikipedia.org/wiki/Topological_sorting} wikipedia} + @param eq equality predicate on vertices (default [(=)]) + @param rev if true, the dependency relation is inverted ([v -> v'] means + [v'] occurs before [v]) + @raise Has_cycle if the graph is not a DAG *) + +val topo_sort_tag : ?eq:('v -> 'v -> bool) -> + ?rev:bool -> + tags:'v tag_set -> + graph:('v, 'e) t -> + 'v sequence -> + 'v list +(** Same as {!topo_sort} *) + +(** {2 Lazy Spanning Tree} *) + +module LazyTree : sig + type ('v, 'e) t = + | Vertex of 'v * ('e * ('v, 'e) t) list Lazy.t + + val map_v : ('a -> 'b) -> ('a, 'e) t -> ('b, 'e) t + + val fold_v : ('acc -> 'v -> 'acc) -> 'acc -> ('v, _) t -> 'acc +end + +val spanning_tree : ?tbl:'v set -> + graph:('v, 'e) t -> + 'v -> + ('v, 'e) LazyTree.t +(** [spanning_tree ~graph v] computes a lazy spanning tree that has [v] + as a root. The table [tbl] is used for the memoization part *) + +val spanning_tree_tag : tags:'v tag_set -> + graph:('v, 'e) t -> + 'v -> + ('v, 'e) LazyTree.t + +(** {2 Strongly Connected Components} *) + +type 'v scc_state +(** Hidden state for {!scc} *) + +val scc : ?tbl:('v, 'v scc_state) table -> + graph:('v, 'e) t -> + 'v sequence -> + 'v list sequence_once +(** Strongly connected components reachable from the given vertices. + Each component is a list of vertices that are all mutually reachable + in the graph. + Uses {{: https://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm} Tarjan's algorithm} + @param tbl table used to map nodes to some hidden state + *) + +(** {2 Pretty printing in the DOT (graphviz) format} + + Example (print divisors from [42]): + + {[ + let open CCGraph in + let open Dot in + with_out "/tmp/truc.dot" + (fun out -> + pp ~attrs_v:(fun i -> [`Label (string_of_int i)]) ~graph:divisors_graph out 42 + ) + ]} + +*) + +module Dot : sig + type attribute = [ + | `Color of string + | `Shape of string + | `Weight of int + | `Style of string + | `Label of string + | `Other of string * string + ] (** Dot attribute *) + + type vertex_state + (** Hidden state associated to a vertex *) + + val pp : ?tbl:('v,vertex_state) table -> + ?attrs_v:('v -> attribute list) -> + ?attrs_e:('e -> attribute list) -> + ?name:string -> + graph:('v,'e) t -> + Format.formatter -> + 'v -> + unit + (** Print the graph, starting from given vertex, on the formatter + @param attrs_v attributes for vertices + @param attrs_e attributes for edges + @param name name of the graph *) + + val pp_seq : ?tbl:('v,vertex_state) table -> + ?attrs_v:('v -> attribute list) -> + ?attrs_e:('e -> attribute list) -> + ?name:string -> + graph:('v,'e) t -> + Format.formatter -> + 'v sequence -> + unit + + val with_out : string -> (Format.formatter -> 'a) -> 'a + (** Shortcut to open a file and write to it *) +end + +(** {2 Mutable Graph} *) + +type ('v, 'e) mut_graph = < + graph: ('v, 'e) t; + add_edge: 'e -> unit; + remove : 'v -> unit; +> + +val mk_mut_tbl : ?eq:('v -> 'v -> bool) -> + ?hash:('v -> int) -> + int -> + ('v, ('v * 'a * 'v)) mut_graph +(** make a new mutable graph from a Hashtbl. Edges are labelled with type ['a] *) + +(** {2 Immutable Graph} + + A classic implementation of a graph structure on totally ordered vertices, + with unlabelled edges. The graph allows to add and remove edges and vertices, + and to iterate on edges and vertices. +*) + +module type MAP = sig + type vertex + type t + + val as_graph : t -> (vertex, (vertex * vertex)) graph + (** Graph view of the map *) + + val empty : t + + val add_edge : vertex -> vertex -> t -> t + + val remove_edge : vertex -> vertex -> t -> t + + val add : vertex -> t -> t + (** Add a vertex, possibly with no outgoing edge *) + + val remove : vertex -> t -> t + (** Remove the vertex and all its outgoing edges. + Edges that point to the vertex are {b NOT} removed, they must be + manually removed with {!remove_edge} *) + + val union : t -> t -> t + + val vertices : t -> vertex sequence + + val vertices_l : t -> vertex list + + val of_list : (vertex * vertex) list -> t + + val add_list : (vertex * vertex) list -> t -> t + + val to_list : t -> (vertex * vertex) list + + val of_seq : (vertex * vertex) sequence -> t + + val add_seq : (vertex * vertex) sequence -> t -> t + + val to_seq : t -> (vertex * vertex) sequence +end + +module Map(O : Map.OrderedType) : MAP with type vertex = O.t + +(** {2 Misc} *) + +val of_list : ?eq:('v -> 'v -> bool) -> ('v * 'v) list -> ('v, ('v * 'v)) t +(** [of_list l] makes a graph from a list of pairs of vertices. + Each pair [(a,b)] is an edge from [a] to [b]. + @param eq equality used to compare vertices *) + +val of_hashtbl : ('v, 'v list) Hashtbl.t -> ('v, ('v * 'v)) t +(** [of_hashtbl tbl] makes a graph from a hashtable that maps vertices + to lists of children *) + +val of_fun : ('v -> 'v list) -> ('v, ('v * 'v)) t +(** [of_fun f] makes a graph out of a function that maps a vertex to + the list of its children. The function is assumed to be deterministic. *) + +val divisors_graph : (int, (int * int)) t +(** [n] points to all its strict divisors *) diff --git a/src/data/CCHashconsedSet.ml b/src/data/CCHashconsedSet.ml new file mode 100644 index 00000000..9a9d7af1 --- /dev/null +++ b/src/data/CCHashconsedSet.ml @@ -0,0 +1,479 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Hashconsed Sets} *) + + +(* uses "Fast Mergeable Integer Maps", Okasaki & Gill, as a hash tree. +We use big-endian trees. *) + +module type ELT = sig + type t + + val compare : t -> t -> int + (** Total order *) + + val hash : t -> int + (** Deterministic *) +end + +module type S = sig + type elt + + type t + (** Set of elements *) + + val empty : t + + val singleton : elt -> t + + val doubleton : elt -> elt -> t + + val mem : elt -> t -> bool + + val equal : t -> t -> bool + (** Fast equality test [O(1)] *) + + val compare : t -> t -> int + (** Fast (arbitrary) comparisontest [O(1)] *) + + val hash : t -> int + (** Fast (arbitrary, deterministic) hash [O(1)] *) + + val add : elt -> t -> t + + val remove : elt -> t -> t + + val cardinal : t -> int + + val iter : (elt -> unit) -> t -> unit + (** Iterate on elements, in no particular order *) + + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + (** fold on elements, in arbitrary order *) + + val choose : t -> elt option + + val choose_exn : t -> elt + + val union : t -> t -> t + + val inter : t -> t -> t + + val diff : t -> t -> t + + (** {2 Whole-collection operations} *) + + type 'a sequence = ('a -> unit) -> unit + type 'a gen = unit -> 'a option + + val add_list : t -> elt list -> t + + val of_list : elt list -> t + + val to_list : t -> elt list + + val add_seq : t -> elt sequence -> t + + val of_seq : elt sequence -> t + + val to_seq : t -> elt sequence +end + +module Make(E : ELT) : S with type elt = E.t = struct + type elt = E.t + + type t = { + cell: cell; + id: int; (* unique hashconsing ID *) + } + and cell = + | E (* empty *) + | L of int * elt list (* leaf: sorted list of elements *) + | N of int (* common prefix *) * int (* bit switch *) * t * t + + let rec eq_list_ l1 l2 = match l1, l2 with + | [], [] -> true + | [], _ + | _, [] -> false + | x1 :: tl1, x2 :: tl2 -> + E.compare x1 x2 = 0 && eq_list_ tl1 tl2 + + let hash_pair_ a b = Hashtbl.hash (a,b) + let hash_quad_ a b c d = Hashtbl.hash (a,b,c,d) + + let rec hash_list_ l = match l with + | [] -> 0xf00d + | x :: tl -> hash_pair_ x (hash_list_ tl) + + (* hashconsing table *) + module Tbl = Weak.Make(struct + type t_ = t + type t = t_ + let equal t1 t2 = match t1.cell, t2.cell with + | E, E -> true + | L (k1, l1), L (k2, l2) -> k1==k2 && eq_list_ l1 l2 + | N (a1, b1, l1, r1), N (a2, b2, l2, r2) -> + a1==a2 && b1==b2 && l1.id == l2.id && r1.id == r2.id + | E, _ + | L _, _ + | N _, _ -> false + let hash t = match t.cell with + | E -> 42 + | L (k, l) -> hash_pair_ k (hash_list_ l) + | N (a, b, l, r) -> + hash_quad_ a b l.id r.id + end) + + let table_ = Tbl.create 4096 + let id_ = ref 1 + + (* make a node out of a cell, with hashconsing *) + let hashcons_ cell = + let n = {cell; id= !id_} in + let n' = Tbl.merge table_ n in + if n==n' then incr id_; + n' + + (* empty tree *) + let empty = hashcons_ E + + let bit_is_0_ x ~bit = x land bit = 0 + + let mask_ x ~mask = (x lor (mask -1)) land (lnot mask) + (* low endian: let mask_ x ~mask = x land (mask - 1) *) + + let is_prefix_ ~prefix y ~bit = prefix = mask_ y ~mask:bit + + (* loop down until x=lowest_bit_ x *) + let rec highest_bit_naive x m = + if m = 0 then 0 + else if x land m = 0 then highest_bit_naive x (m lsr 1) + else m + + let highest_bit_ = + (* the highest representable 2^n *) + let max_log = 1 lsl (Sys.word_size - 2) in + fun x -> + if x > 1 lsl 20 + then (* small shortcut: remove least significant 20 bits *) + let x' = x land (lnot ((1 lsl 20) -1)) in + highest_bit_naive x' max_log + else highest_bit_naive x max_log + + let branching_bit_ a b = highest_bit_ (a lxor b) + + let rec list_mem_ x l = match l with + | [] -> false + | y :: tl -> + match E.compare x y with + | 0 -> true + | c when c > 0 -> list_mem_ x tl + | _ -> false (* [x] cannot be in the tail, all elements are larger *) + + let rec mem_rec_ k x t = match t.cell with + | E -> false + | L (k', l) when k = k' -> + list_mem_ x l + | L _ -> false + | N (prefix, m, l, r) -> + if is_prefix_ ~prefix k ~bit:m + then if bit_is_0_ k ~bit:m + then mem_rec_ k x l + else mem_rec_ k x r + else raise Not_found + + let equal t1 t2 = t1.id = t2.id + + let compare t1 t2 = Pervasives.compare t1.id t2.id + + let hash t = t.id land max_int + + let mem x t = mem_rec_ (E.hash x) x t + + let mk_node_ prefix switch l r = match l.cell, r.cell with + | E, _ -> r + | _, E -> l + | _ -> hashcons_ (N (prefix, switch, l, r)) + + let mk_leaf_ hash l = match l with + | [] -> empty + | _::_ -> hashcons_ (L (hash, l)) + + (* join trees t1 and t2 with prefix p1 and p2 respectively + (p1 and p2 do not overlap) *) + let join_ t1 p1 t2 p2 = + let switch = branching_bit_ p1 p2 in + let prefix = mask_ p1 ~mask:switch in + if bit_is_0_ p1 ~bit:switch + then mk_node_ prefix switch t1 t2 + else (assert (bit_is_0_ p2 ~bit:switch); mk_node_ prefix switch t2 t1) + + let singleton_ k x = hashcons_ (L (k, [x])) + + let singleton x = singleton_ (E.hash x) x + + (* insert [x] in [l], keeping [l] sorted *) + let rec insert_list_ x l = match l with + | [] -> [x] + | y :: tl -> + match E.compare x y with + | 0 -> l (* already in there *) + | c when c<0 -> + (* x y :: insert_list_ x tl + + let rec add_rec_ k x t = match t.cell with + | E -> hashcons_ (L (k, [x])) + | L (k', l) -> + if k=k' + then hashcons_ (L (k, insert_list_ x l)) + else join_ t k' (singleton_ k x) k + | N (prefix, switch, l, r) -> + if is_prefix_ ~prefix k ~bit:switch + then if bit_is_0_ k ~bit:switch + then hashcons_ (N(prefix, switch, add_rec_ k x l, r)) + else hashcons_ (N(prefix, switch, l, add_rec_ k x r)) + else join_ (singleton_ k x) k t prefix + + let add x t = add_rec_ (E.hash x) x t + + (*$Q & ~count:20 + Q.(list int) (fun l -> \ + let module S = Make(CCInt) in \ + let m = S.of_list l in \ + List.for_all (fun x -> S.mem x m) l) + *) + + let rec remove_list_ x l = match l with + | [] -> [] + | y :: tl -> + match E.compare x y with + | 0 -> tl (* eliminate *) + | c when c<0 -> l (* cannot be in [l] *) + | _ -> y :: remove_list_ x tl + + let rec remove_rec_ k x t = match t.cell with + | E -> empty + | L (k', l) when k=k' -> + mk_leaf_ k (remove_list_ x l) + | L _ -> t (* preserve *) + | N (prefix, switch, l, r) -> + if is_prefix_ ~prefix k ~bit:switch + then if bit_is_0_ k ~bit:switch + then mk_node_ prefix switch (remove_rec_ k x l) r + else mk_node_ prefix switch l (remove_rec_ k x r) + else t (* not present *) + + let remove x l = remove_rec_ (E.hash x) x l + + let doubleton v1 v2 = add v1 (singleton v2) + + let rec iter f t = match t.cell with + | E -> () + | L (_, v) -> List.iter f v + | N (_, _, l, r) -> iter f l; iter f r + + let rec fold f t acc = match t.cell with + | E -> acc + | L (_, l) -> List.fold_right f l acc + | N (_, _, l, r) -> + let acc = fold f l acc in + fold f r acc + + let cardinal t = fold (fun _ n -> n+1) t 0 + + let rec choose_exn t = match t.cell with + | E -> raise Not_found + | L (_, []) -> assert false + | L (_, x :: _) -> x + | N (_, _, l, _) -> choose_exn l + + let choose t = + try Some (choose_exn t) + with Not_found -> None + + let rec union_list_ l1 l2 = match l1, l2 with + | [], _ -> l2 + | _, [] -> l1 + | x1 :: tl1, x2 :: tl2 -> + match E.compare x1 x2 with + | 0 -> x1 :: union_list_ tl1 tl2 + | c when c<0 -> x1 :: union_list_ tl1 l2 + | _ -> x2 :: union_list_ l1 tl2 + + (* add elements of [l], all of which have hash [k], to [t] *) + let add_list_hash_ k l t = + List.fold_left + (fun t x -> add_rec_ k x t) + t l + + let rec union a b = match a.cell, b.cell with + | E, _ -> b + | _, E -> a + | L (k1, l1), L(k2, l2) when k1==k2 -> + mk_leaf_ k1 (union_list_ l1 l2) (* merge leaves *) + | L (k, l), _ -> add_list_hash_ k l b + | _, L (k, l) -> add_list_hash_ k l a + | N (p1, m1, l1, r1), N (p2, m2, l2, r2) -> + if p1 = p2 && m1 = m2 + then mk_node_ p1 m1 (union l1 l2) (union r1 r2) + else if m1 < m2 && is_prefix_ ~prefix:p2 p1 ~bit:m1 + then if bit_is_0_ p2 ~bit:m1 + then hashcons_ (N (p1, m1, union l1 b, r1)) + else hashcons_ (N (p1, m1, l1, union r1 b)) + else if m1 > m2 && is_prefix_ ~prefix:p1 p2 ~bit:m2 + then if bit_is_0_ p1 ~bit:m2 + then hashcons_ (N (p2, m2, union l2 a, r2)) + else hashcons_ (N (p2, m2, l2, union r2 a)) + else join_ a p1 b p2 + + (*$Q + Q.(list int) (fun l -> \ + let module S = Make(CCInt) in \ + let s = S.of_list l in S.equal s (S.union s s)) + *) + + (*$= & ~printer:(CCPrint.to_string (CCList.pp CCInt.pp)) + [1;2;4;5;6;7;8;10] (let module S = Make(CCInt) in \ + let s1 = S.of_list [1;2;4;5; 7;8 ] in \ + let s2 = S.of_list [ 2;4; 6;7; 10] in \ + S.union s1 s2 |> S.to_list |> List.sort compare ) + *) + + let rec inter_list_ l1 l2 = match l1, l2 with + | [], _ + | _, [] -> [] + | x1 :: tl1, x2 :: tl2 -> + match E.compare x1 x2 with + | 0 -> x1 :: inter_list_ tl1 tl2 + | c when c<0 -> inter_list_ tl1 l2 + | _ -> inter_list_ l1 tl2 + + let rec inter a b = match a.cell, b.cell with + | E, _ | _, E -> empty + | L (k1, l1), L (k2, l2) when k1==k2 -> + mk_leaf_ k1 (inter_list_ l1 l2) + | L (k,l), _ -> + mk_leaf_ k (List.filter (fun x -> mem_rec_ k x b) l) + | _, L (k,l) -> + mk_leaf_ k (List.filter (fun x -> mem_rec_ k x a) l) + | N (p1, m1, l1, r1), N (p2, m2, l2, r2) -> + if p1 = p2 && m1 = m2 + then mk_node_ p1 m1 (inter l1 l2) (inter r1 r2) + else if m1 < m2 && is_prefix_ ~prefix:p2 p1 ~bit:m1 + then if bit_is_0_ p2 ~bit:m1 + then inter l1 b + else inter r1 b + else if m1 > m2 && is_prefix_ ~prefix:p1 p2 ~bit:m2 + then if bit_is_0_ p1 ~bit:m2 + then inter a l2 + else inter a r2 + else empty + + (*$Q + Q.(list int) (fun l -> \ + let module S = Make(CCInt) in \ + let s = S.of_list l in S.equal s (S.inter s s)) + *) + + (*$= & ~printer:(CCPrint.to_string (CCList.pp CCInt.pp)) + [2;4;7] (let module S = Make(CCInt) in \ + let s1 = S.of_list [1;2;4;5; 7;8 ] in \ + let s2 = S.of_list [ 2;4; 6;7; 10] in \ + S.inter s1 s2 |> S.to_list |> List.sort compare ) + *) + + (* remove elements of [l] from [t]; they all have hash [k] *) + let rec remove_list_hash_ k l t = match l with + | [] -> t + | x :: tl -> + remove_list_hash_ k tl (remove_rec_ k x t) + + let rec diff_list_ l1 l2 = match l1, l2 with + | [], _ -> [] + | _, [] -> l1 + | x1 :: tl1, x2 :: tl2 -> + match E.compare x1 x2 with + | 0 -> diff_list_ tl1 tl2 + | c when c<0 -> x1 :: diff_list_ tl1 l2 + | _ -> diff_list_ l1 tl2 + + let rec diff a b = match a.cell, b.cell with + | E, _ -> empty + | _, E -> a + | L (k1, l1), L (k2, l2) when k1==k2 -> + mk_leaf_ k1 (diff_list_ l1 l2) + | L (k,l), _ -> + mk_leaf_ k (List.filter (fun x -> not (mem_rec_ k x b)) l) + | _, L (k,l) -> remove_list_hash_ k l a + | N (p1, m1, l1, r1), N (p2, m2, l2, r2) -> + if p1 = p2 && m1 = m2 + then mk_node_ p1 m1 (diff l1 l2) (diff r1 r2) + else if m1 < m2 && is_prefix_ ~prefix:p2 p1 ~bit:m1 + then if bit_is_0_ p2 ~bit:m1 + then hashcons_ (N (p1, m1, diff l1 b, r1)) + else hashcons_ (N (p1, m1, l1, diff r1 b)) + else if m1 > m2 && is_prefix_ ~prefix:p1 p2 ~bit:m2 + then if bit_is_0_ p1 ~bit:m2 + then diff a l2 + else diff a r2 + else a + + (*$= & ~printer:(CCPrint.to_string (CCList.pp CCInt.pp)) + [1;5;8] (let module S = Make(CCInt) in \ + let s1 = S.of_list [1;2;4;5; 7;8 ] in \ + let s2 = S.of_list [ 2;4; 6;7; 10] in \ + S.diff s1 s2 |> S.to_list |> List.sort compare ) + *) + + (** {2 Whole-collection operations} *) + + type 'a sequence = ('a -> unit) -> unit + type 'a gen = unit -> 'a option + + let add_list t l = List.fold_left (fun t x -> add x t) t l + + let of_list l = add_list empty l + + let to_list t = fold (fun x l -> x:: l) t [] + + (*$Q + Q.(list int) (fun l -> \ + let module S = Make(CCInt) in \ + S.of_list l |> S.cardinal = List.length l) + *) + + let add_seq t seq = + let t = ref t in + seq (fun x -> t := add x !t); + !t + + let of_seq seq = add_seq empty seq + + let to_seq t yield = iter yield t +end diff --git a/src/data/CCHashconsedSet.mli b/src/data/CCHashconsedSet.mli new file mode 100644 index 00000000..972a0668 --- /dev/null +++ b/src/data/CCHashconsedSet.mli @@ -0,0 +1,110 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Hashconsed Sets} + + Sets are hashconsed, so that set equality is physical equality. Some + sub-structure that is common to several sets is also perfectly shared. + + {b status: unstable} + + @since 0.12 +*) + +module type ELT = sig + type t + + val compare : t -> t -> int + (** Total order *) + + val hash : t -> int + (** Deterministic *) +end + +module type S = sig + type elt + + type t + (** Set of elements *) + + val empty : t + + val singleton : elt -> t + + val doubleton : elt -> elt -> t + + val mem : elt -> t -> bool + + val equal : t -> t -> bool + (** Fast equality test [O(1)] *) + + val compare : t -> t -> int + (** Fast (arbitrary) comparisontest [O(1)] *) + + val hash : t -> int + (** Fast (arbitrary, deterministic) hash [O(1)] *) + + val add : elt -> t -> t + + val remove : elt -> t -> t + + val cardinal : t -> int + + val iter : (elt -> unit) -> t -> unit + (** Iterate on elements, in no particular order *) + + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + (** fold on elements, in arbitrary order *) + + val choose : t -> elt option + + val choose_exn : t -> elt + + val union : t -> t -> t + + val inter : t -> t -> t + + val diff : t -> t -> t + + (** {2 Whole-collection operations} *) + + type 'a sequence = ('a -> unit) -> unit + type 'a gen = unit -> 'a option + + val add_list : t -> elt list -> t + + val of_list : elt list -> t + + val to_list : t -> elt list + + val add_seq : t -> elt sequence -> t + + val of_seq : elt sequence -> t + + val to_seq : t -> elt sequence +end + +module Make(E : ELT) : S with type elt = E.t diff --git a/src/data/CCRingBuffer.ml b/src/data/CCRingBuffer.ml index fb8e820d..91baf4dc 100644 --- a/src/data/CCRingBuffer.ml +++ b/src/data/CCRingBuffer.ml @@ -203,8 +203,8 @@ module type S = sig @since 0.11 *) end -module MakeFromArray(Array:Array.S) = struct - module Array = Array +module MakeFromArray(A:Array.S) = struct + module Array = A type t = { mutable start : int; @@ -221,14 +221,14 @@ module MakeFromArray(Array:Array.S) = struct stop=0; bounded; size; - buf = Array.empty + buf = A.empty } let copy b = - { b with buf=Array.copy b.buf; } + { b with buf=A.copy b.buf; } (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -237,18 +237,18 @@ module MakeFromArray(Array:Array.S) = struct *) (*$T - let b = Byte.of_array "abc" in \ + let b = Byte.of_array (Bytes.of_string "abc") in \ let b' = Byte.copy b in \ Byte.clear b; \ - Byte.to_array b' = "abc" && Byte.to_array b = "" + Byte.to_array b' = (Bytes.of_string "abc") && Byte.to_array b = Bytes.empty *) let capacity b = - let len = Array.length b.buf in + let len = A.length b.buf in match len with 0 -> 0 | l -> l - 1 (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -256,7 +256,7 @@ module MakeFromArray(Array:Array.S) = struct *) (*$Q - (Q.pair Q.small_int Q.printable_string) (fun (i, s) -> \ + (Q.pair Q.small_int Q.printable_string) (fun (i, s) -> let s = Bytes.of_string s in \ let i = abs i in \ let s_len = Bytes.length s in \ let b = Byte.create ~bounded:true i in \ @@ -283,10 +283,10 @@ module MakeFromArray(Array:Array.S) = struct let length b = if b.stop >= b.start then b.stop - b.start - else (Array.length b.buf - b.start) + b.stop + else (A.length b.buf - b.start) + b.stop (*$Q - (Q.pair Q.small_int Q.printable_string) (fun (i, s) -> \ + (Q.pair Q.small_int Q.printable_string) (fun (i, s) -> let s = Bytes.of_string s in \ let i = abs i in \ let s_len = Bytes.length s in \ let b = Byte.create i in \ @@ -295,7 +295,7 @@ module MakeFromArray(Array:Array.S) = struct *) (*$Q - (Q.pair Q.small_int Q.printable_string) (fun (i, s) -> \ + (Q.pair Q.small_int Q.printable_string) (fun (i, s) -> let s = Bytes.of_string s in \ let i = abs i in \ let s_len = Bytes.length s in \ let b = Byte.create ~bounded:true i in \ @@ -305,16 +305,16 @@ module MakeFromArray(Array:Array.S) = struct (* resize [b] so that inner capacity is [cap] *) let resize b cap elem = - assert (cap >= Array.length b.buf); - let buf' = Array.make cap elem in + assert (cap >= A.length b.buf); + let buf' = A.make cap elem in (* copy into buf' *) if b.stop >= b.start then - Array.blit b.buf b.start buf' 0 (b.stop - b.start) + A.blit b.buf b.start buf' 0 (b.stop - b.start) else begin - let len_end = Array.length b.buf - b.start in - Array.blit b.buf b.start buf' 0 len_end; - Array.blit b.buf 0 buf' len_end b.stop; + let len_end = A.length b.buf - b.start in + A.blit b.buf b.start buf' 0 len_end; + A.blit b.buf 0 buf' len_end b.stop; end; b.buf <- buf' @@ -323,48 +323,49 @@ module MakeFromArray(Array:Array.S) = struct (* resize if needed, with a constant to amortize *) if cap < len then ( let new_size = - let desired = Array.length b.buf + len + 24 in + let desired = A.length b.buf + len + 24 in min (b.size+1) desired in - resize b new_size from_buf.(0); + resize b new_size (A.get from_buf 0); let good = capacity b = b.size || capacity b - length b >= len in assert good; ); - let sub = Array.sub from_buf o len in + let sub = A.sub from_buf o len in let iter x = - let capacity = Array.length b.buf in - Array.set b.buf b.stop x; + let capacity = A.length b.buf in + A.set b.buf b.stop x; if b.stop = capacity-1 then b.stop <- 0 else b.stop <- b.stop + 1; if b.start = b.stop then if b.start = capacity-1 then b.start <- 0 else b.start <- b.start + 1 in - Array.iter iter sub + A.iter iter sub let blit_from_unbounded b from_buf o len = let cap = capacity b - length b in (* resize if needed, with a constant to amortize *) - if cap < len then resize b (max (b.size+1) (Array.length b.buf + len + 24)) from_buf.(0); + if cap < len + then resize b (max (b.size+1) (A.length b.buf + len + 24)) (A.get from_buf 0); let good = capacity b - length b >= len in assert good; if b.stop >= b.start then (* [_______ start xxxxxxxxx stop ______] *) - let len_end = Array.length b.buf - b.stop in + let len_end = A.length b.buf - b.stop in if len_end >= len - then (Array.blit from_buf o b.buf b.stop len; + then (A.blit from_buf o b.buf b.stop len; b.stop <- b.stop + len) - else (Array.blit from_buf o b.buf b.stop len_end; - Array.blit from_buf (o+len_end) b.buf 0 (len-len_end); + else (A.blit from_buf o b.buf b.stop len_end; + A.blit from_buf (o+len_end) b.buf 0 (len-len_end); b.stop <- len-len_end) else begin (* [xxxxx stop ____________ start xxxxxx] *) let len_middle = b.start - b.stop in assert (len_middle >= len); - Array.blit from_buf o b.buf b.stop len; + A.blit from_buf o b.buf b.stop len; b.stop <- b.stop + len end; () let blit_from b from_buf o len = - if Array.length from_buf = 0 then () else + if A.length from_buf = 0 then () else if b.bounded then blit_from_bounded b from_buf o len else @@ -372,6 +373,7 @@ module MakeFromArray(Array:Array.S) = struct (*$Q (Q.pair Q.printable_string Q.printable_string) (fun (s,s') -> \ + let s = Bytes.of_string s in let s' = Bytes.of_string s' in \ (let b = Byte.create 24 in \ Byte.blit_from b s 0 (Bytes.length s); \ Byte.blit_from b s' 0 (Bytes.length s'); \ @@ -381,6 +383,7 @@ module MakeFromArray(Array:Array.S) = struct (*$Q (Q.pair Q.printable_string Q.printable_string) (fun (s,s') -> \ + let s = Bytes.of_string s in let s' = Bytes.of_string s' in \ (let b = Byte.create ~bounded:true (Bytes.length s + Bytes.length s') in \ Byte.blit_from b s 0 (Bytes.length s); \ Byte.blit_from b s' 0 (Bytes.length s'); \ @@ -389,27 +392,27 @@ module MakeFromArray(Array:Array.S) = struct let blit_into b to_buf o len = - if o+len > Array.length to_buf + if o+len > A.length to_buf then invalid_arg "RingBuffer.blit_into"; if b.stop >= b.start then let n = min (b.stop - b.start) len in - let _ = Array.blit b.buf b.start to_buf o n in + let _ = A.blit b.buf b.start to_buf o n in n else begin - let len_end = Array.length b.buf - b.start in - Array.blit b.buf b.start to_buf o (min len_end len); + let len_end = A.length b.buf - b.start in + A.blit b.buf b.start to_buf o (min len_end len); if len_end >= len then len (* done *) else begin let n = min b.stop (len - len_end) in - Array.blit b.buf 0 to_buf (o+len_end) n; + A.blit b.buf 0 to_buf (o+len_end) n; n + len_end end end (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let b = Byte.create (Bytes.length s) in \ Byte.blit_from b s 0 (Bytes.length s); \ let to_buf = Bytes.create (Bytes.length s) in \ @@ -423,7 +426,7 @@ module MakeFromArray(Array:Array.S) = struct () (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -434,10 +437,10 @@ module MakeFromArray(Array:Array.S) = struct let reset b = clear b; - b.buf <- Array.empty + b.buf <- A.empty (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -449,7 +452,7 @@ module MakeFromArray(Array:Array.S) = struct let is_empty b = b.start = b.stop (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -459,8 +462,8 @@ module MakeFromArray(Array:Array.S) = struct let take_front_exn b = if b.start = b.stop then raise Empty; - let c = b.buf.(b.start) in - if b.start + 1 = Array.length b.buf + let c = A.get b.buf b.start in + if b.start + 1 = A.length b.buf then b.start <- 0 else b.start <- b.start + 1; c @@ -468,7 +471,7 @@ module MakeFromArray(Array:Array.S) = struct let take_front b = try Some (take_front_exn b) with Empty -> None (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -479,14 +482,14 @@ module MakeFromArray(Array:Array.S) = struct let take_back_exn b = if b.start = b.stop then raise Empty; if b.stop - 1 = 0 - then b.stop <- Array.length b.buf - 1 + then b.stop <- A.length b.buf - 1 else b.stop <- b.stop - 1; - b.buf.(b.stop) + A.get b.buf b.stop let take_back b = try Some (take_back_exn b) with Empty -> None (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -496,12 +499,12 @@ module MakeFromArray(Array:Array.S) = struct let junk_front b = if b.start = b.stop then raise Empty; - if b.start + 1 = Array.length b.buf + if b.start + 1 = A.length b.buf then b.start <- 0 else b.start <- b.start + 1 (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -512,11 +515,11 @@ module MakeFromArray(Array:Array.S) = struct let junk_back b = if b.start = b.stop then raise Empty; if b.stop = 0 - then b.stop <- Array.length b.buf - 1 + then b.stop <- A.length b.buf - 1 else b.stop <- b.stop - 1 (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -530,39 +533,41 @@ module MakeFromArray(Array:Array.S) = struct if b.stop >= b.start then b.start <- b.start + len else - let len_end = Array.length b.buf - b.start in + let len_end = A.length b.buf - b.start in if len > len_end then b.start <- len-len_end (* wrap to the beginning *) else b.start <- b.start + len (*$Q (Q.pair Q.printable_string Q.printable_string) (fun (s,s') -> \ + let s = Bytes.of_string s in let s' = Bytes.of_string s' in \ (let b = Byte.create 24 in \ Byte.blit_from b s 0 (Bytes.length s); \ Byte.blit_from b s' 0 (Bytes.length s'); \ - Byte.blit_from b "hello world" 0 (Bytes.length "hello world"); (* big enough *) \ + let h = Bytes.of_string "hello world" in \ + Byte.blit_from b h 0 (Bytes.length h); (* big enough *) \ let l = Byte.length b in let l' = l/2 in Byte.skip b l'; \ Byte.length b + l' = l)) *) let iter b ~f = if b.stop >= b.start - then for i = b.start to b.stop - 1 do f b.buf.(i) done + then for i = b.start to b.stop - 1 do f (A.get b.buf i) done else ( - for i = b.start to Array.length b.buf -1 do f b.buf.(i) done; - for i = 0 to b.stop - 1 do f b.buf.(i) done; + for i = b.start to A.length b.buf -1 do f (A.get b.buf i) done; + for i = 0 to b.stop - 1 do f (A.get b.buf i) done; ) let iteri b ~f = if b.stop >= b.start - then for i = b.start to b.stop - 1 do f i b.buf.(i) done + then for i = b.start to b.stop - 1 do f i (A.get b.buf i) done else ( - for i = b.start to Array.length b.buf -1 do f i b.buf.(i) done; - for i = 0 to b.stop - 1 do f i b.buf.(i) done; + for i = b.start to A.length b.buf -1 do f i (A.get b.buf i) done; + for i = 0 to b.stop - 1 do f i (A.get b.buf i) done; ) (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -575,14 +580,14 @@ module MakeFromArray(Array:Array.S) = struct then if i >= b.stop - b.start then invalid_arg ("CCRingBuffer.get:" ^ string_of_int i) - else b.buf.(b.start + i) + else A.get b.buf (b.start + i) else - let len_end = Array.length b.buf - b.start in + let len_end = A.length b.buf - b.start in if i < len_end - then b.buf.(b.start + i) + then A.get b.buf (b.start + i) else if i - len_end > b.stop then invalid_arg ("CCRingBuffer.get: " ^ string_of_int i) - else b.buf.(i - len_end) + else A.get b.buf (i - len_end) let get_front b i = if is_empty b then @@ -592,7 +597,7 @@ module MakeFromArray(Array:Array.S) = struct (*$Q (Q.pair Q.small_int Q.printable_string) (fun (i, s) -> \ - let s = s ^ " " in \ + let s = Bytes.of_string (s ^ " ") in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -609,7 +614,7 @@ module MakeFromArray(Array:Array.S) = struct (*$Q (Q.pair Q.small_int Q.printable_string) (fun (i, s) -> \ - let s = s ^ " " in \ + let s = Bytes.of_string (s ^ " ") in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -627,21 +632,21 @@ module MakeFromArray(Array:Array.S) = struct build [] (len-1) (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ let l = Byte.to_list b in \ let explode s = let rec exp i l = \ - if i < 0 then l else exp (i - 1) (s.[i] :: l) in \ + if i < 0 then l else exp (i - 1) (Bytes.get s i :: l) in \ exp (Bytes.length s - 1) [] in \ explode s = l) *) - let push_back b e = blit_from b (Array.make 1 e) 0 1 + let push_back b e = blit_from b (A.make 1 e) 0 1 (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -655,10 +660,10 @@ module MakeFromArray(Array:Array.S) = struct let peek_front b = if is_empty b then raise Empty - else Array.get b.buf b.start + else A.get b.buf b.start (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -668,11 +673,11 @@ module MakeFromArray(Array:Array.S) = struct let peek_back b = if is_empty b then raise Empty - else Array.get b.buf + else A.get b.buf (if b.stop = 0 then capacity b - 1 else b.stop-1) (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let s_len = Bytes.length s in \ let b = Byte.create s_len in \ Byte.blit_from b s 0 s_len; \ @@ -681,21 +686,21 @@ module MakeFromArray(Array:Array.S) = struct *) let of_array a = - let b = create (max (Array.length a) 16) in - blit_from b a 0 (Array.length a); + let b = create (max (A.length a) 16) in + blit_from b a 0 (A.length a); b let to_array b = - if is_empty b then Array.empty + if is_empty b then A.empty else ( - let a = Array.make (length b) (peek_front b) in + let a = A.make (length b) (peek_front b) in let n = blit_into b a 0 (length b) in assert (n = length b); a ) (*$Q - Q.printable_string (fun s -> \ + Q.printable_string (fun s -> let s = Bytes.of_string s in \ let b = Byte.of_array s in let s' = Byte.to_array b in \ s = s') *) diff --git a/src/data/CCRingBuffer.mli b/src/data/CCRingBuffer.mli index 2c7cdbb3..b657f47a 100644 --- a/src/data/CCRingBuffer.mli +++ b/src/data/CCRingBuffer.mli @@ -195,7 +195,8 @@ module type S = sig val of_array : Array.t -> t (** Create a buffer from an initial array, but doesn't take ownership - of it (stills allocates a new internal array) *) + of it (stills allocates a new internal array) + @since 0.11 *) val to_array : t -> Array.t (** Create an array from the elements, in order. diff --git a/src/data/containers_data.mldylib b/src/data/containers_data.mldylib index e45f6801..88cbf74c 100644 --- a/src/data/containers_data.mldylib +++ b/src/data/containers_data.mldylib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 423faeb80b3829590072ca8f5414955c) +# DO NOT EDIT (digest: eb3c5babbb4a2d9bd921bfaf77125f8f) CCMultiMap CCMultiSet CCTrie @@ -15,4 +15,6 @@ CCRingBuffer CCIntMap CCPersistentArray CCMixset +CCHashconsedSet +CCGraph # OASIS_STOP diff --git a/src/data/containers_data.mllib b/src/data/containers_data.mllib index e45f6801..88cbf74c 100644 --- a/src/data/containers_data.mllib +++ b/src/data/containers_data.mllib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 423faeb80b3829590072ca8f5414955c) +# DO NOT EDIT (digest: eb3c5babbb4a2d9bd921bfaf77125f8f) CCMultiMap CCMultiSet CCTrie @@ -15,4 +15,6 @@ CCRingBuffer CCIntMap CCPersistentArray CCMixset +CCHashconsedSet +CCGraph # OASIS_STOP diff --git a/src/io/containers_io.mldylib b/src/io/containers_io.mldylib index 54acdc47..98113180 100644 --- a/src/io/containers_io.mldylib +++ b/src/io/containers_io.mldylib @@ -1,4 +1,4 @@ # OASIS_START -# DO NOT EDIT (digest: 60d34ed5d3f17d5a8ac1501b3c6db7e7) -CCIO +# DO NOT EDIT (digest: 9573c9c3109b1d53a61739444853a7b2) +Containers_io_is_deprecated # OASIS_STOP diff --git a/src/io/containers_io.mllib b/src/io/containers_io.mllib index 54acdc47..98113180 100644 --- a/src/io/containers_io.mllib +++ b/src/io/containers_io.mllib @@ -1,4 +1,4 @@ # OASIS_START -# DO NOT EDIT (digest: 60d34ed5d3f17d5a8ac1501b3c6db7e7) -CCIO +# DO NOT EDIT (digest: 9573c9c3109b1d53a61739444853a7b2) +Containers_io_is_deprecated # OASIS_STOP diff --git a/src/io/containers_io_is_deprecated.ml b/src/io/containers_io_is_deprecated.ml new file mode 100644 index 00000000..4784fafb --- /dev/null +++ b/src/io/containers_io_is_deprecated.ml @@ -0,0 +1,7 @@ +(** {!CCIO} has moved into {!Containers}, the main library. + + The reason is that it has no additional dependency and is arguably a + useful completement to parts of {!Pervasives} (the channel management) + + As a result, linking "containers" rather than "containers.io" should be + enough if one needs {!CCIO}. *) diff --git a/src/lwt/lwt_automaton.ml b/src/lwt/lwt_automaton.ml index 2f8d98f1..017951d8 100644 --- a/src/lwt/lwt_automaton.ml +++ b/src/lwt/lwt_automaton.ml @@ -60,7 +60,7 @@ module Unix = struct Lwt.ignore_result (Lwt_unix.close fd); `Stopped, [`Closed] | `Active, `Write s -> - let fut = Lwt_unix.write fd s 0 (String.length s) in + let fut = Lwt_unix.write fd s 0 (Bytes.length s) in (* propagate error *) Lwt.on_failure fut (fun e -> Lwt.wakeup err_send e); st, [] @@ -68,15 +68,15 @@ module Unix = struct st, [`Read s] in let a = Automaton.Instance.create ~f:transition `Active in - let buf = String.make 128 ' ' in + let buf = Bytes.make 128 ' ' in (* read a string from buffer *) let rec _read () = if Automaton.Instance.state a = `Active - then Lwt_unix.read fd buf 0 (String.length buf) >>= fun n -> + then Lwt_unix.read fd buf 0 (Bytes.length buf) >>= fun n -> begin if n = 0 then Automaton.Instance.send a `Stop else - let s = String.sub buf 0 n in + let s = Bytes.sub_string buf 0 n in Automaton.Instance.send a (`JustRead s) end; _read () diff --git a/src/lwt/lwt_automaton.mli b/src/lwt/lwt_automaton.mli index daa03517..b3d4e585 100644 --- a/src/lwt/lwt_automaton.mli +++ b/src/lwt/lwt_automaton.mli @@ -50,7 +50,7 @@ val next_transition : module Unix : sig val read_write : Lwt_unix.file_descr -> ( [ `Active | `Stopped | `Error of exn ] - , [ `Stop | `Write of string | `JustRead of string | `Failwith of exn ] + , [ `Stop | `Write of Bytes.t | `JustRead of string | `Failwith of exn ] , [> `Read of string | `Closed | `Error of exn ] ) Automaton.Instance.t (** Read and write on the given filedescriptor *) diff --git a/src/sexp/CCSexpStream.ml b/src/sexp/CCSexpStream.ml index 38f25c15..ff7f76d0 100644 --- a/src/sexp/CCSexpStream.ml +++ b/src/sexp/CCSexpStream.ml @@ -184,7 +184,7 @@ module Source = struct ) let of_chan ?(bufsize=1024) ic = - let buf = String.make bufsize ' ' in + let buf = Bytes.make bufsize ' ' in let i = ref 0 in let n = ref 0 in let stop = ref false in @@ -196,7 +196,7 @@ module Source = struct n := input ic buf 0 bufsize; if !n = 0 then (stop := true; NC_end) else next() ) else ( (* yield *) - let c = String.get buf !i in + let c = Bytes.get buf !i in incr i; NC_yield c ) diff --git a/src/threads/CCFuture.ml b/src/threads/CCFuture.ml index 8860cc5b..19b62dc5 100644 --- a/src/threads/CCFuture.ml +++ b/src/threads/CCFuture.ml @@ -359,19 +359,7 @@ let choose futures = Run cell (** slurp the entire state of the file_descr into a string *) -let slurp i_chan = - let buf_size = 128 in - let state = Buffer.create 120 - and buf = String.make 128 'a' in - let rec next () = - let num = input i_chan buf 0 buf_size in - if num = 0 - then Buffer.contents state (* EOF *) - else ( - Buffer.add_substring state buf 0 num; - next () - ) - in next () +let slurp ic = CCIO.read_all_bytes ic let read_chan ic = make1 slurp ic @@ -451,7 +439,7 @@ module Timer = struct (** Wait for next event, run it, and loop *) let serve timer = - let buf = String.make 1 '_' in + let buf = Bytes.make 1 '_' in (* acquire lock, call [process_task] and do as it commands *) let rec next () = match with_lock_ timer process_task with | Loop -> next () @@ -492,6 +480,8 @@ module Timer = struct timer.thread <- Some t; timer + let underscore_ = Bytes.make 1 '_' + (** [timerule_at s t act] will run [act] at the Unix echo [t] *) let at timer time = let now = Unix.gettimeofday () in @@ -510,7 +500,7 @@ module Timer = struct timer.tasks <- TaskHeap.insert (time, cell) timer.tasks; (* see if the timer thread needs to be awaken earlier *) if time < next_time - then ignore (Unix.single_write timer.fifo_out "_" 0 1) + then ignore (Unix.single_write timer.fifo_out underscore_ 0 1) ); Run cell )