diff --git a/.merlin b/.merlin index d5a2d81a..67556471 100644 --- a/.merlin +++ b/.merlin @@ -9,7 +9,6 @@ S src/threads/ S src/misc S src/string S src/bigarray -S src/pervasives S benchs S examples S tests @@ -24,7 +23,6 @@ B _build/src/threads/ B _build/src/misc B _build/src/string B _build/src/bigarray -B _build/src/pervasives B _build/benchs B _build/examples B _build/tests diff --git a/.ocamlinit b/.ocamlinit index 2d7217dd..ec0513a8 100644 --- a/.ocamlinit +++ b/.ocamlinit @@ -1,11 +1,13 @@ #use "topfind";; #thread #require "bigarray";; +#require "unix";; #directory "_build/src/core";; #directory "_build/src/misc";; #directory "_build/src/pervasives/";; #directory "_build/src/string";; #directory "_build/src/io";; +#directory "_build/src/unix";; #directory "_build/src/iter";; #directory "_build/src/data";; #directory "_build/src/sexp";; @@ -16,6 +18,7 @@ #load "containers_iter.cma";; #load "containers_data.cma";; #load "containers_io.cma";; +#load "containers_unix.cma";; #load "containers_sexp.cma";; #load "containers_string.cma";; #load "containers_pervasives.cma";; diff --git a/AUTHORS.md b/AUTHORS.md index 29251ed6..4a690488 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -10,3 +10,4 @@ - Bernardo da Costa - Vincent Bernardoff (vbmithr) - Emmanuel Surleau (emm) +- Guillaume Bury (guigui) diff --git a/CHANGELOG.md b/CHANGELOG.md index c05947a4..d55ea6b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## 0.10 + +- add `containers_misc.Puf.iter` +- add `CCString.{lines,unlines,concat_gen}` +- `CCUnix` (with a small subprocess API) +- add `CCList.{sorted_merge_uniq, uniq_succ}` +- breaking: fix documentation of `CCList.sorted_merge` (different semantics) +- `CCPersistentArray` (credit to @gbury and Jean-Christophe Filliâtre) +- `CCIntMap` (big-endian patricia trees) in containers.data +- bugfix in `CCFQueue.add_seq_front` +- add `CCFQueue.{rev, --}` +- add `App_parse` in `containers_string`, experimental applicative parser combinators +- remove `containers.pervasives`, add the module `Containers` to core +- bugfix in `CCFormat.to_file` + ## 0.9 - add `Float`, `Ref`, `Set`, `Format` to `CCPervasives` @@ -178,7 +193,7 @@ - renamed threads/future to threads/CCFuture - big upgrade of `RAL` (random access lists) - `CCList.Ref` to help use references on lists -- `CCKList`: group,uniq,sort,sort_uniq,repeat and cycle, infix ops, applicative,product +- `CCKList`: `group,uniq,sort,sort_uniq,repeat` and `cycle`, infix ops, applicative,product - `CCTrie.above/below`: ranges of items - more functions in `CCPair` - `CCCat`: funny (though useless) definitions inspired from Haskell @@ -192,7 +207,7 @@ - conversions for `CCString` - `CCHashtbl`: open-addressing table (Robin-Hood hashing) - registered printers for `CCError`.guard,wrap1,etc. -- monadic operator in `CCList`: map_m_par +- monadic operator in `CCList`: `map_m_par` - simple interface to `PrintBox` now more powerful - constructors for 1 or 2 elements fqueues - bugfixes in BTree (insertion should work now) @@ -206,7 +221,7 @@ - `CCopt.pure` - updated `CCPersistentHashtbl` with new functions; updated doc, simplified code - move `CCString` into core/, since it deals with a basic type; also add some features to `CCString` (Sub and Split modules to deal with slices and splitting by a string) -- `CCArray.blit`, .Sub.to_slice; some bugfixes +- `CCArray.blit`, `.Sub.to_slice`; some bugfixes - applicative and lifting operators for `CCError` - `CCError.map2` - more combinators in `CCError` @@ -219,9 +234,9 @@ - `CCOpt.sequence_l` - mplus instance for `CCOpt` - monad instance for `CCFun` -- updated description in _oasis +- updated description in `_oasis` - `CCTrie`, a compressed functorial persistent trie structure - fix `CCPrint.unit`, add `CCPrint.silent` - fix type mismatch -note: git log --no-merges previous_version..HEAD --pretty=%s +note: `git log --no-merges previous_version..HEAD --pretty=%s` diff --git a/HOWTO.md b/HOWTO.md index 626395e5..34e0cda3 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -1,7 +1,7 @@ ## Make a release -1. `make test-all` +1. `make test` 2. update version in `_oasis` 3. `make update_next_tag` (to update `@since` comments; be careful not to change symlinks) 4. update `CHANGELOG.md` (see its end to find the right git command) diff --git a/Makefile b/Makefile index 6302348f..0f266171 100644 --- a/Makefile +++ b/Makefile @@ -60,6 +60,8 @@ QTESTABLE=$(filter-out $(DONTTEST), \ $(wildcard src/string/*.mli) \ $(wildcard src/io/*.ml) \ $(wildcard src/io/*.mli) \ + $(wildcard src/unix/*.ml) \ + $(wildcard src/unix/*.mli) \ $(wildcard src/sexp/*.ml) \ $(wildcard src/sexp/*.mli) \ $(wildcard src/advanced/*.ml) \ diff --git a/README.md b/README.md index a0ac39b6..ef330830 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,10 @@ What is _containers_? are totally independent and are prefixed with `CC` (for "containers-core" or "companion-cube" because I'm megalomaniac). This part should be usable and should work. For instance, `CCList` contains functions and - lists including safe versions of `map` and `append`. + lists including safe versions of `map` and `append`. It also + provides a drop-in replacement to the standard library, in the module + `Containers` (intended to be opened, replaces some stdlib modules + with extended ones) - Several small additional libraries that complement it: * `containers.data` with additional data structures that don't have an equivalent in the standard library; @@ -21,9 +24,6 @@ What is _containers_? KMP search algorithm, and a few naive utils). Again, modules are independent and sometimes parametric on the string and char types (so they should be able to deal with your favorite unicode library). -- A drop-in replacement to the standard library, `containers.pervasives`, - that defined a `CCPervasives` module intented to be opened to extend some - modules of the stdlib. - A sub-library with complicated abstractions, `containers.advanced` (with a LINQ-like query module, batch operations using GADTs, and others). - A library using [Lwt](https://github.com/ocsigen/lwt/), `containers.lwt`. @@ -45,7 +45,7 @@ See [this file](https://github.com/c-cube/ocaml-containers/blob/master/CHANGELOG ## Finding help - the [github wiki](https://github.com/c-cube/ocaml-containers/wiki) -- the IRC channel (`##ocaml-containers` on Freenode) +- on IRC, ask `companion_cube` on `#ocaml` ## Use @@ -109,6 +109,10 @@ Documentation [here](http://cedeela.fr/~simon/software/containers). - `CCIO`, basic utilities for IO +### Containers.unix + +- `CCUnix`, utils for `Unix` + ### Containers.sexp A small S-expression library. diff --git a/_oasis b/_oasis index 9fc4ec59..c5834652 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: containers -Version: 0.9 +Version: 0.10 Homepage: https://github.com/c-cube/ocaml-containers Authors: Simon Cruanes License: BSD-2-clause @@ -25,6 +25,10 @@ Flag "misc" Description: Build the misc library, with experimental modules still susceptible to change Default: true +Flag "unix" + Description: Build the containers.unix library (depends on Unix) + Default: false + Flag "lwt" Description: Build modules which depend on Lwt Default: false @@ -49,7 +53,8 @@ Library "containers" Path: src/core Modules: CCVector, CCPrint, CCError, CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCInt, CCBool, CCFloat, CCArray, CCRef, CCSet, - CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat + CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat, + Containers BuildDepends: bytes Library "containers_io" @@ -59,6 +64,13 @@ Library "containers_io" FindlibParent: containers FindlibName: io +Library "containers_unix" + Path: src/unix + Modules: CCUnix + BuildDepends: bytes, unix + FindlibParent: containers + FindlibName: unix + Library "containers_sexp" Path: src/sexp Modules: CCSexp, CCSexpStream, CCSexpM @@ -70,7 +82,7 @@ Library "containers_data" Path: src/data Modules: CCMultiMap, CCMultiSet, CCTrie, CCFlatHashtbl, CCCache, CCPersistentHashtbl, CCDeque, CCFQueue, CCBV, CCMixtbl, - CCMixmap, CCRingBuffer + CCMixmap, CCRingBuffer, CCIntMap, CCPersistentArray BuildDepends: bytes FindlibParent: containers FindlibName: data @@ -84,7 +96,8 @@ Library "containers_iter" Library "containers_string" Path: src/string Pack: true - Modules: KMP, Levenshtein + Modules: KMP, Levenshtein, App_parse + BuildDepends: bytes FindlibName: string FindlibParent: containers @@ -105,18 +118,12 @@ Library "containers_bigarray" FindlibParent: containers BuildDepends: containers, bigarray, bytes -Library "containers_pervasives" - Path: src/pervasives - Modules: CCPervasives - BuildDepends: containers - FindlibName: pervasives - FindlibParent: containers - Library "containers_misc" Path: src/misc Pack: true Modules: AbsSet, Automaton, Bij, CSM, Hashset, LazyGraph, PHashtbl, - PrintBox, RAL, RoseTree, SmallSet, UnionFind, Univ + PrintBox, RAL, RoseTree, SmallSet, UnionFind, Univ, Puf, + Backtrack BuildDepends: containers, containers.data FindlibName: misc FindlibParent: containers @@ -145,15 +152,15 @@ Document containers Title: Containers docs Type: ocamlbuild (0.3) BuildTools+: ocamldoc - Build$: flag(docs) && flag(advanced) && flag(bigarray) && flag(lwt) && flag(misc) + Build$: flag(docs) && flag(advanced) && flag(bigarray) && flag(lwt) && flag(misc) && flag(unix) Install: true XOCamlbuildPath: . XOCamlbuildExtraArgs: "-docflags '-colorize-code -short-functors -charset utf-8'" XOCamlbuildLibraries: containers, containers.misc, containers.iter, containers.data, - containers.string, containers.pervasives, containers.bigarray, - containers.advanced, containers.io, containers.sexp, + containers.string, containers.bigarray, + containers.advanced, containers.io, containers.unix, containers.sexp, containers.lwt Executable run_benchs @@ -166,12 +173,12 @@ Executable run_benchs containers.data, containers.string, containers.iter, sequence, gen, benchmark -Executable bench_hash +Executable run_bench_hash Path: benchs/ Install: false CompiledObject: best Build$: flag(bench) && flag(misc) - MainIs: bench_hash.ml + MainIs: run_bench_hash.ml BuildDepends: containers, containers.misc Executable run_test_future @@ -194,11 +201,11 @@ Executable run_qtest Install: false CompiledObject: best MainIs: run_qtest.ml - Build$: flag(tests) && flag(bigarray) + Build$: flag(tests) && flag(misc) && flag(bigarray) && flag(unix) && flag(advanced) BuildDepends: containers, containers.misc, containers.string, containers.iter, containers.io, containers.advanced, containers.sexp, - containers.bigarray, - sequence, gen, oUnit, QTest2Lib + containers.bigarray, containers.unix, + sequence, gen, unix, oUnit, QTest2Lib Executable run_qtest_lwt Path: qtest/lwt/ @@ -222,7 +229,7 @@ Executable run_tests Test all Command: make test-all TestTools: run_tests, run_qtest - Run$: flag(tests) && flag(misc) + Run$: flag(tests) && flag(misc) && flag(unix) && flag(advanced) && flag(bigarray) Test lwt Command: echo "test lwt"; ./run_qtest_lwt.native diff --git a/_tags b/_tags index 99009386..a20a5b46 100644 --- a/_tags +++ b/_tags @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 4bc9d475d595a814a666d126274b25b1) +# DO NOT EDIT (digest: 2d4ff427096956a049556073cd9b4191) # Ignore VCS directories, you can use the same kind of rule outside # OASIS_START/STOP if you want to exclude directories that contains # useless stuff for the build process @@ -20,6 +20,10 @@ true: annot, bin_annot # Library containers_io "src/io/containers_io.cmxs": use_containers_io : package(bytes) +# Library containers_unix +"src/unix/containers_unix.cmxs": use_containers_unix +: package(bytes) +: package(unix) # Library containers_sexp "src/sexp/containers_sexp.cmxs": use_containers_sexp : package(bytes) @@ -32,6 +36,8 @@ true: annot, bin_annot "src/string/containers_string.cmxs": use_containers_string "src/string/KMP.cmx": for-pack(Containers_string) "src/string/levenshtein.cmx": for-pack(Containers_string) +"src/string/app_parse.cmx": for-pack(Containers_string) +: package(bytes) # Library containers_advanced "src/advanced/containers_advanced.cmxs": use_containers_advanced "src/advanced/CCLinq.cmx": for-pack(Containers_advanced) @@ -46,10 +52,6 @@ true: annot, bin_annot : package(bigarray) : package(bytes) : use_containers -# Library containers_pervasives -"src/pervasives/containers_pervasives.cmxs": use_containers_pervasives -: package(bytes) -: use_containers # Library containers_misc "src/misc/containers_misc.cmxs": use_containers_misc "src/misc/absSet.cmx": for-pack(Containers_misc) @@ -65,6 +67,8 @@ true: annot, bin_annot "src/misc/smallSet.cmx": for-pack(Containers_misc) "src/misc/unionFind.cmx": for-pack(Containers_misc) "src/misc/univ.cmx": for-pack(Containers_misc) +"src/misc/puf.cmx": for-pack(Containers_misc) +"src/misc/backtrack.cmx": for-pack(Containers_misc) : package(bytes) : use_containers : use_containers_data @@ -101,11 +105,11 @@ true: annot, bin_annot : use_containers_advanced : use_containers_iter : use_containers_string -# Executable bench_hash -: package(bytes) -: use_containers -: use_containers_data -: use_containers_misc +# Executable run_bench_hash +: package(bytes) +: use_containers +: use_containers_data +: use_containers_misc : package(bytes) : use_containers : use_containers_data @@ -130,6 +134,7 @@ true: annot, bin_annot : package(gen) : package(oUnit) : package(sequence) +: package(unix) : use_containers : use_containers_advanced : use_containers_bigarray @@ -139,12 +144,14 @@ true: annot, bin_annot : use_containers_misc : use_containers_sexp : use_containers_string +: use_containers_unix : package(QTest2Lib) : package(bigarray) : package(bytes) : package(gen) : package(oUnit) : package(sequence) +: package(unix) : use_containers : use_containers_advanced : use_containers_bigarray @@ -154,6 +161,7 @@ true: annot, bin_annot : use_containers_misc : use_containers_sexp : use_containers_string +: use_containers_unix # Executable run_qtest_lwt : package(QTest2Lib) : package(bytes) diff --git a/benchs/bench_hash.ml b/benchs/run_bench_hash.ml similarity index 100% rename from benchs/bench_hash.ml rename to benchs/run_bench_hash.ml diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 3c2e5e22..c65165bf 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -4,6 +4,7 @@ module B = Benchmark let (@>) = B.Tree.(@>) let (@>>) = B.Tree.(@>>) let (@>>>) = B.Tree.(@>>>) +let (|>) = CCFun.(|>) let app_int f n = string_of_int n @> lazy (f n) let app_ints f l = B.Tree.concat (List.map (app_int f) l) @@ -234,6 +235,13 @@ module Tbl = struct done; !h + let intmap_add n = + let h = ref CCIntMap.empty in + for i = n downto 0 do + h := CCIntMap.add i i !h; + done; + !h + let icchashtbl_add n = let h = ICCHashtbl.create 50 in for i = n downto 0 do @@ -248,6 +256,7 @@ module Tbl = struct "ihashtbl_add", (fun n -> ignore (ihashtbl_add n)), n; "ipersistenthashtbl_add", (fun n -> ignore (ipersistenthashtbl_add n)), n; "imap_add", (fun n -> ignore (imap_add n)), n; + "intmap_add", (fun n -> ignore (intmap_add n)), n; "ccflathashtbl_add", (fun n -> ignore (icchashtbl_add n)), n; ] @@ -301,6 +310,16 @@ module Tbl = struct done; !h + let intmap_replace n = + let h = ref CCIntMap.empty in + for i = 0 to n do + h := CCIntMap.add i i !h; + done; + for i = n downto 0 do + h := CCIntMap.add i i !h; + done; + !h + let icchashtbl_replace n = let h = ICCHashtbl.create 50 in for i = 0 to n do @@ -318,11 +337,10 @@ module Tbl = struct "ihashtbl_replace", (fun n -> ignore (ihashtbl_replace n)), n; "ipersistenthashtbl_replace", (fun n -> ignore (ipersistenthashtbl_replace n)), n; "imap_replace", (fun n -> ignore (imap_replace n)), n; + "intmap_replace", (fun n -> ignore (intmap_replace n)), n; "ccflathashtbl_replace", (fun n -> ignore (icchashtbl_replace n)), n; ] - let my_len = 250 - let phashtbl_find h = fun n -> for i = 0 to n-1 do @@ -353,12 +371,24 @@ module Tbl = struct ignore (Array.get a i); done + let persistent_array_find a = + fun n -> + for i = 0 to n-1 do + ignore (CCPersistentArray.get a i); + done + let imap_find m = fun n -> for i = 0 to n-1 do ignore (IMap.find i m); done + let intmap_find m = + fun n -> + for i = 0 to n-1 do + ignore (CCIntMap.find i m); + done + let icchashtbl_find m = fun n -> for i = 0 to n-1 do @@ -370,8 +400,10 @@ module Tbl = struct let h' = hashtbl_add n in let h'' = ihashtbl_add n in let h''''' = ipersistenthashtbl_add n in - let a = Array.init n (fun i -> string_of_int i) in + let a = Array.init n string_of_int in + let pa = CCPersistentArray.init n string_of_int in let m = imap_add n in + let m' = intmap_add n in let h'''''' = icchashtbl_add n in B.throughputN 3 [ "phashtbl_find", (fun () -> phashtbl_find h n), (); @@ -379,7 +411,9 @@ module Tbl = struct "ihashtbl_find", (fun () -> ihashtbl_find h'' n), (); "ipersistenthashtbl_find", (fun () -> ipersistenthashtbl_find h''''' n), (); "array_find", (fun () -> array_find a n), (); + "persistent_array_find", (fun () -> persistent_array_find pa n), (); "imap_find", (fun () -> imap_find m n), (); + "intmap_find", (fun () -> intmap_find m' n), (); "cchashtbl_find", (fun () -> icchashtbl_find h'''''' n), (); ] diff --git a/containers.odocl b/containers.odocl index a7d4b7c3..a45bc47d 100644 --- a/containers.odocl +++ b/containers.odocl @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 98c09c3ae4c860914660bcfa48ec375f) +# DO NOT EDIT (digest: 463813d3e54d45bc5b6a9d7d4eb17cd0) src/core/CCVector src/core/CCPrint src/core/CCError @@ -21,6 +21,7 @@ src/core/CCString src/core/CCHashtbl src/core/CCMap src/core/CCFormat +src/core/Containers src/misc/AbsSet src/misc/Automaton src/misc/Bij @@ -34,6 +35,8 @@ src/misc/RoseTree src/misc/SmallSet src/misc/UnionFind src/misc/Univ +src/misc/Puf +src/misc/Backtrack src/iter/CCKTree src/iter/CCKList src/data/CCMultiMap @@ -48,15 +51,18 @@ src/data/CCBV src/data/CCMixtbl src/data/CCMixmap src/data/CCRingBuffer +src/data/CCIntMap +src/data/CCPersistentArray src/string/KMP src/string/Levenshtein -src/pervasives/CCPervasives +src/string/App_parse src/bigarray/CCBigstring src/advanced/CCLinq src/advanced/CCBatch src/advanced/CCCat src/advanced/CCMonadIO src/io/CCIO +src/unix/CCUnix src/sexp/CCSexp src/sexp/CCSexpStream src/sexp/CCSexpM diff --git a/doc/intro.txt b/doc/intro.txt index b0bbb36a..42f0f3c6 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -44,11 +44,11 @@ CCRef CCSet CCString CCVector +Containers } -{4 Pervasives (aliases to Core Modules)} - -Contains aliases to most modules from {i containers core}, and mixins +The module {!Containers} contains aliases to most other modules defined +in {i containers core}, and mixins such as: {[ module List = struct @@ -57,8 +57,6 @@ such as: end ]} -{!modules: CCPervasives} - {4 Containers.data} Various data structures. @@ -68,10 +66,12 @@ CCBV CCCache CCFQueue CCFlatHashtbl +CCIntMap CCMixmap CCMixtbl CCMultiMap CCMultiSet +CCPersistentArray CCPersistentHashtbl CCRingBuffer CCTrie @@ -83,6 +83,12 @@ Helpers to perform simple IO (mostly on files) and iterate on channels. {!modules: CCIO} +{4 Containers.unix} + +Helpers that depend on {!Unix}, e.g. to spawn sub-processes. + +{!modules: CCUnix} + {4 Containers.sexp} A small S-expression library. The interface is relatively unstable, but diff --git a/myocamlbuild.ml b/myocamlbuild.ml index feb6658c..40b0da2b 100644 --- a/myocamlbuild.ml +++ b/myocamlbuild.ml @@ -1,5 +1,5 @@ (* OASIS_START *) -(* DO NOT EDIT (digest: fb8dea068c03b0d63bc05634c5db1689) *) +(* DO NOT EDIT (digest: c0298c035a279ad3c641dc2bb1ecc03b) *) module OASISGettext = struct (* # 22 "src/oasis/OASISGettext.ml" *) @@ -611,13 +611,13 @@ let package_default = [ ("containers", ["src/core"], []); ("containers_io", ["src/io"], []); + ("containers_unix", ["src/unix"], []); ("containers_sexp", ["src/sexp"], []); ("containers_data", ["src/data"], []); ("containers_iter", ["src/iter"], []); ("containers_string", ["src/string"], []); ("containers_advanced", ["src/advanced"], []); ("containers_bigarray", ["src/bigarray"], []); - ("containers_pervasives", ["src/pervasives"], []); ("containers_misc", ["src/misc"], []); ("containers_thread", ["src/threads"], []); ("containers_lwt", ["src/lwt"], []) @@ -629,7 +629,6 @@ let package_default = ("tests/threads", ["src/core"; "src/threads"]); ("tests", ["src/core"; "src/data"; "src/misc"; "src/string"]); ("src/threads", ["src/core"]); - ("src/pervasives", ["src/core"]); ("src/misc", ["src/core"; "src/data"]); ("src/lwt", ["src/core"; "src/misc"]); ("src/bigarray", ["src/core"]); @@ -644,7 +643,8 @@ let package_default = "src/iter"; "src/misc"; "src/sexp"; - "src/string" + "src/string"; + "src/unix" ]); ("examples", ["src/core"; "src/misc"; "src/sexp"]); ("benchs", diff --git a/opam b/opam index 10719d1a..4c53640f 100644 --- a/opam +++ b/opam @@ -12,6 +12,7 @@ build: [ "--%{lwt:enable}%-lwt" "--%{base-bigarray:enable}%-bigarray" "--%{sequence:enable}%-advanced" + "--%{base-unix:enable}%-unix" "--enable-docs" "--enable-misc"] [make "build"] diff --git a/setup.ml b/setup.ml index a0def9ae..d83438c0 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.4.4 *) (* OASIS_START *) -(* DO NOT EDIT (digest: d2414bb4ed47c14d1e696e080da28357) *) +(* DO NOT EDIT (digest: bc1fcdeddb836af6942617417a65ae05) *) (* Regenerated by OASIS v0.4.5 Visit http://oasis.forge.ocamlcore.org for more information and @@ -6965,7 +6965,7 @@ let setup_t = alpha_features = ["ocamlbuild_more_args"]; beta_features = []; name = "containers"; - version = "0.9"; + version = "0.10"; license = OASISLicense.DEP5License (OASISLicense.DEP5Unit @@ -7041,6 +7041,18 @@ let setup_t = "Build the misc library, with experimental modules still susceptible to change"; flag_default = [(OASISExpr.EBool true, true)] }); + Flag + ({ + cs_name = "unix"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + flag_description = + Some + "Build the containers.unix library (depends on Unix)"; + flag_default = [(OASISExpr.EBool true, false)] + }); Flag ({ cs_name = "lwt"; @@ -7141,7 +7153,8 @@ let setup_t = "CCString"; "CCHashtbl"; "CCMap"; - "CCFormat" + "CCFormat"; + "Containers" ]; lib_pack = false; lib_internal_modules = []; @@ -7179,6 +7192,40 @@ let setup_t = lib_findlib_name = Some "io"; lib_findlib_containers = [] }); + Library + ({ + cs_name = "containers_unix"; + cs_data = PropList.Data.create (); + cs_plugin_data = [] + }, + { + bs_build = [(OASISExpr.EBool true, true)]; + bs_install = [(OASISExpr.EBool true, true)]; + bs_path = "src/unix"; + bs_compiled_object = Best; + bs_build_depends = + [ + FindlibPackage ("bytes", None); + FindlibPackage ("unix", None) + ]; + bs_build_tools = [ExternalTool "ocamlbuild"]; + bs_c_sources = []; + bs_data_files = []; + bs_ccopt = [(OASISExpr.EBool true, [])]; + bs_cclib = [(OASISExpr.EBool true, [])]; + bs_dlllib = [(OASISExpr.EBool true, [])]; + bs_dllpath = [(OASISExpr.EBool true, [])]; + bs_byteopt = [(OASISExpr.EBool true, [])]; + bs_nativeopt = [(OASISExpr.EBool true, [])] + }, + { + lib_modules = ["CCUnix"]; + lib_pack = false; + lib_internal_modules = []; + lib_findlib_parent = Some "containers"; + lib_findlib_name = Some "unix"; + lib_findlib_containers = [] + }); Library ({ cs_name = "containers_sexp"; @@ -7245,7 +7292,9 @@ let setup_t = "CCBV"; "CCMixtbl"; "CCMixmap"; - "CCRingBuffer" + "CCRingBuffer"; + "CCIntMap"; + "CCPersistentArray" ]; lib_pack = false; lib_internal_modules = []; @@ -7294,7 +7343,7 @@ let setup_t = bs_install = [(OASISExpr.EBool true, true)]; bs_path = "src/string"; bs_compiled_object = Best; - bs_build_depends = []; + bs_build_depends = [FindlibPackage ("bytes", None)]; bs_build_tools = [ExternalTool "ocamlbuild"]; bs_c_sources = []; bs_data_files = []; @@ -7306,7 +7355,7 @@ let setup_t = bs_nativeopt = [(OASISExpr.EBool true, [])] }, { - lib_modules = ["KMP"; "Levenshtein"]; + lib_modules = ["KMP"; "Levenshtein"; "App_parse"]; lib_pack = true; lib_internal_modules = []; lib_findlib_parent = Some "containers"; @@ -7391,36 +7440,6 @@ let setup_t = lib_findlib_name = Some "bigarray"; lib_findlib_containers = [] }); - Library - ({ - cs_name = "containers_pervasives"; - cs_data = PropList.Data.create (); - cs_plugin_data = [] - }, - { - bs_build = [(OASISExpr.EBool true, true)]; - bs_install = [(OASISExpr.EBool true, true)]; - bs_path = "src/pervasives"; - bs_compiled_object = Best; - bs_build_depends = [InternalLibrary "containers"]; - bs_build_tools = [ExternalTool "ocamlbuild"]; - bs_c_sources = []; - bs_data_files = []; - bs_ccopt = [(OASISExpr.EBool true, [])]; - bs_cclib = [(OASISExpr.EBool true, [])]; - bs_dlllib = [(OASISExpr.EBool true, [])]; - bs_dllpath = [(OASISExpr.EBool true, [])]; - bs_byteopt = [(OASISExpr.EBool true, [])]; - bs_nativeopt = [(OASISExpr.EBool true, [])] - }, - { - lib_modules = ["CCPervasives"]; - lib_pack = false; - lib_internal_modules = []; - lib_findlib_parent = Some "containers"; - lib_findlib_name = Some "pervasives"; - lib_findlib_containers = [] - }); Library ({ cs_name = "containers_misc"; @@ -7462,7 +7481,9 @@ let setup_t = "RoseTree"; "SmallSet"; "UnionFind"; - "Univ" + "Univ"; + "Puf"; + "Backtrack" ]; lib_pack = true; lib_internal_modules = []; @@ -7592,7 +7613,9 @@ let setup_t = (OASISExpr.EFlag "bigarray", OASISExpr.EAnd (OASISExpr.EFlag "lwt", - OASISExpr.EFlag "misc"))))), + OASISExpr.EAnd + (OASISExpr.EFlag "misc", + OASISExpr.EFlag "unix")))))), true) ]; doc_install = [(OASISExpr.EBool true, true)]; @@ -7648,7 +7671,7 @@ let setup_t = {exec_custom = false; exec_main_is = "run_benchs.ml"}); Executable ({ - cs_name = "bench_hash"; + cs_name = "run_bench_hash"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, @@ -7679,7 +7702,7 @@ let setup_t = bs_byteopt = [(OASISExpr.EBool true, [])]; bs_nativeopt = [(OASISExpr.EBool true, [])] }, - {exec_custom = false; exec_main_is = "bench_hash.ml"}); + {exec_custom = false; exec_main_is = "run_bench_hash.ml"}); Executable ({ cs_name = "run_test_future"; @@ -7772,7 +7795,13 @@ let setup_t = (OASISExpr.EBool true, false); (OASISExpr.EAnd (OASISExpr.EFlag "tests", - OASISExpr.EFlag "bigarray"), + OASISExpr.EAnd + (OASISExpr.EFlag "misc", + OASISExpr.EAnd + (OASISExpr.EFlag "bigarray", + OASISExpr.EAnd + (OASISExpr.EFlag "unix", + OASISExpr.EFlag "advanced")))), true) ]; bs_install = [(OASISExpr.EBool true, false)]; @@ -7788,8 +7817,10 @@ let setup_t = InternalLibrary "containers_advanced"; InternalLibrary "containers_sexp"; InternalLibrary "containers_bigarray"; + InternalLibrary "containers_unix"; FindlibPackage ("sequence", None); FindlibPackage ("gen", None); + FindlibPackage ("unix", None); FindlibPackage ("oUnit", None); FindlibPackage ("QTest2Lib", None) ]; @@ -7908,7 +7939,13 @@ let setup_t = (OASISExpr.EFlag "tests", OASISExpr.EAnd (OASISExpr.EFlag "tests", - OASISExpr.EFlag "misc")), + OASISExpr.EAnd + (OASISExpr.EFlag "misc", + OASISExpr.EAnd + (OASISExpr.EFlag "unix", + OASISExpr.EAnd + (OASISExpr.EFlag "advanced", + OASISExpr.EFlag "bigarray"))))), true) ]; test_tools = @@ -8063,7 +8100,8 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.4.5"; - oasis_digest = Some "\180\018\197c\134\002\173(\245'\138\144\0262\197z"; + oasis_digest = + Some "Q\133\224\006'\239^\194\020\007 \247\168\220\142\188"; oasis_exec = None; oasis_setup_args = []; setup_update = false @@ -8071,6 +8109,6 @@ let setup_t = let setup () = BaseSetup.setup setup_t;; -# 8075 "setup.ml" +# 8113 "setup.ml" (* OASIS_STOP *) let () = setup ();; diff --git a/src/core/CCError.mli b/src/core/CCError.mli index 7ab6bef2..57dc714a 100644 --- a/src/core/CCError.mli +++ b/src/core/CCError.mli @@ -113,8 +113,12 @@ val wrap3 : ('a -> 'b -> 'c -> 'd) -> 'a -> 'b -> 'c -> ('d, exn) t (** {2 Applicative} *) val pure : 'a -> ('a, 'err) t +(** Synonym of {!return} *) val (<*>) : ('a -> 'b, 'err) t -> ('a, 'err) t -> ('b, 'err) t +(** [a <*> b] evaluates [a] and [b], and, in case of success, returns + [`Ok (a b)]. Otherwise, it fails, and the error of [a] is chosen + over the error of [b] if both fail *) (** {2 Collections} *) diff --git a/src/core/CCList.ml b/src/core/CCList.ml index 48846818..ea84429e 100644 --- a/src/core/CCList.ml +++ b/src/core/CCList.ml @@ -231,6 +231,12 @@ let sorted_merge ?(cmp=Pervasives.compare) l1 l2 = (*$T List.sort Pervasives.compare ([(( * )2); ((+)1)] <*> [10;100]) \ = [11; 20; 101; 200] + sorted_merge [1;1;2] [1;2;3] = [1;1;1;2;2;3] +*) + +(*$Q + Q.(pair (list int) (list int)) (fun (l1,l2) -> \ + List.length (sorted_merge l1 l2) = List.length l1 + List.length l2) *) let sort_uniq (type elt) ?(cmp=Pervasives.compare) l = @@ -247,6 +253,56 @@ let sort_uniq (type elt) ?(cmp=Pervasives.compare) l = sort_uniq [10;10;10;10;1;10] = [1;10] *) +let uniq_succ ?(eq=(=)) l = + let rec f acc l = match l with + | [] -> List.rev acc + | [x] -> List.rev (x::acc) + | x :: ((y :: _) as tail) when eq x y -> f acc tail + | x :: tail -> f (x::acc) tail + in + f [] l + +(*$T + uniq_succ [1;1;2;3;1;6;6;4;6;1] = [1;2;3;1;6;4;6;1] +*) + +let sorted_merge_uniq ?(cmp=Pervasives.compare) l1 l2 = + let push ~cmp acc x = match acc with + | [] -> [x] + | y :: _ when cmp x y > 0 -> x :: acc + | _ -> acc (* duplicate, do not yield *) + in + let rec recurse ~cmp acc l1 l2 = match l1,l2 with + | [], l + | l, [] -> + let acc = List.fold_left (push ~cmp) acc l in + List.rev acc + | x1::l1', x2::l2' -> + let c = cmp x1 x2 in + if c < 0 then recurse ~cmp (push ~cmp acc x1) l1' l2 + else if c > 0 then recurse ~cmp (push ~cmp acc x2) l1 l2' + else recurse ~cmp acc l1 l2' (* drop one of the [x] *) + in + recurse ~cmp [] l1 l2 + +(*$T + sorted_merge_uniq [1; 1; 2; 3; 5; 8] [1; 2; 3; 4; 6; 8; 9; 9] = [1;2;3;4;5;6;8;9] +*) + +(*$Q + Q.(list int) (fun l -> \ + let l = List.sort Pervasives.compare l in \ + sorted_merge_uniq l [] = uniq_succ l) + Q.(list int) (fun l -> \ + let l = List.sort Pervasives.compare l in \ + sorted_merge_uniq [] l = uniq_succ l) + Q.(pair (list int) (list int)) (fun (l1, l2) -> \ + let l1 = List.sort Pervasives.compare l1 \ + and l2 = List.sort Pervasives.compare l2 in \ + let l3 = sorted_merge_uniq l1 l2 in \ + uniq_succ l3 = l3) +*) + let take n l = let rec direct i n l = match l with | [] -> [] diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 57a2944d..6021cf9d 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -118,11 +118,23 @@ val filter_map : ('a -> 'b option) -> 'a t -> 'b t (** Map and remove elements at the same time *) val sorted_merge : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list -> 'a list -(** merges elements from both sorted list, removing duplicates *) +(** merges elements from both sorted list *) val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list (** Sort the list and remove duplicate elements *) +val sorted_merge_uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list -> 'a list +(** [sorted_merge_uniq l1 l2] merges the sorted lists [l1] and [l2] and + removes duplicates + @since 0.10 *) + +val uniq_succ : ?eq:('a -> 'a -> bool) -> 'a list -> 'a list +(** [uniq_succ l] removes duplicate elements that occur one next to the other. + Examples: + [uniq_succ [1;2;1] = [1;2;1]] + [uniq_succ [1;1;2] = [1;2]] + @since 0.10 *) + (** {2 Indices} *) module Idx : sig diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 097cd078..36ed8936 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -263,6 +263,24 @@ let of_array a = let to_array s = Array.init (String.length s) (fun i -> s.[i]) +let lines_gen s = Split.gen_cpy ~by:"\n" s + +let lines s = Split.list_cpy ~by:"\n" s + +let concat_gen ~sep g = + let b = Buffer.create 256 in + let rec aux ~first () = match g () with + | None -> Buffer.contents b + | Some s -> + if not first then Buffer.add_string b sep; + Buffer.add_string b s; + aux ~first:false () + in aux ~first:true () + +let unlines l = String.concat "\n" l + +let unlines_gen g = concat_gen ~sep:"\n" g + let pp buf s = Buffer.add_char buf '"'; Buffer.add_string buf s; diff --git a/src/core/CCString.mli b/src/core/CCString.mli index 807bb938..fcfc32db 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -113,6 +113,30 @@ val suffix : suf:string -> string -> bool not (suffix ~suf:"abcd" "cd") *) +val lines : string -> string list +(** [lines s] returns a list of the lines of [s] (splits along '\n') + @since 0.10 *) + +val lines_gen : string -> string gen +(** [lines_gen s] returns a generator of the lines of [s] (splits along '\n') + @since 0.10 *) + +val concat_gen : sep:string -> string gen -> string +(** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep]. + @since 0.10 *) + +val unlines : string list -> string +(** [unlines l] concatenates all strings of [l], separated with '\n' + @since 0.10 *) + +val unlines_gen : string gen -> string +(** [unlines_gen g] concatenates all strings of [g], separated with '\n' + @since 0.10 *) + +(*$Q + Q.printable_string (fun s -> unlines (lines s) = s) +*) + include S with type t := string (** {2 Splitting} *) diff --git a/src/core/META b/src/core/META index b7423bf6..800bab57 100644 --- a/src/core/META +++ b/src/core/META @@ -1,6 +1,6 @@ # OASIS_START -# DO NOT EDIT (digest: 71114627b2165c5eaff8d7c614d71974) -version = "0.9" +# DO NOT EDIT (digest: 09a66d8274446aebd1544537d064203d) +version = "0.10" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers.cma" @@ -8,8 +8,19 @@ archive(byte, plugin) = "containers.cma" archive(native) = "containers.cmxa" archive(native, plugin) = "containers.cmxs" exists_if = "containers.cma" +package "unix" ( + version = "0.10" + description = "A modular standard library focused on data structures." + requires = "bytes unix" + archive(byte) = "containers_unix.cma" + archive(byte, plugin) = "containers_unix.cma" + archive(native) = "containers_unix.cmxa" + archive(native, plugin) = "containers_unix.cmxs" + exists_if = "containers_unix.cma" +) + package "thread" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." requires = "containers threads" archive(byte) = "containers_thread.cma" @@ -20,8 +31,9 @@ package "thread" ( ) package "string" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." + requires = "bytes" archive(byte) = "containers_string.cma" archive(byte, plugin) = "containers_string.cma" archive(native) = "containers_string.cmxa" @@ -30,7 +42,7 @@ package "string" ( ) package "sexp" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_sexp.cma" @@ -40,19 +52,8 @@ package "sexp" ( exists_if = "containers_sexp.cma" ) -package "pervasives" ( - version = "0.9" - description = "A modular standard library focused on data structures." - requires = "containers" - archive(byte) = "containers_pervasives.cma" - archive(byte, plugin) = "containers_pervasives.cma" - archive(native) = "containers_pervasives.cmxa" - archive(native, plugin) = "containers_pervasives.cmxs" - exists_if = "containers_pervasives.cma" -) - package "misc" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." requires = "containers containers.data" archive(byte) = "containers_misc.cma" @@ -63,7 +64,7 @@ package "misc" ( ) package "lwt" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." requires = "containers lwt containers.misc" archive(byte) = "containers_lwt.cma" @@ -74,7 +75,7 @@ package "lwt" ( ) package "iter" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." archive(byte) = "containers_iter.cma" archive(byte, plugin) = "containers_iter.cma" @@ -84,7 +85,7 @@ package "iter" ( ) package "io" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_io.cma" @@ -95,7 +96,7 @@ package "io" ( ) package "data" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_data.cma" @@ -106,7 +107,7 @@ package "data" ( ) package "bigarray" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." requires = "containers bigarray bytes" archive(byte) = "containers_bigarray.cma" @@ -117,7 +118,7 @@ package "bigarray" ( ) package "advanced" ( - version = "0.9" + version = "0.10" description = "A modular standard library focused on data structures." requires = "containers sequence" archive(byte) = "containers_advanced.cma" diff --git a/src/pervasives/CCPervasives.ml b/src/core/containers.ml similarity index 95% rename from src/pervasives/CCPervasives.ml rename to src/core/containers.ml index 96410c18..d1c862c3 100644 --- a/src/pervasives/CCPervasives.ml +++ b/src/core/containers.ml @@ -40,6 +40,11 @@ Changed [Opt] to [Option] to better reflect that this module is about the ['a option] type, with [module Option = CCOpt]. @since 0.5 + +Renamed from [CCPervasives] in [containers.pervasives], to [Containers] +in the core library [containers] + +@since 0.10 *) module Array = struct diff --git a/src/core/containers.mldylib b/src/core/containers.mldylib index 03fbbecd..3ac48971 100644 --- a/src/core/containers.mldylib +++ b/src/core/containers.mldylib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 5c58c781604360016ba544a7c9d0c597) +# DO NOT EDIT (digest: b1fae2373cf2a628a9465ba233f7c127) CCVector CCPrint CCError @@ -21,4 +21,5 @@ CCString CCHashtbl CCMap CCFormat +Containers # OASIS_STOP diff --git a/src/core/containers.mllib b/src/core/containers.mllib index 03fbbecd..3ac48971 100644 --- a/src/core/containers.mllib +++ b/src/core/containers.mllib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 5c58c781604360016ba544a7c9d0c597) +# DO NOT EDIT (digest: b1fae2373cf2a628a9465ba233f7c127) CCVector CCPrint CCError @@ -21,4 +21,5 @@ CCString CCHashtbl CCMap CCFormat +Containers # OASIS_STOP diff --git a/src/data/CCFQueue.ml b/src/data/CCFQueue.ml index 638a1617..0f828d8c 100644 --- a/src/data/CCFQueue.ml +++ b/src/data/CCFQueue.ml @@ -74,6 +74,11 @@ let rec cons : 'a. 'a -> 'a t -> 'a t | Deep (n,Three (y,z,z'), lazy q', tail) -> _deep (n+1) (Two (x,y)) (lazy (cons (z,z') q')) tail +(*$Q + (Q.pair Q.int (Q.list Q.int)) (fun (x,l) -> \ + cons x (of_list l) |> to_list = x::l) + *) + let rec snoc : 'a. 'a t -> 'a -> 'a t = fun q x -> match q with | Shallow Zero -> _single x @@ -87,6 +92,11 @@ let rec snoc : 'a. 'a t -> 'a -> 'a t | Deep (n,hd, lazy q', Three (y,z,z')) -> _deep (n+1) hd (lazy (snoc q' (y,z))) (Two(z',x)) +(*$Q + (Q.pair Q.int (Q.list Q.int)) (fun (x,l) -> \ + snoc (of_list l) x |> to_list = l @ [x]) + *) + let rec take_front_exn : 'a. 'a t -> ('a *'a t) = fun q -> match q with | Shallow Zero -> raise Empty @@ -105,6 +115,12 @@ let rec take_front_exn : 'a. 'a t -> ('a *'a t) | Deep (n,Three (x,y,z), middle, tail) -> x, _deep (n-1) (Two(y,z)) middle tail +(*$Q + (Q.pair Q.int (Q.list Q.int)) (fun (x,l) -> \ + let x', q = cons x (of_list l) |> take_front_exn in \ + x'=x && to_list q = l) + *) + let take_front q = try Some (take_front_exn q) with Empty -> None @@ -117,6 +133,11 @@ let take_front_l n q = aux (x::acc) q' (n-1) in aux [] q n +(*$T + let l, q = take_front_l 5 (1 -- 10) in \ + l = [1;2;3;4;5] && to_list q = [6;7;8;9;10] +*) + let take_front_while p q = let rec aux acc q = if is_empty q then List.rev acc, q @@ -125,6 +146,10 @@ let take_front_while p q = if p x then aux (x::acc) q' else List.rev acc, q in aux [] q +(*$T + take_front_while (fun x-> x<5) (1 -- 10) |> fst = [1;2;3;4] +*) + let rec take_back_exn : 'a. 'a t -> 'a t * 'a = fun q -> match q with | Shallow Zero -> invalid_arg "FQueue.take_back_exn" @@ -141,6 +166,12 @@ let rec take_back_exn : 'a. 'a t -> 'a t * 'a | Deep (n, hd, middle, Two(x,y)) -> _deep (n-1) hd middle (One x), y | Deep (n, hd, middle, Three(x,y,z)) -> _deep (n-1) hd middle (Two (x,y)), z +(*$Q + (Q.pair Q.int (Q.list Q.int)) (fun (x,l) -> \ + let q,x' = snoc (of_list l) x |> take_back_exn in \ + x'=x && to_list q = l) + *) + let take_back q = try Some (take_back_exn q) with Empty -> None @@ -186,6 +217,11 @@ let size : 'a. 'a t -> int | Shallow d -> _size_digit d | Deep (n, _, _, _) -> n +(*$Q + (Q.list Q.int) (fun l -> \ + size (of_list l) = List.length l) +*) + let _nth_digit i d = match i, d with | _, Zero -> raise Not_found | 0, One x -> x @@ -228,18 +264,41 @@ let nth i q = try Some (nth_exn i q) with Failure _ -> None +(*$Q + (Q.list Q.int) (fun l -> \ + let len = List.length l in let idx = CCList.(0 -- (len - 1)) in \ + let q = of_list l in \ + l = [] || List.for_all (fun i -> nth i q = Some (List.nth l i)) idx) +*) + let init q = try fst (take_back_exn q) with Empty -> q +(*$Q + (Q.list Q.int) (fun l -> \ + l = [] || (of_list l |> init |> to_list = List.rev (List.tl (List.rev l)))) +*) + let tail q = try snd (take_front_exn q) with Empty -> q +(*$Q + (Q.list Q.int) (fun l -> \ + l = [] || (of_list l |> tail |> to_list = List.tl l)) +*) + let add_seq_front seq q = - let q = ref q in - seq (fun x -> q := cons x !q); - !q + let l = ref [] in + (* reversed seq *) + seq (fun x -> l := x :: !l); + List.fold_left (fun q x -> cons x q) q !l + +(*$Q + Q.(pair (list int) (list int)) (fun (l1, l2) -> \ + add_seq_front (Sequence.of_list l1) (of_list l2) |> to_list = l1 @ l2) + *) let add_seq_back q seq = let q = ref q in @@ -260,12 +319,22 @@ let rec to_seq : 'a. 'a t -> 'a sequence to_seq q' (fun (x,y) -> k x; k y); _digit_to_seq tail k +(*$Q + (Q.list Q.int) (fun l -> \ + of_list l |> to_seq |> Sequence.to_list = l) +*) + let append q1 q2 = match q1, q2 with | Shallow Zero, _ -> q2 | _, Shallow Zero -> q1 | _ -> add_seq_back q1 (to_seq q2) +(*$Q + (Q.pair (Q.list Q.int)(Q.list Q.int)) (fun (l1,l2) -> \ + append (of_list l1) (of_list l2) |> to_list = l1 @ l2) +*) + let _map_digit f d = match d with | Zero -> Zero | One x -> One (f x) @@ -279,6 +348,11 @@ let rec map : 'a 'b. ('a -> 'b) -> 'a t -> 'b t let q'' = map (fun (x,y) -> f x, f y) q' in _deep size (_map_digit f hd) (Lazy.from_val q'') (_map_digit f tl) +(*$Q + (Q.list Q.int) (fun l -> \ + of_list l |> map string_of_int |> to_list = List.map string_of_int l) +*) + let (>|=) q f = map f q let _fold_digit f acc d = match d with @@ -295,6 +369,11 @@ let rec fold : 'a 'b. ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b let acc = fold (fun acc (x,y) -> f (f acc x) y) acc q' in _fold_digit f acc tl +(*$Q + (Q.list Q.int) (fun l -> \ + of_list l |> fold (fun acc x->x::acc) [] = List.rev l) +*) + let iter f q = to_seq q f let of_list l = List.fold_left snoc empty l @@ -304,16 +383,23 @@ let to_list q = to_seq q (fun x -> l := x :: !l); List.rev !l -let of_seq seq = - let l = ref [] in - seq (fun x -> l := x :: !l); - List.fold_left (fun q x -> cons x q) empty !l +let of_seq seq = add_seq_front seq empty (*$Q (Q.list Q.int) (fun l -> \ Sequence.of_list l |> of_seq |> to_list = l) *) +let rev q = + let q' = ref empty in + iter (fun x -> q' := cons x !q') q; + !q' + +(*$Q + (Q.list Q.int) (fun l -> \ + of_list l |> rev |> to_list = List.rev l) +*) + let _nil () = `Nil let _single x cont () = `Cons (x, cont) let _double x y cont () = `Cons (x, _single y cont) @@ -358,3 +444,24 @@ let rec _equal_klist eq l1 l2 = match l1(), l2() with eq x1 x2 && _equal_klist eq l1' l2' let equal eq q1 q2 = _equal_klist eq (to_klist q1) (to_klist q2) + +(*$T + let q1 = 1 -- 10 and q2 = append (1 -- 5) (6 -- 10) in \ + equal (=) q1 q2 +*) + +let (--) a b = + let rec up_to q a b = if a = b + then snoc q a + else up_to (snoc q a) (a+1) b + and down_to q a b = if a = b then snoc q a + else down_to (snoc q a) (a-1) b + in + if a <= b then up_to empty a b else down_to empty a b + +(*$T + 1 -- 5 |> to_list = [1;2;3;4;5] + 5 -- 1 |> to_list = [5;4;3;2;1] + 0 -- 0 |> to_list = [0] +*) + diff --git a/src/data/CCFQueue.mli b/src/data/CCFQueue.mli index 397155c1..aac4a484 100644 --- a/src/data/CCFQueue.mli +++ b/src/data/CCFQueue.mli @@ -110,10 +110,15 @@ val append : 'a t -> 'a t -> 'a t after elements of the first one. Linear in the size of the second queue. *) +val rev : 'a t -> 'a t +(** Reverse the queue, O(n) complexity + @since 0.10 *) + val map : ('a -> 'b) -> 'a t -> 'b t (** Map values *) val (>|=) : 'a t -> ('a -> 'b) -> 'b t +(** Synonym to {!map} *) val size : 'a t -> int (** Number of elements in the queue (constant time) *) @@ -130,6 +135,7 @@ val of_list : 'a list -> 'a t val to_list : 'a t -> 'a list val add_seq_front : 'a sequence -> 'a t -> 'a t + val add_seq_back : 'a t -> 'a sequence -> 'a t val to_seq : 'a t -> 'a sequence @@ -138,3 +144,7 @@ val of_seq : 'a sequence -> 'a t val to_klist : 'a t -> 'a klist val of_klist : 'a klist -> 'a t +val (--) : int -> int -> int t +(** [a -- b] is the integer range from [a] to [b], both included. + @since 0.10 *) + diff --git a/src/data/CCIntMap.ml b/src/data/CCIntMap.ml new file mode 100644 index 00000000..c3fecc7f --- /dev/null +++ b/src/data/CCIntMap.ml @@ -0,0 +1,276 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Map specialized for Int keys} *) + +(* "Fast Mergeable Integer Maps", Okasaki & Gill. +We use big-endian trees. *) + +type 'a t = + | E (* empty *) + | L of int * 'a (* leaf *) + | N of int (* common prefix *) * int (* bit switch *) * 'a t * 'a t + +let empty = E + +let bit_is_0_ x ~bit = x land bit = 0 + +let mask_ x ~mask = (x lor (mask -1)) land (lnot mask) +(* low endian: let mask_ x ~mask = x land (mask - 1) *) + +let is_prefix_ ~prefix y ~bit = prefix = mask_ y ~mask:bit + +(* loop down until x=lowest_bit_ x *) +let rec highest_bit_naive x m = + if m = 0 then 0 + else if x land m = 0 then highest_bit_naive x (m lsr 1) + else m + +let highest_bit = + (* the highest representable 2^n *) + let max_log = 1 lsl (Sys.word_size - 2) in + fun x -> + if x > 1 lsl 20 + then (* small shortcut: remove least significant 20 bits *) + let x' = x land (lnot ((1 lsl 20) -1)) in + highest_bit_naive x' max_log + else highest_bit_naive x max_log + +(*$Q + Q.int (fun i -> \ + let b = highest_bit i in \ + i < 0 || (b <= i && (i-b) < b)) +*) + +(* helper: + + let b_of_i i = + let rec f acc i = + if i=0 then acc else let q, r = i/2, i mod 2 + in + f (r::acc) q in f [] i;; +*) + +(* low endian: let branching_bit_ a _ b _ = lowest_bit_ (a lxor b) *) +let branching_bit_ a b = + highest_bit (a lxor b) + +let rec find_exn k t = match t with + | E -> raise Not_found + | L (k', v) when k = k' -> v + | L _ -> raise Not_found + | N (prefix, m, l, r) -> + if is_prefix_ ~prefix k ~bit:m + then if bit_is_0_ k ~bit:m + then find_exn k l + else find_exn k r + else raise Not_found + + (* FIXME: valid if k < 0? + if k <= prefix (* search tree *) + then find_exn k l + else find_exn k r + *) + +let find k t = + try Some (find_exn k t) + with Not_found -> None + +let mem k t = + try ignore (find_exn k t); true + with Not_found -> false + +let mk_node_ prefix switch l r = match l, r with + | E, o | o, E -> o + | _ -> N (prefix, switch, l, r) + +(* join trees t1 and t2 with prefix p1 and p2 respectively + (p1 and p2 do not overlap) *) +let join_ t1 p1 t2 p2 = + let switch = branching_bit_ p1 p2 in + let prefix = mask_ p1 ~mask:switch in + if bit_is_0_ p1 ~bit:switch + then mk_node_ prefix switch t1 t2 + else (assert (bit_is_0_ p2 ~bit:switch); mk_node_ prefix switch t2 t1) + +let singleton k v = L (k, v) + +(* c: conflict function *) +let rec insert_ c k v t = match t with + | E -> L (k, v) + | L (k', v') -> + if k=k' + then L (k, c ~old:v' v) + else join_ t k' (L (k, v)) k + | N (prefix, switch, l, r) -> + if is_prefix_ ~prefix k ~bit:switch + then if bit_is_0_ k ~bit:switch + then N(prefix, switch, insert_ c k v l, r) + else N(prefix, switch, l, insert_ c k v r) + else join_ (L(k,v)) k t prefix + +let add k v t = insert_ (fun ~old:_ v -> v) k v t + +(*$Q & ~count:20 + Q.(list (pair int int)) (fun l -> \ + let l = CCList.Set.uniq l in let m = of_list l in \ + List.for_all (fun (k,v) -> find_exn k m = v) l) +*) + +let rec remove k t = match t with + | E -> E + | L (k', _) -> if k=k' then E else t + | N (prefix, switch, l, r) -> + if is_prefix_ ~prefix k ~bit:switch + then if bit_is_0_ k ~bit:switch + then mk_node_ prefix switch (remove k l) r + else mk_node_ prefix switch l (remove k r) + else t (* not present *) + +let update k f t = + try + let v = find_exn k t in + begin match f (Some v) with + | None -> remove k t + | Some v' -> add k v' t + end + with Not_found -> + match f None with + | None -> t + | Some v -> add k v t + +let doubleton k1 v1 k2 v2 = add k1 v1 (singleton k2 v2) + +let rec iter f t = match t with + | E -> () + | L (k, v) -> f k v + | N (_, _, l, r) -> iter f l; iter f r + +let rec fold f t acc = match t with + | E -> acc + | L (k, v) -> f k v acc + | N (_, _, l, r) -> + let acc = fold f l acc in + fold f r acc + +let cardinal t = fold (fun _ _ n -> n+1) t 0 + +let rec choose_exn = function + | E -> raise Not_found + | L (k, v) -> k, v + | N (_, _, l, _) -> choose_exn l + +let choose t = + try Some (choose_exn t) + with Not_found -> None + +let rec union f a b = match a, b with + | E, o | o, E -> o + | L (k, v), o + | o, L (k, v) -> + (* insert k, v into o *) + insert_ (fun ~old v -> f k old v) k v o + | N (p1, m1, l1, r1), N (p2, m2, l2, r2) -> + if p1 = p2 && m1 = m2 + then mk_node_ p1 m1 (union f l1 l2) (union f r1 r2) + else if m1 < m2 && is_prefix_ ~prefix:p2 p1 ~bit:m1 + then if bit_is_0_ p2 ~bit:m1 + then N (p1, m1, union f l1 b, r1) + else N (p1, m1, l1, union f r1 b) + else if m1 > m2 && is_prefix_ ~prefix:p1 p2 ~bit:m2 + then if bit_is_0_ p1 ~bit:m2 + then N (p2, m2, union f l2 a, r2) + else N (p2, m2, l2, union f r2 a) + else join_ a p1 b p2 + +let rec inter f a b = match a, b with + | E, _ | _, E -> E + | L (k, v), o + | o, L (k, v) -> + begin try + let v' = find_exn k o in + L (k, f k v v') + with Not_found -> E + end + | N (p1, m1, l1, r1), N (p2, m2, l2, r2) -> + if p1 = p2 && m1 = m2 + then mk_node_ p1 m1 (inter f l1 l2) (inter f r1 r2) + else if m1 < m2 && is_prefix_ ~prefix:p2 p1 ~bit:m1 + then if bit_is_0_ p2 ~bit:m1 + then inter f l1 b + else inter f r1 b + else if m1 > m2 && is_prefix_ ~prefix:p1 p2 ~bit:m2 + then if bit_is_0_ p1 ~bit:m2 + then inter f l2 a + else inter f r2 a + else E + +(* TODO: write tests *) + +(** {2 Whole-collection operations} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +let add_list t l = List.fold_left (fun t (k,v) -> add k v t) t l + +let of_list l = add_list empty l + +let to_list t = fold (fun k v l -> (k,v) :: l) t [] + +(*$Q + Q.(list (pair int int)) (fun l -> \ + let l = List.map (fun (k,v) -> abs k,v) l in \ + let rec is_sorted = function [] | [_] -> true \ + | x::y::tail -> x <= y && is_sorted (y::tail) in \ + of_list l |> to_list |> List.rev_map fst |> is_sorted) +*) + +(*$Q + Q.(list (pair int int)) (fun l -> \ + of_list l |> cardinal = List.length l) + *) + +let add_seq t seq = + let t = ref t in + seq (fun (k,v) -> t := add k v !t); + !t + +let of_seq seq = add_seq empty seq + +let to_seq t yield = iter (fun k v -> yield (k,v)) t + +let keys t yield = iter (fun k _ -> yield k) t + +let values t yield = iter (fun _ v -> yield v) t + +type 'a tree = unit -> [`Nil | `Node of 'a * 'a tree list] + +let rec as_tree t () = match t with + | E -> `Nil + | L (k, v) -> `Node (`Leaf (k, v), []) + | N (prefix, switch, l, r) -> + `Node (`Node (prefix, switch), [as_tree l; as_tree r]) diff --git a/src/data/CCIntMap.mli b/src/data/CCIntMap.mli new file mode 100644 index 00000000..61a78c00 --- /dev/null +++ b/src/data/CCIntMap.mli @@ -0,0 +1,96 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Map specialized for Int keys} + +{b status: unstable} +@since 0.10 *) + +type 'a t + +val empty : 'a t + +val singleton : int -> 'a -> 'a t + +val doubleton : int -> 'a -> int -> 'a -> 'a t + +val mem : int -> _ t -> bool + +val find : int -> 'a t -> 'a option + +val find_exn : int -> 'a t -> 'a +(** Same as {!find} but unsafe + @raise Not_found if key not present *) + +val add : int -> 'a -> 'a t -> 'a t + +val remove : int -> 'a t -> 'a t + +val update : int -> ('a option -> 'a option) -> 'a t -> 'a t + +val cardinal : _ t -> int + +val iter : (int -> 'a -> unit) -> 'a t -> unit + +val fold : (int -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + +val choose : 'a t -> (int * 'a) option + +val choose_exn : 'a t -> int * 'a + +val union : (int -> 'a -> 'a -> 'a) -> 'a t -> 'a t -> 'a t + +val inter : (int -> 'a -> 'a -> 'a) -> 'a t -> 'a t -> 'a t + +(** {2 Whole-collection operations} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +val add_list : 'a t -> (int * 'a) list -> 'a t + +val of_list : (int * 'a) list -> 'a t + +val to_list : 'a t -> (int * 'a) list + +val add_seq : 'a t -> (int * 'a) sequence -> 'a t + +val of_seq : (int * 'a) sequence -> 'a t + +val to_seq : 'a t -> (int * 'a) sequence + +val keys : _ t -> int sequence + +val values : 'a t -> 'a sequence + + +(** Helpers *) + +val highest_bit : int -> int + +type 'a tree = unit -> [`Nil | `Node of 'a * 'a tree list] + +val as_tree : 'a t -> [`Node of int * int | `Leaf of int * 'a ] tree diff --git a/src/data/CCPersistentArray.ml b/src/data/CCPersistentArray.ml new file mode 100644 index 00000000..f674cc22 --- /dev/null +++ b/src/data/CCPersistentArray.ml @@ -0,0 +1,92 @@ +(* +copyright (c) 2013-2015, Guillaume Bury +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + + +(* Persistent arrays *) + +type 'a t = 'a data ref +and 'a data = + | Array of 'a array + | Diff of int * 'a * 'a t + +let make n a = ref (Array (Array.make n a)) +let init n f = ref (Array (Array.init n f)) + +let rec _reroot t k = match !t with + | Array a -> k a + | Diff (i, v, t') -> + _reroot t' (fun a -> + let v' = a.(i) in + a.(i) <- v; + t := Array a; + t' := Diff(i, v', t); + k a + ) + +let reroot t = match !t with + | Array a -> a + | _ -> _reroot t (fun x -> x) + +let copy t = ref (Array(Array.copy (reroot t))) + +let get t i = match !t with + | Array a -> a.(i) + | _ -> (reroot t).(i) + +let set t i v = + let a = reroot t in + let old = a.(i) in + a.(i) <- v; + let t' = ref (Array a) in + t := Diff (i, old, t'); + t' + +let length t = Array.length (reroot t) + +let map f t = ref (Array (Array.map f (reroot t))) +let mapi f t = ref (Array (Array.mapi f (reroot t))) + +let iter f t = Array.iter f (reroot t) +let iteri f t = Array.iteri f (reroot t) + +let fold_left f acc t = Array.fold_left f acc (reroot t) +let fold_right f t acc = Array.fold_right f (reroot t) acc + +let to_array t = Array.copy (reroot t) +let of_array a = init (Array.length a) (fun i -> a.(i)) + +let to_list t = Array.to_list (reroot t) +let of_list l = ref (Array (Array.of_list l)) + +type 'a sequence = ('a -> unit) -> unit + +let to_seq a yield = iter yield a + +let of_seq seq = + let l = ref [] in + seq (fun x -> l := x :: !l); + of_list (List.rev !l) + + diff --git a/src/data/CCPersistentArray.mli b/src/data/CCPersistentArray.mli new file mode 100644 index 00000000..ae0bebfd --- /dev/null +++ b/src/data/CCPersistentArray.mli @@ -0,0 +1,105 @@ +(* +copyright (c) 2013-2015, Guillaume Bury +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + + +(** {1 Persistent Arrays} + +From the paper by Jean-Christophe Filliâtre, +"A persistent Union-Find data structure", see +{{: https://www.lri.fr/~filliatr/ftp/publis/puf-wml07.ps} the ps version} + +@since 0.10 *) + +type 'a t +(** The type of persistent arrays *) + +val make : int -> 'a -> 'a t +(** [make n x] returns a persistent array of length n, with [x]. All the + elements of this new array are initially physically equal to x + (in the sense of the == predicate). Consequently, if x is mutable, it is + shared among all elements of the array, and modifying x through one of the + array entries will modify all other entries at the same time. + @raise Invalid_argument if [n < 0] or [n > Sys.max_array_length]. + If the value of x is a floating-point number, then the maximum size is + only [Sys.max_array_length / 2].*) + +val init : int -> (int -> 'a) -> 'a t +(** [make n f] returns a persistent array of length n, with element + [i] initialized to the result of [f i]. + @raise Invalid_argument if [n < 0] or [n > Sys.max_array_length]. + If the value of x is a floating-point number, then the maximum size is + only [Sys.max_array_length / 2].*) + +val get : 'a t -> int -> 'a +(** [get a i] Returns the element with index [i] from the array [a]. + @raise Invalid_argument "index out of bounds" if [n] is outside the + range [0] to [Array.length a - 1].*) + +val set : 'a t -> int -> 'a -> 'a t +(** [set a i v] sets the element index [i] from the array [a] to [v]. + @raise Invalid_argument "index out of bounds" if [n] is outside the + range [0] to [Array.length a - 1].*) + +val length : 'a t -> int +(** Returns the length of the persistent array. *) + +val copy : 'a t -> 'a t +(** [copy a] returns a fresh copy of [a]. Both copies are independent. *) + +val map : ('a -> 'b) -> 'a t -> 'b t +val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t +(** Applies the given function to all elements of the array, and returns + a persistent array initialized by the results of f. In the case of [mapi], + the function is also given the index of the element. + It is equivalent to [fun f t -> init (fun i -> f (get t i))]. *) + +val iter : ('a -> unit) -> 'a t -> unit +val iteri : (int -> 'a -> unit) -> 'a t -> unit +(** [iter f t] applies function [f] to all elements of the persistent array, + in order from element [0] to element [length t - 1]. *) + +val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a +val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b +(** Fold on the elements of the array. *) + +val to_array : 'a t -> 'a array +(** [to_array t] returns a mutable copy of [t]. *) + +val of_array : 'a array -> 'a t +(** [from_array a] returns an immutable copy of [a]. *) + +val to_list : 'a t -> 'a list +(** [to_list t] returns the list of elements in [t]. *) + +val of_list : 'a list -> 'a t +(** [of_list l] returns a fresh persistent array containing the elements of [l]. *) + +type 'a sequence = ('a -> unit) -> unit + +val to_seq : 'a t -> 'a sequence + +val of_seq : 'a sequence -> 'a t + + diff --git a/src/data/containers_data.mldylib b/src/data/containers_data.mldylib index f5be522f..ad8398b5 100644 --- a/src/data/containers_data.mldylib +++ b/src/data/containers_data.mldylib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 868cf65b04ece1e5b4b46f9a48586507) +# DO NOT EDIT (digest: b83e1a21d44ea00373b0dde5cda9eedd) CCMultiMap CCMultiSet CCTrie @@ -12,4 +12,6 @@ CCBV CCMixtbl CCMixmap CCRingBuffer +CCIntMap +CCPersistentArray # OASIS_STOP diff --git a/src/data/containers_data.mllib b/src/data/containers_data.mllib index f5be522f..ad8398b5 100644 --- a/src/data/containers_data.mllib +++ b/src/data/containers_data.mllib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 868cf65b04ece1e5b4b46f9a48586507) +# DO NOT EDIT (digest: b83e1a21d44ea00373b0dde5cda9eedd) CCMultiMap CCMultiSet CCTrie @@ -12,4 +12,6 @@ CCBV CCMixtbl CCMixmap CCRingBuffer +CCIntMap +CCPersistentArray # OASIS_STOP diff --git a/src/lwt/lwt_pipe.mli b/src/lwt/lwt_pipe.mli index 46702c78..fce6de12 100644 --- a/src/lwt/lwt_pipe.mli +++ b/src/lwt/lwt_pipe.mli @@ -74,20 +74,20 @@ type ('a, +'perm) t constraint 'perm = [< `r | `w] type ('a, 'perm) pipe = ('a, 'perm) t -val keep : _ t -> unit Lwt.t -> unit +val keep : (_,_) t -> unit Lwt.t -> unit (** [keep p fut] adds a pointer from [p] to [fut] so that [fut] is not garbage-collected before [p] *) -val is_closed : _ t -> bool +val is_closed : (_,_) t -> bool -val close : _ t -> unit Lwt.t +val close : (_,_) t -> unit Lwt.t (** [close p] closes [p], which will not accept input anymore. This sends [`End] to all readers connected to [p] *) -val close_async : _ t -> unit +val close_async : (_,_) t -> unit (** Same as {!close} but closes in the background *) -val wait : _ t -> unit Lwt.t +val wait : (_,_) t -> unit Lwt.t (** Evaluates once the pipe closes *) val create : ?max_size:int -> unit -> ('a, 'perm) t @@ -101,7 +101,7 @@ val connect : ?ownership:[`None | `InOwnsOut | `OutOwnsIn] -> @param own determines which pipes owns which (the owner, when it closes, also closes the ownee) *) -val link_close : _ t -> after:_ t -> unit +val link_close : (_,_) t -> after:(_,_) t -> unit (** [link_close p ~after] will close [p] when [after] closes. if [after] is closed already, closes [p] immediately *) diff --git a/src/misc/backtrack.ml b/src/misc/backtrack.ml new file mode 100644 index 00000000..d6562db0 --- /dev/null +++ b/src/misc/backtrack.ml @@ -0,0 +1,193 @@ + +module type MONAD = sig + type 'a t + val return : 'a -> 'a t + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +end + +module NonLogical = struct + type 'a t = unit -> 'a + let return x () = x + let (>>=) x f () = let y = x() in f y () +end + +type ('a, 'b) list_view = + | Nil of exn + | Cons of 'a * 'b + +(** The monad is parametrised in the types of state, environment and + writer. *) +module type Param = sig + (** Read only *) + type e +(** Write only *) + type w +(** [w] must be a monoid *) + val wunit : w + val wprod : w -> w -> w +(** Read-write *) + type s +(** Update-only. Essentially a writer on [u->u]. *) + type u +(** [u] must be pointed. *) + val uunit : u +end + +module Logical (P:Param) = struct + type state = { + e: P.e; + w: P.w; + s: P.s; + u: P.u; + } + + type _ t = + | Ignore : _ t -> unit t + | Return : 'a -> 'a t + | Bind : 'a t * ('a -> 'b t) -> 'b t + | Map : 'a t * ('a -> 'b) -> 'b t + | Get : P.s t + | Set : P.s -> unit t + | Modify : (P.s -> P.s) -> unit t + | Put : P.w -> unit t + | Current : P.e t + | Local : P.e * 'a t -> 'a t (* local bind *) + | Update : (P.u -> P.u) -> unit t + | Zero : exn -> 'a t + | WithState : state * 'a t -> 'a t (* use other state *) + | Plus : 'a t * (exn -> 'a t ) -> 'a t + | Split : 'a t -> ('a, exn -> 'a t) list_view t + | Once : 'a t -> 'a t (* keep at most one element *) + | Break : (exn -> exn option) * 'a t -> 'a t + + let return x = Return x + + let (>>=) x f = Bind (x, f) + + let map f x = match x with + | Return x -> return (f x) + | Map (y, g) -> Map (y, fun x -> f (g x)) + | _ -> Map (x, f) + + let rec ignore : type a. a t -> unit t = function + | Return _ -> Return () + | Map (x, _) -> ignore x + | x -> Ignore x + + let set x = Set x + let get = Get + let modify f = Modify f + let put x = Put x + let current = Current + let local x y = Local (x, y) + let update f = Update f + let zero e = Zero e + let with_state st x = WithState (st, x) + + let rec plus a f = match a with + | Zero e -> f e + | Plus (a1, f1) -> + plus a1 (fun e -> plus (f1 e) f) + | _ -> Plus (a, f) + + let split x = Split x + + let rec once : type a. a t -> a t = function + | Zero e -> Zero e + | Return x -> Return x + | Map (x, f) -> map f (once x) + | x -> Once x + + let break f x = Break (f, x) + + type 'a reified = + | RNil of exn + | RCons of 'a * (exn -> 'a reified) + + let repr r () = match r with + | RNil e -> Nil e + | RCons (x, f) -> Cons (x, f) + + let cons x cont = Cons (x, cont) + let nil e = Nil e + + let rcons x cont = RCons (x, cont) + let rnil e = RNil e + + (* TODO: maybe (('a * state), exn -> state -> 'a t) list_view is better + for bind and local? *) + type 'a splitted = (('a * state), exn -> 'a t) list_view + + let rec run_rec + : type a. state -> a t -> a splitted + = fun st t -> match t with + | Return x -> cons (x, st) zero + | Ignore x -> + begin match run_rec st x with + | Nil e -> Nil e + | Cons ((_, st), cont) -> cons ((), st) (fun e -> Ignore (cont e)) + end + | Bind (x,f) -> + begin match run_rec st x with + | Nil e -> Nil e + | Cons ((x, st_x), cont) -> + let y = f x in + run_rec st_x (plus y (fun e -> with_state st (cont e >>= f))) + end + | Map (x,f) -> + begin match run_rec st x with + | Nil e -> Nil e + | Cons ((x, st), cont) -> + cons (f x, st) (fun e -> map f (cont e)) + end + | Get -> cons (st.s, st) zero + | Set s -> cons ((), {st with s}) zero + | Modify f -> + let st = {st with s = f st.s} in + cons ((), st) zero + | Put w -> cons ((), {st with w}) zero + | Current -> cons (st.e, st) zero + | Local (e,x) -> + (* bind [st.e = e] in [x], then restore old [e] in each result *) + let old_e = st.e in + let st' = {st with e} in + begin match run_rec st' x with + | Nil e -> Nil e + | Cons ((x, st''), cont) -> + cons (x, {st'' with e=old_e}) (fun e -> assert false) (* TODO: restore old_e*) + end + | Update f -> + let st = {st with u=f st.u} in + cons ((), st) zero + | WithState (st', x) -> run_rec st' x (* ignore [st] *) + | Zero e -> Nil e (* failure *) + | Plus (x,cont) -> + begin match run_rec st x with + | Nil e -> run_rec st (cont e) + | Cons ((x, st), cont') -> + cons (x, st) (fun e -> plus (cont' e) cont) + end + | Split x -> + begin match run_rec st x with + | Nil e -> cons (Nil e, st) zero + | Cons ((x, st'), cont) -> cons (cons x cont, st') zero + end + | Once x -> + begin match run_rec st x with + | Nil e -> Nil e + | Cons ((x, st), _) -> cons (x, st) zero + end + | Break (f,x) -> assert false (* TODO: ? *) + + let run t e s = + let state = {e; s; u=P.uunit; w=P.wunit} in + let rec run_list + : type a. state -> a t -> (a * state) reified + = fun state t -> match run_rec state t with + | Nil e -> rnil e + | Cons ((x, st), cont) -> + rcons (x, st) (fun e -> run_list state (cont e)) + in + run_list state t +end + diff --git a/src/misc/backtrack.mli b/src/misc/backtrack.mli new file mode 100644 index 00000000..c74ccf52 --- /dev/null +++ b/src/misc/backtrack.mli @@ -0,0 +1,88 @@ + +(** {1 Experiment with Backtracking Monad} + +Playing stuff, don't use (yet?). + +{b status: experimental} +@since 0.10 +*) + +module type MONAD = sig + type 'a t + val return : 'a -> 'a t + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +end + +(** Taken from Coq "logic_monad.mli" *) + +module NonLogical : sig + type 'a t = unit -> 'a + include MONAD with type 'a t := 'a t +end + +(** {6 Logical layer} *) +(** The logical monad is a backtracking monad on top of which is + layered a state monad (which is used to implement all of read/write, + read only, and write only effects). The state monad being layered on + top of the backtracking monad makes it so that the state is + backtracked on failure. + Backtracking differs from regular exception in that, writing (+) + for exception catching and (>>=) for bind, we require the + following extra distributivity laws: + x+(y+z) = (x+y)+z + zero+x = x + x+zero = x + (x+y)>>=k = (x>>=k)+(y>>=k) *) +(** A view type for the logical monad, which is a form of list, hence + we can decompose it with as a list. *) +type ('a, 'b) list_view = + | Nil of exn + | Cons of 'a * 'b + +(** The monad is parametrised in the types of state, environment and + writer. *) +module type Param = sig + (** Read only *) + type e +(** Write only *) + type w +(** [w] must be a monoid *) + val wunit : w + val wprod : w -> w -> w +(** Read-write *) + type s +(** Update-only. Essentially a writer on [u->u]. *) + type u +(** [u] must be pointed. *) + val uunit : u +end + +module Logical (P:Param) : sig + include MONAD + val map : ('a -> 'b) -> 'a t -> 'b t + val ignore : 'a t -> unit t + val set : P.s -> unit t + val get : P.s t + val modify : (P.s -> P.s) -> unit t + val put : P.w -> unit t + val current : P.e t + val local : P.e -> 'a t -> 'a t + val update : (P.u -> P.u) -> unit t + val zero : exn -> 'a t + val plus : 'a t -> (exn -> 'a t) -> 'a t + val split : 'a t -> (('a,(exn->'a t)) list_view) t + val once : 'a t -> 'a t + val break : (exn -> exn option) -> 'a t -> 'a t + (* val lift : 'a NonLogical.t -> 'a t *) + type 'a reified + + type state = { + e: P.e; + w: P.w; + s: P.s; + u: P.u; + } + + val repr : 'a reified -> ('a, exn -> 'a reified) list_view NonLogical.t + val run : 'a t -> P.e -> P.s -> ('a * state) reified +end diff --git a/src/misc/containers_misc.mlpack b/src/misc/containers_misc.mlpack index 80596ad7..2683c47c 100644 --- a/src/misc/containers_misc.mlpack +++ b/src/misc/containers_misc.mlpack @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 64ac3a98881a419a2ed1f076194542f9) +# DO NOT EDIT (digest: a0730df368ed19a3b181d80ccf7985b6) AbsSet Automaton Bij @@ -13,4 +13,6 @@ RoseTree SmallSet UnionFind Univ +Puf +Backtrack # OASIS_STOP diff --git a/src/misc/puf.ml b/src/misc/puf.ml index 2a338fd2..919f2bcf 100644 --- a/src/misc/puf.ml +++ b/src/misc/puf.ml @@ -58,6 +58,8 @@ module PArray = struct a end + let iteri f t = Array.iteri f (reroot t) + let get t i = match !t with | Array a -> a.(i) @@ -204,6 +206,9 @@ module type S = sig (** [iter_equiv_class uf a f] calls [f] on every element of [uf] that is congruent to [a], including [a] itself. *) + val iter : _ t -> (elt -> unit) -> unit + (** Iterate on all root values *) + val inconsistent : _ t -> (elt * elt * elt * elt) option (** Check whether the UF is inconsistent. It returns [Some (a, b, a', b')] in case of inconsistency, where a = b, a = a' and b = b' by congruence, @@ -222,7 +227,8 @@ module type S = sig val explain_distinct : 'e t -> elt -> elt -> elt * elt (** [explain_distinct uf a b] gives the original pair [a', b'] that - made [a] and [b] distinct by calling [distinct a' b'] *) + made [a] and [b] distinct by calling [distinct a' b']. The + terms must be distinct, otherwise Failure is raised. *) end module IH = Hashtbl.Make(struct type t = int let equal i j = i = j let hash i = i end) @@ -446,6 +452,14 @@ module Make(X : ID) : S with type elt = X.t = struct in traverse ia + let iter uf f = + PArray.iteri + (fun i i' -> + if i = i' then match PArray.get uf.data i with + | None -> () + | Some d -> f d.elt + ) uf.parent + let inconsistent uf = uf.inconsistent (** Closest common ancestor of the two elements in the proof forest *) diff --git a/src/misc/puf.mli b/src/misc/puf.mli index c44f4e2b..6ae10d5e 100644 --- a/src/misc/puf.mli +++ b/src/misc/puf.mli @@ -113,6 +113,10 @@ module type S = sig (** [iter_equiv_class uf a f] calls [f] on every element of [uf] that is congruent to [a], including [a] itself. *) + val iter : _ t -> (elt -> unit) -> unit + (** Iterate on all root values + @since NExT_RELEASE *) + val inconsistent : _ t -> (elt * elt * elt * elt) option (** Check whether the UF is inconsistent. It returns [Some (a, b, a', b')] in case of inconsistency, where a = b, a = a' and b = b' by congruence, diff --git a/src/string/app_parse.ml b/src/string/app_parse.ml new file mode 100644 index 00000000..e841d10b --- /dev/null +++ b/src/string/app_parse.ml @@ -0,0 +1,835 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Applicative Parser Combinators} *) + +type ('a,'b) result = [`Error of 'b | `Ok of 'a] + +type multiplicity = + | Star (* 0 or more *) + | Plus (* 1 or more *) + | Question (* 0 or 1 *) + +let str fmt = Printf.sprintf fmt + +module CharSet = Set.Make(Char) +module CharMap = Map.Make(Char) + +let print_char = function + | '\t' -> "\\t" + | '\n' -> "\\n" + | '\r' -> "\\r" + | '"' -> "\\\"" + | c -> str "%c" c + +let print_char_set set = + let buf = Buffer.create 32 in + Buffer.add_char buf '"'; + CharSet.iter (fun c -> Buffer.add_string buf (print_char c)) set; + Buffer.add_char buf '"'; + Buffer.contents buf + +let domain_of_char_map m = + CharMap.fold (fun c _ set -> CharSet.add c set) m CharSet.empty + +let print_char_map map = + let l = CharMap.fold + (fun c _ acc -> print_char c :: acc) map [] in + String.concat ", " l + +let ppmap ?(sep=", ") pp_k pp_v fmt m = + let first = ref true in + CharMap.iter + (fun k v -> + if !first then first := false else Format.pp_print_string fmt sep; + pp_k fmt k; + Format.pp_print_string fmt " → "; + pp_v fmt v; + Format.pp_print_cut fmt () + ) m; + () + +let set_of_string s = + let set = ref CharSet.empty in + String.iter + (fun c -> + if CharSet.mem c !set + then invalid_arg (str "any_of: duplicate char %c" c); + set := CharSet.add c !set + ) s; + !set + +(* add [c -> p] to the map, for every [c] in [set] *) +let map_add_set init set p = + CharSet.fold + (fun c map -> CharMap.add c p map) + set init + +(* function composition *) +let compose f g x = f (g x) + +let str_of_l l = + let b = Bytes.make (List.length l) ' ' in + List.iteri (fun i c -> Bytes.set b i c) l; + Bytes.unsafe_to_string b + +type 'a t = { + mutable st : 'a parse_or_compiled; +} + +(* syntactic version *) +and _ parse = + | Many : 'a t * unit t * multiplicity -> 'a list parse + | Skip : 'a t * multiplicity -> unit parse (* same as Many, but ignores *) + | Lazy : 'a t lazy_t -> 'a parse + +(* compiled version *) +and _ compiled = + | C_Return : 'a -> 'a compiled + | C_Map : ('a -> 'b) * 'a t -> 'b compiled + | C_Filter: ('a -> bool) * 'a t -> 'a compiled + | C_App : ('a -> 'b) t * 'a t -> 'b compiled + | C_AppLeft : 'a t * 'b t -> 'a compiled + | C_AppRight : 'a t * 'b t -> 'b compiled + | C_Fail : string -> 'a compiled + | C_Int : int compiled + | C_Float : float compiled + | C_Junk : unit compiled (* ignore next char *) + | C_AnyOf : CharSet.t -> char compiled + | C_SwitchC : 'a t CharMap.t * 'a t option -> 'a compiled + | C_Eof : unit compiled + +and 'a parse_or_compiled = + | Parse of 'a parse + | Compiled of 'a compiled + +(** {2 Helpers} *) + +(* build a new parser *) +let make p = {st=Parse p} +let make_c c = {st=Compiled c} +let make_pc st = {st} + +let ppmult fmt = function + | Star -> Format.pp_print_string fmt "*" + | Plus -> Format.pp_print_string fmt "+" + | Question -> Format.pp_print_string fmt "?" + +let print fmt p = + let depth = ref 0 in + (* print up to a given limit into lazy values *) + let rec print_aux + : type a. Format.formatter -> a t -> unit + = fun fmt p -> + let ppstr = Format.pp_print_string + and ppf fmt x = Format.fprintf fmt x in + let ppc fmt c = ppf fmt "'%s'" (print_char c) in + match p.st with + | Compiled (C_Return _) -> ppstr fmt "" + | Compiled (C_Map (_, x)) -> ppf fmt "@[(map@ %a)@]" print_aux x + | Compiled (C_Filter (_, x)) -> ppf fmt "@[(filter@ %a)@]" print_aux x + | Compiled (C_App (f, x)) -> ppf fmt "@[<2>@[%a@]@ <*>@ @[%a@]@]" print_aux f print_aux x + | Compiled (C_AppLeft (a, b)) -> ppf fmt "@[%a@ <<@ %a@]" print_aux a print_aux b + | Compiled (C_AppRight (a, b)) -> ppf fmt "@[%a@ >>@ %a@]" print_aux a print_aux b + | Compiled (C_Fail _) -> ppf fmt "" + | Compiled C_Int -> ppstr fmt "" + | Compiled C_Float -> ppstr fmt "" + | Compiled C_Junk -> ppstr fmt "" + | Compiled (C_AnyOf set) -> ppf fmt "@[(any@ %s)@]" (print_char_set set) + | Parse (Many (p, sep, mult)) -> + ppf fmt "@[<2>(@[%a@]@ sep:@[%a@])%a@]" print_aux p print_aux sep ppmult mult + | Parse (Skip (p, mult)) -> + ppf fmt "@[<2>(skip @[%a@]%a)@]" print_aux p ppmult mult + | Compiled (C_SwitchC (map, None)) -> + ppf fmt "@[(switch@ @[%a@])@]" (ppmap ppc print_aux) map + | Compiled (C_SwitchC (map, Some o)) -> + ppf fmt "@[(switch@ @[%a@]@ or:%a)@]" (ppmap ppc print_aux) map print_aux o + | Parse (Lazy _) when !depth > 3 -> ppf fmt "" + | Parse (Lazy (lazy p)) -> + incr depth; + print_aux fmt p; + decr depth + | Compiled C_Eof -> ppstr fmt "" + in + print_aux fmt p + +let int_first_char = lazy (set_of_string "-0123456789") +let float_first_char = lazy (set_of_string ".-0123456789") + +(* a set of characters that are valid as first characters of a parser *) +type possible_first_chars = + | Set of CharSet.t + | AllChars + | NoChar + | NoCharOrSet of CharSet.t (* either no char, or something starting with set *) + | IsFail of string + +let ret_set set = match CharSet.cardinal set with + | 0 -> NoChar + | 256 -> AllChars + | _ -> Set set + +let ret_no_char_or set = match CharSet.cardinal set with + | 0 -> NoChar + | 256 -> AllChars + | _ -> NoCharOrSet set + +(* pfc of parsing a or b *) +let union_pfc a b = match a, b with + | Set a, Set b -> ret_set (CharSet.union a b) + | NoCharOrSet s, Set s' + | Set s', NoCharOrSet s -> ret_no_char_or (CharSet.union s s') + | NoChar, Set s + | Set s, NoChar -> ret_no_char_or s + | NoCharOrSet s, NoCharOrSet s' -> ret_no_char_or (CharSet.union s s') + | IsFail e, _ | _, IsFail e -> IsFail e + | AllChars, _ | _, AllChars -> AllChars + | NoChar, o | o, NoChar -> o + +(* pfc of parsing a then b *) +let then_pfc a b = match a, b with + | Set a, Set b -> ret_set (CharSet.union a b) + | NoCharOrSet s, NoCharOrSet s' -> ret_no_char_or (CharSet.union s s') + | NoCharOrSet s, Set s' -> ret_set (CharSet.union s s') + | NoCharOrSet s, NoChar -> ret_no_char_or s + | Set s, _ -> ret_set s + | IsFail e, _ | _, IsFail e -> IsFail e + | AllChars, _ | _, AllChars -> AllChars + | NoChar, o -> o + +let (<|||>) a b = match a with + | NoChar -> Lazy.force b + | NoCharOrSet _ -> then_pfc a (Lazy.force b) + | _ -> a + +(* set of possibilities for the first char of a parser *) +let rec pfc : type a. a t -> possible_first_chars = fun t -> pfc_pc t.st + +and pfc_pc + : type a. a parse_or_compiled -> possible_first_chars + = function + | Parse p -> pfc_p p + | Compiled c -> pfc_c c + +and pfc_p + : type a. a parse -> possible_first_chars + = function + | Many (p, _, (Question | Star)) -> union_pfc (pfc p) NoChar + | Many (p, _, Plus) -> pfc p + | Skip (p, (Question | Star)) -> union_pfc (pfc p) NoChar + | Skip (p, Plus) -> pfc p + | Lazy (lazy p) -> pfc p + +and pfc_c + : type a. a compiled -> possible_first_chars + = function + | C_Return _ -> NoChar + | C_Map (_, x) -> pfc x + | C_Filter (_, x) -> pfc x + | C_App (f, x) -> pfc f <|||> lazy (pfc x) + | C_AppLeft (a, b) -> pfc a <|||> lazy (pfc b) + | C_AppRight (a, b) -> pfc a <|||> lazy (pfc b) + | C_Fail e -> IsFail e + | C_Int -> Set (Lazy.force int_first_char) + | C_Float -> Set (Lazy.force float_first_char) + | C_Junk -> AllChars + | C_AnyOf set -> ret_set set + | C_SwitchC (map, None) -> ret_set (domain_of_char_map map) + | C_SwitchC (map, Some o) -> + let s = domain_of_char_map map in + union_pfc (ret_set s) (pfc o) + | C_Eof -> NoChar + +let possible_first_chars = pfc + +(** {2 Combinators} *) + +let return x = make_c (C_Return x) +let pure = return + +let success = pure () + +let fail msg = make_c (C_Fail msg) + +let junk = make_c C_Junk + +let failf fmt = Printf.ksprintf (fun msg -> fail msg) fmt + +let map f x = match x.st with + | Compiled (C_Map (g, y)) -> make_c (C_Map (compose f g, y)) + | Compiled (C_Return x) -> pure (f x) + | _ -> make_c (C_Map (f, x)) + +let app f x = match f.st with + | Compiled (C_Return f) -> map f x + | _ -> make_c (C_App (f, x)) + +let fun_and f f' x = f x && f' x + +let filter f x = match x.st with + | Compiled (C_Return y) -> if f y then return y else fail "filter failed" + | Compiled (C_Filter (f', y)) -> make_c (C_Filter (fun_and f f', y)) + | _ -> make_c (C_Filter (f, x)) + +let app_left a b = make_c (C_AppLeft (a, b)) (* return (fun x y -> x) <*> a <*> b *) + +let app_right a b = make_c (C_AppRight (a, b)) (* return (fun x y -> y) <*> a <*> b *) + +let int = make_c C_Int + +let float = make_c C_Float + +let many ?(sep=success) p = make (Many (p, sep, Star)) + +let many1 ?(sep=success) p = make (Many (p, sep, Plus)) + +let skip p = make (Skip (p, Star)) + +let skip1 p = make (Skip (p, Plus)) + +let opt p = + map + (function + | [x] -> Some x + | [] -> None + | _ -> assert false + ) (make (Many (p, success, Question))) + +let any_of' s = make_c (C_AnyOf s) +let any_of s = any_of' (set_of_string s) + +let char c = any_of' (CharSet.singleton c) + +let spaces = skip (any_of " \t") +let spaces1 = skip1 (any_of " \t") + +let white = skip (any_of " \t\n") +let white1 = skip1 (any_of " \t\n") + +let alpha_lower_ = set_of_string "abcdefghijklmonpqrstuvwxyz" +let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ" +let num_ = set_of_string "0123456789" +let alpha_ = CharSet.union alpha_lower_ alpha_upper_ +let symbols_ = set_of_string "|!;$#@%&-_/=" + +let alpha_lower = any_of' alpha_lower_ +let alpha_upper = any_of' alpha_upper_ +let num = any_of' num_ +let symbols = any_of' symbols_ +let alpha = any_of' alpha_ +let alpha_num = any_of' (CharSet.union num_ alpha_) + +let eof = make_c C_Eof + +let switch_c ?default l = + if l = [] then match default with + | None -> invalid_arg "switch_c: empty list"; + | Some d -> d + else + let map = List.fold_left + (fun map (c, t) -> + if CharMap.mem c map + then invalid_arg (str "switch_c: duplicate char %c" c); + CharMap.add c t map + ) CharMap.empty l + in + make_c (C_SwitchC (map, default)) + +exception ExnIsFail of string + +let make_switch_c a b = make_c (C_SwitchC (a, b)) + +(* binary choice: compiled into decision tree *) +let rec merge a b = + (* build a switch by first char *) + try + begin match a.st, b.st with + | Compiled (C_SwitchC (map_a, def_a)), + Compiled (C_SwitchC (map_b, def_b)) -> + (* merge jump tables *) + let def = match def_a, def_b with + | None, None -> None + | Some d, None + | None, Some d -> Some d + | Some _, Some _ -> + invalid_arg "choice: ambiguous, several parsers accept any input" + in + let map = CharMap.merge + (fun _ a b -> match a, b with + | Some a', Some b' -> Some (merge a' b') + | Some m, None + | None, Some m -> Some m + | None, None -> assert false + ) map_a map_b + in + make_switch_c map def + | Compiled (C_SwitchC (map, def)), other + | other, Compiled (C_SwitchC (map, def)) -> + let map', def' = match pfc_pc other, def with + | AllChars, _ -> + invalid_arg "choice: ambiguous, several parsers accept any input" + | NoChar, None -> map, Some (make_pc other) + | NoChar, Some _ -> + invalid_arg "choice: ambiguous" + | IsFail msg, _ -> raise (ExnIsFail msg) + | NoCharOrSet set, def + | Set set, def -> + if CharSet.exists (fun c -> CharMap.mem c map) set + then invalid_arg + (str "choice: ambiguous parsers (overlap on {%s})" + (print_char_set (CharSet.inter set (domain_of_char_map map)))); + (* else: merge jump tables *) + let map = map_add_set map set (make_pc other) in + map, def + in + make_switch_c map' def' + | _ -> + begin match possible_first_chars a, possible_first_chars b with + | (Set set1 | NoCharOrSet set1), (Set set2 | NoCharOrSet set2) -> + if CharSet.exists (fun c -> CharSet.mem c set2) set1 + then invalid_arg + (str "choice: ambiguous parsers (overlap on {%s})" + (print_char_set (CharSet.inter set1 set2))); + let map = map_add_set CharMap.empty set1 a in + let map = map_add_set map set2 b in + make_switch_c map None + | IsFail e, _ | _, IsFail e -> raise (ExnIsFail e) + | Set s, NoChar -> make_switch_c (map_add_set CharMap.empty s a) (Some b) + | NoChar, Set s -> make_switch_c (map_add_set CharMap.empty s b) (Some a) + | AllChars, _ | _, AllChars -> + invalid_arg "choice: ambiguous parsers (one accepts everything)" + | (NoChar | NoCharOrSet _), (NoChar | NoCharOrSet _) -> + invalid_arg "choice: ambiguous parsers (both accept nothing)" + end + end + with ExnIsFail msg -> make_c (C_Fail msg) + +let rec choice = function + | [] -> invalid_arg "choice: empty list"; + | [x] -> x + | a :: tl -> merge a (choice tl) + +(* temporary structure for buildings switches *) +type 'a trie = + | TrieLeaf of 'a t + | TrieNode of 'a trie CharMap.t + +let trie_empty = TrieNode CharMap.empty + +let rec parser_of_trie : type a. a trie -> a t = function + | TrieLeaf p -> p + | TrieNode m -> + make_switch_c (CharMap.map parser_of_trie' m) None +(* consume next char, then build sub-trie *) +and parser_of_trie' + : type a. a trie -> a t + = fun x -> app_right junk (parser_of_trie x) + +(* build prefix trie *) +let switch_s l = + if l = [] then invalid_arg "switch_s: empty list"; + (* add parser p in trie [t], with key slice of [s] starting at [i] *) + let rec add_trie t s i p = + if i = String.length s + then match t with + | TrieNode m when CharMap.is_empty m -> TrieLeaf p + | TrieNode _ -> invalid_arg (str "key \"%s\" is prefix of another key" s) + | TrieLeaf _ -> invalid_arg (str "duplicate key \"%s\"" s) + else + let c = String.get s i in + match t with + | TrieLeaf _ -> + invalid_arg (str "key \"%s\" is prefixed by another key" s) + | TrieNode map -> + try + let sub = CharMap.find c map in + let sub = add_trie sub s (i+1) p in + TrieNode (CharMap.add c sub map) + with Not_found -> + let sub = add_trie trie_empty s (i+1) p in + TrieNode (CharMap.add c sub map) + in + let trie = + List.fold_left + (fun trie (s, p) -> + if s = "" then invalid_arg "switch_s: empty string"; + add_trie trie s 0 p + ) trie_empty l + in + parser_of_trie trie + +let bool = + switch_s + [ "true", return true + ; "false", return false + ] + +let fix f = + (* outermost lazy needed for the recursive definition *) + let rec r = { + st=Parse (Lazy (lazy (f r))); + } in + r + +module Infix = struct + let (>|=) x f = map f x + let (<*>) = app + let (<<) = app_left + let (>>) = app_right + let (<+>) a b = choice [a; b] + let (<::>) a b = pure (fun x l -> x::l) <*> a <*> b +end + +include Infix + +let word = + pure (fun c s -> str_of_l (c :: s)) <*> alpha <*> many alpha_num + +let quoted = + let q = char '"' in + let escaped = char '\\' >> char '"' in + let inner = choice [escaped; alpha_num; any_of "()' \t\n|!;$#@%&-_/=~.,:<>[]"] in + q >> (many inner >|= str_of_l) << q + +(** {2 Compilation} *) + +let encode_cons x sep tl = pure (fun x _sep tl -> x :: tl) <*> x <*> sep <*> tl + +let encode_many + : type a. set:CharSet.t -> p:a t -> self:a list t -> sep:unit t -> a list t + = fun ~set ~p ~self ~sep -> + let on_success = encode_cons p sep self + and on_fail = pure [] in + make_switch_c (map_add_set CharMap.empty set on_success) (Some on_fail) + +let encode_opt ~set x = + let mk_one x = [x] in + let on_success = make_c (C_Map (mk_one, x)) + and on_fail = pure [] in + make_switch_c (map_add_set CharMap.empty set on_success) (Some on_fail) + +let encode_skip + : type a. set:CharSet.t -> p:a t -> self:unit t -> unit t + = fun ~set ~p ~self -> + let on_success = p >> self + and on_fail = pure () in + make_switch_c (map_add_set CharMap.empty set on_success) (Some on_fail) + +let many_ + : type a. sep:unit t -> mult:multiplicity -> p:a t -> a list t + = fun ~sep ~mult ~p -> match possible_first_chars p with + | Set set -> + begin match mult with + | Star -> fix (fun self -> encode_many ~set ~sep ~p ~self) + | Plus -> encode_cons p sep (fix (fun self -> encode_many ~set ~sep ~p ~self)) + | Question -> encode_opt ~set p + end + | IsFail msg -> fail msg + | NoCharOrSet _ -> invalid_arg (str "many: invalid parser (might not consume input)") + | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") + +let skip_ : type a. mult:multiplicity -> p:a t -> unit t + = fun ~mult ~p -> match possible_first_chars p with + | Set set -> + begin match mult with + | Star -> fix (fun self -> encode_skip ~set ~p ~self) + | Plus -> p >> fix (fun self -> encode_skip ~set ~p ~self) + | Question -> encode_opt ~set p >> pure () + end + | IsFail msg -> fail msg + | NoCharOrSet _ -> invalid_arg (str "many: invalid parser (might not consume input)") + | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") + +let rec compile + : type a. a t -> a compiled + = fun t -> match t.st with + | Compiled c -> c (* already compiled *) + | Parse (Many (p, sep, mult)) -> + let c = compile (many_ ~sep ~mult ~p) in + t.st <- Compiled c; + c + | Parse (Skip (p, mult)) -> + let c = compile (skip_ ~mult ~p) in + t.st <- Compiled c; + c + | Parse (Lazy (lazy p)) -> + let c = compile p in + t.st <- Compiled c; + c + +(** {2 Signatures} *) + +type error = { + line: int; + col: int; + msg: string; +} + +let string_of_error e = str "at %d:%d; %s" e.line e.col e.msg + +exception Error of error + +module type S = sig + type source + (** Source of characters *) + + val parse : source -> 'a t -> ('a, error) result + (** Parse the given source using the parser, and returns the parsed value. *) + + val parse': source -> 'a t -> ('a, string) result + (** Same as {!parse}, but returns a user-friendly string in case of failure *) + + val parse_exn : source -> 'a t -> 'a + (** Unsafe version of {!parse}. + @raise Error if parsing fails *) +end + +(** {2 Build a parser from a given Monadic Input} *) + +module type INPUT = sig + type t + + val read : t -> Bytes.t -> int -> int -> int +end + +type token = + | Yield of char + | EOF + +module type READER = sig + type t + type source + + val create : source -> t + val peek : t -> token (* peek; do not consume *) + val next : t -> token (* read and consume *) + val junk : t -> unit (* consume last token, obtained with junk *) + val line : t -> int + val col : t -> int +end + +module ReaderOfInput(I : INPUT) : READER with type source = I.t = struct + type t = { + mutable rline : int; + mutable rcol : int; + input : I.t; + buf : Bytes.t; + mutable i : int; + mutable len : int; + } + type source = I.t + + let line t = t.rline + let col t = t.rcol + + let create input = { + rline=1; + rcol=0; + input; + buf = Bytes.make 1024 ' '; + i=1; + len=1; (* trick for initialization *) + } + + let read_next t = + let c = Bytes.get t.buf t.i in + t.i <- t.i + 1; + if c = '\n' then ( + t.rcol <- 0; + t.rline <- t.rline + 1; + ) else ( + t.rcol <- t.rcol + 1 + ); + Yield c + + let refill t = + t.len <- I.read t.input t.buf 0 (Bytes.length t.buf); + t.i <- 0; + () + + let next t = + if t.len = 0 then EOF + else if t.i = t.len + then ( + refill t; + if t.len = 0 then EOF else read_next t + ) else read_next t + + let peek t = + if t.i = t.len + then refill t; + Yield (Bytes.get t.buf t.i) + + let junk t = + assert (t.len > 0 && t.i < t.len); + t.i <- t.i + 1 +end + +module MakeFromReader(R : READER) : S with type source = R.source = struct + type source = R.source + + let error r msg = + raise (Error { + line = R.line r; + col = R.col r; + msg; + }) + let errorf r fmt = + Printf.ksprintf + (fun msg -> error r msg) + fmt + + let is_int c = Char.code c >= Char.code '0' && Char.code c <= Char.code '9' + let to_int c = Char.code c - Char.code '0' + + let rec parse_int r ~sign i = match R.peek r with + | EOF -> i + | Yield c when is_int c -> + R.junk r; + parse_int r ~sign (10 * i + to_int c) + | Yield '-' when i = 0 && sign -> + (* switch sign: only on first char *) + R.junk r; + parse_int r ~sign:false 0 + | _ -> if sign then i else -i + + let parse_float _r _buf = assert false + + let rec parse_rec : type a. R.t -> a t -> a = + fun r p -> match compile p with + | C_Return x -> x + | C_Map (f, x) -> + let y = parse_rec r x in + f y + | C_Filter (f, x) -> + let y = parse_rec r x in + if f y then y else errorf r "filter failed" + | C_App (f, x) -> + let f' = parse_rec r f in + let x' = parse_rec r x in + f' x' + | C_AppLeft (a, b) -> + let a' = parse_rec r a in + let _ = parse_rec r b in + a' + | C_AppRight (a, b) -> + let _ = parse_rec r a in + let b' = parse_rec r b in + b' + | C_Fail msg -> error r msg + | C_Int -> parse_int r ~sign:true 0 + | C_Float -> parse_float r (Buffer.create 8) + | C_Junk -> R.junk r + | C_AnyOf set -> + begin match R.next r with + | EOF -> errorf r "expected any of %s, got EOF" (print_char_set set) + | Yield c -> + if CharSet.mem c set then c + else errorf r "expected any of %s, got '%s'" (print_char_set set) (print_char c) + end + | C_SwitchC (map, def) -> + begin match R.peek r with + | EOF -> errorf r "expected any of %s, got EOF" (print_char_map map) + | Yield c -> + begin try + let p' = CharMap.find c map in + parse_rec r p' + with Not_found -> match def with + | None -> + errorf r "expected any of %s, got %c" (print_char_map map) c + | Some d -> parse_rec r d + end + end + | C_Eof -> + begin match R.next r with + | EOF -> () + | Yield c -> errorf r "expected EOF, got %c" c + end + + (* public functions *) + let parse_exn src p = + let r = R.create src in + parse_rec r p + + let parse src p = + let r = R.create src in + try + `Ok (parse_rec r p) + with Error e -> + `Error e + + let parse' src p = match parse src p with + | `Ok x -> `Ok x + | `Error e -> `Error (string_of_error e) +end + +module Make(I : INPUT) = struct + module R = ReaderOfInput(I) + include MakeFromReader(R) +end + +module Str = MakeFromReader(struct + (* reader of string *) + type t = { + str : string; + mutable i : int; + mutable rcol : int; + mutable rline : int; + } + type source = string + + let create str = { + str; + i = 0; + rcol = 1; + rline = 1; + } + let line t = t.rline + let col t = t.rcol + let peek t = + if t.i = String.length t.str then EOF else Yield (String.get t.str t.i) + let junk t = + assert (t.i < String.length t.str); + t.i <- t.i + 1 + let next t = + if t.i = String.length t.str then EOF + else ( + let c = String.get t.str t.i in + t.i <- t.i + 1; + if c = '\n' then ( + t.rcol <- 1; + t.rline <- t.rline + 1 + ) else t.rcol <- t.rcol + 1; + Yield c + ) +end) + +module Chan = Make(struct + type t = in_channel + let read = input +end) diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli new file mode 100644 index 00000000..f4c9ce1a --- /dev/null +++ b/src/string/app_parse.mli @@ -0,0 +1,271 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Applicative Parser Combinators} + + Example: basic S-expr parser + +{[ + open Containers_string.App_parse;; + + type sexp = Atom of string | List of sexp list;; + + let mkatom a = Atom a;; + let mklist l = List l;; + + let ident_char = alpha_num <+> any_of "|!;$#@%&-_/=*.:~+[]<>'" ;; + let ident = many1 ident_char >|= str_of_l ;; + let atom = (ident <+> quoted) >|= mkatom ;; + + let sexp = fix (fun sexp -> + white >> + (atom <+> + ((char '(' >> many sexp << char ')') >|= mklist) + ) + );; + + Str.parse_exn "(a (b c d) e)" sexp;; + +]} + +{b status: experimental} +@since 0.10 +*) + +type ('a,'b) result = [`Error of 'b | `Ok of 'a] + +type 'a t +(** Parser that yields an error or a value of type 'a *) + +(** {6 Combinators} *) + +val return : 'a -> 'a t +(** Parser that succeeds with the given value *) + +val pure : 'a -> 'a t +(** Synonym to {!return} *) + +val junk : unit t +(** Skip next char *) + +val fail : string -> 'a t +(** [fail msg] fails with the given error message *) + +val failf : ('a, unit, string, 'b t) format4 -> 'a + +val app : ('a -> 'b) t -> 'a t -> 'b t +(** Applicative *) + +val map : ('a -> 'b) -> 'a t -> 'b t +(** Map the parsed value *) + +val int : int t +(** Parse an integer *) + +val float : float t +(** Parse a floating point number *) + +val bool : bool t +(** Parse "true" or "false" *) + +val char : char -> char t +(** [char c] parses [c] and [c] only *) + +val any_of : string -> char t +(** Parse any of the chars present in the given string *) + +val alpha_lower : char t + +val alpha_upper : char t + +val alpha : char t + +val symbols : char t +(** symbols, such as "!-=_"... *) + +val num : char t + +val alpha_num : char t + +val word : string t +(** [word] parses any identifier not starting with an integer and + not containing any whitespace nor delimiter + TODO: specify *) + +val quoted : string t +(** Quoted string, following OCaml conventions *) + +val str_of_l : char list -> string +(** Helper to build strings from lists of chars *) + +val spaces : unit t +(** Parse a sequence of ['\t'] and [' '] *) + +val spaces1 : unit t +(** Same as {!spaces} but requires at least one space *) + +val white : unit t +(** Parse a sequence of ['\t'], ['\n'] and [' '] *) + +val white1 : unit t + +val eof : unit t +(** Matches the end of input, fails otherwise *) + +val many : ?sep:unit t -> 'a t -> 'a list t +(** 0 or more parsed elements of the given type. + @param sep separator between elements of the list (for instance, {!space}) *) + +val many1 : ?sep:unit t -> 'a t -> 'a list t +(** Same as {!many}, but needs at least one element *) + +val skip : _ t -> unit t +(** Skip 0 or more instances of the given parser *) + +val skip1 : _ t -> unit t + +val opt : 'a t -> 'a option t +(** [opt x] tries to parse [x], and returns [None] otherwise *) + +val filter : ('a -> bool) -> 'a t -> 'a t +(** [filter f p] parses the same as [p], but fails if the returned value + does not satisfy [f] *) + + +(* TODO: complement operator any_but (all but \, for instance) *) +(* TODO: a "if-then-else" combinator (assuming the test has a + set of possible first chars) *) + +val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t +(** [switch_c l] matches the next char and uses the corresponding parser. + Fails if the next char is not in the list, unless default is defined. + @param default parser to use if no char matches + @raise Invalid_argument if some char occurs several times in [l] *) + +val switch_s : (string * 'a t) list -> 'a t +(** [switch_s l] attempts to match matches any of the strings in [l]. + If one of those strings matches, the corresponding parser + is used from now on. + @raise Invalid_argument if some string is a prefix of another string, + or is empty, or if the list is empty *) + +val choice : 'a t list -> 'a t +(** [choice l] chooses between the parsers, unambiguously + @raise Invalid_argument if the list is empty, or if some parsers + overlap, making the choice ambiguous *) + +val fix : ('a t -> 'a t) -> 'a t +(** [fix f] makes a fixpoint *) + +module Infix : sig + val (>|=) : 'a t -> ('a -> 'b) -> 'b t + (** Infix version of {!map} *) + + val (<*>) : ('a -> 'b) t -> 'a t -> 'b t + (** Synonym to {!app} *) + + val (>>) : _ t -> 'a t -> 'a t + (** [a >> b] parses [a], ignores its result, then parses [b] *) + + val (<<) : 'a t -> _ t -> 'a t + (** [a << b] parses [a], then [b], and discards [b] to return [a] *) + + val (<+>) : 'a t -> 'a t -> 'a t + (** [a <+> b] is [choice [a;b]], a binary choice *) + + val (<::>) : 'a t -> 'a list t -> 'a list t + (** [a <::> b] is [app (fun x l -> x::l) a b] *) +end + +include module type of Infix + +(** {2 Signatures} *) + +(** {6 Parsing} *) + +type error = { + line: int; + col: int; + msg: string; +} + +val string_of_error : error -> string + +exception Error of error + +module type S = sig + type source + (** Source of characters *) + + val parse : source -> 'a t -> ('a, error) result + (** Parse the given source using the parser, and returns the parsed value. *) + + val parse': source -> 'a t -> ('a, string) result + (** Same as {!parse}, but returns a user-friendly string in case of failure *) + + val parse_exn : source -> 'a t -> 'a + (** Unsafe version of {!parse}. + @raise Error if parsing fails *) +end + +(** {2 Parse} *) + +module type INPUT = sig + type t + + val read : t -> Bytes.t -> int -> int -> int +end + +module Make(I : INPUT) : S with type source = I.t + +(** {2 Low-level interface} *) + +val print : Format.formatter -> _ t -> unit +(** Print a parser structure, for debug purpose *) + +type token = + | Yield of char + | EOF + +module type READER = sig + type t + type source (* underlying source *) + + val create : source -> t + val peek : t -> token (* peek; do not consume *) + val next : t -> token (* read and consume *) + val junk : t -> unit (* consume last token, obtained with junk *) + val line : t -> int + val col : t -> int +end + +module MakeFromReader(R : READER) : S with type source = R.source + +(** {2 Defaults} *) + +module Str : S with type source = string + +module Chan : S with type source = in_channel diff --git a/src/string/containers_string.mlpack b/src/string/containers_string.mlpack index 6daaf586..9bb5e104 100644 --- a/src/string/containers_string.mlpack +++ b/src/string/containers_string.mlpack @@ -1,5 +1,6 @@ # OASIS_START -# DO NOT EDIT (digest: eed887f169b0c8e02f98f97c676f846c) +# DO NOT EDIT (digest: 200ff8feb7cb7b8d5e2aea5b7c63241a) KMP Levenshtein +App_parse # OASIS_STOP diff --git a/src/unix/.merlin b/src/unix/.merlin new file mode 100644 index 00000000..9ed5b46a --- /dev/null +++ b/src/unix/.merlin @@ -0,0 +1,2 @@ +PKG unix +REC diff --git a/src/unix/CCUnix.ml b/src/unix/CCUnix.ml new file mode 100644 index 00000000..ec739f0f --- /dev/null +++ b/src/unix/CCUnix.ml @@ -0,0 +1,115 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 High-level Functions on top of Unix} *) + +type 'a or_error = [`Ok of 'a | `Error of string] +type 'a gen = unit -> 'a option + +(** {2 Calling Commands} *) + +let int_of_process_status = function + | Unix.WEXITED i + | Unix.WSIGNALED i + | Unix.WSTOPPED i -> i + +let str_exists s p = + let rec f s p i = + if i = String.length s then false + else p s.[i] || f s p (i+1) + in + f s p 0 + +let rec iter_gen f g = match g() with + | None -> () + | Some x -> f x; iter_gen f g + +(* print a string, but escaped if required *) +let escape_str buf s = + if str_exists s + (function ' ' | '"' | '\'' | '\n' | '\t'-> true | _ -> false) + then ( + Buffer.add_char buf '\''; + String.iter + (function + | '\'' -> Buffer.add_string buf "''" + | c -> Buffer.add_char buf c + ) s; + Buffer.add_char buf '\''; + ) else Buffer.add_string buf s + +let read_all ?(size=1024) ic = + let buf = ref (Bytes.create size) in + let len = ref 0 in + try + while true do + (* resize *) + if !len = Bytes.length !buf then ( + buf := Bytes.extend !buf 0 !len; + ); + assert (Bytes.length !buf > !len); + let n = input ic !buf !len (Bytes.length !buf - !len) in + len := !len + n; + if n = 0 then raise Exit; (* exhausted *) + done; + assert false (* never reached*) + with Exit -> + Bytes.sub_string !buf 0 !len + +type call_result = + < stdout:string; + stderr:string; + status:Unix.process_status; + errcode:int; (** extracted from status *) + > + +let kbprintf' buf fmt k = Printf.kbprintf k buf fmt + +let call ?(bufsize=2048) ?(stdin=`Str "") ?(env=[||]) cmd = + (* render the command *) + let buf = Buffer.create 256 in + kbprintf' buf cmd + (fun buf -> + let cmd = Buffer.contents buf in + let oc, ic, errc = Unix.open_process_full cmd env in + (* send stdin *) + begin match stdin with + | `Str s -> output_string ic s + | `Gen g -> iter_gen (output_string ic) g + end; + close_out ic; + (* read out and err *) + let out = read_all ~size:bufsize oc in + let err = read_all ~size:bufsize errc in + let status = Unix.close_process_full (oc, ic, errc) in + object + method stdout = out + method stderr = err + method status = status + method errcode = int_of_process_status status + end + ) + diff --git a/src/unix/CCUnix.mli b/src/unix/CCUnix.mli new file mode 100644 index 00000000..e1e75ba7 --- /dev/null +++ b/src/unix/CCUnix.mli @@ -0,0 +1,77 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 High-level Functions on top of Unix} + +Some useful functions built on top of Unix. + +{b status: unstable} +@since 0.10 *) + +type 'a or_error = [`Ok of 'a | `Error of string] +type 'a gen = unit -> 'a option + +(** {2 Calling Commands} *) + +val escape_str : Buffer.t -> string -> unit +(** Escape a string so it can be a shell argument. +*) + +(*$T + CCPrint.sprintf "%a" escape_str "foo" = "foo" + CCPrint.sprintf "%a" escape_str "foo bar" = "'foo bar'" + CCPrint.sprintf "%a" escape_str "fo'o b'ar" = "'fo''o b''ar'" +*) + +type call_result = + < stdout:string; + stderr:string; + status:Unix.process_status; + errcode:int; (** extracted from status *) + > + +val call : ?bufsize:int -> + ?stdin:[`Gen of string gen | `Str of string] -> + ?env:string array -> + ('a, Buffer.t, unit, call_result) format4 -> + 'a +(** [call cmd] wraps the result of [Unix.open_process_full cmd] into an + object. It reads the full stdout and stderr of the subprocess before + returning. + @param stdin if provided, the generator or string is consumed and fed to + the subprocess input channel, which is then closed. + @param bufsize buffer size used to read stdout and stderr + @param env environment to run the command in +*) + +(*$T + (call ~stdin:(`Str "abc") "cat")#stdout = "abc" + (call "echo %a" escape_str "a'b'c")#stdout = "abc\n" + (call "echo %s" "a'b'c")#stdout = "abc\n" +*) + + + diff --git a/src/unix/containers_unix.mldylib b/src/unix/containers_unix.mldylib new file mode 100644 index 00000000..95342249 --- /dev/null +++ b/src/unix/containers_unix.mldylib @@ -0,0 +1,4 @@ +# OASIS_START +# DO NOT EDIT (digest: cc54fa6ddd5d32bdf577cb187f4cf07c) +CCUnix +# OASIS_STOP diff --git a/src/unix/containers_unix.mllib b/src/unix/containers_unix.mllib new file mode 100644 index 00000000..95342249 --- /dev/null +++ b/src/unix/containers_unix.mllib @@ -0,0 +1,4 @@ +# OASIS_START +# DO NOT EDIT (digest: cc54fa6ddd5d32bdf577cb187f4cf07c) +CCUnix +# OASIS_STOP diff --git a/tests/test_puf.ml b/tests/test_puf.ml index d5af04d3..c309f09c 100644 --- a/tests/test_puf.ml +++ b/tests/test_puf.ml @@ -1,6 +1,7 @@ (** Tests for persistent union find *) open OUnit +open Containers_misc module P = Puf.Make(struct type t = int let get_id i = i end)