diff --git a/AUTHORS.adoc b/AUTHORS.adoc index c4772003..b1ee4699 100644 --- a/AUTHORS.adoc +++ b/AUTHORS.adoc @@ -12,3 +12,4 @@ - Emmanuel Surleau (emm) - Guillaume Bury (guigui) - JP Rodi +- octachron (Florian Angeletti) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 43e557f2..e2ec92c3 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -1,5 +1,28 @@ = Changelog +== 0.15 + +=== breaking changes + +- remove deprecated `CCFloat.sign` +- remove deprecated `CCSexpStream` + +=== other changes + +- basic color handling in `CCFormat`, using tags and ANSI codes +- add `CCVector.ro_vector` as a convenience alias +- add `CCOrd.option` +- add `CCMap.{keys,values}` +- add wip `CCAllocCache`, an allocation cache for short-lived arrays +- add `CCError.{join,both}` applicative functions for CCError +- opam: depend on ocamlbuild +- work on `CCRandom` by octachron: + * add an uniformity test + * Make `split_list` uniform + * Add sample_without_replacement + +- bugfix: forgot to export `{Set.Map}.OrderedType` in `Containers` + == 0.14 === breaking changes @@ -13,7 +36,7 @@ - deprecate `CCVector.rev'`, renamed into `CCVector.rev_in_place` - deprecate `CCVector.flat_map'`, renamed `flat_map_seq` -- add `CCMap.add_{list,seq}` +- add `CCMap.add_{list,seqe` - add `CCSet.add_{list,seq}` - fix small uglyness in `Map.print` and `Set.print` - add `CCFormat.{ksprintf,string_quoted}` diff --git a/Makefile b/Makefile index 66d1cb5e..35270e8a 100644 --- a/Makefile +++ b/Makefile @@ -124,7 +124,7 @@ devel: make all watch: - while find src/ -print0 | xargs -0 inotifywait -e delete_self -e modify ; do \ + while find src/ benchs/ -print0 | xargs -0 inotifywait -e delete_self -e modify ; do \ echo "============ at `date` ==========" ; \ make ; \ done diff --git a/_oasis b/_oasis index 0aa8b651..0fd3144b 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: containers -Version: 0.14 +Version: 0.15 Homepage: https://github.com/c-cube/ocaml-containers Authors: Simon Cruanes License: BSD-2-clause @@ -66,7 +66,7 @@ Library "containers_unix" Library "containers_sexp" Path: src/sexp - Modules: CCSexp, CCSexpStream, CCSexpM + Modules: CCSexp, CCSexpM BuildDepends: bytes FindlibParent: containers FindlibName: sexp @@ -77,7 +77,7 @@ Library "containers_data" CCPersistentHashtbl, CCDeque, CCFQueue, CCBV, CCMixtbl, CCMixmap, CCRingBuffer, CCIntMap, CCPersistentArray, CCMixset, CCHashconsedSet, CCGraph, CCHashSet, CCBitField, - CCHashTrie, CCBloom, CCWBTree, CCRAL + CCHashTrie, CCBloom, CCWBTree, CCRAL, CCAllocCache BuildDepends: bytes # BuildDepends: bytes, bisect_ppx FindlibParent: containers @@ -182,13 +182,6 @@ Test all TestTools: run_qtest Run$: flag(tests) && flag(unix) && flag(advanced) && flag(bigarray) -Executable id_sexp - Path: examples/ - Install: false - CompiledObject: best - MainIs: id_sexp.ml - BuildDepends: containers.sexp - Executable mem_measure Path: benchs/ Install: false @@ -197,11 +190,11 @@ Executable mem_measure Build$: flag(bench) BuildDepends: sequence, unix, containers, containers.data, hamt -Executable id_sexp2 +Executable id_sexp Path: examples/ Install: false CompiledObject: best - MainIs: id_sexp2.ml + MainIs: id_sexp.ml BuildDepends: containers.sexp SourceRepository head diff --git a/_tags b/_tags index 805fc84c..5f9aea26 100644 --- a/_tags +++ b/_tags @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 0e7b7eeffb179d552ac9c060b7ab3be9) +# DO NOT EDIT (digest: 1dc452faf114e2c3c507c622ca14c960) # Ignore VCS directories, you can use the same kind of rule outside # OASIS_START/STOP if you want to exclude directories that contains # useless stuff for the build process @@ -123,9 +123,6 @@ true: annot, bin_annot : use_containers_string : use_containers_thread : use_containers_unix -# Executable id_sexp -: package(bytes) -: use_containers_sexp # Executable mem_measure "benchs/mem_measure.native": package(bytes) "benchs/mem_measure.native": package(hamt) @@ -139,9 +136,9 @@ true: annot, bin_annot : package(unix) : use_containers : use_containers_data -# Executable id_sexp2 -: package(bytes) -: use_containers_sexp +# Executable id_sexp +: package(bytes) +: use_containers_sexp : package(bytes) : use_containers_sexp # OASIS_STOP @@ -150,4 +147,4 @@ true: annot, bin_annot : inline(25) or or : inline(15) and not : warn_A, warn(-4), warn(-44) -true: no_alias_deps, safe_string +true: no_alias_deps, safe_string, short_paths diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 17bcc401..555ca079 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -582,8 +582,9 @@ module Tbl = struct ; "find_string" @>> app_ints bench_find_string [10; 20; 100; 1_000; 10_000] ]); B.Tree.register ("tbl_persistent" @>>> - let l_int = [persistent_hashtbl Int; persistent_hashtbl_ref Int] in - let l_str = [persistent_hashtbl Str; persistent_hashtbl_ref Str] in + (* we also compare to the regular Hashtbl, as a frame of reference *) + let l_int = [persistent_hashtbl Int; persistent_hashtbl_ref Int; hashtbl_make Int ] in + let l_str = [persistent_hashtbl Str; persistent_hashtbl_ref Str; hashtbl_make Str ] in [ "add_int" @>> app_ints (bench_add_to l_int) [10; 100; 1_000; 10_000;] ; "find_int" @>> app_ints (bench_find_to (List.map find_of_mut l_int)) @@ -1032,6 +1033,147 @@ module Thread = struct ) end +module Graph = struct + + (* divisors graph *) + let div_children_ i = + (* divisors of [i] that are [>= j] *) + let rec aux j i yield = + if j < i + then ( + if (i mod j = 0) then yield (i,j); + aux (j+1) i yield + ) + in + aux 1 i + + let div_graph_ = {CCGraph. + origin=fst; + dest=snd; + children=div_children_ + } + + module H = Hashtbl.Make(CCInt) + + let dfs_raw n () = + let explored = H.create (n+10) in + let st = Stack.create() in + let res = ref 0 in + Stack.push n st; + while not (Stack.is_empty st) do + let i = Stack.pop st in + if not (H.mem explored i) then ( + H.add explored i (); + incr res; + div_children_ i (fun (_,j) -> Stack.push j st); + ) + done; + !res + + let dfs_ n () = + let tbl = CCGraph.mk_table ~eq:CCInt.equal ~hash:CCInt.hash (n+10) in + CCGraph.Traverse.dfs ~tbl ~graph:div_graph_ + (Sequence.return n) + |> Sequence.fold (fun acc _ -> acc+1) 0 + + let dfs_event n () = + let tbl = CCGraph.mk_table ~eq:CCInt.equal ~hash:CCInt.hash (n+10) in + CCGraph.Traverse.Event.dfs ~tbl ~graph:div_graph_ + (Sequence.return n) + |> Sequence.fold + (fun acc -> function + | `Enter _ -> acc+1 + | `Exit _ + | `Edge _ -> acc) + 0 + + let bench_dfs n = + assert ( + let n1 = dfs_raw n () in + let n2 = dfs_ n () in + let n3 = dfs_event n () in + n1 = n2 && + n2 = n3); + B.throughputN 2 ~repeat + [ "raw", dfs_raw n, () + ; "ccgraph", dfs_ n, () + ; "ccgraph_event", dfs_event n, () + ] + + let () = + B.Tree.register ("graph" @>>> + [ "dfs" @>> + app_ints bench_dfs [100; 1000; 10_000; 50_000; 100_000; 500_000] + ] + ) +end + +module Alloc = struct + module type ALLOC_ARR = sig + type 'a t + val name : string + val create : int -> 'a t + val make : 'a t -> int -> 'a -> 'a array + val free : 'a t -> 'a array -> unit + end + + let dummy = + let module A = struct + type _ t = unit + let name = "dummy" + let create _ = () + let make _ i x = Array.make i x + let free _ _ = () + end in + (module A : ALLOC_ARR) + + let alloc_cache ~buck_size = + let module A = struct + type 'a t = 'a CCAllocCache.Arr.t + let name = Printf.sprintf "alloc_cache(%d)" buck_size + let create n = CCAllocCache.Arr.create ~buck_size n + let make = CCAllocCache.Arr.make + let free = CCAllocCache.Arr.free + end in + (module A : ALLOC_ARR) + + (* repeat [n] times: + - repeat [batch] times: + - allocate [batch] arrays of size from 1 to batch+1 + - free those arrays + *) + let bench1 ~batch n = + let make (module C : ALLOC_ARR) () = + let c = C.create (batch*2) in + let tmp = Array.make (batch * batch) [||] in (* temporary storage *) + for _ = 1 to n do + for j = 0 to batch-1 do + for k = 0 to batch-1 do + tmp.(j*batch + k) <- C.make c (k+1) '_'; + done; + done; + Array.iter (C.free c) tmp (* free the whole array *) + done + in + B.throughputN 3 ~repeat + [ "dummy", make dummy, () + ; "cache(5)", make (alloc_cache ~buck_size:5), () + ; "cache(20)", make (alloc_cache ~buck_size:20), () + ; "cache(50)", make (alloc_cache ~buck_size:50), () + ] + + let () = B.Tree.register ( + "alloc" @>>> + [ "bench1(batch=5)" @>> + app_ints (bench1 ~batch:5) [100; 1_000] + ; "bench1(batch=15)" @>> + app_ints (bench1 ~batch:15) [100; 1_000] + ; "bench1(batch=50)" @>> + app_ints (bench1 ~batch:50) [100; 1_000] + ] + ) +end + let () = try B.Tree.run_global () with Arg.Help msg -> print_endline msg diff --git a/containers.odocl b/containers.odocl index 45828a4a..8cc627ce 100644 --- a/containers.odocl +++ b/containers.odocl @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: a679876a4dd37916033589f8650bb4b2) +# DO NOT EDIT (digest: e5c366e1cd8e09a92eff04bbdc3ad4f9) src/core/CCVector src/core/CCPrint src/core/CCError @@ -50,6 +50,7 @@ src/data/CCHashTrie src/data/CCBloom src/data/CCWBTree src/data/CCRAL +src/data/CCAllocCache src/string/Containers_string src/string/CCKMP src/string/CCLevenshtein @@ -69,6 +70,5 @@ src/advanced/CCMonadIO src/io/Containers_io_is_deprecated src/unix/CCUnix src/sexp/CCSexp -src/sexp/CCSexpStream src/sexp/CCSexpM # OASIS_STOP diff --git a/doc/intro.txt b/doc/intro.txt index 8032b938..338a2596 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -65,6 +65,7 @@ such as: Various data structures. {!modules: +CCAllocCache CCBitField CCBloom CCBV @@ -73,7 +74,6 @@ CCFQueue CCFlatHashtbl CCHashSet CCHashTrie -CCImmutArray CCIntMap CCMixmap CCMixset @@ -105,7 +105,6 @@ the main type ([CCSexp.t]) isn't. {!modules: CCSexp -CCSexpStream CCSexpM } diff --git a/examples/id_sexp.ml b/examples/id_sexp.ml index 1adf3080..90e63c27 100644 --- a/examples/id_sexp.ml +++ b/examples/id_sexp.ml @@ -3,11 +3,11 @@ let () = if Array.length Sys.argv <> 2 then failwith "usage: id_sexp file"; let f = Sys.argv.(1) in - let s = CCSexpStream.L.of_file f in + let s = CCSexpM.parse_file_list f in match s with | `Ok l -> List.iter - (fun s -> Format.printf "@[%a@]@." CCSexpStream.print s) + (fun s -> Format.printf "@[%a@]@." CCSexpM.print s) l | `Error msg -> Format.printf "error: %s@." msg diff --git a/examples/id_sexp2.ml b/examples/id_sexp2.ml deleted file mode 100644 index 90e63c27..00000000 --- a/examples/id_sexp2.ml +++ /dev/null @@ -1,13 +0,0 @@ - - -let () = - if Array.length Sys.argv <> 2 then failwith "usage: id_sexp file"; - let f = Sys.argv.(1) in - let s = CCSexpM.parse_file_list f in - match s with - | `Ok l -> - List.iter - (fun s -> Format.printf "@[%a@]@." CCSexpM.print s) - l - | `Error msg -> - Format.printf "error: %s@." msg diff --git a/myocamlbuild.ml b/myocamlbuild.ml index 926fb2fd..f6541c4b 100644 --- a/myocamlbuild.ml +++ b/myocamlbuild.ml @@ -671,7 +671,6 @@ let dispatch_default = MyOCamlbuildBase.dispatch_default conf package_default;; # 673 "myocamlbuild.ml" (* OASIS_STOP *) - let doc_intro = "doc/intro.txt" ;; Ocamlbuild_plugin.dispatch dispatch_default;; diff --git a/opam b/opam index 1d961671..a3b0dfe8 100644 --- a/opam +++ b/opam @@ -28,6 +28,8 @@ depends: [ "ocamlfind" {build} "base-bytes" "cppo" {build} + "oasis" {build} + "ocamlbuild" {build} ] depopts: [ "sequence" "base-bigarray" "base-unix" "base-threads" ] tags: [ "stdlib" "containers" "iterators" "list" "heap" "queue" ] diff --git a/setup.ml b/setup.ml index cea46de1..7999dda7 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.4.4 *) (* OASIS_START *) -(* DO NOT EDIT (digest: dd2796010195c6abda33b5bf5ecc73ea) *) +(* DO NOT EDIT (digest: 520720667caa5285972393b25de31806) *) (* Regenerated by OASIS v0.4.5 Visit http://oasis.forge.ocamlcore.org for more information and @@ -6875,7 +6875,7 @@ let setup_t = alpha_features = ["ocamlbuild_more_args"]; beta_features = []; name = "containers"; - version = "0.14"; + version = "0.15"; license = OASISLicense.DEP5License (OASISLicense.DEP5Unit @@ -7134,7 +7134,7 @@ let setup_t = bs_nativeopt = [(OASISExpr.EBool true, [])] }, { - lib_modules = ["CCSexp"; "CCSexpStream"; "CCSexpM"]; + lib_modules = ["CCSexp"; "CCSexpM"]; lib_pack = false; lib_internal_modules = []; lib_findlib_parent = Some "containers"; @@ -7188,7 +7188,8 @@ let setup_t = "CCHashTrie"; "CCBloom"; "CCWBTree"; - "CCRAL" + "CCRAL"; + "CCAllocCache" ]; lib_pack = false; lib_internal_modules = []; @@ -7622,29 +7623,6 @@ let setup_t = InternalExecutable "run_qtest" ] }); - Executable - ({ - cs_name = "id_sexp"; - cs_data = PropList.Data.create (); - cs_plugin_data = [] - }, - { - bs_build = [(OASISExpr.EBool true, true)]; - bs_install = [(OASISExpr.EBool true, false)]; - bs_path = "examples/"; - bs_compiled_object = Best; - bs_build_depends = [InternalLibrary "containers_sexp"]; - bs_build_tools = [ExternalTool "ocamlbuild"]; - bs_c_sources = []; - bs_data_files = []; - bs_ccopt = [(OASISExpr.EBool true, [])]; - bs_cclib = [(OASISExpr.EBool true, [])]; - bs_dlllib = [(OASISExpr.EBool true, [])]; - bs_dllpath = [(OASISExpr.EBool true, [])]; - bs_byteopt = [(OASISExpr.EBool true, [])]; - bs_nativeopt = [(OASISExpr.EBool true, [])] - }, - {exec_custom = false; exec_main_is = "id_sexp.ml"}); Executable ({ cs_name = "mem_measure"; @@ -7681,7 +7659,7 @@ let setup_t = {exec_custom = false; exec_main_is = "mem_measure.ml"}); Executable ({ - cs_name = "id_sexp2"; + cs_name = "id_sexp"; cs_data = PropList.Data.create (); cs_plugin_data = [] }, @@ -7701,7 +7679,7 @@ let setup_t = bs_byteopt = [(OASISExpr.EBool true, [])]; bs_nativeopt = [(OASISExpr.EBool true, [])] }, - {exec_custom = false; exec_main_is = "id_sexp2.ml"}); + {exec_custom = false; exec_main_is = "id_sexp.ml"}); SrcRepo ({ cs_name = "head"; @@ -7729,7 +7707,7 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.4.5"; - oasis_digest = Some "\016\224&\n\229K}\248\171\001\211\206\025\164lj"; + oasis_digest = Some "\183\156\139\200Ys\193\023\212>%\209\180\133\193p"; oasis_exec = None; oasis_setup_args = []; setup_update = false @@ -7737,6 +7715,6 @@ let setup_t = let setup () = BaseSetup.setup setup_t;; -# 7741 "setup.ml" +# 7719 "setup.ml" (* OASIS_STOP *) let () = setup ();; diff --git a/src/core/CCError.ml b/src/core/CCError.ml index a587ccef..3bc727ee 100644 --- a/src/core/CCError.ml +++ b/src/core/CCError.ml @@ -162,6 +162,17 @@ let (<*>) f x = match f with | `Error s -> fail s | `Ok f -> map f x +let join t = match t with + | `Ok (`Ok o) -> `Ok o + | `Ok (`Error e) -> `Error e + | (`Error _) as e -> e + +let both x y = + match x,y with + | `Ok o, `Ok o' -> `Ok (o, o') + | `Ok _, `Error e -> `Error e + | `Error e, _ -> `Error e + (** {2 Collections} *) let map_l f l = diff --git a/src/core/CCError.mli b/src/core/CCError.mli index 30d9810a..f7e5fa34 100644 --- a/src/core/CCError.mli +++ b/src/core/CCError.mli @@ -141,7 +141,18 @@ val pure : 'a -> ('a, 'err) t val (<*>) : ('a -> 'b, 'err) t -> ('a, 'err) t -> ('b, 'err) t (** [a <*> b] evaluates [a] and [b], and, in case of success, returns [`Ok (a b)]. Otherwise, it fails, and the error of [a] is chosen - over the error of [b] if both fail *) + over the error of [b] if both fail. *) + +val join : (('a, 'err) t, 'err) t -> ('a, 'err) t +(** [join t], in case of success, returns [`Ok o] from [`Ok (`Ok o)]. Otherwise, + it fails with [`Error e] where [e] is the unwrapped error of [t]. + @since 0.15 *) + +val both : ('a, 'err) t -> ('b, 'err) t -> (('a * 'b), 'err) t +(** [both a b], in case of success, returns [`Ok (o, o')] with the ok values + of [a] and [b]. Otherwise, it fails, and the error of [a] is chosen over the + error of [b] if both fail. + @since 0.15 *) (** {2 Infix} diff --git a/src/core/CCFloat.ml b/src/core/CCFloat.ml index b73b311b..75336d7f 100644 --- a/src/core/CCFloat.ml +++ b/src/core/CCFloat.ml @@ -71,11 +71,6 @@ type 'a random_gen = Random.State.t -> 'a let pp buf = Printf.bprintf buf "%f" let print fmt = Format.pp_print_float fmt -let sign (a:float) = - if a < 0.0 then -1 - else if a > 0.0 then 1 - else 0 - let fsign a = if is_nan a then nan else if a = 0. then a diff --git a/src/core/CCFloat.mli b/src/core/CCFloat.mli index 7485206d..1cc33188 100644 --- a/src/core/CCFloat.mli +++ b/src/core/CCFloat.mli @@ -76,11 +76,6 @@ val random : t -> t random_gen val random_small : t random_gen val random_range : t -> t -> t random_gen -val sign : t -> int -(** [sign t] is one of [-1, 0, 1], depending on how the float - compares to [0.] - @deprecated since 0.7 use {! fsign} or {!sign_exn} since it's more accurate *) - val fsign : t -> float (** [fsign x] is one of [-1., -0., +0., +1.], or [nan] if [x] is NaN. @since 0.7 *) diff --git a/src/core/CCFormat.ml b/src/core/CCFormat.ml index bdb425d6..51ec4613 100644 --- a/src/core/CCFormat.ml +++ b/src/core/CCFormat.ml @@ -122,24 +122,8 @@ let to_string pp x = Format.pp_print_flush fmt (); Buffer.contents buf -let sprintf format = - let buf = Buffer.create 64 in - let fmt = Format.formatter_of_buffer buf in - Format.kfprintf - (fun _fmt -> Format.pp_print_flush fmt (); Buffer.contents buf) - fmt - format - let fprintf = Format.fprintf - -let ksprintf ~f fmt = - let buf = Buffer.create 32 in - let out = Format.formatter_of_buffer buf in - Format.kfprintf - (fun _ -> Format.pp_print_flush out (); f (Buffer.contents buf)) - out fmt - let stdout = Format.std_formatter let stderr = Format.err_formatter @@ -159,3 +143,136 @@ let _with_file_out filename f = let to_file filename format = _with_file_out filename (fun fmt -> Format.fprintf fmt format) + +type color = + [ `Black + | `Red + | `Yellow + | `Green + | `Blue + | `Magenta + | `Cyan + | `White + ] + +let int_of_color_ = function + | `Black -> 0 + | `Red -> 1 + | `Green -> 2 + | `Yellow -> 3 + | `Blue -> 4 + | `Magenta -> 5 + | `Cyan -> 6 + | `White -> 7 + +type style = + [ `FG of color (* foreground *) + | `BG of color (* background *) + | `Bold + | `Reset + ] + +let code_of_style : style -> int = function + | `FG c -> 30 + int_of_color_ c + | `BG c -> 40 + int_of_color_ c + | `Bold -> 1 + | `Reset -> 0 + +let ansi_l_to_str_ = function + | [] -> "\x1b[0m" + | [a] -> Format.sprintf "\x1b[%dm" (code_of_style a) + | [a;b] -> Format.sprintf "\x1b[%d;%dm" (code_of_style a) (code_of_style b) + | l -> + let pp_num out c = int out (code_of_style c) in + to_string (list ~start:"\x1b[" ~stop:"m" ~sep:";" pp_num) l + +(* parse a tag *) +let style_of_tag_ s = match String.trim s with + | "reset" -> [`Reset] + | "black" -> [`FG `Black] + | "red" -> [`FG `Red] + | "green" -> [`FG `Green] + | "yellow" -> [`FG `Yellow] + | "blue" -> [`FG `Blue] + | "magenta" -> [`FG `Magenta] + | "cyan" -> [`FG `Cyan] + | "white" -> [`FG `White] + | "Black" -> [`FG `Black] + | "Red" -> [`FG `Red; `Bold] + | "Green" -> [`FG `Green; `Bold] + | "Yellow" -> [`FG `Yellow; `Bold] + | "Blue" -> [`FG `Blue; `Bold] + | "Magenta" -> [`FG `Magenta; `Bold] + | "Cyan" -> [`FG `Cyan; `Bold] + | "White" -> [`FG `White; `Bold] + | s -> failwith ("unknown style: " ^ s) + +let color_enabled = ref false + +(* either prints the tag of [s] or delegate to [or_else] *) +let mark_open_tag ~or_else s = + try + let style = style_of_tag_ s in + if !color_enabled then ansi_l_to_str_ style else "" + with Not_found -> or_else s + +let mark_close_tag ~or_else s = + try + let _ = style_of_tag_ s in (* check if it's indeed about color *) + if !color_enabled then ansi_l_to_str_ [`Reset] else "" + with Not_found -> or_else s + +(* add color handling to formatter [ppf] *) +let set_color_tag_handling ppf = + let open Format in + let functions = pp_get_formatter_tag_functions ppf () in + let functions' = {functions with + mark_open_tag=(mark_open_tag ~or_else:functions.mark_open_tag); + mark_close_tag=(mark_close_tag ~or_else:functions.mark_close_tag); + } in + pp_set_mark_tags ppf true; (* enable tags *) + pp_set_formatter_tag_functions ppf functions' + +let set_color_default = + let first = ref true in + fun b -> + if b && not !color_enabled then ( + color_enabled := true; + if !first then ( + first := false; + set_color_tag_handling stdout; + set_color_tag_handling stderr; + ); + ) else if not b && !color_enabled then color_enabled := false + +(*$R + set_color_default true; + let s = sprintf + "what is your @{favorite color@}? @{blue@}! No, @{red@}! Ahhhhhhh@." + in + assert_equal ~printer:CCFun.id + "what is your \027[37;1mfavorite color\027[0m? \027[34mblue\027[0m! No, \027[31mred\027[0m! Ahhhhhhh\n" + s +*) + +let sprintf format = + let buf = Buffer.create 64 in + let fmt = Format.formatter_of_buffer buf in + if !color_enabled then set_color_tag_handling fmt; + Format.kfprintf + (fun _fmt -> Format.pp_print_flush fmt (); Buffer.contents buf) + fmt + format + +(*$T + sprintf "yolo %s %d" "a b" 42 = "yolo a b 42" + sprintf "%d " 0 = "0 " +*) + +let ksprintf ~f fmt = + let buf = Buffer.create 32 in + let out = Format.formatter_of_buffer buf in + if !color_enabled then set_color_tag_handling out; + Format.kfprintf + (fun _ -> Format.pp_print_flush out (); f (Buffer.contents buf)) + out fmt diff --git a/src/core/CCFormat.mli b/src/core/CCFormat.mli index bb7279d6..8ab2e98f 100644 --- a/src/core/CCFormat.mli +++ b/src/core/CCFormat.mli @@ -66,6 +66,55 @@ val quad : 'a printer -> 'b printer -> 'c printer -> 'd printer -> ('a * 'b * 'c val map : ('a -> 'b) -> 'b printer -> 'a printer +(** {2 ASCII codes} + + Use ANSI escape codes https://en.wikipedia.org/wiki/ANSI_escape_code + to put some colors on the terminal. + + This uses {b tags} in format strings to specify the style. Current styles + are the following: + + {ul + {- "reset" resets style} + {- "black" } + {- "red" } + {- "green" } + {- "yellow" } + {- "blue" } + {- "magenta" } + {- "cyan" } + {- "white" } + {- "Black" bold black} + {- "Red" bold red } + {- "Green" bold green } + {- "Yellow" bold yellow } + {- "Blue" bold blue } + {- "Magenta" bold magenta } + {- "Cyan" bold cyan } + {- "White" bold white } + } + + Example: + + {[ + set_color_default true;; + + Format.printf + "what is your @{favorite color@}? @{blue@}! No, @{red@}! Ahhhhhhh@.";; + ]} + + {b status: experimental} + @since 0.15 *) + +val set_color_tag_handling : t -> unit +(** adds functions to support color tags to the given formatter. + @since 0.15 *) + +val set_color_default : bool -> unit +(** [set_color_default b] enables color handling on the standard formatters + (stdout, stderr) if [b = true] as well as on {!sprintf} formatters; + it disables the color handling if [b = false]. *) + (** {2 IO} *) val output : t -> 'a printer -> 'a -> unit diff --git a/src/core/CCMap.ml b/src/core/CCMap.ml index 0fdc6e9e..2dc4a5df 100644 --- a/src/core/CCMap.ml +++ b/src/core/CCMap.ml @@ -53,6 +53,14 @@ module type S = sig val add_list : 'a t -> (key * 'a) list -> 'a t (** @since 0.14 *) + val keys : _ t -> key sequence + (** Iterate on keys only + @since 0.15 *) + + val values : 'a t -> 'a sequence + (** Iterate on values only + @since 0.15 *) + val to_list : 'a t -> (key * 'a) list val pp : ?start:string -> ?stop:string -> ?arrow:string -> ?sep:string -> @@ -88,6 +96,12 @@ module Make(O : Map.OrderedType) = struct let to_seq m yield = iter (fun k v -> yield (k,v)) m + let keys m yield = + iter (fun k _ -> yield k) m + + let values m yield = + iter (fun _ v -> yield v) m + let add_list m l = List.fold_left (fun m (k,v) -> add k v m) m l let of_list l = add_list empty l diff --git a/src/core/CCMap.mli b/src/core/CCMap.mli index 51ec28fc..524e56d2 100644 --- a/src/core/CCMap.mli +++ b/src/core/CCMap.mli @@ -56,6 +56,14 @@ module type S = sig val add_list : 'a t -> (key * 'a) list -> 'a t (** @since 0.14 *) + val keys : _ t -> key sequence + (** Iterate on keys only + @since 0.15 *) + + val values : 'a t -> 'a sequence + (** Iterate on values only + @since 0.15 *) + val to_list : 'a t -> (key * 'a) list val pp : ?start:string -> ?stop:string -> ?arrow:string -> ?sep:string -> diff --git a/src/core/CCOrd.ml b/src/core/CCOrd.ml index 4f2ace2a..f1c974b3 100644 --- a/src/core/CCOrd.ml +++ b/src/core/CCOrd.ml @@ -59,6 +59,16 @@ let () c (ord,x,y) = then ord x y else c +let option c o1 o2 = match o1, o2 with + | None, None -> 0 + | None, Some _ -> -1 + | Some _, None -> 1 + | Some x1, Some x2 -> c x1 x2 + +(*$Q + Q.(option int) (fun o -> option int_ None o <= 0) + *) + let pair o_x o_y (x1,y1) (x2,y2) = let c = o_x x1 x2 in if c = 0 diff --git a/src/core/CCOrd.mli b/src/core/CCOrd.mli index 52dae3b7..9c9ed76a 100644 --- a/src/core/CCOrd.mli +++ b/src/core/CCOrd.mli @@ -55,6 +55,10 @@ val () : int -> ('a t * 'a * 'a) -> int (CCBool.compare, true, false)]} *) +val option : 'a t -> 'a option t +(** Comparison of optional values. [None] is smaller than any [Some _]. + @since 0.15 *) + val pair : 'a t -> 'b t -> ('a * 'b) t val triple : 'a t -> 'b t -> 'c t -> ('a * 'b * 'c) t diff --git a/src/core/CCRandom.ml b/src/core/CCRandom.ml index cc387065..3d762620 100644 --- a/src/core/CCRandom.ml +++ b/src/core/CCRandom.ml @@ -78,35 +78,49 @@ let replicate n g st = if n = 0 then acc else aux (g st :: acc) (n-1) in aux [] n +(* Sample without replacement using rejection sampling. *) +let sample_without_replacement (type elt) ?(compare=compare) k (rng:elt t) st= + let module S = Set.Make(struct type t=elt let compare = compare end) in + let rec aux s k = + if k <= 0 then + S.elements s + else + let x = rng st in + if S.mem x s then + aux s k + else + aux (S.add x s) (k-1) in + aux S.empty k + let list_seq l st = List.map (fun f -> f st) l -exception SplitFail - -let _split i st = - if i < 2 then raise SplitFail +let split i st = + if i < 2 then None else let j = 1 + Random.State.int st (i-1) in - (j, i-j) + Some (j, i-j) -let split i st = try Some (_split i st) with SplitFail -> None - -(* Partition of an int into [len] integers. We divide-and-conquer on - the expected length, until it reaches 1. *) -let split_list i ~len st = - let rec aux i ~len acc = - if i < len then raise SplitFail - else if len = 1 then i::acc - else - (* split somewhere in the middle *) - let len1, len2 = _split len st in - assert (len = len1+len2); - if i = len - then aux len1 ~len:len1 (aux len2 ~len:len2 acc) - else - let i1, i2 = _split (i-len) st in - aux (i1+len1) ~len:len1 (aux (i2+len2) ~len:len2 acc) +let _diff_list ~last l = + let rec diff_list acc = function + | [a] -> Some ( (last - a)::acc ) + | a::( b::_ as r ) -> diff_list ( (b-a)::acc ) r + | [] -> None in - try Some (aux i ~len []) with SplitFail -> None + diff_list [] l + + +(* Partition of an int into [len] integers uniformly. + We first sample (len-1) points from the set {1,..i-1} without replacement. + We sort these points and add back 0 and i, we have thus + x_0 = 0 < x_1 < x_2 < ... < x_{len-1} < i = x_{len}. + If we define, y_k = x_{k+1} - x_{k} for k in 0..(len-1), then by construction + ∑_k y_k = ∑_k (x_{k+1} - x_k ) = x_{len} - x_0 = i. *) +let split_list i ~len st = + if i >= len then + let xs = sample_without_replacement (len-1) (int_range 1 (i-1)) st in + _diff_list ( 0::xs ) ~last:i + else + None let retry ?(max=10) g st = let rec aux n = @@ -177,3 +191,31 @@ let (<*>) f g st = f st (g st) let __default_state = Random.State.make_self_init () let run ?(st=__default_state) g = g st + +let uniformity_test ?(size_hint=10) k rng st = + let histogram = Hashtbl.create size_hint in + let add x = let n = try Hashtbl.find histogram x with Not_found -> 0 in + Hashtbl.replace histogram x (n + 1) in + let () = + for _i = 0 to ( k - 1 ) do + add (rng st) + done in + let cardinal = float_of_int (Hashtbl.length histogram) in + let kf = float_of_int k in + (* average number of points assuming an uniform distribution *) + let average = kf /. cardinal in + (* The number of points is a sum of random variables with binomial distribution *) + let p = 1. /. cardinal in + (* The variance of a binomial distribution with average p is *) + let variance = p *. (1. -. p ) in + (* Central limit theorem: a confidence interval of 4σ provides a false positive rate + of 0.00634% *) + let confidence = 4. in + let std = confidence *. (sqrt (kf *. variance)) in + let predicate _key n acc = + acc && abs_float (average -. float_of_int n) < std in + Hashtbl.fold predicate histogram true + +(*$T split_list + run ~st:(Runner.random_state()) ( uniformity_test 50_000 (split_list 10 ~len:3) ) +*) diff --git a/src/core/CCRandom.mli b/src/core/CCRandom.mli index e42e1f01..c0b8c604 100644 --- a/src/core/CCRandom.mli +++ b/src/core/CCRandom.mli @@ -76,6 +76,14 @@ val replicate : int -> 'a t -> 'a list t (** [replicate n g] makes a list of [n] elements which are all generated randomly using [g] *) +val sample_without_replacement: + ?compare:('a -> 'a -> int) -> int -> 'a t -> 'a list t +(** [sample_without_replacement n g] makes a list of [n] elements which are all + generated randomly using [g] with the added constraint that none of the generated + random values are equal + @since 0.15 + *) + val list_seq : 'a t list -> 'a list t (** Build random lists from lists of random generators @since 0.4 *) @@ -145,3 +153,11 @@ val (<*>) : ('a -> 'b) t -> 'a t -> 'b t val run : ?st:state -> 'a t -> 'a (** Using a random state (possibly the one in argument) run a generator *) + +(**/**) + +val uniformity_test : ?size_hint:int -> int -> 'a t -> bool t +(** [uniformity_test k rng] tests the uniformity of the random generator [rng] using + [k] samples. + @since 0.15 +*) diff --git a/src/core/CCVector.ml b/src/core/CCVector.ml index 2bb0d17a..39f53715 100644 --- a/src/core/CCVector.ml +++ b/src/core/CCVector.ml @@ -44,6 +44,8 @@ type ('a,'mut) t = { type 'a vector = ('a, rw) t +type 'a ro_vector = ('a, ro) t + let freeze v = { size=v.size; vec=v.vec; diff --git a/src/core/CCVector.mli b/src/core/CCVector.mli index d4beb99d..b2c2a2b5 100644 --- a/src/core/CCVector.mli +++ b/src/core/CCVector.mli @@ -37,6 +37,10 @@ type ('a, 'mut) t type 'a vector = ('a, rw) t (** Type synonym: a ['a vector] is mutable. *) +type 'a ro_vector = ('a, ro) t +(** Alias for immutable vectors. + @since 0.15 *) + type 'a sequence = ('a -> unit) -> unit type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] type 'a gen = unit -> 'a option diff --git a/src/core/META b/src/core/META index 1083a731..a9bd92fa 100644 --- a/src/core/META +++ b/src/core/META @@ -1,6 +1,6 @@ # OASIS_START -# DO NOT EDIT (digest: ca67b641b68531561920de2255f04ea0) -version = "0.14" +# DO NOT EDIT (digest: c783171c5b71c6a746d5d622c2f8b012) +version = "0.15" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers.cma" @@ -9,7 +9,7 @@ archive(native) = "containers.cmxa" archive(native, plugin) = "containers.cmxs" exists_if = "containers.cma" package "unix" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "bytes unix" archive(byte) = "containers_unix.cma" @@ -20,7 +20,7 @@ package "unix" ( ) package "top" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "compiler-libs.common containers containers.data containers.bigarray containers.string containers.unix containers.sexp containers.iter" @@ -32,7 +32,7 @@ package "top" ( ) package "thread" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "containers threads" archive(byte) = "containers_thread.cma" @@ -43,7 +43,7 @@ package "thread" ( ) package "string" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_string.cma" @@ -54,7 +54,7 @@ package "string" ( ) package "sexp" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_sexp.cma" @@ -65,7 +65,7 @@ package "sexp" ( ) package "iter" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." archive(byte) = "containers_iter.cma" archive(byte, plugin) = "containers_iter.cma" @@ -75,7 +75,7 @@ package "iter" ( ) package "io" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_io.cma" @@ -86,7 +86,7 @@ package "io" ( ) package "data" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "bytes" archive(byte) = "containers_data.cma" @@ -97,7 +97,7 @@ package "data" ( ) package "bigarray" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "containers bigarray bytes" archive(byte) = "containers_bigarray.cma" @@ -108,7 +108,7 @@ package "bigarray" ( ) package "advanced" ( - version = "0.14" + version = "0.15" description = "A modular standard library focused on data structures." requires = "containers sequence" archive(byte) = "containers_advanced.cma" diff --git a/src/core/containers.ml b/src/core/containers.ml index 1c527b6b..5f5b4b05 100644 --- a/src/core/containers.ml +++ b/src/core/containers.ml @@ -79,7 +79,10 @@ module List = struct include List include CCList end -module Map = CCMap +module Map = struct + module type OrderedType = Map.OrderedType + include CCMap +end module Option = CCOpt module Pair = CCPair module Random = struct @@ -87,7 +90,10 @@ module Random = struct include CCRandom end module Ref = CCRef -module Set = CCSet +module Set = struct + module type OrderedType = Set.OrderedType + include CCSet +end module String = struct include String include CCString diff --git a/src/data/CCAllocCache.ml b/src/data/CCAllocCache.ml new file mode 100644 index 00000000..3d47c8c7 --- /dev/null +++ b/src/data/CCAllocCache.ml @@ -0,0 +1,75 @@ + +(* This file is free software, part of Logtk. See file "license" for more details. *) + +(** {1 Simple Cache for Allocations} *) + +module Arr = struct + type 'a t = { + caches: 'a array array; + (* 2-dim array of cached arrays. The 2-dim array is flattened into + one dimension *) + max_buck_size: int; + (* number of cached arrays per length *) + sizes: int array; + (* number of cached arrays in each bucket *) + } + + let create ?(buck_size=16) n = + if n<1 then invalid_arg "AllocCache.Arr.create"; + { max_buck_size=buck_size; + sizes=Array.make n 0; + caches=Array.make (n * buck_size) [||]; + } + + let make c i x = + if i=0 then [||] + else if i 0 && n < Array.length c.sizes then ( + let bs = c.sizes.(n) in + if bs < c.max_buck_size then ( + (* store [a] *) + c.caches.(n * c.max_buck_size + bs) <- a; + c.sizes.(n) <- bs + 1 + ) + ) + + let with_ c i x ~f = + let a = make c i x in + try + let ret = f a in + free c a; + ret + with e -> + free c a; + raise e +end + +(*$inject + let c = Arr.create ~buck_size:2 20 + +*) + +(*$Q + Q.small_int (fun n -> Array.length (Arr.make c n '_') = n) +*) + +(*$T + let a = Arr.make c 1 '_' in Array.length a = 1 + let a = Arr.make c 2 '_' in Array.length a = 2 + let a = Arr.make c 3 '_' in Array.length a = 3 + let a = Arr.make c 4 '_' in Array.length a = 4 +*) + + diff --git a/src/data/CCAllocCache.mli b/src/data/CCAllocCache.mli new file mode 100644 index 00000000..d8538a96 --- /dev/null +++ b/src/data/CCAllocCache.mli @@ -0,0 +1,35 @@ + +(* This file is free software, part of Logtk. See file "license" for more details. *) + +(** {1 Simple Cache for Allocations} + + Be very careful not to use-after-free or double-free. + + {b NOT THREAD SAFE} + {b status: experimental} + + @since 0.15 + +*) + +module Arr : sig + type 'a t + (** Cache for 'a arrays *) + + val create: ?buck_size:int -> int -> 'a t + (** [create n] makes a new cache of arrays up to length [n] + @param buck_size number of arrays cached for each array length + @param n maximum size of arrays put in cache *) + + val make : 'a t -> int -> 'a -> 'a array + (** [make cache i x] is like [Array.make i x], + but might return a cached array instead of allocating one. + {b NOTE}: if the array is already allocated then it + will NOT be filled with [x] *) + + val free : 'a t -> 'a array -> unit + (** Return array to the cache. The array's elements will not be GC'd *) + + val with_ : 'a t -> int -> 'a -> f:('a array -> 'b) -> 'b + (** Combines {!make} and {!free} *) +end diff --git a/src/data/CCGraph.mli b/src/data/CCGraph.mli index e59fc5ea..f8710e82 100644 --- a/src/data/CCGraph.mli +++ b/src/data/CCGraph.mli @@ -271,8 +271,11 @@ val scc : ?tbl:('v, 'v scc_state) table -> (** Strongly connected components reachable from the given vertices. Each component is a list of vertices that are all mutually reachable in the graph. + The components are explored in a topological order (if C1 and C2 are + components, and C1 points to C2, then C2 will be yielded before C1). Uses {{: https://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm} Tarjan's algorithm} @param tbl table used to map nodes to some hidden state + @raise Sequence_once if the result is iterated on more than once. *) (** {2 Pretty printing in the DOT (graphviz) format} diff --git a/src/data/containers_data.mldylib b/src/data/containers_data.mldylib index 094428e7..797bbd30 100644 --- a/src/data/containers_data.mldylib +++ b/src/data/containers_data.mldylib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 69220d33fe7db598cd4d72fc5d813a8f) +# DO NOT EDIT (digest: f1eb737bc11930f88f05f61212c0f303) CCMultiMap CCMultiSet CCTrie @@ -23,4 +23,5 @@ CCHashTrie CCBloom CCWBTree CCRAL +CCAllocCache # OASIS_STOP diff --git a/src/data/containers_data.mllib b/src/data/containers_data.mllib index 094428e7..797bbd30 100644 --- a/src/data/containers_data.mllib +++ b/src/data/containers_data.mllib @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 69220d33fe7db598cd4d72fc5d813a8f) +# DO NOT EDIT (digest: f1eb737bc11930f88f05f61212c0f303) CCMultiMap CCMultiSet CCTrie @@ -23,4 +23,5 @@ CCHashTrie CCBloom CCWBTree CCRAL +CCAllocCache # OASIS_STOP diff --git a/src/sexp/CCSexpStream.ml b/src/sexp/CCSexpStream.ml deleted file mode 100644 index 4dc20ad2..00000000 --- a/src/sexp/CCSexpStream.ml +++ /dev/null @@ -1,559 +0,0 @@ -(* -Copyright (c) 2013, Simon Cruanes -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. Redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 S-expressions Parser} *) - -type 'a or_error = [ `Ok of 'a | `Error of string ] -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -type t = [ - | `Atom of string - | `List of t list - ] - -let _with_in filename f = - let ic = open_in filename in - try - let x = f ic in - close_in ic; - x - with e -> - close_in ic; - `Error (Printexc.to_string e) - -let _with_out filename f = - let oc = open_out filename in - try - let x = f oc in - close_out oc; - x - with e -> - close_out oc; - raise e - -(** {2 Serialization (encoding)} *) - -(* shall we escape the string because of one of its chars? *) -let _must_escape s = - try - for i = 0 to String.length s - 1 do - let c = String.unsafe_get s i in - match c with - | ' ' | ';' | ')' | '(' | '"' | '\n' | '\t' -> raise Exit - | _ when Char.code c > 127 -> raise Exit (* non-ascii *) - | _ -> () - done; - false - with Exit -> true - -let rec to_buf b t = match t with - | `Atom s when _must_escape s -> Printf.bprintf b "\"%s\"" (String.escaped s) - | `Atom s -> Buffer.add_string b s - | `List [] -> Buffer.add_string b "()" - | `List [x] -> Printf.bprintf b "(%a)" to_buf x - | `List l -> - Buffer.add_char b '('; - List.iteri - (fun i t' -> (if i > 0 then Buffer.add_char b ' '; to_buf b t')) - l; - Buffer.add_char b ')' - -let to_string t = - let b = Buffer.create 128 in - to_buf b t; - Buffer.contents b - -let rec print fmt t = match t with - | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) - | `Atom s -> Format.pp_print_string fmt s - | `List [] -> Format.pp_print_string fmt "()" - | `List [x] -> Format.fprintf fmt "@[(%a)@]" print x - | `List l -> - Format.open_hovbox 2; - Format.pp_print_char fmt '('; - List.iteri - (fun i t' -> (if i > 0 then Format.fprintf fmt "@ "; print fmt t')) - l; - Format.pp_print_char fmt ')'; - Format.close_box () - -let rec print_noindent fmt t = match t with - | `Atom s when _must_escape s -> Format.fprintf fmt "\"%s\"" (String.escaped s) - | `Atom s -> Format.pp_print_string fmt s - | `List [] -> Format.pp_print_string fmt "()" - | `List [x] -> Format.fprintf fmt "(%a)" print_noindent x - | `List l -> - Format.pp_print_char fmt '('; - List.iteri - (fun i t' -> (if i > 0 then Format.pp_print_char fmt ' '; print_noindent fmt t')) - l; - Format.pp_print_char fmt ')' - -let to_chan oc t = - let fmt = Format.formatter_of_out_channel oc in - print fmt t; - Format.pp_print_flush fmt () - -let to_file_seq filename seq = - _with_out filename - (fun oc -> - seq (fun t -> to_chan oc t; output_char oc '\n') - ) - -let to_file filename t = to_file_seq filename (fun k -> k t) - -(** {2 Deserialization (decoding)} *) - -type 'a parse_result = ['a or_error | `End ] -type 'a partial_result = [ 'a parse_result | `Await ] - -module Source = struct - type individual_char = - | NC_yield of char - | NC_end - | NC_await - - type t = unit -> individual_char - type source = t - - module Manual = struct - type t = { - mutable i : int; (* offset *) - mutable stop : bool; - buf : Buffer.t; (* accessible chunk of input *) - } - - let make() = { - i = 0; - stop = false; - buf=Buffer.create 32; - } - - let to_src d () = - if d.i = Buffer.length d.buf - then - if d.stop then NC_end else NC_await - else ( - let c = Buffer.nth d.buf d.i in - d.i <- d.i + 1; - NC_yield c - ) - - let feed d s i len = - if d.stop then failwith "CCSexpStream.Source.Manual.feed: reached EOI"; - Buffer.add_substring d.buf s i len - - let reached_end d = d.stop <- true - end - - let of_string s = - let i = ref 0 in - fun () -> - if !i=String.length s - then NC_end - else ( - let c = String.get s !i in - incr i; - NC_yield c - ) - - let of_chan ?(bufsize=1024) ic = - let buf = Bytes.make bufsize ' ' in - let i = ref 0 in - let n = ref 0 in - let stop = ref false in - let rec next() = - if !stop then NC_end - else if !i = !n - then ( (* refill *) - i := 0; - n := input ic buf 0 bufsize; - if !n = 0 then (stop := true; NC_end) else next() - ) else ( (* yield *) - let c = Bytes.get buf !i in - incr i; - NC_yield c - ) - in next - - let of_gen g = - let s = ref "" in - let i = ref 0 in - let stop = ref false in - let rec next() = - if !stop then NC_end - else if !i = String.length !s - then ( - match g() with - | None -> stop := true; NC_end - | Some buf -> s := buf; i := 0; next () - ) else ( - let c = String.get !s !i in - incr i; - NC_yield c - ) - in next -end - -module Lexer = struct - (** An individual character returned by a source *) - type token = - | Open - | Close - | Atom of string - - type decode_state = - | St_start - | St_atom - | St_quoted - | St_comment - | St_escaped - | St_raw_char1 of int - | St_raw_char2 of int - | St_yield of token - | St_error of string - | St_end - - type t = { - src : Source.t; - atom : Buffer.t; (* atom being parsed *) - mutable st : decode_state; - mutable line : int; - mutable col : int; - } - - let make src = { - src; - st = St_start; - line = 1; - col = 1; - atom = Buffer.create 32; - } - - let of_string s = make (Source.of_string s) - - let of_chan ic = make (Source.of_chan ic) - - let line t = t.line - let col t = t.col - - (* yield [x] with current state [st] *) - let _yield d st x = - d.st <- st; - `Ok x - - let _take_buffer b = - let s = Buffer.contents b in - Buffer.clear b; - s - - (* raise an error *) - let _error d msg = - let b = Buffer.create 32 in - Printf.bprintf b "at %d, %d: " d.line d.col; - Printf.kbprintf - (fun b -> - let msg' = Buffer.contents b in - d.st <- St_error msg'; - `Error msg') - b msg - - let _end d = - d.st <- St_end; - `End - - let _is_digit c = Char.code '0' <= Char.code c && Char.code c <= Char.code '9' - let _digit2i c = Char.code c - Char.code '0' - - (* next token *) - let rec _next d st : token partial_result = - match st with - | St_error msg -> `Error msg - | St_end -> _end d - | St_yield x -> - (* yield the given token, then start a fresh one *) - _yield d St_start x - | _ -> - d.st <- st; - _process_next d st - - (* read and process the next character *) - and _process_next d st = - match d.src () with - | Source.NC_end -> - begin match st with - | St_error _ | St_end | St_yield _ -> assert false - | St_start | St_comment -> _end d - | St_atom -> - let a = _take_buffer d.atom in - _yield d St_end (Atom a) - | St_quoted -> - let a = _take_buffer d.atom in - _yield d St_end (Atom a) - | (St_escaped | St_raw_char1 _ | St_raw_char2 _) -> - _error d "unexpected end of input (escaping)" - end - | Source.NC_await -> `Await - | Source.NC_yield c -> - if c='\n' - then (d.col <- 1; d.line <- d.line + 1) - else (d.col <- d.col + 1); - (* use the next char *) - match st with - | St_error _ | St_end | St_yield _ -> assert false - | St_comment -> - begin match c with - | '\n' -> _next d St_start - | _ -> _next d St_comment - end - | St_start -> - begin match c with - | ' ' | '\t' | '\n' -> _next d St_start - | ';' -> _next d St_comment - | '(' -> _yield d St_start Open - | ')' -> _yield d St_start Close - | '"' -> _next d St_quoted - | _ -> (* read regular atom *) - Buffer.add_char d.atom c; - _next d St_atom - end - | St_atom -> - begin match c with - | ' ' | '\t' | '\n' -> - let a = _take_buffer d.atom in - _yield d St_start (Atom a) - | ';' -> - let a = _take_buffer d.atom in - _yield d St_comment (Atom a) - | ')' -> - let a = _take_buffer d.atom in - _yield d (St_yield Close) (Atom a) - | '(' -> - let a = _take_buffer d.atom in - _yield d (St_yield Open) (Atom a) - | '"' -> _error d "unexpected \" (parsing atom %s)" (Buffer.contents d.atom) - | '\\' -> _error d "unexpected \\" - | _ -> - Buffer.add_char d.atom c; - _next d St_atom - end - | St_quoted -> - (* reading an unquoted atom *) - begin match c with - | '\\' -> _next d St_escaped - | '"' -> - let a = _take_buffer d.atom in - _yield d St_start (Atom a) - | _ -> - Buffer.add_char d.atom c; - _next d St_quoted - end - | St_escaped -> - begin match c with - | 'n' -> Buffer.add_char d.atom '\n'; _next d St_quoted - | 't' -> Buffer.add_char d.atom '\t'; _next d St_quoted - | 'r' -> Buffer.add_char d.atom '\r'; _next d St_quoted - | 'b' -> Buffer.add_char d.atom '\b'; _next d St_quoted - | '"' -> Buffer.add_char d.atom '"'; _next d St_quoted - | '\\' -> Buffer.add_char d.atom '\\'; _next d St_quoted - | _ when _is_digit c -> _next d (St_raw_char1 (_digit2i c)) - | _ -> _error d "unexpected escaped character %c" c - end - | St_raw_char1 i -> - begin match c with - | _ when _is_digit c -> _next d (St_raw_char2 (i*10 + _digit2i c)) - | _ -> _error d "expected digit, got %c" c - end - | St_raw_char2 i -> - begin match c with - | c when _is_digit c -> - (* read an escaped char *) - Buffer.add_char d.atom (Char.chr (i*10+_digit2i c)); - _next d St_quoted - | c -> _error d "expected digit, got %c" c - end - - let next d = _next d d.st -end - -module ParseGen = struct - type 'a t = unit -> 'a parse_result - - let to_list g : 'a list or_error = - let rec aux acc = match g() with - | `Error e -> `Error e - | `Ok x -> aux (x::acc) - | `End -> `Ok (List.rev acc) - in - aux [] - - let head g = match g() with - | `End -> `Error "expected at least one element" - | #or_error as x -> x - - let head_exn g = match g() with - | `Ok x -> x - | `Error msg -> failwith msg - | `End -> failwith "expected at least one element" - - let take n g = - assert (n>=0); - let n = ref n in - fun () -> - if !n = 0 then `End - else ( - decr n; - g() - ) -end - -(* hidden parser state *) -type parser_state = { - ps_d : Lexer.t; - mutable ps_stack : t list list; -} - -let mk_ps src = { - ps_d = Lexer.make src; - ps_stack = []; -} - -let _error ps msg = - let msg' = Printf.sprintf "at %d,%d: %s" (Lexer.line ps.ps_d) (Lexer.col ps.ps_d) msg in - `Error msg' - -(* next token, or await *) -let rec _next ps : t partial_result = - match Lexer.next ps.ps_d with - | `Ok (Lexer.Atom s) -> - _push ps (`Atom s) - | `Ok Lexer.Open -> - ps.ps_stack <- [] :: ps.ps_stack; - _next ps - | `Ok Lexer.Close -> - begin match ps.ps_stack with - | [] -> _error ps "unbalanced ')'" - | l :: stack -> - ps.ps_stack <- stack; - _push ps (`List (List.rev l)) - end - | `Error msg -> `Error msg - | `Await -> `Await - | `End -> `End - -(* push a S-expr on top of the parser stack *) -and _push ps e = match ps.ps_stack with - | [] -> - `Ok e - | l :: tl -> - ps.ps_stack <- (e :: l) :: tl; - _next ps - -(* assume [ps] never needs [`Await] *) -let _never_block ps () = match _next ps with - | `Await -> assert false - | `Ok x -> `Ok x - | `Error e -> `Error e - | `End -> `End - -(* parse from a generator of string slices *) -let parse_gen g : t ParseGen.t = - let ps = mk_ps (Source.of_gen g) in - _never_block ps - -let parse_string s = - let ps = mk_ps (Source.of_string s) in - _never_block ps - -let parse_chan ?bufsize ic = - let ps = mk_ps (Source.of_chan ?bufsize ic) in - _never_block ps - -(** {6 Blocking} *) - -let of_chan ic = - ParseGen.head (parse_chan ic) - -let of_string s = - ParseGen.head (parse_string s) - -let of_file f = - _with_in f of_chan - -module L = struct - let to_buf b l = - List.iter (to_buf b) l - - let to_string l = - let b = Buffer.create 32 in - to_buf b l; - Buffer.contents b - - let to_chan oc l = - let fmt = Format.formatter_of_out_channel oc in - List.iter (Format.fprintf fmt "%a@." print) l; - Format.pp_print_flush fmt () - - let to_file filename l = - _with_out filename (fun oc -> to_chan oc l) - - let of_chan ?bufsize ic = - ParseGen.to_list (parse_chan ?bufsize ic) - - let of_file ?bufsize filename = - _with_in filename - (fun ic -> of_chan ?bufsize ic) - - let of_string s = - ParseGen.to_list (parse_string s) - - let of_gen g = - ParseGen.to_list (parse_gen g) - - exception OhNoes of string - exception StopNaow - - let of_seq seq = - let src = Source.Manual.make () in - let ps = mk_ps (Source.Manual.to_src src) in - let l = ref [] in - (* read as many expressions as possible *) - let rec _nexts () = match _next ps with - | `Ok x -> l := x :: !l; _nexts () - | `Error e -> raise (OhNoes e) - | `End -> raise StopNaow - | `Await -> () - in - try - seq - (fun s -> Source.Manual.feed src s 0 (String.length s); _nexts ()); - Source.Manual.reached_end src; - _nexts (); - `Ok (List.rev !l) - with - | OhNoes msg -> `Error msg - | StopNaow -> `Ok (List.rev !l) -end diff --git a/src/sexp/CCSexpStream.mli b/src/sexp/CCSexpStream.mli deleted file mode 100644 index 2c87e38d..00000000 --- a/src/sexp/CCSexpStream.mli +++ /dev/null @@ -1,199 +0,0 @@ -(* -Copyright (c) 2013, Simon Cruanes -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. Redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 S-expressions Parser} - -@since 0.4 -@deprecated consider using {!CCSexpM} *) - -type 'a or_error = [ `Ok of 'a | `Error of string ] -type 'a sequence = ('a -> unit) -> unit -type 'a gen = unit -> 'a option - -type t = [ - | `Atom of string - | `List of t list - ] - -(** {2 Serialization (encoding)} *) - -val to_buf : Buffer.t -> t -> unit - -val to_string : t -> string - -val to_file : string -> t -> unit - -val to_file_seq : string -> t sequence -> unit -(** Print the given sequence of expressions to a file *) - -val to_chan : out_channel -> t -> unit - -val print : Format.formatter -> t -> unit -(** Pretty-printer nice on human eyes (including indentation) *) - -val print_noindent : Format.formatter -> t -> unit -(** Raw, direct printing as compact as possible *) - -(** {2 Deserialization (decoding)} *) - -type 'a parse_result = ['a or_error | `End ] -type 'a partial_result = [ 'a parse_result | `Await ] - -(** {6 Source of characters} *) -module Source : sig - type individual_char = - | NC_yield of char - | NC_end - | NC_await - (** An individual character returned by a source *) - - type t = unit -> individual_char - (** A source of characters can yield them one by one, or signal the end, - or signal that some external intervention is needed *) - - type source = t - - (** A manual source of individual characters. When it has exhausted its - own input, it asks its caller to provide more or signal that none remains. - This is especially useful when the source of data is monadic IO *) - module Manual : sig - type t - - val make : unit -> t - (** Make a new manual source. It needs to be fed input manually, - using {!feed} *) - - val to_src : t -> source - (** The manual source contains a source! *) - - val feed : t -> string -> int -> int -> unit - (** Feed a chunk of input to the manual source *) - - val reached_end : t -> unit - (** Tell the decoder that end of input has been reached. From now - the source will only yield [NC_end] *) - end - - val of_string : string -> t - (** Use a single string as the source *) - - val of_chan : ?bufsize:int -> in_channel -> t - (** Use a channel as the source *) - - val of_gen : string gen -> t -end - -(** {6 Streaming Lexer} -Splits the input into opening parenthesis, closing ones, and atoms *) - -module Lexer : sig - type t - (** A streaming lexer, that parses atomic chunks of S-expressions (atoms - and delimiters) *) - - val make : Source.t -> t - (** Create a lexer that uses the given source of characters as an input *) - - val of_string : string -> t - - val of_chan : in_channel -> t - - val line : t -> int - val col : t -> int - - (** Obtain next token *) - - type token = - | Open - | Close - | Atom of string - (** An individual S-exp token *) - - val next : t -> token partial_result - (** Obtain the next token, an error, or block/end stream *) -end - -(** {6 Generator with errors} *) -module ParseGen : sig - type 'a t = unit -> 'a parse_result - (** A generator-like structure, but with the possibility of errors. - When called, it can yield a new element, signal the end of stream, - or signal an error. *) - - val to_list : 'a t -> 'a list or_error - - val head : 'a t -> 'a or_error - - val head_exn : 'a t -> 'a - - val take : int -> 'a t -> 'a t -end - -(** {6 Stream Parser} -Returns a lazy stream of S-expressions. *) - -val parse_string : string -> t ParseGen.t -(** Parse a string *) - -val parse_chan : ?bufsize:int -> in_channel -> t ParseGen.t -(** Parse a channel *) - -val parse_gen : string gen -> t ParseGen.t -(** Parse chunks of string *) - -(** {6 Blocking API} -Parse one S-expression from some source. *) - -val of_chan : in_channel -> t or_error -(** Parse a S-expression from the given channel. Can read more data than - necessary, so don't use this if you need finer-grained control (e.g. - to read something else {b after} the S-exp) *) - -val of_string : string -> t or_error - -val of_file : string -> t or_error -(** Open the file and read a S-exp from it *) - -(** {6 Lists of S-exps} *) - -module L : sig - val to_buf : Buffer.t -> t list -> unit - - val to_string : t list -> string - - val to_file : string -> t list -> unit - - val to_chan : out_channel -> t list -> unit - - val of_chan : ?bufsize:int -> in_channel -> t list or_error - - val of_file : ?bufsize:int -> string -> t list or_error - - val of_string : string -> t list or_error - - val of_gen : string gen -> t list or_error - - val of_seq : string sequence -> t list or_error -end diff --git a/src/sexp/containers_sexp.mldylib b/src/sexp/containers_sexp.mldylib index d0508313..fb76d5af 100644 --- a/src/sexp/containers_sexp.mldylib +++ b/src/sexp/containers_sexp.mldylib @@ -1,6 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: e7d1bfe0f18e27e2b9ff76951f3a9524) +# DO NOT EDIT (digest: 3a36b0ae70bf5e8f3f11d6a4f5f7d948) CCSexp -CCSexpStream CCSexpM # OASIS_STOP diff --git a/src/sexp/containers_sexp.mllib b/src/sexp/containers_sexp.mllib index d0508313..fb76d5af 100644 --- a/src/sexp/containers_sexp.mllib +++ b/src/sexp/containers_sexp.mllib @@ -1,6 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: e7d1bfe0f18e27e2b9ff76951f3a9524) +# DO NOT EDIT (digest: 3a36b0ae70bf5e8f3f11d6a4f5f7d948) CCSexp -CCSexpStream CCSexpM # OASIS_STOP