From 5427e9db314eabfcda0c73f47a5efca4021af5c1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 4 Mar 2015 17:50:34 +0100 Subject: [PATCH 01/46] remove containers.pervasives, add the module Containers to core/ --- README.md | 8 ++++---- _oasis | 12 +++--------- doc/intro.txt | 8 +++----- .../CCPervasives.ml => core/containers.ml} | 0 4 files changed, 10 insertions(+), 18 deletions(-) rename src/{pervasives/CCPervasives.ml => core/containers.ml} (100%) diff --git a/README.md b/README.md index 38b5a44b..2dc0a6ed 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,10 @@ What is _containers_? are totally independent and are prefixed with `CC` (for "containers-core" or "companion-cube" because I'm megalomaniac). This part should be usable and should work. For instance, `CCList` contains functions and - lists including safe versions of `map` and `append`. + lists including safe versions of `map` and `append`. It also + provides a drop-in replacement to the standard library, in the module + `Containers` (intended to be opened, replaces some stdlib modules + with extended ones) - Several small additional libraries that complement it: * `containers.data` with additional data structures that don't have an equivalent in the standard library; @@ -21,9 +24,6 @@ What is _containers_? KMP search algorithm, and a few naive utils). Again, modules are independent and sometimes parametric on the string and char types (so they should be able to deal with your favorite unicode library). -- A drop-in replacement to the standard library, `containers.pervasives`, - that defined a `CCPervasives` module intented to be opened to extend some - modules of the stdlib. - A sub-library with complicated abstractions, `containers.advanced` (with a LINQ-like query module, batch operations using GADTs, and others). - A library using [Lwt](https://github.com/ocsigen/lwt/), `containers.lwt`. diff --git a/_oasis b/_oasis index 78f4d98b..3ee93dd6 100644 --- a/_oasis +++ b/_oasis @@ -49,7 +49,8 @@ Library "containers" Path: src/core Modules: CCVector, CCPrint, CCError, CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCInt, CCBool, CCFloat, CCArray, CCRef, CCSet, - CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat + CCOrd, CCRandom, CCString, CCHashtbl, CCMap, CCFormat, + Containers BuildDepends: bytes Library "containers_io" @@ -105,13 +106,6 @@ Library "containers_bigarray" FindlibParent: containers BuildDepends: containers, bigarray, bytes -Library "containers_pervasives" - Path: src/pervasives - Modules: CCPervasives - BuildDepends: containers - FindlibName: pervasives - FindlibParent: containers - Library "containers_misc" Path: src/misc Pack: true @@ -152,7 +146,7 @@ Document containers "-docflags '-colorize-code -short-functors -charset utf-8'" XOCamlbuildLibraries: containers, containers.misc, containers.iter, containers.data, - containers.string, containers.pervasives, containers.bigarray, + containers.string, containers.bigarray, containers.advanced, containers.io, containers.sexp, containers.lwt diff --git a/doc/intro.txt b/doc/intro.txt index b0bbb36a..3bd2e895 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -44,11 +44,11 @@ CCRef CCSet CCString CCVector +Containers } -{4 Pervasives (aliases to Core Modules)} - -Contains aliases to most modules from {i containers core}, and mixins +The module {!Containers} contains aliases to most other modules defined +in {i containers core}, and mixins such as: {[ module List = struct @@ -57,8 +57,6 @@ such as: end ]} -{!modules: CCPervasives} - {4 Containers.data} Various data structures. diff --git a/src/pervasives/CCPervasives.ml b/src/core/containers.ml similarity index 100% rename from src/pervasives/CCPervasives.ml rename to src/core/containers.ml From 0a20cf0e3d8e83dac777f99f0f9a3a82fdd98f84 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 4 Mar 2015 17:57:09 +0100 Subject: [PATCH 02/46] update doc of Containers module --- src/core/containers.ml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core/containers.ml b/src/core/containers.ml index 96410c18..efb293b9 100644 --- a/src/core/containers.ml +++ b/src/core/containers.ml @@ -40,6 +40,11 @@ Changed [Opt] to [Option] to better reflect that this module is about the ['a option] type, with [module Option = CCOpt]. @since 0.5 + +Renamed from [CCPervasives] in [containers.pervasives], to [Containers] +in the core library [containers] + +@since NEXT_RELEASE *) module Array = struct From af4bf49156408db8bbb981f9a1ce218973901391 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 5 Mar 2015 01:55:08 +0100 Subject: [PATCH 03/46] remove junk in .merlin --- .merlin | 2 -- 1 file changed, 2 deletions(-) diff --git a/.merlin b/.merlin index d5a2d81a..67556471 100644 --- a/.merlin +++ b/.merlin @@ -9,7 +9,6 @@ S src/threads/ S src/misc S src/string S src/bigarray -S src/pervasives S benchs S examples S tests @@ -24,7 +23,6 @@ B _build/src/threads/ B _build/src/misc B _build/src/string B _build/src/bigarray -B _build/src/pervasives B _build/benchs B _build/examples B _build/tests From ee8c7c03f20bbe744137b9adc0a07796415984e6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 5 Mar 2015 01:55:43 +0100 Subject: [PATCH 04/46] wip: App_parse in containers_string, a draft of efficient applicative parser combinators --- _oasis | 2 +- src/string/app_parse.ml | 617 +++++++++++++++++++++++++++++++++++++++ src/string/app_parse.mli | 219 ++++++++++++++ 3 files changed, 837 insertions(+), 1 deletion(-) create mode 100644 src/string/app_parse.ml create mode 100644 src/string/app_parse.mli diff --git a/_oasis b/_oasis index 3ee93dd6..19eacd1d 100644 --- a/_oasis +++ b/_oasis @@ -85,7 +85,7 @@ Library "containers_iter" Library "containers_string" Path: src/string Pack: true - Modules: KMP, Levenshtein + Modules: KMP, Levenshtein, App_parse FindlibName: string FindlibParent: containers diff --git a/src/string/app_parse.ml b/src/string/app_parse.ml new file mode 100644 index 00000000..acd9035a --- /dev/null +++ b/src/string/app_parse.ml @@ -0,0 +1,617 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Applicative Parser Combinators} *) + +type ('a,'b) result = [`Error of 'b | `Ok of 'a] + +type multiplicity = + | Star (* 0 or more *) + | Plus (* 1 or more *) + | Question (* 0 or 1 *) + +let str fmt = Printf.sprintf fmt + +module CharSet = Set.Make(Char) +module CharMap = Map.Make(Char) + +let print_char_set set = + let l = CharSet.fold + (fun c acc -> str "'%c'" c :: acc) set [] in + String.concat ", " l + +let domain_of_char_map m = + CharMap.fold (fun c _ set -> CharSet.add c set) m CharSet.empty + +let print_char_map map = + let l = CharMap.fold + (fun c _ acc -> str "'%c'" c :: acc) map [] in + String.concat ", " l + +(* function composition *) +let compose f g x = f (g x) + +let string_of_list l = + let b = Bytes.make (List.length l) ' ' in + List.iteri (fun i c -> Bytes.set b i c) l; + Bytes.unsafe_to_string b + +type _ t = + | Return : 'a -> 'a t + | Map : ('a -> 'b) * 'a t -> 'b t + | Filter: ('a -> bool) * 'a t -> 'a t + | App : ('a -> 'b) t * 'a t -> 'b t + | AppLeft : 'a t * 'b t -> 'a t + | AppRight : 'a t * 'b t -> 'b t + | Fail : string -> 'a t + | Int : int t + | Float : float t + | AnyOf : CharSet.t -> char t + | Many : CharSet.t * 'a t * unit t * multiplicity -> 'a list t + | Skip : CharSet.t * 'a t * multiplicity -> unit t (* same as Many, but ignores *) + | SwitchC : 'a t CharMap.t * 'a t option -> 'a t + | SwitchS :'a trie -> 'a t + | Fix : ('a t -> 'a t) -> 'a t + | Eof : unit t + +(* a prefix trie *) +and 'a trie = + | TrieEmpty + | TrieNode of 'a t option * 'a trie CharMap.t + +let return x = Return x + +let success = Return () + +let fail msg = Fail msg + +let app f x = App (f, x) + +let map f x = match x with + | Map (g, y) -> Map (compose f g, y) + | Return x -> Return (f x) + | _ -> Map (f,x) + +let filter f x = Filter (f, x) + +let int = Int + +let float = Float + +let int_first_char = + lazy (CharSet.of_list ['-'; '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9']) + +(* a set of characters that are valid as first characters of a parser *) +type possible_first_chars = + | Set of CharSet.t + | AllChars + | NoChar + | IsFail of string + +(* set of possibilities for the first char of a parser *) +let rec possible_first_chars + : type a. a t -> possible_first_chars + = function + | Return _ -> NoChar + | Map (_, x) -> possible_first_chars x + | Filter (_, x) -> possible_first_chars x + | App (f, _) -> possible_first_chars f + | AppLeft (a, _) -> possible_first_chars a + | AppRight (a, _) -> possible_first_chars a + | Fail e -> IsFail e + | Int -> Set (Lazy.force int_first_char) + | Float -> Set (Lazy.force int_first_char) + | AnyOf set -> Set set + | Many(set, _, _, _) -> Set set + | Skip (set, _, _) -> Set set + | SwitchC (map, None) -> Set (domain_of_char_map map) + | SwitchC (_, Some _) -> AllChars + | SwitchS TrieEmpty -> assert false + | SwitchS (TrieNode (_, m)) -> Set (domain_of_char_map m) + | Fix f -> + let p = f (Fix f) in + possible_first_chars p + | Eof -> NoChar + +let many ?(sep=success) t = + match possible_first_chars t with + | Set set -> Many (set, t, sep, Star) + | IsFail msg -> Fail msg + | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") + +let many1 ?(sep=success) t = + match possible_first_chars t with + | Set set -> Many (set, t, sep, Plus) + | IsFail msg -> Fail msg + | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") + +let skip t = + match possible_first_chars t with + | Set set -> Skip (set, t, Star) + | IsFail msg -> Fail msg + | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") + +let skip1 t = + match possible_first_chars t with + | Set set -> Skip (set, t, Plus) + | IsFail msg -> Fail msg + | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") + +let opt t = + match possible_first_chars t with + | Set set -> + map + (function + | [x] -> Some x + | [] -> None + | _ -> assert false + ) (Many (set, t, success, Question)) + | IsFail msg -> Fail msg + | AllChars -> map (fun x -> Some x) t (* always succeeds *) + | NoChar -> invalid_arg (str "opt: invalid parser (does not consume input)") + +let set_of_string s = + let set = ref CharSet.empty in + String.iter + (fun c -> + if CharSet.mem c !set + then invalid_arg (str "any_of: duplicate char %c" c); + set := CharSet.add c !set + ) s; + !set + +let any_of s = AnyOf (set_of_string s) + +let char c = AnyOf (CharSet.singleton c) + +let spaces = skip (any_of " \t") +let spaces1 = skip1 (any_of " \t") + +let white = skip (any_of " \t\n") +let white1 = skip1 (any_of " \t\n") + +let alpha_lower_ = set_of_string "abcdefghijklmonpqrstuvwxyz" +let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ" +let num_ = set_of_string "0123456789" +let alpha_ = CharSet.union alpha_lower_ alpha_upper_ + +let alpha_lower = AnyOf alpha_lower_ +let alpha_upper = AnyOf alpha_upper_ +let num = AnyOf num_ +let alpha = AnyOf alpha_ +let alpha_num = AnyOf (CharSet.union num_ alpha_) + +let eof = Eof + +let switch_c ?default l = + if l = [] then match default with + | None -> invalid_arg "switch_c: empty list"; + | Some d -> d + else + let map = List.fold_left + (fun map (c, t) -> + if CharMap.mem c map + then invalid_arg (str "switch_c: duplicate char %c" c); + CharMap.add c t map + ) CharMap.empty l + in + SwitchC (map, default) + +exception ExnIsFail of string + +let choice l = + if l = [] then invalid_arg "choice: empty list"; + (* build a switch by first char *) + try + (* a map and possibly a default parser *) + let map, def = List.fold_left + (fun (map, def) p -> + match possible_first_chars p, def with + | AllChars, Some _ -> + invalid_arg "choice: ambiguous, several parsers accept any input" + | AllChars, None -> map, Some p + | NoChar, _ -> map, def + | IsFail msg, _ -> raise (ExnIsFail msg) + | Set set, def -> + if CharSet.exists (fun c -> CharMap.mem c map) set + then invalid_arg + (str "choice: ambiguous parsers (overlap on {%s})" + (print_char_set (CharSet.inter set (domain_of_char_map map)))); + let map = CharSet.fold + (fun c map -> CharMap.add c p map) + set map + in map, def + ) (CharMap.empty, None) l + in + SwitchC (map, def) + with ExnIsFail msg -> + fail msg + +(* build prefix trie *) +let switch_s l = + if l = [] then invalid_arg "switch_s: empty list"; + (* add parser p in trie [t], with key slice of [s] starting at [i] *) + let rec add_trie t s i p = + if i = String.length s + then match t with + | TrieEmpty -> TrieNode (Some p, CharMap.empty) + | TrieNode (Some _, _) -> invalid_arg (str "duplicate key \"%s\"" s) + | TrieNode (None, m) -> TrieNode (Some p, m) + else + let c = String.get s i in + match t with + | TrieEmpty -> + let sub = add_trie TrieEmpty s (i+1) p in + TrieNode (None, CharMap.singleton c sub) + | TrieNode (opt, map) -> + try + let sub = CharMap.find c map in + let sub = add_trie sub s (i+1) p in + TrieNode (opt, CharMap.add c sub map) + with Not_found -> + let sub = add_trie TrieEmpty s (i+1) p in + TrieNode (opt, CharMap.add c sub map) + in + let trie = + List.fold_left + (fun trie (s, p) -> + if s = "" then invalid_arg "switch_s: empty string"; + add_trie trie s 0 p + ) TrieEmpty l + in + SwitchS trie + +let bool = + switch_s + [ "true", Return true + ; "false", Return false + ] + +let fix f = Fix f + +module Infix = struct + let (>|=) x f = map f x + let (<*>) = app + let (<<) a b = AppLeft (a, b) (* return (fun x y -> x) <*> a <*> b *) + let (>>) a b = AppRight (a, b) (* return (fun x y -> y) <*> a <*> b *) + let (<+>) a b = choice [a; b] +end + +include Infix + +(* TODO: more efficient version, with buffer *) +let word = + return (fun c s -> string_of_list (c :: s)) <*> alpha <*> many alpha_num + +(** {2 Signatures} *) + +type error = { + line: int; + col: int; + msg: string; +} + +let string_of_error e = str "at %d:%d; %s" e.line e.col e.msg + +exception Error of error + +module type S = sig + type source + (** Source of characters *) + + val parse : source -> 'a t -> ('a, error) result + (** Parse the given source using the parser, and returns the parsed value. *) + + val parse': source -> 'a t -> ('a, string) result + (** Same as {!parse}, but returns a user-friendly string in case of failure *) + + val parse_exn : source -> 'a t -> 'a + (** Unsafe version of {!parse}. + @raise Error if parsing fails *) +end + +(** {2 Build a parser from a given Monadic Input} *) + +module type INPUT = sig + type t + + val read : t -> Bytes.t -> int -> int -> int +end + +type token = + | Yield of char + | EOF + +module type READER = sig + type t + type source + + val create : source -> t + val peek : t -> token (* peek; do not consume *) + val next : t -> token (* read and consume *) + val junk : t -> unit (* consume last token, obtained with junk *) + val line : t -> int + val col : t -> int +end + +module ReaderOfInput(I : INPUT) : READER with type source = I.t = struct + type t = { + mutable rline : int; + mutable rcol : int; + input : I.t; + buf : Bytes.t; + mutable i : int; + mutable len : int; + } + type source = I.t + + let line t = t.rline + let col t = t.rcol + + let create input = { + rline=0; + rcol=0; + input; + buf = Bytes.make 1024 ' '; + i=1; + len=1; (* trick for initialization *) + } + + let read_next t = + let c = Bytes.get t.buf t.i in + t.i <- t.i + 1; + if c = '\n' then ( + t.rcol <- 0; + t.rline <- t.rline + 1; + ) else ( + t.rcol <- t.rcol + 1 + ); + Yield c + + let refill t = + t.len <- I.read t.input t.buf 0 (Bytes.length t.buf); + t.i <- 0; + () + + let next t = + if t.len = 0 then EOF + else if t.i = t.len + then ( + refill t; + if t.len = 0 then EOF else read_next t + ) else read_next t + + let peek t = + if t.i = t.len + then refill t; + Yield (Bytes.get t.buf t.i) + + let junk t = + assert (t.len > 0 && t.i < t.len); + t.i <- t.i + 1 +end + +module MakeFromReader(R : READER) : S with type source = R.source = struct + type source = R.source + + let error r msg = + raise (Error { + line = R.line r; + col = R.col r; + msg; + }) + let errorf r fmt = + Printf.ksprintf + (fun msg -> error r msg) + fmt + + let is_int c = Char.code c >= Char.code '0' && Char.code c <= Char.code '9' + let to_int c = Char.code c - Char.code '0' + + let rec parse_int r sign i = match R.peek r with + | EOF -> i + | Yield c when is_int c -> + R.junk r; + parse_int r sign (10 * i + to_int c) + | Yield '-' when i = 0 && sign -> + (* switch sign: only on first char *) + R.junk r; + parse_int r false 0 + | _ -> if sign then i else -i + + let parse_float _r _buf = assert false + + let rec parse_rec : type a. R.t -> a t -> a = + fun r p -> match p with + | Return x -> x + | Map (f, x) -> + let y = parse_rec r x in + f y + | Filter (f, x) -> + let y = parse_rec r x in + if f y then y else errorf r "filter failed" + | App (f, x) -> + let f' = parse_rec r f in + let x' = parse_rec r x in + f' x' + | AppLeft (a, b) -> + let a' = parse_rec r a in + let _ = parse_rec r b in + a' + | AppRight (a, b) -> + let _ = parse_rec r a in + let b' = parse_rec r b in + b' + | Fail msg -> error r msg + | Int -> parse_int r true 0 + | Float -> parse_float r (Buffer.create 8) + | AnyOf set -> + begin match R.next r with + | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_set set) + | Yield c -> + if CharSet.mem c set then c + else errorf r "expected any of {%s}, got %c" (print_char_set set) c + end + | Many (set, p, sep, mult) -> parse_many r ~set ~sep ~p ~mult [] + | Skip (set, p, mult) -> parse_skip r ~set ~p ~mult + | SwitchC (map, def) -> + begin match R.peek r with + | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map) + | Yield c -> + begin try + let p' = CharMap.find c map in + parse_rec r p' + with Not_found -> + match def with + | None -> + errorf r "expected any of {%s}, got %c" (print_char_map map) c + | Some d -> parse_rec r d + end + end + | SwitchS TrieEmpty -> assert false + | SwitchS (TrieNode (Some p, _)) -> + parse_rec r p + | SwitchS (TrieNode (None, map)) -> + begin match R.next r with + | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map) + | Yield c -> + begin try + let trie = CharMap.find c map in + parse_rec r (SwitchS trie) (* recurse in subtree *) + with Not_found -> + errorf r "expected any of {%s}, got %c" (print_char_map map) c + end + end + | Fix f -> + let p = f (Fix f) in + parse_rec r p + | Eof -> + begin match R.next r with + | EOF -> () + | Yield c -> errorf r "expected EOF, got %c" c + end + + and parse_many + : type a. R.t -> set:CharSet.t -> p:a t -> sep:unit t -> + mult:multiplicity -> a list -> a list + = fun r ~set ~p ~sep ~mult acc -> + match R.peek r with + | EOF -> List.rev acc + | Yield c -> + if CharSet.mem c set + then + let x = parse_rec r p in + match mult with + | Question -> assert (acc = []); [x] + | Plus | Star -> + let _ = parse_rec r sep in (* separator *) + parse_many r ~set ~p ~sep ~mult:Star (x::acc) + else if mult = Plus + then errorf r "expected {%s}, got %c" (print_char_set set) c + else List.rev acc + + and parse_skip + : type a. R.t -> set:CharSet.t -> p:a t -> mult:multiplicity -> unit + = fun r ~set ~p ~mult -> + match R.peek r with + | EOF -> () + | Yield c -> + if CharSet.mem c set + then + let _ = parse_rec r p in + match mult with + | Question -> () + | Plus | Star -> parse_skip r ~set ~p ~mult:Star + else if mult = Plus + then errorf r "expected {%s}, got %c" (print_char_set set) c + else () + + (* public functions *) + let parse_exn src p = + let r = R.create src in + parse_rec r p + + let parse src p = + let r = R.create src in + try + `Ok (parse_rec r p) + with Error e -> + `Error e + + let parse' src p = match parse src p with + | `Ok x -> `Ok x + | `Error e -> `Error (string_of_error e) +end + +module Make(I : INPUT) = struct + module R = ReaderOfInput(I) + include MakeFromReader(R) +end + +module Str = MakeFromReader(struct + (* reader of string *) + type t = { + str : string; + mutable i : int; + mutable rcol : int; + mutable rline : int; + } + type source = string + + let create str = { + str; + i = 0; + rcol = 1; + rline = 1; + } + let line t = t.rline + let col t = t.rcol + let peek t = + if t.i = String.length t.str then EOF else Yield (String.get t.str t.i) + let junk t = + assert (t.i < String.length t.str); + t.i <- t.i + 1 + let next t = + if t.i = String.length t.str then EOF + else ( + let c = String.get t.str t.i in + t.i <- t.i + 1; + if c = '\n' then ( + t.rcol <- 1; + t.rline <- t.rline + 1 + ) else t.rcol <- t.rcol + 1; + Yield c + ) +end) + +module Chan = Make(struct + type t = in_channel + let read = input +end) diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli new file mode 100644 index 00000000..efe04756 --- /dev/null +++ b/src/string/app_parse.mli @@ -0,0 +1,219 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Applicative Parser Combinators} + +{b status: experimental} +@since NEXT_RELEASE +*) + +type ('a,'b) result = [`Error of 'b | `Ok of 'a] + +type 'a t +(** Parser that yields an error or a value of type 'a *) + +(** {6 Combinators} *) + +val return : 'a -> 'a t +(** Parser that succeeds with the given value *) + +val fail : string -> 'a t +(** [fail msg] fails with the given error message *) + +(* TODO: a format version of fail *) + +val app : ('a -> 'b) t -> 'a t -> 'b t +(** Applicative *) + +val map : ('a -> 'b) -> 'a t -> 'b t +(** Map the parsed value *) + +val int : int t +(** Parse an integer *) + +val float : float t +(** Parse a floating point number *) + +val bool : bool t +(** Parse "true" or "false" *) + +val char : char -> char t +(** [char c] parses [c] and [c] only *) + +val any_of : string -> char t +(** Parse any of the chars present in the given string *) + +val alpha_lower : char t + +val alpha_upper : char t + +val alpha : char t + +val num : char t + +val alpha_num : char t + +val word : string t +(** [word] parses any identifier not starting with an integer and + not containing any whitespace nor delimiter + TODO: specify *) + +val spaces : unit t +(** Parse a sequence of ['\t'] and [' '] *) + +val spaces1 : unit t +(** Same as {!spaces} but requires at least one space *) + +val white : unit t +(** Parse a sequence of ['\t'], ['\n'] and [' '] *) + +val white1 : unit t + +val eof : unit t +(** Matches the end of input, fails otherwise *) + +val many : ?sep:unit t -> 'a t -> 'a list t +(** 0 or more parsed elements of the given type. + @param sep separator between elements of the list (for instance, {!space}) *) + +val many1 : ?sep:unit t -> 'a t -> 'a list t +(** Same as {!many}, but needs at least one element *) + +val skip : _ t -> unit t +(** Skip 0 or more instances of the given parser *) + +val skip1 : _ t -> unit t + +val opt : 'a t -> 'a option t +(** [opt x] tries to parse [x], and returns [None] otherwise *) + +val filter : ('a -> bool) -> 'a t -> 'a t +(** [filter f p] parses the same as [p], but fails if the returned value + does not satisfy [f] *) + +val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t +(** [switch_c l] matches the next char and uses the corresponding parser. + Fails if the next char is not in the list, unless default is defined. + @param default parser to use if no char matches + @raise Invalid_argument if some char occurs several times in [l] *) + +val switch_s : (string * 'a t) list -> 'a t +(** [switch_s l] attempts to match matches any of the strings in [l]. + If one of those strings matches, the corresponding parser + is used from now on. + @raise Invalid_argument if some string is a prefix of another string, + or is empty, or if the list is empty *) + +val choice : 'a t list -> 'a t +(** [choice l] chooses between the parsers, unambiguously + @raise Invalid_argument if the list is empty, or if some parsers + overlap, making the choice ambiguous *) + +val fix : ('a t -> 'a t) -> 'a t +(** [fix f] makes a fixpoint *) + +module Infix : sig + val (>|=) : 'a t -> ('a -> 'b) -> 'b t + + val (<*>) : ('a -> 'b) t -> 'a t -> 'b t + (** Synonym to {!app} *) + + val (>>) : _ t -> 'a t -> 'a t + (** [a >> b] parses [a], ignores its result, then parses [b] *) + + val (<<) : 'a t -> _ t -> 'a t + (** [a << b] parses [a], then [b], and discards [b] to return [a] *) + + val (<+>) : 'a t -> 'a t -> 'a t + (** [a <+> b] is [choice [a;b]], a binary choice *) +end + +include module type of Infix + +(** {2 Signatures} *) + +(** {6 Parsing} *) + +type error = { + line: int; + col: int; + msg: string; +} + +val string_of_error : error -> string + +exception Error of error + +module type S = sig + type source + (** Source of characters *) + + val parse : source -> 'a t -> ('a, error) result + (** Parse the given source using the parser, and returns the parsed value. *) + + val parse': source -> 'a t -> ('a, string) result + (** Same as {!parse}, but returns a user-friendly string in case of failure *) + + val parse_exn : source -> 'a t -> 'a + (** Unsafe version of {!parse}. + @raise Error if parsing fails *) +end + +(** {2 Parse} *) + +module type INPUT = sig + type t + + val read : t -> Bytes.t -> int -> int -> int +end + +module Make(I : INPUT) : S with type source = I.t + +(** {2 Low-level interface} *) + +type token = + | Yield of char + | EOF + +module type READER = sig + type t + type source (* underlying source *) + + val create : source -> t + val peek : t -> token (* peek; do not consume *) + val next : t -> token (* read and consume *) + val junk : t -> unit (* consume last token, obtained with junk *) + val line : t -> int + val col : t -> int +end + +module MakeFromReader(R : READER) : S with type source = R.source + +(** {2 Defaults} *) + +module Str : S with type source = string + +module Chan : S with type source = in_channel From 5c7cb55378bf91945a5b6413578f5f6b229b231a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 5 Mar 2015 14:06:30 +0100 Subject: [PATCH 05/46] wip: parser combinators, trying to fix fix --- _oasis | 1 + src/string/app_parse.ml | 286 +++++++++++++++++++++++---------------- src/string/app_parse.mli | 39 +++++- 3 files changed, 208 insertions(+), 118 deletions(-) diff --git a/_oasis b/_oasis index 19eacd1d..59a5b0c4 100644 --- a/_oasis +++ b/_oasis @@ -86,6 +86,7 @@ Library "containers_string" Path: src/string Pack: true Modules: KMP, Levenshtein, App_parse + BuildDepends: bytes FindlibName: string FindlibParent: containers diff --git a/src/string/app_parse.ml b/src/string/app_parse.ml index acd9035a..105647c9 100644 --- a/src/string/app_parse.ml +++ b/src/string/app_parse.ml @@ -51,10 +51,26 @@ let print_char_map map = (fun c _ acc -> str "'%c'" c :: acc) map [] in String.concat ", " l +let set_of_string s = + let set = ref CharSet.empty in + String.iter + (fun c -> + if CharSet.mem c !set + then invalid_arg (str "any_of: duplicate char %c" c); + set := CharSet.add c !set + ) s; + !set + +(* add [c -> p] to the map, for every [c] in [set] *) +let map_add_set init set p = + CharSet.fold + (fun c map -> CharMap.add c p map) + set init + (* function composition *) let compose f g x = f (g x) -let string_of_list l = +let str_of_l l = let b = Bytes.make (List.length l) ' ' in List.iteri (fun i c -> Bytes.set b i c) l; Bytes.unsafe_to_string b @@ -73,36 +89,42 @@ type _ t = | Many : CharSet.t * 'a t * unit t * multiplicity -> 'a list t | Skip : CharSet.t * 'a t * multiplicity -> unit t (* same as Many, but ignores *) | SwitchC : 'a t CharMap.t * 'a t option -> 'a t - | SwitchS :'a trie -> 'a t - | Fix : ('a t -> 'a t) -> 'a t + | Lazy : 'a t Lazy.t -> 'a t | Eof : unit t -(* a prefix trie *) -and 'a trie = - | TrieEmpty - | TrieNode of 'a t option * 'a trie CharMap.t - let return x = Return x +let pure = return let success = Return () let fail msg = Fail msg -let app f x = App (f, x) +let failf fmt = Printf.ksprintf (fun msg -> fail msg) fmt let map f x = match x with | Map (g, y) -> Map (compose f g, y) | Return x -> Return (f x) | _ -> Map (f,x) -let filter f x = Filter (f, x) +let app f x = match f with + | Return f -> map f x + | _ -> App (f, x) + +let filter f x = match x with + | Return y -> if f y then Return y else fail "filter failed" + | Filter (f', y) -> Filter ((fun x -> f' x && f x), y) + | _ -> Filter (f, x) + +let app_left a b = AppLeft (a, b) (* return (fun x y -> x) <*> a <*> b *) + +let app_right a b = AppRight (a, b) (* return (fun x y -> y) <*> a <*> b *) let int = Int let float = Float -let int_first_char = - lazy (CharSet.of_list ['-'; '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9']) +let int_first_char = lazy (set_of_string "-0123456789") +let float_first_char = lazy (set_of_string ".-0123456789") (* a set of characters that are valid as first characters of a parser *) type possible_first_chars = @@ -111,6 +133,15 @@ type possible_first_chars = | NoChar | IsFail of string +let ret_set set = + if CharSet.cardinal set = 256 then AllChars else Set set + +let union_possible_first_chars a b = match a, b with + | Set a, Set b -> Set (CharSet.union a b) + | IsFail e, _ | _, IsFail e -> IsFail e + | AllChars, _ | _, AllChars -> AllChars + | NoChar, o | o, NoChar -> o + (* set of possibilities for the first char of a parser *) let rec possible_first_chars : type a. a t -> possible_first_chars @@ -123,69 +154,46 @@ let rec possible_first_chars | AppRight (a, _) -> possible_first_chars a | Fail e -> IsFail e | Int -> Set (Lazy.force int_first_char) - | Float -> Set (Lazy.force int_first_char) - | AnyOf set -> Set set - | Many(set, _, _, _) -> Set set - | Skip (set, _, _) -> Set set - | SwitchC (map, None) -> Set (domain_of_char_map map) + | Float -> Set (Lazy.force float_first_char) + | AnyOf set -> ret_set set + | Many(set, p, _, (Question | Star)) -> + union_possible_first_chars (ret_set set) (possible_first_chars p) + | Skip (set, p, (Question | Star)) -> + union_possible_first_chars (ret_set set) (possible_first_chars p) + | Many (set, _, _, Plus) -> Set set + | Skip (set, _, Plus) -> ret_set set + | SwitchC (map, None) -> ret_set (domain_of_char_map map) | SwitchC (_, Some _) -> AllChars - | SwitchS TrieEmpty -> assert false - | SwitchS (TrieNode (_, m)) -> Set (domain_of_char_map m) - | Fix f -> - let p = f (Fix f) in - possible_first_chars p + | Lazy (lazy p) -> possible_first_chars p | Eof -> NoChar -let many ?(sep=success) t = - match possible_first_chars t with - | Set set -> Many (set, t, sep, Star) +let many_ ~sep ~mult ~p = match possible_first_chars p with + | Set set -> Many (set, p, sep, mult) | IsFail msg -> Fail msg | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") -let many1 ?(sep=success) t = - match possible_first_chars t with - | Set set -> Many (set, t, sep, Plus) - | IsFail msg -> Fail msg - | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") - | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") +let many ?(sep=success) p = many_ ~sep ~mult:Star ~p -let skip t = - match possible_first_chars t with - | Set set -> Skip (set, t, Star) +let many1 ?(sep=success) p = many_ ~sep ~mult:Plus ~p + +let skip_ ~mult ~p = match possible_first_chars p with + | Set set -> Skip (set, p, mult) | IsFail msg -> Fail msg | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") -let skip1 t = - match possible_first_chars t with - | Set set -> Skip (set, t, Plus) - | IsFail msg -> Fail msg - | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") - | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") +let skip p = skip_ ~mult:Star ~p -let opt t = - match possible_first_chars t with - | Set set -> +let skip1 p = skip_ ~mult:Plus ~p + +let opt p = map (function | [x] -> Some x | [] -> None | _ -> assert false - ) (Many (set, t, success, Question)) - | IsFail msg -> Fail msg - | AllChars -> map (fun x -> Some x) t (* always succeeds *) - | NoChar -> invalid_arg (str "opt: invalid parser (does not consume input)") - -let set_of_string s = - let set = ref CharSet.empty in - String.iter - (fun c -> - if CharSet.mem c !set - then invalid_arg (str "any_of: duplicate char %c" c); - set := CharSet.add c !set - ) s; - !set + ) (many_ ~sep:success ~mult:Question ~p) let any_of s = AnyOf (set_of_string s) @@ -226,34 +234,86 @@ let switch_c ?default l = exception ExnIsFail of string -let choice l = - if l = [] then invalid_arg "choice: empty list"; +(* binary choice *) +let rec merge a b = (* build a switch by first char *) try - (* a map and possibly a default parser *) - let map, def = List.fold_left - (fun (map, def) p -> - match possible_first_chars p, def with - | AllChars, Some _ -> - invalid_arg "choice: ambiguous, several parsers accept any input" - | AllChars, None -> map, Some p - | NoChar, _ -> map, def - | IsFail msg, _ -> raise (ExnIsFail msg) - | Set set, def -> - if CharSet.exists (fun c -> CharMap.mem c map) set - then invalid_arg - (str "choice: ambiguous parsers (overlap on {%s})" - (print_char_set (CharSet.inter set (domain_of_char_map map)))); - let map = CharSet.fold - (fun c map -> CharMap.add c p map) - set map - in map, def - ) (CharMap.empty, None) l - in - SwitchC (map, def) + begin match a, b with + | SwitchC (map_a, def_a), SwitchC (map_b, def_b) -> + (* merge jump tables *) + let def = match def_a, def_b with + | None, None -> None + | Some d, None + | None, Some d -> Some d + | Some _, Some _ -> + invalid_arg "choice: ambiguous, several parsers accept any input" + in + let map = CharMap.merge + (fun _ a b -> match a, b with + | Some a', Some b' -> Some (merge a' b') + | Some m, None + | None, Some m -> Some m + | None, None -> assert false + ) map_a map_b + in + SwitchC (map, def) + | SwitchC (map, def), other + | other, SwitchC (map, def) -> + let map', def' = match possible_first_chars other, def with + | AllChars, _ -> + invalid_arg "choice: ambiguous, several parsers accept any input" + | NoChar, None -> map, Some other + | NoChar, Some _ -> + invalid_arg "choice: ambiguous" + | IsFail msg, _ -> raise (ExnIsFail msg) + | Set set, def -> + if CharSet.exists (fun c -> CharMap.mem c map) set + then invalid_arg + (str "choice: ambiguous parsers (overlap on {%s})" + (print_char_set (CharSet.inter set (domain_of_char_map map)))); + (* else: merge jump tables *) + let map = map_add_set map set other in + map, def + in + SwitchC (map', def') + | _ -> + begin match possible_first_chars a, possible_first_chars b with + | Set set1, Set set2 -> + if CharSet.exists (fun c -> CharSet.mem c set2) set1 + then invalid_arg + (str "choice: ambiguous parsers (overlap on {%s})" + (print_char_set (CharSet.inter set1 set2))); + let map = map_add_set CharMap.empty set1 a in + let map = map_add_set map set2 b in + SwitchC (map, None) + | IsFail e, _ | _, IsFail e -> raise (ExnIsFail e) + | Set s, NoChar -> SwitchC (map_add_set CharMap.empty s a, Some b) + | NoChar, Set s -> SwitchC (map_add_set CharMap.empty s b, Some a) + | AllChars, _ | _, AllChars -> + invalid_arg "choice: ambiguous parsers (one accepts everything)" + | NoChar, NoChar -> + invalid_arg "choice: ambiguous parsers (both accept nothing)" + end + end with ExnIsFail msg -> fail msg +let rec choice l = match l with + | [] -> invalid_arg "choice: empty list"; + | [x] -> x + | a :: b :: tail -> choice (merge a b :: tail) + +(* temporary structure for buildings switches *) +type 'a trie = + | TrieLeaf of 'a t + | TrieNode of 'a trie CharMap.t + +let trie_empty = TrieNode CharMap.empty + +let rec parser_of_trie : type a. a trie -> a t = function + | TrieLeaf p -> p + | TrieNode m -> SwitchC (CharMap.map parser_of_trie m, None) + (* build prefix trie *) let switch_s l = if l = [] then invalid_arg "switch_s: empty list"; @@ -261,54 +321,64 @@ let switch_s l = let rec add_trie t s i p = if i = String.length s then match t with - | TrieEmpty -> TrieNode (Some p, CharMap.empty) - | TrieNode (Some _, _) -> invalid_arg (str "duplicate key \"%s\"" s) - | TrieNode (None, m) -> TrieNode (Some p, m) + | TrieNode m when CharMap.is_empty m -> TrieLeaf p + | TrieNode _ -> invalid_arg (str "key \"%s\" is prefix of another key" s) + | TrieLeaf _ -> invalid_arg (str "duplicate key \"%s\"" s) else let c = String.get s i in match t with - | TrieEmpty -> - let sub = add_trie TrieEmpty s (i+1) p in - TrieNode (None, CharMap.singleton c sub) - | TrieNode (opt, map) -> + | TrieLeaf _ -> + invalid_arg (str "key \"%s\" is prefixed by another key" s) + | TrieNode map -> try let sub = CharMap.find c map in let sub = add_trie sub s (i+1) p in - TrieNode (opt, CharMap.add c sub map) + TrieNode (CharMap.add c sub map) with Not_found -> - let sub = add_trie TrieEmpty s (i+1) p in - TrieNode (opt, CharMap.add c sub map) + let sub = add_trie trie_empty s (i+1) p in + TrieNode (CharMap.add c sub map) in let trie = List.fold_left (fun trie (s, p) -> if s = "" then invalid_arg "switch_s: empty string"; add_trie trie s 0 p - ) TrieEmpty l + ) trie_empty l in - SwitchS trie + parser_of_trie trie let bool = switch_s - [ "true", Return true - ; "false", Return false + [ "true", return true + ; "false", return false ] -let fix f = Fix f +let delay p = Lazy p + +(* FIXME: does not work in practice. Must separate definition of combinators + from compilation to decision tree *) +let fix f = + let rec r = lazy (f r) in + Lazy.force r module Infix = struct let (>|=) x f = map f x let (<*>) = app - let (<<) a b = AppLeft (a, b) (* return (fun x y -> x) <*> a <*> b *) - let (>>) a b = AppRight (a, b) (* return (fun x y -> y) <*> a <*> b *) - let (<+>) a b = choice [a; b] + let (<<) = app_left + let (>>) = app_right + let (<+>) = merge + let (<::>) a b = pure (fun x l -> x::l) <*> a <*> b end include Infix (* TODO: more efficient version, with buffer *) let word = - return (fun c s -> string_of_list (c :: s)) <*> alpha <*> many alpha_num + pure (fun c s -> str_of_l (c :: s)) <*> alpha <*> many alpha_num + +(* TODO *) +let quoted = + Lazy (lazy (failwith "quoted: not implemented")) (** {2 Signatures} *) @@ -495,23 +565,7 @@ module MakeFromReader(R : READER) : S with type source = R.source = struct | Some d -> parse_rec r d end end - | SwitchS TrieEmpty -> assert false - | SwitchS (TrieNode (Some p, _)) -> - parse_rec r p - | SwitchS (TrieNode (None, map)) -> - begin match R.next r with - | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map) - | Yield c -> - begin try - let trie = CharMap.find c map in - parse_rec r (SwitchS trie) (* recurse in subtree *) - with Not_found -> - errorf r "expected any of {%s}, got %c" (print_char_map map) c - end - end - | Fix f -> - let p = f (Fix f) in - parse_rec r p + | Lazy (lazy p) -> parse_rec r p | Eof -> begin match R.next r with | EOF -> () diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli index efe04756..b5eb2789 100644 --- a/src/string/app_parse.mli +++ b/src/string/app_parse.mli @@ -26,6 +26,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Applicative Parser Combinators} + Example: basic S-expr parser + +{[ + open Containers_string.App_parse;; + + type sexp = Atom of string | List of sexp list;; + + let mkatom a = Atom a;; + let mklist l = List l;; + + let sexp = fix (fun sexp -> + spaces >> + ((word >|= mkatom) <+> + ((char '(' >> many (delay sexp) << char ')') >|= mklist) + ) + );; + +]} + {b status: experimental} @since NEXT_RELEASE *) @@ -40,10 +59,13 @@ type 'a t val return : 'a -> 'a t (** Parser that succeeds with the given value *) +val pure : 'a -> 'a t +(** Synonym to {!return} *) + val fail : string -> 'a t (** [fail msg] fails with the given error message *) -(* TODO: a format version of fail *) +val failf : ('a, unit, string, 'b t) format4 -> 'a val app : ('a -> 'b) t -> 'a t -> 'b t (** Applicative *) @@ -81,6 +103,12 @@ val word : string t not containing any whitespace nor delimiter TODO: specify *) +val quoted : string t +(** Quoted string, following OCaml conventions *) + +val str_of_l : char list -> string +(** Helper to build strings from lists of chars *) + val spaces : unit t (** Parse a sequence of ['\t'] and [' '] *) @@ -132,11 +160,15 @@ val choice : 'a t list -> 'a t @raise Invalid_argument if the list is empty, or if some parsers overlap, making the choice ambiguous *) -val fix : ('a t -> 'a t) -> 'a t +val delay : 'a t Lazy.t -> 'a t +(** delay evaluation. Useful in combination with {!fix} *) + +val fix : ('a t Lazy.t -> 'a t) -> 'a t (** [fix f] makes a fixpoint *) module Infix : sig val (>|=) : 'a t -> ('a -> 'b) -> 'b t + (** Infix version of {!map} *) val (<*>) : ('a -> 'b) t -> 'a t -> 'b t (** Synonym to {!app} *) @@ -149,6 +181,9 @@ module Infix : sig val (<+>) : 'a t -> 'a t -> 'a t (** [a <+> b] is [choice [a;b]], a binary choice *) + + val (<::>) : 'a t -> 'a list t -> 'a list t + (** [a <::> b] is [app (fun x l -> x::l) a b] *) end include module type of Infix From 3e769750b64c21150c3f3ebc2688d9c4d05bd9aa Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 11 Mar 2015 23:41:55 +0100 Subject: [PATCH 06/46] wip: app_parse, now with both definition and compiled AST, lazy compilation, printer --- src/string/app_parse.ml | 456 ++++++++++++++++++++++++--------------- src/string/app_parse.mli | 20 +- 2 files changed, 297 insertions(+), 179 deletions(-) diff --git a/src/string/app_parse.ml b/src/string/app_parse.ml index 105647c9..dcdfd576 100644 --- a/src/string/app_parse.ml +++ b/src/string/app_parse.ml @@ -38,19 +38,39 @@ let str fmt = Printf.sprintf fmt module CharSet = Set.Make(Char) module CharMap = Map.Make(Char) +let print_char = function + | '\t' -> "\\t" + | '\n' -> "\\n" + | '\r' -> "\\r" + | c -> str "%c" c + let print_char_set set = - let l = CharSet.fold - (fun c acc -> str "'%c'" c :: acc) set [] in - String.concat ", " l + let buf = Buffer.create 32 in + Buffer.add_char buf '"'; + CharSet.iter (fun c -> Buffer.add_string buf (print_char c)) set; + Buffer.add_char buf '"'; + Buffer.contents buf let domain_of_char_map m = CharMap.fold (fun c _ set -> CharSet.add c set) m CharSet.empty let print_char_map map = let l = CharMap.fold - (fun c _ acc -> str "'%c'" c :: acc) map [] in + (fun c _ acc -> print_char c :: acc) map [] in String.concat ", " l +let ppmap ?(sep=", ") pp_k pp_v fmt m = + let first = ref true in + CharMap.iter + (fun k v -> + if !first then first := false else Format.pp_print_string fmt sep; + pp_k fmt k; + Format.pp_print_string fmt " → "; + pp_v fmt v; + Format.pp_print_cut fmt () + ) m; + () + let set_of_string s = let set = ref CharSet.empty in String.iter @@ -75,53 +95,91 @@ let str_of_l l = List.iteri (fun i c -> Bytes.set b i c) l; Bytes.unsafe_to_string b -type _ t = - | Return : 'a -> 'a t - | Map : ('a -> 'b) * 'a t -> 'b t - | Filter: ('a -> bool) * 'a t -> 'a t - | App : ('a -> 'b) t * 'a t -> 'b t - | AppLeft : 'a t * 'b t -> 'a t - | AppRight : 'a t * 'b t -> 'b t - | Fail : string -> 'a t - | Int : int t - | Float : float t - | AnyOf : CharSet.t -> char t - | Many : CharSet.t * 'a t * unit t * multiplicity -> 'a list t - | Skip : CharSet.t * 'a t * multiplicity -> unit t (* same as Many, but ignores *) - | SwitchC : 'a t CharMap.t * 'a t option -> 'a t - | Lazy : 'a t Lazy.t -> 'a t - | Eof : unit t +type 'a t = { + parse : 'a parse; + mutable compiled : 'a compiled; +} -let return x = Return x -let pure = return +(* syntactic version *) +and _ parse = + | Return : 'a -> 'a parse + | Map : ('a -> 'b) * 'a t -> 'b parse + | Filter: ('a -> bool) * 'a t -> 'a parse + | App : ('a -> 'b) t * 'a t -> 'b parse + | AppLeft : 'a t * 'b t -> 'a parse + | AppRight : 'a t * 'b t -> 'b parse + | Fail : string -> 'a parse + | Int : int parse + | Float : float parse + | AnyOf : CharSet.t -> char parse + | Many : 'a t * unit t * multiplicity -> 'a list parse + | Skip : 'a t * multiplicity -> unit parse (* same as Many, but ignores *) + | SwitchC : 'a t CharMap.t * 'a t option -> 'a parse + | Lazy : 'a t lazy_t -> 'a parse + | Eof : unit parse -let success = Return () +(* compiled version *) +and 'a compiled = + | C_Return : 'a -> 'a compiled + | C_Map : ('a -> 'b) * 'a t -> 'b compiled + | C_Filter: ('a -> bool) * 'a t -> 'a compiled + | C_App : ('a -> 'b) t * 'a t -> 'b compiled + | C_AppLeft : 'a t * 'b t -> 'a compiled + | C_AppRight : 'a t * 'b t -> 'b compiled + | C_Fail : string -> 'a compiled + | C_Int : int compiled + | C_Float : float compiled + | C_AnyOf : CharSet.t -> char compiled + | C_SwitchC : 'a t CharMap.t * 'a t option -> 'a compiled + | C_Eof : unit compiled + | C_NotCompiled : 'a compiled (* to be compiled *) -let fail msg = Fail msg +(** {2 Helpers} *) -let failf fmt = Printf.ksprintf (fun msg -> fail msg) fmt +(* build a new parser *) +let make parse = { parse; compiled=C_NotCompiled; } -let map f x = match x with - | Map (g, y) -> Map (compose f g, y) - | Return x -> Return (f x) - | _ -> Map (f,x) +let ppmult fmt = function + | Star -> Format.pp_print_string fmt "*" + | Plus -> Format.pp_print_string fmt "+" + | Question -> Format.pp_print_string fmt "?" -let app f x = match f with - | Return f -> map f x - | _ -> App (f, x) - -let filter f x = match x with - | Return y -> if f y then Return y else fail "filter failed" - | Filter (f', y) -> Filter ((fun x -> f' x && f x), y) - | _ -> Filter (f, x) - -let app_left a b = AppLeft (a, b) (* return (fun x y -> x) <*> a <*> b *) - -let app_right a b = AppRight (a, b) (* return (fun x y -> y) <*> a <*> b *) - -let int = Int - -let float = Float +let print fmt p = + let depth = ref 0 in + (* print up to a given limit into lazy values *) + let rec print_aux + : type a. Format.formatter -> a t -> unit + = fun fmt p -> + let ppstr = Format.pp_print_string + and ppf fmt x = Format.fprintf fmt x in + let ppc fmt c = ppf fmt "'%s'" (print_char c) in + match p.parse with + | Return _ -> ppstr fmt "" + | Map (_, x) -> ppf fmt "@[(map@ %a)@]" print_aux x + | Filter (_, x) -> ppf fmt "@[(filter@ %a)@]" print_aux x + | App (f, x) -> ppf fmt "@[<2>@[%a@]@ <*>@ @[%a@]@]" print_aux f print_aux x + | AppLeft (a, b) -> ppf fmt "@[%a@ <<@ %a@]" print_aux a print_aux b + | AppRight (a, b) -> ppf fmt "@[%a@ >>@ %a@]" print_aux a print_aux b + | Fail _ -> ppf fmt "" + | Int -> ppstr fmt "" + | Float -> ppstr fmt "" + | AnyOf set -> ppf fmt "@[(any@ %s)@]" (print_char_set set) + | Many (p, sep, mult) -> + ppf fmt "@[<2>(@[%a@]@ sep:@[%a@])%a@]" print_aux p print_aux sep ppmult mult + | Skip (p, mult) -> + ppf fmt "@[<2>(skip @[%a@]%a)@]" print_aux p ppmult mult + | SwitchC (map, None) -> + ppf fmt "@[(switch@ @[%a@])@]" (ppmap ppc print_aux) map + | SwitchC (map, Some o) -> + ppf fmt "@[(switch@ @[%a@]@ or:%a)@]" (ppmap ppc print_aux) map print_aux o + | Lazy _ when !depth > 3 -> ppf fmt "" + | Lazy (lazy p) -> + incr depth; + print_aux fmt p; + decr depth + | Eof -> ppstr fmt "" + in + print_aux fmt p let int_first_char = lazy (set_of_string "-0123456789") let float_first_char = lazy (set_of_string ".-0123456789") @@ -136,56 +194,77 @@ type possible_first_chars = let ret_set set = if CharSet.cardinal set = 256 then AllChars else Set set +(* let union_possible_first_chars a b = match a, b with | Set a, Set b -> Set (CharSet.union a b) | IsFail e, _ | _, IsFail e -> IsFail e | AllChars, _ | _, AllChars -> AllChars | NoChar, o | o, NoChar -> o + *) + +(* TODO: handle cases that can consume 0 or more chars (skip, many...) *) (* set of possibilities for the first char of a parser *) let rec possible_first_chars - : type a. a t -> possible_first_chars + : type a. a parse -> possible_first_chars = function | Return _ -> NoChar - | Map (_, x) -> possible_first_chars x - | Filter (_, x) -> possible_first_chars x - | App (f, _) -> possible_first_chars f - | AppLeft (a, _) -> possible_first_chars a - | AppRight (a, _) -> possible_first_chars a + | Map (_, x) -> possible_first_chars x.parse + | Filter (_, x) -> possible_first_chars x.parse + | App (f, _) -> possible_first_chars f.parse + | AppLeft (a, _) -> possible_first_chars a.parse (* TODO: handle NoChar *) + | AppRight (a, _) -> possible_first_chars a.parse | Fail e -> IsFail e | Int -> Set (Lazy.force int_first_char) | Float -> Set (Lazy.force float_first_char) | AnyOf set -> ret_set set - | Many(set, p, _, (Question | Star)) -> - union_possible_first_chars (ret_set set) (possible_first_chars p) - | Skip (set, p, (Question | Star)) -> - union_possible_first_chars (ret_set set) (possible_first_chars p) - | Many (set, _, _, Plus) -> Set set - | Skip (set, _, Plus) -> ret_set set + | Many (p, _, _) -> possible_first_chars p.parse + | Skip (p, _) -> possible_first_chars p.parse | SwitchC (map, None) -> ret_set (domain_of_char_map map) | SwitchC (_, Some _) -> AllChars - | Lazy (lazy p) -> possible_first_chars p + | Lazy (lazy p) -> possible_first_chars p.parse | Eof -> NoChar -let many_ ~sep ~mult ~p = match possible_first_chars p with - | Set set -> Many (set, p, sep, mult) - | IsFail msg -> Fail msg - | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") - | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") +(** {2 Combinators} *) -let many ?(sep=success) p = many_ ~sep ~mult:Star ~p +let return x = {parse=Return x; compiled=C_Return x} +let pure = return -let many1 ?(sep=success) p = many_ ~sep ~mult:Plus ~p +let success = pure () -let skip_ ~mult ~p = match possible_first_chars p with - | Set set -> Skip (set, p, mult) - | IsFail msg -> Fail msg - | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") - | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") +let fail msg = make (Fail msg) -let skip p = skip_ ~mult:Star ~p +let failf fmt = Printf.ksprintf (fun msg -> fail msg) fmt -let skip1 p = skip_ ~mult:Plus ~p +let map f x = match x.parse with + | Map (g, y) -> make (Map (compose f g, y)) + | Return x -> pure (f x) + | _ -> make (Map (f, x)) + +let app f x = match f.parse with + | Return f -> map f x + | _ -> make (App (f, x)) + +let filter f x = match x.parse with + | Return y -> if f y then return y else fail "filter failed" + | Filter (f', y) -> make (Filter ((fun x -> f' x && f x), y)) + | _ -> make (Filter (f, x)) + +let app_left a b = make (AppLeft (a, b)) (* return (fun x y -> x) <*> a <*> b *) + +let app_right a b = make (AppRight (a, b)) (* return (fun x y -> y) <*> a <*> b *) + +let int = make Int + +let float = make Float + +let many ?(sep=success) p = make (Many (p, sep, Star)) + +let many1 ?(sep=success) p = make (Many (p, sep, Plus)) + +let skip p = make (Skip (p, Star)) + +let skip1 p = make (Skip (p, Plus)) let opt p = map @@ -193,11 +272,12 @@ let opt p = | [x] -> Some x | [] -> None | _ -> assert false - ) (many_ ~sep:success ~mult:Question ~p) + ) (make (Many (p, success, Question))) -let any_of s = AnyOf (set_of_string s) +let any_of' s = make (AnyOf s) +let any_of s = any_of' (set_of_string s) -let char c = AnyOf (CharSet.singleton c) +let char c = make (AnyOf (CharSet.singleton c)) let spaces = skip (any_of " \t") let spaces1 = skip1 (any_of " \t") @@ -210,13 +290,13 @@ let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ" let num_ = set_of_string "0123456789" let alpha_ = CharSet.union alpha_lower_ alpha_upper_ -let alpha_lower = AnyOf alpha_lower_ -let alpha_upper = AnyOf alpha_upper_ -let num = AnyOf num_ -let alpha = AnyOf alpha_ -let alpha_num = AnyOf (CharSet.union num_ alpha_) +let alpha_lower = any_of' alpha_lower_ +let alpha_upper = any_of' alpha_upper_ +let num = any_of' num_ +let alpha = any_of' alpha_ +let alpha_num = any_of' (CharSet.union num_ alpha_) -let eof = Eof +let eof = make Eof let switch_c ?default l = if l = [] then match default with @@ -230,15 +310,15 @@ let switch_c ?default l = CharMap.add c t map ) CharMap.empty l in - SwitchC (map, default) + make (SwitchC (map, default)) exception ExnIsFail of string -(* binary choice *) +(* binary choice: compiled into decision tree *) let rec merge a b = (* build a switch by first char *) try - begin match a, b with + begin match a.parse, b.parse with | SwitchC (map_a, def_a), SwitchC (map_b, def_b) -> (* merge jump tables *) let def = match def_a, def_b with @@ -256,13 +336,13 @@ let rec merge a b = | None, None -> assert false ) map_a map_b in - SwitchC (map, def) + make (SwitchC (map, def)) | SwitchC (map, def), other | other, SwitchC (map, def) -> let map', def' = match possible_first_chars other, def with | AllChars, _ -> invalid_arg "choice: ambiguous, several parsers accept any input" - | NoChar, None -> map, Some other + | NoChar, None -> map, Some (make other) | NoChar, Some _ -> invalid_arg "choice: ambiguous" | IsFail msg, _ -> raise (ExnIsFail msg) @@ -272,12 +352,12 @@ let rec merge a b = (str "choice: ambiguous parsers (overlap on {%s})" (print_char_set (CharSet.inter set (domain_of_char_map map)))); (* else: merge jump tables *) - let map = map_add_set map set other in + let map = map_add_set map set (make other) in map, def - in - SwitchC (map', def') + in + make (SwitchC (map', def')) | _ -> - begin match possible_first_chars a, possible_first_chars b with + begin match possible_first_chars a.parse, possible_first_chars b.parse with | Set set1, Set set2 -> if CharSet.exists (fun c -> CharSet.mem c set2) set1 then invalid_arg @@ -285,23 +365,22 @@ let rec merge a b = (print_char_set (CharSet.inter set1 set2))); let map = map_add_set CharMap.empty set1 a in let map = map_add_set map set2 b in - SwitchC (map, None) + make (SwitchC (map, None)) | IsFail e, _ | _, IsFail e -> raise (ExnIsFail e) - | Set s, NoChar -> SwitchC (map_add_set CharMap.empty s a, Some b) - | NoChar, Set s -> SwitchC (map_add_set CharMap.empty s b, Some a) + | Set s, NoChar -> make (SwitchC (map_add_set CharMap.empty s a, Some b)) + | NoChar, Set s -> make (SwitchC (map_add_set CharMap.empty s b, Some a)) | AllChars, _ | _, AllChars -> invalid_arg "choice: ambiguous parsers (one accepts everything)" | NoChar, NoChar -> invalid_arg "choice: ambiguous parsers (both accept nothing)" end end - with ExnIsFail msg -> - fail msg + with ExnIsFail msg -> make (Fail msg) -let rec choice l = match l with +let rec choice = function | [] -> invalid_arg "choice: empty list"; | [x] -> x - | a :: b :: tail -> choice (merge a b :: tail) + | a :: tl -> merge a (choice tl) (* temporary structure for buildings switches *) type 'a trie = @@ -312,7 +391,7 @@ let trie_empty = TrieNode CharMap.empty let rec parser_of_trie : type a. a trie -> a t = function | TrieLeaf p -> p - | TrieNode m -> SwitchC (CharMap.map parser_of_trie m, None) + | TrieNode m -> make (SwitchC (CharMap.map parser_of_trie m, None)) (* build prefix trie *) let switch_s l = @@ -353,20 +432,20 @@ let bool = ; "false", return false ] -let delay p = Lazy p - -(* FIXME: does not work in practice. Must separate definition of combinators - from compilation to decision tree *) let fix f = - let rec r = lazy (f r) in - Lazy.force r + (* outermost lazy needed for the recursive definition *) + let rec r = { + parse=Lazy (lazy (f r)); + compiled=C_NotCompiled; + } in + r module Infix = struct let (>|=) x f = map f x let (<*>) = app let (<<) = app_left let (>>) = app_right - let (<+>) = merge + let (<+>) a b = choice [a; b] let (<::>) a b = pure (fun x l -> x::l) <*> a <*> b end @@ -378,7 +457,82 @@ let word = (* TODO *) let quoted = - Lazy (lazy (failwith "quoted: not implemented")) + make (Lazy (lazy (failwith "quoted: not implemented"))) + +(** {2 Compilation} *) + +let encode_cons x sep tl = pure (fun x _sep tl -> x :: tl) <*> x <*> sep <*> tl + +let encode_many + : type a. set:CharSet.t -> p:a t -> self:a list t -> sep:unit t -> a list t + = fun ~set ~p ~self ~sep -> + let on_success = encode_cons p sep self + and on_fail = pure [] in + make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail)) + +let encode_opt ~set x = + let mk_one x = [x] in + let on_success = make (Map (mk_one, x)) + and on_fail = pure [] in + make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail)) + +let encode_skip + : type a. set:CharSet.t -> p:a t -> self:unit t -> unit t + = fun ~set ~p ~self -> + let on_success = p >> self + and on_fail = pure () in + make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail)) + +let many_ + : type a. sep:unit t -> mult:multiplicity -> p:a t -> a list t + = fun ~sep ~mult ~p -> match possible_first_chars p.parse with + | Set set -> + begin match mult with + | Star -> fix (fun self -> encode_many ~set ~sep ~p ~self) + | Plus -> encode_cons p sep (fix (fun self -> encode_many ~set ~sep ~p ~self)) + | Question -> encode_opt ~set p + end + | IsFail msg -> make (Fail msg) + | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") + +let skip_ : type a. mult:multiplicity -> p:a t -> unit t + = fun ~mult ~p -> match possible_first_chars p.parse with + | Set set -> + begin match mult with + | Star -> fix (fun self -> encode_skip ~set ~p ~self) + | Plus -> p >> fix (fun self -> encode_skip ~set ~p ~self) + | Question -> encode_opt ~set p >> pure () + end + | IsFail msg -> make (Fail msg) + | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") + | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") + +let rec compile + : type a. a t -> a compiled + = fun t -> match t.compiled with + | C_NotCompiled -> + let c = match t.parse with + | Return x -> C_Return x + | Map (f, x) -> C_Map (f, x) + | Filter (f, x) -> C_Filter (f, x) + | App (f, x) -> C_App (f, x) + | AppLeft (a, b) -> C_AppLeft (a, b) + | AppRight (a, b) -> C_AppRight (a, b) + | Fail msg -> C_Fail msg + | Int -> C_Int + | Float -> C_Float + | AnyOf set -> C_AnyOf set + | Many (p, sep, mult) -> compile (many_ ~sep ~mult ~p) + | Skip (p, mult) -> compile (skip_ ~mult ~p) + | SwitchC (map, None) -> C_SwitchC (map, None) + | SwitchC (map, Some o) -> C_SwitchC (map, Some o) + | Eof -> C_Eof + | Lazy (lazy p) -> compile p + in + t.compiled <- c; + c + | c -> c (* already compiled *) (** {2 Signatures} *) @@ -505,108 +659,70 @@ module MakeFromReader(R : READER) : S with type source = R.source = struct let is_int c = Char.code c >= Char.code '0' && Char.code c <= Char.code '9' let to_int c = Char.code c - Char.code '0' - let rec parse_int r sign i = match R.peek r with + let rec parse_int r ~sign i = match R.peek r with | EOF -> i | Yield c when is_int c -> R.junk r; - parse_int r sign (10 * i + to_int c) + parse_int r ~sign (10 * i + to_int c) | Yield '-' when i = 0 && sign -> (* switch sign: only on first char *) R.junk r; - parse_int r false 0 + parse_int r ~sign:false 0 | _ -> if sign then i else -i let parse_float _r _buf = assert false let rec parse_rec : type a. R.t -> a t -> a = - fun r p -> match p with - | Return x -> x - | Map (f, x) -> + fun r p -> match compile p with + | C_Return x -> x + | C_Map (f, x) -> let y = parse_rec r x in f y - | Filter (f, x) -> + | C_Filter (f, x) -> let y = parse_rec r x in if f y then y else errorf r "filter failed" - | App (f, x) -> + | C_App (f, x) -> let f' = parse_rec r f in let x' = parse_rec r x in f' x' - | AppLeft (a, b) -> + | C_AppLeft (a, b) -> let a' = parse_rec r a in let _ = parse_rec r b in a' - | AppRight (a, b) -> + | C_AppRight (a, b) -> let _ = parse_rec r a in let b' = parse_rec r b in b' - | Fail msg -> error r msg - | Int -> parse_int r true 0 - | Float -> parse_float r (Buffer.create 8) - | AnyOf set -> + | C_Fail msg -> error r msg + | C_Int -> parse_int r ~sign:true 0 + | C_Float -> parse_float r (Buffer.create 8) + | C_AnyOf set -> begin match R.next r with - | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_set set) + | EOF -> errorf r "expected any of %s, got EOF" (print_char_set set) | Yield c -> if CharSet.mem c set then c - else errorf r "expected any of {%s}, got %c" (print_char_set set) c + else errorf r "expected any of %s, got %c" (print_char_set set) c end - | Many (set, p, sep, mult) -> parse_many r ~set ~sep ~p ~mult [] - | Skip (set, p, mult) -> parse_skip r ~set ~p ~mult - | SwitchC (map, def) -> + | C_SwitchC (map, def) -> begin match R.peek r with - | EOF -> errorf r "expected any of {%s}, got EOF" (print_char_map map) + | EOF -> errorf r "expected any of %s, got EOF" (print_char_map map) | Yield c -> begin try - let p' = CharMap.find c map in - parse_rec r p' - with Not_found -> - match def with - | None -> - errorf r "expected any of {%s}, got %c" (print_char_map map) c - | Some d -> parse_rec r d + let p' = CharMap.find c map in + parse_rec r p' + with Not_found -> match def with + | None -> + errorf r "expected any of %s, got %c" (print_char_map map) c + | Some d -> parse_rec r d end end - | Lazy (lazy p) -> parse_rec r p - | Eof -> + | C_NotCompiled -> assert false + | C_Eof -> begin match R.next r with | EOF -> () | Yield c -> errorf r "expected EOF, got %c" c end - and parse_many - : type a. R.t -> set:CharSet.t -> p:a t -> sep:unit t -> - mult:multiplicity -> a list -> a list - = fun r ~set ~p ~sep ~mult acc -> - match R.peek r with - | EOF -> List.rev acc - | Yield c -> - if CharSet.mem c set - then - let x = parse_rec r p in - match mult with - | Question -> assert (acc = []); [x] - | Plus | Star -> - let _ = parse_rec r sep in (* separator *) - parse_many r ~set ~p ~sep ~mult:Star (x::acc) - else if mult = Plus - then errorf r "expected {%s}, got %c" (print_char_set set) c - else List.rev acc - - and parse_skip - : type a. R.t -> set:CharSet.t -> p:a t -> mult:multiplicity -> unit - = fun r ~set ~p ~mult -> - match R.peek r with - | EOF -> () - | Yield c -> - if CharSet.mem c set - then - let _ = parse_rec r p in - match mult with - | Question -> () - | Plus | Star -> parse_skip r ~set ~p ~mult:Star - else if mult = Plus - then errorf r "expected {%s}, got %c" (print_char_set set) c - else () - (* public functions *) let parse_exn src p = let r = R.create src in diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli index b5eb2789..806e1e95 100644 --- a/src/string/app_parse.mli +++ b/src/string/app_parse.mli @@ -37,11 +37,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. let mklist l = List l;; let sexp = fix (fun sexp -> - spaces >> - ((word >|= mkatom) <+> - ((char '(' >> many (delay sexp) << char ')') >|= mklist) - ) - );; + spaces >> + ((word >|= mkatom) <+> + ((char '(' >> many sexp << char ')') >|= mklist) + ) + );; + + Str.parse_exn "(a (b c d) e)" sexp;; ]} @@ -160,10 +162,7 @@ val choice : 'a t list -> 'a t @raise Invalid_argument if the list is empty, or if some parsers overlap, making the choice ambiguous *) -val delay : 'a t Lazy.t -> 'a t -(** delay evaluation. Useful in combination with {!fix} *) - -val fix : ('a t Lazy.t -> 'a t) -> 'a t +val fix : ('a t -> 'a t) -> 'a t (** [fix f] makes a fixpoint *) module Infix : sig @@ -229,6 +228,9 @@ module Make(I : INPUT) : S with type source = I.t (** {2 Low-level interface} *) +val print : Format.formatter -> _ t -> unit +(** Print a parser structure, for debug purpose *) + type token = | Yield of char | EOF From dccb1499f4d8cc2dba6d0faf7330f358487606da Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 12 Mar 2015 10:56:35 +0100 Subject: [PATCH 07/46] doc --- src/core/CCError.mli | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/CCError.mli b/src/core/CCError.mli index 7ab6bef2..57dc714a 100644 --- a/src/core/CCError.mli +++ b/src/core/CCError.mli @@ -113,8 +113,12 @@ val wrap3 : ('a -> 'b -> 'c -> 'd) -> 'a -> 'b -> 'c -> ('d, exn) t (** {2 Applicative} *) val pure : 'a -> ('a, 'err) t +(** Synonym of {!return} *) val (<*>) : ('a -> 'b, 'err) t -> ('a, 'err) t -> ('b, 'err) t +(** [a <*> b] evaluates [a] and [b], and, in case of success, returns + [`Ok (a b)]. Otherwise, it fails, and the error of [a] is chosen + over the error of [b] if both fail *) (** {2 Collections} *) From cb68e1ae66b736b6f6d2ca84ea91e5d7adec6218 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 12 Mar 2015 18:09:32 +0100 Subject: [PATCH 08/46] app_parse: expose "junk" combinator --- src/string/app_parse.mli | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli index 806e1e95..7d1249bb 100644 --- a/src/string/app_parse.mli +++ b/src/string/app_parse.mli @@ -64,6 +64,9 @@ val return : 'a -> 'a t val pure : 'a -> 'a t (** Synonym to {!return} *) +val junk : unit t +(** Skip next char *) + val fail : string -> 'a t (** [fail msg] fails with the given error message *) From 0dc8b90d66767e413c0cab8b5be9c41202bdc583 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 12 Mar 2015 18:10:00 +0100 Subject: [PATCH 09/46] refactor internals of app_parse (simpler compilation, less duplication, fix pfc computation) --- src/string/app_parse.ml | 306 +++++++++++++++++++++++----------------- 1 file changed, 175 insertions(+), 131 deletions(-) diff --git a/src/string/app_parse.ml b/src/string/app_parse.ml index dcdfd576..5376f4ff 100644 --- a/src/string/app_parse.ml +++ b/src/string/app_parse.ml @@ -96,30 +96,17 @@ let str_of_l l = Bytes.unsafe_to_string b type 'a t = { - parse : 'a parse; - mutable compiled : 'a compiled; + mutable st : 'a parse_or_compiled; } (* syntactic version *) and _ parse = - | Return : 'a -> 'a parse - | Map : ('a -> 'b) * 'a t -> 'b parse - | Filter: ('a -> bool) * 'a t -> 'a parse - | App : ('a -> 'b) t * 'a t -> 'b parse - | AppLeft : 'a t * 'b t -> 'a parse - | AppRight : 'a t * 'b t -> 'b parse - | Fail : string -> 'a parse - | Int : int parse - | Float : float parse - | AnyOf : CharSet.t -> char parse | Many : 'a t * unit t * multiplicity -> 'a list parse | Skip : 'a t * multiplicity -> unit parse (* same as Many, but ignores *) - | SwitchC : 'a t CharMap.t * 'a t option -> 'a parse | Lazy : 'a t lazy_t -> 'a parse - | Eof : unit parse (* compiled version *) -and 'a compiled = +and _ compiled = | C_Return : 'a -> 'a compiled | C_Map : ('a -> 'b) * 'a t -> 'b compiled | C_Filter: ('a -> bool) * 'a t -> 'a compiled @@ -129,15 +116,21 @@ and 'a compiled = | C_Fail : string -> 'a compiled | C_Int : int compiled | C_Float : float compiled + | C_Junk : unit compiled (* ignore next char *) | C_AnyOf : CharSet.t -> char compiled | C_SwitchC : 'a t CharMap.t * 'a t option -> 'a compiled | C_Eof : unit compiled - | C_NotCompiled : 'a compiled (* to be compiled *) + +and 'a parse_or_compiled = + | Parse of 'a parse + | Compiled of 'a compiled (** {2 Helpers} *) (* build a new parser *) -let make parse = { parse; compiled=C_NotCompiled; } +let make p = {st=Parse p} +let make_c c = {st=Compiled c} +let make_pc st = {st} let ppmult fmt = function | Star -> Format.pp_print_string fmt "*" @@ -153,31 +146,32 @@ let print fmt p = let ppstr = Format.pp_print_string and ppf fmt x = Format.fprintf fmt x in let ppc fmt c = ppf fmt "'%s'" (print_char c) in - match p.parse with - | Return _ -> ppstr fmt "" - | Map (_, x) -> ppf fmt "@[(map@ %a)@]" print_aux x - | Filter (_, x) -> ppf fmt "@[(filter@ %a)@]" print_aux x - | App (f, x) -> ppf fmt "@[<2>@[%a@]@ <*>@ @[%a@]@]" print_aux f print_aux x - | AppLeft (a, b) -> ppf fmt "@[%a@ <<@ %a@]" print_aux a print_aux b - | AppRight (a, b) -> ppf fmt "@[%a@ >>@ %a@]" print_aux a print_aux b - | Fail _ -> ppf fmt "" - | Int -> ppstr fmt "" - | Float -> ppstr fmt "" - | AnyOf set -> ppf fmt "@[(any@ %s)@]" (print_char_set set) - | Many (p, sep, mult) -> + match p.st with + | Compiled (C_Return _) -> ppstr fmt "" + | Compiled (C_Map (_, x)) -> ppf fmt "@[(map@ %a)@]" print_aux x + | Compiled (C_Filter (_, x)) -> ppf fmt "@[(filter@ %a)@]" print_aux x + | Compiled (C_App (f, x)) -> ppf fmt "@[<2>@[%a@]@ <*>@ @[%a@]@]" print_aux f print_aux x + | Compiled (C_AppLeft (a, b)) -> ppf fmt "@[%a@ <<@ %a@]" print_aux a print_aux b + | Compiled (C_AppRight (a, b)) -> ppf fmt "@[%a@ >>@ %a@]" print_aux a print_aux b + | Compiled (C_Fail _) -> ppf fmt "" + | Compiled C_Int -> ppstr fmt "" + | Compiled C_Float -> ppstr fmt "" + | Compiled C_Junk -> ppstr fmt "" + | Compiled (C_AnyOf set) -> ppf fmt "@[(any@ %s)@]" (print_char_set set) + | Parse (Many (p, sep, mult)) -> ppf fmt "@[<2>(@[%a@]@ sep:@[%a@])%a@]" print_aux p print_aux sep ppmult mult - | Skip (p, mult) -> + | Parse (Skip (p, mult)) -> ppf fmt "@[<2>(skip @[%a@]%a)@]" print_aux p ppmult mult - | SwitchC (map, None) -> + | Compiled (C_SwitchC (map, None)) -> ppf fmt "@[(switch@ @[%a@])@]" (ppmap ppc print_aux) map - | SwitchC (map, Some o) -> + | Compiled (C_SwitchC (map, Some o)) -> ppf fmt "@[(switch@ @[%a@]@ or:%a)@]" (ppmap ppc print_aux) map print_aux o - | Lazy _ when !depth > 3 -> ppf fmt "" - | Lazy (lazy p) -> + | Parse (Lazy _) when !depth > 3 -> ppf fmt "" + | Parse (Lazy (lazy p)) -> incr depth; print_aux fmt p; decr depth - | Eof -> ppstr fmt "" + | Compiled C_Eof -> ppstr fmt "" in print_aux fmt p @@ -189,74 +183,123 @@ type possible_first_chars = | Set of CharSet.t | AllChars | NoChar + | NoCharOrSet of CharSet.t (* either no char, or something starting with set *) | IsFail of string -let ret_set set = - if CharSet.cardinal set = 256 then AllChars else Set set +let ret_set set = match CharSet.cardinal set with + | 0 -> NoChar + | 256 -> AllChars + | _ -> Set set -(* -let union_possible_first_chars a b = match a, b with - | Set a, Set b -> Set (CharSet.union a b) +let ret_no_char_or set = match CharSet.cardinal set with + | 0 -> NoChar + | 256 -> AllChars + | _ -> NoCharOrSet set + +(* pfc of parsing a or b *) +let union_pfc a b = match a, b with + | Set a, Set b -> ret_set (CharSet.union a b) + | NoCharOrSet s, Set s' + | Set s', NoCharOrSet s -> ret_no_char_or (CharSet.union s s') + | NoChar, Set s + | Set s, NoChar -> ret_no_char_or s + | NoCharOrSet s, NoCharOrSet s' -> ret_no_char_or (CharSet.union s s') | IsFail e, _ | _, IsFail e -> IsFail e | AllChars, _ | _, AllChars -> AllChars | NoChar, o | o, NoChar -> o - *) -(* TODO: handle cases that can consume 0 or more chars (skip, many...) *) +(* pfc of parsing a then b *) +let then_pfc a b = match a, b with + | Set a, Set b -> ret_set (CharSet.union a b) + | NoCharOrSet s, NoCharOrSet s' -> ret_no_char_or (CharSet.union s s') + | NoCharOrSet s, Set s' -> ret_set (CharSet.union s s') + | NoCharOrSet s, NoChar -> ret_no_char_or s + | Set s, _ -> ret_set s + | IsFail e, _ | _, IsFail e -> IsFail e + | AllChars, _ | _, AllChars -> AllChars + | NoChar, o -> o + +let (<|||>) a b = match a with + | NoChar -> Lazy.force b + | NoCharOrSet _ -> then_pfc a (Lazy.force b) + | _ -> a (* set of possibilities for the first char of a parser *) -let rec possible_first_chars +let rec pfc : type a. a t -> possible_first_chars = fun t -> pfc_pc t.st + +and pfc_pc + : type a. a parse_or_compiled -> possible_first_chars + = function + | Parse p -> pfc_p p + | Compiled c -> pfc_c c + +and pfc_p : type a. a parse -> possible_first_chars = function - | Return _ -> NoChar - | Map (_, x) -> possible_first_chars x.parse - | Filter (_, x) -> possible_first_chars x.parse - | App (f, _) -> possible_first_chars f.parse - | AppLeft (a, _) -> possible_first_chars a.parse (* TODO: handle NoChar *) - | AppRight (a, _) -> possible_first_chars a.parse - | Fail e -> IsFail e - | Int -> Set (Lazy.force int_first_char) - | Float -> Set (Lazy.force float_first_char) - | AnyOf set -> ret_set set - | Many (p, _, _) -> possible_first_chars p.parse - | Skip (p, _) -> possible_first_chars p.parse - | SwitchC (map, None) -> ret_set (domain_of_char_map map) - | SwitchC (_, Some _) -> AllChars - | Lazy (lazy p) -> possible_first_chars p.parse - | Eof -> NoChar + | Many (p, _, (Question | Star)) -> union_pfc (pfc p) NoChar + | Many (p, _, Plus) -> pfc p + | Skip (p, (Question | Star)) -> union_pfc (pfc p) NoChar + | Skip (p, Plus) -> pfc p + | Lazy (lazy p) -> pfc p + +and pfc_c + : type a. a compiled -> possible_first_chars + = function + | C_Return _ -> NoChar + | C_Map (_, x) -> pfc x + | C_Filter (_, x) -> pfc x + | C_App (f, x) -> pfc f <|||> lazy (pfc x) + | C_AppLeft (a, b) -> pfc a <|||> lazy (pfc b) + | C_AppRight (a, b) -> pfc a <|||> lazy (pfc b) + | C_Fail e -> IsFail e + | C_Int -> Set (Lazy.force int_first_char) + | C_Float -> Set (Lazy.force float_first_char) + | C_Junk -> AllChars + | C_AnyOf set -> ret_set set + | C_SwitchC (map, None) -> ret_set (domain_of_char_map map) + | C_SwitchC (map, Some o) -> + let s = domain_of_char_map map in + union_pfc (ret_set s) (pfc o) + | C_Eof -> NoChar + +let possible_first_chars = pfc (** {2 Combinators} *) -let return x = {parse=Return x; compiled=C_Return x} +let return x = make_c (C_Return x) let pure = return let success = pure () -let fail msg = make (Fail msg) +let fail msg = make_c (C_Fail msg) + +let junk = make_c C_Junk let failf fmt = Printf.ksprintf (fun msg -> fail msg) fmt -let map f x = match x.parse with - | Map (g, y) -> make (Map (compose f g, y)) - | Return x -> pure (f x) - | _ -> make (Map (f, x)) +let map f x = match x.st with + | Compiled (C_Map (g, y)) -> make_c (C_Map (compose f g, y)) + | Compiled (C_Return x) -> pure (f x) + | _ -> make_c (C_Map (f, x)) -let app f x = match f.parse with - | Return f -> map f x - | _ -> make (App (f, x)) +let app f x = match f.st with + | Compiled (C_Return f) -> map f x + | _ -> make_c (C_App (f, x)) -let filter f x = match x.parse with - | Return y -> if f y then return y else fail "filter failed" - | Filter (f', y) -> make (Filter ((fun x -> f' x && f x), y)) - | _ -> make (Filter (f, x)) +let fun_and f f' x = f x && f' x -let app_left a b = make (AppLeft (a, b)) (* return (fun x y -> x) <*> a <*> b *) +let filter f x = match x.st with + | Compiled (C_Return y) -> if f y then return y else fail "filter failed" + | Compiled (C_Filter (f', y)) -> make_c (C_Filter (fun_and f f', y)) + | _ -> make_c (C_Filter (f, x)) -let app_right a b = make (AppRight (a, b)) (* return (fun x y -> y) <*> a <*> b *) +let app_left a b = make_c (C_AppLeft (a, b)) (* return (fun x y -> x) <*> a <*> b *) -let int = make Int +let app_right a b = make_c (C_AppRight (a, b)) (* return (fun x y -> y) <*> a <*> b *) -let float = make Float +let int = make_c C_Int + +let float = make_c C_Float let many ?(sep=success) p = make (Many (p, sep, Star)) @@ -274,10 +317,10 @@ let opt p = | _ -> assert false ) (make (Many (p, success, Question))) -let any_of' s = make (AnyOf s) +let any_of' s = make_c (C_AnyOf s) let any_of s = any_of' (set_of_string s) -let char c = make (AnyOf (CharSet.singleton c)) +let char c = any_of' (CharSet.singleton c) let spaces = skip (any_of " \t") let spaces1 = skip1 (any_of " \t") @@ -296,7 +339,7 @@ let num = any_of' num_ let alpha = any_of' alpha_ let alpha_num = any_of' (CharSet.union num_ alpha_) -let eof = make Eof +let eof = make_c C_Eof let switch_c ?default l = if l = [] then match default with @@ -310,16 +353,19 @@ let switch_c ?default l = CharMap.add c t map ) CharMap.empty l in - make (SwitchC (map, default)) + make_c (C_SwitchC (map, default)) exception ExnIsFail of string +let make_switch_c a b = make_c (C_SwitchC (a, b)) + (* binary choice: compiled into decision tree *) let rec merge a b = (* build a switch by first char *) try - begin match a.parse, b.parse with - | SwitchC (map_a, def_a), SwitchC (map_b, def_b) -> + begin match a.st, b.st with + | Compiled (C_SwitchC (map_a, def_a)), + Compiled (C_SwitchC (map_b, def_b)) -> (* merge jump tables *) let def = match def_a, def_b with | None, None -> None @@ -336,46 +382,47 @@ let rec merge a b = | None, None -> assert false ) map_a map_b in - make (SwitchC (map, def)) - | SwitchC (map, def), other - | other, SwitchC (map, def) -> - let map', def' = match possible_first_chars other, def with + make_switch_c map def + | Compiled (C_SwitchC (map, def)), other + | other, Compiled (C_SwitchC (map, def)) -> + let map', def' = match pfc_pc other, def with | AllChars, _ -> invalid_arg "choice: ambiguous, several parsers accept any input" - | NoChar, None -> map, Some (make other) + | NoChar, None -> map, Some (make_pc other) | NoChar, Some _ -> invalid_arg "choice: ambiguous" | IsFail msg, _ -> raise (ExnIsFail msg) + | NoCharOrSet set, def | Set set, def -> if CharSet.exists (fun c -> CharMap.mem c map) set then invalid_arg (str "choice: ambiguous parsers (overlap on {%s})" (print_char_set (CharSet.inter set (domain_of_char_map map)))); (* else: merge jump tables *) - let map = map_add_set map set (make other) in + let map = map_add_set map set (make_pc other) in map, def in - make (SwitchC (map', def')) + make_switch_c map' def' | _ -> - begin match possible_first_chars a.parse, possible_first_chars b.parse with - | Set set1, Set set2 -> + begin match possible_first_chars a, possible_first_chars b with + | (Set set1 | NoCharOrSet set1), (Set set2 | NoCharOrSet set2) -> if CharSet.exists (fun c -> CharSet.mem c set2) set1 then invalid_arg (str "choice: ambiguous parsers (overlap on {%s})" (print_char_set (CharSet.inter set1 set2))); let map = map_add_set CharMap.empty set1 a in let map = map_add_set map set2 b in - make (SwitchC (map, None)) + make_switch_c map None | IsFail e, _ | _, IsFail e -> raise (ExnIsFail e) - | Set s, NoChar -> make (SwitchC (map_add_set CharMap.empty s a, Some b)) - | NoChar, Set s -> make (SwitchC (map_add_set CharMap.empty s b, Some a)) + | Set s, NoChar -> make_switch_c (map_add_set CharMap.empty s a) (Some b) + | NoChar, Set s -> make_switch_c (map_add_set CharMap.empty s b) (Some a) | AllChars, _ | _, AllChars -> invalid_arg "choice: ambiguous parsers (one accepts everything)" - | NoChar, NoChar -> + | (NoChar | NoCharOrSet _), (NoChar | NoCharOrSet _) -> invalid_arg "choice: ambiguous parsers (both accept nothing)" end end - with ExnIsFail msg -> make (Fail msg) + with ExnIsFail msg -> make_c (C_Fail msg) let rec choice = function | [] -> invalid_arg "choice: empty list"; @@ -391,7 +438,12 @@ let trie_empty = TrieNode CharMap.empty let rec parser_of_trie : type a. a trie -> a t = function | TrieLeaf p -> p - | TrieNode m -> make (SwitchC (CharMap.map parser_of_trie m, None)) + | TrieNode m -> + make_switch_c (CharMap.map parser_of_trie' m) None +(* consume next char, then build sub-trie *) +and parser_of_trie' + : type a. a trie -> a t + = fun x -> app_right junk (parser_of_trie x) (* build prefix trie *) let switch_s l = @@ -435,8 +487,7 @@ let bool = let fix f = (* outermost lazy needed for the recursive definition *) let rec r = { - parse=Lazy (lazy (f r)); - compiled=C_NotCompiled; + st=Parse (Lazy (lazy (f r))); } in r @@ -468,71 +519,64 @@ let encode_many = fun ~set ~p ~self ~sep -> let on_success = encode_cons p sep self and on_fail = pure [] in - make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail)) + make_switch_c (map_add_set CharMap.empty set on_success) (Some on_fail) let encode_opt ~set x = let mk_one x = [x] in - let on_success = make (Map (mk_one, x)) + let on_success = make_c (C_Map (mk_one, x)) and on_fail = pure [] in - make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail)) + make_switch_c (map_add_set CharMap.empty set on_success) (Some on_fail) let encode_skip : type a. set:CharSet.t -> p:a t -> self:unit t -> unit t = fun ~set ~p ~self -> let on_success = p >> self and on_fail = pure () in - make (SwitchC (map_add_set CharMap.empty set on_success, Some on_fail)) + make_switch_c (map_add_set CharMap.empty set on_success) (Some on_fail) let many_ : type a. sep:unit t -> mult:multiplicity -> p:a t -> a list t - = fun ~sep ~mult ~p -> match possible_first_chars p.parse with + = fun ~sep ~mult ~p -> match possible_first_chars p with | Set set -> begin match mult with | Star -> fix (fun self -> encode_many ~set ~sep ~p ~self) | Plus -> encode_cons p sep (fix (fun self -> encode_many ~set ~sep ~p ~self)) | Question -> encode_opt ~set p end - | IsFail msg -> make (Fail msg) + | IsFail msg -> fail msg + | NoCharOrSet _ -> invalid_arg (str "many: invalid parser (might not consume input)") | AllChars -> invalid_arg (str "many: invalid parser (always succeeds)") | NoChar -> invalid_arg (str "many: invalid parser (does not consume input)") let skip_ : type a. mult:multiplicity -> p:a t -> unit t - = fun ~mult ~p -> match possible_first_chars p.parse with + = fun ~mult ~p -> match possible_first_chars p with | Set set -> begin match mult with | Star -> fix (fun self -> encode_skip ~set ~p ~self) | Plus -> p >> fix (fun self -> encode_skip ~set ~p ~self) - | Question -> encode_opt ~set p >> pure () + | Question -> encode_opt ~set p >> pure () end - | IsFail msg -> make (Fail msg) + | IsFail msg -> fail msg + | NoCharOrSet _ -> invalid_arg (str "many: invalid parser (might not consume input)") | AllChars -> invalid_arg (str "skip: invalid parser (always succeeds)") | NoChar -> invalid_arg (str "skip: invalid parser (does not consume input)") let rec compile : type a. a t -> a compiled - = fun t -> match t.compiled with - | C_NotCompiled -> - let c = match t.parse with - | Return x -> C_Return x - | Map (f, x) -> C_Map (f, x) - | Filter (f, x) -> C_Filter (f, x) - | App (f, x) -> C_App (f, x) - | AppLeft (a, b) -> C_AppLeft (a, b) - | AppRight (a, b) -> C_AppRight (a, b) - | Fail msg -> C_Fail msg - | Int -> C_Int - | Float -> C_Float - | AnyOf set -> C_AnyOf set - | Many (p, sep, mult) -> compile (many_ ~sep ~mult ~p) - | Skip (p, mult) -> compile (skip_ ~mult ~p) - | SwitchC (map, None) -> C_SwitchC (map, None) - | SwitchC (map, Some o) -> C_SwitchC (map, Some o) - | Eof -> C_Eof - | Lazy (lazy p) -> compile p - in - t.compiled <- c; + = fun t -> match t.st with + | Compiled c -> c (* already compiled *) + | Parse (Many (p, sep, mult)) -> + let c = compile (many_ ~sep ~mult ~p) in + t.st <- Compiled c; + c + | Parse (Skip (p, mult)) -> + let c = compile (skip_ ~mult ~p) in + t.st <- Compiled c; + c + | Parse (Lazy (lazy p)) -> + let c = compile p in + t.st <- Compiled c; c - | c -> c (* already compiled *) (** {2 Signatures} *) @@ -696,6 +740,7 @@ module MakeFromReader(R : READER) : S with type source = R.source = struct | C_Fail msg -> error r msg | C_Int -> parse_int r ~sign:true 0 | C_Float -> parse_float r (Buffer.create 8) + | C_Junk -> R.junk r | C_AnyOf set -> begin match R.next r with | EOF -> errorf r "expected any of %s, got EOF" (print_char_set set) @@ -716,7 +761,6 @@ module MakeFromReader(R : READER) : S with type source = R.source = struct | Some d -> parse_rec r d end end - | C_NotCompiled -> assert false | C_Eof -> begin match R.next r with | EOF -> () From 0e62f9a345b956846fe3a8705bf31a6fc8dd3caf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 12 Mar 2015 18:51:26 +0100 Subject: [PATCH 10/46] a few fixes, including implementing quoted --- src/string/app_parse.ml | 14 +++++++++----- src/string/app_parse.mli | 16 ++++++++++++++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/string/app_parse.ml b/src/string/app_parse.ml index 5376f4ff..e841d10b 100644 --- a/src/string/app_parse.ml +++ b/src/string/app_parse.ml @@ -42,6 +42,7 @@ let print_char = function | '\t' -> "\\t" | '\n' -> "\\n" | '\r' -> "\\r" + | '"' -> "\\\"" | c -> str "%c" c let print_char_set set = @@ -332,10 +333,12 @@ let alpha_lower_ = set_of_string "abcdefghijklmonpqrstuvwxyz" let alpha_upper_ = set_of_string "ABCDEFGHIJKLMONPQRSTUVWXYZ" let num_ = set_of_string "0123456789" let alpha_ = CharSet.union alpha_lower_ alpha_upper_ +let symbols_ = set_of_string "|!;$#@%&-_/=" let alpha_lower = any_of' alpha_lower_ let alpha_upper = any_of' alpha_upper_ let num = any_of' num_ +let symbols = any_of' symbols_ let alpha = any_of' alpha_ let alpha_num = any_of' (CharSet.union num_ alpha_) @@ -502,13 +505,14 @@ end include Infix -(* TODO: more efficient version, with buffer *) let word = pure (fun c s -> str_of_l (c :: s)) <*> alpha <*> many alpha_num -(* TODO *) let quoted = - make (Lazy (lazy (failwith "quoted: not implemented"))) + let q = char '"' in + let escaped = char '\\' >> char '"' in + let inner = choice [escaped; alpha_num; any_of "()' \t\n|!;$#@%&-_/=~.,:<>[]"] in + q >> (many inner >|= str_of_l) << q (** {2 Compilation} *) @@ -644,7 +648,7 @@ module ReaderOfInput(I : INPUT) : READER with type source = I.t = struct let col t = t.rcol let create input = { - rline=0; + rline=1; rcol=0; input; buf = Bytes.make 1024 ' '; @@ -746,7 +750,7 @@ module MakeFromReader(R : READER) : S with type source = R.source = struct | EOF -> errorf r "expected any of %s, got EOF" (print_char_set set) | Yield c -> if CharSet.mem c set then c - else errorf r "expected any of %s, got %c" (print_char_set set) c + else errorf r "expected any of %s, got '%s'" (print_char_set set) (print_char c) end | C_SwitchC (map, def) -> begin match R.peek r with diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli index 7d1249bb..8e8cab4c 100644 --- a/src/string/app_parse.mli +++ b/src/string/app_parse.mli @@ -36,9 +36,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. let mkatom a = Atom a;; let mklist l = List l;; + let ident_char = alpha_num <+> any_of "|!;$#@%&-_/=*.:~+[]<>'" ;; + let ident = many1 ident_char >|= str_of_l ;; + let atom = (ident <+> quoted) >|= mkatom ;; + let sexp = fix (fun sexp -> - spaces >> - ((word >|= mkatom) <+> + white >> + (atom <+> ((char '(' >> many sexp << char ')') >|= mklist) ) );; @@ -99,6 +103,9 @@ val alpha_upper : char t val alpha : char t +val symbols : char t +(** symbols, such as "!-=_"... *) + val num : char t val alpha_num : char t @@ -147,6 +154,11 @@ val filter : ('a -> bool) -> 'a t -> 'a t (** [filter f p] parses the same as [p], but fails if the returned value does not satisfy [f] *) + +(* TODO: complement operator any_but (all but \, for instance) *) +(* TODO: a "if-then-else" combinator (assuming the test has a + set of possible first chars) *) + val switch_c : ?default:'a t -> (char * 'a t) list -> 'a t (** [switch_c l] matches the next char and uses the corresponding parser. Fails if the next char is not in the list, unless default is defined. From adcff57e4c40f7176aec594cff66bb842f8adb59 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Mar 2015 22:20:01 +0100 Subject: [PATCH 11/46] add `--` to CCFQueue --- src/data/CCFQueue.ml | 9 +++++++++ src/data/CCFQueue.mli | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/src/data/CCFQueue.ml b/src/data/CCFQueue.ml index 638a1617..96c62b92 100644 --- a/src/data/CCFQueue.ml +++ b/src/data/CCFQueue.ml @@ -358,3 +358,12 @@ let rec _equal_klist eq l1 l2 = match l1(), l2() with eq x1 x2 && _equal_klist eq l1' l2' let equal eq q1 q2 = _equal_klist eq (to_klist q1) (to_klist q2) + +let (--) a b = + let rec up_to q a b = if a = b + then snoc q a + else up_to (snoc q a) (a+1) b + and down_to q a b = if a = b then snoc q a + else down_to (snoc q a) (a-1) b + in + if a <= b then up_to empty a b else down_to empty a b diff --git a/src/data/CCFQueue.mli b/src/data/CCFQueue.mli index 397155c1..3c5d4ab4 100644 --- a/src/data/CCFQueue.mli +++ b/src/data/CCFQueue.mli @@ -114,6 +114,7 @@ val map : ('a -> 'b) -> 'a t -> 'b t (** Map values *) val (>|=) : 'a t -> ('a -> 'b) -> 'b t +(** Synonym to {!map} *) val size : 'a t -> int (** Number of elements in the queue (constant time) *) @@ -138,3 +139,7 @@ val of_seq : 'a sequence -> 'a t val to_klist : 'a t -> 'a klist val of_klist : 'a klist -> 'a t +val (--) : int -> int -> int t +(** [a -- b] is the integer range from [a] to [b], both included. + @since NEXT_RELEASE *) + From 2d9988f0800cc7c36cb66e21ec85149149ee4ed8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 15 Mar 2015 22:51:10 +0100 Subject: [PATCH 12/46] add tests to CCFQueue --- src/data/CCFQueue.ml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/data/CCFQueue.ml b/src/data/CCFQueue.ml index 96c62b92..682274ec 100644 --- a/src/data/CCFQueue.ml +++ b/src/data/CCFQueue.ml @@ -232,10 +232,20 @@ let init q = try fst (take_back_exn q) with Empty -> q +(*$Q + (Q.list Q.int) (fun l -> \ + l = [] || (of_list l |> init |> to_list = List.rev (List.tl (List.rev l)))) +*) + let tail q = try snd (take_front_exn q) with Empty -> q +(*$Q + (Q.list Q.int) (fun l -> \ + l = [] || (of_list l |> tail |> to_list = List.tl l)) +*) + let add_seq_front seq q = let q = ref q in seq (fun x -> q := cons x !q); @@ -260,6 +270,11 @@ let rec to_seq : 'a. 'a t -> 'a sequence to_seq q' (fun (x,y) -> k x; k y); _digit_to_seq tail k +(*$Q + (Q.list Q.int) (fun l -> \ + of_list l |> to_seq |> Sequence.to_list = l) +*) + let append q1 q2 = match q1, q2 with | Shallow Zero, _ -> q2 @@ -359,6 +374,11 @@ let rec _equal_klist eq l1 l2 = match l1(), l2() with let equal eq q1 q2 = _equal_klist eq (to_klist q1) (to_klist q2) +(*$T + let q1 = 1 -- 10 and q2 = append (1 -- 5) (6 -- 10) in \ + equal (=) q1 q2 +*) + let (--) a b = let rec up_to q a b = if a = b then snoc q a @@ -367,3 +387,10 @@ let (--) a b = else down_to (snoc q a) (a-1) b in if a <= b then up_to empty a b else down_to empty a b + +(*$T + 1 -- 5 |> to_list = [1;2;3;4;5] + 5 -- 1 |> to_list = [5;4;3;2;1] + 0 -- 0 |> to_list = [0] +*) + From 27e63e6cfa77b49da80edbef047eb95a694f54f0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 16 Mar 2015 00:25:42 +0100 Subject: [PATCH 13/46] many more tests for CCFQueue --- src/data/CCFQueue.ml | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/data/CCFQueue.ml b/src/data/CCFQueue.ml index 682274ec..62a799b6 100644 --- a/src/data/CCFQueue.ml +++ b/src/data/CCFQueue.ml @@ -74,6 +74,11 @@ let rec cons : 'a. 'a -> 'a t -> 'a t | Deep (n,Three (y,z,z'), lazy q', tail) -> _deep (n+1) (Two (x,y)) (lazy (cons (z,z') q')) tail +(*$Q + (Q.pair Q.int (Q.list Q.int)) (fun (x,l) -> \ + cons x (of_list l) |> to_list = x::l) + *) + let rec snoc : 'a. 'a t -> 'a -> 'a t = fun q x -> match q with | Shallow Zero -> _single x @@ -87,6 +92,11 @@ let rec snoc : 'a. 'a t -> 'a -> 'a t | Deep (n,hd, lazy q', Three (y,z,z')) -> _deep (n+1) hd (lazy (snoc q' (y,z))) (Two(z',x)) +(*$Q + (Q.pair Q.int (Q.list Q.int)) (fun (x,l) -> \ + snoc (of_list l) x |> to_list = l @ [x]) + *) + let rec take_front_exn : 'a. 'a t -> ('a *'a t) = fun q -> match q with | Shallow Zero -> raise Empty @@ -105,6 +115,12 @@ let rec take_front_exn : 'a. 'a t -> ('a *'a t) | Deep (n,Three (x,y,z), middle, tail) -> x, _deep (n-1) (Two(y,z)) middle tail +(*$Q + (Q.pair Q.int (Q.list Q.int)) (fun (x,l) -> \ + let x', q = cons x (of_list l) |> take_front_exn in \ + x'=x && to_list q = l) + *) + let take_front q = try Some (take_front_exn q) with Empty -> None @@ -141,6 +157,12 @@ let rec take_back_exn : 'a. 'a t -> 'a t * 'a | Deep (n, hd, middle, Two(x,y)) -> _deep (n-1) hd middle (One x), y | Deep (n, hd, middle, Three(x,y,z)) -> _deep (n-1) hd middle (Two (x,y)), z +(*$Q + (Q.pair Q.int (Q.list Q.int)) (fun (x,l) -> \ + let q,x' = snoc (of_list l) x |> take_back_exn in \ + x'=x && to_list q = l) + *) + let take_back q = try Some (take_back_exn q) with Empty -> None @@ -186,6 +208,11 @@ let size : 'a. 'a t -> int | Shallow d -> _size_digit d | Deep (n, _, _, _) -> n +(*$Q + (Q.list Q.int) (fun l -> \ + size (of_list l) = List.length l) +*) + let _nth_digit i d = match i, d with | _, Zero -> raise Not_found | 0, One x -> x @@ -281,6 +308,11 @@ let append q1 q2 = | _, Shallow Zero -> q1 | _ -> add_seq_back q1 (to_seq q2) +(*$Q + (Q.pair (Q.list Q.int)(Q.list Q.int)) (fun (l1,l2) -> \ + append (of_list l1) (of_list l2) |> to_list = l1 @ l2) +*) + let _map_digit f d = match d with | Zero -> Zero | One x -> One (f x) @@ -294,6 +326,11 @@ let rec map : 'a 'b. ('a -> 'b) -> 'a t -> 'b t let q'' = map (fun (x,y) -> f x, f y) q' in _deep size (_map_digit f hd) (Lazy.from_val q'') (_map_digit f tl) +(*$Q + (Q.list Q.int) (fun l -> \ + of_list l |> map string_of_int |> to_list = List.map string_of_int l) +*) + let (>|=) q f = map f q let _fold_digit f acc d = match d with @@ -310,6 +347,11 @@ let rec fold : 'a 'b. ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b let acc = fold (fun acc (x,y) -> f (f acc x) y) acc q' in _fold_digit f acc tl +(*$Q + (Q.list Q.int) (fun l -> \ + of_list l |> fold (fun acc x->x::acc) [] = List.rev l) +*) + let iter f q = to_seq q f let of_list l = List.fold_left snoc empty l From d4ccbccbb82eae693cb24decf40720d5fde7b51c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 16 Mar 2015 23:22:03 +0100 Subject: [PATCH 14/46] add CCFQueue.rev --- src/data/CCFQueue.ml | 10 ++++++++++ src/data/CCFQueue.mli | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/src/data/CCFQueue.ml b/src/data/CCFQueue.ml index 62a799b6..09290e8a 100644 --- a/src/data/CCFQueue.ml +++ b/src/data/CCFQueue.ml @@ -371,6 +371,16 @@ let of_seq seq = Sequence.of_list l |> of_seq |> to_list = l) *) +let rev q = + let q' = ref empty in + iter (fun x -> q' := cons x !q') q; + !q' + +(*$Q + (Q.list Q.int) (fun l -> \ + of_list l |> rev |> to_list = List.rev l) +*) + let _nil () = `Nil let _single x cont () = `Cons (x, cont) let _double x y cont () = `Cons (x, _single y cont) diff --git a/src/data/CCFQueue.mli b/src/data/CCFQueue.mli index 3c5d4ab4..6a112b7b 100644 --- a/src/data/CCFQueue.mli +++ b/src/data/CCFQueue.mli @@ -110,6 +110,10 @@ val append : 'a t -> 'a t -> 'a t after elements of the first one. Linear in the size of the second queue. *) +val rev : 'a t -> 'a t +(** Reverse the queue, O(n) complexity + @since NEXT_RELEASE *) + val map : ('a -> 'b) -> 'a t -> 'b t (** Map values *) @@ -131,6 +135,7 @@ val of_list : 'a list -> 'a t val to_list : 'a t -> 'a list val add_seq_front : 'a sequence -> 'a t -> 'a t + val add_seq_back : 'a t -> 'a sequence -> 'a t val to_seq : 'a t -> 'a sequence From c1313e094df252f13b352ee85995ced3fca07c41 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 17 Mar 2015 01:01:06 +0100 Subject: [PATCH 15/46] more tests! yay! --- src/data/CCFQueue.ml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/data/CCFQueue.ml b/src/data/CCFQueue.ml index 09290e8a..831f88a7 100644 --- a/src/data/CCFQueue.ml +++ b/src/data/CCFQueue.ml @@ -133,6 +133,11 @@ let take_front_l n q = aux (x::acc) q' (n-1) in aux [] q n +(*$T + let l, q = take_front_l 5 (1 -- 10) in \ + l = [1;2;3;4;5] && to_list q = [6;7;8;9;10] +*) + let take_front_while p q = let rec aux acc q = if is_empty q then List.rev acc, q @@ -141,6 +146,10 @@ let take_front_while p q = if p x then aux (x::acc) q' else List.rev acc, q in aux [] q +(*$T + take_front_while (fun x-> x<5) (1 -- 10) |> fst = [1;2;3;4] +*) + let rec take_back_exn : 'a. 'a t -> 'a t * 'a = fun q -> match q with | Shallow Zero -> invalid_arg "FQueue.take_back_exn" @@ -255,6 +264,13 @@ let nth i q = try Some (nth_exn i q) with Failure _ -> None +(*$Q + (Q.list Q.int) (fun l -> \ + let len = List.length l in let idx = CCList.(0 -- (len - 1)) in \ + let q = of_list l in \ + l = [] || List.for_all (fun i -> nth i q = Some (List.nth l i)) idx) +*) + let init q = try fst (take_back_exn q) with Empty -> q From 4b409ca34fb1885eaf9508dc85481cb697209380 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 21 Mar 2015 16:10:48 +0100 Subject: [PATCH 16/46] bugfix in CCFQueue.add_seq_front --- src/data/CCFQueue.ml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/data/CCFQueue.ml b/src/data/CCFQueue.ml index 831f88a7..0f828d8c 100644 --- a/src/data/CCFQueue.ml +++ b/src/data/CCFQueue.ml @@ -290,9 +290,15 @@ let tail q = *) let add_seq_front seq q = - let q = ref q in - seq (fun x -> q := cons x !q); - !q + let l = ref [] in + (* reversed seq *) + seq (fun x -> l := x :: !l); + List.fold_left (fun q x -> cons x q) q !l + +(*$Q + Q.(pair (list int) (list int)) (fun (l1, l2) -> \ + add_seq_front (Sequence.of_list l1) (of_list l2) |> to_list = l1 @ l2) + *) let add_seq_back q seq = let q = ref q in @@ -377,10 +383,7 @@ let to_list q = to_seq q (fun x -> l := x :: !l); List.rev !l -let of_seq seq = - let l = ref [] in - seq (fun x -> l := x :: !l); - List.fold_left (fun q x -> cons x q) empty !l +let of_seq seq = add_seq_front seq empty (*$Q (Q.list Q.int) (fun l -> \ From 2a2250faa6009f32272929808d1f0f552ea2d531 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 21 Mar 2015 21:44:27 +0100 Subject: [PATCH 17/46] wip: CCIntMap (big-endian patricia trees) in containers.data --- _oasis | 2 +- doc/intro.txt | 1 + src/data/CCIntMap.ml | 220 ++++++++++++++++++++++++++++++++++++++++++ src/data/CCIntMap.mli | 93 ++++++++++++++++++ 4 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 src/data/CCIntMap.ml create mode 100644 src/data/CCIntMap.mli diff --git a/_oasis b/_oasis index 59a5b0c4..1bc41b99 100644 --- a/_oasis +++ b/_oasis @@ -71,7 +71,7 @@ Library "containers_data" Path: src/data Modules: CCMultiMap, CCMultiSet, CCTrie, CCFlatHashtbl, CCCache, CCPersistentHashtbl, CCDeque, CCFQueue, CCBV, CCMixtbl, - CCMixmap, CCRingBuffer + CCMixmap, CCRingBuffer, CCIntMap BuildDepends: bytes FindlibParent: containers FindlibName: data diff --git a/doc/intro.txt b/doc/intro.txt index 3bd2e895..8c23d683 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -66,6 +66,7 @@ CCBV CCCache CCFQueue CCFlatHashtbl +CCIntMap CCMixmap CCMixtbl CCMultiMap diff --git a/src/data/CCIntMap.ml b/src/data/CCIntMap.ml new file mode 100644 index 00000000..17822b71 --- /dev/null +++ b/src/data/CCIntMap.ml @@ -0,0 +1,220 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Map specialized for Int keys} *) + +(* "Fast Mergeable Integer Maps", Okasaki & Gill. +We use big-endian trees. *) + +type 'a t = + | E (* empty *) + | L of int * 'a (* leaf *) + | N of int (* common prefix *) * int (* bit switch *) * 'a t * 'a t + +let empty = E + +let bit_is_0_ x ~bit = x land bit = 0 + +let mask_ x ~mask = (x lor (mask -1)) land (lnot mask) +(* low endian: let mask_ x ~mask = x land (mask - 1) *) + +let is_prefix_ ~prefix y ~bit = prefix = mask_ y ~mask:bit + +let lowest_bit_ a = a land (- a) + +(* loop down until x=lowest_bit_ x *) +let rec highest_bit x = + let m = lowest_bit_ x in + if x = m then m else highest_bit (x-m) + +(*$Q + Q.int (fun i -> \ + let b = highest_bit i in \ + i < 0 || (b <= i && (i-b) < b)) +*) + +(* helper: + + let b_of_i i = + let rec f acc i = + if i=0 then acc else let q, r = i/2, i mod 2 + in + f (r::acc) q in f [] i;; +*) + +(* low endian: let branching_bit_ a _ b _ = lowest_bit_ (a lxor b) *) +let branching_bit_ a b = + highest_bit (a lxor b) + +let rec find_exn k t = match t with + | E -> raise Not_found + | L (k', v) when k = k' -> v + | L _ -> raise Not_found + | N (prefix, _, l, r) -> + if k <= prefix (* search tree *) + then find_exn k l + else find_exn k r + +let find k t = + try Some (find_exn k t) + with Not_found -> None + +let mem k t = + try ignore (find_exn k t); true + with Not_found -> false + +let mk_node_ prefix switch l r = match l, r with + | E, o | o, E -> o + | _ -> N (prefix, switch, l, r) + +(* join trees t1 and t2 with prefix p1 and p2 respectively + (p1 and p2 do not overlap) *) +let join_ t1 p1 t2 p2 = + let switch = branching_bit_ p1 p2 in + let prefix = mask_ p1 ~mask:switch in + if bit_is_0_ p1 ~bit:switch + then mk_node_ prefix switch t1 t2 + else (assert (bit_is_0_ p2 ~bit:switch); mk_node_ prefix switch t2 t1) + +let singleton k v = L (k, v) + +(* c: conflict function *) +let rec insert_ c k v t = match t with + | E -> L (k, v) + | L (k', v') -> + if k=k' + then L (k, c ~old:v' v) + else join_ t k' (L (k, v)) k + | N (prefix, switch, l, r) -> + if is_prefix_ ~prefix k ~bit:switch + then if bit_is_0_ k ~bit:switch + then N(prefix, switch, insert_ c k v l, r) + else N(prefix, switch, l, insert_ c k v r) + else join_ (L(k,v)) k t prefix + +let add k v t = insert_ (fun ~old:_ v -> v) k v t + +(*$Q + Q.(list (pair int int)) (fun l -> \ + let l = CCList.Set.uniq l in let m = of_list l in \ + List.for_all (fun (k,v) -> find_exn k m = v) l) +*) + +let rec remove k t = match t with + | E -> E + | L (k', _) -> if k=k' then E else t + | N (prefix, switch, l, r) -> + if is_prefix_ ~prefix k ~bit:switch + then if bit_is_0_ k ~bit:switch + then mk_node_ prefix switch (remove k l) r + else mk_node_ prefix switch l (remove k r) + else t (* not present *) + +let update k f t = + try + let v = find_exn k t in + begin match f (Some v) with + | None -> remove k t + | Some v' -> add k v' t + end + with Not_found -> + match f None with + | None -> t + | Some v -> add k v t + +let doubleton k1 v1 k2 v2 = add k1 v1 (singleton k2 v2) + +let rec iter f t = match t with + | E -> () + | L (k, v) -> f k v + | N (_, _, l, r) -> iter f l; iter f r + +let rec fold f t acc = match t with + | E -> acc + | L (k, v) -> f k v acc + | N (_, _, l, r) -> + let acc = fold f l acc in + fold f r acc + +let cardinal t = fold (fun _ _ n -> n+1) t 0 + +let rec choose_exn = function + | E -> raise Not_found + | L (k, v) -> k, v + | N (_, _, l, _) -> choose_exn l + +let choose t = + try Some (choose_exn t) + with Not_found -> None + +let union _ _ _ = assert false + +let inter _ _ _ = assert false + +(** {2 Whole-collection operations} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +let add_list t l = List.fold_left (fun t (k,v) -> add k v t) t l + +let of_list l = add_list empty l + +let to_list t = fold (fun k v l -> (k,v) :: l) t [] + +(*$Q + Q.(list (pair int int)) (fun l -> \ + let l = List.map (fun (k,v) -> abs k,v) l in \ + let rec is_sorted = function [] | [_] -> true \ + | x::y::tail -> x <= y && is_sorted (y::tail) in \ + of_list l |> to_list |> List.rev_map fst |> is_sorted) +*) + +(*$Q + Q.(list (pair int int)) (fun l -> \ + of_list l |> cardinal = List.length l) + *) + +let add_seq t seq = + let t = ref t in + seq (fun (k,v) -> t := add k v !t); + !t + +let of_seq seq = add_seq empty seq + +let to_seq t yield = iter (fun k v -> yield (k,v)) t + +let keys t yield = iter (fun k _ -> yield k) t + +let values t yield = iter (fun _ v -> yield v) t + +type 'a tree = unit -> [`Nil | `Node of 'a * 'a tree list] + +let rec as_tree t () = match t with + | E -> `Nil + | L (k, v) -> `Node (`Leaf (k, v), []) + | N (prefix, switch, l, r) -> + `Node (`Node (prefix, switch), [as_tree l; as_tree r]) diff --git a/src/data/CCIntMap.mli b/src/data/CCIntMap.mli new file mode 100644 index 00000000..68269317 --- /dev/null +++ b/src/data/CCIntMap.mli @@ -0,0 +1,93 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Map specialized for Int keys} *) + +type 'a t + +val empty : 'a t + +val singleton : int -> 'a -> 'a t + +val doubleton : int -> 'a -> int -> 'a -> 'a t + +val mem : int -> _ t -> bool + +val find : int -> 'a t -> 'a option + +val find_exn : int -> 'a t -> 'a +(** Same as {!find} but unsafe + @raise Not_found if key not present *) + +val add : int -> 'a -> 'a t -> 'a t + +val remove : int -> 'a t -> 'a t + +val update : int -> ('a option -> 'a option) -> 'a t -> 'a t + +val cardinal : _ t -> int + +val iter : (int -> 'a -> unit) -> 'a t -> unit + +val fold : (int -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b + +val choose : 'a t -> (int * 'a) option + +val choose_exn : 'a t -> int * 'a + +val union : (int -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t + +val inter : (int -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t + +(** {2 Whole-collection operations} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +val add_list : 'a t -> (int * 'a) list -> 'a t + +val of_list : (int * 'a) list -> 'a t + +val to_list : 'a t -> (int * 'a) list + +val add_seq : 'a t -> (int * 'a) sequence -> 'a t + +val of_seq : (int * 'a) sequence -> 'a t + +val to_seq : 'a t -> (int * 'a) sequence + +val keys : _ t -> int sequence + +val values : 'a t -> 'a sequence + + +(** Helpers *) + +val highest_bit : int -> int + +type 'a tree = unit -> [`Nil | `Node of 'a * 'a tree list] + +val as_tree : 'a t -> [`Node of int * int | `Leaf of int * 'a ] tree From a5a45efa8c87ffa22465100f56c82a8491acd203 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 21 Mar 2015 22:11:26 +0100 Subject: [PATCH 18/46] kind of fix the highest_bit function --- src/data/CCIntMap.ml | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/data/CCIntMap.ml b/src/data/CCIntMap.ml index 17822b71..fe59398b 100644 --- a/src/data/CCIntMap.ml +++ b/src/data/CCIntMap.ml @@ -43,12 +43,21 @@ let mask_ x ~mask = (x lor (mask -1)) land (lnot mask) let is_prefix_ ~prefix y ~bit = prefix = mask_ y ~mask:bit -let lowest_bit_ a = a land (- a) - (* loop down until x=lowest_bit_ x *) -let rec highest_bit x = - let m = lowest_bit_ x in - if x = m then m else highest_bit (x-m) +let rec highest_bit_naive x m = + if m = 0 then 0 + else if x land m = 0 then highest_bit_naive x (m lsr 1) + else m + +let highest_bit = + (* the highest representable 2^n *) + let max_log = 1 lsl (Sys.word_size - 2) in + fun x -> + if x > 1 lsl 20 + then (* small shortcut: remove least significant 20 bits *) + let x' = x land (lnot ((1 lsl 20) -1)) in + highest_bit_naive x' max_log + else highest_bit_naive x max_log (*$Q Q.int (fun i -> \ @@ -120,9 +129,11 @@ let add k v t = insert_ (fun ~old:_ v -> v) k v t (*$Q Q.(list (pair int int)) (fun l -> \ let l = CCList.Set.uniq l in let m = of_list l in \ - List.for_all (fun (k,v) -> find_exn k m = v) l) + List.for_all (fun (k,v) -> k < 0 || find_exn k m = v) l) *) +(* TODO: fix the previous test *) + let rec remove k t = match t with | E -> E | L (k', _) -> if k=k' then E else t From 1cdd678eb366d53088f83158b5ac956661521ea8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 21 Mar 2015 22:14:29 +0100 Subject: [PATCH 19/46] fix: CCIntMap not a search tree for negative keys, apparently --- src/data/CCIntMap.ml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/data/CCIntMap.ml b/src/data/CCIntMap.ml index fe59398b..200d0ec6 100644 --- a/src/data/CCIntMap.ml +++ b/src/data/CCIntMap.ml @@ -82,10 +82,18 @@ let rec find_exn k t = match t with | E -> raise Not_found | L (k', v) when k = k' -> v | L _ -> raise Not_found - | N (prefix, _, l, r) -> + | N (prefix, m, l, r) -> + if is_prefix_ ~prefix k ~bit:m + then if bit_is_0_ k ~bit:m + then find_exn k l + else find_exn k r + else raise Not_found + + (* FIXME: valid if k < 0? if k <= prefix (* search tree *) then find_exn k l else find_exn k r + *) let find k t = try Some (find_exn k t) @@ -129,11 +137,9 @@ let add k v t = insert_ (fun ~old:_ v -> v) k v t (*$Q Q.(list (pair int int)) (fun l -> \ let l = CCList.Set.uniq l in let m = of_list l in \ - List.for_all (fun (k,v) -> k < 0 || find_exn k m = v) l) + List.for_all (fun (k,v) -> find_exn k m = v) l) *) -(* TODO: fix the previous test *) - let rec remove k t = match t with | E -> E | L (k', _) -> if k=k' then E else t From 26c1f873117eaa9d9ef782bb113d62c9843832d3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 21 Mar 2015 22:47:14 +0100 Subject: [PATCH 20/46] wip: inter/union for CCIntMap --- src/data/CCIntMap.ml | 43 +++++++++++++++++++++++++++++++++++++++++-- src/data/CCIntMap.mli | 4 ++-- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/data/CCIntMap.ml b/src/data/CCIntMap.ml index 200d0ec6..7a0fed15 100644 --- a/src/data/CCIntMap.ml +++ b/src/data/CCIntMap.ml @@ -187,9 +187,48 @@ let choose t = try Some (choose_exn t) with Not_found -> None -let union _ _ _ = assert false +let rec union f a b = match a, b with + | E, o | o, E -> o + | L (k, v), o + | o, L (k, v) -> + (* insert k, v into o *) + insert_ (fun ~old v -> f k old v) k v o + | N (p1, m1, l1, r1), N (p2, m2, l2, r2) -> + if p1 = p2 && m1 = m2 + then mk_node_ p1 m1 (union f l1 l2) (union f r1 r2) + else if m1 < m2 && is_prefix_ ~prefix:p2 p1 ~bit:m1 + then if bit_is_0_ p2 ~bit:m1 + then N (p1, m1, union f l1 b, r1) + else N (p1, m1, l1, union f r1 b) + else if m1 > m2 && is_prefix_ ~prefix:p1 p2 ~bit:m2 + then if bit_is_0_ p1 ~bit:m2 + then N (p2, m2, union f l2 a, r2) + else N (p2, m2, l2, union f r2 a) + else join_ a p1 b p2 -let inter _ _ _ = assert false +let rec inter f a b = match a, b with + | E, _ | _, E -> E + | L (k, v), o + | o, L (k, v) -> + begin try + let v' = find_exn k o in + L (k, f k v v') + with Not_found -> E + end + | N (p1, m1, l1, r1), N (p2, m2, l2, r2) -> + if p1 = p2 && m1 = m2 + then mk_node_ p1 m1 (inter f l1 l2) (inter f r1 r2) + else if m1 < m2 && is_prefix_ ~prefix:p2 p1 ~bit:m1 + then if bit_is_0_ p2 ~bit:m1 + then inter f l1 b + else inter f r1 b + else if m1 > m2 && is_prefix_ ~prefix:p1 p2 ~bit:m2 + then if bit_is_0_ p1 ~bit:m2 + then inter f l2 a + else inter f r2 a + else E + +(* TODO: write tests *) (** {2 Whole-collection operations} *) diff --git a/src/data/CCIntMap.mli b/src/data/CCIntMap.mli index 68269317..4caf0d14 100644 --- a/src/data/CCIntMap.mli +++ b/src/data/CCIntMap.mli @@ -58,9 +58,9 @@ val choose : 'a t -> (int * 'a) option val choose_exn : 'a t -> int * 'a -val union : (int -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t +val union : (int -> 'a -> 'a -> 'a) -> 'a t -> 'a t -> 'a t -val inter : (int -> 'a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t +val inter : (int -> 'a -> 'a -> 'a) -> 'a t -> 'a t -> 'a t (** {2 Whole-collection operations} *) From ab110b694d8f8f4dcbe3fc4f7b25a625ed0be18a Mon Sep 17 00:00:00 2001 From: Guillaume Bury Date: Mon, 23 Mar 2015 17:59:34 +0100 Subject: [PATCH 21/46] New module for Persistent Arrays --- _oasis | 2 +- doc/intro.txt | 1 + src/data/CCPersistentArray.ml | 80 +++++++++++++++++++++++++++++ src/data/CCPersistentArray.mli | 92 ++++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 src/data/CCPersistentArray.ml create mode 100644 src/data/CCPersistentArray.mli diff --git a/_oasis b/_oasis index 1bc41b99..cda0e31d 100644 --- a/_oasis +++ b/_oasis @@ -71,7 +71,7 @@ Library "containers_data" Path: src/data Modules: CCMultiMap, CCMultiSet, CCTrie, CCFlatHashtbl, CCCache, CCPersistentHashtbl, CCDeque, CCFQueue, CCBV, CCMixtbl, - CCMixmap, CCRingBuffer, CCIntMap + CCMixmap, CCRingBuffer, CCIntMap, CCPersistentArray BuildDepends: bytes FindlibParent: containers FindlibName: data diff --git a/doc/intro.txt b/doc/intro.txt index 8c23d683..3db1fe94 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -71,6 +71,7 @@ CCMixmap CCMixtbl CCMultiMap CCMultiSet +CCPersistentArray CCPersistentHashtbl CCRingBuffer CCTrie diff --git a/src/data/CCPersistentArray.ml b/src/data/CCPersistentArray.ml new file mode 100644 index 00000000..fb7b3991 --- /dev/null +++ b/src/data/CCPersistentArray.ml @@ -0,0 +1,80 @@ +(* +copyright (c) 2013-2015, Guillaume Bury +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + + +(* Persistent arrays *) + +type 'a t = 'a data ref +and 'a data = + | Array of 'a array + | Diff of int * 'a * 'a t + +let make n a = ref (Array (Array.make n a)) +let init n f = ref (Array (Array.init n f)) + +let rec _reroot t k = match !t with + | Array a -> k a + | Diff (i, v, t') -> + _reroot t' (fun a -> + let v' = a.(i) in + a.(i) <- v; + t := Array a; + t' := Diff(i, v', t); + k a + ) + +let reroot t = match !t with + | Array a -> a + | _ -> _reroot t (fun x -> x) + +let get t i = match !t with + | Array a -> a.(i) + | _ -> (reroot t).(i) + +let set t i v = + let a = reroot t in + let old = a.(i) in + a.(i) <- v; + let t' = ref (Array a) in + t := Diff (i, old, t'); + t' + +let length t = Array.length (reroot t) + +let map f t = ref (Array (Array.map f (reroot t))) +let mapi f t = ref (Array (Array.mapi f (reroot t))) + +let iter f t = Array.iter f (reroot t) +let iteri f t = Array.iteri f (reroot t) + +let fold_left f acc t = Array.fold_left f acc (reroot t) +let fold_right f t acc = Array.fold_right f (reroot t) acc + +let to_array t = Array.copy (reroot t) +let of_array a = init (Array.length a) (fun i -> a.(i)) + +let to_list t = Array.to_list (reroot t) +let of_list l = ref (Array (Array.of_list l)) + diff --git a/src/data/CCPersistentArray.mli b/src/data/CCPersistentArray.mli new file mode 100644 index 00000000..b8a77166 --- /dev/null +++ b/src/data/CCPersistentArray.mli @@ -0,0 +1,92 @@ +(* +copyright (c) 2013-2015, Guillaume Bury +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + + +(** {1 Persistent Arrays} + +@since NEXT_RELEASE *) + +type 'a t +(** The type of persistent arrays *) + +val make : int -> 'a -> 'a t +(** [make n x] returns a persistent array of length n, with [x]. All the + elements of this new array are initially physically equal to x + (in the sense of the == predicate). Consequently, if x is mutable, it is + shared among all elements of the array, and modifying x through one of the + array entries will modify all other entries at the same time. + @raise Invalid_argument if [n < 0] or [n > Sys.max_array_length]. + If the value of x is a floating-point number, then the maximum size is + only [Sys.max_array_length / 2].*) + +val init : int -> (int -> 'a) -> 'a t +(** [make n f] returns a persistent array of length n, with element + [i] initialized to the result of [f i]. + @raise Invalid_argument if [n < 0] or [n > Sys.max_array_length]. + If the value of x is a floating-point number, then the maximum size is + only [Sys.max_array_length / 2].*) + +val get : 'a t -> int -> 'a +(** [get a i] Returns the element with index [i] from the array [a]. + @raise Invalid_argument "index out of bounds" if [n] is outside the + range [0] to [Array.length a - 1].*) + +val set : 'a t -> int -> 'a -> 'a t +(** [set a i v] sets the element index [i] from the array [a] to [v]. + @raise Invalid_argument "index out of bounds" if [n] is outside the + range [0] to [Array.length a - 1].*) + +val length : 'a t -> int +(** Returns the length of the persistent array. *) + +val map : ('a -> 'b) -> 'a t -> 'b t +val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t +(** Applies the given function to all elements of the array, and returns + a persistent array initialized by the results of f. In the case of [mapi], + the function is also given the index of the element. + It is equivalent to [fun f t -> init (fun i -> f (get t i))]. *) + +val iter : ('a -> unit) -> 'a t -> unit +val iteri : (int -> 'a -> unit) -> 'a t -> unit +(** [iter f t] applies function [f] to all elements of the persistent array, in order + from element [0] to element [length t - 1]. *) + +val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a +val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b +(** Fold on the elements of the array. *) + +val to_array : 'a t -> 'a array +(** [to_array t] returns a mutable copy of [t]. *) + +val of_array : 'a array -> 'a t +(** [from_array a] returns an immutable copy of [a]. *) + +val to_list : 'a t -> 'a list +(** [to_list t] returns the list of elements in [t]. *) + +val of_list : 'a list -> 'a t +(** [of_list l] returns a fresh persistent array containing the elements of [l]. *) + + From 9a9eb486a8be6206718302e10730a756d68a58b4 Mon Sep 17 00:00:00 2001 From: Guillaume Bury Date: Mon, 23 Mar 2015 18:07:19 +0100 Subject: [PATCH 22/46] Updated authors --- AUTHORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.md b/AUTHORS.md index 29251ed6..4a690488 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -10,3 +10,4 @@ - Bernardo da Costa - Vincent Bernardoff (vbmithr) - Emmanuel Surleau (emm) +- Guillaume Bury (guigui) From adee01be65eaa134c100511fb44db8fc9c661bc3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Mar 2015 10:06:41 +0100 Subject: [PATCH 23/46] =?UTF-8?q?due=20reference=20to=20Jean-Christophe=20?= =?UTF-8?q?Filli=C3=A2tre=20in=20CCPersistentArray?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/data/CCPersistentArray.mli | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/data/CCPersistentArray.mli b/src/data/CCPersistentArray.mli index b8a77166..e1478c16 100644 --- a/src/data/CCPersistentArray.mli +++ b/src/data/CCPersistentArray.mli @@ -26,6 +26,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Persistent Arrays} +From the paper by Jean-Christophe Filliâtre, +"A persistent Union-Find data structure", see +{{: https://www.lri.fr/~filliatr/ftp/publis/puf-wml07.ps} the ps version} + @since NEXT_RELEASE *) type 'a t From 76f966aed31a55636ac4d34ab2f5f758ba6f91bf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Mar 2015 12:21:14 +0100 Subject: [PATCH 24/46] ccpersistentArray.copy --- src/data/CCPersistentArray.ml | 2 ++ src/data/CCPersistentArray.mli | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/data/CCPersistentArray.ml b/src/data/CCPersistentArray.ml index fb7b3991..f855f556 100644 --- a/src/data/CCPersistentArray.ml +++ b/src/data/CCPersistentArray.ml @@ -49,6 +49,8 @@ let reroot t = match !t with | Array a -> a | _ -> _reroot t (fun x -> x) +let copy t = ref (Array(Array.copy (reroot t))) + let get t i = match !t with | Array a -> a.(i) | _ -> (reroot t).(i) diff --git a/src/data/CCPersistentArray.mli b/src/data/CCPersistentArray.mli index e1478c16..d5200153 100644 --- a/src/data/CCPersistentArray.mli +++ b/src/data/CCPersistentArray.mli @@ -65,9 +65,12 @@ val set : 'a t -> int -> 'a -> 'a t val length : 'a t -> int (** Returns the length of the persistent array. *) +val copy : 'a t -> 'a t +(** [copy a] returns a fresh copy of [a]. Both copies are independent. *) + val map : ('a -> 'b) -> 'a t -> 'b t val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t -(** Applies the given function to all elements of the array, and returns +(** Applies the given function to all elements of the array, and returns a persistent array initialized by the results of f. In the case of [mapi], the function is also given the index of the element. It is equivalent to [fun f t -> init (fun i -> f (get t i))]. *) From 4c22a770f87d934091d3fd4700cdf201ba3a2fce Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Mar 2015 13:39:39 +0100 Subject: [PATCH 25/46] rename benchmarks; doc --- _oasis | 4 ++-- benchs/{bench_hash.ml => run_bench_hash.ml} | 0 src/data/CCPersistentArray.mli | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) rename benchs/{bench_hash.ml => run_bench_hash.ml} (100%) diff --git a/_oasis b/_oasis index cda0e31d..60f7567d 100644 --- a/_oasis +++ b/_oasis @@ -161,12 +161,12 @@ Executable run_benchs containers.data, containers.string, containers.iter, sequence, gen, benchmark -Executable bench_hash +Executable run_bench_hash Path: benchs/ Install: false CompiledObject: best Build$: flag(bench) && flag(misc) - MainIs: bench_hash.ml + MainIs: run_bench_hash.ml BuildDepends: containers, containers.misc Executable run_test_future diff --git a/benchs/bench_hash.ml b/benchs/run_bench_hash.ml similarity index 100% rename from benchs/bench_hash.ml rename to benchs/run_bench_hash.ml diff --git a/src/data/CCPersistentArray.mli b/src/data/CCPersistentArray.mli index d5200153..914f5e95 100644 --- a/src/data/CCPersistentArray.mli +++ b/src/data/CCPersistentArray.mli @@ -77,8 +77,8 @@ val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t val iter : ('a -> unit) -> 'a t -> unit val iteri : (int -> 'a -> unit) -> 'a t -> unit -(** [iter f t] applies function [f] to all elements of the persistent array, in order - from element [0] to element [length t - 1]. *) +(** [iter f t] applies function [f] to all elements of the persistent array, + in order from element [0] to element [length t - 1]. *) val fold_left : ('a -> 'b -> 'a) -> 'a -> 'b t -> 'a val fold_right : ('a -> 'b -> 'b) -> 'a t -> 'b -> 'b From 3b3a69716818a23f736ce022f382d7fa9e5d974b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Mar 2015 13:49:25 +0100 Subject: [PATCH 26/46] benchmark --- benchs/run_benchs.ml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 3c2e5e22..5fa29837 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -353,6 +353,12 @@ module Tbl = struct ignore (Array.get a i); done + let persistent_array_find a = + fun n -> + for i = 0 to n-1 do + ignore (CCPersistentArray.get a i); + done + let imap_find m = fun n -> for i = 0 to n-1 do @@ -370,7 +376,8 @@ module Tbl = struct let h' = hashtbl_add n in let h'' = ihashtbl_add n in let h''''' = ipersistenthashtbl_add n in - let a = Array.init n (fun i -> string_of_int i) in + let a = Array.init n string_of_int in + let pa = CCPersistentArray.init n string_of_int in let m = imap_add n in let h'''''' = icchashtbl_add n in B.throughputN 3 [ @@ -379,6 +386,7 @@ module Tbl = struct "ihashtbl_find", (fun () -> ihashtbl_find h'' n), (); "ipersistenthashtbl_find", (fun () -> ipersistenthashtbl_find h''''' n), (); "array_find", (fun () -> array_find a n), (); + "persistent_array_find", (fun () -> persistent_array_find pa n), (); "imap_find", (fun () -> imap_find m n), (); "cchashtbl_find", (fun () -> icchashtbl_find h'''''' n), (); ] From da8566910b4ad606055dabe5ec413131a6d2b866 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Mar 2015 14:50:13 +0100 Subject: [PATCH 27/46] breaking: fix documentation of CCList.sorted_merge (different semantics) --- src/core/CCList.mli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 57a2944d..7ea28fdd 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -118,7 +118,7 @@ val filter_map : ('a -> 'b option) -> 'a t -> 'b t (** Map and remove elements at the same time *) val sorted_merge : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list -> 'a list -(** merges elements from both sorted list, removing duplicates *) +(** merges elements from both sorted list *) val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list (** Sort the list and remove duplicate elements *) From 4ab5a709add4325f93f2dd13842e3baf6134f9b6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Mar 2015 14:50:31 +0100 Subject: [PATCH 28/46] add `CCList.{sorted_merge_uniq, uniq_succ}` --- src/core/CCList.ml | 56 +++++++++++++++++++++++++++++++++++++++++++++ src/core/CCList.mli | 12 ++++++++++ 2 files changed, 68 insertions(+) diff --git a/src/core/CCList.ml b/src/core/CCList.ml index 48846818..ea84429e 100644 --- a/src/core/CCList.ml +++ b/src/core/CCList.ml @@ -231,6 +231,12 @@ let sorted_merge ?(cmp=Pervasives.compare) l1 l2 = (*$T List.sort Pervasives.compare ([(( * )2); ((+)1)] <*> [10;100]) \ = [11; 20; 101; 200] + sorted_merge [1;1;2] [1;2;3] = [1;1;1;2;2;3] +*) + +(*$Q + Q.(pair (list int) (list int)) (fun (l1,l2) -> \ + List.length (sorted_merge l1 l2) = List.length l1 + List.length l2) *) let sort_uniq (type elt) ?(cmp=Pervasives.compare) l = @@ -247,6 +253,56 @@ let sort_uniq (type elt) ?(cmp=Pervasives.compare) l = sort_uniq [10;10;10;10;1;10] = [1;10] *) +let uniq_succ ?(eq=(=)) l = + let rec f acc l = match l with + | [] -> List.rev acc + | [x] -> List.rev (x::acc) + | x :: ((y :: _) as tail) when eq x y -> f acc tail + | x :: tail -> f (x::acc) tail + in + f [] l + +(*$T + uniq_succ [1;1;2;3;1;6;6;4;6;1] = [1;2;3;1;6;4;6;1] +*) + +let sorted_merge_uniq ?(cmp=Pervasives.compare) l1 l2 = + let push ~cmp acc x = match acc with + | [] -> [x] + | y :: _ when cmp x y > 0 -> x :: acc + | _ -> acc (* duplicate, do not yield *) + in + let rec recurse ~cmp acc l1 l2 = match l1,l2 with + | [], l + | l, [] -> + let acc = List.fold_left (push ~cmp) acc l in + List.rev acc + | x1::l1', x2::l2' -> + let c = cmp x1 x2 in + if c < 0 then recurse ~cmp (push ~cmp acc x1) l1' l2 + else if c > 0 then recurse ~cmp (push ~cmp acc x2) l1 l2' + else recurse ~cmp acc l1 l2' (* drop one of the [x] *) + in + recurse ~cmp [] l1 l2 + +(*$T + sorted_merge_uniq [1; 1; 2; 3; 5; 8] [1; 2; 3; 4; 6; 8; 9; 9] = [1;2;3;4;5;6;8;9] +*) + +(*$Q + Q.(list int) (fun l -> \ + let l = List.sort Pervasives.compare l in \ + sorted_merge_uniq l [] = uniq_succ l) + Q.(list int) (fun l -> \ + let l = List.sort Pervasives.compare l in \ + sorted_merge_uniq [] l = uniq_succ l) + Q.(pair (list int) (list int)) (fun (l1, l2) -> \ + let l1 = List.sort Pervasives.compare l1 \ + and l2 = List.sort Pervasives.compare l2 in \ + let l3 = sorted_merge_uniq l1 l2 in \ + uniq_succ l3 = l3) +*) + let take n l = let rec direct i n l = match l with | [] -> [] diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 7ea28fdd..5fd0eb7a 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -123,6 +123,18 @@ val sorted_merge : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list -> 'a list val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list (** Sort the list and remove duplicate elements *) +val sorted_merge_uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list -> 'a list +(** [sorted_merge_uniq l1 l2] merges the sorted lists [l1] and [l2] and + removes duplicates + @since NEXT_RELEASE *) + +val uniq_succ : ?eq:('a -> 'a -> bool) -> 'a list -> 'a list +(** [uniq_succ l] removes duplicate elements that occur one next to the other. + Examples: + [uniq_succ [1;2;1] = [1;2;1]] + [uniq_succ [1;1;2] = [1;2] + @since NEXT_RELEASE *) + (** {2 Indices} *) module Idx : sig From 62426ed4dcd71d643d68ef5e7e90eba75a3109de Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Mar 2015 23:36:43 +0100 Subject: [PATCH 29/46] wip: CCUnix (with a small subprocess API, not working well yet) --- .ocamlinit | 3 ++ _oasis | 11 ++++++ opam | 1 + src/unix/.merlin | 2 ++ src/unix/CCUnix.ml | 81 +++++++++++++++++++++++++++++++++++++++++++++ src/unix/CCUnix.mli | 51 ++++++++++++++++++++++++++++ 6 files changed, 149 insertions(+) create mode 100644 src/unix/.merlin create mode 100644 src/unix/CCUnix.ml create mode 100644 src/unix/CCUnix.mli diff --git a/.ocamlinit b/.ocamlinit index 2d7217dd..ec0513a8 100644 --- a/.ocamlinit +++ b/.ocamlinit @@ -1,11 +1,13 @@ #use "topfind";; #thread #require "bigarray";; +#require "unix";; #directory "_build/src/core";; #directory "_build/src/misc";; #directory "_build/src/pervasives/";; #directory "_build/src/string";; #directory "_build/src/io";; +#directory "_build/src/unix";; #directory "_build/src/iter";; #directory "_build/src/data";; #directory "_build/src/sexp";; @@ -16,6 +18,7 @@ #load "containers_iter.cma";; #load "containers_data.cma";; #load "containers_io.cma";; +#load "containers_unix.cma";; #load "containers_sexp.cma";; #load "containers_string.cma";; #load "containers_pervasives.cma";; diff --git a/_oasis b/_oasis index 60f7567d..9b2f05df 100644 --- a/_oasis +++ b/_oasis @@ -25,6 +25,10 @@ Flag "misc" Description: Build the misc library, with experimental modules still susceptible to change Default: true +Flag "unix" + Description: Build the containers.unix library (depends on Unix) + Default: false + Flag "lwt" Description: Build modules which depend on Lwt Default: false @@ -60,6 +64,13 @@ Library "containers_io" FindlibParent: containers FindlibName: io +Library "containers_unix" + Path: src/unix + Modules: CCUnix + BuildDepends: bytes, unix + FindlibParent: containers + FindlibName: unix + Library "containers_sexp" Path: src/sexp Modules: CCSexp, CCSexpStream, CCSexpM diff --git a/opam b/opam index 10719d1a..4c53640f 100644 --- a/opam +++ b/opam @@ -12,6 +12,7 @@ build: [ "--%{lwt:enable}%-lwt" "--%{base-bigarray:enable}%-bigarray" "--%{sequence:enable}%-advanced" + "--%{base-unix:enable}%-unix" "--enable-docs" "--enable-misc"] [make "build"] diff --git a/src/unix/.merlin b/src/unix/.merlin new file mode 100644 index 00000000..9ed5b46a --- /dev/null +++ b/src/unix/.merlin @@ -0,0 +1,2 @@ +PKG unix +REC diff --git a/src/unix/CCUnix.ml b/src/unix/CCUnix.ml new file mode 100644 index 00000000..b7bd8dc2 --- /dev/null +++ b/src/unix/CCUnix.ml @@ -0,0 +1,81 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 High-level Functions on top of Unix} *) + +type 'a or_error = [`Ok of 'a | `Error of string] + +(** {2 Calling Commands} *) + +type cmd = string * string array +(** A command: program + arguments *) + +let cmd_of_sh s = "/bin/sh", [| "/bin/sh"; "-c"; s |] + +let int_of_process_status = function + | Unix.WEXITED i + | Unix.WSIGNALED i + | Unix.WSTOPPED i -> i + +let read_all ?(size=1024) ic = + let buf = ref (Bytes.create size) in + let len = ref 0 in + try + while true do + (* resize *) + if !len = Bytes.length !buf then ( + buf := Bytes.extend !buf 0 !len; + ); + assert (Bytes.length !buf > !len); + let n = input ic !buf !len (Bytes.length !buf - !len) in + len := !len + n; + if n = 0 then raise Exit; (* exhausted *) + done; + assert false (* never reached*) + with Exit -> + Bytes.sub_string !buf 0 !len + +let call ?(stdin="") cmd = + let cmd, args = match cmd with + | `Sh s -> cmd_of_sh s + | `Cmd (c, args) -> c, args + in + let oc, ic, errc = Unix.open_process_full cmd args in + (* send stdin *) + output_string ic stdin; + close_out ic; + (* read out and err *) + let out = read_all oc in + let err = read_all errc in + let status = Unix.close_process_full (oc, ic, errc) in + object + method stdout = out + method stderr = err + method status = status + method errcode = int_of_process_status status + end + + diff --git a/src/unix/CCUnix.mli b/src/unix/CCUnix.mli new file mode 100644 index 00000000..003f8406 --- /dev/null +++ b/src/unix/CCUnix.mli @@ -0,0 +1,51 @@ + +(* +copyright (c) 2013-2015, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 High-level Functions on top of Unix} + +Some useful functions built on top of Unix. + +@since NEXT_RELEASE *) + +type 'a or_error = [`Ok of 'a | `Error of string] + +(** {2 Calling Commands} *) + +type cmd = string * string array +(** A command: program + arguments *) + +val call : + ?stdin:string -> + [`Sh of string | `Cmd of cmd] -> + < stdout:string; + stderr:string; + status:Unix.process_status; + errcode:int; (** extracted from status *) + > + + + + From 6fb26288ad46fc107d064046d33a50724f3fe432 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 28 Mar 2015 12:00:08 +0100 Subject: [PATCH 30/46] richer CCUnix.call API, with tests --- Makefile | 2 ++ _oasis | 4 +-- src/data/CCIntMap.ml | 2 +- src/unix/CCUnix.ml | 82 +++++++++++++++++++++++++++++++------------- src/unix/CCUnix.mli | 35 ++++++++++++++++--- 5 files changed, 93 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index a7079cec..81d3621b 100644 --- a/Makefile +++ b/Makefile @@ -65,6 +65,8 @@ QTESTABLE=$(filter-out $(DONTTEST), \ $(wildcard src/string/*.mli) \ $(wildcard src/io/*.ml) \ $(wildcard src/io/*.mli) \ + $(wildcard src/unix/*.ml) \ + $(wildcard src/unix/*.mli) \ $(wildcard src/sexp/*.ml) \ $(wildcard src/sexp/*.mli) \ $(wildcard src/advanced/*.ml) \ diff --git a/_oasis b/_oasis index 9b2f05df..7a2b6dce 100644 --- a/_oasis +++ b/_oasis @@ -203,8 +203,8 @@ Executable run_qtest Build$: flag(tests) && flag(bigarray) BuildDepends: containers, containers.misc, containers.string, containers.iter, containers.io, containers.advanced, containers.sexp, - containers.bigarray, - sequence, gen, oUnit, QTest2Lib + containers.bigarray, containers.unix, + sequence, gen, unix, oUnit, QTest2Lib Executable run_qtest_lwt Path: qtest/lwt/ diff --git a/src/data/CCIntMap.ml b/src/data/CCIntMap.ml index 7a0fed15..c3fecc7f 100644 --- a/src/data/CCIntMap.ml +++ b/src/data/CCIntMap.ml @@ -134,7 +134,7 @@ let rec insert_ c k v t = match t with let add k v t = insert_ (fun ~old:_ v -> v) k v t -(*$Q +(*$Q & ~count:20 Q.(list (pair int int)) (fun l -> \ let l = CCList.Set.uniq l in let m = of_list l in \ List.for_all (fun (k,v) -> find_exn k m = v) l) diff --git a/src/unix/CCUnix.ml b/src/unix/CCUnix.ml index b7bd8dc2..238ef0c2 100644 --- a/src/unix/CCUnix.ml +++ b/src/unix/CCUnix.ml @@ -27,19 +27,40 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 High-level Functions on top of Unix} *) type 'a or_error = [`Ok of 'a | `Error of string] +type 'a gen = unit -> 'a option (** {2 Calling Commands} *) -type cmd = string * string array -(** A command: program + arguments *) - -let cmd_of_sh s = "/bin/sh", [| "/bin/sh"; "-c"; s |] - let int_of_process_status = function | Unix.WEXITED i | Unix.WSIGNALED i | Unix.WSTOPPED i -> i +let str_exists s p = + let rec f s p i = + if i = String.length s then false + else p s.[i] || f s p (i+1) + in + f s p 0 + +let rec iter_gen f g = match g() with + | None -> () + | Some x -> f x; iter_gen f g + +(* print a string, but escaped if required *) +let escape_str buf s = + if str_exists s + (function ' ' | '"' | '\'' -> true | _ -> false) + then ( + Buffer.add_char buf '\''; + String.iter + (function + | '\'' -> Buffer.add_string buf "''" + | c -> Buffer.add_char buf c + ) s; + Buffer.add_char buf '\''; + ) else Buffer.add_string buf s + let read_all ?(size=1024) ic = let buf = ref (Bytes.create size) in let len = ref 0 in @@ -58,24 +79,37 @@ let read_all ?(size=1024) ic = with Exit -> Bytes.sub_string !buf 0 !len -let call ?(stdin="") cmd = - let cmd, args = match cmd with - | `Sh s -> cmd_of_sh s - | `Cmd (c, args) -> c, args - in - let oc, ic, errc = Unix.open_process_full cmd args in - (* send stdin *) - output_string ic stdin; - close_out ic; - (* read out and err *) - let out = read_all oc in - let err = read_all errc in - let status = Unix.close_process_full (oc, ic, errc) in - object - method stdout = out - method stderr = err - method status = status - method errcode = int_of_process_status status - end +type call_result = + < stdout:string; + stderr:string; + status:Unix.process_status; + errcode:int; (** extracted from status *) + > +let kbprintf' buf fmt k = Printf.kbprintf k buf fmt + +let call ?(bufsize=2048) ?(stdin=`Str "") ?(env=[||]) cmd = + (* render the command *) + let buf = Buffer.create 256 in + kbprintf' buf cmd + (fun buf -> + let cmd = Buffer.contents buf in + let oc, ic, errc = Unix.open_process_full cmd env in + (* send stdin *) + begin match stdin with + | `Str s -> output_string ic s + | `Gen g -> iter_gen (output_string ic) g + end; + close_out ic; + (* read out and err *) + let out = read_all ~size:bufsize oc in + let err = read_all ~size:bufsize errc in + let status = Unix.close_process_full (oc, ic, errc) in + object + method stdout = out + method stderr = err + method status = status + method errcode = int_of_process_status status + end + ) diff --git a/src/unix/CCUnix.mli b/src/unix/CCUnix.mli index 003f8406..a4f037d0 100644 --- a/src/unix/CCUnix.mli +++ b/src/unix/CCUnix.mli @@ -31,21 +31,46 @@ Some useful functions built on top of Unix. @since NEXT_RELEASE *) type 'a or_error = [`Ok of 'a | `Error of string] +type 'a gen = unit -> 'a option (** {2 Calling Commands} *) -type cmd = string * string array -(** A command: program + arguments *) +val escape_str : Buffer.t -> string -> unit +(** Escape a string so it can be a shell argument. +*) -val call : - ?stdin:string -> - [`Sh of string | `Cmd of cmd] -> +(*$T + CCPrint.sprintf "%a" escape_str "foo" = "foo" + CCPrint.sprintf "%a" escape_str "foo bar" = "'foo bar'" + CCPrint.sprintf "%a" escape_str "fo'o b'ar" = "'fo''o b''ar'" +*) + +type call_result = < stdout:string; stderr:string; status:Unix.process_status; errcode:int; (** extracted from status *) > +val call : ?bufsize:int -> + ?stdin:[`Gen of string gen | `Str of string] -> + ?env:string array -> + ('a, Buffer.t, unit, call_result) format4 -> + 'a +(** [call cmd] wraps the result of [Unix.open_process_full cmd] into an + object. It reads the full stdout and stderr of the subprocess before + returning. + @param stdin if provided, the generator or string is consumed and fed to + the subprocess input channel, which is then closed. + @param bufsize buffer size used to read stdout and stderr + @param env environment to run the command in +*) + +(*$T + (call ~stdin:(`Str "abc") "cat")#stdout = "abc" + (call "echo %a" escape_str "a'b'c")#stdout = "abc\n" + (call "echo %s" "a'b'c")#stdout = "abc\n" +*) From 1da809c59639f2dcf71e2aa2749946cb26062a70 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 30 Mar 2015 11:53:55 +0200 Subject: [PATCH 31/46] fix CCUnix.escape_str --- src/unix/CCUnix.ml | 4 ++-- src/unix/CCUnix.mli | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/unix/CCUnix.ml b/src/unix/CCUnix.ml index 238ef0c2..ec739f0f 100644 --- a/src/unix/CCUnix.ml +++ b/src/unix/CCUnix.ml @@ -50,7 +50,7 @@ let rec iter_gen f g = match g() with (* print a string, but escaped if required *) let escape_str buf s = if str_exists s - (function ' ' | '"' | '\'' -> true | _ -> false) + (function ' ' | '"' | '\'' | '\n' | '\t'-> true | _ -> false) then ( Buffer.add_char buf '\''; String.iter @@ -79,7 +79,7 @@ let read_all ?(size=1024) ic = with Exit -> Bytes.sub_string !buf 0 !len -type call_result = +type call_result = < stdout:string; stderr:string; status:Unix.process_status; diff --git a/src/unix/CCUnix.mli b/src/unix/CCUnix.mli index a4f037d0..59c9958a 100644 --- a/src/unix/CCUnix.mli +++ b/src/unix/CCUnix.mli @@ -45,7 +45,7 @@ val escape_str : Buffer.t -> string -> unit CCPrint.sprintf "%a" escape_str "fo'o b'ar" = "'fo''o b''ar'" *) -type call_result = +type call_result = < stdout:string; stderr:string; status:Unix.process_status; From e3b4c5eaf9a37f58bd6f29bd7eca0737b867c86a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 30 Mar 2015 14:43:05 +0200 Subject: [PATCH 32/46] add `CCString.{lines,unlines,concat_gen}` --- src/core/CCString.cppo.ml | 14 ++++++++++++++ src/core/CCString.mli | 16 ++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 097cd078..6f64b4e1 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -263,6 +263,20 @@ let of_array a = let to_array s = Array.init (String.length s) (fun i -> s.[i]) +let lines s = Split.gen_cpy ~by:"\n" s + +let concat_gen ~sep g = + let b = Buffer.create 256 in + let rec aux ~first () = match g () with + | None -> Buffer.contents b + | Some s -> + if not first then Buffer.add_string b sep; + Buffer.add_string b s; + aux ~first:false () + in aux ~first:true () + +let unlines g = concat_gen ~sep:"\n" g + let pp buf s = Buffer.add_char buf '"'; Buffer.add_string buf s; diff --git a/src/core/CCString.mli b/src/core/CCString.mli index 807bb938..ad0e0317 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -113,6 +113,22 @@ val suffix : suf:string -> string -> bool not (suffix ~suf:"abcd" "cd") *) +val lines : string -> string gen +(** [lines s] returns a generator of the lines of [s] (splits along '\n') + @since NEXT_RELEASE *) + +val concat_gen : sep:string -> string gen -> string +(** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep]. + @since NEXT_RELEASE *) + +val unlines : string gen -> string +(** [unlines g] concatenates all strings of [g], separated with '\n' + @since NEXT_RELEASE *) + +(*$Q + Q.printable_string (fun s -> unlines (lines s) = s) +*) + include S with type t := string (** {2 Splitting} *) From ff956b9cc2e08540ffde95a95895d6ac50e1b2e0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 30 Mar 2015 21:05:58 +0200 Subject: [PATCH 33/46] add `containers_misc.Puf.iter` --- src/misc/puf.ml | 15 ++++++++++++++- src/misc/puf.mli | 4 ++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/misc/puf.ml b/src/misc/puf.ml index 2a338fd2..ef4ca0c8 100644 --- a/src/misc/puf.ml +++ b/src/misc/puf.ml @@ -58,6 +58,8 @@ module PArray = struct a end + let iteri f t = Array.iteri f (reroot t) + let get t i = match !t with | Array a -> a.(i) @@ -204,6 +206,9 @@ module type S = sig (** [iter_equiv_class uf a f] calls [f] on every element of [uf] that is congruent to [a], including [a] itself. *) + val iter : _ t -> (elt -> unit) -> unit + (** Iterate on all root values *) + val inconsistent : _ t -> (elt * elt * elt * elt) option (** Check whether the UF is inconsistent. It returns [Some (a, b, a', b')] in case of inconsistency, where a = b, a = a' and b = b' by congruence, @@ -222,7 +227,8 @@ module type S = sig val explain_distinct : 'e t -> elt -> elt -> elt * elt (** [explain_distinct uf a b] gives the original pair [a', b'] that - made [a] and [b] distinct by calling [distinct a' b'] *) + made [a] and [b] distinct by calling [distinct a' b']. The + terms must be distinct, otherwise Failure is raised. *) end module IH = Hashtbl.Make(struct type t = int let equal i j = i = j let hash i = i end) @@ -446,6 +452,13 @@ module Make(X : ID) : S with type elt = X.t = struct in traverse ia + let iter uf f = + PArray.iteri + (fun i e -> match e with + | None -> () + | Some d -> if d.next = i then f d.elt + ) uf.data + let inconsistent uf = uf.inconsistent (** Closest common ancestor of the two elements in the proof forest *) diff --git a/src/misc/puf.mli b/src/misc/puf.mli index c44f4e2b..6ae10d5e 100644 --- a/src/misc/puf.mli +++ b/src/misc/puf.mli @@ -113,6 +113,10 @@ module type S = sig (** [iter_equiv_class uf a f] calls [f] on every element of [uf] that is congruent to [a], including [a] itself. *) + val iter : _ t -> (elt -> unit) -> unit + (** Iterate on all root values + @since NExT_RELEASE *) + val inconsistent : _ t -> (elt * elt * elt * elt) option (** Check whether the UF is inconsistent. It returns [Some (a, b, a', b')] in case of inconsistency, where a = b, a = a' and b = b' by congruence, From 814aaadab5e3e3b181fc6feac566f4c4ee5a7e4a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 30 Mar 2015 22:42:06 +0200 Subject: [PATCH 34/46] fix in Puf.iter --- src/misc/puf.ml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/misc/puf.ml b/src/misc/puf.ml index ef4ca0c8..919f2bcf 100644 --- a/src/misc/puf.ml +++ b/src/misc/puf.ml @@ -454,10 +454,11 @@ module Make(X : ID) : S with type elt = X.t = struct let iter uf f = PArray.iteri - (fun i e -> match e with + (fun i i' -> + if i = i' then match PArray.get uf.data i with | None -> () - | Some d -> if d.next = i then f d.elt - ) uf.data + | Some d -> f d.elt + ) uf.parent let inconsistent uf = uf.inconsistent From 26b4b14979ff89f0a0a94de1af96ca263a80f230 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 30 Mar 2015 23:33:09 +0200 Subject: [PATCH 35/46] fix build system --- _oasis | 4 ++-- tests/test_puf.ml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/_oasis b/_oasis index 7a2b6dce..1e2c3f43 100644 --- a/_oasis +++ b/_oasis @@ -122,7 +122,7 @@ Library "containers_misc" Path: src/misc Pack: true Modules: AbsSet, Automaton, Bij, CSM, Hashset, LazyGraph, PHashtbl, - PrintBox, RAL, RoseTree, SmallSet, UnionFind, Univ + PrintBox, RAL, RoseTree, SmallSet, UnionFind, Univ, Puf BuildDepends: containers, containers.data FindlibName: misc FindlibParent: containers @@ -200,7 +200,7 @@ Executable run_qtest Install: false CompiledObject: best MainIs: run_qtest.ml - Build$: flag(tests) && flag(bigarray) + Build$: flag(tests) && flag(bigarray) && flag(unix) BuildDepends: containers, containers.misc, containers.string, containers.iter, containers.io, containers.advanced, containers.sexp, containers.bigarray, containers.unix, diff --git a/tests/test_puf.ml b/tests/test_puf.ml index d5af04d3..c309f09c 100644 --- a/tests/test_puf.ml +++ b/tests/test_puf.ml @@ -1,6 +1,7 @@ (** Tests for persistent union find *) open OUnit +open Containers_misc module P = Puf.Make(struct type t = int let get_id i = i end) From 9002694aab1796693df22f1b34b3a596ea13a97d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 30 Mar 2015 23:33:38 +0200 Subject: [PATCH 36/46] wip: toy backtracking monad with lots'a GADTs (cc @aspiwack) --- _oasis | 3 +- src/misc/backtrack.ml | 164 +++++++++++++++++++++++++++++++++++++++++ src/misc/backtrack.mli | 80 ++++++++++++++++++++ 3 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 src/misc/backtrack.ml create mode 100644 src/misc/backtrack.mli diff --git a/_oasis b/_oasis index 1e2c3f43..3b4093d6 100644 --- a/_oasis +++ b/_oasis @@ -122,7 +122,8 @@ Library "containers_misc" Path: src/misc Pack: true Modules: AbsSet, Automaton, Bij, CSM, Hashset, LazyGraph, PHashtbl, - PrintBox, RAL, RoseTree, SmallSet, UnionFind, Univ, Puf + PrintBox, RAL, RoseTree, SmallSet, UnionFind, Univ, Puf, + Backtrack BuildDepends: containers, containers.data FindlibName: misc FindlibParent: containers diff --git a/src/misc/backtrack.ml b/src/misc/backtrack.ml new file mode 100644 index 00000000..61db5f54 --- /dev/null +++ b/src/misc/backtrack.ml @@ -0,0 +1,164 @@ + +module type MONAD = sig + type 'a t + val return : 'a -> 'a t + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +end + +module NonLogical = struct + type 'a t = unit -> 'a + let return x () = x + let (>>=) x f () = let y = x() in f y () +end + +type ('a, 'b) list_view = + | Nil of exn + | Cons of 'a * 'b + +(** The monad is parametrised in the types of state, environment and + writer. *) +module type Param = sig + (** Read only *) + type e +(** Write only *) + type w +(** [w] must be a monoid *) + val wunit : w + val wprod : w -> w -> w +(** Read-write *) + type s +(** Update-only. Essentially a writer on [u->u]. *) + type u +(** [u] must be pointed. *) + val uunit : u +end + +module Logical (P:Param) = struct + type state = { + e: P.e; + w: P.w; + s: P.s; + u: P.u; + } + + type _ t = + | Ignore : _ t -> unit t + | Return : 'a -> 'a t + | Bind : 'a t * ('a -> 'b t) -> 'b t + | Map : 'a t * ('a -> 'b) -> 'b t + | Get : P.s t + | Set : P.s -> unit t + | Modify : (P.s -> P.s) -> unit t + | Put : P.w -> unit t + | Current : P.e t + | Local : P.e * 'a t -> 'a t (* local bind *) + | Update : (P.u -> P.u) -> unit t + | Zero : exn -> 'a t + | WithState : state * 'a t -> 'a t (* use other state *) + | Plus : 'a t * (exn -> 'a t ) -> 'a t + | Split : 'a t -> ('a, exn -> 'a t) list_view t + | Once : 'a t -> 'a t (* keep at most one element *) + | Break : (exn -> exn option) * 'a t -> 'a t + + let return x = Return x + let (>>=) x f = Bind (x, f) + let map f x = Map (x, f) + let ignore x = Ignore x + let set x = Set x + let get = Get + let modify f = Modify f + let put x = Put x + let current = Current + let local x y = Local (x, y) + let update f = Update f + let zero e = Zero e + let with_state st x = WithState (st, x) + let plus a f = Plus (a, f) + let split x = Split x + let once x = Once x + let break f x = Break (f, x) + + type 'a reified = + | RNil of exn + | RCons of 'a * (exn -> 'a reified) + + let repr r () = match r with + | RNil e -> Nil e + | RCons (x, f) -> Cons (x, f) + + let cons x cont = Cons (x, cont) + let nil e = Nil e + + let rcons x cont = RCons (x, cont) + let rnil e = RNil e + + type 'a splitted = (('a * state), exn -> 'a t) list_view + + let rec run_rec + : type a. state -> a t -> a splitted + = fun st t -> match t with + | Return x -> cons (x, st) zero + | Ignore x -> + begin match run_rec st x with + | Nil e -> Nil e + | Cons ((_, st), cont) -> cons ((), st) (fun e -> Ignore (cont e)) + end + | Bind (x,f) -> + begin match run_rec st x with + | Nil e -> Nil e + | Cons ((x, st_x), cont) -> + let y = f x in + run_rec st_x (plus y (fun e -> with_state st (cont e >>= f))) + end + | Map (x,f) -> + begin match run_rec st x with + | Nil e -> Nil e + | Cons ((x, st), cont) -> + cons (f x, st) (fun e -> map f (cont e)) + end + | Get -> cons (st.s, st) zero + | Set s -> cons ((), {st with s}) zero + | Modify f -> + let st = {st with s = f st.s} in + cons ((), st) zero + | Put w -> cons ((), {st with w}) zero + | Current -> cons (st.e, st) zero + | Local (e,x) -> + (* bind [st.e = e] in [x] *) + let st' = {st with e} in + run_rec st' x + | Update f -> + let st = {st with u=f st.u} in + cons ((), st) zero + | WithState (st', x) -> run_rec st' x (* ignore [st] *) + | Zero e -> Nil e (* failure *) + | Plus (x,cont) -> + begin match run_rec st x with + | Nil e -> run_rec st (cont e) + | Cons ((x, st), cont') -> + cons (x, st) (fun e -> plus (cont' e) cont) + end + | Split x -> + begin match run_rec st x with + | Nil e -> cons (Nil e, st) zero + | Cons ((x, st'), cont) -> cons (cons x cont, st') zero + end + | Once x -> + begin match run_rec st x with + | Nil e -> Nil e + | Cons ((x, st), _) -> cons (x, st) zero + end + | Break (f,x) -> assert false (* TODO: ? *) + + let run t e s = + let state = {e; s; u=P.uunit; w=P.wunit} in + let rec run_list + : type a. state -> a t -> (a * state) reified + = fun state t -> match run_rec state t with + | Nil e -> rnil e + | Cons ((x, st), cont) -> + rcons (x, st) (fun e -> run_list state (cont e)) + in + run_list state t +end + diff --git a/src/misc/backtrack.mli b/src/misc/backtrack.mli new file mode 100644 index 00000000..321bb444 --- /dev/null +++ b/src/misc/backtrack.mli @@ -0,0 +1,80 @@ + +module type MONAD = sig + type 'a t + val return : 'a -> 'a t + val (>>=) : 'a t -> ('a -> 'b t) -> 'b t +end + +(** Taken from Coq "logic_monad.mli" *) + +module NonLogical : sig + type 'a t = unit -> 'a + include MONAD with type 'a t := 'a t +end + +(** {6 Logical layer} *) +(** The logical monad is a backtracking monad on top of which is + layered a state monad (which is used to implement all of read/write, + read only, and write only effects). The state monad being layered on + top of the backtracking monad makes it so that the state is + backtracked on failure. + Backtracking differs from regular exception in that, writing (+) + for exception catching and (>>=) for bind, we require the + following extra distributivity laws: + x+(y+z) = (x+y)+z + zero+x = x + x+zero = x + (x+y)>>=k = (x>>=k)+(y>>=k) *) +(** A view type for the logical monad, which is a form of list, hence + we can decompose it with as a list. *) +type ('a, 'b) list_view = + | Nil of exn + | Cons of 'a * 'b + +(** The monad is parametrised in the types of state, environment and + writer. *) +module type Param = sig + (** Read only *) + type e +(** Write only *) + type w +(** [w] must be a monoid *) + val wunit : w + val wprod : w -> w -> w +(** Read-write *) + type s +(** Update-only. Essentially a writer on [u->u]. *) + type u +(** [u] must be pointed. *) + val uunit : u +end + +module Logical (P:Param) : sig + include MONAD + val map : ('a -> 'b) -> 'a t -> 'b t + val ignore : 'a t -> unit t + val set : P.s -> unit t + val get : P.s t + val modify : (P.s -> P.s) -> unit t + val put : P.w -> unit t + val current : P.e t + val local : P.e -> 'a t -> 'a t + val update : (P.u -> P.u) -> unit t + val zero : exn -> 'a t + val plus : 'a t -> (exn -> 'a t) -> 'a t + val split : 'a t -> (('a,(exn->'a t)) list_view) t + val once : 'a t -> 'a t + val break : (exn -> exn option) -> 'a t -> 'a t + (* val lift : 'a NonLogical.t -> 'a t *) + type 'a reified + + type state = { + e: P.e; + w: P.w; + s: P.s; + u: P.u; + } + + val repr : 'a reified -> ('a, exn -> 'a reified) list_view NonLogical.t + val run : 'a t -> P.e -> P.s -> ('a * state) reified +end From 1d2780794f57a08e3a571288a1525be16ad2b217 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 30 Mar 2015 23:41:10 +0200 Subject: [PATCH 37/46] wip: a few simplifications in backtrack --- src/misc/backtrack.ml | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/misc/backtrack.ml b/src/misc/backtrack.ml index 61db5f54..ccb25a85 100644 --- a/src/misc/backtrack.ml +++ b/src/misc/backtrack.ml @@ -61,9 +61,19 @@ module Logical (P:Param) = struct | Break : (exn -> exn option) * 'a t -> 'a t let return x = Return x + let (>>=) x f = Bind (x, f) - let map f x = Map (x, f) - let ignore x = Ignore x + + let map f x = match x with + | Return x -> return (f x) + | Map (y, g) -> Map (y, fun x -> f (g x)) + | _ -> Map (x, f) + + let rec ignore : type a. a t -> unit t = function + | Return _ -> Return () + | Map (x, _) -> ignore x + | x -> Ignore x + let set x = Set x let get = Get let modify f = Modify f @@ -73,9 +83,20 @@ module Logical (P:Param) = struct let update f = Update f let zero e = Zero e let with_state st x = WithState (st, x) - let plus a f = Plus (a, f) + + let rec plus a f = match a with + | Zero e -> f e + | Plus (a1, f1) -> + plus a1 (fun e -> plus (f1 e) f) + let split x = Split x - let once x = Once x + + let rec once : type a. a t -> a t = function + | Zero e -> Zero e + | Return x -> Return x + | Map (x, f) -> map f (once x) + | x -> Once x + let break f x = Break (f, x) type 'a reified = From ceb68ee15fd60a61ae09993f5e793826c19a0d62 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 31 Mar 2015 00:12:50 +0200 Subject: [PATCH 38/46] small fixes --- _oasis | 2 +- benchs/run_benchs.ml | 1 + src/lwt/lwt_pipe.mli | 12 ++++++------ src/misc/backtrack.ml | 1 + 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/_oasis b/_oasis index 3b4093d6..7c00af27 100644 --- a/_oasis +++ b/_oasis @@ -201,7 +201,7 @@ Executable run_qtest Install: false CompiledObject: best MainIs: run_qtest.ml - Build$: flag(tests) && flag(bigarray) && flag(unix) + Build$: flag(tests) && flag(bigarray) && flag(unix) && flag(advanced) BuildDepends: containers, containers.misc, containers.string, containers.iter, containers.io, containers.advanced, containers.sexp, containers.bigarray, containers.unix, diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 5fa29837..257b092c 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -4,6 +4,7 @@ module B = Benchmark let (@>) = B.Tree.(@>) let (@>>) = B.Tree.(@>>) let (@>>>) = B.Tree.(@>>>) +let (|>) = CCFun.(|>) let app_int f n = string_of_int n @> lazy (f n) let app_ints f l = B.Tree.concat (List.map (app_int f) l) diff --git a/src/lwt/lwt_pipe.mli b/src/lwt/lwt_pipe.mli index 46702c78..fce6de12 100644 --- a/src/lwt/lwt_pipe.mli +++ b/src/lwt/lwt_pipe.mli @@ -74,20 +74,20 @@ type ('a, +'perm) t constraint 'perm = [< `r | `w] type ('a, 'perm) pipe = ('a, 'perm) t -val keep : _ t -> unit Lwt.t -> unit +val keep : (_,_) t -> unit Lwt.t -> unit (** [keep p fut] adds a pointer from [p] to [fut] so that [fut] is not garbage-collected before [p] *) -val is_closed : _ t -> bool +val is_closed : (_,_) t -> bool -val close : _ t -> unit Lwt.t +val close : (_,_) t -> unit Lwt.t (** [close p] closes [p], which will not accept input anymore. This sends [`End] to all readers connected to [p] *) -val close_async : _ t -> unit +val close_async : (_,_) t -> unit (** Same as {!close} but closes in the background *) -val wait : _ t -> unit Lwt.t +val wait : (_,_) t -> unit Lwt.t (** Evaluates once the pipe closes *) val create : ?max_size:int -> unit -> ('a, 'perm) t @@ -101,7 +101,7 @@ val connect : ?ownership:[`None | `InOwnsOut | `OutOwnsIn] -> @param own determines which pipes owns which (the owner, when it closes, also closes the ownee) *) -val link_close : _ t -> after:_ t -> unit +val link_close : (_,_) t -> after:(_,_) t -> unit (** [link_close p ~after] will close [p] when [after] closes. if [after] is closed already, closes [p] immediately *) diff --git a/src/misc/backtrack.ml b/src/misc/backtrack.ml index ccb25a85..db0d06d9 100644 --- a/src/misc/backtrack.ml +++ b/src/misc/backtrack.ml @@ -88,6 +88,7 @@ module Logical (P:Param) = struct | Zero e -> f e | Plus (a1, f1) -> plus a1 (fun e -> plus (f1 e) f) + | _ -> Plus (a, f) let split x = Split x From 0b38c9e2724650b78950445247e807835382f173 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 31 Mar 2015 00:40:18 +0200 Subject: [PATCH 39/46] fix in oasis --- _oasis | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_oasis b/_oasis index 7c00af27..63b91e3d 100644 --- a/_oasis +++ b/_oasis @@ -201,7 +201,7 @@ Executable run_qtest Install: false CompiledObject: best MainIs: run_qtest.ml - Build$: flag(tests) && flag(bigarray) && flag(unix) && flag(advanced) + Build$: flag(tests) && flag(misc) && flag(bigarray) && flag(unix) && flag(advanced) BuildDepends: containers, containers.misc, containers.string, containers.iter, containers.io, containers.advanced, containers.sexp, containers.bigarray, containers.unix, @@ -229,7 +229,7 @@ Executable run_tests Test all Command: make test-all TestTools: run_tests, run_qtest - Run$: flag(tests) && flag(misc) + Run$: flag(tests) && flag(misc) && flag(unix) && flag(advanced) && flag(bigarray) Test lwt Command: echo "test lwt"; ./run_qtest_lwt.native From ef92ef19fd7a068ef1b8cad293eb589a56e1399e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 31 Mar 2015 15:24:10 +0200 Subject: [PATCH 40/46] add `CCString.{lines,unlines}` that work on lines --- src/core/CCString.cppo.ml | 8 ++++++-- src/core/CCString.mli | 16 ++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 6f64b4e1..36ed8936 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -263,7 +263,9 @@ let of_array a = let to_array s = Array.init (String.length s) (fun i -> s.[i]) -let lines s = Split.gen_cpy ~by:"\n" s +let lines_gen s = Split.gen_cpy ~by:"\n" s + +let lines s = Split.list_cpy ~by:"\n" s let concat_gen ~sep g = let b = Buffer.create 256 in @@ -275,7 +277,9 @@ let concat_gen ~sep g = aux ~first:false () in aux ~first:true () -let unlines g = concat_gen ~sep:"\n" g +let unlines l = String.concat "\n" l + +let unlines_gen g = concat_gen ~sep:"\n" g let pp buf s = Buffer.add_char buf '"'; diff --git a/src/core/CCString.mli b/src/core/CCString.mli index ad0e0317..b18df249 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -113,16 +113,24 @@ val suffix : suf:string -> string -> bool not (suffix ~suf:"abcd" "cd") *) -val lines : string -> string gen -(** [lines s] returns a generator of the lines of [s] (splits along '\n') +val lines : string -> string list +(** [lines s] returns a list of the lines of [s] (splits along '\n') + @since NEXT_RELEASE *) + +val lines_gen : string -> string gen +(** [lines_gen s] returns a generator of the lines of [s] (splits along '\n') @since NEXT_RELEASE *) val concat_gen : sep:string -> string gen -> string (** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep]. @since NEXT_RELEASE *) -val unlines : string gen -> string -(** [unlines g] concatenates all strings of [g], separated with '\n' +val unlines : string list -> string +(** [unlines l] concatenates all strings of [l], separated with '\n' + @since NEXT_RELEASE *) + +val unlines_gen : string gen -> string +(** [unlines_gen g] concatenates all strings of [g], separated with '\n' @since NEXT_RELEASE *) (*$Q From e22c29c27f9fc51e9b0c58ed844041937cca9aad Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 31 Mar 2015 15:52:30 +0200 Subject: [PATCH 41/46] doc --- README.md | 6 +++++- _oasis | 4 ++-- doc/intro.txt | 6 ++++++ src/core/CCList.mli | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2dc0a6ed..de8f5e25 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ See [this file](https://github.com/c-cube/ocaml-containers/blob/master/CHANGELOG ## Finding help - the [github wiki](https://github.com/c-cube/ocaml-containers/wiki) -- the IRC channel (`##ocaml-containers` on Freenode) +- on IRC, ask `companion_cube` on `#ocaml` ## Use @@ -109,6 +109,10 @@ Documentation [here](http://cedeela.fr/~simon/software/containers). - `CCIO`, basic utilities for IO +### Containers.unix + +- `CCUnix`, utils for `Unix` + ### Containers.sexp A small S-expression library. diff --git a/_oasis b/_oasis index 63b91e3d..0a671601 100644 --- a/_oasis +++ b/_oasis @@ -152,7 +152,7 @@ Document containers Title: Containers docs Type: ocamlbuild (0.3) BuildTools+: ocamldoc - Build$: flag(docs) && flag(advanced) && flag(bigarray) && flag(lwt) && flag(misc) + Build$: flag(docs) && flag(advanced) && flag(bigarray) && flag(lwt) && flag(misc) && flag(unix) Install: true XOCamlbuildPath: . XOCamlbuildExtraArgs: @@ -160,7 +160,7 @@ Document containers XOCamlbuildLibraries: containers, containers.misc, containers.iter, containers.data, containers.string, containers.bigarray, - containers.advanced, containers.io, containers.sexp, + containers.advanced, containers.io, containers.unix, containers.sexp, containers.lwt Executable run_benchs diff --git a/doc/intro.txt b/doc/intro.txt index 3db1fe94..42f0f3c6 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -83,6 +83,12 @@ Helpers to perform simple IO (mostly on files) and iterate on channels. {!modules: CCIO} +{4 Containers.unix} + +Helpers that depend on {!Unix}, e.g. to spawn sub-processes. + +{!modules: CCUnix} + {4 Containers.sexp} A small S-expression library. The interface is relatively unstable, but diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 5fd0eb7a..75f29f13 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -132,7 +132,7 @@ val uniq_succ : ?eq:('a -> 'a -> bool) -> 'a list -> 'a list (** [uniq_succ l] removes duplicate elements that occur one next to the other. Examples: [uniq_succ [1;2;1] = [1;2;1]] - [uniq_succ [1;1;2] = [1;2] + [uniq_succ [1;1;2] = [1;2]] @since NEXT_RELEASE *) (** {2 Indices} *) From c8005f388a3dd94bad0d096dcabbe4168f90f3f8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 31 Mar 2015 17:18:17 +0200 Subject: [PATCH 42/46] a few todos --- src/misc/backtrack.ml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/misc/backtrack.ml b/src/misc/backtrack.ml index db0d06d9..d6562db0 100644 --- a/src/misc/backtrack.ml +++ b/src/misc/backtrack.ml @@ -114,6 +114,8 @@ module Logical (P:Param) = struct let rcons x cont = RCons (x, cont) let rnil e = RNil e + (* TODO: maybe (('a * state), exn -> state -> 'a t) list_view is better + for bind and local? *) type 'a splitted = (('a * state), exn -> 'a t) list_view let rec run_rec @@ -146,9 +148,14 @@ module Logical (P:Param) = struct | Put w -> cons ((), {st with w}) zero | Current -> cons (st.e, st) zero | Local (e,x) -> - (* bind [st.e = e] in [x] *) + (* bind [st.e = e] in [x], then restore old [e] in each result *) + let old_e = st.e in let st' = {st with e} in - run_rec st' x + begin match run_rec st' x with + | Nil e -> Nil e + | Cons ((x, st''), cont) -> + cons (x, {st'' with e=old_e}) (fun e -> assert false) (* TODO: restore old_e*) + end | Update f -> let st = {st with u=f st.u} in cons ((), st) zero From d0c270504a8ae8d299a4d2dd8833b4f109637da0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 6 Apr 2015 19:50:02 +0200 Subject: [PATCH 43/46] update benchmark: add CCIntMap to tbl bench --- benchs/run_benchs.ml | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 257b092c..c65165bf 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -235,6 +235,13 @@ module Tbl = struct done; !h + let intmap_add n = + let h = ref CCIntMap.empty in + for i = n downto 0 do + h := CCIntMap.add i i !h; + done; + !h + let icchashtbl_add n = let h = ICCHashtbl.create 50 in for i = n downto 0 do @@ -249,6 +256,7 @@ module Tbl = struct "ihashtbl_add", (fun n -> ignore (ihashtbl_add n)), n; "ipersistenthashtbl_add", (fun n -> ignore (ipersistenthashtbl_add n)), n; "imap_add", (fun n -> ignore (imap_add n)), n; + "intmap_add", (fun n -> ignore (intmap_add n)), n; "ccflathashtbl_add", (fun n -> ignore (icchashtbl_add n)), n; ] @@ -302,6 +310,16 @@ module Tbl = struct done; !h + let intmap_replace n = + let h = ref CCIntMap.empty in + for i = 0 to n do + h := CCIntMap.add i i !h; + done; + for i = n downto 0 do + h := CCIntMap.add i i !h; + done; + !h + let icchashtbl_replace n = let h = ICCHashtbl.create 50 in for i = 0 to n do @@ -319,11 +337,10 @@ module Tbl = struct "ihashtbl_replace", (fun n -> ignore (ihashtbl_replace n)), n; "ipersistenthashtbl_replace", (fun n -> ignore (ipersistenthashtbl_replace n)), n; "imap_replace", (fun n -> ignore (imap_replace n)), n; + "intmap_replace", (fun n -> ignore (intmap_replace n)), n; "ccflathashtbl_replace", (fun n -> ignore (icchashtbl_replace n)), n; ] - let my_len = 250 - let phashtbl_find h = fun n -> for i = 0 to n-1 do @@ -366,6 +383,12 @@ module Tbl = struct ignore (IMap.find i m); done + let intmap_find m = + fun n -> + for i = 0 to n-1 do + ignore (CCIntMap.find i m); + done + let icchashtbl_find m = fun n -> for i = 0 to n-1 do @@ -380,6 +403,7 @@ module Tbl = struct let a = Array.init n string_of_int in let pa = CCPersistentArray.init n string_of_int in let m = imap_add n in + let m' = intmap_add n in let h'''''' = icchashtbl_add n in B.throughputN 3 [ "phashtbl_find", (fun () -> phashtbl_find h n), (); @@ -389,6 +413,7 @@ module Tbl = struct "array_find", (fun () -> array_find a n), (); "persistent_array_find", (fun () -> persistent_array_find pa n), (); "imap_find", (fun () -> imap_find m n), (); + "intmap_find", (fun () -> intmap_find m' n), (); "cchashtbl_find", (fun () -> icchashtbl_find h'''''' n), (); ] From c71595f691b952f31abdbbd83ffcc6e93410239c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 10 Apr 2015 16:29:27 +0200 Subject: [PATCH 44/46] add a few functions to CCPersistentArray --- src/data/CCPersistentArray.ml | 10 ++++++++++ src/data/CCPersistentArray.mli | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/src/data/CCPersistentArray.ml b/src/data/CCPersistentArray.ml index f855f556..f674cc22 100644 --- a/src/data/CCPersistentArray.ml +++ b/src/data/CCPersistentArray.ml @@ -80,3 +80,13 @@ let of_array a = init (Array.length a) (fun i -> a.(i)) let to_list t = Array.to_list (reroot t) let of_list l = ref (Array (Array.of_list l)) +type 'a sequence = ('a -> unit) -> unit + +let to_seq a yield = iter yield a + +let of_seq seq = + let l = ref [] in + seq (fun x -> l := x :: !l); + of_list (List.rev !l) + + diff --git a/src/data/CCPersistentArray.mli b/src/data/CCPersistentArray.mli index 914f5e95..36e5c8ac 100644 --- a/src/data/CCPersistentArray.mli +++ b/src/data/CCPersistentArray.mli @@ -96,4 +96,10 @@ val to_list : 'a t -> 'a list val of_list : 'a list -> 'a t (** [of_list l] returns a fresh persistent array containing the elements of [l]. *) +type 'a sequence = ('a -> unit) -> unit + +val to_seq : 'a t -> 'a sequence + +val of_seq : 'a sequence -> 'a t + From 136df67606ebb1a797ff4c407b04cf6dd5570514 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 10 Apr 2015 16:30:09 +0200 Subject: [PATCH 45/46] documentation stuff --- src/data/CCIntMap.mli | 5 ++++- src/misc/backtrack.mli | 8 ++++++++ src/unix/CCUnix.mli | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/data/CCIntMap.mli b/src/data/CCIntMap.mli index 4caf0d14..cd2afb00 100644 --- a/src/data/CCIntMap.mli +++ b/src/data/CCIntMap.mli @@ -24,7 +24,10 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 Map specialized for Int keys} *) +(** {1 Map specialized for Int keys} + +{b status: unstable} +@since NEXT_RELEASE *) type 'a t diff --git a/src/misc/backtrack.mli b/src/misc/backtrack.mli index 321bb444..1efba86d 100644 --- a/src/misc/backtrack.mli +++ b/src/misc/backtrack.mli @@ -1,4 +1,12 @@ +(** {1 Experiment with Backtracking Monad} + +Playing stuff, don't use (yet?). + +{b status: experimental} +@since NEXT_RELEASE +*) + module type MONAD = sig type 'a t val return : 'a -> 'a t diff --git a/src/unix/CCUnix.mli b/src/unix/CCUnix.mli index 59c9958a..507cf1ca 100644 --- a/src/unix/CCUnix.mli +++ b/src/unix/CCUnix.mli @@ -28,6 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Some useful functions built on top of Unix. +{b status: unstable} @since NEXT_RELEASE *) type 'a or_error = [`Ok of 'a | `Error of string] From 0efc34217717ca5ef63225629a71eaf919f28672 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 10 Apr 2015 16:40:45 +0200 Subject: [PATCH 46/46] preparing version 0.10 --- CHANGELOG.md | 25 ++++++++++++++++++++----- HOWTO.md | 2 +- _oasis | 2 +- src/core/CCList.mli | 4 ++-- src/core/CCString.mli | 10 +++++----- src/core/containers.ml | 2 +- src/data/CCFQueue.mli | 4 ++-- src/data/CCIntMap.mli | 2 +- src/data/CCPersistentArray.mli | 2 +- src/misc/backtrack.mli | 2 +- src/string/app_parse.mli | 2 +- src/unix/CCUnix.mli | 2 +- 12 files changed, 37 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c05947a4..d55ea6b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## 0.10 + +- add `containers_misc.Puf.iter` +- add `CCString.{lines,unlines,concat_gen}` +- `CCUnix` (with a small subprocess API) +- add `CCList.{sorted_merge_uniq, uniq_succ}` +- breaking: fix documentation of `CCList.sorted_merge` (different semantics) +- `CCPersistentArray` (credit to @gbury and Jean-Christophe Filliâtre) +- `CCIntMap` (big-endian patricia trees) in containers.data +- bugfix in `CCFQueue.add_seq_front` +- add `CCFQueue.{rev, --}` +- add `App_parse` in `containers_string`, experimental applicative parser combinators +- remove `containers.pervasives`, add the module `Containers` to core +- bugfix in `CCFormat.to_file` + ## 0.9 - add `Float`, `Ref`, `Set`, `Format` to `CCPervasives` @@ -178,7 +193,7 @@ - renamed threads/future to threads/CCFuture - big upgrade of `RAL` (random access lists) - `CCList.Ref` to help use references on lists -- `CCKList`: group,uniq,sort,sort_uniq,repeat and cycle, infix ops, applicative,product +- `CCKList`: `group,uniq,sort,sort_uniq,repeat` and `cycle`, infix ops, applicative,product - `CCTrie.above/below`: ranges of items - more functions in `CCPair` - `CCCat`: funny (though useless) definitions inspired from Haskell @@ -192,7 +207,7 @@ - conversions for `CCString` - `CCHashtbl`: open-addressing table (Robin-Hood hashing) - registered printers for `CCError`.guard,wrap1,etc. -- monadic operator in `CCList`: map_m_par +- monadic operator in `CCList`: `map_m_par` - simple interface to `PrintBox` now more powerful - constructors for 1 or 2 elements fqueues - bugfixes in BTree (insertion should work now) @@ -206,7 +221,7 @@ - `CCopt.pure` - updated `CCPersistentHashtbl` with new functions; updated doc, simplified code - move `CCString` into core/, since it deals with a basic type; also add some features to `CCString` (Sub and Split modules to deal with slices and splitting by a string) -- `CCArray.blit`, .Sub.to_slice; some bugfixes +- `CCArray.blit`, `.Sub.to_slice`; some bugfixes - applicative and lifting operators for `CCError` - `CCError.map2` - more combinators in `CCError` @@ -219,9 +234,9 @@ - `CCOpt.sequence_l` - mplus instance for `CCOpt` - monad instance for `CCFun` -- updated description in _oasis +- updated description in `_oasis` - `CCTrie`, a compressed functorial persistent trie structure - fix `CCPrint.unit`, add `CCPrint.silent` - fix type mismatch -note: git log --no-merges previous_version..HEAD --pretty=%s +note: `git log --no-merges previous_version..HEAD --pretty=%s` diff --git a/HOWTO.md b/HOWTO.md index 626395e5..34e0cda3 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -1,7 +1,7 @@ ## Make a release -1. `make test-all` +1. `make test` 2. update version in `_oasis` 3. `make update_next_tag` (to update `@since` comments; be careful not to change symlinks) 4. update `CHANGELOG.md` (see its end to find the right git command) diff --git a/_oasis b/_oasis index 0a671601..9d5c8663 100644 --- a/_oasis +++ b/_oasis @@ -1,6 +1,6 @@ OASISFormat: 0.4 Name: containers -Version: 0.9 +Version: 0.10 Homepage: https://github.com/c-cube/ocaml-containers Authors: Simon Cruanes License: BSD-2-clause diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 75f29f13..6021cf9d 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -126,14 +126,14 @@ val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list val sorted_merge_uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list -> 'a list (** [sorted_merge_uniq l1 l2] merges the sorted lists [l1] and [l2] and removes duplicates - @since NEXT_RELEASE *) + @since 0.10 *) val uniq_succ : ?eq:('a -> 'a -> bool) -> 'a list -> 'a list (** [uniq_succ l] removes duplicate elements that occur one next to the other. Examples: [uniq_succ [1;2;1] = [1;2;1]] [uniq_succ [1;1;2] = [1;2]] - @since NEXT_RELEASE *) + @since 0.10 *) (** {2 Indices} *) diff --git a/src/core/CCString.mli b/src/core/CCString.mli index b18df249..fcfc32db 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -115,23 +115,23 @@ val suffix : suf:string -> string -> bool val lines : string -> string list (** [lines s] returns a list of the lines of [s] (splits along '\n') - @since NEXT_RELEASE *) + @since 0.10 *) val lines_gen : string -> string gen (** [lines_gen s] returns a generator of the lines of [s] (splits along '\n') - @since NEXT_RELEASE *) + @since 0.10 *) val concat_gen : sep:string -> string gen -> string (** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep]. - @since NEXT_RELEASE *) + @since 0.10 *) val unlines : string list -> string (** [unlines l] concatenates all strings of [l], separated with '\n' - @since NEXT_RELEASE *) + @since 0.10 *) val unlines_gen : string gen -> string (** [unlines_gen g] concatenates all strings of [g], separated with '\n' - @since NEXT_RELEASE *) + @since 0.10 *) (*$Q Q.printable_string (fun s -> unlines (lines s) = s) diff --git a/src/core/containers.ml b/src/core/containers.ml index efb293b9..d1c862c3 100644 --- a/src/core/containers.ml +++ b/src/core/containers.ml @@ -44,7 +44,7 @@ Changed [Opt] to [Option] to better reflect that this module is about the Renamed from [CCPervasives] in [containers.pervasives], to [Containers] in the core library [containers] -@since NEXT_RELEASE +@since 0.10 *) module Array = struct diff --git a/src/data/CCFQueue.mli b/src/data/CCFQueue.mli index 6a112b7b..aac4a484 100644 --- a/src/data/CCFQueue.mli +++ b/src/data/CCFQueue.mli @@ -112,7 +112,7 @@ val append : 'a t -> 'a t -> 'a t val rev : 'a t -> 'a t (** Reverse the queue, O(n) complexity - @since NEXT_RELEASE *) + @since 0.10 *) val map : ('a -> 'b) -> 'a t -> 'b t (** Map values *) @@ -146,5 +146,5 @@ val of_klist : 'a klist -> 'a t val (--) : int -> int -> int t (** [a -- b] is the integer range from [a] to [b], both included. - @since NEXT_RELEASE *) + @since 0.10 *) diff --git a/src/data/CCIntMap.mli b/src/data/CCIntMap.mli index cd2afb00..61a78c00 100644 --- a/src/data/CCIntMap.mli +++ b/src/data/CCIntMap.mli @@ -27,7 +27,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Map specialized for Int keys} {b status: unstable} -@since NEXT_RELEASE *) +@since 0.10 *) type 'a t diff --git a/src/data/CCPersistentArray.mli b/src/data/CCPersistentArray.mli index 36e5c8ac..ae0bebfd 100644 --- a/src/data/CCPersistentArray.mli +++ b/src/data/CCPersistentArray.mli @@ -30,7 +30,7 @@ From the paper by Jean-Christophe Filliâtre, "A persistent Union-Find data structure", see {{: https://www.lri.fr/~filliatr/ftp/publis/puf-wml07.ps} the ps version} -@since NEXT_RELEASE *) +@since 0.10 *) type 'a t (** The type of persistent arrays *) diff --git a/src/misc/backtrack.mli b/src/misc/backtrack.mli index 1efba86d..c74ccf52 100644 --- a/src/misc/backtrack.mli +++ b/src/misc/backtrack.mli @@ -4,7 +4,7 @@ Playing stuff, don't use (yet?). {b status: experimental} -@since NEXT_RELEASE +@since 0.10 *) module type MONAD = sig diff --git a/src/string/app_parse.mli b/src/string/app_parse.mli index 8e8cab4c..f4c9ce1a 100644 --- a/src/string/app_parse.mli +++ b/src/string/app_parse.mli @@ -52,7 +52,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ]} {b status: experimental} -@since NEXT_RELEASE +@since 0.10 *) type ('a,'b) result = [`Error of 'b | `Ok of 'a] diff --git a/src/unix/CCUnix.mli b/src/unix/CCUnix.mli index 507cf1ca..e1e75ba7 100644 --- a/src/unix/CCUnix.mli +++ b/src/unix/CCUnix.mli @@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Some useful functions built on top of Unix. {b status: unstable} -@since NEXT_RELEASE *) +@since 0.10 *) type 'a or_error = [`Ok of 'a | `Error of string] type 'a gen = unit -> 'a option