diff --git a/.ocamlinit b/.ocamlinit index f2acb85d..f477d71c 100644 --- a/.ocamlinit +++ b/.ocamlinit @@ -2,8 +2,10 @@ #thread #directory "_build/core";; #directory "_build/misc";; +#directory "_build/string";; #directory "_build/tests/";; #load "containers.cma";; +#load "containers_string.cma";; #load "containers_misc.cma";; #require "threads";; #load "containers_thread.cma";; diff --git a/Makefile b/Makefile index 90956761..3a121ca2 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,7 @@ DONTTEST=myocamlbuild.ml setup.ml QTESTABLE=$(filter-out $(DONTTEST), \ $(wildcard core/*.ml) $(wildcard core/*.mli) \ $(wildcard misc/*.ml) $(wildcard misc/*.mli) \ + $(wildcard string/*.ml) $(wildcard string/*.mli) \ ) qtest-clean: @@ -62,7 +63,9 @@ qtest-clean: qtest: qtest-clean build @mkdir -p qtest @qtest extract -o qtest/qtest_all.ml $(QTESTABLE) 2> /dev/null - @ocamlbuild $(OPTIONS) -pkg oUnit,QTest2Lib -I core -I misc qtest/qtest_all.native + @ocamlbuild $(OPTIONS) -pkg oUnit,QTest2Lib \ + -I core -I misc -I string \ + qtest/qtest_all.native @echo ./qtest_all.native diff --git a/_oasis b/_oasis index 255b7395..982f3d63 100644 --- a/_oasis +++ b/_oasis @@ -46,7 +46,7 @@ Library "containers" Library "containers_string" Path: string Pack: true - Modules: KMP + Modules: KMP, CCString, Levenshtein FindlibName: string FindlibParent: containers @@ -59,7 +59,7 @@ Library "containers_misc" Bij, PiCalculus, Bencode, Sexp, RAL, UnionFind, SmallSet, AbsSet, CSM, ActionMan, QCheck, BencodeOnDisk, TTree, - HGraph, Automaton, Conv, Levenshtein, Bidir, Iteratee, + HGraph, Automaton, Conv, Bidir, Iteratee, Ty, Tell, BencodeStream, RatTerm, Cause BuildDepends: unix,containers FindlibName: misc @@ -102,7 +102,15 @@ Document containers BuildTools+: ocamldoc Install: true XOCamlbuildPath: . - XOCamlbuildLibraries: containers + XOCamlbuildLibraries: containers,containers.string + +Document containers_misc + Title: Containers_misc docs + Type: ocamlbuild (0.3) + BuildTools+: ocamldoc + Install: true + XOCamlbuildPath: . + XOCamlbuildLibraries: containers.misc Executable benchs Path: tests/ @@ -110,7 +118,7 @@ Executable benchs CompiledObject: native Build$: flag(bench) MainIs: benchs.ml - BuildDepends: containers,containers.misc,bench + BuildDepends: containers,containers.string,containers.misc,bench Executable bench_conv Path: tests/ diff --git a/_tags b/_tags index 988b9a40..94f7bebc 100644 --- a/_tags +++ b/_tags @@ -125,3 +125,4 @@ # OASIS_STOP : thread : thread +<**/*.ml>: warn_K, warn_Y, warn_X diff --git a/core/CCFun.ml b/core/CCFun.ml index ba6173d5..2a675b42 100644 --- a/core/CCFun.ml +++ b/core/CCFun.ml @@ -37,3 +37,12 @@ let (%) f g x = f (g x) let lexicographic f1 f2 x y = let c = f1 x y in if c <> 0 then c else f2 x y + +let finally ~h ~f = + try + let x = f () in + h (); + x + with e -> + h (); + raise e diff --git a/core/CCFun.mli b/core/CCFun.mli index aa0010e1..dc3085a5 100644 --- a/core/CCFun.mli +++ b/core/CCFun.mli @@ -40,3 +40,8 @@ val (%) : ('b -> 'c) -> ('a -> 'b) -> 'a -> 'c val lexicographic : ('a -> 'a -> int) -> ('a -> 'a -> int) -> 'a -> 'a -> int (** Lexicographic combination of comparison functions *) + +val finally : h:(unit -> unit) -> f:(unit -> 'a) -> 'a + (** [finally h f] calls [f ()] and returns its result. If it raises, the + same exception is raised; in {b any} case, [h ()] is called after + [f ()] terminates. *) diff --git a/string/CCString.ml b/string/CCString.ml new file mode 100644 index 00000000..e21225a1 --- /dev/null +++ b/string/CCString.ml @@ -0,0 +1,144 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Basic String Utils} *) + +type t = string + +type 'a gen = unit -> 'a option +type 'a sequence = ('a -> unit) -> unit + +let is_sub ~sub i s j = + let rec check k = + if i + k = String.length sub + then true + else sub.[i + k] = s.[j+k] && check (k+1) + in + check 0 + +(* note: quite inefficient if [by] is long *) +let split_gen ~by s = + let len_by = String.length by in + assert (len_by > 0); + let n = String.length s in + let prev = ref 0 in + let stop = ref false in + let rec search i = + if !stop then None + else if i >= n + then ( + stop := true; + Some (String.sub s !prev (n- !prev)) (* done *) + ) + else if is_prefix i 0 + then ( + let p = !prev in + prev := i+len_by; + Some (String.sub s p (i-p)) + ) + else search (i+1) + and is_prefix i j = + if j = len_by + then true + else if i = n + then false + else s.[i] = by.[j] && is_prefix (i+1) (j+1) + in + fun () -> + search !prev + +let split_seq ~by s k = + let rec aux g = match g () with + | None -> () + | Some x -> k x; aux g + in aux (split_gen ~by s) + +let split ~by s = + let rec aux g acc = match g () with + | None -> List.rev acc + | Some x -> aux g (x::acc) + in aux (split_gen ~by s) [] + +(*$T + split ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"] + split ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""] +*) + +(* note: inefficient *) +let find ?(start=0) ~sub s = + let n = String.length sub in + let i = ref start in + try + while !i + n < String.length s do + if is_sub ~sub 0 s !i then raise Exit; + incr i + done; + -1 + with Exit -> + !i + +let repeat s n = + assert (n>=0); + let len = String.length s in + assert(len > 0); + let buf = String.create (len * n) in + for i = 0 to n-1 do + String.blit s 0 buf (i * len) len; + done; + buf + +let prefix ~pre s = + String.length pre <= String.length s && + (let i = ref 0 in + while !i < String.length pre && s.[!i] = pre.[!i] do incr i done; + !i = String.length pre) + + +let to_gen s = + let i = ref 0 in + fun () -> + if !i = String.length s then None + else ( + let c = String.unsafe_get s !i in + incr i; + Some c + ) + +let of_gen g = + let b = Buffer.create 32 in + let rec aux () = match g () with + | None -> Buffer.contents b + | Some c -> Buffer.add_char b c; aux () + in aux () + +let to_seq s k = String.iter k s + +let of_seq seq = + let b= Buffer.create 32 in + seq (Buffer.add_char b); + Buffer.contents b + +let pp = Buffer.add_string diff --git a/string/CCString.mli b/string/CCString.mli new file mode 100644 index 00000000..a3938e1b --- /dev/null +++ b/string/CCString.mli @@ -0,0 +1,64 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Basic String Utils} +Consider using KMP instead. *) + +type t = string + +type 'a gen = unit -> 'a option +type 'a sequence = ('a -> unit) -> unit + +val is_sub : sub:t -> int -> t -> int -> bool +(** [is_sub ~sub i s j] returns [true] iff [sub] is a substring of [s] starting + at position [j] *) + +val split : by:t -> t -> t list +(** split the given string along the given separator [by]. Should only + be used with very small separators, otherwise use {!KMP}. + @raise Failure if [by = ""] *) + +val split_gen : by:t -> t -> t gen + +val split_seq : by:t -> t -> t sequence + +val find : ?start:int -> sub:t -> t -> int +(** Find [sub] in the string, returns its first index or -1. + Should only be used with very small [sub] *) + +val repeat : t -> int -> t +(** The same string, repeated n times *) + +val prefix : pre:t -> t -> bool +(** [str_prefix ~pre s] returns [true] iff [pre] is a prefix of [s] *) + +val to_gen : t -> char gen +val of_gen : char gen -> t + +val to_seq : t -> char sequence +val of_seq : char sequence -> t + +val pp : Buffer.t -> t -> unit diff --git a/string/KMP.ml b/string/KMP.ml index a49ce282..092528bb 100644 --- a/string/KMP.ml +++ b/string/KMP.ml @@ -167,7 +167,7 @@ module Make(Str : STRING) = struct seq ~pattern:(compile pattern) s 0 end -module Default = Make(struct +include Make(struct type char_ = char type char = char_ type t = string diff --git a/string/KMP.mli b/string/KMP.mli index f431137a..49ba791c 100644 --- a/string/KMP.mli +++ b/string/KMP.mli @@ -72,4 +72,4 @@ end module Make(Str : STRING) : S with type string = Str.t -module Default : S with type string = string +include S with type string = string diff --git a/misc/levenshtein.ml b/string/levenshtein.ml similarity index 100% rename from misc/levenshtein.ml rename to string/levenshtein.ml diff --git a/misc/levenshtein.mli b/string/levenshtein.mli similarity index 100% rename from misc/levenshtein.mli rename to string/levenshtein.mli