From 9c338f193e76aac9f69a04e1e97cea3f69a73051 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 9 Mar 2016 19:45:38 +0100 Subject: [PATCH 01/30] add `CCString.rev` --- src/core/CCString.cppo.ml | 4 ++++ src/core/CCString.mli | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 0574eab9..1cbd0e73 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -50,6 +50,10 @@ let init n f = let length = String.length +let rev s = + let n = length s in + init n (fun i -> s.[n-i-1]) + let rec _to_list s acc i len = if len=0 then List.rev acc else _to_list s (s.[i]::acc) (i+1) (len-1) diff --git a/src/core/CCString.mli b/src/core/CCString.mli index c036700e..10194703 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -63,6 +63,21 @@ val init : int -> (int -> char) -> string init 0 (fun _ -> assert false) = "" *) +val rev : string -> string +(** [rev s] returns the reverse of [s] + @since NEXT_RELEASE *) + +(*$Q + Q.printable_string (fun s -> s = rev (rev s)) + Q.printable_string (fun s -> length s = length (rev s)) +*) + +(*$= + "abc" (rev "cba") + "" (rev "") + " " (rev " ") +*) + val of_gen : char gen -> string val of_seq : char sequence -> string val of_klist : char klist -> string From 5f188c4f7e2f6a02aaa2ef79f256defd37990961 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 9 Mar 2016 21:09:32 +0100 Subject: [PATCH 02/30] in CCString, use KMP for faster sub-string search; add `find_all{,_l}` --- src/core/CCString.cppo.ml | 151 ++++++++++++++++++++++++++++++++------ src/core/CCString.mli | 46 +++++++++--- 2 files changed, 165 insertions(+), 32 deletions(-) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 1cbd0e73..5bca58d8 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -70,32 +70,135 @@ let is_sub ~sub i s j ~len = if i+len > String.length sub then invalid_arg "CCString.is_sub"; _is_sub ~sub i s j ~len -(* note: inefficient *) -let find ?(start=0) ~sub s = - let n = String.length sub in - let i = ref start in - try - while !i + n <= String.length s do - if _is_sub ~sub 0 s !i ~len:n then raise Exit; - incr i +type _ direction = + | Direct : [`Direct] direction + | Reverse : [`Reverse] direction + +module KMP = struct + type 'a pattern = { + failure : int array; + str : string; + } + (* invariant: [length failure = length str]. + We use a phantom type to avoid mixing the directions. *) + + let pattern_length p = String.length p.str + + (* access the [i]-th element of [s] according to direction [dir] *) + let get_ + : type a. dir:a direction -> string -> int -> char + = fun ~dir -> match dir with + | Direct -> (fun s i -> s.[i]) + | Reverse -> (fun s i -> s.[String.length s - i - 1]) + + let compile_ + : type a. dir:a direction -> string -> a pattern + = fun ~dir str -> + let len = length str in + let get = get_ ~dir in (* how to read elements of the string *) + match len with + | 0 -> {failure=[| |]; str;} + | 1 -> {failure=[| -1 |]; str;} + | _ -> + (* at least 2 elements, the algorithm can work *) + let failure = Array.make len 0 in + (* i: current index in str *) + let i = ref 1 in + (* j: index of candidate substring *) + let j = ref 0 in + while !i < len-1 do + match !j with + | _ when get str !i = get str !j -> + (* substring starting at !j continues matching current char *) + i := !i+1; + j := !j+1; + failure.(!i) <- !j; + | 0 -> + (* back to the beginning *) + i := !i+1; + failure.(!i) <- 0; + | _ -> + (* fallback for the prefix string *) + assert (!j > 0); + j := failure.(!j) + done; + { failure; str; } + + let compile s = compile_ ~dir:Direct s + let rcompile s = compile_ ~dir:Reverse s + + (* proper search function. + [i] index in [s] + [j] index in [pattern] + [len] length of [s] *) + let find_ + : type a. dir:a direction -> pattern:a pattern -> string -> int -> int + = fun ~dir ~pattern s idx -> + let len = length s in + let get = get_ ~dir in + let i = ref idx in + let j = ref 0 in + let pat_len = pattern_length pattern in + while !i < len && !j < pat_len do + let c = get s !i in + let expected = get pattern.str !j in + if c = expected + then ( + (* char matches *) + i := !i + 1; j := !j + 1 + ) else ( + if !j=0 + then (* beginning of the pattern *) + i := !i + 1 + else (* follow the failure link *) + j := pattern.failure.(!j) + ) done; - -1 - with Exit -> - !i + if !j = pat_len + then !i - pat_len + else -1 + + let find ~pattern s i = find_ ~dir:Direct ~pattern s i + + let rfind ~pattern s i = + let i = String.length s - i - 1 in + let res = find_ ~dir:Reverse ~pattern s i in + (* adjust result: first, [res = string.length s - res -1] to convert + back to real indices; then, what we got is actually the position + of the end of the pattern, so we subtract the [length of the pattern -1] + to obtain the real result. *) + if res = ~-1 + then res + else (String.length s - res) - pattern_length pattern +end + +let find ?(start=0) ~sub s = + let pattern = KMP.compile sub in + KMP.find ~pattern s start + +let find_all ?(start=0) ~sub s = + let pattern = KMP.compile sub in + let i = ref start in + fun () -> + let res = KMP.find ~pattern s !i in + if res = ~-1 then None + else ( + i := res + KMP.pattern_length pattern; + Some res + ) + +let find_all_l ?start ~sub s = + let rec aux acc g = match g () with + | None -> List.rev acc + | Some i -> aux (i::acc) g + in + aux [] (find_all ?start ~sub s) let mem ?start ~sub s = find ?start ~sub s >= 0 let rfind ~sub s = - let n = String.length sub in - let i = ref (String.length s - n) in - try - while !i >= 0 do - if _is_sub ~sub 0 s !i ~len:n then raise Exit; - decr i - done; - ~-1 - with Exit -> - !i + let pattern = KMP.rcompile sub in + KMP.rfind ~pattern s (String.length s-1) (* Replace substring [s.[pos]....s.[pos+len-1]] by [by] in [s] *) let replace_at_ ~pos ~len ~by s = @@ -109,16 +212,18 @@ let replace ?(which=`All) ~sub ~by s = if sub="" then invalid_arg "CCString.replace"; match which with | `Left -> - let i = find ~sub s in + let i = find ~sub s ~start:0 in if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s | `Right -> let i = rfind ~sub s in if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s | `All -> + (* compile search pattern only once *) + let pattern = KMP.compile sub in let b = Buffer.create (String.length s) in let start = ref 0 in while !start < String.length s do - let i = find ~start:!start ~sub s in + let i = KMP.find ~pattern s !start in if i>=0 then ( (* between last and cur occurrences *) Buffer.add_substring b s !start (i- !start); diff --git a/src/core/CCString.mli b/src/core/CCString.mli index 10194703..cfdfe959 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -96,10 +96,32 @@ val find : ?start:int -> sub:string -> string -> int Should only be used with very small [sub] *) (*$= & ~printer:string_of_int - (find ~sub:"bc" "abcd") 1 - (find ~sub:"bc" "abd") ~-1 - (find ~sub:"a" "_a_a_a_") 1 - (find ~sub:"a" ~start:5 "a1a234a") 6 + 1 (find ~sub:"bc" "abcd") + ~-1 (find ~sub:"bc" "abd") + 1 (find ~sub:"a" "_a_a_a_") + 6 (find ~sub:"a" ~start:5 "a1a234a") +*) + +(*$Q & ~count:300 + Q.(pair printable_string printable_string) (fun (s1,s2) -> \ + let i = find ~sub:s2 s1 in \ + i < 0 || String.sub s1 i (length s2) = s2) +*) + +val find_all : ?start:int -> sub:string -> string -> int gen +(** [find_all ~sub s] finds all occurrences of [sub] in [s] + @param start starting position in [s] + @since NEXT_RELEASE *) + +val find_all_l : ?start:int -> sub:string -> string -> int list +(** [find_all ~sub s] finds all occurrences of [sub] in [s] and returns + them in a list + @param start starting position in [s] + @since NEXT_RELEASE *) + +(*$= & ~printer:Q.Print.(list int) + [1; 6] (find_all_l ~sub:"bc" "abc aabc aab") + [] (find_all_l ~sub:"bc" "abd") *) val mem : ?start:int -> sub:string -> string -> bool @@ -117,11 +139,17 @@ val rfind : sub:string -> string -> int @since 0.12 *) (*$= & ~printer:string_of_int - (rfind ~sub:"bc" "abcd") 1 - (rfind ~sub:"bc" "abd") ~-1 - (rfind ~sub:"a" "_a_a_a_") 5 - (rfind ~sub:"bc" "abcdbcd") 4 - (rfind ~sub:"a" "a1a234a") 6 + 1 (rfind ~sub:"bc" "abcd") + ~-1 (rfind ~sub:"bc" "abd") + 5 (rfind ~sub:"a" "_a_a_a_") + 4 (rfind ~sub:"bc" "abcdbcd") + 6 (rfind ~sub:"a" "a1a234a") +*) + +(*$Q & ~count:300 + Q.(pair printable_string printable_string) (fun (s1,s2) -> \ + let i = rfind ~sub:s2 s1 in \ + i < 0 || String.sub s1 i (length s2) = s2) *) val replace : ?which:[`Left|`Right|`All] -> sub:string -> by:string -> string -> string From b5f54e3424163162396ebe90beaf66a5ae3496f3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 9 Mar 2016 21:10:06 +0100 Subject: [PATCH 03/30] new benchmarks for strings --- _oasis | 2 +- benchs/run_benchs.ml | 110 ++++++++++++++++++++++++++++++++++++++++++- src/string/CCKMP.ml | 24 +--------- src/string/CCKMP.mli | 24 +--------- 4 files changed, 112 insertions(+), 48 deletions(-) diff --git a/_oasis b/_oasis index 1e942b52..437da1e2 100644 --- a/_oasis +++ b/_oasis @@ -155,7 +155,7 @@ Executable run_benchs CompiledObject: best Build$: flag(bench) MainIs: run_benchs.ml - BuildDepends: containers, containers.advanced, + BuildDepends: containers, containers.advanced, QTest2Lib, containers.data, containers.string, containers.iter, containers.thread, sequence, gen, benchmark, hamt diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index a7c5c1d1..3daf52ae 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -1081,7 +1081,6 @@ module Thread = struct end module Graph = struct - (* divisors graph *) let div_children_ i = (* divisors of [i] that are [>= j] *) @@ -1155,6 +1154,115 @@ module Graph = struct ) end +module Str = struct + (* random string, but always returns the same for a given size *) + let rand_str_ n = + let module Q = Quickcheck in + let st = Random.State.make [| n |] in + let gen_c = Q.Gen.oneofl (CCString.to_list "abcdefghijkl") in + Q.Gen.string_size ~gen:gen_c (Q.Gen.return n) st + + (* note: inefficient *) + let find ?(start=0) ~sub s = + let n = String.length sub in + let i = ref start in + try + while !i + n <= String.length s do + if CCString.is_sub ~sub 0 s !i ~len:n then raise Exit; + incr i + done; + -1 + with Exit -> + !i + + (* note: inefficient *) + let rfind ~sub s = + let n = String.length sub in + let i = ref (String.length s - n) in + try + while !i >= 0 do + if CCString.is_sub ~sub 0 s !i ~len:n then raise Exit; + decr i + done; + ~-1 + with Exit -> + !i + + let find_all ?(start=0) ~sub s = + let i = ref start in + fun () -> + let res = find ~sub s ~start:!i in + if res = ~-1 then None + else ( + i := res + String.length sub; + Some res + ) + + let find_all_l ?start ~sub s = find_all ?start ~sub s |> Gen.to_list + + let pp_pb needle haystack = + Format.printf "search needle `%s` in `%s`...@." + needle (String.sub haystack 0 (min 300 (String.length haystack))) + + (* benchmark String.{,r}find *) + let bench_find_ ~dir ~size n = + let needle = rand_str_ size in + let haystack = rand_str_ n in + pp_pb needle haystack; + let mk_naive = match dir with + | `Direct -> fun () -> find ~sub:needle haystack + | `Reverse -> fun () -> rfind ~sub:needle haystack + and mk_current = match dir with + | `Direct -> fun () -> CCString.find ~sub:needle haystack + | `Reverse -> fun () -> CCString.rfind ~sub:needle haystack + in + assert (mk_naive () = mk_current ()); + B.throughputN 3 ~repeat + [ "naive", mk_naive, () + ; "current", mk_current, () + ] + + (* benchmark String.find_all *) + let bench_find_all ~size n = + let needle = rand_str_ size in + let haystack = rand_str_ n in + pp_pb needle haystack; + let mk_naive () = find_all_l ~sub:needle haystack + and mk_current () = CCString.find_all_l ~sub:needle haystack in + assert (mk_naive () = mk_current ()); + B.throughputN 3 ~repeat + [ "naive", mk_naive, () + ; "current", mk_current, () + ] + + let bench_find = bench_find_ ~dir:`Direct + let bench_rfind = bench_find_ ~dir:`Reverse + + let () = B.Tree.register ( + "string" @>>> + [ "find" @>>> + [ "1" @>> app_ints (bench_find ~size:1) [100; 100_000; 500_000] + ; "5" @>> app_ints (bench_find ~size:5) [100; 100_000; 500_000] + ; "15" @>> app_ints (bench_find ~size:15) [100; 100_000; 500_000] + ; "50" @>> app_ints (bench_find ~size:50) [100; 100_000; 500_000] + ; "500" @>> app_ints (bench_find ~size:500) [100_000; 500_000] + ]; + "find_all" @>>> + [ "1" @>> app_ints (bench_find_all ~size:1) [100; 100_000; 500_000] + ; "5" @>> app_ints (bench_find_all ~size:5) [100; 100_000; 500_000] + ; "15" @>> app_ints (bench_find_all ~size:15) [100; 100_000; 500_000] + ; "50" @>> app_ints (bench_find_all ~size:50) [100; 100_000; 500_000] + ; "500" @>> app_ints (bench_find_all ~size:500) [100_000; 500_000] + ]; + "rfind" @>>> + [ "15" @>> app_ints (bench_rfind ~size:15) [100; 100_000; 500_000] + ; "50" @>> app_ints (bench_rfind ~size:50) [100; 100_000; 500_000] + ; "500" @>> app_ints (bench_rfind ~size:500) [100_000; 500_000] + ]; + ]) + +end + module Alloc = struct module type ALLOC_ARR = sig type 'a t diff --git a/src/string/CCKMP.ml b/src/string/CCKMP.ml index 1b7073b5..5511fad1 100644 --- a/src/string/CCKMP.ml +++ b/src/string/CCKMP.ml @@ -1,27 +1,5 @@ -(* -copyright (c) 2013-2014, simon cruanes -all rights reserved. -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) +(* This file is free software, part of containers. See file "license" for more details. *) (** {1 Knuth-Morris-Pratt} *) diff --git a/src/string/CCKMP.mli b/src/string/CCKMP.mli index 7d8f8d56..13b059f5 100644 --- a/src/string/CCKMP.mli +++ b/src/string/CCKMP.mli @@ -1,27 +1,5 @@ -(* -copyright (c) 2013-2014, simon cruanes -all rights reserved. -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) +(* This file is free software, part of containers. See file "license" for more details. *) (** {1 Knuth-Morris-Pratt} *) From fb8661d1ba13b75f3a5525142630cf606930b003 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 9 Mar 2016 21:18:40 +0100 Subject: [PATCH 04/30] moar inlining --- _tags | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_tags b/_tags index bcb2222e..0d8627c3 100644 --- a/_tags +++ b/_tags @@ -155,7 +155,7 @@ true: annot, bin_annot # OASIS_STOP : thread : thread -: inline(25) + or : inline(25) or or : inline(15) and not : warn_A, warn(-4), warn(-44) true: no_alias_deps, safe_string, short_paths From ab0b198f97b8190f23da8c6f4bf7d78b6ed4acdf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 9 Mar 2016 22:24:57 +0100 Subject: [PATCH 05/30] more tests, bugfixes, and benchs for KMP in CCString --- benchs/run_benchs.ml | 25 ++++++++++++--- src/core/CCString.cppo.ml | 66 +++++++++++++++++++++------------------ src/core/CCString.mli | 2 ++ 3 files changed, 57 insertions(+), 36 deletions(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 3daf52ae..593c8da7 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -1156,13 +1156,12 @@ end module Str = struct (* random string, but always returns the same for a given size *) - let rand_str_ n = + let rand_str_ ?(among="abcdefgh") n = let module Q = Quickcheck in let st = Random.State.make [| n |] in - let gen_c = Q.Gen.oneofl (CCString.to_list "abcdefghijkl") in + let gen_c = Q.Gen.oneofl (CCString.to_list among) in Q.Gen.string_size ~gen:gen_c (Q.Gen.return n) st - (* note: inefficient *) let find ?(start=0) ~sub s = let n = String.length sub in let i = ref start in @@ -1175,7 +1174,6 @@ module Str = struct with Exit -> !i - (* note: inefficient *) let rfind ~sub s = let n = String.length sub in let i = ref (String.length s - n) in @@ -1235,6 +1233,19 @@ module Str = struct ; "current", mk_current, () ] + (* benchmark String.find_all on constant strings *) + let bench_find_all_special ~size n = + let needle = CCString.repeat "a" (size-1) ^ "b" in + let haystack = rand_str_ ~among:"ab" n in + pp_pb needle haystack; + let mk_naive () = find_all_l ~sub:needle haystack + and mk_current () = CCString.find_all_l ~sub:needle haystack in + assert (mk_naive () = mk_current ()); + B.throughputN 3 ~repeat + [ "naive", mk_naive, () + ; "current", mk_current, () + ] + let bench_find = bench_find_ ~dir:`Direct let bench_rfind = bench_find_ ~dir:`Reverse @@ -1242,6 +1253,7 @@ module Str = struct "string" @>>> [ "find" @>>> [ "1" @>> app_ints (bench_find ~size:1) [100; 100_000; 500_000] + ; "3" @>> app_ints (bench_find ~size:3) [100; 100_000; 500_000] ; "5" @>> app_ints (bench_find ~size:5) [100; 100_000; 500_000] ; "15" @>> app_ints (bench_find ~size:15) [100; 100_000; 500_000] ; "50" @>> app_ints (bench_find ~size:50) [100; 100_000; 500_000] @@ -1249,13 +1261,16 @@ module Str = struct ]; "find_all" @>>> [ "1" @>> app_ints (bench_find_all ~size:1) [100; 100_000; 500_000] + ; "3" @>> app_ints (bench_find_all ~size:3) [100; 100_000; 500_000] ; "5" @>> app_ints (bench_find_all ~size:5) [100; 100_000; 500_000] ; "15" @>> app_ints (bench_find_all ~size:15) [100; 100_000; 500_000] ; "50" @>> app_ints (bench_find_all ~size:50) [100; 100_000; 500_000] ; "500" @>> app_ints (bench_find_all ~size:500) [100_000; 500_000] + ; "special" @>> app_ints (bench_find_all_special ~size:6) [100_000; 500_000] ]; "rfind" @>>> - [ "15" @>> app_ints (bench_rfind ~size:15) [100; 100_000; 500_000] + [ "3" @>> app_ints (bench_rfind ~size:3) [100; 100_000; 500_000] + ; "15" @>> app_ints (bench_rfind ~size:15) [100; 100_000; 500_000] ; "50" @>> app_ints (bench_rfind ~size:50) [100; 100_000; 500_000] ; "500" @>> app_ints (bench_rfind ~size:500) [100_000; 500_000] ]; diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 5bca58d8..ee2ab50b 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -74,6 +74,7 @@ type _ direction = | Direct : [`Direct] direction | Reverse : [`Reverse] direction +(* we follow https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm *) module KMP = struct type 'a pattern = { failure : int array; @@ -88,7 +89,7 @@ module KMP = struct let get_ : type a. dir:a direction -> string -> int -> char = fun ~dir -> match dir with - | Direct -> (fun s i -> s.[i]) + | Direct -> String.get | Reverse -> (fun s i -> s.[String.length s - i - 1]) let compile_ @@ -102,26 +103,28 @@ module KMP = struct | _ -> (* at least 2 elements, the algorithm can work *) let failure = Array.make len 0 in + failure.(0) <- -1; (* i: current index in str *) - let i = ref 1 in + let i = ref 2 in (* j: index of candidate substring *) let j = ref 0 in - while !i < len-1 do + while !i < len do match !j with - | _ when get str !i = get str !j -> + | _ when get str (!i-1) = get str !j -> (* substring starting at !j continues matching current char *) - i := !i+1; - j := !j+1; + incr j; failure.(!i) <- !j; + incr i; | 0 -> (* back to the beginning *) - i := !i+1; failure.(!i) <- 0; + incr i; | _ -> (* fallback for the prefix string *) assert (!j > 0); j := failure.(!j) done; + (* Format.printf "{@[failure:%a, str:%s@]}@." CCFormat.(array int) failure str; *) { failure; str; } let compile s = compile_ ~dir:Direct s @@ -139,23 +142,30 @@ module KMP = struct let i = ref idx in let j = ref 0 in let pat_len = pattern_length pattern in - while !i < len && !j < pat_len do - let c = get s !i in + while !j < pat_len && !i + !j < len do + let c = get s (!i + !j) in let expected = get pattern.str !j in if c = expected then ( (* char matches *) - i := !i + 1; j := !j + 1 + incr j; ) else ( - if !j=0 - then (* beginning of the pattern *) - i := !i + 1 - else (* follow the failure link *) - j := pattern.failure.(!j) + let fail_offset = pattern.failure.(!j) in + if fail_offset >= 0 + then ( + assert (fail_offset < !j); + (* follow the failure link *) + i := !i + !j - fail_offset; + j := fail_offset + ) else ( + (* beginning of pattern *) + j := 0; + incr i + ) ) done; if !j = pat_len - then !i - pat_len + then !i else -1 let find ~pattern s i = find_ ~dir:Direct ~pattern s i @@ -242,29 +252,20 @@ module Split = struct | SplitStop | SplitAt of int (* previous *) - (* [by_j... prefix of s_i...] ? *) - let rec _is_prefix ~by s i j = - j = String.length by - || - ( i < String.length s && - s.[i] = by.[j] && - _is_prefix ~by s (i+1) (j+1) - ) - let rec _split ~by s state = match state with | SplitStop -> None - | SplitAt prev -> _split_search ~by s prev prev - and _split_search ~by s prev i = - if i >= String.length s + | SplitAt prev -> _split_search ~by s prev + and _split_search ~by s prev = + let j = KMP.find ~pattern:by s prev in + if j < 0 then Some (SplitStop, prev, String.length s - prev) - else if _is_prefix ~by s i 0 - then Some (SplitAt (i+String.length by), prev, i-prev) - else _split_search ~by s prev (i+1) + else Some (SplitAt (j+KMP.pattern_length by), prev, j-prev) let _tuple3 x y z = x,y,z let _mkgen ~by s k = let state = ref (SplitAt 0) in + let by = KMP.compile by in fun () -> match _split ~by s !state with | None -> None @@ -277,6 +278,7 @@ module Split = struct let gen_cpy ~by s = _mkgen ~by s String.sub let _mklist ~by s k = + let by = KMP.compile by in let rec build acc state = match _split ~by s state with | None -> List.rev acc | Some (state', i, len) -> @@ -289,6 +291,7 @@ module Split = struct let list_cpy ~by s = _mklist ~by s String.sub let _mkklist ~by s k = + let by = KMP.compile by in let rec make state () = match _split ~by s state with | None -> `Nil | Some (state', i, len) -> @@ -300,6 +303,7 @@ module Split = struct let klist_cpy ~by s = _mkklist ~by s String.sub let _mkseq ~by s f k = + let by = KMP.compile by in let rec aux state = match _split ~by s state with | None -> () | Some (state', i, len) -> k (f s i len); aux state' diff --git a/src/core/CCString.mli b/src/core/CCString.mli index cfdfe959..930eadc3 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -122,6 +122,8 @@ val find_all_l : ?start:int -> sub:string -> string -> int list (*$= & ~printer:Q.Print.(list int) [1; 6] (find_all_l ~sub:"bc" "abc aabc aab") [] (find_all_l ~sub:"bc" "abd") + [76] (find_all_l ~sub:"aaaaaa" \ + "aabbaabbaaaaabbbbabababababbbbabbbabbaaababbbaaabaabbaabbaaaabbababaaaabbaabaaaaaabbbaaaabababaabaaabbaabaaaabbababbaabbaaabaabbabababbbaabababaaabaaababbbaaaabbbaabaaababbabaababbaabbaaaaabababbabaababbbaaabbabbabababaaaabaaababaaaaabbabbaabbabbbbbbbbbbbbbbaabbabbbbbabbaaabbabbbbabaaaaabbababbbaaaa") *) val mem : ?start:int -> sub:string -> string -> bool From ce6d9819735eb07d03f53c8880dd084a3df6fac0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 9 Mar 2016 23:34:38 +0100 Subject: [PATCH 06/30] add a special case for pattern of length 1 in CCString.find --- benchs/run_benchs.ml | 14 +++-- src/core/CCString.cppo.ml | 105 ++++++++++++++++++++++++-------------- 2 files changed, 77 insertions(+), 42 deletions(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 593c8da7..bb7fd23e 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -1158,7 +1158,7 @@ module Str = struct (* random string, but always returns the same for a given size *) let rand_str_ ?(among="abcdefgh") n = let module Q = Quickcheck in - let st = Random.State.make [| n |] in + let st = Random.State.make [| n + 17 |] in let gen_c = Q.Gen.oneofl (CCString.to_list among) in Q.Gen.string_size ~gen:gen_c (Q.Gen.return n) st @@ -1213,11 +1213,15 @@ module Str = struct and mk_current = match dir with | `Direct -> fun () -> CCString.find ~sub:needle haystack | `Reverse -> fun () -> CCString.rfind ~sub:needle haystack + and mk_current_compiled = match dir with + | `Direct -> let f = CCString.find ~start:0 ~sub:needle in fun () -> f haystack + | `Reverse -> let f = CCString.rfind ~sub:needle in fun () -> f haystack in assert (mk_naive () = mk_current ()); B.throughputN 3 ~repeat [ "naive", mk_naive, () ; "current", mk_current, () + ; "current_compiled", mk_current_compiled, () ] (* benchmark String.find_all *) @@ -1226,11 +1230,14 @@ module Str = struct let haystack = rand_str_ n in pp_pb needle haystack; let mk_naive () = find_all_l ~sub:needle haystack - and mk_current () = CCString.find_all_l ~sub:needle haystack in + and mk_current () = CCString.find_all_l ~sub:needle haystack + and mk_current_compiled = + let f = CCString.find_all_l ~start:0 ~sub:needle in fun () -> f haystack in assert (mk_naive () = mk_current ()); B.throughputN 3 ~repeat [ "naive", mk_naive, () ; "current", mk_current, () + ; "current_compiled", mk_current_compiled, () ] (* benchmark String.find_all on constant strings *) @@ -1252,8 +1259,7 @@ module Str = struct let () = B.Tree.register ( "string" @>>> [ "find" @>>> - [ "1" @>> app_ints (bench_find ~size:1) [100; 100_000; 500_000] - ; "3" @>> app_ints (bench_find ~size:3) [100; 100_000; 500_000] + [ "3" @>> app_ints (bench_find ~size:3) [100; 100_000; 500_000] ; "5" @>> app_ints (bench_find ~size:5) [100; 100_000; 500_000] ; "15" @>> app_ints (bench_find ~size:15) [100; 100_000; 500_000] ; "50" @>> app_ints (bench_find ~size:50) [100; 100_000; 500_000] diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index ee2ab50b..5c14f724 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -75,15 +75,15 @@ type _ direction = | Reverse : [`Reverse] direction (* we follow https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm *) -module KMP = struct - type 'a pattern = { +module Find = struct + type 'a kmp_pattern = { failure : int array; str : string; } (* invariant: [length failure = length str]. We use a phantom type to avoid mixing the directions. *) - let pattern_length p = String.length p.str + let kmp_pattern_length p = String.length p.str (* access the [i]-th element of [s] according to direction [dir] *) let get_ @@ -92,8 +92,8 @@ module KMP = struct | Direct -> String.get | Reverse -> (fun s i -> s.[String.length s - i - 1]) - let compile_ - : type a. dir:a direction -> string -> a pattern + let kmp_compile_ + : type a. dir:a direction -> string -> a kmp_pattern = fun ~dir str -> let len = length str in let get = get_ ~dir in (* how to read elements of the string *) @@ -127,21 +127,21 @@ module KMP = struct (* Format.printf "{@[failure:%a, str:%s@]}@." CCFormat.(array int) failure str; *) { failure; str; } - let compile s = compile_ ~dir:Direct s - let rcompile s = compile_ ~dir:Reverse s + let kmp_compile s = kmp_compile_ ~dir:Direct s + let kmp_rcompile s = kmp_compile_ ~dir:Reverse s (* proper search function. [i] index in [s] [j] index in [pattern] [len] length of [s] *) - let find_ - : type a. dir:a direction -> pattern:a pattern -> string -> int -> int + let kmp_find_ + : type a. dir:a direction -> pattern:a kmp_pattern -> string -> int -> int = fun ~dir ~pattern s idx -> let len = length s in let get = get_ ~dir in let i = ref idx in let j = ref 0 in - let pat_len = pattern_length pattern in + let pat_len = kmp_pattern_length pattern in while !j < pat_len && !i + !j < len do let c = get s (!i + !j) in let expected = get pattern.str !j in @@ -168,34 +168,63 @@ module KMP = struct then !i else -1 - let find ~pattern s i = find_ ~dir:Direct ~pattern s i + let kmp_find ~pattern s i = kmp_find_ ~dir:Direct ~pattern s i - let rfind ~pattern s i = + let kmp_rfind ~pattern s i = let i = String.length s - i - 1 in - let res = find_ ~dir:Reverse ~pattern s i in + let res = kmp_find_ ~dir:Reverse ~pattern s i in (* adjust result: first, [res = string.length s - res -1] to convert back to real indices; then, what we got is actually the position of the end of the pattern, so we subtract the [length of the pattern -1] to obtain the real result. *) if res = ~-1 then res - else (String.length s - res) - pattern_length pattern + else (String.length s - res) - kmp_pattern_length pattern + + type 'a pattern = + | P_char of char + | P_KMP of 'a kmp_pattern + + let pattern_length = function + | P_char _ -> 1 + | P_KMP p -> kmp_pattern_length p + + let compile ~sub : [`Direct] pattern = + if length sub=1 + then P_char sub.[0] + else P_KMP (kmp_compile sub) + + let rcompile ~sub : [`Reverse] pattern = + if length sub=1 + then P_char sub.[0] + else P_KMP (kmp_rcompile sub) + + let find ~pattern s start = match pattern with + | P_char c -> + (try String.index_from s start c with Not_found -> -1) + | P_KMP pattern -> kmp_find ~pattern s start + + let rfind ~pattern s start = match pattern with + | P_char c -> + (try String.rindex_from s start c with Not_found -> -1) + | P_KMP pattern -> kmp_rfind ~pattern s start end -let find ?(start=0) ~sub s = - let pattern = KMP.compile sub in - KMP.find ~pattern s start +let find ?(start=0) ~sub = + let pattern = Find.compile ~sub in + fun s -> Find.find ~pattern s start -let find_all ?(start=0) ~sub s = - let pattern = KMP.compile sub in - let i = ref start in - fun () -> - let res = KMP.find ~pattern s !i in - if res = ~-1 then None - else ( - i := res + KMP.pattern_length pattern; - Some res - ) +let find_all ?(start=0) ~sub = + let pattern = Find.compile ~sub in + fun s -> + let i = ref start in + fun () -> + let res = Find.find ~pattern s !i in + if res = ~-1 then None + else ( + i := res + Find.pattern_length pattern; + Some res + ) let find_all_l ?start ~sub s = let rec aux acc g = match g () with @@ -206,9 +235,9 @@ let find_all_l ?start ~sub s = let mem ?start ~sub s = find ?start ~sub s >= 0 -let rfind ~sub s = - let pattern = KMP.rcompile sub in - KMP.rfind ~pattern s (String.length s-1) +let rfind ~sub = + let pattern = Find.rcompile ~sub in + fun s -> Find.rfind ~pattern s (String.length s-1) (* Replace substring [s.[pos]....s.[pos+len-1]] by [by] in [s] *) let replace_at_ ~pos ~len ~by s = @@ -229,11 +258,11 @@ let replace ?(which=`All) ~sub ~by s = if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s | `All -> (* compile search pattern only once *) - let pattern = KMP.compile sub in + let pattern = Find.compile ~sub in let b = Buffer.create (String.length s) in let start = ref 0 in while !start < String.length s do - let i = KMP.find ~pattern s !start in + let i = Find.find ~pattern s !start in if i>=0 then ( (* between last and cur occurrences *) Buffer.add_substring b s !start (i- !start); @@ -256,16 +285,16 @@ module Split = struct | SplitStop -> None | SplitAt prev -> _split_search ~by s prev and _split_search ~by s prev = - let j = KMP.find ~pattern:by s prev in + let j = Find.find ~pattern:by s prev in if j < 0 then Some (SplitStop, prev, String.length s - prev) - else Some (SplitAt (j+KMP.pattern_length by), prev, j-prev) + else Some (SplitAt (j+Find.pattern_length by), prev, j-prev) let _tuple3 x y z = x,y,z let _mkgen ~by s k = let state = ref (SplitAt 0) in - let by = KMP.compile by in + let by = Find.compile ~sub:by in fun () -> match _split ~by s !state with | None -> None @@ -278,7 +307,7 @@ module Split = struct let gen_cpy ~by s = _mkgen ~by s String.sub let _mklist ~by s k = - let by = KMP.compile by in + let by = Find.compile ~sub:by in let rec build acc state = match _split ~by s state with | None -> List.rev acc | Some (state', i, len) -> @@ -291,7 +320,7 @@ module Split = struct let list_cpy ~by s = _mklist ~by s String.sub let _mkklist ~by s k = - let by = KMP.compile by in + let by = Find.compile ~sub:by in let rec make state () = match _split ~by s state with | None -> `Nil | Some (state', i, len) -> @@ -303,7 +332,7 @@ module Split = struct let klist_cpy ~by s = _mkklist ~by s String.sub let _mkseq ~by s f k = - let by = KMP.compile by in + let by = Find.compile ~sub:by in let rec aux state = match _split ~by s state with | None -> () | Some (state', i, len) -> k (f s i len); aux state' From 7c9633f06fc56d2fb8988dc4a62a678e453c82d9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 10 Mar 2016 16:34:29 +0100 Subject: [PATCH 07/30] change the semantics of `CCString.find_all` (allow overlaps) --- src/core/CCString.cppo.ml | 2 +- src/core/CCString.mli | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 5c14f724..9c72ae0c 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -222,7 +222,7 @@ let find_all ?(start=0) ~sub = let res = Find.find ~pattern s !i in if res = ~-1 then None else ( - i := res + Find.pattern_length pattern; + i := res + 1; (* possible overlap *) Some res ) diff --git a/src/core/CCString.mli b/src/core/CCString.mli index 930eadc3..79bbbadf 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -109,7 +109,8 @@ val find : ?start:int -> sub:string -> string -> int *) val find_all : ?start:int -> sub:string -> string -> int gen -(** [find_all ~sub s] finds all occurrences of [sub] in [s] +(** [find_all ~sub s] finds all occurrences of [sub] in [s], even overlapping + instances. @param start starting position in [s] @since NEXT_RELEASE *) From f3f6df104eb25e51393554bd995608e7c4c8223c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 10 Mar 2016 16:34:42 +0100 Subject: [PATCH 08/30] more benchs --- .merlin | 1 + benchs/run_benchs.ml | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.merlin b/.merlin index 3b321723..776492dd 100644 --- a/.merlin +++ b/.merlin @@ -26,4 +26,5 @@ PKG bigarray PKG sequence PKG hamt PKG gen +PKG QTest2Lib FLG -w +a -w -4 -w -44 diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index bb7fd23e..aabc99f6 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -1192,7 +1192,7 @@ module Str = struct let res = find ~sub s ~start:!i in if res = ~-1 then None else ( - i := res + String.length sub; + i := res + 1; Some res ) @@ -1243,7 +1243,7 @@ module Str = struct (* benchmark String.find_all on constant strings *) let bench_find_all_special ~size n = let needle = CCString.repeat "a" (size-1) ^ "b" in - let haystack = rand_str_ ~among:"ab" n in + let haystack = CCString.repeat "a" n in pp_pb needle haystack; let mk_naive () = find_all_l ~sub:needle haystack and mk_current () = CCString.find_all_l ~sub:needle haystack in @@ -1272,7 +1272,11 @@ module Str = struct ; "15" @>> app_ints (bench_find_all ~size:15) [100; 100_000; 500_000] ; "50" @>> app_ints (bench_find_all ~size:50) [100; 100_000; 500_000] ; "500" @>> app_ints (bench_find_all ~size:500) [100_000; 500_000] - ; "special" @>> app_ints (bench_find_all_special ~size:6) [100_000; 500_000] + ; "special" @>>> + [ "6" @>> app_ints (bench_find_all_special ~size:6) [100_000; 500_000] + ; "30" @>> app_ints (bench_find_all_special ~size:30) [100_000; 500_000] + ; "100" @>> app_ints (bench_find_all_special ~size:100) [100_000; 500_000] + ] ]; "rfind" @>>> [ "3" @>> app_ints (bench_rfind ~size:3) [100; 100_000; 500_000] From 3a34cc9aa8cea54c1a9d9d2a4b5c9f82a5d6f1d1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 11 Mar 2016 16:11:16 +0100 Subject: [PATCH 09/30] add `CCHet`, heterogeneous containers (table/map) indexed by keys Difference with CCMix{tbl,map} is that there is no other key than the polymorphic injection. --- _oasis | 2 +- src/data/CCHet.ml | 133 +++++++++++++++++++++++++++++++++++++++++++++ src/data/CCHet.mli | 90 ++++++++++++++++++++++++++++++ 3 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 src/data/CCHet.ml create mode 100644 src/data/CCHet.mli diff --git a/_oasis b/_oasis index 437da1e2..700291cd 100644 --- a/_oasis +++ b/_oasis @@ -80,7 +80,7 @@ Library "containers_data" CCMixmap, CCRingBuffer, CCIntMap, CCPersistentArray, CCMixset, CCHashconsedSet, CCGraph, CCHashSet, CCBitField, CCHashTrie, CCBloom, CCWBTree, CCRAL, CCAllocCache, - CCImmutArray + CCImmutArray, CCHet BuildDepends: bytes # BuildDepends: bytes, bisect_ppx FindlibParent: containers diff --git a/src/data/CCHet.ml b/src/data/CCHet.ml new file mode 100644 index 00000000..ed46f1d3 --- /dev/null +++ b/src/data/CCHet.ml @@ -0,0 +1,133 @@ + +(* This file is free software, part of containers. See file "license" for more details. *) + +(** {1 Associative containers with Heterogenerous Values} *) + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +module Key = struct + type 'a t = int + + let create = + let _n = ref 0 in + fun () -> + incr _n; + !_n + + let id a = a + + let equal + : type a b. a t -> b t -> bool + = fun a b -> + let ia = (a : a t :> int) in + let ib = (b : b t :> int) in + ia=ib + + (* XXX: the only ugly part *) + (* [cast_res k1 k2 v2] casts [v2] into a value of type [a] if [k1=k2] *) + let cast_res_ : type a b. a t -> b t -> b -> a + = fun k1 k2 v2 -> + if k1=k2 then Obj.magic v2 else raise Not_found +end + +type pair = + | Pair : 'a Key.t * 'a -> pair + +module Tbl = struct + module M = Hashtbl.Make(struct + type t = int + let equal (i:int) j = i=j + let hash (i:int) = Hashtbl.hash i + end) + + type t = pair M.t + + let create ?(size=16) () = M.create size + + let mem t k = M.mem t (Key.id k) + + let find_exn (type a) t (k : a Key.t) : a = + let Pair (k', v) = M.find t (Key.id k) in + Key.cast_res_ k k' v + + let find t k = + try Some (find_exn t k) + with Not_found -> None + + let add_pair_ t p = + let Pair (k,_) = p in + M.replace t (Key.id k) p + + let add t k v = add_pair_ t (Pair (k,v)) + + let length t = M.length t + + let iter f t = M.iter (fun _ pair -> f pair) t + + let to_seq t yield = iter yield t + + let to_list t = M.fold (fun _ p l -> p::l) t [] + + let add_list t l = List.iter (add_pair_ t) l + + let add_seq t seq = seq (add_pair_ t) + + let of_list l = + let t = create() in + add_list t l; + t + + let of_seq seq = + let t = create() in + add_seq t seq; + t +end + +module Map = struct + module M = Map.Make(struct + type t = int + let compare (i:int) j = Pervasives.compare i j + end) + + type t = pair M.t + + let empty = M.empty + + let mem k t = M.mem (Key.id k) t + + let find_exn (type a) (k : a Key.t) t : a = + let Pair (k', v) = M.find (Key.id k) t in + Key.cast_res_ k k' v + + let find k t = + try Some (find_exn k t) + with Not_found -> None + + let add_pair_ p t = + let Pair (k,_) = p in + M.add (Key.id k) p t + + let add k v t = add_pair_ (Pair (k,v)) t + + let cardinal t = M.cardinal t + + let length = cardinal + + let iter f t = M.iter (fun _ pair -> f pair) t + + let to_seq t yield = iter yield t + + let to_list t = M.fold (fun _ p l -> p::l) t [] + + let add_list t l = List.fold_right add_pair_ l t + + let add_seq t seq = + let t = ref t in + seq (fun pair -> t := add_pair_ pair !t); + !t + + let of_list l = add_list empty l + + let of_seq seq = add_seq empty seq +end diff --git a/src/data/CCHet.mli b/src/data/CCHet.mli new file mode 100644 index 00000000..1fd33be9 --- /dev/null +++ b/src/data/CCHet.mli @@ -0,0 +1,90 @@ + +(* This file is free software, part of containers. See file "license" for more details. *) + +(** {1 Associative containers with Heterogenerous Values} + + This is similar to {!CCMixtbl}, but the injection is directly used as + a key. + + @since NEXT_RELEASE *) + +type 'a sequence = ('a -> unit) -> unit +type 'a gen = unit -> 'a option + +module Key : sig + type 'a t + + val create : unit -> 'a t + + val equal : 'a t -> 'a t -> bool + (** Compare two keys that have compatible types *) +end + +type pair = + | Pair : 'a Key.t * 'a -> pair + +(** {2 Imperative table indexed by {!Key}} *) +module Tbl : sig + type t + + val create : ?size:int -> unit -> t + + val mem : t -> _ Key.t -> bool + + val add : t -> 'a Key.t -> 'a -> unit + + val length : t -> int + + val find : t -> 'a Key.t -> 'a option + + val find_exn : t -> 'a Key.t -> 'a + (** @raise Not_found if the key is not in the table *) + + val iter : (pair -> unit) -> t -> unit + + val to_seq : t -> pair sequence + + val of_seq : pair sequence -> t + + val add_seq : t -> pair sequence -> unit + + val add_list : t -> pair list -> unit + + val of_list : pair list -> t + + val to_list : t -> pair list +end + +(** {2 Immutable map} *) +module Map : sig + type t + + val empty : t + + val mem : _ Key.t -> t -> bool + + val add : 'a Key.t -> 'a -> t -> t + + val length : t -> int + + val cardinal : t -> int + + val find : 'a Key.t -> t -> 'a option + + val find_exn : 'a Key.t -> t -> 'a + (** @raise Not_found if the key is not in the table *) + + val iter : (pair -> unit) -> t -> unit + + val to_seq : t -> pair sequence + + val of_seq : pair sequence -> t + + val add_seq : t -> pair sequence -> t + + val add_list : t -> pair list -> t + + val of_list : pair list -> t + + val to_list : t -> pair list +end From 0c04df58b011de710ee621466fb4043750770954 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 11 Mar 2016 21:16:34 +0100 Subject: [PATCH 10/30] update CCHet to not use Obj.magic; add test --- src/data/CCHet.ml | 124 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 33 deletions(-) diff --git a/src/data/CCHet.ml b/src/data/CCHet.ml index ed46f1d3..ff86f672 100644 --- a/src/data/CCHet.ml +++ b/src/data/CCHet.ml @@ -3,37 +3,80 @@ (** {1 Associative containers with Heterogenerous Values} *) +(*$R + let k1 : int Key.t = Key.create() in + let k2 : int Key.t = Key.create() in + let k3 : string Key.t = Key.create() in + let k4 : float Key.t = Key.create() in + + let tbl = Tbl.create () in + + Tbl.add tbl k1 1; + Tbl.add tbl k2 2; + Tbl.add tbl k3 "k3"; + + assert_equal (Some 1) (Tbl.find tbl k1); + assert_equal (Some 2) (Tbl.find tbl k2); + assert_equal (Some "k3") (Tbl.find tbl k3); + assert_equal None (Tbl.find tbl k4); + assert_equal 3 (Tbl.length tbl); + + Tbl.add tbl k1 10; + assert_equal (Some 10) (Tbl.find tbl k1); + assert_equal 3 (Tbl.length tbl); + assert_equal None (Tbl.find tbl k4); + + Tbl.add tbl k4 0.0; + assert_equal (Some 0.0) (Tbl.find tbl k4); + + () + + +*) + type 'a sequence = ('a -> unit) -> unit type 'a gen = unit -> 'a option +module type KEY_IMPL = sig + type t + exception Store of t + val id : int +end + module Key = struct - type 'a t = int + type 'a t = (module KEY_IMPL with type t = 'a) - let create = - let _n = ref 0 in - fun () -> - incr _n; - !_n + let _n = ref 0 - let id a = a + let create (type k) () = + incr _n; + let id = !_n in + let module K = struct + type t = k + let id = id + exception Store of k + end in + (module K : KEY_IMPL with type t = k) + + let id (type k) (module K : KEY_IMPL with type t = k) = K.id let equal : type a b. a t -> b t -> bool - = fun a b -> - let ia = (a : a t :> int) in - let ib = (b : b t :> int) in - ia=ib - - (* XXX: the only ugly part *) - (* [cast_res k1 k2 v2] casts [v2] into a value of type [a] if [k1=k2] *) - let cast_res_ : type a b. a t -> b t -> b -> a - = fun k1 k2 v2 -> - if k1=k2 then Obj.magic v2 else raise Not_found + = fun (module K1) (module K2) -> K1.id = K2.id end type pair = | Pair : 'a Key.t * 'a -> pair +type exn_pair = + | E_pair : 'a Key.t * exn -> exn_pair + +let pair_of_e_pair (E_pair (k,e)) = + let module K = (val k) in + match e with + | K.Store v -> Pair (k,v) + | _ -> assert false + module Tbl = struct module M = Hashtbl.Make(struct type t = int @@ -41,33 +84,38 @@ module Tbl = struct let hash (i:int) = Hashtbl.hash i end) - type t = pair M.t + type t = exn_pair M.t let create ?(size=16) () = M.create size let mem t k = M.mem t (Key.id k) let find_exn (type a) t (k : a Key.t) : a = - let Pair (k', v) = M.find t (Key.id k) in - Key.cast_res_ k k' v + let module K = (val k) in + let E_pair (_, v) = M.find t K.id in + match v with + | K.Store v -> v + | _ -> assert false let find t k = try Some (find_exn t k) with Not_found -> None let add_pair_ t p = - let Pair (k,_) = p in - M.replace t (Key.id k) p + let Pair (k,v) = p in + let module K = (val k) in + let p = E_pair (k, K.Store v) in + M.replace t K.id p let add t k v = add_pair_ t (Pair (k,v)) let length t = M.length t - let iter f t = M.iter (fun _ pair -> f pair) t + let iter f t = M.iter (fun _ pair -> f (pair_of_e_pair pair)) t let to_seq t yield = iter yield t - let to_list t = M.fold (fun _ p l -> p::l) t [] + let to_list t = M.fold (fun _ p l -> pair_of_e_pair p::l) t [] let add_list t l = List.iter (add_pair_ t) l @@ -90,35 +138,45 @@ module Map = struct let compare (i:int) j = Pervasives.compare i j end) - type t = pair M.t + type t = exn_pair M.t let empty = M.empty let mem k t = M.mem (Key.id k) t let find_exn (type a) (k : a Key.t) t : a = - let Pair (k', v) = M.find (Key.id k) t in - Key.cast_res_ k k' v + let module K = (val k) in + let E_pair (_, e) = M.find K.id t in + match e with + | K.Store v -> v + | _ -> assert false let find k t = try Some (find_exn k t) with Not_found -> None - let add_pair_ p t = - let Pair (k,_) = p in - M.add (Key.id k) p t + let add_e_pair_ p t = + let E_pair ((module K),_) = p in + M.add K.id p t - let add k v t = add_pair_ (Pair (k,v)) t + let add_pair_ p t = + let Pair ((module K) as k,v) = p in + let p = E_pair (k, K.Store v) in + M.add K.id p t + + let add (type a) (k : a Key.t) v t = + let module K = (val k) in + add_e_pair_ (E_pair (k, K.Store v)) t let cardinal t = M.cardinal t let length = cardinal - let iter f t = M.iter (fun _ pair -> f pair) t + let iter f t = M.iter (fun _ p -> f (pair_of_e_pair p)) t let to_seq t yield = iter yield t - let to_list t = M.fold (fun _ p l -> p::l) t [] + let to_list t = M.fold (fun _ p l -> pair_of_e_pair p::l) t [] let add_list t l = List.fold_right add_pair_ l t From c7766d195a925fda2f2ea098e082f5b05dee18bd Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 11 Mar 2016 23:08:27 +0100 Subject: [PATCH 11/30] small comment --- doc/build_deps.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/build_deps.ml b/doc/build_deps.ml index 37633b20..7763f622 100755 --- a/doc/build_deps.ml +++ b/doc/build_deps.ml @@ -1,5 +1,8 @@ #!/usr/bin/env ocaml +(* note: this requires to generate documentation first, so that + .odoc files are generated *) + #use "topfind";; #require "containers";; #require "containers.io";; From 33dd681acd62c1c6b8d2622cc9675344d748fe34 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 14 Mar 2016 20:04:52 +0100 Subject: [PATCH 12/30] update headers --- src/data/CCPersistentHashtbl.ml | 24 +----------------------- src/data/CCPersistentHashtbl.mli | 24 +----------------------- 2 files changed, 2 insertions(+), 46 deletions(-) diff --git a/src/data/CCPersistentHashtbl.ml b/src/data/CCPersistentHashtbl.ml index 1fd06f67..09abe04f 100644 --- a/src/data/CCPersistentHashtbl.ml +++ b/src/data/CCPersistentHashtbl.ml @@ -1,27 +1,5 @@ -(* -Copyright (c) 2013, Simon Cruanes -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. Redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) +(* This file is free software, part of containers. See file "license" for more details. *) (** {1 Persistent hash-table on top of OCaml's hashtables} *) diff --git a/src/data/CCPersistentHashtbl.mli b/src/data/CCPersistentHashtbl.mli index 6fed4d96..1fa02fee 100644 --- a/src/data/CCPersistentHashtbl.mli +++ b/src/data/CCPersistentHashtbl.mli @@ -1,27 +1,5 @@ -(* -Copyright (c) 2013, Simon Cruanes -All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. Redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) +(* This file is free software, part of containers. See file "license" for more details. *) (** {1 Persistent hash-table on top of OCaml's hashtables} From d694d20b26fb81a559ef771fba928657f2154d9b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 14 Mar 2016 20:13:20 +0100 Subject: [PATCH 13/30] modify `CCPersistentHashtbl.merge` and add `CCMap.merge_safe` --- src/core/CCMap.ml | 15 +++++++++++++++ src/core/CCMap.mli | 6 ++++++ src/data/CCPersistentHashtbl.ml | 14 +++++++++----- src/data/CCPersistentHashtbl.mli | 5 +++-- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/core/CCMap.ml b/src/core/CCMap.ml index d8a69a32..d9114c41 100644 --- a/src/core/CCMap.ml +++ b/src/core/CCMap.ml @@ -24,6 +24,12 @@ module type S = sig [k] is removed from [m], and if the result is [Some v'] then [add k v' m] is returned. *) + val merge_safe : + f:(key -> [`Left of 'a | `Right of 'b | `Both of 'a * 'b] -> 'c option) -> + 'a t -> 'b t -> 'c t + (** [merge_safe ~f a b] merges the maps [a] and [b] together. + @since NEXT_RELEASE *) + val of_seq : (key * 'a) sequence -> 'a t val add_seq : 'a t -> (key * 'a) sequence -> 'a t @@ -75,6 +81,15 @@ module Make(O : Map.OrderedType) = struct | None -> remove k m | Some v' -> add k v' m + let merge_safe ~f a b = + merge + (fun k v1 v2 -> match v1, v2 with + | None, None -> assert false + | Some v1, None -> f k (`Left v1) + | None, Some v2 -> f k (`Right v2) + | Some v1, Some v2 -> f k (`Both (v1,v2))) + a b + let add_seq m s = let m = ref m in s (fun (k,v) -> m := add k v !m); diff --git a/src/core/CCMap.mli b/src/core/CCMap.mli index f03b59ff..d97c973b 100644 --- a/src/core/CCMap.mli +++ b/src/core/CCMap.mli @@ -27,6 +27,12 @@ module type S = sig [k] is removed from [m], and if the result is [Some v'] then [add k v' m] is returned. *) + val merge_safe : + f:(key -> [`Left of 'a | `Right of 'b | `Both of 'a * 'b] -> 'c option) -> + 'a t -> 'b t -> 'c t + (** [merge_safe ~f a b] merges the maps [a] and [b] together. + @since NEXT_RELEASE *) + val of_seq : (key * 'a) sequence -> 'a t val add_seq : 'a t -> (key * 'a) sequence -> 'a t diff --git a/src/data/CCPersistentHashtbl.ml b/src/data/CCPersistentHashtbl.ml index 09abe04f..d0d7ab8b 100644 --- a/src/data/CCPersistentHashtbl.ml +++ b/src/data/CCPersistentHashtbl.ml @@ -67,8 +67,9 @@ module type S = sig (** Fresh copy of the table; the underlying structure is not shared anymore, so using both tables alternatively will be efficient *) - val merge : (key -> 'a option -> 'b option -> 'c option) -> - 'a t -> 'b t -> 'c t + val merge : + (key -> [`Left of 'a | `Right of 'b | `Both of 'a * 'b] -> 'c option) -> + 'a t -> 'b t -> 'c t (** Merge two tables together into a new table. The function's argument correspond to values associated with the key (if present); if the function returns [None] the key will not appear in the result. *) @@ -543,8 +544,11 @@ module Make(H : HashedType) : S with type key = H.t = struct let tbl = create (max (length t1) (length t2)) in let tbl = fold (fun tbl k v1 -> - let v2 = try Some (find t2 k) with Not_found -> None in - match f k (Some v1) v2 with + let comb = + try `Both (v1, find t2 k) + with Not_found -> `Left v1 + in + match f k comb with | None -> tbl | Some v' -> replace tbl k v') tbl t1 @@ -552,7 +556,7 @@ module Make(H : HashedType) : S with type key = H.t = struct fold (fun tbl k v2 -> if mem t1 k then tbl - else match f k None (Some v2) with + else match f k (`Right v2) with | None -> tbl | Some v' -> replace tbl k v' ) tbl t2 diff --git a/src/data/CCPersistentHashtbl.mli b/src/data/CCPersistentHashtbl.mli index 1fa02fee..e2b12d9d 100644 --- a/src/data/CCPersistentHashtbl.mli +++ b/src/data/CCPersistentHashtbl.mli @@ -74,8 +74,9 @@ module type S = sig (** Fresh copy of the table; the underlying structure is not shared anymore, so using both tables alternatively will be efficient *) - val merge : (key -> 'a option -> 'b option -> 'c option) -> - 'a t -> 'b t -> 'c t + val merge : + f:(key -> [`Left of 'a | `Right of 'b | `Both of 'a * 'b] -> 'c option) -> + 'a t -> 'b t -> 'c t (** Merge two tables together into a new table. The function's argument correspond to values associated with the key (if present); if the function returns [None] the key will not appear in the result. *) From 6e46687ee8c5cbc32d4f6ccfec25969fc1b8ba3e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 14 Mar 2016 20:38:59 +0100 Subject: [PATCH 14/30] fix compilation error --- src/data/CCPersistentHashtbl.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/data/CCPersistentHashtbl.ml b/src/data/CCPersistentHashtbl.ml index d0d7ab8b..99aa4672 100644 --- a/src/data/CCPersistentHashtbl.ml +++ b/src/data/CCPersistentHashtbl.ml @@ -68,7 +68,7 @@ module type S = sig anymore, so using both tables alternatively will be efficient *) val merge : - (key -> [`Left of 'a | `Right of 'b | `Both of 'a * 'b] -> 'c option) -> + f:(key -> [`Left of 'a | `Right of 'b | `Both of 'a * 'b] -> 'c option) -> 'a t -> 'b t -> 'c t (** Merge two tables together into a new table. The function's argument correspond to values associated with the key (if present); if the @@ -540,7 +540,7 @@ module Make(H : HashedType) : S with type key = H.t = struct false with ExitPTbl -> true - let merge f t1 t2 = + let merge ~f t1 t2 = let tbl = create (max (length t1) (length t2)) in let tbl = fold (fun tbl k v1 -> @@ -565,10 +565,10 @@ module Make(H : HashedType) : S with type key = H.t = struct let t1 = H.of_list [1, "a"; 2, "b1"] in let t2 = H.of_list [2, "b2"; 3, "c"] in let t = H.merge - (fun _ v1 v2 -> match v1, v2 with - | None, _ -> v2 - | _ , None -> v1 - | Some s1, Some s2 -> if s1 < s2 then Some s1 else Some s2) + ~f:(fun _ -> function + | `Right v2 -> Some v2 + | `Left v1 -> Some v1 + | `Both (s1,s2) -> if s1 < s2 then Some s1 else Some s2) t1 t2 in OUnit.assert_equal ~printer:string_of_int 3 (H.length t); From 71794d8d457af9821d5258f32435213e4de0923d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 20 Mar 2016 16:02:11 +0100 Subject: [PATCH 15/30] migrate to new qtest --- .merlin | 2 +- _oasis | 4 ++-- benchs/run_benchs.ml | 2 +- src/core/CCRandom.ml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.merlin b/.merlin index 776492dd..f9975114 100644 --- a/.merlin +++ b/.merlin @@ -26,5 +26,5 @@ PKG bigarray PKG sequence PKG hamt PKG gen -PKG QTest2Lib +PKG qcheck FLG -w +a -w -4 -w -44 diff --git a/_oasis b/_oasis index 700291cd..23277415 100644 --- a/_oasis +++ b/_oasis @@ -155,7 +155,7 @@ Executable run_benchs CompiledObject: best Build$: flag(bench) MainIs: run_benchs.ml - BuildDepends: containers, containers.advanced, QTest2Lib, + BuildDepends: containers, containers.advanced, qcheck, containers.data, containers.string, containers.iter, containers.thread, sequence, gen, benchmark, hamt @@ -179,7 +179,7 @@ Executable run_qtest containers.io, containers.advanced, containers.sexp, containers.bigarray, containers.unix, containers.thread, containers.data, - sequence, gen, unix, oUnit, QTest2Lib + sequence, gen, unix, oUnit, qcheck Test all Command: ./run_qtest.native diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index aabc99f6..e22b7ce5 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -1157,7 +1157,7 @@ end module Str = struct (* random string, but always returns the same for a given size *) let rand_str_ ?(among="abcdefgh") n = - let module Q = Quickcheck in + let module Q = QCheck in let st = Random.State.make [| n + 17 |] in let gen_c = Q.Gen.oneofl (CCString.to_list among) in Q.Gen.string_size ~gen:gen_c (Q.Gen.return n) st diff --git a/src/core/CCRandom.ml b/src/core/CCRandom.ml index 9e0ad1fe..ed8ed0a4 100644 --- a/src/core/CCRandom.ml +++ b/src/core/CCRandom.ml @@ -213,5 +213,5 @@ let uniformity_test ?(size_hint=10) k rng st = Hashtbl.fold predicate histogram true (*$T split_list - run ~st:(Runner.random_state()) ( uniformity_test 50_000 (split_list 10 ~len:3) ) + run ~st:(QCheck_runner.random_state()) ( uniformity_test 50_000 (split_list 10 ~len:3) ) *) From 5e30104954260af622d2c495cd8208203f92ddf3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 20 Mar 2016 23:20:46 +0100 Subject: [PATCH 16/30] optimize KMP search in CCString.Find (hand-specialize code) --- src/core/CCString.cppo.ml | 51 ++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 9c72ae0c..d7accf93 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -134,17 +134,14 @@ module Find = struct [i] index in [s] [j] index in [pattern] [len] length of [s] *) - let kmp_find_ - : type a. dir:a direction -> pattern:a kmp_pattern -> string -> int -> int - = fun ~dir ~pattern s idx -> + let kmp_find ~pattern s idx = let len = length s in - let get = get_ ~dir in let i = ref idx in let j = ref 0 in let pat_len = kmp_pattern_length pattern in while !j < pat_len && !i + !j < len do - let c = get s (!i + !j) in - let expected = get pattern.str !j in + let c = String.get s (!i + !j) in + let expected = String.get pattern.str !j in if c = expected then ( (* char matches *) @@ -168,18 +165,44 @@ module Find = struct then !i else -1 - let kmp_find ~pattern s i = kmp_find_ ~dir:Direct ~pattern s i - - let kmp_rfind ~pattern s i = - let i = String.length s - i - 1 in - let res = kmp_find_ ~dir:Reverse ~pattern s i in + (* proper search function, from the right. + [i] index in [s] + [j] index in [pattern] + [len] length of [s] *) + let kmp_rfind ~pattern s idx = + let len = length s in + let i = ref (len - idx - 1) in + let j = ref 0 in + let pat_len = kmp_pattern_length pattern in + while !j < pat_len && !i + !j < len do + let c = String.get s (len - !i - !j - 1) in + let expected = String.get pattern.str (String.length pattern.str - !j - 1) in + if c = expected + then ( + (* char matches *) + incr j; + ) else ( + let fail_offset = pattern.failure.(!j) in + if fail_offset >= 0 + then ( + assert (fail_offset < !j); + (* follow the failure link *) + i := !i + !j - fail_offset; + j := fail_offset + ) else ( + (* beginning of pattern *) + j := 0; + incr i + ) + ) + done; (* adjust result: first, [res = string.length s - res -1] to convert back to real indices; then, what we got is actually the position of the end of the pattern, so we subtract the [length of the pattern -1] to obtain the real result. *) - if res = ~-1 - then res - else (String.length s - res) - kmp_pattern_length pattern + if !j = pat_len + then len - !i - kmp_pattern_length pattern + else -1 type 'a pattern = | P_char of char From 6e905a839d3cd1c8d3ce9127131543bcf8f4f161 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 22 Mar 2016 11:14:11 +0100 Subject: [PATCH 17/30] more iterations for some tests --- src/core/CCString.mli | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/CCString.mli b/src/core/CCString.mli index 79bbbadf..f6cda140 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -102,7 +102,7 @@ val find : ?start:int -> sub:string -> string -> int 6 (find ~sub:"a" ~start:5 "a1a234a") *) -(*$Q & ~count:300 +(*$Q & ~count:10_000 Q.(pair printable_string printable_string) (fun (s1,s2) -> \ let i = find ~sub:s2 s1 in \ i < 0 || String.sub s1 i (length s2) = s2) @@ -149,7 +149,7 @@ val rfind : sub:string -> string -> int 6 (rfind ~sub:"a" "a1a234a") *) -(*$Q & ~count:300 +(*$Q & ~count:10_000 Q.(pair printable_string printable_string) (fun (s1,s2) -> \ let i = rfind ~sub:s2 s1 in \ i < 0 || String.sub s1 i (length s2) = s2) From a039add6e749b10bb260af93bacf51eb3870a04f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 22 Mar 2016 18:42:08 +0100 Subject: [PATCH 18/30] add `Containers.{Char,Result}` --- src/core/containers.ml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/core/containers.ml b/src/core/containers.ml index d38654de..21b95f65 100644 --- a/src/core/containers.ml +++ b/src/core/containers.ml @@ -80,3 +80,12 @@ module Vector = CCVector module Int64 = CCInt64 (** @since 0.13 *) + +module Char = struct + include Char + include (CCChar : module type of CCChar with type t := t) +end +(** @since NEXT_RELEASE *) + +module Result = CCResult +(** @since NEXT_RELEASE *) From ef4c86d8a1199537141630e31599fe49cbe3d0f6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 24 Mar 2016 18:24:24 +0100 Subject: [PATCH 19/30] add `CCString.pad` for more webscale --- src/core/CCString.cppo.ml | 9 +++++++++ src/core/CCString.mli | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index d7accf93..155892eb 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -433,6 +433,15 @@ let fold f acc s = else fold_rec f (f acc s.[i]) s (i+1) in fold_rec f acc s 0 +let pad ?(side=`Left) ?(c=' ') n s = + let len_s = String.length s in + if len_s >= n then s + else + let pad_len = n - len_s in + match side with + | `Left -> init n (fun i -> if i < pad_len then c else s.[i-pad_len]) + | `Right -> init n (fun i -> if i < len_s then s.[i] else c) + let _to_gen s i0 len = let i = ref i0 in fun () -> diff --git a/src/core/CCString.mli b/src/core/CCString.mli index f6cda140..6720bf1c 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -78,6 +78,22 @@ val rev : string -> string " " (rev " ") *) +val pad : ?side:[`Left|`Right] -> ?c:char -> int -> string -> string +(** [pad n str] ensures that [str] is at least [n] bytes long, + and pads it on the [side] with [c] if it's not the case. + @param side determines where padding occurs (default: [`Left]) + @param c the char used to pad (default: ' ') + @since NEXT_RELEASE *) + +(*$= & ~printer:Q.Print.string + " 42" (pad 4 "42") + "0042" (pad ~c:'0' 4 "42") + "4200" (pad ~side:`Right ~c:'0' 4 "42") + "hello" (pad 4 "hello") + "aaa" (pad ~c:'a' 3 "") + "aaa" (pad ~side:`Right ~c:'a' 3 "") +*) + val of_gen : char gen -> string val of_seq : char sequence -> string val of_klist : char klist -> string From 6ccad958c490506f19afa97583b5102782537b7a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 26 Mar 2016 11:53:58 +0100 Subject: [PATCH 20/30] make some tests a bit faster --- src/threads/CCPool.ml | 2 +- src/threads/CCTimer.ml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/threads/CCPool.ml b/src/threads/CCPool.ml index 86ea2bf5..401863ca 100644 --- a/src/threads/CCPool.ml +++ b/src/threads/CCPool.ml @@ -264,7 +264,7 @@ module Make(P : PARAM) = struct let l = List.rev_map (fun i -> Fut.make (fun () -> - Thread.delay 0.1; + Thread.delay 0.05; 1 )) l in let l' = List.map Fut.get l in diff --git a/src/threads/CCTimer.ml b/src/threads/CCTimer.ml index cb4739dd..3fd93934 100644 --- a/src/threads/CCTimer.ml +++ b/src/threads/CCTimer.ml @@ -184,12 +184,12 @@ let stop timer = let timer = create () in let n = CCLock.create 1 in let res = CCLock.create 0 in - after timer 0.6 + after timer 0.3 ~f:(fun () -> CCLock.update n (fun x -> x+2)); ignore (Thread.create - (fun _ -> Thread.delay 0.8; CCLock.set res (CCLock.get n)) ()); - after timer 0.4 + (fun _ -> Thread.delay 0.4; CCLock.set res (CCLock.get n)) ()); + after timer 0.2 ~f:(fun () -> CCLock.update n (fun x -> x * 4)); - Thread.delay 1. ; + Thread.delay 0.6 ; OUnit.assert_equal 6 (CCLock.get res); *) From 8d41623ba50d20b64b7e1e0e8509cf0e0133fe27 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 26 Mar 2016 12:05:31 +0100 Subject: [PATCH 21/30] add `{CCArray,CCVector,CCList}.(--^)` for right-open ranges --- src/core/CCArray.ml | 22 ++++++++++++++++++++++ src/core/CCArray.mli | 4 ++++ src/core/CCList.ml | 8 ++++++++ src/core/CCList.mli | 7 +++++++ src/core/CCVector.ml | 16 ++++++++++++++++ src/core/CCVector.mli | 5 +++++ 6 files changed, 62 insertions(+) diff --git a/src/core/CCArray.ml b/src/core/CCArray.ml index de3b8b43..848952a6 100644 --- a/src/core/CCArray.ml +++ b/src/core/CCArray.ml @@ -443,6 +443,28 @@ let (--) i j = else Array.init (i-j+1) (fun k -> i-k) +(*$T + (1 -- 4) |> Array.to_list = [1;2;3;4] + (4 -- 1) |> Array.to_list = [4;3;2;1] + (0 -- 0) |> Array.to_list = [0] +*) + +(*$Q + Q.(pair small_int small_int) (fun (a,b) -> \ + (a -- b) |> Array.to_list = CCList.(a -- b)) +*) + +let (--^) i j = + if i=j then [| |] + else if i>j + then Array.init (i-j) (fun k -> i-k) + else Array.init (j-i) (fun k -> i+k) + +(*$Q + Q.(pair small_int small_int) (fun (a,b) -> \ + (a --^ b) |> Array.to_list = CCList.(a --^ b)) +*) + (** all the elements of a, but the i-th, into a list *) let except_idx a i = foldi diff --git a/src/core/CCArray.mli b/src/core/CCArray.mli index dd87dd40..71853a1e 100644 --- a/src/core/CCArray.mli +++ b/src/core/CCArray.mli @@ -166,6 +166,10 @@ val except_idx : 'a t -> int -> 'a list val (--) : int -> int -> int t (** Range array *) +val (--^) : int -> int -> int t +(** Range array, excluding right bound + @since NEXT_RELEASE *) + val random : 'a random_gen -> 'a t random_gen val random_non_empty : 'a random_gen -> 'a t random_gen val random_len : int -> 'a random_gen -> 'a t random_gen diff --git a/src/core/CCList.ml b/src/core/CCList.ml index 5d926df0..09a7d067 100644 --- a/src/core/CCList.ml +++ b/src/core/CCList.ml @@ -763,11 +763,18 @@ let range' i j = let (--) = range +let (--^) = range' + (*$T append (range 0 100) (range 101 1000) = range 0 1000 append (range 1000 501) (range 500 0) = range 1000 0 *) +(*$Q + Q.(pair small_int small_int) (fun (a,b) -> \ + let l = (a--^b) in not (List.mem b l)) +*) + let replicate i x = let rec aux acc i = if i = 0 then acc @@ -1103,6 +1110,7 @@ module Infix = struct let (<$>) = (<$>) let (>>=) = (>>=) let (--) = (--) + let (--^) = (--^) end (** {2 IO} *) diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 8a9afb25..5da90920 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -263,6 +263,10 @@ val range' : int -> int -> int t val (--) : int -> int -> int t (** Infix alias for [range] *) +val (--^) : int -> int -> int t +(** Infix alias for [range'] + @since NEXT_RELEASE *) + val replicate : int -> 'a -> 'a t (** Replicate the given element [n] times *) @@ -482,6 +486,9 @@ module Infix : sig val (<$>) : ('a -> 'b) -> 'a t -> 'b t val (>>=) : 'a t -> ('a -> 'b t) -> 'b t val (--) : int -> int -> int t + + val (--^) : int -> int -> int t + (** @since NEXT_RELEASE *) end (** {2 IO} *) diff --git a/src/core/CCVector.ml b/src/core/CCVector.ml index 6eb571e0..0fce2699 100644 --- a/src/core/CCVector.ml +++ b/src/core/CCVector.ml @@ -631,12 +631,28 @@ let (--) i j = then init (i-j+1) (fun k -> i-k) else init (j-i+1) (fun k -> i+k) +(*$Q + Q.(pair small_int small_int) (fun (a,b) -> \ + (a -- b) |> to_list = CCList.(a -- b)) +*) + +let (--^) i j = + if i=j then create() + else if i>j + then init (i-j) (fun k -> i-k) + else init (j-i) (fun k -> i+k) + (*$T (1 -- 4) |> to_list = [1;2;3;4] (4 -- 1) |> to_list = [4;3;2;1] (0 -- 0) |> to_list = [0] *) +(*$Q + Q.(pair small_int small_int) (fun (a,b) -> \ + (a --^ b) |> to_list = CCList.(a --^ b)) +*) + let of_array a = if Array.length a = 0 then create () diff --git a/src/core/CCVector.mli b/src/core/CCVector.mli index ea9088d9..10b5c17d 100644 --- a/src/core/CCVector.mli +++ b/src/core/CCVector.mli @@ -237,6 +237,11 @@ val (--) : int -> int -> (int, 'mut) t therefore the result is never empty). Example: [1 -- 10] returns the vector [[1;2;3;4;5;6;7;8;9;10]] *) +val (--^) : int -> int -> (int, 'mut) t +(** Range of integers, either ascending or descending, but excluding right., + Example: [1 --^ 10] returns the vector [[1;2;3;4;5;6;7;8;9]] + @since NEXT_RELEASE *) + val of_array : 'a array -> ('a, 'mut) t val of_list : 'a list -> ('a, 'mut) t val to_array : ('a,_) t -> 'a array From cbe060fd036dbfc754cfd35cd3aaccfc927b2461 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 26 Mar 2016 12:08:07 +0100 Subject: [PATCH 22/30] headers --- src/unix/CCUnix.ml | 24 +----------------------- src/unix/CCUnix.mli | 24 +----------------------- 2 files changed, 2 insertions(+), 46 deletions(-) diff --git a/src/unix/CCUnix.ml b/src/unix/CCUnix.ml index 09ee3022..7a9e9e02 100644 --- a/src/unix/CCUnix.ml +++ b/src/unix/CCUnix.ml @@ -1,27 +1,5 @@ -(* -copyright (c) 2013-2015, simon cruanes -all rights reserved. -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) +(* This file is free software, part of containers. See file "license" for more details. *) (** {1 High-level Functions on top of Unix} *) diff --git a/src/unix/CCUnix.mli b/src/unix/CCUnix.mli index 82b29502..8bcf017c 100644 --- a/src/unix/CCUnix.mli +++ b/src/unix/CCUnix.mli @@ -1,27 +1,5 @@ -(* -copyright (c) 2013-2015, simon cruanes -all rights reserved. -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) +(* This file is free software, part of containers. See file "license" for more details. *) (** {1 High-level Functions on top of Unix} From 0d2fc07e523ecd5da2e57f12697865385ee5c35b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 26 Mar 2016 12:08:20 +0100 Subject: [PATCH 23/30] move tests --- src/core/CCVector.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/CCVector.ml b/src/core/CCVector.ml index 0fce2699..d1290cf5 100644 --- a/src/core/CCVector.ml +++ b/src/core/CCVector.ml @@ -631,6 +631,12 @@ let (--) i j = then init (i-j+1) (fun k -> i-k) else init (j-i+1) (fun k -> i+k) +(*$T + (1 -- 4) |> to_list = [1;2;3;4] + (4 -- 1) |> to_list = [4;3;2;1] + (0 -- 0) |> to_list = [0] +*) + (*$Q Q.(pair small_int small_int) (fun (a,b) -> \ (a -- b) |> to_list = CCList.(a -- b)) @@ -642,12 +648,6 @@ let (--^) i j = then init (i-j) (fun k -> i-k) else init (j-i) (fun k -> i+k) -(*$T - (1 -- 4) |> to_list = [1;2;3;4] - (4 -- 1) |> to_list = [4;3;2;1] - (0 -- 0) |> to_list = [0] -*) - (*$Q Q.(pair small_int small_int) (fun (a,b) -> \ (a --^ b) |> to_list = CCList.(a --^ b)) From 03350031a3d4c1cbeeab3d356aff9f463a9457c1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 26 Mar 2016 12:21:20 +0100 Subject: [PATCH 24/30] small cleanup --- src/core/CCIO.ml | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/core/CCIO.ml b/src/core/CCIO.ml index 5de7ed60..b3c1231a 100644 --- a/src/core/CCIO.ml +++ b/src/core/CCIO.ml @@ -49,15 +49,18 @@ let gen_flat_map f next_elem = in next +let finally_ f x ~h = + try + let res = f x in + h x; + res + with e -> + h x; + raise e + let with_in ?(mode=0o644) ?(flags=[Open_text]) filename f = let ic = open_in_gen (Open_rdonly::flags) mode filename in - try - let x = f ic in - close_in ic; - x - with e -> - close_in ic; - raise e + finally_ f ic ~h:close_in let read_chunks ?(size=1024) ic = let buf = Bytes.create size in @@ -139,13 +142,7 @@ let read_all ?(size=1024) ic = read_all_ ~op:Ret_string ~size ic let with_out ?(mode=0o644) ?(flags=[Open_creat; Open_trunc; Open_text]) filename f = let oc = open_out_gen (Open_wronly::flags) mode filename in - try - let x = f oc in - close_out oc; - x - with e -> - close_out oc; - raise e + finally_ f oc ~h:close_out let with_out_a ?mode ?(flags=[]) filename f = with_out ?mode ~flags:(Open_wronly::Open_creat::Open_append::flags) filename f @@ -323,8 +320,8 @@ module File = struct gen_filter_map (function | `File, f -> Some f - | `Dir, _ -> None - ) (walk d) + | `Dir, _ -> None) + (walk d) else read_dir_base d let show_walk_item (i,f) = From 13dad5b6ac7197dbcdbb9daa86937e184dc11659 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 26 Mar 2016 12:22:25 +0100 Subject: [PATCH 25/30] add `CCIO.File.with_temp` for creating temporary files --- src/core/CCIO.ml | 4 ++++ src/core/CCIO.mli | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/core/CCIO.ml b/src/core/CCIO.ml index b3c1231a..b8c12cca 100644 --- a/src/core/CCIO.ml +++ b/src/core/CCIO.ml @@ -329,4 +329,8 @@ module File = struct | `File -> "file:" | `Dir -> "dir:" ) ^ f + + let with_temp ?temp_dir ~prefix ~suffix f = + let name = Filename.temp_file ?temp_dir prefix suffix in + finally_ f name ~h:remove_noerr end diff --git a/src/core/CCIO.mli b/src/core/CCIO.mli index 92e6a119..98134e4d 100644 --- a/src/core/CCIO.mli +++ b/src/core/CCIO.mli @@ -195,4 +195,14 @@ module File : sig symlinks, etc.) *) val show_walk_item : walk_item -> string + + val with_temp : + ?temp_dir:string -> prefix:string -> suffix:string -> + (string -> 'a) -> 'a + (** [with_temp ~prefix ~suffix f] will call [f] with the name of a new + temporary file (located in [temp_dir]). + After [f] returns, the file is deleted. Best to be used in + combination with {!with_out}. + See {!Filename.temp_file} + @since NEXT_RELEASE *) end From eea9d8139e5f10eab037dfeb9e4688d0c0f5c38d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 26 Mar 2016 12:31:00 +0100 Subject: [PATCH 26/30] additional test for CCParse (using temp file) --- src/string/CCParse.ml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/string/CCParse.ml b/src/string/CCParse.ml index ab1235a5..db34c1ec 100644 --- a/src/string/CCParse.ml +++ b/src/string/CCParse.ml @@ -121,6 +121,31 @@ exception ParseError of line_num * col_num * (unit -> string) *) +(* test with a temporary file *) +(*$R + let test n = + let p = CCParse.(U.list ~sep:"," U.int) in + + let l = CCList.(1 -- n) in + let l' = + CCIO.File.with_temp ~temp_dir:"/tmp/" + ~prefix:"containers_test" ~suffix:"" + (fun name -> + (* write test into file *) + CCIO.with_out name + (fun oc -> + let fmt = Format.formatter_of_out_channel oc in + Format.fprintf fmt "@[%a@]@." + (CCList.print ~sep:"," ~start:"[" ~stop:"]" CCInt.print) l); + (* parse it back *) + CCParse.parse_file_exn ~size:1024 ~file:name ~p) + in + assert_equal ~printer:Q.Print.(list int) l l' + in + test 100_000; + test 400_000; +*) + let const_ x () = x let input_of_string s = From 22b001c60090ec68a54d4af074dbc578a89bdc09 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 29 Mar 2016 11:52:16 +0200 Subject: [PATCH 27/30] add `CCList.fold_filter_map` --- src/core/CCList.ml | 15 +++++++++++++++ src/core/CCList.mli | 5 +++++ 2 files changed, 20 insertions(+) diff --git a/src/core/CCList.ml b/src/core/CCList.ml index 09a7d067..1416e5d0 100644 --- a/src/core/CCList.ml +++ b/src/core/CCList.ml @@ -174,6 +174,21 @@ let fold_map2 f acc l1 l2 = with Invalid_argument _ -> true) *) +let fold_filter_map f acc l = + let rec aux f acc map_acc l = match l with + | [] -> acc, List.rev map_acc + | x :: l' -> + let acc, y = f acc x in + aux f acc (cons_maybe y map_acc) l' + in + aux f acc [] l + +(*$= & ~printer:Q.Print.(pair int (list int)) + (List.fold_left (+) 0 (1--10), [2;4;6;8;10]) \ + (fold_filter_map (fun acc x -> acc+x, if x mod 2 = 0 then Some x else None) \ + 0 (1--10)) +*) + let fold_flat_map f acc l = let rec aux f acc map_acc l = match l with | [] -> acc, List.rev map_acc diff --git a/src/core/CCList.mli b/src/core/CCList.mli index 5da90920..a3cacac1 100644 --- a/src/core/CCList.mli +++ b/src/core/CCList.mli @@ -53,6 +53,11 @@ val fold_map2 : ('acc -> 'a -> 'b -> 'acc * 'c) -> 'acc -> 'a list -> 'b list -> @raise Invalid_argument if the lists do not have the same length @since 0.16 *) +val fold_filter_map : ('acc -> 'a -> 'acc * 'b option) -> 'acc -> 'a list -> 'acc * 'b list +(** [fold_filter_map f acc l] is a [fold_left]-like function, but also + generates a list of output in a way similar to {!filter_map} + @since NEXT_RELEASE *) + val fold_flat_map : ('acc -> 'a -> 'acc * 'b list) -> 'acc -> 'a list -> 'acc * 'b list (** [fold_flat_map f acc l] is a [fold_left]-like function, but it also maps the list to a list of lists that is then [flatten]'d.. From 2e5a360bcd349614cc45b50a8aff537de2887342 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 3 Apr 2016 00:01:53 +0200 Subject: [PATCH 28/30] fix for qtest 2.2 --- benchs/run_benchs.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index e22b7ce5..beb35bde 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -1159,8 +1159,8 @@ module Str = struct let rand_str_ ?(among="abcdefgh") n = let module Q = QCheck in let st = Random.State.make [| n + 17 |] in - let gen_c = Q.Gen.oneofl (CCString.to_list among) in - Q.Gen.string_size ~gen:gen_c (Q.Gen.return n) st + let gen_c = QCheck.Gen.oneofl (CCString.to_list among) in + QCheck.Gen.string_size ~gen:gen_c (QCheck.Gen.return n) st let find ?(start=0) ~sub s = let n = String.length sub in From 4bb65a67df93e97de7561ce4973bbe929f09c7d7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 5 Apr 2016 14:25:30 +0200 Subject: [PATCH 29/30] more benchs --- benchs/run_benchs.ml | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index beb35bde..0c7323bd 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -42,14 +42,24 @@ module L = struct else if x mod 5 = 1 then [x;x+1] else [x;x+1;x+2;x+3] + let f_ral_ x = + if x mod 10 = 0 then CCRAL.empty + else if x mod 5 = 1 then CCRAL.of_list [x;x+1] + else CCRAL.of_list [x;x+1;x+2;x+3] + let bench_flat_map ?(time=2) n = let l = CCList.(1 -- n) in - let flatten_map_ l = List.flatten (CCList.map f_ l) - and flatten_ccmap_ l = List.flatten (List.map f_ l) in + let ral = CCRAL.of_list l in + let flatten_map_ l () = ignore @@ List.flatten (CCList.map f_ l) + and flatmap l () = ignore @@ CCList.flat_map f_ l + and flatten_ccmap_ l () = ignore @@ List.flatten (List.map f_ l) + and flatmap_ral_ l () = ignore @@ CCRAL.flat_map f_ral_ l + in B.throughputN time ~repeat - [ "flat_map", CCList.flat_map f_, l - ; "flatten o CCList.map", flatten_ccmap_, l - ; "flatten o map", flatten_map_, l + [ "flat_map", flatmap l, () + ; "flatten o CCList.map", flatten_ccmap_ l, () + ; "flatten o map", flatten_map_ l, () + ; "ral_flatmap", flatmap_ral_ ral, () ] (* APPEND *) @@ -87,6 +97,21 @@ module L = struct ; "CCList.(fold_right append)", cc_fold_right_append_, l ] + (* RANDOM ACCESS *) + + let bench_nth ?(time=2) n = + let l = CCList.(1 -- n) in + let ral = CCRAL.of_list l in + let bench_list l () = + for i = 0 to n-1 do ignore (List.nth l i) done + and bench_ral l () = + for i = 0 to n-1 do ignore (CCRAL.get_exn l i) done + in + B.throughputN time ~repeat + [ "List.nth", bench_list l, () + ; "RAL.get", bench_ral ral, () + ] + (* MAIN *) let () = B.Tree.register ( @@ -112,6 +137,11 @@ module L = struct [ app_int (bench_append ~time:2) 100 ; app_int (bench_append ~time:2) 10_000 ; app_int (bench_append ~time:4) 100_000] + ; "nth" @>> + B.Tree.concat + [ app_int (bench_nth ~time:2) 100 + ; app_int (bench_nth ~time:2) 10_000 + ; app_int (bench_nth ~time:4) 100_000] ] ) end From f8bb365c94a94135a87a9525aba7a40cca28499e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 5 Apr 2016 14:38:49 +0200 Subject: [PATCH 30/30] implement `CCString.{drop,take,chop_prefix,chop_suffix,filter,filter_map}` --- src/core/CCString.cppo.ml | 37 ++++++++++++++++++++++++ src/core/CCString.mli | 59 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 155892eb..b476f92f 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -424,6 +424,27 @@ let suffix ~suf s = !i = String.length suf ) +let take n s = + if n < String.length s + then String.sub s 0 n + else s + +let drop n s = + if n < String.length s + then String.sub s n (String.length s - n) + else "" + +let take_drop n s = take n s, drop n s + +let chop_suffix ~suf s = + if suffix ~suf s + then Some (String.sub s 0 (String.length s-String.length suf)) + else None + +let chop_prefix ~pre s = + if prefix ~pre s + then Some (String.sub s (String.length pre) (String.length s-String.length pre)) + else None let blit = String.blit @@ -547,6 +568,22 @@ let mapi f s = init (length s) (fun i -> f i s.[i]) #endif +let filter_map f s = + let buf = Buffer.create (String.length s) in + iter + (fun c -> match f c with + | None -> () + | Some c' -> Buffer.add_char buf c') + s; + Buffer.contents buf + +let filter f s = + let buf = Buffer.create (String.length s) in + iter + (fun c -> if f c then Buffer.add_char buf c) + s; + Buffer.contents buf + let flat_map ?sep f s = let buf = Buffer.create (String.length s) in iteri diff --git a/src/core/CCString.mli b/src/core/CCString.mli index 6720bf1c..9cf809ba 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -219,6 +219,46 @@ val suffix : suf:string -> string -> bool not (suffix ~suf:"abcd" "cd") *) +val chop_prefix : pre:string -> string -> string option +(** [chop_pref ~pre s] removes [pre] from [s] if [pre] really is a prefix + of [s], returns [None] otherwise + @since NEXT_RELEASE *) + +(*$= & ~printer:Q.Print.(option string) + (Some "cd") (chop_prefix ~pre:"aab" "aabcd") + None (chop_prefix ~pre:"ab" "aabcd") + None (chop_prefix ~pre:"abcd" "abc") +*) + +val chop_suffix : suf:string -> string -> string option +(** [chop_suffix ~suf s] removes [suf] from [s] if [suf] really is a suffix + of [s], returns [None] otherwise + @since NEXT_RELEASE *) + +(*$= & ~printer:Q.Print.(option string) + (Some "ab") (chop_suffix ~suf:"cd" "abcd") + None (chop_suffix ~suf:"cd" "abcde") + None (chop_suffix ~suf:"abcd" "cd") +*) + +val take : int -> string -> string +(** [take n s] keeps only the [n] first chars of [s] + @since NEXT_RELEASE *) + +val drop : int -> string -> string +(** [drop n s] removes the [n] first chars of [s] + @since NEXT_RELEASE *) + +val take_drop : int -> string -> string * string +(** [take_drop n s = take n s, drop n s] + @since NEXT_RELEASE *) + +(*$= + ("ab", "cd") (take_drop 2 "abcd") + ("abc", "") (take_drop 3 "abc") + ("abc", "") (take_drop 5 "abc") +*) + val lines : string -> string list (** [lines s] returns a list of the lines of [s] (splits along '\n') @since 0.10 *) @@ -272,6 +312,25 @@ val mapi : (int -> char -> char) -> string -> string (** Map chars with their index @since 0.12 *) +val filter_map : (char -> char option) -> string -> string +(** @since NEXT_RELEASE *) + +(*$= & ~printer:Q.Print.string + "bcef" (filter_map \ + (function 'c' -> None | c -> Some (Char.chr (Char.code c + 1))) "abcde") +*) + +val filter : (char -> bool) -> string -> string +(** @since NEXT_RELEASE *) + +(*$= & ~printer:Q.Print.string + "abde" (filter (function 'c' -> false | _ -> true) "abcdec") +*) + +(*$Q + Q.printable_string (fun s -> filter (fun _ -> true) s = s) +*) + val flat_map : ?sep:string -> (char -> string) -> string -> string (** Map each chars to a string, then concatenates them all @param sep optional separator between each generated string