diff --git a/KMP.ml b/KMP.ml new file mode 100644 index 00000000..dafecace --- /dev/null +++ b/KMP.ml @@ -0,0 +1,158 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Knuth-Morris-Pratt} *) + +module type STRING = sig + type t + type char + + val length : t -> int + val get : t -> int -> char + val char_equal : char -> char -> bool +end + +type 'a gen = unit -> 'a option + +module type S = sig + type string + + type pattern + (** Compiled pattern (needle: string to search in another string) *) + + val compile : string -> pattern + (** Compile a string into a pattern *) + + val find : pattern:pattern -> string -> int -> int option + (** [find ~pattern s i] finds the next occurrence of [pattern] + in [s] starting at offset [i], and returns it, + or returns [None] if the pattern doesn't occur. *) + + val search : pattern:pattern -> string -> int option + (** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *) + + val find_all : pattern:pattern -> string -> int -> int gen + (** Generator on all occurrences of the pattern *) + + (** {6 One-shot functions that compile the pattern on-the-fly} *) + + val search' : pattern:string -> string -> int option + + val find_all' : pattern:string -> string -> int gen +end + +module Make(Str : STRING) = struct + type string = Str.t + type pattern = { + failure : int array; + str : Str.t; + len : int; (* = length str = length failure *) + } + + let compile str = + let len = Str.length str in + match len with + | 0 -> {failure=[| |]; len; str;} + | 1 -> {failure=[| -1 |]; len; str;} + | _ -> + (* at least 2 elements, the algorithm can work *) + let failure = Array.make len 0 in + (* i: current index in str *) + let i = ref 1 in + (* j: index of candidate substring *) + let j = ref 0 in + while !i < len-1 do + match !j with + | _ when Str.char_equal (Str.get str !i) (Str.get str !j) -> + (* substring starting at !j continues matching current char *) + i := !i+1; + j := !j+1; + failure.(!i) <- !j; + | 0 -> + (* back to the beginning *) + i := !i+1; + failure.(!i) <- 0; + | _ -> + (* fallback for the prefix string *) + assert (!j > 0); + j := failure.(!j) + done; + { failure; str; len; } + + let find ~pattern s idx = + (* proper search function. + [i] index in [s] + [j] index in [pattern] + [len] length of [s] *) + let len = Str.length s in + let i = ref idx in + let j = ref 0 in + while !i < len && !j < pattern.len do + let c = Str.get s !i in + let expected = Str.get pattern.str !j in + if Str.char_equal c expected + then ( + (* char matches *) + i := !i + 1; j := !j + 1 + ) else + if !j=0 + then (* beginning of the pattern *) + i := !i + 1 + else (* follow the failure link *) + j := pattern.failure.(!j) + done; + if !j = pattern.len + then Some (!i-pattern.len) + else None + + let search ~pattern s = find ~pattern s 0 + + let find_all ~pattern s i = + let i = ref i in + fun () -> + if !i >= Str.length s + then None + else match find ~pattern s !i with + | None -> None + | (Some j) as res -> + i := j + pattern.len; + res + + let search' ~pattern s = + search ~pattern:(compile pattern) s + + let find_all' ~pattern s = + find_all ~pattern:(compile pattern) s 0 +end + +module Default = Make(struct + type char_ = char + type char = char_ + type t = string + let char_equal a b = a=b + let get = String.get + let length = String.length +end) diff --git a/KMP.mli b/KMP.mli new file mode 100644 index 00000000..a2cc09f6 --- /dev/null +++ b/KMP.mli @@ -0,0 +1,69 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Knuth-Morris-Pratt} *) + +module type STRING = sig + type t + type char + + val length : t -> int + val get : t -> int -> char + val char_equal : char -> char -> bool +end + +type 'a gen = unit -> 'a option + +module type S = sig + type string + + type pattern + (** Compiled pattern (needle: string to search in another string) *) + + val compile : string -> pattern + (** Compile a string into a pattern *) + + val find : pattern:pattern -> string -> int -> int option + (** [find ~pattern s i] finds the next occurrence of [pattern] + in [s] starting at offset [i], and returns it, + or returns [None] if the pattern doesn't occur. *) + + val search : pattern:pattern -> string -> int option + (** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *) + + val find_all : pattern:pattern -> string -> int -> int gen + (** Generator on all occurrences of the pattern *) + + (** {6 One-shot functions that compile the pattern on-the-fly} *) + + val search' : pattern:string -> string -> int option + + val find_all' : pattern:string -> string -> int gen +end + +module Make(Str : STRING) : S with type string = Str.t + +module Default : S with type string = string diff --git a/_oasis b/_oasis index c8dcef11..efd56b7d 100644 --- a/_oasis +++ b/_oasis @@ -41,7 +41,7 @@ Library "containers" UnionFind, SmallSet, Leftistheap, AbsSet, CSM, MultiMap, ActionMan, BV, QCheck, BencodeOnDisk, Show, TTree, HGraph, Automaton, Conv, Levenshtein, Bidir, Iteratee, - Ty, Tell, BencodeStream, RatTerm, Cause + Ty, Tell, BencodeStream, RatTerm, Cause, KMP BuildDepends: unix Library "containers_thread" diff --git a/_tags b/_tags index b8d7a0a7..a0ddaf26 100644 --- a/_tags +++ b/_tags @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: a253a3102af478e2c2a6c4a7d330a848) +# DO NOT EDIT (digest: 13877dc814f0b2dee886bafc27842dfc) # Ignore VCS directories, you can use the same kind of rule outside # OASIS_START/STOP if you want to exclude directories that contains # useless stuff for the build process @@ -62,6 +62,7 @@ "bencodeStream.cmx": for-pack(Containers) "ratTerm.cmx": for-pack(Containers) "cause.cmx": for-pack(Containers) +"KMP.cmx": for-pack(Containers) # Library containers_thread "threads/containers_thread.cmxs": use_containers_thread : package(threads) diff --git a/containers.mlpack b/containers.mlpack index 3790fe30..d5f309a5 100644 --- a/containers.mlpack +++ b/containers.mlpack @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 9824af535a520376fdb1b1ac58c878c9) +# DO NOT EDIT (digest: 404fe51c40218ed7bf430446cec5efde) Cache Deque Gen @@ -47,4 +47,5 @@ Tell BencodeStream RatTerm Cause +KMP # OASIS_STOP diff --git a/containers.odocl b/containers.odocl index 3790fe30..d5f309a5 100644 --- a/containers.odocl +++ b/containers.odocl @@ -1,5 +1,5 @@ # OASIS_START -# DO NOT EDIT (digest: 9824af535a520376fdb1b1ac58c878c9) +# DO NOT EDIT (digest: 404fe51c40218ed7bf430446cec5efde) Cache Deque Gen @@ -47,4 +47,5 @@ Tell BencodeStream RatTerm Cause +KMP # OASIS_STOP diff --git a/setup.ml b/setup.ml index b2614038..9a2d285c 100644 --- a/setup.ml +++ b/setup.ml @@ -1,7 +1,7 @@ (* setup.ml generated for the first time by OASIS v0.3.0 *) (* OASIS_START *) -(* DO NOT EDIT (digest: d81d54173b4f70a25124ba24fef82bc2) *) +(* DO NOT EDIT (digest: b545212f57a5e5b473eeea4866484af0) *) (* Regenerated by OASIS v0.4.4 Visit http://oasis.forge.ocamlcore.org for more information and @@ -7005,7 +7005,8 @@ let setup_t = "Tell"; "BencodeStream"; "RatTerm"; - "Cause" + "Cause"; + "KMP" ]; lib_pack = true; lib_internal_modules = []; @@ -7392,7 +7393,7 @@ let setup_t = }; oasis_fn = Some "_oasis"; oasis_version = "0.4.4"; - oasis_digest = Some "\242\031\132\250-\201+\tJ\171/\017\158\211\194\168"; + oasis_digest = Some "\228\227\"@\138\216\007\132\190[\139\215a\153B<"; oasis_exec = None; oasis_setup_args = []; setup_update = false @@ -7400,6 +7401,6 @@ let setup_t = let setup () = BaseSetup.setup setup_t;; -# 7404 "setup.ml" +# 7405 "setup.ml" (* OASIS_STOP *) let () = setup ();;