mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 03:35:30 -05:00
KMP algorithm (as a functor on the string type)
This commit is contained in:
parent
23a5dc1756
commit
cd35c46a58
7 changed files with 239 additions and 8 deletions
158
KMP.ml
Normal file
158
KMP.ml
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2014, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Knuth-Morris-Pratt} *)
|
||||
|
||||
module type STRING = sig
|
||||
type t
|
||||
type char
|
||||
|
||||
val length : t -> int
|
||||
val get : t -> int -> char
|
||||
val char_equal : char -> char -> bool
|
||||
end
|
||||
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
module type S = sig
|
||||
type string
|
||||
|
||||
type pattern
|
||||
(** Compiled pattern (needle: string to search in another string) *)
|
||||
|
||||
val compile : string -> pattern
|
||||
(** Compile a string into a pattern *)
|
||||
|
||||
val find : pattern:pattern -> string -> int -> int option
|
||||
(** [find ~pattern s i] finds the next occurrence of [pattern]
|
||||
in [s] starting at offset [i], and returns it,
|
||||
or returns [None] if the pattern doesn't occur. *)
|
||||
|
||||
val search : pattern:pattern -> string -> int option
|
||||
(** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *)
|
||||
|
||||
val find_all : pattern:pattern -> string -> int -> int gen
|
||||
(** Generator on all occurrences of the pattern *)
|
||||
|
||||
(** {6 One-shot functions that compile the pattern on-the-fly} *)
|
||||
|
||||
val search' : pattern:string -> string -> int option
|
||||
|
||||
val find_all' : pattern:string -> string -> int gen
|
||||
end
|
||||
|
||||
module Make(Str : STRING) = struct
|
||||
type string = Str.t
|
||||
type pattern = {
|
||||
failure : int array;
|
||||
str : Str.t;
|
||||
len : int; (* = length str = length failure *)
|
||||
}
|
||||
|
||||
let compile str =
|
||||
let len = Str.length str in
|
||||
match len with
|
||||
| 0 -> {failure=[| |]; len; str;}
|
||||
| 1 -> {failure=[| -1 |]; len; str;}
|
||||
| _ ->
|
||||
(* at least 2 elements, the algorithm can work *)
|
||||
let failure = Array.make len 0 in
|
||||
(* i: current index in str *)
|
||||
let i = ref 1 in
|
||||
(* j: index of candidate substring *)
|
||||
let j = ref 0 in
|
||||
while !i < len-1 do
|
||||
match !j with
|
||||
| _ when Str.char_equal (Str.get str !i) (Str.get str !j) ->
|
||||
(* substring starting at !j continues matching current char *)
|
||||
i := !i+1;
|
||||
j := !j+1;
|
||||
failure.(!i) <- !j;
|
||||
| 0 ->
|
||||
(* back to the beginning *)
|
||||
i := !i+1;
|
||||
failure.(!i) <- 0;
|
||||
| _ ->
|
||||
(* fallback for the prefix string *)
|
||||
assert (!j > 0);
|
||||
j := failure.(!j)
|
||||
done;
|
||||
{ failure; str; len; }
|
||||
|
||||
let find ~pattern s idx =
|
||||
(* proper search function.
|
||||
[i] index in [s]
|
||||
[j] index in [pattern]
|
||||
[len] length of [s] *)
|
||||
let len = Str.length s in
|
||||
let i = ref idx in
|
||||
let j = ref 0 in
|
||||
while !i < len && !j < pattern.len do
|
||||
let c = Str.get s !i in
|
||||
let expected = Str.get pattern.str !j in
|
||||
if Str.char_equal c expected
|
||||
then (
|
||||
(* char matches *)
|
||||
i := !i + 1; j := !j + 1
|
||||
) else
|
||||
if !j=0
|
||||
then (* beginning of the pattern *)
|
||||
i := !i + 1
|
||||
else (* follow the failure link *)
|
||||
j := pattern.failure.(!j)
|
||||
done;
|
||||
if !j = pattern.len
|
||||
then Some (!i-pattern.len)
|
||||
else None
|
||||
|
||||
let search ~pattern s = find ~pattern s 0
|
||||
|
||||
let find_all ~pattern s i =
|
||||
let i = ref i in
|
||||
fun () ->
|
||||
if !i >= Str.length s
|
||||
then None
|
||||
else match find ~pattern s !i with
|
||||
| None -> None
|
||||
| (Some j) as res ->
|
||||
i := j + pattern.len;
|
||||
res
|
||||
|
||||
let search' ~pattern s =
|
||||
search ~pattern:(compile pattern) s
|
||||
|
||||
let find_all' ~pattern s =
|
||||
find_all ~pattern:(compile pattern) s 0
|
||||
end
|
||||
|
||||
module Default = Make(struct
|
||||
type char_ = char
|
||||
type char = char_
|
||||
type t = string
|
||||
let char_equal a b = a=b
|
||||
let get = String.get
|
||||
let length = String.length
|
||||
end)
|
||||
69
KMP.mli
Normal file
69
KMP.mli
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013-2014, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Knuth-Morris-Pratt} *)
|
||||
|
||||
module type STRING = sig
|
||||
type t
|
||||
type char
|
||||
|
||||
val length : t -> int
|
||||
val get : t -> int -> char
|
||||
val char_equal : char -> char -> bool
|
||||
end
|
||||
|
||||
type 'a gen = unit -> 'a option
|
||||
|
||||
module type S = sig
|
||||
type string
|
||||
|
||||
type pattern
|
||||
(** Compiled pattern (needle: string to search in another string) *)
|
||||
|
||||
val compile : string -> pattern
|
||||
(** Compile a string into a pattern *)
|
||||
|
||||
val find : pattern:pattern -> string -> int -> int option
|
||||
(** [find ~pattern s i] finds the next occurrence of [pattern]
|
||||
in [s] starting at offset [i], and returns it,
|
||||
or returns [None] if the pattern doesn't occur. *)
|
||||
|
||||
val search : pattern:pattern -> string -> int option
|
||||
(** [search ~pattern s] is a shortcut for [find ~pattern s 0]. *)
|
||||
|
||||
val find_all : pattern:pattern -> string -> int -> int gen
|
||||
(** Generator on all occurrences of the pattern *)
|
||||
|
||||
(** {6 One-shot functions that compile the pattern on-the-fly} *)
|
||||
|
||||
val search' : pattern:string -> string -> int option
|
||||
|
||||
val find_all' : pattern:string -> string -> int gen
|
||||
end
|
||||
|
||||
module Make(Str : STRING) : S with type string = Str.t
|
||||
|
||||
module Default : S with type string = string
|
||||
2
_oasis
2
_oasis
|
|
@ -41,7 +41,7 @@ Library "containers"
|
|||
UnionFind, SmallSet, Leftistheap, AbsSet, CSM, MultiMap,
|
||||
ActionMan, BV, QCheck, BencodeOnDisk, Show, TTree,
|
||||
HGraph, Automaton, Conv, Levenshtein, Bidir, Iteratee,
|
||||
Ty, Tell, BencodeStream, RatTerm, Cause
|
||||
Ty, Tell, BencodeStream, RatTerm, Cause, KMP
|
||||
BuildDepends: unix
|
||||
|
||||
Library "containers_thread"
|
||||
|
|
|
|||
3
_tags
3
_tags
|
|
@ -1,5 +1,5 @@
|
|||
# OASIS_START
|
||||
# DO NOT EDIT (digest: a253a3102af478e2c2a6c4a7d330a848)
|
||||
# DO NOT EDIT (digest: 13877dc814f0b2dee886bafc27842dfc)
|
||||
# Ignore VCS directories, you can use the same kind of rule outside
|
||||
# OASIS_START/STOP if you want to exclude directories that contains
|
||||
# useless stuff for the build process
|
||||
|
|
@ -62,6 +62,7 @@
|
|||
"bencodeStream.cmx": for-pack(Containers)
|
||||
"ratTerm.cmx": for-pack(Containers)
|
||||
"cause.cmx": for-pack(Containers)
|
||||
"KMP.cmx": for-pack(Containers)
|
||||
# Library containers_thread
|
||||
"threads/containers_thread.cmxs": use_containers_thread
|
||||
<threads/*.ml{,i}>: package(threads)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# OASIS_START
|
||||
# DO NOT EDIT (digest: 9824af535a520376fdb1b1ac58c878c9)
|
||||
# DO NOT EDIT (digest: 404fe51c40218ed7bf430446cec5efde)
|
||||
Cache
|
||||
Deque
|
||||
Gen
|
||||
|
|
@ -47,4 +47,5 @@ Tell
|
|||
BencodeStream
|
||||
RatTerm
|
||||
Cause
|
||||
KMP
|
||||
# OASIS_STOP
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# OASIS_START
|
||||
# DO NOT EDIT (digest: 9824af535a520376fdb1b1ac58c878c9)
|
||||
# DO NOT EDIT (digest: 404fe51c40218ed7bf430446cec5efde)
|
||||
Cache
|
||||
Deque
|
||||
Gen
|
||||
|
|
@ -47,4 +47,5 @@ Tell
|
|||
BencodeStream
|
||||
RatTerm
|
||||
Cause
|
||||
KMP
|
||||
# OASIS_STOP
|
||||
|
|
|
|||
9
setup.ml
9
setup.ml
|
|
@ -1,7 +1,7 @@
|
|||
(* setup.ml generated for the first time by OASIS v0.3.0 *)
|
||||
|
||||
(* OASIS_START *)
|
||||
(* DO NOT EDIT (digest: d81d54173b4f70a25124ba24fef82bc2) *)
|
||||
(* DO NOT EDIT (digest: b545212f57a5e5b473eeea4866484af0) *)
|
||||
(*
|
||||
Regenerated by OASIS v0.4.4
|
||||
Visit http://oasis.forge.ocamlcore.org for more information and
|
||||
|
|
@ -7005,7 +7005,8 @@ let setup_t =
|
|||
"Tell";
|
||||
"BencodeStream";
|
||||
"RatTerm";
|
||||
"Cause"
|
||||
"Cause";
|
||||
"KMP"
|
||||
];
|
||||
lib_pack = true;
|
||||
lib_internal_modules = [];
|
||||
|
|
@ -7392,7 +7393,7 @@ let setup_t =
|
|||
};
|
||||
oasis_fn = Some "_oasis";
|
||||
oasis_version = "0.4.4";
|
||||
oasis_digest = Some "\242\031\132\250-\201+\tJ\171/\017\158\211\194\168";
|
||||
oasis_digest = Some "\228\227\"@\138\216\007\132\190[\139\215a\153B<";
|
||||
oasis_exec = None;
|
||||
oasis_setup_args = [];
|
||||
setup_update = false
|
||||
|
|
@ -7400,6 +7401,6 @@ let setup_t =
|
|||
|
||||
let setup () = BaseSetup.setup setup_t;;
|
||||
|
||||
# 7404 "setup.ml"
|
||||
# 7405 "setup.ml"
|
||||
(* OASIS_STOP *)
|
||||
let () = setup ();;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue