mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-08 04:05:30 -05:00
functor interface to Levenshtein automaton/index
This commit is contained in:
parent
c5473857f8
commit
b6310ae17d
2 changed files with 576 additions and 549 deletions
1007
levenshtein.ml
1007
levenshtein.ml
File diff suppressed because it is too large
Load diff
118
levenshtein.mli
118
levenshtein.mli
|
|
@ -31,34 +31,19 @@ We take inspiration from
|
||||||
http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
|
http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
|
||||||
for the main algorithm and ideas. However some parts are adapted *)
|
for the main algorithm and ideas. However some parts are adapted *)
|
||||||
|
|
||||||
(** {2 Automaton} *)
|
(** {2 Abstraction over Strings} *)
|
||||||
|
|
||||||
type 'a automaton
|
module type STRING = sig
|
||||||
(** Levenshtein automaton for characters of type 'a *)
|
type char_
|
||||||
|
type t
|
||||||
|
|
||||||
val of_array : ?compare:('a -> 'a -> int) -> limit:int -> 'a array -> 'a automaton
|
val of_list : char_ list -> t
|
||||||
(** Build an automaton from an array, with a maximal distance [limit] *)
|
val get : t -> int -> char_
|
||||||
|
val length : t -> int
|
||||||
|
val compare_char : char_ -> char_ -> int
|
||||||
|
end
|
||||||
|
|
||||||
val of_list : ?compare:('a -> 'a -> int) -> limit:int -> 'a list -> 'a automaton
|
(** {2 Continuation list} *)
|
||||||
(** Build an automaton from a list, with a maximal distance [limit] *)
|
|
||||||
|
|
||||||
val of_string : limit:int -> string -> char automaton
|
|
||||||
(** Automaton for the special case of strings *)
|
|
||||||
|
|
||||||
val debug_print : out_channel -> char automaton -> unit
|
|
||||||
(** Output the automaton on the given channel. Only for string automata. *)
|
|
||||||
|
|
||||||
val match_with : 'a automaton -> 'a array -> bool
|
|
||||||
(** [match_with a s] matches the string [s] against [a], and returns
|
|
||||||
[true] if the distance from [s] to the word represented by [a] is smaller
|
|
||||||
than the limit used to build [a] *)
|
|
||||||
|
|
||||||
val match_with_string : char automaton -> string -> bool
|
|
||||||
(** Specialized version of {!match_with} for strings *)
|
|
||||||
|
|
||||||
(** {6 Index for one-to-many matching} *)
|
|
||||||
|
|
||||||
(** Continuation list *)
|
|
||||||
type 'a klist =
|
type 'a klist =
|
||||||
[
|
[
|
||||||
| `Nil
|
| `Nil
|
||||||
|
|
@ -68,49 +53,66 @@ type 'a klist =
|
||||||
val klist_to_list : 'a klist -> 'a list
|
val klist_to_list : 'a klist -> 'a list
|
||||||
(** Helper. *)
|
(** Helper. *)
|
||||||
|
|
||||||
module Index(X : Map.OrderedType) : sig
|
(** {2 Signature} *)
|
||||||
type key = X.t
|
|
||||||
|
|
||||||
type 'b t
|
module type S = sig
|
||||||
(** Index that maps [key] strings to values of type 'b. Internally it is
|
type char_
|
||||||
based on a trie. *)
|
type string_
|
||||||
|
|
||||||
val empty : 'b t
|
(** {6 Automaton} *)
|
||||||
(** Empty index *)
|
|
||||||
|
|
||||||
val is_empty : _ t -> bool
|
type automaton
|
||||||
|
(** Levenshtein automaton *)
|
||||||
|
|
||||||
val add : 'b t -> key array -> 'b -> 'b t
|
val of_string : limit:int -> string_ -> automaton
|
||||||
(** Add a char array to the index. If a value was already present
|
(** Build an automaton from a string, with a maximal distance [limit] *)
|
||||||
for this array it is replaced. *)
|
|
||||||
|
|
||||||
val remove : 'b t -> key array -> 'b -> 'b t
|
val of_list : limit:int -> char_ list -> automaton
|
||||||
(** Remove a char array from the index. *)
|
(** Build an automaton from a list, with a maximal distance [limit] *)
|
||||||
|
|
||||||
val retrieve : limit:int -> 'b t -> key array -> 'b klist
|
val debug_print : (out_channel -> char_ -> unit) ->
|
||||||
(** Lazy list of objects associated to strings close to
|
out_channel -> automaton -> unit
|
||||||
the query string *)
|
(** Output the automaton on the given channel. *)
|
||||||
|
|
||||||
val of_list : (key array * 'b) list -> 'b t
|
val match_with : automaton -> string_ -> bool
|
||||||
|
(** [match_with a s] matches the string [s] against [a], and returns
|
||||||
|
[true] if the distance from [s] to the word represented by [a] is smaller
|
||||||
|
than the limit used to build [a] *)
|
||||||
|
|
||||||
val to_list : 'b t -> (key array * 'b) list
|
(** {6 Index for one-to-many matching} *)
|
||||||
|
|
||||||
(* TODO sequence/iteration functions *)
|
module Index : sig
|
||||||
|
type 'b t
|
||||||
|
(** Index that maps strings to values of type 'b. Internally it is
|
||||||
|
based on a trie. *)
|
||||||
|
|
||||||
|
val empty : 'b t
|
||||||
|
(** Empty index *)
|
||||||
|
|
||||||
|
val is_empty : _ t -> bool
|
||||||
|
|
||||||
|
val add : 'b t -> string_ -> 'b -> 'b t
|
||||||
|
(** Add a string to the index. If a value was already present
|
||||||
|
for this string it is replaced. *)
|
||||||
|
|
||||||
|
val remove : 'b t -> string_ -> 'b -> 'b t
|
||||||
|
(** Remove a string from the index. *)
|
||||||
|
|
||||||
|
val retrieve : limit:int -> 'b t -> string_ -> 'b klist
|
||||||
|
(** Lazy list of objects associated to strings close to the query string *)
|
||||||
|
|
||||||
|
val of_list : (string_ * 'b) list -> 'b t
|
||||||
|
|
||||||
|
val to_list : 'b t -> (string_ * 'b) list
|
||||||
|
|
||||||
|
(* TODO sequence/iteration functions *)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
(** Specific case for strings *)
|
module Make(Str : STRING) : S
|
||||||
module StrIndex : sig
|
with type string_ = Str.t
|
||||||
include module type of Index(Char)
|
and type char_ = Str.char_
|
||||||
|
|
||||||
val add_string : 'b t -> string -> 'b -> 'b t
|
include S with type char_ = char and type string_ = string
|
||||||
(** Add a string to a char index *)
|
|
||||||
|
|
||||||
val remove_string : 'b t -> string -> 'b -> 'b t
|
val debug_print : out_channel -> automaton -> unit
|
||||||
(** Remove a string from a char index *)
|
|
||||||
|
|
||||||
val retrieve_string : limit:int -> 'b t -> string -> 'b klist
|
|
||||||
|
|
||||||
val of_str_list : (string * 'b) list -> 'b t
|
|
||||||
|
|
||||||
val to_str_list : 'b t -> (string * 'b) list
|
|
||||||
end
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue