mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
bugfixes and improved API (not implemented yet) for Levenshtein;
some bugs remain in DFA construction
This commit is contained in:
parent
387ec86560
commit
ea92877a0a
2 changed files with 223 additions and 81 deletions
255
levenshtein.ml
255
levenshtein.ml
|
|
@ -46,11 +46,21 @@ module NDA = struct
|
||||||
|
|
||||||
let get_compare nda = nda.compare
|
let get_compare nda = nda.compare
|
||||||
|
|
||||||
|
let rec mem_tr ~compare tr l = match tr, l with
|
||||||
|
| _, [] -> false
|
||||||
|
| Success, Success::_ -> true
|
||||||
|
| Epsilon (i,j), Epsilon(i',j')::_ -> i=i' && j=j'
|
||||||
|
| Upon (Any,i,j), Upon(Any,i',j')::_ when i=i' && j=j' -> true
|
||||||
|
| Upon (Char c,i,j), Upon(Char c',i',j')::_
|
||||||
|
when compare c c' = 0 && i=i' && j=j' -> true
|
||||||
|
| _, _::l' -> mem_tr ~compare tr l'
|
||||||
|
|
||||||
(* build NDA from the "get : int -> 'a" function *)
|
(* build NDA from the "get : int -> 'a" function *)
|
||||||
let make ~compare ~limit ~len ~get =
|
let make ~compare ~limit ~len ~get =
|
||||||
let m = Array.make_matrix len limit [] in
|
let m = Array.make_matrix (len+1) (limit+1) [] in
|
||||||
let add_transition i j tr =
|
let add_transition i j tr =
|
||||||
m.(i).(j) <- tr :: m.(i).(j)
|
if not (mem_tr ~compare tr m.(i).(j))
|
||||||
|
then m.(i).(j) <- tr :: m.(i).(j)
|
||||||
in
|
in
|
||||||
(* internal transitions *)
|
(* internal transitions *)
|
||||||
for i = 0 to len-1 do
|
for i = 0 to len-1 do
|
||||||
|
|
@ -71,9 +81,9 @@ module NDA = struct
|
||||||
for j = 0 to limit do
|
for j = 0 to limit do
|
||||||
(* deletions at the end *)
|
(* deletions at the end *)
|
||||||
if j < limit
|
if j < limit
|
||||||
then add_transition (len-1) j (Upon (Any, len-1, j+1));
|
then add_transition len j (Upon (Any, len, j+1));
|
||||||
(* win in any case *)
|
(* win in any case *)
|
||||||
add_transition (len-1) j Success;
|
add_transition len j Success;
|
||||||
done;
|
done;
|
||||||
{ matrix=m; compare; }
|
{ matrix=m; compare; }
|
||||||
|
|
||||||
|
|
@ -83,14 +93,11 @@ end
|
||||||
|
|
||||||
(** deterministic automaton *)
|
(** deterministic automaton *)
|
||||||
module DFA = struct
|
module DFA = struct
|
||||||
type 'a transition =
|
|
||||||
| Success
|
|
||||||
| Upon of 'a * int (* transition to state i *)
|
|
||||||
| Otherwise of int (* transition to state i *)
|
|
||||||
|
|
||||||
type 'a t = {
|
type 'a t = {
|
||||||
compare : 'a -> 'a -> int;
|
compare : 'a -> 'a -> int;
|
||||||
mutable transitions : 'a transition list array;
|
mutable transitions : ('a * int) list array;
|
||||||
|
mutable is_final : bool array;
|
||||||
|
mutable otherwise : int array; (* transition by default *)
|
||||||
mutable len : int;
|
mutable len : int;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -98,23 +105,43 @@ module DFA = struct
|
||||||
compare;
|
compare;
|
||||||
len = 0;
|
len = 0;
|
||||||
transitions = Array.make size [];
|
transitions = Array.make size [];
|
||||||
|
is_final = Array.make size false;
|
||||||
|
otherwise = Array.make size ~-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let _double_array a =
|
||||||
|
let a' = Array.make (2 * Array.length a) a.(0) in
|
||||||
|
Array.blit a 0 a' 0 (Array.length a);
|
||||||
|
a'
|
||||||
|
|
||||||
(* add a new state *)
|
(* add a new state *)
|
||||||
let add_state dfa =
|
let add_state dfa =
|
||||||
let n = dfa.len in
|
let n = dfa.len in
|
||||||
(* resize *)
|
(* resize *)
|
||||||
if n = Array.length dfa.transitions then begin
|
if n = Array.length dfa.transitions then begin
|
||||||
let a' = Array.make (2*n) [] in
|
dfa.transitions <- _double_array dfa.transitions;
|
||||||
Array.blit dfa.transitions 0 a' 0 n;
|
dfa.is_final <- _double_array dfa.is_final;
|
||||||
dfa.transitions <- a'
|
dfa.otherwise <- _double_array dfa.otherwise;
|
||||||
end;
|
end;
|
||||||
dfa.len <- n + 1;
|
dfa.len <- n + 1;
|
||||||
n
|
n
|
||||||
|
|
||||||
|
let rec __mem_tr ~compare tr l = match tr, l with
|
||||||
|
| _, [] -> false
|
||||||
|
| (c,i), (c',i')::l' ->
|
||||||
|
(i=i' && compare c c' = 0)
|
||||||
|
|| __mem_tr ~compare tr l'
|
||||||
|
|
||||||
(* add transition *)
|
(* add transition *)
|
||||||
let add_transition dfa i tr =
|
let add_transition dfa i tr =
|
||||||
dfa.transitions.(i) <- tr :: dfa.transitions.(i)
|
if not (__mem_tr ~compare:dfa.compare tr dfa.transitions.(i))
|
||||||
|
then dfa.transitions.(i) <- tr :: dfa.transitions.(i)
|
||||||
|
|
||||||
|
let add_otherwise dfa i j =
|
||||||
|
dfa.otherwise.(i) <- j
|
||||||
|
|
||||||
|
let set_final dfa i =
|
||||||
|
dfa.is_final.(i) <- true
|
||||||
|
|
||||||
(* set of pairs of ints: used for representing a set of states of the NDA *)
|
(* set of pairs of ints: used for representing a set of states of the NDA *)
|
||||||
module NDAStateSet = Set.Make(struct
|
module NDAStateSet = Set.Make(struct
|
||||||
|
|
@ -122,6 +149,17 @@ module DFA = struct
|
||||||
let compare = Pervasives.compare
|
let compare = Pervasives.compare
|
||||||
end)
|
end)
|
||||||
|
|
||||||
|
(*
|
||||||
|
let set_to_string s =
|
||||||
|
let b = Buffer.create 15 in
|
||||||
|
Buffer.add_char b '{';
|
||||||
|
NDAStateSet.iter
|
||||||
|
(fun (x,y) -> Printf.bprintf b "(%d,%d)" x y)
|
||||||
|
s;
|
||||||
|
Buffer.add_char b '}';
|
||||||
|
Buffer.contents b
|
||||||
|
*)
|
||||||
|
|
||||||
(* list of characters that can specifically be followed from the given set *)
|
(* list of characters that can specifically be followed from the given set *)
|
||||||
let chars_from_set nda set =
|
let chars_from_set nda set =
|
||||||
NDAStateSet.fold
|
NDAStateSet.fold
|
||||||
|
|
@ -144,11 +182,13 @@ module DFA = struct
|
||||||
let set = ref set in
|
let set = ref set in
|
||||||
while not (Queue.is_empty q) do
|
while not (Queue.is_empty q) do
|
||||||
let state = Queue.pop q in
|
let state = Queue.pop q in
|
||||||
|
(*Printf.printf "saturate epsilon: add state %d,%d\n" (fst state)(snd state);*)
|
||||||
|
set := NDAStateSet.add state !set;
|
||||||
List.iter
|
List.iter
|
||||||
(fun tr' -> match tr' with
|
(fun tr' -> match tr' with
|
||||||
| NDA.Epsilon (i,j) ->
|
| NDA.Epsilon (i,j) ->
|
||||||
if not (NDAStateSet.mem (i,j) !set)
|
if not (NDAStateSet.mem (i,j) !set)
|
||||||
then (set := NDAStateSet.add (i,j) !set; Queue.push (i,j) q)
|
then Queue.push (i,j) q
|
||||||
| _ -> ()
|
| _ -> ()
|
||||||
) (NDA.get nda state)
|
) (NDA.get nda state)
|
||||||
done;
|
done;
|
||||||
|
|
@ -159,19 +199,27 @@ module DFA = struct
|
||||||
(* find the transition that matches the given char (if any);
|
(* find the transition that matches the given char (if any);
|
||||||
may raise exceptions Not_found or LeadToSuccess. *)
|
may raise exceptions Not_found or LeadToSuccess. *)
|
||||||
let rec get_transition_for_char nda c transitions =
|
let rec get_transition_for_char nda c transitions =
|
||||||
match c, transitions with
|
match transitions with
|
||||||
| _, NDA.Success::_ -> raise LeadToSuccess
|
| NDA.Success::_ -> raise LeadToSuccess
|
||||||
| NDA.Char c', NDA.Upon (NDA.Char c'', i, j) :: transitions' ->
|
| NDA.Upon (NDA.Char c', i, j) :: transitions' ->
|
||||||
if nda.NDA.compare c' c'' = 0
|
if nda.NDA.compare c c' = 0
|
||||||
then i, j
|
then i, j
|
||||||
else get_transition_for_char nda c transitions'
|
else get_transition_for_char nda c transitions'
|
||||||
| NDA.Any, NDA.Upon (NDA.Any, i, j) :: _ -> i, j
|
| NDA.Upon (NDA.Any, i, j) :: transitions' ->
|
||||||
| _, NDA.Upon (NDA.Any, i, j) :: transitions' ->
|
|
||||||
begin try get_transition_for_char nda c transitions'
|
begin try get_transition_for_char nda c transitions'
|
||||||
with Not_found -> i, j (* only if no other transition works *)
|
with Not_found -> i, j (* only if no other transition works *)
|
||||||
end
|
end
|
||||||
| _, _::transitions' -> get_transition_for_char nda c transitions'
|
| _::transitions' -> get_transition_for_char nda c transitions'
|
||||||
| _, [] -> raise Not_found
|
| [] -> raise Not_found
|
||||||
|
|
||||||
|
let rec get_transitions_for_any nda acc transitions =
|
||||||
|
match transitions with
|
||||||
|
| NDA.Success::_ -> raise LeadToSuccess
|
||||||
|
| NDA.Upon (NDA.Any, i, j) :: transitions' ->
|
||||||
|
let acc = NDAStateSet.add (i,j) acc in
|
||||||
|
get_transitions_for_any nda acc transitions'
|
||||||
|
| _:: transitions' -> get_transitions_for_any nda acc transitions'
|
||||||
|
| [] -> acc
|
||||||
|
|
||||||
(* follow transition for given NDA.char, returns a new state
|
(* follow transition for given NDA.char, returns a new state
|
||||||
and a boolean indicating whether it's final *)
|
and a boolean indicating whether it's final *)
|
||||||
|
|
@ -179,10 +227,11 @@ module DFA = struct
|
||||||
let is_final = ref false in
|
let is_final = ref false in
|
||||||
let set' = NDAStateSet.fold
|
let set' = NDAStateSet.fold
|
||||||
(fun state acc ->
|
(fun state acc ->
|
||||||
(* possible transitions *)
|
|
||||||
let transitions = NDA.get nda state in
|
let transitions = NDA.get nda state in
|
||||||
|
(* among possible transitions, follow the one that matches c
|
||||||
|
the most closely *)
|
||||||
try
|
try
|
||||||
let state' = get_transition_for_char nda c transitions in
|
let state' = get_transition_for_char nda c transitions in
|
||||||
NDAStateSet.add state' acc
|
NDAStateSet.add state' acc
|
||||||
with
|
with
|
||||||
| LeadToSuccess -> is_final := true; acc
|
| LeadToSuccess -> is_final := true; acc
|
||||||
|
|
@ -192,19 +241,16 @@ module DFA = struct
|
||||||
let set' = saturate_epsilon nda set' in
|
let set' = saturate_epsilon nda set' in
|
||||||
set', !is_final
|
set', !is_final
|
||||||
|
|
||||||
(* only follow "Any" transitions *)
|
let follow_transition_any nda set =
|
||||||
let follow_other_transition nda set =
|
|
||||||
let is_final = ref false in
|
let is_final = ref false in
|
||||||
let set' = NDAStateSet.fold
|
let set' = NDAStateSet.fold
|
||||||
(fun state acc ->
|
(fun state acc ->
|
||||||
(* possible transitions *)
|
|
||||||
let transitions = NDA.get nda state in
|
let transitions = NDA.get nda state in
|
||||||
|
(* among possible transitions, follow the ones that are labelled with "*" *)
|
||||||
try
|
try
|
||||||
let state' = get_transition_for_char nda NDA.Any transitions in
|
get_transitions_for_any nda acc transitions
|
||||||
NDAStateSet.add state' acc
|
|
||||||
with
|
with
|
||||||
| LeadToSuccess -> is_final := true; acc
|
| LeadToSuccess -> is_final := true; acc
|
||||||
| Not_found -> acc (* state dies *)
|
|
||||||
) set NDAStateSet.empty
|
) set NDAStateSet.empty
|
||||||
in
|
in
|
||||||
let set' = saturate_epsilon nda set' in
|
let set' = saturate_epsilon nda set' in
|
||||||
|
|
@ -217,12 +263,17 @@ module DFA = struct
|
||||||
let chars = chars_from_set nda set in
|
let chars = chars_from_set nda set in
|
||||||
List.iter
|
List.iter
|
||||||
(fun c ->
|
(fun c ->
|
||||||
let set', is_final = follow_transition nda set (NDA.Char c) in
|
(*Printf.printf "iterate_transition follows %c (at %s)\n"
|
||||||
k ~is_final (NDA.Char c) set')
|
(Obj.magic c) (set_to_string set);*)
|
||||||
chars;
|
let set', is_final = follow_transition nda set c in
|
||||||
|
if not (NDAStateSet.is_empty set')
|
||||||
|
then k ~is_final (NDA.Char c) set';
|
||||||
|
) chars;
|
||||||
(* remaining transitions, with only "Any" *)
|
(* remaining transitions, with only "Any" *)
|
||||||
let set', is_final = follow_other_transition nda set in
|
(*Printf.printf "iterate transition follows * (at %s)\n" (set_to_string set);*)
|
||||||
k ~is_final NDA.Any set'
|
let set', is_final = follow_transition_any nda set in
|
||||||
|
if not (NDAStateSet.is_empty set')
|
||||||
|
then k ~is_final NDA.Any set'
|
||||||
|
|
||||||
module StateSetMap = Map.Make(NDAStateSet)
|
module StateSetMap = Map.Make(NDAStateSet)
|
||||||
|
|
||||||
|
|
@ -237,20 +288,22 @@ module DFA = struct
|
||||||
|
|
||||||
(* traverse the NDA. Currently we're at [set] *)
|
(* traverse the NDA. Currently we're at [set] *)
|
||||||
let rec traverse nda dfa states set =
|
let rec traverse nda dfa states set =
|
||||||
let set = saturate_epsilon nda set in
|
|
||||||
let set_i = get_state dfa states set in
|
let set_i = get_state dfa states set in
|
||||||
iterate_transition_set nda set
|
iterate_transition_set nda set
|
||||||
(fun ~is_final c set' ->
|
(fun ~is_final c set' ->
|
||||||
let set'_i = get_state dfa states set' in
|
let set_i' = get_state dfa states set' in
|
||||||
(* did we reach success? *)
|
(* did we reach success? *)
|
||||||
if is_final
|
if is_final
|
||||||
then add_transition dfa set'_i Success;
|
then set_final dfa set_i'
|
||||||
|
|
||||||
(* link set -> set' *)
|
(* link set -> set' *)
|
||||||
match c with
|
else match c with
|
||||||
| NDA.Any ->
|
|
||||||
add_transition dfa set_i (Otherwise set'_i)
|
|
||||||
| NDA.Char c' ->
|
| NDA.Char c' ->
|
||||||
add_transition dfa set_i (Upon (c', set'_i))
|
add_transition dfa set_i (c', set_i');
|
||||||
|
traverse nda dfa states set'
|
||||||
|
| NDA.Any ->
|
||||||
|
add_otherwise dfa set_i set_i';
|
||||||
|
traverse nda dfa states set'
|
||||||
)
|
)
|
||||||
|
|
||||||
let of_nda nda =
|
let of_nda nda =
|
||||||
|
|
@ -259,13 +312,38 @@ module DFA = struct
|
||||||
(* map (set of NDA states) to int (state in DFA) *)
|
(* map (set of NDA states) to int (state in DFA) *)
|
||||||
let states = ref StateSetMap.empty in
|
let states = ref StateSetMap.empty in
|
||||||
(* traverse the NDA to build the NFA *)
|
(* traverse the NDA to build the NFA *)
|
||||||
traverse nda dfa states (NDAStateSet.singleton (0,0));
|
let set = NDAStateSet.singleton (0,0) in
|
||||||
|
let set = saturate_epsilon nda set in
|
||||||
|
traverse nda dfa states set;
|
||||||
|
(*StateSetMap.iter
|
||||||
|
(fun set i ->
|
||||||
|
Printf.printf "set %s --> state %d\n" (set_to_string set) i
|
||||||
|
) !states; *)
|
||||||
dfa
|
dfa
|
||||||
|
|
||||||
let get dfa i =
|
let get dfa i =
|
||||||
dfa.transitions.(i)
|
dfa.transitions.(i)
|
||||||
|
|
||||||
|
let otherwise dfa i =
|
||||||
|
dfa.otherwise.(i)
|
||||||
|
|
||||||
|
let is_final dfa i =
|
||||||
|
dfa.is_final.(i)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
let debug_print oc dfa =
|
||||||
|
Printf.fprintf oc "automaton of %d states\n" dfa.DFA.len;
|
||||||
|
for i = 0 to dfa.DFA.len-1 do
|
||||||
|
let transitions = DFA.get dfa i in
|
||||||
|
if DFA.is_final dfa i
|
||||||
|
then Printf.fprintf oc " success %d\n" i;
|
||||||
|
List.iter
|
||||||
|
(fun (c, j) -> Printf.fprintf oc " (%c) %d -> %d\n" c i j ) transitions;
|
||||||
|
let o = DFA.otherwise dfa i in
|
||||||
|
if o >= 0
|
||||||
|
then Printf.fprintf oc " (*) %d -> %d\n" i o
|
||||||
|
done
|
||||||
|
|
||||||
type 'a automaton = 'a DFA.t
|
type 'a automaton = 'a DFA.t
|
||||||
|
|
||||||
let of_array ?(compare=Pervasives.compare) ~limit a =
|
let of_array ?(compare=Pervasives.compare) ~limit a =
|
||||||
|
|
@ -279,6 +357,8 @@ let of_list ?compare ~limit l =
|
||||||
let of_string ~limit a =
|
let of_string ~limit a =
|
||||||
let compare = Char.compare in
|
let compare = Char.compare in
|
||||||
let nda = NDA.make ~compare ~limit ~len:(String.length a) ~get:(String.get a) in
|
let nda = NDA.make ~compare ~limit ~len:(String.length a) ~get:(String.get a) in
|
||||||
|
(*debug_print_nda stdout nda;
|
||||||
|
flush stdout;*)
|
||||||
let dfa = DFA.of_nda nda in
|
let dfa = DFA.of_nda nda in
|
||||||
dfa
|
dfa
|
||||||
|
|
||||||
|
|
@ -288,48 +368,75 @@ type match_result =
|
||||||
|
|
||||||
exception FoundDistance of int
|
exception FoundDistance of int
|
||||||
|
|
||||||
let rec __has_success = function
|
let rec __find_char ~compare c l = match l with
|
||||||
| [] -> false
|
| [] -> raise Not_found
|
||||||
| DFA.Success :: _ -> true
|
| (c', next) :: l' ->
|
||||||
| _ :: l' -> __has_success l'
|
|
||||||
|
|
||||||
let rec __find_char ~compare c l k = match l with
|
|
||||||
| [] -> ()
|
|
||||||
| DFA.Upon (c', next) :: l' ->
|
|
||||||
if compare c c' = 0
|
if compare c c' = 0
|
||||||
then k next
|
then next
|
||||||
else __find_char ~compare c l' k
|
else __find_char ~compare c l'
|
||||||
| _ :: l' -> __find_char ~compare c l' k
|
|
||||||
|
|
||||||
let rec __find_otherwise l k = match l with
|
|
||||||
| [] -> ()
|
|
||||||
| DFA.Otherwise next :: _ -> k next
|
|
||||||
| _::l' -> __find_otherwise l' k
|
|
||||||
|
|
||||||
(* real matching function *)
|
(* real matching function *)
|
||||||
let __match ~len ~get dfa =
|
let __match ~len ~get dfa =
|
||||||
let rec search ~dist i state =
|
let rec search i state =
|
||||||
if i = len then raise (FoundDistance dist)
|
(*Printf.printf "at state %d (dist %d)\n" i dist;*)
|
||||||
|
if i = len
|
||||||
|
then DFA.is_final dfa state
|
||||||
else begin
|
else begin
|
||||||
let transitions = DFA.get dfa state in
|
let transitions = DFA.get dfa state in
|
||||||
if __has_success transitions
|
|
||||||
then raise (FoundDistance dist);
|
|
||||||
(* current char *)
|
(* current char *)
|
||||||
let c = get i in
|
let c = get i in
|
||||||
__find_char ~compare:dfa.DFA.compare c transitions
|
try
|
||||||
(fun next -> search ~dist (i+1) next);
|
let next = __find_char ~compare:dfa.DFA.compare c transitions in
|
||||||
__find_otherwise transitions
|
search (i+1) next
|
||||||
(fun next -> search ~dist:(dist+1) (i+1) next);
|
with Not_found ->
|
||||||
|
let o = DFA.otherwise dfa state in
|
||||||
|
if o >= 0
|
||||||
|
then search (i+1) o
|
||||||
|
else false
|
||||||
end
|
end
|
||||||
in
|
in
|
||||||
try
|
search 0 0
|
||||||
search ~dist:0 0 0;
|
|
||||||
TooFar
|
|
||||||
with FoundDistance i ->
|
|
||||||
Distance i
|
|
||||||
|
|
||||||
let match_with dfa a =
|
let match_with dfa a =
|
||||||
__match ~len:(Array.length a) ~get:(Array.get a) dfa
|
__match ~len:(Array.length a) ~get:(Array.get a) dfa
|
||||||
|
|
||||||
let match_with_string dfa s =
|
let match_with_string dfa s =
|
||||||
__match ~len:(String.length s) ~get:(String.get s) dfa
|
__match ~len:(String.length s) ~get:(String.get s) dfa
|
||||||
|
|
||||||
|
(** {6 Index for one-to-many matching} *)
|
||||||
|
|
||||||
|
(** Continuation list *)
|
||||||
|
type 'a klist =
|
||||||
|
[
|
||||||
|
| `Nil
|
||||||
|
| `Cons of 'a * (unit -> 'a klist)
|
||||||
|
]
|
||||||
|
|
||||||
|
module Index = struct
|
||||||
|
type ('a, 'b) node =
|
||||||
|
| Empty
|
||||||
|
| Node of 'b option * ('a, 'b) assoc_list
|
||||||
|
and ('a, 'b) assoc_list = ('a * ('a, 'b) node) list
|
||||||
|
|
||||||
|
type ('a, 'b) t = {
|
||||||
|
tree : ('a, 'b) node;
|
||||||
|
compare : 'a -> 'a -> int;
|
||||||
|
}
|
||||||
|
|
||||||
|
let empty ?(compare=Pervasives.compare) () = {
|
||||||
|
tree = Empty;
|
||||||
|
compare;
|
||||||
|
}
|
||||||
|
|
||||||
|
let add idx arr value =
|
||||||
|
assert false (* TODO *)
|
||||||
|
|
||||||
|
let add_string idx arr str =
|
||||||
|
assert false (* TODO *)
|
||||||
|
|
||||||
|
let retrieve ~limit idx arr =
|
||||||
|
assert false (* TODO *)
|
||||||
|
|
||||||
|
let retrieve_string ~limit idx str =
|
||||||
|
assert false (* TODO *)
|
||||||
|
end
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*)
|
*)
|
||||||
|
|
||||||
|
|
||||||
(** {1 Levenshtein distance} *)
|
(** {1 Levenshtein distance}
|
||||||
|
|
||||||
|
We take inspiration from
|
||||||
|
http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
|
||||||
|
for the main algorithm and ideas. However some parts are adapted *)
|
||||||
|
|
||||||
(** {2 Automaton} *)
|
(** {2 Automaton} *)
|
||||||
|
|
||||||
|
|
@ -41,13 +45,44 @@ val of_list : ?compare:('a -> 'a -> int) -> limit:int -> 'a list -> 'a automaton
|
||||||
val of_string : limit:int -> string -> char automaton
|
val of_string : limit:int -> string -> char automaton
|
||||||
(** Automaton for the special case of strings *)
|
(** Automaton for the special case of strings *)
|
||||||
|
|
||||||
type match_result =
|
val debug_print : out_channel -> char automaton -> unit
|
||||||
| TooFar
|
(** Output the automaton on the given channel. Only for string automata. *)
|
||||||
| Distance of int
|
|
||||||
|
|
||||||
val match_with : 'a automaton -> 'a array -> match_result
|
val match_with : 'a automaton -> 'a array -> bool
|
||||||
(** [match_with a s] matches the string [s] against [a], and returns
|
(** [match_with a s] matches the string [s] against [a], and returns
|
||||||
the distance from [s] to the word represented by [a] *)
|
[true] if the distance from [s] to the word represented by [a] is smaller
|
||||||
|
than the limit used to build [a] *)
|
||||||
|
|
||||||
val match_with_string : char automaton -> string -> match_result
|
val match_with_string : char automaton -> string -> bool
|
||||||
(** Specialized version of {!match_with} for strings *)
|
(** Specialized version of {!match_with} for strings *)
|
||||||
|
|
||||||
|
(** {6 Index for one-to-many matching} *)
|
||||||
|
|
||||||
|
(** Continuation list *)
|
||||||
|
type 'a klist =
|
||||||
|
[
|
||||||
|
| `Nil
|
||||||
|
| `Cons of 'a * (unit -> 'a klist)
|
||||||
|
]
|
||||||
|
|
||||||
|
module Index : sig
|
||||||
|
type ('a, 'b) t
|
||||||
|
(** Index that maps 'a strings to values of type 'b. Internally it is
|
||||||
|
based on a trie. *)
|
||||||
|
|
||||||
|
val empty : ?compare:('a -> 'a -> int) -> unit -> ('a, 'b) t
|
||||||
|
(** Empty index, possibly with a specific comparison function *)
|
||||||
|
|
||||||
|
val add : ('a, 'b) t -> 'a array -> 'b -> ('a, 'b) t
|
||||||
|
(** Add a char array to the index. If a value was already present
|
||||||
|
for this char it is replaced. *)
|
||||||
|
|
||||||
|
val add_string : (char, 'b) t -> string -> 'b -> (char, 'b) t
|
||||||
|
(** Add a string to a char index *)
|
||||||
|
|
||||||
|
val retrieve : limit:int -> ('a, 'b) t -> 'a array -> 'b klist
|
||||||
|
(** Lazy list of objects associated to strings close to
|
||||||
|
the query string *)
|
||||||
|
|
||||||
|
val retrieve_string : limit:int -> (char,'b) t -> string -> 'b klist
|
||||||
|
end
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue