mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 19:55:31 -05:00
add CCString.edit_distance
This commit is contained in:
parent
72d43c6eeb
commit
bd7a9ce070
2 changed files with 66 additions and 0 deletions
|
|
@ -407,6 +407,38 @@ let compare_versions a b =
|
||||||
in
|
in
|
||||||
cmp_rec (Split.gen_cpy ~by:"." a) (Split.gen_cpy ~by:"." b)
|
cmp_rec (Split.gen_cpy ~by:"." a) (Split.gen_cpy ~by:"." b)
|
||||||
|
|
||||||
|
(* [edit_distance s1 s2] is the edit (Levenshtein) distance between [s1]
   and [s2]: the minimal number of single-character insertions, deletions
   and substitutions needed to turn [s1] into [s2].

   Bottom-up dynamic programming that keeps only two rows of the distance
   matrix: O(length s2) memory, O(length s1 * length s2) time. *)
let edit_distance s1 s2 =
  let len1 = String.length s1 in
  let len2 = String.length s2 in
  if len1 = 0 then len2
  else if len2 = 0 then len1
  else if (s1 : string) = s2 then 0
  else begin
    (* [int]-specialised minimum: the polymorphic [min] goes through the
       generic comparison on every cell of the inner loop *)
    let imin (a : int) (b : int) = if a <= b then a else b in
    (* two rows of the distance matrix; [prev] starts as row 0, where
       [prev.(j) = j] is the cost of inserting the first [j] chars of [s2] *)
    let prev = ref (Array.init (len2 + 1) (fun j -> j)) in
    let cur = ref (Array.make (len2 + 1) 0) in
    (* main loop of the bottom-up dynamic algorithm, one row per char of [s1] *)
    for i = 0 to len1 - 1 do
      let p = !prev and c = !cur in
      let c1 = String.get s1 i in
      (* turning the first [i+1] chars of [s1] into "" takes [i+1] deletions *)
      c.(0) <- i + 1;
      (* try insert / delete / replace and keep the cheapest *)
      for j = 0 to len2 - 1 do
        let cost = if Char.compare c1 (String.get s2 j) = 0 then 0 else 1 in
        c.(j + 1) <- imin (c.(j) + 1) (imin (p.(j + 1) + 1) (p.(j) + cost))
      done;
      (* swap the rows instead of copying the current row into the previous *)
      prev := c;
      cur := p
    done;
    (* after the final swap, the last computed row is [prev] *)
    (!prev).(len2)
  end
|
||||||
|
|
||||||
let repeat s n =
|
let repeat s n =
|
||||||
assert (n>=0);
|
assert (n>=0);
|
||||||
let len = String.length s in
|
let len = String.length s in
|
||||||
|
|
|
||||||
|
|
@ -517,6 +517,40 @@ val compare_versions : string -> string -> int
|
||||||
*)
|
*)
|
||||||
|
|
||||||
|
|
||||||
|
val edit_distance : string -> string -> int
|
||||||
|
(** Edit distance between two strings. This satisfies the classical
|
||||||
|
distance axioms: it is always non-negative, symmetric, and satisfies
|
||||||
|
the formula [distance a b + distance b c >= distance a c] *)
|
||||||
|
|
||||||
|
(*$Q
|
||||||
|
Q.(string_of_size Gen.(0 -- 30)) (fun s -> \
|
||||||
|
edit_distance s s = 0)
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* test that mutating one char of s yields a string s' whose edit
distance to s is at most 1.
|
||||||
|
|
||||||
|
--> generate triples (s, i, c) where c is a char, s a non empty string
|
||||||
|
and i a valid index in s
|
||||||
|
*)
|
||||||
|
|
||||||
|
(*$QR
|
||||||
|
(
|
||||||
|
let gen = Q.Gen.(
|
||||||
|
3 -- 10 >>= fun len ->
|
||||||
|
0 -- (len-1) >>= fun i ->
|
||||||
|
string_size (return len) >>= fun s ->
|
||||||
|
char >|= fun c -> (s,i,c)
|
||||||
|
) in
|
||||||
|
let small (s,_,_) = String.length s in
|
||||||
|
Q.make ~small gen
|
||||||
|
)
|
||||||
|
(fun (s,i,c) ->
|
||||||
|
let s' = Bytes.of_string s in
|
||||||
|
Bytes.set s' i c;
|
||||||
|
edit_distance s (Bytes.to_string s') <= 1)
|
||||||
|
*)
|
||||||
|
|
||||||
(** {2 Slices} A contiguous part of a string *)
|
(** {2 Slices} A contiguous part of a string *)
|
||||||
|
|
||||||
module Sub : sig
|
module Sub : sig
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue