mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 11:45:31 -05:00
add CCString.edit_distance
This commit is contained in:
parent
72d43c6eeb
commit
bd7a9ce070
2 changed files with 66 additions and 0 deletions
|
|
@ -407,6 +407,38 @@ let compare_versions a b =
|
|||
in
|
||||
cmp_rec (Split.gen_cpy ~by:"." a) (Split.gen_cpy ~by:"." b)
|
||||
|
||||
(* [edit_distance s1 s2] is the Levenshtein distance between [s1] and [s2]:
   the minimal number of single-char insertions, deletions and substitutions
   needed to turn one string into the other. Strings are compared char by
   char (i.e. byte by byte).

   Bottom-up dynamic programming keeping only two rows of the distance
   matrix: O(|s1| * |s2|) time, two arrays of [|s2| + 1] integers. *)
let edit_distance s1 s2 =
  let n1 = String.length s1 in
  let n2 = String.length s2 in
  if n1 = 0
  then n2
  else if n2 = 0
  then n1
  else if s1 = s2
  then 0
  else begin
    (* [min] specialised to [int], so the inner loop does not go through
       polymorphic comparison *)
    let imin (a : int) b = if a <= b then a else b in
    (* [prev] starts as row 0 of the matrix: [prev.(j)] is the cost of
       inserting the first [j] chars of [s2] into the empty string *)
    let prev = ref (Array.init (n2 + 1) (fun j -> j)) in
    (* [cur] is the row being filled; its initial content is never read *)
    let cur = ref (Array.make (n2 + 1) 0) in
    for i = 0 to n1 - 1 do
      let p = !prev and c = !cur in
      let ci = String.get s1 i in
      (* column 0: delete the first [i+1] chars of [s1] *)
      c.(0) <- i + 1;
      (* try insert/delete/replace for every prefix of [s2] *)
      for j = 0 to n2 - 1 do
        let cost = if Char.compare ci (String.get s2 j) = 0 then 0 else 1 in
        c.(j+1) <- imin (c.(j) + 1) (imin (p.(j+1) + 1) (p.(j) + cost))
      done;
      (* the row just computed becomes the previous row; swapping the two
         references avoids copying the whole row at each iteration *)
      prev := c;
      cur := p
    done;
    (* after the final swap the last computed row lives in [prev] *)
    (!prev).(n2)
  end
|
||||
|
||||
let repeat s n =
|
||||
assert (n>=0);
|
||||
let len = String.length s in
|
||||
|
|
|
|||
|
|
@ -517,6 +517,40 @@ val compare_versions : string -> string -> int
|
|||
*)
|
||||
|
||||
|
||||
val edit_distance : string -> string -> int
|
||||
(** [edit_distance s1 s2] is the edit distance (Levenshtein distance)
    between [s1] and [s2]: the minimal number of single-char insertions,
    deletions and substitutions needed to turn [s1] into [s2]. Strings are
    compared char by char. This satisfies the classical distance axioms:
    it is never negative, it is [0] when both strings are equal, it is
    symmetric, and it satisfies
    the formula [distance a b + distance b c >= distance a c] *)
|
||||
|
||||
(*$Q
|
||||
Q.(string_of_size Gen.(0 -- 30)) (fun s -> \
|
||||
edit_distance s s = 0)
|
||||
*)
|
||||
|
||||
(* test that mutating one char of a string s yields a string s'
|
||||
whose edit distance to s is at most 1.
|
||||
|
||||
--> generate triples (s, i, c) where c is a char, s a non empty string
|
||||
and i a valid index in s
|
||||
*)
|
||||
|
||||
(*$QR
|
||||
(
|
||||
let gen = Q.Gen.(
|
||||
3 -- 10 >>= fun len ->
|
||||
0 -- (len-1) >>= fun i ->
|
||||
string_size (return len) >>= fun s ->
|
||||
char >|= fun c -> (s,i,c)
|
||||
) in
|
||||
let small (s,_,_) = String.length s in
|
||||
Q.make ~small gen
|
||||
)
|
||||
(fun (s,i,c) ->
|
||||
let s' = Bytes.of_string s in
|
||||
Bytes.set s' i c;
|
||||
edit_distance s (Bytes.to_string s') <= 1)
|
||||
*)
|
||||
|
||||
(** {2 Slices} A contiguous part of a string *)
|
||||
|
||||
module Sub : sig
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue