mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2026-01-29 04:14:51 -05:00
refactor and clarify cutoff in String.edit_distance
This commit is contained in:
parent
45b3956421
commit
e75d93bb9d
2 changed files with 14 additions and 9 deletions
|
|
@ -576,12 +576,12 @@ let compare_natural a b =
|
||||||
*)
|
*)
|
||||||
|
|
||||||
let edit_distance ?(cutoff=max_int) s1 s2 =
|
let edit_distance ?(cutoff=max_int) s1 s2 =
|
||||||
if length s1 = 0
|
let n1 = length s1 in
|
||||||
then min cutoff (length s2)
|
let n2 = length s2 in
|
||||||
else if length s2 = 0
|
if n1 = 0 then min cutoff n2
|
||||||
then min cutoff (length s1)
|
else if n2 = 0 then min cutoff n1
|
||||||
else if equal s1 s2
|
else if equal s1 s2 then 0
|
||||||
then 0
|
else if n1-n2 >= cutoff || n2-n1 >= cutoff then cutoff (* at least cutoff inserts *)
|
||||||
else try
|
else try
|
||||||
(* distance vectors (v0=previous, v1=current) *)
|
(* distance vectors (v0=previous, v1=current) *)
|
||||||
let v0 = Array.make (length s2 + 1) 0 in
|
let v0 = Array.make (length s2 + 1) 0 in
|
||||||
|
|
|
||||||
|
|
@ -462,9 +462,14 @@ val edit_distance : ?cutoff:int -> string -> string -> int
|
||||||
(** [edit_distance ~cutoff s1 s2] is the edit distance between the two strings [s1] and [s2].
|
(** [edit_distance ~cutoff s1 s2] is the edit distance between the two strings [s1] and [s2].
|
||||||
This satisfies the classical distance axioms: it is always non-negative, symmetric, and satisfies
|
This satisfies the classical distance axioms: it is always non-negative, symmetric, and satisfies
|
||||||
the formula [distance s1 s2 + distance s2 s3 >= distance s1 s3].
|
the formula [distance s1 s2 + distance s2 s3 >= distance s1 s3].
|
||||||
@param cutoff if provided, it's a cap on both the number of iterations,
|
|
||||||
and on the result. (since 3.0). This is useful if you just want to
|
@param cutoff if provided, it's a cap on the number of iterations.
|
||||||
check whether the edit distance is less or equal than 2 (use cutoff of 3).
|
(since 3.0). This is useful if you just want to
|
||||||
|
check whether the edit distance is less than or equal to 2 without computing the exact distance
|
||||||
|
(use [edit_distance s1 s2 ~cutoff:3 <= 2]).
|
||||||
|
{b note} that contrary to what was previously documented here, the result can
|
||||||
|
still be higher than [cutoff] if it's reached in [<cutoff] iterations.
|
||||||
|
However if the result is [< cutoff] then it is accurate.
|
||||||
*)
|
*)
|
||||||
|
|
||||||
(** {2 Infix operators}
|
(** {2 Infix operators}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue