refactor and clarify cutoff in String.edit_distance

This commit is contained in:
Simon Cruanes 2021-04-08 11:20:54 -04:00
parent 45b3956421
commit e75d93bb9d
2 changed files with 14 additions and 9 deletions

View file

@ -576,12 +576,12 @@ let compare_natural a b =
*)
let edit_distance ?(cutoff=max_int) s1 s2 =
if length s1 = 0
then min cutoff (length s2)
else if length s2 = 0
then min cutoff (length s1)
else if equal s1 s2
then 0
let n1 = length s1 in
let n2 = length s2 in
if n1 = 0 then min cutoff n2
else if n2 = 0 then min cutoff n1
else if equal s1 s2 then 0
else if n1-n2 >= cutoff || n2-n1 >= cutoff then cutoff (* at least cutoff inserts *)
else try
(* distance vectors (v0=previous, v1=current) *)
let v0 = Array.make (length s2 + 1) 0 in

View file

@ -462,9 +462,14 @@ val edit_distance : ?cutoff:int -> string -> string -> int
(** [edit_distance ~cutoff s1 s2] is the edition distance between the two strings [s1] and [s2].
This satisfies the classical distance axioms: it is always positive, symmetric, and satisfies
the formula [distance s1 s2 + distance s2 s3 >= distance s1 s3].
@param cutoff if provided, it's a cap on both the number of iterations,
and on the result. (since 3.0). This is useful if you just want to
check whether the edit distance is less or equal than 2 (use cutoff of 3).
@param cutoff if provided, it's a cap on the number of iterations.
(since 3.0). This is useful if you just want to
check whether the edit distance is less than or equal to 2 without
computing the full distance (use [edit_distance s1 s2 ~cutoff:3 <= 2]).
{b note} that contrary to what was previously documented here, the result can
still be higher than [cutoff] if it's reached in [<cutoff] iterations.
However if the result is [< cutoff] then it is accurate.
*)
(** {2 Infix operators}