diff --git a/src/string/CCLevenshtein.ml b/src/string/CCLevenshtein.ml index 7ccbb495..d7347f88 100644 --- a/src/string/CCLevenshtein.ml +++ b/src/string/CCLevenshtein.ml @@ -93,7 +93,7 @@ let rec klist_to_list l = match l () with l, Index.of_list l' in let gen = Q.Gen.( - list_size (3 -- 15) (string_size (0 -- 10)) >|= mklist + list_size (3 -- 15) (string_size (1 -- 10)) >|= mklist ) in let small (l,_) = List.length l in let print (l,_) = Q.Print.(list string) l in @@ -106,12 +106,23 @@ let rec klist_to_list l = match l () with let retrieved = Index.retrieve ~limit:2 idx s |> klist_to_list in List.for_all - (fun s' -> edit_distance s s' <= 2) retrieved + (fun s' -> edit_distance s s' <= 2) retrieved && + List.for_all + (fun s' -> not (edit_distance s s' <= 2) || List.mem s' retrieved) + l ) l ) *) +(*$R +let idx = Index.of_list ["aa", "aa"; "ab", "ab"; "cd", "cd"; "a'c", "a'c"] in + assert_equal ~printer:Q.Print.(list string) + ["a'c"; "aa"; "ab"] + (Index.retrieve ~limit:1 idx "ac" |> CCKList.to_list + |> List.sort Pervasives.compare) +*) + module type S = sig type char_ type string_ diff --git a/src/string/CCLevenshtein.mli b/src/string/CCLevenshtein.mli index a22bbdeb..ad92c6b4 100644 --- a/src/string/CCLevenshtein.mli +++ b/src/string/CCLevenshtein.mli @@ -79,15 +79,14 @@ The signature for a given string representation provides 3 main things: A possible use of the index could be: {[ -open Batteries;; -let words = File.with_file_in "/usr/share/dict/english" - (fun i -> IO.read_all i |> String.nsplit ~by:"\\n");; +let words = CCIO.with_in "/usr/share/dict/words" + (fun i -> CCIO.read_all i |> CCString.Split.list_cpy ~by:"\n");; let words = List.map (fun s->s,s) words;; -let idx = Levenshtein.Index.of_list words;; +let idx = CCLevenshtein.Index.of_list words;; -Levenshtein.Index.retrieve ~limit:1 idx "hell" |> Levenshtein.klist_to_list;; +CCLevenshtein.Index.retrieve ~limit:1 idx "hell" |> CCLevenshtein.klist_to_list;; ]} *)