add CCLevenshtein.Index.{of,to}_{gen,seq} and cardinal

This commit is contained in:
Simon Cruanes 2015-09-27 22:57:36 +02:00
parent a015b61208
commit 80b0f9b820
2 changed files with 121 additions and 14 deletions

View file

@ -26,6 +26,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(** {1 Levenshtein distance} *) (** {1 Levenshtein distance} *)
type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
module type STRING = sig module type STRING = sig
type char_ type char_
type t type t
@ -50,6 +53,15 @@ let rec klist_to_list l = match l () with
(*$inject (*$inject
open CCFun open CCFun
let list_uniq_ = Q.(
let gen = Gen.(list_size (0 -- 100) (string_size ~gen:printable (1 -- 10))
>|= CCList.sort_uniq ~cmp:String.compare
>|= List.map (fun s->s,s)
) in
let print = Print.(list (pair string string)) in
let shrink = Shrink.(list ~shrink:(pair string string)) in
make ~small:List.length ~print ~shrink gen
)
*) *)
(*$Q (*$Q
@ -174,6 +186,9 @@ module type S = sig
(** Add a pair string/value to the index. If a value was already present (** Add a pair string/value to the index. If a value was already present
for this string it is replaced. *) for this string it is replaced. *)
val cardinal : _ t -> int
(** Number of bindings *)
val remove : 'b t -> string_ -> 'b t val remove : 'b t -> string_ -> 'b t
(** Remove a string (and its associated value, if any) from the index. *) (** Remove a string (and its associated value, if any) from the index. *)
@ -186,6 +201,24 @@ module type S = sig
val to_list : 'b t -> (string_ * 'b) list val to_list : 'b t -> (string_ * 'b) list
(** Extract a list of pairs from an index *) (** Extract a list of pairs from an index *)
val add_seq : 'a t -> (string_ * 'a) sequence -> 'a t
(** @since NEXT_RELEASE *)
val of_seq : (string_ * 'a) sequence -> 'a t
(** @since NEXT_RELEASE *)
val to_seq : 'a t -> (string_ * 'a) sequence
(** @since NEXT_RELEASE *)
val add_gen : 'a t -> (string_ * 'a) gen -> 'a t
(** @since NEXT_RELEASE *)
val of_gen : (string_ * 'a) gen -> 'a t
(** @since NEXT_RELEASE *)
val to_gen : 'a t -> (string_ * 'a) gen
(** @since NEXT_RELEASE *)
val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a
(** Fold over the stored pairs string/value *) (** Fold over the stored pairs string/value *)
@ -197,7 +230,8 @@ module type S = sig
end end
end end
module Make(Str : STRING) = struct module Make(Str : STRING)
: S with type char_ = Str.char_ and type string_ = Str.t = struct
type string_ = Str.t type string_ = Str.t
type char_ = Str.char_ type char_ = Str.char_
@ -689,13 +723,62 @@ module Make(Str : STRING) = struct
let iter f idx = let iter f idx =
fold (fun () str v -> f str v) () idx fold (fun () str v -> f str v) () idx
let cardinal idx = fold (fun n _ _ -> n+1) 0 idx
let to_list idx = let to_list idx =
fold (fun acc str v -> (str,v) :: acc) [] idx fold (fun acc str v -> (str,v) :: acc) [] idx
let add_seq i s =
let i = ref i in
s (fun (arr,v) -> i := add !i arr v);
!i
let of_seq s = add_seq empty s
let to_seq i yield = iter (fun x y -> yield (x,y)) i
(*$Q
list_uniq_ (fun l -> \
Sequence.of_list l |> Index.of_seq |> Index.to_seq \
|> Sequence.to_list |> List.sort Pervasives.compare \
= List.sort Pervasives.compare l)
*)
let rec add_gen i g = match g() with
| None -> i
| Some (arr,v) -> add_gen (add i arr v) g
let of_gen g = add_gen empty g
let to_gen s =
let st = Stack.create () in
Stack.push ([],s) st;
let rec next () =
if Stack.is_empty st then None
else
let trail, Node (opt, m) = Stack.pop st in
(* explore children *)
M.iter
(fun c node' -> Stack.push (c::trail, node') st)
m;
match opt with
| None -> next()
| Some v ->
let str = Str.of_list (List.rev trail) in
Some (str,v)
in
next
(*$Q
list_uniq_ (fun l -> \
Gen.of_list l |> Index.of_gen |> Index.to_gen \
|> Gen.to_list |> List.sort Pervasives.compare \
= List.sort Pervasives.compare l)
*)
let to_klist idx = let to_klist idx =
let rec traverse node trail ~(fk:(string_*'a) klist) () = let rec traverse node trail ~(fk:(string_*'a) klist) () =
match node with let Node (opt, m) = node in
| Node (opt, m) ->
(* all alternatives: continue exploring [m], or call [fk] *) (* all alternatives: continue exploring [m], or call [fk] *)
let fk = let fk =
M.fold M.fold

View file

@ -31,6 +31,9 @@ We take inspiration from
http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
for the main algorithm and ideas. However some parts are adapted *) for the main algorithm and ideas. However some parts are adapted *)
type 'a sequence = ('a -> unit) -> unit
type 'a gen = unit -> 'a option
(** {2 Abstraction over Strings} (** {2 Abstraction over Strings}
Due to the existence of several encodings and string representations we Due to the existence of several encodings and string representations we
abstract over the type of strings. A string is a finite array of characters abstract over the type of strings. A string is a finite array of characters
@ -141,6 +144,9 @@ module type S = sig
(** Add a pair string/value to the index. If a value was already present (** Add a pair string/value to the index. If a value was already present
for this string it is replaced. *) for this string it is replaced. *)
val cardinal : _ t -> int
(** Number of bindings *)
val remove : 'b t -> string_ -> 'b t val remove : 'b t -> string_ -> 'b t
(** Remove a string (and its associated value, if any) from the index. *) (** Remove a string (and its associated value, if any) from the index. *)
@ -153,6 +159,24 @@ module type S = sig
val to_list : 'b t -> (string_ * 'b) list val to_list : 'b t -> (string_ * 'b) list
(** Extract a list of pairs from an index *) (** Extract a list of pairs from an index *)
val add_seq : 'a t -> (string_ * 'a) sequence -> 'a t
(** @since NEXT_RELEASE *)
val of_seq : (string_ * 'a) sequence -> 'a t
(** @since NEXT_RELEASE *)
val to_seq : 'a t -> (string_ * 'a) sequence
(** @since NEXT_RELEASE *)
val add_gen : 'a t -> (string_ * 'a) gen -> 'a t
(** @since NEXT_RELEASE *)
val of_gen : (string_ * 'a) gen -> 'a t
(** @since NEXT_RELEASE *)
val to_gen : 'a t -> (string_ * 'a) gen
(** @since NEXT_RELEASE *)
val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a
(** Fold over the stored pairs string/value *) (** Fold over the stored pairs string/value *)