mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
add CCLevenshtein.Index.{of,to}_{gen,seq} and cardinal
This commit is contained in:
parent
a015b61208
commit
80b0f9b820
2 changed files with 121 additions and 14 deletions
|
|
@ -26,6 +26,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
(** {1 Levenshtein distance} *)
|
(** {1 Levenshtein distance} *)
|
||||||
|
|
||||||
|
type 'a sequence = ('a -> unit) -> unit
|
||||||
|
type 'a gen = unit -> 'a option
|
||||||
|
|
||||||
module type STRING = sig
|
module type STRING = sig
|
||||||
type char_
|
type char_
|
||||||
type t
|
type t
|
||||||
|
|
@ -50,6 +53,15 @@ let rec klist_to_list l = match l () with
|
||||||
(*$inject
|
(*$inject
|
||||||
open CCFun
|
open CCFun
|
||||||
|
|
||||||
|
let list_uniq_ = Q.(
|
||||||
|
let gen = Gen.(list_size (0 -- 100) (string_size ~gen:printable (1 -- 10))
|
||||||
|
>|= CCList.sort_uniq ~cmp:String.compare
|
||||||
|
>|= List.map (fun s->s,s)
|
||||||
|
) in
|
||||||
|
let print = Print.(list (pair string string)) in
|
||||||
|
let shrink = Shrink.(list ~shrink:(pair string string)) in
|
||||||
|
make ~small:List.length ~print ~shrink gen
|
||||||
|
)
|
||||||
*)
|
*)
|
||||||
|
|
||||||
(*$Q
|
(*$Q
|
||||||
|
|
@ -174,6 +186,9 @@ module type S = sig
|
||||||
(** Add a pair string/value to the index. If a value was already present
|
(** Add a pair string/value to the index. If a value was already present
|
||||||
for this string it is replaced. *)
|
for this string it is replaced. *)
|
||||||
|
|
||||||
|
val cardinal : _ t -> int
|
||||||
|
(** Number of bindings *)
|
||||||
|
|
||||||
val remove : 'b t -> string_ -> 'b t
|
val remove : 'b t -> string_ -> 'b t
|
||||||
(** Remove a string (and its associated value, if any) from the index. *)
|
(** Remove a string (and its associated value, if any) from the index. *)
|
||||||
|
|
||||||
|
|
@ -186,6 +201,24 @@ module type S = sig
|
||||||
val to_list : 'b t -> (string_ * 'b) list
|
val to_list : 'b t -> (string_ * 'b) list
|
||||||
(** Extract a list of pairs from an index *)
|
(** Extract a list of pairs from an index *)
|
||||||
|
|
||||||
|
val add_seq : 'a t -> (string_ * 'a) sequence -> 'a t
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val of_seq : (string_ * 'a) sequence -> 'a t
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val to_seq : 'a t -> (string_ * 'a) sequence
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val add_gen : 'a t -> (string_ * 'a) gen -> 'a t
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val of_gen : (string_ * 'a) gen -> 'a t
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val to_gen : 'a t -> (string_ * 'a) gen
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a
|
val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a
|
||||||
(** Fold over the stored pairs string/value *)
|
(** Fold over the stored pairs string/value *)
|
||||||
|
|
||||||
|
|
@ -197,7 +230,8 @@ module type S = sig
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
module Make(Str : STRING) = struct
|
module Make(Str : STRING)
|
||||||
|
: S with type char_ = Str.char_ and type string_ = Str.t = struct
|
||||||
type string_ = Str.t
|
type string_ = Str.t
|
||||||
type char_ = Str.char_
|
type char_ = Str.char_
|
||||||
|
|
||||||
|
|
@ -689,24 +723,73 @@ module Make(Str : STRING) = struct
|
||||||
let iter f idx =
|
let iter f idx =
|
||||||
fold (fun () str v -> f str v) () idx
|
fold (fun () str v -> f str v) () idx
|
||||||
|
|
||||||
|
let cardinal idx = fold (fun n _ _ -> n+1) 0 idx
|
||||||
|
|
||||||
let to_list idx =
|
let to_list idx =
|
||||||
fold (fun acc str v -> (str,v) :: acc) [] idx
|
fold (fun acc str v -> (str,v) :: acc) [] idx
|
||||||
|
|
||||||
|
let add_seq i s =
|
||||||
|
let i = ref i in
|
||||||
|
s (fun (arr,v) -> i := add !i arr v);
|
||||||
|
!i
|
||||||
|
|
||||||
|
let of_seq s = add_seq empty s
|
||||||
|
|
||||||
|
let to_seq i yield = iter (fun x y -> yield (x,y)) i
|
||||||
|
|
||||||
|
(*$Q
|
||||||
|
list_uniq_ (fun l -> \
|
||||||
|
Sequence.of_list l |> Index.of_seq |> Index.to_seq \
|
||||||
|
|> Sequence.to_list |> List.sort Pervasives.compare \
|
||||||
|
= List.sort Pervasives.compare l)
|
||||||
|
*)
|
||||||
|
|
||||||
|
let rec add_gen i g = match g() with
|
||||||
|
| None -> i
|
||||||
|
| Some (arr,v) -> add_gen (add i arr v) g
|
||||||
|
|
||||||
|
let of_gen g = add_gen empty g
|
||||||
|
|
||||||
|
let to_gen s =
|
||||||
|
let st = Stack.create () in
|
||||||
|
Stack.push ([],s) st;
|
||||||
|
let rec next () =
|
||||||
|
if Stack.is_empty st then None
|
||||||
|
else
|
||||||
|
let trail, Node (opt, m) = Stack.pop st in
|
||||||
|
(* explore children *)
|
||||||
|
M.iter
|
||||||
|
(fun c node' -> Stack.push (c::trail, node') st)
|
||||||
|
m;
|
||||||
|
match opt with
|
||||||
|
| None -> next()
|
||||||
|
| Some v ->
|
||||||
|
let str = Str.of_list (List.rev trail) in
|
||||||
|
Some (str,v)
|
||||||
|
in
|
||||||
|
next
|
||||||
|
|
||||||
|
(*$Q
|
||||||
|
list_uniq_ (fun l -> \
|
||||||
|
Gen.of_list l |> Index.of_gen |> Index.to_gen \
|
||||||
|
|> Gen.to_list |> List.sort Pervasives.compare \
|
||||||
|
= List.sort Pervasives.compare l)
|
||||||
|
*)
|
||||||
|
|
||||||
let to_klist idx =
|
let to_klist idx =
|
||||||
let rec traverse node trail ~(fk:(string_*'a) klist) () =
|
let rec traverse node trail ~(fk:(string_*'a) klist) () =
|
||||||
match node with
|
let Node (opt, m) = node in
|
||||||
| Node (opt, m) ->
|
(* all alternatives: continue exploring [m], or call [fk] *)
|
||||||
(* all alternatives: continue exploring [m], or call [fk] *)
|
let fk =
|
||||||
let fk =
|
M.fold
|
||||||
M.fold
|
(fun c node' fk -> traverse node' (c::trail) ~fk)
|
||||||
(fun c node' fk -> traverse node' (c::trail) ~fk)
|
m fk
|
||||||
m fk
|
in
|
||||||
in
|
match opt with
|
||||||
match opt with
|
| Some v ->
|
||||||
| Some v ->
|
let str = Str.of_list (List.rev trail) in
|
||||||
let str = Str.of_list (List.rev trail) in
|
`Cons ((str,v), fk)
|
||||||
`Cons ((str,v), fk)
|
| _ -> fk () (* fail... or explore subtrees *)
|
||||||
| _ -> fk () (* fail... or explore subtrees *)
|
|
||||||
in
|
in
|
||||||
traverse idx [] ~fk:(fun () -> `Nil)
|
traverse idx [] ~fk:(fun () -> `Nil)
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,9 @@ We take inspiration from
|
||||||
http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
|
http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
|
||||||
for the main algorithm and ideas. However some parts are adapted *)
|
for the main algorithm and ideas. However some parts are adapted *)
|
||||||
|
|
||||||
|
type 'a sequence = ('a -> unit) -> unit
|
||||||
|
type 'a gen = unit -> 'a option
|
||||||
|
|
||||||
(** {2 Abstraction over Strings}
|
(** {2 Abstraction over Strings}
|
||||||
Due to the existence of several encodings and string representations we
|
Due to the existence of several encodings and string representations we
|
||||||
abstract over the type of strings. A string is a finite array of characters
|
abstract over the type of strings. A string is a finite array of characters
|
||||||
|
|
@ -141,6 +144,9 @@ module type S = sig
|
||||||
(** Add a pair string/value to the index. If a value was already present
|
(** Add a pair string/value to the index. If a value was already present
|
||||||
for this string it is replaced. *)
|
for this string it is replaced. *)
|
||||||
|
|
||||||
|
val cardinal : _ t -> int
|
||||||
|
(** Number of bindings *)
|
||||||
|
|
||||||
val remove : 'b t -> string_ -> 'b t
|
val remove : 'b t -> string_ -> 'b t
|
||||||
(** Remove a string (and its associated value, if any) from the index. *)
|
(** Remove a string (and its associated value, if any) from the index. *)
|
||||||
|
|
||||||
|
|
@ -153,6 +159,24 @@ module type S = sig
|
||||||
val to_list : 'b t -> (string_ * 'b) list
|
val to_list : 'b t -> (string_ * 'b) list
|
||||||
(** Extract a list of pairs from an index *)
|
(** Extract a list of pairs from an index *)
|
||||||
|
|
||||||
|
val add_seq : 'a t -> (string_ * 'a) sequence -> 'a t
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val of_seq : (string_ * 'a) sequence -> 'a t
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val to_seq : 'a t -> (string_ * 'a) sequence
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val add_gen : 'a t -> (string_ * 'a) gen -> 'a t
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val of_gen : (string_ * 'a) gen -> 'a t
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val to_gen : 'a t -> (string_ * 'a) gen
|
||||||
|
(** @since NEXT_RELEASE *)
|
||||||
|
|
||||||
val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a
|
val fold : ('a -> string_ -> 'b -> 'a) -> 'a -> 'b t -> 'a
|
||||||
(** Fold over the stored pairs string/value *)
|
(** Fold over the stored pairs string/value *)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue