(* copyright (c) 2013-2014, simon cruanes all rights reserved. redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) (** {1 Prefix Tree} *) type 'a sequence = ('a -> unit) -> unit type 'a ktree = unit -> [`Nil | `Node of 'a * 'a ktree list] (** {2 Signatures} *) (** {6 A Composite Word} Words are made of characters, who belong to a total order *) module type WORD = sig type t type char_ val compare : char_ -> char_ -> int val to_seq : t -> char_ sequence val of_list : char_ list -> t end module type S = sig type char_ type key type 'a t val empty : 'a t val is_empty : _ t -> bool val add : key -> 'a -> 'a t -> 'a t (** Add a binding to the trie (possibly erasing the previous one) *) val remove : key -> 'a t -> 'a t (** Remove the key, if present *) val find : key -> 'a t -> 'a option (** Find the value associated with the key, if any *) val find_exn : key -> 'a t -> 'a (** Same as {!find} but can fail. @raise Not_found if the key is not present *) val update : key -> ('a option -> 'a option) -> 'a t -> 'a t (** Update the binding for the given key. The function is given [None] if the key is absent, or [Some v] if [key] is bound to [v]; if it returns [None] the key is removed, otherwise it returns [Some y] and [key] becomes bound to [y] *) val fold : ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b (** Fold on key/value bindings. Will use {!WORD.of_list} to rebuild keys. *) val iter : (key -> 'a -> unit) -> 'a t -> unit (** Same as {!fold}, but for effectful functions *) val fold_values : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b (** More efficient version of {!fold}, that doesn't keep keys *) val iter_values : ('a -> unit) -> 'a t -> unit val merge : ('a -> 'a -> 'a option) -> 'a t -> 'a t -> 'a t (** Merge two tries together. The function is used in case of conflicts, when a key belongs to both tries *) val size : _ t -> int (** Number of bindings *) (** {6 Conversions} *) val to_list : 'a t -> (key * 'a) list val of_list : (key * 'a) list -> 'a t val to_seq : 'a t -> (key * 'a) sequence val of_seq : (key * 'a) sequence -> 'a t val to_seq_values : 'a t -> 'a sequence val to_tree : 'a t -> [`Char of char_ | `Val of 'a | `Switch] ktree (** {6 Ranges} *) val above : key -> 'a t -> (key * 'a) sequence (** All bindings whose key is bigger than (or equal to) the given key *) val below : key -> 'a t -> (key * 'a) sequence (** All bindings whose key is smaller or equal to the given key *) end module Make(W : WORD) = struct type char_ = W.char_ type key = W.t module M = Map.Make(struct type t = char_ let compare = W.compare end) type 'a t = | Empty | Path of char_ list * 'a t | Node of 'a option * 'a t M.t (* invariants: - for Path(l,t) l is never empty - for Node (None,map) map always has at least 2 elements - for Node (Some _,map) map can be anything *) let empty = Empty let _invariant = function | Path ([],_) -> false | Node (None, map) when M.is_empty map -> false | _ -> true let is_empty = function | Empty -> true | _ -> false let _id x = x let _fold_seq f ~finish acc seq = let acc = ref acc in seq (fun x -> acc := f !acc x); finish !acc let _filter_map_seq f seq k = seq (fun x -> match f x with | None -> () | Some y -> k y) let _seq_append_list l seq = let l = ref l in seq (fun x -> l := x :: !l); !l let _seq_map map k = M.iter (fun key v -> k (key,v)) map let _is_path = function | Path _ -> true | _ -> false (* return common prefix, and disjoint suffixes *) let rec _merge_lists l1 l2 = match l1, l2 with | [], _ | _, [] -> [], l1, l2 | c1::l1', c2::l2' -> if W.compare c1 c2 = 0 then let pre, rest1, rest2 = _merge_lists l1' l2' in c1::pre, rest1, rest2 else [], l1, l2 (* prefix [l] to the tree [t] *) let _mk_path l t = match l, t with | [], _ -> t | _, Empty -> Empty | _, Node _ -> Path (l, t) | _, Path (l',t') -> assert (not(_is_path t')); Path (l@l', t') let _mk_path_cons x t = match t with | Empty -> Empty | Node _ -> Path ([x], t) | Path (l', t') -> assert (not(_is_path t')); Path (x::l', t') (* build a Node value *) let _mk_node value map = match value with | Some _ -> Node (value, map) | None -> if M.is_empty map then Empty else let high, t' = M.max_binding map in let low, _ = M.min_binding map in if W.compare low high = 0 then _mk_path [high] t' (* only one element *) else Node (value,map) let _remove_sub c t = match t with | Empty -> t | Path ([], _) -> assert false | Path (c'::l, t') -> if W.compare c c' = 0 then Empty else t | Node (value, map) -> if M.mem c map then let map' = M.remove c map in _mk_node value map' else t let update key f t = (* [state]: current subtree and rebuild function; [x]: current char *) let goto (t, rebuild) c = match t with | Empty -> (t, fun t -> rebuild (_mk_path_cons c t)) | Path ([], _) -> assert false | Path (c'::l, t') -> if W.compare c c' = 0 then (* follow the path *) _mk_path l t', (fun t -> rebuild (_mk_path_cons c t)) else (* exit the path, so we have an empty tree. Upon rebuild we potentially need to make a map *) let rebuild' new_child = rebuild ( if is_empty new_child then t else let map = M.singleton c new_child in let map = M.add c' (_mk_path l t') map in _mk_node None map ) in empty, rebuild' | Node (value, map) -> try let t' = M.find c map in (* rebuild: we modify [t], so we put the new version in [map] if it's not empty, and make the node again *) let rebuild' new_child = rebuild ( if is_empty new_child then _mk_node value (M.remove c map) else _mk_node value (M.add c new_child map) ) in t', rebuild' with Not_found -> let rebuild' new_child = if is_empty new_child then rebuild t (* ignore *) else let map' = M.add c new_child map in rebuild (_mk_node value map') in empty, rebuild' in let finish (t,rebuild) = match t with | Empty -> rebuild (_mk_node (f None) M.empty) | Path ([], _) -> assert false | Path (c::l', t') -> rebuild ( match f None with | None -> t (* TODO: raise exception & return original tree *) | Some _ as v -> _mk_node v (M.singleton c (_mk_path l' t')) ) | Node (value, map) -> let value' = f value in rebuild (_mk_node value' map) in let word = W.to_seq key in _fold_seq goto ~finish (t, _id) word let add k v t = update k (fun _ -> Some v) t let remove k t = update k (fun _ -> None) t let find_exn k t = (* at subtree [t], and character [c] *) let goto t c = match t with | Empty -> raise Not_found | Path ([], _) -> assert false | Path (c'::l, t') -> if W.compare c c' = 0 then _mk_path l t' else raise Not_found | Node (_, map) -> M.find c map and finish t = match t with | Node (Some v, _) -> v | _ -> raise Not_found in let word = W.to_seq k in _fold_seq goto ~finish t word let find k t = try Some (find_exn k t) with Not_found -> None let _difflist_append f l = fun l' -> f (l @ l') let _difflist_add f x = fun l' -> f (x :: l') (* fold that also keeps the path from the root, so as to provide the list of chars that lead to a value. The path is a difference list, ie a function that prepends a list to some suffix *) let rec _fold f path t acc = match t with | Empty -> acc | Path (l, t') -> _fold f (_difflist_append path l) t' acc | Node (v, map) -> let acc = match v with | None -> acc | Some v -> f acc path v in M.fold (fun c t' acc -> _fold f (_difflist_add path c) t' acc) map acc let fold f acc t = _fold (fun acc path v -> let key = W.of_list (path []) in f acc key v ) _id t acc let iter f t = _fold (fun () path y -> f (W.of_list (path [])) y) _id t () let _iter_prefix ~prefix f t = _fold (fun () path y -> let key = W.of_list (prefix (path [])) in f key y) _id t () let rec fold_values f acc t = match t with | Empty -> acc | Path (_, t') -> fold_values f acc t' | Node (v, map) -> let acc = match v with | None -> acc | Some v -> f acc v in M.fold (fun c t' acc -> fold_values f acc t') map acc let iter_values f t = fold_values (fun () x -> f x) () t let rec merge f t1 t2 = match t1, t2 with | Empty, _ -> t2 | _, Empty -> t1 | Path (l1,t1'), Path (l2,t2') -> let common, l1', l2' = _merge_lists l1 l2 in begin match l1', l2' with | c1::l1'', c2::l2'' -> (* need to build a map here, to represent the choice between [c1] and [c2] *) assert (W.compare c1 c2 <> 0); let map = M.add c1 (_mk_path l1'' t1') M.empty in let map = M.add c2 (_mk_path l2'' t2') map in _mk_path common (Node (None, map)) | _ -> _mk_path common (merge f (_mk_path l1' t1') (_mk_path l2' t2') ) end | Path ([], _), _ -> assert false | Path (c1::l1, t1'), Node (value, map) -> begin try (* collision *) let t2' = M.find c1 map in let new_t = merge f (_mk_path l1 t1') t2' in let map' = if is_empty new_t then M.remove c1 map else M.add c1 new_t map in _mk_node value map' with Not_found -> (* no collision *) assert (not(is_empty t1')); Node (value, M.add c1 (_mk_path l1 t1') map) end | Node _, Path _ -> merge f t2 t1 (* previous case *) | Node(v1, map1), Node (v2, map2) -> let v = match v1, v2 with | None, _ -> v2 | _, None -> v1 | Some v1, Some v2 -> f v1 v2 in let map' = M.merge (fun _c t1 t2 -> match t1, t2 with | None, None -> assert false | Some t, None | None, Some t -> Some t | Some t1, Some t2 -> let new_t = merge f t1 t2 in if is_empty new_t then None else Some new_t ) map1 map2 in _mk_node v map' let rec size t = match t with | Empty -> 0 | Path (_, t') -> size t' | Node (v, map) -> let s = if v=None then 0 else 1 in M.fold (fun _ t' acc -> size t' + acc) map s let to_list t = fold (fun acc k v -> (k,v)::acc) [] t let of_list l = List.fold_left (fun acc (k,v) -> add k v acc) empty l let to_seq t k = iter (fun key v -> k (key,v)) t let to_seq_values t k = iter_values k t let of_seq seq = _fold_seq (fun acc (k,v) -> add k v acc) ~finish:_id empty seq let rec to_tree t () = let _tree_node x l () = `Node (x,l) in match t with | Empty -> `Nil | Path ([], _) -> assert false | Path (c::l, t') -> `Node (`Char c, [to_tree (_mk_path l t')]) | Node (v, map) -> let x = match v with | None -> `Switch | Some v -> `Val v in let l = M.bindings map in `Node(x, List.map (fun (c,t') -> _tree_node (`Char c) [to_tree t']) l) (** {6 Ranges} *) (* range above or below a threshold. [p c c'] must return [true] if [c'], in the tree, meets some criterion w.r.t [c] which is a part of the key. *) let _half_range ~p key t k = (* at subtree [cur = Some (t,trail)] or [None], alternatives above [alternatives], and char [c] in [key]. *) let on_char (cur, alternatives) c = match cur with | None -> (None, alternatives) | Some (Empty,_) -> (None, alternatives) | Some (Path ([], _),_) -> assert false | Some (Path (c'::l, t'), trail) -> if W.compare c c' = 0 then Some (_mk_path l t', _difflist_add trail c), alternatives else None, alternatives | Some (Node (_, map), trail) -> let alternatives = _seq_map map |> _filter_map_seq (fun (c', t') -> if p c c' then Some (t', _difflist_add trail c') else None ) |> _seq_append_list alternatives in begin try let t' = M.find c map in Some (t', _difflist_add trail c), alternatives with Not_found -> None, alternatives end (* run through the current path (if any) and alternatives *) and finish (cur,alternatives) = begin match cur with | Some (t, prefix) -> _iter_prefix ~prefix (fun key' v -> k (key', v)) t | None -> () end; List.iter (fun (t,prefix) -> _iter_prefix ~prefix (fun key' v -> k (key', v)) t) alternatives in let word = W.to_seq key in _fold_seq on_char ~finish (Some(t,_id), []) word let above key t = _half_range ~p:(fun c c' -> W.compare c c' < 0) key t let below key t = _half_range ~p:(fun c c' -> W.compare c c' > 0) key t end module type ORDERED = sig type t val compare : t -> t -> int end module MakeArray(X : ORDERED) = Make(struct type t = X.t array type char_ = X.t let compare = X.compare let to_seq a k = Array.iter k a let of_list = Array.of_list end) module MakeList(X : ORDERED) = Make(struct type t = X.t list type char_ = X.t let compare = X.compare let to_seq a k = List.iter k a let of_list l = l end) module String = Make(struct type t = string type char_ = char let compare = Char.compare let to_seq s k = String.iter k s let of_list l = let s = String.create (List.length l) in List.iteri (fun i c -> s.[i] <- c) l; s end) (*$T String.of_list ["a", 1; "b", 2] |> String.size = 2 String.of_list ["a", 1; "b", 2; "a", 3] |> String.size = 2 String.of_list ["a", 1; "b", 2] |> String.find_exn "a" = 1 String.of_list ["a", 1; "b", 2] |> String.find_exn "b" = 2 String.of_list ["a", 1; "b", 2] |> String.find "c" = None String.of_list ["cat", 1; "catogan", 2; "foo", 3] |> String.find_exn "cat" = 1 String.of_list ["cat", 1; "catogan", 2; "foo", 3] |> String.find_exn "catogan" = 2 String.of_list ["cat", 1; "catogan", 2; "foo", 3] |> String.find_exn "foo" = 3 String.of_list ["cat", 1; "catogan", 2; "foo", 3] |> String.find "cato" = None *)