mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
VPTree interface
This commit is contained in:
parent
239f6af59f
commit
7912fb24e1
7 changed files with 347 additions and 8 deletions
223
VPTree.ml
Normal file
223
VPTree.ml
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
(*
|
||||
copyright (c) 2013, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Vantage-Point Tree} *)
|
||||
|
||||
module type METRIC_SPACE = sig
|
||||
type t
|
||||
(** Elements of the metric space *)
|
||||
|
||||
type num
|
||||
(** Numeric type used for distances *)
|
||||
|
||||
val distance : t -> t -> num
|
||||
(** Distance between two points. It must satisfy the following invariants:
|
||||
|
||||
- [distance x y = distance y x]
|
||||
- [distance x y <= distance x z + distance z y]
|
||||
- [distance x y >= 0]
|
||||
- [distance x y = 0] if and only if [x] and [y] are the same point.
|
||||
*)
|
||||
|
||||
val add : num -> num -> num
|
||||
(** Addition of two distances (associative, commutative) *)
|
||||
|
||||
val compare : num -> num -> int
|
||||
(** Total ordering on the numeric type used for distances *)
|
||||
end
|
||||
(** {2 Utils} *)
|
||||
|
||||
type 'a sequence = ('a -> unit) -> unit
|
||||
|
||||
module LazyList = struct
|
||||
type 'a t =
|
||||
| Nil
|
||||
| Cons of 'a * 'a t Lazy.t
|
||||
|
||||
let rec take n l = match l with
|
||||
| _ when n = 0 -> Nil
|
||||
| Nil -> Nil
|
||||
| Cons (x, (lazy l')) -> Cons (x, lazy (take (n-1) l'))
|
||||
|
||||
let rec iter l k = match l with
|
||||
| Nil -> ()
|
||||
| Cons (x, (lazy l')) -> k x; iter l' k
|
||||
|
||||
let rec to_list l = match l with
|
||||
| Nil -> []
|
||||
| Cons (x, (lazy l')) -> x :: (to_list l')
|
||||
end
|
||||
|
||||
(** {3 Mutable heap} *)
|
||||
module Heap = struct
|
||||
(** Implementation from http://en.wikipedia.org/wiki/Skew_heap *)
|
||||
|
||||
type 'a t = {
|
||||
mutable tree : 'a tree;
|
||||
cmp : 'a -> 'a -> int;
|
||||
} (** A pairing tree heap with the given comparison function *)
|
||||
and 'a tree =
|
||||
| Empty
|
||||
| Node of 'a * 'a tree * 'a tree
|
||||
|
||||
let empty ~cmp = {
|
||||
tree = Empty;
|
||||
cmp;
|
||||
}
|
||||
|
||||
let is_empty h =
|
||||
match h.tree with
|
||||
| Empty -> true
|
||||
| Node _ -> false
|
||||
|
||||
let rec union ~cmp t1 t2 = match t1, t2 with
|
||||
| Empty, _ -> t2
|
||||
| _, Empty -> t1
|
||||
| Node (x1, l1, r1), Node (x2, l2, r2) ->
|
||||
if cmp x1 x2 <= 0
|
||||
then Node (x1, union ~cmp t2 r1, l1)
|
||||
else Node (x2, union ~cmp t1 r2, l2)
|
||||
|
||||
let insert h x =
|
||||
h.tree <- union ~cmp:h.cmp (Node (x, Empty, Empty)) h.tree
|
||||
|
||||
let pop h = match h.tree with
|
||||
| Empty -> raise Not_found
|
||||
| Node (x, l, r) ->
|
||||
h.tree <- union ~cmp:h.cmp l r;
|
||||
x
|
||||
end
|
||||
|
||||
(** {2 VPTree functorial interface} *)
|
||||
|
||||
module type S = sig
|
||||
module Space : METRIC_SPACE
|
||||
|
||||
type key = Space.t
|
||||
|
||||
type +'a t
|
||||
(** VPTree that maps to data of type 'a *)
|
||||
|
||||
val member : _ t -> key -> bool
|
||||
(** Is the given key, member of the tree? *)
|
||||
|
||||
val find_closest : 'a t -> key -> (key * 'a * int) LazyList.t
|
||||
(** [find_closest tree key] finds the points of [tree]
|
||||
that are the closest to [key], in increasing order. Enumerates all
|
||||
the points eventually, if the whole list is explored.
|
||||
Each tuple also contains the distance to the [key]. *)
|
||||
|
||||
val of_array : (key * 'a) array -> 'a t
|
||||
(** Build a VPTree from an array of bindings. The array is modified in
|
||||
place (values are swapped).
|
||||
If a key occurs several times in the sequence, one of the value
|
||||
it maps to is chosen arbitrarily. *)
|
||||
|
||||
val of_list : (key * 'a) list -> 'a t
|
||||
(** Build a VPTree from a list *)
|
||||
|
||||
val of_seq : (key * 'a) sequence -> 'a t
|
||||
|
||||
val of_lazy_list : (key * 'a) LazyList.t -> 'a t
|
||||
|
||||
val size : _ t -> int
|
||||
(** Find the number of key/value pairs in the tree (linear time) *)
|
||||
end
|
||||
|
||||
(* we follow François Berenger's implementation, but simpler, because we
|
||||
want to retrieve several points and yet keep the implementation reasonably
|
||||
simple. *)
|
||||
module Make(Space : METRIC_SPACE) = struct
|
||||
module Space = Space
|
||||
|
||||
type key = Space.t
|
||||
type num = Space.num
|
||||
|
||||
type 'a t =
|
||||
| Empty
|
||||
| Node of 'a node * num * 'a t * 'a t
|
||||
|
||||
and 'a node = {
|
||||
key : key;
|
||||
value : 'a;
|
||||
tree : 'a t;
|
||||
}
|
||||
|
||||
(* insert a tree in the heap of trees to explore *)
|
||||
let _insert_heap h key tree = match tree with
|
||||
| Empty -> ()
|
||||
| Node (node, _, _, _) ->
|
||||
let d = Space.distance key node.key in
|
||||
Heap.insert h (tree, d)
|
||||
|
||||
(* explore the given heap *)
|
||||
let rec _explore_heap h key =
|
||||
if Heap.is_empty h
|
||||
then LazyList.Nil
|
||||
else match Heap.pop h with
|
||||
| Empty -> _explore_heap h key
|
||||
|
|
||||
|
||||
(* find closest points to [key] *)
|
||||
let find_closest tree key =
|
||||
match tree with
|
||||
| Empty -> LazyList.Nil
|
||||
| Node _ ->
|
||||
let cmp (t1,d1) (t2,d2) = Space.compare d1 d2 in
|
||||
let h = Heap.empty ~cmp in
|
||||
_insert_heap h key tree;
|
||||
_explore_heap h key
|
||||
|
||||
let member tree key =
|
||||
match tree with
|
||||
| None -> false
|
||||
| Some tree ->
|
||||
match find_closest tree key with
|
||||
| LazyList.Nil -> false
|
||||
| LazyList.Cons ((_, _, 0), _) -> true
|
||||
| _ -> false
|
||||
|
||||
let size t =
|
||||
let rec size t = match t with
|
||||
| Leaf _ -> 1
|
||||
| Pair _ -> 2
|
||||
| Node (_, _, _, t1, t2) -> size t1 + size t2 + 1
|
||||
in match t with
|
||||
| None -> 0
|
||||
| Some t -> size t
|
||||
|
||||
(* utilities to build the tree *)
|
||||
|
||||
let of_array a = assert false (* TODO *)
|
||||
|
||||
let of_list l = of_array (Array.of_list l)
|
||||
|
||||
let of_seq seq =
|
||||
let l = ref [] in
|
||||
seq (fun x -> l := x :: !l);
|
||||
of_list !l
|
||||
|
||||
let of_lazy_list l = of_seq (LazyList.iter l) (* TODO optimize *)
|
||||
end
|
||||
112
VPTree.mli
Normal file
112
VPTree.mli
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
|
||||
(*
|
||||
copyright (c) 2013, simon cruanes
|
||||
all rights reserved.
|
||||
|
||||
redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer. redistributions in binary
|
||||
form must reproduce the above copyright notice, this list of conditions and the
|
||||
following disclaimer in the documentation and/or other materials provided with
|
||||
the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*)
|
||||
|
||||
(** {1 Vantage-Point Tree}
|
||||
|
||||
A data structure used to store and retrieve elements that belong to a
|
||||
metric space. References are:
|
||||
|
||||
{{: https://github.com/UnixJunkie/vantage_point_tree_from_codepad/blob/master/vp_tree.ml} François Berenger's implementation}
|
||||
{{: http://en.wikipedia.org/wiki/Vantage-point_tree} Wikipedia}
|
||||
*)
|
||||
|
||||
module type METRIC_SPACE = sig
|
||||
type t
|
||||
(** Elements of the metric space *)
|
||||
|
||||
type num
|
||||
(** Numeric type used for distances *)
|
||||
|
||||
val distance : t -> t -> num
|
||||
(** Distance between two points. It must satisfy the following invariants:
|
||||
|
||||
- [distance x y = distance y x]
|
||||
- [distance x y <= distance x z + distance z y]
|
||||
- [distance x y >= 0]
|
||||
- [distance x y = 0] if and only if [x] and [y] are the same point.
|
||||
*)
|
||||
|
||||
val add : num -> num -> num
|
||||
(** Addition of two distances (associative, commutative) *)
|
||||
|
||||
val compare : num -> num -> int
|
||||
(** Total ordering on the numeric type used for distances; must
|
||||
be compatible with {!add} *)
|
||||
end
|
||||
|
||||
(** {2 Utils} *)
|
||||
|
||||
type 'a sequence = ('a -> unit) -> unit
|
||||
|
||||
module LazyList : sig
|
||||
type +'a t =
|
||||
| Nil
|
||||
| Cons of 'a * 'a t Lazy.t
|
||||
|
||||
val take : int -> 'a t -> 'a t
|
||||
|
||||
val to_list : 'a t -> 'a list
|
||||
|
||||
val iter : 'a t -> ('a -> unit) -> unit
|
||||
end
|
||||
|
||||
(** {2 VPTree functorial interface} *)
|
||||
|
||||
module type S = sig
|
||||
module Space : METRIC_SPACE
|
||||
|
||||
type key = Space.t
|
||||
|
||||
type +'a t
|
||||
(** VPTree that maps to data of type 'a *)
|
||||
|
||||
val member : _ t -> key -> bool
|
||||
(** Is the given key, member of the tree? *)
|
||||
|
||||
val find_closest : 'a t -> key -> (key * 'a * int) LazyList.t
|
||||
(** [find_closest tree key] finds the points of [tree]
|
||||
that are the closest to [key], in increasing order. Enumerates all
|
||||
the points eventually, if the whole list is explored.
|
||||
Each tuple also contains the distance to the [key]. *)
|
||||
|
||||
val of_array : (key * 'a) array -> 'a t
|
||||
(** Build a VPTree from an array of bindings. The array is modified in
|
||||
place (values are swapped).
|
||||
If a key occurs several times in the sequence, one of the value
|
||||
it maps to is chosen arbitrarily. *)
|
||||
|
||||
val of_list : (key * 'a) list -> 'a t
|
||||
(** Build a VPTree from a list *)
|
||||
|
||||
val of_seq : (key * 'a) sequence -> 'a t
|
||||
|
||||
val of_lazy_list : (key * 'a) LazyList.t -> 'a t
|
||||
|
||||
val size : _ t -> int
|
||||
(** Find the number of key/value pairs in the tree (linear time) *)
|
||||
end
|
||||
|
||||
module Make(Space : METRIC_SPACE) : S with module Space = Space
|
||||
2
_oasis
2
_oasis
|
|
@ -40,7 +40,7 @@ Library "containers"
|
|||
Vector, Bij, PiCalculus, Bencode, Sexp, RAL, MultiSet,
|
||||
UnionFind, SmallSet, Leftistheap, AbsSet, CSM, MultiMap,
|
||||
ActionMan, BV, QCheck, BencodeOnDisk, Show, TTree,
|
||||
HGraph
|
||||
HGraph, VPTree
|
||||
BuildDepends: unix
|
||||
|
||||
Library "containers_thread"
|
||||
|
|
|
|||
3
_tags
3
_tags
|
|
@ -1,5 +1,5 @@
|
|||
# OASIS_START
|
||||
# DO NOT EDIT (digest: dcf1c0cacacb67335a863b18858eaeb7)
|
||||
# DO NOT EDIT (digest: 96f10d0656c8edf5564bc01896f92844)
|
||||
# Ignore VCS directories, you can use the same kind of rule outside
|
||||
# OASIS_START/STOP if you want to exclude directories that contains
|
||||
# useless stuff for the build process
|
||||
|
|
@ -52,6 +52,7 @@
|
|||
"show.cmx": for-pack(Containers)
|
||||
"tTree.cmx": for-pack(Containers)
|
||||
"hGraph.cmx": for-pack(Containers)
|
||||
"VPTree.cmx": for-pack(Containers)
|
||||
# Library containers_thread
|
||||
"containers_thread.cmxs": use_containers_thread
|
||||
<*.ml{,i}>: pkg_threads
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# OASIS_START
|
||||
# DO NOT EDIT (digest: c05f285f4d4b4832b21ddd28863f4edf)
|
||||
# DO NOT EDIT (digest: eb2e2715cb08b7b87800e38c24157fce)
|
||||
Cache
|
||||
Deque
|
||||
Gen
|
||||
|
|
@ -37,4 +37,5 @@ BencodeOnDisk
|
|||
Show
|
||||
TTree
|
||||
HGraph
|
||||
VPTree
|
||||
# OASIS_STOP
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# OASIS_START
|
||||
# DO NOT EDIT (digest: b68696a2df00165022fe21a389026f4f)
|
||||
# DO NOT EDIT (digest: 8a4d45e8f3b44572f3dfbac03926e207)
|
||||
Cache
|
||||
Deque
|
||||
Gen
|
||||
|
|
@ -37,5 +37,6 @@ BencodeOnDisk
|
|||
Show
|
||||
TTree
|
||||
HGraph
|
||||
VPTree
|
||||
Behavior
|
||||
# OASIS_STOP
|
||||
|
|
|
|||
9
setup.ml
9
setup.ml
|
|
@ -1,7 +1,7 @@
|
|||
(* setup.ml generated for the first time by OASIS v0.3.0 *)
|
||||
|
||||
(* OASIS_START *)
|
||||
(* DO NOT EDIT (digest: b3286e91d3acc2931801b7c5da3859c2) *)
|
||||
(* DO NOT EDIT (digest: 79fce0538fbf243815415d95729ceb1e) *)
|
||||
(*
|
||||
Regenerated by OASIS v0.3.0
|
||||
Visit http://oasis.forge.ocamlcore.org for more information and
|
||||
|
|
@ -5904,7 +5904,8 @@ let setup_t =
|
|||
"BencodeOnDisk";
|
||||
"Show";
|
||||
"TTree";
|
||||
"HGraph"
|
||||
"HGraph";
|
||||
"VPTree"
|
||||
];
|
||||
lib_pack = true;
|
||||
lib_internal_modules = [];
|
||||
|
|
@ -6225,7 +6226,7 @@ let setup_t =
|
|||
};
|
||||
oasis_fn = Some "_oasis";
|
||||
oasis_version = "0.3.0";
|
||||
oasis_digest = Some "\139{\144\225j\1545\001\221\138R\129\177\182q*";
|
||||
oasis_digest = Some "\182\214L^I\190_b`\140ur\005\163\127\217";
|
||||
oasis_exec = None;
|
||||
oasis_setup_args = [];
|
||||
setup_update = false;
|
||||
|
|
@ -6233,6 +6234,6 @@ let setup_t =
|
|||
|
||||
let setup () = BaseSetup.setup setup_t;;
|
||||
|
||||
# 6237 "setup.ml"
|
||||
# 6238 "setup.ml"
|
||||
(* OASIS_STOP *)
|
||||
let () = setup ();;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue