Merge pull request #457 from gmevel/linear-heap-building

Improvements to CCHeap
This commit is contained in:
Simon Cruanes 2024-10-02 10:04:42 -04:00 committed by GitHub
commit afb93cfc43
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 706 additions and 291 deletions

View file

@ -1,5 +1,14 @@
# Changelog # Changelog
## main
- `CCHeap`: building a heap from an almost-sorted sequence
- perf: `CCHeap`: building a heap from n elements is now in time O(n)
instead of O(n log n)
- perf: `CCHeap`: `filter` and `delete_all` are now in time O(n)
instead of O(n log n), and they ensure physical equality
(for `delete_all` this is a bugfix)
## 3.14 ## 3.14

View file

@ -539,7 +539,7 @@ val h' : IntHeap.t = <abstr>
val x : int = 2 val x : int = 2
# IntHeap.to_list h' (* see, 2 is removed *);; # IntHeap.to_list h' (* see, 2 is removed *);;
- : int list = [4; 6; 8; 10] - : int list = [4; 8; 10; 6]
``` ```
### IO helpers ### IO helpers

View file

@ -7,6 +7,14 @@ type 'a gen = unit -> 'a option
type 'a printer = Format.formatter -> 'a -> unit type 'a printer = Format.formatter -> 'a -> unit
type 'a ktree = unit -> [ `Nil | `Node of 'a * 'a ktree list ] type 'a ktree = unit -> [ `Nil | `Node of 'a * 'a ktree list ]
let[@inline] _iter_map f xs k = xs (fun x -> k (f x))
let rec _gen_iter k g =
begin match g () with
| None -> ()
| Some x -> k x; _gen_iter k g
end
module type PARTIAL_ORD = sig module type PARTIAL_ORD = sig
type t type t
@ -28,125 +36,209 @@ module type S = sig
type elt type elt
type t type t
exception Empty
(** {2 Basing heap operations} *)
val empty : t val empty : t
(** Empty heap. *) (** Empty heap. *)
val is_empty : t -> bool val is_empty : t -> bool
(** Is the heap empty? *) (** Is the heap empty? *)
exception Empty
val merge : t -> t -> t val merge : t -> t -> t
(** Merge two heaps. *) (** [merge h1 h2] merges the two heaps [h1] and [h2].
If one heap is empty, the result is physically equal to the other heap.
Complexity: [O(log (m+n))] where [m] and [n] are the number of elements in each heap.
*)
val insert : elt -> t -> t val insert : elt -> t -> t
(** Insert a value in the heap. *) (** [insert x h] inserts an element [x] into the heap [h].
Complexity: [O(log n)] where [n] is the number of elements in [h].
*)
val add : t -> elt -> t val add : t -> elt -> t
(** Synonym to {!insert}. *) (** [add h x] is [insert x h]. *)
val filter : (elt -> bool) -> t -> t
(** Filter values, only retaining the ones that satisfy the predicate.
Linear time at least. *)
val find_min : t -> elt option val find_min : t -> elt option
(** Find minimal element. *) (** [find_min h] returns the minimal element of [h],
or [None] if [h] is empty.
Complexity: [O(1)].
*)
val find_min_exn : t -> elt val find_min_exn : t -> elt
(** Like {!find_min} but can fail. (** [find_min_exn h] is akin to {!find_min},
but it raises {!Empty} when the heap is empty.
@raise Empty if the heap is empty. *) @raise Empty if the heap is empty. *)
val take : t -> (t * elt) option val take : t -> (t * elt) option
(** Extract and return the minimum element, and the new heap (without (** [take h] returns the minimum element of [h]
this element), or [None] if the heap is empty. *) and the new heap without this element,
or [None] if [h] is empty.
Complexity: [O(log n)].
*)
val take_exn : t -> t * elt val take_exn : t -> t * elt
(** Like {!take}, but can fail. (** [take_exn h] is akin to {!take},
but it raises {!Empty} when the heap is empty.
@raise Empty if the heap is empty. *) @raise Empty if the heap is empty. *)
val size : t -> int
(** [size h] is the number of elements in the heap [h].
Complexity: [O(n)].
*)
(** {2 Deleting elements} *)
val delete_one : (elt -> elt -> bool) -> elt -> t -> t val delete_one : (elt -> elt -> bool) -> elt -> t -> t
(** Delete one occurrence of a value if it exist in the heap. (** [delete_one eq x h] deletes an occurrence of the value [x] from the heap
[delete_one eq x h], use [eq] to find one [x] in [h] and delete it. [h],
If [h] do not contain [x] then it return [h]. if there is some.
If [h] does not contain [x], then [h] itself is returned.
Elements are identified by the equality function [eq].
Complexity: [O(n)].
@since 2.0 *) @since 2.0 *)
val delete_all : (elt -> elt -> bool) -> elt -> t -> t val delete_all : (elt -> elt -> bool) -> elt -> t -> t
(** Delete all occurrences of a value in the heap. (** [delete_all eq x h] deletes all occurrences of the value [x] from the heap [h].
[delete_all eq x h], use [eq] to find all [x] in [h] and delete them. If [h] does not contain [x], then [h] itself is returned.
If [h] do not contain [x] then it return [h]. Elements are identified by the equality function [eq].
The difference with {!filter} is that [delete_all] stops as soon as This function is more efficient than {!filter}
it enters a subtree whose root is bigger than the element. because it avoids considering elements greater than [x].
Complexity: [O(n)].
@since 2.0 *) @since 2.0 *)
val filter : (elt -> bool) -> t -> t
(** [filter p h] filters the elements of [h],
only retaining those that satisfy the predicate [p].
If no element in [h] satisfies [p], then [h] itself is returned.
Complexity: [O(n)].
*)
(** {2 Iterating on elements} *)
val iter : (elt -> unit) -> t -> unit val iter : (elt -> unit) -> t -> unit
(** Iterate on elements. *) (** [iter f h] invokes [f] on every element of the heap [h]. *)
val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a
(** Fold on all values. *) (** [fold f acc h] folds on all elements of [h]. *)
val size : t -> int (** {2 Adding many elements at once} *)
(** Number of elements (linear complexity). *)
(** {2 Conversions} *)
val to_list : t -> elt list
(** Return the elements of the heap, in no particular order. *)
val to_list_sorted : t -> elt list
(** Return the elements in increasing order.
@since 1.1 *)
val add_list : t -> elt list -> t val add_list : t -> elt list -> t
(** Add the elements of the list to the heap. An element occurring several (** [add_list h l] adds the elements of the list [l] into the heap [h].
times will be added that many times to the heap. An element occurring several times will be added that many times to the heap.
Elements need not be given in any particular order.
This function is more efficient than repeated insertions.
Complexity: [O(log m + n)]
where [m] and [n] are the number of elements in [h] and [l], respectively.
@since 0.16 *) @since 0.16 *)
val of_list : elt list -> t
(** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *)
val add_iter : t -> elt iter -> t val add_iter : t -> elt iter -> t
(** Like {!add_list}. (** [add_iter h iter] is akin to {!add_list},
but taking an [iter] of elements as input.
@since 2.8 *) @since 2.8 *)
val add_seq : t -> elt Seq.t -> t val add_seq : t -> elt Seq.t -> t
(** Like {!add_list}. (** [add_seq h seq] is akin to {!add_list},
@since 2.8 *) but taking a [Seq.t] of elements as input.
Renamed from [add_std_seq] since 3.0.
@since 3.0 *)
val add_gen : t -> elt gen -> t
(** [add_gen h gen] is akin to {!add_list},
but taking a [gen] of elements as input.
@since 0.16 *)
val add_iter_almost_sorted : t -> elt iter -> t
(** [add_iter_almost_sorted h iter] is equivalent to
[merge h (of_iter_almost_sorted iter)].
See {!of_iter_almost_sorted}.
Complexity: [O(log m + n)].
@since NEXT_RELEASE
*)
(** {2 Conversions} *)
val of_list : elt list -> t
(** [of_list l] builds a heap from the list of elements [l].
Elements need not be given in any particular order.
This function is more efficient than repeated insertions.
It is equivalent to [add_list empty l].
Complexity: [O(n)].
*)
val of_iter : elt iter -> t val of_iter : elt iter -> t
(** Build a heap from a given [iter]. Complexity: [O(n log n)]. (** [of_iter iter] is akin to {!of_list},
but taking an [iter] of elements as input.
@since 2.8 *) @since 2.8 *)
val of_seq : elt Seq.t -> t val of_seq : elt Seq.t -> t
(** Build a heap from a given [Seq.t]. Complexity: [O(n log n)]. (** [of_seq seq] is akin to {!of_list},
@since 2.8 *) but taking a [Seq.t] of elements as input.
Renamed from [of_std_seq] since 3.0.
@since 3.0 *)
val of_gen : elt gen -> t
(** [of_gen gen] is akin to {!of_list},
but taking a [gen] of elements as input. *)
val of_iter_almost_sorted : elt iter -> t
(** [of_iter iter] builds a heap from the {!type:iter} sequence of elements.
Elements need not be given in any particular order.
However, the heap takes advantage of partial sorting found in the input:
the closer the input sequence is to being sorted,
the more efficient it is to convert the heap to a sorted sequence.
This enables heap-sorting that is faster than [O(n log n)]
when the input is almost sorted.
In the best case, when only a constant number of elements are misplaced,
then successive {!take} run in [O(1)],
and {!to_list_sorted} runs in [O(n)].
Complexity: [O(n)].
*)
val to_list : t -> elt list
(** [to_list h] returns a list of the elements of the heap [h],
in no particular order.
Complexity: [O(n)].
*)
val to_iter : t -> elt iter val to_iter : t -> elt iter
(** Return a [iter] of the elements of the heap. (** [to_iter h] is akin to {!to_list}, but returning an [iter] of elements.
@since 2.8 *) @since 2.8 *)
val to_seq : t -> elt Seq.t val to_seq : t -> elt Seq.t
(** Return a [Seq.t] of the elements of the heap. (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements.
@since 2.8 *) Renamed from [to_std_seq] since 3.0.
@since 3.0 *)
val to_gen : t -> elt gen
(** [to_gen h] is akin to {!to_list}, but returning a [gen] of elements. *)
val to_list_sorted : t -> elt list
(** [to_list_sorted h] returns the list of elements of the heap [h]
in increasing order.
Complexity: [O(n log n)].
@since 1.1 *)
val to_iter_sorted : t -> elt iter val to_iter_sorted : t -> elt iter
(** Iterate on the elements, in increasing order. (** [to_iter_sorted h] is akin to {!to_list_sorted},
but returning an [iter] of elements.
@since 2.8 *) @since 2.8 *)
val to_seq_sorted : t -> elt Seq.t val to_seq_sorted : t -> elt Seq.t
(** Iterate on the elements, in increasing order. (** [to_seq_sorted h] is akin to {!to_list_sorted},
@since 2.8 *) but returning a [Seq.t] of elements.
Renamed from [to_std_seq_sorted] since 3.0.
val add_gen : t -> elt gen -> t @since 3.0 *)
(** @since 0.16 *)
val of_gen : elt gen -> t
(** Build a heap from a given [gen]. Complexity: [O(n log n)]. *)
val to_gen : t -> elt gen
(** Return a [gen] of the elements of the heap. *)
val to_tree : t -> elt ktree val to_tree : t -> elt ktree
(** Return a [ktree] of the elements of the heap. *) (** [to_tree h] returns a [ktree] of the elements of the heap [h].
The layout is not specified.
Complexity: [O(n)].
*)
(** {2 Pretty-printing} *)
val to_string : ?sep:string -> (elt -> string) -> t -> string val to_string : ?sep:string -> (elt -> string) -> t -> string
(** Print the heap in a string (** Print the heap in a string
@ -178,6 +270,8 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct
exception Empty exception Empty
let singleton x = N (1, x, E, E)
(* Rank of the tree *) (* Rank of the tree *)
let _rank = function let _rank = function
| E -> 0 | E -> 0
@ -203,15 +297,9 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct
else else
_make_node y a2 (merge t1 b2) _make_node y a2 (merge t1 b2)
let insert x h = merge (N (1, x, E, E)) h let insert x h = merge (singleton x) h
let add h x = insert x h let add h x = insert x h
let rec filter p h =
match h with
| E -> E
| N (_, x, l, r) when p x -> _make_node x (filter p l) (filter p r)
| N (_, _, l, r) -> merge (filter p l) (filter p r)
let find_min_exn = function let find_min_exn = function
| E -> raise Empty | E -> raise Empty
| N (_, x, _, _) -> x | N (_, x, _, _) -> x
@ -228,39 +316,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct
| E -> raise Empty | E -> raise Empty
| N (_, x, l, r) -> merge l r, x | N (_, x, l, r) -> merge l r, x
let delete_one eq x h =
let rec aux = function
| E -> false, E
| N (_, y, l, r) as h ->
if eq x y then
true, merge l r
else if E.leq y x then (
let found_left, l1 = aux l in
let found, r1 =
if found_left then
true, r
else
aux r
in
if found then
true, _make_node y l1 r1
else
false, h
) else
false, h
in
snd (aux h)
let rec delete_all eq x = function
| E -> E
| N (_, y, l, r) as h ->
if eq x y then
merge (delete_all eq x l) (delete_all eq x r)
else if E.leq y x then
_make_node y (delete_all eq x l) (delete_all eq x r)
else
h
let rec iter f h = let rec iter f h =
match h with match h with
| E -> () | E -> ()
@ -281,7 +336,94 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct
| E -> 0 | E -> 0
| N (_, _, l, r) -> 1 + size l + size r | N (_, _, l, r) -> 1 + size l + size r
(** {2 Conversions} *) (** {2 Conversions from sequences} *)
(* Merge an [iter] of k heaps into one.
Instead of folding [merge] in one pass (which would run in time O(k log N)
where k is the number of heaps and N is the total number of elements), it
is more efficient to merge heaps pairwise until only one remains; see e.g.
Robert Tarjan, "Data Structures and Network Algorithms",
Chapter 3.3 "Leftist heaps", 1983.
or:
Chris Okasaki, "Purely Functional Data Structures",
Chapter 3.2 "Leftist heaps", Exercise 3.3, 1998
This is independent of the representation of heaps, and, as long as merging
is in time O(log n), this runs in time O(k + k*log(N/k)). Notice that this
is a O(k + N) (if k is small wrt. N, this last upper bound is very loose).
The code below uses additional space of only O(log(k)) at any moment;
it avoids storing an intermediate list of length O(k).
When at most one of the input heaps is non-empty, the result is physically
equal to it. *)
let _merge_heap_iter (hs : t iter) : t =
let rec cons_and_merge h0 hs weights =
begin match hs with
| h1 :: hs' when weights land 1 = 0 ->
cons_and_merge (merge h0 h1) hs' (weights lsr 1)
| _ ->
h0 :: hs
end
in
(* the i-th heap in this list is a merger of 2^{w_i} input heaps, each
having gone through w_i merge operations, where the "weights" 2^{w_i} are
strictly increasing wrt. i: *)
let mergers = ref [] in
(* The w_i are the 1-bits in the binary writing of [count], the number of
input heaps merged so far; adding a heap to the mergers works like binary
incrementation: *)
let count = ref 0 in
hs begin fun h ->
incr count ;
mergers := cons_and_merge h !mergers !count ;
end ;
List.fold_left merge E !mergers
(* To build a heap with n given values, instead of repeated insertions,
it is more efficient to do pairwise merging, running in time O(n). *)
let of_iter xs =
xs
|> _iter_map singleton
|> _merge_heap_iter
let of_list xs = of_iter (fun k -> List.iter k xs)
let of_seq xs = of_iter (fun k -> Seq.iter k xs)
let of_gen xs = of_iter (fun k -> _gen_iter k xs)
(* When input values are sorted in reverse order, then repeated insertions in
a leftist heap run in time O(n) and build a list-like heap where elements
are totally sorted, which makes a subsequent conversion to sorted sequence
run in O(n). *)
let _of_list_rev_sorted (xs : elt list) : t =
List.fold_left (fun h x -> N (1, x, h, E)) E xs
(* We use this to convert an arbitrary input sequence to a heap in time O(n),
while achieving an efficient heap structure in the common situation when
the input is almost sorted. This improves heap-sorting, for instance. *)
let of_iter_almost_sorted xs =
let sorted_chunk = ref [] in
let iter_sorted_heaps k =
xs begin fun x ->
begin match !sorted_chunk with
| (y :: _) as ys when not (E.leq y x) ->
k (_of_list_rev_sorted ys) ;
sorted_chunk := [x]
| ys ->
sorted_chunk := x :: ys
end ;
end ;
k (_of_list_rev_sorted !sorted_chunk)
in
_merge_heap_iter iter_sorted_heaps
(** {2 Adding many elements at once} *)
let add_list h xs = merge h (of_list xs)
let add_iter h xs = merge h (of_iter xs)
let add_seq h xs = merge h (of_seq xs)
let add_gen h xs = merge h (of_gen xs)
let add_iter_almost_sorted h xs = merge h (of_iter_almost_sorted xs)
(** {2 Conversions to sequences} *)
let to_list h = let to_list h =
let rec aux acc h = let rec aux acc h =
@ -291,29 +433,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct
in in
aux [] h aux [] h
let to_list_sorted heap =
let rec recurse acc h =
match take h with
| None -> List.rev acc
| Some (h', x) -> recurse (x :: acc) h'
in
recurse [] heap
let add_list h l = List.fold_left add h l
let of_list l = add_list empty l
let add_iter h i =
let h = ref h in
i (fun x -> h := insert x !h);
!h
let add_seq h seq =
let h = ref h in
Seq.iter (fun x -> h := insert x !h) seq;
!h
let of_iter i = add_iter empty i
let of_seq seq = add_seq empty seq
let to_iter h k = iter k h let to_iter h k = iter k h
let to_seq h = let to_seq h =
@ -326,28 +445,6 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct
in in
aux [ h ] aux [ h ]
let to_iter_sorted heap =
let rec recurse h k =
match take h with
| None -> ()
| Some (h', x) ->
k x;
recurse h' k
in
fun k -> recurse heap k
let rec to_seq_sorted h () =
match take h with
| None -> Seq.Nil
| Some (h', x) -> Seq.Cons (x, to_seq_sorted h')
let rec add_gen h g =
match g () with
| None -> h
| Some x -> add_gen (add h x) g
let of_gen g = add_gen empty g
let to_gen h = let to_gen h =
let stack = Stack.create () in let stack = Stack.create () in
Stack.push h stack; Stack.push h stack;
@ -365,11 +462,109 @@ module Make (E : PARTIAL_ORD) : S with type elt = E.t = struct
in in
next next
let to_list_sorted heap =
let rec recurse acc h =
match take h with
| None -> List.rev acc
| Some (h', x) -> recurse (x :: acc) h'
in
recurse [] heap
let to_iter_sorted heap =
let rec recurse h k =
match take h with
| None -> ()
| Some (h', x) ->
k x;
recurse h' k
in
fun k -> recurse heap k
let rec to_seq_sorted h () =
match take h with
| None -> Seq.Nil
| Some (h', x) -> Seq.Cons (x, to_seq_sorted h')
let rec to_tree h () = let rec to_tree h () =
match h with match h with
| E -> `Nil | E -> `Nil
| N (_, x, l, r) -> `Node (x, [ to_tree l; to_tree r ]) | N (_, x, l, r) -> `Node (x, [ to_tree l; to_tree r ])
(** {2 Filtering} *)
let rec delete_one eq x0 = function
| N (_, x, l, r) as h when E.leq x x0 ->
if eq x0 x then
merge l r
else begin
let l' = delete_one eq x0 l in
if CCEqual.physical l' l then
let r' = delete_one eq x0 r in
if CCEqual.physical r' r then
h
else
_make_node x l r'
else
_make_node x l' r
end
| h -> h
let delete_all eq x0 h =
(* Iterates [k] on sub-heaps of [h] whose merger is equal to [h] minus
the deleted elements [x0]; we do this, instead of merging the subheaps
directly, in order to ensure complexity O(n).
When no element is deleted, the iterator does nothing and the function
returns true; this makes sure that the result shares sub-heaps with the
input as much as possible, and ensures physical equality when no element
is deleted.
In [delete_all], by contrast with [filter], we can avoid considering
elements greater than [x0]. As a consequence, the complexity is more
precisely O(k + k log(n/k)), where k is the number of elements not
greater than [x0]. This is a O(n), but it is also a O(k log n), which is
much smaller than O(n) if k is asymptotically smaller than n.
*)
let rec iter_subheaps eq x0 h k =
begin match h with
| N (_, x, l, r) when E.leq x x0 ->
let keep_x = not (eq x0 x) in
let keep_l = iter_subheaps eq x0 l k in
let keep_r = iter_subheaps eq x0 r k in
if keep_x && keep_l && keep_r then
true
else begin
if keep_x then k (singleton x) ;
if keep_l then k l ;
if keep_r then k r ;
false
end
| _ -> true
end
in
_merge_heap_iter (fun k -> if iter_subheaps eq x0 h k then k h)
let filter p h =
(* similar to [delete_all] *)
let rec iter_subheaps p k h =
begin match h with
| E -> true
| N (_, x, l, r) ->
let keep_x = p x in
let keep_l = iter_subheaps p k l in
let keep_r = iter_subheaps p k r in
if keep_x && keep_l && keep_r then
true
else begin
if keep_x then k (singleton x) ;
if keep_l then k l ;
if keep_r then k r ;
false
end
end
in
_merge_heap_iter (fun k -> if iter_subheaps p k h then k h)
(** {2 Pretty-printing} *)
let to_string ?(sep = ",") elt_to_string h = let to_string ?(sep = ",") elt_to_string h =
to_list_sorted h |> List.map elt_to_string |> String.concat sep to_list_sorted h |> List.map elt_to_string |> String.concat sep

View file

@ -33,136 +33,214 @@ module type S = sig
type elt type elt
type t type t
exception Empty
(** {2 Basic heap operations} *)
val empty : t val empty : t
(** [empty] returns the empty heap. *) (** [empty] returns the empty heap. *)
val is_empty : t -> bool val is_empty : t -> bool
(** [is_empty h] returns [true] if the heap [h] is empty. *) (** [is_empty h] returns [true] iff the heap [h] is empty. *)
exception Empty
val merge : t -> t -> t val merge : t -> t -> t
(** [merge h1 h2] merges the two heaps [h1] and [h2]. *) (** [merge h1 h2] merges the two heaps [h1] and [h2].
If one heap is empty, the result is physically equal to the other heap.
Complexity: [O(log (m+n))] where [m] and [n] are the number of elements in each heap.
*)
val insert : elt -> t -> t val insert : elt -> t -> t
(** [insert x h] inserts an element [x] into the heap [h]. *) (** [insert x h] inserts an element [x] into the heap [h].
Complexity: [O(log n)] where [n] is the number of elements in [h].
*)
val add : t -> elt -> t val add : t -> elt -> t
(** [add h x] inserts an element [x] into the heap [h]. *) (** [add h x] is [insert x h]. *)
val filter : (elt -> bool) -> t -> t
(** [filter p h] filters values, only retaining the ones that satisfy the predicate [p].
Linear time at least. *)
val find_min : t -> elt option val find_min : t -> elt option
(** [find_min h] find the minimal element of the heap [h]. *) (** [find_min h] returns the minimal element of [h],
or [None] if [h] is empty.
Complexity: [O(1)].
*)
val find_min_exn : t -> elt val find_min_exn : t -> elt
(** [find_min_exn h] is like {!find_min} but can fail. (** [find_min_exn h] is akin to {!find_min},
but it raises {!Empty} when the heap is empty.
@raise Empty if the heap is empty. *) @raise Empty if the heap is empty. *)
val take : t -> (t * elt) option val take : t -> (t * elt) option
(** [take h] extracts and returns the minimum element, and the new heap (without (** [take h] returns the minimum element of [h]
this element), or [None] if the heap [h] is empty. *) and the new heap without this element,
or [None] if [h] is empty.
Complexity: [O(log n)].
*)
val take_exn : t -> t * elt val take_exn : t -> t * elt
(** [take_exn h] is like {!take}, but can fail. (** [take_exn h] is akin to {!take},
but it raises {!Empty} when the heap is empty.
@raise Empty if the heap is empty. *) @raise Empty if the heap is empty. *)
val size : t -> int
(** [size h] is the number of elements in the heap [h].
Complexity: [O(n)].
*)
(** {2 Deleting elements} *)
val delete_one : (elt -> elt -> bool) -> elt -> t -> t val delete_one : (elt -> elt -> bool) -> elt -> t -> t
(** [delete_one eq x h] uses [eq] to find one occurrence of a value [x] (** [delete_one eq x h] deletes an occurrence of the value [x] from the heap [h],
if it exist in the heap [h], and delete it. if there is some.
If [h] do not contain [x] then it return [h]. If [h] does not contain [x], then [h] itself is returned.
Elements are identified by the equality function [eq].
Complexity: [O(n)].
@since 2.0 *) @since 2.0 *)
val delete_all : (elt -> elt -> bool) -> elt -> t -> t val delete_all : (elt -> elt -> bool) -> elt -> t -> t
(** [delete_all eq x h] uses [eq] to find all [x] in [h] and delete them. (** [delete_all eq x h] deletes all occurrences of the value [x] from the heap [h].
If [h] do not contain [x] then it return [h]. If [h] does not contain [x], then [h] itself is returned.
The difference with {!filter} is that [delete_all] stops as soon as Elements are identified by the equality function [eq].
it enters a subtree whose root is bigger than the element. This function is more efficient than {!filter}
because it avoids considering elements greater than [x].
Complexity: [O(n)].
@since 2.0 *) @since 2.0 *)
val filter : (elt -> bool) -> t -> t
(** [filter p h] filters the elements of [h],
only retaining those that satisfy the predicate [p].
If no element in [h] satisfies [p], then [h] itself is returned.
Complexity: [O(n)].
*)
(** {2 Iterating on elements} *)
val iter : (elt -> unit) -> t -> unit val iter : (elt -> unit) -> t -> unit
(** [iter f h] iterates over the heap [h] invoking [f] with the current element. *) (** [iter f h] invokes [f] on every element of the heap [h]. *)
val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a val fold : ('a -> elt -> 'a) -> 'a -> t -> 'a
(** [fold f acc h] folds on all values of [h]. *) (** [fold f acc h] folds on all elements of [h]. *)
val size : t -> int (** {2 Adding many elements at once} *)
(** [size h] is the number of elements in the heap [h]. Linear complexity. *)
(** {2 Conversions} *)
val to_list : t -> elt list
(** [to_list h] returns the elements of the heap [h], in no particular order. *)
val to_list_sorted : t -> elt list
(** [to_list_sorted h] returns the elements of the heap [h] in increasing order.
@since 1.1 *)
val add_list : t -> elt list -> t val add_list : t -> elt list -> t
(** [add_list h l] adds the elements of the list [l] into the heap [h]. (** [add_list h l] adds the elements of the list [l] into the heap [h].
An element occurring several times will be added that many times to the heap. An element occurring several times will be added that many times to the heap.
Elements need not be given in any particular order.
This function is more efficient than repeated insertions.
Complexity: [O(log m + n)]
where [m] and [n] are the number of elements in [h] and [l], respectively.
@since 0.16 *) @since 0.16 *)
val of_list : elt list -> t
(** [of_list l] is [add_list empty l]. Complexity: [O(n log n)]. *)
val add_iter : t -> elt iter -> t val add_iter : t -> elt iter -> t
(** [add_iter h iter] is like {!add_list}. (** [add_iter h iter] is akin to {!add_list},
but taking an {!type:iter} of elements as input.
@since 2.8 *) @since 2.8 *)
val add_seq : t -> elt Seq.t -> t val add_seq : t -> elt Seq.t -> t
(** [add_seq h seq] is like {!add_list}. (** [add_seq h seq] is akin to {!add_list},
but taking a [Seq.t] of elements as input.
Renamed from [add_std_seq] since 3.0. Renamed from [add_std_seq] since 3.0.
@since 3.0 *) @since 3.0 *)
val add_gen : t -> elt gen -> t
(** [add_gen h gen] is akin to {!add_list},
but taking a {!type:gen} of elements as input.
@since 0.16 *)
val add_iter_almost_sorted : t -> elt iter -> t
(** [add_iter_almost_sorted h iter] is equivalent to
[merge h (of_iter_almost_sorted iter)].
See {!of_iter_almost_sorted}.
Complexity: [O(log m + n)].
@since NEXT_RELEASE
*)
(** {2 Conversions} *)
val of_list : elt list -> t
(** [of_list l] builds a heap from the list of elements [l].
Elements need not be given in any particular order.
This function is more efficient than repeated insertions.
It is equivalent to {!add_list}[ empty l].
Complexity: [O(n)].
*)
val of_iter : elt iter -> t val of_iter : elt iter -> t
(** [of_iter iter] builds a heap from a given [iter]. Complexity: [O(n log n)]. (** [of_iter iter] is akin to {!of_list},
but taking an {!type:iter} of elements as input.
@since 2.8 *) @since 2.8 *)
val of_seq : elt Seq.t -> t val of_seq : elt Seq.t -> t
(** [of_seq seq] builds a heap from a given [Seq.t]. Complexity: [O(n log n)]. (** [of_seq seq] is akin to {!of_list},
Renamed from [of_seq] since 3.0. but taking a [Seq.t] of elements as input.
Renamed from [of_std_seq] since 3.0.
@since 3.0 *) @since 3.0 *)
val of_gen : elt gen -> t
(** [of_gen gen] is akin to {!of_list},
but taking a {!type:gen} of elements as input. *)
val of_iter_almost_sorted : elt iter -> t
(** [of_iter iter] builds a heap from the {!type:iter} sequence of elements.
Elements need not be given in any particular order.
However, the heap takes advantage of partial sorting found in the input:
the closer the input sequence is to being sorted,
the more efficient it is to convert the heap to a sorted sequence.
This enables heap-sorting that is faster than [O(n log n)]
when the input is almost sorted.
In the best case, when only a constant number of elements are misplaced,
then successive {!take} run in [O(1)],
and {!to_list_sorted} runs in [O(n)].
Complexity: [O(n)].
@since NEXT_RELEASE
*)
val to_list : t -> elt list
(** [to_list h] returns a list of the elements of the heap [h],
in no particular order.
Complexity: [O(n)].
*)
val to_iter : t -> elt iter val to_iter : t -> elt iter
(** [to_iter h] returns a [iter] of the elements of the heap [h]. (** [to_iter h] is akin to {!to_list}, but returning an {!type:iter} of elements.
@since 2.8 *) @since 2.8 *)
val to_seq : t -> elt Seq.t val to_seq : t -> elt Seq.t
(** [to_seq h] returns a [Seq.t] of the elements of the heap [h]. (** [to_seq h] is akin to {!to_list}, but returning a [Seq.t] of elements.
Renamed from [to_std_seq] since 3.0. Renamed from [to_std_seq] since 3.0.
@since 3.0 *) @since 3.0 *)
val to_iter_sorted : t -> elt iter val to_gen : t -> elt gen
(** [to_iter_sorted h] returns a [iter] by iterating on the elements of [h], (** [to_gen h] is akin to {!to_list}, but returning a {!type:gen} of elements. *)
val to_list_sorted : t -> elt list
(** [to_list_sorted h] returns the list of elements of the heap [h]
in increasing order. in increasing order.
Complexity: [O(n log n)].
@since 1.1 *)
val to_iter_sorted : t -> elt iter
(** [to_iter_sorted h] is akin to {!to_list_sorted},
but returning an {!type:iter} of elements.
@since 2.8 *) @since 2.8 *)
val to_seq_sorted : t -> elt Seq.t val to_seq_sorted : t -> elt Seq.t
(** [to_seq_sorted h] returns a [Seq.t] by iterating on the elements of [h], (** [to_seq_sorted h] is akin to {!to_list_sorted},
in increasing order. but returning a [Seq.t] of elements.
Renamed from [to_std_seq_sorted] since 3.0. Renamed from [to_std_seq_sorted] since 3.0.
@since 3.0 *) @since 3.0 *)
val add_gen : t -> elt gen -> t
(** [add_gen h gen] adds the gen [gen] to the heap [h].
@since 0.16 *)
val of_gen : elt gen -> t
(** [of_gen gen] builds a heap from a given [gen]. Complexity: [O(n log n)]. *)
val to_gen : t -> elt gen
(** [to_gen h] returns a [gen] of the elements of the heap [h]. *)
val to_tree : t -> elt ktree val to_tree : t -> elt ktree
(** [to_tree h] returns a [ktree] of the elements of the heap [h]. *) (** [to_tree h] returns a {!type:ktree} of the elements of the heap [h].
The layout is not specified.
Complexity: [O(n)].
*)
(** {2 Pretty-printing} *)
val to_string : ?sep:string -> (elt -> string) -> t -> string val to_string : ?sep:string -> (elt -> string) -> t -> string
(** [to_string ?sep f h] prints the heap [h] in a string (** [to_string ?sep f h] prints the heap [h] to a string,
using [sep] as a given separator (default ",") between each element using [f] to convert elements to strings
(converted to a string using [f]). and [sep] (default: [","]) as a separator between elements.
@since 2.7 *) @since 2.7 *)
val pp : val pp :
@ -173,17 +251,17 @@ module type S = sig
t printer t printer
(** [pp ?pp_start ?pp_stop ?pp_sep ppf h] prints [h] on [ppf]. (** [pp ?pp_start ?pp_stop ?pp_sep ppf h] prints [h] on [ppf].
Each element is formatted with [ppf], [pp_start] is called at the beginning, Each element is formatted with [ppf], [pp_start] is called at the beginning,
[pp_stop] is called at the end, [pp_sep] is called between each elements. [pp_stop] is called at the end, [pp_sep] is called between each element.
By defaults [pp_start] and [pp_stop] does nothing and [pp_sep] defaults to By default, [pp_start] and [pp_stop] do nothing, and [pp_sep] is
(fun out -> Format.fprintf out ",@ "). [(fun out -> Format.fprintf out ",@ ")].
Renamed from [print] since 2.0 Renamed from [print] since 2.0
@since 0.16 *) @since 0.16 *)
end end
module Make (E : PARTIAL_ORD) : S with type elt = E.t module Make (E : PARTIAL_ORD) : S with type elt = E.t
(** A convenient version of [Make] that take a [TOTAL_ORD] instead of (** A convenient version of [Make] that takes a [TOTAL_ORD] instead of
a partially ordered module. a partially ordered module.
It allow to directly pass modules that implement [compare] It allows to directly pass modules that implement [compare]
without implementing [leq] explicitly *) without implementing [leq] explicitly. *)
module Make_from_compare (E : TOTAL_ORD) : S with type elt = E.t module Make_from_compare (E : TOTAL_ORD) : S with type elt = E.t

View file

@ -2,106 +2,239 @@ open CCHeap
module T = (val Containers_testlib.make ~__FILE__ ()) module T = (val Containers_testlib.make ~__FILE__ ())
include T include T
(* A QCheck generator for natural numbers that are not too large (larger than
* [small_nat] but smaller than [big_nat]), with a bias towards smaller numbers.
* This also happens to be what QCheck uses for picking a length for a list
* generated by [QCheck.list].
* QCheck defines this generator under the name [nat] but does not expose it. *)
let medium_nat =
Q.make ~print:Q.Print.int ~shrink:Q.Shrink.int ~small:(fun _ -> 1)
(fun st ->
let p = Random.State.float st 1. in
if p < 0.5 then Random.State.int st 10
else if p < 0.75 then Random.State.int st 100
else if p < 0.95 then Random.State.int st 1_000
else Random.State.int st 10_000
)
let list_delete_first (x0 : int) (xs : int list) : int list =
let rec aux acc xs =
begin match xs with
| [] -> List.rev acc
| x :: xs' when x = x0 -> List.rev_append acc xs'
| x :: xs' -> aux (x :: acc) xs'
end
in
aux [] xs
module H = CCHeap.Make (struct module H = CCHeap.Make (struct
type t = int type t = int
let leq x y = x <= y let leq x y = x <= y
end) end)
let rec is_sorted l = ;;
match l with
| [ _ ] | [] -> true
| x :: (y :: _ as l') -> x <= y && is_sorted l'
let extract_list = H.to_list_sorted;; t ~name:"of_list, find_min_exn, take_exn" @@ fun () ->
let h = H.of_list [ 5; 4; 3; 4; 1; 42; 0 ] in
t @@ fun () -> assert_equal ~printer:string_of_int 0 (H.find_min_exn h);
let h = H.of_list [ 5; 3; 4; 1; 42; 0 ] in
let h, x = H.take_exn h in let h, x = H.take_exn h in
assert_equal ~printer:string_of_int 0 x; assert_equal ~printer:string_of_int 0 x;
assert_equal ~printer:string_of_int 1 (H.find_min_exn h);
let h, x = H.take_exn h in let h, x = H.take_exn h in
assert_equal ~printer:string_of_int 1 x; assert_equal ~printer:string_of_int 1 x;
assert_equal ~printer:string_of_int 3 (H.find_min_exn h);
let h, x = H.take_exn h in let h, x = H.take_exn h in
assert_equal ~printer:string_of_int 3 x; assert_equal ~printer:string_of_int 3 x;
assert_equal ~printer:string_of_int 4 (H.find_min_exn h);
let h, x = H.take_exn h in let h, x = H.take_exn h in
assert_equal ~printer:string_of_int 4 x; assert_equal ~printer:string_of_int 4 x;
assert_equal ~printer:string_of_int 4 (H.find_min_exn h);
let h, x = H.take_exn h in
assert_equal ~printer:string_of_int 4 x;
assert_equal ~printer:string_of_int 5 (H.find_min_exn h);
let h, x = H.take_exn h in let h, x = H.take_exn h in
assert_equal ~printer:string_of_int 5 x; assert_equal ~printer:string_of_int 5 x;
assert_equal ~printer:string_of_int 42 (H.find_min_exn h);
let h, x = H.take_exn h in let h, x = H.take_exn h in
assert_equal ~printer:string_of_int 42 x; assert_equal ~printer:string_of_int 42 x;
assert_raises assert_raises ((=) H.Empty) (fun () -> H.find_min_exn h);
(function assert_raises ((=) H.Empty) (fun () -> H.take_exn h);
| H.Empty -> true
| _ -> false)
(fun () -> H.take_exn h);
true true
;; ;;
q ~count:30 q ~name:"of_list, to_list"
Q.(list_of_size Gen.(return 1_000) int) ~count:30
Q.(list medium_nat)
(fun l -> (fun l ->
(* put elements into a heap *) (l |> H.of_list |> H.to_list |> List.sort CCInt.compare)
let h = H.of_iter (Iter.of_list l) in = (l |> List.sort CCInt.compare))
assert_equal 1_000 (H.size h);
let l' = extract_list h in
is_sorted l')
;; ;;
(* test filter *) q ~name:"of_list, to_list_sorted"
q ~count:30 ~count:30
Q.(list_of_size Gen.(return 1_000) int) Q.(list medium_nat)
(fun l -> (fun l ->
(* put elements into a heap *) (l |> H.of_list |> H.to_list_sorted)
let h = H.of_iter (Iter.of_list l) in = (l |> List.sort CCInt.compare))
let h = H.filter (fun x -> x mod 2 = 0) h in
assert (H.to_iter h |> Iter.for_all (fun x -> x mod 2 = 0));
let l' = extract_list h in
is_sorted l')
;; ;;
q (* The remaining tests assume the correctness of
Q.(list_of_size Gen.(return 1_000) int) [of_list], [to_list], [to_list_sorted]. *)
q ~name:"size"
~count:30
Q.(list_of_size Gen.small_nat medium_nat)
(fun l -> (fun l ->
(* put elements into a heap *) (l |> H.of_list |> H.size)
let h = H.of_iter (Iter.of_list l) in = (l |> List.length))
let l' = H.to_iter_sorted h |> Iter.to_list in
is_sorted l')
;; ;;
q q ~name:"insert"
Q.(list int) Q.(pair medium_nat (list medium_nat))
(fun l -> (fun (x, l) ->
extract_list (H.of_list l) = extract_list (H.of_gen (CCList.to_gen l))) (l |> H.of_list |> H.insert x |> H.to_list_sorted)
= ((x::l) |> List.sort CCInt.compare))
;; ;;
q q ~name:"merge"
Q.(list int) Q.(pair (list medium_nat) (list medium_nat))
(fun l -> (fun (l1, l2) ->
let h = H.of_list l in (H.merge (H.of_list l1) (H.of_list l2) |> H.to_list_sorted)
H.to_gen h |> CCList.of_gen |> List.sort Stdlib.compare = ((l1@l2) |> List.sort CCInt.compare))
= (H.to_list h |> List.sort Stdlib.compare))
;; ;;
q q ~name:"add_list"
Q.(list int) Q.(pair (list medium_nat) (list medium_nat))
(fun l -> (fun (l1, l2) ->
let h = H.of_list l in (H.add_list (H.of_list l1) l2 |> H.to_list_sorted)
H.to_string string_of_int h = ((l1@l2) |> List.sort CCInt.compare))
= (List.sort Stdlib.compare l |> List.map string_of_int |> String.concat ","))
;; ;;
q q ~name:"delete_one"
Q.(list int) Q.(pair medium_nat (list medium_nat))
(fun l -> (fun (x, l) ->
let h = H.of_list l in (l |> H.of_list |> H.delete_one (=) x |> H.to_list_sorted)
H.to_string ~sep:" " string_of_int h = (l |> list_delete_first x |> List.sort CCInt.compare))
= (List.sort Stdlib.compare l |> List.map string_of_int |> String.concat " "))
;; ;;
q q ~name:"delete_all"
Q.(list_of_size Gen.(return 1_000) int) Q.(pair medium_nat (list medium_nat))
(fun (x, l) ->
(l |> H.of_list |> H.delete_all (=) x |> H.to_list_sorted)
= (l |> List.filter ((<>) x) |> List.sort CCInt.compare))
;;
q ~name:"filter"
Q.(list medium_nat)
(fun l ->
let p = (fun x -> x mod 2 = 0) in
let l' = l |> H.of_list |> H.filter p |> H.to_list in
List.for_all p l' && List.length l' = List.length (List.filter p l))
;;
t ~name:"physical equality" @@ fun () ->
let h = H.of_list [ 5; 4; 3; 4; 1; 42; 0 ] in
assert_bool "physical equality of merge with left empty"
(CCEqual.physical h (H.merge H.empty h)) ;
assert_bool "physical equality of merge with right empty"
(CCEqual.physical h (H.merge h H.empty)) ;
assert_bool "physical equality of delete_one with element lesser than min"
(CCEqual.physical h (H.delete_one (=) (-999) h)) ;
assert_bool "physical equality of delete_one with element between min and max"
(CCEqual.physical h (H.delete_one (=) 2 h)) ;
assert_bool "physical equality of delete_one with element greater than max"
(CCEqual.physical h (H.delete_one (=) 999 h)) ;
assert_bool "physical equality of delete_all with element lesser than min"
(CCEqual.physical h (H.delete_all (=) (-999) h)) ;
assert_bool "physical equality of delete_all with element between min and max"
(CCEqual.physical h (H.delete_all (=) 2 h)) ;
assert_bool "physical equality of delete_all with element greater than max"
(CCEqual.physical h (H.delete_all (=) 999 h)) ;
assert_bool "physical equality of filter"
(CCEqual.physical h (H.filter (fun _ -> true) h)) ;
true
;;
q ~name:"fold"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> H.of_list |> H.fold (+) 0)
= (l |> List.fold_left (+) 0))
;;
q ~name:"of_iter"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> CCList.to_iter |> H.of_iter |> H.to_list_sorted)
= (l |> List.sort CCInt.compare))
;;
q ~name:"of_seq"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> CCList.to_seq |> H.of_seq |> H.to_list_sorted)
= (l |> List.sort CCInt.compare))
;;
q ~name:"of_gen"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> CCList.to_gen |> H.of_gen |> H.to_list_sorted)
= (l |> List.sort CCInt.compare))
;;
q ~name:"to_iter"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> H.of_list |> H.to_iter |> CCList.of_iter |> List.sort CCInt.compare)
= (l |> List.sort CCInt.compare))
;;
q ~name:"to_seq"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> H.of_list |> H.to_seq |> CCList.of_seq |> List.sort CCInt.compare)
= (l |> List.sort CCInt.compare))
;;
q ~name:"to_gen"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> H.of_list |> H.to_gen |> CCList.of_gen |> List.sort CCInt.compare)
= (l |> List.sort CCInt.compare))
;;
q ~name:"to_iter_sorted"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> H.of_list |> H.to_iter_sorted |> Iter.to_list)
= (l |> List.sort CCInt.compare))
;;
q ~name:"to_seq_sorted"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> H.of_list |> H.to_seq_sorted |> CCList.of_seq |> List.sort CCInt.compare)
= (l |> List.sort CCInt.compare))
;;
q ~name:"to_string with default sep"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> H.of_list |> H.to_string string_of_int)
= (l |> List.sort CCInt.compare |> List.map string_of_int |> String.concat ","))
;;
q ~name:"to_string with space as sep"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l ->
(l |> H.of_list |> H.to_string ~sep:" " string_of_int)
= (l |> List.sort CCInt.compare |> List.map string_of_int |> String.concat " "))
;;
q ~name:"Make_from_compare"
Q.(list_of_size Gen.small_nat medium_nat)
(fun l -> (fun l ->
let module H' = Make_from_compare (CCInt) in let module H' = Make_from_compare (CCInt) in
let h = H'.of_list l in (l |> H'.of_list |> H'.to_list_sorted)
let l' = H'.to_list_sorted h in = (l |> List.sort CCInt.compare))
is_sorted l')