mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 03:05:28 -05:00
wip: persistent vectors based on clojure's
This commit is contained in:
parent
9de8f1fb2e
commit
7b7eda5a05
3 changed files with 460 additions and 0 deletions
368
src/pvec/containers_pvec.ml
Normal file
368
src/pvec/containers_pvec.ml
Normal file
|
|
@ -0,0 +1,368 @@
|
||||||
|
(* HAMT.
|
||||||
|
We follow https://hypirion.com/musings/understanding-persistent-vector-pt-1
|
||||||
|
and following posts. *)
|
||||||
|
|
||||||
|
type 'a iter = ('a -> unit) -> unit
|
||||||
|
|
||||||
|
let num_bits = 2
|
||||||
|
let branching_factor = 1 lsl num_bits
|
||||||
|
let bitmask = branching_factor - 1
|
||||||
|
|
||||||
|
(** Short array with functional semantics *)
|
||||||
|
module A = struct
|
||||||
|
open Array
|
||||||
|
|
||||||
|
type 'a t = 'a array
|
||||||
|
|
||||||
|
let length = length
|
||||||
|
let get = get
|
||||||
|
let[@inline] is_empty self = self = [||]
|
||||||
|
let[@inline] return self = [| self |]
|
||||||
|
let[@inline] is_full self = length self = branching_factor
|
||||||
|
|
||||||
|
let[@inline] push (self : _ t) x =
|
||||||
|
let n = length self in
|
||||||
|
if n = branching_factor then invalid_arg "Pvec.push";
|
||||||
|
let arr = Array.make (n + 1) x in
|
||||||
|
Array.blit self 0 arr 0 n;
|
||||||
|
arr
|
||||||
|
|
||||||
|
let[@inline] pop self : _ t =
|
||||||
|
let n = length self in
|
||||||
|
if n = 0 then invalid_arg "Pvec.pop";
|
||||||
|
Array.sub self 0 (n - 1)
|
||||||
|
|
||||||
|
(* TODO: remove *)
|
||||||
|
let set ~mut (self : _ t) i x : _ t =
|
||||||
|
if i < 0 || i > length self || i >= branching_factor then
|
||||||
|
invalid_arg "Pvec.set";
|
||||||
|
if i = length self then (
|
||||||
|
(* insert in a longer copy *)
|
||||||
|
let arr = Array.make (i + 1) x in
|
||||||
|
Array.blit self 0 arr 0 i;
|
||||||
|
arr
|
||||||
|
) else if mut then (
|
||||||
|
(* replace element at [i] in place *)
|
||||||
|
self.(i) <- x;
|
||||||
|
self
|
||||||
|
) else (
|
||||||
|
(* replace element at [i] in copy *)
|
||||||
|
let arr = Array.copy self in
|
||||||
|
arr.(i) <- x;
|
||||||
|
arr
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
type 'a tree = Empty | Node of 'a tree A.t | Leaf of 'a A.t
|
||||||
|
|
||||||
|
type 'a t = {
|
||||||
|
t: 'a tree; (** The 32-way tree *)
|
||||||
|
size: int; (** Exact number of elements *)
|
||||||
|
shift: int; (** num_bits*(depth of tree) *)
|
||||||
|
tail: 'a A.t; (** Tail array, for fast push/pop *)
|
||||||
|
}
|
||||||
|
(* invariants:
|
||||||
|
- if size>0 then [not (is_empty tail)]
|
||||||
|
- all leaves in [t] are at depth shift/5
|
||||||
|
*)
|
||||||
|
|
||||||
|
(* FIXME: remove *)
|
||||||
|
let dbg_ = ref (fun _ _ _ -> assert false)
|
||||||
|
let empty_tree = Empty
|
||||||
|
let empty = { t = empty_tree; size = 0; shift = 0; tail = [||] }
|
||||||
|
|
||||||
|
let[@inline] is_empty_tree = function
|
||||||
|
| Empty -> true
|
||||||
|
| _ -> false
|
||||||
|
|
||||||
|
let[@inline] is_empty (self : _ t) = self.size = 0
|
||||||
|
let[@inline] length (self : _ t) = self.size
|
||||||
|
let[@inline] return x = { empty with size = 1; tail = A.return x }
|
||||||
|
let[@inline] tail_off (self : _ t) : int = self.size - A.length self.tail
|
||||||
|
|
||||||
|
let[@unroll 2] rec get_tree_ (self : 'a tree) (shift : int) i : 'a =
|
||||||
|
match self with
|
||||||
|
| Empty -> invalid_arg "pvec.get"
|
||||||
|
| Leaf a -> A.get a (i land bitmask)
|
||||||
|
| Node a ->
|
||||||
|
let idx = (i lsr shift) land bitmask in
|
||||||
|
get_tree_ (A.get a idx) (shift - num_bits) i
|
||||||
|
|
||||||
|
let get (self : 'a t) (i : int) : 'a =
|
||||||
|
if i < 0 then
|
||||||
|
invalid_arg "pvec.get"
|
||||||
|
else (
|
||||||
|
let tail_off = self.size - A.length self.tail in
|
||||||
|
if i >= tail_off then
|
||||||
|
A.get self.tail (i - tail_off)
|
||||||
|
else
|
||||||
|
get_tree_ self.t self.shift i
|
||||||
|
)
|
||||||
|
|
||||||
|
let[@inline] get_opt self i =
|
||||||
|
try Some (get self i) with Invalid_argument _ -> None
|
||||||
|
|
||||||
|
(** Build a tree leading to [tail] with indices 0 at each node *)
|
||||||
|
let rec build_new_tail_spine_ shift tail : _ tree =
|
||||||
|
if shift = 0 then
|
||||||
|
Leaf tail
|
||||||
|
else
|
||||||
|
Node [| build_new_tail_spine_ (shift - num_bits) tail |]
|
||||||
|
|
||||||
|
let rec insert_tail_ (self : _ tree) shift i (tail : _ A.t) : _ tree =
|
||||||
|
Format.printf "insert tail shift=%d i=0x%x into %a@." shift i
|
||||||
|
(!dbg_ Format.pp_print_int)
|
||||||
|
(Obj.magic self : int tree);
|
||||||
|
match self with
|
||||||
|
| Empty ->
|
||||||
|
if shift = 0 then
|
||||||
|
Leaf tail
|
||||||
|
else (
|
||||||
|
assert ((i lsl shift) land bitmask = 0);
|
||||||
|
Node [| insert_tail_ Empty (shift - num_bits) i tail |]
|
||||||
|
)
|
||||||
|
| Leaf _ -> assert false
|
||||||
|
| Node a ->
|
||||||
|
(* would be in the {!build_new_tail_spine_} case *)
|
||||||
|
assert (i <> 0);
|
||||||
|
let idx = (i lsr shift) land bitmask in
|
||||||
|
Printf.printf "insert tail rec at idx=%d shift=%d i=0x%x\n%!" idx shift i;
|
||||||
|
let sub, must_push =
|
||||||
|
if idx < A.length a then
|
||||||
|
A.get a idx, false
|
||||||
|
else
|
||||||
|
Empty, true
|
||||||
|
in
|
||||||
|
let new_sub = insert_tail_ sub (shift - num_bits) i tail in
|
||||||
|
let a =
|
||||||
|
if must_push then
|
||||||
|
A.push a new_sub
|
||||||
|
else
|
||||||
|
A.set ~mut:false a idx new_sub
|
||||||
|
in
|
||||||
|
Node a
|
||||||
|
|
||||||
|
let[@inline never] push_full_ self x : _ t =
|
||||||
|
if 1 lsl (self.shift + num_bits) = self.size - A.length self.tail then (
|
||||||
|
(* tree is full, add a level *)
|
||||||
|
let t = Node [| self.t; build_new_tail_spine_ self.shift self.tail |] in
|
||||||
|
{ t; size = self.size + 1; shift = self.shift + num_bits; tail = [| x |] }
|
||||||
|
) else (
|
||||||
|
(* insert at the end of the current tree *)
|
||||||
|
let idx = self.size - A.length self.tail in
|
||||||
|
let t = insert_tail_ self.t self.shift idx self.tail in
|
||||||
|
{ t; size = self.size + 1; shift = self.shift; tail = [| x |] }
|
||||||
|
)
|
||||||
|
|
||||||
|
let[@inline] push (self : _ t) x : _ t =
|
||||||
|
if A.is_full self.tail then
|
||||||
|
push_full_ self x
|
||||||
|
else
|
||||||
|
{ self with tail = A.push self.tail x; size = self.size + 1 }
|
||||||
|
|
||||||
|
let rec pop_tail_from_tree_ (self : _ tree) shift i : 'a A.t * 'a tree =
|
||||||
|
Format.printf "pop tail shift=%d i=0x%x from %a@." shift i
|
||||||
|
(!dbg_ Format.pp_print_int)
|
||||||
|
(Obj.magic self : int tree);
|
||||||
|
match self with
|
||||||
|
| Empty -> assert false
|
||||||
|
| Leaf tail ->
|
||||||
|
assert (shift = 0);
|
||||||
|
tail, Empty
|
||||||
|
| Node a ->
|
||||||
|
let idx = (i lsr shift) land bitmask in
|
||||||
|
let sub = A.get a idx in
|
||||||
|
let tail, new_sub = pop_tail_from_tree_ sub (shift - num_bits) i in
|
||||||
|
let new_tree =
|
||||||
|
if is_empty_tree new_sub then (
|
||||||
|
let a = A.pop a in
|
||||||
|
if A.is_empty a then
|
||||||
|
Empty
|
||||||
|
else
|
||||||
|
Node a
|
||||||
|
) else
|
||||||
|
Node (A.set ~mut:false a idx new_sub)
|
||||||
|
in
|
||||||
|
tail, new_tree
|
||||||
|
|
||||||
|
let[@inline never] move_last_leaf_to_tail (self : _ t) : _ t =
|
||||||
|
assert (A.length self.tail = 1);
|
||||||
|
if self.size = 1 then
|
||||||
|
(* back to empty *)
|
||||||
|
empty
|
||||||
|
else (
|
||||||
|
(* before pop *)
|
||||||
|
let tail, t = pop_tail_from_tree_ self.t self.shift (self.size - 1) in
|
||||||
|
let t, shift =
|
||||||
|
match t with
|
||||||
|
| Node [| t' |] ->
|
||||||
|
(* all indices have 00000 as MSB, remove one level *)
|
||||||
|
t', self.shift - 1
|
||||||
|
| _ -> t, self.shift
|
||||||
|
in
|
||||||
|
{ tail; size = self.size - 1; shift; t }
|
||||||
|
)
|
||||||
|
|
||||||
|
let pop (self : 'a t) : 'a * 'a t =
|
||||||
|
if self.size = 0 then invalid_arg "pvec.pop";
|
||||||
|
let x = A.get self.tail (A.length self.tail - 1) in
|
||||||
|
let new_tail = A.pop self.tail in
|
||||||
|
if A.is_empty new_tail then (
|
||||||
|
let new_self = move_last_leaf_to_tail self in
|
||||||
|
x, new_self
|
||||||
|
) else (
|
||||||
|
let new_self = { self with size = self.size - 1; tail = new_tail } in
|
||||||
|
x, new_self
|
||||||
|
)
|
||||||
|
|
||||||
|
let pop_opt (self : 'a t) : ('a * 'a t) option =
|
||||||
|
if self.size = 0 then
|
||||||
|
None
|
||||||
|
else
|
||||||
|
Some (pop self)
|
||||||
|
|
||||||
|
let rec iteri_rec_ f idx (self : _ tree) =
|
||||||
|
match self with
|
||||||
|
| Empty -> ()
|
||||||
|
| Leaf a ->
|
||||||
|
for i = 0 to A.length a - 1 do
|
||||||
|
let j = idx lor i in
|
||||||
|
f j (Array.unsafe_get a i)
|
||||||
|
done
|
||||||
|
| Node a ->
|
||||||
|
for i = 0 to A.length a - 1 do
|
||||||
|
let idx = (idx lsl num_bits) lor i in
|
||||||
|
iteri_rec_ f idx (Array.unsafe_get a i)
|
||||||
|
done
|
||||||
|
|
||||||
|
let iteri f (self : 'a t) : unit =
|
||||||
|
iteri_rec_ f 0 self.t;
|
||||||
|
let tail_off = tail_off self in
|
||||||
|
for i = 0 to A.length self.tail - 1 do
|
||||||
|
f (i + tail_off) (Array.unsafe_get self.tail i)
|
||||||
|
done
|
||||||
|
|
||||||
|
let rec iteri_rev_rec_ f idx (self : _ tree) =
|
||||||
|
match self with
|
||||||
|
| Empty -> ()
|
||||||
|
| Leaf a ->
|
||||||
|
for i = A.length a - 1 downto 0 do
|
||||||
|
let j = idx lor i in
|
||||||
|
f j (Array.unsafe_get a i)
|
||||||
|
done
|
||||||
|
| Node a ->
|
||||||
|
for i = A.length a - 1 downto 0 do
|
||||||
|
let idx = (idx lsl num_bits) lor i in
|
||||||
|
iteri_rev_rec_ f idx (Array.unsafe_get a i)
|
||||||
|
done
|
||||||
|
|
||||||
|
let iteri_rev f (self : 'a t) : unit =
|
||||||
|
let tail_off = tail_off self in
|
||||||
|
for i = A.length self.tail - 1 downto 0 do
|
||||||
|
f (i + tail_off) (Array.unsafe_get self.tail i)
|
||||||
|
done;
|
||||||
|
iteri_rev_rec_ f (tail_off - 1) self.t
|
||||||
|
|
||||||
|
let fold_lefti f x m =
|
||||||
|
let acc = ref x in
|
||||||
|
iteri (fun i x -> acc := f !acc i x) m;
|
||||||
|
!acc
|
||||||
|
|
||||||
|
let foldi_rev f x m =
|
||||||
|
let acc = ref x in
|
||||||
|
iteri_rev (fun i x -> acc := f !acc i x) m;
|
||||||
|
!acc
|
||||||
|
|
||||||
|
let iter f m = iteri (fun _ x -> f x) m
|
||||||
|
let fold_left f x m = fold_lefti (fun acc _ x -> f acc x) x m
|
||||||
|
let fold_rev f x m = foldi_rev (fun acc _ x -> f acc x) x m
|
||||||
|
|
||||||
|
let rec map_t f (self : _ tree) : _ tree =
|
||||||
|
match self with
|
||||||
|
| Empty -> Empty
|
||||||
|
| Node a ->
|
||||||
|
let a = Array.map (map_t f) a in
|
||||||
|
Node a
|
||||||
|
| Leaf a -> Leaf (Array.map f a)
|
||||||
|
|
||||||
|
let map f (self : _ t) : _ t =
|
||||||
|
{ self with t = map_t f self.t; tail = Array.map f self.tail }
|
||||||
|
|
||||||
|
let append a b =
|
||||||
|
if is_empty b then
|
||||||
|
a
|
||||||
|
else
|
||||||
|
fold_left push a b
|
||||||
|
|
||||||
|
let add_list v l = List.fold_left push v l
|
||||||
|
let of_list l = add_list empty l
|
||||||
|
let to_list m = fold_rev (fun acc x -> x :: acc) [] m
|
||||||
|
|
||||||
|
let add_iter v seq =
|
||||||
|
let v = ref v in
|
||||||
|
seq (fun x -> v := push !v x);
|
||||||
|
!v
|
||||||
|
|
||||||
|
let of_iter s = add_iter empty s
|
||||||
|
let to_iter m yield = iteri (fun _ v -> yield v) m
|
||||||
|
|
||||||
|
let make n x : _ t =
|
||||||
|
(* TODO: probably we can optimize that? *)
|
||||||
|
of_iter (fun k ->
|
||||||
|
for _i = 1 to n do
|
||||||
|
k x
|
||||||
|
done)
|
||||||
|
|
||||||
|
let rec add_seq self seq =
|
||||||
|
match seq () with
|
||||||
|
| Seq.Nil -> self
|
||||||
|
| Seq.Cons (x, tl) -> add_seq (push self x) tl
|
||||||
|
|
||||||
|
let of_seq seq = add_seq empty seq
|
||||||
|
|
||||||
|
let to_seq self : _ Seq.t =
|
||||||
|
let rec to_seq (stack : ('a tree * int) list) () =
|
||||||
|
match stack with
|
||||||
|
| [] -> Seq.Nil
|
||||||
|
| (Empty, _) :: tl -> to_seq tl ()
|
||||||
|
| (Leaf a, i) :: tl when i < Array.length a ->
|
||||||
|
Seq.Cons (A.get a i, to_seq ((Leaf a, i + 1) :: tl))
|
||||||
|
| (Leaf _, _) :: tl -> to_seq tl ()
|
||||||
|
| (Node a, i) :: tl when i < A.length a ->
|
||||||
|
to_seq ((A.get a i, 0) :: (Node a, i + 1) :: tl) ()
|
||||||
|
| (Node _, _) :: tl -> to_seq tl ()
|
||||||
|
in
|
||||||
|
to_seq [ self.t, 0; Leaf self.tail, 0 ]
|
||||||
|
|
||||||
|
let choose self =
|
||||||
|
if self.size = 0 then
|
||||||
|
None
|
||||||
|
else
|
||||||
|
Some (A.get self.tail 0)
|
||||||
|
|
||||||
|
module Private_ = struct
|
||||||
|
type 'a printer = Format.formatter -> 'a -> unit
|
||||||
|
|
||||||
|
let fpf = Format.fprintf
|
||||||
|
|
||||||
|
let pp_array ppx out a =
|
||||||
|
fpf out "[@[%a@]]"
|
||||||
|
(Format.pp_print_list
|
||||||
|
~pp_sep:(fun out () -> Format.fprintf out ";@ ")
|
||||||
|
ppx)
|
||||||
|
(Array.to_list a)
|
||||||
|
|
||||||
|
let rec debugtree ppx out (self : _ tree) : unit =
|
||||||
|
match self with
|
||||||
|
| Empty -> fpf out "()"
|
||||||
|
| Leaf a -> fpf out "leaf(%a)" (pp_array ppx) a
|
||||||
|
| Node a -> fpf out "node(%a)" (pp_array @@ debugtree ppx) a
|
||||||
|
|
||||||
|
let debug ppx out self =
|
||||||
|
fpf out
|
||||||
|
"@[<v>pvec {@ size: %d; shift: %d;@ @[<2>tree:@ %a@];@ @[<2>tail:@ \
|
||||||
|
%a@]@]}"
|
||||||
|
self.size self.shift (debugtree ppx) self.t (pp_array ppx) self.tail
|
||||||
|
|
||||||
|
let () = dbg_ := debugtree
|
||||||
|
end
|
||||||
84
src/pvec/containers_pvec.mli
Normal file
84
src/pvec/containers_pvec.mli
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
(** Functional Vectors.
|
||||||
|
|
||||||
|
These are trees with a large branching factor for logarithmic operations with
|
||||||
|
a low multiplicative factor.
|
||||||
|
|
||||||
|
{b status: experimental}
|
||||||
|
|
||||||
|
@since NEXT_RELEASE
|
||||||
|
*)
|
||||||
|
|
||||||
|
type 'a iter = ('a -> unit) -> unit
|
||||||
|
type !'a t
|
||||||
|
|
||||||
|
val empty : 'a t
|
||||||
|
(** Empty vector. *)
|
||||||
|
|
||||||
|
val is_empty : _ t -> bool
|
||||||
|
(** Is the vector empty? *)
|
||||||
|
|
||||||
|
val return : 'a -> 'a t
|
||||||
|
(** Single element vector. *)
|
||||||
|
|
||||||
|
val length : _ t -> int
|
||||||
|
(** Number of elements. Constant time. *)
|
||||||
|
|
||||||
|
val make : int -> 'a -> 'a t
|
||||||
|
(** [make n x] makes a vector with [n] copies
|
||||||
|
of the element [x] *)
|
||||||
|
|
||||||
|
val push : 'a t -> 'a -> 'a t
|
||||||
|
(** Add element at the end. *)
|
||||||
|
|
||||||
|
val get : 'a t -> int -> 'a
|
||||||
|
(** @raise Invalid_argument if key not present. *)
|
||||||
|
|
||||||
|
val get_opt : 'a t -> int -> 'a option
|
||||||
|
|
||||||
|
val pop : 'a t -> 'a * 'a t
|
||||||
|
(** Pop last element.
|
||||||
|
@raise Invalid_argument in case the vec is empty. *)
|
||||||
|
|
||||||
|
val pop_opt : 'a t -> ('a * 'a t) option
|
||||||
|
(** Pop last element. *)
|
||||||
|
|
||||||
|
val iter : ('a -> unit) -> 'a t -> unit
|
||||||
|
|
||||||
|
val iteri : (int -> 'a -> unit) -> 'a t -> unit
|
||||||
|
(** Iterate on elements with their index, in increasing order. *)
|
||||||
|
|
||||||
|
val iteri_rev : (int -> 'a -> unit) -> 'a t -> unit
|
||||||
|
(** Iterate on elements with their index, but starting from the end. *)
|
||||||
|
|
||||||
|
val fold_left : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b
|
||||||
|
val fold_rev : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b
|
||||||
|
val fold_lefti : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b
|
||||||
|
|
||||||
|
val append : 'a t -> 'a t -> 'a t
|
||||||
|
(** [append a b] adds all elements of [b] at the end of [a]. This is
|
||||||
|
at least linear in the length of [b]. *)
|
||||||
|
|
||||||
|
val map : ('a -> 'b) -> 'a t -> 'b t
|
||||||
|
|
||||||
|
val choose : 'a t -> 'a option
|
||||||
|
(** Return an element. It is unspecified which one is returned. *)
|
||||||
|
|
||||||
|
val to_list : 'a t -> 'a list
|
||||||
|
val of_list : 'a list -> 'a t
|
||||||
|
val add_list : 'a t -> 'a list -> 'a t
|
||||||
|
val add_iter : 'a t -> 'a iter -> 'a t
|
||||||
|
val of_iter : 'a iter -> 'a t
|
||||||
|
val to_iter : 'a t -> 'a iter
|
||||||
|
val add_seq : 'a t -> 'a Seq.t -> 'a t
|
||||||
|
val of_seq : 'a Seq.t -> 'a t
|
||||||
|
val to_seq : 'a t -> 'a Seq.t
|
||||||
|
|
||||||
|
(**/**)
|
||||||
|
|
||||||
|
module Private_ : sig
|
||||||
|
type 'a printer = Format.formatter -> 'a -> unit
|
||||||
|
|
||||||
|
val debug : 'a printer -> 'a t printer
|
||||||
|
end
|
||||||
|
|
||||||
|
(**/**)
|
||||||
8
src/pvec/dune
Normal file
8
src/pvec/dune
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
|
||||||
|
(library
|
||||||
|
(name containers_pvec)
|
||||||
|
(public_name containers.pvec)
|
||||||
|
(preprocess
|
||||||
|
(action
|
||||||
|
(run %{project_root}/src/core/cpp/cpp.exe %{input-file})))
|
||||||
|
(synopsis "Persistent vector for OCaml"))
|
||||||
Loading…
Add table
Reference in a new issue