From 7b7eda5a05c131be988e43a674c448c6bf4c7bc1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 21:47:09 -0500 Subject: [PATCH 01/20] wip: persistent vectors based on clojure's --- src/pvec/containers_pvec.ml | 368 +++++++++++++++++++++++++++++++++++ src/pvec/containers_pvec.mli | 84 ++++++++ src/pvec/dune | 8 + 3 files changed, 460 insertions(+) create mode 100644 src/pvec/containers_pvec.ml create mode 100644 src/pvec/containers_pvec.mli create mode 100644 src/pvec/dune diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml new file mode 100644 index 00000000..171733c9 --- /dev/null +++ b/src/pvec/containers_pvec.ml @@ -0,0 +1,368 @@ +(* HAMT. + We follow https://hypirion.com/musings/understanding-persistent-vector-pt-1 + and following posts. *) + +type 'a iter = ('a -> unit) -> unit + +let num_bits = 2 +let branching_factor = 1 lsl num_bits +let bitmask = branching_factor - 1 + +(** Short array with functional semantics *) +module A = struct + open Array + + type 'a t = 'a array + + let length = length + let get = get + let[@inline] is_empty self = self = [||] + let[@inline] return self = [| self |] + let[@inline] is_full self = length self = branching_factor + + let[@inline] push (self : _ t) x = + let n = length self in + if n = branching_factor then invalid_arg "Pvec.push"; + let arr = Array.make (n + 1) x in + Array.blit self 0 arr 0 n; + arr + + let[@inline] pop self : _ t = + let n = length self in + if n = 0 then invalid_arg "Pvec.pop"; + Array.sub self 0 (n - 1) + + (* TODO: remove *) + let set ~mut (self : _ t) i x : _ t = + if i < 0 || i > length self || i >= branching_factor then + invalid_arg "Pvec.set"; + if i = length self then ( + (* insert in a longer copy *) + let arr = Array.make (i + 1) x in + Array.blit self 0 arr 0 i; + arr + ) else if mut then ( + (* replace element at [i] in place *) + self.(i) <- x; + self + ) else ( + (* replace element at [i] in copy *) + let arr = Array.copy self in + arr.(i) <- x; + arr + ) +end + +type 'a tree = Empty | Node of 'a tree A.t | Leaf of 'a A.t + +type 'a t = { + t: 'a tree; (** The 32-way tree *) + size: int; (** Exact number of elements *) + shift: int; (** num_bits*(depth of tree) *) + tail: 'a A.t; (** Tail array, for fast push/pop *) +} +(* invariants: + - if size>0 then [not (is_empty tail)] + - all leaves in [t] are at depth shift/5 +*) + +(* FIXME: remove *) +let dbg_ = ref (fun _ _ _ -> assert false) +let empty_tree = Empty +let empty = { t = empty_tree; size = 0; shift = 0; tail = [||] } + +let[@inline] is_empty_tree = function + | Empty -> true + | _ -> false + +let[@inline] is_empty (self : _ t) = self.size = 0 +let[@inline] length (self : _ t) = self.size +let[@inline] return x = { empty with size = 1; tail = A.return x } +let[@inline] tail_off (self : _ t) : int = self.size - A.length self.tail + +let[@unroll 2] rec get_tree_ (self : 'a tree) (shift : int) i : 'a = + match self with + | Empty -> invalid_arg "pvec.get" + | Leaf a -> A.get a (i land bitmask) + | Node a -> + let idx = (i lsr shift) land bitmask in + get_tree_ (A.get a idx) (shift - num_bits) i + +let get (self : 'a t) (i : int) : 'a = + if i < 0 then + invalid_arg "pvec.get" + else ( + let tail_off = self.size - A.length self.tail in + if i >= tail_off then + A.get self.tail (i - tail_off) + else + get_tree_ self.t self.shift i + ) + +let[@inline] get_opt self i = + try Some (get self i) with Invalid_argument _ -> None + +(** Build a tree leading to [tail] with indices 0 at each node *) +let rec build_new_tail_spine_ shift tail : _ tree = + if shift = 0 then + Leaf tail + else + Node [| build_new_tail_spine_ (shift - num_bits) tail |] + +let rec insert_tail_ (self : _ tree) shift i (tail : _ A.t) : _ tree = + Format.printf "insert tail shift=%d i=0x%x into %a@." shift i + (!dbg_ Format.pp_print_int) + (Obj.magic self : int tree); + match self with + | Empty -> + if shift = 0 then + Leaf tail + else ( + assert ((i lsl shift) land bitmask = 0); + Node [| insert_tail_ Empty (shift - num_bits) i tail |] + ) + | Leaf _ -> assert false + | Node a -> + (* would be in the {!build_new_tail_spine_} case *) + assert (i <> 0); + let idx = (i lsr shift) land bitmask in + Printf.printf "insert tail rec at idx=%d shift=%d i=0x%x\n%!" idx shift i; + let sub, must_push = + if idx < A.length a then + A.get a idx, false + else + Empty, true + in + let new_sub = insert_tail_ sub (shift - num_bits) i tail in + let a = + if must_push then + A.push a new_sub + else + A.set ~mut:false a idx new_sub + in + Node a + +let[@inline never] push_full_ self x : _ t = + if 1 lsl (self.shift + num_bits) = self.size - A.length self.tail then ( + (* tree is full, add a level *) + let t = Node [| self.t; build_new_tail_spine_ self.shift self.tail |] in + { t; size = self.size + 1; shift = self.shift + num_bits; tail = [| x |] } + ) else ( + (* insert at the end of the current tree *) + let idx = self.size - A.length self.tail in + let t = insert_tail_ self.t self.shift idx self.tail in + { t; size = self.size + 1; shift = self.shift; tail = [| x |] } + ) + +let[@inline] push (self : _ t) x : _ t = + if A.is_full self.tail then + push_full_ self x + else + { self with tail = A.push self.tail x; size = self.size + 1 } + +let rec pop_tail_from_tree_ (self : _ tree) shift i : 'a A.t * 'a tree = + Format.printf "pop tail shift=%d i=0x%x from %a@." shift i + (!dbg_ Format.pp_print_int) + (Obj.magic self : int tree); + match self with + | Empty -> assert false + | Leaf tail -> + assert (shift = 0); + tail, Empty + | Node a -> + let idx = (i lsr shift) land bitmask in + let sub = A.get a idx in + let tail, new_sub = pop_tail_from_tree_ sub (shift - num_bits) i in + let new_tree = + if is_empty_tree new_sub then ( + let a = A.pop a in + if A.is_empty a then + Empty + else + Node a + ) else + Node (A.set ~mut:false a idx new_sub) + in + tail, new_tree + +let[@inline never] move_last_leaf_to_tail (self : _ t) : _ t = + assert (A.length self.tail = 1); + if self.size = 1 then + (* back to empty *) + empty + else ( + (* before pop *) + let tail, t = pop_tail_from_tree_ self.t self.shift (self.size - 1) in + let t, shift = + match t with + | Node [| t' |] -> + (* all indices have 00000 as MSB, remove one level *) + t', self.shift - 1 + | _ -> t, self.shift + in + { tail; size = self.size - 1; shift; t } + ) + +let pop (self : 'a t) : 'a * 'a t = + if self.size = 0 then invalid_arg "pvec.pop"; + let x = A.get self.tail (A.length self.tail - 1) in + let new_tail = A.pop self.tail in + if A.is_empty new_tail then ( + let new_self = move_last_leaf_to_tail self in + x, new_self + ) else ( + let new_self = { self with size = self.size - 1; tail = new_tail } in + x, new_self + ) + +let pop_opt (self : 'a t) : ('a * 'a t) option = + if self.size = 0 then + None + else + Some (pop self) + +let rec iteri_rec_ f idx (self : _ tree) = + match self with + | Empty -> () + | Leaf a -> + for i = 0 to A.length a - 1 do + let j = idx lor i in + f j (Array.unsafe_get a i) + done + | Node a -> + for i = 0 to A.length a - 1 do + let idx = (idx lsl num_bits) lor i in + iteri_rec_ f idx (Array.unsafe_get a i) + done + +let iteri f (self : 'a t) : unit = + iteri_rec_ f 0 self.t; + let tail_off = tail_off self in + for i = 0 to A.length self.tail - 1 do + f (i + tail_off) (Array.unsafe_get self.tail i) + done + +let rec iteri_rev_rec_ f idx (self : _ tree) = + match self with + | Empty -> () + | Leaf a -> + for i = A.length a - 1 downto 0 do + let j = idx lor i in + f j (Array.unsafe_get a i) + done + | Node a -> + for i = A.length a - 1 downto 0 do + let idx = (idx lsl num_bits) lor i in + iteri_rev_rec_ f idx (Array.unsafe_get a i) + done + +let iteri_rev f (self : 'a t) : unit = + let tail_off = tail_off self in + for i = A.length self.tail - 1 downto 0 do + f (i + tail_off) (Array.unsafe_get self.tail i) + done; + iteri_rev_rec_ f (tail_off - 1) self.t + +let fold_lefti f x m = + let acc = ref x in + iteri (fun i x -> acc := f !acc i x) m; + !acc + +let foldi_rev f x m = + let acc = ref x in + iteri_rev (fun i x -> acc := f !acc i x) m; + !acc + +let iter f m = iteri (fun _ x -> f x) m +let fold_left f x m = fold_lefti (fun acc _ x -> f acc x) x m +let fold_rev f x m = foldi_rev (fun acc _ x -> f acc x) x m + +let rec map_t f (self : _ tree) : _ tree = + match self with + | Empty -> Empty + | Node a -> + let a = Array.map (map_t f) a in + Node a + | Leaf a -> Leaf (Array.map f a) + +let map f (self : _ t) : _ t = + { self with t = map_t f self.t; tail = Array.map f self.tail } + +let append a b = + if is_empty b then + a + else + fold_left push a b + +let add_list v l = List.fold_left push v l +let of_list l = add_list empty l +let to_list m = fold_rev (fun acc x -> x :: acc) [] m + +let add_iter v seq = + let v = ref v in + seq (fun x -> v := push !v x); + !v + +let of_iter s = add_iter empty s +let to_iter m yield = iteri (fun _ v -> yield v) m + +let make n x : _ t = + (* TODO: probably we can optimize that? *) + of_iter (fun k -> + for _i = 1 to n do + k x + done) + +let rec add_seq self seq = + match seq () with + | Seq.Nil -> self + | Seq.Cons (x, tl) -> add_seq (push self x) tl + +let of_seq seq = add_seq empty seq + +let to_seq self : _ Seq.t = + let rec to_seq (stack : ('a tree * int) list) () = + match stack with + | [] -> Seq.Nil + | (Empty, _) :: tl -> to_seq tl () + | (Leaf a, i) :: tl when i < Array.length a -> + Seq.Cons (A.get a i, to_seq ((Leaf a, i + 1) :: tl)) + | (Leaf _, _) :: tl -> to_seq tl () + | (Node a, i) :: tl when i < A.length a -> + to_seq ((A.get a i, 0) :: (Node a, i + 1) :: tl) () + | (Node _, _) :: tl -> to_seq tl () + in + to_seq [ self.t, 0; Leaf self.tail, 0 ] + +let choose self = + if self.size = 0 then + None + else + Some (A.get self.tail 0) + +module Private_ = struct + type 'a printer = Format.formatter -> 'a -> unit + + let fpf = Format.fprintf + + let pp_array ppx out a = + fpf out "[@[%a@]]" + (Format.pp_print_list + ~pp_sep:(fun out () -> Format.fprintf out ";@ ") + ppx) + (Array.to_list a) + + let rec debugtree ppx out (self : _ tree) : unit = + match self with + | Empty -> fpf out "()" + | Leaf a -> fpf out "leaf(%a)" (pp_array ppx) a + | Node a -> fpf out "node(%a)" (pp_array @@ debugtree ppx) a + + let debug ppx out self = + fpf out + "@[pvec {@ size: %d; shift: %d;@ @[<2>tree:@ %a@];@ @[<2>tail:@ \ + %a@]@]}" + self.size self.shift (debugtree ppx) self.t (pp_array ppx) self.tail + + let () = dbg_ := debugtree +end diff --git a/src/pvec/containers_pvec.mli b/src/pvec/containers_pvec.mli new file mode 100644 index 00000000..bee50bec --- /dev/null +++ b/src/pvec/containers_pvec.mli @@ -0,0 +1,84 @@ +(** Functional Vectors. + + These are trees with a large branching factor for logarithmic operations with + a low multiplicative factor. + + {b status: experimental} + + @since NEXT_RELEASE +*) + +type 'a iter = ('a -> unit) -> unit +type !'a t + +val empty : 'a t +(** Empty vector. *) + +val is_empty : _ t -> bool +(** Is the vector empty? *) + +val return : 'a -> 'a t +(** Single element vector. *) + +val length : _ t -> int +(** Number of elements. Constant time. *) + +val make : int -> 'a -> 'a t +(** [make n x] makes a vector with [n] copies + of the element [x] *) + +val push : 'a t -> 'a -> 'a t +(** Add element at the end. *) + +val get : 'a t -> int -> 'a +(** @raise Invalid_argument if key not present. *) + +val get_opt : 'a t -> int -> 'a option + +val pop : 'a t -> 'a * 'a t +(** Pop last element. + @raise Invalid_argument in case the vec is empty. *) + +val pop_opt : 'a t -> ('a * 'a t) option +(** Pop last element. *) + +val iter : ('a -> unit) -> 'a t -> unit + +val iteri : (int -> 'a -> unit) -> 'a t -> unit +(** Iterate on elements with their index, in increasing order. *) + +val iteri_rev : (int -> 'a -> unit) -> 'a t -> unit +(** Iterate on elements with their index, but starting from the end. *) + +val fold_left : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b +val fold_rev : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b +val fold_lefti : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b + +val append : 'a t -> 'a t -> 'a t +(** [append a b] adds all elements of [b] at the end of [a]. This is + at least linear in the length of [b]. *) + +val map : ('a -> 'b) -> 'a t -> 'b t + +val choose : 'a t -> 'a option +(** Return an element. It is unspecified which one is returned. *) + +val to_list : 'a t -> 'a list +val of_list : 'a list -> 'a t +val add_list : 'a t -> 'a list -> 'a t +val add_iter : 'a t -> 'a iter -> 'a t +val of_iter : 'a iter -> 'a t +val to_iter : 'a t -> 'a iter +val add_seq : 'a t -> 'a Seq.t -> 'a t +val of_seq : 'a Seq.t -> 'a t +val to_seq : 'a t -> 'a Seq.t + +(**/**) + +module Private_ : sig + type 'a printer = Format.formatter -> 'a -> unit + + val debug : 'a printer -> 'a t printer +end + +(**/**) diff --git a/src/pvec/dune b/src/pvec/dune new file mode 100644 index 00000000..6ff76874 --- /dev/null +++ b/src/pvec/dune @@ -0,0 +1,8 @@ + +(library + (name containers_pvec) + (public_name containers.pvec) + (preprocess + (action + (run %{project_root}/src/core/cpp/cpp.exe %{input-file}))) + (synopsis "Persistent vector for OCaml")) From dd0e23cea24e1e91fdf0248b3fe2a117a483787f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 21:47:25 -0500 Subject: [PATCH 02/20] add tests for pvec --- tests/pvec/dune | 7 ++ tests/pvec/t.ml | 1 + tests/pvec/t_pvec.ml | 246 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+) create mode 100644 tests/pvec/dune create mode 100644 tests/pvec/t.ml create mode 100644 tests/pvec/t_pvec.ml diff --git a/tests/pvec/dune b/tests/pvec/dune new file mode 100644 index 00000000..d4d56f95 --- /dev/null +++ b/tests/pvec/dune @@ -0,0 +1,7 @@ + +(test + (name t) + (flags :standard -strict-sequence -warn-error -a+8) + (modes (best exe)) + (package containers) + (libraries containers containers.pvec containers_testlib iter)) diff --git a/tests/pvec/t.ml b/tests/pvec/t.ml new file mode 100644 index 00000000..948befc1 --- /dev/null +++ b/tests/pvec/t.ml @@ -0,0 +1 @@ +Containers_testlib.run_all ~descr:"containers.pvec" [ T_pvec.Test.get () ] diff --git a/tests/pvec/t_pvec.ml b/tests/pvec/t_pvec.ml new file mode 100644 index 00000000..a82951ac --- /dev/null +++ b/tests/pvec/t_pvec.ml @@ -0,0 +1,246 @@ +module Test = (val Containers_testlib.make ~__FILE__ ()) +open Test +open Containers_pvec + +let spf = Printf.sprintf + +let _listuniq = + let g = Q.(small_list (pair small_int small_int)) in + Q.map_same_type + (fun l -> + CCList.sort_uniq ~cmp:(fun a b -> Stdlib.compare (fst a) (fst b)) l) + g +;; + +t @@ fun () -> is_empty empty;; +t @@ fun () -> not (is_empty (return 2));; +t @@ fun () -> length (return 2) = 1;; + +q ~name:"get over of_list" _listuniq (fun l -> + let m = of_list l in + List.for_all (fun (i, y) -> get m i = y) @@ List.mapi CCPair.make l) +;; + +(* regression test for #298 *) +t ~name:"reg 298" @@ fun () -> +let rec consume x = + match pop_opt x with + | None -> () + | Some (_, x) -> consume x +in +consume (of_list CCList.(1 -- 100)); +true +;; + +q ~name:"push length pop" + Q.(pair int (small_list int)) + (fun (x, l) -> + let q0 = of_list l in + let q = push q0 x in + assert_equal (length q) (length q0 + 1); + let y, q = pop q in + assert_equal x y; + assert_equal (to_list q) (to_list q0); + true) +;; + +q + Q.(pair (fun1 Observable.int bool) (small_list int)) + (fun (f, l) -> + let f = Q.Fn.apply f in + List.map f l = (of_list l |> map f |> to_list)) +;; + +q + Q.(pair (small_list int) (small_list int)) + (fun (l1, l2) -> l1 @ l2 = (append (of_list l1) (of_list l2) |> to_list)) +;; + +q Q.(small_list int) (fun l -> l = to_list (of_list l));; + +q _listuniq (fun l -> + List.sort Stdlib.compare l + = (l |> Iter.of_list |> of_iter |> to_iter |> Iter.to_list + |> List.sort Stdlib.compare)) +;; + +q _listuniq (fun l -> + List.sort Stdlib.compare l + = (l |> CCSeq.of_list |> of_seq |> to_seq |> CCSeq.to_list + |> List.sort Stdlib.compare)) +;; + +t @@ fun () -> choose empty = None;; +t @@ fun () -> choose (of_list [ 1, 1; 2, 2 ]) <> None + +module Ref_impl = struct + type +'a t = 'a list + + let empty : _ t = [] + let length = List.length + let push x l : _ t = l @ [ x ] + let get i l = List.nth l i + let to_list l = l + let to_seq = CCSeq.of_list + let add_list l l2 : _ t = List.append l l2 + + let pop_exn l = + match List.rev l with + | x :: tl -> x, List.rev tl + | [] -> invalid_arg "empty" + + let is_empty l = l = [] + + let choose l = + match l with + | [] -> None + | x :: _ -> Some x +end + +module Op = struct + type 'a t = + | Push of 'a + | Pop + (* TODO: set *) + | Add_list of 'a list + | Check_get of int + | Check_choose + | Check_is_empty + | Check_len + | Check_to_list + | Check_to_gen + + let well_formed ops : bool = + let rec loop size = function + | [] -> true + | Push _ :: tl -> loop (size + 1) tl + | Pop :: tl -> size >= 0 && loop (size - 1) tl + | Add_list l :: tl -> loop (size + List.length l) tl + | Check_get x :: tl -> x < size && loop size tl + | Check_choose :: tl + | Check_is_empty :: tl + | Check_len :: tl + | Check_to_list :: tl + | Check_to_gen :: tl -> + loop size tl + in + loop 0 ops + + let show show_x (self : _ t) : string = + match self with + | Push x -> spf "push %s" (show_x x) + | Pop -> "pop" + | Add_list l -> spf "add_list [%s]" (String.concat ";" @@ List.map show_x l) + | Check_get i -> spf "check_get %d" i + | Check_choose -> "check_choose" + | Check_is_empty -> "check_is_empty" + | Check_len -> "check_len" + | Check_to_list -> "check_to_list" + | Check_to_gen -> "check_to_gen" + + let shrink shrink_x (op : _ t) : _ Q.Iter.t = + let open Q.Shrink in + let open Q.Iter in + match op with + | Push x -> shrink_x x >|= fun x -> Push x + | Pop -> empty + | Add_list l -> list ~shrink:shrink_x l >|= fun x -> Add_list x + | Check_get _ | Check_choose | Check_is_empty | Check_len | Check_to_list + | Check_to_gen -> + empty + + let shrink_l shrink_x : _ t list Q.Shrink.t = + Q.Shrink.list ~shrink:(shrink shrink_x) |> Q.Shrink.filter well_formed + + type 'a op = 'a t + + (* generate list of length [n] *) + let gen (gen_x : 'a Q.Gen.t) n : 'a t list Q.Gen.t = + let open Q.Gen in + let rec loop size n : 'a op list Q.Gen.t = + if n = 0 then + return [] + else ( + let op = + frequency + @@ List.flatten + [ + [ + (3, gen_x >|= fun x -> Push x, size + 1); + 1, return (Check_choose, size); + 1, return (Check_is_empty, size); + 1, return (Check_to_list, size); + 1, return (Check_to_gen, size); + ]; + (if size > 0 then + [ + 1, return (Pop, size - 1); + (1, 0 -- (size - 1) >|= fun x -> Check_get x, size); + ] + else + []); + [ + ( 1, + small_list gen_x >|= fun l -> + Add_list l, size + List.length l ); + ]; + ] + in + + op >>= fun (op, size) -> + loop size (n - 1) >>= fun tl -> return (op :: tl) + ) + in + loop 0 n +end + +let arb_ops_int : int Op.t list Q.arbitrary = + Q.make + ~print:(fun o -> + spf "[%s]" @@ String.concat ";" @@ List.map (Op.show @@ spf "%d") o) + ~shrink:(Op.shrink_l Q.Shrink.int) + Q.Gen.(0 -- 40 >>= fun len -> Op.gen small_int len) + +let check_ops ~show_x (ops : 'a Op.t list) : unit = + let fail () = + Q.Test.fail_reportf "on list [%s]" + (String.concat ";" @@ List.map (Op.show show_x) ops) + in + let cur = ref empty in + let cur_ref = ref Ref_impl.empty in + List.iter + (fun (op : _ Op.t) -> + match op with + | Op.Push x -> + cur := push !cur x; + cur_ref := Ref_impl.push x !cur_ref + | Op.Pop -> + let x1, cur' = pop !cur in + cur := cur'; + let x2, cur_ref' = Ref_impl.pop_exn !cur_ref in + cur_ref := cur_ref'; + if x1 <> x2 then fail () + | Op.Add_list l -> + cur := add_list !cur l; + cur_ref := Ref_impl.add_list !cur_ref l + | Op.Check_get i -> if get !cur i <> Ref_impl.get i !cur_ref then fail () + | Op.Check_is_empty -> + if is_empty !cur <> Ref_impl.is_empty !cur_ref then fail () + | Op.Check_len -> if length !cur <> Ref_impl.length !cur_ref then fail () + | Op.Check_to_list -> + if to_list !cur <> Ref_impl.to_list !cur_ref then fail () + | Op.Check_choose -> + if choose !cur <> Ref_impl.choose !cur_ref then fail () + | Op.Check_to_gen -> + if + to_seq !cur |> CCSeq.to_list + <> (Ref_impl.to_seq !cur_ref |> CCSeq.to_list) + then + fail ()) + ops; + () + +let () = + q arb_ops_int (fun ops -> + check_ops ~show_x:(spf "%d") ops; + true) From 209ee3a3edebff84faf53cc49669520a2c48c64f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 21:47:40 -0500 Subject: [PATCH 03/20] fix warning in test --- tests/core/t_fun.ml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/core/t_fun.ml b/tests/core/t_fun.ml index ceb8a6df..59b6eb4f 100644 --- a/tests/core/t_fun.ml +++ b/tests/core/t_fun.ml @@ -18,5 +18,4 @@ true t @@ fun () -> CCFun.((succ %> string_of_int) 2 = "3");; t @@ fun () -> CCFun.((( * ) 3 % succ) 5 = 18);; -t @@ fun () -> CCFun.(succ @@ ( * ) 2 @@ pred @@ 3 = 5);; -t @@ fun () -> CCFun.(3 |> succ |> ( * ) 5 |> pred = 19) +t @@ fun () -> CCFun.(succ @@ ( * ) 2 @@ pred @@ 3 = 5) From 66b42ea944a7b8ca04688aa1c6cf763248ce5ab4 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 21:54:38 -0500 Subject: [PATCH 04/20] fixes for pvec --- src/pvec/containers_pvec.ml | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index 171733c9..6313c1bd 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -66,8 +66,6 @@ type 'a t = { - all leaves in [t] are at depth shift/5 *) -(* FIXME: remove *) -let dbg_ = ref (fun _ _ _ -> assert false) let empty_tree = Empty let empty = { t = empty_tree; size = 0; shift = 0; tail = [||] } @@ -110,9 +108,6 @@ let rec build_new_tail_spine_ shift tail : _ tree = Node [| build_new_tail_spine_ (shift - num_bits) tail |] let rec insert_tail_ (self : _ tree) shift i (tail : _ A.t) : _ tree = - Format.printf "insert tail shift=%d i=0x%x into %a@." shift i - (!dbg_ Format.pp_print_int) - (Obj.magic self : int tree); match self with | Empty -> if shift = 0 then @@ -126,7 +121,6 @@ let rec insert_tail_ (self : _ tree) shift i (tail : _ A.t) : _ tree = (* would be in the {!build_new_tail_spine_} case *) assert (i <> 0); let idx = (i lsr shift) land bitmask in - Printf.printf "insert tail rec at idx=%d shift=%d i=0x%x\n%!" idx shift i; let sub, must_push = if idx < A.length a then A.get a idx, false @@ -161,9 +155,6 @@ let[@inline] push (self : _ t) x : _ t = { self with tail = A.push self.tail x; size = self.size + 1 } let rec pop_tail_from_tree_ (self : _ tree) shift i : 'a A.t * 'a tree = - Format.printf "pop tail shift=%d i=0x%x from %a@." shift i - (!dbg_ Format.pp_print_int) - (Obj.magic self : int tree); match self with | Empty -> assert false | Leaf tail -> @@ -191,13 +182,14 @@ let[@inline never] move_last_leaf_to_tail (self : _ t) : _ t = (* back to empty *) empty else ( - (* before pop *) - let tail, t = pop_tail_from_tree_ self.t self.shift (self.size - 1) in + (* idx of the beginning of the tail *) + let idx = self.size - 1 - branching_factor in + let tail, t = pop_tail_from_tree_ self.t self.shift idx in let t, shift = match t with | Node [| t' |] -> (* all indices have 00000 as MSB, remove one level *) - t', self.shift - 1 + t', self.shift - num_bits | _ -> t, self.shift in { tail; size = self.size - 1; shift; t } @@ -363,6 +355,4 @@ module Private_ = struct "@[pvec {@ size: %d; shift: %d;@ @[<2>tree:@ %a@];@ @[<2>tail:@ \ %a@]@]}" self.size self.shift (debugtree ppx) self.t (pp_array ppx) self.tail - - let () = dbg_ := debugtree end From 03e253a31c11077652e1a5d943e0c0d7c0eaca79 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 21:54:41 -0500 Subject: [PATCH 05/20] fix pvec tests (make choose's result irrelevant, only test whether it returns) --- tests/pvec/t_pvec.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/pvec/t_pvec.ml b/tests/pvec/t_pvec.ml index a82951ac..25c7391a 100644 --- a/tests/pvec/t_pvec.ml +++ b/tests/pvec/t_pvec.ml @@ -93,8 +93,8 @@ module Ref_impl = struct let choose l = match l with - | [] -> None - | x :: _ -> Some x + | [] -> false + | _ :: _ -> true end module Op = struct @@ -230,7 +230,7 @@ let check_ops ~show_x (ops : 'a Op.t list) : unit = | Op.Check_to_list -> if to_list !cur <> Ref_impl.to_list !cur_ref then fail () | Op.Check_choose -> - if choose !cur <> Ref_impl.choose !cur_ref then fail () + if Option.is_some (choose !cur) <> Ref_impl.choose !cur_ref then fail () | Op.Check_to_gen -> if to_seq !cur |> CCSeq.to_list From 42967b21278f4183da0fe6b4ce65e533cd154282 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 22:14:18 -0500 Subject: [PATCH 06/20] benchs: add pvec --- benchs/dune | 3 ++- benchs/run_benchs.ml | 56 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/benchs/dune b/benchs/dune index 21ff71ef..ee9ac780 100644 --- a/benchs/dune +++ b/benchs/dune @@ -1,6 +1,7 @@ (executables (names run_benchs run_bench_hash run_objsize) - (libraries containers containers-data containers-thread benchmark gen iter + (libraries containers containers_pvec + containers-data benchmark gen iter qcheck oseq batteries base sek) (flags :standard -warn-error -3-5 -safe-string -color always) (optional) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index 461e9f7b..ad2eeb69 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -9,6 +9,7 @@ let ( @>> ) = B.Tree.( @>> ) let ( @>>> ) = B.Tree.( @>>> ) module Int_map = Map.Make (CCInt) +module Pvec = Containers_pvec let app_int f n = string_of_int n @> lazy (f n) let app_ints f l = B.Tree.concat (List.map (app_int f) l) @@ -26,16 +27,19 @@ module L = struct let l = CCList.(1 -- n) in let ral = CCRAL.of_list l in let vec = CCFun_vec.of_list l in + let pv = Pvec.of_list l in let sek = Sek.Persistent.of_array 0 (Array.of_list l) in let iter_list () = List.iter f l and raliter () = CCRAL.iter ~f ral and funvec_iter () = CCFun_vec.iter ~f vec + and pvec_iter () = Pvec.iter f pv and sek_iter () = Sek.Persistent.iter Sek.forward f sek in B.throughputN time ~repeat [ "List.iter", iter_list, (); "CCRAL.iter", raliter, (); "CCFun_vec.iter", funvec_iter, (); + "Pvec.iter", pvec_iter, (); "Sek.Persistent.iter", sek_iter, (); ] @@ -146,6 +150,9 @@ module L = struct let v1 = CCFun_vec.of_list l1 in let v2 = CCFun_vec.of_list l2 in let v3 = CCFun_vec.of_list l3 in + let pv1 = Pvec.of_list l1 in + let pv2 = Pvec.of_list l2 in + let pv3 = Pvec.of_list l3 in let s1 = Sek.Persistent.of_array 0 (Array.of_list l1) in let s2 = Sek.Persistent.of_array 0 (Array.of_list l2) in let s3 = Sek.Persistent.of_array 0 (Array.of_list l3) in @@ -158,6 +165,9 @@ module L = struct let bench_funvec l1 l2 l3 () = opaque_ignore CCFun_vec.(append (append l1 l2) l3) in + let bench_pvec l1 l2 l3 () = + opaque_ignore Pvec.(append (append l1 l2) l3) + in let bench_sek l1 l2 l3 () = opaque_ignore Sek.Persistent.(concat (concat l1 l2) l3) in @@ -166,6 +176,7 @@ module L = struct "CCList.append", bench_list l1 l2 l3, (); "List.append", bench_cclist l1 l2 l3, (); "CCFun_vec.append", bench_funvec v1 v2 v3, (); + "Pvec.append", bench_pvec pv1 pv2 pv3, (); "Sek.concat", bench_sek s1 s2 s3, (); ] @@ -209,6 +220,7 @@ module L = struct let l = CCList.(0 -- (n - 1)) in let ral = CCRAL.of_list l in let v = CCFun_vec.of_list l in + let pv = Pvec.of_list l in let bv = BatVect.of_list l in let map = List.fold_left (fun map i -> Int_map.add i i map) Int_map.empty l @@ -230,6 +242,10 @@ module L = struct for i = 0 to n - 1 do opaque_ignore (CCFun_vec.get_exn i l) done + and bench_pvec l () = + for i = 0 to n - 1 do + opaque_ignore (Pvec.get l i) + done and bench_batvec l () = for i = 0 to n - 1 do opaque_ignore (BatVect.get l i) @@ -245,6 +261,7 @@ module L = struct "Map.find", bench_map map, (); "RAL.get", bench_ral ral, (); "funvec.get", bench_funvec v, (); + "pvec.get", bench_pvec pv, (); "batvec.get", bench_batvec bv, (); "Sek.Persistent.get", bench_sek sek, (); ] @@ -290,11 +307,8 @@ module L = struct let bench_push ?(time = 2) n = (*let ral = ref CCRAL.empty in *) - let v = ref CCFun_vec.empty in - let bv = ref BatVect.empty in - let map = ref Int_map.empty in - let sek = ref (Sek.Persistent.create 0) in - let bench_map l () = + let bench_map () = + let l = ref Int_map.empty in for i = 0 to n - 1 do l := Int_map.add i i !l done; @@ -304,17 +318,26 @@ module L = struct (* Note: Better implementation probably possible *) for i = 0 to n-1 do l := CCRAL.append !l (CCRAL.return i) done; opaque_ignore l *) - and bench_funvec l () = + and bench_funvec () = + let l = ref CCFun_vec.empty in for i = 0 to n - 1 do l := CCFun_vec.push i !l done; opaque_ignore l - and bench_batvec l () = + and bench_pvec () = + let l = ref Pvec.empty in + for i = 0 to n - 1 do + l := Pvec.push !l i + done; + opaque_ignore l + and bench_batvec () = + let l = ref BatVect.empty in for i = 0 to n - 1 do l := BatVect.append i !l done; opaque_ignore l - and bench_sek l () = + and bench_sek () = + let l = ref (Sek.Persistent.create 0) in for i = 0 to n - 1 do l := Sek.Persistent.push Sek.front !l i done; @@ -322,18 +345,20 @@ module L = struct in B.throughputN time ~repeat [ - "Map.add", bench_map map, () + "Map.add", bench_map, () (* ; "RAL.append", bench_ral ral, () *) (* too slow *); - "Sek.Persistent.push", bench_sek sek, (); - "funvec.push", bench_funvec v, (); - "batvec.append", bench_batvec bv, (); + "Sek.Persistent.push", bench_sek, (); + "funvec.push", bench_funvec, (); + "pvec.push", bench_pvec, (); + "batvec.append", bench_batvec, (); ] let bench_pop ?(time = 2) n = let l = CCList.(0 -- (n - 1)) in let ral = CCRAL.of_list l in let v = CCFun_vec.of_list l in + let pv = Pvec.of_list l in let bv = BatVect.of_list l in let map = List.fold_left (fun map i -> Int_map.add i i map) Int_map.empty l @@ -357,6 +382,12 @@ module L = struct l := snd (CCFun_vec.pop_exn !l) done; opaque_ignore l + and bench_pvec l () = + let l = ref l in + for _ = 0 to n - 1 do + l := snd (Pvec.pop !l) + done; + opaque_ignore l and bench_batvec l () = let l = ref l in for _ = 0 to n - 1 do @@ -375,6 +406,7 @@ module L = struct "Map.remove", bench_map map, (); "RAL.tl", bench_ral ral, (); "funvec.pop", bench_funvec v, (); + "pvec.pop", bench_pvec pv, (); "batvec.pop", bench_batvec bv, (); "Sek.Persistent.pop", bench_sek sek, (); ] From b9b6bf82b6f19cb27eca9734931a49ca64b86f19 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 22:14:22 -0500 Subject: [PATCH 07/20] perf: restore branching factor to 32 --- src/pvec/containers_pvec.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index 6313c1bd..99dee905 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -4,7 +4,7 @@ type 'a iter = ('a -> unit) -> unit -let num_bits = 2 +let num_bits = 5 let branching_factor = 1 lsl num_bits let bitmask = branching_factor - 1 From 6a3cafa76364c37d615b5cf5156f89b4be530fd0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 22:38:05 -0500 Subject: [PATCH 08/20] compat --- src/pvec/containers_pvec.mli | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/pvec/containers_pvec.mli b/src/pvec/containers_pvec.mli index bee50bec..2d3a815c 100644 --- a/src/pvec/containers_pvec.mli +++ b/src/pvec/containers_pvec.mli @@ -9,8 +9,17 @@ *) type 'a iter = ('a -> unit) -> unit + +[@@@ifge 5.0] + type !'a t +[@@@else_] + +type 'a t + +[@@@endif] + val empty : 'a t (** Empty vector. *) From 81408b8e1b608298f99fe4626e6ddd7a08ea3a9c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 5 Jan 2024 22:54:08 -0500 Subject: [PATCH 09/20] add `last` to Pvec --- src/pvec/containers_pvec.ml | 16 ++++++++++++++++ src/pvec/containers_pvec.mli | 10 ++++++++++ tests/pvec/t_pvec.ml | 14 +++++++++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index 99dee905..ba3c504e 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -213,6 +213,22 @@ let pop_opt (self : 'a t) : ('a * 'a t) option = else Some (pop self) +let[@inline] last self = + if self.size = 0 then invalid_arg "pvec.last"; + A.get self.tail (A.length self.tail - 1) + +let last_opt self = + if self.size = 0 then + None + else + Some (A.get self.tail (A.length self.tail - 1)) + +let drop_last self = + if self.size = 0 then + self + else + snd (pop self) + let rec iteri_rec_ f idx (self : _ tree) = match self with | Empty -> () diff --git a/src/pvec/containers_pvec.mli b/src/pvec/containers_pvec.mli index 2d3a815c..dd3cd71a 100644 --- a/src/pvec/containers_pvec.mli +++ b/src/pvec/containers_pvec.mli @@ -44,6 +44,12 @@ val get : 'a t -> int -> 'a val get_opt : 'a t -> int -> 'a option +val last : 'a t -> 'a +(** Last element. + @raise Invalid_argument if the vec is empty *) + +val last_opt : 'a t -> 'a option + val pop : 'a t -> 'a * 'a t (** Pop last element. @raise Invalid_argument in case the vec is empty. *) @@ -51,6 +57,10 @@ val pop : 'a t -> 'a * 'a t val pop_opt : 'a t -> ('a * 'a t) option (** Pop last element. *) +val drop_last : 'a t -> 'a t +(** Like {!pop_opt} but doesn't return the last element. + Returns the same vector if it's empty. *) + val iter : ('a -> unit) -> 'a t -> unit val iteri : (int -> 'a -> unit) -> 'a t -> unit diff --git a/tests/pvec/t_pvec.ml b/tests/pvec/t_pvec.ml index 25c7391a..5f8ba83a 100644 --- a/tests/pvec/t_pvec.ml +++ b/tests/pvec/t_pvec.ml @@ -89,6 +89,12 @@ module Ref_impl = struct | x :: tl -> x, List.rev tl | [] -> invalid_arg "empty" + let last_opt l = + if l = [] then + None + else + Some (List.nth l (List.length l - 1)) + let is_empty l = l = [] let choose l = @@ -109,6 +115,7 @@ module Op = struct | Check_len | Check_to_list | Check_to_gen + | Check_last let well_formed ops : bool = let rec loop size = function @@ -121,6 +128,7 @@ module Op = struct | Check_is_empty :: tl | Check_len :: tl | Check_to_list :: tl + | Check_last :: tl | Check_to_gen :: tl -> loop size tl in @@ -137,6 +145,7 @@ module Op = struct | Check_len -> "check_len" | Check_to_list -> "check_to_list" | Check_to_gen -> "check_to_gen" + | Check_last -> "check_last" let shrink shrink_x (op : _ t) : _ Q.Iter.t = let open Q.Shrink in @@ -146,7 +155,7 @@ module Op = struct | Pop -> empty | Add_list l -> list ~shrink:shrink_x l >|= fun x -> Add_list x | Check_get _ | Check_choose | Check_is_empty | Check_len | Check_to_list - | Check_to_gen -> + | Check_to_gen | Check_last -> empty let shrink_l shrink_x : _ t list Q.Shrink.t = @@ -171,6 +180,7 @@ module Op = struct 1, return (Check_is_empty, size); 1, return (Check_to_list, size); 1, return (Check_to_gen, size); + 1, return (Check_last, size); ]; (if size > 0 then [ @@ -231,6 +241,8 @@ let check_ops ~show_x (ops : 'a Op.t list) : unit = if to_list !cur <> Ref_impl.to_list !cur_ref then fail () | Op.Check_choose -> if Option.is_some (choose !cur) <> Ref_impl.choose !cur_ref then fail () + | Op.Check_last -> + if last_opt !cur <> Ref_impl.last_opt !cur_ref then fail () | Op.Check_to_gen -> if to_seq !cur |> CCSeq.to_list From 04440deb39a4765cedbffe3ace622b18dfba245b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 6 Jan 2024 17:17:06 -0500 Subject: [PATCH 10/20] small refactor --- src/pvec/containers_pvec.ml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index ba3c504e..7e5ee45c 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -32,8 +32,7 @@ module A = struct if n = 0 then invalid_arg "Pvec.pop"; Array.sub self 0 (n - 1) - (* TODO: remove *) - let set ~mut (self : _ t) i x : _ t = + let set (self : _ t) i x : _ t = if i < 0 || i > length self || i >= branching_factor then invalid_arg "Pvec.set"; if i = length self then ( @@ -41,10 +40,6 @@ module A = struct let arr = Array.make (i + 1) x in Array.blit self 0 arr 0 i; arr - ) else if mut then ( - (* replace element at [i] in place *) - self.(i) <- x; - self ) else ( (* replace element at [i] in copy *) let arr = Array.copy self in From 8dca0ea78d62822b5cfc797d90460ccf091bfe77 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 6 Jan 2024 22:45:42 -0500 Subject: [PATCH 11/20] fix build --- src/pvec/containers_pvec.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index 7e5ee45c..c0c0eee2 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -127,7 +127,7 @@ let rec insert_tail_ (self : _ tree) shift i (tail : _ A.t) : _ tree = if must_push then A.push a new_sub else - A.set ~mut:false a idx new_sub + A.set a idx new_sub in Node a @@ -167,7 +167,7 @@ let rec pop_tail_from_tree_ (self : _ tree) shift i : 'a A.t * 'a tree = else Node a ) else - Node (A.set ~mut:false a idx new_sub) + Node (A.set a idx new_sub) in tail, new_tree From a28147608238f2a215de0afebf248308daaeda61 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Jan 2024 23:17:39 -0500 Subject: [PATCH 12/20] perf: reduce GC pressure by using a branching factor of 16 --- src/pvec/containers_pvec.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index c0c0eee2..d4c7f502 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -4,7 +4,7 @@ type 'a iter = ('a -> unit) -> unit -let num_bits = 5 +let num_bits = 4 let branching_factor = 1 lsl num_bits let bitmask = branching_factor - 1 From 12ff3802ce8e673e6ca125d9243c5ccfbd55d4b2 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Jan 2024 23:17:57 -0500 Subject: [PATCH 13/20] perf: implement `iter` separately from `iteri` --- src/pvec/containers_pvec.ml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index d4c7f502..5902c03e 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -224,6 +224,29 @@ let drop_last self = else snd (pop self) +let rec iter_rec_ f (self : _ tree) = + match self with + | Empty -> () + | Leaf a -> + for i = 0 to A.length a - 1 do + f (Array.unsafe_get a i) + done + | Node a -> + for i = 0 to A.length a - 1 do + iter_rec_ f (Array.unsafe_get a i) + done + +let iter f self = + iter_rec_ f self.t; + for i = 0 to A.length self.tail - 1 do + f (Array.unsafe_get self.tail i) + done + +let fold_left f x m = + let acc = ref x in + iter (fun x -> acc := f !acc x) m; + !acc + let rec iteri_rec_ f idx (self : _ tree) = match self with | Empty -> () @@ -276,8 +299,6 @@ let foldi_rev f x m = iteri_rev (fun i x -> acc := f !acc i x) m; !acc -let iter f m = iteri (fun _ x -> f x) m -let fold_left f x m = fold_lefti (fun acc _ x -> f acc x) x m let fold_rev f x m = foldi_rev (fun acc _ x -> f acc x) x m let rec map_t f (self : _ tree) : _ tree = From b9cc91fb96bf3a7b8c5d80a736b20c21339afdfa Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Jan 2024 23:21:32 -0500 Subject: [PATCH 14/20] pvec: implement `iter_rev` directly --- src/pvec/containers_pvec.ml | 25 +++++++++++++++++++++++-- src/pvec/containers_pvec.mli | 4 ++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index 5902c03e..ac83d9e3 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -268,6 +268,24 @@ let iteri f (self : 'a t) : unit = f (i + tail_off) (Array.unsafe_get self.tail i) done +let rec iter_rev_rec_ f (self : _ tree) = + match self with + | Empty -> () + | Leaf a -> + for i = A.length a - 1 downto 0 do + f (Array.unsafe_get a i) + done + | Node a -> + for i = A.length a - 1 downto 0 do + iter_rev_rec_ f (Array.unsafe_get a i) + done + +let iter_rev f (self : 'a t) : unit = + for i = A.length self.tail - 1 downto 0 do + f (Array.unsafe_get self.tail i) + done; + iter_rev_rec_ f self.t + let rec iteri_rev_rec_ f idx (self : _ tree) = match self with | Empty -> () @@ -294,12 +312,15 @@ let fold_lefti f x m = iteri (fun i x -> acc := f !acc i x) m; !acc -let foldi_rev f x m = +let fold_revi f x m = let acc = ref x in iteri_rev (fun i x -> acc := f !acc i x) m; !acc -let fold_rev f x m = foldi_rev (fun acc _ x -> f acc x) x m +let fold_rev f x m = + let acc = ref x in + iter_rev (fun x -> acc := f !acc x) m; + !acc let rec map_t f (self : _ tree) : _ tree = match self with diff --git a/src/pvec/containers_pvec.mli b/src/pvec/containers_pvec.mli index dd3cd71a..6c40b497 100644 --- a/src/pvec/containers_pvec.mli +++ b/src/pvec/containers_pvec.mli @@ -63,6 +63,9 @@ val drop_last : 'a t -> 'a t val iter : ('a -> unit) -> 'a t -> unit +val iter_rev : ('a -> unit) -> 'a t -> unit +(** Iterate on elements but starting from the end. *) + val iteri : (int -> 'a -> unit) -> 'a t -> unit (** Iterate on elements with their index, in increasing order. *) @@ -72,6 +75,7 @@ val iteri_rev : (int -> 'a -> unit) -> 'a t -> unit val fold_left : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b val fold_rev : ('b -> 'a -> 'b) -> 'b -> 'a t -> 'b val fold_lefti : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b +val fold_revi : ('b -> int -> 'a -> 'b) -> 'b -> 'a t -> 'b val append : 'a t -> 'a t -> 'a t (** [append a b] adds all elements of [b] at the end of [a]. This is From dd552fe334063a656373ef9356865dcafde81746 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 7 Jan 2024 23:30:12 -0500 Subject: [PATCH 15/20] more tests --- tests/pvec/t_pvec.ml | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/pvec/t_pvec.ml b/tests/pvec/t_pvec.ml index 5f8ba83a..3663d5e0 100644 --- a/tests/pvec/t_pvec.ml +++ b/tests/pvec/t_pvec.ml @@ -84,6 +84,11 @@ module Ref_impl = struct let to_seq = CCSeq.of_list let add_list l l2 : _ t = List.append l l2 + let to_list_via_reviter m = + let l = ref [] in + iter_rev (fun x -> l := x :: !l) m; + !l + let pop_exn l = match List.rev l with | x :: tl -> x, List.rev tl @@ -103,6 +108,16 @@ module Ref_impl = struct | _ :: _ -> true end +let to_list_via_iter m = + let l = ref [] in + iter (fun x -> l := x :: !l) m; + List.rev !l + +let to_list_via_reviter m = + let l = ref [] in + iter_rev (fun x -> l := x :: !l) m; + !l + module Op = struct type 'a t = | Push of 'a @@ -114,6 +129,8 @@ module Op = struct | Check_is_empty | Check_len | Check_to_list + | Check_iter + | Check_rev_iter | Check_to_gen | Check_last @@ -128,6 +145,8 @@ module Op = struct | Check_is_empty :: tl | Check_len :: tl | Check_to_list :: tl + | Check_iter :: tl + | Check_rev_iter :: tl | Check_last :: tl | Check_to_gen :: tl -> loop size tl @@ -144,6 +163,8 @@ module Op = struct | Check_is_empty -> "check_is_empty" | Check_len -> "check_len" | Check_to_list -> "check_to_list" + | Check_iter -> "check_rev_iter" + | Check_rev_iter -> "check_rev_iter" | Check_to_gen -> "check_to_gen" | Check_last -> "check_last" @@ -155,7 +176,7 @@ module Op = struct | Pop -> empty | Add_list l -> list ~shrink:shrink_x l >|= fun x -> Add_list x | Check_get _ | Check_choose | Check_is_empty | Check_len | Check_to_list - | Check_to_gen | Check_last -> + | Check_to_gen | Check_last | Check_rev_iter | Check_iter -> empty let shrink_l shrink_x : _ t list Q.Shrink.t = @@ -239,6 +260,10 @@ let check_ops ~show_x (ops : 'a Op.t list) : unit = | Op.Check_len -> if length !cur <> Ref_impl.length !cur_ref then fail () | Op.Check_to_list -> if to_list !cur <> Ref_impl.to_list !cur_ref then fail () + | Op.Check_iter -> + if to_list_via_iter !cur <> Ref_impl.to_list !cur_ref then fail () + | Op.Check_rev_iter -> + if to_list !cur <> Ref_impl.to_list !cur_ref then fail () | Op.Check_choose -> if Option.is_some (choose !cur) <> Ref_impl.choose !cur_ref then fail () | Op.Check_last -> @@ -253,6 +278,6 @@ let check_ops ~show_x (ops : 'a Op.t list) : unit = () let () = - q arb_ops_int (fun ops -> + q ~count:1000 ~name:"ops" ~long_factor:10 arb_ops_int (fun ops -> check_ops ~show_x:(spf "%d") ops; true) From 821fa6e3cfcb3080bcb4ccbefcc0152a4cab4f1f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 8 Jan 2024 09:19:39 -0500 Subject: [PATCH 16/20] more tests --- tests/pvec/t_pvec.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/pvec/t_pvec.ml b/tests/pvec/t_pvec.ml index 3663d5e0..babdee8f 100644 --- a/tests/pvec/t_pvec.ml +++ b/tests/pvec/t_pvec.ml @@ -202,6 +202,8 @@ module Op = struct 1, return (Check_to_list, size); 1, return (Check_to_gen, size); 1, return (Check_last, size); + 1, return (Check_iter, size); + 1, return (Check_rev_iter, size); ]; (if size > 0 then [ From b49f358d47df0029fdb477bcf8d873673ba73168 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 8 Jan 2024 12:53:26 -0500 Subject: [PATCH 17/20] perf: more bench for pvec --- benchs/run_benchs.ml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index ad2eeb69..bab3be2d 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -191,10 +191,10 @@ module L = struct opaque_ignore (Sek.Persistent.flatten s : _ Sek.Persistent.t) and funvec_flatten v () = opaque_ignore - (CCFun_vec.fold_rev ~x:CCFun_vec.empty - ~f:(fun acc x -> CCFun_vec.append x acc) - v + (CCFun_vec.fold ~x:CCFun_vec.empty ~f:CCFun_vec.append v : _ CCFun_vec.t) + and pvec_flatten v () = + opaque_ignore (Pvec.fold_left Pvec.append Pvec.empty v : _ Pvec.t) in let l = CCList.mapi (fun i x -> CCList.(x -- (x + min i 100))) CCList.(1 -- n) @@ -204,12 +204,15 @@ module L = struct (List.map (Sek.Persistent.of_list 0) l) in let v = CCFun_vec.of_list (List.map CCFun_vec.of_list l) in + let pv = Pvec.of_list (List.map Pvec.of_list l) in + B.throughputN time ~repeat [ "CCList.flatten", (fun () -> ignore (CCList.flatten l)), (); "List.flatten", (fun () -> ignore (List.flatten l)), (); "fold_right append", fold_right_append_ l, (); - "funvec.(fold_right append)", funvec_flatten v, (); + "funvec.(fold append)", funvec_flatten v, (); + "pvec.(fold append)", pvec_flatten pv, (); "CCList.(fold_right append)", cc_fold_right_append_ l, (); "Sek.flatten", sek_flatten sek, (); ] From 813ea40ac5f47faf4750abecb4e4e927118cc4e1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 8 Jan 2024 23:49:38 -0500 Subject: [PATCH 18/20] comment --- src/pvec/containers_pvec.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index ac83d9e3..d622583f 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -1,4 +1,4 @@ -(* HAMT. +(* Persistent vector structure with fast get/push/pop. We follow https://hypirion.com/musings/understanding-persistent-vector-pt-1 and following posts. *) From cb949e4c7f509654cfcb28c5ed7349c1ca01e0a0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 10 Jan 2024 12:48:27 -0500 Subject: [PATCH 19/20] more benchs --- benchs/run_benchs.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchs/run_benchs.ml b/benchs/run_benchs.ml index bab3be2d..4c617e9b 100644 --- a/benchs/run_benchs.ml +++ b/benchs/run_benchs.ml @@ -56,10 +56,12 @@ module L = struct let bench_map ?(time = 2) n = let l = CCList.(1 -- n) in + let pv = Pvec.of_list l in let ral = CCRAL.of_list l in let map_naive () = ignore (try List.map f_ l with Stack_overflow -> []) and map_naive2 () = ignore (try map_naive f_ l with Stack_overflow -> []) and map_tailrec () = ignore (List.rev (List.rev_map f_ l)) + and pvec_map () = ignore (Pvec.map f_ pv) and ccmap () = ignore (CCList.map f_ l) and ralmap () = ignore (CCRAL.map ~f:f_ ral) in B.throughputN time ~repeat @@ -67,6 +69,7 @@ module L = struct "List.map", map_naive, (); "List.map(inline)", map_naive2, (); "List.rev_map o rev", map_tailrec, (); + "pvec.map", pvec_map, (); "CCList.map", ccmap, (); "CCRAL.map", ralmap, (); ] From 41d8a7a968d5dfe3b371a101245fedc916ac154d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 16 Jan 2024 14:20:09 -0500 Subject: [PATCH 20/20] add `Pvec.equal` --- src/pvec/containers_pvec.ml | 20 +++++++++++++++++++ src/pvec/containers_pvec.mli | 1 + tests/pvec/t_pvec.ml | 38 +++++++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/pvec/containers_pvec.ml b/src/pvec/containers_pvec.ml index d622583f..8a9b9c4b 100644 --- a/src/pvec/containers_pvec.ml +++ b/src/pvec/containers_pvec.ml @@ -20,6 +20,16 @@ module A = struct let[@inline] return self = [| self |] let[@inline] is_full self = length self = branching_factor + let equal eq a b = + length a = length b + && + try + for i = 0 to length a - 1 do + if not (eq (unsafe_get a i) (unsafe_get b i)) then raise_notrace Exit + done; + true + with Exit -> false + let[@inline] push (self : _ t) x = let n = length self in if n = branching_factor then invalid_arg "Pvec.push"; @@ -339,6 +349,16 @@ let append a b = else fold_left push a b +let rec equal_tree eq t1 t2 = + match t1, t2 with + | Empty, Empty -> true + | Node a, Node b -> A.equal (equal_tree eq) a b + | Leaf a, Leaf b -> A.equal eq a b + | (Empty | Leaf _ | Node _), _ -> false + +let equal eq (a : _ t) (b : _ t) : bool = + a.size = b.size && A.equal eq a.tail b.tail && equal_tree eq a.t b.t + let add_list v l = List.fold_left push v l let of_list l = add_list empty l let to_list m = fold_rev (fun acc x -> x :: acc) [] m diff --git a/src/pvec/containers_pvec.mli b/src/pvec/containers_pvec.mli index 6c40b497..68773a2d 100644 --- a/src/pvec/containers_pvec.mli +++ b/src/pvec/containers_pvec.mli @@ -61,6 +61,7 @@ val drop_last : 'a t -> 'a t (** Like {!pop_opt} but doesn't return the last element. Returns the same vector if it's empty. *) +val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool val iter : ('a -> unit) -> 'a t -> unit val iter_rev : ('a -> unit) -> 'a t -> unit diff --git a/tests/pvec/t_pvec.ml b/tests/pvec/t_pvec.ml index babdee8f..e58130aa 100644 --- a/tests/pvec/t_pvec.ml +++ b/tests/pvec/t_pvec.ml @@ -71,7 +71,43 @@ q _listuniq (fun l -> ;; t @@ fun () -> choose empty = None;; -t @@ fun () -> choose (of_list [ 1, 1; 2, 2 ]) <> None +t @@ fun () -> choose (of_list [ 1, 1; 2, 2 ]) <> None;; + +q + Q.(pair (small_list int) (small_list int)) + (fun (l1, l2) -> equal CCInt.equal (of_list l1) (of_list l2) = (l1 = l2)) +;; + +q Q.(small_list int) (fun l1 -> equal CCInt.equal (of_list l1) (of_list l1)) + +let arb_list_with_idx = + let open Q in + let shrink (l, i) = + Iter.(Shrink.(list ~shrink:int l) >|= fun l -> l, min i (List.length l - 1)) + in + let gen = + Gen.( + let* l = small_list int in + let+ i = + if l = [] then + return 0 + else + 0 -- (List.length l - 1) + in + l, i) + in + make ~shrink ~print:Print.(pair (list int) int) gen +;; + +q arb_list_with_idx (fun (l1, i) -> + if l1 <> [] then ( + let l2 = + let x = List.nth l1 i in + CCList.set_at_idx i (x + 1) l1 + in + not (equal CCInt.equal (of_list l1) (of_list l2)) + ) else + true) module Ref_impl = struct type +'a t = 'a list