API change for Sequence.persistent: no need to provide a block size

The unrolled list now manages the block size itself (doubling the block size
as it grows), and the new implementation of persistent is more efficient.
This commit is contained in:
Simon Cruanes 2014-02-27 11:51:57 +01:00
parent a3ff8ee854
commit a5385ce649
3 changed files with 75 additions and 87 deletions

View file

@ -144,97 +144,94 @@ let intersperse elem seq =
(** Mutable unrolled list to serve as intermediate storage *) (** Mutable unrolled list to serve as intermediate storage *)
module MList = struct module MList = struct
type 'a t = { type 'a node =
content : 'a array; (* elements of the node *) | Nil
mutable len : int; (* number of elements in content *) | Cons of 'a array * int ref * 'a node ref
mutable tl : 'a t; (* tail *)
} (** A list that contains some elements, and may point to another list *)
let _empty () : 'a t = Obj.magic 0 let of_seq seq =
(** Empty list, for the tl field *) let start = ref Nil in
let chunk_size = ref 8 in
(* fill the list. prev: tail-reference from previous node *)
let prev, cur = ref start, ref Nil in
seq
(fun x -> match !cur with
| Nil ->
let n = !chunk_size in
if n < 4096 then chunk_size := 2 * !chunk_size;
cur := Cons (Array.make n x, ref 1, ref Nil)
| Cons (a,n,next) ->
assert (!n < Array.length a);
a.(!n) <- x;
incr n;
if !n = Array.length a then begin
!prev := !cur;
prev := next;
cur := Nil
end
);
!prev := !cur;
!start
let make n = let is_empty = function
assert (n > 0); | Nil -> true
{ content = Array.make n (Obj.magic 0); | Cons _ -> false
len = 0;
tl = _empty ();
}
let rec is_empty l = let rec iter f l = match l with
l.len = 0 && (l.tl == _empty () || is_empty l.tl) | Nil -> ()
| Cons (a, n, tl) ->
let rec iter f l = for i=0 to !n - 1 do f a.(i) done;
for i = 0 to l.len - 1 do f l.content.(i); done; iter f !tl
if l.tl != _empty () then iter f l.tl
let iteri f l = let iteri f l =
let rec iteri i f l = let rec iteri i f l = match l with
for j = 0 to l.len - 1 do f (i+j) l.content.(j); done; | Nil -> ()
if l.tl != _empty () then iteri (i+l.len) f l.tl | Cons (a, n, tl) ->
for j=0 to !n - 1 do f (i+j) a.(j) done;
iteri (i+ !n) f !tl
in iteri 0 f l in iteri 0 f l
let rec iter_rev f l = let rec iter_rev f l = match l with
(if l.tl != _empty () then iter_rev f l.tl); | Nil -> ()
for i = l.len - 1 downto 0 do f l.content.(i); done | Cons (a, n, tl) ->
iter_rev f !tl;
for i = !n-1 downto 0 do f a.(i) done
let length l = let length l =
let rec len acc l = let rec len acc l = match l with
if l.tl == _empty () then acc+l.len else len (acc+l.len) l.tl | Nil -> acc
| Cons (_, n, tl) -> len (acc+ !n) !tl
in len 0 l in len 0 l
(** Get element by index *) (** Get element by index *)
let rec get l i = let rec get l i = match l with
if i < l.len then l.content.(i) | Nil -> raise (Invalid_argument "MList.get")
else if i >= l.len && l.tl == _empty () then raise (Invalid_argument "MList.get") | Cons (a, n, _) when i < !n -> a.(i)
else get l.tl (i - l.len) | Cons (_, n, tl) -> get !tl (i- !n)
(** Push [x] at the end of the list. It returns the block in which the let to_seq l k = iter k l
element is inserted. *)
let rec push x l =
if l.len = Array.length l.content
then begin (* insert in the next block *)
(if l.tl == _empty () then
let n = Array.length l.content in
l.tl <- make (n + n lsr 1));
push x l.tl
end else begin (* insert in l *)
l.content.(l.len) <- x;
l.len <- l.len + 1;
l
end
(** Reverse list (in place), and returns the new head *) let to_stream l =
let rev l = let cur = ref l in
let rec rev prev l = let i = ref 0 in (* offset in cons *)
(* reverse array *) let rec get_next _ = match !cur with
for i = 0 to (l.len-1) / 2 do | Nil -> None
let x = l.content.(i) in | Cons (_, n, tl) when !i = !n ->
l.content.(i) <- l.content.(l.len - i - 1); cur := !tl;
l.content.(l.len - i - 1) <- x; i := 0;
done; get_next 42 (* any value would do *)
(* reverse next block *) | Cons (a, n, _) ->
let l' = l.tl in let x = a.(!i) in
l.tl <- prev; incr i;
if l' == _empty () then l else rev l l' Some x
in in
rev (_empty ()) l Stream.from get_next
(** Build a MList of elements of the Seq. The optional argument indicates
the size of the blocks *)
let of_seq ?(size=8) seq =
(* read sequence into a MList.t *)
let start = make size in
let l = ref start in
seq (fun x -> l := push x !l);
start
end end
(** Iterate on the sequence, storing elements in a data structure. (** Iterate on the sequence, storing elements in a data structure.
The resulting sequence can be iterated on as many times as needed. *) The resulting sequence can be iterated on as many times as needed. *)
let persistent ?(blocksize=64) seq = let persistent seq =
if blocksize < 2 then failwith "Sequence.persistent: blocksize too small"; let l = MList.of_seq seq in
let l = MList.of_seq ~size:blocksize seq in MList.to_seq l
from_iter (fun k -> MList.iter k l)
(** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time. *) (** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time. *)
let sort ?(cmp=Pervasives.compare) seq = let sort ?(cmp=Pervasives.compare) seq =
@ -337,7 +334,7 @@ let take n seq =
incr count; incr count;
k x; k x;
if !count = n then raise ExitSequence) if !count = n then raise ExitSequence)
with ExitSequence -> () with ExitSequence -> ()
(** Drop the [n] first elements of the sequence *) (** Drop the [n] first elements of the sequence *)
let drop n seq = let drop n seq =
@ -470,14 +467,8 @@ let of_stream s =
(** Convert to a stream. The sequence is made persistent. *) (** Convert to a stream. The sequence is made persistent. *)
let to_stream seq = let to_stream seq =
let l = ref (MList.of_seq seq) in let l = MList.of_seq seq in
let i = ref 0 in MList.to_stream l
let rec get_next () =
if !l == MList._empty () then None
else if (!l).MList.len = !i then (l := (!l).MList.tl; i := 0; get_next ())
else let x = (!l).MList.content.(!i) in (incr i; Some x)
in
Stream.from (fun _ -> get_next ())
(** Push elements of the sequence on the stack *) (** Push elements of the sequence on the stack *)
let to_stack s seq = iter (fun x -> Stack.push x s) seq let to_stack s seq = iter (fun x -> Stack.push x s) seq

View file

@ -151,15 +151,11 @@ val fmap : ('a -> 'b option) -> 'a t -> 'b t
val intersperse : 'a -> 'a t -> 'a t val intersperse : 'a -> 'a t -> 'a t
(** Insert the single element between every element of the sequence *) (** Insert the single element between every element of the sequence *)
val persistent : ?blocksize:int -> 'a t -> 'a t val persistent : 'a t -> 'a t
(** Iterate on the sequence, storing elements in a data structure. (** Iterate on the sequence, storing elements in a data structure.
The resulting sequence can be iterated on as many times as needed. The resulting sequence can be iterated on as many times as needed.
{b Note}: calling persistent on an already persistent sequence {b Note}: calling persistent on an already persistent sequence
will still make a new copy of the sequence! will still make a new copy of the sequence! *)
@param blocksize the size of chunks in the unrolled list
used to store elements. Use bigger values for bigger sequences.
Default: 64 *)
val sort : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t val sort : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t
(** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time. (** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time.

View file

@ -93,6 +93,7 @@ let test_persistent () =
OUnit.assert_equal ~printer [] (seq |> S.to_list); OUnit.assert_equal ~printer [] (seq |> S.to_list);
OUnit.assert_equal ~printer [0;1;2;3;4] (seq' |> S.to_list); OUnit.assert_equal ~printer [0;1;2;3;4] (seq' |> S.to_list);
OUnit.assert_equal ~printer [0;1;2;3;4] (seq' |> S.to_list); OUnit.assert_equal ~printer [0;1;2;3;4] (seq' |> S.to_list);
OUnit.assert_equal ~printer [0;1;2;3;4] (seq' |> S.to_stream |> S.of_stream |> S.to_list);
() ()
let test_big_persistent () = let test_big_persistent () =