diff --git a/sequence.ml b/sequence.ml index e21a99a..d0cf59e 100644 --- a/sequence.ml +++ b/sequence.ml @@ -144,97 +144,94 @@ let intersperse elem seq = (** Mutable unrolled list to serve as intermediate storage *) module MList = struct - type 'a t = { - content : 'a array; (* elements of the node *) - mutable len : int; (* number of elements in content *) - mutable tl : 'a t; (* tail *) - } (** A list that contains some elements, and may point to another list *) + type 'a node = + | Nil + | Cons of 'a array * int ref * 'a node ref - let _empty () : 'a t = Obj.magic 0 - (** Empty list, for the tl field *) + let of_seq seq = + let start = ref Nil in + let chunk_size = ref 8 in + (* fill the list. prev: tail-reference from previous node *) + let prev, cur = ref start, ref Nil in + seq + (fun x -> match !cur with + | Nil -> + let n = !chunk_size in + if n < 4096 then chunk_size := 2 * !chunk_size; + cur := Cons (Array.make n x, ref 1, ref Nil) + | Cons (a,n,next) -> + assert (!n < Array.length a); + a.(!n) <- x; + incr n; + if !n = Array.length a then begin + !prev := !cur; + prev := next; + cur := Nil + end + ); + !prev := !cur; + !start - let make n = - assert (n > 0); - { content = Array.make n (Obj.magic 0); - len = 0; - tl = _empty (); - } + let is_empty = function + | Nil -> true + | Cons _ -> false - let rec is_empty l = - l.len = 0 && (l.tl == _empty () || is_empty l.tl) - - let rec iter f l = - for i = 0 to l.len - 1 do f l.content.(i); done; - if l.tl != _empty () then iter f l.tl + let rec iter f l = match l with + | Nil -> () + | Cons (a, n, tl) -> + for i=0 to !n - 1 do f a.(i) done; + iter f !tl let iteri f l = - let rec iteri i f l = - for j = 0 to l.len - 1 do f (i+j) l.content.(j); done; - if l.tl != _empty () then iteri (i+l.len) f l.tl + let rec iteri i f l = match l with + | Nil -> () + | Cons (a, n, tl) -> + for j=0 to !n - 1 do f (i+j) a.(j) done; + iteri (i+ !n) f !tl in iteri 0 f l - let rec iter_rev f l = - (if l.tl != _empty () then iter_rev f l.tl); - for i = l.len - 1 downto 0 do f l.content.(i); done + let rec iter_rev f l = match l with + | Nil -> () + | Cons (a, n, tl) -> + iter_rev f !tl; + for i = !n-1 downto 0 do f a.(i) done let length l = - let rec len acc l = - if l.tl == _empty () then acc+l.len else len (acc+l.len) l.tl + let rec len acc l = match l with + | Nil -> acc + | Cons (_, n, tl) -> len (acc+ !n) !tl in len 0 l (** Get element by index *) - let rec get l i = - if i < l.len then l.content.(i) - else if i >= l.len && l.tl == _empty () then raise (Invalid_argument "MList.get") - else get l.tl (i - l.len) + let rec get l i = match l with + | Nil -> raise (Invalid_argument "MList.get") + | Cons (a, n, _) when i < !n -> a.(i) + | Cons (_, n, tl) -> get !tl (i- !n) - (** Push [x] at the end of the list. It returns the block in which the - element is inserted. *) - let rec push x l = - if l.len = Array.length l.content - then begin (* insert in the next block *) - (if l.tl == _empty () then - let n = Array.length l.content in - l.tl <- make (n + n lsr 1)); - push x l.tl - end else begin (* insert in l *) - l.content.(l.len) <- x; - l.len <- l.len + 1; - l - end + let to_seq l k = iter k l - (** Reverse list (in place), and returns the new head *) - let rev l = - let rec rev prev l = - (* reverse array *) - for i = 0 to (l.len-1) / 2 do - let x = l.content.(i) in - l.content.(i) <- l.content.(l.len - i - 1); - l.content.(l.len - i - 1) <- x; - done; - (* reverse next block *) - let l' = l.tl in - l.tl <- prev; - if l' == _empty () then l else rev l l' + let to_stream l = + let cur = ref l in + let i = ref 0 in (* offset in cons *) + let rec get_next _ = match !cur with + | Nil -> None + | Cons (_, n, tl) when !i = !n -> + cur := !tl; + i := 0; + get_next 42 (* any value would do *) + | Cons (a, n, _) -> + let x = a.(!i) in + incr i; + Some x in - rev (_empty ()) l - - (** Build a MList of elements of the Seq. The optional argument indicates - the size of the blocks *) - let of_seq ?(size=8) seq = - (* read sequence into a MList.t *) - let start = make size in - let l = ref start in - seq (fun x -> l := push x !l); - start + Stream.from get_next end (** Iterate on the sequence, storing elements in a data structure. The resulting sequence can be iterated on as many times as needed. *) -let persistent ?(blocksize=64) seq = - if blocksize < 2 then failwith "Sequence.persistent: blocksize too small"; - let l = MList.of_seq ~size:blocksize seq in - from_iter (fun k -> MList.iter k l) +let persistent seq = + let l = MList.of_seq seq in + MList.to_seq l (** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time. *) let sort ?(cmp=Pervasives.compare) seq = @@ -337,7 +334,7 @@ let take n seq = incr count; k x; if !count = n then raise ExitSequence) - with ExitSequence -> () + with ExitSequence -> () (** Drop the [n] first elements of the sequence *) let drop n seq = @@ -470,14 +467,8 @@ let of_stream s = (** Convert to a stream. The sequence is made persistent. *) let to_stream seq = - let l = ref (MList.of_seq seq) in - let i = ref 0 in - let rec get_next () = - if !l == MList._empty () then None - else if (!l).MList.len = !i then (l := (!l).MList.tl; i := 0; get_next ()) - else let x = (!l).MList.content.(!i) in (incr i; Some x) - in - Stream.from (fun _ -> get_next ()) + let l = MList.of_seq seq in + MList.to_stream l (** Push elements of the sequence on the stack *) let to_stack s seq = iter (fun x -> Stack.push x s) seq diff --git a/sequence.mli b/sequence.mli index b09a0de..697a5d3 100644 --- a/sequence.mli +++ b/sequence.mli @@ -151,15 +151,11 @@ val fmap : ('a -> 'b option) -> 'a t -> 'b t val intersperse : 'a -> 'a t -> 'a t (** Insert the single element between every element of the sequence *) -val persistent : ?blocksize:int -> 'a t -> 'a t +val persistent : 'a t -> 'a t (** Iterate on the sequence, storing elements in a data structure. The resulting sequence can be iterated on as many times as needed. {b Note}: calling persistent on an already persistent sequence - will still make a new copy of the sequence! - - @param blocksize the size of chunks in the unrolled list - used to store elements. Use bigger values for bigger sequences. - Default: 64 *) + will still make a new copy of the sequence! *) val sort : ?cmp:('a -> 'a -> int) -> 'a t -> 'a t (** Sort the sequence. Eager, O(n) ram and O(n ln(n)) time. diff --git a/tests/test_sequence.ml b/tests/test_sequence.ml index 0b71ecf..e5f6a3f 100644 --- a/tests/test_sequence.ml +++ b/tests/test_sequence.ml @@ -93,6 +93,7 @@ let test_persistent () = OUnit.assert_equal ~printer [] (seq |> S.to_list); OUnit.assert_equal ~printer [0;1;2;3;4] (seq' |> S.to_list); OUnit.assert_equal ~printer [0;1;2;3;4] (seq' |> S.to_list); + OUnit.assert_equal ~printer [0;1;2;3;4] (seq' |> S.to_stream |> S.of_stream |> S.to_list); () let test_big_persistent () =