added Sequence.persistent function (copy sequence in memory);

implementation is done by unrolled linked list (also used for Sequence.rev and Sequence.to_array);
should be quite efficient in time and memory
This commit is contained in:
Simon Cruanes 2013-02-25 02:18:40 +01:00
parent d64691f1b3
commit df7ef45ad5
2 changed files with 108 additions and 24 deletions

View file

@ -23,7 +23,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*) *)
(** {2 Transient iterators, that abstract on a finite sequence of elements. *) (** {1 Transient iterators, that abstract on a finite sequence of elements. *)
(** Sequence abstract iterator type *) (** Sequence abstract iterator type *)
type 'a t = ('a -> unit) -> unit type 'a t = ('a -> unit) -> unit
@ -117,8 +117,96 @@ let intersperse seq elem =
from_iter from_iter
(fun k -> seq (fun x -> k x; k elem)) (fun k -> seq (fun x -> k x; k elem))
(** Mutable unrolled list to serve as intermediate storage *)
module MList = struct
type 'a t = {
content : 'a array; (* elements of the node *)
mutable len : int; (* number of elements in content *)
mutable tl : 'a t; (* tail *)
} (** A list that contains some elements, and may point to another list *)
let _empty () : 'a t = Obj.magic 0
(** Empty list, for the tl field *)
let make n =
assert (n > 0);
{ content = Array.make n (Obj.magic 0);
len = 0;
tl = _empty ();
}
let rec is_empty l =
l.len = 0 && (l.tl == _empty () || is_empty l.tl)
let rec iter f l =
for i = 0 to l.len - 1 do f l.content.(i); done;
if l.tl != _empty () then iter f l.tl
let iteri f l =
let rec iteri i f l =
for j = 0 to l.len - 1 do f (i+j) l.content.(j); done;
if l.tl != _empty () then iteri (i+l.len) f l.tl
in iteri 0 f l
let length l =
let rec len acc l =
if l.tl == _empty () then acc+l.len else len (acc+l.len) l.tl
in len 0 l
(** Get element by index *)
let rec get l i =
if i < l.len then l.content.(i)
else if i >= l.len && l.tl == _empty () then raise (Invalid_argument "MList.get")
else get l.tl (i - l.len)
(** Push [x] at the end of the list. It returns the block in which the
element is inserted. *)
let rec push x l =
if l.len = Array.length l.content
then begin (* insert in the next block *)
(if l.tl == _empty () then l.tl <- make (Array.length l.content));
push x l.tl
end else begin (* insert in l *)
l.content.(l.len) <- x;
l.len <- l.len + 1;
l
end
(** Reverse list (in place), and returns the new head *)
let rev l =
let rec rev prev l =
(* reverse array *)
for i = 0 to (l.len-1) / 2 do
let x = l.content.(i) in
l.content.(i) <- l.content.(l.len - i - 1);
l.content.(l.len - i - 1) <- x;
done;
(* reverse next block *)
let l' = l.tl in
l.tl <- prev;
if l' == _empty () then l else rev l l'
in
rev (_empty ()) l
(** Build a MList of elements of the Seq. The optional argument indicates
the size of the blocks *)
let of_seq ?(size=64) seq =
(* read sequence into a MList.t *)
let start = make size in
let l = ref start in
seq (fun x -> l := push x !l);
start
end
(** Iterate on the sequence, storing elements in a data structure.
The resulting sequence can be iterated on as many times as needed. *)
let persistent (seq : 'a t) : 'a t =
let l = MList.of_seq seq in
from_iter (fun k -> MList.iter k l)
(** Cartesian product of the sequences. *) (** Cartesian product of the sequences. *)
let product outer inner = let product outer inner =
let outer = persistent outer in
from_iter from_iter
(fun k -> (fun k ->
outer (fun x -> outer (fun x ->
@ -163,16 +251,9 @@ let drop n seq =
(** Reverse the sequence. O(n) memory. *) (** Reverse the sequence. O(n) memory. *)
let rev seq = let rev seq =
fun k -> let l = MList.of_seq seq in
(* if we have traversed [s_1, ..., s_m], [cont ()] will call [k] on s_m, let l' = MList.rev l in
s_{m-1}, ..., s_1. Once we know [s_{m+1}], we update [cont] so that it from_iter (fun k -> MList.iter k l')
first returns it, and then called the previous cont. *)
let cont = ref (fun () -> ()) in
iter (fun x ->
let current_cont = !cont in
let cont' () = k x; current_cont () in
cont := cont') seq;
!cont ()
(** Do all elements satisfy the predicate? *) (** Do all elements satisfy the predicate? *)
let for_all p seq = let for_all p seq =
@ -207,17 +288,15 @@ let to_rev_list seq = fold (fun y x -> x :: y) [] seq
let of_list l = from_iter (fun k -> List.iter k l) let of_list l = from_iter (fun k -> List.iter k l)
let to_array seq = let to_array seq =
(* intermediate list... *) let l = MList.of_seq seq in
let l = to_rev_list seq in let n = MList.length l in
let a = Array.of_list l in if n = 0
(* reverse array *) then [||]
let n = Array.length a in else begin
for i = 0 to (n-1) / 2 do let a = Array.make n (MList.get l 0) in
let tmp = a.(i) in MList.iteri (fun i x -> a.(i) <- x) l;
a.(i) <- a.(n-i-1);
a.(n-i-1) <- tmp;
done;
a a
end
let of_array a = from_iter (fun k -> Array.iter k a) let of_array a = from_iter (fun k -> Array.iter k a)

View file

@ -105,9 +105,14 @@ val flatMap : ('a -> 'b t) -> 'a t -> 'b t
val intersperse : 'a t -> 'a -> 'a t val intersperse : 'a t -> 'a -> 'a t
(** Insert the second element between every element of the sequence *) (** Insert the second element between every element of the sequence *)
val persistent : 'a t -> 'a t
(** Iterate on the sequence, storing elements in a data structure.
The resulting sequence can be iterated on as many times as needed. *)
val product : 'a t -> 'b t -> ('a * 'b) t val product : 'a t -> 'b t -> ('a * 'b) t
(** Cartesian product of the sequences. The first one is outer (** Cartesian product of the sequences. The first one is transformed
and therefore must be traversable several times. *) by calling [persistent] on it, so that it can be traversed
several times (outer loop of the product) *)
val unfoldr : ('b -> ('a * 'b) option) -> 'b -> 'a t val unfoldr : ('b -> ('a * 'b) option) -> 'b -> 'a t
(** [unfoldr f b] will apply [f] to [b]. If it (** [unfoldr f b] will apply [f] to [b]. If it