Merge pull request #9 from Drup/master

random shuffle an sampling (credit @drup)
This commit is contained in:
Simon Cruanes 2016-01-17 14:54:03 +01:00
commit 8f5bd27b71
3 changed files with 119 additions and 0 deletions

View file

@ -70,6 +70,16 @@ let mapi f seq k =
let i = ref 0 in
seq (fun x -> k (f !i x); incr i)
let map_by_2 f seq k =
let r = ref None in
let f y = match !r with
| None -> r := Some y
| Some x -> k (f x y)
in
seq f ;
match !r with
| None -> () | Some x -> k x
let filter p seq k = seq (fun x -> if p x then k x)
let append s1 s2 k = s1 k; s2 k
@ -688,6 +698,61 @@ let random_array a k =
let random_list l = random_array (Array.of_list l)
(* See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *)
let shuffle_array a =
for k = Array.length a - 1 downto 0+1 do
let l = Random.int (k+1) in
let tmp = a.(l) in
a.(l) <- a.(k);
a.(k) <- tmp;
done
let shuffle seq =
let a = to_array seq in
shuffle_array a ;
of_array a
let shuffle_buffer n seq k =
let seq_front = take n seq in
let a = to_array seq_front in
let l = Array.length a in
if l < n then begin
shuffle_array a ;
of_array a k
end
else begin
let seq = drop n seq in
let f x =
let i = Random.int n in
let y = a.(i) in
a.(i) <- x ;
k y
in
seq f
end
(** {2 Sampling} *)
(** See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R *)
let sample n seq =
match head seq with
| None -> [||]
| Some x ->
let a = Array.make n x in
let i = ref (-1) in
let f x =
incr i ;
if !i < n then
a.(!i) <- x
else
let j = Random.int n in
if j <= n then a.(!i) <- x
else ()
in
seq f ;
if !i < n then Array.sub a 0 !i
else a
(** {2 Infix functions} *)
module Infix = struct

View file

@ -109,6 +109,11 @@ val map : ('a -> 'b) -> 'a t -> 'b t
val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t
(** Map objects, along with their index in the sequence *)
val map_by_2 : ('a -> 'a -> 'a) -> 'a t -> 'a t
(** Map objects two by two. lazily.
The last element is kept in the sequence if the count is odd.
@since NEXT_RELEASE *)
val for_all : ('a -> bool) -> 'a t -> bool
(** Do all elements satisfy the predicate? *)
@ -493,6 +498,28 @@ val random_list : 'a list -> 'a t
(** Infinite sequence of random elements of the list. Basically the
same as {!random_array}. *)
val shuffle : 'a t -> 'a t
(** [shuffle seq] returns a perfect shuffle of [seq].
Uses O(length seq) memory and time. Eager.
@since NEXT_RELEASE *)
val shuffle_buffer : int -> 'a t -> 'a t
(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random
order. The shuffling is *not* uniform. Uses O(n) memory.
The first [n] elements of the sequence are consumed immediately. The
rest is consumed lazily.
@since NEXT_RELEASE *)
(** {2 Sampling} *)
val sample : int -> 'a t -> 'a array
(** [sample n seq] returns k samples of [seq], with uniform probability.
It will consume the sequence and use O(n) memory.
It returns an array of size [min (length seq) n].
@since NEXT_RELEASE *)
(** {2 Infix functions} *)
module Infix : sig

View file

@ -87,6 +87,11 @@ val map : f:('a -> 'b) -> 'a t -> 'b t
val mapi : f:(int -> 'a -> 'b) -> 'a t -> 'b t
(** Map objects, along with their index in the sequence *)
val map_by_2 : f:('a -> 'a -> 'a) -> 'a t -> 'a t
(** Map objects two by two. lazily.
The last element is kept in the sequence if the count is odd.
@since NEXT_RELEASE *)
val for_all : f:('a -> bool) -> 'a t -> bool
(** Do all elements satisfy the predicate? *)
@ -441,6 +446,28 @@ val random_list : 'a list -> 'a t
(** Infinite sequence of random elements of the list. Basically the
same as {!random_array}. *)
val shuffle : 'a t -> 'a t
(** [shuffle seq] returns a perfect shuffle of [seq].
Uses O(length seq) memory and time. Eager.
@since NEXT_RELEASE *)
val shuffle_buffer : n:int -> 'a t -> 'a t
(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random
order. The shuffling is not uniform. Uses O(n) memory.
The first [n] elements of the sequence are consumed immediately. The
rest is consumed lazily.
@since NEXT_RELEASE *)
(** {2 Sampling} *)
val sample : n:int -> 'a t -> 'a array
(** [sample n seq] returns k samples of [seq], with uniform probability.
It will consume the sequence and use O(n) memory.
It returns an array of size [min (length seq) n].
@since NEXT_RELEASE *)
(** {2 Infix functions} *)
module Infix : sig