mirror of
https://github.com/c-cube/iter.git
synced 2025-12-06 11:15:32 -05:00
Merge pull request #9 from Drup/master
random shuffle an sampling (credit @drup)
This commit is contained in:
commit
8f5bd27b71
3 changed files with 119 additions and 0 deletions
65
sequence.ml
65
sequence.ml
|
|
@ -70,6 +70,16 @@ let mapi f seq k =
|
|||
let i = ref 0 in
|
||||
seq (fun x -> k (f !i x); incr i)
|
||||
|
||||
let map_by_2 f seq k =
|
||||
let r = ref None in
|
||||
let f y = match !r with
|
||||
| None -> r := Some y
|
||||
| Some x -> k (f x y)
|
||||
in
|
||||
seq f ;
|
||||
match !r with
|
||||
| None -> () | Some x -> k x
|
||||
|
||||
let filter p seq k = seq (fun x -> if p x then k x)
|
||||
|
||||
let append s1 s2 k = s1 k; s2 k
|
||||
|
|
@ -688,6 +698,61 @@ let random_array a k =
|
|||
|
||||
let random_list l = random_array (Array.of_list l)
|
||||
|
||||
(* See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *)
|
||||
let shuffle_array a =
|
||||
for k = Array.length a - 1 downto 0+1 do
|
||||
let l = Random.int (k+1) in
|
||||
let tmp = a.(l) in
|
||||
a.(l) <- a.(k);
|
||||
a.(k) <- tmp;
|
||||
done
|
||||
|
||||
let shuffle seq =
|
||||
let a = to_array seq in
|
||||
shuffle_array a ;
|
||||
of_array a
|
||||
|
||||
let shuffle_buffer n seq k =
|
||||
let seq_front = take n seq in
|
||||
let a = to_array seq_front in
|
||||
let l = Array.length a in
|
||||
if l < n then begin
|
||||
shuffle_array a ;
|
||||
of_array a k
|
||||
end
|
||||
else begin
|
||||
let seq = drop n seq in
|
||||
let f x =
|
||||
let i = Random.int n in
|
||||
let y = a.(i) in
|
||||
a.(i) <- x ;
|
||||
k y
|
||||
in
|
||||
seq f
|
||||
end
|
||||
|
||||
(** {2 Sampling} *)
|
||||
|
||||
(** See https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_R *)
|
||||
let sample n seq =
|
||||
match head seq with
|
||||
| None -> [||]
|
||||
| Some x ->
|
||||
let a = Array.make n x in
|
||||
let i = ref (-1) in
|
||||
let f x =
|
||||
incr i ;
|
||||
if !i < n then
|
||||
a.(!i) <- x
|
||||
else
|
||||
let j = Random.int n in
|
||||
if j <= n then a.(!i) <- x
|
||||
else ()
|
||||
in
|
||||
seq f ;
|
||||
if !i < n then Array.sub a 0 !i
|
||||
else a
|
||||
|
||||
(** {2 Infix functions} *)
|
||||
|
||||
module Infix = struct
|
||||
|
|
|
|||
27
sequence.mli
27
sequence.mli
|
|
@ -109,6 +109,11 @@ val map : ('a -> 'b) -> 'a t -> 'b t
|
|||
val mapi : (int -> 'a -> 'b) -> 'a t -> 'b t
|
||||
(** Map objects, along with their index in the sequence *)
|
||||
|
||||
val map_by_2 : ('a -> 'a -> 'a) -> 'a t -> 'a t
|
||||
(** Map objects two by two. lazily.
|
||||
The last element is kept in the sequence if the count is odd.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val for_all : ('a -> bool) -> 'a t -> bool
|
||||
(** Do all elements satisfy the predicate? *)
|
||||
|
||||
|
|
@ -493,6 +498,28 @@ val random_list : 'a list -> 'a t
|
|||
(** Infinite sequence of random elements of the list. Basically the
|
||||
same as {!random_array}. *)
|
||||
|
||||
val shuffle : 'a t -> 'a t
|
||||
(** [shuffle seq] returns a perfect shuffle of [seq].
|
||||
Uses O(length seq) memory and time. Eager.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val shuffle_buffer : int -> 'a t -> 'a t
|
||||
(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random
|
||||
order. The shuffling is *not* uniform. Uses O(n) memory.
|
||||
|
||||
The first [n] elements of the sequence are consumed immediately. The
|
||||
rest is consumed lazily.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
(** {2 Sampling} *)
|
||||
|
||||
val sample : int -> 'a t -> 'a array
|
||||
(** [sample n seq] returns k samples of [seq], with uniform probability.
|
||||
It will consume the sequence and use O(n) memory.
|
||||
|
||||
It returns an array of size [min (length seq) n].
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
(** {2 Infix functions} *)
|
||||
|
||||
module Infix : sig
|
||||
|
|
|
|||
|
|
@ -87,6 +87,11 @@ val map : f:('a -> 'b) -> 'a t -> 'b t
|
|||
val mapi : f:(int -> 'a -> 'b) -> 'a t -> 'b t
|
||||
(** Map objects, along with their index in the sequence *)
|
||||
|
||||
val map_by_2 : f:('a -> 'a -> 'a) -> 'a t -> 'a t
|
||||
(** Map objects two by two. lazily.
|
||||
The last element is kept in the sequence if the count is odd.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val for_all : f:('a -> bool) -> 'a t -> bool
|
||||
(** Do all elements satisfy the predicate? *)
|
||||
|
||||
|
|
@ -441,6 +446,28 @@ val random_list : 'a list -> 'a t
|
|||
(** Infinite sequence of random elements of the list. Basically the
|
||||
same as {!random_array}. *)
|
||||
|
||||
val shuffle : 'a t -> 'a t
|
||||
(** [shuffle seq] returns a perfect shuffle of [seq].
|
||||
Uses O(length seq) memory and time. Eager.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val shuffle_buffer : n:int -> 'a t -> 'a t
|
||||
(** [shuffle_buffer n seq] returns a sequence of element of [seq] in random
|
||||
order. The shuffling is not uniform. Uses O(n) memory.
|
||||
|
||||
The first [n] elements of the sequence are consumed immediately. The
|
||||
rest is consumed lazily.
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
(** {2 Sampling} *)
|
||||
|
||||
val sample : n:int -> 'a t -> 'a array
|
||||
(** [sample n seq] returns k samples of [seq], with uniform probability.
|
||||
It will consume the sequence and use O(n) memory.
|
||||
|
||||
It returns an array of size [min (length seq) n].
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
(** {2 Infix functions} *)
|
||||
|
||||
module Infix : sig
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue