mirror of
https://github.com/c-cube/moonpool.git
synced 2025-12-08 04:05:41 -05:00
add Fork_join.{for_,map_reduce_commutative}
This commit is contained in:
parent
858755e812
commit
55f831bc8b
5 changed files with 222 additions and 23 deletions
2
Makefile
2
Makefile
|
|
@ -29,7 +29,7 @@ bench-fib:
|
||||||
'./_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -n $(N)'
|
'./_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -n $(N)'
|
||||||
|
|
||||||
PI_NSTEPS?=100_000_000
|
PI_NSTEPS?=100_000_000
|
||||||
PI_MODES?=seq,par1
|
PI_MODES?=seq,par1,forkjoin
|
||||||
bench-pi:
|
bench-pi:
|
||||||
@echo running for N=$(PI_NSTEPS)
|
@echo running for N=$(PI_NSTEPS)
|
||||||
dune build $(DUNE_OPTS_BENCH) benchs/pi.exe
|
dune build $(DUNE_OPTS_BENCH) benchs/pi.exe
|
||||||
|
|
|
||||||
45
benchs/pi.ml
45
benchs/pi.ml
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
open Moonpool
|
open Moonpool
|
||||||
|
|
||||||
|
let ( let@ ) = ( @@ )
|
||||||
let j = ref 0
|
let j = ref 0
|
||||||
let spf = Printf.sprintf
|
let spf = Printf.sprintf
|
||||||
|
|
||||||
|
|
@ -16,15 +17,15 @@ let run_sequential (num_steps : int) : float =
|
||||||
pi
|
pi
|
||||||
|
|
||||||
(** Create a pool *)
|
(** Create a pool *)
|
||||||
let mk_pool () =
|
let with_pool f =
|
||||||
if !j = 0 then
|
if !j = 0 then
|
||||||
Pool.create ~per_domain:1 ()
|
Pool.with_ ~per_domain:1 f
|
||||||
else
|
else
|
||||||
Pool.create ~min:!j ()
|
Pool.with_ ~min:!j f
|
||||||
|
|
||||||
(** Run in parallel using {!Fut.for_} *)
|
(** Run in parallel using {!Fut.for_} *)
|
||||||
let run_par1 (num_steps : int) : float =
|
let run_par1 (num_steps : int) : float =
|
||||||
let pool = mk_pool () in
|
let@ pool = with_pool () in
|
||||||
|
|
||||||
let num_tasks = Pool.size pool in
|
let num_tasks = Pool.size pool in
|
||||||
|
|
||||||
|
|
@ -51,12 +52,42 @@ let run_par1 (num_steps : int) : float =
|
||||||
pi
|
pi
|
||||||
|
|
||||||
[@@@ifge 5.0]
|
[@@@ifge 5.0]
|
||||||
|
|
||||||
|
let run_fork_join num_steps : float =
|
||||||
|
let@ pool = with_pool () in
|
||||||
|
|
||||||
|
let num_tasks = Pool.size pool in
|
||||||
|
|
||||||
|
let step = 1. /. float num_steps in
|
||||||
|
let global_sum = Lock.create 0. in
|
||||||
|
|
||||||
|
Pool.run_wait_block pool (fun () ->
|
||||||
|
Fork_join.for_
|
||||||
|
~chunk_size:(3 + (num_steps / num_tasks))
|
||||||
|
num_steps
|
||||||
|
(fun range ->
|
||||||
|
let sum = ref 0. in
|
||||||
|
range (fun i ->
|
||||||
|
let x = (float i +. 0.5) *. step in
|
||||||
|
sum := !sum +. (4. /. (1. +. (x *. x))));
|
||||||
|
|
||||||
|
let sum = !sum in
|
||||||
|
Lock.update global_sum (fun n -> n +. sum)));
|
||||||
|
|
||||||
|
let pi = step *. Lock.get global_sum in
|
||||||
|
pi
|
||||||
|
|
||||||
[@@@else_]
|
[@@@else_]
|
||||||
|
|
||||||
|
let run_fork_join _ =
|
||||||
|
failwith "fork join not available on this version of OCaml"
|
||||||
|
|
||||||
[@@@endif]
|
[@@@endif]
|
||||||
|
|
||||||
type mode =
|
type mode =
|
||||||
| Sequential
|
| Sequential
|
||||||
| Par1
|
| Par1
|
||||||
|
| Fork_join
|
||||||
|
|
||||||
let () =
|
let () =
|
||||||
let mode = ref Sequential in
|
let mode = ref Sequential in
|
||||||
|
|
@ -66,13 +97,16 @@ let () =
|
||||||
let set_mode = function
|
let set_mode = function
|
||||||
| "seq" -> mode := Sequential
|
| "seq" -> mode := Sequential
|
||||||
| "par1" -> mode := Par1
|
| "par1" -> mode := Par1
|
||||||
|
| "forkjoin" -> mode := Fork_join
|
||||||
| _s -> failwith (spf "unknown mode %S" _s)
|
| _s -> failwith (spf "unknown mode %S" _s)
|
||||||
in
|
in
|
||||||
|
|
||||||
let opts =
|
let opts =
|
||||||
[
|
[
|
||||||
"-n", Arg.Set_int n, " number of steps";
|
"-n", Arg.Set_int n, " number of steps";
|
||||||
"-mode", Arg.Symbol ([ "seq"; "par1" ], set_mode), " mode of execution";
|
( "-mode",
|
||||||
|
Arg.Symbol ([ "seq"; "par1"; "forkjoin" ], set_mode),
|
||||||
|
" mode of execution" );
|
||||||
"-j", Arg.Set_int j, " number of threads";
|
"-j", Arg.Set_int j, " number of threads";
|
||||||
"-t", Arg.Set time, " printing timing";
|
"-t", Arg.Set time, " printing timing";
|
||||||
]
|
]
|
||||||
|
|
@ -85,6 +119,7 @@ let () =
|
||||||
match !mode with
|
match !mode with
|
||||||
| Sequential -> run_sequential !n
|
| Sequential -> run_sequential !n
|
||||||
| Par1 -> run_par1 !n
|
| Par1 -> run_par1 !n
|
||||||
|
| Fork_join -> run_fork_join !n
|
||||||
in
|
in
|
||||||
let elapsed : float = Unix.gettimeofday () -. t_start in
|
let elapsed : float = Unix.gettimeofday () -. t_start in
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@
|
||||||
|
|
||||||
module A = Atomic_
|
module A = Atomic_
|
||||||
|
|
||||||
|
type 'a iter = ('a -> unit) -> unit
|
||||||
|
|
||||||
module State_ = struct
|
module State_ = struct
|
||||||
type 'a single_res =
|
type 'a single_res =
|
||||||
| St_none
|
| St_none
|
||||||
|
|
@ -91,22 +93,34 @@ let both f g : _ * _ =
|
||||||
|
|
||||||
let both_ignore f g = ignore (both f g : _ * _)
|
let both_ignore f g = ignore (both f g : _ * _)
|
||||||
|
|
||||||
let all_list fs : _ list =
|
let for_ ?chunk_size n (f : int iter -> unit) : unit =
|
||||||
let len = List.length fs in
|
|
||||||
let arr = Array.make len None in
|
|
||||||
let has_failed = A.make false in
|
let has_failed = A.make false in
|
||||||
let missing = A.make len in
|
let missing = A.make n in
|
||||||
|
|
||||||
|
let chunk_size =
|
||||||
|
match chunk_size with
|
||||||
|
| Some cs -> max 1 (min n cs)
|
||||||
|
| None ->
|
||||||
|
(* guess: try to have roughly one task per core *)
|
||||||
|
max 1 (n / Domain_.recommended_number ())
|
||||||
|
in
|
||||||
|
|
||||||
let start_tasks ~run (suspension : Suspend_.suspension) =
|
let start_tasks ~run (suspension : Suspend_.suspension) =
|
||||||
let task_for i f =
|
let task_for ~offset ~len_range =
|
||||||
try
|
(* range to process within this task *)
|
||||||
let x = f () in
|
let range : int iter =
|
||||||
arr.(i) <- Some x;
|
fun yield ->
|
||||||
|
for j = offset to offset + len_range - 1 do
|
||||||
|
yield j
|
||||||
|
done
|
||||||
|
in
|
||||||
|
|
||||||
if A.fetch_and_add missing (-1) = 1 then
|
match f range with
|
||||||
|
| () ->
|
||||||
|
if A.fetch_and_add missing (-len_range) = len_range then
|
||||||
(* all tasks done successfully *)
|
(* all tasks done successfully *)
|
||||||
suspension (Ok ())
|
suspension (Ok ())
|
||||||
with exn ->
|
| exception exn ->
|
||||||
let bt = Printexc.get_raw_backtrace () in
|
let bt = Printexc.get_raw_backtrace () in
|
||||||
if not (A.exchange has_failed true) then
|
if not (A.exchange has_failed true) then
|
||||||
(* first one to fail, and [missing] must be >= 2
|
(* first one to fail, and [missing] must be >= 2
|
||||||
|
|
@ -114,23 +128,79 @@ let all_list fs : _ list =
|
||||||
suspension (Error (exn, bt))
|
suspension (Error (exn, bt))
|
||||||
in
|
in
|
||||||
|
|
||||||
List.iteri (fun i f -> run ~with_handler:true (fun () -> task_for i f)) fs
|
let i = ref 0 in
|
||||||
|
while !i < n do
|
||||||
|
let offset = !i in
|
||||||
|
|
||||||
|
let len_range = min chunk_size (n - offset) in
|
||||||
|
assert (offset + len_range <= n);
|
||||||
|
|
||||||
|
run ~with_handler:true (fun () -> task_for ~offset ~len_range);
|
||||||
|
i := !i + len_range
|
||||||
|
done
|
||||||
in
|
in
|
||||||
|
|
||||||
Suspend_.suspend
|
Suspend_.suspend
|
||||||
{
|
{
|
||||||
Suspend_.handle =
|
Suspend_.handle =
|
||||||
(fun ~run suspension ->
|
(fun ~run suspension ->
|
||||||
(* nothing else is started, no race condition possible *)
|
(* run tasks, then we'll resume [suspension] *)
|
||||||
start_tasks ~run suspension);
|
start_tasks ~run suspension);
|
||||||
};
|
};
|
||||||
|
()
|
||||||
|
|
||||||
|
let all_array ?chunk_size (fs : _ array) : _ array =
|
||||||
|
let len = Array.length fs in
|
||||||
|
let arr = Array.make len None in
|
||||||
|
|
||||||
|
(* parallel for *)
|
||||||
|
for_ ?chunk_size len (fun range ->
|
||||||
|
range (fun i ->
|
||||||
|
let x = fs.(i) () in
|
||||||
|
arr.(i) <- Some x));
|
||||||
|
|
||||||
(* get all results *)
|
(* get all results *)
|
||||||
List.init len (fun i ->
|
Array.map
|
||||||
|
(function
|
||||||
|
| None -> assert false
|
||||||
|
| Some x -> x)
|
||||||
|
arr
|
||||||
|
|
||||||
|
let all_list ?chunk_size fs : _ list =
|
||||||
|
Array.to_list @@ all_array ?chunk_size @@ Array.of_list fs
|
||||||
|
|
||||||
|
let all_init ?chunk_size n f : _ list =
|
||||||
|
let arr = Array.make n None in
|
||||||
|
|
||||||
|
for_ ?chunk_size n (fun range ->
|
||||||
|
range (fun i ->
|
||||||
|
let x = f i in
|
||||||
|
arr.(i) <- Some x));
|
||||||
|
|
||||||
|
(* get all results *)
|
||||||
|
List.init n (fun i ->
|
||||||
match arr.(i) with
|
match arr.(i) with
|
||||||
| None -> assert false
|
| None -> assert false
|
||||||
| Some x -> x)
|
| Some x -> x)
|
||||||
|
|
||||||
let all_init n f = all_list @@ List.init n (fun i () -> f i)
|
type 'a commutative_monoid = {
|
||||||
|
neutral: unit -> 'a; (** Neutral element *)
|
||||||
|
combine: 'a -> 'a -> 'a; (** Combine two items. *)
|
||||||
|
}
|
||||||
|
|
||||||
|
let map_reduce_commutative ?chunk_size ~gen ~map
|
||||||
|
~(reduce : 'b commutative_monoid) n : 'b =
|
||||||
|
let res = Lock.create (reduce.neutral ()) in
|
||||||
|
|
||||||
|
for_ ?chunk_size n (fun range ->
|
||||||
|
let local_acc = ref (reduce.neutral ()) in
|
||||||
|
range (fun i ->
|
||||||
|
let x = gen i in
|
||||||
|
let y = map x in
|
||||||
|
|
||||||
|
local_acc := reduce.combine !local_acc y);
|
||||||
|
|
||||||
|
Lock.update res (fun res -> reduce.combine res !local_acc));
|
||||||
|
Lock.get res
|
||||||
|
|
||||||
[@@@endif]
|
[@@@endif]
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,10 @@
|
||||||
|
|
||||||
[@@@ifge 5.0]
|
[@@@ifge 5.0]
|
||||||
|
|
||||||
|
type 'a iter = ('a -> unit) -> unit
|
||||||
|
(** Iterators of type ['a].
|
||||||
|
@since NEXT_RELEASE *)
|
||||||
|
|
||||||
val both : (unit -> 'a) -> (unit -> 'b) -> 'a * 'b
|
val both : (unit -> 'a) -> (unit -> 'b) -> 'a * 'b
|
||||||
(** [both f g] runs [f()] and [g()], potentially in parallel,
|
(** [both f g] runs [f()] and [g()], potentially in parallel,
|
||||||
and returns their result when both are done.
|
and returns their result when both are done.
|
||||||
|
|
@ -23,16 +27,95 @@ val both_ignore : (unit -> _) -> (unit -> _) -> unit
|
||||||
@since 0.3
|
@since 0.3
|
||||||
{b NOTE} this is only available on OCaml 5. *)
|
{b NOTE} this is only available on OCaml 5. *)
|
||||||
|
|
||||||
val all_list : (unit -> 'a) list -> 'a list
|
val for_ : ?chunk_size:int -> int -> (int iter -> unit) -> unit
|
||||||
|
(** [for_ n f] is the parallel version of [for i=0 to n-1 do f i done].
|
||||||
|
|
||||||
|
[f] is called with a [range] parameter, which is an iterator on indices
|
||||||
|
[f] should process.
|
||||||
|
|
||||||
|
@param chunk_size controls the granularity of parallelism.
|
||||||
|
The default chunk size is not specified.
|
||||||
|
See {!all_array} or {!all_list} for more details.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
{[
|
||||||
|
let total_sum = Atomic.make 0
|
||||||
|
|
||||||
|
let() = for_ ~chunk_size:5 100
|
||||||
|
(fun range ->
|
||||||
|
(* iterate on the range sequentially. The range should have 5 items or less. *)
|
||||||
|
let local_sum = ref 0 in
|
||||||
|
range
|
||||||
|
(fun i -> local_sum := !local_sum + n);
|
||||||
|
ignore (Atomic.fetch_and_add total_sum !local_sum : int)))
|
||||||
|
|
||||||
|
let() = assert (Atomic.get total_sum = 4950)
|
||||||
|
]}
|
||||||
|
|
||||||
|
@since NEXT_RELEASE
|
||||||
|
{b NOTE} this is only available on OCaml 5. *)
|
||||||
|
|
||||||
|
val all_array : ?chunk_size:int -> (unit -> 'a) array -> 'a array
|
||||||
|
(** [all_array fs] runs all functions in [fs] in tasks, and waits for
|
||||||
|
all the results.
|
||||||
|
|
||||||
|
@param chunk_size if equal to [n], groups items by [n] to be run in
|
||||||
|
a single task. Default is [1].
|
||||||
|
|
||||||
|
@since NEXT_RELEASE
|
||||||
|
{b NOTE} this is only available on OCaml 5. *)
|
||||||
|
|
||||||
|
val all_list : ?chunk_size:int -> (unit -> 'a) list -> 'a list
|
||||||
(** [all_list fs] runs all functions in [fs] in tasks, and waits for
|
(** [all_list fs] runs all functions in [fs] in tasks, and waits for
|
||||||
all the results.
|
all the results.
|
||||||
|
|
||||||
|
@param chunk_size if equal to [n], groups items by [n] to be run in
|
||||||
|
a single task. Default is not specified.
|
||||||
|
This parameter is available since NEXT_RELEASE.
|
||||||
|
|
||||||
@since 0.3
|
@since 0.3
|
||||||
{b NOTE} this is only available on OCaml 5. *)
|
{b NOTE} this is only available on OCaml 5. *)
|
||||||
|
|
||||||
val all_init : int -> (int -> 'a) -> 'a list
|
val all_init : ?chunk_size:int -> int -> (int -> 'a) -> 'a list
|
||||||
(** [all_init n f] runs functions [f 0], [f 1], … [f (n-1)] in tasks, and waits for
|
(** [all_init n f] runs functions [f 0], [f 1], … [f (n-1)] in tasks, and waits for
|
||||||
all the results.
|
all the results.
|
||||||
|
|
||||||
|
@param chunk_size if equal to [n], groups items by [n] to be run in
|
||||||
|
a single task. Default is not specified.
|
||||||
|
This parameter is available since NEXT_RELEASE.
|
||||||
|
|
||||||
@since 0.3
|
@since 0.3
|
||||||
{b NOTE} this is only available on OCaml 5. *)
|
{b NOTE} this is only available on OCaml 5. *)
|
||||||
|
|
||||||
|
type 'a commutative_monoid = {
|
||||||
|
neutral: unit -> 'a; (** Neutral element *)
|
||||||
|
combine: 'a -> 'a -> 'a; (** Combine two items. *)
|
||||||
|
}
|
||||||
|
(** A commutative monoid: order of operations does not matter.
|
||||||
|
@since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val map_reduce_commutative :
|
||||||
|
?chunk_size:int ->
|
||||||
|
gen:(int -> 'a) ->
|
||||||
|
map:('a -> 'b) ->
|
||||||
|
reduce:'b commutative_monoid ->
|
||||||
|
int ->
|
||||||
|
'b
|
||||||
|
(** [map_reduce_commutative ~gen ~map ~reduce n] produces items of type ['a]
|
||||||
|
using [gen 0], [gen 1], …, [gen (n-1)]. Items are then mapped using [map]
|
||||||
|
in background tasks (each task processes up to [chunk_size] items at a time).
|
||||||
|
|
||||||
|
Then, items of type ['b] obtained by mapping are reduced together using the
|
||||||
|
definition of the commutative monoid [reduce]. The order in which they
|
||||||
|
are reduced is not specified.
|
||||||
|
|
||||||
|
@param chunk_size controls granularity of the mapping process
|
||||||
|
@param gen generates items to process based on an index
|
||||||
|
@param map takes an item and processes it, independently of other items
|
||||||
|
@param reduce is used to aggregate results of mapping.
|
||||||
|
|
||||||
|
@since NEXT_RELEASE
|
||||||
|
{b NOTE} this is only available on OCaml 5.
|
||||||
|
*)
|
||||||
|
|
||||||
[@@@endif]
|
[@@@endif]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
[@@@ifge 5.0]
|
[@@@ifge 5.0]
|
||||||
|
|
||||||
open Moonpool
|
open! Moonpool
|
||||||
|
|
||||||
let pool = Pool.create ~min:4 ()
|
let pool = Pool.create ~min:4 ()
|
||||||
|
|
||||||
|
|
@ -39,4 +39,15 @@ let () =
|
||||||
let exp_sum = List.init 42 (fun x -> x * x) |> List.fold_left ( + ) 0 in
|
let exp_sum = List.init 42 (fun x -> x * x) |> List.fold_left ( + ) 0 in
|
||||||
assert (par_sum = exp_sum)
|
assert (par_sum = exp_sum)
|
||||||
|
|
||||||
|
let () =
|
||||||
|
let total_sum = Atomic.make 0 in
|
||||||
|
|
||||||
|
Pool.run_wait_block pool (fun () ->
|
||||||
|
Fork_join.for_ ~chunk_size:5 100 (fun range ->
|
||||||
|
(* iterate on the range sequentially. The range should have 5 items or less. *)
|
||||||
|
let local_sum = ref 0 in
|
||||||
|
range (fun i -> local_sum := !local_sum + i);
|
||||||
|
ignore (Atomic.fetch_and_add total_sum !local_sum : int)));
|
||||||
|
assert (Atomic.get total_sum = 4950)
|
||||||
|
|
||||||
[@@@endif]
|
[@@@endif]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue