From faeb95b49d2294beac4e4ee0f82f40895a8b772e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Oct 2023 09:51:40 -0400 Subject: [PATCH 01/77] fix pool: on shutdown, finish reading from all queues --- src/pool.ml | 69 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index 43cda564..e8e4c863 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -108,34 +108,55 @@ let worker_thread_ (runner : t) ~on_exn ~around_task (active : bool A.t) let num_qs = Array.length qs in let (AT_pair (before_task, after_task)) = around_task in + let get_task_without_blocking () : _ option = + try + for i = 0 to num_qs - 1 do + let q = qs.((offset + i) mod num_qs) in + match Bb_queue.try_pop ~force_lock:false q with + | Some f -> raise_notrace (Got_task f) + | None -> () + done; + None + with Got_task f -> Some f + in + + (* last resort: block on my queue *) + let[@inline] pop_blocking () = + let my_q = qs.(offset mod num_qs) in + Bb_queue.pop my_q + in + + let run_task task : unit = + let _ctx = before_task runner in + (* run the task now, catching errors *) + (try task () + with e -> + let bt = Printexc.get_raw_backtrace () in + on_exn e bt); + after_task runner _ctx + in + + let run_tasks_already_present () = + (* drain the queues from existing tasks *) + let continue = ref true in + while !continue do + match get_task_without_blocking () with + | None -> continue := false + | Some task -> run_task task + done + in + let main_loop () = while A.get active do - (* last resort: block on my queue *) - let pop_blocking () = - let my_q = qs.(offset mod num_qs) in - Bb_queue.pop my_q - in + run_tasks_already_present (); - let task = - try - for i = 0 to num_qs - 1 do - let q = qs.((offset + i) mod num_qs) in - match Bb_queue.try_pop ~force_lock:false q with - | Some f -> raise_notrace (Got_task f) - | None -> () - done; - pop_blocking () - with Got_task f -> f - in + (* no task available, block until one comes *) + let task = pop_blocking () in + run_task task + done; - let _ctx = before_task runner in - (* run the task now, catching errors *) - (try task () - with e -> - let bt = Printexc.get_raw_backtrace () in - on_exn e bt); - after_task runner _ctx - done + (* cleanup *) + run_tasks_already_present () in try From 60255c0e95ce72576e974c8871aaf06856623913 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Oct 2023 10:03:46 -0400 Subject: [PATCH 02/77] test: add dep on trace-tef; add new test for scheduling issues trying to expose that sometimes, some workers might be asleep while others do several tasks, because they're sleeping on the "wrong" queue --- Makefile | 4 ++-- dune-project | 1 + moonpool.opam | 1 + test/dune | 3 +++ test/t_unfair.ml | 44 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 test/t_unfair.ml diff --git a/Makefile b/Makefile index a7308673..b1acca61 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ BENCH_CUTOFF?=20 bench-fib: @echo running for N=$(N) dune build $(DUNE_OPTS_BENCH) benchs/fib_rec.exe - hyperfine -L psize $(BENCH_PSIZE) \ + hyperfine -L psize $(BENCH_PSIZE) --warmup=1 \ './_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -n $(N)' PI_NSTEPS?=100_000_000 @@ -36,7 +36,7 @@ PI_MODES?=seq,par1,forkjoin bench-pi: @echo running for N=$(PI_NSTEPS) dune build $(DUNE_OPTS_BENCH) benchs/pi.exe - hyperfine -L mode $(PI_MODES) \ + hyperfine -L mode $(PI_MODES) --warmup=1 \ './_build/default/benchs/pi.exe -mode={mode} -n $(PI_NSTEPS)' .PHONY: test clean bench-fib bench-pi diff --git a/dune-project b/dune-project index 3cd1b85e..18c5ade8 100644 --- a/dune-project +++ b/dune-project @@ -20,6 +20,7 @@ dune (either (>= 1.0)) (trace :with-test) + (trace-tef :with-test) (qcheck-core (and :with-test (>= 0.19))) (odoc :with-doc) (mdx diff --git a/moonpool.opam b/moonpool.opam index 3f411cfa..3a1be0b0 100644 --- a/moonpool.opam +++ b/moonpool.opam @@ -13,6 +13,7 @@ depends: [ "dune" {>= "3.0"} "either" {>= "1.0"} "trace" {with-test} + "trace-tef" {with-test} "qcheck-core" {with-test & >= "0.19"} "odoc" {with-doc} "mdx" {>= "1.9.0" & with-test} diff --git a/test/dune b/test/dune index 72c44bbf..e5d032c7 100644 --- a/test/dune +++ b/test/dune @@ -8,10 +8,13 @@ t_props t_chan_train t_resource + t_unfair t_bounded_queue) (libraries moonpool qcheck-core qcheck-core.runner ;tracy-client.trace + unix + trace-tef trace)) diff --git a/test/t_unfair.ml b/test/t_unfair.ml new file mode 100644 index 00000000..81271046 --- /dev/null +++ b/test/t_unfair.ml @@ -0,0 +1,44 @@ +(* exhibits unfairness *) + +open Moonpool + +let ( let@ ) = ( @@ ) + +let sleep_for f () = + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "sleep" in + Thread.delay f + +let () = + let@ () = Trace_tef.with_setup () in + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "main" in + + let pool = + Pool.create ~min:3 + ~on_init_thread:(fun ~dom_id:_ ~t_id () -> + Trace.set_thread_name (Printf.sprintf "pool worker %d" t_id)) + ~around_task: + ( (fun self -> Trace.counter_int "n_tasks" (Pool.num_tasks self)), + fun self () -> Trace.counter_int "n_tasks" (Pool.num_tasks self) ) + () + in + + (* make all threads busy *) + Pool.run_async pool (sleep_for 0.01); + Pool.run_async pool (sleep_for 0.01); + Pool.run_async pool (sleep_for 0.05); + + let t = Unix.gettimeofday () in + for _i = 1 to 100 do + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "schedule step" in + Pool.run_async pool (sleep_for 0.001); + Pool.run_async pool (sleep_for 0.001); + Pool.run_async pool (sleep_for 0.01) + done; + + Printf.printf "pool size: %d\n%!" (Pool.num_tasks pool); + (let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "shutdown" in + Pool.shutdown pool); + Printf.printf "pool size after shutdown: %d\n%!" (Pool.num_tasks pool); + + let elapsed = Unix.gettimeofday () -. t in + Printf.printf "elapsed: %.4fs\n%!" elapsed From 69faea0bcbc269a104665e6dd3811269e2ff8b68 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Oct 2023 12:53:19 -0400 Subject: [PATCH 03/77] wip: have only one condition in pool --- src/pool.ml | 133 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 33 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index e8e4c863..e4685b9c 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -5,6 +5,48 @@ include Runner let ( let@ ) = ( @@ ) +(** Thread safe queue, non blocking *) +module TS_queue = struct + type 'a t = { + mutex: Mutex.t; + q: 'a Queue.t; + } + + let create () : _ t = { mutex = Mutex.create (); q = Queue.create () } + + let try_push (self : _ t) x : bool = + if Mutex.try_lock self.mutex then ( + Queue.push x self.q; + Mutex.unlock self.mutex; + true + ) else + false + + let push (self : _ t) x : unit = + Mutex.lock self.mutex; + Queue.push x self.q; + Mutex.unlock self.mutex + + let try_pop ~force_lock (self : _ t) : _ option = + let has_lock = + if force_lock then ( + Mutex.lock self.mutex; + true + ) else + Mutex.try_lock self.mutex + in + if has_lock then ( + match Queue.pop self.q with + | x -> + Mutex.unlock self.mutex; + Some x + | exception Queue.Empty -> + Mutex.unlock self.mutex; + None + ) else + None +end + type thread_loop_wrapper = thread:Thread.t -> pool:t -> (unit -> unit) -> unit -> unit @@ -21,11 +63,22 @@ let add_global_thread_loop_wrapper f : unit = type state = { active: bool A.t; threads: Thread.t array; - qs: task Bb_queue.t array; + qs: task TS_queue.t array; + num_tasks: int A.t; + mutex: Mutex.t; + cond: Condition.t; cur_q: int A.t; (** Selects queue into which to push *) } (** internal state *) +let[@inline] size_ (self : state) = Array.length self.threads +let[@inline] num_tasks_ (self : state) : int = A.get self.num_tasks + +let awake_workers_ (self : state) : unit = + Mutex.lock self.mutex; + Condition.broadcast self.cond; + Mutex.unlock self.mutex + (** Run [task] as is, on the pool. *) let run_direct_ (self : state) (task : task) : unit = let n_qs = Array.length self.qs in @@ -35,22 +88,22 @@ let run_direct_ (self : state) (task : task) : unit = let[@inline] push_wait f = let q_idx = offset mod Array.length self.qs in let q = self.qs.(q_idx) in - Bb_queue.push q f + TS_queue.push q f in + let old_num_tasks = A.fetch_and_add self.num_tasks 1 in + try (* try each queue with a round-robin initial offset *) for _retry = 1 to 10 do for i = 0 to n_qs - 1 do let q_idx = (i + offset) mod Array.length self.qs in let q = self.qs.(q_idx) in - if Bb_queue.try_push q task then raise_notrace Exit + if TS_queue.try_push q task then raise_notrace Exit done done; push_wait task - with - | Exit -> () - | Bb_queue.Closed -> raise Shutdown + with Exit -> if old_num_tasks < size_ self then awake_workers_ self let rec run_async_ (self : state) (task : task) : unit = let task' () = @@ -64,12 +117,6 @@ let rec run_async_ (self : state) (task : task) : unit = run_direct_ self task' let run = run_async -let size_ (self : state) = Array.length self.threads - -let num_tasks_ (self : state) : int = - let n = ref 0 in - Array.iter (fun q -> n := !n + Bb_queue.size q) self.qs; - !n [@@@ifge 5.0] @@ -103,27 +150,41 @@ exception Got_task of task type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task -let worker_thread_ (runner : t) ~on_exn ~around_task (active : bool A.t) - (qs : task Bb_queue.t array) ~(offset : int) : unit = - let num_qs = Array.length qs in +exception Closed + +let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task + ~(offset : int) : unit = + let num_qs = Array.length self.qs in let (AT_pair (before_task, after_task)) = around_task in let get_task_without_blocking () : _ option = try for i = 0 to num_qs - 1 do - let q = qs.((offset + i) mod num_qs) in - match Bb_queue.try_pop ~force_lock:false q with + let q = self.qs.((offset + i) mod num_qs) in + match TS_queue.try_pop ~force_lock:false q with | Some f -> raise_notrace (Got_task f) | None -> () done; None - with Got_task f -> Some f + with Got_task f -> + A.decr self.num_tasks; + Some f in - (* last resort: block on my queue *) - let[@inline] pop_blocking () = - let my_q = qs.(offset mod num_qs) in - Bb_queue.pop my_q + (* last resort: block on condition or raise Closed *) + let pop_blocking () : task = + Mutex.lock self.mutex; + + try + while A.get self.active do + match get_task_without_blocking () with + | Some t -> + Mutex.unlock self.mutex; + raise_notrace (Got_task t) + | None -> Condition.wait self.cond self.mutex + done; + raise Closed + with Got_task t -> t in let run_task task : unit = @@ -147,12 +208,13 @@ let worker_thread_ (runner : t) ~on_exn ~around_task (active : bool A.t) in let main_loop () = - while A.get active do + while A.get self.active do run_tasks_already_present (); (* no task available, block until one comes *) - let task = pop_blocking () in - run_task task + match pop_blocking () with + | exception Closed -> () + | task -> run_task task done; (* cleanup *) @@ -176,9 +238,8 @@ let max_queues = 32 let shutdown_ ~wait (self : state) : unit = let was_active = A.exchange self.active false in - (* close the job queues, which will fail future calls to [run], - and wake up the subset of [self.threads] that are waiting on them. *) - if was_active then Array.iter Bb_queue.close self.qs; + (* wake up the subset of [self.threads] that are waiting on new tasks *) + if was_active then awake_workers_ self; if wait then Array.iter Thread.join self.threads type ('a, 'b) create_args = @@ -215,12 +276,20 @@ let create ?(on_init_thread = default_thread_init_exit_) let active = A.make true in let qs = let num_qs = min (min num_domains num_threads) max_queues in - Array.init num_qs (fun _ -> Bb_queue.create ()) + Array.init num_qs (fun _ -> TS_queue.create ()) in let pool = let dummy = Thread.self () in - { active; threads = Array.make num_threads dummy; qs; cur_q = A.make 0 } + { + active; + threads = Array.make num_threads dummy; + num_tasks = A.make 0; + qs; + mutex = Mutex.create (); + cond = Condition.create (); + cur_q = A.make 0; + } in let runner = @@ -250,9 +319,7 @@ let create ?(on_init_thread = default_thread_init_exit_) List.rev_append thread_wrappers (A.get global_thread_wrappers_) in - let run () = - worker_thread_ runner ~on_exn ~around_task active qs ~offset:i - in + let run () = worker_thread_ pool runner ~on_exn ~around_task ~offset:i in (* the actual worker loop is [worker_thread_], with all wrappers for this pool and for all pools (global_thread_wrappers_) *) let run' = From d15bfb07f22dd32fc8351f87c426e6ba6c6cc870 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Oct 2023 13:47:53 -0400 Subject: [PATCH 04/77] fix pool: rework scheduler to use one condition --- src/pool.ml | 77 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index e4685b9c..590a9586 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -84,26 +84,29 @@ let run_direct_ (self : state) (task : task) : unit = let n_qs = Array.length self.qs in let offset = A.fetch_and_add self.cur_q 1 in - (* blocking push, last resort *) + (* push that forces lock acquisition, last resort *) let[@inline] push_wait f = let q_idx = offset mod Array.length self.qs in let q = self.qs.(q_idx) in TS_queue.push q f in - let old_num_tasks = A.fetch_and_add self.num_tasks 1 in + (try + (* try each queue with a round-robin initial offset *) + for _retry = 1 to 10 do + for i = 0 to n_qs - 1 do + let q_idx = (i + offset) mod Array.length self.qs in + let q = self.qs.(q_idx) in - try - (* try each queue with a round-robin initial offset *) - for _retry = 1 to 10 do - for i = 0 to n_qs - 1 do - let q_idx = (i + offset) mod Array.length self.qs in - let q = self.qs.(q_idx) in - if TS_queue.try_push q task then raise_notrace Exit - done - done; - push_wait task - with Exit -> if old_num_tasks < size_ self then awake_workers_ self + if TS_queue.try_push q task then raise_notrace Exit + done + done; + push_wait task + with Exit -> ()); + + (* successfully pushed, now see if we need to wakeup workers *) + let old_num_tasks = A.fetch_and_add self.num_tasks 1 in + if old_num_tasks < size_ self then awake_workers_ self let rec run_async_ (self : state) (task : task) : unit = let task' () = @@ -157,13 +160,18 @@ let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task let num_qs = Array.length self.qs in let (AT_pair (before_task, after_task)) = around_task in - let get_task_without_blocking () : _ option = + (* try to get a task that is already in one of the queues. + @param force_lock if true, we force acquisition of the queue's mutex, + which is slower but always succeeds to get a task if there's one. *) + let get_task_already_in_queues ~force_lock () : _ option = try - for i = 0 to num_qs - 1 do - let q = self.qs.((offset + i) mod num_qs) in - match TS_queue.try_pop ~force_lock:false q with - | Some f -> raise_notrace (Got_task f) - | None -> () + for _retry = 1 to 3 do + for i = 0 to num_qs - 1 do + let q = self.qs.((offset + i) mod num_qs) in + match TS_queue.try_pop ~force_lock q with + | Some f -> raise_notrace (Got_task f) + | None -> () + done done; None with Got_task f -> @@ -171,17 +179,21 @@ let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task Some f in - (* last resort: block on condition or raise Closed *) + (* slow path: force locking when trying to get tasks, + and wait on [self.cond] if no task is currently available. *) let pop_blocking () : task = - Mutex.lock self.mutex; - try while A.get self.active do - match get_task_without_blocking () with - | Some t -> - Mutex.unlock self.mutex; - raise_notrace (Got_task t) - | None -> Condition.wait self.cond self.mutex + match get_task_already_in_queues ~force_lock:true () with + | Some t -> raise_notrace (Got_task t) + | None -> + Mutex.lock self.mutex; + (* NOTE: be careful about race conditions: we must only + block if the [shutdown] that sets [active] to [false] + has not broadcast over this condition first. Otherwise + we might miss the signal and wait here forever. *) + if A.get self.active then Condition.wait self.cond self.mutex; + Mutex.unlock self.mutex done; raise Closed with Got_task t -> t @@ -197,11 +209,12 @@ let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task after_task runner _ctx in - let run_tasks_already_present () = - (* drain the queues from existing tasks *) + (* drain the queues from existing tasks. If [force_lock=false] + then it is best effort. *) + let run_tasks_already_present ~force_lock () = let continue = ref true in while !continue do - match get_task_without_blocking () with + match get_task_already_in_queues ~force_lock () with | None -> continue := false | Some task -> run_task task done @@ -209,7 +222,7 @@ let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task let main_loop () = while A.get self.active do - run_tasks_already_present (); + run_tasks_already_present ~force_lock:false (); (* no task available, block until one comes *) match pop_blocking () with @@ -218,7 +231,7 @@ let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task done; (* cleanup *) - run_tasks_already_present () + run_tasks_already_present ~force_lock:true () in try From 9ab9df78c9f4ec34093fd7a1ec06144f5532512b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Oct 2023 13:48:23 -0400 Subject: [PATCH 05/77] update tests a bit --- benchs/fib_rec.ml | 3 ++- test/t_resource.ml | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/benchs/fib_rec.ml b/benchs/fib_rec.ml index 4ff984f4..1a3d3288 100644 --- a/benchs/fib_rec.ml +++ b/benchs/fib_rec.ml @@ -31,7 +31,8 @@ let run ~psize ~n ~seq ~niter () : unit = ) in Printf.printf "fib %d = %d\n%!" n res - done + done; + if not seq then Pool.shutdown (Lazy.force pool) let () = let n = ref 40 in diff --git a/test/t_resource.ml b/test/t_resource.ml index 5845c520..a9686867 100644 --- a/test/t_resource.ml +++ b/test/t_resource.ml @@ -4,8 +4,10 @@ let ( let@ ) = ( @@ ) (* test proper resource handling *) let () = + let@ () = Trace_tef.with_setup () in let a = Atomic.make 0 in for _i = 1 to 1_000 do + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "loop.step" in (* give a chance to domains to die *) if _i mod 100 = 0 then Thread.delay 0.8; From fb7cc5d69f3f304bf0a78a2cde97e0cb8437b38b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Oct 2023 16:56:52 -0400 Subject: [PATCH 06/77] add heavier test for a particular hangup in fork join --- test/effect-based/dune | 4 +- test/effect-based/t_fork_join.ml | 16 ++++---- test/effect-based/t_fork_join_heavy.ml | 57 ++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 10 deletions(-) create mode 100644 test/effect-based/t_fork_join_heavy.ml diff --git a/test/effect-based/dune b/test/effect-based/dune index 1d4898d3..9989823f 100644 --- a/test/effect-based/dune +++ b/test/effect-based/dune @@ -1,11 +1,11 @@ (tests (names t_fib1 t_futs1 t_many t_fib_fork_join - t_fib_fork_join_all t_sort t_fork_join) + t_fib_fork_join_all t_sort t_fork_join t_fork_join_heavy) (preprocess (action (run %{project_root}/src/cpp/cpp.exe %{input-file}))) (enabled_if (>= %{ocaml_version} 5.0)) - (libraries moonpool trace + (libraries moonpool trace trace-tef qcheck-core qcheck-core.runner ;tracy-client.trace )) diff --git a/test/effect-based/t_fork_join.ml b/test/effect-based/t_fork_join.ml index 7fc8fa31..e9110f72 100644 --- a/test/effect-based/t_fork_join.ml +++ b/test/effect-based/t_fork_join.ml @@ -326,15 +326,15 @@ let t_map ~chunk_size () = let () = QCheck_base_runner.run_tests_main [ - t_eval; - t_map ~chunk_size:1 (); - t_map ~chunk_size:50 (); - t_for_nested ~min:1 ~chunk_size:1 (); - t_for_nested ~min:4 ~chunk_size:1 (); - t_for_nested ~min:1 ~chunk_size:3 (); - t_for_nested ~min:4 ~chunk_size:3 (); + (* t_eval; *) + (* t_map ~chunk_size:1 (); *) + (* t_map ~chunk_size:50 (); *) + (* t_for_nested ~min:1 ~chunk_size:1 (); *) + (* t_for_nested ~min:4 ~chunk_size:1 (); *) + (* t_for_nested ~min:1 ~chunk_size:3 (); *) + (* t_for_nested ~min:4 ~chunk_size:3 (); *) t_for_nested ~min:1 ~chunk_size:100 (); - t_for_nested ~min:4 ~chunk_size:100 (); + (* t_for_nested ~min:4 ~chunk_size:100 (); *) ] [@@@endif] diff --git a/test/effect-based/t_fork_join_heavy.ml b/test/effect-based/t_fork_join_heavy.ml new file mode 100644 index 00000000..be86299a --- /dev/null +++ b/test/effect-based/t_fork_join_heavy.ml @@ -0,0 +1,57 @@ +[@@@ifge 5.0] + +module Q = QCheck + +let spf = Printf.sprintf +let ( let@ ) = ( @@ ) +let ppl = Q.Print.(list @@ list int) + +open! Moonpool + +let run ~min () = + let@ _sp = + Trace.with_span ~__FILE__ ~__LINE__ "run" ~data:(fun () -> + [ "min", `Int min ]) + in + + Printf.printf "run with min=%d\n%!" min; + let neg x = -x in + + let chunk_size = 100 in + let l = List.init 300 (fun _ -> List.init 15 (fun i -> i)) in + + let ref_l1 = List.map (List.map neg) l in + let ref_l2 = List.map (List.map neg) ref_l1 in + + for _i = 1 to 800 do + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "step" in + + let l1, l2 = + let@ pool = Pool.with_ ~min () in + let@ () = Pool.run_wait_block pool in + + let l1, l2 = + Fork_join.both + (fun () -> + Fork_join.map_list ~chunk_size + (Fork_join.map_list ~chunk_size neg) + l) + (fun () -> + Fork_join.map_list ~chunk_size + (Fork_join.map_list ~chunk_size neg) + ref_l1) + in + l1, l2 + in + + if l1 <> ref_l1 then failwith (spf "l1=%s, ref_l1=%s" (ppl l1) (ppl ref_l1)); + if l2 <> ref_l2 then failwith (spf "l1=%s, ref_l1=%s" (ppl l2) (ppl ref_l2)) + done + +let () = + let@ () = Trace_tef.with_setup () in + run ~min:4 (); + run ~min:1 (); + Printf.printf "done\n%!" + +[@@@endif] From 43eca1d4e222055fe1cfe8f7b9ec603e79954976 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 24 Oct 2023 19:56:18 -0400 Subject: [PATCH 07/77] restore test --- test/effect-based/t_fork_join.ml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/effect-based/t_fork_join.ml b/test/effect-based/t_fork_join.ml index e9110f72..7fc8fa31 100644 --- a/test/effect-based/t_fork_join.ml +++ b/test/effect-based/t_fork_join.ml @@ -326,15 +326,15 @@ let t_map ~chunk_size () = let () = QCheck_base_runner.run_tests_main [ - (* t_eval; *) - (* t_map ~chunk_size:1 (); *) - (* t_map ~chunk_size:50 (); *) - (* t_for_nested ~min:1 ~chunk_size:1 (); *) - (* t_for_nested ~min:4 ~chunk_size:1 (); *) - (* t_for_nested ~min:1 ~chunk_size:3 (); *) - (* t_for_nested ~min:4 ~chunk_size:3 (); *) + t_eval; + t_map ~chunk_size:1 (); + t_map ~chunk_size:50 (); + t_for_nested ~min:1 ~chunk_size:1 (); + t_for_nested ~min:4 ~chunk_size:1 (); + t_for_nested ~min:1 ~chunk_size:3 (); + t_for_nested ~min:4 ~chunk_size:3 (); t_for_nested ~min:1 ~chunk_size:100 (); - (* t_for_nested ~min:4 ~chunk_size:100 (); *) + t_for_nested ~min:4 ~chunk_size:100 (); ] [@@@endif] From 4c4b7203069e65efb5a4db107c35f9c23b22865d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:08:48 -0400 Subject: [PATCH 08/77] feat: add Simple_pool, with the naive single-queue implementation --- src/simple_pool.ml | 161 ++++++++++++++++++++++++++++++++++++++++++++ src/simple_pool.mli | 36 ++++++++++ 2 files changed, 197 insertions(+) create mode 100644 src/simple_pool.ml create mode 100644 src/simple_pool.mli diff --git a/src/simple_pool.ml b/src/simple_pool.ml new file mode 100644 index 00000000..54570bbb --- /dev/null +++ b/src/simple_pool.ml @@ -0,0 +1,161 @@ +include Runner + +let ( let@ ) = ( @@ ) + +type state = { + threads: Thread.t array; + q: task Bb_queue.t; (** Queue for tasks. *) +} +(** internal state *) + +let[@inline] size_ (self : state) = Array.length self.threads +let[@inline] num_tasks_ (self : state) : int = Bb_queue.size self.q + +(** Run [task] as is, on the pool. *) +let run_direct_ (self : state) (task : task) : unit = Bb_queue.push self.q task + +let rec run_async_ (self : state) (task : task) : unit = + let task' () = + (* run [f()] and handle [suspend] in it *) + Suspend_.with_suspend task ~run:(fun ~with_handler task -> + if with_handler then + run_async_ self task + else + run_direct_ self task) + in + run_direct_ self task' + +type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task + +let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task : unit = + let (AT_pair (before_task, after_task)) = around_task in + + let run_task task : unit = + let _ctx = before_task runner in + (* run the task now, catching errors *) + (try task () + with e -> + let bt = Printexc.get_raw_backtrace () in + on_exn e bt); + after_task runner _ctx + in + + let main_loop () = + let continue = ref true in + while !continue do + match Bb_queue.pop self.q with + | task -> run_task task + | exception Bb_queue.Closed -> continue := false + done + in + + try + (* handle domain-local await *) + Dla_.using ~prepare_for_await:Suspend_.prepare_for_await + ~while_running:main_loop + with Bb_queue.Closed -> () + +let default_thread_init_exit_ ~dom_id:_ ~t_id:_ () = () + +let shutdown_ ~wait (self : state) : unit = + Bb_queue.close self.q; + if wait then Array.iter Thread.join self.threads + +type ('a, 'b) create_args = + ?on_init_thread:(dom_id:int -> t_id:int -> unit -> unit) -> + ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> + ?on_exn:(exn -> Printexc.raw_backtrace -> unit) -> + ?around_task:(t -> 'b) * (t -> 'b -> unit) -> + ?min:int -> + ?per_domain:int -> + 'a + +let create ?(on_init_thread = default_thread_init_exit_) + ?(on_exit_thread = default_thread_init_exit_) ?(on_exn = fun _ _ -> ()) + ?around_task ?min:(min_threads = 1) ?(per_domain = 0) () : t = + (* wrapper *) + let around_task = + match around_task with + | Some (f, g) -> AT_pair (f, g) + | None -> AT_pair (ignore, fun _ _ -> ()) + in + + (* number of threads to run *) + let min_threads = max 1 min_threads in + let num_domains = D_pool_.n_domains () in + assert (num_domains >= 1); + let num_threads = max min_threads (num_domains * per_domain) in + + (* make sure we don't bias towards the first domain(s) in {!D_pool_} *) + let offset = Random.int num_domains in + + let pool = + let dummy = Thread.self () in + { threads = Array.make num_threads dummy; q = Bb_queue.create () } + in + + let runner = + Runner.For_runner_implementors.create + ~shutdown:(fun ~wait () -> shutdown_ pool ~wait) + ~run_async:(fun f -> run_async_ pool f) + ~size:(fun () -> size_ pool) + ~num_tasks:(fun () -> num_tasks_ pool) + () + in + + (* temporary queue used to obtain thread handles from domains + on which the thread are started. *) + let receive_threads = Bb_queue.create () in + + (* start the thread with index [i] *) + let start_thread_with_idx i = + let dom_idx = (offset + i) mod num_domains in + + (* function run in the thread itself *) + let main_thread_fun () : unit = + let thread = Thread.self () in + let t_id = Thread.id thread in + on_init_thread ~dom_id:dom_idx ~t_id (); + + let run () = worker_thread_ pool runner ~on_exn ~around_task in + + (* now run the main loop *) + Fun.protect run ~finally:(fun () -> + (* on termination, decrease refcount of underlying domain *) + D_pool_.decr_on dom_idx); + on_exit_thread ~dom_id:dom_idx ~t_id () + in + + (* function called in domain with index [i], to + create the thread and push it into [receive_threads] *) + let create_thread_in_domain () = + let thread = Thread.create main_thread_fun () in + (* send the thread from the domain back to us *) + Bb_queue.push receive_threads (i, thread) + in + + D_pool_.run_on dom_idx create_thread_in_domain + in + + (* start all threads, placing them on the domains + according to their index and [offset] in a round-robin fashion. *) + for i = 0 to num_threads - 1 do + start_thread_with_idx i + done; + + (* receive the newly created threads back from domains *) + for _j = 1 to num_threads do + let i, th = Bb_queue.pop receive_threads in + pool.threads.(i) <- th + done; + + runner + +let with_ ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?min ?per_domain + () f = + let pool = + create ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?min ?per_domain + () + in + let@ () = Fun.protect ~finally:(fun () -> shutdown pool) in + f pool diff --git a/src/simple_pool.mli b/src/simple_pool.mli new file mode 100644 index 00000000..b7f89824 --- /dev/null +++ b/src/simple_pool.mli @@ -0,0 +1,36 @@ +(** A simple thread pool. + + This uses a single blocking queue to manage tasks, it's very + simple and reliable. Like {!Pool} it distributes a fixed number + of workers over several domains. + + @since NEXT_RELEASE *) + +include module type of Runner + +type ('a, 'b) create_args = + ?on_init_thread:(dom_id:int -> t_id:int -> unit -> unit) -> + ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> + ?on_exn:(exn -> Printexc.raw_backtrace -> unit) -> + ?around_task:(t -> 'b) * (t -> 'b -> unit) -> + ?min:int -> + ?per_domain:int -> + 'a +(** Arguments used in {!create}. See {!create} for explanations. *) + +val create : (unit -> t, _) create_args +(** [create ()] makes a new thread pool. + @param on_init_thread called at the beginning of each new thread in the pool. + @param min minimum size of the pool. See {!Pool.create_args}. + @param per_domain is the number of threads allocated per domain in the fixed + domain pool. See {!Pool.create_args}. + @param on_exit_thread called at the end of each worker thread in the pool. + @param around_task a pair of [before, after] functions + ran around each task. See {!Pool.create_args}. + *) + +val with_ : (unit -> (t -> 'a) -> 'a, _) create_args +(** [with_ () f] calls [f pool], where [pool] is obtained via {!create}. + When [f pool] returns or fails, [pool] is shutdown and its resources + are released. + Most parameters are the same as in {!create}. *) From 0cec78eb30428cbedc0c1cfd4e5d9e038f1c226b Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:09:09 -0400 Subject: [PATCH 09/77] refactor: move some common code to Suspend_ --- src/suspend_.ml | 21 +++++++++++++++++++++ src/suspend_.mli | 3 +++ 2 files changed, 24 insertions(+) diff --git a/src/suspend_.ml b/src/suspend_.ml index 1a0b55df..19accc9c 100644 --- a/src/suspend_.ml +++ b/src/suspend_.ml @@ -1,3 +1,5 @@ +module A = Atomic_ + type suspension = (unit, exn * Printexc.raw_backtrace) result -> unit type task = unit -> unit @@ -32,9 +34,28 @@ let with_suspend ~(run : with_handler:bool -> task -> unit) (f : unit -> unit) : E.try_with f () { E.effc } +(* DLA interop *) +let prepare_for_await () : Dla_.t = + (* current state *) + let st : ((with_handler:bool -> task -> unit) * suspension) option A.t = + A.make None + in + + let release () : unit = + match A.exchange st None with + | None -> () + | Some (run, k) -> run ~with_handler:true (fun () -> k (Ok ())) + and await () : unit = + suspend { handle = (fun ~run k -> A.set st (Some (run, k))) } + in + + let t = { Dla_.release; await } in + t + [@@@ocaml.alert "+unstable"] [@@@else_] let with_suspend ~run:_ f = f () +let prepare_for_await () = { Dla_.release = ignore; await = ignore } [@@@endif] diff --git a/src/suspend_.mli b/src/suspend_.mli index 032bc3e0..716e9b8a 100644 --- a/src/suspend_.mli +++ b/src/suspend_.mli @@ -50,6 +50,9 @@ val suspend : suspension_handler -> unit [@@@endif] +val prepare_for_await : unit -> Dla_.t +(** Our stub for DLA. Unstable. *) + val with_suspend : run:(with_handler:bool -> task -> unit) -> (unit -> unit) -> unit (** [with_suspend ~run f] runs [f()] in an environment where [suspend] From 95b27b3a70fab6be6af8205aa3cdf998c899f025 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:09:28 -0400 Subject: [PATCH 10/77] feat: add Ws_deque_ --- src/ws_deque_.ml | 107 ++++++++++++++++++++++++++++++++++++++++++++++ src/ws_deque_.mli | 21 +++++++++ 2 files changed, 128 insertions(+) create mode 100644 src/ws_deque_.ml create mode 100644 src/ws_deque_.mli diff --git a/src/ws_deque_.ml b/src/ws_deque_.ml new file mode 100644 index 00000000..7ca39f86 --- /dev/null +++ b/src/ws_deque_.ml @@ -0,0 +1,107 @@ +module A = Atomic_ + +(* terminology: + + - Bottom: where we push/pop normally. Only one thread can do that. + - top: where work stealing happens (older values). + This only ever grows. + + Elements are always added on the bottom end. *) + +(** Circular array (size is [2 ^ log_size]) *) +module CA : sig + type 'a t + + val create : log_size:int -> unit -> 'a t + val size : _ t -> int + val get : 'a t -> int -> 'a + val set : 'a t -> int -> 'a -> unit + val grow : 'a t -> bottom:int -> top:int -> 'a t +end = struct + type 'a t = { + log_size: int; + arr: 'a option array; + } + + let[@inline] size (self : _ t) = 1 lsl self.log_size + + let create ~log_size () : _ t = + { log_size; arr = Array.make (1 lsl log_size) None } + + let[@inline] get (self : _ t) (i : int) : 'a = + match Array.unsafe_get self.arr (i mod size self) with + | Some x -> x + | None -> assert false + + let[@inline] set (self : 'a t) (i : int) (x : 'a) : unit = + Array.unsafe_set self.arr (i mod size self) (Some x) + + let grow (self : _ t) ~bottom ~top : 'a t = + let new_arr = create ~log_size:(self.log_size + 1) () in + for i = top to bottom - 1 do + set new_arr i (get self i) + done; + new_arr +end + +type 'a t = { + top: int A.t; (** Where we steal *) + bottom: int A.t; (** Where we push/pop from the owning thread *) + mutable arr: 'a CA.t; (** The circular array *) +} + +let create () : _ t = + let arr = CA.create ~log_size:4 () in + { top = A.make 0; bottom = A.make 0; arr } + +let[@inline] size (self : _ t) : int = max 0 (A.get self.bottom - A.get self.top) + +let push (self : 'a t) (x : 'a) : unit = + let b = A.get self.bottom in + let t = A.get self.top in + let size = b - t in + + if size >= CA.size self.arr - 1 then + self.arr <- CA.grow self.arr ~top:t ~bottom:b; + + CA.set self.arr b x; + A.set self.bottom (b + 1) + +let pop (self : 'a t) : 'a option = + let b = A.get self.bottom in + let arr = self.arr in + let b = b - 1 in + A.set self.bottom b; + let t = A.get self.top in + let size = b - t in + if size < 0 then ( + A.set self.bottom t; + None + ) else if size > 0 then ( + let x = CA.get arr b in + Some x + ) else if A.compare_and_set self.top t (t + 1) then ( + (* exactly one slot, so we might be racing against stealers + to update [self.top] *) + let x = CA.get arr b in + A.set self.bottom (t + 1); + Some x + ) else + None + +let steal (self : 'a t) : 'a option = + let t = A.get self.top in + let b = A.get self.bottom in + let arr = self.arr in + + let size = b - t in + if size <= 0 then + None + else ( + let x = CA.get arr t in + if A.compare_and_set self.top t (t + 1) then + (* successfully increased top to consume [x] *) + Some x + else + None + ) diff --git a/src/ws_deque_.mli b/src/ws_deque_.mli new file mode 100644 index 00000000..0b243f68 --- /dev/null +++ b/src/ws_deque_.mli @@ -0,0 +1,21 @@ +(** Work-stealing deque. + + Adapted from "Dynamic circular work stealing deque", Chase & Lev + *) + +type 'a t +(** Deque containing values of type ['a] *) + +val create : unit -> _ t +(** Create a new deque. *) + +val push : 'a t -> 'a -> unit +(** Push value at the bottom of deque. This is not thread-safe. *) + +val pop : 'a t -> 'a option +(** Pop value from the bottom of deque. This is not thread-safe. *) + +val steal : 'a t -> 'a option +(** Try to steal from the top of deque. This is thread-safe. *) + +val size : _ t -> int From ae5f3a7e97485a3391def440fd7d678b95f6cac2 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:09:38 -0400 Subject: [PATCH 11/77] tests for ws_deque --- test/t_ws_deque.ml | 99 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 test/t_ws_deque.ml diff --git a/test/t_ws_deque.ml b/test/t_ws_deque.ml new file mode 100644 index 00000000..4a364166 --- /dev/null +++ b/test/t_ws_deque.ml @@ -0,0 +1,99 @@ +module A = Moonpool.Atomic +module D = Moonpool.Private.Ws_deque_ + +let ( let@ ) = ( @@ ) + +let t_simple () = + let d = D.create () in + assert (D.steal d = None); + assert (D.pop d = None); + D.push d 1; + D.push d 2; + assert (D.pop d = Some 2); + assert (D.steal d = Some 1); + assert (D.steal d = None); + assert (D.pop d = None); + D.push d 3; + assert (D.pop d = Some 3); + D.push d 4; + D.push d 5; + D.push d 6; + assert (D.steal d = Some 4); + assert (D.steal d = Some 5); + assert (D.pop d = Some 6); + assert (D.pop d = None); + + Printf.printf "basic tests passed\n"; + () + +(* big heavy test *) +let t_heavy () = + let sum = A.make 0 in + let ref_sum = ref 0 in + + let[@inline] add_to_sum x = ignore (A.fetch_and_add sum x : int) in + + let active = A.make true in + + let d = D.create () in + + let stealer_loop () = + Trace.set_thread_name "stealer"; + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "stealer" in + while A.get active do + match D.steal d with + | None -> Thread.yield () + | Some x -> add_to_sum x + done + in + + let main_loop () = + Trace.set_thread_name "producer"; + for _i = 1 to 100_000 do + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "main.outer" in + for j = 1 to 100 do + ref_sum := !ref_sum + j; + D.push d j; + ref_sum := !ref_sum + j; + D.push d j; + + Option.iter (fun x -> add_to_sum x) (D.pop d); + Option.iter (fun x -> add_to_sum x) (D.pop d) + done; + + (* now compete with stealers to pop *) + let continue = ref true in + while !continue do + match D.pop d with + | Some x -> add_to_sum x + | None -> continue := false + done + done + in + + let ts = + Array.init 6 (fun _ -> Moonpool.start_thread_on_some_domain stealer_loop ()) + in + let t = Moonpool.start_thread_on_some_domain main_loop () in + + (* stop *) + A.set active false; + + Trace.message "joining t"; + Thread.join t; + Trace.message "joining stealers"; + Array.iter Thread.join ts; + Trace.message "done"; + + let ref_sum = !ref_sum in + let sum = A.get sum in + + Printf.printf "ref sum = %d, sum = %d\n%!" ref_sum sum; + assert (ref_sum = sum); + () + +let () = + let@ () = Trace_tef.with_setup () in + t_simple (); + t_heavy (); + () From f2e9f99b3624a8f7ef9de00da1b97ec486f0c522 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:16:54 -0400 Subject: [PATCH 12/77] perf ws_deque: implement shrinking and a push optim --- src/ws_deque_.ml | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/ws_deque_.ml b/src/ws_deque_.ml index 7ca39f86..be5e4b47 100644 --- a/src/ws_deque_.ml +++ b/src/ws_deque_.ml @@ -17,6 +17,7 @@ module CA : sig val get : 'a t -> int -> 'a val set : 'a t -> int -> 'a -> unit val grow : 'a t -> bottom:int -> top:int -> 'a t + val shrink : 'a t -> bottom:int -> top:int -> 'a t end = struct type 'a t = { log_size: int; @@ -42,43 +43,69 @@ end = struct set new_arr i (get self i) done; new_arr + + let shrink (self : _ t) ~bottom ~top : 'a t = + let new_arr = create ~log_size:(self.log_size - 1) () in + for i = top to bottom - 1 do + set new_arr i (get self i) + done; + new_arr end type 'a t = { top: int A.t; (** Where we steal *) bottom: int A.t; (** Where we push/pop from the owning thread *) + mutable top_cached: int; (** Last read value of [top] *) mutable arr: 'a CA.t; (** The circular array *) } let create () : _ t = let arr = CA.create ~log_size:4 () in - { top = A.make 0; bottom = A.make 0; arr } + { top = A.make 0; top_cached = 0; bottom = A.make 0; arr } let[@inline] size (self : _ t) : int = max 0 (A.get self.bottom - A.get self.top) let push (self : 'a t) (x : 'a) : unit = let b = A.get self.bottom in - let t = A.get self.top in - let size = b - t in + let t_approx = self.top_cached in - if size >= CA.size self.arr - 1 then - self.arr <- CA.grow self.arr ~top:t ~bottom:b; + (* Section 2.3: over-approximation of size. + Only if it seems too big do we actually read [t]. *) + let size_approx = b - t_approx in + if size_approx >= CA.size self.arr - 1 then ( + let t = A.get self.top in + self.top_cached <- t; + let size = b - t in + + if size >= CA.size self.arr - 1 then + self.arr <- CA.grow self.arr ~top:t ~bottom:b + ); CA.set self.arr b x; A.set self.bottom (b + 1) +let perhaps_shrink (self : _ t) ~top ~bottom : unit = + let size = bottom - top in + let ca_size = CA.size self.arr in + if ca_size >= 256 && size <= ca_size / 3 then + self.arr <- CA.shrink self.arr ~top ~bottom + let pop (self : 'a t) : 'a option = let b = A.get self.bottom in let arr = self.arr in let b = b - 1 in A.set self.bottom b; + let t = A.get self.top in + self.top_cached <- t; + let size = b - t in if size < 0 then ( A.set self.bottom t; None ) else if size > 0 then ( let x = CA.get arr b in + perhaps_shrink self ~bottom:b ~top:t; Some x ) else if A.compare_and_set self.top t (t + 1) then ( (* exactly one slot, so we might be racing against stealers @@ -90,7 +117,10 @@ let pop (self : 'a t) : 'a option = None let steal (self : 'a t) : 'a option = + (* read [top], but do not update [top_cached] + as we're in another thread *) let t = A.get self.top in + let b = A.get self.bottom in let arr = self.arr in From e67ab53f9f12de163cba6b0684836d88020f0939 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:19:34 -0400 Subject: [PATCH 13/77] feat pool: rewrite main pool to use work stealing there's a single blocking queue, and one WS_queue per worker. Scheduling into the pool from a worker (e.g. via fork_join or explicitly) will push into this WS queue; otherwise it goes into the main blocking queue. Workers will always try to empty their local queue first, then try to work steal, then block on the main queue. --- src/dune | 2 +- src/moonpool.ml | 7 +- src/moonpool.mli | 16 ++- src/pool.ml | 327 +++++++++++++++-------------------------------- src/pool.mli | 5 - 5 files changed, 120 insertions(+), 237 deletions(-) diff --git a/src/dune b/src/dune index d65920e8..313191a5 100644 --- a/src/dune +++ b/src/dune @@ -1,7 +1,7 @@ (library (public_name moonpool) (name moonpool) - (private_modules d_pool_) + (private_modules d_pool_ dla_) (preprocess (action (run %{project_root}/src/cpp/cpp.exe %{input-file}))) diff --git a/src/moonpool.ml b/src/moonpool.ml index 83ae22a8..97da4d2a 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -11,4 +11,9 @@ module Fut = Fut module Lock = Lock module Pool = Pool module Runner = Runner -module Suspend_ = Suspend_ +module Simple_pool = Simple_pool + +module Private = struct + module Ws_deque_ = Ws_deque_ + module Suspend_ = Suspend_ +end diff --git a/src/moonpool.mli b/src/moonpool.mli index 1d300665..74b48772 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -5,6 +5,7 @@ *) module Pool = Pool +module Simple_pool = Simple_pool module Runner = Runner val start_thread_on_some_domain : ('a -> unit) -> 'a -> Thread.t @@ -141,12 +142,19 @@ module Atomic = Atomic_ This is either a shim using [ref], on pre-OCaml 5, or the standard [Atomic] module on OCaml 5. *) -(** {2 Suspensions} *) +(**/**) -module Suspend_ = Suspend_ -[@@alert unstable "this module is an implementation detail of moonpool for now"] -(** Suspensions. +module Private : sig + module Ws_deque_ = Ws_deque_ + + (** {2 Suspensions} *) + + module Suspend_ = Suspend_ + [@@alert + unstable "this module is an implementation detail of moonpool for now"] + (** Suspensions. This is only going to work on OCaml 5.x. {b NOTE}: this is not stable for now. *) +end diff --git a/src/pool.ml b/src/pool.ml index 590a9586..4ce08f76 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -1,204 +1,76 @@ -(* TODO: use a better queue for the tasks *) - -module A = Atomic_ +module WSQ = Ws_deque_ include Runner let ( let@ ) = ( @@ ) -(** Thread safe queue, non blocking *) -module TS_queue = struct - type 'a t = { - mutex: Mutex.t; - q: 'a Queue.t; - } - - let create () : _ t = { mutex = Mutex.create (); q = Queue.create () } - - let try_push (self : _ t) x : bool = - if Mutex.try_lock self.mutex then ( - Queue.push x self.q; - Mutex.unlock self.mutex; - true - ) else - false - - let push (self : _ t) x : unit = - Mutex.lock self.mutex; - Queue.push x self.q; - Mutex.unlock self.mutex - - let try_pop ~force_lock (self : _ t) : _ option = - let has_lock = - if force_lock then ( - Mutex.lock self.mutex; - true - ) else - Mutex.try_lock self.mutex - in - if has_lock then ( - match Queue.pop self.q with - | x -> - Mutex.unlock self.mutex; - Some x - | exception Queue.Empty -> - Mutex.unlock self.mutex; - None - ) else - None -end - type thread_loop_wrapper = thread:Thread.t -> pool:t -> (unit -> unit) -> unit -> unit -let global_thread_wrappers_ : thread_loop_wrapper list A.t = A.make [] - -let add_global_thread_loop_wrapper f : unit = - while - let l = A.get global_thread_wrappers_ in - not (A.compare_and_set global_thread_wrappers_ l (f :: l)) - do - Domain_.relax () - done +type worker_state = { + mutable thread: Thread.t; + q: task WSQ.t; (** Work stealing queue *) +} type state = { - active: bool A.t; - threads: Thread.t array; - qs: task TS_queue.t array; - num_tasks: int A.t; - mutex: Mutex.t; - cond: Condition.t; - cur_q: int A.t; (** Selects queue into which to push *) + workers: worker_state array; + main_q: task Bb_queue.t; (** Main queue to block on *) } (** internal state *) -let[@inline] size_ (self : state) = Array.length self.threads -let[@inline] num_tasks_ (self : state) : int = A.get self.num_tasks +let[@inline] size_ (self : state) = Array.length self.workers -let awake_workers_ (self : state) : unit = - Mutex.lock self.mutex; - Condition.broadcast self.cond; - Mutex.unlock self.mutex +let num_tasks_ (self : state) : int = + let n = ref (Bb_queue.size self.main_q) in + Array.iter (fun w -> n := !n + WSQ.size w.q) self.workers; + !n + +exception Got_worker of worker_state + +let find_current_worker_ (self : state) : worker_state option = + let self_id = Thread.id @@ Thread.self () in + try + (* see if we're in one of the worker threads *) + for i = 0 to Array.length self.workers - 1 do + let w = self.workers.(i) in + if Thread.id w.thread = self_id then raise_notrace (Got_worker w) + done; + None + with Got_worker w -> Some w (** Run [task] as is, on the pool. *) -let run_direct_ (self : state) (task : task) : unit = - let n_qs = Array.length self.qs in - let offset = A.fetch_and_add self.cur_q 1 in +let run_direct_ (self : state) (w : worker_state option) (task : task) : unit = + match w with + | Some w -> WSQ.push w.q task + | None -> Bb_queue.push self.main_q task - (* push that forces lock acquisition, last resort *) - let[@inline] push_wait f = - let q_idx = offset mod Array.length self.qs in - let q = self.qs.(q_idx) in - TS_queue.push q f +let run_async_ (self : state) (task : task) : unit = + (* stay on current worker if possible *) + let w = find_current_worker_ self in + + let rec run_async_rec_ (task : task) = + let task_with_suspend_ () = + (* run [f()] and handle [suspend] in it *) + Suspend_.with_suspend task ~run:(fun ~with_handler task' -> + if with_handler then + run_async_rec_ task' + else + run_direct_ self w task') + in + run_direct_ self w task_with_suspend_ in - - (try - (* try each queue with a round-robin initial offset *) - for _retry = 1 to 10 do - for i = 0 to n_qs - 1 do - let q_idx = (i + offset) mod Array.length self.qs in - let q = self.qs.(q_idx) in - - if TS_queue.try_push q task then raise_notrace Exit - done - done; - push_wait task - with Exit -> ()); - - (* successfully pushed, now see if we need to wakeup workers *) - let old_num_tasks = A.fetch_and_add self.num_tasks 1 in - if old_num_tasks < size_ self then awake_workers_ self - -let rec run_async_ (self : state) (task : task) : unit = - let task' () = - (* run [f()] and handle [suspend] in it *) - Suspend_.with_suspend task ~run:(fun ~with_handler task -> - if with_handler then - run_async_ self task - else - run_direct_ self task) - in - run_direct_ self task' + run_async_rec_ task let run = run_async -[@@@ifge 5.0] - -(* DLA interop *) -let prepare_for_await () : Dla_.t = - (* current state *) - let st : - ((with_handler:bool -> task -> unit) * Suspend_.suspension) option A.t = - A.make None - in - - let release () : unit = - match A.exchange st None with - | None -> () - | Some (run, k) -> run ~with_handler:true (fun () -> k (Ok ())) - and await () : unit = - Suspend_.suspend - { Suspend_.handle = (fun ~run k -> A.set st (Some (run, k))) } - in - - let t = { Dla_.release; await } in - t - -[@@@else_] - -let prepare_for_await () = { Dla_.release = ignore; await = ignore } - -[@@@endif] - exception Got_task of task type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task -exception Closed - -let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task - ~(offset : int) : unit = - let num_qs = Array.length self.qs in +let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn + ~around_task : unit = let (AT_pair (before_task, after_task)) = around_task in - (* try to get a task that is already in one of the queues. - @param force_lock if true, we force acquisition of the queue's mutex, - which is slower but always succeeds to get a task if there's one. *) - let get_task_already_in_queues ~force_lock () : _ option = - try - for _retry = 1 to 3 do - for i = 0 to num_qs - 1 do - let q = self.qs.((offset + i) mod num_qs) in - match TS_queue.try_pop ~force_lock q with - | Some f -> raise_notrace (Got_task f) - | None -> () - done - done; - None - with Got_task f -> - A.decr self.num_tasks; - Some f - in - - (* slow path: force locking when trying to get tasks, - and wait on [self.cond] if no task is currently available. *) - let pop_blocking () : task = - try - while A.get self.active do - match get_task_already_in_queues ~force_lock:true () with - | Some t -> raise_notrace (Got_task t) - | None -> - Mutex.lock self.mutex; - (* NOTE: be careful about race conditions: we must only - block if the [shutdown] that sets [active] to [false] - has not broadcast over this condition first. Otherwise - we might miss the signal and wait here forever. *) - if A.get self.active then Condition.wait self.cond self.mutex; - Mutex.unlock self.mutex - done; - raise Closed - with Got_task t -> t - in - + (* run this task. *) let run_task task : unit = let _ctx = before_task runner in (* run the task now, catching errors *) @@ -209,51 +81,69 @@ let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task after_task runner _ctx in - (* drain the queues from existing tasks. If [force_lock=false] - then it is best effort. *) - let run_tasks_already_present ~force_lock () = + let run_self_tasks_ () = let continue = ref true in + let pop_retries = ref 0 in while !continue do - match get_task_already_in_queues ~force_lock () with - | None -> continue := false - | Some task -> run_task task + match WSQ.pop w.q with + | Some task -> + pop_retries := 0; + run_task task + | None -> + incr pop_retries; + if !pop_retries > 10 then continue := false done in + (* get a task from another worker *) + let try_to_steal_work () : task option = + try + for _retry = 1 to 3 do + Array.iter + (fun w' -> + if w != w' then ( + match WSQ.steal w'.q with + | None -> () + | Some task -> raise_notrace (Got_task task) + )) + self.workers + done; + None + with Got_task task -> Some task + in + let main_loop () = - while A.get self.active do - run_tasks_already_present ~force_lock:false (); + let steal_attempts = ref 0 in + while true do + run_self_tasks_ (); - (* no task available, block until one comes *) - match pop_blocking () with - | exception Closed -> () - | task -> run_task task - done; + match try_to_steal_work () with + | Some task -> + steal_attempts := 0; + run_task task + | None -> + incr steal_attempts; + Domain_.relax (); - (* cleanup *) - run_tasks_already_present ~force_lock:true () + if !steal_attempts > 10 then ( + steal_attempts := 0; + let task = Bb_queue.pop self.main_q in + run_task task + ) + done in try (* handle domain-local await *) - Dla_.using ~prepare_for_await ~while_running:main_loop + Dla_.using ~prepare_for_await:Suspend_.prepare_for_await + ~while_running:main_loop with Bb_queue.Closed -> () let default_thread_init_exit_ ~dom_id:_ ~t_id:_ () = () -(** We want a reasonable number of queues. Even if your system is - a beast with hundreds of cores, trying - to work-steal through hundreds of queues will have a cost. - - Hence, we limit the number of queues to at most 32 (number picked - via the ancestral technique of the pifomètre). *) -let max_queues = 32 - let shutdown_ ~wait (self : state) : unit = - let was_active = A.exchange self.active false in - (* wake up the subset of [self.threads] that are waiting on new tasks *) - if was_active then awake_workers_ self; - if wait then Array.iter Thread.join self.threads + Bb_queue.close self.main_q; + if wait then Array.iter (fun w -> Thread.join w.thread) self.workers type ('a, 'b) create_args = ?on_init_thread:(dom_id:int -> t_id:int -> unit -> unit) -> @@ -286,24 +176,12 @@ let create ?(on_init_thread = default_thread_init_exit_) (* make sure we don't bias towards the first domain(s) in {!D_pool_} *) let offset = Random.int num_domains in - let active = A.make true in - let qs = - let num_qs = min (min num_domains num_threads) max_queues in - Array.init num_qs (fun _ -> TS_queue.create ()) + let workers : worker_state array = + let dummy = Thread.self () in + Array.init num_threads (fun _ -> { thread = dummy; q = WSQ.create () }) in - let pool = - let dummy = Thread.self () in - { - active; - threads = Array.make num_threads dummy; - num_tasks = A.make 0; - qs; - mutex = Mutex.create (); - cond = Condition.create (); - cur_q = A.make 0; - } - in + let pool = { workers; main_q = Bb_queue.create () } in let runner = Runner.For_runner_implementors.create @@ -320,6 +198,7 @@ let create ?(on_init_thread = default_thread_init_exit_) (* start the thread with index [i] *) let start_thread_with_idx i = + let w = pool.workers.(i) in let dom_idx = (offset + i) mod num_domains in (* function run in the thread itself *) @@ -328,17 +207,13 @@ let create ?(on_init_thread = default_thread_init_exit_) let t_id = Thread.id thread in on_init_thread ~dom_id:dom_idx ~t_id (); - let all_wrappers = - List.rev_append thread_wrappers (A.get global_thread_wrappers_) - in - - let run () = worker_thread_ pool runner ~on_exn ~around_task ~offset:i in + let run () = worker_thread_ pool runner w ~on_exn ~around_task in (* the actual worker loop is [worker_thread_], with all wrappers for this pool and for all pools (global_thread_wrappers_) *) let run' = List.fold_left (fun run f -> f ~thread ~pool:runner run) - run all_wrappers + run thread_wrappers in (* now run the main loop *) @@ -368,7 +243,7 @@ let create ?(on_init_thread = default_thread_init_exit_) (* receive the newly created threads back from domains *) for _j = 1 to num_threads do let i, th = Bb_queue.pop receive_threads in - pool.threads.(i) <- th + pool.workers.(i).thread <- th done; runner diff --git a/src/pool.mli b/src/pool.mli index 11cac88b..f7a42633 100644 --- a/src/pool.mli +++ b/src/pool.mli @@ -23,11 +23,6 @@ type thread_loop_wrapper = By default it just returns the same loop function but it can be used to install tracing, effect handlers, etc. *) -val add_global_thread_loop_wrapper : thread_loop_wrapper -> unit -(** [add_global_thread_loop_wrapper f] installs [f] to be installed in every new pool worker - thread, for all existing pools, and all new pools created with [create]. - These wrappers accumulate: they all apply, but their order is not specified. *) - type ('a, 'b) create_args = ?on_init_thread:(dom_id:int -> t_id:int -> unit -> unit) -> ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> From c03e342178e6c7466df8107e6850315ceb5b0519 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:21:07 -0400 Subject: [PATCH 14/77] tests: run some tests on both Pool and Simple_pool --- test/dune | 1 + test/t_fib.ml | 28 ++++++++++++++--- test/t_fib_rec.ml | 44 ++++++++++++++++++++------ test/t_props.ml | 77 ++++++++++++++++++++++++--------------------- test/t_resource.ml | 13 ++++++-- test/t_tree_futs.ml | 21 ++++++++++--- 6 files changed, 127 insertions(+), 57 deletions(-) diff --git a/test/dune b/test/dune index e5d032c7..56261dad 100644 --- a/test/dune +++ b/test/dune @@ -9,6 +9,7 @@ t_chan_train t_resource t_unfair + t_ws_deque t_bounded_queue) (libraries moonpool diff --git a/test/t_fib.ml b/test/t_fib.ml index 38e3cb50..32e264e9 100644 --- a/test/t_fib.ml +++ b/test/t_fib.ml @@ -1,5 +1,12 @@ open Moonpool +let ( let@ ) = ( @@ ) + +let with_pool ~kind () f = + match kind with + | `Simple_pool -> Simple_pool.with_ ~min:4 () f + | `Pool -> Pool.with_ ~min:4 () f + let rec fib x = if x <= 1 then 1 @@ -8,8 +15,7 @@ let rec fib x = let () = assert (List.init 10 fib = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) -let run_test () = - let pool = Pool.create ~min:4 () in +let run_test ~pool () = let fibs = Array.init 30 (fun n -> Fut.spawn ~on:pool (fun () -> fib n)) in let res = Fut.join_array fibs |> Fut.wait_block in Pool.shutdown pool; @@ -50,11 +56,23 @@ let run_test () = 832040; |]) -let () = +let run ~kind () = for _i = 1 to 4 do - run_test () + let@ pool = with_pool ~kind () in + run_test ~pool () done; (* now make sure we can do this with multiple pools in parallel *) - let jobs = Array.init 4 (fun _ -> Thread.create run_test ()) in + let jobs = + Array.init 4 (fun _ -> + Thread.create + (fun () -> + let@ pool = with_pool ~kind () in + run_test ~pool ()) + ()) + in Array.iter Thread.join jobs + +let () = + run ~kind:`Pool (); + run ~kind:`Simple_pool () diff --git a/test/t_fib_rec.ml b/test/t_fib_rec.ml index b76fe875..d79e85b4 100644 --- a/test/t_fib_rec.ml +++ b/test/t_fib_rec.ml @@ -1,4 +1,6 @@ -open Moonpool +open! Moonpool + +let ( let@ ) = ( @@ ) let rec fib_direct x = if x <= 1 then @@ -6,9 +8,13 @@ let rec fib_direct x = else fib_direct (x - 1) + fib_direct (x - 2) +let n_calls_fib_direct = Atomic.make 0 + let rec fib ~on x : int Fut.t = if x <= 18 then - Fut.spawn ~on (fun () -> fib_direct x) + Fut.spawn ~on (fun () -> + Atomic.incr n_calls_fib_direct; + fib_direct x) else let open Fut.Infix_local in let+ t1 = fib ~on (x - 1) and+ t2 = fib ~on (x - 2) in @@ -16,14 +22,19 @@ let rec fib ~on x : int Fut.t = let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) -let fib_40 : int = - let pool = Pool.create ~min:8 () in - let r = fib ~on:pool 40 |> Fut.wait_block_exn in - Pool.shutdown pool; - r +let fib_40 : int lazy_t = + lazy + (let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "fib40" in + let pool = Pool.create ~min:8 () in + let r = fib ~on:pool 40 |> Fut.wait_block_exn in + Pool.shutdown pool; + r) let run_test () = - let pool = Pool.create ~min:8 () in + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "run-test" in + let@ pool = Pool.with_ ~min:8 () in + + let (lazy fib_40) = fib_40 in assert ( List.init 10 (fib ~on:pool) @@ -34,11 +45,26 @@ let run_test () = let fibs = Array.init n_fibs (fun _ -> fib ~on:pool 40) in let res = Fut.join_array fibs |> Fut.wait_block in - Pool.shutdown pool; assert (res = Ok (Array.make n_fibs fib_40)) +let setup_counter () = + if Trace.enabled () then + ignore + (Thread.create + (fun () -> + while true do + Thread.delay 0.01; + Trace.counter_int "n-fib-direct" (Atomic.get n_calls_fib_direct) + done) + () + : Thread.t) + let () = + let@ () = Trace_tef.with_setup () in + setup_counter (); + + let (lazy fib_40) = fib_40 in Printf.printf "fib 40 = %d\n%!" fib_40; for _i = 1 to 2 do run_test () diff --git a/test/t_props.ml b/test/t_props.ml index ae6638ae..01e3c174 100644 --- a/test/t_props.ml +++ b/test/t_props.ml @@ -1,49 +1,54 @@ module Q = QCheck open Moonpool +let ( let@ ) = ( @@ ) let tests = ref [] let add_test t = tests := t :: !tests -(* main pool *) -let pool = Pool.create ~min:4 ~per_domain:1 () - -(* pool for future combinators *) -let pool_fut = Pool.create ~min:2 () - -module Fut2 = (val Fut.infix pool_fut) +let with_pool ~kind () f = + match kind with + | `Simple_pool -> Simple_pool.with_ ~min:4 ~per_domain:1 () f + | `Pool -> Pool.with_ ~min:4 ~per_domain:1 () f let () = - add_test - @@ Q.Test.make ~name:"map then join_list" - Q.(small_list small_int) - (fun l -> - let l' = List.map (fun x -> Fut.spawn ~on:pool (fun () -> x + 1)) l in - let l' = Fut.join_list l' |> Fut.wait_block_exn in - if l' <> List.map succ l then Q.Test.fail_reportf "bad list"; - true) + add_test @@ fun ~kind -> + let@ pool = with_pool ~kind () in + Q.Test.make ~name:"map then join_list" + Q.(small_list small_int) + (fun l -> + let l' = List.map (fun x -> Fut.spawn ~on:pool (fun () -> x + 1)) l in + let l' = Fut.join_list l' |> Fut.wait_block_exn in + if l' <> List.map succ l then Q.Test.fail_reportf "bad list"; + true) let () = - add_test - @@ Q.Test.make ~name:"map bind" - Q.(small_list small_int) - (fun l -> - let open Fut2 in - let l' = - l - |> List.map (fun x -> - let* x = Fut.spawn ~on:pool_fut (fun () -> x + 1) in - let* y = Fut.return (x - 1) in - let+ z = Fut.spawn ~on:pool_fut (fun () -> string_of_int y) in - z) - in + add_test @@ fun ~kind -> + let@ pool = with_pool ~kind () in + Q.Test.make ~name:"map bind" + Q.(small_list small_int) + (fun l -> + let open Fut.Infix_local in + let l' = + l + |> List.map (fun x -> + let* x = Fut.spawn ~on:pool (fun () -> x + 1) in + let* y = Fut.return (x - 1) in + let+ z = Fut.spawn ~on:pool (fun () -> string_of_int y) in + z) + in - Fut.wait_list l' |> Fut.wait_block_exn; + Fut.wait_list l' |> Fut.wait_block_exn; - let l_res = List.map Fut.get_or_fail_exn l' in - if l_res <> List.map string_of_int l then - Q.Test.fail_reportf "bad list: from %s, to %s" - Q.Print.(list int l) - Q.Print.(list string l_res); - true) + let l_res = List.map Fut.get_or_fail_exn l' in + if l_res <> List.map string_of_int l then + Q.Test.fail_reportf "bad list: from %s, to %s" + Q.Print.(list int l) + Q.Print.(list string l_res); + true) -let () = QCheck_base_runner.run_tests_main !tests +let () = + let tests = + List.map (fun t -> [ t ~kind:`Simple_pool; t ~kind:`Pool ]) !tests + |> List.flatten + in + QCheck_base_runner.run_tests_main tests diff --git a/test/t_resource.ml b/test/t_resource.ml index a9686867..01f8be57 100644 --- a/test/t_resource.ml +++ b/test/t_resource.ml @@ -2,8 +2,13 @@ open! Moonpool let ( let@ ) = ( @@ ) +let with_pool ~kind () f = + match kind with + | `Simple_pool -> Simple_pool.with_ ~min:4 ~per_domain:1 () f + | `Pool -> Pool.with_ ~min:4 ~per_domain:1 () f + (* test proper resource handling *) -let () = +let run ~kind () = let@ () = Trace_tef.with_setup () in let a = Atomic.make 0 in for _i = 1 to 1_000 do @@ -12,7 +17,11 @@ let () = if _i mod 100 = 0 then Thread.delay 0.8; (* allocate a new pool at each iteration *) - let@ p = Pool.with_ ~min:4 () in + let@ p = with_pool ~kind () in Pool.run_wait_block p (fun () -> Atomic.incr a) done; assert (Atomic.get a = 1_000) + +let () = + run ~kind:`Pool (); + run ~kind:`Simple_pool () diff --git a/test/t_tree_futs.ml b/test/t_tree_futs.ml index 5ebf2bff..56bde804 100644 --- a/test/t_tree_futs.ml +++ b/test/t_tree_futs.ml @@ -2,6 +2,11 @@ open Moonpool let ( let@ ) = ( @@ ) +let with_pool ~kind ~j () f = + match kind with + | `Simple_pool -> Simple_pool.with_ ~min:j () f + | `Pool -> Pool.with_ ~min:j () f + type 'a tree = | Leaf of 'a | Node of 'a tree Fut.t * 'a tree Fut.t @@ -61,15 +66,13 @@ let stat_thread () = done) () -let () = - (* - Tracy_client_trace.setup (); - *) +let run_main ~kind () = + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "run_main" in let start = Unix.gettimeofday () in let n = try int_of_string (Sys.getenv "N") with _ -> default_n in let j = try int_of_string (Sys.getenv "J") with _ -> 4 in - let pool = Pool.create ~min:j () in + let@ pool = with_pool ~kind ~j () in ignore (stat_thread () : Thread.t); Printf.printf "n=%d, j=%d\n%!" n j; @@ -79,3 +82,11 @@ let () = assert (n1 = 1 lsl (n - 1)); assert (n2 = 1 lsl (n - 1)); () + +let () = + let@ () = Trace_tef.with_setup () in + (* + Tracy_client_trace.setup (); + *) + run_main ~kind:`Pool (); + run_main ~kind:`Simple_pool () From 73c2f9768cd8dfffafbd682b37a5211ad171f4b7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:21:29 -0400 Subject: [PATCH 15/77] benchs: run with both pool and simple_pool --- Makefile | 12 +++++++----- benchs/dune | 2 +- benchs/fib_rec.ml | 16 +++++++++++++--- benchs/pi.ml | 35 ++++++++++++++++++++++++----------- 4 files changed, 45 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index b1acca61..3e63a195 100644 --- a/Makefile +++ b/Makefile @@ -22,22 +22,24 @@ watch: DUNE_OPTS_BENCH?=--profile=release N?=40 -NITER?=3 +NITER?=2 BENCH_PSIZE?=1,4,8,20 +BENCH_KIND?=simple,pool BENCH_CUTOFF?=20 bench-fib: @echo running for N=$(N) dune build $(DUNE_OPTS_BENCH) benchs/fib_rec.exe - hyperfine -L psize $(BENCH_PSIZE) --warmup=1 \ - './_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -n $(N)' + hyperfine -L psize $(BENCH_PSIZE) -L kind $(BENCH_KIND) --warmup=1 \ + './_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -kind={kind} -n $(N)' PI_NSTEPS?=100_000_000 PI_MODES?=seq,par1,forkjoin +PI_KIND?=simple,pool bench-pi: @echo running for N=$(PI_NSTEPS) dune build $(DUNE_OPTS_BENCH) benchs/pi.exe - hyperfine -L mode $(PI_MODES) --warmup=1 \ - './_build/default/benchs/pi.exe -mode={mode} -n $(PI_NSTEPS)' + hyperfine -L mode $(PI_MODES) -L kind $(PI_KIND) --warmup=1 \ + './_build/default/benchs/pi.exe -mode={mode} -kind={kind} -n $(PI_NSTEPS)' .PHONY: test clean bench-fib bench-pi diff --git a/benchs/dune b/benchs/dune index 2c798176..0ae20bf3 100644 --- a/benchs/dune +++ b/benchs/dune @@ -3,4 +3,4 @@ (names fib_rec pi) (preprocess (action (run %{project_root}/src/cpp/cpp.exe %{input-file}))) - (libraries moonpool unix)) + (libraries moonpool unix trace trace-tef)) diff --git a/benchs/fib_rec.ml b/benchs/fib_rec.ml index 1a3d3288..57a444f0 100644 --- a/benchs/fib_rec.ml +++ b/benchs/fib_rec.ml @@ -18,8 +18,14 @@ let rec fib ~on x : int Fut.t = let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) -let run ~psize ~n ~seq ~niter () : unit = - let pool = lazy (Pool.create ~min:psize ()) in +let create_pool ~psize ~kind () = + match kind with + | "simple" -> Simple_pool.create ~min:psize () + | "pool" -> Pool.create ~min:psize () + | _ -> assert false + +let run ~psize ~n ~seq ~niter ~kind () : unit = + let pool = lazy (create_pool ~kind ~psize ()) in for _i = 1 to niter do let res = if seq then ( @@ -39,6 +45,7 @@ let () = let psize = ref 16 in let seq = ref false in let niter = ref 3 in + let kind = ref "pool" in let opts = [ "-psize", Arg.Set_int psize, " pool size"; @@ -46,9 +53,12 @@ let () = "-seq", Arg.Set seq, " sequential"; "-niter", Arg.Set_int niter, " number of iterations"; "-cutoff", Arg.Set_int cutoff, " cutoff for sequential computation"; + ( "-kind", + Arg.Symbol ([ "pool"; "simple" ], ( := ) kind), + " pick pool implementation" ); ] |> Arg.align in Arg.parse opts ignore ""; - run ~psize:!psize ~n:!n ~seq:!seq ~niter:!niter () + run ~psize:!psize ~n:!n ~seq:!seq ~niter:!niter ~kind:!kind () diff --git a/benchs/pi.ml b/benchs/pi.ml index 1dd55fb9..36b4e92a 100644 --- a/benchs/pi.ml +++ b/benchs/pi.ml @@ -17,15 +17,23 @@ let run_sequential (num_steps : int) : float = pi (** Create a pool *) -let with_pool f = - if !j = 0 then - Pool.with_ ~per_domain:1 f - else - Pool.with_ ~min:!j f +let with_pool ~kind f = + match kind with + | "pool" -> + if !j = 0 then + Pool.with_ ~per_domain:1 f + else + Pool.with_ ~min:!j f + | "simple" -> + if !j = 0 then + Simple_pool.with_ ~per_domain:1 f + else + Simple_pool.with_ ~min:!j f + | _ -> assert false (** Run in parallel using {!Fut.for_} *) -let run_par1 (num_steps : int) : float = - let@ pool = with_pool () in +let run_par1 ~kind (num_steps : int) : float = + let@ pool = with_pool ~kind () in let num_tasks = Pool.size pool in @@ -53,8 +61,8 @@ let run_par1 (num_steps : int) : float = [@@@ifge 5.0] -let run_fork_join num_steps : float = - let@ pool = with_pool () in +let run_fork_join ~kind num_steps : float = + let@ pool = with_pool ~kind () in let num_tasks = Pool.size pool in @@ -90,9 +98,11 @@ type mode = | Fork_join let () = + let@ () = Trace_tef.with_setup () in let mode = ref Sequential in let n = ref 1000 in let time = ref false in + let kind = ref "pool" in let set_mode = function | "seq" -> mode := Sequential @@ -109,6 +119,9 @@ let () = " mode of execution" ); "-j", Arg.Set_int j, " number of threads"; "-t", Arg.Set time, " printing timing"; + ( "-kind", + Arg.Symbol ([ "pool"; "simple" ], ( := ) kind), + " pick pool implementation" ); ] |> Arg.align in @@ -118,8 +131,8 @@ let () = let res = match !mode with | Sequential -> run_sequential !n - | Par1 -> run_par1 !n - | Fork_join -> run_fork_join !n + | Par1 -> run_par1 ~kind:!kind !n + | Fork_join -> run_fork_join ~kind:!kind !n in let elapsed : float = Unix.gettimeofday () -. t_start in From 3bfc4cdcc7c11d234d80297e8c3e1581ed2a7146 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:28:16 -0400 Subject: [PATCH 16/77] more test --- test/t_unfair.ml | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/test/t_unfair.ml b/test/t_unfair.ml index 81271046..5d22a663 100644 --- a/test/t_unfair.ml +++ b/test/t_unfair.ml @@ -8,18 +8,20 @@ let sleep_for f () = let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "sleep" in Thread.delay f -let () = - let@ () = Trace_tef.with_setup () in - let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "main" in +let run ~kind () = + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "run" in let pool = - Pool.create ~min:3 - ~on_init_thread:(fun ~dom_id:_ ~t_id () -> - Trace.set_thread_name (Printf.sprintf "pool worker %d" t_id)) - ~around_task: - ( (fun self -> Trace.counter_int "n_tasks" (Pool.num_tasks self)), - fun self () -> Trace.counter_int "n_tasks" (Pool.num_tasks self) ) - () + let on_init_thread ~dom_id:_ ~t_id () = + Trace.set_thread_name (Printf.sprintf "pool worker %d" t_id) + and around_task = + ( (fun self -> Trace.counter_int "n_tasks" (Pool.num_tasks self)), + fun self () -> Trace.counter_int "n_tasks" (Pool.num_tasks self) ) + in + + match kind with + | `Simple -> Simple_pool.create ~min:3 ~on_init_thread ~around_task () + | `Pool -> Pool.create ~min:3 ~on_init_thread ~around_task () in (* make all threads busy *) @@ -42,3 +44,9 @@ let () = let elapsed = Unix.gettimeofday () -. t in Printf.printf "elapsed: %.4fs\n%!" elapsed + +let () = + let@ () = Trace_tef.with_setup () in + let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "main" in + run ~kind:`Simple (); + run ~kind:`Pool () From ef05146e03b2aeb7109140ba1ec95da69c235b3c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 00:28:19 -0400 Subject: [PATCH 17/77] tweak retry thresholds in pool --- src/pool.ml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index 4ce08f76..4dda7141 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -66,6 +66,12 @@ exception Got_task of task type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task +(** How many times in a row do we try to read the next local task? *) +let run_self_task_max_retry = 5 + +(** How many times in a row do we try to do work-stealing? *) +let steal_attempt_max_retry = 5 + let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn ~around_task : unit = let (AT_pair (before_task, after_task)) = around_task in @@ -90,8 +96,9 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn pop_retries := 0; run_task task | None -> + Domain_.relax (); incr pop_retries; - if !pop_retries > 10 then continue := false + if !pop_retries > run_self_task_max_retry then continue := false done in @@ -125,7 +132,7 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn incr steal_attempts; Domain_.relax (); - if !steal_attempts > 10 then ( + if !steal_attempts > steal_attempt_max_retry then ( steal_attempts := 0; let task = Bb_queue.pop self.main_q in run_task task From 91c0c3f6c14f05d040fcacb14f6425c10751449d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 01:01:51 -0400 Subject: [PATCH 18/77] fix props --- test/t_props.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/t_props.ml b/test/t_props.ml index 01e3c174..403f2534 100644 --- a/test/t_props.ml +++ b/test/t_props.ml @@ -12,10 +12,10 @@ let with_pool ~kind () f = let () = add_test @@ fun ~kind -> - let@ pool = with_pool ~kind () in Q.Test.make ~name:"map then join_list" Q.(small_list small_int) (fun l -> + let@ pool = with_pool ~kind () in let l' = List.map (fun x -> Fut.spawn ~on:pool (fun () -> x + 1)) l in let l' = Fut.join_list l' |> Fut.wait_block_exn in if l' <> List.map succ l then Q.Test.fail_reportf "bad list"; @@ -23,10 +23,10 @@ let () = let () = add_test @@ fun ~kind -> - let@ pool = with_pool ~kind () in Q.Test.make ~name:"map bind" Q.(small_list small_int) (fun l -> + let@ pool = with_pool ~kind () in let open Fut.Infix_local in let l' = l From 078adae786b4711587e5fc90d2b1aca1f3b749bf Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 09:28:18 -0400 Subject: [PATCH 19/77] limit CI time --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e54322e6..56eecb58 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,6 +9,7 @@ on: jobs: run: name: build + timeout-minutes: 10 strategy: fail-fast: true matrix: From 78407c495d79f4b275979619e1506ca01782a881 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 09:56:56 -0400 Subject: [PATCH 20/77] more tests for WS_deque --- test/t_ws_deque.ml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/test/t_ws_deque.ml b/test/t_ws_deque.ml index 4a364166..73f1f6f5 100644 --- a/test/t_ws_deque.ml +++ b/test/t_ws_deque.ml @@ -92,8 +92,35 @@ let t_heavy () = assert (ref_sum = sum); () +let t_many () = + print_endline "pushing many elements"; + let d = D.create () in + + let push_and_pop count = + for i = 1 to count do + (* if i mod 100_000 = 0 then Printf.printf "push %d\n%!" i; *) + D.push d i + done; + let n = ref 0 in + + let continue = ref true in + while !continue do + match D.pop d with + | None -> continue := false + | Some _ -> incr n + done; + assert (!n = count) + in + + push_and_pop 10_000; + push_and_pop 100_000; + push_and_pop 100_000_000; + print_endline "pushing many elements: ok"; + () + let () = let@ () = Trace_tef.with_setup () in t_simple (); t_heavy (); + t_many (); () From 3d7e272d015be449d184a3443aae3a9c551e80b9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 09:57:05 -0400 Subject: [PATCH 21/77] perf ws_deque: use bitmasks instead of modulo op --- src/ws_deque_.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ws_deque_.ml b/src/ws_deque_.ml index be5e4b47..6dfd51d6 100644 --- a/src/ws_deque_.ml +++ b/src/ws_deque_.ml @@ -30,12 +30,13 @@ end = struct { log_size; arr = Array.make (1 lsl log_size) None } let[@inline] get (self : _ t) (i : int) : 'a = - match Array.unsafe_get self.arr (i mod size self) with + match Array.unsafe_get self.arr (i land ((1 lsl self.log_size) - 1)) with | Some x -> x | None -> assert false let[@inline] set (self : 'a t) (i : int) (x : 'a) : unit = - Array.unsafe_set self.arr (i mod size self) (Some x) + assert (i >= 0); + Array.unsafe_set self.arr (i land ((1 lsl self.log_size) - 1)) (Some x) let grow (self : _ t) ~bottom ~top : 'a t = let new_arr = create ~log_size:(self.log_size + 1) () in @@ -73,6 +74,7 @@ let push (self : 'a t) (x : 'a) : unit = Only if it seems too big do we actually read [t]. *) let size_approx = b - t_approx in if size_approx >= CA.size self.arr - 1 then ( + (* we need to read the actual value of [top], which might entail contention. *) let t = A.get self.top in self.top_cached <- t; let size = b - t in From fdc188c291d45f46a7ddd1401365b483b204d9a7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 09:57:19 -0400 Subject: [PATCH 22/77] wip: debug pool --- src/pool.ml | 72 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index 4dda7141..ebc1cab1 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -40,8 +40,12 @@ let find_current_worker_ (self : state) : worker_state option = (** Run [task] as is, on the pool. *) let run_direct_ (self : state) (w : worker_state option) (task : task) : unit = match w with - | Some w -> WSQ.push w.q task - | None -> Bb_queue.push self.main_q task + | Some w -> + print_endline "push local"; + WSQ.push w.q task + | None -> + print_endline "push blocking"; + Bb_queue.push self.main_q task let run_async_ (self : state) (task : task) : unit = (* stay on current worker if possible *) @@ -88,6 +92,7 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn in let run_self_tasks_ () = + print_endline "run self tasks"; let continue = ref true in let pop_retries = ref 0 in while !continue do @@ -104,6 +109,7 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn (* get a task from another worker *) let try_to_steal_work () : task option = + print_endline "try to steal work"; try for _retry = 1 to 3 do Array.iter @@ -119,32 +125,46 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn with Got_task task -> Some task in - let main_loop () = - let steal_attempts = ref 0 in - while true do - run_self_tasks_ (); - - match try_to_steal_work () with - | Some task -> - steal_attempts := 0; - run_task task - | None -> - incr steal_attempts; - Domain_.relax (); - - if !steal_attempts > steal_attempt_max_retry then ( - steal_attempts := 0; - let task = Bb_queue.pop self.main_q in - run_task task - ) - done + (* try to steal work multiple times *) + let try_to_steal_work_loop () : bool = + try + let unsuccessful_steal_attempts = ref 0 in + while !unsuccessful_steal_attempts < steal_attempt_max_retry do + match try_to_steal_work () with + | Some task -> + run_task task; + raise_notrace Exit + | None -> + incr unsuccessful_steal_attempts; + Domain_.relax () + done; + false + with Exit -> true in - try - (* handle domain-local await *) - Dla_.using ~prepare_for_await:Suspend_.prepare_for_await - ~while_running:main_loop - with Bb_queue.Closed -> () + let main_loop () = + (try + while true do + run_self_tasks_ (); + + if not (try_to_steal_work_loop ()) then ( + Array.iteri + (fun i w -> Printf.printf "w[%d].q.size=%d\n" i (WSQ.size w.q)) + self.workers; + Printf.printf "bq.size=%d\n%!" (Bb_queue.size self.main_q); + + print_endline "wait block"; + let task = Bb_queue.pop self.main_q in + run_task task + ) + done + with Bb_queue.Closed -> ()); + run_self_tasks_ () + in + + (* handle domain-local await *) + Dla_.using ~prepare_for_await:Suspend_.prepare_for_await + ~while_running:main_loop let default_thread_init_exit_ ~dom_id:_ ~t_id:_ () = () From e0d3a18562313281780d0b2948b801aaddf4d0f8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 09:57:25 -0400 Subject: [PATCH 23/77] improve test a bit --- test/effect-based/t_many.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/effect-based/t_many.ml b/test/effect-based/t_many.ml index c9cad3c6..8b5b76e3 100644 --- a/test/effect-based/t_many.ml +++ b/test/effect-based/t_many.ml @@ -2,9 +2,10 @@ open Moonpool -let pool = Pool.create ~min:4 () +let ( let@ ) = ( @@ ) let run () = + let@ pool = Pool.with_ ~min:4 () in let t1 = Unix.gettimeofday () in let n = 200_000 in From db33bec13f0c311690c3aee32293ff3c99ab2717 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 12:11:41 -0400 Subject: [PATCH 24/77] wip: better work stealing pool --- src/pool.ml | 112 +++++++++++++++++++++++++++++++++++++-------------- src/pool.mli | 2 + 2 files changed, 84 insertions(+), 30 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index ebc1cab1..b44ce6cf 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -1,4 +1,5 @@ module WSQ = Ws_deque_ +module A = Atomic_ include Runner let ( let@ ) = ( @@ ) @@ -11,20 +12,30 @@ type worker_state = { q: task WSQ.t; (** Work stealing queue *) } +type mut_cond = { + mutex: Mutex.t; + cond: Condition.t; +} + type state = { + active: bool Atomic.t; workers: worker_state array; - main_q: task Bb_queue.t; (** Main queue to block on *) + main_q: task Queue.t; (** Main queue to block on *) + mc: mut_cond; } (** internal state *) let[@inline] size_ (self : state) = Array.length self.workers let num_tasks_ (self : state) : int = - let n = ref (Bb_queue.size self.main_q) in + Mutex.lock self.mc.mutex; + let n = ref (Queue.length self.main_q) in + Mutex.unlock self.mc.mutex; Array.iter (fun w -> n := !n + WSQ.size w.q) self.workers; !n exception Got_worker of worker_state +exception Closed = Bb_queue.Closed let find_current_worker_ (self : state) : worker_state option = let self_id = Thread.id @@ Thread.self () in @@ -41,11 +52,22 @@ let find_current_worker_ (self : state) : worker_state option = let run_direct_ (self : state) (w : worker_state option) (task : task) : unit = match w with | Some w -> - print_endline "push local"; - WSQ.push w.q task + WSQ.push w.q task; + + (* see if we need to wakeup other workers to come and steal from us *) + Mutex.lock self.mc.mutex; + if Queue.is_empty self.main_q then Condition.broadcast self.mc.cond; + Mutex.unlock self.mc.mutex | None -> - print_endline "push blocking"; - Bb_queue.push self.main_q task + if A.get self.active then ( + (* push into the main queue *) + Mutex.lock self.mc.mutex; + let was_empty = Queue.is_empty self.main_q in + Queue.push task self.main_q; + if was_empty then Condition.broadcast self.mc.cond; + Mutex.unlock self.mc.mutex + ) else + raise Bb_queue.Closed let run_async_ (self : state) (task : task) : unit = (* stay on current worker if possible *) @@ -74,7 +96,7 @@ type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task let run_self_task_max_retry = 5 (** How many times in a row do we try to do work-stealing? *) -let steal_attempt_max_retry = 5 +let steal_attempt_max_retry = 7 let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn ~around_task : unit = @@ -92,7 +114,6 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn in let run_self_tasks_ () = - print_endline "run self tasks"; let continue = ref true in let pop_retries = ref 0 in while !continue do @@ -107,23 +128,34 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn done in + let work_steal_offset = ref 0 in + (* get a task from another worker *) - let try_to_steal_work () : task option = - print_endline "try to steal work"; + let rec try_to_steal_work () : task option = + let i = !work_steal_offset in + work_steal_offset := (i + 1) mod Array.length self.workers; + let w' = self.workers.(i) in + if w == w' then + try_to_steal_work () + else + WSQ.steal w'.q + in + + (* try - for _retry = 1 to 3 do - Array.iter - (fun w' -> - if w != w' then ( - match WSQ.steal w'.q with - | None -> () - | Some task -> raise_notrace (Got_task task) - )) - self.workers + for _retry = 1 to 1 do + for i = 0 to Array.length self.workers - 1 do + let w' = self.workers.(i) in + if w != w' then ( + match WSQ.steal w'.q with + | None -> () + | Some task -> raise_notrace (Got_task task) + ) + done done; None with Got_task task -> Some task - in + *) (* try to steal work multiple times *) let try_to_steal_work_loop () : bool = @@ -142,19 +174,28 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn with Exit -> true in + let get_task_from_main_queue_block () : task = + try + Mutex.lock self.mc.mutex; + while A.get self.active do + match Queue.pop self.main_q with + | exception Queue.Empty -> Condition.wait self.mc.cond self.mc.mutex + | task -> + Mutex.unlock self.mc.mutex; + raise_notrace (Got_task task) + done; + Mutex.unlock self.mc.mutex; + raise Bb_queue.Closed + with Got_task t -> t + in + let main_loop () = (try while true do run_self_tasks_ (); if not (try_to_steal_work_loop ()) then ( - Array.iteri - (fun i w -> Printf.printf "w[%d].q.size=%d\n" i (WSQ.size w.q)) - self.workers; - Printf.printf "bq.size=%d\n%!" (Bb_queue.size self.main_q); - - print_endline "wait block"; - let task = Bb_queue.pop self.main_q in + let task = get_task_from_main_queue_block () in run_task task ) done @@ -169,8 +210,12 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn let default_thread_init_exit_ ~dom_id:_ ~t_id:_ () = () let shutdown_ ~wait (self : state) : unit = - Bb_queue.close self.main_q; - if wait then Array.iter (fun w -> Thread.join w.thread) self.workers + if A.exchange self.active false then ( + Mutex.lock self.mc.mutex; + Condition.broadcast self.mc.cond; + Mutex.unlock self.mc.mutex; + if wait then Array.iter (fun w -> Thread.join w.thread) self.workers + ) type ('a, 'b) create_args = ?on_init_thread:(dom_id:int -> t_id:int -> unit -> unit) -> @@ -208,7 +253,14 @@ let create ?(on_init_thread = default_thread_init_exit_) Array.init num_threads (fun _ -> { thread = dummy; q = WSQ.create () }) in - let pool = { workers; main_q = Bb_queue.create () } in + let pool = + { + active = A.make true; + workers; + main_q = Queue.create (); + mc = { mutex = Mutex.create (); cond = Condition.create () }; + } + in let runner = Runner.For_runner_implementors.create diff --git a/src/pool.mli b/src/pool.mli index f7a42633..ae6699b2 100644 --- a/src/pool.mli +++ b/src/pool.mli @@ -34,6 +34,8 @@ type ('a, 'b) create_args = 'a (** Arguments used in {!create}. See {!create} for explanations. *) +exception Closed + val create : (unit -> t, _) create_args (** [create ()] makes a new thread pool. @param on_init_thread called at the beginning of each new thread From 6452ca89d1f46cf4d234a3f3d47b0ea418f6ba1d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 21:55:29 -0400 Subject: [PATCH 25/77] rename `Simple_pool` into `Fifo_pool`, update doc --- Makefile | 4 ++-- benchs/fib_rec.ml | 4 ++-- benchs/pi.ml | 8 ++++---- src/{simple_pool.ml => fifo_pool.ml} | 0 src/{simple_pool.mli => fifo_pool.mli} | 16 ++++++++++++---- src/moonpool.ml | 2 +- src/moonpool.mli | 2 +- test/t_fib.ml | 4 ++-- test/t_props.ml | 4 ++-- test/t_resource.ml | 4 ++-- test/t_tree_futs.ml | 4 ++-- test/t_unfair.ml | 2 +- 12 files changed, 31 insertions(+), 23 deletions(-) rename src/{simple_pool.ml => fifo_pool.ml} (100%) rename src/{simple_pool.mli => fifo_pool.mli} (65%) diff --git a/Makefile b/Makefile index 3e63a195..8e08c4b8 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ DUNE_OPTS_BENCH?=--profile=release N?=40 NITER?=2 BENCH_PSIZE?=1,4,8,20 -BENCH_KIND?=simple,pool +BENCH_KIND?=fifo,pool BENCH_CUTOFF?=20 bench-fib: @echo running for N=$(N) @@ -34,7 +34,7 @@ bench-fib: PI_NSTEPS?=100_000_000 PI_MODES?=seq,par1,forkjoin -PI_KIND?=simple,pool +PI_KIND?=fifo,pool bench-pi: @echo running for N=$(PI_NSTEPS) dune build $(DUNE_OPTS_BENCH) benchs/pi.exe diff --git a/benchs/fib_rec.ml b/benchs/fib_rec.ml index 57a444f0..385bfed4 100644 --- a/benchs/fib_rec.ml +++ b/benchs/fib_rec.ml @@ -20,7 +20,7 @@ let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let create_pool ~psize ~kind () = match kind with - | "simple" -> Simple_pool.create ~min:psize () + | "fifo" -> Fifo_pool.create ~min:psize () | "pool" -> Pool.create ~min:psize () | _ -> assert false @@ -54,7 +54,7 @@ let () = "-niter", Arg.Set_int niter, " number of iterations"; "-cutoff", Arg.Set_int cutoff, " cutoff for sequential computation"; ( "-kind", - Arg.Symbol ([ "pool"; "simple" ], ( := ) kind), + Arg.Symbol ([ "pool"; "fifo" ], ( := ) kind), " pick pool implementation" ); ] |> Arg.align diff --git a/benchs/pi.ml b/benchs/pi.ml index 36b4e92a..01017ae9 100644 --- a/benchs/pi.ml +++ b/benchs/pi.ml @@ -24,11 +24,11 @@ let with_pool ~kind f = Pool.with_ ~per_domain:1 f else Pool.with_ ~min:!j f - | "simple" -> + | "fifo" -> if !j = 0 then - Simple_pool.with_ ~per_domain:1 f + Fifo_pool.with_ ~per_domain:1 f else - Simple_pool.with_ ~min:!j f + Fifo_pool.with_ ~min:!j f | _ -> assert false (** Run in parallel using {!Fut.for_} *) @@ -120,7 +120,7 @@ let () = "-j", Arg.Set_int j, " number of threads"; "-t", Arg.Set time, " printing timing"; ( "-kind", - Arg.Symbol ([ "pool"; "simple" ], ( := ) kind), + Arg.Symbol ([ "pool"; "fifo" ], ( := ) kind), " pick pool implementation" ); ] |> Arg.align diff --git a/src/simple_pool.ml b/src/fifo_pool.ml similarity index 100% rename from src/simple_pool.ml rename to src/fifo_pool.ml diff --git a/src/simple_pool.mli b/src/fifo_pool.mli similarity index 65% rename from src/simple_pool.mli rename to src/fifo_pool.mli index b7f89824..252083c5 100644 --- a/src/simple_pool.mli +++ b/src/fifo_pool.mli @@ -1,8 +1,16 @@ -(** A simple thread pool. +(** A simple thread pool in FIFO order. - This uses a single blocking queue to manage tasks, it's very - simple and reliable. Like {!Pool} it distributes a fixed number - of workers over several domains. + FIFO: first-in, first-out. Basically tasks are put into a queue, + and worker threads pull them out of the queue at the other end. + + Since this uses a single blocking queue to manage tasks, it's very + simple and reliable. The number of worker threads is fixed, but + they are spread over several domains to enable parallelism. + + This can be useful for latency-sensitive applications (e.g. as a + pool of workers for network servers). Work-stealing pools might + have higher throughput but they're very unfair to some tasks; by + contrast, here, older tasks have priority over younger tasks. @since NEXT_RELEASE *) diff --git a/src/moonpool.ml b/src/moonpool.ml index 97da4d2a..fb0a3661 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -11,7 +11,7 @@ module Fut = Fut module Lock = Lock module Pool = Pool module Runner = Runner -module Simple_pool = Simple_pool +module Fifo_pool = Fifo_pool module Private = struct module Ws_deque_ = Ws_deque_ diff --git a/src/moonpool.mli b/src/moonpool.mli index 74b48772..66b3164a 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -5,7 +5,7 @@ *) module Pool = Pool -module Simple_pool = Simple_pool +module Fifo_pool = Fifo_pool module Runner = Runner val start_thread_on_some_domain : ('a -> unit) -> 'a -> Thread.t diff --git a/test/t_fib.ml b/test/t_fib.ml index 32e264e9..f54d7118 100644 --- a/test/t_fib.ml +++ b/test/t_fib.ml @@ -4,7 +4,7 @@ let ( let@ ) = ( @@ ) let with_pool ~kind () f = match kind with - | `Simple_pool -> Simple_pool.with_ ~min:4 () f + | `Fifo_pool -> Fifo_pool.with_ ~min:4 () f | `Pool -> Pool.with_ ~min:4 () f let rec fib x = @@ -75,4 +75,4 @@ let run ~kind () = let () = run ~kind:`Pool (); - run ~kind:`Simple_pool () + run ~kind:`Fifo_pool () diff --git a/test/t_props.ml b/test/t_props.ml index 403f2534..be586251 100644 --- a/test/t_props.ml +++ b/test/t_props.ml @@ -7,7 +7,7 @@ let add_test t = tests := t :: !tests let with_pool ~kind () f = match kind with - | `Simple_pool -> Simple_pool.with_ ~min:4 ~per_domain:1 () f + | `Fifo_pool -> Fifo_pool.with_ ~min:4 ~per_domain:1 () f | `Pool -> Pool.with_ ~min:4 ~per_domain:1 () f let () = @@ -48,7 +48,7 @@ let () = let () = let tests = - List.map (fun t -> [ t ~kind:`Simple_pool; t ~kind:`Pool ]) !tests + List.map (fun t -> [ t ~kind:`Fifo_pool; t ~kind:`Pool ]) !tests |> List.flatten in QCheck_base_runner.run_tests_main tests diff --git a/test/t_resource.ml b/test/t_resource.ml index 01f8be57..005ed4c3 100644 --- a/test/t_resource.ml +++ b/test/t_resource.ml @@ -4,7 +4,7 @@ let ( let@ ) = ( @@ ) let with_pool ~kind () f = match kind with - | `Simple_pool -> Simple_pool.with_ ~min:4 ~per_domain:1 () f + | `Fifo_pool -> Fifo_pool.with_ ~min:4 ~per_domain:1 () f | `Pool -> Pool.with_ ~min:4 ~per_domain:1 () f (* test proper resource handling *) @@ -24,4 +24,4 @@ let run ~kind () = let () = run ~kind:`Pool (); - run ~kind:`Simple_pool () + run ~kind:`Fifo_pool () diff --git a/test/t_tree_futs.ml b/test/t_tree_futs.ml index 56bde804..83a9d80c 100644 --- a/test/t_tree_futs.ml +++ b/test/t_tree_futs.ml @@ -4,7 +4,7 @@ let ( let@ ) = ( @@ ) let with_pool ~kind ~j () f = match kind with - | `Simple_pool -> Simple_pool.with_ ~min:j () f + | `Fifo_pool -> Fifo_pool.with_ ~min:j () f | `Pool -> Pool.with_ ~min:j () f type 'a tree = @@ -89,4 +89,4 @@ let () = Tracy_client_trace.setup (); *) run_main ~kind:`Pool (); - run_main ~kind:`Simple_pool () + run_main ~kind:`Fifo_pool () diff --git a/test/t_unfair.ml b/test/t_unfair.ml index 5d22a663..b6dc5884 100644 --- a/test/t_unfair.ml +++ b/test/t_unfair.ml @@ -20,7 +20,7 @@ let run ~kind () = in match kind with - | `Simple -> Simple_pool.create ~min:3 ~on_init_thread ~around_task () + | `Simple -> Fifo_pool.create ~min:3 ~on_init_thread ~around_task () | `Pool -> Pool.create ~min:3 ~on_init_thread ~around_task () in From a89c0ce4f24cb6702dcbdd9628b89beba253f2c9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 21:59:22 -0400 Subject: [PATCH 26/77] breaking: make Runner.t abstract --- src/moonpool.mli | 7 ++++++- src/runner.mli | 17 ++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/moonpool.mli b/src/moonpool.mli index 66b3164a..6edeb1e7 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -1,7 +1,12 @@ (** Moonpool A pool within a bigger pool (ie the ocean). Here, we're talking about - pools of [Thread.t] which live within a fixed pool of [Domain.t]. + pools of [Thread.t] that are dispatched over several [Domain.t] to + enable parallelism. + + We provide several implementations of pools + with distinct scheduling strategies, alongside some concurrency + primitives such as guarding locks ({!Lock.t}) and futures ({!Fut.t}). *) module Pool = Pool diff --git a/src/runner.mli b/src/runner.mli index cda20720..3ac2f724 100644 --- a/src/runner.mli +++ b/src/runner.mli @@ -1,17 +1,13 @@ -(** Abstract runner. +(** Interface for runners. - This provides an abstraction for running tasks in the background. + This provides an abstraction for running tasks in the background, + which is implemented by various thread pools. @since 0.3 *) type task = unit -> unit -type t = private { - run_async: task -> unit; - shutdown: wait:bool -> unit -> unit; - size: unit -> int; - num_tasks: unit -> int; -} +type t (** A runner. If a runner is no longer needed, {!shutdown} can be used to signal all @@ -50,8 +46,11 @@ val run_wait_block : t -> (unit -> 'a) -> 'a and returns its result. If [f()] raises an exception, then [run_wait_block pool f] will raise it as well. - {b NOTE} be careful with deadlocks (see notes in {!Fut.wait_block}). *) + {b NOTE} be careful with deadlocks (see notes in {!Fut.wait_block} + about the required discipline to avoid deadlocks). *) +(** This module is specifically intended for users who implement their + own runners. Regular users of Moonpool should not need to look at it. *) module For_runner_implementors : sig val create : size:(unit -> int) -> From 9a1309c44fe1d7ab82c2af6279bba7581fa0c841 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:03:35 -0400 Subject: [PATCH 27/77] chore: add keywords in opam --- dune-project | 2 +- moonpool.opam | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dune-project b/dune-project index 18c5ade8..ddf01fef 100644 --- a/dune-project +++ b/dune-project @@ -30,6 +30,6 @@ (depopts (domain-local-await (>= 0.2))) (tags - (thread pool domain))) + (thread pool domain futures fork-join))) ; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/moonpool.opam b/moonpool.opam index 3a1be0b0..547b18c1 100644 --- a/moonpool.opam +++ b/moonpool.opam @@ -5,7 +5,7 @@ synopsis: "Pools of threads supported by a pool of domains" maintainer: ["Simon Cruanes"] authors: ["Simon Cruanes"] license: "MIT" -tags: ["thread" "pool" "domain"] +tags: ["thread" "pool" "domain" "futures" "fork-join"] homepage: "https://github.com/c-cube/moonpool" bug-reports: "https://github.com/c-cube/moonpool/issues" depends: [ From 3f720241b24c112000dc1e49f89301fc0d79ec96 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:10:10 -0400 Subject: [PATCH 28/77] feat pool: use `Shutdown` when running a task after it's closed --- src/pool.ml | 8 +++++--- src/pool.mli | 2 -- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index b44ce6cf..8d74ab65 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -67,7 +67,9 @@ let run_direct_ (self : state) (w : worker_state option) (task : task) : unit = if was_empty then Condition.broadcast self.mc.cond; Mutex.unlock self.mc.mutex ) else - raise Bb_queue.Closed + (* notify the caller that scheduling tasks is no + longer permitted *) + raise Shutdown let run_async_ (self : state) (task : task) : unit = (* stay on current worker if possible *) @@ -185,7 +187,7 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn raise_notrace (Got_task task) done; Mutex.unlock self.mc.mutex; - raise Bb_queue.Closed + raise Shutdown with Got_task t -> t in @@ -199,7 +201,7 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn run_task task ) done - with Bb_queue.Closed -> ()); + with Shutdown -> ()); run_self_tasks_ () in diff --git a/src/pool.mli b/src/pool.mli index ae6699b2..f7a42633 100644 --- a/src/pool.mli +++ b/src/pool.mli @@ -34,8 +34,6 @@ type ('a, 'b) create_args = 'a (** Arguments used in {!create}. See {!create} for explanations. *) -exception Closed - val create : (unit -> t, _) create_args (** [create ()] makes a new thread pool. @param on_init_thread called at the beginning of each new thread From 530507d84e07a9ad9718cbc77329dab0f72f0cf9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:11:08 -0400 Subject: [PATCH 29/77] fix fifo_pool: raise Shutdown, not Closed, in run --- src/fifo_pool.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fifo_pool.ml b/src/fifo_pool.ml index 54570bbb..920b36ed 100644 --- a/src/fifo_pool.ml +++ b/src/fifo_pool.ml @@ -12,7 +12,8 @@ let[@inline] size_ (self : state) = Array.length self.threads let[@inline] num_tasks_ (self : state) : int = Bb_queue.size self.q (** Run [task] as is, on the pool. *) -let run_direct_ (self : state) (task : task) : unit = Bb_queue.push self.q task +let run_direct_ (self : state) (task : task) : unit = + try Bb_queue.push self.q task with Bb_queue.Closed -> raise Shutdown let rec run_async_ (self : state) (task : task) : unit = let task' () = From 894851f6e801d398fca4a89da4fbdad380ec752e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:32:42 -0400 Subject: [PATCH 30/77] comments in ws_deque --- src/ws_deque_.ml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/ws_deque_.ml b/src/ws_deque_.ml index 6dfd51d6..488d651a 100644 --- a/src/ws_deque_.ml +++ b/src/ws_deque_.ml @@ -109,14 +109,17 @@ let pop (self : 'a t) : 'a option = let x = CA.get arr b in perhaps_shrink self ~bottom:b ~top:t; Some x - ) else if A.compare_and_set self.top t (t + 1) then ( - (* exactly one slot, so we might be racing against stealers - to update [self.top] *) - let x = CA.get arr b in - A.set self.bottom (t + 1); - Some x - ) else - None + ) else ( + assert (size = 0); + if A.compare_and_set self.top t (t + 1) then ( + (* exactly one slot, so we might be racing against stealers + to update [self.top] *) + let x = CA.get arr b in + A.set self.bottom (t + 1); + Some x + ) else + None + ) let steal (self : 'a t) : 'a option = (* read [top], but do not update [top_cached] From d9da7844e23ad26ca4059cdd90a8c3d39a945e8d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:32:57 -0400 Subject: [PATCH 31/77] test: run more diverse pools in t_fib_rec --- test/t_fib_rec.ml | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/test/t_fib_rec.ml b/test/t_fib_rec.ml index d79e85b4..01cd7c16 100644 --- a/test/t_fib_rec.ml +++ b/test/t_fib_rec.ml @@ -30,9 +30,8 @@ let fib_40 : int lazy_t = Pool.shutdown pool; r) -let run_test () = +let run_test ~pool () = let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "run-test" in - let@ pool = Pool.with_ ~min:8 () in let (lazy fib_40) = fib_40 in @@ -48,6 +47,16 @@ let run_test () = assert (res = Ok (Array.make n_fibs fib_40)) +let run_test_size ~size () = + Printf.printf "test pool(%d)\n%!" size; + let@ pool = Pool.with_ ~min:size () in + run_test ~pool () + +let run_test_fifo ~size () = + Printf.printf "test fifo(%d)\n%!" size; + let@ pool = Fifo_pool.with_ ~min:size () in + run_test ~pool () + let setup_counter () = if Trace.enabled () then ignore @@ -66,10 +75,11 @@ let () = let (lazy fib_40) = fib_40 in Printf.printf "fib 40 = %d\n%!" fib_40; - for _i = 1 to 2 do - run_test () - done; + + run_test_fifo ~size:4 (); + + List.iter (fun size -> run_test_size ~size ()) [ 1; 2; 4; 8 ]; (* now make sure we can do this with multiple pools in parallel *) - let jobs = Array.init 4 (fun _ -> Thread.create run_test ()) in + let jobs = Array.init 4 (fun _ -> Thread.create (run_test_size ~size:4) ()) in Array.iter Thread.join jobs From 3956fb656641cfc127b0248d6d2353784c2a0e85 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:33:08 -0400 Subject: [PATCH 32/77] fix ws_pool: no work stealing for pools of 1 worker there would be a loop because it'd try to find the index of another worker to steal from, but loop forever because there is no other worker. --- src/pool.ml | 140 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 54 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index 8d74ab65..8d0a8a20 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -1,5 +1,13 @@ module WSQ = Ws_deque_ module A = Atomic_ + +module Int_tbl = Hashtbl.Make (struct + type t = int + + let equal : t -> t -> bool = ( = ) + let hash : t -> int = Hashtbl.hash +end) + include Runner let ( let@ ) = ( @@ ) @@ -11,6 +19,9 @@ type worker_state = { mutable thread: Thread.t; q: task WSQ.t; (** Work stealing queue *) } +(** State for a given worker. Only this worker is + allowed to push into the queue, but other workers + can come and steal from it if they're idle. *) type mut_cond = { mutex: Mutex.t; @@ -18,35 +29,27 @@ type mut_cond = { } type state = { - active: bool Atomic.t; - workers: worker_state array; - main_q: task Queue.t; (** Main queue to block on *) - mc: mut_cond; + active: bool Atomic.t; (** Becomes [false] when the pool is shutdown. *) + workers: worker_state array; (** Fixed set of workers. *) + worker_by_id: worker_state Int_tbl.t; + main_q: task Queue.t; (** Main queue for tasks coming from the outside *) + mc: mut_cond; (** Used to block on [main_q] *) } (** internal state *) let[@inline] size_ (self : state) = Array.length self.workers let num_tasks_ (self : state) : int = + let n = ref 0 in Mutex.lock self.mc.mutex; - let n = ref (Queue.length self.main_q) in + n := Queue.length self.main_q; Mutex.unlock self.mc.mutex; Array.iter (fun w -> n := !n + WSQ.size w.q) self.workers; !n -exception Got_worker of worker_state -exception Closed = Bb_queue.Closed - -let find_current_worker_ (self : state) : worker_state option = +let[@inline] find_current_worker_ (self : state) : worker_state option = let self_id = Thread.id @@ Thread.self () in - try - (* see if we're in one of the worker threads *) - for i = 0 to Array.length self.workers - 1 do - let w = self.workers.(i) in - if Thread.id w.thread = self_id then raise_notrace (Got_worker w) - done; - None - with Got_worker w -> Some w + Int_tbl.find_opt self.worker_by_id self_id (** Run [task] as is, on the pool. *) let run_direct_ (self : state) (w : worker_state option) (task : task) : unit = @@ -133,14 +136,20 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn let work_steal_offset = ref 0 in (* get a task from another worker *) - let rec try_to_steal_work () : task option = - let i = !work_steal_offset in - work_steal_offset := (i + 1) mod Array.length self.workers; - let w' = self.workers.(i) in - if w == w' then - try_to_steal_work () - else - WSQ.steal w'.q + let try_to_steal_work () : task option = + assert (size_ self > 1); + + work_steal_offset := (!work_steal_offset + 1) mod Array.length self.workers; + + (* if we're pointing to [w], skip to the next worker as + it's useless to steal from oneself *) + if self.workers.(!work_steal_offset) == w then + work_steal_offset := + (!work_steal_offset + 1) mod Array.length self.workers; + + let w' = self.workers.(!work_steal_offset) in + assert (w != w'); + WSQ.steal w'.q in (* @@ -161,48 +170,65 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn (* try to steal work multiple times *) let try_to_steal_work_loop () : bool = - try - let unsuccessful_steal_attempts = ref 0 in - while !unsuccessful_steal_attempts < steal_attempt_max_retry do - match try_to_steal_work () with - | Some task -> - run_task task; - raise_notrace Exit - | None -> - incr unsuccessful_steal_attempts; - Domain_.relax () - done; + if size_ self = 1 then + (* no stealing for single thread pool *) false - with Exit -> true + else ( + try + let unsuccessful_steal_attempts = ref 0 in + while !unsuccessful_steal_attempts < steal_attempt_max_retry do + match try_to_steal_work () with + | Some task -> + run_task task; + raise_notrace Exit + | None -> + incr unsuccessful_steal_attempts; + Domain_.relax () + done; + false + with Exit -> true + ) in - let get_task_from_main_queue_block () : task = + let get_task_from_main_queue_block () : task option = try Mutex.lock self.mc.mutex; - while A.get self.active do + while true do match Queue.pop self.main_q with - | exception Queue.Empty -> Condition.wait self.mc.cond self.mc.mutex + | exception Queue.Empty -> + if A.get self.active then + Condition.wait self.mc.cond self.mc.mutex + else ( + (* empty queue and we're closed, time to exit *) + Mutex.unlock self.mc.mutex; + raise_notrace Exit + ) | task -> Mutex.unlock self.mc.mutex; raise_notrace (Got_task task) done; - Mutex.unlock self.mc.mutex; - raise Shutdown - with Got_task t -> t + (* unreachable *) + assert false + with + | Got_task t -> Some t + | Exit -> None in let main_loop () = - (try - while true do - run_self_tasks_ (); + let continue = ref true in + while !continue do + run_self_tasks_ (); - if not (try_to_steal_work_loop ()) then ( - let task = get_task_from_main_queue_block () in - run_task task - ) - done - with Shutdown -> ()); - run_self_tasks_ () + let did_steal = try_to_steal_work_loop () in + if not did_steal then ( + match get_task_from_main_queue_block () with + | None -> + (* main queue is closed *) + continue := false + | Some task -> run_task task + ) + done; + assert (WSQ.size w.q = 0) in (* handle domain-local await *) @@ -259,6 +285,7 @@ let create ?(on_init_thread = default_thread_init_exit_) { active = A.make true; workers; + worker_by_id = Int_tbl.create 8; main_q = Queue.create (); mc = { mutex = Mutex.create (); cond = Condition.create () }; } @@ -324,7 +351,12 @@ let create ?(on_init_thread = default_thread_init_exit_) (* receive the newly created threads back from domains *) for _j = 1 to num_threads do let i, th = Bb_queue.pop receive_threads in - pool.workers.(i).thread <- th + let worker_state = pool.workers.(i) in + worker_state.thread <- th; + + Mutex.lock pool.mc.mutex; + Int_tbl.add pool.worker_by_id (Thread.id th) worker_state; + Mutex.unlock pool.mc.mutex done; runner From e937bf0e9d1348acda7c16c64236c29b862efa97 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:38:35 -0400 Subject: [PATCH 33/77] perf pool: restore non-hashtable lookup for self-queuing --- src/pool.ml | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index 8d0a8a20..df8992de 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -1,13 +1,5 @@ module WSQ = Ws_deque_ module A = Atomic_ - -module Int_tbl = Hashtbl.Make (struct - type t = int - - let equal : t -> t -> bool = ( = ) - let hash : t -> int = Hashtbl.hash -end) - include Runner let ( let@ ) = ( @@ ) @@ -31,7 +23,6 @@ type mut_cond = { type state = { active: bool Atomic.t; (** Becomes [false] when the pool is shutdown. *) workers: worker_state array; (** Fixed set of workers. *) - worker_by_id: worker_state Int_tbl.t; main_q: task Queue.t; (** Main queue for tasks coming from the outside *) mc: mut_cond; (** Used to block on [main_q] *) } @@ -47,9 +38,18 @@ let num_tasks_ (self : state) : int = Array.iter (fun w -> n := !n + WSQ.size w.q) self.workers; !n +exception Got_worker of worker_state + let[@inline] find_current_worker_ (self : state) : worker_state option = let self_id = Thread.id @@ Thread.self () in - Int_tbl.find_opt self.worker_by_id self_id + try + (* see if we're in one of the worker threads *) + for i = 0 to Array.length self.workers - 1 do + let w = self.workers.(i) in + if Thread.id w.thread = self_id then raise_notrace (Got_worker w) + done; + None + with Got_worker w -> Some w (** Run [task] as is, on the pool. *) let run_direct_ (self : state) (w : worker_state option) (task : task) : unit = @@ -285,7 +285,6 @@ let create ?(on_init_thread = default_thread_init_exit_) { active = A.make true; workers; - worker_by_id = Int_tbl.create 8; main_q = Queue.create (); mc = { mutex = Mutex.create (); cond = Condition.create () }; } @@ -352,11 +351,7 @@ let create ?(on_init_thread = default_thread_init_exit_) for _j = 1 to num_threads do let i, th = Bb_queue.pop receive_threads in let worker_state = pool.workers.(i) in - worker_state.thread <- th; - - Mutex.lock pool.mc.mutex; - Int_tbl.add pool.worker_by_id (Thread.id th) worker_state; - Mutex.unlock pool.mc.mutex + worker_state.thread <- th done; runner From 056986c84f841f4a9c573c6dc3c95b718c1c7dd1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:38:43 -0400 Subject: [PATCH 34/77] perf pool: no retries for self-queue; fewer retries for stealing --- src/pool.ml | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index df8992de..b4c5ac7c 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -97,11 +97,8 @@ exception Got_task of task type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task -(** How many times in a row do we try to read the next local task? *) -let run_self_task_max_retry = 5 - (** How many times in a row do we try to do work-stealing? *) -let steal_attempt_max_retry = 7 +let steal_attempt_max_retry = 3 let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn ~around_task : unit = @@ -120,16 +117,10 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn let run_self_tasks_ () = let continue = ref true in - let pop_retries = ref 0 in while !continue do match WSQ.pop w.q with - | Some task -> - pop_retries := 0; - run_task task - | None -> - Domain_.relax (); - incr pop_retries; - if !pop_retries > run_self_task_max_retry then continue := false + | Some task -> run_task task + | None -> continue := false done in From dfb588cdc52a62320c78f11cc6f34dfa5e054fa1 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:41:18 -0400 Subject: [PATCH 35/77] test: update readme --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 60f478d3..40ae042b 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,7 @@ of `pool`, as soon as one is available. No result is returned. ```ocaml # #require "threads";; # let pool = Moonpool.Pool.create ~min:4 ();; -val pool : Moonpool.Runner.t = - {Moonpool.Pool.run_async = ; shutdown = ; size = ; - num_tasks = } +val pool : Moonpool.Runner.t = # begin Moonpool.Pool.run_async pool From 1ed25e5aca05f30bd74b82e8f5fa9c096d1395a5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 22:41:26 -0400 Subject: [PATCH 36/77] test: make t_ws_deque consume less ram --- test/t_ws_deque.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/t_ws_deque.ml b/test/t_ws_deque.ml index 73f1f6f5..3377dcb6 100644 --- a/test/t_ws_deque.ml +++ b/test/t_ws_deque.ml @@ -114,7 +114,7 @@ let t_many () = push_and_pop 10_000; push_and_pop 100_000; - push_and_pop 100_000_000; + push_and_pop 1_000_000; print_endline "pushing many elements: ok"; () From 629b66662f6e7879db28dbdfe2bbdffb79cde0a2 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 23:18:47 -0400 Subject: [PATCH 37/77] fix ws_deque: circular array is also in an atomic --- src/ws_deque_.ml | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/ws_deque_.ml b/src/ws_deque_.ml index 488d651a..378dc14e 100644 --- a/src/ws_deque_.ml +++ b/src/ws_deque_.ml @@ -57,44 +57,47 @@ type 'a t = { top: int A.t; (** Where we steal *) bottom: int A.t; (** Where we push/pop from the owning thread *) mutable top_cached: int; (** Last read value of [top] *) - mutable arr: 'a CA.t; (** The circular array *) + arr: 'a CA.t A.t; (** The circular array *) } let create () : _ t = let arr = CA.create ~log_size:4 () in - { top = A.make 0; top_cached = 0; bottom = A.make 0; arr } + { top = A.make 0; top_cached = 0; bottom = A.make 0; arr = A.make arr } let[@inline] size (self : _ t) : int = max 0 (A.get self.bottom - A.get self.top) let push (self : 'a t) (x : 'a) : unit = let b = A.get self.bottom in let t_approx = self.top_cached in + let arr = ref (A.get self.arr) in (* Section 2.3: over-approximation of size. Only if it seems too big do we actually read [t]. *) let size_approx = b - t_approx in - if size_approx >= CA.size self.arr - 1 then ( + if size_approx >= CA.size !arr - 1 then ( (* we need to read the actual value of [top], which might entail contention. *) let t = A.get self.top in self.top_cached <- t; let size = b - t in - if size >= CA.size self.arr - 1 then - self.arr <- CA.grow self.arr ~top:t ~bottom:b + if size >= CA.size !arr - 1 then ( + arr := CA.grow !arr ~top:t ~bottom:b; + A.set self.arr !arr + ) ); - CA.set self.arr b x; + CA.set !arr b x; A.set self.bottom (b + 1) -let perhaps_shrink (self : _ t) ~top ~bottom : unit = +let perhaps_shrink (self : _ t) arr ~top ~bottom : unit = let size = bottom - top in - let ca_size = CA.size self.arr in + let ca_size = CA.size arr in if ca_size >= 256 && size <= ca_size / 3 then - self.arr <- CA.shrink self.arr ~top ~bottom + A.set self.arr (CA.shrink arr ~top ~bottom) let pop (self : 'a t) : 'a option = let b = A.get self.bottom in - let arr = self.arr in + let arr = A.get self.arr in let b = b - 1 in A.set self.bottom b; @@ -103,31 +106,34 @@ let pop (self : 'a t) : 'a option = let size = b - t in if size < 0 then ( + (* reset to basic empty state *) A.set self.bottom t; None ) else if size > 0 then ( + (* can pop without modifying [top] *) let x = CA.get arr b in - perhaps_shrink self ~bottom:b ~top:t; + perhaps_shrink self arr ~bottom:b ~top:t; Some x ) else ( assert (size = 0); + (* there was exactly one slot, so we might be racing against stealers + to update [self.top] *) if A.compare_and_set self.top t (t + 1) then ( - (* exactly one slot, so we might be racing against stealers - to update [self.top] *) let x = CA.get arr b in A.set self.bottom (t + 1); Some x - ) else + ) else ( + A.set self.bottom (t + 1); None + ) ) let steal (self : 'a t) : 'a option = (* read [top], but do not update [top_cached] as we're in another thread *) let t = A.get self.top in - let b = A.get self.bottom in - let arr = self.arr in + let arr = A.get self.arr in let size = b - t in if size <= 0 then From 1e3629bc675d6d368048b0593563e02324cd9512 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 23:29:47 -0400 Subject: [PATCH 38/77] fix ws_deque: strict bound for shrinking --- src/ws_deque_.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ws_deque_.ml b/src/ws_deque_.ml index 378dc14e..f155aa85 100644 --- a/src/ws_deque_.ml +++ b/src/ws_deque_.ml @@ -89,10 +89,10 @@ let push (self : 'a t) (x : 'a) : unit = CA.set !arr b x; A.set self.bottom (b + 1) -let perhaps_shrink (self : _ t) arr ~top ~bottom : unit = +let maybe_shrink_ (self : _ t) arr ~top ~bottom : unit = let size = bottom - top in let ca_size = CA.size arr in - if ca_size >= 256 && size <= ca_size / 3 then + if ca_size >= 256 && size < ca_size / 3 then A.set self.arr (CA.shrink arr ~top ~bottom) let pop (self : 'a t) : 'a option = @@ -112,7 +112,7 @@ let pop (self : 'a t) : 'a option = ) else if size > 0 then ( (* can pop without modifying [top] *) let x = CA.get arr b in - perhaps_shrink self arr ~bottom:b ~top:t; + maybe_shrink_ self arr ~bottom:b ~top:t; Some x ) else ( assert (size = 0); From 30035fa67dcd7378e0eecee6b3429ca10bece724 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 23:30:04 -0400 Subject: [PATCH 39/77] fix pool: suspension handler might run from a different thread! this means we can't reuse the same worker state, it's neither thread safe nor deadlock-safe (the worker whose state it is might be waiting on the main queue) --- src/pool.ml | 36 ++++++++++++------------------------ test/effect-based/t_many.ml | 24 ++++++++++++++++++++---- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/pool.ml b/src/pool.ml index b4c5ac7c..01a093b3 100644 --- a/src/pool.ml +++ b/src/pool.ml @@ -75,21 +75,24 @@ let run_direct_ (self : state) (w : worker_state option) (task : task) : unit = raise Shutdown let run_async_ (self : state) (task : task) : unit = - (* stay on current worker if possible *) - let w = find_current_worker_ self in - - let rec run_async_rec_ (task : task) = + (* run [task] inside a suspension handler *) + let rec run_async_in_suspend_rec_ (task : task) = let task_with_suspend_ () = (* run [f()] and handle [suspend] in it *) Suspend_.with_suspend task ~run:(fun ~with_handler task' -> if with_handler then - run_async_rec_ task' - else - run_direct_ self w task') + run_async_in_suspend_rec_ task' + else ( + let w = find_current_worker_ self in + run_direct_ self w task' + )) in + + (* schedule on current worker, if run from a worker *) + let w = find_current_worker_ self in run_direct_ self w task_with_suspend_ in - run_async_rec_ task + run_async_in_suspend_rec_ task let run = run_async @@ -100,6 +103,7 @@ type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task (** How many times in a row do we try to do work-stealing? *) let steal_attempt_max_retry = 3 +(** Main loop for a worker thread. *) let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn ~around_task : unit = let (AT_pair (before_task, after_task)) = around_task in @@ -143,22 +147,6 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn WSQ.steal w'.q in - (* - try - for _retry = 1 to 1 do - for i = 0 to Array.length self.workers - 1 do - let w' = self.workers.(i) in - if w != w' then ( - match WSQ.steal w'.q with - | None -> () - | Some task -> raise_notrace (Got_task task) - ) - done - done; - None - with Got_task task -> Some task - *) - (* try to steal work multiple times *) let try_to_steal_work_loop () : bool = if size_ self = 1 then diff --git a/test/effect-based/t_many.ml b/test/effect-based/t_many.ml index 8b5b76e3..23e1a929 100644 --- a/test/effect-based/t_many.ml +++ b/test/effect-based/t_many.ml @@ -4,8 +4,7 @@ open Moonpool let ( let@ ) = ( @@ ) -let run () = - let@ pool = Pool.with_ ~min:4 () in +let run ~pool () = let t1 = Unix.gettimeofday () in let n = 200_000 in @@ -15,7 +14,7 @@ let run () = Fut.spawn ~on:pool (fun () -> List.fold_left (fun n x -> - let _res = Fut.await x in + let _res = Sys.opaque_identity (Fut.await x) in n + 1) 0 l) in @@ -29,6 +28,23 @@ let run () = Printf.printf "in %.4fs\n%!" (Unix.gettimeofday () -. t1); assert (List.for_all (fun s -> s = n) lens) -let () = run () +let () = + (print_endline "with fifo"; + let@ pool = Fifo_pool.with_ ~min:4 () in + run ~pool ()); + + (print_endline "with WS(1)"; + let@ pool = Pool.with_ ~min:1 () in + run ~pool ()); + + (print_endline "with WS(2)"; + let@ pool = Pool.with_ ~min:2 () in + run ~pool ()); + + (print_endline "with WS(4)"; + let@ pool = Pool.with_ ~min:4 () in + run ~pool ()); + + () [@@@endif] From 3e614ec9928ff8073478b7e230709d1e3d92ae26 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 23:40:01 -0400 Subject: [PATCH 40/77] rename `Pool` to `Ws_pool`; deprecated `Moonpool.Pool` --- README.md | 26 +++++++++++++----------- benchs/fib_rec.ml | 4 ++-- benchs/pi.ml | 10 ++++----- src/fut.ml | 12 +++++------ src/moonpool.ml | 3 ++- src/moonpool.mli | 6 +++++- src/{pool.ml => ws_pool.ml} | 26 +++++++----------------- src/{pool.mli => ws_pool.mli} | 23 ++++++++------------- test/effect-based/t_fib1.ml | 6 +++--- test/effect-based/t_fib_fork_join.ml | 6 +++--- test/effect-based/t_fib_fork_join_all.ml | 6 +++--- test/effect-based/t_fork_join.ml | 24 +++++++++++----------- test/effect-based/t_fork_join_heavy.ml | 4 ++-- test/effect-based/t_futs1.ml | 2 +- test/effect-based/t_many.ml | 6 +++--- test/effect-based/t_sort.ml | 2 +- test/t_bench1.ml | 2 +- test/t_chan_train.ml | 2 +- test/t_fib.ml | 6 +++--- test/t_fib_rec.ml | 6 +++--- test/t_futs1.ml | 4 ++-- test/t_props.ml | 4 ++-- test/t_resource.ml | 6 +++--- test/t_tree_futs.ml | 4 ++-- test/t_unfair.ml | 26 ++++++++++++------------ 25 files changed, 108 insertions(+), 118 deletions(-) rename src/{pool.ml => ws_pool.ml} (91%) rename src/{pool.mli => ws_pool.mli} (76%) diff --git a/README.md b/README.md index 40ae042b..ab451e08 100644 --- a/README.md +++ b/README.md @@ -24,20 +24,20 @@ In addition, some concurrency and parallelism primitives are provided: ## Usage -The user can create several thread pools. These pools use regular posix threads, -but the threads are spread across multiple domains (on OCaml 5), which enables -parallelism. +The user can create several thread pools (implementing the interface `Runner.t`). +These pools use regular posix threads, but the threads are spread across +multiple domains (on OCaml 5), which enables parallelism. -The function `Pool.run_async pool task` runs `task()` on one of the workers -of `pool`, as soon as one is available. No result is returned. +The function `Runner.run_async pool task` schedules `task()` to run on one of +the workers of `pool`, as soon as one is available. No result is returned by `run_async`. ```ocaml # #require "threads";; -# let pool = Moonpool.Pool.create ~min:4 ();; +# let pool = Moonpool.Fifo_pool.create ~min:4 ();; val pool : Moonpool.Runner.t = # begin - Moonpool.Pool.run_async pool + Moonpool.Runner.run_async pool (fun () -> Thread.delay 0.1; print_endline "running from the pool"); @@ -49,11 +49,13 @@ running from the pool - : unit = () ``` -To wait until the task is done, you can use `Pool.run_wait_block` instead: +To wait until the task is done, you can use `Runner.run_wait_block`[^1] instead: + +[^1]: beware of deadlock! See documentation for more details. ```ocaml # begin - Moonpool.Pool.run_wait_block pool + Moonpool.Runner.run_wait_block pool (fun () -> Thread.delay 0.1; print_endline "running from the pool"); @@ -155,7 +157,7 @@ val expected_sum : int = 5050 On OCaml 5, again using effect handlers, the module `Fork_join` implements the [fork-join model](https://en.wikipedia.org/wiki/Fork%E2%80%93join_model). -It must run on a pool (using [Pool.run] or inside a future via [Future.spawn]). +It must run on a pool (using [Runner.run_async] or inside a future via [Fut.spawn]). ```ocaml # let rec select_sort arr i len = @@ -257,7 +259,7 @@ This works for OCaml >= 4.08. the same pool, too — this is useful for threads blocking on IO). A useful analogy is that each domain is a bit like a CPU core, and `Thread.t` is a logical thread running on a core. - Multiple threads have to share a single core and do not run in parallel on it[^1]. + Multiple threads have to share a single core and do not run in parallel on it[^2]. We can therefore build pools that spread their worker threads on multiple cores to enable parallelism within each pool. TODO: actually use https://github.com/haesbaert/ocaml-processor to pin domains to cores, @@ -273,4 +275,4 @@ MIT license. $ opam install moonpool ``` -[^1]: let's not talk about hyperthreading. +[^2]: let's not talk about hyperthreading. diff --git a/benchs/fib_rec.ml b/benchs/fib_rec.ml index 385bfed4..06341ce1 100644 --- a/benchs/fib_rec.ml +++ b/benchs/fib_rec.ml @@ -21,7 +21,7 @@ let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let create_pool ~psize ~kind () = match kind with | "fifo" -> Fifo_pool.create ~min:psize () - | "pool" -> Pool.create ~min:psize () + | "pool" -> Ws_pool.create ~min:psize () | _ -> assert false let run ~psize ~n ~seq ~niter ~kind () : unit = @@ -38,7 +38,7 @@ let run ~psize ~n ~seq ~niter ~kind () : unit = in Printf.printf "fib %d = %d\n%!" n res done; - if not seq then Pool.shutdown (Lazy.force pool) + if not seq then Ws_pool.shutdown (Lazy.force pool) let () = let n = ref 40 in diff --git a/benchs/pi.ml b/benchs/pi.ml index 01017ae9..65304a80 100644 --- a/benchs/pi.ml +++ b/benchs/pi.ml @@ -21,9 +21,9 @@ let with_pool ~kind f = match kind with | "pool" -> if !j = 0 then - Pool.with_ ~per_domain:1 f + Ws_pool.with_ ~per_domain:1 f else - Pool.with_ ~min:!j f + Ws_pool.with_ ~min:!j f | "fifo" -> if !j = 0 then Fifo_pool.with_ ~per_domain:1 f @@ -35,7 +35,7 @@ let with_pool ~kind f = let run_par1 ~kind (num_steps : int) : float = let@ pool = with_pool ~kind () in - let num_tasks = Pool.size pool in + let num_tasks = Ws_pool.size pool in let step = 1. /. float num_steps in let global_sum = Lock.create 0. in @@ -64,12 +64,12 @@ let run_par1 ~kind (num_steps : int) : float = let run_fork_join ~kind num_steps : float = let@ pool = with_pool ~kind () in - let num_tasks = Pool.size pool in + let num_tasks = Ws_pool.size pool in let step = 1. /. float num_steps in let global_sum = Lock.create 0. in - Pool.run_wait_block pool (fun () -> + Ws_pool.run_wait_block pool (fun () -> Fork_join.for_ ~chunk_size:(3 + (num_steps / num_tasks)) num_steps diff --git a/src/fut.ml b/src/fut.ml index 42767b61..0a5332ed 100644 --- a/src/fut.ml +++ b/src/fut.ml @@ -97,7 +97,7 @@ let spawn ~on f : _ t = fulfill promise res in - Pool.run_async on task; + Runner.run_async on task; fut let reify_error (f : 'a t) : 'a or_error t = @@ -131,7 +131,7 @@ let map ?on ~f fut : _ t = match on with | None -> map_and_fulfill () - | Some on -> Pool.run_async on map_and_fulfill); + | Some on -> Runner.run_async on map_and_fulfill); fut2 @@ -158,14 +158,14 @@ let bind ?on ~f fut : _ t = | None -> apply_f_to_res r | Some on -> let fut2, promise = make () in - Pool.run_async on (bind_and_fulfill r promise); + Runner.run_async on (bind_and_fulfill r promise); fut2) | None -> let fut2, promise = make () in on_result fut (fun r -> match on with | None -> bind_and_fulfill r promise () - | Some on -> Pool.run_async on (bind_and_fulfill r promise)); + | Some on -> Runner.run_async on (bind_and_fulfill r promise)); fut2 @@ -403,7 +403,7 @@ module type INFIX = sig end module Infix_ (X : sig - val pool : Pool.t option + val pool : Runner.t option end) : INFIX = struct let[@inline] ( >|= ) x f = map ?on:X.pool ~f x let[@inline] ( >>= ) x f = bind ?on:X.pool ~f x @@ -420,7 +420,7 @@ end) include Infix_local module Infix (X : sig - val pool : Pool.t + val pool : Runner.t end) = Infix_ (struct let pool = Some X.pool diff --git a/src/moonpool.ml b/src/moonpool.ml index fb0a3661..ed1af755 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -9,7 +9,8 @@ module Chan = Chan module Fork_join = Fork_join module Fut = Fut module Lock = Lock -module Pool = Pool +module Pool = Fifo_pool +module Ws_pool = Ws_pool module Runner = Runner module Fifo_pool = Fifo_pool diff --git a/src/moonpool.mli b/src/moonpool.mli index 6edeb1e7..4028e858 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -9,10 +9,14 @@ primitives such as guarding locks ({!Lock.t}) and futures ({!Fut.t}). *) -module Pool = Pool +module Ws_pool = Ws_pool module Fifo_pool = Fifo_pool module Runner = Runner +module Pool = Fifo_pool +[@@deprecated "use Fifo_pool or Ws_pool"] +(** Default pool. Please explicitly pick an implementation instead. *) + val start_thread_on_some_domain : ('a -> unit) -> 'a -> Thread.t (** Similar to {!Thread.create}, but it picks a background domain at random to run the thread. This ensures that we don't always pick the same domain diff --git a/src/pool.ml b/src/ws_pool.ml similarity index 91% rename from src/pool.ml rename to src/ws_pool.ml index 01a093b3..fcf5eed9 100644 --- a/src/pool.ml +++ b/src/ws_pool.ml @@ -4,9 +4,6 @@ include Runner let ( let@ ) = ( @@ ) -type thread_loop_wrapper = - thread:Thread.t -> pool:t -> (unit -> unit) -> unit -> unit - type worker_state = { mutable thread: Thread.t; q: task WSQ.t; (** Work stealing queue *) @@ -227,7 +224,6 @@ let shutdown_ ~wait (self : state) : unit = type ('a, 'b) create_args = ?on_init_thread:(dom_id:int -> t_id:int -> unit -> unit) -> ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> - ?thread_wrappers:thread_loop_wrapper list -> ?on_exn:(exn -> Printexc.raw_backtrace -> unit) -> ?around_task:(t -> 'b) * (t -> 'b -> unit) -> ?min:int -> @@ -236,9 +232,8 @@ type ('a, 'b) create_args = (** Arguments used in {!create}. See {!create} for explanations. *) let create ?(on_init_thread = default_thread_init_exit_) - ?(on_exit_thread = default_thread_init_exit_) ?(thread_wrappers = []) - ?(on_exn = fun _ _ -> ()) ?around_task ?min:(min_threads = 1) - ?(per_domain = 0) () : t = + ?(on_exit_thread = default_thread_init_exit_) ?(on_exn = fun _ _ -> ()) + ?around_task ?min:(min_threads = 1) ?(per_domain = 0) () : t = (* wrapper *) let around_task = match around_task with @@ -294,16 +289,9 @@ let create ?(on_init_thread = default_thread_init_exit_) on_init_thread ~dom_id:dom_idx ~t_id (); let run () = worker_thread_ pool runner w ~on_exn ~around_task in - (* the actual worker loop is [worker_thread_], with all - wrappers for this pool and for all pools (global_thread_wrappers_) *) - let run' = - List.fold_left - (fun run f -> f ~thread ~pool:runner run) - run thread_wrappers - in (* now run the main loop *) - Fun.protect run' ~finally:(fun () -> + Fun.protect run ~finally:(fun () -> (* on termination, decrease refcount of underlying domain *) D_pool_.decr_on dom_idx); on_exit_thread ~dom_id:dom_idx ~t_id () @@ -335,11 +323,11 @@ let create ?(on_init_thread = default_thread_init_exit_) runner -let with_ ?on_init_thread ?on_exit_thread ?thread_wrappers ?on_exn ?around_task - ?min ?per_domain () f = +let with_ ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?min ?per_domain + () f = let pool = - create ?on_init_thread ?on_exit_thread ?thread_wrappers ?on_exn ?around_task - ?min ?per_domain () + create ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?min ?per_domain + () in let@ () = Fun.protect ~finally:(fun () -> shutdown pool) in f pool diff --git a/src/pool.mli b/src/ws_pool.mli similarity index 76% rename from src/pool.mli rename to src/ws_pool.mli index f7a42633..4775024c 100644 --- a/src/pool.mli +++ b/src/ws_pool.mli @@ -1,7 +1,13 @@ -(** Thread pool. +(** Work-stealing thread pool. - A pool of threads. The pool contains a fixed number of threads that - wait for work items to come, process these, and loop. + A pool of threads with a worker-stealing scheduler. + The pool contains a fixed number of threads that wait for work + items to come, process these, and loop. + + This is good for CPU-intensive tasks that feature a lot of small tasks. + Note that tasks will not always be processed in the order they are + scheduled, so this is not great for workloads where the latency + of individual tasks matter (for that see {!Fifo_pool}). This implements {!Runner.t} since 0.3. @@ -15,18 +21,9 @@ include module type of Runner -type thread_loop_wrapper = - thread:Thread.t -> pool:t -> (unit -> unit) -> unit -> unit -(** A thread wrapper [f] takes the current thread, the current pool, - and the worker function [loop : unit -> unit] which is - the worker's main loop, and returns a new loop function. - By default it just returns the same loop function but it can be used - to install tracing, effect handlers, etc. *) - type ('a, 'b) create_args = ?on_init_thread:(dom_id:int -> t_id:int -> unit -> unit) -> ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> - ?thread_wrappers:thread_loop_wrapper list -> ?on_exn:(exn -> Printexc.raw_backtrace -> unit) -> ?around_task:(t -> 'b) * (t -> 'b -> unit) -> ?min:int -> @@ -47,8 +44,6 @@ val create : (unit -> t, _) create_args If both [min] and [per_domain] are specified, the maximum of both [min] and [per_domain * num_of_domains] is used. @param on_exit_thread called at the end of each thread in the pool - @param thread_wrappers a list of {!thread_loop_wrapper} functions - to use for this pool's workers. @param around_task a pair of [before, after], where [before pool] is called before a task is processed, on the worker thread about to run it, and returns [x]; and [after pool x] is called by diff --git a/test/effect-based/t_fib1.ml b/test/effect-based/t_fib1.ml index e8d2f534..ca3f2861 100644 --- a/test/effect-based/t_fib1.ml +++ b/test/effect-based/t_fib1.ml @@ -26,13 +26,13 @@ let fib ~on x : int Fut.t = let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let fib_40 : int = - let pool = Pool.create ~min:8 () in + let pool = Ws_pool.create ~min:8 () in fib ~on:pool 40 |> Fut.wait_block_exn let () = Printf.printf "fib 40 = %d\n%!" fib_40 let run_test () = - let pool = Pool.create ~min:8 () in + let pool = Ws_pool.create ~min:8 () in assert ( List.init 10 (fib ~on:pool) @@ -42,7 +42,7 @@ let run_test () = let fibs = Array.init 3 (fun _ -> fib ~on:pool 40) in let res = Fut.join_array fibs |> Fut.wait_block in - Pool.shutdown pool; + Ws_pool.shutdown pool; assert (res = Ok (Array.make 3 fib_40)) diff --git a/test/effect-based/t_fib_fork_join.ml b/test/effect-based/t_fib_fork_join.ml index c6898833..bdf60337 100644 --- a/test/effect-based/t_fib_fork_join.ml +++ b/test/effect-based/t_fib_fork_join.ml @@ -27,13 +27,13 @@ let fib ~on x : int Fut.t = let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let fib_40 : int = - let pool = Pool.create ~min:8 () in + let pool = Ws_pool.create ~min:8 () in fib ~on:pool 40 |> Fut.wait_block_exn let () = Printf.printf "fib 40 = %d\n%!" fib_40 let run_test () = - let pool = Pool.create ~min:8 () in + let pool = Ws_pool.create ~min:8 () in assert ( List.init 10 (fib ~on:pool) @@ -43,7 +43,7 @@ let run_test () = let fibs = Array.init 3 (fun _ -> fib ~on:pool 40) in let res = Fut.join_array fibs |> Fut.wait_block in - Pool.shutdown pool; + Ws_pool.shutdown pool; assert (res = Ok (Array.make 3 fib_40)) diff --git a/test/effect-based/t_fib_fork_join_all.ml b/test/effect-based/t_fib_fork_join_all.ml index e1ae83f4..ed82902e 100644 --- a/test/effect-based/t_fib_fork_join_all.ml +++ b/test/effect-based/t_fib_fork_join_all.ml @@ -22,13 +22,13 @@ let rec fib x : int = ) let fib_40 : int = - let@ pool = Pool.with_ ~min:8 () in + let@ pool = Ws_pool.with_ ~min:8 () in Fut.spawn ~on:pool (fun () -> fib 40) |> Fut.wait_block_exn let () = Printf.printf "fib 40 = %d\n%!" fib_40 let run_test () = - let@ pool = Pool.with_ ~min:8 () in + let@ pool = Ws_pool.with_ ~min:8 () in let fut = Fut.spawn ~on:pool (fun () -> @@ -37,7 +37,7 @@ let run_test () = in let res = Fut.wait_block_exn fut in - Pool.shutdown pool; + Ws_pool.shutdown pool; assert (res = (Array.make 3 fib_40 |> Array.to_list)) diff --git a/test/effect-based/t_fork_join.ml b/test/effect-based/t_fork_join.ml index 7fc8fa31..5b467187 100644 --- a/test/effect-based/t_fork_join.ml +++ b/test/effect-based/t_fork_join.ml @@ -5,11 +5,11 @@ let ( let@ ) = ( @@ ) open! Moonpool -let pool = Pool.create ~min:4 () +let pool = Ws_pool.create ~min:4 () let () = let x = - Pool.run_wait_block pool (fun () -> + Ws_pool.run_wait_block pool (fun () -> let x, y = Fork_join.both (fun () -> @@ -25,7 +25,7 @@ let () = let () = try - Pool.run_wait_block pool (fun () -> + Ws_pool.run_wait_block pool (fun () -> Fork_join.both_ignore (fun () -> Thread.delay 0.005) (fun () -> @@ -36,21 +36,21 @@ let () = let () = let par_sum = - Pool.run_wait_block pool (fun () -> + Ws_pool.run_wait_block pool (fun () -> Fork_join.all_init 42 (fun i -> i * i) |> List.fold_left ( + ) 0) in let exp_sum = List.init 42 (fun x -> x * x) |> List.fold_left ( + ) 0 in assert (par_sum = exp_sum) let () = - Pool.run_wait_block pool (fun () -> + Ws_pool.run_wait_block pool (fun () -> Fork_join.for_ 0 (fun _ _ -> assert false)); () let () = let total_sum = Atomic.make 0 in - Pool.run_wait_block pool (fun () -> + Ws_pool.run_wait_block pool (fun () -> Fork_join.for_ ~chunk_size:5 100 (fun low high -> (* iterate on the range sequentially. The range should have 5 items or less. *) let local_sum = ref 0 in @@ -63,7 +63,7 @@ let () = let () = let total_sum = Atomic.make 0 in - Pool.run_wait_block pool (fun () -> + Ws_pool.run_wait_block pool (fun () -> Fork_join.for_ ~chunk_size:1 100 (fun low high -> assert (low = high); ignore (Atomic.fetch_and_add total_sum low : int))); @@ -270,7 +270,7 @@ end let t_eval = let arb = Q.set_stats [ "size", Evaluator.size ] Evaluator.arb in Q.Test.make ~name:"same eval" arb (fun e -> - let@ pool = Pool.with_ ~min:4 () in + let@ pool = Ws_pool.with_ ~min:4 () in (* Printf.eprintf "eval %s\n%!" (Evaluator.show e); *) let x = Evaluator.eval_seq e in let y = Evaluator.eval_fork_join ~pool e in @@ -288,8 +288,8 @@ let t_for_nested ~min ~chunk_size () = let ref_l2 = List.map (List.map neg) ref_l1 in let l1, l2 = - let@ pool = Pool.with_ ~min () in - let@ () = Pool.run_wait_block pool in + let@ pool = Ws_pool.with_ ~min () in + let@ () = Ws_pool.run_wait_block pool in let l1 = Fork_join.map_list ~chunk_size (Fork_join.map_list ~chunk_size neg) l in @@ -310,8 +310,8 @@ let t_map ~chunk_size () = Q.Test.make ~name:"map1" Q.(small_list small_int |> Q.set_stats [ "len", List.length ]) (fun l -> - let@ pool = Pool.with_ ~min:4 () in - let@ () = Pool.run_wait_block pool in + let@ pool = Ws_pool.with_ ~min:4 () in + let@ () = Ws_pool.run_wait_block pool in let a1 = Fork_join.map_list ~chunk_size string_of_int l |> Array.of_list diff --git a/test/effect-based/t_fork_join_heavy.ml b/test/effect-based/t_fork_join_heavy.ml index be86299a..ad9f7044 100644 --- a/test/effect-based/t_fork_join_heavy.ml +++ b/test/effect-based/t_fork_join_heavy.ml @@ -27,8 +27,8 @@ let run ~min () = let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "step" in let l1, l2 = - let@ pool = Pool.with_ ~min () in - let@ () = Pool.run_wait_block pool in + let@ pool = Ws_pool.with_ ~min () in + let@ () = Ws_pool.run_wait_block pool in let l1, l2 = Fork_join.both diff --git a/test/effect-based/t_futs1.ml b/test/effect-based/t_futs1.ml index be58f50b..182ca9d5 100644 --- a/test/effect-based/t_futs1.ml +++ b/test/effect-based/t_futs1.ml @@ -2,7 +2,7 @@ open! Moonpool -let pool = Pool.create ~min:4 () +let pool = Ws_pool.create ~min:4 () let () = let fut = Array.init 10 (fun i -> Fut.spawn ~on:pool (fun () -> i)) in diff --git a/test/effect-based/t_many.ml b/test/effect-based/t_many.ml index 23e1a929..4362932c 100644 --- a/test/effect-based/t_many.ml +++ b/test/effect-based/t_many.ml @@ -34,15 +34,15 @@ let () = run ~pool ()); (print_endline "with WS(1)"; - let@ pool = Pool.with_ ~min:1 () in + let@ pool = Ws_pool.with_ ~min:1 () in run ~pool ()); (print_endline "with WS(2)"; - let@ pool = Pool.with_ ~min:2 () in + let@ pool = Ws_pool.with_ ~min:2 () in run ~pool ()); (print_endline "with WS(4)"; - let@ pool = Pool.with_ ~min:4 () in + let@ pool = Ws_pool.with_ ~min:4 () in run ~pool ()); () diff --git a/test/effect-based/t_sort.ml b/test/effect-based/t_sort.ml index a732c740..8d3fe17c 100644 --- a/test/effect-based/t_sort.ml +++ b/test/effect-based/t_sort.ml @@ -59,7 +59,7 @@ let rec quicksort arr i len : unit = (fun () -> quicksort arr !low (len - (!low - i))) ) -let pool = Moonpool.Pool.create ~min:8 () +let pool = Moonpool.Ws_pool.create ~min:8 () let () = let arr = Array.init 400_000 (fun _ -> Random.int 300_000) in diff --git a/test/t_bench1.ml b/test/t_bench1.ml index abf4a7f2..95cd87a5 100644 --- a/test/t_bench1.ml +++ b/test/t_bench1.ml @@ -8,7 +8,7 @@ let rec fib x = let run ~psize ~n ~j () : _ Fut.t = Printf.printf "pool size=%d, n=%d, j=%d\n%!" psize n j; - let pool = Pool.create ~min:psize ~per_domain:0 () in + let pool = Ws_pool.create ~min:psize ~per_domain:0 () in (* TODO: a ppx for tracy so we can use instrumentation *) let loop () = diff --git a/test/t_chan_train.ml b/test/t_chan_train.ml index 5d1c40ef..bb3e24f7 100644 --- a/test/t_chan_train.ml +++ b/test/t_chan_train.ml @@ -1,7 +1,7 @@ open Moonpool (* large pool, some of our tasks below are long lived *) -let pool = Pool.create ~min:30 () +let pool = Ws_pool.create ~min:30 () open (val Fut.infix pool) diff --git a/test/t_fib.ml b/test/t_fib.ml index f54d7118..3a98e395 100644 --- a/test/t_fib.ml +++ b/test/t_fib.ml @@ -5,7 +5,7 @@ let ( let@ ) = ( @@ ) let with_pool ~kind () f = match kind with | `Fifo_pool -> Fifo_pool.with_ ~min:4 () f - | `Pool -> Pool.with_ ~min:4 () f + | `Ws_pool -> Ws_pool.with_ ~min:4 () f let rec fib x = if x <= 1 then @@ -18,7 +18,7 @@ let () = assert (List.init 10 fib = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let run_test ~pool () = let fibs = Array.init 30 (fun n -> Fut.spawn ~on:pool (fun () -> fib n)) in let res = Fut.join_array fibs |> Fut.wait_block in - Pool.shutdown pool; + Ws_pool.shutdown pool; assert ( res @@ -74,5 +74,5 @@ let run ~kind () = Array.iter Thread.join jobs let () = - run ~kind:`Pool (); + run ~kind:`Ws_pool (); run ~kind:`Fifo_pool () diff --git a/test/t_fib_rec.ml b/test/t_fib_rec.ml index 01cd7c16..de704f34 100644 --- a/test/t_fib_rec.ml +++ b/test/t_fib_rec.ml @@ -25,9 +25,9 @@ let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let fib_40 : int lazy_t = lazy (let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "fib40" in - let pool = Pool.create ~min:8 () in + let pool = Ws_pool.create ~min:8 () in let r = fib ~on:pool 40 |> Fut.wait_block_exn in - Pool.shutdown pool; + Ws_pool.shutdown pool; r) let run_test ~pool () = @@ -49,7 +49,7 @@ let run_test ~pool () = let run_test_size ~size () = Printf.printf "test pool(%d)\n%!" size; - let@ pool = Pool.with_ ~min:size () in + let@ pool = Ws_pool.with_ ~min:size () in run_test ~pool () let run_test_fifo ~size () = diff --git a/test/t_futs1.ml b/test/t_futs1.ml index 930c8bdc..ee2d96a6 100644 --- a/test/t_futs1.ml +++ b/test/t_futs1.ml @@ -1,7 +1,7 @@ open! Moonpool -let pool = Pool.create ~min:4 () -let pool2 = Pool.create ~min:2 () +let pool = Ws_pool.create ~min:4 () +let pool2 = Ws_pool.create ~min:2 () let () = let fut = Fut.return 1 in diff --git a/test/t_props.ml b/test/t_props.ml index be586251..9fa64fbe 100644 --- a/test/t_props.ml +++ b/test/t_props.ml @@ -8,7 +8,7 @@ let add_test t = tests := t :: !tests let with_pool ~kind () f = match kind with | `Fifo_pool -> Fifo_pool.with_ ~min:4 ~per_domain:1 () f - | `Pool -> Pool.with_ ~min:4 ~per_domain:1 () f + | `Ws_pool -> Ws_pool.with_ ~min:4 ~per_domain:1 () f let () = add_test @@ fun ~kind -> @@ -48,7 +48,7 @@ let () = let () = let tests = - List.map (fun t -> [ t ~kind:`Fifo_pool; t ~kind:`Pool ]) !tests + List.map (fun t -> [ t ~kind:`Fifo_pool; t ~kind:`Ws_pool ]) !tests |> List.flatten in QCheck_base_runner.run_tests_main tests diff --git a/test/t_resource.ml b/test/t_resource.ml index 005ed4c3..c990f708 100644 --- a/test/t_resource.ml +++ b/test/t_resource.ml @@ -5,7 +5,7 @@ let ( let@ ) = ( @@ ) let with_pool ~kind () f = match kind with | `Fifo_pool -> Fifo_pool.with_ ~min:4 ~per_domain:1 () f - | `Pool -> Pool.with_ ~min:4 ~per_domain:1 () f + | `Ws_pool -> Ws_pool.with_ ~min:4 ~per_domain:1 () f (* test proper resource handling *) let run ~kind () = @@ -18,10 +18,10 @@ let run ~kind () = (* allocate a new pool at each iteration *) let@ p = with_pool ~kind () in - Pool.run_wait_block p (fun () -> Atomic.incr a) + Ws_pool.run_wait_block p (fun () -> Atomic.incr a) done; assert (Atomic.get a = 1_000) let () = - run ~kind:`Pool (); + run ~kind:`Ws_pool (); run ~kind:`Fifo_pool () diff --git a/test/t_tree_futs.ml b/test/t_tree_futs.ml index 83a9d80c..3507be0a 100644 --- a/test/t_tree_futs.ml +++ b/test/t_tree_futs.ml @@ -5,7 +5,7 @@ let ( let@ ) = ( @@ ) let with_pool ~kind ~j () f = match kind with | `Fifo_pool -> Fifo_pool.with_ ~min:j () f - | `Pool -> Pool.with_ ~min:j () f + | `Ws_pool -> Ws_pool.with_ ~min:j () f type 'a tree = | Leaf of 'a @@ -88,5 +88,5 @@ let () = (* Tracy_client_trace.setup (); *) - run_main ~kind:`Pool (); + run_main ~kind:`Ws_pool (); run_main ~kind:`Fifo_pool () diff --git a/test/t_unfair.ml b/test/t_unfair.ml index b6dc5884..f535a450 100644 --- a/test/t_unfair.ml +++ b/test/t_unfair.ml @@ -15,32 +15,32 @@ let run ~kind () = let on_init_thread ~dom_id:_ ~t_id () = Trace.set_thread_name (Printf.sprintf "pool worker %d" t_id) and around_task = - ( (fun self -> Trace.counter_int "n_tasks" (Pool.num_tasks self)), - fun self () -> Trace.counter_int "n_tasks" (Pool.num_tasks self) ) + ( (fun self -> Trace.counter_int "n_tasks" (Ws_pool.num_tasks self)), + fun self () -> Trace.counter_int "n_tasks" (Ws_pool.num_tasks self) ) in match kind with | `Simple -> Fifo_pool.create ~min:3 ~on_init_thread ~around_task () - | `Pool -> Pool.create ~min:3 ~on_init_thread ~around_task () + | `Ws_pool -> Ws_pool.create ~min:3 ~on_init_thread ~around_task () in (* make all threads busy *) - Pool.run_async pool (sleep_for 0.01); - Pool.run_async pool (sleep_for 0.01); - Pool.run_async pool (sleep_for 0.05); + Ws_pool.run_async pool (sleep_for 0.01); + Ws_pool.run_async pool (sleep_for 0.01); + Ws_pool.run_async pool (sleep_for 0.05); let t = Unix.gettimeofday () in for _i = 1 to 100 do let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "schedule step" in - Pool.run_async pool (sleep_for 0.001); - Pool.run_async pool (sleep_for 0.001); - Pool.run_async pool (sleep_for 0.01) + Ws_pool.run_async pool (sleep_for 0.001); + Ws_pool.run_async pool (sleep_for 0.001); + Ws_pool.run_async pool (sleep_for 0.01) done; - Printf.printf "pool size: %d\n%!" (Pool.num_tasks pool); + Printf.printf "pool size: %d\n%!" (Ws_pool.num_tasks pool); (let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "shutdown" in - Pool.shutdown pool); - Printf.printf "pool size after shutdown: %d\n%!" (Pool.num_tasks pool); + Ws_pool.shutdown pool); + Printf.printf "pool size after shutdown: %d\n%!" (Ws_pool.num_tasks pool); let elapsed = Unix.gettimeofday () -. t in Printf.printf "elapsed: %.4fs\n%!" elapsed @@ -49,4 +49,4 @@ let () = let@ () = Trace_tef.with_setup () in let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "main" in run ~kind:`Simple (); - run ~kind:`Pool () + run ~kind:`Ws_pool () From 5409cf8e1bc21d062a7ea3f5737c8a1767f11e6d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 25 Oct 2023 23:50:34 -0400 Subject: [PATCH 41/77] compat 4.08 --- src/ws_pool.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index fcf5eed9..e3b8bb71 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -18,7 +18,7 @@ type mut_cond = { } type state = { - active: bool Atomic.t; (** Becomes [false] when the pool is shutdown. *) + active: bool A.t; (** Becomes [false] when the pool is shutdown. *) workers: worker_state array; (** Fixed set of workers. *) main_q: task Queue.t; (** Main queue for tasks coming from the outside *) mc: mut_cond; (** Used to block on [main_q] *) From 9e93ebd3bbcce62acbabf9738b7c8854f9991e69 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 12:26:03 -0400 Subject: [PATCH 42/77] update benchmark fib-rec with more implementations --- Makefile | 2 ++ bench_fib.sh | 3 ++ benchs/dune | 2 +- benchs/fib_rec.ml | 74 +++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 77 insertions(+), 4 deletions(-) create mode 100755 bench_fib.sh diff --git a/Makefile b/Makefile index 8e08c4b8..3a6c7bf1 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,8 @@ bench-fib: @echo running for N=$(N) dune build $(DUNE_OPTS_BENCH) benchs/fib_rec.exe hyperfine -L psize $(BENCH_PSIZE) -L kind $(BENCH_KIND) --warmup=1 \ + './_build/default/benchs/fib_rec.exe -seq -cutoff $(BENCH_CUTOFF) -niter $(NITER) -n $(N)' \ + './_build/default/benchs/fib_rec.exe -dl -cutoff $(BENCH_CUTOFF) -niter $(NITER) -n $(N)' \ './_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -kind={kind} -n $(N)' PI_NSTEPS?=100_000_000 diff --git a/bench_fib.sh b/bench_fib.sh new file mode 100755 index 00000000..e9996d53 --- /dev/null +++ b/bench_fib.sh @@ -0,0 +1,3 @@ +#!/bin/sh +OPTS="--profile=release --display=quiet" +exec dune exec $OPTS -- benchs/fib_rec.exe $@ diff --git a/benchs/dune b/benchs/dune index 0ae20bf3..ff0f878b 100644 --- a/benchs/dune +++ b/benchs/dune @@ -3,4 +3,4 @@ (names fib_rec pi) (preprocess (action (run %{project_root}/src/cpp/cpp.exe %{input-file}))) - (libraries moonpool unix trace trace-tef)) + (libraries moonpool unix trace trace-tef domainslib)) diff --git a/benchs/fib_rec.ml b/benchs/fib_rec.ml index 06341ce1..d3df44df 100644 --- a/benchs/fib_rec.ml +++ b/benchs/fib_rec.ml @@ -16,6 +16,43 @@ let rec fib ~on x : int Fut.t = let+ t1 = fib ~on (x - 1) and+ t2 = fib ~on (x - 2) in t1 + t2 +let fib_fj ~on x : int Fut.t = + let rec fib_rec x : int = + if x <= !cutoff then + fib_direct x + else ( + let n1, n2 = + Fork_join.both (fun () -> fib_rec (x - 1)) (fun () -> fib_rec (x - 2)) + in + n1 + n2 + ) + in + Fut.spawn ~on (fun () -> fib_rec x) + +let fib_await ~on x : int Fut.t = + let rec fib_rec x : int Fut.t = + if x <= !cutoff then + Fut.spawn ~on (fun () -> fib_direct x) + else + Fut.spawn ~on (fun () -> + let n1 = fib_rec (x - 1) in + let n2 = fib_rec (x - 2) in + let n1 = Fut.await n1 in + let n2 = Fut.await n2 in + n1 + n2) + in + fib_rec x + +let rec fib_dl ~pool x : int Domainslib.Task.promise = + if x <= !cutoff then + Domainslib.Task.async pool (fun () -> fib_direct x) + else + Domainslib.Task.async pool (fun () -> + let t1 = fib_dl ~pool (x - 1) and t2 = fib_dl ~pool (x - 2) in + let t1 = Domainslib.Task.await pool t1 in + let t2 = Domainslib.Task.await pool t2 in + t1 + t2) + let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let create_pool ~psize ~kind () = @@ -24,13 +61,31 @@ let create_pool ~psize ~kind () = | "pool" -> Ws_pool.create ~min:psize () | _ -> assert false -let run ~psize ~n ~seq ~niter ~kind () : unit = +let run ~psize ~n ~seq ~dl ~fj ~await ~niter ~kind () : unit = let pool = lazy (create_pool ~kind ~psize ()) in + let dl_pool = + lazy + (let n = Domain.recommended_domain_count () in + Printf.printf "use %d domains\n%!" n; + Domainslib.Task.setup_pool ~num_domains:n ()) + in for _i = 1 to niter do let res = if seq then ( Printf.printf "compute fib %d sequentially\n%!" n; fib_direct n + ) else if dl then ( + Printf.printf "compute fib %d with domainslib\n%!" n; + let (lazy pool) = dl_pool in + Domainslib.Task.run pool (fun () -> + Domainslib.Task.await pool @@ fib_dl ~pool n) + ) else if fj then ( + Printf.printf "compute fib %d using fork-join with pool size=%d\n%!" n + psize; + fib_fj ~on:(Lazy.force pool) n |> Fut.wait_block_exn + ) else if await then ( + Printf.printf "compute fib %d using await with pool size=%d\n%!" n psize; + fib_await ~on:(Lazy.force pool) n |> Fut.wait_block_exn ) else ( Printf.printf "compute fib %d with pool size=%d\n%!" n psize; fib ~on:(Lazy.force pool) n |> Fut.wait_block_exn @@ -38,7 +93,13 @@ let run ~psize ~n ~seq ~niter ~kind () : unit = in Printf.printf "fib %d = %d\n%!" n res done; - if not seq then Ws_pool.shutdown (Lazy.force pool) + + if seq then + () + else if dl then + Domainslib.Task.teardown_pool (Lazy.force dl_pool) + else + Ws_pool.shutdown (Lazy.force pool) let () = let n = ref 40 in @@ -46,12 +107,18 @@ let () = let seq = ref false in let niter = ref 3 in let kind = ref "pool" in + let dl = ref false in + let await = ref false in + let fj = ref false in let opts = [ "-psize", Arg.Set_int psize, " pool size"; "-n", Arg.Set_int n, " fib "; "-seq", Arg.Set seq, " sequential"; + "-dl", Arg.Set dl, " domainslib"; + "-fj", Arg.Set fj, " fork join"; "-niter", Arg.Set_int niter, " number of iterations"; + "-await", Arg.Set await, " use await"; "-cutoff", Arg.Set_int cutoff, " cutoff for sequential computation"; ( "-kind", Arg.Symbol ([ "pool"; "fifo" ], ( := ) kind), @@ -61,4 +128,5 @@ let () = in Arg.parse opts ignore ""; - run ~psize:!psize ~n:!n ~seq:!seq ~niter:!niter ~kind:!kind () + run ~psize:!psize ~n:!n ~fj:!fj ~seq:!seq ~await:!await ~dl:!dl ~niter:!niter + ~kind:!kind () From 08722691e84956a8669b386f60e3c5976f4ecae6 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 12:26:43 -0400 Subject: [PATCH 43/77] ws deque: try to reduce false sharing --- src/ws_deque_.ml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ws_deque_.ml b/src/ws_deque_.ml index f155aa85..137e1c15 100644 --- a/src/ws_deque_.ml +++ b/src/ws_deque_.ml @@ -35,7 +35,6 @@ end = struct | None -> assert false let[@inline] set (self : 'a t) (i : int) (x : 'a) : unit = - assert (i >= 0); Array.unsafe_set self.arr (i land ((1 lsl self.log_size) - 1)) (Some x) let grow (self : _ t) ~bottom ~top : 'a t = @@ -61,8 +60,11 @@ type 'a t = { } let create () : _ t = - let arr = CA.create ~log_size:4 () in - { top = A.make 0; top_cached = 0; bottom = A.make 0; arr = A.make arr } + let top = A.make 0 in + let arr = A.make @@ CA.create ~log_size:4 () in + (* allocate far from top to avoid false sharing *) + let bottom = A.make 0 in + { top; top_cached = 0; bottom; arr } let[@inline] size (self : _ t) : int = max 0 (A.get self.bottom - A.get self.top) From c044fb8fc93e4c59c0dfe8c56a3ae0f59cc5aea3 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 12:26:50 -0400 Subject: [PATCH 44/77] tweal to WS pool --- src/ws_pool.ml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index e3b8bb71..ac9f85da 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -98,7 +98,7 @@ exception Got_task of task type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task (** How many times in a row do we try to do work-stealing? *) -let steal_attempt_max_retry = 3 +let steal_attempt_max_retry = 2 (** Main loop for a worker thread. *) let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn @@ -157,9 +157,8 @@ let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn | Some task -> run_task task; raise_notrace Exit - | None -> - incr unsuccessful_steal_attempts; - Domain_.relax () + | None -> incr unsuccessful_steal_attempts + (* Domain_.relax () *) done; false with Exit -> true From 9e0a583a948562e48565e625a0e7785399032e8f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 14:46:59 -0400 Subject: [PATCH 45/77] chore: disable -dl/-seq benchs for now, too verbose --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3a6c7bf1..ee77ff8c 100644 --- a/Makefile +++ b/Makefile @@ -30,9 +30,9 @@ bench-fib: @echo running for N=$(N) dune build $(DUNE_OPTS_BENCH) benchs/fib_rec.exe hyperfine -L psize $(BENCH_PSIZE) -L kind $(BENCH_KIND) --warmup=1 \ - './_build/default/benchs/fib_rec.exe -seq -cutoff $(BENCH_CUTOFF) -niter $(NITER) -n $(N)' \ - './_build/default/benchs/fib_rec.exe -dl -cutoff $(BENCH_CUTOFF) -niter $(NITER) -n $(N)' \ './_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -kind={kind} -n $(N)' + #'./_build/default/benchs/fib_rec.exe -seq -cutoff $(BENCH_CUTOFF) -niter $(NITER) -n $(N)' \ + #'./_build/default/benchs/fib_rec.exe -dl -cutoff $(BENCH_CUTOFF) -niter $(NITER) -n $(N)' \ PI_NSTEPS?=100_000_000 PI_MODES?=seq,par1,forkjoin From 68fe7221b871f7d4486babcd67c7bc5c7d055944 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 14:47:30 -0400 Subject: [PATCH 46/77] suspend: remove additional parameter, always run tasks in handler --- src/fifo_pool.ml | 17 +++-------------- src/fork_join.ml | 6 +++--- src/fut.ml | 4 +--- src/suspend_.ml | 17 ++++++----------- src/suspend_.mli | 7 ++----- 5 files changed, 15 insertions(+), 36 deletions(-) diff --git a/src/fifo_pool.ml b/src/fifo_pool.ml index 920b36ed..044e0013 100644 --- a/src/fifo_pool.ml +++ b/src/fifo_pool.ml @@ -12,20 +12,9 @@ let[@inline] size_ (self : state) = Array.length self.threads let[@inline] num_tasks_ (self : state) : int = Bb_queue.size self.q (** Run [task] as is, on the pool. *) -let run_direct_ (self : state) (task : task) : unit = +let schedule_ (self : state) (task : task) : unit = try Bb_queue.push self.q task with Bb_queue.Closed -> raise Shutdown -let rec run_async_ (self : state) (task : task) : unit = - let task' () = - (* run [f()] and handle [suspend] in it *) - Suspend_.with_suspend task ~run:(fun ~with_handler task -> - if with_handler then - run_async_ self task - else - run_direct_ self task) - in - run_direct_ self task' - type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task : unit = @@ -34,7 +23,7 @@ let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task : unit = let run_task task : unit = let _ctx = before_task runner in (* run the task now, catching errors *) - (try task () + (try Suspend_.with_suspend task ~run:(fun task' -> schedule_ self task') with e -> let bt = Printexc.get_raw_backtrace () in on_exn e bt); @@ -98,7 +87,7 @@ let create ?(on_init_thread = default_thread_init_exit_) let runner = Runner.For_runner_implementors.create ~shutdown:(fun ~wait () -> shutdown_ pool ~wait) - ~run_async:(fun f -> run_async_ pool f) + ~run_async:(fun f -> schedule_ pool f) ~size:(fun () -> size_ pool) ~num_tasks:(fun () -> num_tasks_ pool) () diff --git a/src/fork_join.ml b/src/fork_join.ml index f1733514..ac5ba5d7 100644 --- a/src/fork_join.ml +++ b/src/fork_join.ml @@ -62,7 +62,7 @@ let both f g : _ * _ = let st = A.make { suspension = None; left = St_none; right = St_none } in let start_tasks ~run () : unit = - run ~with_handler:true (fun () -> + run (fun () -> try let res = f () in set_left_ st (St_some res) @@ -70,7 +70,7 @@ let both f g : _ * _ = let bt = Printexc.get_raw_backtrace () in set_left_ st (St_fail (e, bt))); - run ~with_handler:true (fun () -> + run (fun () -> try let res = g () in set_right_ st (St_some res) @@ -126,7 +126,7 @@ let for_ ?chunk_size n (f : int -> int -> unit) : unit = let len_range = min chunk_size (n - offset) in assert (offset + len_range <= n); - run ~with_handler:true (fun () -> task_for ~offset ~len_range); + run (fun () -> task_for ~offset ~len_range); i := !i + len_range done in diff --git a/src/fut.ml b/src/fut.ml index 0a5332ed..639a503b 100644 --- a/src/fut.ml +++ b/src/fut.ml @@ -381,9 +381,7 @@ let await (fut : 'a t) : 'a = Suspend_.handle = (fun ~run k -> on_result fut (function - | Ok _ -> - (* run without handler, we're already in a deep effect *) - run ~with_handler:false (fun () -> k (Ok ())) + | Ok _ -> run (fun () -> k (Ok ())) | Error (exn, bt) -> (* fail continuation immediately *) k (Error (exn, bt)))); diff --git a/src/suspend_.ml b/src/suspend_.ml index 19accc9c..88eacb2c 100644 --- a/src/suspend_.ml +++ b/src/suspend_.ml @@ -3,9 +3,7 @@ module A = Atomic_ type suspension = (unit, exn * Printexc.raw_backtrace) result -> unit type task = unit -> unit -type suspension_handler = { - handle: run:(with_handler:bool -> task -> unit) -> suspension -> unit; -} +type suspension_handler = { handle: run:(task -> unit) -> suspension -> unit } [@@unboxed] [@@@ifge 5.0] @@ -15,8 +13,7 @@ type _ Effect.t += Suspend : suspension_handler -> unit Effect.t let[@inline] suspend h = Effect.perform (Suspend h) -let with_suspend ~(run : with_handler:bool -> task -> unit) (f : unit -> unit) : - unit = +let with_suspend ~(run : task -> unit) (f : unit -> unit) : unit = let module E = Effect.Deep in (* effect handler *) let effc : type e. e Effect.t -> ((e, _) E.continuation -> _) option = @@ -37,14 +34,12 @@ let with_suspend ~(run : with_handler:bool -> task -> unit) (f : unit -> unit) : (* DLA interop *) let prepare_for_await () : Dla_.t = (* current state *) - let st : ((with_handler:bool -> task -> unit) * suspension) option A.t = - A.make None - in + let st : ((task -> unit) * suspension) option A.t = A.make None in let release () : unit = match A.exchange st None with | None -> () - | Some (run, k) -> run ~with_handler:true (fun () -> k (Ok ())) + | Some (run, k) -> run (fun () -> k (Ok ())) and await () : unit = suspend { handle = (fun ~run k -> A.set st (Some (run, k))) } in @@ -55,7 +50,7 @@ let prepare_for_await () : Dla_.t = [@@@ocaml.alert "+unstable"] [@@@else_] -let with_suspend ~run:_ f = f () -let prepare_for_await () = { Dla_.release = ignore; await = ignore } +let[@inline] with_suspend ~run:_ f = f () +let[@inline] prepare_for_await () = { Dla_.release = ignore; await = ignore } [@@@endif] diff --git a/src/suspend_.mli b/src/suspend_.mli index 716e9b8a..77cc06af 100644 --- a/src/suspend_.mli +++ b/src/suspend_.mli @@ -8,9 +8,7 @@ type suspension = (unit, exn * Printexc.raw_backtrace) result -> unit type task = unit -> unit -type suspension_handler = { - handle: run:(with_handler:bool -> task -> unit) -> suspension -> unit; -} +type suspension_handler = { handle: run:(task -> unit) -> suspension -> unit } [@@unboxed] (** The handler that knows what to do with the suspended computation. @@ -53,8 +51,7 @@ val suspend : suspension_handler -> unit val prepare_for_await : unit -> Dla_.t (** Our stub for DLA. Unstable. *) -val with_suspend : - run:(with_handler:bool -> task -> unit) -> (unit -> unit) -> unit +val with_suspend : run:(task -> unit) -> (unit -> unit) -> unit (** [with_suspend ~run f] runs [f()] in an environment where [suspend] will work. If [f()] suspends with suspension handler [h], this calls [h ~run k] where [k] is the suspension. From 359ec0352b252a0bb3f378d24526836f8442970c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 14:47:59 -0400 Subject: [PATCH 47/77] small change to test --- test/t_fib_rec.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/t_fib_rec.ml b/test/t_fib_rec.ml index de704f34..286e6aac 100644 --- a/test/t_fib_rec.ml +++ b/test/t_fib_rec.ml @@ -25,7 +25,7 @@ let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let fib_40 : int lazy_t = lazy (let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "fib40" in - let pool = Ws_pool.create ~min:8 () in + let pool = Fifo_pool.create ~min:8 () in let r = fib ~on:pool 40 |> Fut.wait_block_exn in Ws_pool.shutdown pool; r) From b4ddd82ee82fcbeac2b9980b3e6586b6222ebaa4 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 14:48:13 -0400 Subject: [PATCH 48/77] ws pool: use non atomic boolean to reduce number of wakeups; refactor --- src/ws_pool.ml | 267 +++++++++++++++++++++++-------------------------- 1 file changed, 125 insertions(+), 142 deletions(-) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index ac9f85da..b0e055cd 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -7,21 +7,24 @@ let ( let@ ) = ( @@ ) type worker_state = { mutable thread: Thread.t; q: task WSQ.t; (** Work stealing queue *) + mutable work_steal_offset: int; (** Current offset for work stealing *) } (** State for a given worker. Only this worker is allowed to push into the queue, but other workers can come and steal from it if they're idle. *) -type mut_cond = { - mutex: Mutex.t; - cond: Condition.t; -} +type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task type state = { active: bool A.t; (** Becomes [false] when the pool is shutdown. *) workers: worker_state array; (** Fixed set of workers. *) main_q: task Queue.t; (** Main queue for tasks coming from the outside *) - mc: mut_cond; (** Used to block on [main_q] *) + mutable n_waiting: int; (* protected by mutex *) + mutable n_waiting_nonzero: bool; (** [n_waiting > 0] *) + mutex: Mutex.t; + cond: Condition.t; + on_exn: exn -> Printexc.raw_backtrace -> unit; + around_task: around_task; } (** internal state *) @@ -29,14 +32,13 @@ let[@inline] size_ (self : state) = Array.length self.workers let num_tasks_ (self : state) : int = let n = ref 0 in - Mutex.lock self.mc.mutex; n := Queue.length self.main_q; - Mutex.unlock self.mc.mutex; Array.iter (fun w -> n := !n + WSQ.size w.q) self.workers; !n exception Got_worker of worker_state +(* FIXME: replace with TLS *) let[@inline] find_current_worker_ (self : state) : worker_state option = let self_id = Thread.id @@ Thread.self () in try @@ -48,159 +50,130 @@ let[@inline] find_current_worker_ (self : state) : worker_state option = None with Got_worker w -> Some w +(** Try to wake up a waiter, if there's any. *) +let[@inline] try_wake_someone_ (self : state) : unit = + if self.n_waiting_nonzero then ( + Mutex.lock self.mutex; + Condition.broadcast self.cond; + Mutex.unlock self.mutex + ) + (** Run [task] as is, on the pool. *) -let run_direct_ (self : state) (w : worker_state option) (task : task) : unit = +let schedule_task_ (self : state) (w : worker_state option) (task : task) : unit + = + (* Printf.printf "schedule task now (%d)\n%!" (Thread.id @@ Thread.self ()); *) match w with | Some w -> WSQ.push w.q task; - - (* see if we need to wakeup other workers to come and steal from us *) - Mutex.lock self.mc.mutex; - if Queue.is_empty self.main_q then Condition.broadcast self.mc.cond; - Mutex.unlock self.mc.mutex + try_wake_someone_ self | None -> if A.get self.active then ( (* push into the main queue *) - Mutex.lock self.mc.mutex; - let was_empty = Queue.is_empty self.main_q in + Mutex.lock self.mutex; Queue.push task self.main_q; - if was_empty then Condition.broadcast self.mc.cond; - Mutex.unlock self.mc.mutex + if self.n_waiting_nonzero then Condition.broadcast self.cond; + Mutex.unlock self.mutex ) else (* notify the caller that scheduling tasks is no longer permitted *) raise Shutdown -let run_async_ (self : state) (task : task) : unit = - (* run [task] inside a suspension handler *) - let rec run_async_in_suspend_rec_ (task : task) = - let task_with_suspend_ () = - (* run [f()] and handle [suspend] in it *) - Suspend_.with_suspend task ~run:(fun ~with_handler task' -> - if with_handler then - run_async_in_suspend_rec_ task' - else ( - let w = find_current_worker_ self in - run_direct_ self w task' - )) - in +(** Run this task, now. Must be called from a worker. *) +let run_task_now_ (self : state) ~runner task : unit = + (* Printf.printf "run task now (%d)\n%!" (Thread.id @@ Thread.self ()); *) + let (AT_pair (before_task, after_task)) = self.around_task in + let _ctx = before_task runner in + (* run the task now, catching errors *) + (try + (* run [task()] and handle [suspend] in it *) + Suspend_.with_suspend task ~run:(fun task' -> + let w = find_current_worker_ self in + schedule_task_ self w task') + with e -> + let bt = Printexc.get_raw_backtrace () in + self.on_exn e bt); + after_task runner _ctx - (* schedule on current worker, if run from a worker *) - let w = find_current_worker_ self in - run_direct_ self w task_with_suspend_ - in - run_async_in_suspend_rec_ task +let[@inline] run_async_ (self : state) (task : task) : unit = + let w = find_current_worker_ self in + schedule_task_ self w task + +(* TODO: function to schedule many tasks from the outside. + - build a queue + - lock + - queue transfer + - wakeup all (broadcast) + - unlock *) let run = run_async -exception Got_task of task +(** Wait on condition. Precondition: we hold the mutex. *) +let[@inline] wait_ (self : state) : unit = + self.n_waiting <- self.n_waiting + 1; + if self.n_waiting = 1 then self.n_waiting_nonzero <- true; + Condition.wait self.cond self.mutex; + self.n_waiting <- self.n_waiting - 1; + if self.n_waiting = 0 then self.n_waiting_nonzero <- false -type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task +(** Try to steal a task from the worker [w] *) +let try_to_steal_work_once_ (self : state) (w : worker_state) : task option = + w.work_steal_offset <- (w.work_steal_offset + 1) mod Array.length self.workers; -(** How many times in a row do we try to do work-stealing? *) -let steal_attempt_max_retry = 2 + (* if we're pointing to [w], skip to the next worker as + it's useless to steal from oneself *) + if Array.unsafe_get self.workers w.work_steal_offset == w then + w.work_steal_offset <- + (w.work_steal_offset + 1) mod Array.length self.workers; + + let w' = Array.unsafe_get self.workers w.work_steal_offset in + WSQ.steal w'.q + +(** Try to steal work from several other workers. *) +let try_to_steal_work_loop (self : state) ~runner w : bool = + if size_ self = 1 then + (* no stealing for single thread pool *) + false + else ( + let has_stolen = ref false in + let n_retries_left = ref (size_ self - 1) in + + while !n_retries_left > 0 do + match try_to_steal_work_once_ self w with + | Some task -> + run_task_now_ self ~runner task; + has_stolen := true; + n_retries_left := 0 + | None -> decr n_retries_left + done; + !has_stolen + ) + +(** Worker runs tasks from its queue until none remains *) +let worker_run_self_tasks_ (self : state) ~runner w : unit = + let continue = ref true in + while !continue && A.get self.active do + match WSQ.pop w.q with + | Some task -> run_task_now_ self ~runner task + | None -> continue := false + done (** Main loop for a worker thread. *) -let worker_thread_ (self : state) (runner : t) (w : worker_state) ~on_exn - ~around_task : unit = - let (AT_pair (before_task, after_task)) = around_task in - - (* run this task. *) - let run_task task : unit = - let _ctx = before_task runner in - (* run the task now, catching errors *) - (try task () - with e -> - let bt = Printexc.get_raw_backtrace () in - on_exn e bt); - after_task runner _ctx - in - - let run_self_tasks_ () = +let worker_thread_ (self : state) ~(runner : t) (w : worker_state) : unit = + let main_loop () : unit = let continue = ref true in - while !continue do - match WSQ.pop w.q with - | Some task -> run_task task - | None -> continue := false - done - in + while !continue && A.get self.active do + worker_run_self_tasks_ self ~runner w; - let work_steal_offset = ref 0 in - - (* get a task from another worker *) - let try_to_steal_work () : task option = - assert (size_ self > 1); - - work_steal_offset := (!work_steal_offset + 1) mod Array.length self.workers; - - (* if we're pointing to [w], skip to the next worker as - it's useless to steal from oneself *) - if self.workers.(!work_steal_offset) == w then - work_steal_offset := - (!work_steal_offset + 1) mod Array.length self.workers; - - let w' = self.workers.(!work_steal_offset) in - assert (w != w'); - WSQ.steal w'.q - in - - (* try to steal work multiple times *) - let try_to_steal_work_loop () : bool = - if size_ self = 1 then - (* no stealing for single thread pool *) - false - else ( - try - let unsuccessful_steal_attempts = ref 0 in - while !unsuccessful_steal_attempts < steal_attempt_max_retry do - match try_to_steal_work () with - | Some task -> - run_task task; - raise_notrace Exit - | None -> incr unsuccessful_steal_attempts - (* Domain_.relax () *) - done; - false - with Exit -> true - ) - in - - let get_task_from_main_queue_block () : task option = - try - Mutex.lock self.mc.mutex; - while true do - match Queue.pop self.main_q with - | exception Queue.Empty -> - if A.get self.active then - Condition.wait self.mc.cond self.mc.mutex - else ( - (* empty queue and we're closed, time to exit *) - Mutex.unlock self.mc.mutex; - raise_notrace Exit - ) - | task -> - Mutex.unlock self.mc.mutex; - raise_notrace (Got_task task) - done; - (* unreachable *) - assert false - with - | Got_task t -> Some t - | Exit -> None - in - - let main_loop () = - let continue = ref true in - while !continue do - run_self_tasks_ (); - - let did_steal = try_to_steal_work_loop () in + let did_steal = try_to_steal_work_loop self ~runner w in if not did_steal then ( - match get_task_from_main_queue_block () with - | None -> - (* main queue is closed *) - continue := false - | Some task -> run_task task + Mutex.lock self.mutex; + match Queue.pop self.main_q with + | task -> + Mutex.unlock self.mutex; + run_task_now_ self ~runner task + | exception Queue.Empty -> + wait_ self; + Mutex.unlock self.mutex ) done; assert (WSQ.size w.q = 0) @@ -214,9 +187,9 @@ let default_thread_init_exit_ ~dom_id:_ ~t_id:_ () = () let shutdown_ ~wait (self : state) : unit = if A.exchange self.active false then ( - Mutex.lock self.mc.mutex; - Condition.broadcast self.mc.cond; - Mutex.unlock self.mc.mutex; + Mutex.lock self.mutex; + Condition.broadcast self.cond; + Mutex.unlock self.mutex; if wait then Array.iter (fun w -> Thread.join w.thread) self.workers ) @@ -251,7 +224,12 @@ let create ?(on_init_thread = default_thread_init_exit_) let workers : worker_state array = let dummy = Thread.self () in - Array.init num_threads (fun _ -> { thread = dummy; q = WSQ.create () }) + Array.init num_threads (fun i -> + { + thread = dummy; + q = WSQ.create (); + work_steal_offset = (i + 1) mod num_threads; + }) in let pool = @@ -259,7 +237,12 @@ let create ?(on_init_thread = default_thread_init_exit_) active = A.make true; workers; main_q = Queue.create (); - mc = { mutex = Mutex.create (); cond = Condition.create () }; + n_waiting = 0; + n_waiting_nonzero = true; + mutex = Mutex.create (); + cond = Condition.create (); + around_task; + on_exn; } in @@ -287,7 +270,7 @@ let create ?(on_init_thread = default_thread_init_exit_) let t_id = Thread.id thread in on_init_thread ~dom_id:dom_idx ~t_id (); - let run () = worker_thread_ pool runner w ~on_exn ~around_task in + let run () = worker_thread_ pool ~runner w in (* now run the main loop *) Fun.protect run ~finally:(fun () -> From e67bffeca5adaa9caedd5645049f711a3c61cea0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 15:18:50 -0400 Subject: [PATCH 49/77] ws_pool: use TLS for quick worker storage access; reduce contention --- dune-project | 1 + moonpool.opam | 1 + src/dune | 3 ++ src/moonpool.ml | 1 + src/moonpool.mli | 1 + src/thread_local_storage.mli | 21 +++++++++ src/thread_local_storage.real.ml | 80 ++++++++++++++++++++++++++++++++ src/thread_local_storage.stub.ml | 3 ++ src/ws_pool.ml | 37 ++++++++------- 9 files changed, 130 insertions(+), 18 deletions(-) create mode 100644 src/thread_local_storage.mli create mode 100644 src/thread_local_storage.real.ml create mode 100644 src/thread_local_storage.stub.ml diff --git a/dune-project b/dune-project index ddf01fef..f6ace0a6 100644 --- a/dune-project +++ b/dune-project @@ -28,6 +28,7 @@ (>= 1.9.0) :with-test))) (depopts + thread-local-storage (domain-local-await (>= 0.2))) (tags (thread pool domain futures fork-join))) diff --git a/moonpool.opam b/moonpool.opam index 547b18c1..62bdcf6e 100644 --- a/moonpool.opam +++ b/moonpool.opam @@ -19,6 +19,7 @@ depends: [ "mdx" {>= "1.9.0" & with-test} ] depopts: [ + "thread-local-storage" "domain-local-await" {>= "0.2"} ] build: [ diff --git a/src/dune b/src/dune index 313191a5..5275ab40 100644 --- a/src/dune +++ b/src/dune @@ -6,6 +6,9 @@ (action (run %{project_root}/src/cpp/cpp.exe %{input-file}))) (libraries threads either + (select thread_local_storage.ml from + (thread-local-storage -> thread_local_storage.stub.ml) + (-> thread_local_storage.real.ml)) (select dla_.ml from (domain-local-await -> dla_.real.ml) ( -> dla_.dummy.ml)))) diff --git a/src/moonpool.ml b/src/moonpool.ml index ed1af755..b4118536 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -13,6 +13,7 @@ module Pool = Fifo_pool module Ws_pool = Ws_pool module Runner = Runner module Fifo_pool = Fifo_pool +module Thread_local_storage = Thread_local_storage module Private = struct module Ws_deque_ = Ws_deque_ diff --git a/src/moonpool.mli b/src/moonpool.mli index 4028e858..b744dc51 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -26,6 +26,7 @@ module Lock = Lock module Fut = Fut module Chan = Chan module Fork_join = Fork_join +module Thread_local_storage = Thread_local_storage (** A simple blocking queue. diff --git a/src/thread_local_storage.mli b/src/thread_local_storage.mli new file mode 100644 index 00000000..b7b50706 --- /dev/null +++ b/src/thread_local_storage.mli @@ -0,0 +1,21 @@ +(** Thread local storage *) + +(* TODO: alias this to the library if present *) + +type 'a key +(** A TLS key for values of type ['a]. This allows the + storage of a single value of type ['a] per thread. *) + +val new_key : (unit -> 'a) -> 'a key +(** Allocate a new, generative key. + When the key is used for the first time on a thread, + the function is called to produce it. + + This should only ever be called at toplevel to produce + constants, do not use it in a loop. *) + +val get : 'a key -> 'a +(** Get the value for the current thread. *) + +val set : 'a key -> 'a -> unit +(** Set the value for the current thread. *) diff --git a/src/thread_local_storage.real.ml b/src/thread_local_storage.real.ml new file mode 100644 index 00000000..2d33f62c --- /dev/null +++ b/src/thread_local_storage.real.ml @@ -0,0 +1,80 @@ +(* see: https://discuss.ocaml.org/t/a-hack-to-implement-efficient-tls-thread-local-storage/13264 *) + +(* sanity check *) +let () = assert (Obj.field (Obj.repr (Thread.self ())) 1 = Obj.repr ()) + +type 'a key = { + index: int; (** Unique index for this key. *) + compute: unit -> 'a; + (** Initializer for values for this key. Called at most + once per thread. *) +} + +(** Counter used to allocate new keys *) +let counter = Atomic.make 0 + +(** Value used to detect a TLS slot that was not initialized yet *) +let sentinel_value_for_uninit_tls_ () : Obj.t = Obj.repr counter + +let new_key compute : _ key = + let index = Atomic.fetch_and_add counter 1 in + { index; compute } + +type thread_internal_state = { + _id: int; (** Thread ID (here for padding reasons) *) + mutable tls: Obj.t; (** Our data, stowed away in this unused field *) +} +(** A partial representation of the internal type [Thread.t], allowing + us to access the second field (unused after the thread + has started) and stash TLS data in it. *) + +let ceil_pow_2_minus_1 (n : int) : int = + let n = n lor (n lsr 1) in + let n = n lor (n lsr 2) in + let n = n lor (n lsr 4) in + let n = n lor (n lsr 8) in + let n = n lor (n lsr 16) in + if Sys.int_size > 32 then + n lor (n lsr 32) + else + n + +(** Grow the array so that [index] is valid. *) +let[@inline never] grow_tls (old : Obj.t array) (index : int) : Obj.t array = + let new_length = ceil_pow_2_minus_1 (index + 1) in + let new_ = Array.make new_length (sentinel_value_for_uninit_tls_ ()) in + Array.blit old 0 new_ 0 (Array.length old); + new_ + +let[@inline] get_tls_ (index : int) : Obj.t array = + let thread : thread_internal_state = Obj.magic (Thread.self ()) in + let tls = thread.tls in + if Obj.is_int tls then ( + let new_tls = grow_tls [||] index in + thread.tls <- Obj.magic new_tls; + new_tls + ) else ( + let tls = (Obj.magic tls : Obj.t array) in + if index < Array.length tls then + tls + else ( + let new_tls = grow_tls tls index in + thread.tls <- Obj.magic new_tls; + new_tls + ) + ) + +let get key = + let tls = get_tls_ key.index in + let value = Array.unsafe_get tls key.index in + if value != sentinel_value_for_uninit_tls_ () then + Obj.magic value + else ( + let value = key.compute () in + Array.unsafe_set tls key.index (Obj.repr (Sys.opaque_identity value)); + value + ) + +let set key value = + let tls = get_tls_ key.index in + Array.unsafe_set tls key.index (Obj.repr (Sys.opaque_identity value)) diff --git a/src/thread_local_storage.stub.ml b/src/thread_local_storage.stub.ml new file mode 100644 index 00000000..88712b6d --- /dev/null +++ b/src/thread_local_storage.stub.ml @@ -0,0 +1,3 @@ + +(* just defer to library *) +include Thread_local_storage diff --git a/src/ws_pool.ml b/src/ws_pool.ml index b0e055cd..4d1e0c70 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -1,5 +1,6 @@ module WSQ = Ws_deque_ module A = Atomic_ +module TLS = Thread_local_storage include Runner let ( let@ ) = ( @@ ) @@ -36,25 +37,20 @@ let num_tasks_ (self : state) : int = Array.iter (fun w -> n := !n + WSQ.size w.q) self.workers; !n -exception Got_worker of worker_state +(** TLS, used by worker to store their specific state + and be able to retrieve it from tasks when we schedule new + sub-tasks. *) +let k_worker_state : worker_state option ref TLS.key = + TLS.new_key (fun () -> ref None) -(* FIXME: replace with TLS *) -let[@inline] find_current_worker_ (self : state) : worker_state option = - let self_id = Thread.id @@ Thread.self () in - try - (* see if we're in one of the worker threads *) - for i = 0 to Array.length self.workers - 1 do - let w = self.workers.(i) in - if Thread.id w.thread = self_id then raise_notrace (Got_worker w) - done; - None - with Got_worker w -> Some w +let[@inline] find_current_worker_ () : worker_state option = + !(TLS.get k_worker_state) (** Try to wake up a waiter, if there's any. *) let[@inline] try_wake_someone_ (self : state) : unit = if self.n_waiting_nonzero then ( Mutex.lock self.mutex; - Condition.broadcast self.cond; + Condition.signal self.cond; Mutex.unlock self.mutex ) @@ -71,7 +67,7 @@ let schedule_task_ (self : state) (w : worker_state option) (task : task) : unit (* push into the main queue *) Mutex.lock self.mutex; Queue.push task self.main_q; - if self.n_waiting_nonzero then Condition.broadcast self.cond; + if self.n_waiting_nonzero then Condition.signal self.cond; Mutex.unlock self.mutex ) else (* notify the caller that scheduling tasks is no @@ -87,7 +83,7 @@ let run_task_now_ (self : state) ~runner task : unit = (try (* run [task()] and handle [suspend] in it *) Suspend_.with_suspend task ~run:(fun task' -> - let w = find_current_worker_ self in + let w = find_current_worker_ () in schedule_task_ self w task') with e -> let bt = Printexc.get_raw_backtrace () in @@ -95,7 +91,7 @@ let run_task_now_ (self : state) ~runner task : unit = after_task runner _ctx let[@inline] run_async_ (self : state) (task : task) : unit = - let w = find_current_worker_ self in + let w = find_current_worker_ () in schedule_task_ self w task (* TODO: function to schedule many tasks from the outside. @@ -140,6 +136,7 @@ let try_to_steal_work_loop (self : state) ~runner w : bool = while !n_retries_left > 0 do match try_to_steal_work_once_ self w with | Some task -> + try_wake_someone_ self; run_task_now_ self ~runner task; has_stolen := true; n_retries_left := 0 @@ -153,12 +150,16 @@ let worker_run_self_tasks_ (self : state) ~runner w : unit = let continue = ref true in while !continue && A.get self.active do match WSQ.pop w.q with - | Some task -> run_task_now_ self ~runner task + | Some task -> + try_wake_someone_ self; + run_task_now_ self ~runner task | None -> continue := false done (** Main loop for a worker thread. *) let worker_thread_ (self : state) ~(runner : t) (w : worker_state) : unit = + TLS.get k_worker_state := Some w; + let main_loop () : unit = let continue = ref true in while !continue && A.get self.active do @@ -172,7 +173,7 @@ let worker_thread_ (self : state) ~(runner : t) (w : worker_state) : unit = Mutex.unlock self.mutex; run_task_now_ self ~runner task | exception Queue.Empty -> - wait_ self; + if A.get self.active then wait_ self; Mutex.unlock self.mutex ) done; From aa7906eb2cef9a2b8df5cd52dd10c5e496a49f91 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 16:05:43 -0400 Subject: [PATCH 50/77] perf TLS: inline --- src/thread_local_storage.real.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/thread_local_storage.real.ml b/src/thread_local_storage.real.ml index 2d33f62c..250982ee 100644 --- a/src/thread_local_storage.real.ml +++ b/src/thread_local_storage.real.ml @@ -14,7 +14,7 @@ type 'a key = { let counter = Atomic.make 0 (** Value used to detect a TLS slot that was not initialized yet *) -let sentinel_value_for_uninit_tls_ () : Obj.t = Obj.repr counter +let[@inline] sentinel_value_for_uninit_tls_ () : Obj.t = Obj.repr counter let new_key compute : _ key = let index = Atomic.fetch_and_add counter 1 in From aba0d84ecfca7943cc3ef96be9e2ba1aec69bd7f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 16:05:52 -0400 Subject: [PATCH 51/77] ws pool: random stealing; rework main state machine in the state machine, after waiting, we check the main queue, else we directly go to stealing. --- src/ws_pool.ml | 101 +++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 53 deletions(-) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index 4d1e0c70..ca5d2500 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -8,7 +8,7 @@ let ( let@ ) = ( @@ ) type worker_state = { mutable thread: Thread.t; q: task WSQ.t; (** Work stealing queue *) - mutable work_steal_offset: int; (** Current offset for work stealing *) + rng: Random.State.t; } (** State for a given worker. Only this worker is allowed to push into the queue, but other workers @@ -111,39 +111,26 @@ let[@inline] wait_ (self : state) : unit = self.n_waiting <- self.n_waiting - 1; if self.n_waiting = 0 then self.n_waiting_nonzero <- false -(** Try to steal a task from the worker [w] *) +exception Got_task of task + +(** Try to steal a task *) let try_to_steal_work_once_ (self : state) (w : worker_state) : task option = - w.work_steal_offset <- (w.work_steal_offset + 1) mod Array.length self.workers; + let init = Random.State.int w.rng (Array.length self.workers) in - (* if we're pointing to [w], skip to the next worker as - it's useless to steal from oneself *) - if Array.unsafe_get self.workers w.work_steal_offset == w then - w.work_steal_offset <- - (w.work_steal_offset + 1) mod Array.length self.workers; + try + for i = 0 to Array.length self.workers - 1 do + let w' = + Array.unsafe_get self.workers ((i + init) mod Array.length self.workers) + in - let w' = Array.unsafe_get self.workers w.work_steal_offset in - WSQ.steal w'.q - -(** Try to steal work from several other workers. *) -let try_to_steal_work_loop (self : state) ~runner w : bool = - if size_ self = 1 then - (* no stealing for single thread pool *) - false - else ( - let has_stolen = ref false in - let n_retries_left = ref (size_ self - 1) in - - while !n_retries_left > 0 do - match try_to_steal_work_once_ self w with - | Some task -> - try_wake_someone_ self; - run_task_now_ self ~runner task; - has_stolen := true; - n_retries_left := 0 - | None -> decr n_retries_left + if w != w' then ( + match WSQ.steal w'.q with + | Some t -> raise_notrace (Got_task t) + | None -> () + ) done; - !has_stolen - ) + None + with Got_task t -> Some t (** Worker runs tasks from its queue until none remains *) let worker_run_self_tasks_ (self : state) ~runner w : unit = @@ -160,29 +147,41 @@ let worker_run_self_tasks_ (self : state) ~runner w : unit = let worker_thread_ (self : state) ~(runner : t) (w : worker_state) : unit = TLS.get k_worker_state := Some w; - let main_loop () : unit = - let continue = ref true in - while !continue && A.get self.active do + let rec main () : unit = + if A.get self.active then ( worker_run_self_tasks_ self ~runner w; + try_steal () + ) + and run_task task : unit = + run_task_now_ self ~runner task; + main () + and try_steal () = + if A.get self.active then ( + match try_to_steal_work_once_ self w with + | Some task -> run_task task + | None -> wait () + ) + and wait () = + Mutex.lock self.mutex; + match Queue.pop self.main_q with + | task -> + Mutex.unlock self.mutex; + run_task task + | exception Queue.Empty -> + (* wait here *) + if A.get self.active then wait_ self; - let did_steal = try_to_steal_work_loop self ~runner w in - if not did_steal then ( - Mutex.lock self.mutex; - match Queue.pop self.main_q with - | task -> - Mutex.unlock self.mutex; - run_task_now_ self ~runner task - | exception Queue.Empty -> - if A.get self.active then wait_ self; - Mutex.unlock self.mutex - ) - done; - assert (WSQ.size w.q = 0) + (* see if a task became available *) + let task = try Some (Queue.pop self.main_q) with Queue.Empty -> None in + Mutex.unlock self.mutex; + + (match task with + | Some t -> run_task t + | None -> try_steal ()) in (* handle domain-local await *) - Dla_.using ~prepare_for_await:Suspend_.prepare_for_await - ~while_running:main_loop + Dla_.using ~prepare_for_await:Suspend_.prepare_for_await ~while_running:main let default_thread_init_exit_ ~dom_id:_ ~t_id:_ () = () @@ -226,11 +225,7 @@ let create ?(on_init_thread = default_thread_init_exit_) let workers : worker_state array = let dummy = Thread.self () in Array.init num_threads (fun i -> - { - thread = dummy; - q = WSQ.create (); - work_steal_offset = (i + 1) mod num_threads; - }) + { thread = dummy; q = WSQ.create (); rng = Random.State.make [| i |] }) in let pool = From ddf394be904523bd2780487a6fc61ec23f9584c8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 16:06:55 -0400 Subject: [PATCH 52/77] chore: handpick set of benchmarks --- Makefile | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ee77ff8c..0995b509 100644 --- a/Makefile +++ b/Makefile @@ -29,8 +29,19 @@ BENCH_CUTOFF?=20 bench-fib: @echo running for N=$(N) dune build $(DUNE_OPTS_BENCH) benchs/fib_rec.exe - hyperfine -L psize $(BENCH_PSIZE) -L kind $(BENCH_KIND) --warmup=1 \ - './_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -kind={kind} -n $(N)' + + hyperfine --warmup=1 \ + './_build/default/benchs/fib_rec.exe -n $(N) -cutoff $(BENCH_CUTOFF) -niter $(NITER) -seq' \ + './_build/default/benchs/fib_rec.exe -n $(N) -cutoff $(BENCH_CUTOFF) -niter $(NITER) -dl' \ + './_build/default/benchs/fib_rec.exe -n $(N) -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize=20 -kind=pool -fj' \ + './_build/default/benchs/fib_rec.exe -n $(N) -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize=20 -kind=pool -await' \ + './_build/default/benchs/fib_rec.exe -n $(N) -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize=4 -kind=fifo' \ + './_build/default/benchs/fib_rec.exe -n $(N) -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize=4 -kind=pool' \ + './_build/default/benchs/fib_rec.exe -n $(N) -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize=8 -kind=fifo' \ + './_build/default/benchs/fib_rec.exe -n $(N) -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize=16 -kind=pool' + + #hyperfine -L psize $(BENCH_PSIZE) -L kind $(BENCH_KIND) --warmup=1 \ + # './_build/default/benchs/fib_rec.exe -cutoff $(BENCH_CUTOFF) -niter $(NITER) -psize={psize} -kind={kind} -n $(N)' #'./_build/default/benchs/fib_rec.exe -seq -cutoff $(BENCH_CUTOFF) -niter $(NITER) -n $(N)' \ #'./_build/default/benchs/fib_rec.exe -dl -cutoff $(BENCH_CUTOFF) -niter $(NITER) -n $(N)' \ From bfcf7f774e7868dbe1320c5a7cbc091ea42025e0 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 16:16:49 -0400 Subject: [PATCH 53/77] chore: handpick combinations for bench-pi --- Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0995b509..1d7ec227 100644 --- a/Makefile +++ b/Makefile @@ -51,8 +51,12 @@ PI_KIND?=fifo,pool bench-pi: @echo running for N=$(PI_NSTEPS) dune build $(DUNE_OPTS_BENCH) benchs/pi.exe - hyperfine -L mode $(PI_MODES) -L kind $(PI_KIND) --warmup=1 \ - './_build/default/benchs/pi.exe -mode={mode} -kind={kind} -n $(PI_NSTEPS)' + hyperfine --warmup=1 \ + './_build/default/benchs/pi.exe -n $(PI_NSTEPS) -mode=seq' \ + './_build/default/benchs/pi.exe -n $(PI_NSTEPS) -j 8 -mode par1 -kind=pool' \ + './_build/default/benchs/pi.exe -n $(PI_NSTEPS) -j 8 -mode par1 -kind=fifo' \ + './_build/default/benchs/pi.exe -n $(PI_NSTEPS) -j 16 -mode forkjoin -kind=pool' \ + './_build/default/benchs/pi.exe -n $(PI_NSTEPS) -j 20 -mode forkjoin -kind=pool' .PHONY: test clean bench-fib bench-pi From 052b70027a55637871ee8b4ffe0feb0a811159f9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 16:17:56 -0400 Subject: [PATCH 54/77] compat 4.08 --- src/thread_local_storage.real.ml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/thread_local_storage.real.ml b/src/thread_local_storage.real.ml index 250982ee..70d7a558 100644 --- a/src/thread_local_storage.real.ml +++ b/src/thread_local_storage.real.ml @@ -1,5 +1,7 @@ (* see: https://discuss.ocaml.org/t/a-hack-to-implement-efficient-tls-thread-local-storage/13264 *) +module A = Atomic_ + (* sanity check *) let () = assert (Obj.field (Obj.repr (Thread.self ())) 1 = Obj.repr ()) @@ -11,13 +13,13 @@ type 'a key = { } (** Counter used to allocate new keys *) -let counter = Atomic.make 0 +let counter = A.make 0 (** Value used to detect a TLS slot that was not initialized yet *) let[@inline] sentinel_value_for_uninit_tls_ () : Obj.t = Obj.repr counter let new_key compute : _ key = - let index = Atomic.fetch_and_add counter 1 in + let index = A.fetch_and_add counter 1 in { index; compute } type thread_internal_state = { From def384b4f83e3d9b4a00ff0fe7791971c4a20091 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Fri, 27 Oct 2023 16:18:24 -0400 Subject: [PATCH 55/77] fix warning --- src/suspend_.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/suspend_.ml b/src/suspend_.ml index 88eacb2c..6555b6bc 100644 --- a/src/suspend_.ml +++ b/src/suspend_.ml @@ -1,5 +1,3 @@ -module A = Atomic_ - type suspension = (unit, exn * Printexc.raw_backtrace) result -> unit type task = unit -> unit @@ -9,6 +7,8 @@ type suspension_handler = { handle: run:(task -> unit) -> suspension -> unit } [@@@ifge 5.0] [@@@ocaml.alert "-unstable"] +module A = Atomic_ + type _ Effect.t += Suspend : suspension_handler -> unit Effect.t let[@inline] suspend h = Effect.perform (Suspend h) From 7a36783e8b57fe7b52d1cec80f855128d95a4cfd Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 28 Oct 2023 11:59:20 -0400 Subject: [PATCH 56/77] perf: also use the main domain, along with n-1 other ones we always keep a thread alive on the main domain as a worker for new tasks, but other domains can still come and go to manage resources properly in case a pool is started and used only for a short while. --- src/d_pool_.ml | 12 +++++++++--- src/domain_.ml | 2 ++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/d_pool_.ml b/src/d_pool_.ml index fb78535b..d12a4f6a 100644 --- a/src/d_pool_.ml +++ b/src/d_pool_.ml @@ -18,9 +18,7 @@ type worker_state = { including a work queue and a thread refcount; and the domain itself, if any, in a separate option because it might outlive its own state. *) let domains_ : (worker_state option * Domain_.t option) Lock.t array = - (* number of domains we spawn. Note that we spawn n-1 domains - because there already is the main domain running. *) - let n = max 1 (Domain_.recommended_number () - 1) in + let n = max 1 (Domain_.recommended_number ()) in Array.init n (fun _ -> Lock.create (None, None)) (** main work loop for a domain worker. @@ -84,6 +82,14 @@ let work_ idx (st : worker_state) : unit = done; () +(* special case for main domain: we start a worker immediately *) +let () = + assert (Domain_.is_main_domain ()); + let w = { th_count = Atomic_.make 1; q = Bb_queue.create () } in + (* thread that stays alive *) + ignore (Thread.create (fun () -> work_ 0 w) () : Thread.t); + domains_.(0) <- Lock.create (Some w, None) + let[@inline] n_domains () : int = Array.length domains_ let run_on (i : int) (f : unit -> unit) : unit = diff --git a/src/domain_.ml b/src/domain_.ml index 60d1e669..3050282f 100644 --- a/src/domain_.ml +++ b/src/domain_.ml @@ -9,6 +9,7 @@ let get_id (self : t) : int = (Domain.get_id self :> int) let spawn : _ -> t = Domain.spawn let relax = Domain.cpu_relax let join = Domain.join +let is_main_domain = Domain.is_main_domain [@@@ocaml.alert "+unstable"] [@@@else_] @@ -21,5 +22,6 @@ let get_id (self : t) : int = Thread.id self let spawn f : t = Thread.create f () let relax () = Thread.yield () let join = Thread.join +let is_main_domain () = true [@@@endif] From a3d3468b5eb3d67e5409c0a0ca94fd20b8ebdfce Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 28 Oct 2023 12:29:25 -0400 Subject: [PATCH 57/77] feat: add `Moonpool.recommended_thread_count` --- src/moonpool.ml | 2 ++ src/moonpool.mli | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/src/moonpool.ml b/src/moonpool.ml index b4118536..b46bc123 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -2,6 +2,8 @@ let start_thread_on_some_domain f x = let did = Random.int (D_pool_.n_domains ()) in D_pool_.run_on_and_wait did (fun () -> Thread.create f x) +let recommended_thread_count () = Domain_.recommended_number () + module Atomic = Atomic_ module Blocking_queue = Bb_queue module Bounded_queue = Bounded_queue diff --git a/src/moonpool.mli b/src/moonpool.mli index b744dc51..4360c6ee 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -22,6 +22,12 @@ val start_thread_on_some_domain : ('a -> unit) -> 'a -> Thread.t to run the thread. This ensures that we don't always pick the same domain to run all the various threads needed in an application (timers, event loops, etc.) *) +val recommended_thread_count : unit -> int +(** Number of threads recommended to saturate the CPU. + For IO pools this makes little sense (you might want more threads than + this because many of them will be blocked most of the time). + @since NEXT_RELEASE *) + module Lock = Lock module Fut = Fut module Chan = Chan From 056f80b3186f23e56f39e726807b8581fe158c22 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 28 Oct 2023 12:33:16 -0400 Subject: [PATCH 58/77] add `No_runner`: a runner that doesn't do anything in the background The idea is that you might have APIs that want a runner, but the work is too trivial to require a full actual thread pool. In this case use `No_runner.runner` and calls to `run_async runner f` will turn into `f()`. --- src/moonpool.ml | 6 ++++-- src/moonpool.mli | 8 +++++++- src/no_runner.ml | 9 +++++++++ src/no_runner.mli | 6 ++++++ 4 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 src/no_runner.ml create mode 100644 src/no_runner.mli diff --git a/src/moonpool.ml b/src/moonpool.ml index b46bc123..498a9fb3 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -3,19 +3,21 @@ let start_thread_on_some_domain f x = D_pool_.run_on_and_wait did (fun () -> Thread.create f x) let recommended_thread_count () = Domain_.recommended_number () +let spawn = Fut.spawn module Atomic = Atomic_ module Blocking_queue = Bb_queue module Bounded_queue = Bounded_queue module Chan = Chan +module Fifo_pool = Fifo_pool module Fork_join = Fork_join module Fut = Fut module Lock = Lock +module No_runner = No_runner module Pool = Fifo_pool -module Ws_pool = Ws_pool module Runner = Runner -module Fifo_pool = Fifo_pool module Thread_local_storage = Thread_local_storage +module Ws_pool = Ws_pool module Private = struct module Ws_deque_ = Ws_deque_ diff --git a/src/moonpool.mli b/src/moonpool.mli index 4360c6ee..05b8649c 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -12,9 +12,10 @@ module Ws_pool = Ws_pool module Fifo_pool = Fifo_pool module Runner = Runner +module No_runner = No_runner module Pool = Fifo_pool -[@@deprecated "use Fifo_pool or Ws_pool"] +[@@deprecated "use Fifo_pool or Ws_pool to be more explicit"] (** Default pool. Please explicitly pick an implementation instead. *) val start_thread_on_some_domain : ('a -> unit) -> 'a -> Thread.t @@ -28,6 +29,11 @@ val recommended_thread_count : unit -> int this because many of them will be blocked most of the time). @since NEXT_RELEASE *) +val spawn : on:Runner.t -> (unit -> 'a) -> 'a Fut.t +(** [spawn ~on f] runs [f()] on the runner (a thread pool typically) + and returns a future result for it. See {!Fut.spawn}. + @since NEXT_RELEASE *) + module Lock = Lock module Fut = Fut module Chan = Chan diff --git a/src/no_runner.ml b/src/no_runner.ml new file mode 100644 index 00000000..d5e11284 --- /dev/null +++ b/src/no_runner.ml @@ -0,0 +1,9 @@ +include Runner + +let runner : t = + Runner.For_runner_implementors.create + ~size:(fun () -> 0) + ~num_tasks:(fun () -> 0) + ~shutdown:(fun ~wait:_ () -> ()) + ~run_async:(fun f -> f ()) + () diff --git a/src/no_runner.mli b/src/no_runner.mli new file mode 100644 index 00000000..2c295adc --- /dev/null +++ b/src/no_runner.mli @@ -0,0 +1,6 @@ +(** Runner that runs in the caller, not in the background. *) + +include module type of Runner + +val runner : t +(** The trivial runner that actually runs tasks at the calling point. *) From 21ac980fb20c4ae3eb0785f244e4eb6a4e2f426d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 28 Oct 2023 12:48:41 -0400 Subject: [PATCH 59/77] rename no_runner to immediate_runner --- src/{no_runner.ml => immediate_runner.ml} | 0 src/immediate_runner.mli | 20 ++++++++++++++++++++ src/moonpool.ml | 2 +- src/moonpool.mli | 2 +- src/no_runner.mli | 6 ------ 5 files changed, 22 insertions(+), 8 deletions(-) rename src/{no_runner.ml => immediate_runner.ml} (100%) create mode 100644 src/immediate_runner.mli delete mode 100644 src/no_runner.mli diff --git a/src/no_runner.ml b/src/immediate_runner.ml similarity index 100% rename from src/no_runner.ml rename to src/immediate_runner.ml diff --git a/src/immediate_runner.mli b/src/immediate_runner.mli new file mode 100644 index 00000000..ed017eba --- /dev/null +++ b/src/immediate_runner.mli @@ -0,0 +1,20 @@ +(** Runner that runs tasks immediately in the caller thread. + + Whenever a task is submitted to this runner via [Runner.run_async r task], + the task is run immediately in the caller thread as [task()]. + There are no background threads, no resource, this is just a trivial + implementation of the interface. + + This can be useful when an implementation needs a runner, but there isn't + enough work to justify starting an actual full thread pool. + + Another situation is when threads cannot be used at all (e.g. because you + plan to call [Unix.fork] later). + + @since NEXT_RELEASE +*) + +include module type of Runner + +val runner : t +(** The trivial runner that actually runs tasks at the calling point. *) diff --git a/src/moonpool.ml b/src/moonpool.ml index 498a9fb3..cb82f668 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -13,7 +13,7 @@ module Fifo_pool = Fifo_pool module Fork_join = Fork_join module Fut = Fut module Lock = Lock -module No_runner = No_runner +module Immediate_runner = Immediate_runner module Pool = Fifo_pool module Runner = Runner module Thread_local_storage = Thread_local_storage diff --git a/src/moonpool.mli b/src/moonpool.mli index 05b8649c..40b78891 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -12,7 +12,7 @@ module Ws_pool = Ws_pool module Fifo_pool = Fifo_pool module Runner = Runner -module No_runner = No_runner +module Immediate_runner = Immediate_runner module Pool = Fifo_pool [@@deprecated "use Fifo_pool or Ws_pool to be more explicit"] diff --git a/src/no_runner.mli b/src/no_runner.mli deleted file mode 100644 index 2c295adc..00000000 --- a/src/no_runner.mli +++ /dev/null @@ -1,6 +0,0 @@ -(** Runner that runs in the caller, not in the background. *) - -include module type of Runner - -val runner : t -(** The trivial runner that actually runs tasks at the calling point. *) From 133a0d61283f0a372a7c8aca72ec1bd5c49d3fd8 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 28 Oct 2023 13:00:15 -0400 Subject: [PATCH 60/77] breaking: change interface for number of threads now the user can specify `num_threads`; if not provided a sensible default is picked. --- src/fifo_pool.ml | 19 ++++++++----------- src/fifo_pool.mli | 8 ++++---- src/util_pool_.ml | 11 +++++++++++ src/util_pool_.mli | 5 +++++ src/ws_pool.ml | 17 ++++++----------- src/ws_pool.mli | 16 ++++++---------- 6 files changed, 40 insertions(+), 36 deletions(-) create mode 100644 src/util_pool_.ml create mode 100644 src/util_pool_.mli diff --git a/src/fifo_pool.ml b/src/fifo_pool.ml index 044e0013..1a95d715 100644 --- a/src/fifo_pool.ml +++ b/src/fifo_pool.ml @@ -56,13 +56,12 @@ type ('a, 'b) create_args = ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> ?on_exn:(exn -> Printexc.raw_backtrace -> unit) -> ?around_task:(t -> 'b) * (t -> 'b -> unit) -> - ?min:int -> - ?per_domain:int -> + ?num_threads:int -> 'a let create ?(on_init_thread = default_thread_init_exit_) ?(on_exit_thread = default_thread_init_exit_) ?(on_exn = fun _ _ -> ()) - ?around_task ?min:(min_threads = 1) ?(per_domain = 0) () : t = + ?around_task ?num_threads () : t = (* wrapper *) let around_task = match around_task with @@ -70,11 +69,10 @@ let create ?(on_init_thread = default_thread_init_exit_) | None -> AT_pair (ignore, fun _ _ -> ()) in - (* number of threads to run *) - let min_threads = max 1 min_threads in let num_domains = D_pool_.n_domains () in - assert (num_domains >= 1); - let num_threads = max min_threads (num_domains * per_domain) in + + (* number of threads to run *) + let num_threads = Util_pool_.num_threads ?num_threads () in (* make sure we don't bias towards the first domain(s) in {!D_pool_} *) let offset = Random.int num_domains in @@ -141,11 +139,10 @@ let create ?(on_init_thread = default_thread_init_exit_) runner -let with_ ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?min ?per_domain - () f = +let with_ ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?num_threads () f + = let pool = - create ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?min ?per_domain - () + create ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?num_threads () in let@ () = Fun.protect ~finally:(fun () -> shutdown pool) in f pool diff --git a/src/fifo_pool.mli b/src/fifo_pool.mli index 252083c5..4371db58 100644 --- a/src/fifo_pool.mli +++ b/src/fifo_pool.mli @@ -21,8 +21,7 @@ type ('a, 'b) create_args = ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> ?on_exn:(exn -> Printexc.raw_backtrace -> unit) -> ?around_task:(t -> 'b) * (t -> 'b -> unit) -> - ?min:int -> - ?per_domain:int -> + ?num_threads:int -> 'a (** Arguments used in {!create}. See {!create} for explanations. *) @@ -30,8 +29,9 @@ val create : (unit -> t, _) create_args (** [create ()] makes a new thread pool. @param on_init_thread called at the beginning of each new thread in the pool. @param min minimum size of the pool. See {!Pool.create_args}. - @param per_domain is the number of threads allocated per domain in the fixed - domain pool. See {!Pool.create_args}. + The default is [Domain.recommended_domain_count()], ie one worker per + CPU core. + On OCaml 4 the default is [4] (since there is only one domain). @param on_exit_thread called at the end of each worker thread in the pool. @param around_task a pair of [before, after] functions ran around each task. See {!Pool.create_args}. diff --git a/src/util_pool_.ml b/src/util_pool_.ml new file mode 100644 index 00000000..8207062a --- /dev/null +++ b/src/util_pool_.ml @@ -0,0 +1,11 @@ +let num_threads ?num_threads () : int = + let n_domains = D_pool_.n_domains () in + + (* number of threads to run *) + let num_threads = + match num_threads with + | Some j -> max 1 j + | None -> n_domains + in + + num_threads diff --git a/src/util_pool_.mli b/src/util_pool_.mli new file mode 100644 index 00000000..68fdde22 --- /dev/null +++ b/src/util_pool_.mli @@ -0,0 +1,5 @@ +(** Utils for pools *) + +val num_threads : ?num_threads:int -> unit -> int +(** Number of threads a pool should have. + @param num_threads user-specified number of threads *) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index ca5d2500..179d555a 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -198,14 +198,13 @@ type ('a, 'b) create_args = ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> ?on_exn:(exn -> Printexc.raw_backtrace -> unit) -> ?around_task:(t -> 'b) * (t -> 'b -> unit) -> - ?min:int -> - ?per_domain:int -> + ?num_threads:int -> 'a (** Arguments used in {!create}. See {!create} for explanations. *) let create ?(on_init_thread = default_thread_init_exit_) ?(on_exit_thread = default_thread_init_exit_) ?(on_exn = fun _ _ -> ()) - ?around_task ?min:(min_threads = 1) ?(per_domain = 0) () : t = + ?around_task ?num_threads () : t = (* wrapper *) let around_task = match around_task with @@ -213,11 +212,8 @@ let create ?(on_init_thread = default_thread_init_exit_) | None -> AT_pair (ignore, fun _ _ -> ()) in - (* number of threads to run *) - let min_threads = max 1 min_threads in let num_domains = D_pool_.n_domains () in - assert (num_domains >= 1); - let num_threads = max min_threads (num_domains * per_domain) in + let num_threads = Util_pool_.num_threads ?num_threads () in (* make sure we don't bias towards the first domain(s) in {!D_pool_} *) let offset = Random.int num_domains in @@ -301,11 +297,10 @@ let create ?(on_init_thread = default_thread_init_exit_) runner -let with_ ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?min ?per_domain - () f = +let with_ ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?num_threads () f + = let pool = - create ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?min ?per_domain - () + create ?on_init_thread ?on_exit_thread ?on_exn ?around_task ?num_threads () in let@ () = Fun.protect ~finally:(fun () -> shutdown pool) in f pool diff --git a/src/ws_pool.mli b/src/ws_pool.mli index 4775024c..c13e4c75 100644 --- a/src/ws_pool.mli +++ b/src/ws_pool.mli @@ -26,8 +26,7 @@ type ('a, 'b) create_args = ?on_exit_thread:(dom_id:int -> t_id:int -> unit -> unit) -> ?on_exn:(exn -> Printexc.raw_backtrace -> unit) -> ?around_task:(t -> 'b) * (t -> 'b -> unit) -> - ?min:int -> - ?per_domain:int -> + ?num_threads:int -> 'a (** Arguments used in {!create}. See {!create} for explanations. *) @@ -35,14 +34,11 @@ val create : (unit -> t, _) create_args (** [create ()] makes a new thread pool. @param on_init_thread called at the beginning of each new thread in the pool. - @param min minimum size of the pool. It will be at least [1] internally, - so [0] or negative values make no sense. - @param per_domain is the number of threads allocated per domain in the fixed - domain pool. The default value is [0], but setting, say, [~per_domain:2] - means that if there are [8] domains (which might be the case on an 8-core machine) - then the minimum size of the pool is [16]. - If both [min] and [per_domain] are specified, the maximum of both - [min] and [per_domain * num_of_domains] is used. + @param num_threads size of the pool, ie. number of worker threads. + It will be at least [1] internally, so [0] or negative values make no sense. + The default is [Domain.recommended_domain_count()], ie one worker + thread per CPU core. + On OCaml 4 the default is [4] (since there is only one domain). @param on_exit_thread called at the end of each thread in the pool @param around_task a pair of [before, after], where [before pool] is called before a task is processed, From 928345437a878fe8f0be82e7669658efef9aab79 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 28 Oct 2023 13:19:44 -0400 Subject: [PATCH 61/77] fix tests to use new API --- README.md | 2 +- benchs/fib_rec.ml | 22 ++++++++++++++-------- benchs/pi.ml | 8 ++++---- test/effect-based/t_fib1.ml | 4 ++-- test/effect-based/t_fib_fork_join.ml | 4 ++-- test/effect-based/t_fib_fork_join_all.ml | 4 ++-- test/effect-based/t_fork_join.ml | 8 ++++---- test/effect-based/t_fork_join_heavy.ml | 2 +- test/effect-based/t_futs1.ml | 2 +- test/effect-based/t_many.ml | 8 ++++---- test/effect-based/t_sort.ml | 2 +- test/t_bench1.ml | 2 +- test/t_chan_train.ml | 2 +- test/t_fib.ml | 4 ++-- test/t_fib_rec.ml | 6 +++--- test/t_futs1.ml | 4 ++-- test/t_props.ml | 4 ++-- test/t_resource.ml | 4 ++-- test/t_tree_futs.ml | 4 ++-- test/t_unfair.ml | 4 ++-- 20 files changed, 53 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index ab451e08..b06a1975 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ the workers of `pool`, as soon as one is available. No result is returned by `ru ```ocaml # #require "threads";; -# let pool = Moonpool.Fifo_pool.create ~min:4 ();; +# let pool = Moonpool.Fifo_pool.create ~num_threads:4 ();; val pool : Moonpool.Runner.t = # begin diff --git a/benchs/fib_rec.ml b/benchs/fib_rec.ml index d3df44df..571b8495 100644 --- a/benchs/fib_rec.ml +++ b/benchs/fib_rec.ml @@ -57,10 +57,14 @@ let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let create_pool ~psize ~kind () = match kind with - | "fifo" -> Fifo_pool.create ~min:psize () - | "pool" -> Ws_pool.create ~min:psize () + | "fifo" -> Fifo_pool.create ?num_threads:psize () + | "pool" -> Ws_pool.create ?num_threads:psize () | _ -> assert false +let str_of_int_opt = function + | None -> "None" + | Some i -> Printf.sprintf "Some %d" i + let run ~psize ~n ~seq ~dl ~fj ~await ~niter ~kind () : unit = let pool = lazy (create_pool ~kind ~psize ()) in let dl_pool = @@ -80,14 +84,16 @@ let run ~psize ~n ~seq ~dl ~fj ~await ~niter ~kind () : unit = Domainslib.Task.run pool (fun () -> Domainslib.Task.await pool @@ fib_dl ~pool n) ) else if fj then ( - Printf.printf "compute fib %d using fork-join with pool size=%d\n%!" n - psize; + Printf.printf "compute fib %d using fork-join with pool size=%s\n%!" n + (str_of_int_opt psize); fib_fj ~on:(Lazy.force pool) n |> Fut.wait_block_exn ) else if await then ( - Printf.printf "compute fib %d using await with pool size=%d\n%!" n psize; + Printf.printf "compute fib %d using await with pool size=%s\n%!" n + (str_of_int_opt psize); fib_await ~on:(Lazy.force pool) n |> Fut.wait_block_exn ) else ( - Printf.printf "compute fib %d with pool size=%d\n%!" n psize; + Printf.printf "compute fib %d with pool size=%s\n%!" n + (str_of_int_opt psize); fib ~on:(Lazy.force pool) n |> Fut.wait_block_exn ) in @@ -103,7 +109,7 @@ let run ~psize ~n ~seq ~dl ~fj ~await ~niter ~kind () : unit = let () = let n = ref 40 in - let psize = ref 16 in + let psize = ref None in let seq = ref false in let niter = ref 3 in let kind = ref "pool" in @@ -112,7 +118,7 @@ let () = let fj = ref false in let opts = [ - "-psize", Arg.Set_int psize, " pool size"; + "-psize", Arg.Int (fun i -> psize := Some i), " pool size"; "-n", Arg.Set_int n, " fib "; "-seq", Arg.Set seq, " sequential"; "-dl", Arg.Set dl, " domainslib"; diff --git a/benchs/pi.ml b/benchs/pi.ml index 65304a80..c8ef57b5 100644 --- a/benchs/pi.ml +++ b/benchs/pi.ml @@ -21,14 +21,14 @@ let with_pool ~kind f = match kind with | "pool" -> if !j = 0 then - Ws_pool.with_ ~per_domain:1 f + Ws_pool.with_ f else - Ws_pool.with_ ~min:!j f + Ws_pool.with_ ~num_threads:!j f | "fifo" -> if !j = 0 then - Fifo_pool.with_ ~per_domain:1 f + Fifo_pool.with_ f else - Fifo_pool.with_ ~min:!j f + Fifo_pool.with_ ~num_threads:!j f | _ -> assert false (** Run in parallel using {!Fut.for_} *) diff --git a/test/effect-based/t_fib1.ml b/test/effect-based/t_fib1.ml index ca3f2861..a7c8ebee 100644 --- a/test/effect-based/t_fib1.ml +++ b/test/effect-based/t_fib1.ml @@ -26,13 +26,13 @@ let fib ~on x : int Fut.t = let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let fib_40 : int = - let pool = Ws_pool.create ~min:8 () in + let pool = Ws_pool.create ~num_threads:8 () in fib ~on:pool 40 |> Fut.wait_block_exn let () = Printf.printf "fib 40 = %d\n%!" fib_40 let run_test () = - let pool = Ws_pool.create ~min:8 () in + let pool = Ws_pool.create ~num_threads:8 () in assert ( List.init 10 (fib ~on:pool) diff --git a/test/effect-based/t_fib_fork_join.ml b/test/effect-based/t_fib_fork_join.ml index bdf60337..4e6639b2 100644 --- a/test/effect-based/t_fib_fork_join.ml +++ b/test/effect-based/t_fib_fork_join.ml @@ -27,13 +27,13 @@ let fib ~on x : int Fut.t = let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let fib_40 : int = - let pool = Ws_pool.create ~min:8 () in + let pool = Ws_pool.create ~num_threads:8 () in fib ~on:pool 40 |> Fut.wait_block_exn let () = Printf.printf "fib 40 = %d\n%!" fib_40 let run_test () = - let pool = Ws_pool.create ~min:8 () in + let pool = Ws_pool.create ~num_threads:8 () in assert ( List.init 10 (fib ~on:pool) diff --git a/test/effect-based/t_fib_fork_join_all.ml b/test/effect-based/t_fib_fork_join_all.ml index ed82902e..3caee9b9 100644 --- a/test/effect-based/t_fib_fork_join_all.ml +++ b/test/effect-based/t_fib_fork_join_all.ml @@ -22,13 +22,13 @@ let rec fib x : int = ) let fib_40 : int = - let@ pool = Ws_pool.with_ ~min:8 () in + let@ pool = Ws_pool.with_ ~num_threads:8 () in Fut.spawn ~on:pool (fun () -> fib 40) |> Fut.wait_block_exn let () = Printf.printf "fib 40 = %d\n%!" fib_40 let run_test () = - let@ pool = Ws_pool.with_ ~min:8 () in + let@ pool = Ws_pool.with_ ~num_threads:8 () in let fut = Fut.spawn ~on:pool (fun () -> diff --git a/test/effect-based/t_fork_join.ml b/test/effect-based/t_fork_join.ml index 5b467187..5c7134ca 100644 --- a/test/effect-based/t_fork_join.ml +++ b/test/effect-based/t_fork_join.ml @@ -5,7 +5,7 @@ let ( let@ ) = ( @@ ) open! Moonpool -let pool = Ws_pool.create ~min:4 () +let pool = Ws_pool.create ~num_threads:4 () let () = let x = @@ -270,7 +270,7 @@ end let t_eval = let arb = Q.set_stats [ "size", Evaluator.size ] Evaluator.arb in Q.Test.make ~name:"same eval" arb (fun e -> - let@ pool = Ws_pool.with_ ~min:4 () in + let@ pool = Ws_pool.with_ ~num_threads:4 () in (* Printf.eprintf "eval %s\n%!" (Evaluator.show e); *) let x = Evaluator.eval_seq e in let y = Evaluator.eval_fork_join ~pool e in @@ -288,7 +288,7 @@ let t_for_nested ~min ~chunk_size () = let ref_l2 = List.map (List.map neg) ref_l1 in let l1, l2 = - let@ pool = Ws_pool.with_ ~min () in + let@ pool = Ws_pool.with_ ~num_threads:min () in let@ () = Ws_pool.run_wait_block pool in let l1 = Fork_join.map_list ~chunk_size (Fork_join.map_list ~chunk_size neg) l @@ -310,7 +310,7 @@ let t_map ~chunk_size () = Q.Test.make ~name:"map1" Q.(small_list small_int |> Q.set_stats [ "len", List.length ]) (fun l -> - let@ pool = Ws_pool.with_ ~min:4 () in + let@ pool = Ws_pool.with_ ~num_threads:4 () in let@ () = Ws_pool.run_wait_block pool in let a1 = diff --git a/test/effect-based/t_fork_join_heavy.ml b/test/effect-based/t_fork_join_heavy.ml index ad9f7044..a981bee1 100644 --- a/test/effect-based/t_fork_join_heavy.ml +++ b/test/effect-based/t_fork_join_heavy.ml @@ -27,7 +27,7 @@ let run ~min () = let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "step" in let l1, l2 = - let@ pool = Ws_pool.with_ ~min () in + let@ pool = Ws_pool.with_ ~num_threads:min () in let@ () = Ws_pool.run_wait_block pool in let l1, l2 = diff --git a/test/effect-based/t_futs1.ml b/test/effect-based/t_futs1.ml index 182ca9d5..4df18226 100644 --- a/test/effect-based/t_futs1.ml +++ b/test/effect-based/t_futs1.ml @@ -2,7 +2,7 @@ open! Moonpool -let pool = Ws_pool.create ~min:4 () +let pool = Ws_pool.create ~num_threads:4 () let () = let fut = Array.init 10 (fun i -> Fut.spawn ~on:pool (fun () -> i)) in diff --git a/test/effect-based/t_many.ml b/test/effect-based/t_many.ml index 4362932c..6a2b5918 100644 --- a/test/effect-based/t_many.ml +++ b/test/effect-based/t_many.ml @@ -30,19 +30,19 @@ let run ~pool () = let () = (print_endline "with fifo"; - let@ pool = Fifo_pool.with_ ~min:4 () in + let@ pool = Fifo_pool.with_ ~num_threads:4 () in run ~pool ()); (print_endline "with WS(1)"; - let@ pool = Ws_pool.with_ ~min:1 () in + let@ pool = Ws_pool.with_ ~num_threads:1 () in run ~pool ()); (print_endline "with WS(2)"; - let@ pool = Ws_pool.with_ ~min:2 () in + let@ pool = Ws_pool.with_ ~num_threads:2 () in run ~pool ()); (print_endline "with WS(4)"; - let@ pool = Ws_pool.with_ ~min:4 () in + let@ pool = Ws_pool.with_ ~num_threads:4 () in run ~pool ()); () diff --git a/test/effect-based/t_sort.ml b/test/effect-based/t_sort.ml index 8d3fe17c..8ccc372f 100644 --- a/test/effect-based/t_sort.ml +++ b/test/effect-based/t_sort.ml @@ -59,7 +59,7 @@ let rec quicksort arr i len : unit = (fun () -> quicksort arr !low (len - (!low - i))) ) -let pool = Moonpool.Ws_pool.create ~min:8 () +let pool = Moonpool.Ws_pool.create ~num_threads:8 () let () = let arr = Array.init 400_000 (fun _ -> Random.int 300_000) in diff --git a/test/t_bench1.ml b/test/t_bench1.ml index 95cd87a5..cd1a8bfd 100644 --- a/test/t_bench1.ml +++ b/test/t_bench1.ml @@ -8,7 +8,7 @@ let rec fib x = let run ~psize ~n ~j () : _ Fut.t = Printf.printf "pool size=%d, n=%d, j=%d\n%!" psize n j; - let pool = Ws_pool.create ~min:psize ~per_domain:0 () in + let pool = Ws_pool.create ~num_threads:psize () in (* TODO: a ppx for tracy so we can use instrumentation *) let loop () = diff --git a/test/t_chan_train.ml b/test/t_chan_train.ml index bb3e24f7..132d5540 100644 --- a/test/t_chan_train.ml +++ b/test/t_chan_train.ml @@ -1,7 +1,7 @@ open Moonpool (* large pool, some of our tasks below are long lived *) -let pool = Ws_pool.create ~min:30 () +let pool = Ws_pool.create ~num_threads:30 () open (val Fut.infix pool) diff --git a/test/t_fib.ml b/test/t_fib.ml index 3a98e395..3fc53bf9 100644 --- a/test/t_fib.ml +++ b/test/t_fib.ml @@ -4,8 +4,8 @@ let ( let@ ) = ( @@ ) let with_pool ~kind () f = match kind with - | `Fifo_pool -> Fifo_pool.with_ ~min:4 () f - | `Ws_pool -> Ws_pool.with_ ~min:4 () f + | `Fifo_pool -> Fifo_pool.with_ ~num_threads:4 () f + | `Ws_pool -> Ws_pool.with_ ~num_threads:4 () f let rec fib x = if x <= 1 then diff --git a/test/t_fib_rec.ml b/test/t_fib_rec.ml index 286e6aac..94e206b7 100644 --- a/test/t_fib_rec.ml +++ b/test/t_fib_rec.ml @@ -25,7 +25,7 @@ let () = assert (List.init 10 fib_direct = [ 1; 1; 2; 3; 5; 8; 13; 21; 34; 55 ]) let fib_40 : int lazy_t = lazy (let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "fib40" in - let pool = Fifo_pool.create ~min:8 () in + let pool = Fifo_pool.create ~num_threads:8 () in let r = fib ~on:pool 40 |> Fut.wait_block_exn in Ws_pool.shutdown pool; r) @@ -49,12 +49,12 @@ let run_test ~pool () = let run_test_size ~size () = Printf.printf "test pool(%d)\n%!" size; - let@ pool = Ws_pool.with_ ~min:size () in + let@ pool = Ws_pool.with_ ~num_threads:size () in run_test ~pool () let run_test_fifo ~size () = Printf.printf "test fifo(%d)\n%!" size; - let@ pool = Fifo_pool.with_ ~min:size () in + let@ pool = Fifo_pool.with_ ~num_threads:size () in run_test ~pool () let setup_counter () = diff --git a/test/t_futs1.ml b/test/t_futs1.ml index ee2d96a6..03a1ac13 100644 --- a/test/t_futs1.ml +++ b/test/t_futs1.ml @@ -1,7 +1,7 @@ open! Moonpool -let pool = Ws_pool.create ~min:4 () -let pool2 = Ws_pool.create ~min:2 () +let pool = Ws_pool.create ~num_threads:4 () +let pool2 = Ws_pool.create ~num_threads:2 () let () = let fut = Fut.return 1 in diff --git a/test/t_props.ml b/test/t_props.ml index 9fa64fbe..698650fd 100644 --- a/test/t_props.ml +++ b/test/t_props.ml @@ -7,8 +7,8 @@ let add_test t = tests := t :: !tests let with_pool ~kind () f = match kind with - | `Fifo_pool -> Fifo_pool.with_ ~min:4 ~per_domain:1 () f - | `Ws_pool -> Ws_pool.with_ ~min:4 ~per_domain:1 () f + | `Fifo_pool -> Fifo_pool.with_ () f + | `Ws_pool -> Ws_pool.with_ () f let () = add_test @@ fun ~kind -> diff --git a/test/t_resource.ml b/test/t_resource.ml index c990f708..4c20e9fb 100644 --- a/test/t_resource.ml +++ b/test/t_resource.ml @@ -4,8 +4,8 @@ let ( let@ ) = ( @@ ) let with_pool ~kind () f = match kind with - | `Fifo_pool -> Fifo_pool.with_ ~min:4 ~per_domain:1 () f - | `Ws_pool -> Ws_pool.with_ ~min:4 ~per_domain:1 () f + | `Fifo_pool -> Fifo_pool.with_ () f + | `Ws_pool -> Ws_pool.with_ () f (* test proper resource handling *) let run ~kind () = diff --git a/test/t_tree_futs.ml b/test/t_tree_futs.ml index 3507be0a..0bc96a03 100644 --- a/test/t_tree_futs.ml +++ b/test/t_tree_futs.ml @@ -4,8 +4,8 @@ let ( let@ ) = ( @@ ) let with_pool ~kind ~j () f = match kind with - | `Fifo_pool -> Fifo_pool.with_ ~min:j () f - | `Ws_pool -> Ws_pool.with_ ~min:j () f + | `Fifo_pool -> Fifo_pool.with_ ~num_threads:j () f + | `Ws_pool -> Ws_pool.with_ ~num_threads:j () f type 'a tree = | Leaf of 'a diff --git a/test/t_unfair.ml b/test/t_unfair.ml index f535a450..cee4373e 100644 --- a/test/t_unfair.ml +++ b/test/t_unfair.ml @@ -20,8 +20,8 @@ let run ~kind () = in match kind with - | `Simple -> Fifo_pool.create ~min:3 ~on_init_thread ~around_task () - | `Ws_pool -> Ws_pool.create ~min:3 ~on_init_thread ~around_task () + | `Simple -> Fifo_pool.create ~num_threads:3 ~on_init_thread ~around_task () + | `Ws_pool -> Ws_pool.create ~num_threads:3 ~on_init_thread ~around_task () in (* make all threads busy *) From 00a5cfc8bafa5a68e4a93cbbab753d0faaceb0d5 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 29 Oct 2023 18:28:13 -0400 Subject: [PATCH 62/77] fix: rename Thread_local_storage_ to not collide with the library --- src/dune | 6 +++--- src/moonpool.ml | 2 +- src/moonpool.mli | 2 +- src/{thread_local_storage.mli => thread_local_storage_.mli} | 0 ..._local_storage.real.ml => thread_local_storage_.real.ml} | 0 ..._local_storage.stub.ml => thread_local_storage_.stub.ml} | 0 6 files changed, 5 insertions(+), 5 deletions(-) rename src/{thread_local_storage.mli => thread_local_storage_.mli} (100%) rename src/{thread_local_storage.real.ml => thread_local_storage_.real.ml} (100%) rename src/{thread_local_storage.stub.ml => thread_local_storage_.stub.ml} (100%) diff --git a/src/dune b/src/dune index 5275ab40..59005b54 100644 --- a/src/dune +++ b/src/dune @@ -6,9 +6,9 @@ (action (run %{project_root}/src/cpp/cpp.exe %{input-file}))) (libraries threads either - (select thread_local_storage.ml from - (thread-local-storage -> thread_local_storage.stub.ml) - (-> thread_local_storage.real.ml)) + (select thread_local_storage_.ml from + (thread-local-storage -> thread_local_storage_.stub.ml) + (-> thread_local_storage_.real.ml)) (select dla_.ml from (domain-local-await -> dla_.real.ml) ( -> dla_.dummy.ml)))) diff --git a/src/moonpool.ml b/src/moonpool.ml index cb82f668..21b4ccec 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -16,7 +16,7 @@ module Lock = Lock module Immediate_runner = Immediate_runner module Pool = Fifo_pool module Runner = Runner -module Thread_local_storage = Thread_local_storage +module Thread_local_storage = Thread_local_storage_ module Ws_pool = Ws_pool module Private = struct diff --git a/src/moonpool.mli b/src/moonpool.mli index 40b78891..0e46bd02 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -38,7 +38,7 @@ module Lock = Lock module Fut = Fut module Chan = Chan module Fork_join = Fork_join -module Thread_local_storage = Thread_local_storage +module Thread_local_storage = Thread_local_storage_ (** A simple blocking queue. diff --git a/src/thread_local_storage.mli b/src/thread_local_storage_.mli similarity index 100% rename from src/thread_local_storage.mli rename to src/thread_local_storage_.mli diff --git a/src/thread_local_storage.real.ml b/src/thread_local_storage_.real.ml similarity index 100% rename from src/thread_local_storage.real.ml rename to src/thread_local_storage_.real.ml diff --git a/src/thread_local_storage.stub.ml b/src/thread_local_storage_.stub.ml similarity index 100% rename from src/thread_local_storage.stub.ml rename to src/thread_local_storage_.stub.ml From 80031c0a54ec0cb0d16b9ae700eed05cf9a6949f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 29 Oct 2023 18:41:02 -0400 Subject: [PATCH 63/77] fix compilation error --- src/ws_pool.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index 179d555a..44432112 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -1,6 +1,6 @@ module WSQ = Ws_deque_ module A = Atomic_ -module TLS = Thread_local_storage +module TLS = Thread_local_storage_ include Runner let ( let@ ) = ( @@ ) From 72f289af844ae1816a1260287215cc95396f1a7d Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 29 Oct 2023 16:41:19 -0400 Subject: [PATCH 64/77] refactor: use a fixed size work-stealing deque if it's full, tasks overflow into the main queue. --- src/ws_deque_.ml | 100 ++++++++++++++++----------------------------- src/ws_deque_.mli | 10 +++-- src/ws_pool.ml | 20 +++++++-- test/t_ws_deque.ml | 50 ++++++----------------- 4 files changed, 72 insertions(+), 108 deletions(-) diff --git a/src/ws_deque_.ml b/src/ws_deque_.ml index 137e1c15..6c5d1419 100644 --- a/src/ws_deque_.ml +++ b/src/ws_deque_.ml @@ -12,94 +12,68 @@ module A = Atomic_ module CA : sig type 'a t - val create : log_size:int -> unit -> 'a t - val size : _ t -> int + val create : dummy:'a -> unit -> 'a t + val size : 'a t -> int val get : 'a t -> int -> 'a val set : 'a t -> int -> 'a -> unit - val grow : 'a t -> bottom:int -> top:int -> 'a t - val shrink : 'a t -> bottom:int -> top:int -> 'a t end = struct - type 'a t = { - log_size: int; - arr: 'a option array; - } + (** The array has size 256. *) + let log_size = 8 - let[@inline] size (self : _ t) = 1 lsl self.log_size + type 'a t = { arr: 'a array } [@@unboxed] - let create ~log_size () : _ t = - { log_size; arr = Array.make (1 lsl log_size) None } + let[@inline] size (_self : _ t) = 1 lsl log_size + let create ~dummy () : _ t = { arr = Array.make (1 lsl log_size) dummy } - let[@inline] get (self : _ t) (i : int) : 'a = - match Array.unsafe_get self.arr (i land ((1 lsl self.log_size) - 1)) with - | Some x -> x - | None -> assert false + let[@inline] get (self : 'a t) (i : int) : 'a = + Array.unsafe_get self.arr (i land ((1 lsl log_size) - 1)) let[@inline] set (self : 'a t) (i : int) (x : 'a) : unit = - Array.unsafe_set self.arr (i land ((1 lsl self.log_size) - 1)) (Some x) - - let grow (self : _ t) ~bottom ~top : 'a t = - let new_arr = create ~log_size:(self.log_size + 1) () in - for i = top to bottom - 1 do - set new_arr i (get self i) - done; - new_arr - - let shrink (self : _ t) ~bottom ~top : 'a t = - let new_arr = create ~log_size:(self.log_size - 1) () in - for i = top to bottom - 1 do - set new_arr i (get self i) - done; - new_arr + Array.unsafe_set self.arr (i land ((1 lsl log_size) - 1)) x end type 'a t = { top: int A.t; (** Where we steal *) bottom: int A.t; (** Where we push/pop from the owning thread *) mutable top_cached: int; (** Last read value of [top] *) - arr: 'a CA.t A.t; (** The circular array *) + arr: 'a CA.t; (** The circular array *) } -let create () : _ t = +let create ~dummy () : _ t = let top = A.make 0 in - let arr = A.make @@ CA.create ~log_size:4 () in - (* allocate far from top to avoid false sharing *) + let arr = CA.create ~dummy () in + (* allocate far from [top] to avoid false sharing *) let bottom = A.make 0 in { top; top_cached = 0; bottom; arr } let[@inline] size (self : _ t) : int = max 0 (A.get self.bottom - A.get self.top) -let push (self : 'a t) (x : 'a) : unit = - let b = A.get self.bottom in - let t_approx = self.top_cached in - let arr = ref (A.get self.arr) in +exception Full - (* Section 2.3: over-approximation of size. - Only if it seems too big do we actually read [t]. *) - let size_approx = b - t_approx in - if size_approx >= CA.size !arr - 1 then ( - (* we need to read the actual value of [top], which might entail contention. *) - let t = A.get self.top in - self.top_cached <- t; - let size = b - t in +let push (self : 'a t) (x : 'a) : bool = + try + let b = A.get self.bottom in + let t_approx = self.top_cached in - if size >= CA.size !arr - 1 then ( - arr := CA.grow !arr ~top:t ~bottom:b; - A.set self.arr !arr - ) - ); + (* Section 2.3: over-approximation of size. + Only if it seems too big do we actually read [t]. *) + let size_approx = b - t_approx in + if size_approx >= CA.size self.arr - 1 then ( + (* we need to read the actual value of [top], which might entail contention. *) + let t = A.get self.top in + self.top_cached <- t; + let size = b - t in - CA.set !arr b x; - A.set self.bottom (b + 1) + if size >= CA.size self.arr - 1 then (* full! *) raise_notrace Full + ); -let maybe_shrink_ (self : _ t) arr ~top ~bottom : unit = - let size = bottom - top in - let ca_size = CA.size arr in - if ca_size >= 256 && size < ca_size / 3 then - A.set self.arr (CA.shrink arr ~top ~bottom) + CA.set self.arr b x; + A.set self.bottom (b + 1); + true + with Full -> false let pop (self : 'a t) : 'a option = let b = A.get self.bottom in - let arr = A.get self.arr in let b = b - 1 in A.set self.bottom b; @@ -113,15 +87,14 @@ let pop (self : 'a t) : 'a option = None ) else if size > 0 then ( (* can pop without modifying [top] *) - let x = CA.get arr b in - maybe_shrink_ self arr ~bottom:b ~top:t; + let x = CA.get self.arr b in Some x ) else ( assert (size = 0); (* there was exactly one slot, so we might be racing against stealers to update [self.top] *) if A.compare_and_set self.top t (t + 1) then ( - let x = CA.get arr b in + let x = CA.get self.arr b in A.set self.bottom (t + 1); Some x ) else ( @@ -135,13 +108,12 @@ let steal (self : 'a t) : 'a option = as we're in another thread *) let t = A.get self.top in let b = A.get self.bottom in - let arr = A.get self.arr in let size = b - t in if size <= 0 then None else ( - let x = CA.get arr t in + let x = CA.get self.arr t in if A.compare_and_set self.top t (t + 1) then (* successfully increased top to consume [x] *) Some x diff --git a/src/ws_deque_.mli b/src/ws_deque_.mli index 0b243f68..bead45aa 100644 --- a/src/ws_deque_.mli +++ b/src/ws_deque_.mli @@ -6,14 +6,16 @@ type 'a t (** Deque containing values of type ['a] *) -val create : unit -> _ t +val create : dummy:'a -> unit -> 'a t (** Create a new deque. *) -val push : 'a t -> 'a -> unit -(** Push value at the bottom of deque. This is not thread-safe. *) +val push : 'a t -> 'a -> bool +(** Push value at the bottom of deque. returns [true] if it succeeds. + This must be called only by the owner thread. *) val pop : 'a t -> 'a option -(** Pop value from the bottom of deque. This is not thread-safe. *) +(** Pop value from the bottom of deque. + This must be called only by the owner thread. *) val steal : 'a t -> 'a option (** Try to steal from the top of deque. This is thread-safe. *) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index 44432112..4623a3e3 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -60,8 +60,16 @@ let schedule_task_ (self : state) (w : worker_state option) (task : task) : unit (* Printf.printf "schedule task now (%d)\n%!" (Thread.id @@ Thread.self ()); *) match w with | Some w -> - WSQ.push w.q task; - try_wake_someone_ self + let pushed = WSQ.push w.q task in + if pushed then + try_wake_someone_ self + else ( + (* overflow into main queue *) + Mutex.lock self.mutex; + Queue.push task self.main_q; + if self.n_waiting_nonzero then Condition.signal self.cond; + Mutex.unlock self.mutex + ) | None -> if A.get self.active then ( (* push into the main queue *) @@ -202,6 +210,8 @@ type ('a, 'b) create_args = 'a (** Arguments used in {!create}. See {!create} for explanations. *) +let dummy_task_ () = assert false + let create ?(on_init_thread = default_thread_init_exit_) ?(on_exit_thread = default_thread_init_exit_) ?(on_exn = fun _ _ -> ()) ?around_task ?num_threads () : t = @@ -221,7 +231,11 @@ let create ?(on_init_thread = default_thread_init_exit_) let workers : worker_state array = let dummy = Thread.self () in Array.init num_threads (fun i -> - { thread = dummy; q = WSQ.create (); rng = Random.State.make [| i |] }) + { + thread = dummy; + q = WSQ.create ~dummy:dummy_task_ (); + rng = Random.State.make [| i |]; + }) in let pool = diff --git a/test/t_ws_deque.ml b/test/t_ws_deque.ml index 3377dcb6..88429a8d 100644 --- a/test/t_ws_deque.ml +++ b/test/t_ws_deque.ml @@ -2,22 +2,23 @@ module A = Moonpool.Atomic module D = Moonpool.Private.Ws_deque_ let ( let@ ) = ( @@ ) +let dummy = -100 let t_simple () = - let d = D.create () in + let d = D.create ~dummy () in assert (D.steal d = None); assert (D.pop d = None); - D.push d 1; - D.push d 2; + assert (D.push d 1); + assert (D.push d 2); assert (D.pop d = Some 2); assert (D.steal d = Some 1); assert (D.steal d = None); assert (D.pop d = None); - D.push d 3; + assert (D.push d 3); assert (D.pop d = Some 3); - D.push d 4; - D.push d 5; - D.push d 6; + assert (D.push d 4); + assert (D.push d 5); + assert (D.push d 6); assert (D.steal d = Some 4); assert (D.steal d = Some 5); assert (D.pop d = Some 6); @@ -35,7 +36,7 @@ let t_heavy () = let active = A.make true in - let d = D.create () in + let d = D.create ~dummy () in let stealer_loop () = Trace.set_thread_name "stealer"; @@ -51,11 +52,13 @@ let t_heavy () = Trace.set_thread_name "producer"; for _i = 1 to 100_000 do let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "main.outer" in + + (* NOTE: we make sure to push less than 256 elements at once *) for j = 1 to 100 do ref_sum := !ref_sum + j; - D.push d j; + assert (D.push d j); ref_sum := !ref_sum + j; - D.push d j; + assert (D.push d j); Option.iter (fun x -> add_to_sum x) (D.pop d); Option.iter (fun x -> add_to_sum x) (D.pop d) @@ -92,35 +95,8 @@ let t_heavy () = assert (ref_sum = sum); () -let t_many () = - print_endline "pushing many elements"; - let d = D.create () in - - let push_and_pop count = - for i = 1 to count do - (* if i mod 100_000 = 0 then Printf.printf "push %d\n%!" i; *) - D.push d i - done; - let n = ref 0 in - - let continue = ref true in - while !continue do - match D.pop d with - | None -> continue := false - | Some _ -> incr n - done; - assert (!n = count) - in - - push_and_pop 10_000; - push_and_pop 100_000; - push_and_pop 1_000_000; - print_endline "pushing many elements: ok"; - () - let () = let@ () = Trace_tef.with_setup () in t_simple (); t_heavy (); - t_many (); () From 2073c600c419806ef34e98e11f273a7140f78001 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 29 Oct 2023 17:11:48 -0400 Subject: [PATCH 65/77] add `Moonpool.run_async` --- src/moonpool.ml | 1 + src/moonpool.mli | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/src/moonpool.ml b/src/moonpool.ml index 21b4ccec..b7d225d2 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -2,6 +2,7 @@ let start_thread_on_some_domain f x = let did = Random.int (D_pool_.n_domains ()) in D_pool_.run_on_and_wait did (fun () -> Thread.create f x) +let run_async = Runner.run_async let recommended_thread_count () = Domain_.recommended_number () let spawn = Fut.spawn diff --git a/src/moonpool.mli b/src/moonpool.mli index 0e46bd02..3c470bd4 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -23,6 +23,12 @@ val start_thread_on_some_domain : ('a -> unit) -> 'a -> Thread.t to run the thread. This ensures that we don't always pick the same domain to run all the various threads needed in an application (timers, event loops, etc.) *) +val run_async : Runner.t -> (unit -> unit) -> unit +(** [run_async runner task] schedules the task to run + on the given runner. This means [task()] will be executed + at some point in the future, possibly in another thread. + @since NEXT_RELEASE *) + val recommended_thread_count : unit -> int (** Number of threads recommended to saturate the CPU. For IO pools this makes little sense (you might want more threads than From 6fe707609944a86a1cb3341ff06581454c99d21e Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sun, 29 Oct 2023 17:12:02 -0400 Subject: [PATCH 66/77] update doc and readme --- README.md | 15 +++++++++++++++ src/ws_deque_.mli | 6 +++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b06a1975..5135d00b 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,17 @@ The user can create several thread pools (implementing the interface `Runner.t`) These pools use regular posix threads, but the threads are spread across multiple domains (on OCaml 5), which enables parallelism. +Current we provide these pool implementations: +- `Fifo_pool` is a thread pool that uses a blocking queue to schedule tasks, + which means they're picked in the same order they've been scheduled ("fifo"). + This pool is simple and will behave fine for coarse-granularity concurrency, + but will slow down under heavy contention. +- `Ws_pool` is a work-stealing pool, where each thread has its own local queue + in addition to a global queue of tasks. This is efficient for workloads + with many short tasks that spawn other tasks, but the order in which + tasks are run is less predictable. This is useful when throughput is + the important thing to optimize. + The function `Runner.run_async pool task` schedules `task()` to run on one of the workers of `pool`, as soon as one is available. No result is returned by `run_async`. @@ -159,6 +170,10 @@ On OCaml 5, again using effect handlers, the module `Fork_join` implements the [fork-join model](https://en.wikipedia.org/wiki/Fork%E2%80%93join_model). It must run on a pool (using [Runner.run_async] or inside a future via [Fut.spawn]). +It is generally better to use the work-stealing pool for workloads that rely on +fork-join for better performance, because fork-join will tend to spawn lots of +shorter tasks. + ```ocaml # let rec select_sort arr i len = if len >= 2 then ( diff --git a/src/ws_deque_.mli b/src/ws_deque_.mli index bead45aa..b696224e 100644 --- a/src/ws_deque_.mli +++ b/src/ws_deque_.mli @@ -1,6 +1,10 @@ (** Work-stealing deque. - Adapted from "Dynamic circular work stealing deque", Chase & Lev + Adapted from "Dynamic circular work stealing deque", Chase & Lev. + + However note that this one is not dynamic in the sense that there + is no resizing. Instead we return [false] when [push] fails, which + keeps the implementation fairly lightweight. *) type 'a t From 6e6a2a1faa4ed62166300609643ec334c39a7510 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 6 Nov 2023 00:09:01 -0500 Subject: [PATCH 67/77] feat runner: add `get_current_runner` this relies on TLS to access the current pool really quickly. --- src/fifo_pool.ml | 2 ++ src/runner.ml | 7 +++++++ src/runner.mli | 7 +++++++ src/ws_pool.ml | 1 + 4 files changed, 17 insertions(+) diff --git a/src/fifo_pool.ml b/src/fifo_pool.ml index 1a95d715..c4cc59ac 100644 --- a/src/fifo_pool.ml +++ b/src/fifo_pool.ml @@ -1,3 +1,4 @@ +module TLS = Thread_local_storage_ include Runner let ( let@ ) = ( @@ ) @@ -18,6 +19,7 @@ let schedule_ (self : state) (task : task) : unit = type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task let worker_thread_ (self : state) (runner : t) ~on_exn ~around_task : unit = + TLS.get Runner.For_runner_implementors.k_cur_runner := Some runner; let (AT_pair (before_task, after_task)) = around_task in let run_task task : unit = diff --git a/src/runner.ml b/src/runner.ml index 91cde5a2..0fcf2392 100644 --- a/src/runner.ml +++ b/src/runner.ml @@ -1,3 +1,5 @@ +module TLS = Thread_local_storage_ + type task = unit -> unit type t = { @@ -34,4 +36,9 @@ let run_wait_block self (f : unit -> 'a) : 'a = module For_runner_implementors = struct let create ~size ~num_tasks ~shutdown ~run_async () : t = { size; num_tasks; shutdown; run_async } + + let k_cur_runner : t option ref TLS.key = TLS.new_key (fun () -> ref None) end + +let[@inline] get_current_runner () : _ option = + !(TLS.get For_runner_implementors.k_cur_runner) diff --git a/src/runner.mli b/src/runner.mli index 3ac2f724..471d21af 100644 --- a/src/runner.mli +++ b/src/runner.mli @@ -63,4 +63,11 @@ module For_runner_implementors : sig {b NOTE}: the runner should support DLA and {!Suspend_} on OCaml 5.x, so that {!Fork_join} and other 5.x features work properly. *) + + val k_cur_runner : t option ref Thread_local_storage_.key end + +val get_current_runner : unit -> t option +(** Access the current runner. This returns [Some r] if the call + happens on a thread that belongs in a runner. + @since NEXT_RELEASE *) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index 4623a3e3..874cbd5c 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -153,6 +153,7 @@ let worker_run_self_tasks_ (self : state) ~runner w : unit = (** Main loop for a worker thread. *) let worker_thread_ (self : state) ~(runner : t) (w : worker_state) : unit = + TLS.get Runner.For_runner_implementors.k_cur_runner := Some runner; TLS.get k_worker_state := Some w; let rec main () : unit = From d2be2db0ef83d5759943ac49eee808d05f7f6e83 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Mon, 6 Nov 2023 00:09:38 -0500 Subject: [PATCH 68/77] perf fork-join: in `both f g` only run `f` in the background `g` can run immediately on same thread, otherwise we just suspend the computation and start a new task for nothing. --- src/fork_join.ml | 153 +++++++++++++++++++++++++---------------------- 1 file changed, 81 insertions(+), 72 deletions(-) diff --git a/src/fork_join.ml b/src/fork_join.ml index ac5ba5d7..8ad61cec 100644 --- a/src/fork_join.ml +++ b/src/fork_join.ml @@ -3,91 +3,100 @@ module A = Atomic_ module State_ = struct - type 'a single_res = - | St_none - | St_some of 'a - | St_fail of exn * Printexc.raw_backtrace + type error = exn * Printexc.raw_backtrace + type 'a or_error = ('a, error) result - type ('a, 'b) t = { - mutable suspension: - ((unit, exn * Printexc.raw_backtrace) result -> unit) option; - (** suspended caller *) - left: 'a single_res; - right: 'b single_res; - } + type ('a, 'b) t = + | Init + | Left_solved of 'a or_error + | Right_solved of 'b or_error * Suspend_.suspension + | Both_solved of 'a or_error * 'b or_error - let get_exn (self : _ t A.t) = + let get_exn_ (self : _ t A.t) = match A.get self with - | { left = St_fail (e, bt); _ } | { right = St_fail (e, bt); _ } -> - Printexc.raise_with_backtrace e bt - | { left = St_some x; right = St_some y; _ } -> x, y + | Both_solved (Ok a, Ok b) -> a, b + | Both_solved (Error (exn, bt), _) | Both_solved (_, Error (exn, bt)) -> + Printexc.raise_with_backtrace exn bt | _ -> assert false - let check_if_state_complete_ (self : _ t) : unit = - match self.left, self.right, self.suspension with - | St_some _, St_some _, Some f -> f (Ok ()) - | St_fail (e, bt), _, Some f | _, St_fail (e, bt), Some f -> - f (Error (e, bt)) - | _ -> () - - let set_left_ (self : _ t A.t) (x : _ single_res) = - while - let old_st = A.get self in - let new_st = { old_st with left = x } in - if A.compare_and_set self old_st new_st then ( - check_if_state_complete_ new_st; - false + let rec set_left_ (self : _ t A.t) (left : _ or_error) = + let old_st = A.get self in + match old_st with + | Init -> + let new_st = Left_solved left in + if not (A.compare_and_set self old_st new_st) then ( + Domain_.relax (); + set_left_ self left + ) + | Right_solved (right, cont) -> + let new_st = Both_solved (left, right) in + if not (A.compare_and_set self old_st new_st) then ( + Domain_.relax (); + set_left_ self left ) else - true - do - Domain_.relax () - done + cont (Ok ()) + | Left_solved _ | Both_solved _ -> assert false - let set_right_ (self : _ t A.t) (y : _ single_res) = - while - let old_st = A.get self in - let new_st = { old_st with right = y } in - if A.compare_and_set self old_st new_st then ( - check_if_state_complete_ new_st; - false - ) else - true - do - Domain_.relax () - done + let rec set_right_ (self : _ t A.t) (right : _ or_error) : unit = + let old_st = A.get self in + match old_st with + | Left_solved left -> + let new_st = Both_solved (left, right) in + if not (A.compare_and_set self old_st new_st) then set_right_ self right + | Init -> + (* we are first arrived, we suspend until the left computation is done *) + Suspend_.suspend + { + Suspend_.handle = + (fun ~run:_ suspension -> + while + let old_st = A.get self in + match old_st with + | Init -> + not + (A.compare_and_set self old_st + (Right_solved (right, suspension))) + | Left_solved left -> + (* other thread is done, no risk of race condition *) + A.set self (Both_solved (left, right)); + suspension (Ok ()); + false + | Right_solved _ | Both_solved _ -> assert false + do + () + done); + } + | Right_solved _ | Both_solved _ -> assert false end let both f g : _ * _ = - let open State_ in - let st = A.make { suspension = None; left = St_none; right = St_none } in + let module ST = State_ in + let st = A.make ST.Init in - let start_tasks ~run () : unit = - run (fun () -> - try - let res = f () in - set_left_ st (St_some res) - with e -> - let bt = Printexc.get_raw_backtrace () in - set_left_ st (St_fail (e, bt))); - - run (fun () -> - try - let res = g () in - set_right_ st (St_some res) - with e -> - let bt = Printexc.get_raw_backtrace () in - set_right_ st (St_fail (e, bt))) + let runner = + match Runner.get_current_runner () with + | None -> invalid_arg "Fork_join.both must be run from within a runner" + | Some r -> r in - Suspend_.suspend - { - Suspend_.handle = - (fun ~run suspension -> - (* nothing else is started, no race condition possible *) - (A.get st).suspension <- Some suspension; - start_tasks ~run ()); - }; - get_exn st + (* start computing [f] in the background *) + Runner.run_async runner (fun () -> + try + let res = f () in + ST.set_left_ st (Ok res) + with exn -> + let bt = Printexc.get_raw_backtrace () in + ST.set_left_ st (Error (exn, bt))); + + let res_right = + try Ok (g ()) + with exn -> + let bt = Printexc.get_raw_backtrace () in + Error (exn, bt) + in + + ST.set_right_ st res_right; + ST.get_exn_ st let both_ignore f g = ignore (both f g : _ * _) From 245bfd9b7baf1191cba5edd466915fdf4fe6198a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 20:00:24 -0500 Subject: [PATCH 69/77] test: regression test for bug --- test/dune | 1 + test/t_ws_pool_confusion.ml | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 test/t_ws_pool_confusion.ml diff --git a/test/dune b/test/dune index 56261dad..43955ec6 100644 --- a/test/dune +++ b/test/dune @@ -1,6 +1,7 @@ (tests (names t_fib + t_ws_pool_confusion t_bench1 t_fib_rec t_futs1 diff --git a/test/t_ws_pool_confusion.ml b/test/t_ws_pool_confusion.ml new file mode 100644 index 00000000..20488b65 --- /dev/null +++ b/test/t_ws_pool_confusion.ml @@ -0,0 +1,28 @@ +open Moonpool + +let delay () = Thread.delay 0.001 + +let run ~p_main:_ ~p_sub () = + let f1 = + Fut.spawn ~on:p_sub (fun () -> + delay (); + 1) + in + let f2 = + Fut.spawn ~on:p_sub (fun () -> + delay (); + 2) + in + Fut.wait_block_exn f1 + Fut.wait_block_exn f2 + +let () = + let p_main = Ws_pool.create ~num_threads:2 () in + let p_sub = Ws_pool.create ~num_threads:10 () in + + let futs = List.init 8 (fun _ -> Fut.spawn ~on:p_main (run ~p_main ~p_sub)) in + + let l = List.map Fut.wait_block_exn futs in + assert (l = List.init 8 (fun _ -> 3)); + + print_endline "ok"; + () From 0a432585c6323795c1ee767ad370d8136c1be368 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 20:03:46 -0500 Subject: [PATCH 70/77] fix: in WS_pool, only schedule in worker queues in some cases only do it if we actually want to spawn the task on pool A while running on a worker of A (not a worker on B). --- src/ws_pool.ml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/ws_pool.ml b/src/ws_pool.ml index 874cbd5c..d32c71f8 100644 --- a/src/ws_pool.ml +++ b/src/ws_pool.ml @@ -5,7 +5,16 @@ include Runner let ( let@ ) = ( @@ ) +module Id = struct + type t = unit ref + (** Unique identifier for a pool *) + + let create () : t = Sys.opaque_identity (ref ()) + let equal : t -> t -> bool = ( == ) +end + type worker_state = { + pool_id_: Id.t; (** Unique per pool *) mutable thread: Thread.t; q: task WSQ.t; (** Work stealing queue *) rng: Random.State.t; @@ -17,6 +26,7 @@ type worker_state = { type around_task = AT_pair : (t -> 'a) * (t -> 'a -> unit) -> around_task type state = { + id_: Id.t; active: bool A.t; (** Becomes [false] when the pool is shutdown. *) workers: worker_state array; (** Fixed set of workers. *) main_q: task Queue.t; (** Main queue for tasks coming from the outside *) @@ -59,7 +69,10 @@ let schedule_task_ (self : state) (w : worker_state option) (task : task) : unit = (* Printf.printf "schedule task now (%d)\n%!" (Thread.id @@ Thread.self ()); *) match w with - | Some w -> + | Some w when Id.equal self.id_ w.pool_id_ -> + (* we're on this same pool, schedule in the worker's state. Otherwise + we might also be on pool A but asking to schedule on pool B, + so we have to check that identifiers match. *) let pushed = WSQ.push w.q task in if pushed then try_wake_someone_ self @@ -70,7 +83,7 @@ let schedule_task_ (self : state) (w : worker_state option) (task : task) : unit if self.n_waiting_nonzero then Condition.signal self.cond; Mutex.unlock self.mutex ) - | None -> + | _ -> if A.get self.active then ( (* push into the main queue *) Mutex.lock self.mutex; @@ -216,6 +229,7 @@ let dummy_task_ () = assert false let create ?(on_init_thread = default_thread_init_exit_) ?(on_exit_thread = default_thread_init_exit_) ?(on_exn = fun _ _ -> ()) ?around_task ?num_threads () : t = + let pool_id_ = Id.create () in (* wrapper *) let around_task = match around_task with @@ -233,6 +247,7 @@ let create ?(on_init_thread = default_thread_init_exit_) let dummy = Thread.self () in Array.init num_threads (fun i -> { + pool_id_; thread = dummy; q = WSQ.create ~dummy:dummy_task_ (); rng = Random.State.make [| i |]; @@ -241,6 +256,7 @@ let create ?(on_init_thread = default_thread_init_exit_) let pool = { + id_ = pool_id_; active = A.make true; workers; main_q = Queue.create (); From 9cb7781a2e7fc7378600ef921a6ae47145e2d84c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 20:37:39 -0500 Subject: [PATCH 71/77] feat: fut: add `spawn_on_current_runner` --- src/fut.ml | 5 +++++ src/fut.mli | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/src/fut.ml b/src/fut.ml index 639a503b..ad34cefe 100644 --- a/src/fut.ml +++ b/src/fut.ml @@ -100,6 +100,11 @@ let spawn ~on f : _ t = Runner.run_async on task; fut +let spawn_on_current_runner f : _ t = + match Runner.get_current_runner () with + | None -> failwith "Fut.spawn_on_current_runner: not running on a runner" + | Some on -> spawn ~on f + let reify_error (f : 'a t) : 'a or_error t = match peek f with | Some res -> return res diff --git a/src/fut.mli b/src/fut.mli index 944a9525..99b04a8d 100644 --- a/src/fut.mli +++ b/src/fut.mli @@ -85,6 +85,15 @@ val spawn : on:Runner.t -> (unit -> 'a) -> 'a t (** [spaw ~on f] runs [f()] on the given runner [on], and return a future that will hold its result. *) +val spawn_on_current_runner : (unit -> 'a) -> 'a t +(** This must be run from inside a runner, and schedules + the new task on it as well. + + See {!Runner.get_current_runner} to see how the runner is found. + + @since NEXT_RELEASE + @raise Failure if run from outside a runner. *) + val reify_error : 'a t -> 'a or_error t (** [reify_error fut] turns a failing future into a non-failing one that contain [Error (exn, bt)]. A non-failing future From 9709f88d5fa6c1b296bec05d70f9ba9cd6764180 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 20:38:27 -0500 Subject: [PATCH 72/77] breaking: fut: `join` does not take `?on` anymore --- src/fut.ml | 12 ++++++++++-- src/fut.mli | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/fut.ml b/src/fut.ml index ad34cefe..0e235f77 100644 --- a/src/fut.ml +++ b/src/fut.ml @@ -123,7 +123,6 @@ let map ?on ~f fut : _ t = Error (e, bt)) | Error e_bt -> Error e_bt in - match peek fut with | Some r -> of_result (map_res r) | None -> @@ -137,7 +136,17 @@ let map ?on ~f fut : _ t = match on with | None -> map_and_fulfill () | Some on -> Runner.run_async on map_and_fulfill); + fut2 +let join (fut : 'a t t) : 'a t = + match peek fut with + | Some (Ok f) -> f + | Some (Error (e, bt)) -> fail e bt + | None -> + let fut2, promise = make () in + on_result fut (function + | Ok sub_fut -> on_result sub_fut (fulfill promise) + | Error _ as e -> fulfill promise e); fut2 let bind ?on ~f fut : _ t = @@ -175,7 +184,6 @@ let bind ?on ~f fut : _ t = fut2 let bind_reify_error ?on ~f fut : _ t = bind ?on ~f (reify_error fut) -let join ?on fut = bind ?on fut ~f:(fun x -> x) let update_ (st : 'a A.t) f : 'a = let rec loop () = diff --git a/src/fut.mli b/src/fut.mli index 99b04a8d..989d43a6 100644 --- a/src/fut.mli +++ b/src/fut.mli @@ -120,7 +120,7 @@ val bind_reify_error : ?on:Runner.t -> f:('a or_error -> 'b t) -> 'a t -> 'b t @param on if provided, [f] runs on the given runner @since 0.4 *) -val join : ?on:Runner.t -> 'a t t -> 'a t +val join : 'a t t -> 'a t (** [join fut] is [fut >>= Fun.id]. It joins the inner layer of the future. @since 0.2 *) From d4e5e811bb73ba1bc8f4b14b2d7502bfdd2ddd8a Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 20:41:34 -0500 Subject: [PATCH 73/77] breaking: fut: change behavior of `?on` combinators that take `?on` will now check if they can use the current runner if `?on:None` is passed. If no runner is passed and they're run from outside a runner, they will just run in the relevant callback or on the current thread. --- src/fut.ml | 69 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/src/fut.ml b/src/fut.ml index 0e235f77..54ea7aa8 100644 --- a/src/fut.ml +++ b/src/fut.ml @@ -113,8 +113,13 @@ let reify_error (f : 'a t) : 'a or_error t = on_result f (fun r -> fulfill promise (Ok r)); fut +let get_runner_ ?on () : Runner.t option = + match on with + | Some _ as r -> r + | None -> Runner.get_current_runner () + let map ?on ~f fut : _ t = - let map_res r = + let map_immediate_ r : _ result = match r with | Ok x -> (try Ok (f x) @@ -123,19 +128,22 @@ let map ?on ~f fut : _ t = Error (e, bt)) | Error e_bt -> Error e_bt in - match peek fut with - | Some r -> of_result (map_res r) - | None -> - let fut2, promise = make () in - on_result fut (fun r -> - let map_and_fulfill () = - let res = map_res r in - fulfill promise res - in - match on with - | None -> map_and_fulfill () - | Some on -> Runner.run_async on map_and_fulfill); + match peek fut, get_runner_ ?on () with + | Some res, None -> of_result @@ map_immediate_ res + | Some res, Some runner -> + let fut2, promise = make () in + Runner.run_async runner (fun () -> fulfill promise @@ map_immediate_ res); + fut2 + | None, None -> + let fut2, promise = make () in + on_result fut (fun res -> fulfill promise @@ map_immediate_ res); + fut2 + | None, Some runner -> + let fut2, promise = make () in + on_result fut (fun res -> + Runner.run_async runner (fun () -> + fulfill promise @@ map_immediate_ res)); fut2 let join (fut : 'a t t) : 'a t = @@ -160,32 +168,31 @@ let bind ?on ~f fut : _ t = | Error (e, bt) -> fail e bt in - let bind_and_fulfill r promise () = + let bind_and_fulfill (r : _ result) promise () : unit = let f_res_fut = apply_f_to_res r in (* forward result *) on_result f_res_fut (fun r -> fulfill promise r) in - match peek fut with - | Some r -> - (match on with - | None -> apply_f_to_res r - | Some on -> - let fut2, promise = make () in - Runner.run_async on (bind_and_fulfill r promise); - fut2) - | None -> + match peek fut, get_runner_ ?on () with + | Some res, Some runner -> + let fut2, promise = make () in + Runner.run_async runner (bind_and_fulfill res promise); + fut2 + | Some res, None -> apply_f_to_res res + | None, Some runner -> let fut2, promise = make () in on_result fut (fun r -> - match on with - | None -> bind_and_fulfill r promise () - | Some on -> Runner.run_async on (bind_and_fulfill r promise)); - + Runner.run_async runner (bind_and_fulfill r promise)); + fut2 + | None, None -> + let fut2, promise = make () in + on_result fut (fun res -> bind_and_fulfill res promise ()); fut2 -let bind_reify_error ?on ~f fut : _ t = bind ?on ~f (reify_error fut) +let[@inline] bind_reify_error ?on ~f fut : _ t = bind ?on ~f (reify_error fut) -let update_ (st : 'a A.t) f : 'a = +let update_atomic_ (st : 'a A.t) f : 'a = let rec loop () = let x = A.get st in let y = f x in @@ -210,7 +217,7 @@ let both a b : _ t = | Error err -> fulfill_idempotent promise (Error err) | Ok x -> (match - update_ st (function + update_atomic_ st (function | `Neither -> `Left x | `Right y -> `Both (x, y) | _ -> assert false) @@ -221,7 +228,7 @@ let both a b : _ t = | Error err -> fulfill_idempotent promise (Error err) | Ok y -> (match - update_ st (function + update_atomic_ st (function | `Left x -> `Both (x, y) | `Neither -> `Right y | _ -> assert false) From 59ae1068fde92b75a3a89dc736e49325c61ac03f Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 20:41:09 -0500 Subject: [PATCH 74/77] breaking: fut: only have `module Infix` we keep `Infix_local` as a deprecated alias to it for now --- src/fut.ml | 37 +++++-------------------------------- src/fut.mli | 31 +++++++++++++++++-------------- 2 files changed, 22 insertions(+), 46 deletions(-) diff --git a/src/fut.ml b/src/fut.ml index 54ea7aa8..0661ffa2 100644 --- a/src/fut.ml +++ b/src/fut.ml @@ -411,41 +411,14 @@ let await (fut : 'a t) : 'a = [@@@endif] -module type INFIX = sig - val ( >|= ) : 'a t -> ('a -> 'b) -> 'b t - val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t - val ( let+ ) : 'a t -> ('a -> 'b) -> 'b t - val ( and+ ) : 'a t -> 'b t -> ('a * 'b) t - val ( let* ) : 'a t -> ('a -> 'b t) -> 'b t - val ( and* ) : 'a t -> 'b t -> ('a * 'b) t -end - -module Infix_ (X : sig - val pool : Runner.t option -end) : INFIX = struct - let[@inline] ( >|= ) x f = map ?on:X.pool ~f x - let[@inline] ( >>= ) x f = bind ?on:X.pool ~f x +module Infix = struct + let[@inline] ( >|= ) x f = map ~f x + let[@inline] ( >>= ) x f = bind ~f x let ( let+ ) = ( >|= ) let ( let* ) = ( >>= ) let ( and+ ) = both let ( and* ) = both end -module Infix_local = Infix_ (struct - let pool = None -end) - -include Infix_local - -module Infix (X : sig - val pool : Runner.t -end) = -Infix_ (struct - let pool = Some X.pool -end) - -let[@inline] infix pool : (module INFIX) = - let module M = Infix (struct - let pool = pool - end) in - (module M) +include Infix +module Infix_local = Infix [@@deprecated "use Infix"] diff --git a/src/fut.mli b/src/fut.mli index 989d43a6..aa4515f5 100644 --- a/src/fut.mli +++ b/src/fut.mli @@ -209,7 +209,19 @@ val wait_block : 'a t -> 'a or_error val wait_block_exn : 'a t -> 'a (** Same as {!wait_block} but re-raises the exception if the future failed. *) -module type INFIX = sig +(** {2 Infix operators} + + These combinators run on either the current pool (if present), + or on the same thread that just fulfilled the previous future + if not. + + They were previously present as [module Infix_local] and [val infix], + but are now simplified. + + @since NEXT_RELEASE *) + +(** @since NEXT_RELEASE *) +module Infix : sig val ( >|= ) : 'a t -> ('a -> 'b) -> 'b t val ( >>= ) : 'a t -> ('a -> 'b t) -> 'b t val ( let+ ) : 'a t -> ('a -> 'b) -> 'b t @@ -218,17 +230,8 @@ module type INFIX = sig val ( and* ) : 'a t -> 'b t -> ('a * 'b) t end -module Infix_local : INFIX -(** Operators that run on the same thread as the first future. *) +include module type of Infix -include INFIX - -(** Make infix combinators, with intermediate computations running on the given pool. *) -module Infix (_ : sig - val pool : Runner.t -end) : INFIX - -val infix : Runner.t -> (module INFIX) -(** [infix runner] makes a new infix module with intermediate computations - running on the given runner.. - @since 0.2 *) +module Infix_local = Infix +[@@deprecated "Use Infix"] +(** @deprecated use Infix instead *) From 62e8336d84fb80fbf0a7a8447573cc9da6643910 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 20:42:30 -0500 Subject: [PATCH 75/77] update tests and benchs for new Fut API --- benchs/fib_rec.ml | 2 +- test/effect-based/t_many.ml | 4 +--- test/t_chan_train.ml | 2 +- test/t_tree_futs.ml | 17 +++++++---------- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/benchs/fib_rec.ml b/benchs/fib_rec.ml index 571b8495..b0f1623d 100644 --- a/benchs/fib_rec.ml +++ b/benchs/fib_rec.ml @@ -12,7 +12,7 @@ let rec fib ~on x : int Fut.t = if x <= !cutoff then Fut.spawn ~on (fun () -> fib_direct x) else - let open Fut.Infix_local in + let open Fut.Infix in let+ t1 = fib ~on (x - 1) and+ t2 = fib ~on (x - 2) in t1 + t2 diff --git a/test/effect-based/t_many.ml b/test/effect-based/t_many.ml index 6a2b5918..b4a2c8da 100644 --- a/test/effect-based/t_many.ml +++ b/test/effect-based/t_many.ml @@ -19,9 +19,7 @@ let run ~pool () = 0 l) in - let futs = - List.init n_tasks (fun _ -> Fut.spawn ~on:pool task |> Fut.join ~on:pool) - in + let futs = List.init n_tasks (fun _ -> Fut.spawn ~on:pool task |> Fut.join) in let lens = List.map Fut.wait_block_exn futs in Printf.printf "awaited %d items (%d times)\n%!" (List.hd lens) n_tasks; diff --git a/test/t_chan_train.ml b/test/t_chan_train.ml index 132d5540..20645a73 100644 --- a/test/t_chan_train.ml +++ b/test/t_chan_train.ml @@ -3,7 +3,7 @@ open Moonpool (* large pool, some of our tasks below are long lived *) let pool = Ws_pool.create ~num_threads:30 () -open (val Fut.infix pool) +open Fut.Infix type event = | E_int of int diff --git a/test/t_tree_futs.ml b/test/t_tree_futs.ml index 0bc96a03..c9905eae 100644 --- a/test/t_tree_futs.ml +++ b/test/t_tree_futs.ml @@ -15,19 +15,16 @@ let rec mk_tree ~pool n : _ tree Fut.t = let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "mk-tree" in if n <= 1 then Fut.return (Leaf 1) - else - let open (val Fut.infix pool) in - let l = - Fut.spawn ~on:pool (fun () -> mk_tree ~pool (n - 1)) |> Fut.join ~on:pool - and r = - Fut.spawn ~on:pool (fun () -> mk_tree ~pool (n - 1)) |> Fut.join ~on:pool - in + else ( + let l = Fut.spawn ~on:pool (fun () -> mk_tree ~pool (n - 1)) |> Fut.join + and r = Fut.spawn ~on:pool (fun () -> mk_tree ~pool (n - 1)) |> Fut.join in Fut.return @@ Node (l, r) + ) let rec rev ~pool (t : 'a tree Fut.t) : 'a tree Fut.t = let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "rev" in - let open (val Fut.infix pool) in + let open Fut.Infix in t >>= function | Leaf n -> Fut.return (Leaf n) | Node (l, r) -> @@ -36,7 +33,7 @@ let rec rev ~pool (t : 'a tree Fut.t) : 'a tree Fut.t = let rec sum ~pool (t : int tree Fut.t) : int Fut.t = let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "sum" in - let open (val Fut.infix pool) in + let open Fut.Infix in t >>= function | Leaf n -> Fut.return n | Node (l, r) -> @@ -45,7 +42,7 @@ let rec sum ~pool (t : int tree Fut.t) : int Fut.t = let run ~pool n : (int * int) Fut.t = let@ _sp = Trace.with_span ~__FILE__ ~__LINE__ "run" in - let open (val Fut.infix pool) in + let open Fut.Infix in let t = Fut.return n >>= mk_tree ~pool in let t' = rev ~pool t in let sum_t = sum ~pool t in From 989c012f77a0e446d6a59627695f7436315958f7 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 20:48:04 -0500 Subject: [PATCH 76/77] fix warnings --- test/t_fib_rec.ml | 2 +- test/t_props.ml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/t_fib_rec.ml b/test/t_fib_rec.ml index 94e206b7..3495fcae 100644 --- a/test/t_fib_rec.ml +++ b/test/t_fib_rec.ml @@ -16,7 +16,7 @@ let rec fib ~on x : int Fut.t = Atomic.incr n_calls_fib_direct; fib_direct x) else - let open Fut.Infix_local in + let open Fut.Infix in let+ t1 = fib ~on (x - 1) and+ t2 = fib ~on (x - 2) in t1 + t2 diff --git a/test/t_props.ml b/test/t_props.ml index 698650fd..fe187073 100644 --- a/test/t_props.ml +++ b/test/t_props.ml @@ -27,7 +27,7 @@ let () = Q.(small_list small_int) (fun l -> let@ pool = with_pool ~kind () in - let open Fut.Infix_local in + let open Fut.Infix in let l' = l |> List.map (fun x -> From 3f7ed7b6b886f3731ace7182147d34753ffd0ab4 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Tue, 7 Nov 2023 21:11:12 -0500 Subject: [PATCH 77/77] moonpool: expose Fut.{spawn_on_current_runner,await} --- src/moonpool.ml | 7 +++++++ src/moonpool.mli | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/moonpool.ml b/src/moonpool.ml index b7d225d2..f2cf0174 100644 --- a/src/moonpool.ml +++ b/src/moonpool.ml @@ -5,6 +5,13 @@ let start_thread_on_some_domain f x = let run_async = Runner.run_async let recommended_thread_count () = Domain_.recommended_number () let spawn = Fut.spawn +let spawn_on_current_runner = Fut.spawn_on_current_runner + +[@@@ifge 5.0] + +let await = Fut.await + +[@@@endif] module Atomic = Atomic_ module Blocking_queue = Bb_queue diff --git a/src/moonpool.mli b/src/moonpool.mli index 3c470bd4..60c0ede6 100644 --- a/src/moonpool.mli +++ b/src/moonpool.mli @@ -40,6 +40,19 @@ val spawn : on:Runner.t -> (unit -> 'a) -> 'a Fut.t and returns a future result for it. See {!Fut.spawn}. @since NEXT_RELEASE *) +val spawn_on_current_runner : (unit -> 'a) -> 'a Fut.t +(** See {!Fut.spawn_on_current_runner}. + @since NEXT_RELEASE *) + +[@@@ifge 5.0] + +val await : 'a Fut.t -> 'a +(** Await a future. See {!Fut.await}. + Only on OCaml >= 5.0. + @since NEXT_RELEASE *) + +[@@@endif] + module Lock = Lock module Fut = Fut module Chan = Chan