moonpool/src/private/ws_deque_.ml
Simon Cruanes 9b3c75124e
simon/move to picos (#30)
* feat: depend on picos, use picos.exn_bt

* refactor: remove dla

* non optional dependency on thread-local-storage

it's a dep of picos anyway

* wip: use picos computations

* disable t_fib1 test, way too flaky

* feat `fut`: wrap picos computations

* detail in fut

* gitignore

* refactor core: use picos for schedulers; add Worker_loop_

we factor most of the thread workers' logic in `Worker_loop_`,
which is now shared between Ws_pool and Fifo_pool

* github actions

* feat fut: add `on_result_ignore`

* details

* wip: port to picos

* test: wip porting tests

* fix fut: trigger failing to attach doesn't signal it

* fix pool: only return No_more_tasks when local and global q empty

* format

* chore: fix CI by installing picos first

* more CI

* test: re-enable t_fib1 but with a single core fifo pool

it should be deterministic now!

* fixes after reviews

* bump minimal OCaml version to 4.13

* use `exn_bt`, not `picos.exn_bt`

* feat: optional dep on hmap, for inheritable FLS data

* format

* chore: depend on picos explicitly

* feat: move hmap-fls to Fiber.Fls

* change API for local FLS hmap

* refactor: move optional hmap FLS stuff into core/task_local_storage

* add Task_local_storage.remove_in_local_hmap

* chore: try to fix CI

* format

* chore: CI

* fix

* feat: add `Fls.with_in_local_hmap`

* chore: depend on hmap for tests

* fix test for FLS

use the inheritable keys

* chore: CI

* require OCaml 4.14 :/

* feat: add `moonpool.sync` with await-friendly abstractions

based on picos_sync

* fix: catch TLS.Not_set

* fix: `LS.get` shouldn't raise

* fix

* update to merged picos PR

* chore: CI

* fix dep

* feat: add `Event.of_fut`

* chore: CI

* remove dep on now defunct `exn_bt`

* feat: add moonpool-io

* chore: CI

* version constraint on moonpool-io

* add Event.Infix

* move to picos_io
2024-09-04 12:04:27 -04:00

129 lines
3.3 KiB
OCaml

module A = Atomic_
(* terminology:
- Bottom: where we push/pop normally. Only one thread can do that.
- top: where work stealing happens (older values).
This only ever grows.
Elements are always added on the bottom end. *)
(** Circular array (size is [2 ^ log_size]) *)
module CA : sig
type 'a t
val create : dummy:'a -> unit -> 'a t
val size : 'a t -> int
val get : 'a t -> int -> 'a
val set : 'a t -> int -> 'a -> unit
end = struct
(** The array has size 256. *)
let log_size = 8
type 'a t = { arr: 'a array } [@@unboxed]
let[@inline] size (_self : _ t) = 1 lsl log_size
let create ~dummy () : _ t = { arr = Array.make (1 lsl log_size) dummy }
let[@inline] get (self : 'a t) (i : int) : 'a =
Array.unsafe_get self.arr (i land ((1 lsl log_size) - 1))
let[@inline] set (self : 'a t) (i : int) (x : 'a) : unit =
Array.unsafe_set self.arr (i land ((1 lsl log_size) - 1)) x
end
type 'a t = {
top: int A.t; (** Where we steal *)
bottom: int A.t; (** Where we push/pop from the owning thread *)
mutable top_cached: int; (** Last read value of [top] *)
arr: 'a CA.t; (** The circular array *)
}
let create ~dummy () : _ t =
let top = A.make 0 in
let arr = CA.create ~dummy () in
(* allocate far from [top] to avoid false sharing *)
let bottom = A.make 0 in
{ top; top_cached = 0; bottom; arr }
let[@inline] size (self : _ t) : int = max 0 (A.get self.bottom - A.get self.top)
exception Full
let push (self : 'a t) (x : 'a) : bool =
try
let b = A.get self.bottom in
let t_approx = self.top_cached in
(* Section 2.3: over-approximation of size.
Only if it seems too big do we actually read [t]. *)
let size_approx = b - t_approx in
if size_approx >= CA.size self.arr - 1 then (
(* we need to read the actual value of [top], which might entail contention. *)
let t = A.get self.top in
self.top_cached <- t;
let size = b - t in
if size >= CA.size self.arr - 1 then (* full! *) raise_notrace Full
);
CA.set self.arr b x;
A.set self.bottom (b + 1);
true
with Full -> false
exception Empty
let pop_exn (self : 'a t) : 'a =
let b = A.get self.bottom in
let b = b - 1 in
A.set self.bottom b;
let t = A.get self.top in
self.top_cached <- t;
let size = b - t in
if size < 0 then (
(* reset to basic empty state *)
A.set self.bottom t;
raise_notrace Empty
) else if size > 0 then (
(* can pop without modifying [top] *)
let x = CA.get self.arr b in
x
) else (
assert (size = 0);
(* there was exactly one slot, so we might be racing against stealers
to update [self.top] *)
if A.compare_and_set self.top t (t + 1) then (
let x = CA.get self.arr b in
A.set self.bottom (t + 1);
x
) else (
A.set self.bottom (t + 1);
raise_notrace Empty
)
)
let[@inline] pop self : _ option =
match pop_exn self with
| exception Empty -> None
| t -> Some t
let steal (self : 'a t) : 'a option =
(* read [top], but do not update [top_cached]
as we're in another thread *)
let t = A.get self.top in
let b = A.get self.bottom in
let size = b - t in
if size <= 0 then
None
else (
let x = CA.get self.arr t in
if A.compare_and_set self.top t (t + 1) then
(* successfully increased top to consume [x] *)
Some x
else
None
)