diff --git a/src/core/CCList.ml b/src/core/CCList.ml
index 6effa719..8429faf1 100644
--- a/src/core/CCList.ml
+++ b/src/core/CCList.ml
@@ -1017,6 +1017,110 @@ let all_ok l =
       | Some e -> Result.Error e
     end
 
+let group_by (type k) ?(hash=Hashtbl.hash) ?(eq=Pervasives.(=)) l =
+  let module Tbl = Hashtbl.Make(struct type t = k let equal = eq let hash = hash end) in
+  (* group table: maps an element to the list of elements of [l] equal to it *)
+  let tbl = Tbl.create 32 in
+  List.iter
+    (fun x ->
+       let l = try Tbl.find tbl x with Not_found -> [] in (* no group yet: start from [] *)
+       Tbl.replace tbl x (x::l))
+    l;
+  Tbl.fold (fun _ x acc -> x::acc) tbl [] (* keep only the groups, one list per key *)
+
+let join ~join_row s1 s2 : _ t =
+  flat_map (fun a -> filter_map (join_row a) s2) s1 (* nested loop: [s2] is scanned once per element of [s1] *)
+
+(*$R
+  let s1 = (1 -- 3) in
+  let s2 = ["1"; "2"] in
+  let join_row i j =
+    if string_of_int i = j then Some (string_of_int i ^ " = " ^ j) else None
+  in
+  let s = join ~join_row s1 s2 in
+  OUnit.assert_equal ["1 = 1"; "2 = 2"] s;
+*)
+
+let join_by (type a) ?(eq=Pervasives.(=)) ?(hash=Hashtbl.hash) f1 f2 ~merge c1 c2 =
+  let module Tbl = Hashtbl.Make(struct type t = a let equal = eq let hash = hash end) in
+  let tbl = Tbl.create 32 in
+  List.iter
+    (fun x ->
+       let key = f1 x in
+       Tbl.add tbl key x) (* [add], not [replace]: every [x] sharing this key stays bound *)
+    c1;
+  let res = ref [] in
+  List.iter
+    (fun y ->
+       let key = f2 y in
+       let xs = Tbl.find_all tbl key in (* all elements of [c1] bound to [key] *)
+       List.iter
+         (fun x -> match merge key x y with
+            | None -> ()
+            | Some z -> res := z :: !res)
+         xs)
+    c2;
+  !res
+
+type ('a, 'b) join_all_cell = { (* per-key accumulator used by [join_all_by] *)
+  mutable ja_left: 'a list; (* elements of the first list projected to the key *)
+  mutable ja_right: 'b list; (* elements of the second list projected to the key *)
+}
+
+let join_all_by (type a) ?(eq=Pervasives.(=)) ?(hash=Hashtbl.hash) f1 f2 ~merge c1 c2 =
+  let module Tbl = Hashtbl.Make(struct type t = a let equal = eq let hash = hash end) in
+  let tbl = Tbl.create 32 in
+  (* build the map [key -> cell]: [ja_left] is filled from [c1], then [ja_right] from [c2] *)
+  List.iter
+    (fun x ->
+       let key = f1 x in
+       try
+         let c = Tbl.find tbl key in
+         c.ja_left <- x :: c.ja_left
+       with Not_found ->
+         Tbl.add tbl key {ja_left=[x]; ja_right=[]})
+    c1;
+  List.iter
+    (fun y ->
+       let key = f2 y in
+       try
+         let c = Tbl.find tbl key in
+         c.ja_right <- y :: c.ja_right
+       with Not_found ->
+         Tbl.add tbl key {ja_left=[]; ja_right=[y]}) (* key occurs in [c2] but not in [c1] *)
+    c2;
+  Tbl.fold
+    (fun key cell res -> match merge key cell.ja_left cell.ja_right with
+       | None -> res
+       | Some z -> z :: res)
+    tbl []
+
+let group_join_by (type a) ?(eq=Pervasives.(=)) ?(hash=Hashtbl.hash) f c1 c2 =
+  let module Tbl = Hashtbl.Make(struct type t = a let equal = eq let hash = hash end) in
+  let tbl = Tbl.create 32 in
+  List.iter (fun x -> Tbl.replace tbl x []) c1; (* every element of [c1] starts with an empty group *)
+  List.iter
+    (fun y ->
+       (* project [y] into some element of [c1] *)
+       let key = f y in
+       try
+         let l = Tbl.find tbl key in
+         Tbl.replace tbl key (y :: l)
+       with Not_found -> ()) (* [key] is not an element of [c1]: [y] is dropped *)
+    c2;
+  Tbl.fold (fun k v l -> (k,v) :: l) tbl []
+
+(*$=
+  ['a', ["abc"; "attic"]; \
+   'b', ["barbary"; "boom"; "bop"]; \
+   'c', []] \
+  (group_join_by (fun s->s.[0]) \
+    (CCString.to_list "abc") \
+    ["abc"; "boom"; "attic"; "deleted"; "barbary"; "bop"] \
+  |> map (fun (c,l)->c,List.sort Pervasives.compare l) \
+  |> sort Pervasives.compare)
+*)
+
 (*$inject
   open Result
 *)
diff --git a/src/core/CCList.mli b/src/core/CCList.mli
index c1f4e5c2..5e32522a 100644
--- a/src/core/CCList.mli
+++ b/src/core/CCList.mli
@@ -174,6 +174,64 @@ val partition_map : ('a -> [<`Left of 'b | `Right of 'c | `Drop]) ->
     - if [f x = `Drop], ignores [x].
     @since 0.11 *)
 
+val group_by : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) ->
+  'a t -> 'a list t
+(** [group_by l] groups the elements of [l] that are equal according to [eq], regardless of their order of appearance.
+    precondition: for any [x] and [y], if [eq x y] then [hash x=hash y] must hold.
+    @since NEXT_RELEASE *)
+
+val join : join_row:('a -> 'b -> 'c option) -> 'a t -> 'b t -> 'c t
+(** [join ~join_row a b] combines every element of [a] with every
+    element of [b] using [join_row]. If [join_row] returns [None], then
+    the two elements do not combine. [b] is traversed once per element
+    of [a].
+    @since NEXT_RELEASE *)
+
+val join_by : ?eq:('key -> 'key -> bool) -> ?hash:('key -> int) ->
+  ('a -> 'key) -> ('b -> 'key) ->
+  merge:('key -> 'a -> 'b -> 'c option) ->
+  'a t ->
+  'b t ->
+  'c t
+(** [join_by key1 key2 ~merge a b] is a binary operation
+    that takes two lists [a] and [b], projects their
+    elements resp. with [key1] and [key2], and combines
+    values [(x,y)] from [(a,b)] with the same [key]
+    using [merge]. If [merge] returns [None], the combination
+    of values is discarded.
+    precondition: for any [x] and [y], if [eq x y] then [hash x=hash y] must hold.
+    @since NEXT_RELEASE *)
+
+val join_all_by : ?eq:('key -> 'key -> bool) -> ?hash:('key -> int) ->
+  ('a -> 'key) -> ('b -> 'key) ->
+  merge:('key -> 'a list -> 'b list -> 'c option) ->
+  'a t ->
+  'b t ->
+  'c t
+(** [join_all_by key1 key2 ~merge a b] is a binary operation
+    that takes two lists [a] and [b], projects their
+    elements resp. with [key1] and [key2], and, for each key [k]
+    occurring in at least one of them:
+    - computes the list [l1] of elements of [a] that map to [k]
+    - computes the list [l2] of elements of [b] that map to [k]
+    - calls [merge k l1 l2]. If [merge] returns [None], the combination
+      of values is discarded; if it returns [Some c], [c] is inserted in the result.
+    precondition: for any [x] and [y], if [eq x y] then [hash x=hash y] must hold.
+    @since NEXT_RELEASE *)
+
+val group_join_by : ?eq:('a -> 'a -> bool) -> ?hash:('a -> int) ->
+  ('b -> 'a) ->
+  'a t ->
+  'b t ->
+  ('a * 'b list) t
+(** [group_join_by key2 a b] associates to every element [x] of
+    the first list [a], all the elements [y] of the second
+    list [b] such that [eq x (key2 y)]. Elements of the first
+    list without corresponding values in the second one
+    are mapped to [[]].
+    precondition: for any [x] and [y], if [eq x y] then [hash x=hash y] must hold.
+    @since NEXT_RELEASE *)
+
 val sublists_of_len :
   ?last:('a list -> 'a list option) ->
   ?offset:int ->
diff --git a/src/core/CCListLabels.mli b/src/core/CCListLabels.mli
index efec8391..1d28e439 100644
--- a/src/core/CCListLabels.mli
+++ b/src/core/CCListLabels.mli
@@ -127,6 +127,7 @@ val cartesian_product : 'a t t -> 'a t t
 val map_product_l : f:('a -> 'b list) -> 'a list -> 'b list list
 (** @since 2.2 *)
 
+
 val diagonal : 'a t -> ('a * 'a) t
 (** All pairs of distinct positions of the list. [list_diagonal l] will
     return the list of [List.nth i l, List.nth j l] if [i < j]. *)
@@ -139,6 +140,65 @@ val partition_map : f:('a -> [<`Left of 'b | `Right of 'c | `Drop]) ->
     - if [f x = `Drop], ignores [x].
     @since 0.11 *)
 
+val group_by : ?hash:('a -> int) -> ?eq:('a -> 'a -> bool) ->
+  'a t -> 'a list t
+(** [group_by l] groups the elements of [l] that are equal according to [eq], regardless of their order of appearance.
+    precondition: for any [x] and [y], if [eq x y] then [hash x=hash y] must hold.
+    @since NEXT_RELEASE *)
+
+val join : join_row:('a -> 'b -> 'c option) -> 'a t -> 'b t -> 'c t
+(** [join ~join_row a b] combines every element of [a] with every
+    element of [b] using [join_row]. If [join_row] returns [None], then
+    the two elements do not combine. [b] is traversed once per element
+    of [a].
+    @since NEXT_RELEASE *)
+
+val join_by : ?eq:('key -> 'key -> bool) -> ?hash:('key -> int) ->
+  ('a -> 'key) -> ('b -> 'key) ->
+  merge:('key -> 'a -> 'b -> 'c option) ->
+  'a t ->
+  'b t ->
+  'c t
+(** [join_by key1 key2 ~merge a b] is a binary operation
+    that takes two lists [a] and [b], projects their
+    elements resp. with [key1] and [key2], and combines
+    values [(x,y)] from [(a,b)] with the same [key]
+    using [merge]. If [merge] returns [None], the combination
+    of values is discarded.
+    precondition: for any [x] and [y], if [eq x y] then [hash x=hash y] must hold.
+    @since NEXT_RELEASE *)
+
+val join_all_by : ?eq:('key -> 'key -> bool) -> ?hash:('key -> int) ->
+  ('a -> 'key) -> ('b -> 'key) ->
+  merge:('key -> 'a list -> 'b list -> 'c option) ->
+  'a t ->
+  'b t ->
+  'c t
+(** [join_all_by key1 key2 ~merge a b] is a binary operation
+    that takes two lists [a] and [b], projects their
+    elements resp. with [key1] and [key2], and, for each key [k]
+    occurring in at least one of them:
+    - computes the list [l1] of elements of [a] that map to [k]
+    - computes the list [l2] of elements of [b] that map to [k]
+    - calls [merge k l1 l2]. If [merge] returns [None], the combination
+      of values is discarded; if it returns [Some c], [c] is inserted in the result.
+    precondition: for any [x] and [y], if [eq x y] then [hash x=hash y] must hold.
+    @since NEXT_RELEASE *)
+
+val group_join_by : ?eq:('a -> 'a -> bool) -> ?hash:('a -> int) ->
+  ('b -> 'a) ->
+  'a t ->
+  'b t ->
+  ('a * 'b list) t
+(** [group_join_by key2 a b] associates to every element [x] of
+    the first list [a], all the elements [y] of the second
+    list [b] such that [eq x (key2 y)]. Elements of the first
+    list without corresponding values in the second one
+    are mapped to [[]].
+    precondition: for any [x] and [y], if [eq x y] then [hash x=hash y] must hold.
+    @since NEXT_RELEASE *)
+
+
 val sublists_of_len :
   ?last:('a list -> 'a list option) ->
   ?offset:int ->