mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 11:45:31 -05:00
more optimizations, and a big chunk of query evaluation in CCLinq
This commit is contained in:
parent
17930cf119
commit
0d39bfdbf4
3 changed files with 303 additions and 92 deletions
4
_oasis
4
_oasis
|
|
@ -41,7 +41,7 @@ Library "containers"
|
||||||
Modules: CCVector, CCDeque, CCGen, CCSequence, CCFQueue, CCMultiMap,
|
Modules: CCVector, CCDeque, CCGen, CCSequence, CCFQueue, CCMultiMap,
|
||||||
CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl,
|
CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl,
|
||||||
CCLeftistheap, CCList, CCOpt, CCPair, CCFun, CCHash,
|
CCLeftistheap, CCList, CCOpt, CCPair, CCFun, CCHash,
|
||||||
CCKList, CCInt, CCBool, CCArray, CCBatch
|
CCKList, CCInt, CCBool, CCArray, CCBatch, CCLinq
|
||||||
FindlibName: containers
|
FindlibName: containers
|
||||||
|
|
||||||
Library "containers_string"
|
Library "containers_string"
|
||||||
|
|
@ -60,7 +60,7 @@ Library "containers_misc"
|
||||||
Bij, PiCalculus, Bencode, Sexp, RAL,
|
Bij, PiCalculus, Bencode, Sexp, RAL,
|
||||||
UnionFind, SmallSet, AbsSet, CSM,
|
UnionFind, SmallSet, AbsSet, CSM,
|
||||||
ActionMan, QCheck, BencodeOnDisk, TTree,
|
ActionMan, QCheck, BencodeOnDisk, TTree,
|
||||||
HGraph, Automaton, Conv, Bidir, Iteratee, Linq,
|
HGraph, Automaton, Conv, Bidir, Iteratee,
|
||||||
Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact
|
Ty, Tell, BencodeStream, RatTerm, Cause, AVL, ParseReact
|
||||||
BuildDepends: unix,containers
|
BuildDepends: unix,containers
|
||||||
FindlibName: misc
|
FindlibName: misc
|
||||||
|
|
|
||||||
377
core/CCLinq.ml
377
core/CCLinq.ml
|
|
@ -30,39 +30,61 @@ type 'a sequence = ('a -> unit) -> unit
|
||||||
type 'a equal = 'a -> 'a -> bool
|
type 'a equal = 'a -> 'a -> bool
|
||||||
type 'a ord = 'a -> 'a -> int
|
type 'a ord = 'a -> 'a -> int
|
||||||
type 'a hash = 'a -> int
|
type 'a hash = 'a -> int
|
||||||
type 'a klist = unit -> [ `Nil | `Cons of 'a * 'a klist ]
|
|
||||||
|
|
||||||
let _id x = x
|
let _id x = x
|
||||||
|
|
||||||
|
type 'a search_result =
|
||||||
|
| SearchContinue
|
||||||
|
| SearchStop of 'a
|
||||||
|
|
||||||
module Coll = struct
|
module Coll = struct
|
||||||
type 'a t =
|
type 'a t =
|
||||||
| Seq of 'a sequence
|
| Seq : 'a sequence -> 'a t
|
||||||
| List of 'a list
|
| List : 'a list -> 'a t
|
||||||
|
| Set : (module CCSequence.Set.S
|
||||||
|
with type elt = 'a and type t = 'b) * 'b -> 'a t
|
||||||
|
|
||||||
let of_seq s = Seq s
|
let of_seq s = Seq s
|
||||||
let of_list l = List l
|
let of_list l = List l
|
||||||
let of_array a = Seq (CCSequence.of_array a)
|
let of_array a = Seq (CCSequence.of_array a)
|
||||||
let empty = List []
|
|
||||||
|
|
||||||
let to_seq = function
|
let set_of_seq (type elt) ?(cmp=Pervasives.compare) seq =
|
||||||
|
let module S = CCSequence.Set.Make(struct
|
||||||
|
type t = elt
|
||||||
|
let compare = cmp
|
||||||
|
end) in
|
||||||
|
let set = S.of_seq seq in
|
||||||
|
Set ((module S), set)
|
||||||
|
|
||||||
|
let to_seq (type elt) = function
|
||||||
| Seq s -> s
|
| Seq s -> s
|
||||||
| List l -> (fun k -> List.iter k l)
|
| List l -> (fun k -> List.iter k l)
|
||||||
|
| Set (m, set) ->
|
||||||
|
let module S = (val m : CCSequence.Set.S
|
||||||
|
with type elt = elt and type t = 'b) in
|
||||||
|
S.to_seq set
|
||||||
|
|
||||||
let to_list = function
|
let to_list (type elt) = function
|
||||||
| Seq s -> CCSequence.to_list s
|
| Seq s -> CCSequence.to_list s
|
||||||
| List l -> l
|
| List l -> l
|
||||||
|
| Set (m, set) ->
|
||||||
|
let module S = (val m : CCSequence.Set.S
|
||||||
|
with type elt = elt and type t = 'b) in
|
||||||
|
S.elements set
|
||||||
|
|
||||||
let _fmap ~lst ~seq c = match c with
|
let _fmap ~lst ~seq c = match c with
|
||||||
| List l -> List (lst l)
|
| List l -> List (lst l)
|
||||||
| Seq s -> Seq (seq s)
|
| Seq s -> Seq (seq s)
|
||||||
|
| Set _ ->
|
||||||
|
List (lst (to_list c))
|
||||||
|
|
||||||
let _fold ~lst ~seq acc c = match c with
|
let fold (type elt) f acc c = match c with
|
||||||
| List l -> List.fold_left lst acc l
|
| List l -> List.fold_left f acc l
|
||||||
| Seq s -> CCSequence.fold seq acc s
|
| Seq s -> CCSequence.fold f acc s
|
||||||
|
| Set (m, set) ->
|
||||||
let iter f c = match c with
|
let module S = (val m : CCSequence.Set.S
|
||||||
| List l -> List.iter f l
|
with type elt = elt and type t = 'b) in
|
||||||
| Seq s -> s f
|
S.fold (fun x acc -> f acc x) set acc
|
||||||
|
|
||||||
let map f c =
|
let map f c =
|
||||||
_fmap ~lst:(List.map f) ~seq:(CCSequence.map f) c
|
_fmap ~lst:(List.map f) ~seq:(CCSequence.map f) c
|
||||||
|
|
@ -72,16 +94,79 @@ module Coll = struct
|
||||||
|
|
||||||
let flat_map f c =
|
let flat_map f c =
|
||||||
let c' = to_seq c in
|
let c' = to_seq c in
|
||||||
Seq (CCSequence.flatMap f c')
|
Seq (CCSequence.flatMap (fun x -> to_seq (f x)) c')
|
||||||
|
|
||||||
let filter_map f c =
|
let filter_map f c =
|
||||||
_fmap ~lst:(CCList.filter_map f) ~seq:(CCSequence.fmap f) c
|
_fmap ~lst:(CCList.filter_map f) ~seq:(CCSequence.fmap f) c
|
||||||
|
|
||||||
let size = function
|
let size (type elt) = function
|
||||||
| List l -> List.length l
|
| List l -> List.length l
|
||||||
| Seq s -> CCSequence.length s
|
| Seq s -> CCSequence.length s
|
||||||
|
| Set (m, set) ->
|
||||||
|
let module S = (val m : CCSequence.Set.S
|
||||||
|
with type elt = elt and type t = 'b) in
|
||||||
|
S.cardinal set
|
||||||
|
|
||||||
let fold f acc c = _fold ~lst:f ~seq:f acc c
|
let choose (type elt) = function
|
||||||
|
| List [] -> None
|
||||||
|
| List (x::_) -> Some x
|
||||||
|
| Seq s ->
|
||||||
|
begin match CCSequence.take 1 s |> CCSequence.to_list with
|
||||||
|
| [x] -> Some x
|
||||||
|
| _ -> None
|
||||||
|
end
|
||||||
|
| Set (m, set) ->
|
||||||
|
let module S = (val m : CCSequence.Set.S
|
||||||
|
with type elt = elt and type t = 'b) in
|
||||||
|
try Some (S.choose set) with Not_found -> None
|
||||||
|
|
||||||
|
let take n c =
|
||||||
|
_fmap ~lst:(CCList.take n) ~seq:(CCSequence.take n) c
|
||||||
|
|
||||||
|
exception MySurpriseExit
|
||||||
|
|
||||||
|
let _seq_take_while p seq k =
|
||||||
|
try
|
||||||
|
seq (fun x -> if not (p x) then k x else raise MySurpriseExit)
|
||||||
|
with MySurpriseExit -> ()
|
||||||
|
|
||||||
|
let take_while p c =
|
||||||
|
to_seq c |> _seq_take_while p |> of_seq
|
||||||
|
|
||||||
|
let distinct ~cmp c = set_of_seq ~cmp (to_seq c)
|
||||||
|
|
||||||
|
let sort cmp c = match c with
|
||||||
|
| List l -> List (List.sort cmp l)
|
||||||
|
| _ ->
|
||||||
|
to_seq c |> set_of_seq ~cmp
|
||||||
|
|
||||||
|
let search obj c =
|
||||||
|
let _search_seq obj seq =
|
||||||
|
let ret = ref None in
|
||||||
|
begin try
|
||||||
|
seq (fun x -> match obj#check x with
|
||||||
|
| SearchContinue -> ()
|
||||||
|
| SearchStop y -> ret := Some y; raise MySurpriseExit);
|
||||||
|
with MySurpriseExit -> ()
|
||||||
|
end;
|
||||||
|
match !ret with
|
||||||
|
| None -> obj#failure
|
||||||
|
| Some x -> x
|
||||||
|
in
|
||||||
|
to_seq c |> _search_seq obj
|
||||||
|
|
||||||
|
let contains (type elt) ~eq x c = match c with
|
||||||
|
| List l -> List.exists (eq x) l
|
||||||
|
| Seq s -> CCSequence.exists (eq x) s
|
||||||
|
| Set (m, set) ->
|
||||||
|
let module S = (val m : CCSequence.Set.S
|
||||||
|
with type elt = elt and type t = 'b) in
|
||||||
|
(* XXX: here we don't use the equality relation *)
|
||||||
|
try
|
||||||
|
let y = S.find x set in
|
||||||
|
assert (eq x y);
|
||||||
|
true
|
||||||
|
with Not_found -> false
|
||||||
end
|
end
|
||||||
|
|
||||||
type 'a collection = 'a Coll.t
|
type 'a collection = 'a Coll.t
|
||||||
|
|
@ -95,7 +180,14 @@ module Map = struct
|
||||||
to_seq : ('a * 'b) sequence;
|
to_seq : ('a * 'b) sequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
let make_hash (type key) ?(eq=(=)) ?(hash=Hashtbl.hash) seq =
|
type ('a, 'b) build = {
|
||||||
|
mutable cur : ('a, 'b) t;
|
||||||
|
add : 'a -> 'b -> unit;
|
||||||
|
update : 'a -> ('b option -> 'b option) -> unit;
|
||||||
|
}
|
||||||
|
|
||||||
|
(* careful to use this map linearly *)
|
||||||
|
let make_hash (type key) ?(eq=(=)) ?(hash=Hashtbl.hash) () =
|
||||||
let module H = Hashtbl.Make(struct
|
let module H = Hashtbl.Make(struct
|
||||||
type t = key
|
type t = key
|
||||||
let equal = eq
|
let equal = eq
|
||||||
|
|
@ -103,57 +195,99 @@ module Map = struct
|
||||||
end) in
|
end) in
|
||||||
(* build table *)
|
(* build table *)
|
||||||
let tbl = H.create 32 in
|
let tbl = H.create 32 in
|
||||||
seq
|
let cur = {
|
||||||
(fun (k,v) ->
|
|
||||||
let l = try H.find tbl k with Not_found -> [] in
|
|
||||||
H.replace tbl k (v::l)
|
|
||||||
);
|
|
||||||
(* provide the multimap interface *)
|
|
||||||
let to_seq cont = H.iter (fun k v -> cont (k, Coll.of_list v)) tbl
|
|
||||||
in
|
|
||||||
{
|
|
||||||
is_empty = (fun () -> H.length tbl = 0);
|
is_empty = (fun () -> H.length tbl = 0);
|
||||||
size = (fun () -> H.length tbl);
|
size = (fun () -> H.length tbl);
|
||||||
get = (fun k ->
|
get = (fun k ->
|
||||||
try Some (Coll.of_list (H.find tbl k))
|
try Some (H.find tbl k)
|
||||||
with Not_found -> None);
|
with Not_found -> None);
|
||||||
fold = (fun f acc -> H.fold (fun k v acc -> f acc k (Coll.of_list v)) tbl acc);
|
fold = (fun f acc -> H.fold (fun k v acc -> f acc k v) tbl acc);
|
||||||
to_seq;
|
to_seq = (fun k -> H.iter (fun key v -> k (key,v)) tbl);
|
||||||
|
} in
|
||||||
|
{ cur;
|
||||||
|
add = (fun k v -> H.replace tbl k v);
|
||||||
|
update = (fun k f ->
|
||||||
|
match (try f (Some (H.find tbl k)) with Not_found -> f None) with
|
||||||
|
| None -> H.remove tbl k
|
||||||
|
| Some v' -> H.replace tbl k v');
|
||||||
}
|
}
|
||||||
|
|
||||||
let make_map (type key) (type value)
|
let make_cmp (type key) ?(cmp=Pervasives.compare) () =
|
||||||
?(cmp_key=Pervasives.compare) ?(cmp_val=Pervasives.compare) seq =
|
|
||||||
let module M = CCSequence.Map.Make(struct
|
let module M = CCSequence.Map.Make(struct
|
||||||
type t = key
|
type t = key
|
||||||
let compare = cmp_key
|
let compare = cmp
|
||||||
end) in
|
end) in
|
||||||
let module S = CCSequence.Set.Make(struct
|
let map = ref M.empty in
|
||||||
type t = value
|
let cur = {
|
||||||
let compare = cmp_val
|
is_empty = (fun () -> M.is_empty !map);
|
||||||
end) in
|
size = (fun () -> M.cardinal !map);
|
||||||
let _map_set set = Coll.of_seq (S.to_seq set) in
|
|
||||||
let map = CCSequence.fold
|
|
||||||
(fun map (k,v) ->
|
|
||||||
let set = try M.find k map with Not_found -> S.empty in
|
|
||||||
M.add k (S.add v set) map
|
|
||||||
) M.empty seq
|
|
||||||
in
|
|
||||||
let to_seq =
|
|
||||||
M.to_seq map |> CCSequence.map (fun (k,v) -> k, _map_set v)
|
|
||||||
in
|
|
||||||
{
|
|
||||||
is_empty = (fun () -> M.is_empty map);
|
|
||||||
size = (fun () -> M.cardinal map);
|
|
||||||
get = (fun k ->
|
get = (fun k ->
|
||||||
try Some (_map_set (M.find k map))
|
try Some (M.find k !map)
|
||||||
with Not_found -> None);
|
with Not_found -> None);
|
||||||
fold = (fun f acc ->
|
fold = (fun f acc ->
|
||||||
M.fold
|
M.fold
|
||||||
(fun key set acc -> f acc key (_map_set set)) map acc
|
(fun key set acc -> f acc key set) !map acc
|
||||||
);
|
);
|
||||||
to_seq;
|
to_seq = M.to_seq !map;
|
||||||
|
} in
|
||||||
|
{
|
||||||
|
cur;
|
||||||
|
add = (fun k v -> map := M.add k v !map);
|
||||||
|
update = (fun k f ->
|
||||||
|
match (try f (Some (M.find k !map)) with Not_found -> f None) with
|
||||||
|
| None -> map := M.remove k !map
|
||||||
|
| Some v' -> map := M.add k v' !map);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type 'a key_info = {
|
||||||
|
eq : 'a equal option;
|
||||||
|
cmp : 'a ord option;
|
||||||
|
hash : 'a hash option;
|
||||||
|
}
|
||||||
|
|
||||||
|
let default_key_info = {
|
||||||
|
eq=None; cmp=None; hash=None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let make_info info =
|
||||||
|
match info with
|
||||||
|
| { hash=None; _}
|
||||||
|
| { eq=None; _} ->
|
||||||
|
begin match info.cmp with
|
||||||
|
| None -> make_cmp ()
|
||||||
|
| Some cmp -> make_cmp ~cmp ()
|
||||||
|
end
|
||||||
|
| {eq=Some eq; hash=Some hash; _} -> make_hash ~eq ~hash ()
|
||||||
|
|
||||||
|
let multiset build seq =
|
||||||
|
seq (fun (k,v) ->
|
||||||
|
build.update k (function
|
||||||
|
| None -> Some [v]
|
||||||
|
| Some l -> Some (v::l)));
|
||||||
|
{ is_empty = build.cur.is_empty;
|
||||||
|
size = build.cur.size;
|
||||||
|
get = (fun k -> match build.cur.get k with
|
||||||
|
| None -> None
|
||||||
|
| Some v -> Some (Coll.of_list v));
|
||||||
|
fold = (fun f acc ->
|
||||||
|
build.cur.fold (fun acc k v -> f acc k (Coll.of_list v)) acc);
|
||||||
|
to_seq = build.cur.to_seq
|
||||||
|
|> CCSequence.map (fun (k,v) -> k,Coll.of_list v);
|
||||||
|
}
|
||||||
|
|
||||||
|
let multimap_cmp ?cmp seq =
|
||||||
|
let build = make_cmp ?cmp () in
|
||||||
|
multiset build seq
|
||||||
|
|
||||||
|
let count build seq =
|
||||||
|
seq (fun x ->
|
||||||
|
let n = match build.cur.get x with
|
||||||
|
| None -> 1
|
||||||
|
| Some n -> n+1
|
||||||
|
in
|
||||||
|
build.add x n);
|
||||||
|
build.cur
|
||||||
|
|
||||||
let get m x = m.get x
|
let get m x = m.get x
|
||||||
|
|
||||||
let get_exn m x =
|
let get_exn m x =
|
||||||
|
|
@ -164,26 +298,14 @@ module Map = struct
|
||||||
let size m = m.size ()
|
let size m = m.size ()
|
||||||
|
|
||||||
let to_seq m = m.to_seq
|
let to_seq m = m.to_seq
|
||||||
|
|
||||||
type 'a key_info = {
|
|
||||||
eq : 'a equal option;
|
|
||||||
cmp : 'a ord option;
|
|
||||||
hash : 'a hash option;
|
|
||||||
}
|
|
||||||
end
|
end
|
||||||
|
|
||||||
(** {2 Query operators} *)
|
(** {2 Query operators} *)
|
||||||
|
|
||||||
type safe = Safe
|
|
||||||
type unsafe = Unsafe
|
|
||||||
type (_,_) safety =
|
type (_,_) safety =
|
||||||
| Safe : ('a, 'a option) safety
|
| Safe : ('a, 'a option) safety
|
||||||
| Unsafe : ('a, 'a) safety
|
| Unsafe : ('a, 'a) safety
|
||||||
|
|
||||||
type 'a search_result =
|
|
||||||
| SearchContinue
|
|
||||||
| SearchStop of 'a
|
|
||||||
|
|
||||||
type (_, _) unary =
|
type (_, _) unary =
|
||||||
| Map : ('a -> 'b) -> ('a collection, 'b collection) unary
|
| Map : ('a -> 'b) -> ('a collection, 'b collection) unary
|
||||||
| GeneralMap : ('a -> 'b) -> ('a, 'b) unary
|
| GeneralMap : ('a -> 'b) -> ('a, 'b) unary
|
||||||
|
|
@ -198,21 +320,21 @@ type (_, _) unary =
|
||||||
| Take : int -> ('a collection, 'a collection) unary
|
| Take : int -> ('a collection, 'a collection) unary
|
||||||
| TakeWhile : ('a -> bool) -> ('a collection, 'a collection) unary
|
| TakeWhile : ('a -> bool) -> ('a collection, 'a collection) unary
|
||||||
| Sort : 'a ord -> ('a collection, 'a collection) unary
|
| Sort : 'a ord -> ('a collection, 'a collection) unary
|
||||||
| Distinct : 'a ord option * 'a equal option * 'a hash option
|
| Distinct : 'a ord -> ('a collection, 'a collection) unary
|
||||||
-> ('a collection, 'a collection) unary
|
|
||||||
| Search :
|
| Search :
|
||||||
< check: ('a -> 'b search_result);
|
< check: ('a -> 'b search_result);
|
||||||
failure : 'b;
|
failure : 'b;
|
||||||
> -> ('a collection, 'b) unary
|
> -> ('a collection, 'b) unary
|
||||||
|
| Contains : 'a equal * 'a -> ('a collection, bool) unary
|
||||||
| Get : ('b,'c) safety * 'a -> (('a,'b) Map.t, 'c) unary
|
| Get : ('b,'c) safety * 'a -> (('a,'b) Map.t, 'c) unary
|
||||||
| GroupBy : 'b ord * 'a ord * ('a -> 'b)
|
| GroupBy : 'b ord * ('a -> 'b)
|
||||||
-> ('a collection, ('b,'a collection) Map.t) unary
|
-> ('a collection, ('b,'a collection) Map.t) unary
|
||||||
| Count : 'a ord -> ('a collection, ('a, int) Map.t) unary
|
| Count : 'a ord -> ('a collection, ('a, int) Map.t) unary
|
||||||
|
|
||||||
type ('a,'b,'key,'c) join_descr = {
|
type ('a,'b,'key,'c) join_descr = {
|
||||||
join_key1 : 'a -> 'key;
|
join_key1 : 'a -> 'key;
|
||||||
join_key2 : 'b -> 'key;
|
join_key2 : 'b -> 'key;
|
||||||
join_merge : 'key -> 'a -> 'b -> 'c;
|
join_merge : 'key -> 'a -> 'b -> 'c option;
|
||||||
join_key : 'key Map.key_info;
|
join_key : 'key Map.key_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -234,7 +356,6 @@ type (_, _, _) binary =
|
||||||
| Product : ('a collection, 'b collection, ('a*'b) collection) binary
|
| Product : ('a collection, 'b collection, ('a*'b) collection) binary
|
||||||
| Append : ('a collection, 'a collection, 'a collection) binary
|
| Append : ('a collection, 'a collection, 'a collection) binary
|
||||||
| SetOp : set_op * 'a ord -> ('a collection, 'a collection, 'a collection) binary
|
| SetOp : set_op * 'a ord -> ('a collection, 'a collection, 'a collection) binary
|
||||||
| Inter : 'a ord -> ('a collection, 'a collection, 'a collection) binary
|
|
||||||
|
|
||||||
(* type of queries that return a 'a *)
|
(* type of queries that return a 'a *)
|
||||||
and 'a t =
|
and 'a t =
|
||||||
|
|
@ -258,12 +379,6 @@ let start_hashtbl h =
|
||||||
let start_seq seq =
|
let start_seq seq =
|
||||||
Start (Coll.of_seq seq)
|
Start (Coll.of_seq seq)
|
||||||
|
|
||||||
(** {6 Composition} *)
|
|
||||||
|
|
||||||
let apply u q = Unary (u, q)
|
|
||||||
|
|
||||||
let (>>>) = apply
|
|
||||||
|
|
||||||
(** {6 Execution} *)
|
(** {6 Execution} *)
|
||||||
|
|
||||||
let rec _optimize : type a. a t -> a t
|
let rec _optimize : type a. a t -> a t
|
||||||
|
|
@ -274,26 +389,98 @@ let rec _optimize : type a. a t -> a t
|
||||||
| Binary (b, q1, q2) ->
|
| Binary (b, q1, q2) ->
|
||||||
_optimize_binary b (_optimize q1) (_optimize q2)
|
_optimize_binary b (_optimize q1) (_optimize q2)
|
||||||
| QueryMap (f, q) -> QueryMap (f, _optimize q)
|
| QueryMap (f, q) -> QueryMap (f, _optimize q)
|
||||||
|
| Bind _ -> q (* cannot optimize before execution *)
|
||||||
and _optimize_unary : type a b. (a,b) unary -> a t -> b t
|
and _optimize_unary : type a b. (a,b) unary -> a t -> b t
|
||||||
= fun u q -> match u, q with
|
= fun u q -> match u, q with
|
||||||
| Map f, Unary (Map g, q') ->
|
| Map f, Unary (Map g, q') ->
|
||||||
_optimize_unary (Map (fun x -> f (g x))) q'
|
_optimize_unary (Map (fun x -> f (g x))) q'
|
||||||
|
| Filter p, Unary (Map f, cont) ->
|
||||||
|
_optimize_unary
|
||||||
|
(FilterMap (fun x -> let y = f x in if p y then Some y else None))
|
||||||
|
cont
|
||||||
|
| Map f, Unary (Filter p, cont) ->
|
||||||
|
_optimize_unary
|
||||||
|
(FilterMap (fun x -> if p x then Some (f x) else None))
|
||||||
|
cont
|
||||||
|
| Map f, Binary (Append, q1, q2) ->
|
||||||
|
_optimize_binary Append (Unary (u, q1)) (Unary (u, q2))
|
||||||
|
| Filter p, Binary (Append, q1, q2) ->
|
||||||
|
_optimize_binary Append (Unary (u, q1)) (Unary (u, q2))
|
||||||
|
| Fold (f,acc), Unary (Map f', cont) ->
|
||||||
|
_optimize_unary
|
||||||
|
(Fold ((fun acc x -> f acc (f' x)), acc))
|
||||||
|
cont
|
||||||
|
| Reduce (safety, start, mix, stop), Unary (Map f, cont) ->
|
||||||
|
_optimize_unary
|
||||||
|
(Reduce (safety,
|
||||||
|
(fun x -> start (f x)),
|
||||||
|
(fun x acc -> mix (f x) acc),
|
||||||
|
stop))
|
||||||
|
cont
|
||||||
|
| Size, Unary (Map _, cont) ->
|
||||||
|
_optimize_unary Size cont (* ignore the map! *)
|
||||||
|
| Size, Unary (Sort _, cont) ->
|
||||||
|
_optimize_unary Size cont
|
||||||
| _ -> Unary (u,q)
|
| _ -> Unary (u,q)
|
||||||
(* TODO *)
|
(* TODO: other cases *)
|
||||||
and _optimize_binary : type a b c. (a,b,c) binary -> a t -> b t -> c t
|
and _optimize_binary : type a b c. (a,b,c) binary -> a t -> b t -> c t
|
||||||
= fun b q1 q2 -> match b, q1, q2 with
|
= fun b q1 q2 -> match b, q1, q2 with
|
||||||
| _ -> Binary (b, q1, q2) (* TODO *)
|
| _ -> Binary (b, q1, q2) (* TODO *)
|
||||||
|
|
||||||
|
|
||||||
(* apply a unary operator on a collection *)
|
(* apply a unary operator on a collection *)
|
||||||
let _do_unary : type a b. (a,b) unary -> a -> b
|
let _do_unary : type a b. (a,b) unary -> a -> b
|
||||||
= fun u c -> match u with
|
= fun u c -> match u with
|
||||||
| Map f -> Coll.map f c
|
| Map f -> Coll.map f c
|
||||||
|
| GeneralMap f -> f c
|
||||||
| Filter p -> Coll.filter p c
|
| Filter p -> Coll.filter p c
|
||||||
| Fold (f, acc) -> Coll.fold f acc c (* TODO: optimize *)
|
| Fold (f, acc) -> Coll.fold f acc c
|
||||||
|
| Reduce (safety, start, mix, stop) ->
|
||||||
|
let acc = Coll.to_seq c
|
||||||
|
|> CCSequence.fold
|
||||||
|
(fun acc x -> match acc with
|
||||||
|
| None -> Some (start x)
|
||||||
|
| Some acc -> Some (mix x acc)
|
||||||
|
) None
|
||||||
|
in
|
||||||
|
begin match acc, safety with
|
||||||
|
| Some x, Safe -> Some (stop x)
|
||||||
|
| None, Safe -> None
|
||||||
|
| Some x, Unsafe -> stop x
|
||||||
|
| None, Unsafe -> invalid_arg "reduce: empty collection"
|
||||||
|
end
|
||||||
|
| Size -> Coll.size c
|
||||||
|
| Choose Safe -> Coll.choose c
|
||||||
|
| Choose Unsafe ->
|
||||||
|
begin match Coll.choose c with
|
||||||
|
| Some x -> x
|
||||||
|
| None -> invalid_arg "choose: empty collection"
|
||||||
|
end
|
||||||
|
| FilterMap f -> Coll.filter_map f c
|
||||||
|
| FlatMap f -> Coll.flat_map f c
|
||||||
|
| Take n -> Coll.take n c
|
||||||
|
| TakeWhile p -> Coll.take_while p c
|
||||||
|
| Sort cmp -> Coll.sort cmp c
|
||||||
|
| Distinct cmp -> Coll.distinct ~cmp c
|
||||||
|
| Search obj -> Coll.search obj c
|
||||||
|
| Get (Safe, k) -> Map.get c k
|
||||||
|
| Get (Unsafe, k) -> Map.get_exn c k
|
||||||
|
| GroupBy (cmp,f) ->
|
||||||
|
Coll.to_seq c
|
||||||
|
|> CCSequence.map (fun x -> f x, x)
|
||||||
|
|> Map.multimap_cmp ~cmp
|
||||||
|
| Contains (eq, x) -> Coll.contains ~eq x c
|
||||||
|
| Count cmp ->
|
||||||
|
Coll.to_seq c
|
||||||
|
|> Map.count (Map.make_cmp ~cmp ())
|
||||||
|
|
||||||
|
|
||||||
(* TODO: join of two collections *)
|
(* TODO: join of two collections *)
|
||||||
let _do_join ~join c1 c2 =
|
let _do_join ~join c1 c2 =
|
||||||
|
let _build = Map.make_info join.join_key in
|
||||||
|
assert false
|
||||||
|
|
||||||
|
(* TODO *)
|
||||||
|
let _do_group_join ~gjoin c1 c2 =
|
||||||
assert false
|
assert false
|
||||||
|
|
||||||
let _do_product c1 c2 =
|
let _do_product c1 c2 =
|
||||||
|
|
@ -303,7 +490,20 @@ let _do_product c1 c2 =
|
||||||
let _do_binary : type a b c. (a, b, c) binary -> a -> b -> c
|
let _do_binary : type a b c. (a, b, c) binary -> a -> b -> c
|
||||||
= fun b c1 c2 -> match b with
|
= fun b c1 c2 -> match b with
|
||||||
| Join join -> _do_join ~join c1 c2
|
| Join join -> _do_join ~join c1 c2
|
||||||
|
| GroupJoin gjoin -> _do_group_join ~gjoin c1 c2
|
||||||
| Product -> _do_product c1 c2
|
| Product -> _do_product c1 c2
|
||||||
|
| Append ->
|
||||||
|
Coll.of_seq (CCSequence.append (Coll.to_seq c1) (Coll.to_seq c2))
|
||||||
|
| SetOp (Inter,cmp) ->
|
||||||
|
(* use a join *)
|
||||||
|
_do_join ~join:{
|
||||||
|
join_key1=_id;
|
||||||
|
join_key2=_id;
|
||||||
|
join_merge=(fun _ a b -> Some a);
|
||||||
|
join_key=Map.({default_key_info with cmp=Some cmp; })
|
||||||
|
} c1 c2
|
||||||
|
| SetOp (Union,cmp) -> failwith "union: not implemented" (* TODO *)
|
||||||
|
| SetOp (Diff,cmp) -> failwith "diff: not implemented" (* TODO *)
|
||||||
|
|
||||||
let rec _run : type a. opt:bool -> a t -> a
|
let rec _run : type a. opt:bool -> a t -> a
|
||||||
= fun ~opt q -> match q with
|
= fun ~opt q -> match q with
|
||||||
|
|
@ -326,8 +526,6 @@ let map f q = Unary (Map f, q)
|
||||||
|
|
||||||
let filter p q = Unary (Filter p, q)
|
let filter p q = Unary (Filter p, q)
|
||||||
|
|
||||||
let size q = Unary (Size, q)
|
|
||||||
|
|
||||||
let choose q = Unary (Choose Safe, q)
|
let choose q = Unary (Choose Safe, q)
|
||||||
|
|
||||||
let choose_exn q = Unary (Choose Unsafe, q)
|
let choose_exn q = Unary (Choose Unsafe, q)
|
||||||
|
|
@ -346,8 +544,8 @@ let take_while p q = Unary (TakeWhile p, q)
|
||||||
|
|
||||||
let sort ~cmp q = Unary (Sort cmp, q)
|
let sort ~cmp q = Unary (Sort cmp, q)
|
||||||
|
|
||||||
let distinct ?cmp ?eq ?hash () q =
|
let distinct ?(cmp=Pervasives.compare) () q =
|
||||||
Unary (Distinct (cmp,eq,hash), q)
|
Unary (Distinct cmp, q)
|
||||||
|
|
||||||
let get key q =
|
let get key q =
|
||||||
Unary (Get (Safe, key), q)
|
Unary (Get (Safe, key), q)
|
||||||
|
|
@ -366,8 +564,8 @@ let map_to_seq_flatten q =
|
||||||
in
|
in
|
||||||
Unary (GeneralMap f, q)
|
Unary (GeneralMap f, q)
|
||||||
|
|
||||||
let group_by ?(cmp_key=Pervasives.compare) ?(cmp_val=Pervasives.compare) f q =
|
let group_by ?(cmp=Pervasives.compare) f q =
|
||||||
Unary (GroupBy (cmp_key,cmp_val,f), q)
|
Unary (GroupBy (cmp,f), q)
|
||||||
|
|
||||||
let count ?(cmp=Pervasives.compare) () q =
|
let count ?(cmp=Pervasives.compare) () q =
|
||||||
Unary (Count cmp, q)
|
Unary (Count cmp, q)
|
||||||
|
|
@ -401,6 +599,15 @@ let max_exn q = Unary (Reduce (Unsafe, _id, Pervasives.max, _id), q)
|
||||||
let min_exn q = Unary (Reduce (Unsafe, _id, Pervasives.min, _id), q)
|
let min_exn q = Unary (Reduce (Unsafe, _id, Pervasives.min, _id), q)
|
||||||
let average_exn q = Unary (Reduce (Unsafe, _avg_start, _avg_mix, _avg_stop), q)
|
let average_exn q = Unary (Reduce (Unsafe, _avg_start, _avg_mix, _avg_stop), q)
|
||||||
|
|
||||||
|
let is_empty q =
|
||||||
|
Unary (Search (object
|
||||||
|
method check _ = SearchStop false (* stop in case there is an element *)
|
||||||
|
method failure = true
|
||||||
|
end), q)
|
||||||
|
|
||||||
|
let contains ?(eq=(=)) x q =
|
||||||
|
Unary (Contains (eq, x), q)
|
||||||
|
|
||||||
let for_all p q =
|
let for_all p q =
|
||||||
Unary (Search (object
|
Unary (Search (object
|
||||||
method check x = if p x then SearchContinue else SearchStop false
|
method check x = if p x then SearchContinue else SearchStop false
|
||||||
|
|
|
||||||
|
|
@ -113,8 +113,7 @@ val take_while : ('a -> bool) -> 'a collection t -> 'a collection t
|
||||||
val sort : cmp:'a ord -> 'a collection t -> 'a collection t
|
val sort : cmp:'a ord -> 'a collection t -> 'a collection t
|
||||||
(** Sort items by the given comparison function *)
|
(** Sort items by the given comparison function *)
|
||||||
|
|
||||||
val distinct : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash ->
|
val distinct : ?cmp:'a ord -> unit -> 'a collection t -> 'a collection t
|
||||||
unit -> 'a collection t -> 'a collection t
|
|
||||||
(** Remove duplicate elements from the input collection.
|
(** Remove duplicate elements from the input collection.
|
||||||
All elements in the result are distinct. *)
|
All elements in the result are distinct. *)
|
||||||
|
|
||||||
|
|
@ -135,7 +134,7 @@ val map_to_seq_flatten : ('a,'b collection) Map.t t -> ('a*'b) collection t
|
||||||
|
|
||||||
(** {6 Aggregation} *)
|
(** {6 Aggregation} *)
|
||||||
|
|
||||||
val group_by : ?cmp_key:'b ord -> ?cmp_val:'a ord ->
|
val group_by : ?cmp:'b ord ->
|
||||||
('a -> 'b) -> 'a collection t -> ('b,'a collection) Map.t t
|
('a -> 'b) -> 'a collection t -> ('b,'a collection) Map.t t
|
||||||
(** [group_by f] takes a collection [c] as input, and returns
|
(** [group_by f] takes a collection [c] as input, and returns
|
||||||
a multimap [m] such that for each [x] in [c],
|
a multimap [m] such that for each [x] in [c],
|
||||||
|
|
@ -163,8 +162,12 @@ val reduce_exn : ('a -> 'b) -> ('a -> 'b -> 'b) -> ('b -> 'c) ->
|
||||||
(** Same as {!reduce} but fails on empty collections.
|
(** Same as {!reduce} but fails on empty collections.
|
||||||
@raise Invalid_argument if the collection is empty *)
|
@raise Invalid_argument if the collection is empty *)
|
||||||
|
|
||||||
|
val is_empty : 'a collection t -> bool t
|
||||||
|
|
||||||
val sum : int collection t -> int t
|
val sum : int collection t -> int t
|
||||||
|
|
||||||
|
val contains : ?eq:'a equal -> 'a -> 'a collection t -> bool t
|
||||||
|
|
||||||
val average : int collection t -> int option t
|
val average : int collection t -> int option t
|
||||||
val max : int collection t -> int option t
|
val max : int collection t -> int option t
|
||||||
val min : int collection t -> int option t
|
val min : int collection t -> int option t
|
||||||
|
|
@ -182,13 +185,14 @@ val find_map : ('a -> 'b option) -> 'a collection t -> 'b option t
|
||||||
|
|
||||||
val join : ?cmp:'key ord -> ?eq:'key equal -> ?hash:'key hash ->
|
val join : ?cmp:'key ord -> ?eq:'key equal -> ?hash:'key hash ->
|
||||||
('a -> 'key) -> ('b -> 'key) ->
|
('a -> 'key) -> ('b -> 'key) ->
|
||||||
merge:('key -> 'a -> 'b -> 'c) ->
|
merge:('key -> 'a -> 'b -> 'c option) ->
|
||||||
'a collection t -> 'b collection t -> 'c collection t
|
'a collection t -> 'b collection t -> 'c collection t
|
||||||
(** [join key1 key2 ~merge] is a binary operation
|
(** [join key1 key2 ~merge] is a binary operation
|
||||||
that takes two collections [a] and [b], projects their
|
that takes two collections [a] and [b], projects their
|
||||||
elements resp. with [key1] and [key2], and combine
|
elements resp. with [key1] and [key2], and combine
|
||||||
values [(x,y)] from [(a,b)] with the same [key]
|
values [(x,y)] from [(a,b)] with the same [key]
|
||||||
using [merge]. *)
|
using [merge]. If [merge] returns [None], the combination
|
||||||
|
of values is discarded. *)
|
||||||
|
|
||||||
val group_join : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash ->
|
val group_join : ?cmp:'a ord -> ?eq:'a equal -> ?hash:'a hash ->
|
||||||
('b -> 'a) -> 'a collection t -> 'b collection t ->
|
('b -> 'a) -> 'a collection t -> 'b collection t ->
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue