merge from master

This commit is contained in:
Simon Cruanes 2014-06-28 04:01:25 +02:00
commit eeca970d0b
10 changed files with 746 additions and 370 deletions

View file

@ -60,6 +60,7 @@ structures comprise (some modules in `misc/`, some other in `core/`):
- `CCKTree`, an abstract lazy tree structure (similar to what `CCKlist` is to lists)
- small modules (basic types, utilities):
- `CCInt`
- `CCString` (basic string operations)
- `CCPair` (cartesian products)
- `CCOpt` (options)
- `CCFun` (function combinators)
@ -70,6 +71,12 @@ structures comprise (some modules in `misc/`, some other in `core/`):
- `CCHash` (hashing combinators)
- `CCError` (monadic error handling)
### String
In the module `Containers_string`:
- `Levenshtein`: edition distance between two strings
- `KMP`: Knuth-Morris-Pratt substring algorithm
### Misc
- `PHashtbl`, a polymorphic hashtable (with open addressing)

4
_oasis
View file

@ -47,13 +47,13 @@ Library "containers"
CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError,
CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash,
CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd,
CCRandom, CCLinq, CCKTree, CCTrie
CCRandom, CCLinq, CCKTree, CCTrie, CCString
FindlibName: containers
Library "containers_string"
Path: string
Pack: true
Modules: KMP, CCString, Levenshtein
Modules: KMP, Levenshtein
FindlibName: string
FindlibParent: containers

View file

@ -57,6 +57,10 @@ module type S = sig
val iteri : (int -> 'a -> unit) -> 'a t -> unit
val blit : 'a t -> int -> 'a t -> int -> int -> unit
(** [blit from i into j len] copies [len] elements from the first array
to the second. See {!Array.blit}. *)
val reverse_in_place : 'a t -> unit
(** Reverse the array in place *)
@ -92,16 +96,16 @@ module type S = sig
(** {2 IO} *)
val pp: ?sep:string -> (Buffer.t -> 'a -> unit)
-> Buffer.t -> 'a t -> unit
val pp: ?sep:string -> (Buffer.t -> 'a -> unit) ->
Buffer.t -> 'a t -> unit
(** print an array of items with printing function *)
val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit)
-> Buffer.t -> 'a t -> unit
val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) ->
Buffer.t -> 'a t -> unit
(** print an array, giving the printing function both index and item *)
val print : ?sep:string -> (Format.formatter -> 'a -> unit)
-> Format.formatter -> 'a t -> unit
val print : ?sep:string -> (Format.formatter -> 'a -> unit) ->
Format.formatter -> 'a t -> unit
(** print an array of items with printing function *)
end
@ -112,13 +116,13 @@ Most of those functions that a range [(i,j)] with
let rec _foldi f acc a i j =
if i = j then acc else _foldi f (f acc i a.(i)) a (i+1) j
let _reverse_in_place a i j =
if i=j then ()
let _reverse_in_place a i ~len =
if len=0 then ()
else
for k = i to (j-1)/2 do
let t = a.(k) in
a.(k) <- a.(j-1-k);
a.(j-1-k) <- t;
for k = 0 to (len-1)/2 do
let t = a.(i+k) in
a.(i+k) <- a.(i+len-1-k);
a.(i+len-1-k) <- t;
done
let rec _equal eq a1 i1 j1 a2 i2 j2 =
@ -135,9 +139,16 @@ let rec _compare cmp a1 i1 j1 a2 i2 j2 =
else
let c = cmp a1.(i1) a2.(i2) in
if c = 0
then _compare cmp a1 (i1+1) j1 a2 (i2+2) j2
then _compare cmp a1 (i1+1) j1 a2 (i2+1) j2
else c
(*$T
compare CCOrd.compare [| 1; 2; 3 |] [| 1; 2; 3 |] = 0
compare CCOrd.compare [| 1; 2; 3 |] [| 2; 2; 3 |] < 0
compare CCOrd.compare [| 1; 2; |] [| 1; 2; 3 |] < 0
compare CCOrd.compare [| 1; 2; 3 |] [| 1; 2; |] > 0
*)
let rec _find f a i j =
if i = j then None
else match f a.(i) with
@ -150,11 +161,11 @@ let rec _for_all p a i j =
let rec _exists p a i j =
i <> j && (p a.(i) || _exists p a (i+1) j)
let rec _for_all2 p a1 a2 i1 i2 j1 =
i1 = j1 || (p a1.(i1) a2.(i2) && _for_all2 p a1 a2 (i1+1) (i2+1) j1)
let rec _for_all2 p a1 a2 i1 i2 ~len =
len=0 || (p a1.(i1) a2.(i2) && _for_all2 p a1 a2 (i1+1) (i2+1) ~len:(len-1))
let rec _exists2 p a1 a2 i1 i2 j1 =
i1 <> j1 && (p a1.(i1) a2.(i2) || _exists2 p a1 a2 (i1+1) (i2+1) j1)
let rec _exists2 p a1 a2 i1 i2 ~len =
len>0 && (p a1.(i1) a2.(i2) || _exists2 p a1 a2 (i1+1) (i2+1) ~len:(len-1))
(* shuffle a[i...j[ using the given int random generator
See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *)
@ -228,6 +239,8 @@ let iter = Array.iter
let iteri = Array.iteri
let blit = Array.blit
let reverse_in_place a =
_reverse_in_place a 0 (Array.length a)
@ -303,12 +316,10 @@ let exists p a = _exists p a 0 (Array.length a)
let for_all2 p a b =
Array.length a = Array.length b
&&
_for_all2 p a b 0 0 (Array.length a)
_for_all2 p a b 0 0 ~len:(Array.length a)
let exists2 p a b =
Array.length a = Array.length b
&&
_exists2 p a b 0 0 (Array.length a)
_exists2 p a b 0 0 ~len:(min (Array.length a) (Array.length b))
let (--) i j =
if i<=j
@ -374,11 +385,13 @@ module Sub = struct
}
let make arr i ~len =
if i+len > Array.length arr then invalid_arg "Array.Sub.make";
if i<0||i+len > Array.length arr then invalid_arg "Array.Sub.make";
{ arr; i; j=i+len; }
let of_slice (arr,i,len) = make arr i ~len
let to_slice a = a.arr, a.i, a.j-a.i
let full arr = { arr; i=0; j=Array.length arr; }
let underlying a = a.arr
@ -419,7 +432,16 @@ module Sub = struct
let iteri f a =
for k=0 to length a-1 do f k a.arr.(a.i + k) done
let reverse_in_place a = _reverse_in_place a.arr a.i a.j
let blit a i b j len =
if i+len>length a || j+len>length b then invalid_arg "Array.Sub.blit";
Array.blit a.arr (a.i+i) b.arr (b.i+j) len
let reverse_in_place a = _reverse_in_place a.arr a.i ~len:(length a)
(*$T
let a = 1--6 in let s = Sub.make a 2 ~len:3 in \
Sub.reverse_in_place s; a = [| 1; 2; 5; 4; 3; 6 |]
*)
let find f a = _find f a.arr a.i a.j
@ -428,10 +450,14 @@ module Sub = struct
let exists p a = _exists p a.arr a.i a.j
let for_all2 p a b =
length a = length b && _for_all2 p a.arr b.arr a.i b.i b.j
length a = length b && _for_all2 p a.arr b.arr a.i b.i ~len:(length a)
let exists2 p a b =
length a = length b && _exists2 p a.arr b.arr a.i b.i a.j
_exists2 p a.arr b.arr a.i b.i ~len:(min (length a) (length b))
(*$T
Sub.exists2 (=) (Sub.make [| 1;2;3;4 |] 1 ~len:2) (Sub.make [| 0;1;3;4 |] 1 ~len:3)
*)
let shuffle a =
_shuffle Random.int a.arr a.i a.j

View file

@ -59,6 +59,10 @@ module type S = sig
val iteri : (int -> 'a -> unit) -> 'a t -> unit
val blit : 'a t -> int -> 'a t -> int -> int -> unit
(** [blit from i into j len] copies [len] elements from the first array
to the second. See {!Array.blit}. *)
val reverse_in_place : 'a t -> unit
(** Reverse the array in place *)
@ -94,16 +98,16 @@ module type S = sig
(** {2 IO} *)
val pp: ?sep:string -> (Buffer.t -> 'a -> unit)
-> Buffer.t -> 'a t -> unit
val pp: ?sep:string -> (Buffer.t -> 'a -> unit) ->
Buffer.t -> 'a t -> unit
(** print an array of items with printing function *)
val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit)
-> Buffer.t -> 'a t -> unit
val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) ->
Buffer.t -> 'a t -> unit
(** print an array, giving the printing function both index and item *)
val print : ?sep:string -> (Format.formatter -> 'a -> unit)
-> Format.formatter -> 'a t -> unit
val print : ?sep:string -> (Format.formatter -> 'a -> unit) ->
Format.formatter -> 'a t -> unit
(** print an array of items with printing function *)
end
@ -159,6 +163,10 @@ module Sub : sig
[i] the offset in [arr], and [len] the number of elements of the slice.
@raise Invalid_argument if the slice isn't valid (See {!make}) *)
val to_slice : 'a t -> ('a array * int * int)
(** Convert into a triple [(arr, i, len)] where [len] is the length of
the subarray of [arr] starting at offset [i] *)
val full : 'a array -> 'a t
(** Slice that covers the full array *)

View file

@ -26,6 +26,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(** {1 Persistent hash-table on top of OCaml's hashtables} *)
type 'a sequence = ('a -> unit) -> unit
type 'a printer = Buffer.t -> 'a -> unit
type 'a formatter = Format.formatter -> 'a -> unit
type 'a equal = 'a -> 'a -> bool
module type HashedType = sig
type t
@ -40,52 +43,92 @@ module type S = sig
type 'a t
val empty : unit -> 'a t
(** Empty table. The table will be allocated at the first binding *)
(** Empty table. The table will be allocated at the first binding *)
val create : int -> 'a t
(** Create a new hashtable *)
(** Create a new hashtable, with the given initial capacity *)
val is_empty : 'a t -> bool
(** Is the table empty? *)
(** Is the table empty? *)
val find : 'a t -> key -> 'a
(** Find the value for this key, or raise Not_found *)
(** Find the value for this key, or fails
@raise Not_found if the key is not present in the table *)
val get_exn : key -> 'a t -> 'a
(** Synonym to {!find} with flipped arguments *)
val get : key -> 'a t -> 'a option
(** Safe version of !{get_exn} *)
val mem : 'a t -> key -> bool
(** Is the key bound? *)
(** Is the key bound? *)
val length : 'a t -> int
(** Number of bindings *)
val length : _ t -> int
(** Number of bindings *)
val replace : 'a t -> key -> 'a -> 'a t
(** Add the binding to the table, returning a new table. This erases
the current binding for [key], if any. *)
(** Add the binding to the table, returning a new table. This erases
the current binding for [key], if any. *)
val update : 'a t -> key -> ('a option -> 'a option) -> 'a t
(** [update tbl key f] calls [f None] if [key] doesn't belong in [tbl],
[f (Some v)] if [key -> v] otherwise; If [f] returns [None] then
[key] is removed, else it returns [Some v'] and [key -> v'] is added. *)
val remove : 'a t -> key -> 'a t
(** Remove the key *)
(** Remove the key *)
val copy : 'a t -> 'a t
(** Fresh copy of the table; the underlying structure is not shared
anymore, so using both tables alternatively will be efficient *)
(** Fresh copy of the table; the underlying structure is not shared
anymore, so using both tables alternatively will be efficient *)
val merge : (key -> 'a option -> 'a option -> 'a option) -> 'a t -> 'a t -> 'a t
(** Merge two tables together into a new table *)
val merge : (key -> 'a option -> 'a option -> 'a option) ->
'a t -> 'a t -> 'a t
(** Merge two tables together into a new table. The function's argument
correspond to values associated with the key (if present); if the
function returns [None] the key will not appear in the result. *)
val iter : 'a t -> (key -> 'a -> unit) -> unit
(** Iterate over bindings *)
(** Iterate over bindings *)
val fold : ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b
(** Fold over bindings *)
(** Fold over bindings *)
val of_seq : ?init:'a t -> (key * 'a) sequence -> 'a t
(** Add (replace) bindings from the sequence to the table *)
val map : (key -> 'a -> 'b) -> 'a t -> 'b t
(** Map all values *)
val of_list : ?init:'a t -> (key * 'a) list -> 'a t
val filter : (key -> 'a -> bool) -> 'a t -> 'a t
val filter_map : (key -> 'a -> 'b option) -> 'a t -> 'b t
val for_all : (key -> 'a -> bool) -> 'a t -> bool
val exists : (key -> 'a -> bool) -> 'a t -> bool
(** {3 Conversions} *)
val of_seq : (key * 'a) sequence -> 'a t
(** Add (replace) bindings from the sequence to the table *)
val of_list : (key * 'a) list -> 'a t
val add_seq : 'a t -> (key * 'a) sequence -> 'a t
val add_list : 'a t -> (key * 'a) list -> 'a t
val to_seq : 'a t -> (key * 'a) sequence
(** Sequence of the bindings of the table *)
(** Sequence of the bindings of the table *)
val to_list : 'a t -> (key * 'a) list
(** {3 Misc} *)
val equal : 'a equal -> 'a t equal
val pp : key printer -> 'a printer -> 'a t printer
val print : key formatter -> 'a formatter -> 'a t formatter
end
(** {2 Implementation} *)
@ -107,64 +150,55 @@ module Make(H : HashedType) : S with type key = H.t = struct
let empty () = create 11
(** Reroot: modify the zipper so that the current node is a proper
hashtable, and return the hashtable *)
let reroot t =
(* pass continuation to get a tailrec rerooting *)
let rec reroot t k = match !t with
| Table tbl -> k tbl (* done *)
| Add (key, v, t') ->
reroot t'
(fun tbl ->
t' := Remove (key, t);
Table.add tbl key v;
t := Table tbl;
k tbl)
| Replace (key, v, t') ->
reroot t'
(fun tbl ->
let v' = Table.find tbl key in
t' := Replace (key, v', t);
t := Table tbl;
Table.replace tbl key v;
k tbl)
| Remove (key, t') ->
reroot t'
(fun tbl ->
let v = Table.find tbl key in
t' := Add (key, v, t);
t := Table tbl;
Table.remove tbl key;
k tbl)
in
match !t with
(* pass continuation to get a tailrec rerooting *)
let rec _reroot t k = match !t with
| Table tbl -> k tbl (* done *)
| Add (key, v, t') ->
_reroot t'
(fun tbl ->
t' := Remove (key, t);
Table.add tbl key v;
t := Table tbl;
k tbl)
| Replace (key, v, t') ->
_reroot t'
(fun tbl ->
let v' = Table.find tbl key in
t' := Replace (key, v', t);
t := Table tbl;
Table.replace tbl key v;
k tbl)
| Remove (key, t') ->
_reroot t'
(fun tbl ->
let v = Table.find tbl key in
t' := Add (key, v, t);
t := Table tbl;
Table.remove tbl key;
k tbl)
(* Reroot: modify the zipper so that the current node is a proper
hashtable, and return the hashtable *)
let reroot t = match !t with
| Table tbl -> tbl
| _ -> reroot t (fun x -> x)
| _ -> _reroot t (fun x -> x)
let is_empty t =
match !t with
| Table tbl -> Table.length tbl = 0
| _ -> Table.length (reroot t) = 0
let is_empty t = Table.length (reroot t) = 0
let find t k =
match !t with
| Table tbl -> Table.find tbl k
| _ -> Table.find (reroot t) k
let find t k = Table.find (reroot t) k
let mem t k =
match !t with
| Table tbl -> Table.mem tbl k
| _ -> Table.mem (reroot t) k
let get_exn k t = find t k
let length t =
match !t with
| Table tbl -> Table.length tbl
| _ -> Table.length (reroot t)
let get k t =
try Some (find t k)
with Not_found -> None
let mem t k = Table.mem (reroot t) k
let length t = Table.length (reroot t)
let replace t k v =
let tbl = match !t with
| Table tbl -> tbl
| _ -> reroot t in
let tbl = reroot t in
(* create the new hashtable *)
let t' = ref (Table tbl) in
(* update [t] to point to the new hashtable *)
@ -172,15 +206,14 @@ module Make(H : HashedType) : S with type key = H.t = struct
let v' = Table.find tbl k in
t := Replace (k, v', t')
with Not_found ->
t := Remove (k, t'));
t := Remove (k, t')
);
(* modify the underlying hashtable *)
Table.replace tbl k v;
t'
let remove t k =
let tbl = match !t with
| Table tbl -> tbl
| _ -> reroot t in
let tbl = reroot t in
try
let v' = Table.find tbl k in
(* value present, make a new hashtable without this value *)
@ -192,26 +225,63 @@ module Make(H : HashedType) : S with type key = H.t = struct
(* not member, nothing to do *)
t
let update t k f =
let v = get k t in
match v, f v with
| None, None -> t (* no change *)
| Some _, None -> remove t k
| _, Some v' -> replace t k v'
let copy t =
let tbl = match !t with
| Table tbl -> tbl
| _ -> reroot t in
let tbl = reroot t in
(* no one will point to the new [t] *)
let t = ref (Table (Table.copy tbl)) in
t
let iter t f =
let tbl = match !t with
| Table tbl -> tbl
| _ -> reroot t in
let tbl = reroot t in
Table.iter f tbl
let fold f acc t =
let tbl = match !t with
| Table tbl -> tbl
| _ -> reroot t in
let tbl = reroot t in
Table.fold (fun k v acc -> f acc k v) tbl acc
let map f t =
let tbl = reroot t in
let res = Table.create (Table.length tbl) in
Table.iter (fun k v -> Table.replace res k (f k v)) tbl;
ref (Table res)
let filter p t =
let tbl = reroot t in
let res = Table.create (Table.length tbl) in
Table.iter (fun k v -> if p k v then Table.replace res k v) tbl;
ref (Table res)
let filter_map f t =
let tbl = reroot t in
let res = Table.create (Table.length tbl) in
Table.iter
(fun k v -> match f k v with
| None -> ()
| Some v' -> Table.replace res k v'
) tbl;
ref (Table res)
exception ExitPTbl
let for_all p t =
try
iter t (fun k v -> if not (p k v) then raise ExitPTbl);
true
with ExitPTbl -> false
let exists p t =
try
iter t (fun k v -> if p k v then raise ExitPTbl);
false
with ExitPTbl -> true
let merge f t1 t2 =
let tbl = Table.create (max (length t1) (length t2)) in
iter t1
@ -227,15 +297,17 @@ module Make(H : HashedType) : S with type key = H.t = struct
| Some v' -> Table.replace tbl k v2);
ref (Table tbl)
let of_seq ?init seq =
let tbl = match init with
| None -> Table.create 5
| Some t -> Table.copy (reroot t) in
seq (fun (k,v) -> Table.replace tbl k v);
ref (Table tbl)
let add_seq init seq =
let tbl = ref init in
seq (fun (k,v) -> tbl := replace !tbl k v);
!tbl
let of_list ?init l =
of_seq ?init (fun k -> List.iter k l)
let of_seq seq = add_seq (empty ()) seq
let add_list init l =
add_seq init (fun k -> List.iter k l)
let of_list l = add_list (empty ()) l
let to_list t =
let tbl = reroot t in
@ -244,10 +316,36 @@ module Make(H : HashedType) : S with type key = H.t = struct
let to_seq t =
fun k ->
let tbl = match !t with
| Table tbl -> tbl
| _ -> reroot t
in
let tbl = reroot t in
Table.iter (fun x y -> k (x,y)) tbl
let equal eq t1 t2 =
length t1 = length t2
&&
for_all
(fun k v -> match get k t2 with
| None -> false
| Some v' -> eq v v'
) t1
let pp pp_k pp_v buf t =
Buffer.add_string buf "{";
let first = ref true in
iter t
(fun k v ->
if !first then first:=false else Buffer.add_string buf ", ";
Printf.bprintf buf "%a -> %a" pp_k k pp_v v
);
Buffer.add_string buf "}"
let print pp_k pp_v fmt t =
Format.pp_print_string fmt "{";
let first = ref true in
iter t
(fun k v ->
if !first then first:=false else Format.pp_print_string fmt ", ";
Format.fprintf fmt "%a -> %a" pp_k k pp_v v
);
Format.pp_print_string fmt "}"
end

View file

@ -23,9 +23,19 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Persistent hash-table on top of OCaml's hashtables} *)
(** {1 Persistent hash-table on top of OCaml's hashtables}
Almost as efficient as the regular Hashtbl type, but with a persistent
interface (rewinding changes to get back in the past history). This is
mostly useful for backtracking-like uses, or forward uses (never using
old values).
This module is not thread-safe. *)
type 'a sequence = ('a -> unit) -> unit
type 'a printer = Buffer.t -> 'a -> unit
type 'a formatter = Format.formatter -> 'a -> unit
type 'a equal = 'a -> 'a -> bool
module type HashedType = sig
type t
@ -40,52 +50,92 @@ module type S = sig
type 'a t
val empty : unit -> 'a t
(** Empty table. The table will be allocated at the first binding *)
(** Empty table. The table will be allocated at the first binding *)
val create : int -> 'a t
(** Create a new hashtable *)
(** Create a new hashtable, with the given initial capacity *)
val is_empty : 'a t -> bool
(** Is the table empty? *)
(** Is the table empty? *)
val find : 'a t -> key -> 'a
(** Find the value for this key, or raise Not_found *)
(** Find the value for this key, or fails
@raise Not_found if the key is not present in the table *)
val get_exn : key -> 'a t -> 'a
(** Synonym to {!find} with flipped arguments *)
val get : key -> 'a t -> 'a option
(** Safe version of !{get_exn} *)
val mem : 'a t -> key -> bool
(** Is the key bound? *)
(** Is the key bound? *)
val length : 'a t -> int
(** Number of bindings *)
val length : _ t -> int
(** Number of bindings *)
val replace : 'a t -> key -> 'a -> 'a t
(** Add the binding to the table, returning a new table. This erases
the current binding for [key], if any. *)
(** Add the binding to the table, returning a new table. This erases
the current binding for [key], if any. *)
val update : 'a t -> key -> ('a option -> 'a option) -> 'a t
(** [update tbl key f] calls [f None] if [key] doesn't belong in [tbl],
[f (Some v)] if [key -> v] otherwise; If [f] returns [None] then
[key] is removed, else it returns [Some v'] and [key -> v'] is added. *)
val remove : 'a t -> key -> 'a t
(** Remove the key *)
(** Remove the key *)
val copy : 'a t -> 'a t
(** Fresh copy of the table; the underlying structure is not shared
anymore, so using both tables alternatively will be efficient *)
(** Fresh copy of the table; the underlying structure is not shared
anymore, so using both tables alternatively will be efficient *)
val merge : (key -> 'a option -> 'a option -> 'a option) -> 'a t -> 'a t -> 'a t
(** Merge two tables together into a new table *)
val merge : (key -> 'a option -> 'a option -> 'a option) ->
'a t -> 'a t -> 'a t
(** Merge two tables together into a new table. The function's argument
correspond to values associated with the key (if present); if the
function returns [None] the key will not appear in the result. *)
val iter : 'a t -> (key -> 'a -> unit) -> unit
(** Iterate over bindings *)
(** Iterate over bindings *)
val fold : ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b
(** Fold over bindings *)
(** Fold over bindings *)
val of_seq : ?init:'a t -> (key * 'a) sequence -> 'a t
(** Add (replace) bindings from the sequence to the table *)
val map : (key -> 'a -> 'b) -> 'a t -> 'b t
(** Map all values *)
val of_list : ?init:'a t -> (key * 'a) list -> 'a t
val filter : (key -> 'a -> bool) -> 'a t -> 'a t
val filter_map : (key -> 'a -> 'b option) -> 'a t -> 'b t
val for_all : (key -> 'a -> bool) -> 'a t -> bool
val exists : (key -> 'a -> bool) -> 'a t -> bool
(** {3 Conversions} *)
val of_seq : (key * 'a) sequence -> 'a t
(** Add (replace) bindings from the sequence to the table *)
val of_list : (key * 'a) list -> 'a t
val add_seq : 'a t -> (key * 'a) sequence -> 'a t
val add_list : 'a t -> (key * 'a) list -> 'a t
val to_seq : 'a t -> (key * 'a) sequence
(** Sequence of the bindings of the table *)
(** Sequence of the bindings of the table *)
val to_list : 'a t -> (key * 'a) list
(** {3 Misc} *)
val equal : 'a equal -> 'a t equal
val pp : key printer -> 'a printer -> 'a t printer
val print : key formatter -> 'a formatter -> 'a t formatter
end
(** {2 Implementation} *)

260
core/CCString.ml Normal file
View file

@ -0,0 +1,260 @@
(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Basic String Utils} *)
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]
module type S = sig
type t
val length : t -> int
val blit : t -> int -> t -> int -> int -> unit
(** See {!String.blit} *)
(** {2 Conversions} *)
val to_gen : t -> char gen
val to_seq : t -> char sequence
val to_klist : t -> char klist
val pp : Buffer.t -> t -> unit
end
type t = string
let equal a b = a=b
let compare = String.compare
let hash s = Hashtbl.hash s
let length = String.length
let _is_sub ~sub i s j ~len =
let rec check k =
if k = len
then true
else sub.[i + k] = s.[j+k] && check (k+1)
in
j+len <= String.length s && check 0
let is_sub ~sub i s j ~len =
if i+len > String.length sub then invalid_arg "String.is_sub";
_is_sub ~sub i s j ~len
module Split = struct
type split_state =
| SplitStop
| SplitAt of int (* previous *)
(* [by_j... prefix of s_i...] ? *)
let rec _is_prefix ~by s i j =
j = String.length by
||
( i < String.length s &&
s.[i] = by.[j] &&
_is_prefix ~by s (i+1) (j+1)
)
let rec _split ~by s state = match state with
| SplitStop -> None
| SplitAt prev -> _split_search ~by s prev prev
and _split_search ~by s prev i =
if i >= String.length s
then Some (SplitStop, prev, String.length s - prev)
else if _is_prefix ~by s i 0
then Some (SplitAt (i+String.length by), prev, i-prev)
else _split_search ~by s prev (i+1)
let _tuple3 x y z = x,y,z
let _mkgen ~by s k =
let state = ref (SplitAt 0) in
fun () ->
match _split ~by s !state with
| None -> None
| Some (state', i, len) ->
state := state';
Some (k s i len)
let gen ~by s = _mkgen ~by s _tuple3
let gen_cpy ~by s = _mkgen ~by s String.sub
let _mklist ~by s k =
let rec build acc state = match _split ~by s state with
| None -> List.rev acc
| Some (state', i, len) ->
build (k s i len ::acc) state'
in
build [] (SplitAt 0)
let list_ ~by s = _mklist ~by s _tuple3
let list_cpy ~by s = _mklist ~by s String.sub
let _mkklist ~by s k =
let rec make state () = match _split ~by s state with
| None -> `Nil
| Some (state', i, len) ->
`Cons (k s i len , make state')
in make (SplitAt 0)
let klist ~by s = _mkklist ~by s _tuple3
let klist_cpy ~by s = _mkklist ~by s String.sub
let _mkseq ~by s f k =
let rec aux state = match _split ~by s state with
| None -> ()
| Some (state', i, len) -> k (f s i len); aux state'
in aux (SplitAt 0)
let seq ~by s = _mkseq ~by s _tuple3
let seq_cpy ~by s = _mkseq ~by s String.sub
end
(* note: inefficient *)
let find ?(start=0) ~sub s =
let n = String.length sub in
let i = ref start in
try
while !i + n < String.length s do
if _is_sub ~sub 0 s !i ~len:n then raise Exit;
incr i
done;
-1
with Exit ->
!i
let repeat s n =
assert (n>=0);
let len = String.length s in
assert(len > 0);
let buf = String.create (len * n) in
for i = 0 to n-1 do
String.blit s 0 buf (i * len) len;
done;
buf
let prefix ~pre s =
String.length pre <= String.length s &&
(let i = ref 0 in
while !i < String.length pre && s.[!i] = pre.[!i] do incr i done;
!i = String.length pre)
let blit = String.blit
let _to_gen s i0 len =
let i = ref i0 in
fun () ->
if !i = i0+len then None
else (
let c = String.unsafe_get s !i in
incr i;
Some c
)
let to_gen s = _to_gen s 0 (String.length s)
let of_gen g =
let b = Buffer.create 32 in
let rec aux () = match g () with
| None -> Buffer.contents b
| Some c -> Buffer.add_char b c; aux ()
in aux ()
let to_seq s k = String.iter k s
let of_seq seq =
let b= Buffer.create 32 in
seq (Buffer.add_char b);
Buffer.contents b
let rec _to_klist s i len () =
if len=0 then `Nil
else `Cons (s.[i], _to_klist s (i+1)(len-1))
let of_klist l =
let rec aux acc n l = match l() with
| `Nil ->
let s = String.create n in
let acc = ref acc in
for i=n-1 downto 0 do
s.[i] <- List.hd !acc;
acc := List.tl !acc
done;
s
| `Cons (x,l') -> aux (x::acc) (n+1) l'
in aux [] 0 l
let to_klist s = _to_klist s 0 (String.length s)
let pp buf s =
Buffer.add_char buf '"';
Buffer.add_string buf s;
Buffer.add_char buf '"'
module Sub = struct
type t = string * int * int
let make s i ~len =
if i<0||len<0||i+len > String.length s then invalid_arg "CCString.Sub.make";
s,i,len
let full s = s, 0, String.length s
let copy (s,i,len) = String.sub s i len
let underlying (s,_,_) = s
let sub (s,i,len) i' len' =
if i+i' + len' > i+len then invalid_arg "CCString.Sub.sub";
(s, i+i',len')
let length (_,_,l) = l
let blit (a1,i1,len1) o1 (a2,i2,len2) o2 len =
if o1+len>len1 || o2+len>len2 then invalid_arg "CCString.Sub.blit";
String.blit a1 (i1+o1) a2 (i2+o2) len
let to_gen (s,i,len) = _to_gen s i len
let to_seq (s,i,len) k =
for i=i to i+len-1 do k s.[i] done
let to_klist (s,i,len) = _to_klist s i len
let pp buf (s,i,len) =
Buffer.add_char buf '"';
Buffer.add_substring buf s i len;
Buffer.add_char buf '"'
end

147
core/CCString.mli Normal file
View file

@ -0,0 +1,147 @@
(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Basic String Utils}
Consider using {!Containers_string.KMP} for pattern search, or Regex
libraries. *)
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]
(** {2 Common Signature} *)
module type S = sig
type t
val length : t -> int
val blit : t -> int -> t -> int -> int -> unit
(** See {!String.blit} *)
(** {2 Conversions} *)
val to_gen : t -> char gen
val to_seq : t -> char sequence
val to_klist : t -> char klist
val pp : Buffer.t -> t -> unit
end
(** {2 Strings} *)
type t = string
val equal : t -> t -> bool
val compare : t -> t -> int
val hash : t -> int
val of_gen : char gen -> t
val of_seq : char sequence -> t
val of_klist : char klist -> t
val find : ?start:int -> sub:t -> t -> int
(** Find [sub] in the string, returns its first index or -1.
Should only be used with very small [sub] *)
val is_sub : sub:t -> int -> t -> int -> len:int -> bool
(** [is_sub ~sub i s j ~len] returns [true] iff the substring of
[sub] starting at position [i] and of length [len],
is a substring of [s] starting at position [j] *)
val repeat : t -> int -> t
(** The same string, repeated n times *)
val prefix : pre:t -> t -> bool
(** [str_prefix ~pre s] returns [true] iff [pre] is a prefix of [s] *)
include S with type t := t
(** {2 Splitting} *)
module Split : sig
val list_ : by:t -> t -> (t*int*int) list
(** split the given string along the given separator [by]. Should only
be used with very small separators, otherwise
use {!Containers_string.KMP}.
@return a list of (index,length) of substrings of [s] that are
separated by [by]. {!String.sub} can then be used to actually extract
the slice.
@raise Failure if [by = ""] *)
val gen : by:t -> t -> (t*int*int) gen
val seq : by:t -> t -> (t*int*int) sequence
val klist : by:t -> t -> (t*int*int) klist
(** {6 Copying functions}
Those split functions actually copy the substrings, which can be
more convenient but less efficient in general *)
val list_cpy : by:t -> t -> t list
(*$T
Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"]
Split.list_cpy ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""]
Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"]
*)
val gen_cpy : by:t -> t -> t gen
val seq_cpy : by:t -> t -> t sequence
val klist_cpy : by:t -> t -> t klist
end
(** {2 Slices} A contiguous part of a string *)
module Sub : sig
type t = string * int * int
(** A string, an offset, and the length of the slice *)
val make : string -> int -> len:int -> t
val full : string -> t
(** Full string *)
val copy : t -> string
(** Make a copy of the substring *)
val underlying : t -> string
val sub : t -> int -> int -> t
(** Sub-slice *)
include S with type t := t
end

View file

@ -1,150 +0,0 @@
(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Basic String Utils} *)
type t = string
let equal a b = a=b
let compare = String.compare
let hash s = Hashtbl.hash s
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
let is_sub ~sub i s j =
let rec check k =
if i + k = String.length sub
then true
else sub.[i + k] = s.[j+k] && check (k+1)
in
check 0
(* note: quite inefficient if [by] is long *)
let split_gen ~by s =
let len_by = String.length by in
assert (len_by > 0);
let n = String.length s in
let prev = ref 0 in
let stop = ref false in
let rec search i =
if !stop then None
else if i >= n
then (
stop := true;
Some (String.sub s !prev (n- !prev)) (* done *)
)
else if is_prefix i 0
then (
let p = !prev in
prev := i+len_by;
Some (String.sub s p (i-p))
)
else search (i+1)
and is_prefix i j =
if j = len_by
then true
else if i = n
then false
else s.[i] = by.[j] && is_prefix (i+1) (j+1)
in
fun () ->
search !prev
let split_seq ~by s k =
let rec aux g = match g () with
| None -> ()
| Some x -> k x; aux g
in aux (split_gen ~by s)
let split ~by s =
let rec aux g acc = match g () with
| None -> List.rev acc
| Some x -> aux g (x::acc)
in aux (split_gen ~by s) []
(*$T
split ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"]
split ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""]
*)
(* note: inefficient *)
let find ?(start=0) ~sub s =
let n = String.length sub in
let i = ref start in
try
while !i + n < String.length s do
if is_sub ~sub 0 s !i then raise Exit;
incr i
done;
-1
with Exit ->
!i
let repeat s n =
assert (n>=0);
let len = String.length s in
assert(len > 0);
let buf = String.create (len * n) in
for i = 0 to n-1 do
String.blit s 0 buf (i * len) len;
done;
buf
let prefix ~pre s =
String.length pre <= String.length s &&
(let i = ref 0 in
while !i < String.length pre && s.[!i] = pre.[!i] do incr i done;
!i = String.length pre)
let to_gen s =
let i = ref 0 in
fun () ->
if !i = String.length s then None
else (
let c = String.unsafe_get s !i in
incr i;
Some c
)
let of_gen g =
let b = Buffer.create 32 in
let rec aux () = match g () with
| None -> Buffer.contents b
| Some c -> Buffer.add_char b c; aux ()
in aux ()
let to_seq s k = String.iter k s
let of_seq seq =
let b= Buffer.create 32 in
seq (Buffer.add_char b);
Buffer.contents b
let pp = Buffer.add_string

View file

@ -1,70 +0,0 @@
(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Basic String Utils}
Consider using KMP instead. *)
type t = string
val equal : t -> t -> bool
val compare : t -> t -> int
val hash : t -> int
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
val is_sub : sub:t -> int -> t -> int -> bool
(** [is_sub ~sub i s j] returns [true] iff [sub] is a substring of [s] starting
at position [j] *)
val split : by:t -> t -> t list
(** split the given string along the given separator [by]. Should only
be used with very small separators, otherwise use {!KMP}.
@raise Failure if [by = ""] *)
val split_gen : by:t -> t -> t gen
val split_seq : by:t -> t -> t sequence
val find : ?start:int -> sub:t -> t -> int
(** Find [sub] in the string, returns its first index or -1.
Should only be used with very small [sub] *)
val repeat : t -> int -> t
(** The same string, repeated n times *)
val prefix : pre:t -> t -> bool
(** [str_prefix ~pre s] returns [true] iff [pre] is a prefix of [s] *)
val to_gen : t -> char gen
val of_gen : char gen -> t
val to_seq : t -> char sequence
val of_seq : char sequence -> t
val pp : Buffer.t -> t -> unit