diff --git a/README.md b/README.md index 43610d5b..377b6852 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ structures comprise (some modules in `misc/`, some other in `core/`): - `CCKTree`, an abstract lazy tree structure (similar to what `CCKlist` is to lists) - small modules (basic types, utilities): - `CCInt` + - `CCString` (basic string operations) - `CCPair` (cartesian products) - `CCOpt` (options) - `CCFun` (function combinators) @@ -70,6 +71,12 @@ structures comprise (some modules in `misc/`, some other in `core/`): - `CCHash` (hashing combinators) - `CCError` (monadic error handling) +### String + +In the module `Containers_string`: +- `Levenshtein`: edition distance between two strings +- `KMP`: Knuth-Morris-Pratt substring algorithm + ### Misc - `PHashtbl`, a polymorphic hashtable (with open addressing) diff --git a/_oasis b/_oasis index d2535038..167123e8 100644 --- a/_oasis +++ b/_oasis @@ -47,13 +47,13 @@ Library "containers" CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError, CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash, CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd, - CCRandom, CCLinq, CCKTree, CCTrie + CCRandom, CCLinq, CCKTree, CCTrie, CCString FindlibName: containers Library "containers_string" Path: string Pack: true - Modules: KMP, CCString, Levenshtein + Modules: KMP, Levenshtein FindlibName: string FindlibParent: containers diff --git a/core/CCArray.ml b/core/CCArray.ml index 49bfdd6c..e3d9c5bb 100644 --- a/core/CCArray.ml +++ b/core/CCArray.ml @@ -57,6 +57,10 @@ module type S = sig val iteri : (int -> 'a -> unit) -> 'a t -> unit + val blit : 'a t -> int -> 'a t -> int -> int -> unit + (** [blit from i into j len] copies [len] elements from the first array + to the second. See {!Array.blit}. *) + val reverse_in_place : 'a t -> unit (** Reverse the array in place *) @@ -92,16 +96,16 @@ module type S = sig (** {2 IO} *) - val pp: ?sep:string -> (Buffer.t -> 'a -> unit) - -> Buffer.t -> 'a t -> unit + val pp: ?sep:string -> (Buffer.t -> 'a -> unit) -> + Buffer.t -> 'a t -> unit (** print an array of items with printing function *) - val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) - -> Buffer.t -> 'a t -> unit + val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) -> + Buffer.t -> 'a t -> unit (** print an array, giving the printing function both index and item *) - val print : ?sep:string -> (Format.formatter -> 'a -> unit) - -> Format.formatter -> 'a t -> unit + val print : ?sep:string -> (Format.formatter -> 'a -> unit) -> + Format.formatter -> 'a t -> unit (** print an array of items with printing function *) end @@ -112,13 +116,13 @@ Most of those functions that a range [(i,j)] with let rec _foldi f acc a i j = if i = j then acc else _foldi f (f acc i a.(i)) a (i+1) j -let _reverse_in_place a i j = - if i=j then () +let _reverse_in_place a i ~len = + if len=0 then () else - for k = i to (j-1)/2 do - let t = a.(k) in - a.(k) <- a.(j-1-k); - a.(j-1-k) <- t; + for k = 0 to (len-1)/2 do + let t = a.(i+k) in + a.(i+k) <- a.(i+len-1-k); + a.(i+len-1-k) <- t; done let rec _equal eq a1 i1 j1 a2 i2 j2 = @@ -135,9 +139,16 @@ let rec _compare cmp a1 i1 j1 a2 i2 j2 = else let c = cmp a1.(i1) a2.(i2) in if c = 0 - then _compare cmp a1 (i1+1) j1 a2 (i2+2) j2 + then _compare cmp a1 (i1+1) j1 a2 (i2+1) j2 else c +(*$T + compare CCOrd.compare [| 1; 2; 3 |] [| 1; 2; 3 |] = 0 + compare CCOrd.compare [| 1; 2; 3 |] [| 2; 2; 3 |] < 0 + compare CCOrd.compare [| 1; 2; |] [| 1; 2; 3 |] < 0 + compare CCOrd.compare [| 1; 2; 3 |] [| 1; 2; |] > 0 +*) + let rec _find f a i j = if i = j then None else match f a.(i) with @@ -150,11 +161,11 @@ let rec _for_all p a i j = let rec _exists p a i j = i <> j && (p a.(i) || _exists p a (i+1) j) -let rec _for_all2 p a1 a2 i1 i2 j1 = - i1 = j1 || (p a1.(i1) a2.(i2) && _for_all2 p a1 a2 (i1+1) (i2+1) j1) +let rec _for_all2 p a1 a2 i1 i2 ~len = + len=0 || (p a1.(i1) a2.(i2) && _for_all2 p a1 a2 (i1+1) (i2+1) ~len:(len-1)) -let rec _exists2 p a1 a2 i1 i2 j1 = - i1 <> j1 && (p a1.(i1) a2.(i2) || _exists2 p a1 a2 (i1+1) (i2+1) j1) +let rec _exists2 p a1 a2 i1 i2 ~len = + len>0 && (p a1.(i1) a2.(i2) || _exists2 p a1 a2 (i1+1) (i2+1) ~len:(len-1)) (* shuffle a[i...j[ using the given int random generator See http://en.wikipedia.org/wiki/Fisher-Yates_shuffle *) @@ -228,6 +239,8 @@ let iter = Array.iter let iteri = Array.iteri +let blit = Array.blit + let reverse_in_place a = _reverse_in_place a 0 (Array.length a) @@ -303,12 +316,10 @@ let exists p a = _exists p a 0 (Array.length a) let for_all2 p a b = Array.length a = Array.length b && - _for_all2 p a b 0 0 (Array.length a) + _for_all2 p a b 0 0 ~len:(Array.length a) let exists2 p a b = - Array.length a = Array.length b - && - _exists2 p a b 0 0 (Array.length a) + _exists2 p a b 0 0 ~len:(min (Array.length a) (Array.length b)) let (--) i j = if i<=j @@ -374,11 +385,13 @@ module Sub = struct } let make arr i ~len = - if i+len > Array.length arr then invalid_arg "Array.Sub.make"; + if i<0||i+len > Array.length arr then invalid_arg "Array.Sub.make"; { arr; i; j=i+len; } let of_slice (arr,i,len) = make arr i ~len + let to_slice a = a.arr, a.i, a.j-a.i + let full arr = { arr; i=0; j=Array.length arr; } let underlying a = a.arr @@ -419,7 +432,16 @@ module Sub = struct let iteri f a = for k=0 to length a-1 do f k a.arr.(a.i + k) done - let reverse_in_place a = _reverse_in_place a.arr a.i a.j + let blit a i b j len = + if i+len>length a || j+len>length b then invalid_arg "Array.Sub.blit"; + Array.blit a.arr (a.i+i) b.arr (b.i+j) len + + let reverse_in_place a = _reverse_in_place a.arr a.i ~len:(length a) + + (*$T + let a = 1--6 in let s = Sub.make a 2 ~len:3 in \ + Sub.reverse_in_place s; a = [| 1; 2; 5; 4; 3; 6 |] + *) let find f a = _find f a.arr a.i a.j @@ -428,10 +450,14 @@ module Sub = struct let exists p a = _exists p a.arr a.i a.j let for_all2 p a b = - length a = length b && _for_all2 p a.arr b.arr a.i b.i b.j + length a = length b && _for_all2 p a.arr b.arr a.i b.i ~len:(length a) let exists2 p a b = - length a = length b && _exists2 p a.arr b.arr a.i b.i a.j + _exists2 p a.arr b.arr a.i b.i ~len:(min (length a) (length b)) + + (*$T + Sub.exists2 (=) (Sub.make [| 1;2;3;4 |] 1 ~len:2) (Sub.make [| 0;1;3;4 |] 1 ~len:3) + *) let shuffle a = _shuffle Random.int a.arr a.i a.j diff --git a/core/CCArray.mli b/core/CCArray.mli index 41637ed5..49564c40 100644 --- a/core/CCArray.mli +++ b/core/CCArray.mli @@ -59,6 +59,10 @@ module type S = sig val iteri : (int -> 'a -> unit) -> 'a t -> unit + val blit : 'a t -> int -> 'a t -> int -> int -> unit + (** [blit from i into j len] copies [len] elements from the first array + to the second. See {!Array.blit}. *) + val reverse_in_place : 'a t -> unit (** Reverse the array in place *) @@ -94,16 +98,16 @@ module type S = sig (** {2 IO} *) - val pp: ?sep:string -> (Buffer.t -> 'a -> unit) - -> Buffer.t -> 'a t -> unit + val pp: ?sep:string -> (Buffer.t -> 'a -> unit) -> + Buffer.t -> 'a t -> unit (** print an array of items with printing function *) - val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) - -> Buffer.t -> 'a t -> unit + val pp_i: ?sep:string -> (Buffer.t -> int -> 'a -> unit) -> + Buffer.t -> 'a t -> unit (** print an array, giving the printing function both index and item *) - val print : ?sep:string -> (Format.formatter -> 'a -> unit) - -> Format.formatter -> 'a t -> unit + val print : ?sep:string -> (Format.formatter -> 'a -> unit) -> + Format.formatter -> 'a t -> unit (** print an array of items with printing function *) end @@ -159,6 +163,10 @@ module Sub : sig [i] the offset in [arr], and [len] the number of elements of the slice. @raise Invalid_argument if the slice isn't valid (See {!make}) *) + val to_slice : 'a t -> ('a array * int * int) + (** Convert into a triple [(arr, i, len)] where [len] is the length of + the subarray of [arr] starting at offset [i] *) + val full : 'a array -> 'a t (** Slice that covers the full array *) diff --git a/core/CCPersistentHashtbl.ml b/core/CCPersistentHashtbl.ml index 2f613783..71c1610c 100644 --- a/core/CCPersistentHashtbl.ml +++ b/core/CCPersistentHashtbl.ml @@ -26,6 +26,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (** {1 Persistent hash-table on top of OCaml's hashtables} *) type 'a sequence = ('a -> unit) -> unit +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit +type 'a equal = 'a -> 'a -> bool module type HashedType = sig type t @@ -40,52 +43,92 @@ module type S = sig type 'a t val empty : unit -> 'a t - (** Empty table. The table will be allocated at the first binding *) + (** Empty table. The table will be allocated at the first binding *) val create : int -> 'a t - (** Create a new hashtable *) + (** Create a new hashtable, with the given initial capacity *) val is_empty : 'a t -> bool - (** Is the table empty? *) + (** Is the table empty? *) val find : 'a t -> key -> 'a - (** Find the value for this key, or raise Not_found *) + (** Find the value for this key, or fails + @raise Not_found if the key is not present in the table *) + + val get_exn : key -> 'a t -> 'a + (** Synonym to {!find} with flipped arguments *) + + val get : key -> 'a t -> 'a option + (** Safe version of !{get_exn} *) val mem : 'a t -> key -> bool - (** Is the key bound? *) + (** Is the key bound? *) - val length : 'a t -> int - (** Number of bindings *) + val length : _ t -> int + (** Number of bindings *) val replace : 'a t -> key -> 'a -> 'a t - (** Add the binding to the table, returning a new table. This erases - the current binding for [key], if any. *) + (** Add the binding to the table, returning a new table. This erases + the current binding for [key], if any. *) + + val update : 'a t -> key -> ('a option -> 'a option) -> 'a t + (** [update tbl key f] calls [f None] if [key] doesn't belong in [tbl], + [f (Some v)] if [key -> v] otherwise; If [f] returns [None] then + [key] is removed, else it returns [Some v'] and [key -> v'] is added. *) val remove : 'a t -> key -> 'a t - (** Remove the key *) + (** Remove the key *) val copy : 'a t -> 'a t - (** Fresh copy of the table; the underlying structure is not shared - anymore, so using both tables alternatively will be efficient *) + (** Fresh copy of the table; the underlying structure is not shared + anymore, so using both tables alternatively will be efficient *) - val merge : (key -> 'a option -> 'a option -> 'a option) -> 'a t -> 'a t -> 'a t - (** Merge two tables together into a new table *) + val merge : (key -> 'a option -> 'a option -> 'a option) -> + 'a t -> 'a t -> 'a t + (** Merge two tables together into a new table. The function's argument + correspond to values associated with the key (if present); if the + function returns [None] the key will not appear in the result. *) val iter : 'a t -> (key -> 'a -> unit) -> unit - (** Iterate over bindings *) + (** Iterate over bindings *) val fold : ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b - (** Fold over bindings *) + (** Fold over bindings *) - val of_seq : ?init:'a t -> (key * 'a) sequence -> 'a t - (** Add (replace) bindings from the sequence to the table *) + val map : (key -> 'a -> 'b) -> 'a t -> 'b t + (** Map all values *) - val of_list : ?init:'a t -> (key * 'a) list -> 'a t + val filter : (key -> 'a -> bool) -> 'a t -> 'a t + + val filter_map : (key -> 'a -> 'b option) -> 'a t -> 'b t + + val for_all : (key -> 'a -> bool) -> 'a t -> bool + + val exists : (key -> 'a -> bool) -> 'a t -> bool + + (** {3 Conversions} *) + + val of_seq : (key * 'a) sequence -> 'a t + (** Add (replace) bindings from the sequence to the table *) + + val of_list : (key * 'a) list -> 'a t + + val add_seq : 'a t -> (key * 'a) sequence -> 'a t + + val add_list : 'a t -> (key * 'a) list -> 'a t val to_seq : 'a t -> (key * 'a) sequence - (** Sequence of the bindings of the table *) + (** Sequence of the bindings of the table *) val to_list : 'a t -> (key * 'a) list + + (** {3 Misc} *) + + val equal : 'a equal -> 'a t equal + + val pp : key printer -> 'a printer -> 'a t printer + + val print : key formatter -> 'a formatter -> 'a t formatter end (** {2 Implementation} *) @@ -107,64 +150,55 @@ module Make(H : HashedType) : S with type key = H.t = struct let empty () = create 11 - (** Reroot: modify the zipper so that the current node is a proper - hashtable, and return the hashtable *) - let reroot t = - (* pass continuation to get a tailrec rerooting *) - let rec reroot t k = match !t with - | Table tbl -> k tbl (* done *) - | Add (key, v, t') -> - reroot t' - (fun tbl -> - t' := Remove (key, t); - Table.add tbl key v; - t := Table tbl; - k tbl) - | Replace (key, v, t') -> - reroot t' - (fun tbl -> - let v' = Table.find tbl key in - t' := Replace (key, v', t); - t := Table tbl; - Table.replace tbl key v; - k tbl) - | Remove (key, t') -> - reroot t' - (fun tbl -> - let v = Table.find tbl key in - t' := Add (key, v, t); - t := Table tbl; - Table.remove tbl key; - k tbl) - in - match !t with + (* pass continuation to get a tailrec rerooting *) + let rec _reroot t k = match !t with + | Table tbl -> k tbl (* done *) + | Add (key, v, t') -> + _reroot t' + (fun tbl -> + t' := Remove (key, t); + Table.add tbl key v; + t := Table tbl; + k tbl) + | Replace (key, v, t') -> + _reroot t' + (fun tbl -> + let v' = Table.find tbl key in + t' := Replace (key, v', t); + t := Table tbl; + Table.replace tbl key v; + k tbl) + | Remove (key, t') -> + _reroot t' + (fun tbl -> + let v = Table.find tbl key in + t' := Add (key, v, t); + t := Table tbl; + Table.remove tbl key; + k tbl) + + (* Reroot: modify the zipper so that the current node is a proper + hashtable, and return the hashtable *) + let reroot t = match !t with | Table tbl -> tbl - | _ -> reroot t (fun x -> x) + | _ -> _reroot t (fun x -> x) - let is_empty t = - match !t with - | Table tbl -> Table.length tbl = 0 - | _ -> Table.length (reroot t) = 0 + let is_empty t = Table.length (reroot t) = 0 - let find t k = - match !t with - | Table tbl -> Table.find tbl k - | _ -> Table.find (reroot t) k + let find t k = Table.find (reroot t) k - let mem t k = - match !t with - | Table tbl -> Table.mem tbl k - | _ -> Table.mem (reroot t) k + let get_exn k t = find t k - let length t = - match !t with - | Table tbl -> Table.length tbl - | _ -> Table.length (reroot t) + let get k t = + try Some (find t k) + with Not_found -> None + + let mem t k = Table.mem (reroot t) k + + let length t = Table.length (reroot t) let replace t k v = - let tbl = match !t with - | Table tbl -> tbl - | _ -> reroot t in + let tbl = reroot t in (* create the new hashtable *) let t' = ref (Table tbl) in (* update [t] to point to the new hashtable *) @@ -172,15 +206,14 @@ module Make(H : HashedType) : S with type key = H.t = struct let v' = Table.find tbl k in t := Replace (k, v', t') with Not_found -> - t := Remove (k, t')); + t := Remove (k, t') + ); (* modify the underlying hashtable *) Table.replace tbl k v; t' let remove t k = - let tbl = match !t with - | Table tbl -> tbl - | _ -> reroot t in + let tbl = reroot t in try let v' = Table.find tbl k in (* value present, make a new hashtable without this value *) @@ -192,26 +225,63 @@ module Make(H : HashedType) : S with type key = H.t = struct (* not member, nothing to do *) t + let update t k f = + let v = get k t in + match v, f v with + | None, None -> t (* no change *) + | Some _, None -> remove t k + | _, Some v' -> replace t k v' + let copy t = - let tbl = match !t with - | Table tbl -> tbl - | _ -> reroot t in + let tbl = reroot t in (* no one will point to the new [t] *) let t = ref (Table (Table.copy tbl)) in t let iter t f = - let tbl = match !t with - | Table tbl -> tbl - | _ -> reroot t in + let tbl = reroot t in Table.iter f tbl let fold f acc t = - let tbl = match !t with - | Table tbl -> tbl - | _ -> reroot t in + let tbl = reroot t in Table.fold (fun k v acc -> f acc k v) tbl acc + let map f t = + let tbl = reroot t in + let res = Table.create (Table.length tbl) in + Table.iter (fun k v -> Table.replace res k (f k v)) tbl; + ref (Table res) + + let filter p t = + let tbl = reroot t in + let res = Table.create (Table.length tbl) in + Table.iter (fun k v -> if p k v then Table.replace res k v) tbl; + ref (Table res) + + let filter_map f t = + let tbl = reroot t in + let res = Table.create (Table.length tbl) in + Table.iter + (fun k v -> match f k v with + | None -> () + | Some v' -> Table.replace res k v' + ) tbl; + ref (Table res) + + exception ExitPTbl + + let for_all p t = + try + iter t (fun k v -> if not (p k v) then raise ExitPTbl); + true + with ExitPTbl -> false + + let exists p t = + try + iter t (fun k v -> if p k v then raise ExitPTbl); + false + with ExitPTbl -> true + let merge f t1 t2 = let tbl = Table.create (max (length t1) (length t2)) in iter t1 @@ -227,15 +297,17 @@ module Make(H : HashedType) : S with type key = H.t = struct | Some v' -> Table.replace tbl k v2); ref (Table tbl) - let of_seq ?init seq = - let tbl = match init with - | None -> Table.create 5 - | Some t -> Table.copy (reroot t) in - seq (fun (k,v) -> Table.replace tbl k v); - ref (Table tbl) + let add_seq init seq = + let tbl = ref init in + seq (fun (k,v) -> tbl := replace !tbl k v); + !tbl - let of_list ?init l = - of_seq ?init (fun k -> List.iter k l) + let of_seq seq = add_seq (empty ()) seq + + let add_list init l = + add_seq init (fun k -> List.iter k l) + + let of_list l = add_list (empty ()) l let to_list t = let tbl = reroot t in @@ -244,10 +316,36 @@ module Make(H : HashedType) : S with type key = H.t = struct let to_seq t = fun k -> - let tbl = match !t with - | Table tbl -> tbl - | _ -> reroot t - in + let tbl = reroot t in Table.iter (fun x y -> k (x,y)) tbl + + let equal eq t1 t2 = + length t1 = length t2 + && + for_all + (fun k v -> match get k t2 with + | None -> false + | Some v' -> eq v v' + ) t1 + + let pp pp_k pp_v buf t = + Buffer.add_string buf "{"; + let first = ref true in + iter t + (fun k v -> + if !first then first:=false else Buffer.add_string buf ", "; + Printf.bprintf buf "%a -> %a" pp_k k pp_v v + ); + Buffer.add_string buf "}" + + let print pp_k pp_v fmt t = + Format.pp_print_string fmt "{"; + let first = ref true in + iter t + (fun k v -> + if !first then first:=false else Format.pp_print_string fmt ", "; + Format.fprintf fmt "%a -> %a" pp_k k pp_v v + ); + Format.pp_print_string fmt "}" end diff --git a/core/CCPersistentHashtbl.mli b/core/CCPersistentHashtbl.mli index 6de2760b..30b07f4e 100644 --- a/core/CCPersistentHashtbl.mli +++ b/core/CCPersistentHashtbl.mli @@ -23,9 +23,19 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *) -(** {1 Persistent hash-table on top of OCaml's hashtables} *) +(** {1 Persistent hash-table on top of OCaml's hashtables} + +Almost as efficient as the regular Hashtbl type, but with a persistent +interface (rewinding changes to get back in the past history). This is +mostly useful for backtracking-like uses, or forward uses (never using +old values). + +This module is not thread-safe. *) type 'a sequence = ('a -> unit) -> unit +type 'a printer = Buffer.t -> 'a -> unit +type 'a formatter = Format.formatter -> 'a -> unit +type 'a equal = 'a -> 'a -> bool module type HashedType = sig type t @@ -40,52 +50,92 @@ module type S = sig type 'a t val empty : unit -> 'a t - (** Empty table. The table will be allocated at the first binding *) + (** Empty table. The table will be allocated at the first binding *) val create : int -> 'a t - (** Create a new hashtable *) + (** Create a new hashtable, with the given initial capacity *) val is_empty : 'a t -> bool - (** Is the table empty? *) + (** Is the table empty? *) val find : 'a t -> key -> 'a - (** Find the value for this key, or raise Not_found *) + (** Find the value for this key, or fails + @raise Not_found if the key is not present in the table *) + + val get_exn : key -> 'a t -> 'a + (** Synonym to {!find} with flipped arguments *) + + val get : key -> 'a t -> 'a option + (** Safe version of !{get_exn} *) val mem : 'a t -> key -> bool - (** Is the key bound? *) + (** Is the key bound? *) - val length : 'a t -> int - (** Number of bindings *) + val length : _ t -> int + (** Number of bindings *) val replace : 'a t -> key -> 'a -> 'a t - (** Add the binding to the table, returning a new table. This erases - the current binding for [key], if any. *) + (** Add the binding to the table, returning a new table. This erases + the current binding for [key], if any. *) + + val update : 'a t -> key -> ('a option -> 'a option) -> 'a t + (** [update tbl key f] calls [f None] if [key] doesn't belong in [tbl], + [f (Some v)] if [key -> v] otherwise; If [f] returns [None] then + [key] is removed, else it returns [Some v'] and [key -> v'] is added. *) val remove : 'a t -> key -> 'a t - (** Remove the key *) + (** Remove the key *) val copy : 'a t -> 'a t - (** Fresh copy of the table; the underlying structure is not shared - anymore, so using both tables alternatively will be efficient *) + (** Fresh copy of the table; the underlying structure is not shared + anymore, so using both tables alternatively will be efficient *) - val merge : (key -> 'a option -> 'a option -> 'a option) -> 'a t -> 'a t -> 'a t - (** Merge two tables together into a new table *) + val merge : (key -> 'a option -> 'a option -> 'a option) -> + 'a t -> 'a t -> 'a t + (** Merge two tables together into a new table. The function's argument + correspond to values associated with the key (if present); if the + function returns [None] the key will not appear in the result. *) val iter : 'a t -> (key -> 'a -> unit) -> unit - (** Iterate over bindings *) + (** Iterate over bindings *) val fold : ('b -> key -> 'a -> 'b) -> 'b -> 'a t -> 'b - (** Fold over bindings *) + (** Fold over bindings *) - val of_seq : ?init:'a t -> (key * 'a) sequence -> 'a t - (** Add (replace) bindings from the sequence to the table *) + val map : (key -> 'a -> 'b) -> 'a t -> 'b t + (** Map all values *) - val of_list : ?init:'a t -> (key * 'a) list -> 'a t + val filter : (key -> 'a -> bool) -> 'a t -> 'a t + + val filter_map : (key -> 'a -> 'b option) -> 'a t -> 'b t + + val for_all : (key -> 'a -> bool) -> 'a t -> bool + + val exists : (key -> 'a -> bool) -> 'a t -> bool + + (** {3 Conversions} *) + + val of_seq : (key * 'a) sequence -> 'a t + (** Add (replace) bindings from the sequence to the table *) + + val of_list : (key * 'a) list -> 'a t + + val add_seq : 'a t -> (key * 'a) sequence -> 'a t + + val add_list : 'a t -> (key * 'a) list -> 'a t val to_seq : 'a t -> (key * 'a) sequence - (** Sequence of the bindings of the table *) + (** Sequence of the bindings of the table *) val to_list : 'a t -> (key * 'a) list + + (** {3 Misc} *) + + val equal : 'a equal -> 'a t equal + + val pp : key printer -> 'a printer -> 'a t printer + + val print : key formatter -> 'a formatter -> 'a t formatter end (** {2 Implementation} *) diff --git a/core/CCString.ml b/core/CCString.ml new file mode 100644 index 00000000..2f5094f3 --- /dev/null +++ b/core/CCString.ml @@ -0,0 +1,260 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Basic String Utils} *) + +type 'a gen = unit -> 'a option +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] + +module type S = sig + type t + + val length : t -> int + + val blit : t -> int -> t -> int -> int -> unit + (** See {!String.blit} *) + + (** {2 Conversions} *) + + val to_gen : t -> char gen + + val to_seq : t -> char sequence + + val to_klist : t -> char klist + + val pp : Buffer.t -> t -> unit +end + +type t = string + +let equal a b = a=b + +let compare = String.compare + +let hash s = Hashtbl.hash s + +let length = String.length + +let _is_sub ~sub i s j ~len = + let rec check k = + if k = len + then true + else sub.[i + k] = s.[j+k] && check (k+1) + in + j+len <= String.length s && check 0 + +let is_sub ~sub i s j ~len = + if i+len > String.length sub then invalid_arg "String.is_sub"; + _is_sub ~sub i s j ~len + + +module Split = struct + type split_state = + | SplitStop + | SplitAt of int (* previous *) + + (* [by_j... prefix of s_i...] ? *) + let rec _is_prefix ~by s i j = + j = String.length by + || + ( i < String.length s && + s.[i] = by.[j] && + _is_prefix ~by s (i+1) (j+1) + ) + + let rec _split ~by s state = match state with + | SplitStop -> None + | SplitAt prev -> _split_search ~by s prev prev + and _split_search ~by s prev i = + if i >= String.length s + then Some (SplitStop, prev, String.length s - prev) + else if _is_prefix ~by s i 0 + then Some (SplitAt (i+String.length by), prev, i-prev) + else _split_search ~by s prev (i+1) + + let _tuple3 x y z = x,y,z + + let _mkgen ~by s k = + let state = ref (SplitAt 0) in + fun () -> + match _split ~by s !state with + | None -> None + | Some (state', i, len) -> + state := state'; + Some (k s i len) + + let gen ~by s = _mkgen ~by s _tuple3 + + let gen_cpy ~by s = _mkgen ~by s String.sub + + let _mklist ~by s k = + let rec build acc state = match _split ~by s state with + | None -> List.rev acc + | Some (state', i, len) -> + build (k s i len ::acc) state' + in + build [] (SplitAt 0) + + let list_ ~by s = _mklist ~by s _tuple3 + + let list_cpy ~by s = _mklist ~by s String.sub + + let _mkklist ~by s k = + let rec make state () = match _split ~by s state with + | None -> `Nil + | Some (state', i, len) -> + `Cons (k s i len , make state') + in make (SplitAt 0) + + let klist ~by s = _mkklist ~by s _tuple3 + + let klist_cpy ~by s = _mkklist ~by s String.sub + + let _mkseq ~by s f k = + let rec aux state = match _split ~by s state with + | None -> () + | Some (state', i, len) -> k (f s i len); aux state' + in aux (SplitAt 0) + + let seq ~by s = _mkseq ~by s _tuple3 + let seq_cpy ~by s = _mkseq ~by s String.sub +end + +(* note: inefficient *) +let find ?(start=0) ~sub s = + let n = String.length sub in + let i = ref start in + try + while !i + n < String.length s do + if _is_sub ~sub 0 s !i ~len:n then raise Exit; + incr i + done; + -1 + with Exit -> + !i + +let repeat s n = + assert (n>=0); + let len = String.length s in + assert(len > 0); + let buf = String.create (len * n) in + for i = 0 to n-1 do + String.blit s 0 buf (i * len) len; + done; + buf + +let prefix ~pre s = + String.length pre <= String.length s && + (let i = ref 0 in + while !i < String.length pre && s.[!i] = pre.[!i] do incr i done; + !i = String.length pre) + +let blit = String.blit + +let _to_gen s i0 len = + let i = ref i0 in + fun () -> + if !i = i0+len then None + else ( + let c = String.unsafe_get s !i in + incr i; + Some c + ) + +let to_gen s = _to_gen s 0 (String.length s) + +let of_gen g = + let b = Buffer.create 32 in + let rec aux () = match g () with + | None -> Buffer.contents b + | Some c -> Buffer.add_char b c; aux () + in aux () + +let to_seq s k = String.iter k s + +let of_seq seq = + let b= Buffer.create 32 in + seq (Buffer.add_char b); + Buffer.contents b + +let rec _to_klist s i len () = + if len=0 then `Nil + else `Cons (s.[i], _to_klist s (i+1)(len-1)) + +let of_klist l = + let rec aux acc n l = match l() with + | `Nil -> + let s = String.create n in + let acc = ref acc in + for i=n-1 downto 0 do + s.[i] <- List.hd !acc; + acc := List.tl !acc + done; + s + | `Cons (x,l') -> aux (x::acc) (n+1) l' + in aux [] 0 l + +let to_klist s = _to_klist s 0 (String.length s) + +let pp buf s = + Buffer.add_char buf '"'; + Buffer.add_string buf s; + Buffer.add_char buf '"' + +module Sub = struct + type t = string * int * int + + let make s i ~len = + if i<0||len<0||i+len > String.length s then invalid_arg "CCString.Sub.make"; + s,i,len + + let full s = s, 0, String.length s + + let copy (s,i,len) = String.sub s i len + + let underlying (s,_,_) = s + + let sub (s,i,len) i' len' = + if i+i' + len' > i+len then invalid_arg "CCString.Sub.sub"; + (s, i+i',len') + + let length (_,_,l) = l + + let blit (a1,i1,len1) o1 (a2,i2,len2) o2 len = + if o1+len>len1 || o2+len>len2 then invalid_arg "CCString.Sub.blit"; + String.blit a1 (i1+o1) a2 (i2+o2) len + + let to_gen (s,i,len) = _to_gen s i len + let to_seq (s,i,len) k = + for i=i to i+len-1 do k s.[i] done + let to_klist (s,i,len) = _to_klist s i len + + let pp buf (s,i,len) = + Buffer.add_char buf '"'; + Buffer.add_substring buf s i len; + Buffer.add_char buf '"' +end diff --git a/core/CCString.mli b/core/CCString.mli new file mode 100644 index 00000000..83e72342 --- /dev/null +++ b/core/CCString.mli @@ -0,0 +1,147 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Basic String Utils} +Consider using {!Containers_string.KMP} for pattern search, or Regex +libraries. *) + +type 'a gen = unit -> 'a option +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] + +(** {2 Common Signature} *) + +module type S = sig + type t + + val length : t -> int + + val blit : t -> int -> t -> int -> int -> unit + (** See {!String.blit} *) + + (** {2 Conversions} *) + + val to_gen : t -> char gen + + val to_seq : t -> char sequence + + val to_klist : t -> char klist + + val pp : Buffer.t -> t -> unit +end + +(** {2 Strings} *) + +type t = string + +val equal : t -> t -> bool + +val compare : t -> t -> int + +val hash : t -> int + +val of_gen : char gen -> t + +val of_seq : char sequence -> t + +val of_klist : char klist -> t + +val find : ?start:int -> sub:t -> t -> int +(** Find [sub] in the string, returns its first index or -1. + Should only be used with very small [sub] *) + +val is_sub : sub:t -> int -> t -> int -> len:int -> bool +(** [is_sub ~sub i s j ~len] returns [true] iff the substring of + [sub] starting at position [i] and of length [len], + is a substring of [s] starting at position [j] *) + +val repeat : t -> int -> t +(** The same string, repeated n times *) + +val prefix : pre:t -> t -> bool +(** [str_prefix ~pre s] returns [true] iff [pre] is a prefix of [s] *) + +include S with type t := t + +(** {2 Splitting} *) + +module Split : sig + val list_ : by:t -> t -> (t*int*int) list + (** split the given string along the given separator [by]. Should only + be used with very small separators, otherwise + use {!Containers_string.KMP}. + @return a list of (index,length) of substrings of [s] that are + separated by [by]. {!String.sub} can then be used to actually extract + the slice. + @raise Failure if [by = ""] *) + + val gen : by:t -> t -> (t*int*int) gen + + val seq : by:t -> t -> (t*int*int) sequence + + val klist : by:t -> t -> (t*int*int) klist + + (** {6 Copying functions} + + Those split functions actually copy the substrings, which can be + more convenient but less efficient in general *) + + val list_cpy : by:t -> t -> t list + + (*$T + Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"] + Split.list_cpy ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""] + Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"] + *) + + val gen_cpy : by:t -> t -> t gen + + val seq_cpy : by:t -> t -> t sequence + + val klist_cpy : by:t -> t -> t klist +end + +(** {2 Slices} A contiguous part of a string *) + +module Sub : sig + type t = string * int * int + (** A string, an offset, and the length of the slice *) + + val make : string -> int -> len:int -> t + + val full : string -> t + (** Full string *) + + val copy : t -> string + (** Make a copy of the substring *) + + val underlying : t -> string + + val sub : t -> int -> int -> t + (** Sub-slice *) + + include S with type t := t +end diff --git a/string/CCString.ml b/string/CCString.ml deleted file mode 100644 index a34614cf..00000000 --- a/string/CCString.ml +++ /dev/null @@ -1,150 +0,0 @@ - -(* -copyright (c) 2013-2014, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 Basic String Utils} *) - -type t = string - -let equal a b = a=b - -let compare = String.compare - -let hash s = Hashtbl.hash s - -type 'a gen = unit -> 'a option -type 'a sequence = ('a -> unit) -> unit - -let is_sub ~sub i s j = - let rec check k = - if i + k = String.length sub - then true - else sub.[i + k] = s.[j+k] && check (k+1) - in - check 0 - -(* note: quite inefficient if [by] is long *) -let split_gen ~by s = - let len_by = String.length by in - assert (len_by > 0); - let n = String.length s in - let prev = ref 0 in - let stop = ref false in - let rec search i = - if !stop then None - else if i >= n - then ( - stop := true; - Some (String.sub s !prev (n- !prev)) (* done *) - ) - else if is_prefix i 0 - then ( - let p = !prev in - prev := i+len_by; - Some (String.sub s p (i-p)) - ) - else search (i+1) - and is_prefix i j = - if j = len_by - then true - else if i = n - then false - else s.[i] = by.[j] && is_prefix (i+1) (j+1) - in - fun () -> - search !prev - -let split_seq ~by s k = - let rec aux g = match g () with - | None -> () - | Some x -> k x; aux g - in aux (split_gen ~by s) - -let split ~by s = - let rec aux g acc = match g () with - | None -> List.rev acc - | Some x -> aux g (x::acc) - in aux (split_gen ~by s) [] - -(*$T - split ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"] - split ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""] -*) - -(* note: inefficient *) -let find ?(start=0) ~sub s = - let n = String.length sub in - let i = ref start in - try - while !i + n < String.length s do - if is_sub ~sub 0 s !i then raise Exit; - incr i - done; - -1 - with Exit -> - !i - -let repeat s n = - assert (n>=0); - let len = String.length s in - assert(len > 0); - let buf = String.create (len * n) in - for i = 0 to n-1 do - String.blit s 0 buf (i * len) len; - done; - buf - -let prefix ~pre s = - String.length pre <= String.length s && - (let i = ref 0 in - while !i < String.length pre && s.[!i] = pre.[!i] do incr i done; - !i = String.length pre) - - -let to_gen s = - let i = ref 0 in - fun () -> - if !i = String.length s then None - else ( - let c = String.unsafe_get s !i in - incr i; - Some c - ) - -let of_gen g = - let b = Buffer.create 32 in - let rec aux () = match g () with - | None -> Buffer.contents b - | Some c -> Buffer.add_char b c; aux () - in aux () - -let to_seq s k = String.iter k s - -let of_seq seq = - let b= Buffer.create 32 in - seq (Buffer.add_char b); - Buffer.contents b - -let pp = Buffer.add_string diff --git a/string/CCString.mli b/string/CCString.mli deleted file mode 100644 index 50ba1bcf..00000000 --- a/string/CCString.mli +++ /dev/null @@ -1,70 +0,0 @@ - -(* -copyright (c) 2013-2014, simon cruanes -all rights reserved. - -redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. redistributions in binary -form must reproduce the above copyright notice, this list of conditions and the -following disclaimer in the documentation and/or other materials provided with -the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*) - -(** {1 Basic String Utils} -Consider using KMP instead. *) - -type t = string - -val equal : t -> t -> bool - -val compare : t -> t -> int - -val hash : t -> int - -type 'a gen = unit -> 'a option -type 'a sequence = ('a -> unit) -> unit - -val is_sub : sub:t -> int -> t -> int -> bool -(** [is_sub ~sub i s j] returns [true] iff [sub] is a substring of [s] starting - at position [j] *) - -val split : by:t -> t -> t list -(** split the given string along the given separator [by]. Should only - be used with very small separators, otherwise use {!KMP}. - @raise Failure if [by = ""] *) - -val split_gen : by:t -> t -> t gen - -val split_seq : by:t -> t -> t sequence - -val find : ?start:int -> sub:t -> t -> int -(** Find [sub] in the string, returns its first index or -1. - Should only be used with very small [sub] *) - -val repeat : t -> int -> t -(** The same string, repeated n times *) - -val prefix : pre:t -> t -> bool -(** [str_prefix ~pre s] returns [true] iff [pre] is a prefix of [s] *) - -val to_gen : t -> char gen -val of_gen : char gen -> t - -val to_seq : t -> char sequence -val of_seq : char sequence -> t - -val pp : Buffer.t -> t -> unit