From 7822f3a0451c6554df8359586501713235081e45 Mon Sep 17 00:00:00 2001 From: Christopher Zimmermann Date: Thu, 11 Oct 2018 17:38:54 +0200 Subject: [PATCH] Create CCStringLabels CAVE: there is a breaking interface change for String.is_sub --- src/core/CCArrayLabels.ml | 17 -- src/core/CCListLabels.ml | 14 -- src/core/CCString.mli | 8 +- src/core/CCStringLabels.ml | 4 + src/core/CCStringLabels.mli | 472 ++++++++++++++++++++++++++++++++++++ src/core/containers.ml | 1 + src/core/dune | 6 + 7 files changed, 488 insertions(+), 34 deletions(-) create mode 100644 src/core/CCStringLabels.ml create mode 100644 src/core/CCStringLabels.mli diff --git a/src/core/CCArrayLabels.ml b/src/core/CCArrayLabels.ml index f100fe0e..fdf51544 100644 --- a/src/core/CCArrayLabels.ml +++ b/src/core/CCArrayLabels.ml @@ -1,21 +1,4 @@ (* This file is free software, part of containers. See file "license" for more details. *) -(* fallback function *) -external make_float : int -> float array = "caml_make_float_vect" - include CCArray - -(*$inject - module type A = module type of CCArray - module type AL = module type of CCArrayLabels -*) - -(*$R - ignore (module CCArrayLabels : A) -*) - -(*$R - ignore (module CCArray : AL) -*) - diff --git a/src/core/CCListLabels.ml b/src/core/CCListLabels.ml index 282134b4..e2f676b6 100644 --- a/src/core/CCListLabels.ml +++ b/src/core/CCListLabels.ml @@ -2,17 +2,3 @@ (* This file is free software, part of containers. See file "license" for more details. *) include CCList - -(*$inject - module type L = module type of CCList - module type LL = module type of CCListLabels -*) - -(*$R - ignore (module CCListLabels : L) -*) - -(*$R - ignore (module CCList : LL) -*) - diff --git a/src/core/CCString.mli b/src/core/CCString.mli index eb4734f2..5632b169 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -1,3 +1,5 @@ +(* AUTOGENERATED FROM CCStringLabels.mli *) + (* This file is free software, part of containers. See file "license" for more details. 
*) @@ -142,7 +144,7 @@ val replace : ?which:[`Left|`Right|`All] -> sub:string -> by:string -> string -> @raise Invalid_argument if [sub = ""]. @since 0.14 *) -val is_sub : sub:string -> int -> string -> int -> len:int -> bool +val is_sub : sub:string -> int -> string -> int -> sub_len:int -> bool (** [is_sub ~sub i s j ~len] returns [true] iff the substring of [sub] starting at position [i] and of length [len] is a substring of [s] starting at position [j]. *) @@ -333,12 +335,12 @@ module Find : sig val rcompile : string -> [ `Reverse ] pattern - val find : ?start:int -> pattern:[`Direct] pattern -> string -> int + val find : ?start:int -> pattern:([`Direct] pattern) -> string -> int (** Search for [pattern] in the string, left-to-right. @return the offset of the first match, -1 otherwise. @param start offset in string at which we start. *) - val rfind : ?start:int -> pattern:[`Reverse] pattern -> string -> int + val rfind : ?start:int -> pattern:([`Reverse] pattern) -> string -> int (** Search for [pattern] in the string, right-to-left. @return the offset of the start of the first match from the right, -1 otherwise. @param start right-offset in string at which we start. *) diff --git a/src/core/CCStringLabels.ml b/src/core/CCStringLabels.ml new file mode 100644 index 00000000..e12f316a --- /dev/null +++ b/src/core/CCStringLabels.ml @@ -0,0 +1,4 @@ + +(* This file is free software, part of containers. See file "license" for more details. *) + +include CCString diff --git a/src/core/CCStringLabels.mli b/src/core/CCStringLabels.mli new file mode 100644 index 00000000..87b710d4 --- /dev/null +++ b/src/core/CCStringLabels.mli @@ -0,0 +1,472 @@ + +(* This file is free software, part of containers. See file "license" for more details. 
*) + +(** {1 Basic String Utils} *) + +type 'a gen = unit -> 'a option +type 'a sequence = ('a -> unit) -> unit +type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] + +(** {2 Common Signature} *) + +module type S = sig + type t + + val length : t -> int + (** Return the length (number of characters) of the given string. *) + + val blit : src:t -> src_pos:int -> dst:Bytes.t -> dst_pos:int -> len:int -> unit + (** Like {!String.blit}. + Compatible with the [-safe-string] option. + @raise Invalid_argument if indices are not valid. *) + + (* + val blit_immut : t -> int -> t -> int -> int -> string + (** Immutable version of {!blit}, returning a new string. + [blit a i b j len] is the same as [b], but in which + the range [j, ..., j+len] is replaced by [a.[i], ..., a.[i + len]]. + @raise Invalid_argument if indices are not valid. *) + *) + + val fold : f:('a -> char -> 'a) -> init:'a -> t -> 'a + (** Fold on chars by increasing index. + @since 0.7 *) + + (** {2 Conversions} *) + + val to_gen : t -> char gen + (** Return the [gen] of characters contained in the string. *) + + val to_seq : t -> char sequence + (** Return the [sequence] of characters contained in the string. *) + + val to_klist : t -> char klist + (** Return the [klist] of characters contained in the string. *) + + val to_list : t -> char list + (** Return the list of characters contained in the string. *) + + val pp_buf : Buffer.t -> t -> unit + (** Renamed from [pp] since 2.0. *) + + val pp : Format.formatter -> t -> unit + (** Print the string within quotes. + + Renamed from [print] since 2.0. *) +end + +(** {2 Strings} *) + +include module type of struct include StringLabels end + +val equal : string -> string -> bool +(** Equality function on strings. *) + +val compare : string -> string -> int + +val is_empty : string -> bool +(** [is_empty s] returns [true] iff [s] is empty (i.e. its length is 0). 
+ @since 1.5 *) + +val hash : string -> int + +val init : int -> (int -> char) -> string +(** Like [Array.init]. + @since 0.3.3 *) + +val rev : string -> string +(** [rev s] returns the reverse of [s]. + @since 0.17 *) + +val pad : ?side:[`Left|`Right] -> ?c:char -> int -> string -> string +(** [pad n str] ensures that [str] is at least [n] bytes long, + and pads it on the [side] with [c] if it's not the case. + @param side determines where padding occurs (default: [`Left]). + @param c the char used to pad (default: ' '). + @since 0.17 *) + +val of_char : char -> string +(** [of_char 'a'] is ["a"]. + @since 0.19 *) + +val of_gen : char gen -> string +(** Convert a [gen] of characters to a string. *) + +val of_seq : char sequence -> string +(** Convert a [sequence] of characters to a string. *) + +val of_klist : char klist -> string +(** Convert a [klist] of characters to a string. *) + +val of_list : char list -> string +(** Convert a list of characters to a string. *) + +val of_array : char array -> string +(** Convert an array of characters to a string. *) + +val to_array : string -> char array +(** Return the array of characters contained in the string. *) + +val find : ?start:int -> sub:(string [@keep_label]) -> string -> int +(** Find [sub] in string, returns its first index or [-1]. *) + +val find_all : ?start:int -> sub:(string [@keep_label]) -> string -> int gen +(** [find_all ~sub s] finds all occurrences of [sub] in [s], even overlapping + instances. + @param start starting position in [s]. + @since 0.17 *) + +val find_all_l : ?start:int -> sub:(string [@keep_label]) -> string -> int list +(** [find_all_l ~sub s] finds all occurrences of [sub] in [s] and returns + them in a list. + @param start starting position in [s]. + @since 0.17 *) + +val mem : ?start:int -> sub:(string [@keep_label]) -> string -> bool +(** [mem ~sub s] is [true] iff [sub] is a substring of [s]. 
+ @since 0.12 *) + +val rfind : sub:(string [@keep_label]) -> string -> int +(** Find [sub] in string from the right, returns its first index or [-1]. + Should only be used with very small [sub]. + @since 0.12 *) + +val replace : ?which:[`Left|`Right|`All] -> sub:(string [@keep_label]) -> by:(string [@keep_label]) -> string -> string +(** [replace ~sub ~by s] replaces some occurrences of [sub] by [by] in [s]. + @param which decides whether the occurrences to replace are: + {ul + {- [`Left] first occurrence from the left (beginning).} + {- [`Right] first occurrence from the right (end).} + {- [`All] all occurrences (default).} + } + @raise Invalid_argument if [sub = ""]. + @since 0.14 *) + +val is_sub : sub:(string [@keep_label]) -> sub_pos:int -> string -> pos:int -> sub_len:(int [@keep_label]) -> bool +(** [is_sub ~sub ~sub_pos s ~pos ~sub_len] returns [true] iff the substring of + [sub] starting at position [sub_pos] and of length [sub_len] is a substring + of [s] starting at position [pos]. *) + +val repeat : string -> int -> string +(** The same string, repeated n times. *) + +val prefix : pre:(string [@keep_label]) -> string -> bool +(** [prefix ~pre s] returns [true] iff [pre] is a prefix of [s]. *) + +val suffix : suf:(string [@keep_label]) -> string -> bool +(** [suffix ~suf s] returns [true] iff [suf] is a suffix of [s]. + @since 0.7 *) + +val chop_prefix : pre:(string [@keep_label]) -> string -> string option +(** [chop_prefix ~pre s] removes [pre] from [s] if [pre] really is a prefix + of [s], returns [None] otherwise. + @since 0.17 *) + +val chop_suffix : suf:(string [@keep_label]) -> string -> string option +(** [chop_suffix ~suf s] removes [suf] from [s] if [suf] really is a suffix + of [s], returns [None] otherwise. + @since 0.17 *) + +val take : int -> string -> string +(** [take n s] keeps only the [n] first chars of [s]. + @since 0.17 *) + +val drop : int -> string -> string +(** [drop n s] removes the [n] first chars of [s]. 
+ @since 0.17 *) + +val take_drop : int -> string -> string * string +(** [take_drop n s = take n s, drop n s]. + @since 0.17 *) + +val lines : string -> string list +(** [lines s] returns a list of the lines of [s] (splits along '\n'). + @since 0.10 *) + +val lines_gen : string -> string gen +(** [lines_gen s] returns a generator of the lines of [s] (splits along '\n'). + @since 0.10 *) + +val concat_gen : sep:(string [@keep_label]) -> string gen -> string +(** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep]. + @since 0.10 *) + +val unlines : string list -> string +(** [unlines l] concatenates all strings of [l], separated with '\n'. + @since 0.10 *) + +val unlines_gen : string gen -> string +(** [unlines_gen g] concatenates all strings of [g], separated with '\n'. + @since 0.10 *) + +val set : string -> int -> char -> string +(** [set s i c] creates a new string which is a copy of [s], except + for index [i], which becomes [c]. + @raise Invalid_argument if [i] is an invalid index. + @since 0.12 *) + +val iter : f:(char -> unit) -> string -> unit +(** Alias to {!String.iter}. + @since 0.12 *) + +val iteri : f:(int -> char -> unit) -> string -> unit +(** Iter on chars with their index. + @since 0.12 *) + +val map : f:(char -> char) -> string -> string +(** Map chars. + @since 0.12 *) + +val mapi : f:(int -> char -> char) -> string -> string +(** Map chars with their index. + @since 0.12 *) + +val filter_map : f:(char -> char option) -> string -> string +(** [filter_map f s] calls [(f a0) (f a1) ... (f an)] where [a0 ... an] are the characters of [s]. + It returns the string of characters [ci] such that [f ai = Some ci] (when [f] returns [None], + the corresponding element of [s] is discarded). + @since 0.17 *) + +val filter : f:(char -> bool) -> string -> string +(** [filter f s] discards characters not satisfying [f]. 
+ @since 0.17 *) + +val flat_map : ?sep:string -> f:(char -> string) -> string -> string +(** Map each char to a string, then concatenate them all. + @param sep optional separator between each generated string. + @since 0.12 *) + +val for_all : f:(char -> bool) -> string -> bool +(** True for all chars? + @since 0.12 *) + +val exists : f:(char -> bool) -> string -> bool +(** True for some char? + @since 0.12 *) + +include S with type t := string + +val drop_while : f:(char -> bool) -> t -> t +(** [drop_while f s] discards any characters starting from the left, + up to the first character [c] not satisfying [f c]. + @since 2.2 *) + +val rdrop_while : f:(char -> bool) -> t -> t +(** [rdrop_while f s] discards any characters starting from the right, + up to the first character [c] not satisfying [f c]. + @since 2.2 *) + +val ltrim : t -> t +(** Trim space on the left (see {!String.trim} for more details). + @since 1.2 *) + +val rtrim : t -> t +(** Trim space on the right (see {!String.trim} for more details). + @since 1.2 *) + +(** {2 Operations on 2 strings} *) + +val map2 : f:(char -> char -> char) -> string -> string -> string +(** Map pairs of chars. + @raise Invalid_argument if the strings have not the same length. + @since 0.12 *) + +val iter2: f:(char -> char -> unit) -> string -> string -> unit +(** Iterate on pairs of chars. + @raise Invalid_argument if the strings have not the same length. + @since 0.12 *) + +val iteri2: f:(int -> char -> char -> unit) -> string -> string -> unit +(** Iterate on pairs of chars with their index. + @raise Invalid_argument if the strings have not the same length. + @since 0.12 *) + +val fold2: f:('a -> char -> char -> 'a) -> init:'a -> string -> string -> 'a +(** Fold on pairs of chars. + @raise Invalid_argument if the strings have not the same length. + @since 0.12 *) + +val for_all2 : f:(char -> char -> bool) -> string -> string -> bool +(** All pairs of chars respect the predicate? 
+ @raise Invalid_argument if the strings have not the same length. + @since 0.12 *) + +val exists2 : f:(char -> char -> bool) -> string -> string -> bool +(** Exists a pair of chars? + @raise Invalid_argument if the strings have not the same length. + @since 0.12 *) + +(** {2 Ascii functions} + + Those functions are deprecated in {!String} since 4.03, so we provide + a stable alias for them even in older versions. *) + +val capitalize_ascii : string -> string +(** See {!String}. + @since 0.18 *) + +val uncapitalize_ascii : string -> string +(** See {!String}. + @since 0.18 *) + +val uppercase_ascii : string -> string +(** See {!String}. + @since 0.18 *) + +val lowercase_ascii : string -> string +(** See {!String}. + @since 0.18 *) + +val equal_caseless : string -> string -> bool +(** Comparison without respect to {b ascii} lowercase. + @since 1.2 *) + +(** {2 Finding} + + A relatively efficient algorithm for finding sub-strings. + @since 1.0 *) + +module Find : sig + type _ pattern + + val compile : string -> [ `Direct ] pattern + + val rcompile : string -> [ `Reverse ] pattern + + val find : ?start:int -> pattern:(([`Direct] pattern) [@keep_label]) -> string -> int + (** Search for [pattern] in the string, left-to-right. + @return the offset of the first match, -1 otherwise. + @param start offset in string at which we start. *) + + val rfind : ?start:int -> pattern:(([`Reverse] pattern) [@keep_label]) -> string -> int + (** Search for [pattern] in the string, right-to-left. + @return the offset of the start of the first match from the right, -1 otherwise. + @param start right-offset in string at which we start. *) +end + +(** {2 Splitting} *) + +module Split : sig + (** Specification of what to do with empty blocks, as in [split ~by:"-" "-a-b-"]. 
+ + - [{first=false; last=false}] will return [""; "a"; "b"; ""] + - [{first=true; last=false}] will return ["a"; "b"; ""] + - [{first=false; last=true}] will return [""; "a"; "b"] + - [{first=true; last=true}] will return ["a"; "b"] + + The default value of [drop] in all remaining functions is {!no_drop}. + @since 1.5 + *) + type drop_if_empty = { + first: bool; + last: bool; + } + + val no_drop : drop_if_empty + (** Do not drop any group, even empty and on borders. + @since 1.5 *) + + val list_ : ?drop:drop_if_empty -> by:(string [@keep_label]) -> string -> (string*int*int) list + (** Split the given string along the given separator [by]. Should only + be used with very small separators, otherwise + use {!Containers_string.KMP}. + @return a list of slices [(s,index,length)] that are + separated by [by]. {!String.sub} can then be used to actually extract + a string from the slice. + @raise Failure if [by = ""]. *) + + val gen : ?drop:drop_if_empty -> by:(string [@keep_label]) -> string -> (string*int*int) gen + + val seq : ?drop:drop_if_empty -> by:(string [@keep_label]) -> string -> (string*int*int) sequence + + val klist : ?drop:drop_if_empty -> by:(string [@keep_label]) -> string -> (string*int*int) klist + + (** {4 Copying functions} + + Those split functions actually copy the substrings, which can be + more convenient but less efficient in general. *) + + val list_cpy : ?drop:drop_if_empty -> by:(string [@keep_label]) -> string -> string list + + val gen_cpy : ?drop:drop_if_empty -> by:(string [@keep_label]) -> string -> string gen + + val seq_cpy : ?drop:drop_if_empty -> by:(string [@keep_label]) -> string -> string sequence + + val klist_cpy : ?drop:drop_if_empty -> by:(string [@keep_label]) -> string -> string klist + + val left : by:(string [@keep_label]) -> string -> (string * string) option + (** Split on the first occurrence of [by] from the leftmost part of + the string. 
+ @since 0.12 *) + + val left_exn : by:(string [@keep_label]) -> string -> string * string + (** Split on the first occurrence of [by] from the leftmost part of the string. + @raise Not_found if [by] is not part of the string. + @since 0.16 *) + + val right : by:(string [@keep_label]) -> string -> (string * string) option + (** Split on the first occurrence of [by] from the rightmost part of + the string. + @since 0.12 *) + + val right_exn : by:(string [@keep_label]) -> string -> string * string + (** Split on the first occurrence of [by] from the rightmost part of the string. + @raise Not_found if [by] is not part of the string. + @since 0.16 *) + +end + +val split_on_char : by:char -> string -> string list +(** Split the string along the given char. + @since 1.2 *) + +val split : by:(string [@keep_label]) -> string -> string list +(** Alias to {!Split.list_cpy}. + @since 1.2 *) + +(** {2 Utils} *) + +val compare_versions : string -> string -> int +(** [compare_versions a b] compares {i version strings} [ a] and [b], + considering that numbers are above text. + @since 0.13 *) + +val compare_natural : string -> string -> int +(** Natural Sort Order, comparing chunks of digits as natural numbers. + https://en.wikipedia.org/wiki/Natural_sort_order + @since 1.3 *) + +val edit_distance : string -> string -> int +(** Edition distance between two strings. This satisfies the classical + distance axioms: it is always positive, symmetric, and satisfies + the formula [distance a b + distance b c >= distance a c]. *) + +(** {2 Slices} + + A contiguous part of a string *) + +module Sub : sig + type t = string * int * int + (** A string, an offset, and the length of the slice. *) + + val make : string -> pos:int -> len:(int [@keep_label]) -> t + + val full : string -> t + (** Full string. *) + + val copy : t -> string + (** Make a copy of the substring. *) + + val underlying : t -> string + + val sub : t -> int -> int -> t + (** Sub-slice. 
*) + + val get : t -> int -> char + (** [get s i] gets the [i]-th element, or fails. + @raise Invalid_argument if the index is not within [0 ... length - 1]. + @since 1.2 *) + + include S with type t := t + +end diff --git a/src/core/containers.ml b/src/core/containers.ml index b12a4a2c..5648dfa1 100644 --- a/src/core/containers.ml +++ b/src/core/containers.ml @@ -43,6 +43,7 @@ module Ref = CCRef module Result = CCResult module Set = CCSet module String = CCString +module StringLabels = CCStringLabels module Vector = CCVector module Monomorphic = CCMonomorphic module Utf8_string = CCUtf8_string diff --git a/src/core/dune b/src/core/dune index ad4b8831..a803cccc 100644 --- a/src/core/dune +++ b/src/core/dune @@ -10,6 +10,12 @@ (mode promote) (action (run ../unlabel.exe %{deps} %{targets}))) +(rule + (targets CCString.mli) + (deps CCStringLabels.mli) + (mode promote) + (action (run ../unlabel.exe %{deps} %{targets}))) + (library (name containers) (public_name containers)