(* This file is free software, part of containers. See file "license" for more details. *) (** {1 Basic String Utils} Consider using {!Containers_string.KMP} for pattern search, or Regex libraries. *) type 'a gen = unit -> 'a option type 'a sequence = ('a -> unit) -> unit type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist] (** {2 Common Signature} *) module type S = sig type t val length : t -> int val blit : t -> int -> Bytes.t -> int -> int -> unit (** Similar to {!String.blit}. Compatible with the [-safe-string] option. @raise Invalid_argument if indices are not valid *) (* val blit_immut : t -> int -> t -> int -> int -> string (** Immutable version of {!blit}, returning a new string. [blit a i b j len] is the same as [b], but in which the range [j, ..., j+len] is replaced by [a.[i], ..., a.[i + len]]. @raise Invalid_argument if indices are not valid *) *) val fold : ('a -> char -> 'a) -> 'a -> t -> 'a (** Fold on chars by increasing index. @since 0.7 *) (** {2 Conversions} *) val to_gen : t -> char gen val to_seq : t -> char sequence val to_klist : t -> char klist val to_list : t -> char list val pp : Buffer.t -> t -> unit val print : Format.formatter -> t -> unit (** Print the string within quotes *) end (** {2 Strings} *) include module type of String val equal : string -> string -> bool val compare : string -> string -> int val hash : string -> int val init : int -> (int -> char) -> string (** Analog to [Array.init]. @since 0.3.3 *) (*$T init 3 (fun i -> [|'a'; 'b'; 'c'|].(i)) = "abc" init 0 (fun _ -> assert false) = "" *) val rev : string -> string (** [rev s] returns the reverse of [s] @since 0.17 *) (*$Q Q.printable_string (fun s -> s = rev (rev s)) Q.printable_string (fun s -> length s = length (rev s)) *) (*$= "abc" (rev "cba") "" (rev "") " " (rev " ") *) val pad : ?side:[`Left|`Right] -> ?c:char -> int -> string -> string (** [pad n str] ensures that [str] is at least [n] bytes long, and pads it on the [side] with [c] if it's not the case. @param side determines where padding occurs (default: [`Left]) @param c the char used to pad (default: ' ') @since 0.17 *) (*$= & ~printer:Q.Print.string " 42" (pad 4 "42") "0042" (pad ~c:'0' 4 "42") "4200" (pad ~side:`Right ~c:'0' 4 "42") "hello" (pad 4 "hello") "aaa" (pad ~c:'a' 3 "") "aaa" (pad ~side:`Right ~c:'a' 3 "") *) val of_char : char -> string (** [of_char 'a' = "a"] @since 0.19 *) val of_gen : char gen -> string val of_seq : char sequence -> string val of_klist : char klist -> string val of_list : char list -> string val of_array : char array -> string (*$T of_list ['a'; 'b'; 'c'] = "abc" of_list [] = "" *) val to_array : string -> char array val find : ?start:int -> sub:string -> string -> int (** Find [sub] in string, returns its first index or [-1]. *) (*$= & ~printer:string_of_int 1 (find ~sub:"bc" "abcd") ~-1 (find ~sub:"bc" "abd") 1 (find ~sub:"a" "_a_a_a_") 6 (find ~sub:"a" ~start:5 "a1a234a") *) (*$Q & ~count:10_000 Q.(pair printable_string printable_string) (fun (s1,s2) -> \ let i = find ~sub:s2 s1 in \ i < 0 || String.sub s1 i (length s2) = s2) *) val find_all : ?start:int -> sub:string -> string -> int gen (** [find_all ~sub s] finds all occurrences of [sub] in [s], even overlapping instances. @param start starting position in [s] @since 0.17 *) val find_all_l : ?start:int -> sub:string -> string -> int list (** [find_all ~sub s] finds all occurrences of [sub] in [s] and returns them in a list @param start starting position in [s] @since 0.17 *) (*$= & ~printer:Q.Print.(list int) [1; 6] (find_all_l ~sub:"bc" "abc aabc aab") [] (find_all_l ~sub:"bc" "abd") [76] (find_all_l ~sub:"aaaaaa" \ "aabbaabbaaaaabbbbabababababbbbabbbabbaaababbbaaabaabbaabbaaaabbababaaaabbaabaaaaaabbbaaaabababaabaaabbaabaaaabbababbaabbaaabaabbabababbbaabababaaabaaababbbaaaabbbaabaaababbabaababbaabbaaaaabababbabaababbbaaabbabbabababaaaabaaababaaaaabbabbaabbabbbbbbbbbbbbbbaabbabbbbbabbaaabbabbbbabaaaaabbababbbaaaa") *) val mem : ?start:int -> sub:string -> string -> bool (** [mem ~sub s] is true iff [sub] is a substring of [s] @since 0.12 *) (*$T mem ~sub:"bc" "abcd" not (mem ~sub:"a b" "abcd") *) val rfind : sub:string -> string -> int (** Find [sub] in string from the right, returns its first index or [-1]. Should only be used with very small [sub] @since 0.12 *) (*$= & ~printer:string_of_int 1 (rfind ~sub:"bc" "abcd") ~-1 (rfind ~sub:"bc" "abd") 5 (rfind ~sub:"a" "_a_a_a_") 4 (rfind ~sub:"bc" "abcdbcd") 6 (rfind ~sub:"a" "a1a234a") *) (*$Q & ~count:10_000 Q.(pair printable_string printable_string) (fun (s1,s2) -> \ let i = rfind ~sub:s2 s1 in \ i < 0 || String.sub s1 i (length s2) = s2) *) val replace : ?which:[`Left|`Right|`All] -> sub:string -> by:string -> string -> string (** [replace ~sub ~by s] replaces some occurrences of [sub] by [by] in [s] @param which decides whether the occurrences to replace are: {ul {- [`Left] first occurrence from the left (beginning)} {- [`Right] first occurrence from the right (end)} {- [`All] all occurrences (default)} } @raise Invalid_argument if [sub = ""] @since 0.14 *) (*$= & ~printer:CCFun.id (replace ~which:`All ~sub:"a" ~by:"b" "abcdabcd") "bbcdbbcd" (replace ~which:`Left ~sub:"a" ~by:"b" "abcdabcd") "bbcdabcd" (replace ~which:`Right ~sub:"a" ~by:"b" "abcdabcd") "abcdbbcd" (replace ~which:`All ~sub:"ab" ~by:"hello" " abab cdabb a") \ " hellohello cdhellob a" (replace ~which:`Left ~sub:"ab" ~by:"nope" " a b c d ") " a b c d " (replace ~sub:"a" ~by:"b" "1aa234a") "1bb234b" *) val is_sub : sub:string -> int -> string -> int -> len:int -> bool (** [is_sub ~sub i s j ~len] returns [true] iff the substring of [sub] starting at position [i] and of length [len] is a substring of [s] starting at position [j] *) val repeat : string -> int -> string (** The same string, repeated n times *) val prefix : pre:string -> string -> bool (** [prefix ~pre s] returns [true] iff [pre] is a prefix of [s] *) (*$T prefix ~pre:"aab" "aabcd" not (prefix ~pre:"ab" "aabcd") not (prefix ~pre:"abcd" "abc") *) val suffix : suf:string -> string -> bool (** [suffix ~suf s] returns [true] iff [suf] is a suffix of [s] @since 0.7 *) (*$T suffix ~suf:"cd" "abcd" not (suffix ~suf:"cd" "abcde") not (suffix ~suf:"abcd" "cd") *) val chop_prefix : pre:string -> string -> string option (** [chop_pref ~pre s] removes [pre] from [s] if [pre] really is a prefix of [s], returns [None] otherwise @since 0.17 *) (*$= & ~printer:Q.Print.(option string) (Some "cd") (chop_prefix ~pre:"aab" "aabcd") None (chop_prefix ~pre:"ab" "aabcd") None (chop_prefix ~pre:"abcd" "abc") *) val chop_suffix : suf:string -> string -> string option (** [chop_suffix ~suf s] removes [suf] from [s] if [suf] really is a suffix of [s], returns [None] otherwise @since 0.17 *) (*$= & ~printer:Q.Print.(option string) (Some "ab") (chop_suffix ~suf:"cd" "abcd") None (chop_suffix ~suf:"cd" "abcde") None (chop_suffix ~suf:"abcd" "cd") *) val take : int -> string -> string (** [take n s] keeps only the [n] first chars of [s] @since 0.17 *) val drop : int -> string -> string (** [drop n s] removes the [n] first chars of [s] @since 0.17 *) val take_drop : int -> string -> string * string (** [take_drop n s = take n s, drop n s] @since 0.17 *) (*$= ("ab", "cd") (take_drop 2 "abcd") ("abc", "") (take_drop 3 "abc") ("abc", "") (take_drop 5 "abc") *) val lines : string -> string list (** [lines s] returns a list of the lines of [s] (splits along '\n') @since 0.10 *) val lines_gen : string -> string gen (** [lines_gen s] returns a generator of the lines of [s] (splits along '\n') @since 0.10 *) val concat_gen : sep:string -> string gen -> string (** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep]. @since 0.10 *) val unlines : string list -> string (** [unlines l] concatenates all strings of [l], separated with '\n' @since 0.10 *) val unlines_gen : string gen -> string (** [unlines_gen g] concatenates all strings of [g], separated with '\n' @since 0.10 *) (*$Q Q.printable_string (fun s -> unlines (lines s) = s) Q.printable_string (fun s -> unlines_gen (lines_gen s) = s) *) val set : string -> int -> char -> string (** [set s i c] creates a new string which is a copy of [s], except for index [i], which becomes [c]. @raise Invalid_argument if [i] is an invalid index @since 0.12 *) (*$T set "abcd" 1 '_' = "a_cd" set "abcd" 0 '-' = "-bcd" (try ignore (set "abc" 5 '_'); false with Invalid_argument _ -> true) *) val iter : (char -> unit) -> string -> unit (** Alias to {!String.iter} @since 0.12 *) val iteri : (int -> char -> unit) -> string -> unit (** Iter on chars with their index @since 0.12 *) val map : (char -> char) -> string -> string (** Map chars @since 0.12 *) val mapi : (int -> char -> char) -> string -> string (** Map chars with their index @since 0.12 *) val filter_map : (char -> char option) -> string -> string (** @since 0.17 *) (*$= & ~printer:Q.Print.string "bcef" (filter_map \ (function 'c' -> None | c -> Some (Char.chr (Char.code c + 1))) "abcde") *) val filter : (char -> bool) -> string -> string (** @since 0.17 *) (*$= & ~printer:Q.Print.string "abde" (filter (function 'c' -> false | _ -> true) "abcdec") *) (*$Q Q.printable_string (fun s -> filter (fun _ -> true) s = s) *) val flat_map : ?sep:string -> (char -> string) -> string -> string (** Map each chars to a string, then concatenates them all @param sep optional separator between each generated string @since 0.12 *) val for_all : (char -> bool) -> string -> bool (** True for all chars? @since 0.12 *) val exists : (char -> bool) -> string -> bool (** True for some char? @since 0.12 *) include S with type t := string val ltrim : t -> t (** trim space on the left (see {!String.trim} for more details) @since NEXT_RELEASE *) val rtrim : t -> t (** trim space on the right (see {!String.trim} for more details) @since NEXT_RELEASE *) (*$= & ~printer:id "abc " (ltrim " abc ") " abc" (rtrim " abc ") *) (*$Q Q.(printable_string) (fun s -> \ String.trim s = (s |> ltrim |> rtrim)) Q.(printable_string) (fun s -> ltrim s = ltrim (ltrim s)) Q.(printable_string) (fun s -> rtrim s = rtrim (rtrim s)) Q.(printable_string) (fun s -> \ let s' = ltrim s in \ if s'="" then Q.assume_fail() else s'.[0] <> ' ') Q.(printable_string) (fun s -> \ let s' = rtrim s in \ if s'="" then Q.assume_fail() else s'.[String.length s'-1] <> ' ') *) (** {2 Operations on 2 strings} *) val map2 : (char -> char -> char) -> string -> string -> string (** Map pairs of chars @raise Invalid_argument if the strings have not the same length @since 0.12 *) val iter2: (char -> char -> unit) -> string -> string -> unit (** Iterate on pairs of chars @raise Invalid_argument if the strings have not the same length @since 0.12 *) val iteri2: (int -> char -> char -> unit) -> string -> string -> unit (** Iterate on pairs of chars with their index @raise Invalid_argument if the strings have not the same length @since 0.12 *) val fold2: ('a -> char -> char -> 'a) -> 'a -> string -> string -> 'a (** Fold on pairs of chars @raise Invalid_argument if the strings have not the same length @since 0.12 *) val for_all2 : (char -> char -> bool) -> string -> string -> bool (** All pairs of chars respect the predicate? @raise Invalid_argument if the strings have not the same length @since 0.12 *) val exists2 : (char -> char -> bool) -> string -> string -> bool (** Exists a pair of chars? @raise Invalid_argument if the strings have not the same length @since 0.12 *) (** {2 Ascii functions} Those functions are deprecated in {!String} since 4.03, so we provide a stable alias for them even in older versions *) val capitalize_ascii : string -> string (** See {!String}. @since 0.18 *) val uncapitalize_ascii : string -> string (** See {!String}. @since 0.18 *) val uppercase_ascii : string -> string (** See {!String}. @since 0.18 *) val lowercase_ascii : string -> string (** See {!String}. @since 0.18 *) val equal_caseless : string -> string -> bool (** Comparison without respect to {b ascii} lowercase. @since NEXT_RELEASE *) (*$T equal_caseless "foo" "FoO" equal_caseless "helLo" "HEllO" *) (*$Q Q.(pair printable_string printable_string) (fun (s1,s2) -> \ equal_caseless s1 s2 = (lowercase_ascii s1=lowercase_ascii s2)) Q.(printable_string) (fun s -> equal_caseless s s) Q.(printable_string) (fun s -> equal_caseless (uppercase_ascii s) s) *) (** {2 Finding} A relatively efficient algorithm for finding sub-strings @since 1.0 *) module Find : sig type _ pattern val compile : string -> [ `Direct ] pattern val rcompile : string -> [ `Reverse ] pattern val find : ?start:int -> pattern:[`Direct] pattern -> string -> int (** Search for [pattern] in the string, left-to-right @return the offset of the first match, -1 otherwise @param start offset in string at which we start *) val rfind : ?start:int -> pattern:[`Reverse] pattern -> string -> int (** Search for [pattern] in the string, right-to-left @return the offset of the start of the first match from the right, -1 otherwise @param start right-offset in string at which we start *) end (** {2 Splitting} *) module Split : sig val list_ : by:string -> string -> (string*int*int) list (** Eplit the given string along the given separator [by]. Should only be used with very small separators, otherwise use {!Containers_string.KMP}. @return a list of slices [(s,index,length)] that are separated by [by]. {!String.sub} can then be used to actually extract a string from the slice. @raise Failure if [by = ""] *) val gen : by:string -> string -> (string*int*int) gen val seq : by:string -> string -> (string*int*int) sequence val klist : by:string -> string -> (string*int*int) klist (** {6 Copying functions} Those split functions actually copy the substrings, which can be more convenient but less efficient in general *) val list_cpy : by:string -> string -> string list (*$T Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"] Split.list_cpy ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""] Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"] *) val gen_cpy : by:string -> string -> string gen val seq_cpy : by:string -> string -> string sequence val klist_cpy : by:string -> string -> string klist val left : by:string -> string -> (string * string) option (** Split on the first occurrence of [by] from the leftmost part of the string @since 0.12 *) val left_exn : by:string -> string -> string * string (** Split on the first occurrence of [by] from the leftmost part of the string @raise Not_found if [by] is not part of the string @since 0.16 *) (*$T Split.left ~by:" " "ab cde f g " = Some ("ab", "cde f g ") Split.left ~by:"__" "a__c__e_f" = Some ("a", "c__e_f") Split.left ~by:"_" "abcde" = None Split.left ~by:"bb" "abbc" = Some ("a", "c") Split.left ~by:"a_" "abcde" = None *) val right : by:string -> string -> (string * string) option (** Split on the first occurrence of [by] from the rightmost part of the string @since 0.12 *) val right_exn : by:string -> string -> string * string (** Split on the first occurrence of [by] from the rightmost part of the string @raise Not_found if [by] is not part of the string @since 0.16 *) (*$T Split.right ~by:" " "ab cde f g" = Some ("ab cde f", "g") Split.right ~by:"__" "a__c__e_f" = Some ("a__c", "e_f") Split.right ~by:"_" "abcde" = None Split.right ~by:"a_" "abcde" = None *) end val split_on_char : char -> string -> string list (** Split the string along the given char @since NEXT_RELEASE *) (*$= & ~printer:Q.Print.(list string) ["a"; "few"; "words"; "from"; "our"; "sponsors"] \ (split_on_char ' ' "a few words from our sponsors") *) (*$Q Q.(printable_string) (fun s -> \ let s = split_on_char ' ' s |> String.concat " " in \ s = (split_on_char ' ' s |> String.concat " ")) *) val split : by:string -> string -> string list (** Alias to {!Split.list_cpy} @since NEXT_RELEASE *) (** {2 Utils} *) val compare_versions : string -> string -> int (** [compare_versions a b] compares {i version strings} [a] and [b], considering that numbers are above text. @since 0.13 *) (*$T compare_versions "0.1.3" "0.1" > 0 compare_versions "10.1" "2.0" > 0 compare_versions "0.1.alpha" "0.1" > 0 compare_versions "0.3.dev" "0.4" < 0 compare_versions "0.foo" "0.0" < 0 compare_versions "1.2.3.4" "01.2.4.3" < 0 *) (*$Q Q.(pair printable_string printable_string) (fun (a,b) -> \ CCOrd.equiv (compare_versions a b) (CCOrd.opp compare_versions b a)) *) val edit_distance : string -> string -> int (** Edition distance between two strings. This satisfies the classical distance axioms: it is always positive, symmetric, and satisfies the formula [distance a b + distance b c >= distance a c] *) (*$Q Q.(string_of_size Gen.(0 -- 30)) (fun s -> \ edit_distance s s = 0) *) (* test that building a from s, and mutating one char of s, yields a string s' that is accepted by a. --> generate triples (s, i, c) where c is a char, s a non empty string and i a valid index in s *) (*$QR ( let gen = Q.Gen.( 3 -- 10 >>= fun len -> 0 -- (len-1) >>= fun i -> string_size (return len) >>= fun s -> char >|= fun c -> (s,i,c) ) in let small (s,_,_) = String.length s in Q.make ~small gen ) (fun (s,i,c) -> let s' = Bytes.of_string s in Bytes.set s' i c; edit_distance s (Bytes.to_string s') <= 1) *) (** {2 Slices} A contiguous part of a string *) module Sub : sig type t = string * int * int (** A string, an offset, and the length of the slice *) val make : string -> int -> len:int -> t val full : string -> t (** Full string *) val copy : t -> string (** Make a copy of the substring *) val underlying : t -> string val sub : t -> int -> int -> t (** Sub-slice *) val get : t -> int -> char (** [get s i] gets the [i]-th element, or fails @raise Invalid_argument if the index is not within [0... length -1] @since NEXT_RELEASE *) include S with type t := t (*$T let s = Sub.make "abcde" 1 3 in \ Sub.fold (fun acc x -> x::acc) [] s = ['d'; 'c'; 'b'] Sub.make "abcde" 1 3 |> Sub.copy = "bcd" Sub.full "abcde" |> Sub.copy = "abcde" *) (*$T let sub = Sub.make " abc " 1 ~len:3 in \ "\"abc\"" = (CCFormat.to_string Sub.print sub) *) (*$= & ~printer:(String.make 1) 'b' Sub.(get (make "abc" 1 ~len:2) 0) 'c' Sub.(get (make "abc" 1 ~len:2) 1) *) (*$QR Q.(printable_string_of_size Gen.(3--10)) (fun s -> let open Sequence.Infix in begin (0 -- (length s-2) >|= fun i -> i, Sub.make s i ~len:(length s-i)) >>= fun (i,sub) -> (0 -- (Sub.length sub-1) >|= fun j -> i,j,sub) end |> Sequence.for_all (fun (i,j,sub) -> Sub.get sub j = s.[i+j])) *) end