diff --git a/src/core/CCString.cppo.ml b/src/core/CCString.cppo.ml index 9eb3c329..8cc09ddd 100644 --- a/src/core/CCString.cppo.ml +++ b/src/core/CCString.cppo.ml @@ -309,6 +309,14 @@ let replace ?(which=`All) ~sub ~by s = Buffer.contents b module Split = struct + type drop_if_empty = { + first: bool; + last: bool; + } + + let no_drop = {first=false; last=false} + let default_drop = no_drop + type split_state = | SplitStop | SplitAt of int (* previous *) @@ -316,6 +324,7 @@ module Split = struct let rec _split ~by s state = match state with | SplitStop -> None | SplitAt prev -> _split_search ~by s prev + and _split_search ~by s prev = let j = Find.find ~pattern:by s ~start:prev in if j < 0 @@ -324,54 +333,64 @@ module Split = struct let _tuple3 x y z = x,y,z - let _mkgen ~by s k = + let _mkgen ~drop ~by s k = let state = ref (SplitAt 0) in let by = Find.compile by in - fun () -> + let rec next() = match _split ~by s !state with | None -> None + | Some (state', 0, 0) when drop.first -> state := state'; next() + | Some (_, i, 0) when drop.last && i = length s -> None | Some (state', i, len) -> state := state'; Some (k s i len) + in + next - let gen ~by s = _mkgen ~by s _tuple3 + let gen ?(drop=default_drop) ~by s = _mkgen ~drop ~by s _tuple3 - let gen_cpy ~by s = _mkgen ~by s String.sub + let gen_cpy ?(drop=default_drop) ~by s = _mkgen ~drop ~by s String.sub - let _mklist ~by s k = + let _mklist ~drop ~by s k = let by = Find.compile by in let rec build acc state = match _split ~by s state with | None -> List.rev acc + | Some (state',0,0) when drop.first -> build acc state' + | Some (_, i, 0) when drop.last && i=length s -> List.rev acc | Some (state', i, len) -> build (k s i len ::acc) state' in build [] (SplitAt 0) - let list_ ~by s = _mklist ~by s _tuple3 + let list_ ?(drop=default_drop) ~by s = _mklist ~drop ~by s _tuple3 - let list_cpy ~by s = _mklist ~by s String.sub + let list_cpy ?(drop=default_drop) ~by s = _mklist ~drop ~by s String.sub - let _mkklist ~by s 
k = + let _mkklist ~drop ~by s k = let by = Find.compile by in let rec make state () = match _split ~by s state with | None -> `Nil + | Some (state', 0, 0) when drop.first -> make state' () + | Some (_, i, 0) when drop.last && i=length s -> `Nil | Some (state', i, len) -> `Cons (k s i len , make state') in make (SplitAt 0) - let klist ~by s = _mkklist ~by s _tuple3 + let klist ?(drop=default_drop) ~by s = _mkklist ~drop ~by s _tuple3 - let klist_cpy ~by s = _mkklist ~by s String.sub + let klist_cpy ?(drop=default_drop) ~by s = _mkklist ~drop ~by s String.sub - let _mkseq ~by s f k = + let _mkseq ~drop ~by s f k = let by = Find.compile by in let rec aux state = match _split ~by s state with | None -> () + | Some (state', 0, 0) when drop.first -> aux state' + | Some (_, i, 0) when drop.last && i=length s -> () | Some (state', i, len) -> k (f s i len); aux state' in aux (SplitAt 0) - let seq ~by s = _mkseq ~by s _tuple3 - let seq_cpy ~by s = _mkseq ~by s String.sub + let seq ?(drop=default_drop) ~by s = _mkseq ~drop ~by s _tuple3 + let seq_cpy ?(drop=default_drop) ~by s = _mkseq ~drop ~by s String.sub let left_exn ~by s = let i = find ~sub:by s in @@ -393,9 +412,9 @@ module Split = struct end let split_on_char c s: _ list = - Split.list_cpy ~by:(String.make 1 c) s + Split.list_cpy ~drop:Split.no_drop ~by:(String.make 1 c) s -let split = Split.list_cpy +let split ~by s = Split.list_cpy ~by s let compare_versions a b = let of_int s = try Some (int_of_string s) with _ -> None in @@ -613,23 +632,43 @@ let of_array a = let to_array s = Array.init (String.length s) (fun i -> s.[i]) -let lines_gen s = Split.gen_cpy ~by:"\n" s +let lines_gen s = Split.gen_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s -let lines s = Split.list_cpy ~by:"\n" s +let lines s = Split.list_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s -let concat_gen ~sep g = +let concat_gen_buf ~sep g : Buffer.t = let b = Buffer.create 256 in let rec aux ~first () = match g () with - | None -> 
Buffer.contents b + | None -> b | Some s -> if not first then Buffer.add_string b sep; Buffer.add_string b s; aux ~first:false () in aux ~first:true () -let unlines l = String.concat "\n" l +let concat_gen ~sep g = + let buf = concat_gen_buf ~sep g in + Buffer.contents buf -let unlines_gen g = concat_gen ~sep:"\n" g +let unlines l = + let len = List.fold_left (fun n s -> n + 1 + String.length s) 0 l in + let buf = Bytes.create len in + let rec aux_blit i l = match l with + | [] -> + assert (i=len); + Bytes.to_string buf + | s :: tail -> + let len_s = String.length s in + Bytes.blit_string s 0 buf i len_s; + Bytes.set buf (i+len_s) '\n'; + aux_blit (i+len_s+1) tail + in + aux_blit 0 l + +let unlines_gen g = + let buf = concat_gen_buf ~sep:"\n" g in + Buffer.add_char buf '\n'; + Buffer.contents buf let set s i c = if i<0 || i>= String.length s then invalid_arg "CCString.set"; diff --git a/src/core/CCString.mli b/src/core/CCString.mli index e3ffb55b..7df41f5a 100644 --- a/src/core/CCString.mli +++ b/src/core/CCString.mli @@ -281,6 +281,14 @@ val lines_gen : string -> string gen (** [lines_gen s] returns a generator of the lines of [s] (splits along '\n') @since 0.10 *) +(*$= & ~printer:Q.Print.(list @@ Printf.sprintf "%S") + ["ab"; "c"] (lines "ab\nc") + ["ab"; "c"] (lines "ab\nc\n") + [] (lines "") + [""] (lines "\n") + [""; "a"] (lines "\na") +*) + val concat_gen : sep:string -> string gen -> string (** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep]. 
@since 0.10 *) @@ -293,9 +301,20 @@ val unlines_gen : string gen -> string (** [unlines_gen g] concatenates all strings of [g], separated with '\n' @since 0.10 *) +(*$= & ~printer:CCFun.id + "" (unlines []) + "ab\nc\n" (unlines ["ab"; "c"]) +*) + (*$Q - Q.printable_string (fun s -> unlines (lines s) = s) - Q.printable_string (fun s -> unlines_gen (lines_gen s) = s) + Q.printable_string (fun s -> trim (unlines (lines s)) = trim s) + Q.printable_string (fun s -> trim (unlines_gen (lines_gen s)) = trim s) +*) + +(*$Q + Q.(list string) (fun l -> \ + let l = unlines l |> lines in \ + l = (unlines l |> lines)) *) val set : string -> int -> char -> string @@ -477,7 +496,26 @@ end (** {2 Splitting} *) module Split : sig - val list_ : by:string -> string -> (string*int*int) list + (** Specification of what to do with empty blocks, as in [split ~by:"-" "-a-b-"]. + + - [{first=false; last=false}] will return [""; "a"; "b"; ""] + - [{first=true; last=false}] will return ["a"; "b"; ""] + - [{first=false; last=true}] will return [""; "a"; "b"] + - [{first=true; last=true}] will return ["a"; "b"] + + The default value of [drop] in all the functions below is [no_drop]. + @since NEXT_RELEASE + *) + type drop_if_empty = { + first: bool; + last: bool; + } + + val no_drop : drop_if_empty + (** Do not drop any group, even empty and on borders + @since NEXT_RELEASE *) + + val list_ : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) list (** Eplit the given string along the given separator [by]. Should only be used with very small separators, otherwise use {!Containers_string.KMP}. @@ -486,18 +524,18 @@ module Split : sig a string from the slice. 
@raise Failure if [by = ""] *) - val gen : by:string -> string -> (string*int*int) gen + val gen : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) gen - val seq : by:string -> string -> (string*int*int) sequence + val seq : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) sequence - val klist : by:string -> string -> (string*int*int) klist + val klist : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) klist (** {6 Copying functions} Those split functions actually copy the substrings, which can be more convenient but less efficient in general *) - val list_cpy : by:string -> string -> string list + val list_cpy : ?drop:drop_if_empty -> by:string -> string -> string list (*$T Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"] @@ -505,11 +543,11 @@ module Split : sig Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"] *) - val gen_cpy : by:string -> string -> string gen + val gen_cpy : ?drop:drop_if_empty -> by:string -> string -> string gen - val seq_cpy : by:string -> string -> string sequence + val seq_cpy : ?drop:drop_if_empty -> by:string -> string -> string sequence - val klist_cpy : by:string -> string -> string klist + val klist_cpy : ?drop:drop_if_empty -> by:string -> string -> string klist val left : by:string -> string -> (string * string) option (** Split on the first occurrence of [by] from the leftmost part of