mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 11:45:31 -05:00
closes #166 by adding a drop parameter to string split functions
This commit is contained in:
parent
2b67a1a679
commit
ed10db67b6
2 changed files with 108 additions and 31 deletions
|
|
@ -309,6 +309,14 @@ let replace ?(which=`All) ~sub ~by s =
|
||||||
Buffer.contents b
|
Buffer.contents b
|
||||||
|
|
||||||
module Split = struct
|
module Split = struct
|
||||||
|
type drop_if_empty = {
|
||||||
|
first: bool;
|
||||||
|
last: bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
let no_drop = {first=false; last=false}
|
||||||
|
let default_drop = no_drop
|
||||||
|
|
||||||
type split_state =
|
type split_state =
|
||||||
| SplitStop
|
| SplitStop
|
||||||
| SplitAt of int (* previous *)
|
| SplitAt of int (* previous *)
|
||||||
|
|
@ -316,6 +324,7 @@ module Split = struct
|
||||||
let rec _split ~by s state = match state with
|
let rec _split ~by s state = match state with
|
||||||
| SplitStop -> None
|
| SplitStop -> None
|
||||||
| SplitAt prev -> _split_search ~by s prev
|
| SplitAt prev -> _split_search ~by s prev
|
||||||
|
|
||||||
and _split_search ~by s prev =
|
and _split_search ~by s prev =
|
||||||
let j = Find.find ~pattern:by s ~start:prev in
|
let j = Find.find ~pattern:by s ~start:prev in
|
||||||
if j < 0
|
if j < 0
|
||||||
|
|
@ -324,54 +333,64 @@ module Split = struct
|
||||||
|
|
||||||
let _tuple3 x y z = x,y,z
|
let _tuple3 x y z = x,y,z
|
||||||
|
|
||||||
let _mkgen ~by s k =
|
let _mkgen ~drop ~by s k =
|
||||||
let state = ref (SplitAt 0) in
|
let state = ref (SplitAt 0) in
|
||||||
let by = Find.compile by in
|
let by = Find.compile by in
|
||||||
fun () ->
|
let rec next() =
|
||||||
match _split ~by s !state with
|
match _split ~by s !state with
|
||||||
| None -> None
|
| None -> None
|
||||||
|
| Some (state', 0, 0) when drop.first -> state := state'; next()
|
||||||
|
| Some (_, i, 0) when drop.last && i = length s -> None
|
||||||
| Some (state', i, len) ->
|
| Some (state', i, len) ->
|
||||||
state := state';
|
state := state';
|
||||||
Some (k s i len)
|
Some (k s i len)
|
||||||
|
in
|
||||||
|
next
|
||||||
|
|
||||||
let gen ~by s = _mkgen ~by s _tuple3
|
let gen ?(drop=default_drop) ~by s = _mkgen ~drop ~by s _tuple3
|
||||||
|
|
||||||
let gen_cpy ~by s = _mkgen ~by s String.sub
|
let gen_cpy ?(drop=default_drop) ~by s = _mkgen ~drop ~by s String.sub
|
||||||
|
|
||||||
let _mklist ~by s k =
|
let _mklist ~drop ~by s k =
|
||||||
let by = Find.compile by in
|
let by = Find.compile by in
|
||||||
let rec build acc state = match _split ~by s state with
|
let rec build acc state = match _split ~by s state with
|
||||||
| None -> List.rev acc
|
| None -> List.rev acc
|
||||||
|
| Some (state',0,0) when drop.first -> build acc state'
|
||||||
|
| Some (_, i, 0) when drop.last && i=length s -> List.rev acc
|
||||||
| Some (state', i, len) ->
|
| Some (state', i, len) ->
|
||||||
build (k s i len ::acc) state'
|
build (k s i len ::acc) state'
|
||||||
in
|
in
|
||||||
build [] (SplitAt 0)
|
build [] (SplitAt 0)
|
||||||
|
|
||||||
let list_ ~by s = _mklist ~by s _tuple3
|
let list_ ?(drop=default_drop) ~by s = _mklist ~drop ~by s _tuple3
|
||||||
|
|
||||||
let list_cpy ~by s = _mklist ~by s String.sub
|
let list_cpy ?(drop=default_drop) ~by s = _mklist ~drop ~by s String.sub
|
||||||
|
|
||||||
let _mkklist ~by s k =
|
let _mkklist ~drop ~by s k =
|
||||||
let by = Find.compile by in
|
let by = Find.compile by in
|
||||||
let rec make state () = match _split ~by s state with
|
let rec make state () = match _split ~by s state with
|
||||||
| None -> `Nil
|
| None -> `Nil
|
||||||
|
| Some (state', 0, 0) when drop.first -> make state' ()
|
||||||
|
| Some (_, i, 0) when drop.last && i=length s -> `Nil
|
||||||
| Some (state', i, len) ->
|
| Some (state', i, len) ->
|
||||||
`Cons (k s i len , make state')
|
`Cons (k s i len , make state')
|
||||||
in make (SplitAt 0)
|
in make (SplitAt 0)
|
||||||
|
|
||||||
let klist ~by s = _mkklist ~by s _tuple3
|
let klist ?(drop=default_drop) ~by s = _mkklist ~drop ~by s _tuple3
|
||||||
|
|
||||||
let klist_cpy ~by s = _mkklist ~by s String.sub
|
let klist_cpy ?(drop=default_drop) ~by s = _mkklist ~drop ~by s String.sub
|
||||||
|
|
||||||
let _mkseq ~by s f k =
|
let _mkseq ~drop ~by s f k =
|
||||||
let by = Find.compile by in
|
let by = Find.compile by in
|
||||||
let rec aux state = match _split ~by s state with
|
let rec aux state = match _split ~by s state with
|
||||||
| None -> ()
|
| None -> ()
|
||||||
|
| Some (state', 0, 0) when drop.first -> aux state'
|
||||||
|
| Some (_, i, 0) when drop.last && i=length s -> ()
|
||||||
| Some (state', i, len) -> k (f s i len); aux state'
|
| Some (state', i, len) -> k (f s i len); aux state'
|
||||||
in aux (SplitAt 0)
|
in aux (SplitAt 0)
|
||||||
|
|
||||||
let seq ~by s = _mkseq ~by s _tuple3
|
let seq ?(drop=default_drop) ~by s = _mkseq ~drop ~by s _tuple3
|
||||||
let seq_cpy ~by s = _mkseq ~by s String.sub
|
let seq_cpy ?(drop=default_drop) ~by s = _mkseq ~drop ~by s String.sub
|
||||||
|
|
||||||
let left_exn ~by s =
|
let left_exn ~by s =
|
||||||
let i = find ~sub:by s in
|
let i = find ~sub:by s in
|
||||||
|
|
@ -393,9 +412,9 @@ module Split = struct
|
||||||
end
|
end
|
||||||
|
|
||||||
let split_on_char c s: _ list =
|
let split_on_char c s: _ list =
|
||||||
Split.list_cpy ~by:(String.make 1 c) s
|
Split.list_cpy ~drop:Split.no_drop ~by:(String.make 1 c) s
|
||||||
|
|
||||||
let split = Split.list_cpy
|
let split ~by s = Split.list_cpy ~by s
|
||||||
|
|
||||||
let compare_versions a b =
|
let compare_versions a b =
|
||||||
let of_int s = try Some (int_of_string s) with _ -> None in
|
let of_int s = try Some (int_of_string s) with _ -> None in
|
||||||
|
|
@ -613,23 +632,43 @@ let of_array a =
|
||||||
let to_array s =
|
let to_array s =
|
||||||
Array.init (String.length s) (fun i -> s.[i])
|
Array.init (String.length s) (fun i -> s.[i])
|
||||||
|
|
||||||
let lines_gen s = Split.gen_cpy ~by:"\n" s
|
let lines_gen s = Split.gen_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s
|
||||||
|
|
||||||
let lines s = Split.list_cpy ~by:"\n" s
|
let lines s = Split.list_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s
|
||||||
|
|
||||||
let concat_gen ~sep g =
|
let concat_gen_buf ~sep g : Buffer.t =
|
||||||
let b = Buffer.create 256 in
|
let b = Buffer.create 256 in
|
||||||
let rec aux ~first () = match g () with
|
let rec aux ~first () = match g () with
|
||||||
| None -> Buffer.contents b
|
| None -> b
|
||||||
| Some s ->
|
| Some s ->
|
||||||
if not first then Buffer.add_string b sep;
|
if not first then Buffer.add_string b sep;
|
||||||
Buffer.add_string b s;
|
Buffer.add_string b s;
|
||||||
aux ~first:false ()
|
aux ~first:false ()
|
||||||
in aux ~first:true ()
|
in aux ~first:true ()
|
||||||
|
|
||||||
let unlines l = String.concat "\n" l
|
(* [concat_gen ~sep g] drains the generator [g] and returns all of its
   strings joined together, with [sep] inserted between consecutive
   elements. The accumulation itself is delegated to [concat_gen_buf];
   this wrapper only extracts the final string from the buffer. *)
let concat_gen ~sep g = Buffer.contents (concat_gen_buf ~sep g)
|
||||||
|
|
||||||
let unlines_gen g = concat_gen ~sep:"\n" g
|
(* [unlines l] concatenates the strings of [l], writing a '\n' after
   every element (including the last one). The empty list gives "". *)
let unlines l =
  (* every line contributes its own length plus one byte for the '\n' *)
  let total =
    List.fold_left (fun acc line -> acc + String.length line + 1) 0 l
  in
  let out = Buffer.create total in
  List.iter
    (fun line ->
       Buffer.add_string out line;
       Buffer.add_char out '\n')
    l;
  Buffer.contents out
|
||||||
|
|
||||||
|
(* [unlines_gen g] drains the generator [g] and concatenates its strings,
   writing a '\n' after every element (including the last one).

   Each element is terminated individually rather than joining with a
   separator and then appending a final '\n': the latter produces "\n"
   for an empty generator, whereas [unlines []] is "". Terminating each
   element keeps [unlines_gen] in agreement with [unlines] on every
   input, including the empty one. *)
let unlines_gen g =
  let buf = Buffer.create 256 in
  let rec aux () = match g () with
    | None -> Buffer.contents buf
    | Some s ->
      Buffer.add_string buf s;
      Buffer.add_char buf '\n';
      aux ()
  in
  aux ()
|
||||||
|
|
||||||
let set s i c =
|
let set s i c =
|
||||||
if i<0 || i>= String.length s then invalid_arg "CCString.set";
|
if i<0 || i>= String.length s then invalid_arg "CCString.set";
|
||||||
|
|
|
||||||
|
|
@ -281,6 +281,14 @@ val lines_gen : string -> string gen
|
||||||
(** [lines_gen s] returns a generator of the lines of [s] (splits along '\n')
|
(** [lines_gen s] returns a generator of the lines of [s] (splits along '\n')
|
||||||
@since 0.10 *)
|
@since 0.10 *)
|
||||||
|
|
||||||
|
(*$= & ~printer:Q.Print.(list @@ Printf.sprintf "%S")
|
||||||
|
["ab"; "c"] (lines "ab\nc")
|
||||||
|
["ab"; "c"] (lines "ab\nc\n")
|
||||||
|
[] (lines "")
|
||||||
|
[""] (lines "\n")
|
||||||
|
[""; "a"] (lines "\na")
|
||||||
|
*)
|
||||||
|
|
||||||
val concat_gen : sep:string -> string gen -> string
|
val concat_gen : sep:string -> string gen -> string
|
||||||
(** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep].
|
(** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep].
|
||||||
@since 0.10 *)
|
@since 0.10 *)
|
||||||
|
|
@ -293,9 +301,20 @@ val unlines_gen : string gen -> string
|
||||||
(** [unlines_gen g] concatenates all strings of [g], separated with '\n'
|
(** [unlines_gen g] concatenates all strings of [g], separated with '\n'
|
||||||
@since 0.10 *)
|
@since 0.10 *)
|
||||||
|
|
||||||
|
(*$= & ~printer:CCFun.id
|
||||||
|
"" (unlines [])
|
||||||
|
"ab\nc\n" (unlines ["ab"; "c"])
|
||||||
|
*)
|
||||||
|
|
||||||
(*$Q
|
(*$Q
|
||||||
Q.printable_string (fun s -> unlines (lines s) = s)
|
Q.printable_string (fun s -> trim (unlines (lines s)) = trim s)
|
||||||
Q.printable_string (fun s -> unlines_gen (lines_gen s) = s)
|
Q.printable_string (fun s -> trim (unlines_gen (lines_gen s)) = trim s)
|
||||||
|
*)
|
||||||
|
|
||||||
|
(*$Q
|
||||||
|
Q.(list string) (fun l -> \
|
||||||
|
let l = unlines l |> lines in \
|
||||||
|
l = (unlines l |> lines))
|
||||||
*)
|
*)
|
||||||
|
|
||||||
val set : string -> int -> char -> string
|
val set : string -> int -> char -> string
|
||||||
|
|
@ -477,7 +496,26 @@ end
|
||||||
(** {2 Splitting} *)
|
(** {2 Splitting} *)
|
||||||
|
|
||||||
module Split : sig
|
module Split : sig
|
||||||
val list_ : by:string -> string -> (string*int*int) list
|
(** Specification of what to do with empty blocks, as in [split ~by:"-" "-a-b-"].
|
||||||
|
|
||||||
|
- [{first=false; last=false}] will return [""; "a"; "b"; ""]
|
||||||
|
- [{first=true; last=false}] will return ["a"; "b"; ""]
|
||||||
|
- [{first=false; last=true}] will return [""; "a"; "b"]
|
||||||
|
- [{first=true; last=true}] will return ["a"; "b"]
|
||||||
|
|
||||||
|
The default value of [drop] for all the functions below is {!no_drop}.
|
||||||
|
@since NEXT_RELEASE
|
||||||
|
*)
|
||||||
|
type drop_if_empty = {
|
||||||
|
first: bool;
|
||||||
|
last: bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
val no_drop : drop_if_empty
|
||||||
|
(** Do not drop any group, even empty and on borders
|
||||||
|
@since NEXT_RELEASE *)
|
||||||
|
|
||||||
|
val list_ : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) list
|
||||||
(** Split the given string along the given separator [by]. Should only
|
(** Split the given string along the given separator [by]. Should only
|
||||||
be used with very small separators, otherwise
|
be used with very small separators, otherwise
|
||||||
use {!Containers_string.KMP}.
|
use {!Containers_string.KMP}.
|
||||||
|
|
@ -486,18 +524,18 @@ module Split : sig
|
||||||
a string from the slice.
|
a string from the slice.
|
||||||
@raise Failure if [by = ""] *)
|
@raise Failure if [by = ""] *)
|
||||||
|
|
||||||
val gen : by:string -> string -> (string*int*int) gen
|
val gen : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) gen
|
||||||
|
|
||||||
val seq : by:string -> string -> (string*int*int) sequence
|
val seq : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) sequence
|
||||||
|
|
||||||
val klist : by:string -> string -> (string*int*int) klist
|
val klist : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) klist
|
||||||
|
|
||||||
(** {6 Copying functions}
|
(** {6 Copying functions}
|
||||||
|
|
||||||
Those split functions actually copy the substrings, which can be
|
Those split functions actually copy the substrings, which can be
|
||||||
more convenient but less efficient in general *)
|
more convenient but less efficient in general *)
|
||||||
|
|
||||||
val list_cpy : by:string -> string -> string list
|
val list_cpy : ?drop:drop_if_empty -> by:string -> string -> string list
|
||||||
|
|
||||||
(*$T
|
(*$T
|
||||||
Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"]
|
Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"]
|
||||||
|
|
@ -505,11 +543,11 @@ module Split : sig
|
||||||
Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"]
|
Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"]
|
||||||
*)
|
*)
|
||||||
|
|
||||||
val gen_cpy : by:string -> string -> string gen
|
val gen_cpy : ?drop:drop_if_empty -> by:string -> string -> string gen
|
||||||
|
|
||||||
val seq_cpy : by:string -> string -> string sequence
|
val seq_cpy : ?drop:drop_if_empty -> by:string -> string -> string sequence
|
||||||
|
|
||||||
val klist_cpy : by:string -> string -> string klist
|
val klist_cpy : ?drop:drop_if_empty -> by:string -> string -> string klist
|
||||||
|
|
||||||
val left : by:string -> string -> (string * string) option
|
val left : by:string -> string -> (string * string) option
|
||||||
(** Split on the first occurrence of [by] from the leftmost part of
|
(** Split on the first occurrence of [by] from the leftmost part of
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue