mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 19:25:28 -05:00
closes #166 by adding a drop parameter to string split functions
This commit is contained in:
parent
2b67a1a679
commit
ed10db67b6
2 changed files with 108 additions and 31 deletions
|
|
@ -309,6 +309,14 @@ let replace ?(which=`All) ~sub ~by s =
|
|||
Buffer.contents b
|
||||
|
||||
module Split = struct
|
||||
type drop_if_empty = {
|
||||
first: bool;
|
||||
last: bool;
|
||||
}
|
||||
|
||||
let no_drop = {first=false; last=false}
|
||||
let default_drop = no_drop
|
||||
|
||||
type split_state =
|
||||
| SplitStop
|
||||
| SplitAt of int (* previous *)
|
||||
|
|
@ -316,6 +324,7 @@ module Split = struct
|
|||
let rec _split ~by s state = match state with
|
||||
| SplitStop -> None
|
||||
| SplitAt prev -> _split_search ~by s prev
|
||||
|
||||
and _split_search ~by s prev =
|
||||
let j = Find.find ~pattern:by s ~start:prev in
|
||||
if j < 0
|
||||
|
|
@ -324,54 +333,64 @@ module Split = struct
|
|||
|
||||
let _tuple3 x y z = x,y,z
|
||||
|
||||
let _mkgen ~by s k =
|
||||
let _mkgen ~drop ~by s k =
|
||||
let state = ref (SplitAt 0) in
|
||||
let by = Find.compile by in
|
||||
fun () ->
|
||||
let rec next() =
|
||||
match _split ~by s !state with
|
||||
| None -> None
|
||||
| Some (state', 0, 0) when drop.first -> state := state'; next()
|
||||
| Some (_, i, 0) when drop.last && i = length s -> None
|
||||
| Some (state', i, len) ->
|
||||
state := state';
|
||||
Some (k s i len)
|
||||
in
|
||||
next
|
||||
|
||||
let gen ~by s = _mkgen ~by s _tuple3
|
||||
let gen ?(drop=default_drop) ~by s = _mkgen ~drop ~by s _tuple3
|
||||
|
||||
let gen_cpy ~by s = _mkgen ~by s String.sub
|
||||
let gen_cpy ?(drop=default_drop) ~by s = _mkgen ~drop ~by s String.sub
|
||||
|
||||
let _mklist ~by s k =
|
||||
let _mklist ~drop ~by s k =
|
||||
let by = Find.compile by in
|
||||
let rec build acc state = match _split ~by s state with
|
||||
| None -> List.rev acc
|
||||
| Some (state',0,0) when drop.first -> build acc state'
|
||||
| Some (_, i, 0) when drop.last && i=length s -> List.rev acc
|
||||
| Some (state', i, len) ->
|
||||
build (k s i len ::acc) state'
|
||||
in
|
||||
build [] (SplitAt 0)
|
||||
|
||||
let list_ ~by s = _mklist ~by s _tuple3
|
||||
let list_ ?(drop=default_drop) ~by s = _mklist ~drop ~by s _tuple3
|
||||
|
||||
let list_cpy ~by s = _mklist ~by s String.sub
|
||||
let list_cpy ?(drop=default_drop) ~by s = _mklist ~drop ~by s String.sub
|
||||
|
||||
let _mkklist ~by s k =
|
||||
let _mkklist ~drop ~by s k =
|
||||
let by = Find.compile by in
|
||||
let rec make state () = match _split ~by s state with
|
||||
| None -> `Nil
|
||||
| Some (state', 0, 0) when drop.first -> make state' ()
|
||||
| Some (_, i, 0) when drop.last && i=length s -> `Nil
|
||||
| Some (state', i, len) ->
|
||||
`Cons (k s i len , make state')
|
||||
in make (SplitAt 0)
|
||||
|
||||
let klist ~by s = _mkklist ~by s _tuple3
|
||||
let klist ?(drop=default_drop) ~by s = _mkklist ~drop ~by s _tuple3
|
||||
|
||||
let klist_cpy ~by s = _mkklist ~by s String.sub
|
||||
let klist_cpy ?(drop=default_drop) ~by s = _mkklist ~drop ~by s String.sub
|
||||
|
||||
let _mkseq ~by s f k =
|
||||
let _mkseq ~drop ~by s f k =
|
||||
let by = Find.compile by in
|
||||
let rec aux state = match _split ~by s state with
|
||||
| None -> ()
|
||||
| Some (state', 0, 0) when drop.first -> aux state'
|
||||
| Some (_, i, 0) when drop.last && i=length s -> ()
|
||||
| Some (state', i, len) -> k (f s i len); aux state'
|
||||
in aux (SplitAt 0)
|
||||
|
||||
let seq ~by s = _mkseq ~by s _tuple3
|
||||
let seq_cpy ~by s = _mkseq ~by s String.sub
|
||||
let seq ?(drop=default_drop) ~by s = _mkseq ~drop ~by s _tuple3
|
||||
let seq_cpy ?(drop=default_drop) ~by s = _mkseq ~drop ~by s String.sub
|
||||
|
||||
let left_exn ~by s =
|
||||
let i = find ~sub:by s in
|
||||
|
|
@ -393,9 +412,9 @@ module Split = struct
|
|||
end
|
||||
|
||||
let split_on_char c s: _ list =
|
||||
Split.list_cpy ~by:(String.make 1 c) s
|
||||
Split.list_cpy ~drop:Split.no_drop ~by:(String.make 1 c) s
|
||||
|
||||
let split = Split.list_cpy
|
||||
let split ~by s = Split.list_cpy ~by s
|
||||
|
||||
let compare_versions a b =
|
||||
let of_int s = try Some (int_of_string s) with _ -> None in
|
||||
|
|
@ -613,23 +632,43 @@ let of_array a =
|
|||
let to_array s =
|
||||
Array.init (String.length s) (fun i -> s.[i])
|
||||
|
||||
let lines_gen s = Split.gen_cpy ~by:"\n" s
|
||||
let lines_gen s = Split.gen_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s
|
||||
|
||||
let lines s = Split.list_cpy ~by:"\n" s
|
||||
let lines s = Split.list_cpy ~drop:{Split.first=false; last=true} ~by:"\n" s
|
||||
|
||||
let concat_gen ~sep g =
|
||||
let concat_gen_buf ~sep g : Buffer.t =
|
||||
let b = Buffer.create 256 in
|
||||
let rec aux ~first () = match g () with
|
||||
| None -> Buffer.contents b
|
||||
| None -> b
|
||||
| Some s ->
|
||||
if not first then Buffer.add_string b sep;
|
||||
Buffer.add_string b s;
|
||||
aux ~first:false ()
|
||||
in aux ~first:true ()
|
||||
|
||||
let unlines l = String.concat "\n" l
|
||||
let concat_gen ~sep g =
|
||||
let buf = concat_gen_buf ~sep g in
|
||||
Buffer.contents buf
|
||||
|
||||
let unlines_gen g = concat_gen ~sep:"\n" g
|
||||
let unlines l =
|
||||
let len = List.fold_left (fun n s -> n + 1 + String.length s) 0 l in
|
||||
let buf = Bytes.create len in
|
||||
let rec aux_blit i l = match l with
|
||||
| [] ->
|
||||
assert (i=len);
|
||||
Bytes.to_string buf
|
||||
| s :: tail ->
|
||||
let len_s = String.length s in
|
||||
Bytes.blit_string s 0 buf i len_s;
|
||||
Bytes.set buf (i+len_s) '\n';
|
||||
aux_blit (i+len_s+1) tail
|
||||
in
|
||||
aux_blit 0 l
|
||||
|
||||
let unlines_gen g =
|
||||
let buf = concat_gen_buf ~sep:"\n" g in
|
||||
Buffer.add_char buf '\n';
|
||||
Buffer.contents buf
|
||||
|
||||
let set s i c =
|
||||
if i<0 || i>= String.length s then invalid_arg "CCString.set";
|
||||
|
|
|
|||
|
|
@ -281,6 +281,14 @@ val lines_gen : string -> string gen
|
|||
(** [lines_gen s] returns a generator of the lines of [s] (splits along '\n')
|
||||
@since 0.10 *)
|
||||
|
||||
(*$= & ~printer:Q.Print.(list @@ Printf.sprintf "%S")
|
||||
["ab"; "c"] (lines "ab\nc")
|
||||
["ab"; "c"] (lines "ab\nc\n")
|
||||
[] (lines "")
|
||||
[""] (lines "\n")
|
||||
[""; "a"] (lines "\na")
|
||||
*)
|
||||
|
||||
val concat_gen : sep:string -> string gen -> string
|
||||
(** [concat_gen ~sep g] concatenates all strings of [g], separated with [sep].
|
||||
@since 0.10 *)
|
||||
|
|
@ -293,9 +301,20 @@ val unlines_gen : string gen -> string
|
|||
(** [unlines_gen g] concatenates all strings of [g], separated with '\n'
|
||||
@since 0.10 *)
|
||||
|
||||
(*$= & ~printer:CCFun.id
|
||||
"" (unlines [])
|
||||
"ab\nc\n" (unlines ["ab"; "c"])
|
||||
*)
|
||||
|
||||
(*$Q
|
||||
Q.printable_string (fun s -> unlines (lines s) = s)
|
||||
Q.printable_string (fun s -> unlines_gen (lines_gen s) = s)
|
||||
Q.printable_string (fun s -> trim (unlines (lines s)) = trim s)
|
||||
Q.printable_string (fun s -> trim (unlines_gen (lines_gen s)) = trim s)
|
||||
*)
|
||||
|
||||
(*$Q
|
||||
Q.(list string) (fun l -> \
|
||||
let l = unlines l |> lines in \
|
||||
l = (unlines l |> lines))
|
||||
*)
|
||||
|
||||
val set : string -> int -> char -> string
|
||||
|
|
@ -477,7 +496,26 @@ end
|
|||
(** {2 Splitting} *)
|
||||
|
||||
module Split : sig
|
||||
val list_ : by:string -> string -> (string*int*int) list
|
||||
(** Specification of what to do with empty blocks, as in [split ~by:"-" "-a-b-"].
|
||||
|
||||
- [{first=false; last=false}] will return [""; "a"; "b"; ""]
|
||||
- [{first=true; last=false}] will return ["a"; "b" ""]
|
||||
- [{first=false; last=true}] will return [""; "a"; "b"]
|
||||
- [{first=true; last=true}] will return ["a"; "b"]
|
||||
|
||||
The default value of all remaining functions is [Drop_none].
|
||||
@since NEXT_RELEASE
|
||||
*)
|
||||
type drop_if_empty = {
|
||||
first: bool;
|
||||
last: bool;
|
||||
}
|
||||
|
||||
val no_drop : drop_if_empty
|
||||
(** Do not drop any group, even empty and on borders
|
||||
@since NEXT_RELEASE *)
|
||||
|
||||
val list_ : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) list
|
||||
(** Eplit the given string along the given separator [by]. Should only
|
||||
be used with very small separators, otherwise
|
||||
use {!Containers_string.KMP}.
|
||||
|
|
@ -486,18 +524,18 @@ module Split : sig
|
|||
a string from the slice.
|
||||
@raise Failure if [by = ""] *)
|
||||
|
||||
val gen : by:string -> string -> (string*int*int) gen
|
||||
val gen : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) gen
|
||||
|
||||
val seq : by:string -> string -> (string*int*int) sequence
|
||||
val seq : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) sequence
|
||||
|
||||
val klist : by:string -> string -> (string*int*int) klist
|
||||
val klist : ?drop:drop_if_empty -> by:string -> string -> (string*int*int) klist
|
||||
|
||||
(** {6 Copying functions}
|
||||
|
||||
Those split functions actually copy the substrings, which can be
|
||||
more convenient but less efficient in general *)
|
||||
|
||||
val list_cpy : by:string -> string -> string list
|
||||
val list_cpy : ?drop:drop_if_empty -> by:string -> string -> string list
|
||||
|
||||
(*$T
|
||||
Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"]
|
||||
|
|
@ -505,11 +543,11 @@ module Split : sig
|
|||
Split.list_cpy ~by:" " "hello world aie" = ["hello"; ""; "world"; "aie"]
|
||||
*)
|
||||
|
||||
val gen_cpy : by:string -> string -> string gen
|
||||
val gen_cpy : ?drop:drop_if_empty -> by:string -> string -> string gen
|
||||
|
||||
val seq_cpy : by:string -> string -> string sequence
|
||||
val seq_cpy : ?drop:drop_if_empty -> by:string -> string -> string sequence
|
||||
|
||||
val klist_cpy : by:string -> string -> string klist
|
||||
val klist_cpy : ?drop:drop_if_empty -> by:string -> string -> string klist
|
||||
|
||||
val left : by:string -> string -> (string * string) option
|
||||
(** Split on the first occurrence of [by] from the leftmost part of
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue