mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 11:15:31 -05:00
expose CCString.Find for efficient sub-string searching
This commit is contained in:
parent
83b0744a1b
commit
46cee7096c
2 changed files with 46 additions and 19 deletions
|
|
@ -212,37 +212,42 @@ module Find = struct
|
||||||
| P_char _ -> 1
|
| P_char _ -> 1
|
||||||
| P_KMP p -> kmp_pattern_length p
|
| P_KMP p -> kmp_pattern_length p
|
||||||
|
|
||||||
let compile ~sub : [`Direct] pattern =
|
let compile sub : [`Direct] pattern =
|
||||||
if length sub=1
|
if length sub=1
|
||||||
then P_char sub.[0]
|
then P_char sub.[0]
|
||||||
else P_KMP (kmp_compile sub)
|
else P_KMP (kmp_compile sub)
|
||||||
|
|
||||||
let rcompile ~sub : [`Reverse] pattern =
|
let rcompile sub : [`Reverse] pattern =
|
||||||
if length sub=1
|
if length sub=1
|
||||||
then P_char sub.[0]
|
then P_char sub.[0]
|
||||||
else P_KMP (kmp_rcompile sub)
|
else P_KMP (kmp_rcompile sub)
|
||||||
|
|
||||||
let find ~(pattern:[`Direct] pattern) s start = match pattern with
|
let find ?(start=0) ~(pattern:[`Direct] pattern) s = match pattern with
|
||||||
| P_char c ->
|
| P_char c ->
|
||||||
(try String.index_from s start c with Not_found -> -1)
|
(try String.index_from s start c with Not_found -> -1)
|
||||||
| P_KMP pattern -> kmp_find ~pattern s start
|
| P_KMP pattern -> kmp_find ~pattern s start
|
||||||
|
|
||||||
let rfind ~(pattern:[`Reverse] pattern) s start = match pattern with
|
let rfind ?start ~(pattern:[`Reverse] pattern) s =
|
||||||
| P_char c ->
|
let start = match start with
|
||||||
|
| Some n -> n
|
||||||
|
| None -> String.length s - 1
|
||||||
|
in
|
||||||
|
match pattern with
|
||||||
|
| P_char c ->
|
||||||
(try String.rindex_from s start c with Not_found -> -1)
|
(try String.rindex_from s start c with Not_found -> -1)
|
||||||
| P_KMP pattern -> kmp_rfind ~pattern s start
|
| P_KMP pattern -> kmp_rfind ~pattern s start
|
||||||
end
|
end
|
||||||
|
|
||||||
let find ?(start=0) ~sub =
|
let find ?(start=0) ~sub =
|
||||||
let pattern = Find.compile ~sub in
|
let pattern = Find.compile sub in
|
||||||
fun s -> Find.find ~pattern s start
|
fun s -> Find.find ~pattern s ~start
|
||||||
|
|
||||||
let find_all ?(start=0) ~sub =
|
let find_all ?(start=0) ~sub =
|
||||||
let pattern = Find.compile ~sub in
|
let pattern = Find.compile sub in
|
||||||
fun s ->
|
fun s ->
|
||||||
let i = ref start in
|
let i = ref start in
|
||||||
fun () ->
|
fun () ->
|
||||||
let res = Find.find ~pattern s !i in
|
let res = Find.find ~pattern s ~start:!i in
|
||||||
if res = ~-1 then None
|
if res = ~-1 then None
|
||||||
else (
|
else (
|
||||||
i := res + 1; (* possible overlap *)
|
i := res + 1; (* possible overlap *)
|
||||||
|
|
@ -259,8 +264,8 @@ let find_all_l ?start ~sub s =
|
||||||
let mem ?start ~sub s = find ?start ~sub s >= 0
|
let mem ?start ~sub s = find ?start ~sub s >= 0
|
||||||
|
|
||||||
let rfind ~sub =
|
let rfind ~sub =
|
||||||
let pattern = Find.rcompile ~sub in
|
let pattern = Find.rcompile sub in
|
||||||
fun s -> Find.rfind ~pattern s (String.length s-1)
|
fun s -> Find.rfind ~pattern s ~start:(String.length s-1)
|
||||||
|
|
||||||
(* Replace substring [s.[pos]....s.[pos+len-1]] by [by] in [s] *)
|
(* Replace substring [s.[pos]....s.[pos+len-1]] by [by] in [s] *)
|
||||||
let replace_at_ ~pos ~len ~by s =
|
let replace_at_ ~pos ~len ~by s =
|
||||||
|
|
@ -281,11 +286,11 @@ let replace ?(which=`All) ~sub ~by s =
|
||||||
if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s
|
if i>=0 then replace_at_ ~pos:i ~len:(String.length sub) ~by s else s
|
||||||
| `All ->
|
| `All ->
|
||||||
(* compile search pattern only once *)
|
(* compile search pattern only once *)
|
||||||
let pattern = Find.compile ~sub in
|
let pattern = Find.compile sub in
|
||||||
let b = Buffer.create (String.length s) in
|
let b = Buffer.create (String.length s) in
|
||||||
let start = ref 0 in
|
let start = ref 0 in
|
||||||
while !start < String.length s do
|
while !start < String.length s do
|
||||||
let i = Find.find ~pattern s !start in
|
let i = Find.find ~pattern s ~start:!start in
|
||||||
if i>=0 then (
|
if i>=0 then (
|
||||||
(* between last and cur occurrences *)
|
(* between last and cur occurrences *)
|
||||||
Buffer.add_substring b s !start (i- !start);
|
Buffer.add_substring b s !start (i- !start);
|
||||||
|
|
@ -308,7 +313,7 @@ module Split = struct
|
||||||
| SplitStop -> None
|
| SplitStop -> None
|
||||||
| SplitAt prev -> _split_search ~by s prev
|
| SplitAt prev -> _split_search ~by s prev
|
||||||
and _split_search ~by s prev =
|
and _split_search ~by s prev =
|
||||||
let j = Find.find ~pattern:by s prev in
|
let j = Find.find ~pattern:by s ~start:prev in
|
||||||
if j < 0
|
if j < 0
|
||||||
then Some (SplitStop, prev, String.length s - prev)
|
then Some (SplitStop, prev, String.length s - prev)
|
||||||
else Some (SplitAt (j+Find.pattern_length by), prev, j-prev)
|
else Some (SplitAt (j+Find.pattern_length by), prev, j-prev)
|
||||||
|
|
@ -317,7 +322,7 @@ module Split = struct
|
||||||
|
|
||||||
let _mkgen ~by s k =
|
let _mkgen ~by s k =
|
||||||
let state = ref (SplitAt 0) in
|
let state = ref (SplitAt 0) in
|
||||||
let by = Find.compile ~sub:by in
|
let by = Find.compile by in
|
||||||
fun () ->
|
fun () ->
|
||||||
match _split ~by s !state with
|
match _split ~by s !state with
|
||||||
| None -> None
|
| None -> None
|
||||||
|
|
@ -330,7 +335,7 @@ module Split = struct
|
||||||
let gen_cpy ~by s = _mkgen ~by s String.sub
|
let gen_cpy ~by s = _mkgen ~by s String.sub
|
||||||
|
|
||||||
let _mklist ~by s k =
|
let _mklist ~by s k =
|
||||||
let by = Find.compile ~sub:by in
|
let by = Find.compile by in
|
||||||
let rec build acc state = match _split ~by s state with
|
let rec build acc state = match _split ~by s state with
|
||||||
| None -> List.rev acc
|
| None -> List.rev acc
|
||||||
| Some (state', i, len) ->
|
| Some (state', i, len) ->
|
||||||
|
|
@ -343,7 +348,7 @@ module Split = struct
|
||||||
let list_cpy ~by s = _mklist ~by s String.sub
|
let list_cpy ~by s = _mklist ~by s String.sub
|
||||||
|
|
||||||
let _mkklist ~by s k =
|
let _mkklist ~by s k =
|
||||||
let by = Find.compile ~sub:by in
|
let by = Find.compile by in
|
||||||
let rec make state () = match _split ~by s state with
|
let rec make state () = match _split ~by s state with
|
||||||
| None -> `Nil
|
| None -> `Nil
|
||||||
| Some (state', i, len) ->
|
| Some (state', i, len) ->
|
||||||
|
|
@ -355,7 +360,7 @@ module Split = struct
|
||||||
let klist_cpy ~by s = _mkklist ~by s String.sub
|
let klist_cpy ~by s = _mkklist ~by s String.sub
|
||||||
|
|
||||||
let _mkseq ~by s f k =
|
let _mkseq ~by s f k =
|
||||||
let by = Find.compile ~sub:by in
|
let by = Find.compile by in
|
||||||
let rec aux state = match _split ~by s state with
|
let rec aux state = match _split ~by s state with
|
||||||
| None -> ()
|
| None -> ()
|
||||||
| Some (state', i, len) -> k (f s i len); aux state'
|
| Some (state', i, len) -> k (f s i len); aux state'
|
||||||
|
|
|
||||||
|
|
@ -399,6 +399,28 @@ val uppercase_ascii : string -> string
|
||||||
val lowercase_ascii : string -> string
|
val lowercase_ascii : string -> string
|
||||||
(** See {!String}. @since 0.18 *)
|
(** See {!String}. @since 0.18 *)
|
||||||
|
|
||||||
|
(** {2 Finding}
|
||||||
|
|
||||||
|
A relatively efficient algorithm for finding sub-strings
|
||||||
|
@since 1.0 *)
|
||||||
|
|
||||||
|
module Find : sig
|
||||||
|
type _ pattern
|
||||||
|
|
||||||
|
val compile : string -> [ `Direct ] pattern
|
||||||
|
|
||||||
|
val rcompile : string -> [ `Reverse ] pattern
|
||||||
|
|
||||||
|
val find : ?start:int -> pattern:[`Direct] pattern -> string -> int
|
||||||
|
(** Search for [pattern] in the string, left-to-right
|
||||||
|
@return the offset of the first match, or -1 if there is no match
|
||||||
|
@param start offset in the string at which the search starts (default 0) *)
|
||||||
|
|
||||||
|
val rfind : ?start:int -> pattern:[`Reverse] pattern -> string -> int
|
||||||
|
(** Search for [pattern] in the string, right-to-left
|
||||||
|
@return the offset of the start of the first match found when searching from the right, or -1 if there is no match
|
||||||
|
@param start offset in the string at which the right-to-left search starts (default [String.length s - 1]) *)
|
||||||
|
end
|
||||||
|
|
||||||
(** {2 Splitting} *)
|
(** {2 Splitting} *)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue