mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-06 19:25:28 -05:00
move CCString into core/, since it deals with a basic type;
also add some features to CCString (Sub and Split modules to deal with slices and splitting by a string)
This commit is contained in:
parent
712472521d
commit
a356d811a7
6 changed files with 415 additions and 222 deletions
|
|
@ -60,6 +60,7 @@ structures comprise (some modules in `misc/`, some other in `core/`):
|
||||||
- `CCKTree`, an abstract lazy tree structure (similar to what `CCKlist` is to lists)
|
- `CCKTree`, an abstract lazy tree structure (similar to what `CCKlist` is to lists)
|
||||||
- small modules (basic types, utilities):
|
- small modules (basic types, utilities):
|
||||||
- `CCInt`
|
- `CCInt`
|
||||||
|
- `CCString` (basic string operations)
|
||||||
- `CCPair` (cartesian products)
|
- `CCPair` (cartesian products)
|
||||||
- `CCOpt` (options)
|
- `CCOpt` (options)
|
||||||
- `CCFun` (function combinators)
|
- `CCFun` (function combinators)
|
||||||
|
|
@ -70,6 +71,12 @@ structures comprise (some modules in `misc/`, some other in `core/`):
|
||||||
- `CCHash` (hashing combinators)
|
- `CCHash` (hashing combinators)
|
||||||
- `CCError` (monadic error handling)
|
- `CCError` (monadic error handling)
|
||||||
|
|
||||||
|
### String
|
||||||
|
|
||||||
|
In the module `Containers_string`:
|
||||||
|
- `Levenshtein`: edit distance between two strings
|
||||||
|
- `KMP`: Knuth-Morris-Pratt substring algorithm
|
||||||
|
|
||||||
### Misc
|
### Misc
|
||||||
|
|
||||||
- `PHashtbl`, a polymorphic hashtable (with open addressing)
|
- `PHashtbl`, a polymorphic hashtable (with open addressing)
|
||||||
|
|
|
||||||
4
_oasis
4
_oasis
|
|
@ -47,13 +47,13 @@ Library "containers"
|
||||||
CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError,
|
CCMultiSet, CCBV, CCPrint, CCPersistentHashtbl, CCError,
|
||||||
CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash,
|
CCHeap, CCList, CCOpt, CCPair, CCFun, CCHash,
|
||||||
CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd,
|
CCKList, CCInt, CCBool, CCArray, CCBatch, CCOrd,
|
||||||
CCRandom, CCLinq, CCKTree, CCTrie
|
CCRandom, CCLinq, CCKTree, CCTrie, CCString
|
||||||
FindlibName: containers
|
FindlibName: containers
|
||||||
|
|
||||||
Library "containers_string"
|
Library "containers_string"
|
||||||
Path: string
|
Path: string
|
||||||
Pack: true
|
Pack: true
|
||||||
Modules: KMP, CCString, Levenshtein
|
Modules: KMP, Levenshtein
|
||||||
FindlibName: string
|
FindlibName: string
|
||||||
FindlibParent: containers
|
FindlibParent: containers
|
||||||
|
|
||||||
|
|
|
||||||
265
core/CCString.ml
Normal file
265
core/CCString.ml
Normal file
|
|
@ -0,0 +1,265 @@
|
||||||
|
|
||||||
|
(*
|
||||||
|
copyright (c) 2013-2014, simon cruanes
|
||||||
|
all rights reserved.
|
||||||
|
|
||||||
|
redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Basic String Utils} *)
|
||||||
|
|
||||||
|
(* Iterator types used for conversions; they are structural aliases, so no
   dependency on an iterator library is needed. *)
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]

(** Signature shared by the string-like types of this module: each provides
    a length, a blit operation, conversions to character iterators and a
    printer into a [Buffer.t]. *)
module type S = sig
  type t

  val length : t -> int

  val blit : t -> int -> t -> int -> int -> unit
  (** See {!String.blit} *)

  (** {2 Conversions} *)

  val to_gen : t -> char gen

  val to_seq : t -> char sequence

  val to_klist : t -> char klist

  val pp : Buffer.t -> t -> unit
end
|
||||||
|
|
||||||
|
type t = string

(** Equality on strings. The [string] annotation keeps the comparison
    monomorphic, so the compiler does not have to go through the generic
    polymorphic equality. *)
let equal (a:string) b = a=b

(** Total order on strings, see {!String.compare}. *)
let compare = String.compare

(** Hash of a string, computed with {!Hashtbl.hash}; the annotation restricts
    this function to strings. *)
let hash (s:string) = Hashtbl.hash s

(** Number of characters of the string, see {!String.length}. *)
let length = String.length
|
||||||
|
|
||||||
|
(* [_is_sub ~sub i s j ~len] compares the [len] characters of [sub] starting
   at [i] with the [len] characters of [s] starting at [j].
   Only the upper bound on the [s] side is checked here ([false] is returned
   when the window does not fit in [s]); the caller is responsible for
   passing non-negative [i], [j], [len] with [i+len <= String.length sub]. *)
let _is_sub ~sub i s j ~len =
  let rec check k =
    k = len || (sub.[i+k] = s.[j+k] && check (k+1))
  in
  j+len <= String.length s && check 0

(** [is_sub ~sub i s j ~len] returns [true] iff the slice of [sub] starting
    at [i] and of length [len] is equal to the slice of [s] starting at [j]
    and of the same length. Returns [false] if [s] is too short to contain
    [len] characters from position [j].
    @raise Invalid_argument if [i], [j] or [len] is negative, or if
    [i+len] exceeds the length of [sub]. *)
let is_sub ~sub i s j ~len =
  (* negative values used to slip through and either read unrelated
     characters or fail later with an unrelated out-of-bounds error *)
  if i < 0 || j < 0 || len < 0 || i+len > String.length sub
  then invalid_arg "String.is_sub";
  _is_sub ~sub i s j ~len
|
||||||
|
|
||||||
|
|
||||||
|
(** Splitting of a string along a (small) separator. Every splitter comes in
    two flavours: one returning slices [(s, index, length)] of the original
    string, and a [_cpy] one returning fresh copies of the substrings. *)
module Split = struct
  type split_state =
    | SplitStop
    | SplitAt of int (* index at which the next slice starts *)

  (* The empty separator matches at every position without consuming any
     character, so splitting along it would never make progress. *)
  let _check_by by =
    if by = "" then failwith "CCString.Split: empty separator"

  (* [by_j... prefix of s_i...] ? *)
  let rec _is_prefix ~by s i j =
    j = String.length by
    ||
    ( i < String.length s &&
      s.[i] = by.[j] &&
      _is_prefix ~by s (i+1) (j+1)
    )

  (* One step of splitting: from [state], return the next state together
     with the offset and length of the next slice, or [None] when the whole
     string has been consumed. *)
  let rec _split ~by s state = match state with
    | SplitStop -> None
    | SplitAt prev -> _split_search ~by s prev prev
  and _split_search ~by s prev i =
    if i >= String.length s
      then Some (SplitStop, prev, String.length s - prev)
    (* a separator located exactly at [prev] is a legitimate match: it
       delimits an empty slice (leading or doubled separator) *)
    else if _is_prefix ~by s i 0
      then Some (SplitAt (i+String.length by), prev, i-prev)
    else _split_search ~by s prev (i+1)

  let _tuple3 x y z = x,y,z

  (* In all the [_mk*] functions, [k s i len] builds the value returned for
     the slice of [s] starting at [i] and of length [len]. *)

  let _mkgen ~by s k =
    _check_by by;
    let state = ref (SplitAt 0) in
    fun () ->
      match _split ~by s !state with
        | None -> None
        | Some (state', i, len) ->
            state := state';
            Some (k s i len)

  let gen ~by s = _mkgen ~by s _tuple3

  let gen_cpy ~by s = _mkgen ~by s String.sub

  let _mklist ~by s k =
    _check_by by;
    let rec build acc state = match _split ~by s state with
      | None -> List.rev acc
      | Some (state', i, len) ->
          build (k s i len ::acc) state'
    in
    build [] (SplitAt 0)

  let list_ ~by s = _mklist ~by s _tuple3

  let list_cpy ~by s = _mklist ~by s String.sub

  let _mkklist ~by s k =
    _check_by by;
    let rec make state () = match _split ~by s state with
      | None -> `Nil
      | Some (state', i, len) ->
          `Cons (k s i len , make state')
    in make (SplitAt 0)

  let klist ~by s = _mkklist ~by s _tuple3

  let klist_cpy ~by s = _mkklist ~by s String.sub

  (* [f] builds the value for a slice, [k] is the consumer of the sequence;
     the separator is validated as soon as the sequence is created. *)
  let _mkseq ~by s f =
    _check_by by;
    fun k ->
      let rec aux state = match _split ~by s state with
        | None -> ()
        | Some (state', i, len) -> k (f s i len); aux state'
      in aux (SplitAt 0)

  let seq ~by s = _mkseq ~by s _tuple3
  let seq_cpy ~by s = _mkseq ~by s String.sub

  (*$T
    Split.list_cpy ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"]
    Split.list_cpy ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""]
  *)
end
|
||||||
|
|
||||||
|
(* note: inefficient (naive scan: every candidate position is compared
   character by character against [sub]) *)

(** [find ?start ~sub s] returns the smallest index [i >= start] at which
    [sub] occurs in [s], or [-1] if there is no such index. [start] defaults
    to [0]. A negative [start] is not validated here; it ends up in an
    out-of-bounds access on [s] as soon as a character is compared. *)
let find ?(start=0) ~sub s =
  let n = String.length sub in
  (* last position at which [sub] still fits in [s]. The bound is inclusive:
     with a strict bound, a match ending on the last character of [s] was
     never examined. *)
  let last = String.length s - n in
  let rec matches_at i k =
    k = n || (s.[i+k] = sub.[k] && matches_at i (k+1))
  in
  let rec search i =
    if i > last then -1
    else if matches_at i 0 then i
    else search (i+1)
  in
  search start
|
||||||
|
|
||||||
|
(** [repeat s n] is the concatenation of [n] copies of [s].
    Both [n >= 0] and [s <> ""] are enforced with assertions. *)
let repeat s n =
  assert (n>=0);
  let len = String.length s in
  assert(len > 0);
  (* accumulate in a [Buffer] rather than mutating a string obtained from
     [String.create]: strings are not meant to be modified in place *)
  let buf = Buffer.create (len * n) in
  for _i = 1 to n do
    Buffer.add_string buf s
  done;
  Buffer.contents buf
|
||||||
|
|
||||||
|
(** [prefix ~pre s] is [true] iff [s] starts with [pre]. *)
let prefix ~pre s =
  let n = String.length pre in
  (* [same_from k]: do [s] and [pre] agree on every index in [k .. n-1]? *)
  let rec same_from k =
    k = n || (s.[k] = pre.[k] && same_from (k+1))
  in
  (* the length test comes first so that [s] is never indexed out of bounds *)
  n <= String.length s && same_from 0
|
||||||
|
|
||||||
|
(** Same as {!String.blit}. *)
let blit src src_pos dst dst_pos len = String.blit src src_pos dst dst_pos len

(* Generator over the [len] characters of [s] starting at offset [i0].
   The bounds are not checked here (hence [unsafe_get]): callers must pass a
   valid slice of [s]. *)
let _to_gen s i0 len =
  let stop = i0 + len in
  let pos = ref i0 in
  fun () ->
    let cur = !pos in
    if cur = stop then None
    else begin
      pos := cur + 1;
      Some (String.unsafe_get s cur)
    end

(** Generator over all the characters of the string, in order. *)
let to_gen s = _to_gen s 0 (String.length s)
|
||||||
|
|
||||||
|
(** Build a string from the characters produced by the generator [g],
    consuming it until it returns [None]. *)
let of_gen g =
  let buf = Buffer.create 32 in
  let more = ref true in
  while !more do
    match g () with
    | Some c -> Buffer.add_char buf c
    | None -> more := false
  done;
  Buffer.contents buf
|
||||||
|
|
||||||
|
(** [to_seq s k] calls [k] on every character of [s], from left to right. *)
let to_seq s k =
  for i = 0 to String.length s - 1 do
    k s.[i]
  done

(** Build a string out of the characters that the sequence [seq] passes to
    its continuation. *)
let of_seq seq =
  let buf = Buffer.create 32 in
  seq (fun c -> Buffer.add_char buf c);
  Buffer.contents buf
|
||||||
|
|
||||||
|
(* Lazy list of the [len] characters of [s] starting at offset [i]; each
   cell is only computed when the corresponding thunk is forced. *)
let rec _to_klist s i len () =
  if len=0 then `Nil
  else `Cons (s.[i], _to_klist s (i+1)(len-1))

(** Build a string from a lazy list of characters, forcing the whole list.
    The characters are accumulated in a [Buffer], like {!of_gen} and
    {!of_seq} do, instead of filling a mutable string in place. *)
let of_klist l =
  let buf = Buffer.create 32 in
  let rec aux l = match l () with
    | `Nil -> Buffer.contents buf
    | `Cons (c, l') -> Buffer.add_char buf c; aux l'
  in aux l

(** Lazy list of all the characters of the string, in order. *)
let to_klist s = _to_klist s 0 (String.length s)
|
||||||
|
|
||||||
|
(** Append [s] to the buffer, surrounded by double quotes. The content of
    [s] is written verbatim: no escaping is performed. *)
let pp buf s = Printf.bprintf buf "\"%s\"" s
|
||||||
|
|
||||||
|
(** Slices: a contiguous part of a string, represented without copying. *)
module Sub = struct
  (* underlying string, offset of the slice, length of the slice *)
  type t = string * int * int

  (** [make s i ~len] is the slice of [s] starting at [i], of length [len].
      @raise Invalid_argument if the slice does not fit in [s]. *)
  let make s i ~len =
    if i<0||len<0||i+len > String.length s then invalid_arg "CCString.Sub.make";
    s,i,len

  (** Slice covering the whole string. *)
  let full s = s, 0, String.length s

  (** Fresh string with the content of the slice. *)
  let copy (s,i,len) = String.sub s i len

  (** The string the slice points into. *)
  let underlying (s,_,_) = s

  (** [sub slice i' len'] is the sub-slice of [slice] starting at offset [i']
      (relative to the start of [slice]) and of length [len'].
      @raise Invalid_argument if [i'] or [len'] is negative, or if the
      sub-slice is not included in [slice]. *)
  let sub (s,i,len) i' len' =
    (* negative values must be rejected too, otherwise the result could
       start before the beginning of the enclosing slice *)
    if i' < 0 || len' < 0 || i' + len' > len
    then invalid_arg "CCString.Sub.sub";
    (s, i+i',len')

  let length (_,_,l) = l

  (** [blit a o1 b o2 len] copies [len] characters from slice [a], starting
      at offset [o1], into slice [b], starting at offset [o2]; offsets are
      relative to the slices.
      @raise Invalid_argument if an offset or [len] is negative, or if one
      of the two ranges is not included in its slice. *)
  let blit (a1,i1,len1) o1 (a2,i2,len2) o2 len =
    (* without the sign checks, a negative offset could address characters
       of the underlying string that lie outside the slice *)
    if o1 < 0 || o2 < 0 || len < 0 || o1+len>len1 || o2+len>len2
    then invalid_arg "CCString.Sub.blit";
    String.blit a1 (i1+o1) a2 (i2+o2) len

  let to_gen (s,i,len) = _to_gen s i len
  let to_seq (s,i,len) k =
    for i=i to i+len-1 do k s.[i] done
  let to_klist (s,i,len) = _to_klist s i len

  (** Append the content of the slice to the buffer, between double
      quotes. *)
  let pp buf (s,i,len) =
    Buffer.add_char buf '"';
    Buffer.add_substring buf s i len;
    Buffer.add_char buf '"'
end
|
||||||
141
core/CCString.mli
Normal file
141
core/CCString.mli
Normal file
|
|
@ -0,0 +1,141 @@
|
||||||
|
|
||||||
|
(*
|
||||||
|
copyright (c) 2013-2014, simon cruanes
|
||||||
|
all rights reserved.
|
||||||
|
|
||||||
|
redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer. redistributions in binary
|
||||||
|
form must reproduce the above copyright notice, this list of conditions and the
|
||||||
|
following disclaimer in the documentation and/or other materials provided with
|
||||||
|
the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*)
|
||||||
|
|
||||||
|
(** {1 Basic String Utils}
|
||||||
|
Consider using {!Containers_string.KMP} for pattern search, or Regex
|
||||||
|
libraries. *)
|
||||||
|
|
||||||
|
type 'a gen = unit -> 'a option
|
||||||
|
type 'a sequence = ('a -> unit) -> unit
|
||||||
|
type 'a klist = unit -> [`Nil | `Cons of 'a * 'a klist]
|
||||||
|
|
||||||
|
(** {2 Common Signature} *)
|
||||||
|
|
||||||
|
module type S = sig
|
||||||
|
type t
|
||||||
|
|
||||||
|
val length : t -> int
|
||||||
|
|
||||||
|
val blit : t -> int -> t -> int -> int -> unit
|
||||||
|
(** See {!String.blit} *)
|
||||||
|
|
||||||
|
(** {2 Conversions} *)
|
||||||
|
|
||||||
|
val to_gen : t -> char gen
|
||||||
|
|
||||||
|
val to_seq : t -> char sequence
|
||||||
|
|
||||||
|
val to_klist : t -> char klist
|
||||||
|
|
||||||
|
val pp : Buffer.t -> t -> unit
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Strings} *)
|
||||||
|
|
||||||
|
type t = string
|
||||||
|
|
||||||
|
val equal : t -> t -> bool
|
||||||
|
|
||||||
|
val compare : t -> t -> int
|
||||||
|
|
||||||
|
val hash : t -> int
|
||||||
|
|
||||||
|
val of_gen : char gen -> t
|
||||||
|
|
||||||
|
val of_seq : char sequence -> t
|
||||||
|
|
||||||
|
val of_klist : char klist -> t
|
||||||
|
|
||||||
|
val find : ?start:int -> sub:t -> t -> int
|
||||||
|
(** Find [sub] in the string, returns its first index or -1.
|
||||||
|
Should only be used with very small [sub] *)
|
||||||
|
|
||||||
|
val is_sub : sub:t -> int -> t -> int -> len:int -> bool
|
||||||
|
(** [is_sub ~sub i s j ~len] returns [true] iff the substring of
|
||||||
|
[sub] starting at position [i] and of length [len],
|
||||||
|
is a substring of [s] starting at position [j] *)
|
||||||
|
|
||||||
|
val repeat : t -> int -> t
|
||||||
|
(** The same string, repeated n times *)
|
||||||
|
|
||||||
|
val prefix : pre:t -> t -> bool
|
||||||
|
(** [prefix ~pre s] returns [true] iff [pre] is a prefix of [s] *)
|
||||||
|
|
||||||
|
include S with type t := t
|
||||||
|
|
||||||
|
(** {2 Splitting} *)
|
||||||
|
|
||||||
|
module Split : sig
|
||||||
|
val list_ : by:t -> t -> (t*int*int) list
|
||||||
|
(** split the given string along the given separator [by]. Should only
|
||||||
|
be used with very small separators, otherwise
|
||||||
|
use {!Containers_string.KMP}.
|
||||||
|
@return a list of (string,index,length) slices of [s] that are
|
||||||
|
separated by [by]. {!String.sub} can then be used to actually extract
|
||||||
|
the slice.
|
||||||
|
@raise Failure if [by = ""] *)
|
||||||
|
|
||||||
|
val gen : by:t -> t -> (t*int*int) gen
|
||||||
|
|
||||||
|
val seq : by:t -> t -> (t*int*int) sequence
|
||||||
|
|
||||||
|
val klist : by:t -> t -> (t*int*int) klist
|
||||||
|
|
||||||
|
(** {6 Copying functions}
|
||||||
|
|
||||||
|
Those split functions actually copy the substrings, which can be
|
||||||
|
more convenient but less efficient in general *)
|
||||||
|
|
||||||
|
val list_cpy : by:t -> t -> t list
|
||||||
|
|
||||||
|
val gen_cpy : by:t -> t -> t gen
|
||||||
|
|
||||||
|
val seq_cpy : by:t -> t -> t sequence
|
||||||
|
|
||||||
|
val klist_cpy : by:t -> t -> t klist
|
||||||
|
end
|
||||||
|
|
||||||
|
(** {2 Slices} A contiguous part of a string *)
|
||||||
|
|
||||||
|
module Sub : sig
|
||||||
|
type t = string * int * int
|
||||||
|
(** A string, an offset, and the length of the slice *)
|
||||||
|
|
||||||
|
val make : string -> int -> len:int -> t
|
||||||
|
|
||||||
|
val full : string -> t
|
||||||
|
(** Full string *)
|
||||||
|
|
||||||
|
val copy : t -> string
|
||||||
|
(** Make a copy of the substring *)
|
||||||
|
|
||||||
|
val underlying : t -> string
|
||||||
|
|
||||||
|
val sub : t -> int -> int -> t
|
||||||
|
(** Sub-slice *)
|
||||||
|
|
||||||
|
include S with type t := t
|
||||||
|
end
|
||||||
|
|
@ -1,150 +0,0 @@
|
||||||
|
|
||||||
(*
|
|
||||||
copyright (c) 2013-2014, simon cruanes
|
|
||||||
all rights reserved.
|
|
||||||
|
|
||||||
redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
redistributions of source code must retain the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer. redistributions in binary
|
|
||||||
form must reproduce the above copyright notice, this list of conditions and the
|
|
||||||
following disclaimer in the documentation and/or other materials provided with
|
|
||||||
the distribution.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
||||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
||||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*)
|
|
||||||
|
|
||||||
(** {1 Basic String Utils} *)
|
|
||||||
|
|
||||||
type t = string
|
|
||||||
|
|
||||||
let equal a b = a=b
|
|
||||||
|
|
||||||
let compare = String.compare
|
|
||||||
|
|
||||||
let hash s = Hashtbl.hash s
|
|
||||||
|
|
||||||
type 'a gen = unit -> 'a option
|
|
||||||
type 'a sequence = ('a -> unit) -> unit
|
|
||||||
|
|
||||||
let is_sub ~sub i s j =
|
|
||||||
let rec check k =
|
|
||||||
if i + k = String.length sub
|
|
||||||
then true
|
|
||||||
else sub.[i + k] = s.[j+k] && check (k+1)
|
|
||||||
in
|
|
||||||
check 0
|
|
||||||
|
|
||||||
(* note: quite inefficient if [by] is long *)
|
|
||||||
let split_gen ~by s =
|
|
||||||
let len_by = String.length by in
|
|
||||||
assert (len_by > 0);
|
|
||||||
let n = String.length s in
|
|
||||||
let prev = ref 0 in
|
|
||||||
let stop = ref false in
|
|
||||||
let rec search i =
|
|
||||||
if !stop then None
|
|
||||||
else if i >= n
|
|
||||||
then (
|
|
||||||
stop := true;
|
|
||||||
Some (String.sub s !prev (n- !prev)) (* done *)
|
|
||||||
)
|
|
||||||
else if is_prefix i 0
|
|
||||||
then (
|
|
||||||
let p = !prev in
|
|
||||||
prev := i+len_by;
|
|
||||||
Some (String.sub s p (i-p))
|
|
||||||
)
|
|
||||||
else search (i+1)
|
|
||||||
and is_prefix i j =
|
|
||||||
if j = len_by
|
|
||||||
then true
|
|
||||||
else if i = n
|
|
||||||
then false
|
|
||||||
else s.[i] = by.[j] && is_prefix (i+1) (j+1)
|
|
||||||
in
|
|
||||||
fun () ->
|
|
||||||
search !prev
|
|
||||||
|
|
||||||
let split_seq ~by s k =
|
|
||||||
let rec aux g = match g () with
|
|
||||||
| None -> ()
|
|
||||||
| Some x -> k x; aux g
|
|
||||||
in aux (split_gen ~by s)
|
|
||||||
|
|
||||||
let split ~by s =
|
|
||||||
let rec aux g acc = match g () with
|
|
||||||
| None -> List.rev acc
|
|
||||||
| Some x -> aux g (x::acc)
|
|
||||||
in aux (split_gen ~by s) []
|
|
||||||
|
|
||||||
(*$T
|
|
||||||
split ~by:"," "aa,bb,cc" = ["aa"; "bb"; "cc"]
|
|
||||||
split ~by:"--" "a--b----c--" = ["a"; "b"; ""; "c"; ""]
|
|
||||||
*)
|
|
||||||
|
|
||||||
(* note: inefficient *)
|
|
||||||
let find ?(start=0) ~sub s =
|
|
||||||
let n = String.length sub in
|
|
||||||
let i = ref start in
|
|
||||||
try
|
|
||||||
while !i + n < String.length s do
|
|
||||||
if is_sub ~sub 0 s !i then raise Exit;
|
|
||||||
incr i
|
|
||||||
done;
|
|
||||||
-1
|
|
||||||
with Exit ->
|
|
||||||
!i
|
|
||||||
|
|
||||||
let repeat s n =
|
|
||||||
assert (n>=0);
|
|
||||||
let len = String.length s in
|
|
||||||
assert(len > 0);
|
|
||||||
let buf = String.create (len * n) in
|
|
||||||
for i = 0 to n-1 do
|
|
||||||
String.blit s 0 buf (i * len) len;
|
|
||||||
done;
|
|
||||||
buf
|
|
||||||
|
|
||||||
let prefix ~pre s =
|
|
||||||
String.length pre <= String.length s &&
|
|
||||||
(let i = ref 0 in
|
|
||||||
while !i < String.length pre && s.[!i] = pre.[!i] do incr i done;
|
|
||||||
!i = String.length pre)
|
|
||||||
|
|
||||||
|
|
||||||
let to_gen s =
|
|
||||||
let i = ref 0 in
|
|
||||||
fun () ->
|
|
||||||
if !i = String.length s then None
|
|
||||||
else (
|
|
||||||
let c = String.unsafe_get s !i in
|
|
||||||
incr i;
|
|
||||||
Some c
|
|
||||||
)
|
|
||||||
|
|
||||||
let of_gen g =
|
|
||||||
let b = Buffer.create 32 in
|
|
||||||
let rec aux () = match g () with
|
|
||||||
| None -> Buffer.contents b
|
|
||||||
| Some c -> Buffer.add_char b c; aux ()
|
|
||||||
in aux ()
|
|
||||||
|
|
||||||
let to_seq s k = String.iter k s
|
|
||||||
|
|
||||||
let of_seq seq =
|
|
||||||
let b= Buffer.create 32 in
|
|
||||||
seq (Buffer.add_char b);
|
|
||||||
Buffer.contents b
|
|
||||||
|
|
||||||
let pp = Buffer.add_string
|
|
||||||
|
|
@ -1,70 +0,0 @@
|
||||||
|
|
||||||
(*
|
|
||||||
copyright (c) 2013-2014, simon cruanes
|
|
||||||
all rights reserved.
|
|
||||||
|
|
||||||
redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
redistributions of source code must retain the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer. redistributions in binary
|
|
||||||
form must reproduce the above copyright notice, this list of conditions and the
|
|
||||||
following disclaimer in the documentation and/or other materials provided with
|
|
||||||
the distribution.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
||||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
||||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*)
|
|
||||||
|
|
||||||
(** {1 Basic String Utils}
|
|
||||||
Consider using KMP instead. *)
|
|
||||||
|
|
||||||
type t = string
|
|
||||||
|
|
||||||
val equal : t -> t -> bool
|
|
||||||
|
|
||||||
val compare : t -> t -> int
|
|
||||||
|
|
||||||
val hash : t -> int
|
|
||||||
|
|
||||||
type 'a gen = unit -> 'a option
|
|
||||||
type 'a sequence = ('a -> unit) -> unit
|
|
||||||
|
|
||||||
val is_sub : sub:t -> int -> t -> int -> bool
|
|
||||||
(** [is_sub ~sub i s j] returns [true] iff [sub] is a substring of [s] starting
|
|
||||||
at position [j] *)
|
|
||||||
|
|
||||||
val split : by:t -> t -> t list
|
|
||||||
(** split the given string along the given separator [by]. Should only
|
|
||||||
be used with very small separators, otherwise use {!KMP}.
|
|
||||||
@raise Failure if [by = ""] *)
|
|
||||||
|
|
||||||
val split_gen : by:t -> t -> t gen
|
|
||||||
|
|
||||||
val split_seq : by:t -> t -> t sequence
|
|
||||||
|
|
||||||
val find : ?start:int -> sub:t -> t -> int
|
|
||||||
(** Find [sub] in the string, returns its first index or -1.
|
|
||||||
Should only be used with very small [sub] *)
|
|
||||||
|
|
||||||
val repeat : t -> int -> t
|
|
||||||
(** The same string, repeated n times *)
|
|
||||||
|
|
||||||
val prefix : pre:t -> t -> bool
|
|
||||||
(** [str_prefix ~pre s] returns [true] iff [pre] is a prefix of [s] *)
|
|
||||||
|
|
||||||
val to_gen : t -> char gen
|
|
||||||
val of_gen : char gen -> t
|
|
||||||
|
|
||||||
val to_seq : t -> char sequence
|
|
||||||
val of_seq : char sequence -> t
|
|
||||||
|
|
||||||
val pp : Buffer.t -> t -> unit
|
|
||||||
Loading…
Add table
Reference in a new issue