containers.bigarray, containing CCBigstring

This commit is contained in:
Simon Cruanes 2014-12-18 16:25:26 +01:00
parent d15d704e7d
commit a63d095722
7 changed files with 367 additions and 1 deletions

View file

@ -8,6 +8,7 @@ S src/sexp/
S src/threads/
S src/misc
S src/string
S src/bigarray
S src/pervasives
S benchs
S examples
@ -22,6 +23,7 @@ B _build/src/sexp/
B _build/src/threads/
B _build/src/misc
B _build/src/string
B _build/src/bigarray
B _build/src/pervasives
B _build/benchs
B _build/examples
@ -31,4 +33,5 @@ PKG benchmark
PKG threads
PKG threads.posix
PKG lwt
PKG bigarray
FLG -w +a -w -4 -w -44

View file

@ -75,6 +75,8 @@ QTESTABLE=$(filter-out $(DONTTEST), \
$(wildcard src/advanced/*.mli) \
$(wildcard src/iter/*.ml) \
$(wildcard src/iter/*.mli) \
$(wildcard src/bigarray/*.ml) \
$(wildcard src/bigarray/*.mli) \
)
qtest-clean:

12
_oasis
View file

@ -37,6 +37,10 @@ Flag "bench"
Description: Build and run benchmarks
Default: false
Flag "bigarray"
Description: Build modules that depend on bigarrays
Default: false
Library "containers"
Path: src/core
Modules: CCVector, CCPrint, CCError, CCHeap, CCList, CCOpt, CCPair,
@ -86,6 +90,13 @@ Library "containers_advanced"
FindlibParent: containers
BuildDepends: containers, sequence
Library "containers_bigarray"
Path: src/bigarray
Modules: CCBigstring
FindlibName: bigarray
FindlibParent: containers
BuildDepends: containers, bigarray, bytes
Library "containers_pervasives"
Path: src/pervasives
Modules: CCPervasives
@ -193,6 +204,7 @@ Executable run_qtest
Build$: flag(tests)
BuildDepends: containers, containers.misc, containers.string, containers.iter,
containers.io, containers.advanced, containers.sexp,
containers.bigarray,
sequence, gen, oUnit, QTest2Lib
Executable run_tests

View file

@ -83,6 +83,10 @@ Iterators:
{!modules: Levenshtein KMP}
{4 Bigarrays}
{!modules: CCBigstring}
{4 Advanced}
{!modules: CCLinq CCCat CCBatch}

2
opam
View file

@ -3,7 +3,7 @@ author: "Simon Cruanes"
maintainer: "simon.cruanes@inria.fr"
build: [
["./configure" "--prefix" prefix "--disable-thread" "--disable-bench"
"--disable-tests" "--disable-cgi" "--%{lwt:enable}%-lwt"
"--disable-tests" "--%{lwt:enable}%-lwt"
"--enable-docs" "--enable-misc"]
[make "build"]
]

219
src/bigarray/CCBigstring.ml Normal file
View file

@ -0,0 +1,219 @@
(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Interface to 1-dimension Bigarrays of bytes (char)} *)
module B = Bigarray.Array1
type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
let create size = B.create Bigarray.char Bigarray.c_layout size
let empty = create 0
let init size f =
let a = create size in
for i = 0 to size-1 do
B.unsafe_set a i (f i)
done;
a
let fill = B.fill
let get = B.get
let set = B.set
let size = B.dim
let sub = B.sub
let blit a i b j len =
let a' = sub a i len in
let b' = sub b j len in
B.blit a' b'
let copy a =
let b = create (size a) in
B.blit a b;
b
(*$T
copy (of_string "abcd") |> to_string = "abcd"
*)
let fold f acc a =
let rec fold' f acc a i len =
if i = len then acc
else
let acc = f acc (get a i) in
fold' f acc a (i+1) len
in
fold' f acc a 0 (size a)
let iter f a =
let n = size a in
for i = 0 to n-1 do
f (get a i)
done
let rec equal_rec a b i len =
i = len
||
( get a i = get b i && equal_rec a b (i+1) len)
let equal a b =
size a = size b
&&
equal_rec a b 0 (size a)
(*$Q
Q.(pair printable_string printable_string) (fun (s1, s2) -> \
let a1 = of_string s1 and a2 = of_string s2 in \
equal a1 a2 = (s1 = s2))
*)
let rec compare_rec a b i len_a len_b =
if i=len_a && i=len_b then 0
else if i=len_a then -1
else if i=len_b then 1
else
match Char.compare (get a i) (get b i) with
| 0 -> compare_rec a b (i+1) len_a len_b
| n -> n
let compare a b =
compare_rec a b 0 (size a) (size b)
(*$T
compare (of_string "abc") (of_string "abd") < 0
compare (of_string "abc") (of_string "abcd") < 0
compare (of_string "abcd") (of_string "abc") > 0
compare (of_string "abc") (of_string "b") < 0
*)
(*$Q
Q.(pair string string) (fun (s1, s2) -> \
let a1 = of_string s1 and a2 = of_string s2 in \
CCInt.sign (compare a1 a2) = CCInt.sign (String.compare s1 s2))
*)
(** {2 Conversions} *)
let to_bytes a =
Bytes.init (size a) (fun i -> B.unsafe_get a i)
let of_bytes b =
init (Bytes.length b) (fun i -> Bytes.get b i)
let of_bytes_slice b i len =
if i < 0 || i+len > Bytes.length b then invalid_arg "CCBigstring";
init len (fun j -> Bytes.get b (i+j))
let sub_bytes a i len =
if i < 0 || i+len > size a then invalid_arg "CCBigstring";
Bytes.init len (fun j -> B.get a (i+j))
let blit_to_bytes a i b j len =
if i < 0 || j < 0 || i+len > size a || j+len > Bytes.length b
then invalid_arg "CCBigstring";
for x=0 to len-1 do
Bytes.set b (j+x) (B.get a (i+x))
done
let blit_of_bytes a i b j len =
if i < 0 || j < 0 || i+len > Bytes.length a || j+len > size b
then invalid_arg "CCBigstring";
for x=0 to len-1 do
B.set b (j+x) (Bytes.get a (i+x))
done
let to_string a =
CCString.init (size a) (fun i -> B.unsafe_get a i)
let of_string s =
init (String.length s) (fun i -> String.get s i)
let of_string_slice s i len =
if i < 0 || i+len > String.length s then invalid_arg "CCBigstring";
init len (fun j -> String.get s (i+j))
let sub_string a i len =
if i < 0 || i+len > size a then invalid_arg "CCBigstring";
CCString.init len (fun j -> B.get a (i+j))
(*$T
of_string_slice "abcde" 1 3 |> to_string = "bcd"
*)
let blit_of_string a i b j len =
if i < 0 || j < 0 || i+len > String.length a || j+len > size b
then invalid_arg "CCBigstring";
for x=0 to len-1 do
B.set b (j+x) (String.get a (i+x))
done
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
let to_seq a k = iter k a
let to_gen a =
let i = ref 0 in
let n = size a in
fun () ->
if !i = n then None
else (
let x = get a !i in
incr i;
Some x
)
(*$T
of_string "abcd" |> to_gen |> Gen.to_string = "abcd"
*)
let to_seq_slice a i len =
to_seq (sub a i len)
let to_gen_slice a i len =
to_gen (sub a i len)
(** {2 Memory-map} *)
let map_file_descr ?pos ?(shared=false) fd len =
B.map_file fd ?pos Bigarray.char Bigarray.c_layout shared len
let with_map_file ?pos ?(mode=0o644) ?(flags=[Unix.O_RDONLY]) ?shared name len f =
let fd = Unix.openfile name flags mode in
let a = map_file_descr ?pos ?shared fd len in
try
let x = f a in
Unix.close fd;
x
with e ->
Unix.close fd;
raise e

View file

@ -0,0 +1,126 @@
(*
copyright (c) 2013-2014, simon cruanes
all rights reserved.
redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer. redistributions in binary
form must reproduce the above copyright notice, this list of conditions and the
following disclaimer in the documentation and/or other materials provided with
the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
(** {1 Interface to 1-dimension Bigarrays of bytes (char)}
@since NEXT_RELEASE *)
type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t
val create : int -> t
(** Create a new bigstring of the given size. *)
val empty : t
(** Empty string *)
val init : int -> (int -> char) -> t
(** Initialize with the given function (called at every index) *)
val fill : t -> char -> unit
(** Fill with a single byte *)
val size : t -> int
(** Number of bytes *)
val get : t -> int -> char
val set : t -> int -> char -> unit
val blit : t -> int -> t -> int -> int -> unit
(** Blit a slice of the bigstring into another *)
val copy : t -> t
(** Copy of the string *)
val sub : t -> int -> int -> t
(** [sub s i len] takes a slice of length [len] from the string [s], starting
at offset [i].
@raise Invalid_argument if [i, len] doesn't designate a valid substring *)
val fold : ('a -> char -> 'a) -> 'a -> t -> 'a
val iter : (char -> unit) -> t -> unit
val equal : t -> t -> bool
val compare : t -> t -> int
(** Lexicographic order *)
(** {2 Conversions} *)
val to_bytes : t -> Bytes.t
val of_bytes : Bytes.t -> t
val of_bytes_slice : Bytes.t -> int -> int -> t
val sub_bytes : t -> int -> int -> Bytes.t
val blit_to_bytes : t -> int -> Bytes.t -> int -> int -> unit
val blit_of_bytes : Bytes.t -> int -> t -> int -> int -> unit
val to_string : t -> string
val of_string : string -> t
val of_string_slice : string -> int -> int -> t
val sub_string : t -> int -> int -> string
val blit_of_string : string -> int -> t -> int -> int -> unit
type 'a gen = unit -> 'a option
type 'a sequence = ('a -> unit) -> unit
val to_seq : t -> char sequence
val to_gen : t -> char gen
val to_seq_slice : t -> int -> int -> char sequence
val to_gen_slice : t -> int -> int -> char gen
(** {2 Memory-map} *)
val with_map_file :
?pos:int64 -> ?mode:int -> ?flags:Unix.open_flag list -> ?shared:bool ->
string -> int -> (t -> 'a) -> 'a
(** [with_map_file name len f] maps the file into memory, opening it, and
call [f] with a slice [pos.... pos+len] of the bytes of the file. When
[f] returns, the file is closed.
@param pos offset in the file (default 0)
@param shared if true, modifications are shared between processes that
have mapped this file (requires the filedescr to be open in write mode).
@param mode the mode for the file, if it's created
@param flags opening flags (default rdonly)
@see {!Bigarray.Array1.map_file} for more details *)
val map_file_descr : ?pos:int64 -> ?shared:bool -> Unix.file_descr -> int -> t
(** [map_file_descr descr len] is a lower-level access to an underlying file descriptor.
@param shared if true, modifications are shared between processes that
have mapped this file (requires the filedescr to be open in write mode).
@see {!Bigarray.Array1.map_file} for more details *)