From a63d095722be2e677b8ae5ee161a8c83c9962d1c Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Thu, 18 Dec 2014 16:25:26 +0100 Subject: [PATCH] containers.bigarray, containing CCBigstring --- .merlin | 3 + Makefile | 2 + _oasis | 12 ++ doc/intro.txt | 4 + opam | 2 +- src/bigarray/CCBigstring.ml | 219 +++++++++++++++++++++++++++++++++++ src/bigarray/CCBigstring.mli | 126 ++++++++++++++++++++ 7 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 src/bigarray/CCBigstring.ml create mode 100644 src/bigarray/CCBigstring.mli diff --git a/.merlin b/.merlin index f6234189..d5a2d81a 100644 --- a/.merlin +++ b/.merlin @@ -8,6 +8,7 @@ S src/sexp/ S src/threads/ S src/misc S src/string +S src/bigarray S src/pervasives S benchs S examples @@ -22,6 +23,7 @@ B _build/src/sexp/ B _build/src/threads/ B _build/src/misc B _build/src/string +B _build/src/bigarray B _build/src/pervasives B _build/benchs B _build/examples @@ -31,4 +33,5 @@ PKG benchmark PKG threads PKG threads.posix PKG lwt +PKG bigarray FLG -w +a -w -4 -w -44 diff --git a/Makefile b/Makefile index a52e9d42..a42a7005 100644 --- a/Makefile +++ b/Makefile @@ -75,6 +75,8 @@ QTESTABLE=$(filter-out $(DONTTEST), \ $(wildcard src/advanced/*.mli) \ $(wildcard src/iter/*.ml) \ $(wildcard src/iter/*.mli) \ + $(wildcard src/bigarray/*.ml) \ + $(wildcard src/bigarray/*.mli) \ ) qtest-clean: diff --git a/_oasis b/_oasis index 52d4dbe6..5a5f3719 100644 --- a/_oasis +++ b/_oasis @@ -37,6 +37,10 @@ Flag "bench" Description: Build and run benchmarks Default: false +Flag "bigarray" + Description: Build modules that depend on bigarrays + Default: false + Library "containers" Path: src/core Modules: CCVector, CCPrint, CCError, CCHeap, CCList, CCOpt, CCPair, @@ -86,6 +90,13 @@ Library "containers_advanced" FindlibParent: containers BuildDepends: containers, sequence +Library "containers_bigarray" + Path: src/bigarray + Modules: CCBigstring + FindlibName: bigarray + FindlibParent: containers + BuildDepends: containers, bigarray, bytes + Library "containers_pervasives" Path: src/pervasives Modules: CCPervasives @@ -193,6 +204,7 @@ Executable run_qtest Build$: flag(tests) BuildDepends: containers, containers.misc, containers.string, containers.iter, containers.io, containers.advanced, containers.sexp, + containers.bigarray, sequence, gen, oUnit, QTest2Lib Executable run_tests diff --git a/doc/intro.txt b/doc/intro.txt index 83a4e44f..b9137c99 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -83,6 +83,10 @@ Iterators: {!modules: Levenshtein KMP} +{4 Bigarrays} + +{!modules: CCBigstring} + {4 Advanced} {!modules: CCLinq CCCat CCBatch} diff --git a/opam b/opam index 2e55f8ea..fd1228c8 100644 --- a/opam +++ b/opam @@ -3,7 +3,7 @@ author: "Simon Cruanes" maintainer: "simon.cruanes@inria.fr" build: [ ["./configure" "--prefix" prefix "--disable-thread" "--disable-bench" - "--disable-tests" "--disable-cgi" "--%{lwt:enable}%-lwt" + "--disable-tests" "--%{lwt:enable}%-lwt" "--enable-docs" "--enable-misc"] [make "build"] ] diff --git a/src/bigarray/CCBigstring.ml b/src/bigarray/CCBigstring.ml new file mode 100644 index 00000000..7993d15c --- /dev/null +++ b/src/bigarray/CCBigstring.ml @@ -0,0 +1,219 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Interface to 1-dimension Bigarrays of bytes (char)} *) + +module B = Bigarray.Array1 + +type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t + +let create size = B.create Bigarray.char Bigarray.c_layout size + +let empty = create 0 + +let init size f = + let a = create size in + for i = 0 to size-1 do + B.unsafe_set a i (f i) + done; + a + +let fill = B.fill + +let get = B.get + +let set = B.set + +let size = B.dim + +let sub = B.sub + +let blit a i b j len = + let a' = sub a i len in + let b' = sub b j len in + B.blit a' b' + +let copy a = + let b = create (size a) in + B.blit a b; + b + +(*$T + copy (of_string "abcd") |> to_string = "abcd" + *) + +let fold f acc a = + let rec fold' f acc a i len = + if i = len then acc + else + let acc = f acc (get a i) in + fold' f acc a (i+1) len + in + fold' f acc a 0 (size a) + +let iter f a = + let n = size a in + for i = 0 to n-1 do + f (get a i) + done + +let rec equal_rec a b i len = + i = len + || + ( get a i = get b i && equal_rec a b (i+1) len) + +let equal a b = + size a = size b + && + equal_rec a b 0 (size a) + +(*$Q + Q.(pair printable_string printable_string) (fun (s1, s2) -> \ + let a1 = of_string s1 and a2 = of_string s2 in \ + equal a1 a2 = (s1 = s2)) +*) + +let rec compare_rec a b i len_a len_b = + if i=len_a && i=len_b then 0 + else if i=len_a then -1 + else if i=len_b then 1 + else + match Char.compare (get a i) (get b i) with + | 0 -> compare_rec a b (i+1) len_a len_b + | n -> n + +let compare a b = + compare_rec a b 0 (size a) (size b) + +(*$T + compare (of_string "abc") (of_string "abd") < 0 + compare (of_string "abc") (of_string "abcd") < 0 + compare (of_string "abcd") (of_string "abc") > 0 + compare (of_string "abc") (of_string "b") < 0 +*) + +(*$Q + Q.(pair string string) (fun (s1, s2) -> \ + let a1 = of_string s1 and a2 = of_string s2 in \ + CCInt.sign (compare a1 a2) = CCInt.sign (String.compare s1 s2)) +*) + +(** {2 Conversions} *) + +let to_bytes a = + Bytes.init (size a) (fun i -> B.unsafe_get a i) + +let of_bytes b = + init (Bytes.length b) (fun i -> Bytes.get b i) + +let of_bytes_slice b i len = + if i < 0 || i+len > Bytes.length b then invalid_arg "CCBigstring"; + init len (fun j -> Bytes.get b (i+j)) + +let sub_bytes a i len = + if i < 0 || i+len > size a then invalid_arg "CCBigstring"; + Bytes.init len (fun j -> B.get a (i+j)) + +let blit_to_bytes a i b j len = + if i < 0 || j < 0 || i+len > size a || j+len > Bytes.length b + then invalid_arg "CCBigstring"; + for x=0 to len-1 do + Bytes.set b (j+x) (B.get a (i+x)) + done + +let blit_of_bytes a i b j len = + if i < 0 || j < 0 || i+len > Bytes.length a || j+len > size b + then invalid_arg "CCBigstring"; + for x=0 to len-1 do + B.set b (j+x) (Bytes.get a (i+x)) + done + +let to_string a = + CCString.init (size a) (fun i -> B.unsafe_get a i) + +let of_string s = + init (String.length s) (fun i -> String.get s i) + +let of_string_slice s i len = + if i < 0 || i+len > String.length s then invalid_arg "CCBigstring"; + init len (fun j -> String.get s (i+j)) + +let sub_string a i len = + if i < 0 || i+len > size a then invalid_arg "CCBigstring"; + CCString.init len (fun j -> B.get a (i+j)) + +(*$T + of_string_slice "abcde" 1 3 |> to_string = "bcd" +*) + +let blit_of_string a i b j len = + if i < 0 || j < 0 || i+len > String.length a || j+len > size b + then invalid_arg "CCBigstring"; + for x=0 to len-1 do + B.set b (j+x) (String.get a (i+x)) + done + +type 'a gen = unit -> 'a option +type 'a sequence = ('a -> unit) -> unit + +let to_seq a k = iter k a + +let to_gen a = + let i = ref 0 in + let n = size a in + fun () -> + if !i = n then None + else ( + let x = get a !i in + incr i; + Some x + ) + +(*$T + of_string "abcd" |> to_gen |> Gen.to_string = "abcd" +*) + +let to_seq_slice a i len = + to_seq (sub a i len) + +let to_gen_slice a i len = + to_gen (sub a i len) + +(** {2 Memory-map} *) + +let map_file_descr ?pos ?(shared=false) fd len = + B.map_file fd ?pos Bigarray.char Bigarray.c_layout shared len + +let with_map_file ?pos ?(mode=0o644) ?(flags=[Unix.O_RDONLY]) ?shared name len f = + let fd = Unix.openfile name flags mode in + let a = map_file_descr ?pos ?shared fd len in + try + let x = f a in + Unix.close fd; + x + with e -> + Unix.close fd; + raise e diff --git a/src/bigarray/CCBigstring.mli b/src/bigarray/CCBigstring.mli new file mode 100644 index 00000000..32283bd8 --- /dev/null +++ b/src/bigarray/CCBigstring.mli @@ -0,0 +1,126 @@ + +(* +copyright (c) 2013-2014, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Interface to 1-dimension Bigarrays of bytes (char)} + +@since NEXT_RELEASE *) + +type t = (char, Bigarray.int8_unsigned_elt, Bigarray.c_layout) Bigarray.Array1.t + +val create : int -> t +(** Create a new bigstring of the given size. *) + +val empty : t +(** Empty string *) + +val init : int -> (int -> char) -> t +(** Initialize with the given function (called at every index) *) + +val fill : t -> char -> unit +(** Fill with a single byte *) + +val size : t -> int +(** Number of bytes *) + +val get : t -> int -> char + +val set : t -> int -> char -> unit + +val blit : t -> int -> t -> int -> int -> unit +(** Blit a slice of the bigstring into another *) + +val copy : t -> t +(** Copy of the string *) + +val sub : t -> int -> int -> t +(** [sub s i len] takes a slice of length [len] from the string [s], starting + at offset [i]. + @raise Invalid_argument if [i, len] doesn't designate a valid substring *) + +val fold : ('a -> char -> 'a) -> 'a -> t -> 'a + +val iter : (char -> unit) -> t -> unit + +val equal : t -> t -> bool + +val compare : t -> t -> int +(** Lexicographic order *) + +(** {2 Conversions} *) + +val to_bytes : t -> Bytes.t + +val of_bytes : Bytes.t -> t + +val of_bytes_slice : Bytes.t -> int -> int -> t + +val sub_bytes : t -> int -> int -> Bytes.t + +val blit_to_bytes : t -> int -> Bytes.t -> int -> int -> unit + +val blit_of_bytes : Bytes.t -> int -> t -> int -> int -> unit + +val to_string : t -> string + +val of_string : string -> t + +val of_string_slice : string -> int -> int -> t + +val sub_string : t -> int -> int -> string + +val blit_of_string : string -> int -> t -> int -> int -> unit + +type 'a gen = unit -> 'a option +type 'a sequence = ('a -> unit) -> unit + +val to_seq : t -> char sequence + +val to_gen : t -> char gen + +val to_seq_slice : t -> int -> int -> char sequence + +val to_gen_slice : t -> int -> int -> char gen + +(** {2 Memory-map} *) + +val with_map_file : + ?pos:int64 -> ?mode:int -> ?flags:Unix.open_flag list -> ?shared:bool -> + string -> int -> (t -> 'a) -> 'a +(** [with_map_file name len f] maps the file into memory, opening it, and + call [f] with a slice [pos.... pos+len] of the bytes of the file. When + [f] returns, the file is closed. + @param pos offset in the file (default 0) + @param shared if true, modifications are shared between processes that + have mapped this file (requires the filedescr to be open in write mode). + @param mode the mode for the file, if it's created + @param flags opening flags (default rdonly) + @see {!Bigarray.Array1.map_file} for more details *) + +val map_file_descr : ?pos:int64 -> ?shared:bool -> Unix.file_descr -> int -> t +(** [map_file_descr descr len] is a lower-level access to an underlying file descriptor. + @param shared if true, modifications are shared between processes that + have mapped this file (requires the filedescr to be open in write mode). + @see {!Bigarray.Array1.map_file} for more details *)