From 204cb0dacba3f27adcb3b37e8a005db43de8f3b9 Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 9 Oct 2013 18:40:45 +0200 Subject: [PATCH] BencodeOnDisk module, to write Bencode values in a file in an append-only, concurrent manner --- Makefile | 5 +- bencodeOnDisk.ml | 127 ++++++++++++++++++++++++++++++++++++++ bencodeOnDisk.mli | 57 +++++++++++++++++ bencode_write_par.sh | 13 ++++ containers.mllib | 1 + examples/bencode_write.ml | 22 +++++++ 6 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 bencodeOnDisk.ml create mode 100644 bencodeOnDisk.mli create mode 100755 bencode_write_par.sh create mode 100644 examples/bencode_write.ml diff --git a/Makefile b/Makefile index c8437e02..49e9d769 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,8 @@ IMPLEMENTATION_FILES = $(shell find -name '*.ml') TARGETS_LIB = containers.cmxa containers.cma TARGETS_DOC = containers.docdir/index.html -EXAMPLES = examples/mem_size.native examples/collatz.native # examples/crawl.native +EXAMPLES = examples/mem_size.native examples/collatz.native \ + examples/bencode_write.native # examples/crawl.native OPTIONS = -use-ocamlfind @@ -31,7 +32,7 @@ doc: ocamlbuild $(OPTIONS) $(TARGETS_DOC) examples: all - ocamlbuild $(OPTIONS) -I . $(EXAMPLES) + ocamlbuild $(OPTIONS) -package unix -I . $(EXAMPLES) tests: lib ocamlbuild $(OPTIONS) -package oUnit -I . tests/run_tests.native diff --git a/bencodeOnDisk.ml b/bencodeOnDisk.ml new file mode 100644 index 00000000..f327bf76 --- /dev/null +++ b/bencodeOnDisk.ml @@ -0,0 +1,127 @@ + +(* +copyright (c) 2013, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Serialize Bencode on disk with persistency guarantees} + + This module provides an append-only interface to some file, with + synchronized access and fsync() called after every write. + + It currently uses [Unix.O_SYNC] to guarantee that writes are saved to + the disk, so {b WRITES ARE SLOW}. On the other hand, several + processes can access the same file and append data without risks of + losing written values or race conditions. + + Similarly, reads are atomic (require locking) and provide only + a fold interface. + *) + +type t = { + file : Unix.file_descr; + lock_file : Unix.file_descr; +} + +let open_out ?lock filename = + let lock = match lock with + | None -> filename + | Some l -> l + in + let lock_file = Unix.openfile lock [Unix.O_CREAT; Unix.O_WRONLY] 0o644 in + let file = Unix.openfile filename + [Unix.O_CREAT; Unix.O_APPEND; Unix.O_WRONLY; Unix.O_SYNC] 0o644 + in + { file; lock_file; } + +let close_out out = + Unix.close out.file + +let write out b = + Unix.lockf out.lock_file Unix.F_LOCK 0; + try + (* go to the end of the file *) + ignore (Unix.lseek out.file 0 Unix.SEEK_END); + let s = Bencode.to_string b in + (* call write() until everything is written *) + let rec write_all n = + if n >= String.length s + then () + else + let n' = n + Unix.write out.file s n (String.length s - n) in + write_all n' + in + write_all 0; + Unix.lockf out.lock_file Unix.F_ULOCK 0; + with e -> + (* unlock in any case *) + Unix.lockf out.lock_file Unix.F_ULOCK 0; + raise e + +type 'a result = + | Ok of 'a + | Error of string + +let read ?lock filename acc f = + let lock = match lock with + | None -> filename + | Some l -> l + in + (* lock file before reading, to observe a consistent state *) + let lock_file = Unix.openfile lock [Unix.O_CREAT; Unix.O_RDONLY] 0o644 in + Unix.lockf lock_file Unix.F_RLOCK 0; + try + let file = Unix.openfile filename [Unix.O_RDONLY] 0o644 in + (* read bencode values *) + let decoder = Bencode.mk_decoder () in + let len = 256 in + let buf = String.create len in + (* read a chunk of input and parse it *) + let rec next_val acc = + let n = Unix.read file buf 0 len in + if n = 0 + then Ok acc (* finished *) + else match Bencode.parse decoder buf 0 n with + | Bencode.ParseOk v -> + let acc = f acc v in + resume acc + | Bencode.ParseError e -> Error e + | Bencode.ParsePartial -> next_val acc + (* consume what remains of input *) + and resume acc = match Bencode.parse_resume decoder with + | Bencode.ParseOk v -> + let acc = f acc v in + resume acc + | Bencode.ParseError e -> Error e + | Bencode.ParsePartial -> next_val acc + in + let res = next_val acc in + (* cleanup *) + Unix.close file; + Unix.lockf lock_file Unix.F_ULOCK 0; + Unix.close lock_file; + res + with e -> + Unix.lockf lock_file Unix.F_ULOCK 0; + Unix.close lock_file; + raise e diff --git a/bencodeOnDisk.mli b/bencodeOnDisk.mli new file mode 100644 index 00000000..9c5eef49 --- /dev/null +++ b/bencodeOnDisk.mli @@ -0,0 +1,57 @@ + +(* +copyright (c) 2013, simon cruanes +all rights reserved. + +redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. redistributions in binary +form must reproduce the above copyright notice, this list of conditions and the +following disclaimer in the documentation and/or other materials provided with +the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*) + +(** {1 Serialize Bencode on disk with persistency guarantees} + + This module provides an append-only interface to some file, with + synchronized access and fsync() called after every write. + It needs {b Extunix} to compile (needs fsync). + *) + +type t + (** Handle to a file on which we can append values atomically *) + +val open_out : ?lock:string -> string -> t + (** Open the given file for appending values. Creates the file + if it doesn't exist. + @param lock, if provided, is the name of the lock file used. By default, + the file that is provided for writing is also used for locking. + @raise Unix.Unix_error if some IO error occurs. *) + +val close_out : t -> unit + (** Close the file descriptor *) + +val write : t -> Bencode.t -> unit + (** Write "atomically" a value to the end of the file *) + +type 'a result = + | Ok of 'a + | Error of string + +val read : ?lock:string -> string -> 'a -> ('a -> Bencode.t -> 'a) -> 'a result + (** Fold on values serialized in the given file. + @param lock see {!open_out}. + @raise Unix.Unix_error if some IO error occurs. *) diff --git a/bencode_write_par.sh b/bencode_write_par.sh new file mode 100755 index 00000000..a441a5aa --- /dev/null +++ b/bencode_write_par.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +# call n instances of ./bencode_write.native on the same file + +N=$1 +FILE=$2 + +echo "call script $N times on file $FILE" +for i in `seq $N` ; do + ./bencode_write.native "$FILE" & +done + +wait diff --git a/containers.mllib b/containers.mllib index 8f9e7d35..614d2e65 100644 --- a/containers.mllib +++ b/containers.mllib @@ -31,3 +31,4 @@ MultiMap ActionMan BV QCheck +BencodeOnDisk diff --git a/examples/bencode_write.ml b/examples/bencode_write.ml new file mode 100644 index 00000000..13196a42 --- /dev/null +++ b/examples/bencode_write.ml @@ -0,0 +1,22 @@ + +(** Write 10_000 Bencode values on the given file *) + +(* write n times the same value in the file *) +let write_values file n = + let out = BencodeOnDisk.open_out file in + Printf.printf "[%d] opened file\n" (Unix.getpid ()); + let v = Bencode.(L [I 0; I 1; S "foo"]) in + for i = 0 to n-1 do + Printf.printf "[%d] iteration %d\n" (Unix.getpid ()) i; + flush stdout; + BencodeOnDisk.write out v; + done; + BencodeOnDisk.close_out out; + Printf.printf "done\n"; + () + +let _ = + let file = Sys.argv.(1) in + Printf.printf "[%d] start: write to %s\n" (Unix.getpid ()) file; + flush stdout; + write_values file 100