add a benchmark file to measure size of big data structures

This commit is contained in:
Simon Cruanes 2015-09-19 01:46:07 +02:00
parent 384d81945e
commit 9164d53889
3 changed files with 203 additions and 0 deletions

8
_oasis
View file

@ -189,6 +189,14 @@ Executable id_sexp
MainIs: id_sexp.ml MainIs: id_sexp.ml
BuildDepends: containers.sexp BuildDepends: containers.sexp
Executable mem_measure
Path: benchs/
Install: false
CompiledObject: native
MainIs: mem_measure.ml
Build$: flag(bench)
BuildDepends: sequence, unix, containers, containers.data, hamt
Executable id_sexp2 Executable id_sexp2
Path: examples/ Path: examples/
Install: false Install: false

112
benchs/mem_measure.ml Normal file
View file

@ -0,0 +1,112 @@
(* goal: measure memory consumption *)
(* number of words allocated *)
let mem_allocated () =
let gc = Gc.stat () in
gc.Gc.minor_words +. gc.Gc.major_words -. gc.Gc.promoted_words
(* overhead in memory *)
let mem_occupied x = Objsize.size_kb (Obj.repr x)
type stats = {
time: float;
occ: int;
alloc: float;
}
let measure_time_mem f =
let mem_alloc1 = mem_allocated () in
let start = Unix.gettimeofday() in
let x = f () in
let stop = Unix.gettimeofday() in
Gc.compact ();
let mem_alloc2 = mem_allocated () in
let mem_occupied = mem_occupied x in
ignore x;
{ occ=mem_occupied;
alloc=mem_alloc2-.mem_alloc1;
time=stop -. start;
}
let spf = Printf.sprintf
let do_test ~name f =
Format.printf "test %s...@." name;
let res = measure_time_mem f in
Format.printf " allocated:%.2f MB, occupied:%d kB, time: %.2f s@."
(res.alloc *. 8. /. 1_000_000.)
res.occ
res.time
let test_hashtrie n () =
let module M = CCHashTrie.Make(CCInt) in
do_test ~name:(spf "hashtrie(%d)" n)
(fun () ->
let m = M.of_seq Sequence.(1 -- n |> map (fun x-> x,x)) in
m
)
let test_hamt n () =
let module M = Hamt.Make'(CCInt) in
do_test ~name:(spf "hamt(%d)" n)
(fun () ->
let m = Sequence.(1 -- n
|> map (fun x-> x,x)
|> fold (fun m (k,v) -> M.add k v m) M.empty
) in
m
)
let test_map n () =
let module M = CCMap.Make(CCInt) in
do_test ~name:(spf "map(%d)" n)
(fun () ->
let m = M.of_seq Sequence.(1 -- n |> map (fun x-> x,x)) in
m
)
let test_wbt n () =
let module M = CCWBTree.Make(CCInt) in
do_test ~name:(spf "wbt(%d)" n)
(fun () ->
let m = M.of_seq Sequence.(1 -- n |> map (fun x-> x,x)) in
m
)
let test_hashtbl n () =
let module H = CCHashtbl.Make(CCInt) in
do_test ~name:(spf "hashtbl(%d)" n)
(fun () ->
let m = H.of_seq Sequence.(1 -- n |> map (fun x-> x,x)) in
m
)
let tests_ =
CCList.flat_map
(fun n ->
[ spf "hashtrie_%d" n, test_hashtrie n
; spf "map_%d" n, test_map n
; spf "hamt_%d" n, test_hamt n
; spf "wbt_%d" n, test_wbt n
; spf "hashtbl_%d" n, test_hashtbl n
]
) [ 1_000; 100_000; 30_000_000 ]
let run_test name = List.assoc name tests_ ()
let print_list () =
Format.printf "@[<v2>tests:@ %a@]@."
(CCList.print CCString.print) (List.map fst tests_)
let () =
let to_test = ref [] in
let options = Arg.align
[
] in
Arg.parse options (CCList.Ref.push to_test) "usage: mem_measure [name*]";
match !to_test with
| [] ->
print_list ();
exit 0
| _ -> List.iter run_test (List.rev !to_test)

83
benchs/objsize.ml Normal file
View file

@ -0,0 +1,83 @@
(**************************************************************************)
(* *)
(* Copyright (C) Jean-Christophe Filliatre *)
(* *)
(* This software is free software; you can redistribute it and/or *)
(* modify it under the terms of the GNU Library General Public *)
(* License version 2.1, with the special exception on linking *)
(* described in file LICENSE. *)
(* *)
(* This software is distributed in the hope that it will be useful, *)
(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)
(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *)
(* *)
(**************************************************************************)
(*i $Id$ i*)
(*i*)
open Obj
(*i*)
(*s Pointers already visited are stored in a hash-table, where
comparisons are done using physical equality. *)
module H = Hashtbl.Make(
struct
type t = Obj.t
let equal = (==)
let hash o = Hashtbl.hash (magic o : int)
end)
let node_table = (H.create 257 : unit H.t)
let in_table o = try H.find node_table o; true with Not_found -> false
let add_in_table o = H.add node_table o ()
let reset_table () = H.clear node_table
(*s Objects are traversed recursively, as soon as their tags are less than
[no_scan_tag]. [count] records the numbers of words already visited. *)
let size_of_double = size (repr 1.0)
let count = ref 0
let rec traverse t =
if not (in_table t) then begin
add_in_table t;
if is_block t then begin
let n = size t in
let tag = tag t in
if tag < no_scan_tag then begin
count := !count + 1 + n;
for i = 0 to n - 1 do
let f = field t i in
if is_block f then traverse f
done
end else if tag = string_tag then
count := !count + 1 + n
else if tag = double_tag then
count := !count + size_of_double
else if tag = double_array_tag then
count := !count + 1 + size_of_double * n
else
incr count
end
end
(*s Sizes of objects in words and in bytes. The size in bytes is computed
system-independently according to [Sys.word_size]. *)
let size_w o =
reset_table ();
count := 0;
traverse (repr o);
!count
let size_b o = (size_w o) * (Sys.word_size / 8)
let size_kb o = (size_w o) / (8192 / Sys.word_size)