From 9164d53889e1b0ed0607823e403509c51af288aa Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Sat, 19 Sep 2015 01:46:07 +0200 Subject: [PATCH] add a benchmark file to measure size of big data structures --- _oasis | 8 +++ benchs/mem_measure.ml | 112 ++++++++++++++++++++++++++++++++++++++++++ benchs/objsize.ml | 83 +++++++++++++++++++++++++++++++ 3 files changed, 203 insertions(+) create mode 100644 benchs/mem_measure.ml create mode 100644 benchs/objsize.ml diff --git a/_oasis b/_oasis index 56318b88..26c4e732 100644 --- a/_oasis +++ b/_oasis @@ -189,6 +189,14 @@ Executable id_sexp MainIs: id_sexp.ml BuildDepends: containers.sexp +Executable mem_measure + Path: benchs/ + Install: false + CompiledObject: native + MainIs: mem_measure.ml + Build$: flag(bench) + BuildDepends: sequence, unix, containers, containers.data, hamt + Executable id_sexp2 Path: examples/ Install: false diff --git a/benchs/mem_measure.ml b/benchs/mem_measure.ml new file mode 100644 index 00000000..c0ea8495 --- /dev/null +++ b/benchs/mem_measure.ml @@ -0,0 +1,112 @@ + +(* goal: measure memory consumption *) + +(* number of words allocated *) +let mem_allocated () = + let gc = Gc.stat () in + gc.Gc.minor_words +. gc.Gc.major_words -. gc.Gc.promoted_words + +(* overhead in memory *) +let mem_occupied x = Objsize.size_kb (Obj.repr x) + +type stats = { + time: float; + occ: int; + alloc: float; +} + +let measure_time_mem f = + let mem_alloc1 = mem_allocated () in + let start = Unix.gettimeofday() in + let x = f () in + let stop = Unix.gettimeofday() in + Gc.compact (); + let mem_alloc2 = mem_allocated () in + let mem_occupied = mem_occupied x in + ignore x; + { occ=mem_occupied; + alloc=mem_alloc2-.mem_alloc1; + time=stop -. start; + } + +let spf = Printf.sprintf + +let do_test ~name f = + Format.printf "test %s...@." name; + let res = measure_time_mem f in + Format.printf " allocated:%.2f MB, occupied:%d kB, time: %.2f s@." + (res.alloc *. 8. /. 1_000_000.) + res.occ + res.time + +let test_hashtrie n () = + let module M = CCHashTrie.Make(CCInt) in + do_test ~name:(spf "hashtrie(%d)" n) + (fun () -> + let m = M.of_seq Sequence.(1 -- n |> map (fun x-> x,x)) in + m + ) + +let test_hamt n () = + let module M = Hamt.Make'(CCInt) in + do_test ~name:(spf "hamt(%d)" n) + (fun () -> + let m = Sequence.(1 -- n + |> map (fun x-> x,x) + |> fold (fun m (k,v) -> M.add k v m) M.empty + ) in + m + ) + +let test_map n () = + let module M = CCMap.Make(CCInt) in + do_test ~name:(spf "map(%d)" n) + (fun () -> + let m = M.of_seq Sequence.(1 -- n |> map (fun x-> x,x)) in + m + ) + +let test_wbt n () = + let module M = CCWBTree.Make(CCInt) in + do_test ~name:(spf "wbt(%d)" n) + (fun () -> + let m = M.of_seq Sequence.(1 -- n |> map (fun x-> x,x)) in + m + ) + +let test_hashtbl n () = + let module H = CCHashtbl.Make(CCInt) in + do_test ~name:(spf "hashtbl(%d)" n) + (fun () -> + let m = H.of_seq Sequence.(1 -- n |> map (fun x-> x,x)) in + m + ) + +let tests_ = + CCList.flat_map + (fun n -> + [ spf "hashtrie_%d" n, test_hashtrie n + ; spf "map_%d" n, test_map n + ; spf "hamt_%d" n, test_hamt n + ; spf "wbt_%d" n, test_wbt n + ; spf "hashtbl_%d" n, test_hashtbl n + ] + ) [ 1_000; 100_000; 30_000_000 ] + +let run_test name = List.assoc name tests_ () + +let print_list () = + Format.printf "@[tests:@ %a@]@." + (CCList.print CCString.print) (List.map fst tests_) + +let () = + let to_test = ref [] in + let options = Arg.align + [ + ] in + Arg.parse options (CCList.Ref.push to_test) "usage: mem_measure [name*]"; + match !to_test with + | [] -> + print_list (); + exit 0 + | _ -> List.iter run_test (List.rev !to_test) diff --git a/benchs/objsize.ml b/benchs/objsize.ml new file mode 100644 index 00000000..668be91c --- /dev/null +++ b/benchs/objsize.ml @@ -0,0 +1,83 @@ +(**************************************************************************) +(* *) +(* Copyright (C) Jean-Christophe Filliatre *) +(* *) +(* This software is free software; you can redistribute it and/or *) +(* modify it under the terms of the GNU Library General Public *) +(* License version 2.1, with the special exception on linking *) +(* described in file LICENSE. *) +(* *) +(* This software is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) +(* *) +(**************************************************************************) + +(*i $Id$ i*) + +(*i*) +open Obj +(*i*) + +(*s Pointers already visited are stored in a hash-table, where + comparisons are done using physical equality. *) + +module H = Hashtbl.Make( + struct + type t = Obj.t + let equal = (==) + let hash o = Hashtbl.hash (magic o : int) + end) + +let node_table = (H.create 257 : unit H.t) + +let in_table o = try H.find node_table o; true with Not_found -> false + +let add_in_table o = H.add node_table o () + +let reset_table () = H.clear node_table + +(*s Objects are traversed recursively, as soon as their tags are less than + [no_scan_tag]. [count] records the numbers of words already visited. *) + +let size_of_double = size (repr 1.0) + +let count = ref 0 + +let rec traverse t = + if not (in_table t) then begin + add_in_table t; + if is_block t then begin + let n = size t in + let tag = tag t in + if tag < no_scan_tag then begin + count := !count + 1 + n; + for i = 0 to n - 1 do + let f = field t i in + if is_block f then traverse f + done + end else if tag = string_tag then + count := !count + 1 + n + else if tag = double_tag then + count := !count + size_of_double + else if tag = double_array_tag then + count := !count + 1 + size_of_double * n + else + incr count + end + end + +(*s Sizes of objects in words and in bytes. The size in bytes is computed + system-independently according to [Sys.word_size]. *) + +let size_w o = + reset_table (); + count := 0; + traverse (repr o); + !count + +let size_b o = (size_w o) * (Sys.word_size / 8) + +let size_kb o = (size_w o) / (8192 / Sys.word_size) + +