diff --git a/src/minidag/decode.ml b/src/minidag/decode.ml
new file mode 100644
index 00000000..094cffd7
--- /dev/null
+++ b/src/minidag/decode.ml
@@ -0,0 +1,112 @@
+type t = { str: string }
+(** Decoder *)
+
+let create str : t = { str }
+
+type offset = int
+
+type node_decoder = {
+  dec: t;
+  mutable off: offset;
+}
+
+type value =
+  | Stop  (** No other value left *)
+  | Null
+  | Bool of bool
+  | Int64 of int64
+  | Float of float
+  | String of string
+  | Blob of string
+  | Ref of int
+
+exception Fail of string * offset
+
+let fail off msg = raise (Fail (msg, off))
+let failf off msg = Printf.ksprintf (fail off) msg
+
+let[@inline] read_byte_ (self : node_decoder) : int =
+  let c = String.get self.dec.str self.off in
+  self.off <- self.off + 1;
+  Char.code c
+
+let[@inline] read_leading_ (self : node_decoder) =
+  let c = read_byte_ self in
+  c lsr 4, c land 0x0f
+
+let read_uint64 self ~low =
+  match low with
+  | _ when low < 12 -> Int64.of_int low
+  | 12 -> read_byte_ self |> Int64.of_int
+  | 13 ->
+    let n = String.get_int16_le self.dec.str self.off in
+    self.off <- self.off + 2;
+    Int64.of_int (n land 0xFFFF)
+    (* strip sign extension: treat as uint16 *)
+  | 14 ->
+    let n = String.get_int32_le self.dec.str self.off in
+    self.off <- self.off + 4;
+    Int64.logand (Int64.of_int32 n) 0xFFFFFFFFL (* treat as uint32 *)
+  | 15 ->
+    let n = String.get_int64_le self.dec.str self.off in
+    self.off <- self.off + 8;
+    n
+  | _ -> assert false
+
+let string_ self ~low : string =
+  let len = read_uint64 self ~low |> Int64.to_int in
+  let s = String.sub self.dec.str self.off len in
+  self.off <- self.off + len;
+  s
+
+let read (self : node_decoder) : value =
+  let off_start = self.off in
+  let high, low = read_leading_ self in
+  match high with
+  | 0 ->
+    (* make sure we can't read further *)
+    self.off <- String.length self.dec.str;
+    Stop
+  | 1 ->
+    (match low with
+    | 0 -> Null
+    | 1 -> Bool true
+    | 2 -> Bool false
+    | n -> failf off_start "invalid special: %d" n)
+  | 2 -> Int64 (read_uint64 self ~low)
+  | 3 -> Int64 (Int64.neg (read_uint64 self ~low))
+  | 4 -> Float (Int64.float_of_bits (read_uint64 self ~low))
+  | 5 -> String (string_ self ~low)
+  | 6 -> Blob (string_ self ~low)
+  | 7 -> Ref (read_uint64 self ~low |> Int64.to_int)
+  | _ -> failf off_start "invalid high: %d" high
+
+let read_node (self : t) (off : offset) f =
+  let dec = { dec = self; off } in
+  match read dec with
+  | String s -> f dec s
+  | _ -> fail off "expected node to start with a string"
+
+let iter_nodes (self : t) (f : offset -> string -> value list -> unit) : unit =
+  let total_len = String.length self.str in
+  let rec go off =
+    if off < total_len then (
+      let dec = { dec = self; off } in
+      match read dec with
+      | String cmd ->
+        (* save the offset just before each read; when we see Stop, that saved
+           value is the Stop byte's position — next node starts one byte later *)
+        let stop_off = ref dec.off in
+        let rec collect acc =
+          stop_off := dec.off;
+          match read dec with
+          | Stop -> List.rev acc
+          | v -> collect (v :: acc)
+        in
+        let args = collect [] in
+        f off cmd args;
+        go (!stop_off + 1)
+      | _ -> fail off "expected string at start of node"
+    )
+  in
+  go 0
diff --git a/src/minidag/decode.mli b/src/minidag/decode.mli
new file mode 100644
index 00000000..a548a75e
--- /dev/null
+++ b/src/minidag/decode.mli
@@ -0,0 +1,35 @@
+type t
+(** Decoder *)
+
+val create : string -> t
+(* TODO: on demand read *)
+
+type node_decoder
+(** Local state to read the data for a node *)
+
+type offset = int
+
+val read_node : t -> offset -> (node_decoder -> string -> 'a) -> 'a
+(** Decode node at offset. The callback gets a decoder for the arguments as well
+    as the command's name. *)
+
+exception Fail of string * offset
+
+type value =
+  | Stop  (** No other value left *)
+  | Null
+  | Bool of bool
+  | Int64 of int64
+  | Float of float
+  | String of string
+  | Blob of string
+  | Ref of int
+
+val read : node_decoder -> value
+(** [read dec] returns a value.
+    @raise Fail in case of malformed data. *)
+
+val iter_nodes : t -> (offset -> string -> value list -> unit) -> unit
+(** Stream forward from offset 0, calling [f offset cmd args] for each node.
+    [args] does not include the trailing [Stop] marker.
+    @raise Fail if the stream is malformed. *)
diff --git a/src/minidag/design.md b/src/minidag/design.md
new file mode 100644
index 00000000..b5bc5dad
--- /dev/null
+++ b/src/minidag/design.md
@@ -0,0 +1,108 @@
+
+
+# minidag
+
+A serialization format intended to be a trimmed-down, minimalistic version of [twine](https://twine-data.dev/)
+focusing on extensibility.
+
+We define a basic set of primitive types (scalars), a notion of _reference_ (pointer to a value coming
+earlier in the byte stream), and a notion of _command_ (which builds a composite value out of scalars and
+references to other values). The set of commands is user defined and provides extensibility;
+the builtin scalars and references provide a uniform representation that allow tools to process
+_any_ DAG in the same fashion (e.g. reachability analysis, display, or pruning).
+
+## data model
+
+The actual data model is user-defined, but must be expressible in terms of DAG nodes.
+Each node has a _command_, and a list of scalar arguments.
+
+In other words, the bytestream contains a DAG, whose nodes have the form
+`(<command> <scalar>*)`.
+
+Scalars comprise: `null`, booleans, int64 numbers, float32/float64, (UTF-8) strings, binary blobs,
+and the notion of _reference_ (an absolute offset in the bytestream, which point to earlier to ensure
+acyclicity).
+
+For example, lambda terms of the Simple Type Theory are represented by:
+
+```
+term := Var var | Lambda var term | Apply term term
+var := { name: string, type: type }
+type := Type_const name | Arrow type type
+```
+
+This can be defined with the following set of commands, where `@x` is a reference
+a command returning type `x`:
+
+```
+"term.var" @var  // returns term
+"term.app" @term @term  // returns term
+"term.lambda" @var @term  // returns term
+
+"var" string @type  // returns var
+
+"type.const" string // returns type
+"type.arrow" @type @type // returns type
+```
+
+## Wire format
+
+The format is designed to be read forward or backward.
+
+- forward: the byte stream is just a sequence of nodes, read in the same
+    order they were produced.
+- backward: starting from a byte offset that is of interest (eg the
+    proof of "false" in a refutational prover), traverse the DAG backward
+    by following references. This can allow tools to touch only a fraction of the
+    nodes in the stream.
+
+
+Each node is made of a list of scalars, followed by `0x00` (the stop marker).
+The first scalar is normally a string that represents the _command_; the other scalars
+are the command's arguments.
+
+Scalars are represented by a _leading byte_. The leading byte contains the scalar's
+type, and some length or integer value information:
+
+```
+  [ tag: 3 bits | padding: 1 bit | embedded integer: 4 bits ]
+
+    ^--- high nibble ----------^   ^---- low nibble -------^
+```
+
+Tags are as follow:
+
+- 0: stop (coincides with `0x00` being stop)
+- 1: special (null=0, true=1, false=2)
+- 2: non-negative integer, up to 64 bits. See later for where the data is.
+- 3: negative integer, up to -2^63 (min int64).
+- 4: float, 32 or 64 bits
+- 5: string (UTF-8). The integer data is the byte length.
+- 6: blob (raw binary). The integer data is the byte length.
+- 7: reference. The integer data is the absolute offset in the stream.
+
+The integer data associated with the scalar is represented in the low nibble.
+
+- values from 0 to 11 represent themselves (ie low nibble=3 represents the embedded
+    integer 3). Values above this indicate the number of additional bytes, immediately
+    following the leading byte, and that contain the actul value.
+- value 12: 1 byte follows, the value is that u8
+- value 13: 2 bytes follow, the value is that u16 (little endian)
+- value 14: 4 bytes follow, the value is that u32 (little endian)
+- value 15: 8 bytes follow, the value is that u64 (little endian)
+
+Each scalar type does a different thing with this embedded integer.
+- stop/null: no integer (must be 0)
+- bool: 0 is false, 1 is true
+- non-negative integer: it's the value.
+- negative integers: encode the absolute value
+- floats: value is 14 or 15, followed by the 32- or 64-bits representation of the float.
+- strings and blobs: the embedded integer is the number of bytes that follow with the data.
+- reference: value is the absolute offset referred to
+
+So the string "hello" would be: `0x55 'h' 'e' 'l' 'l' 'o'`.
+Integer `8 : u64` is `0x28`, integer `39 : u64` is `0x2c 39`.
+a 64kiB (=0x10000 bytes) png blob would be `0x6e 0x00 0x01 0x00 0x00 <the png bytes>`.
+
+
+
diff --git a/src/minidag/dune b/src/minidag/dune
new file mode 100644
index 00000000..ab1a3a87
--- /dev/null
+++ b/src/minidag/dune
@@ -0,0 +1,5 @@
+(library
+ (name sidekick_minidag)
+ (public_name sidekick.minidag)
+ (synopsis "Minidag binary serialization format for proof traces")
+ (libraries))
diff --git a/src/minidag/encode.ml b/src/minidag/encode.ml
new file mode 100644
index 00000000..af56578c
--- /dev/null
+++ b/src/minidag/encode.ml
@@ -0,0 +1,141 @@
+class type output = object
+  method write : bytes -> int -> int -> unit
+end
+
+type t = {
+  mutable buf: bytes;
+  mutable len: int;
+  out: output;
+  mutable cur_offset: int;  (** How many bytes written into [out] already? *)
+}
+(** Encoder *)
+
+let buf_size = 1024
+let high_watermark = buf_size - 32
+
+let create ~out () : t =
+  let out = (out :> output) in
+  { out; cur_offset = 0; buf = Bytes.create buf_size; len = 0 }
+
+let[@inline never] flush_ self : unit =
+  self.out#write self.buf 0 self.len;
+  self.cur_offset <- self.cur_offset + self.len;
+  self.len <- 0
+
+let[@inline] abs_offset_ (self : t) : int = self.cur_offset + self.len
+let flush self : unit = if self.len > 0 then flush_ self
+
+type node_encoder = t
+type offset = int
+
+let maybe_flush self = if self.len >= high_watermark then flush_ self
+let[@inline] buf_len self = Bytes.length self.buf
+
+let[@inline never] ensure_slow_ (self : t) n =
+  (* Flush before growing: peak memory stays bounded at buf_size + n rather
+     than the full accumulated output size. An alternative would be to grow
+     without flushing, but that would buffer the entire output in RAM. *)
+  if self.len + n >= high_watermark then flush self;
+  let cap = ref (buf_len self) in
+  while !cap < self.len + n do
+    cap := !cap + (!cap / 2)
+  done;
+  if !cap > buf_len self then (
+    let newbuf = Bytes.create !cap in
+    Bytes.blit self.buf 0 newbuf 0 self.len;
+    self.buf <- newbuf
+  )
+
+(** Ensure at least [n] bytes of free space *)
+let ensure_ self n : unit =
+  let cap = buf_len self in
+  if cap < self.len + n then ensure_slow_ self n;
+  assert (self.len + n <= buf_len self)
+
+let write_leading (self : t) ~high ~low =
+  if low >= 12 then ensure_ self 9;
+  Bytes.set self.buf self.len (Char.unsafe_chr ((high lsl 4) lor low));
+  self.len <- self.len + 1
+
+let stop self = write_leading self ~high:0 ~low:0
+let null self = write_leading self ~high:1 ~low:0
+
+let bool self b =
+  write_leading self ~high:1
+    ~low:
+      (if b then
+         1
+       else
+         2)
+
+let uint64_ self ~high i =
+  assert (i >= 0L);
+  if i < 12L then
+    write_leading self ~high ~low:(Int64.to_int i)
+  else if i < Int64.shift_left 1L 8 then (
+    write_leading self ~high ~low:12;
+    Bytes.set_int8 self.buf self.len (Int64.to_int i);
+    self.len <- self.len + 1
+  ) else if i < Int64.shift_left 1L 16 then (
+    write_leading self ~high ~low:13;
+    Bytes.set_int16_le self.buf self.len (Int64.to_int i);
+    self.len <- self.len + 2
+  ) else if i < Int64.shift_left 1L 32 then (
+    write_leading self ~high ~low:14;
+    Bytes.set_int32_le self.buf self.len (Int64.to_int32 i);
+    self.len <- self.len + 4
+  ) else (
+    write_leading self ~high ~low:15;
+    Bytes.set_int64_le self.buf self.len i;
+    self.len <- self.len + 8
+  )
+
+let uint_ self ~high i =
+  if i < 12 then
+    write_leading self ~high ~low:i
+  else
+    uint64_ self ~high (Int64.of_int i)
+
+let int64 self i =
+  if i = Int64.min_int then
+    failwith "Encode.int64: Int64.min_int is not representable"
+  else if i < 0L then
+    uint64_ self ~high:3 (Int64.neg i)
+  else
+    uint64_ self ~high:2 i
+
+let int self i =
+  if i = min_int then
+    uint64_ self ~high:3 Int64.(abs (of_int i))
+  else if i < 0 then
+    uint_ self ~high:3 (abs i)
+  else
+    uint_ self ~high:2 i
+
+let float self f =
+  (* always 64-bit; bits_of_float is a signed int64 so bypass uint64_ *)
+  write_leading self ~high:4 ~low:15;
+  Bytes.set_int64_le self.buf self.len (Int64.bits_of_float f);
+  self.len <- self.len + 8
+
+let string_ self ~high s =
+  let len = String.length s in
+  uint_ self ~high len;
+  ensure_ self len;
+  Bytes.blit_string s 0 self.buf self.len len;
+  self.len <- self.len + len
+
+let string self s = string_ self ~high:5 s
+let blob self s = string_ self ~high:6 s
+
+let ref self i =
+  assert (i < abs_offset_ self);
+  uint_ self ~high:7 i
+
+let write_node (self : t) cmd (f : node_encoder -> unit) : offset =
+  let offset = abs_offset_ self in
+  string self cmd;
+  f self;
+  stop self;
+  maybe_flush self;
+  offset
diff --git a/src/minidag/encode.mli b/src/minidag/encode.mli
new file mode 100644
index 00000000..1a80ac37
--- /dev/null
+++ b/src/minidag/encode.mli
@@ -0,0 +1,30 @@
+type t
+(** Encoder *)
+
+class type output = object
+  method write : bytes -> int -> int -> unit
+end
+
+val create : out:#output -> unit -> t
+
+val flush : t -> unit
+(** Write all data to the internal output *)
+
+type node_encoder
+(** Local state to write the data for a node *)
+
+type offset = private int
+
+val write_node : t -> string -> (node_encoder -> unit) -> offset
+(** [write_node enc command f] starts a new node with "command", calls [f] to
+    add the arguments, and returns the offset of this new node. The node encoder
+    must not escape. *)
+
+val null : node_encoder -> unit
+val bool : node_encoder -> bool -> unit
+val int64 : node_encoder -> int64 -> unit
+val int : node_encoder -> int -> unit
+val float : node_encoder -> float -> unit
+val string : node_encoder -> string -> unit
+val blob : node_encoder -> string -> unit
+val ref : node_encoder -> offset -> unit