wip: add Containers_pp sub-library

this implements Wadler's pretty printers, albeit in a more eager way.
This commit is contained in:
Simon Cruanes 2023-03-30 23:06:47 -04:00
parent aa6b40342e
commit 03c25cb18f
No known key found for this signature in database
GPG key ID: EBFFF6F283F3A2B4
3 changed files with 460 additions and 0 deletions

310
src/pp/containers_pp.ml Normal file
View file

@ -0,0 +1,310 @@
type t = {
view: view; (** Document view *)
wfl: int; (** Width if flattened *)
}
and view =
| Nil
| Newline
| Nest of int * t
| Append of t * t
| Char of char
| Text of string
| Text_sub of string * int * int
| Group of t
| Fill of { sep: t; l: t list }
let nil : t = { view = Nil; wfl = 0 }
let newline : t = { view = Newline; wfl = 1 }
let nl = newline
let char c =
if c = '\n' then
nl
else
{ view = Char c; wfl = 1 }
let nest i x : t =
match x.view with
| _ when i <= 0 -> x
| Nil -> nil
| _ -> { view = Nest (i, x); wfl = x.wfl }
let append a b : t =
match a.view, b.view with
| Nil, _ -> b
| _, Nil -> a
| _ -> { view = Append (a, b); wfl = a.wfl + b.wfl }
let group d : t =
match d.view with
| Nil -> nil
| Group _ -> d
| _ -> { view = Group d; wfl = d.wfl }
let ( ^ ) = append
let text_sub_ s i len : t = { view = Text_sub (s, i, len); wfl = len }
(* Turn [str], which contains some newlines, into a document.
We make a concatenation of
each line's content followed by a newline.
Then we group the result so that it remains in a unified block. *)
let split_text_ (str : string) : t =
let cur = ref nil in
let i = ref 0 in
let len = String.length str in
while !i < len do
match String.index_from str !i '\n' with
| exception Not_found ->
(* last chunk *)
if !i + 1 < len then cur := !cur ^ text_sub_ str !i (len - 1 - !i);
i := len
| j ->
cur := !cur ^ text_sub_ str !i (j - 1 - !i) ^ nl;
i := j + 1
done;
group !cur
let text (str : string) : t =
if str = "" then
nil
else if String.contains str '\n' then
split_text_ str
else
{ view = Text str; wfl = String.length str }
let textpf fmt = Printf.ksprintf text fmt
let textf fmt = Format.kasprintf text fmt
module B = Buffer
module Flatten = struct
let to_buffer buf (self : t) : unit =
let rec loop (d : t) =
match d.view with
| Nil -> ()
| Char c -> B.add_char buf c
| Newline -> B.add_char buf ' '
| Nest (_, x) -> loop x
| Append (x, y) ->
loop x;
loop y
| Text s -> B.add_string buf s
| Text_sub (s, i, len) -> B.add_substring buf s i len
| Group x -> loop x
| Fill { sep; l } ->
List.iteri
(fun i x ->
if i > 0 then loop sep;
loop x)
l
in
loop self
let to_string self : string =
let buf = Buffer.create 32 in
to_buffer buf self;
Buffer.contents buf
end
module Pretty = struct
type st = { buf: Buffer.t; width: int }
(** Add [i] spaces of indentation. *)
let add_indent (st : st) (i : int) =
for _i = 1 to i do
B.add_char st.buf ' '
done
let rec pp_flatten (st : st) (self : t) : int =
match self.view with
| Nil -> 0
| Char c ->
B.add_char st.buf c;
1
| Newline ->
B.add_char st.buf ' ';
1
| Nest (_i, x) -> pp_flatten st x
| Append (x, y) ->
let n = pp_flatten st x in
n + pp_flatten st y
| Text s ->
B.add_string st.buf s;
String.length s
| Text_sub (s, i, len) ->
B.add_substring st.buf s i len;
len
| Group x -> pp_flatten st x
| Fill { sep; l } ->
(* print separated by spaces *)
let n = ref 0 in
List.iteri
(fun i x ->
if i > 0 then n := !n + pp_flatten st sep;
n := !n + pp_flatten st x)
l;
!n
(** Does [x] fit in the current line when flattened, given that [k] chars
are already on the line? *)
let[@inline] fits_flattened st k x = x.wfl <= st.width - k
let pp_newline st i =
B.add_char st.buf '\n';
add_indent st i
(** Print [self] into the buffer.
@param k how many chars are already printed on the current line
*)
let rec pp_rec (st : st) (k : int) (stack : (int * t) list) : unit =
match stack with
| [] -> ()
| (i, d) :: stack_tl ->
pp_rec_top st ~k ~i d (fun k -> pp_rec st k stack_tl)
(** Print [d] at indentation [i], with [k] chars already printed
on the current line, then calls [kont] with the
new [k]. *)
and pp_rec_top st ~k ~i d (kont : int -> unit) : unit =
match d.view with
| Nil -> kont k
| Char c ->
B.add_char st.buf c;
kont (k + 1)
| Newline ->
pp_newline st i;
kont i
| Nest (j, x) -> pp_rec_top st ~k ~i:(i + j) x kont
| Append (x, y) ->
(* print [x], then print [y] *)
pp_rec_top st ~k ~i x (fun k -> pp_rec_top st ~k ~i y kont)
| Text s ->
B.add_string st.buf s;
kont (k + String.length s)
| Text_sub (s, i, len) ->
B.add_substring st.buf s i len;
kont (k + len)
| Group x ->
if fits_flattened st k x then (
(* print flattened *)
let w_x = pp_flatten st x in
assert (w_x = x.wfl);
kont (k + w_x)
) else
pp_rec_top st ~k ~i x kont
| Fill { sep; l } -> pp_fill st ~k ~i sep l kont
and pp_fill st ~k ~i sep l (kont : int -> unit) : unit =
(* [k] is the current offset in the line *)
let rec loop idx k l =
match l with
| x :: tl ->
if fits_flattened st k x then (
(* all flattened *)
let w_sep =
if idx = 0 then
0
else
pp_flatten st sep
in
let w_x = pp_flatten st x in
assert (w_x = x.wfl);
loop (idx + 1) (k + w_x + w_sep) tl
) else (
(* print, followed by a newline and resume filling with [k=i] *)
let pp_and_continue k =
pp_rec_top st ~k ~i x (fun k -> loop (idx + 1) k tl)
in
if idx > 0 then
(* separator, then item *)
pp_rec_top st ~k ~i sep pp_and_continue
else
pp_and_continue k
)
| [] -> kont k
in
loop 0 k l
let to_buffer ~width (buf : Buffer.t) (self : t) : unit =
let st = { buf; width } in
pp_rec st 0 [ 0, self ]
let to_string ~width (self : t) : string =
let buf = Buffer.create 32 in
to_buffer ~width buf self;
Buffer.contents buf
let to_format ~width out self : unit =
(* TODO: more efficient implementation *)
CCFormat.string_lines out (to_string ~width self)
end
let pp = Pretty.to_format ~width:80
(* helpers *)
let sp = char ' '
module Infix = struct
let ( ^ ) = append
let[@inline] ( ^+ ) x y = x ^ sp ^ y
let[@inline] ( ^/ ) x y = x ^ nl ^ y
end
include Infix
let true_ = text "true"
let false_ = text "false"
let bool b =
if b then
true_
else
false_
let int x : t = text (string_of_int x)
let float x : t = text (string_of_float x)
let float_hex x : t = textpf "%h" x
let append_l ?(sep = nil) l =
let rec loop = function
| [] -> nil
| [ x ] -> x
| x :: tl -> x ^ sep ^ loop tl
in
loop l
let append_sp l = append_l ~sep:sp l
let append_nl l = append_l ~sep:nl l
let fill sep = function
| [] -> nil
| [ x ] -> x
| l ->
(* flattened: just like concat *)
let wfl =
List.fold_left (fun wfl x -> wfl + x.wfl) 0 l
+ ((List.length l - 1) * sep.wfl)
in
{ view = Fill { sep; l }; wfl }
let fill_map sep f l = fill sep (List.map f l)
let of_list ?(sep = nil) f l =
let rec loop = function
| [] -> nil
| [ x ] -> f x
| x :: tl -> f x ^ sep ^ loop tl
in
loop l
let bracket l d r : t = group (text l ^ nest 2 (nl ^ d) ^ nl ^ text r)
let sexp_l l : t = char '(' ^ nest 1 (group (append_nl l ^ char ')'))
let sexp_apply a l : t = sexp_l (text a :: l)
module Dump = struct
let list l : t =
let sep = char ';' ^ nl in
group (char '[' ^ nest 1 (fill sep l) ^ char ']')
end

143
src/pp/containers_pp.mli Normal file
View file

@ -0,0 +1,143 @@
(** Pretty printing of documents.
A document is a structured tree of text with
formatting instructions.
It can be rendered into a string ("pretty printed"),
see {!Pretty}.
This follows Wadler's paper "A prettier printer", but with
some changes in the rendering part because we can't rely on lazyness
to make the algebraic implementation efficient.
*)
(** {2 Core} *)
type t
(** The type of documents *)
val nil : t
(** Empty document *)
val char : char -> t
(** Single char. *)
val text : string -> t
(** Text. The string will be split on ['\n']. *)
val textpf : ('a, unit, string, t) format4 -> 'a
(** Text, with a {!Printf}-compatible format.
For example, [textpf "%d-%d" 4 2] is like [text "4-2"]. *)
val textf : ('a, Format.formatter, unit, t) format4 -> 'a
(** Text, with a {!Format}-compatible format.
Note that this will bake-in any formatting done by {!Format}.
Newlines introduced by format will become hard newlines
in the resulting document. *)
val nest : int -> t -> t
(** Increase indentation by [n]. *)
val group : t -> t
(** Group the documents inside this.
Newlines immediately inside this group will either
render as new lines or as spaces, depending on the width available. *)
val append : t -> t -> t
(** Concatenation. *)
val newline : t
(** A line break. *)
val nl : t
(** Alias for {!newline} *)
(* TODO:
val ext : pre:(unit -> string) -> post:(unit -> string) -> t -> t
(** Extension. This is a custom-rendered document.
TODO: customize how long it is?
TODO: generic output, not [unit -> string]
*)
*)
(** {2 Pretty print and rendering} *)
(** Pretty-printing.
These functions are parametrized by a width,
and will try to fit the result within this width. *)
module Pretty : sig
val to_string : width:int -> t -> string
(** Render to a string. *)
val to_buffer : width:int -> Buffer.t -> t -> unit
(** Render to a buffer. *)
val to_format : width:int -> Format.formatter -> t -> unit
end
(** Trivial printing, on a single line.
This is generally ugly, but it's simple and fast when we do not
care about looks. *)
module Flatten : sig
val to_buffer : Buffer.t -> t -> unit
val to_string : t -> string
end
val pp : Format.formatter -> t -> unit
(** {2 Convenience functions *)
module Infix : sig
val ( ^ ) : t -> t -> t
(** Alias of {!append}. *)
val ( ^+ ) : t -> t -> t
(** [x ^+ y] is [x ^ text " " ^ y] *)
val ( ^/ ) : t -> t -> t
(** [x ^/ y] is [x ^ newline ^ y] *)
end
include module type of Infix
val sp : t
(** A single space *)
val append_l : ?sep:t -> t list -> t
(** [append_l ?sep l] is the concatenation of elements of
[l], separated by [sep] (default [nil]) *)
val append_sp : t list -> t
(** [append_sp l] is the concatenation of elements of [l], separated by [' '] *)
val append_nl : t list -> t
(** Same as {!append_l} with [sep=nl] *)
val bool : bool -> t
val int : int -> t
val float : float -> t
val float_hex : float -> t
val of_list : ?sep:t -> ('a -> t) -> 'a list -> t
(** [of_list f l] maps each element of [l] to a document
and concatenates them.
@param sep separator inserted between elements (default [nil]) *)
val bracket : string -> t -> string -> t
(** [bracket l d r] groups [d], indented, between brackets [l] and [r] *)
val sexp_apply : string -> t list -> t
(** [sexp_apply a l] is the S-expr ["(text a …l)"], pretty-printed *)
val sexp_l : t list -> t
(** [sexp_l [l1;…ln]] is the S-expr ["(l1 l2…ln)"], pretty-printed *)
(** Printers that correspond closely to OCaml's syntax. *)
module Dump : sig
val list : t list -> t
end

7
src/pp/dune Normal file
View file

@ -0,0 +1,7 @@
(library
(name containers_pp)
(public_name containers.pp)
(synopsis "Pretty printer for Containers")
(flags :standard)
(libraries containers))