linol/thirdparty/lsp/ocaml-lsp-server/vendor/cmarkit/cmarkit.ml

3194 lines
125 KiB
OCaml

(*---------------------------------------------------------------------------
Copyright (c) 2021 The cmarkit programmers. All rights reserved.
Distributed under the ISC license, see terms at the end of the file.
---------------------------------------------------------------------------*)
module String_map = Map.Make (String)
module Ascii = Cmarkit_base.Ascii
module Text = Cmarkit_base.Text
module Match = Cmarkit_base
module Textloc = Cmarkit_base.Textloc
module Meta = Cmarkit_base.Meta
module Layout = struct
type blanks = string
type nonrec string = string
type nonrec char = char
type count = int
type indent = int
let string ?(meta = Meta.none) s = s, meta
let empty = string ""
end
type byte_pos = Textloc.byte_pos
type line_span = Match.line_span =
(* Substring on a single line, hereafter abbreviated to span *)
{ line_pos : Textloc.line_pos; first : byte_pos; last : byte_pos }
type 'a node = 'a * Meta.t
module Block_line = struct
let _list_of_string flush s = (* cuts [s] on newlines *)
let rec loop s acc max start k =
if k > max then List.rev (flush s start max acc) else
if not (s.[k] = '\n' || s.[k] = '\r')
then loop s acc max start (k + 1) else
let acc = flush s start (k - 1) acc in
let next = k + 1 in
let start =
if s.[k] = '\r' && next <= max && s.[next] = '\n' then next + 1 else
next
in
loop s acc max start start
in
loop s [] (String.length s - 1) 0 0
let flush ?(meta = Meta.none) s start last acc =
let sub = String.sub s start (last - start + 1) in
(sub, meta) :: acc
let flush_tight ?(meta = Meta.none) s start last acc =
(* If [s] has newlines, blanks after newlines are layout *)
if start > last then ("", ("", meta)) :: acc else
match acc with
| [] (* On the first line the blanks are legit *) ->
("", (String.sub s start (last - start + 1), meta)) :: acc
| acc ->
let nb = Match.first_non_blank s ~last ~start in
(String.sub s start (nb - 1 - start + 1),
(String.sub s nb (last - nb + 1), meta)) :: acc
(* Block lines *)
type t = string node
let to_string = fst
let list_of_string ?meta s = _list_of_string (flush ?meta) s
let list_textloc = function
| [] -> Textloc.none | [(_, m)] -> Meta.textloc m
| (_, first) :: _ as l ->
let _, last = List.hd (List.rev l) in
Textloc.reloc ~first:(Meta.textloc first) ~last:(Meta.textloc last)
(* Tight lines *)
type tight = Layout.blanks * t
let tight_to_string l = fst (snd l)
let tight_list_of_string ?meta s = _list_of_string (flush_tight ?meta) s
let tight_list_textloc = function
| [] -> Textloc.none | [_, (_, m)] -> Meta.textloc m
| (_, (_, first)) :: _ as l ->
let (_, (_, last)) = List.hd (List.rev l) in
Textloc.reloc ~first:(Meta.textloc first) ~last:(Meta.textloc last)
(* Blank lines *)
type blank = Layout.blanks node
end
module Label = struct
type key = string
type t = { meta : Meta.t; key : key; text : Block_line.tight list }
let make ?(meta = Meta.none) ~key text = { key; text; meta }
let with_meta meta l = { l with meta }
let meta t = t.meta
let key t = t.key
let text t = t.text
let textloc t = Block_line.tight_list_textloc t.text
let text_to_string t =
String.concat " " (List.map Block_line.tight_to_string t.text)
let compare l0 l1 = String.compare l0.key l1.key
(* Definitions *)
module Map = Map.Make (String)
type def = ..
type defs = def Map.t
(* Resolvers *)
type context =
[ `Def of t option * t
| `Ref of [ `Link | `Image ] * t * (t option) ]
type resolver = context -> t option
let default_resolver = function
| `Def (None, k) -> Some k
| `Def (Some _, k) -> None
| `Ref (_, _, k) -> k
end
module Link_definition = struct
type layout =
{ indent : Layout.indent;
angled_dest : bool;
before_dest : Block_line.blank list;
after_dest : Block_line.blank list;
title_open_delim : Layout.char;
after_title : Block_line.blank list; }
let layout_for_dest dest =
let needs_angles c = Ascii.is_control c || c = ' ' in
let angled_dest = String.exists needs_angles dest in
{ indent = 0; angled_dest; before_dest = [];
after_dest = []; title_open_delim = '\"'; after_title = [] }
let default_layout =
{ indent = 0; angled_dest = false; before_dest = [];
after_dest = []; title_open_delim = '\"'; after_title = [] }
type t =
{ layout : layout;
label : Label.t option;
defined_label : Label.t option;
dest : string node option;
title : Block_line.tight list option; }
let make ?layout ?defined_label ?label ?dest ?title () =
let layout = match dest with
| None -> default_layout | Some (d, _) -> layout_for_dest d
in
let defined_label = match defined_label with None -> label | Some d -> d in
{ layout; label; defined_label; dest; title }
let layout ld = ld.layout
let label ld = ld.label
let defined_label ld = ld.defined_label
let dest ld = ld.dest
let title ld = ld.title
type Label.def += Def of t node
end
module Inline = struct
type t = ..
module Autolink = struct
type t = { is_email : bool; link : string node; }
let is_email a = a.is_email
let link a = a.link
let make link =
let is_email =
let l = String.concat "" ["<"; fst link; ">"] in
match Match.autolink_email l ~last:(String.length l - 1) ~start:0 with
| None -> false | Some _ -> true
in
{ is_email; link }
end
module Break = struct
type type' = [ `Hard | `Soft ]
type t =
{ layout_before : Layout.blanks node;
type' : type';
layout_after : Layout.blanks node; }
let make
?(layout_before = Layout.empty) ?(layout_after = Layout.empty) type'
=
{ layout_before; type'; layout_after }
let type' b = b.type'
let layout_before b = b.layout_before
let layout_after b = b.layout_after
end
module Code_span = struct
type t =
{ backtick_count : Layout.count;
code_layout : Block_line.tight list; }
let make ~backtick_count code_layout = { backtick_count; code_layout }
let min_backtick_count ~min counts =
let rec loop min = function
| c :: cs -> if min <> c then min else loop (c + 1) cs | [] -> min
in
loop min (List.sort Int.compare counts)
let of_string ?(meta = Meta.none) = function
| "" -> { backtick_count = 1 ; code_layout = ["", ("", meta)] }
| s ->
(* This finds out the needed backtick count, whether spaces are needed,
and treats blanks after newline as layout *)
let max = String.length s - 1 in
let need_sp = s.[0] = '`' || s.[max] = '`' in
let s = if need_sp then String.concat "" [" "; s; " "] else s in
let backtick_counts, code_layout =
let rec loop bt_counts acc max btc start k = match k > max with
| true ->
(* assert (btc = 0) because of [need_sp] *)
bt_counts,
if acc = [] then ["", (s, meta)] else
List.rev (Block_line.flush_tight ~meta s start max acc)
| false ->
if s.[k] = '`'
then loop bt_counts acc max (btc + 1) start (k + 1) else
let bt_counts = if btc > 0 then btc :: bt_counts else bt_counts in
if not (s.[k] = '\n' || s.[k] = '\r')
then loop bt_counts acc max 0 start (k + 1) else
let acc = Block_line.flush_tight ~meta s start (k - 1) acc in
let start =
if k + 1 <= max && s.[k] = '\r' && s.[k + 1] = '\n'
then k + 2 else k + 1
in
loop bt_counts acc max 0 start start
in
loop [] [] max 0 0 0
in
let backtick_count = min_backtick_count ~min:1 backtick_counts in
{ backtick_count; code_layout }
let backtick_count cs = cs.backtick_count
let code_layout cs = cs.code_layout
let code cs =
(* Extract code, see https://spec.commonmark.org/0.30/#code-spans *)
let sp c = Char.equal c ' ' in
let s = List.map Block_line.tight_to_string cs.code_layout in
let s = String.concat " " s in
if s = "" then "" else
if s.[0] = ' ' && s.[String.length s - 1] = ' ' &&
not (String.for_all sp s)
then String.sub s 1 (String.length s - 2) else s
end
module Emphasis = struct
type inline = t
type t = { delim : Layout.char; inline : inline }
let make ?(delim = '*') inline = { delim; inline }
let inline e = e.inline
let delim e = e.delim
end
module Link = struct
type inline = t
type reference_layout = [ `Collapsed | `Full | `Shortcut ]
type reference =
[ `Inline of Link_definition.t node
| `Ref of reference_layout * Label.t * Label.t ]
type t = { text : inline; reference : reference; }
let make text reference = { text; reference }
let text l = l.text
let reference l = l.reference
let referenced_label l = match l.reference with
| `Inline _ -> None | `Ref (_, _, k) -> Some k
let reference_definition defs l = match l.reference with
| `Inline ld -> Some (Link_definition.Def ld)
| `Ref (_, _, def) -> Label.Map.find_opt (Label.key def) defs
let is_unsafe l =
let allowed_data_url l =
let allowed = ["image/gif"; "image/png"; "image/jpeg"; "image/webp"] in
(* Extract mediatype from data:[<mediatype>][;base64],<data> *)
match String.index_from_opt l 4 ',' with
| None -> false
| Some j ->
let k = match String.index_from_opt l 4 ';' with
| None -> j | Some k -> k
in
let t = String.sub l 5 (min j k - 5) in
List.mem t allowed
in
Ascii.caseless_starts_with ~prefix:"javascript:" l ||
Ascii.caseless_starts_with ~prefix:"vbscript:" l ||
Ascii.caseless_starts_with ~prefix:"file:" l ||
(Ascii.caseless_starts_with ~prefix:"data:" l && not (allowed_data_url l))
end
module Raw_html = struct
type t = Block_line.tight list
end
module Text = struct
type t = string
end
type t +=
| Autolink of Autolink.t node
| Break of Break.t node
| Code_span of Code_span.t node
| Emphasis of Emphasis.t node
| Image of Link.t node
| Inlines of t list node
| Link of Link.t node
| Raw_html of Raw_html.t node
| Strong_emphasis of Emphasis.t node
| Text of Text.t node
let empty = Inlines ([], Meta.none)
let err_unknown = "Unknown Cmarkit.Inline.t type extension"
(* Extensions *)
module Strikethrough = struct
type nonrec t = t
let make = Fun.id
let inline = Fun.id
end
module Math_span = struct
type t = { display : bool; tex_layout : Block_line.tight list; }
let make ~display tex_layout = { display; tex_layout }
let display ms = ms.display
let tex_layout ms = ms.tex_layout
let tex ms =
let s = List.map Block_line.tight_to_string ms.tex_layout in
String.concat " "s
end
type t +=
| Ext_strikethrough of Strikethrough.t node
| Ext_math_span of Math_span.t node
(* Functions on inlines *)
let is_empty = function
| Text ("", _) | Inlines ([], _) -> true | _ -> false
let ext_none _ = invalid_arg err_unknown
let meta ?(ext = ext_none) = function
| Autolink (_, m) | Break (_, m) | Code_span (_, m) | Emphasis (_, m)
| Image (_, m) | Inlines (_, m) | Link (_, m) | Raw_html (_, m)
| Strong_emphasis (_, m) | Text (_, m) -> m
| Ext_strikethrough (_, m) -> m | Ext_math_span (_, m) -> m
| i -> ext i
let rec normalize ?(ext = ext_none) = function
| Autolink _ | Break _ | Code_span _ | Raw_html _ | Text _
| Inlines ([], _) | Ext_math_span _ as i -> i
| Image (l, m) -> Image ({ l with text = normalize ~ext l.text }, m)
| Link (l, m) -> Link ({ l with text = normalize ~ext l.text }, m)
| Inlines ([i], _) -> i
| Emphasis (e, m) ->
Emphasis ({ e with inline = normalize ~ext e.inline}, m)
| Strong_emphasis (e, m) ->
Strong_emphasis ({ e with inline = normalize ~ext e.inline}, m)
| Inlines (i :: is, m) ->
let rec loop acc = function
| Inlines (is', m) :: is -> loop acc (List.rev_append (List.rev is') is)
| Text (t', m') as i' :: is ->
begin match acc with
| Text (t, m) :: acc ->
let tl = Textloc.span (Meta.textloc m) (Meta.textloc m') in
let i = Text (t ^ t', Meta.with_textloc ~keep_id:true m tl) in
loop (i :: acc) is
| _ -> loop (normalize ~ext i' :: acc) is
end
| i :: is -> loop (normalize ~ext i :: acc) is
| [] -> List.rev acc
in
let is = loop [normalize ~ext i] is in
(match is with [i] -> i | _ -> Inlines (is, m))
| Ext_strikethrough (i, m) -> Ext_strikethrough (normalize ~ext i, m)
| i -> ext i
let ext_none ~break_on_soft = ext_none
let to_plain_text ?(ext = ext_none) ~break_on_soft i =
let push s acc = (s :: List.hd acc) :: List.tl acc in
let newline acc = [] :: (List.rev (List.hd acc)) :: List.tl acc in
let rec loop ~break_on_soft acc = function
| Autolink (a, _) :: is ->
let acc = push (String.concat "" ["<"; fst a.link; ">"]) acc in
loop ~break_on_soft acc is
| Break ({ type' = `Hard }, _) :: is ->
loop ~break_on_soft (newline acc) is
| Break ({ type' = `Soft }, _) :: is ->
let acc = if break_on_soft then newline acc else (push " " acc) in
loop ~break_on_soft acc is
| Code_span (cs, _) :: is ->
loop ~break_on_soft (push (Code_span.code cs) acc) is
| Emphasis ({ inline }, _) :: is | Strong_emphasis ({ inline }, _) :: is ->
loop ~break_on_soft acc (inline :: is)
| Inlines (is', _) :: is ->
loop ~break_on_soft acc (List.rev_append (List.rev is') is)
| Link (l, _) :: is | Image (l, _) :: is ->
loop ~break_on_soft acc (l.text :: is)
| Raw_html _ :: is ->
loop ~break_on_soft acc is
| Text (t, _) :: is ->
loop ~break_on_soft (push t acc) is
| Ext_strikethrough (i, _) :: is ->
loop ~break_on_soft acc (i :: is)
| Ext_math_span (m, _) :: is ->
loop ~break_on_soft (push (Math_span.tex m) acc) is
| i :: is ->
loop ~break_on_soft acc (ext ~break_on_soft i :: is)
| [] ->
List.rev ((List.rev (List.hd acc)) :: List.tl acc)
in
loop ~break_on_soft ([] :: []) [i]
let id ?buf ?ext i =
let text = to_plain_text ?ext ~break_on_soft:false i in
let s = String.concat "\n" (List.map (String.concat "") text) in
let b = match buf with
| Some b -> Buffer.reset b; b | None -> Buffer.create 256
in
let[@inline] collapse_blanks b ~prev_byte =
(* Collapses non initial white *)
if Ascii.is_blank prev_byte && Buffer.length b <> 0
then Buffer.add_char b '-'
in
let rec loop b s max ~prev_byte k =
if k > max then Buffer.contents b else
match s.[k] with
| ' ' | '\t' as prev_byte -> loop b s max ~prev_byte (k + 1)
| '_' | '-' as c ->
collapse_blanks b ~prev_byte;
Buffer.add_char b c;
loop b s max ~prev_byte:c (k + 1)
| c ->
let () = collapse_blanks b ~prev_byte in
let d = String.get_utf_8_uchar s k in
let u = Uchar.utf_decode_uchar d in
let u = match Uchar.to_int u with 0x0000 -> Uchar.rep | _ -> u in
let k' = k + Uchar.utf_decode_length d in
if Cmarkit_data.is_unicode_punctuation u
then loop b s max ~prev_byte:'\x00' k' else
let () = match Cmarkit_data.unicode_case_fold u with
| None -> Buffer.add_utf_8_uchar b u
| Some fold -> Buffer.add_string b fold
in
let prev_byte = s.[k] in
loop b s max ~prev_byte k'
in
loop b s (String.length s - 1) ~prev_byte:'\x00' 0
end
(* Blocks *)
module Block = struct
type t = ..
module Blank_line = struct
type t = Layout.blanks
end
module Block_quote = struct
type nonrec t = { indent : Layout.indent; block : t; }
let make ?(indent = 0) block = { indent; block }
let indent bq = bq.indent
let block bq = bq.block
end
module Code_block = struct
type fenced_layout =
{ indent : Layout.indent;
opening_fence : Layout.string node;
closing_fence : Layout.string node option; }
let default_fenced_layout =
{ indent = 0;
opening_fence = Layout.empty;
closing_fence = Some Layout.empty }
type layout = [ `Indented | `Fenced of fenced_layout ]
type t =
{ layout : layout;
info_string : string node option;
code : string node list; }
let make ?(layout = `Fenced default_fenced_layout) ?info_string code =
let layout = match info_string, layout with
| Some _, `Indented -> `Fenced default_fenced_layout
| _, layout -> layout
in
{ layout; info_string; code }
let layout cb = cb.layout
let info_string cb = cb.info_string
let code cb = cb.code
let make_fence cb =
let rec loop char counts = function
| [] -> counts
| (c, _) :: cs ->
let max = String.length c - 1 in
let k = ref 0 in
while (!k <= max && c.[!k] = char) do incr k done;
loop char (if !k <> 0 then !k :: counts else counts) cs
in
let char = match cb.info_string with
| Some (i, _) when String.exists (Char.equal '`') i -> '~'
| None | Some _ -> '`'
in
let counts = loop char [] cb.code in
char,
Inline.Code_span.min_backtick_count (* not char specific *) ~min:3 counts
let language_of_info_string s =
let rec next_white s max i =
if i > max || Ascii.is_white s.[i] then i else
next_white s max (i + 1)
in
if s = "" then None else
let max = String.length s - 1 in
let white = next_white s max 0 in
let rem_first = Match.first_non_blank s ~last:max ~start:white in
let lang = String.sub s 0 white in
if lang = "" then None else
Some (lang, String.sub s rem_first (max - rem_first + 1))
let is_math_block = function
| None -> false | Some (i, _) -> match language_of_info_string i with
| Some ("math", _) -> true
| Some _ | None -> false
end
module Heading = struct
type atx_layout =
{ indent : Layout.indent;
after_opening : Layout.blanks;
closing : Layout.string; }
let default_atx_layout = { indent = 0; after_opening = ""; closing = "" }
type setext_layout =
{ leading_indent : Layout.indent;
trailing_blanks : Layout.blanks;
underline_indent : Layout.indent;
underline_count : Layout.count node;
underline_blanks : Layout.blanks; }
type layout = [ `Atx of atx_layout | `Setext of setext_layout ]
type id = [ `Auto of string | `Id of string ]
type t = { layout : layout; level : int; inline : Inline.t; id : id option }
let make ?id ?(layout = `Atx default_atx_layout) ~level inline =
let max = match layout with `Atx _ -> 6 | `Setext _ -> 2 in
let level = Int.max 1 (Int.min level max) in
{layout; level; inline; id}
let layout h = h.layout
let level h = h.level
let inline h = h.inline
let id h = h.id
end
module Html_block = struct
type t = string node list
end
module List_item = struct
type block = t
type t =
{ before_marker : Layout.indent;
marker : Layout.string node;
after_marker : Layout.indent;
block : block;
ext_task_marker : Uchar.t node option }
let make
?(before_marker = 0) ?(marker = Layout.empty) ?(after_marker = 1)
?ext_task_marker block
=
{ before_marker; marker; after_marker; block; ext_task_marker }
let block i = i.block
let before_marker i = i.before_marker
let marker i = i.marker
let after_marker i = i.after_marker
let ext_task_marker i = i.ext_task_marker
let task_status_of_task_marker u = match Uchar.to_int u with
| 0x0020 -> `Unchecked
| 0x0078 (* x *) | 0x0058 (* X *) | 0x2713 (**) | 0x2714 (**)
| 0x10102 (* 𐄂 *) | 0x1F5F8 (* 🗸*) -> `Checked
| 0x007E (* ~ *) -> `Cancelled
| _ -> `Other u
end
module List' = struct
type type' = [ `Unordered of Layout.char | `Ordered of int * Layout.char ]
type t =
{ type' : type';
tight : bool;
items : List_item.t node list; }
let make ?(tight = true) type' items = { type'; tight; items }
let type' l = l.type'
let tight l = l.tight
let items l = l.items
end
module Paragraph = struct
type t =
{ leading_indent : Layout.indent;
inline : Inline.t;
trailing_blanks : Layout.blanks; }
let make ?(leading_indent = 0) ?(trailing_blanks = "") inline =
{ leading_indent; inline; trailing_blanks }
let inline p = p.inline
let leading_indent p = p.leading_indent
let trailing_blanks p = p.trailing_blanks
end
module Thematic_break = struct
type t = { indent : Layout.indent; layout : Layout.string }
let make ?(indent = 0) ?(layout = "---") () = { indent; layout }
let indent t = t.indent
let layout t = t.layout
end
type t +=
| Blank_line of Layout.blanks node
| Block_quote of Block_quote.t node
| Blocks of t list node
| Code_block of Code_block.t node
| Heading of Heading.t node
| Html_block of Html_block.t node
| Link_reference_definition of Link_definition.t node
| List of List'.t node
| Paragraph of Paragraph.t node
| Thematic_break of Thematic_break.t node
let empty = Blocks ([], Meta.none)
(* Extensions *)
module Table = struct
type align = [ `Left | `Center | `Right ]
type sep = align option * Layout.count
type cell_layout = Layout.blanks * Layout.blanks
type row =
[ `Header of (Inline.t * cell_layout) list
| `Sep of sep node list
| `Data of (Inline.t * cell_layout) list ]
type t =
{ indent : Layout.indent;
col_count : int;
rows : (row node * Layout.blanks) list }
let col_count rows =
let rec loop c = function
| (((`Header cols | `Data cols), _), _) :: rs ->
loop (Int.max (List.length cols) c) rs
| (((`Sep cols), _), _) :: rs ->
loop (Int.max (List.length cols) c) rs
| [] -> c
in
loop 0 rows
let make ?(indent = 0) rows = { indent; col_count = col_count rows; rows }
let indent t = t.indent
let col_count t = t.col_count
let rows t = t.rows
let parse_sep_row cs =
let rec loop acc = function
| [] -> Some (List.rev acc)
| (Inline.Text (s, meta), ("", "")) :: cs ->
if s = "" then None else
let max = String.length s - 1 in
let first_colon = s.[0] = ':' and last_colon = s.[max] = ':' in
let first = if first_colon then 1 else 0 in
let last = if last_colon then max - 1 else max in
begin
match
for i = first to last do if s.[i] <> '-' then raise Exit; done
with
| exception Exit -> None
| () ->
let count = last - first + 1 in
let sep = match first_colon, last_colon with
| false, false -> None
| true, true -> Some `Center
| true, false -> Some `Left
| false, true -> Some `Right
in
loop (((sep, count), meta) :: acc) cs
end
| _ -> None
in
loop [] cs
end
module Footnote = struct
type nonrec t =
{ indent : Layout.indent;
label : Label.t;
defined_label : Label.t option;
block : t }
let make ?(indent = 0) ?defined_label:d label block =
let defined_label = match d with None -> Some label | Some d -> d in
{ indent; label; defined_label; block }
let indent fn = fn.indent
let label fn = fn.label
let defined_label fn = fn.defined_label
let block fn = fn.block
type Label.def += Def of t node
let stub label defined_label =
Def ({ indent = 0; label; defined_label; block = empty}, Meta.none)
end
type t +=
| Ext_math_block of Code_block.t node
| Ext_table of Table.t node
| Ext_footnote_definition of Footnote.t node
(* Functions on blocks *)
let err_unknown = "Unknown Cmarkit.Block.t type extension"
let ext_none _ = invalid_arg err_unknown
let meta ?(ext = ext_none) = function
| Blank_line (_, m) | Block_quote (_, m) | Blocks (_, m) | Code_block (_, m)
| Heading (_, m) | Html_block (_, m) | Link_reference_definition (_, m)
| List (_, m) | Paragraph (_, m) | Thematic_break (_, m)
| Ext_math_block (_, m) | Ext_table (_, m)
| Ext_footnote_definition (_, m) -> m
| b -> ext b
let rec normalize ?(ext = ext_none) = function
| Blank_line _ | Code_block _ | Heading _ | Html_block _
| Link_reference_definition _ | Paragraph _ | Thematic_break _
| Blocks ([], _) | Ext_math_block _ | Ext_table _ as b -> b
| Block_quote (b, m) ->
let b = { b with block = normalize ~ext b.block } in
Block_quote (b, m)
| List (l, m) ->
let item (i, meta) =
let block = List_item.block i in
{ i with List_item.block = normalize ~ext block }, meta
in
List ({ l with items = List.map item l.items }, m)
| Blocks (b :: bs, m) ->
let rec loop acc = function
| Blocks (bs', m) :: bs -> loop acc (List.rev_append (List.rev bs') bs)
| b :: bs -> loop (normalize ~ext b :: acc) bs
| [] -> List.rev acc
in
let bs = loop [normalize ~ext b] bs in
(match bs with [b] -> b | _ -> Blocks (bs, m))
| Ext_footnote_definition (fn, m) ->
let fn = { fn with block = normalize ~ext fn.block } in
Ext_footnote_definition (fn, m)
| b -> ext b
let rec defs
?(ext = fun b defs -> invalid_arg err_unknown) ?(init = Label.Map.empty)
= function
| Blank_line _ | Code_block _ | Heading _ | Html_block _
| Paragraph _ | Thematic_break _
| Ext_math_block _ | Ext_table _ -> init
| Block_quote (b, _) -> defs ~ext ~init (Block_quote.block b)
| Blocks (bs, _) -> List.fold_left (fun init b -> defs ~ext ~init b) init bs
| List (l, _) ->
let add init (i, _) = defs ~ext ~init (List_item.block i) in
List.fold_left add init l.items
| Link_reference_definition ld ->
begin match Link_definition.defined_label (fst ld) with
| None -> init
| Some def ->
Label.Map.add (Label.key def) (Link_definition.Def ld) init
end
| Ext_footnote_definition fn ->
let init = match Footnote.defined_label (fst fn) with
| None -> init
| Some def -> Label.Map.add (Label.key def) (Footnote.Def fn) init
in
defs ~ext ~init (Footnote.block (fst fn))
| b -> ext init b
end
(* Parsing *)
(* Closer indexes.
They map closing delimiters to the position where they
start. Shortcuts forward searches in inline parsing. See
Inline_struct. *)
module Pos_set = Set.Make (Int) (* Sets of positions. *)
module Closer = struct
type t =
| Backticks of int (* run length *)
| Right_brack
| Right_paren (* Only for ruling out pathological cases. *)
| Emphasis_marks of char
| Strikethrough_marks
| Math_span_marks of int (* run length *)
let compare = Stdlib.compare
end
module Closer_index = struct
include Map.Make (Closer)
type nonrec t = Pos_set.t t
let add cl pos cidx =
let add = function
| None -> Some (Pos_set.singleton pos)
| Some occs -> Some (Pos_set.add pos occs)
in
update cl add cidx
let closer_pos cl ~after cidx = match find_opt cl cidx with
| None -> None
| Some occs -> Pos_set.find_first_opt (fun pos -> pos > after) occs
let closer_exists cl ~after cidx = match closer_pos cl ~after cidx with
| None -> false | Some _ -> true
end
(* Columns. That notion is needed to handle tab stops.
See https://spec.commonmark.org/current/#tabs *)
type col = int
let[@inline] next_tab_stop col = (col + 4) land (lnot 3)
(* Parser abstraction *)
type parser =
{ file : Textloc.fpath (* input file name *);
i : string (* input string *);
buf : Buffer.t (* scratch buffer. *);
exts : bool; (* parse extensions if [true]. *)
nolocs : bool; (* do not compute locations if [true]. *)
nolayout : bool; (* do not compute layout fields if [true]. *)
heading_auto_ids : bool; (* compute heading ids. *)
nested_links : bool;
mutable defs : Label.defs;
resolver : Label.resolver;
mutable cidx : Closer_index.t; (* For inline parsing. *)
(* Current line (only used during block parsing) *)
mutable current_line_pos : Textloc.line_pos;
mutable current_line_last_char :
(* first char of line - 1 on empty lines *) Textloc.byte_pos;
mutable current_char : Textloc.byte_pos;
mutable current_char_col : col;
mutable next_non_blank :
(* current_line_last_char + 1 if none. *) Textloc.byte_pos;
mutable next_non_blank_col : col;
mutable tab_consumed_cols :
(* number of cols consumed from the tab if i.[current_char] is '\t' *)
col; }
let parser
?(defs = Label.Map.empty) ?(resolver = Label.default_resolver)
?(nested_links = false) ?(heading_auto_ids = false) ?(layout = false)
?(locs = false) ?(file = Textloc.file_none) ~strict i
=
let nolocs = not locs and nolayout = not layout and exts = not strict in
{ file; i; buf = Buffer.create 512; exts; nolocs; nolayout;
heading_auto_ids; nested_links; defs; resolver; cidx = Closer_index.empty;
current_line_pos = 1, 0; current_line_last_char = -1; current_char = 0;
current_char_col = 0; next_non_blank = 0; next_non_blank_col = 0;
tab_consumed_cols = 0; }
let find_label_defining_key p key = match Label.Map.find_opt key p.defs with
| Some (Link_definition.Def ld) -> Link_definition.defined_label (fst ld)
| Some (Block.Footnote.Def fn) -> Block.Footnote.defined_label (fst fn)
| None -> None
| _ -> assert false
let set_label_def p l def = p.defs <- Label.Map.add (Label.key l) def p.defs
let def_label p l =
p.resolver (`Def (find_label_defining_key p (Label.key l), l))
let find_def_for_ref ~image p ref =
let kind = if image then `Image else `Link in
let def = find_label_defining_key p (Label.key ref) in
p.resolver (`Ref (kind, ref, def))
let debug_span p s = String.sub p.i s.first (s.last - s.first + 1)
let debug_line p =
let first = snd p.current_line_pos and last = p.current_line_last_char in
String.sub p.i first (last - first + 1)
let current_line_span p ~first ~last =
{ line_pos = p.current_line_pos; first; last }
(* Making metas and text locations. This is centralized here to be able
to disable their creation which has a non-negligible impact on
performance. *)
let meta p textloc = if p.nolocs then Meta.none else Meta.make ~textloc ()
let textloc_of_span p span =
if p.nolocs then Textloc.none else
let first_byte = span.first and last_byte = span.last in
let first_line = span.line_pos and last_line = span.line_pos in
Textloc.v ~file:p.file ~first_byte ~last_byte ~first_line ~last_line
let textloc_of_lines p ~first ~last ~first_line ~last_line =
if p.nolocs then Textloc.none else
let first_byte = first and first_line = first_line.line_pos in
let last_byte = last and last_line = last_line.line_pos in
Textloc.v ~file:p.file ~first_byte ~last_byte ~first_line ~last_line
let meta_of_spans p ~first:first_line ~last:last_line =
if p.nolocs then Meta.none else
let first = first_line.first and last = last_line.last in
meta p (textloc_of_lines p ~first ~last ~first_line ~last_line)
let meta_of_metas p ~first ~last =
if p.nolocs then Meta.none else
meta p (Textloc.span (Meta.textloc first) (Meta.textloc last))
let clean_raw_span ?pad p span =
Text.utf_8_clean_raw ?pad p.buf p.i ~first:span.first ~last:span.last,
meta p (textloc_of_span p span)
let clean_unref_span p span =
Text.utf_8_clean_unref p.buf p.i ~first:span.first ~last:span.last,
meta p (textloc_of_span p span)
let clean_unesc_unref_span p span =
Text.utf_8_clean_unesc_unref p.buf p.i ~first:span.first ~last:span.last,
meta p (textloc_of_span p span)
let layout_clean_raw_span ?pad p span =
if p.nolayout then Layout.empty else clean_raw_span ?pad p span
let layout_clean_raw_span' ?pad p span =
(* Like [layout_raw_span] but no meta *)
if p.nolayout then "" else
Text.utf_8_clean_raw ?pad p.buf p.i ~first:span.first ~last:span.last
let _tight_block_lines xxx_span p ~rev_spans =
let rec loop p acc = function
| [] -> acc
| [_, fst_line] -> ("", xxx_span p fst_line) :: acc
| (line_start, span) :: spans ->
let acc =
let layout =
if p.nolayout || span.first <= line_start then "" else
Text.utf_8_clean_raw p.buf p.i ~first:line_start
~last:(span.first - 1)
in
(layout, xxx_span p span) :: acc
in
loop p acc spans
in
loop p [] rev_spans
let tight_block_lines p ~rev_spans =
_tight_block_lines clean_unesc_unref_span p ~rev_spans
let raw_tight_block_lines p ~rev_spans =
_tight_block_lines clean_raw_span p ~rev_spans
let first_non_blank_in_span p s = Match.first_non_blank_in_span p.i s
let first_non_blank_over_nl ~next_line p lines line ~start =
match Match.first_non_blank_over_nl ~next_line p.i lines ~line ~start with
| `None -> None
| `This_line non_blank ->
let layout =
if non_blank = start then [] else
[clean_raw_span p { line with first = start ; last = non_blank - 1}]
in
Some (lines, line, layout, non_blank)
| `Next_line (lines, newline, non_blank) ->
let first_layout = clean_raw_span p { line with first = start } in
let next_layout = clean_raw_span p { newline with last = non_blank -1 } in
let layout = [first_layout; next_layout] in
Some (lines, newline, layout, non_blank)
(* Inline structure parsing *)
module Inline_struct = struct
(* Tokens for parsing inlines.
The list of tokens of a paragraph are the points to consider to
parse it into inlines. Tokens gradually become [Inline] tokens
containing parsed inlines. Between two tokens there is implicit
textual data. This data gradually becomes part of [Inline] tokens
or, at the end of of the parsing process, becomes [Text] inlines.
The token list also represents newlines explicitly, either via
the [Newline] token or via the [Inline] token since inlines may
start on a line and up on another one. *)
type emphasis_marks =
{ start : byte_pos;
char : char;
count : int;
may_open : bool;
may_close : bool }
type strikethrough_marks =
{ start : byte_pos;
may_open : bool;
may_close : bool }
type math_span_marks =
{ start : byte_pos;
count : int;
may_open : bool;
may_close : bool; }
type token =
| Autolink_or_html_start of { start : byte_pos }
| Backticks of
{ start : byte_pos;
count : int;
escaped : bool }
| Emphasis_marks of emphasis_marks
| Inline of
{ start : byte_pos;
inline : Inline.t;
endline : line_span;
next : byte_pos }
| Link_start of
{ start : byte_pos;
image : bool }
| Newline of
{ start : (* points on spaces or \ on the broken line *) byte_pos;
break_type : Inline.Break.type';
newline : line_span; }
| Right_brack of { start : byte_pos }
| Right_paren of { start : byte_pos } (* Only used for closer index *)
| Strikethrough_marks of strikethrough_marks
| Math_span_marks of math_span_marks
let token_start = function
| Autolink_or_html_start { start } | Backticks { start }
| Emphasis_marks { start } | Inline { start } -> start | Link_start { start }
| Newline { start } | Right_brack { start } -> start
| Right_paren { start } -> start
| Strikethrough_marks { start } -> start
| Math_span_marks { start } -> start
let has_backticks ~count ~after cidx =
Closer_index.closer_exists (Closer.Backticks count) ~after cidx
let has_right_brack ~after cidx =
Closer_index.closer_exists Closer.Right_brack ~after cidx
let has_right_paren ~after cidx =
Closer_index.closer_exists Closer.Right_paren ~after cidx
let emphasis_closer_pos ~char ~after cidx =
Closer_index.closer_pos (Closer.Emphasis_marks char) ~after cidx
let has_emphasis_closer ~char ~after cidx =
Closer_index.closer_exists (Closer.Emphasis_marks char) ~after cidx
let has_strikethrough_closer ~after cidx =
Closer_index.closer_exists Closer.Strikethrough_marks ~after cidx
let has_math_span_closer ~count ~after cidx =
Closer_index.closer_exists (Closer.Math_span_marks count) ~after cidx
let rev_token_list_and_make_closer_index toks =
let rec loop cidx acc = function
| Backticks { start; count; _ } as t :: toks ->
let cidx = Closer_index.add (Closer.Backticks count) start cidx in
loop cidx (t :: acc) toks
| Right_brack { start } as t :: toks ->
let cidx = Closer_index.add Closer.Right_brack start cidx in
loop cidx (t :: acc) toks
| Right_paren { start } :: toks ->
let cidx = Closer_index.add Closer.Right_paren start cidx in
loop cidx (* we don't use the token for parsing *) acc toks
| Emphasis_marks { start; char; may_close = true } as t :: toks ->
let cidx = Closer_index.add (Closer.Emphasis_marks char) start cidx in
loop cidx (t :: acc) toks
| Strikethrough_marks { start; may_close = true } as t :: toks ->
let cidx = Closer_index.add Closer.Strikethrough_marks start cidx in
loop cidx (t :: acc) toks
| Math_span_marks { start; count; may_close = true } as t :: toks ->
let cidx = Closer_index.add (Closer.Math_span_marks count) start cidx in
loop cidx (t :: acc) toks
| t :: toks -> loop cidx (t :: acc) toks
| [] -> cidx, acc
in
loop Closer_index.empty [] toks
let rec rev_tokens_and_shorten_last_line ~to_last:last acc = function
(* Used to make the text delimitation precise for nested inlines *)
| Newline ({ newline; _ } as nl) :: toks ->
let t = Newline { nl with newline = { newline with last }} in
List.rev_append toks (t :: acc)
| Inline ({ endline; _ } as i) :: toks ->
let t = Inline { i with endline = { endline with last }} in
List.rev_append toks (t :: acc)
| t :: toks -> rev_tokens_and_shorten_last_line ~to_last:last (t :: acc) toks
| [] -> acc
let rec drop_stop_after_right_brack = function
| Right_brack _ :: toks -> toks
| _ :: toks -> drop_stop_after_right_brack toks
| [] -> []
let rec drop_until ~start = function
| t :: toks when token_start t < start -> drop_until ~start toks
| toks -> toks
let rec next_line = function
(* N.B. when we use this function considering Inline tokens is not needed. *)
| [] -> None
| Newline { newline; _ } :: toks -> Some (toks, newline)
| _ :: toks -> next_line toks
(* Tokenization *)
let newline_token s prev_line newline =
(* https://spec.commonmark.org/current/#softbreak *)
(* https://spec.commonmark.org/current/#hard-line-breaks *)
let start (* includes spaces or '\\' on prev line *), break_type =
let first = prev_line.first and last = prev_line.last in
let non_space = Match.rev_drop_spaces s ~first ~start:last in
if non_space = last && s.[non_space] = '\\' then (non_space, `Hard) else
let start = non_space + 1 in
(start, if last - start + 1 >= 2 then `Hard else `Soft)
in
Newline { start; break_type; newline }
let add_backtick_token acc s line ~prev_bslash ~start =
let last = Match.run_of ~char:'`' s ~last:line.last ~start:(start + 1) in
let count = last - start + 1 and escaped = prev_bslash in
Backticks {start; count; escaped} :: acc, last + 1
let try_add_image_link_start_token acc s line ~start =
let next = start + 1 in
if next > line.last || s.[next] <> '[' then acc, next else
Link_start { start; image = true } :: acc, next + 1
let try_add_emphasis_token acc s line ~start =
let first = line.first and last = line.last and char = s.[start] in
let run_last = Match.run_of ~char ~last s ~start:(start + 1) in
let count = run_last - start + 1 in
let prev_uchar = Match.prev_uchar s ~first ~before:start in
let next_uchar = Match.next_uchar s ~last ~after:run_last in
let prev_white = Cmarkit_data.is_unicode_whitespace prev_uchar in
let next_white = Cmarkit_data.is_unicode_whitespace next_uchar in
let prev_punct = Cmarkit_data.is_unicode_punctuation prev_uchar in
let next_punct = Cmarkit_data.is_unicode_punctuation next_uchar in
let is_left_flanking =
not next_white && (not next_punct || (prev_white || prev_punct))
in
let is_right_flanking =
not prev_white && (not prev_punct || (next_white || next_punct))
in
let next = run_last + 1 in
if not is_left_flanking && not is_right_flanking then acc, next else
let may_open =
(char = '*' && is_left_flanking) ||
(char = '_' && is_left_flanking && (not is_right_flanking || prev_punct))
in
let may_close =
(char = '*' && is_right_flanking) ||
(char = '_' && is_right_flanking && (not is_left_flanking || next_punct))
in
if not may_open && not may_close then acc, next else
Emphasis_marks { start; char; count; may_open; may_close } :: acc, next
let try_add_strikethrough_marks_token acc s line ~start =
let first = line.first and last = line.last and char = s.[start] in
let run_last = Match.run_of ~char ~last s ~start:(start + 1) in
let count = run_last - start + 1 in
let next = run_last + 1 in
if count <> 2 then acc, next else
let prev_uchar = Match.prev_uchar s ~first ~before:start in
let next_uchar = Match.next_uchar s ~last ~after:run_last in
let may_close = not (Cmarkit_data.is_unicode_whitespace prev_uchar) in
let may_open = not (Cmarkit_data.is_unicode_whitespace next_uchar) in
if not may_open && not may_close then acc, next else
Strikethrough_marks { start; may_open; may_close } :: acc, next
let try_add_math_span_marks_token acc s line ~start =
let first = line.first and last = line.last and char = s.[start] in
let run_last = Match.run_of ~char ~last s ~start:(start + 1) in
let count = run_last - start + 1 in
let next = run_last + 1 in
if count > 2 then acc, next else
let may_open, may_close =
if count <> 1 then true, true else
let prev_uchar = Match.prev_uchar s ~first ~before:start in
let next_uchar = Match.next_uchar s ~last ~after:run_last in
let may_close = not (Cmarkit_data.is_unicode_whitespace prev_uchar) in
let may_open = not (Cmarkit_data.is_unicode_whitespace next_uchar) in
may_open, may_close
in
if not may_open && not may_close then acc, next else
Math_span_marks { start; count; may_open; may_close } :: acc, next
let tokenize ~exts s lines =
(* For inlines this is where we conditionalize for extensions. All code
paths after that no longer check for p.exts: there just won't be
extension data to process if [exts] was not [true] here. *)
let rec loop ~exts s lines line ~prev_bslash acc k =
if k > line.last then match lines with
| [] -> rev_token_list_and_make_closer_index acc
| newline :: lines ->
let t = newline_token s line newline in
loop ~exts s lines newline ~prev_bslash:false (t :: acc) newline.first
else
if s.[k] = '\\'
then loop ~exts s lines line ~prev_bslash:(not prev_bslash) acc (k+1) else
let acc, next = match s.[k] with
| '`' -> add_backtick_token acc s line ~prev_bslash ~start:k
| c when prev_bslash -> acc, k + 1
| '*' | '_' -> try_add_emphasis_token acc s line ~start:k
| ']' -> Right_brack { start = k } :: acc, k + 1
| '[' -> Link_start { start = k; image = false } :: acc, k + 1
| '!' -> try_add_image_link_start_token acc s line ~start:k
| '<' -> Autolink_or_html_start { start = k } :: acc, k + 1
| ')' -> Right_paren { start = k } :: acc, k + 1
| '~' when exts -> try_add_strikethrough_marks_token acc s line ~start:k
| '$' when exts -> try_add_math_span_marks_token acc s line ~start:k
| _ -> acc, k + 1
in
loop ~exts s lines line ~prev_bslash:false acc next
in
let line = List.hd lines and lines = List.tl lines in
let cidx, toks = loop ~exts s lines line ~prev_bslash:false [] line.first in
cidx, toks, line
(* Making inlines and inline tokens *)
let break_inline p line ~start ~break_type:type' ~newline =
let layout_before = { line with first = start } in
let layout_after =
let non_blank = first_non_blank_in_span p newline in
{ newline with last = non_blank - 1 }
in
let m = meta_of_spans p ~first:layout_before ~last:layout_after in
let layout_before = layout_clean_raw_span p layout_before in
let layout_after = layout_clean_raw_span p layout_after in
Inline.Break ({ layout_before; type'; layout_after }, m)
let try_add_text_inline p line ~first ~last acc =
if first > last then acc else
let first = match first = line.first with
| true -> first_non_blank_in_span p line (* strip leading blanks *)
| false -> first
in
Inline.Text (clean_unesc_unref_span p { line with first; last }) :: acc
let inlines_inline p ~first ~last ~first_line ~last_line = function
| [i] -> i
| is ->
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
Inline.Inlines (is, meta p textloc)
let code_span_token p ~count ~first ~last ~first_line ~last_line rev_spans =
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
let code_layout = raw_tight_block_lines p ~rev_spans in
let meta = meta p textloc in
let cs = Inline.Code_span ({ backtick_count = count; code_layout }, meta) in
Inline { start = first; inline = cs; endline = last_line; next = last + 1 }
let autolink_token p line ~first ~last ~is_email =
let meta = meta p (textloc_of_span p { line with first; last }) in
let link = { line with first = first + 1; last = last - 1 } in
let link = clean_unref_span p link in
let inline = Inline.Autolink ({ is_email; link }, meta) in
Inline { start = first; inline; endline = line; next = last + 1 }
let raw_html_token p ~first ~last ~first_line ~last_line rev_spans =
let raw = raw_tight_block_lines p ~rev_spans in
let textloc =
let first = Meta.textloc (snd (snd (List.hd raw))) in
let last = snd (List.hd rev_spans) in
let last_byte = last.last and last_line = last.line_pos in
Textloc.set_last first ~last_byte ~last_line
in
let inline = Inline.Raw_html (raw, meta p textloc) in
Inline { start = first; inline; endline = last_line; next = last + 1 }
let link_token p ~first ~last ~first_line ~last_line ~image link =
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
let link = link, meta p textloc in
let inline = if image then Inline.Image link else Inline.Link link in
Inline { start = first; inline; endline = last_line; next = last + 1 }
let emphasis_token p ~first ~last ~first_line ~last_line ~strong emph =
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
let delim = p.i.[first] in
let e = { Inline.Emphasis.delim; inline = emph}, meta p textloc in
let i = if strong then Inline.Strong_emphasis e else Inline.Emphasis e in
Inline { start = first; inline = i ; endline = last_line; next = last + 1 }
let ext_strikethrough_token p ~first ~last ~first_line ~last_line s =
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
let inline = Inline.Ext_strikethrough (s, meta p textloc) in
Inline { start = first; inline; endline = last_line; next = last + 1 }
let ext_math_span_token p ~count ~first ~last ~first_line ~last_line rspans =
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
let tex_layout = raw_tight_block_lines p ~rev_spans:rspans in
let meta = meta p textloc in
let ms = Inline.Math_span.make ~display:(count = 2) tex_layout in
let inline = Inline.Ext_math_span (ms, meta) in
Inline { start = first; inline; endline = last_line; next = last + 1 }
(* Parsers *)
let try_code p toks start_line ~start:cstart ~count ~escaped =
(* https://spec.commonmark.org/current/#code-span *)
if escaped || not (has_backticks ~count ~after:cstart p.cidx) then None else
let rec match_backticks toks line ~count spans k = match toks with
| [] -> None
| Backticks { start; count = c; _ } :: toks ->
if c <> count then match_backticks toks line ~count spans k else
let span = line.first, { line with first = k; last = start - 1} in
let spans = span :: spans in
let first = cstart and last = start + count - 1 in
let first_line = start_line and last_line = line in
let t =
code_span_token p ~count ~first ~last ~first_line ~last_line spans
in
Some (toks, line, t)
| Newline { newline } :: toks ->
let spans = (line.first, { line with first = k }) :: spans in
let k = first_non_blank_in_span p newline in
match_backticks toks newline ~count spans k
| _ :: toks -> match_backticks toks line ~count spans k
in
let first = cstart + count in
match_backticks toks { start_line with first } ~count [] first
let try_math_span p toks start_line ~start:cstart ~count =
if not (has_math_span_closer ~count ~after:cstart p.cidx) then None else
let rec match_math_marks toks line ~count spans k = match toks with
| [] -> None
| Math_span_marks { start; count = c; may_close; _ } :: toks ->
if c <> count || not may_close
then match_math_marks toks line ~count spans k else
let span = line.first, { line with first = k; last = start - 1 } in
let spans = span :: spans in
let first = cstart and last = start + count - 1 in
let first_line = start_line and last_line = line in
let t =
ext_math_span_token p ~count ~first ~last ~first_line ~last_line
spans
in
Some (toks, line, t)
| Newline { newline } :: toks ->
let spans = (line.first, { line with first = k }) :: spans in
let k = first_non_blank_in_span p newline in
match_math_marks toks newline ~count spans k
| _ :: toks -> match_math_marks toks line ~count spans k
in
let first = cstart + count in
match_math_marks toks { start_line with first } ~count [] first
let try_autolink_or_html p toks line ~start =
match Match.autolink_uri p.i ~last:line.last ~start with
| Some last ->
let t = autolink_token p line ~first:start ~last ~is_email:false in
let toks = drop_until ~start:(last + 1) toks in
Some (toks, line, t)
| None ->
match Match.autolink_email p.i ~last:line.last ~start with
| Some last ->
let t = autolink_token p line ~first:start ~last ~is_email:true in
let toks = drop_until ~start:(last + 1) toks in
Some (toks, line, t)
| None ->
match Match.raw_html ~next_line p.i toks ~line ~start with
| None -> None
| Some (toks, last_line, spans, last) ->
let first = start and first_line = line in
let t = raw_html_token p ~first ~last ~first_line ~last_line spans in
let toks = drop_until ~start:(last + 1) toks in
Some (toks, last_line, t)
let label_of_rev_spans p ~key rev_spans =
let meta =
if p.nolocs || rev_spans = [] then Meta.none else
let first = snd (List.hd (List.rev rev_spans)) in
let last = snd (List.hd rev_spans) in
meta_of_spans p ~first ~last
in
let text = tight_block_lines p ~rev_spans in
{ Label.meta; key; text }
let try_full_reflink_remainder p toks line ~image ~start (* is label's [ *) =
(* https://spec.commonmark.org/current/#full-reference-link *)
match Match.link_label p.buf ~next_line p.i toks ~line ~start with
| None -> None
| Some (toks, line, rev_spans, last, key) ->
let ref = label_of_rev_spans p ~key rev_spans in
let toks = drop_stop_after_right_brack toks in
match find_def_for_ref p ~image ref with
| None -> Some None
| Some def -> Some (Some (toks, line, `Ref (`Full, ref, def), last))
let try_shortcut_reflink p toks line ~image ~start (* is starting [ or ! *) =
(* https://spec.commonmark.org/current/#shortcut-reference-link *)
let start = if image then start + 1 (* [ *) else start in
match Match.link_label p.buf ~next_line p.i toks ~line ~start with
| None -> None
| Some (toks, line, rev_spans, last, key) ->
let ref = label_of_rev_spans p ~key rev_spans in
let toks = drop_stop_after_right_brack toks in
match find_def_for_ref p ~image ref with
| None -> None
| Some def -> Some (toks, line, `Ref (`Shortcut, ref, def), last)
let try_collapsed_reflink p toks line ~image ~start (* is starting [ or ! *) =
(* https://spec.commonmark.org/current/#collapsed-reference-link *)
let start = if image then start + 1 (* [ *) else start in
match Match.link_label p.buf ~next_line p.i toks ~line ~start with
| None -> None
| Some (toks, line, rev_spans, last, key) ->
let ref = label_of_rev_spans p ~key rev_spans in
let last = last + 2 in (* adjust for ][] *)
let toks = drop_stop_after_right_brack toks in
let toks = drop_stop_after_right_brack toks in
match find_def_for_ref p ~image ref with
| None -> None
| Some def -> Some (toks, line, `Ref (`Collapsed, ref, def), last)
let try_inline_link_remainder p toks start_line ~image ~start:st (* is ( *) =
(* https://spec.commonmark.org/current/#inline-link *)
if not (has_right_paren ~after:st p.cidx) then None else
let first_non_blank_over_nl = first_non_blank_over_nl ~next_line in
match first_non_blank_over_nl p toks start_line ~start:(st + 1) with
| None -> None
| Some (toks, line, before_dest, start) ->
let toks, line, angled_dest, dest, start =
match Match.link_destination p.i ~last:line.last ~start with
| None -> toks, line, false, None, start
| Some (angled, first, last) ->
let dest = clean_unesc_unref_span p { line with first; last } in
let next = if angled then last + 2 else last + 1 in
toks, line, angled, Some dest, next
in
let toks, line, after_dest, title_open_delim, title, start =
match first_non_blank_over_nl p toks line ~start with
| None ->
toks, line, [], '\"', None, start
| Some (_, _, _, start') when start' = start ->
toks, line, [], '\"', None, start
| Some (toks, line, after_destination, start) ->
match Match.link_title ~next_line p.i toks ~line ~start with
| None -> toks, line, after_destination, '\"', None, start
| Some (toks, line, rev_spans, last) ->
let title = tight_block_lines p ~rev_spans in
toks, line, after_destination, p.i.[start],
Some title, last + 1
in
let toks, line, after_title, last =
match first_non_blank_over_nl p toks line ~start with
| None -> toks, line, [], start
| Some (toks, line, after_title, start as v) -> v
in
if last > line.last || p.i.[last] <> ')' then None else
let layout =
{ Link_definition.indent = 0; angled_dest; before_dest;
after_dest; title_open_delim; after_title; }
in
let label = None and defined_label = None in
let ld = { Link_definition.layout; label; defined_label; dest; title }in
let textloc =
let first = st and last = start in
textloc_of_lines p ~first ~last ~first_line:start_line ~last_line:line
in
let ld = (ld, meta p textloc) in
let toks = drop_until ~start:(last + 1) toks in
Some (toks, line, `Inline ld, last)
let find_link_text_tokens p toks start_line ~start =
(* XXX The repetition with first_pass is annoying here.
we should figure out something for that not to happen. *)
(* https://spec.commonmark.org/current/#link-text *)
let rec loop toks line nest acc = match toks with
| Right_brack { start = last } :: toks when nest = 0 ->
let acc = rev_tokens_and_shorten_last_line ~to_last:(last - 1) [] acc in
Some (toks, line, acc, last)
| Backticks { start; count; escaped } :: toks ->
begin match try_code p toks line ~start ~count ~escaped with
| None -> loop toks line nest acc
| Some (toks, line, t) -> loop toks line nest (t :: acc)
end
| Math_span_marks { start; count; may_open; } :: toks ->
if not may_open then loop toks line nest acc else
begin match try_math_span p toks line ~start ~count with
| None -> loop toks line nest acc
| Some (toks, line, t) -> loop toks line nest (t :: acc)
end
| Autolink_or_html_start { start } :: toks ->
begin match try_autolink_or_html p toks line ~start with
| None -> loop toks line nest acc
| Some (toks, line, t) -> loop toks line nest (t :: acc)
end
| Right_brack _ as t :: toks -> loop toks line (nest - 1) (t :: acc)
| Link_start _ as t :: toks -> loop toks line (nest + 1) (t :: acc)
| Newline { newline; _ } as t :: toks -> loop toks newline nest (t :: acc)
| Inline { endline; _ } as t :: toks -> loop toks endline nest (t :: acc)
| t :: toks -> loop toks line nest (t :: acc)
| [] -> None
in
loop toks start_line 0 []
let try_link_def
p ~start ~start_toks ~start_line ~toks ~line ~text_last ~image text
=
let next = text_last + 1 in
let link =
if next > line.last
then try_shortcut_reflink p start_toks start_line ~image ~start else
match p.i.[next] with
| '(' ->
(match try_inline_link_remainder p toks line ~image ~start:next with
| None -> try_shortcut_reflink p start_toks start_line ~image ~start
| Some _ as v -> v)
| '[' ->
let next' = next + 1 in
if next' <= line.last && p.i.[next'] = ']'
then try_collapsed_reflink p start_toks start_line ~image ~start else
let r = try_full_reflink_remainder p toks line ~image ~start:next in
begin match r with
| None -> try_shortcut_reflink p start_toks start_line ~image ~start
| Some None -> None (* Example 570 *)
| Some (Some _ as v) -> v
end
| c ->
try_shortcut_reflink p start_toks start_line ~image ~start
in
match link with
| None -> None
| Some (toks, endline, reference, last) ->
let first = start in
let text =
let first_line = start_line and last_line = line in
inlines_inline p text ~first ~last:text_last ~first_line ~last_line
in
let link = { Inline.Link.text; reference } in
let first_line = start_line and last_line = endline in
let t = link_token p ~image ~first ~last ~first_line ~last_line link in
let had_link = not image && not p.nested_links in
Some (toks, endline, t, had_link)
(* The following sequence of mutually recursive functions define
inline parsing. We have three passes over a paragraph's token
list see the [parse_tokens] function below. *)
let rec try_link p start_toks start_line ~image ~start =
if not (has_right_brack ~after:start p.cidx) then None else
match find_link_text_tokens p start_toks start_line ~start with
| None -> None
| Some (toks, line, text_toks, text_last (* with ] delim *)) ->
let text, had_link =
let text_start =
let first = start + (if image then 2 else 1) in
let last =
if start_line == line then text_last - 1 else start_line.last
in
{ start_line with first; last }
in
parse_tokens p text_toks text_start
in
if had_link && not image
then None (* Could try to keep render *) else
try_link_def
p ~start ~start_toks ~start_line ~toks ~line ~text_last ~image text
and first_pass p toks line =
(* Parse inline atoms and links. Links are parsed here otherwise
link reference data gets parsed as atoms. *)
let rec loop p toks line ~had_link acc = match toks with
| [] -> List.rev acc, had_link
| Backticks { start; count; escaped } :: toks ->
begin match try_code p toks line ~start ~count ~escaped with
| None -> loop p toks line ~had_link acc
| Some (toks, line, t) -> loop p toks line ~had_link (t :: acc)
end
| Math_span_marks { start; count; may_open; } :: toks ->
if not may_open then loop p toks line ~had_link acc else
begin match try_math_span p toks line ~start ~count with
| None -> loop p toks line ~had_link acc
| Some (toks, line, t) -> loop p toks line ~had_link (t :: acc)
end
| Autolink_or_html_start { start } :: toks ->
begin match try_autolink_or_html p toks line ~start with
| None -> loop p toks line ~had_link acc
| Some (toks, line, t) -> loop p toks line ~had_link (t :: acc)
end
| Link_start { start; image } :: toks ->
begin match try_link p toks line ~image ~start with
| None -> loop p toks line ~had_link acc
| Some (toks, line, t, had_link) ->
loop p toks line ~had_link (t :: acc)
end
| Right_brack start :: toks -> loop p toks line ~had_link acc
| Newline { newline = l } as t :: toks -> loop p toks l ~had_link (t :: acc)
| t :: toks -> loop p toks line ~had_link (t :: acc)
in
loop p toks line ~had_link:false []
(* Second pass *)
and find_emphasis_text p toks line ~opener =
let marks_match ~marks ~opener =
(opener.char = marks.char) &&
(not (marks.may_open || opener.may_close) ||
marks.count mod 3 = 0 || (opener.count + marks.count) mod 3 != 0)
in
let marks_has_precedence p ~marks ~opener =
if marks.char = opener.char (* Rule 16 *) then true else (* Rule 15 *)
emphasis_closer_pos ~char:marks.char ~after:marks.start p.cidx <
emphasis_closer_pos ~char:opener.char ~after:marks.start p.cidx
in
let rec loop p toks line acc ~opener = match toks with
| [] -> Either.Left (List.rev acc) (* No match but keep nested work done *)
| Emphasis_marks marks as t :: toks ->
let after = marks.start in
if marks.may_close && marks_match ~marks ~opener then
let used = if marks.count >= 2 && opener.count >= 2 then 2 else 1 in
let to_last = marks.start - 1 in
let acc = rev_tokens_and_shorten_last_line ~to_last [] acc in
Either.Right (toks, line, used, acc, marks)
else if marks.may_open && marks_has_precedence p ~marks ~opener then
match try_emphasis p toks line ~opener:marks with
| Either.Left toks -> loop p toks line acc ~opener
| Either.Right (toks, line) -> loop p toks line acc ~opener
else if has_emphasis_closer ~char:opener.char ~after p.cidx then
loop p toks line (t :: acc) ~opener
else (Either.Left (List.rev_append (t :: acc) toks))
| Newline { newline = l } as t :: toks -> loop p toks l (t :: acc) ~opener
| Inline { endline = l } as t :: toks -> loop p toks l (t :: acc) ~opener
| t :: toks -> loop p toks line (t :: acc) ~opener
in
loop p toks line [] ~opener
and try_emphasis p start_toks start_line ~opener =
let start = opener.start in
if not (has_emphasis_closer ~char:opener.char ~after:start p.cidx)
then Either.Left start_toks else
match find_emphasis_text p start_toks start_line ~opener with
| Either.Left _ as r -> r
| Either.Right (toks, line, used, emph_toks, closer) ->
let text_first = start + opener.count in
let text_last = closer.start - 1 (* XXX prev line ? *) in
let first = text_first - used in
let last = closer.start + used - 1 in
let first_line = start_line and last_line = line in
let emph =
let text_start =
let last =
if start_line == line then text_last else start_line.last
in
{ start_line with first = text_first; last }
in
(* No need to redo first pass *)
let emph_toks = second_pass p emph_toks text_start in
let text = last_pass p emph_toks text_start in
inlines_inline p text ~first ~last:text_last ~first_line ~last_line
in
let toks =
let count = closer.count - used in
if count = 0 then toks else
Emphasis_marks { closer with start = last + 1; count } :: toks
in
let toks =
let strong = used = 2 in
emphasis_token p ~first ~last ~first_line ~last_line ~strong emph ::
toks
in
let toks =
let count = opener.count - used in
if count = 0 then toks else
Emphasis_marks { opener with count } :: toks
in
Either.Right (toks, line)
and find_strikethrough_text p toks start_line =
let rec loop p toks line acc = match toks with
| [] -> Either.Left (List.rev acc) (* No match but keep nested work done *)
| Strikethrough_marks marks :: toks ->
if marks.may_close then
let to_last = marks.start - 1 in
let acc = rev_tokens_and_shorten_last_line ~to_last [] acc in
Either.Right (toks, line, acc, marks)
else if marks.may_open then
match try_strikethrough p toks line ~opener:marks with
| Either.Left toks -> loop p toks line acc
| Either.Right (toks, line) -> loop p toks line acc
else assert false
| Newline { newline = l } as t :: toks -> loop p toks l (t :: acc)
| Inline { endline = l } as t :: toks -> loop p toks l (t :: acc)
| t :: toks -> loop p toks line (t :: acc)
in
loop p toks start_line []
and try_strikethrough p start_toks start_line ~opener =
let start = opener.start in
if not (has_strikethrough_closer ~after:start p.cidx)
then Either.Left start_toks else
match find_strikethrough_text p start_toks start_line with
| Either.Left _ as r -> r
| Either.Right (toks, line, stroken_toks, closer) ->
let first_line = start_line and last_line = line in
let text =
let first = start + 2 in
let last = closer.start - 1 in
let text_start =
let last =
if start_line == line then last else start_line.last
in
{ start_line with first; last }
in
(* No need to redo first pass *)
let emph_toks = second_pass p stroken_toks text_start in
let text = last_pass p emph_toks text_start in
inlines_inline p text ~first ~last ~first_line ~last_line
in
let toks =
let first = opener.start and last = closer.start + 1 in
ext_strikethrough_token p ~first ~last ~first_line ~last_line text
:: toks
in
Either.Right (toks, line)
and second_pass p toks line =
let rec loop p toks line acc = match toks with
| [] -> List.rev acc
| Emphasis_marks ({ may_open } as opener) :: toks ->
if not may_open then loop p toks line acc else
begin match try_emphasis p toks line ~opener with
| Either.Left toks -> loop p toks line acc
| Either.Right (toks, line) -> loop p toks line acc
end
| Strikethrough_marks ({ may_open } as opener) :: toks ->
if not may_open then loop p toks line acc else
begin match try_strikethrough p toks line ~opener with
| Either.Left toks -> loop p toks line acc
| Either.Right (toks, line) -> loop p toks line acc
end
| Newline { newline } as t :: toks -> loop p toks newline (t :: acc)
| Inline { endline } as t :: toks -> loop p toks endline (t :: acc)
| t :: toks -> loop p toks line (t :: acc)
in
loop p toks line []
(* Last pass *)
and last_pass p toks line =
(* Only [Inline] and [Newline] tokens remain. We fold over them to
convert them to [inline] values and [Break]s. [Text] inlines
are created for data between them. *)
let rec loop toks line acc k = match toks with
| [] ->
List.rev (try_add_text_inline p line ~first:k ~last:line.last acc)
| Newline { start; break_type; newline } :: toks ->
let acc = try_add_text_inline p line ~first:k ~last:(start - 1) acc in
let break = break_inline p line ~start ~break_type ~newline in
loop toks newline (break :: acc) newline.first
| Inline { start; inline; endline; next } :: toks ->
let acc = try_add_text_inline p line ~first:k ~last:(start - 1) acc in
let acc = match inline with
| Inline.Inlines (is, _meta_stub) -> List.rev_append (List.rev is) acc
| i -> i :: acc
in
loop toks endline acc next
| (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _
| Emphasis_marks _ | Right_paren _ | Strikethrough_marks _
| Math_span_marks _) :: _ ->
assert false
in
loop toks line [] line.first
and parse_tokens p toks first_line =
let toks, had_link = first_pass p toks first_line in
let toks = second_pass p toks first_line in
last_pass p toks first_line, had_link
let strip_paragraph p lines =
(* Remove initial and final blanks. Initial blank removal on
other paragraph lines is done during the inline parsing
and integrated into the AST for layout preservation. *)
let last, trailing_blanks =
let line = List.hd lines in
let first = line.first and start = line.last in
let non_blank = Match.last_non_blank p.i ~first ~start in
{ line with last = non_blank},
layout_clean_raw_span' p { line with first = non_blank + 1; }
in
let lines = List.rev (last :: List.tl lines) in
let first, leading_indent =
let line = List.hd lines in
let non_blank = first_non_blank_in_span p line in
{ line with first = non_blank },
non_blank - line.first
in
let lines = first :: List.tl lines in
let meta = meta_of_spans p ~first ~last in
(leading_indent, trailing_blanks), meta, lines
let parse p lines =
let layout, meta, lines = strip_paragraph p lines in
let cidx, toks, first_line = tokenize ~exts:p.exts p.i lines in
p.cidx <- cidx;
let is, _had_link = parse_tokens p toks first_line in
let inline = match is with [i] -> i | is -> Inline.Inlines (is, meta) in
layout, inline
(* Parsing table rows *)
let get_blanks p line ~before k =
let nb = Match.first_non_blank p.i ~last:(before - 1) ~start:k in
layout_clean_raw_span' p { line with first = k; last = nb - 1 }, nb
let make_col p = function
| [] -> assert false
| [i] -> i
| is ->
let last = Inline.meta (List.hd is) in
let is = List.rev is in
let first = Inline.meta (List.hd is) in
let meta = meta_of_metas p ~first ~last in
Inline.Inlines (is, meta)
let find_pipe p line ~before k =
let text p ~first ~last =
Inline.Text (clean_unesc_unref_span p { line with first; last })
in
let n = Match.first_non_escaped_char '|' p.i ~last:(before - 1) ~start:k in
if n = before then `Not_found (text p ~first:k ~last:(n - 1)) else
let nb = Match.last_non_blank p.i ~first:k ~start:(n - 1) in
let after =
layout_clean_raw_span' p { line with first = nb + 1; last = n - 1 }
in
let text = if nb < k then None else Some (text p ~first:k ~last:nb) in
`Found (text, after, n + 1)
let start_col p line ~before k =
let bbefore, k = get_blanks p line ~before k in
if k >= before then `Start (bbefore, []) else
match find_pipe p line ~before k with
| `Not_found text -> `Start (bbefore, [text])
| `Found (text, bafter, k) ->
let text = match text with
| Some text -> text
| None ->
let l = textloc_of_span p { line with first = k; last = k - 1 }in
(Inline.Inlines ([], meta p l))
in
`Col ((text, (bbefore, bafter)), k)
let rec finish_col p line blanks_before is toks k = match toks with
| [] ->
begin match find_pipe p line ~before:(line.last + 1) k with
| `Found (text, after, k) ->
let is = match text with Some t -> t :: is | None -> is in
(make_col p is, (blanks_before, after)), [], k
| `Not_found _ -> assert false
end
| Inline { start; inline; next } :: toks when k >= start ->
finish_col p line blanks_before (inline :: is) toks next
| Inline { start; inline; next } :: toks as toks' ->
begin match find_pipe p line ~before:start k with
| `Not_found text ->
let is = inline :: text :: is in
finish_col p line blanks_before is toks next
| `Found (text, after, k) ->
let is = match text with Some t -> t :: is | None -> is in
(make_col p is, (blanks_before, after)), toks', k
end
| (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _
| Emphasis_marks _ | Right_paren _ | Strikethrough_marks _
| Math_span_marks _ | Newline _ ) :: _ ->
assert false
let rec parse_cols p line acc toks k = match toks with
| [] ->
if k > line.last then (List.rev acc) else
begin match start_col p line ~before:(line.last + 1) k with
| `Col (col, k) -> parse_cols p line (col :: acc) [] k
| `Start _ -> assert false
end
| Inline { start; inline; next } :: toks as toks' ->
begin match start_col p line ~before:start k with
| `Col (col, k) -> parse_cols p line (col :: acc) toks' k
| `Start (before, is) ->
let is = inline :: is in
let col, toks, k = finish_col p line before is toks next in
parse_cols p line (col :: acc) toks k
end
| (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _
| Emphasis_marks _ | Right_paren _ | Strikethrough_marks _
| Math_span_marks _ | Newline _ ) :: _ ->
assert false
let parse_table_row p line =
let cidx, toks, first_line = tokenize ~exts:p.exts p.i [line] in
p.cidx <- cidx;
let toks, _had_link = first_pass p toks first_line in
let toks = second_pass p toks first_line in
(* We now have modified last pass, inner inlines will have gone through
the regular [last_pass] which is fine since we are only interested
in creating the toplevel text nodes further splited on (unescaped)
[\]. *)
parse_cols p line [] toks line.first
end
(* Block structure parsing. *)
module Block_struct = struct
(* Moving on the line in the indentation space (columns) and over container
markers. *)
let[@inline] current_col p = p.current_char_col + p.tab_consumed_cols
let[@inline] current_indent p = p.next_non_blank_col - current_col p
let[@inline] end_of_line p = p.current_char > p.current_line_last_char
let[@inline] only_blanks p = p.next_non_blank > p.current_line_last_char
let[@inline] has_next_non_blank p =
p.next_non_blank <= p.current_line_last_char
let update_next_non_blank p =
let rec loop p s last k col =
if k > last then (p.next_non_blank <- k; p.next_non_blank_col <- col) else
match s.[k] with
| ' ' -> loop p s last (k + 1) (col + 1)
| '\t' -> loop p s last (k + 1) (next_tab_stop col)
| _ -> p.next_non_blank <- k; p.next_non_blank_col <- col;
in
loop p p.i p.current_line_last_char p.current_char p.current_char_col
let accept_cols ~count p =
let rec loop p count k col =
if count = 0 then (p.current_char <- k; p.current_char_col <- col) else
if p.i.[k] <> '\t' then loop p (count - 1) (k + 1) (col + 1) else
let col' = next_tab_stop col in
let tab_cols = col' - (col + p.tab_consumed_cols) in
if tab_cols > count
then (p.tab_consumed_cols <- count; loop p 0 k col)
else (p.tab_consumed_cols <- 0; loop p (count - tab_cols) (k + 1) col')
in
loop p count p.current_char p.current_char_col;
update_next_non_blank p
let match_and_accept_block_quote p =
(* https://spec.commonmark.org/current/#block-quote-marker *)
if end_of_line p || p.i.[p.current_char] <> '>' then false else
let next_is_blank =
let next = p.current_char + 1 in
next <= p.current_line_last_char && Ascii.is_blank p.i.[next]
in
let count = if next_is_blank then (* we eat a space *) 2 else 1 in
accept_cols ~count p; true
let accept_list_marker_and_indent p ~marker_size ~last =
(* Returns min indent after marker for list item *)
accept_cols ~count:marker_size p;
let indent = current_indent p in
let min_indent =
if only_blanks p || indent > 4 (* indented code *)
then 1
else min indent 4
in
accept_cols ~count:min_indent p;
min_indent
let accept_code_indent p ~count =
(* Returns padding for partially consumed tab and content first char *)
accept_cols p ~count;
if p.tab_consumed_cols = 0 then 0, p.current_char else
let col' = next_tab_stop p.current_char_col in
let pad = col' - (p.current_char_col + p.tab_consumed_cols) in
pad, p.current_char (* is '\t' *) + 1
(* These data types are only used during parsing, to find out the
block structure. All the lists (blocks, lines) are in reverse
order. We don't extract data from the input here. We just store
line spans. See:
https://spec.commonmark.org/current/#phase-1-block-structure *)
type space_pad = int (* number of space characters to pad content with. *)
type indented_code_line =
{ pad : space_pad;
code : line_span;
is_blank : bool }
type fence =
{ indent : Layout.indent;
opening_fence : line_span;
fence : Char.t * int (* fence length *);
info_string : line_span option (* we drop the trailing blanks *);
closing_fence : line_span option; }
type fenced_code_block =
{ fence : fence;
code : (space_pad * line_span) list }
type code_block =
[ `Indented of indented_code_line list | `Fenced of fenced_code_block ]
type atx =
{ indent : Layout.indent;
level : Match.heading_level;
after_open : byte_pos;
heading : line_span;
layout_after : line_span }
type setext =
{ level : Match.heading_level;
heading_lines : line_span list;
underline : (* Indent, underline char count, blanks *)
Layout.indent * line_span * line_span; }
type heading = [ `Atx of atx | `Setext of setext ]
type html_block =
{ end_cond : Match.html_block_end_cond option;
html : line_span list }
type paragraph = { maybe_ref : bool; lines : line_span list }
type t =
| Block_quote of Layout.indent * t list
| Blank_line of space_pad * line_span
| Code_block of code_block
| Heading of heading
| Html_block of html_block
| List of list'
| Linkref_def of Link_definition.t node
| Paragraph of paragraph
| Thematic_break of Layout.indent * line_span (* including trailing blanks *)
| Ext_table of Layout.indent * (line_span * line_span (* trail blanks *)) list
| Ext_footnote of Layout.indent * (Label.t * Label.t option) * t list
and list_item =
{ before_marker : Layout.indent;
marker : line_span;
after_marker : Layout.indent;
ext_task_marker : (Uchar.t * line_span) option;
blocks : t list }
and list' =
{ last_blank : bool; (* last added line was blank and not first line
of item *)
loose : bool; (* inter-item looseness, intra-item is computed later *)
item_min_indent : int; (* last item minimal indent *)
list_type : Block.List'.type';
items : list_item list; }
let block_is_blank_line = function Blank_line _ -> true | _ -> false
(* Making blocks from the current line status *)
let blank_line p =
let first = p.current_char and last = p.current_line_last_char in
Blank_line (0, current_line_span p ~first ~last)
let thematic_break p ~indent ~last:_ =
let last = p.current_line_last_char (* let's keep everything *) in
let break = current_line_span p ~first:p.current_char ~last in
Thematic_break (indent, break)
let atx_heading p ~indent ~level ~after_open ~first_content ~last_content =
let heading = current_line_span p ~first:first_content ~last:last_content in
let layout_after =
let first = last_content + 1 and last = p.current_line_last_char in
current_line_span p ~first ~last
in
Heading (`Atx { indent; level; after_open; heading; layout_after })
let setext_heading p ~indent ~level ~last_underline heading_lines =
let u = current_line_span p ~first:p.current_char ~last:last_underline in
let blanks =
let first = last_underline + 1 and last = p.current_line_last_char in
current_line_span p ~first ~last
in
let underline = indent, u, blanks in
Heading (`Setext {level; heading_lines; underline})
let indented_code_block p = (* Has a side-effect on [p] *)
let pad, first = accept_code_indent p ~count:4 in
let code = current_line_span p ~first ~last:p.current_line_last_char in
Code_block (`Indented [{pad; code; is_blank = false}])
let fenced_code_block p ~indent ~fence_first ~fence_last ~info =
let info_string, layout_last = match info with
| None -> None, p.current_line_last_char
| Some (first, last) -> Some (current_line_span p ~first ~last), first - 1
in
let opening_fence =
current_line_span p ~first:fence_first ~last:layout_last
in
let fence = p.i.[fence_first], (fence_last - fence_first + 1) in
let closing_fence = None in
let fence = { indent; opening_fence; fence; info_string; closing_fence } in
Code_block (`Fenced {fence; code = []})
let html_block p ~end_cond ~indent_start =
let first = indent_start and last = p.current_line_last_char in
let end_cond = (* Check if the same line matches the end condition. *)
if Match.html_block_end p.i ~end_cond ~last ~start:p.current_char
then None (* We are already closed *) else Some end_cond
in
Html_block { end_cond; html = [current_line_span p ~first ~last] }
let paragraph p ~start =
let last = p.current_line_last_char in
let maybe_ref = Match.could_be_link_reference_definition p.i ~last ~start in
Paragraph { maybe_ref; lines = [current_line_span p ~first:start ~last]}
let add_paragraph_line p ~indent_start par bs =
let first = indent_start and last = p.current_line_last_char in
let lines = current_line_span p ~first ~last :: par.lines in
Paragraph { par with lines } :: bs
let table_row p ~first ~last =
current_line_span p ~first ~last,
current_line_span p ~first:(last + 1) ~last:p.current_line_last_char
let table p ~indent ~last =
let row = table_row p ~first:p.current_char ~last in
Ext_table (indent, [row])
(* Link reference definition parsing
This is invoked when we close a paragraph and works on the paragraph
lines. *)
let parse_link_reference_definition p lines =
(* Has no side effect on [p], parsing occurs on [lines] spans. *)
(* https://spec.commonmark.org/current/#link-reference-definitions *)
let none () = raise_notrace Exit in
let next_line = function line :: lines -> Some (lines, line) | [] -> None in
try
let lines, line = match next_line lines with
| None -> none () | Some v -> v
in
let start = first_non_blank_in_span p line in
let indent = start - line.first in
let meta_first = { line with first = start } in
let lines, line, label, start =
match Match.link_label p.buf ~next_line p.i lines ~line ~start with
| None -> none ()
| Some (lines, line, rev_spans, last, key) ->
let colon = last + 1 in
if colon > line.last || p.i.[colon] <> ':' then none () else
let label = Inline_struct.label_of_rev_spans p ~key rev_spans in
lines, line, label, colon + 1
in
let lines, line, before_dest, start =
match first_non_blank_over_nl ~next_line p lines line ~start with
| None -> none () | Some v -> v
in
let angled_dest, dest, start, meta_last =
match Match.link_destination p.i ~last:line.last ~start with
| None -> none ()
| Some (angled, first, last) ->
let dest = clean_unesc_unref_span p { line with first; last } in
let next = if angled then last + 2 else last + 1 in
angled, Some dest, next, { line with last = last }
in
let lines, after_dest, title_open_delim, title, after_title, meta_last =
match first_non_blank_over_nl ~next_line p lines line ~start with
| None -> lines, [], '\"', None, [], meta_last
| Some (_, _, _, st) when st = start (* need some space *) -> none ()
| Some (lines', line', after_dest, start') ->
let no_newline = line'.line_pos = line.line_pos in
let title =
Match.link_title ~next_line p.i lines' ~line:line' ~start:start'
in
match title with
| None ->
if no_newline then none () (* garbage after dest *) else
lines, [], '\"', None, [], meta_last
| Some (lines', line', rev_spans, last) ->
let after_title =
let last = line'.last and start = last + 1 in
let nb = Match.first_non_blank p.i ~last ~start in
if nb <= line'.last
then None
else
Some [layout_clean_raw_span p { line' with first = start; }]
in
match after_title with
| None when no_newline -> none ()
| None -> (lines, [], '\"', None, [], meta_last)
| Some after_title ->
let t = tight_block_lines p ~rev_spans in
lines', after_dest, p.i.[start'], Some t,
after_title,
{ line' with last }
in
let meta = meta_of_spans p ~first:meta_first ~last:meta_last in
let layout =
{ Link_definition.indent; angled_dest; before_dest;
after_dest; title_open_delim; after_title }
in
let defined_label = def_label p label in
let label = Some label in
let ld =
{ Link_definition.layout; label; defined_label; dest; title }, meta
in
begin match defined_label with
| None -> () | Some def -> set_label_def p def (Link_definition.Def ld)
end;
Some (ld, lines)
with
| Exit -> None
let maybe_add_link_reference_definitions p lines prevs =
let rec loop p prevs = function
| [] -> prevs
| ls ->
match parse_link_reference_definition p ls with
| None ->
(* Link defs can't interrupt a paragraph so we are good now. *)
Paragraph { maybe_ref = false; lines = List.rev ls } :: prevs
| Some (ld, ls) -> loop p (Linkref_def ld :: prevs) ls
in
loop p prevs (List.rev lines)
(* Closing blocks and finishing the document. *)
let close_indented_code_block p lines bs =
(* Removes trailing blank lines and add them as blank lines *)
let rec loop blanks lines bs = match lines with
| { pad; code; is_blank = true} :: lines ->
loop (Blank_line (pad, code) :: blanks) lines bs
| [] -> (* likely assert (false) *) List.rev_append blanks bs
| ls -> List.rev_append blanks ((Code_block (`Indented ls)) :: bs)
in
loop [] lines bs
let close_paragraph p par bs =
if not par.maybe_ref then Paragraph par :: bs else
maybe_add_link_reference_definitions p par.lines bs
let rec close_last_block p = function
| Code_block (`Indented ls) :: bs -> close_indented_code_block p ls bs
| Paragraph par :: bs -> close_paragraph p par bs
| List l :: bs -> close_list p l bs
| Ext_footnote (i, l, blocks) :: bs -> close_footnote p i l blocks bs
| bs -> bs
and close_list p l bs =
let i = List.hd l.items in
let blocks = close_last_block p i.blocks in
(* The final blank line extraction of the list item entails less blank
line churn for CommonMark rendering but we don't do it on empty list
items. *)
match blocks with
| Blank_line _ as bl :: (_ :: _ as blocks) ->
let items = { i with blocks } :: List.tl l.items in
bl :: List { l with items } :: bs
| blocks ->
let items = { i with blocks } :: List.tl l.items in
List { l with items } :: bs
and close_footnote p indent label blocks bs =
let blocks = close_last_block p blocks in
(* Like for lists above we do blank line extraction (except if blocks
is only a blank line) *)
let blanks, blocks =
let rec loop acc = function
| Blank_line _ as bl :: (_ :: _ as blocks) -> loop (bl :: acc) blocks
| blocks -> acc, blocks
in
loop [] blocks
in
List.rev_append blanks (Ext_footnote (indent, label, blocks) :: bs)
let close_last_list_item p l =
let item = List.hd l.items in
let item = { item with blocks = close_last_block p item.blocks } in
{ l with items = item :: List.tl l.items }
let end_doc_close_fenced_code_block p fenced bs = match fenced.code with
| (_, l) :: code when l.first > l.last (* empty line *) ->
Blank_line (0, l) :: Code_block (`Fenced { fenced with code }) :: bs
| _ -> Code_block (`Fenced fenced) :: bs
let end_doc_close_html p h bs = match h.html with
| l :: html when l.first > l.last (* empty line *) ->
Blank_line (0, l) :: Html_block { end_cond = None; html } :: bs
| _ ->
Html_block { h with end_cond = None } :: bs
let rec end_doc p = function
| Block_quote (indent, bq) :: bs -> Block_quote (indent, end_doc p bq) :: bs
| List list :: bs -> close_list p list bs
| Paragraph par :: bs -> close_paragraph p par bs
| Code_block (`Indented ls) :: bs -> close_indented_code_block p ls bs
| Code_block (`Fenced f) :: bs -> end_doc_close_fenced_code_block p f bs
| Html_block html :: bs -> end_doc_close_html p html bs
| Ext_footnote (i, l, blocks) :: bs -> close_footnote p i l blocks bs
| (Thematic_break _ | Heading _ | Blank_line _ | Linkref_def _
| Ext_table _ ) :: _ | [] as bs -> bs
(* Adding lines to blocks *)
let match_line_type ~no_setext ~indent p =
(* Effects on [p]'s column advance *)
if only_blanks p then Match.Blank_line else
if indent >= 4 then Indented_code_block_line else begin
accept_cols ~count:indent p;
if end_of_line p then Match.Blank_line else
let start = p.current_char and last = p.current_line_last_char in
match p.i.[start] with
(* Early dispatch shaves a few ms but may not be worth doing vs
testing all the cases in sequences. *)
| '>' ->
if match_and_accept_block_quote p then Match.Block_quote_line else
Paragraph_line
| '=' when not no_setext ->
let r = Match.setext_heading_underline p.i ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| '-' ->
let r =
if no_setext then Match.Nomatch else
Match.setext_heading_underline p.i ~last ~start
in
if r <> Nomatch then r else
let r = Match.thematic_break p.i ~last ~start in
if r <> Nomatch then r else
let r = Match.list_marker p.i ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| '#' ->
let r = Match.atx_heading p.i ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| '+' | '*' | '0' .. '9' ->
let r = Match.thematic_break p.i ~last ~start in
if r <> Nomatch then r else
let r = Match.list_marker p.i ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| '_' ->
let r = Match.thematic_break p.i ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| '~' | '`' ->
let r = Match.fenced_code_block_start p.i ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| '<' ->
let r = Match.html_block_start p.i ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| '|' when p.exts ->
let r = Match.ext_table_row p.i ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| '[' when p.exts ->
let line_pos = p.current_line_pos in
let r = Match.ext_footnote_label p.buf p.i ~line_pos ~last ~start in
if r <> Nomatch then r else
Paragraph_line
| _ ->
Paragraph_line
end
let list_marker_can_interrupt_paragraph p = function
| `Ordered (1, _), marker_last | `Unordered _, marker_last ->
let last = p.current_line_last_char and start = marker_last + 1 in
let non_blank = Match.first_non_blank p.i ~last ~start in
non_blank <= p.current_line_last_char (* line is not blank *)
| _ -> false
let same_list_type t0 t1 = match t0, t1 with
| `Ordered (_, c0), `Ordered (_, c1)
| `Unordered c0, `Unordered c1 when Char.equal c0 c1 -> true
| _ -> false
let rec add_open_blocks_with_line_class p ~indent_start ~indent bs = function
| Match.Blank_line -> blank_line p :: bs
| Indented_code_block_line -> indented_code_block p :: bs
| Block_quote_line -> Block_quote (indent, add_open_blocks p []) :: bs
| Thematic_break_line last -> thematic_break p ~indent ~last :: bs
| List_marker_line m -> list p ~indent m bs
| Atx_heading_line (level, after_open, first_content, last_content) ->
atx_heading p ~indent ~level ~after_open ~first_content ~last_content ::
bs
| Fenced_code_block_line (fence_first, fence_last, info) ->
fenced_code_block p ~indent ~fence_first ~fence_last ~info :: bs
| Html_block_line end_cond -> html_block p ~end_cond ~indent_start :: bs
| Paragraph_line -> paragraph p ~start:indent_start :: bs
| Ext_table_row last -> table p ~indent ~last :: bs
| Ext_footnote_label (rev_spans, last, key) ->
footnote p ~indent ~last rev_spans key :: bs
| Setext_underline_line _ | Nomatch ->
(* This function should be called with a line type that comes out
of match_line_type ~no_setext:true *)
assert false
and add_open_blocks p bs =
let indent_start = p.current_char and indent = current_indent p in
let ltype = match_line_type ~no_setext:true ~indent p in
add_open_blocks_with_line_class p ~indent_start ~indent bs ltype
and footnote p ~indent ~last rev_spans key =
let label = Inline_struct.label_of_rev_spans p ~key rev_spans in
let defined_label = match def_label p label with
| None -> None
| Some def as l -> set_label_def p def (Block.Footnote.stub label l); l
in
accept_cols p ~count:(last - p.current_char + 1);
Ext_footnote (indent, (label, defined_label), add_open_blocks p [])
and list_item ~indent p (list_type, last) =
let before_marker = indent and marker_size = last - p.current_char + 1 in
let marker = current_line_span p ~first:p.current_char ~last in
let after_marker = accept_list_marker_and_indent p ~marker_size ~last in
let ext_task_marker, ext_task_marker_size = match p.exts with
| false -> None, 0
| true ->
let start = p.current_char and last = p.current_line_last_char in
match Match.ext_task_marker p.i ~last ~start with
| None -> None, 0
| Some (u, last) ->
accept_cols p ~count:(last - start + 1);
let last = match last = p.current_line_last_char with
| true -> (* empty line *) last
| false -> (* remove space for locs *) last - 1
in
Some (u, current_line_span p ~first:start ~last), 4
in
let min = indent + marker_size + after_marker + ext_task_marker_size in
min, { before_marker; marker; after_marker; ext_task_marker;
blocks = add_open_blocks p [] }
and list ~indent p (list_type, _ as m) bs =
let item_min_indent, item = list_item ~indent p m in
List { last_blank = false; loose = false;
item_min_indent; list_type; items = [item] } :: bs
let try_add_to_list ~indent p (lt, _ as m) l bs =
let item_min_indent, item = list_item ~indent p m in
if same_list_type lt l.list_type then
let l = close_last_list_item p l and last_blank = false in
let list_type = l.list_type in
List { last_blank; loose = l.last_blank; item_min_indent; list_type;
items = item :: l.items } :: bs
else
let bs = close_list p l bs and last_blank = false in
List { last_blank; loose = false; item_min_indent; list_type = lt;
items = [item] } :: bs
let try_add_to_paragraph p par bs =
let indent_start = p.current_char and indent = current_indent p in
match match_line_type ~no_setext:false ~indent p with
(* These can't interrupt paragraphs *)
| Html_block_line `End_blank_7
| Indented_code_block_line
| Ext_table_row _ | Ext_footnote_label _
| Paragraph_line ->
add_paragraph_line p ~indent_start par bs
| List_marker_line m when not (list_marker_can_interrupt_paragraph p m) ->
add_paragraph_line p ~indent_start par bs
| Blank_line ->
blank_line p :: close_paragraph p par bs
| Block_quote_line ->
Block_quote (indent, add_open_blocks p []) :: (close_paragraph p par bs)
| Setext_underline_line (level, last_underline) ->
let bs = close_paragraph p par bs in
begin match bs with
| Paragraph { lines; _ } :: bs ->
setext_heading p ~indent ~level ~last_underline lines :: bs
| bs -> paragraph p ~start:indent_start :: bs
end
| Thematic_break_line last ->
thematic_break p ~indent ~last :: (close_paragraph p par bs)
| List_marker_line m ->
list p ~indent m (close_paragraph p par bs)
| Atx_heading_line (level, after_open, first_content, last_content) ->
let bs = close_paragraph p par bs in
atx_heading p ~indent ~level ~after_open ~first_content ~last_content ::
bs
| Fenced_code_block_line (fence_first, fence_last, info) ->
let bs = close_paragraph p par bs in
fenced_code_block p ~indent ~fence_first ~fence_last ~info :: bs
| Html_block_line end_cond ->
html_block p ~end_cond ~indent_start :: (close_paragraph p par bs)
| Nomatch -> assert false
let try_add_to_indented_code_block p ls bs =
if current_indent p < 4 then
if has_next_non_blank p
then add_open_blocks p (close_indented_code_block p ls bs) else
(* Blank but white is not data, make an empty span *)
let first = p.current_line_last_char + 1 in
let last = p.current_line_last_char in
let code = current_line_span p ~first ~last in
let l = { pad = 0; code; is_blank = true } in
Code_block (`Indented (l :: ls)) :: bs
else
let pad, first = accept_code_indent p ~count:4 in
let last = p.current_line_last_char in
let is_blank = only_blanks p in
let l = { pad; code = current_line_span p ~first ~last; is_blank } in
Code_block (`Indented (l :: ls)) :: bs
let try_add_to_fenced_code_block p f bs = match f with
| { fence = { closing_fence = Some _; _}; _ } -> (* block is closed *)
add_open_blocks p ((Code_block (`Fenced f)) :: bs)
| { fence = { indent; fence; _} ; code = ls} as b ->
let start = p.current_char and last = p.current_line_last_char in
match Match.fenced_code_block_continue ~fence p.i ~last ~start with
| `Code ->
let strip = Int.min indent (current_indent p) in
let pad, first = accept_code_indent p ~count:strip in
let code = (pad, current_line_span p ~first ~last) :: ls in
Code_block (`Fenced { b with code }) :: bs
| `Close (first, _fence_last) ->
let close = current_line_span p ~first ~last (* with layout *)in
let fence = { b.fence with closing_fence = Some close } in
Code_block (`Fenced { b with fence }) :: bs
let try_add_to_html_block p b bs = match b.end_cond with
| None -> add_open_blocks p (Html_block { b with end_cond = None} :: bs)
| Some end_cond ->
let start = p.current_char and last = p.current_line_last_char in
let l = current_line_span p ~first:start ~last in
if not (Match.html_block_end p.i ~end_cond ~last ~start)
then Html_block { b with html = l :: b.html } :: bs else
match end_cond with
| `End_blank | `End_blank_7 ->
blank_line p :: Html_block { b with end_cond = None } :: bs
| _ ->
Html_block { end_cond = None; html = l :: b.html } :: bs
let rec try_lazy_continuation p ~indent_start = function
| Paragraph par :: bs -> Some (add_paragraph_line p ~indent_start par bs)
| Block_quote (indent, bq) :: bs ->
begin match try_lazy_continuation p ~indent_start bq with
| None -> None
| Some bq -> Some (Block_quote (indent, bq) :: bs)
end
| List l :: bs ->
let i = List.hd l.items in
begin match try_lazy_continuation p ~indent_start i.blocks with
| None -> None
| Some blocks ->
let items = { i with blocks } :: (List.tl l.items) in
Some (List { l with items; last_blank = false } :: bs)
end
| _ -> None
let try_add_to_table p ind rows bs =
let indent_start = p.current_char and indent = current_indent p in
match match_line_type ~indent ~no_setext:true p with
| Ext_table_row last ->
let row = table_row p ~first:p.current_char ~last in
Ext_table (ind, row :: rows) :: bs
| ltype ->
let bs = Ext_table (ind, rows) :: bs in
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
let rec try_add_to_block_quote p indent_layout bq bs =
let indent_start = p.current_char and indent = current_indent p in
match match_line_type ~indent ~no_setext:true p with
| Block_quote_line -> Block_quote (indent_layout, add_line p bq) :: bs
| (Indented_code_block_line (* Looks like a *) | Paragraph_line) as ltype ->
begin match try_lazy_continuation p ~indent_start bq with
| Some bq -> Block_quote (indent_layout, bq) :: bs
| None ->
let bs = Block_quote (indent_layout, close_last_block p bq) :: bs in
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
end
| ltype ->
let bs = Block_quote (indent_layout, close_last_block p bq) :: bs in
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
and try_add_to_footnote p fn_indent label blocks bs =
let indent_start = p.current_char and indent = current_indent p in
if indent < fn_indent + 1 (* position of ^ *) then begin
match match_line_type ~indent ~no_setext:true p with
| (Indented_code_block_line (* Looks like a *) | Paragraph_line) as lt ->
begin match try_lazy_continuation p ~indent_start blocks with
| Some blocks -> Ext_footnote (fn_indent, label, blocks) :: bs
| None ->
let blocks = close_last_block p blocks in
let bs = (close_footnote p fn_indent label blocks) bs in
add_open_blocks_with_line_class p ~indent ~indent_start bs lt
end
| Blank_line ->
Ext_footnote (fn_indent, label, add_line p blocks) :: bs
| ltype ->
let blocks = close_last_block p blocks in
let bs = close_footnote p fn_indent label blocks bs in
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
end else begin
accept_cols p ~count:(fn_indent + 1);
Ext_footnote (fn_indent, label, add_line p blocks) :: bs
end
and try_add_to_list_item p list bs =
let indent_start = p.current_char and indent = current_indent p in
if indent >= list.item_min_indent then begin
let last_blank = only_blanks p in
let item = List.hd list.items and items = List.tl list.items in
if list.last_blank && not last_blank &&
List.for_all block_is_blank_line item.blocks
then
(* Item can only start with a single blank line, if we are
here it's not a new item so the list ends *)
add_open_blocks p (List list :: bs)
else begin
accept_cols ~count:list.item_min_indent p;
let item = { item with blocks = add_line p item.blocks } in
List { list with items = item :: items; last_blank } :: bs
end
end else match match_line_type ~indent ~no_setext:true p with
| Blank_line ->
let item = List.hd list.items and items = List.tl list.items in
let item = { item with blocks = add_line p item.blocks } in
List { list with items = item :: items; last_blank = true } :: bs
| Indented_code_block_line | Paragraph_line as ltype ->
let item = List.hd list.items and items = List.tl list.items in
begin match try_lazy_continuation p ~indent_start item.blocks with
| Some blocks ->
let items = { item with blocks } :: items in
List { list with items; last_blank = false } :: bs
| None ->
let bs = close_list p list bs in
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
end
| List_marker_line m ->
try_add_to_list p ~indent m list bs
| ltype ->
let bs = close_list p list bs in
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
and add_line p = function
| Paragraph par :: bs -> try_add_to_paragraph p par bs
| ((Thematic_break _ | Heading _ | Blank_line _ | Linkref_def _) :: _)
| [] as bs -> add_open_blocks p bs
| List list :: bs -> try_add_to_list_item p list bs
| Code_block (`Indented ls) :: bs -> try_add_to_indented_code_block p ls bs
| Code_block (`Fenced f) :: bs -> try_add_to_fenced_code_block p f bs
| Block_quote (ind, bq) :: bs -> try_add_to_block_quote p ind bq bs
| Html_block html :: bs -> try_add_to_html_block p html bs
| Ext_table (ind, rows) :: bs -> try_add_to_table p ind rows bs
| Ext_footnote (i, l, blocks) :: bs -> try_add_to_footnote p i l blocks bs
(* Parsing *)
let get_first_line p =
let max = String.length p.i - 1 in
let k = ref 0 in
let last_char =
while !k <= max && p.i.[!k] <> '\n' && p.i.[!k] <> '\r' do incr k done;
!k - 1 (* if the line is empty we have -1 *)
in
p.current_line_last_char <- last_char;
update_next_non_blank p;
(* Return first used newline (or "\n" if there is none) *)
if !k > max || p.i.[!k] = '\n' then "\n" else
let next = !k + 1 in
if next <= max && p.i.[next] = '\n' then "\r\n" else "\r"
let get_next_line p =
let max = String.length p.i - 1 in
if p.current_line_last_char = max then false else
let first_char =
let nl = p.current_line_last_char + 1 in
if p.i.[nl] = '\n' then nl + 1 else (* assert (p.i.[nl] = '\r') *)
let next = nl + 1 in
if next <= max && p.i.[next] = '\n' then next + 1 else next
in
let last_char =
let k = ref first_char in
while !k <= max && p.i.[!k] <> '\n' && p.i.[!k] <> '\r' do incr k done;
!k - 1 (* if the line is empty we have last_char = first_char - 1 *)
in
p.current_line_pos <- (fst p.current_line_pos + 1), first_char;
p.current_line_last_char <- last_char;
p.current_char <- first_char;
p.current_char_col <- 0;
p.tab_consumed_cols <- 0;
update_next_non_blank p;
true
let parse p =
let meta p =
let first_byte = 0 and last_byte = p.current_line_last_char in
let first_line = 1, first_byte and last_line = p.current_line_pos in
let file = p.file in
meta p (Textloc.v ~file ~first_byte ~last_byte ~first_line ~last_line)
in
let rec loop p bs =
let bs = add_line p bs in
if get_next_line p then loop p bs else (end_doc p bs), meta p
in
let nl = get_first_line p in
nl, loop p []
end
(* Building the final AST, invokes inline parsing. *)
let block_struct_to_blank_line p pad span =
Block.Blank_line (clean_raw_span p ~pad span)
let block_struct_to_code_block p = function
| `Indented (ls : Block_struct.indented_code_line list) (* non-empty *) ->
let line p { Block_struct.pad; code; _} = clean_raw_span ~pad p code in
let layout = `Indented and info_string = None in
let last = (List.hd ls).code in
let code = List.rev_map (line p) ls in
let meta =
let last_line = last.line_pos and last_byte = last.last in
let start = Meta.textloc (snd (List.hd code)) in
meta p (Textloc.set_last start ~last_byte ~last_line)
in
Block.Code_block ({layout; info_string; code}, meta)
| `Fenced { Block_struct.fence; code = ls } ->
let layout =
let opening_fence = layout_clean_raw_span p fence.opening_fence in
let closing_fence =
Option.map (layout_clean_raw_span p) fence.closing_fence
in
{ Block.Code_block.indent = fence.indent; opening_fence; closing_fence }
in
let info_string = Option.map (clean_unesc_unref_span p) fence.info_string in
let code = List.rev_map (fun (pad, l) -> clean_raw_span p ~pad l) ls in
let meta =
let first = fence.opening_fence in
let last = match fence.closing_fence with
| Some last -> last
| None -> match ls with [] -> first | (_, last_line) :: _ -> last_line
in
meta_of_spans p ~first ~last
in
let cb = {Block.Code_block.layout = `Fenced layout; info_string; code} in
if p.exts && Block.Code_block.is_math_block info_string
then Block.Ext_math_block (cb, meta)
else Block.Code_block (cb, meta)
let block_struct_to_heading p = function
| `Atx { Block_struct.indent; level; after_open; heading; layout_after } ->
let after_opening =
let first = after_open and last = heading.first - 1 in
layout_clean_raw_span' p { heading with first; last }
in
let closing = layout_clean_raw_span' p layout_after in
let layout = `Atx { Block.Heading.indent; after_opening; closing } in
let meta =
meta p (textloc_of_span p { heading with first = after_open - level })
in
let _layout, inline = Inline_struct.parse p [heading] in
let id = match p.heading_auto_ids with
| false -> None
| true -> Some (`Auto (Inline.id ~buf:p.buf inline))
in
Block.Heading ({layout; level; inline; id}, meta)
| `Setext { Block_struct.level; heading_lines; underline } ->
let (leading_indent, trailing_blanks), inline =
Inline_struct.parse p heading_lines
in
let underline_indent, u, blanks = underline in
let underline_blanks = layout_clean_raw_span' p blanks in
let underline_count = u.last - u.first + 1, meta p (textloc_of_span p u) in
let layout =
{ Block.Heading.leading_indent; trailing_blanks; underline_indent;
underline_count; underline_blanks }
in
let meta =
let last_line = u.line_pos and last_byte = u.last in
let start = Meta.textloc (Inline.meta inline) in
meta p (Textloc.set_last start ~last_byte ~last_line)
in
let id = match p.heading_auto_ids with
| false -> None
| true -> Some (`Auto (Inline.id ~buf:p.buf inline))
in
Block.Heading ({ layout = `Setext layout; level; inline; id }, meta)
let block_struct_to_html_block p (b : Block_struct.html_block) =
let last = List.hd b.html in
let last_byte = last.last and last_line = last.line_pos in
let lines = List.rev_map (clean_raw_span p) b.html in
let start_loc = Meta.textloc (snd (List.hd lines)) in
let meta = meta p (Textloc.set_last start_loc ~last_byte ~last_line) in
Block.Html_block (lines, meta)
let block_struct_to_paragraph p par =
let layout, inline = Inline_struct.parse p par.Block_struct.lines in
let leading_indent, trailing_blanks = layout in
let meta = Inline.meta inline in
Block.Paragraph ({ leading_indent; inline; trailing_blanks }, meta)
let block_struct_to_thematic_break p indent span =
let layout, meta = (* not layout because of loc *) clean_raw_span p span in
Block.Thematic_break ({ indent; layout }, meta)
let block_struct_to_table p indent rows =
let rec loop p col_count last_was_sep acc = function
| (row, blanks) :: rs ->
let meta = meta p (textloc_of_span p row) in
let row' = { row with first = row.first + 1; last = row.last } in
let cols = Inline_struct.parse_table_row p row' in
let col_count = Int.max col_count (List.length cols) in
let r, last_was_sep = match Block.Table.parse_sep_row cols with
| Some seps -> ((`Sep seps), meta), true
| None ->
((if last_was_sep then `Header cols else `Data cols), meta), false
in
let acc = (r, layout_clean_raw_span' p blanks) :: acc in
if rs = [] then row, col_count, acc else
loop p col_count last_was_sep acc rs
| [] -> assert false
in
let last = fst (List.hd rows) in
let first, col_count, rows = loop p 0 false [] rows in
let meta = meta_of_spans p ~first ~last in
Block.Ext_table ({ indent; col_count; rows }, meta)
let rec block_struct_to_block_quote p indent bs =
let add_block p acc b = block_struct_to_block p b :: acc in
let last = block_struct_to_block p (List.hd bs) in
let block = List.fold_left (add_block p) [last] (List.tl bs) in
let block = match block with
| [b] -> b
| quote ->
let first = Block.meta (List.hd quote) and last = Block.meta last in
Block.Blocks (quote, meta_of_metas p ~first ~last)
in
Block.Block_quote ({indent; block}, Block.meta block)
and block_struct_to_footnote_definition p indent (label, defined_label) bs =
let add_block p acc b = block_struct_to_block p b :: acc in
let last = block_struct_to_block p (List.hd bs) in
let block = List.fold_left (add_block p) [last] (List.tl bs) in
let last = Block.meta last in
let block = match block with
| [b] -> b
| bs ->
let first = Block.meta (List.hd bs) in
Block.Blocks (bs, meta_of_metas p ~first ~last)
in
let loc =
let labelloc = Label.textloc label in
let lastloc = Meta.textloc last in
let loc = Textloc.span labelloc lastloc in
let first_byte = Textloc.first_byte loc - 1 in
Textloc.set_first loc ~first_byte ~first_line:(Textloc.first_line loc)
in
let fn = { Block.Footnote.indent; label; defined_label; block }, meta p loc in
begin match defined_label with
| None -> () | Some def -> set_label_def p def (Block.Footnote.Def fn)
end;
Block.Ext_footnote_definition fn
and block_struct_to_list_item p (i : Block_struct.list_item) =
let rec loop bstate tight acc = function
| Block_struct.Blank_line _ as bl :: bs ->
let bstate = if bstate = `Trail_blank then `Trail_blank else `Blank in
loop bstate tight (block_struct_to_block p bl :: acc) bs
| Block_struct.List
{ items = { blocks = Block_struct.Blank_line _ :: _ } :: _ } as l :: bs
->
loop bstate false (block_struct_to_block p l :: acc) bs
| b :: bs ->
let tight = tight && not (bstate = `Blank) in
loop `Non_blank tight (block_struct_to_block p b :: acc) bs
| [] -> tight, acc
in
let last_meta, (tight, blocks) = match i.blocks with
| [Block_struct.Blank_line _ as blank] ->
let bl = block_struct_to_block p blank in
Block.meta bl, (true, [bl])
| Block_struct.Blank_line _ as blank :: bs ->
let bl = block_struct_to_block p blank in
(Block.meta bl), loop `Trail_blank true [bl] bs
| b :: bs ->
let b = block_struct_to_block p b in
(Block.meta b), loop `Non_blank true [b] bs
| [] -> assert false
in
let block = match blocks with
| [i] -> i
| is ->
let first = Block.meta (List.hd is) in
Block.Blocks (is, meta_of_metas p ~first ~last:last_meta)
in
let before_marker = i.before_marker and after_marker = i.after_marker in
let marker = (* not layout to get loc *) clean_raw_span p i.marker in
let ext_task_marker = match i.ext_task_marker with
| None -> None
| Some (u, span) -> Some (u, meta p (textloc_of_span p span))
in
let meta = meta_of_metas p ~first:(snd marker) ~last:last_meta in
let i =
{ Block.List_item.before_marker; marker; after_marker; block;
ext_task_marker }
in
(i, meta), tight
and block_struct_to_list p list =
let rec loop p tight acc = function
| [] -> tight, acc
| item :: items ->
let item, item_tight = block_struct_to_list_item p item in
loop p (tight && item_tight) (item :: acc) items
in
let items = list.Block_struct.items in
let last, tight = block_struct_to_list_item p (List.hd items) in
let tight, items = loop p (not list.loose && tight) [last] (List.tl items) in
let meta = meta_of_metas p ~first:(snd (List.hd items)) ~last:(snd last) in
Block.List ({ type' = list.Block_struct.list_type; tight; items }, meta)
and block_struct_to_block p = function
| Block_struct.Block_quote (ind, bs) -> block_struct_to_block_quote p ind bs
| Block_struct.List list -> block_struct_to_list p list
| Block_struct.Paragraph par -> block_struct_to_paragraph p par
| Block_struct.Thematic_break (i, br) -> block_struct_to_thematic_break p i br
| Block_struct.Code_block cb -> block_struct_to_code_block p cb
| Block_struct.Heading h -> block_struct_to_heading p h
| Block_struct.Html_block html -> block_struct_to_html_block p html
| Block_struct.Blank_line (pad, span) -> block_struct_to_blank_line p pad span
| Block_struct.Linkref_def r -> Block.Link_reference_definition r
| Block_struct.Ext_table (i, rows) -> block_struct_to_table p i rows
| Block_struct.Ext_footnote (i, labels, bs) ->
block_struct_to_footnote_definition p i labels bs
let block_struct_to_doc p (doc, meta) =
match List.rev_map (block_struct_to_block p) doc with
| [b] -> b | bs -> Block.Blocks (bs, meta)
(* Documents *)
module Doc = struct
type t = { nl : Layout.string; block : Block.t; defs : Label.defs }
let make ?(nl = "\n") ?(defs = Label.Map.empty) block = { nl; block; defs }
let empty = make (Block.Blocks ([], Meta.none))
let nl d = d.nl
let block d = d.block
let defs d = d.defs
let of_string
?defs ?resolver ?nested_links ?heading_auto_ids ?layout ?locs ?file
?(strict = true) s
=
let p =
parser ?defs ?resolver ?nested_links ?heading_auto_ids ?layout ?locs
?file ~strict s
in
let nl, doc = Block_struct.parse p in
let block = block_struct_to_doc p doc in
make ~nl block ~defs:p.defs
let unicode_version = Cmarkit_data.unicode_version
let commonmark_version = "0.30"
end
(* Maps and folds *)
module Mapper = struct
type 'a filter_map = 'a option
type 'a result = [ `Default | `Map of 'a filter_map ]
let default = `Default
let delete = `Map None
let ret v = `Map (Some v)
type t =
{ inline_ext_default : Inline.t map;
block_ext_default : Block.t map;
inline : Inline.t mapper;
block : Block.t mapper }
and 'a map = t -> 'a -> 'a filter_map
and 'a mapper = t -> 'a -> 'a result
let none _ _ = `Default
let ext_inline_none _ _ = invalid_arg Inline.err_unknown
let ext_block_none _ _ = invalid_arg Block.err_unknown
let make
?(inline_ext_default = ext_inline_none)
?(block_ext_default = ext_block_none)
?(inline = none) ?(block = none) ()
=
{ inline_ext_default; block_ext_default; inline; block }
let inline_mapper m = m.inline
let block_mapper m = m.block
let inline_ext_default m = m.inline_ext_default
let block_ext_default m = m.block_ext_default
let ( let* ) = Option.bind
let rec map_inline m i = match m.inline m i with
| `Map i -> i
| `Default ->
let open Inline in
match i with
| Autolink _ | Break _ | Code_span _ | Raw_html _
| Text _ | Ext_math_span _ as i -> Some i
| Image (l, meta) ->
let text = Option.value ~default:Inline.empty (map_inline m l.text) in
Some (Image ({ l with text }, meta))
| Link (l, meta) ->
let* text = map_inline m l.text in
Some (Link ({ l with text }, meta))
| Emphasis (e, meta) ->
let* inline = map_inline m e.inline in
Some (Emphasis ({ e with inline }, meta))
| Strong_emphasis (e, meta) ->
let* inline = map_inline m e.inline in
Some (Strong_emphasis ({ e with inline}, meta))
| Inlines (is, meta) ->
(match List.filter_map (map_inline m) is with
| [] -> None | is -> Some (Inlines (is, meta)))
| Ext_strikethrough (s, meta) ->
let* inline = map_inline m s in
Some (Ext_strikethrough (inline, meta))
| ext -> m.inline_ext_default m ext
let rec map_block m b = match m.block m b with
| `Map b -> b
| `Default ->
let open Block in
match b with
| Blank_line _ | Code_block _ | Html_block _
| Link_reference_definition _ | Thematic_break _
| Ext_math_block _ as b -> Some b
| Heading (h, meta) ->
let inline = match map_inline m (Block.Heading.inline h) with
| None -> (* Can be empty *) Inline.Inlines ([], Meta.none)
| Some i -> i
in
Some (Heading ({ h with inline}, meta))
| Block_quote (b, meta) ->
let block = match map_block m b.block with
| None -> (* Can be empty *) Blocks ([], Meta.none) | Some b -> b
in
Some (Block_quote ({ b with block}, meta))
| Blocks (bs, meta) ->
(match List.filter_map (map_block m) bs with
| [] -> None | bs -> Some (Blocks (bs, meta)))
| List (l, meta) ->
let map_list_item m (i, meta) =
let* block = map_block m (List_item.block i) in
Some ({ i with block }, meta)
in
(match List.filter_map (map_list_item m) l.items with
| [] -> None | items -> Some (List ({ l with items }, meta)))
| Paragraph (p, meta) ->
let* inline = map_inline m (Paragraph.inline p) in
Some (Paragraph ({ p with inline }, meta))
| Ext_table (t, meta) ->
let map_col m (i, layout) = match map_inline m i with
| None -> None | Some i -> Some (i, layout)
in
let map_row (((r, meta), blanks) as row) = match r with
| `Header is ->
(`Header (List.filter_map (map_col m) is), meta), blanks
| `Sep _ -> row
| `Data is ->
(`Data (List.filter_map (map_col m) is), meta), blanks
in
let rows = List.map map_row t.rows in
Some (Ext_table ({ t with Table.rows }, meta))
| Ext_footnote_definition (fn, meta) ->
let block = match map_block m fn.block with
| None -> (* Can be empty *) Blocks ([], Meta.none) | Some b -> b
in
Some (Ext_footnote_definition ({ fn with block}, meta))
| ext -> m.block_ext_default m ext
let map_doc m d =
let map_block m b = Option.value ~default:Block.empty (map_block m b) in
(* XXX something better for defs should be devised here. *)
let map_def m = function
| Block.Footnote.Def (fn, meta) ->
let block = map_block m (Block.Footnote.block fn) in
Block.Footnote.Def ({ fn with block }, meta)
| def -> def
in
let block = map_block m (Doc.block d) in
let defs = Label.Map.map (map_def m) (Doc.defs d) in
{ d with Doc.block; defs }
end
module Folder = struct
type 'a result = [ `Default | `Fold of 'a ]
let default = `Default
let ret v = `Fold v
type ('a, 'b) fold = 'b t -> 'b -> 'a -> 'b
and ('a, 'b) folder = 'b t -> 'b -> 'a -> 'b result
and 'a t =
{ inline_ext_default : (Inline.t, 'a) fold;
block_ext_default : (Block.t, 'a) fold;
inline : (Inline.t, 'a) folder;
block : (Block.t, 'a) folder; }
let none _ _ _ = `Default
let ext_inline_none _ _ _ = invalid_arg Inline.err_unknown
let ext_block_none _ _ _ = invalid_arg Block.err_unknown
let make
?(inline_ext_default = ext_inline_none)
?(block_ext_default = ext_block_none)
?(inline = none) ?(block = none) ()
=
{ inline_ext_default; block_ext_default; inline; block }
let inline_folder f = f.inline
let block_folder f = f.block
let inline_ext_default f = f.inline_ext_default
let block_ext_default f = f.block_ext_default
let rec fold_inline f acc i = match f.inline f acc i with
| `Fold acc -> acc
| `Default ->
let open Inline in
match i with
| Autolink _ | Break _ | Code_span _ | Raw_html _ | Text _
| Ext_math_span _ -> acc
| Image (l, _) | Link (l, _) -> fold_inline f acc l.text
| Emphasis ({ inline }, _) -> fold_inline f acc inline
| Strong_emphasis ({ inline }, _) -> fold_inline f acc inline
| Inlines (is, _) -> List.fold_left (fold_inline f) acc is
| Ext_strikethrough (inline, _) -> fold_inline f acc inline
| ext -> f.inline_ext_default f acc ext
let rec fold_block f acc b = match f.block f acc b with
| `Fold acc -> acc
| `Default ->
let open Block in
match b with
| Blank_line _ | Code_block _ | Html_block _
| Link_reference_definition _ | Thematic_break _ | Ext_math_block _ -> acc
| Heading (h, _) -> fold_inline f acc (Block.Heading.inline h)
| Block_quote (bq, _) -> fold_block f acc bq.block
| Blocks (bs, _) -> List.fold_left (fold_block f) acc bs
| List (l, _) ->
let fold_list_item m acc (i, _) =
fold_block m acc (Block.List_item.block i)
in
List.fold_left (fold_list_item f) acc l.items
| Paragraph (p, _) -> fold_inline f acc (Block.Paragraph.inline p)
| Ext_table (t, _) ->
let fold_row acc ((r, _), _) = match r with
| (`Header is | `Data is) ->
List.fold_left (fun acc (i, _) -> fold_inline f acc i) acc is
| `Sep _ -> acc
in
List.fold_left fold_row acc t.Table.rows
| Ext_footnote_definition (fn, _) -> fold_block f acc fn.block
| ext -> f.block_ext_default f acc ext
let fold_doc f acc d = fold_block f acc (Doc.block d)
end
(*---------------------------------------------------------------------------
Copyright (c) 2021 The cmarkit programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
---------------------------------------------------------------------------*)