mirror of
https://github.com/c-cube/linol.git
synced 2025-12-06 03:05:31 -05:00
3194 lines
125 KiB
OCaml
3194 lines
125 KiB
OCaml
(*---------------------------------------------------------------------------
|
|
Copyright (c) 2021 The cmarkit programmers. All rights reserved.
|
|
Distributed under the ISC license, see terms at the end of the file.
|
|
---------------------------------------------------------------------------*)
|
|
|
|
module String_map = Map.Make (String)
|
|
module Ascii = Cmarkit_base.Ascii
|
|
module Text = Cmarkit_base.Text
|
|
module Match = Cmarkit_base
|
|
module Textloc = Cmarkit_base.Textloc
|
|
module Meta = Cmarkit_base.Meta
|
|
module Layout = struct
|
|
type blanks = string
|
|
type nonrec string = string
|
|
type nonrec char = char
|
|
type count = int
|
|
type indent = int
|
|
let string ?(meta = Meta.none) s = s, meta
|
|
let empty = string ""
|
|
end
|
|
|
|
type byte_pos = Textloc.byte_pos
|
|
type line_span = Match.line_span =
|
|
(* Substring on a single line, hereafter abbreviated to span *)
|
|
{ line_pos : Textloc.line_pos; first : byte_pos; last : byte_pos }
|
|
|
|
type 'a node = 'a * Meta.t
|
|
|
|
module Block_line = struct
|
|
let _list_of_string flush s = (* cuts [s] on newlines *)
|
|
let rec loop s acc max start k =
|
|
if k > max then List.rev (flush s start max acc) else
|
|
if not (s.[k] = '\n' || s.[k] = '\r')
|
|
then loop s acc max start (k + 1) else
|
|
let acc = flush s start (k - 1) acc in
|
|
let next = k + 1 in
|
|
let start =
|
|
if s.[k] = '\r' && next <= max && s.[next] = '\n' then next + 1 else
|
|
next
|
|
in
|
|
loop s acc max start start
|
|
in
|
|
loop s [] (String.length s - 1) 0 0
|
|
|
|
let flush ?(meta = Meta.none) s start last acc =
|
|
let sub = String.sub s start (last - start + 1) in
|
|
(sub, meta) :: acc
|
|
|
|
let flush_tight ?(meta = Meta.none) s start last acc =
|
|
(* If [s] has newlines, blanks after newlines are layout *)
|
|
if start > last then ("", ("", meta)) :: acc else
|
|
match acc with
|
|
| [] (* On the first line the blanks are legit *) ->
|
|
("", (String.sub s start (last - start + 1), meta)) :: acc
|
|
| acc ->
|
|
let nb = Match.first_non_blank s ~last ~start in
|
|
(String.sub s start (nb - 1 - start + 1),
|
|
(String.sub s nb (last - nb + 1), meta)) :: acc
|
|
|
|
(* Block lines *)
|
|
|
|
type t = string node
|
|
|
|
let to_string = fst
|
|
let list_of_string ?meta s = _list_of_string (flush ?meta) s
|
|
let list_textloc = function
|
|
| [] -> Textloc.none | [(_, m)] -> Meta.textloc m
|
|
| (_, first) :: _ as l ->
|
|
let _, last = List.hd (List.rev l) in
|
|
Textloc.reloc ~first:(Meta.textloc first) ~last:(Meta.textloc last)
|
|
|
|
(* Tight lines *)
|
|
|
|
type tight = Layout.blanks * t
|
|
|
|
let tight_to_string l = fst (snd l)
|
|
let tight_list_of_string ?meta s = _list_of_string (flush_tight ?meta) s
|
|
let tight_list_textloc = function
|
|
| [] -> Textloc.none | [_, (_, m)] -> Meta.textloc m
|
|
| (_, (_, first)) :: _ as l ->
|
|
let (_, (_, last)) = List.hd (List.rev l) in
|
|
Textloc.reloc ~first:(Meta.textloc first) ~last:(Meta.textloc last)
|
|
|
|
(* Blank lines *)
|
|
|
|
type blank = Layout.blanks node
|
|
end
|
|
|
|
module Label = struct
|
|
type key = string
|
|
type t = { meta : Meta.t; key : key; text : Block_line.tight list }
|
|
let make ?(meta = Meta.none) ~key text = { key; text; meta }
|
|
let with_meta meta l = { l with meta }
|
|
let meta t = t.meta
|
|
let key t = t.key
|
|
let text t = t.text
|
|
let textloc t = Block_line.tight_list_textloc t.text
|
|
let text_to_string t =
|
|
String.concat " " (List.map Block_line.tight_to_string t.text)
|
|
|
|
let compare l0 l1 = String.compare l0.key l1.key
|
|
|
|
(* Definitions *)
|
|
|
|
module Map = Map.Make (String)
|
|
type def = ..
|
|
type defs = def Map.t
|
|
|
|
(* Resolvers *)
|
|
|
|
type context =
|
|
[ `Def of t option * t
|
|
| `Ref of [ `Link | `Image ] * t * (t option) ]
|
|
|
|
type resolver = context -> t option
|
|
let default_resolver = function
|
|
| `Def (None, k) -> Some k
|
|
| `Def (Some _, k) -> None
|
|
| `Ref (_, _, k) -> k
|
|
end
|
|
|
|
module Link_definition = struct
|
|
type layout =
|
|
{ indent : Layout.indent;
|
|
angled_dest : bool;
|
|
before_dest : Block_line.blank list;
|
|
after_dest : Block_line.blank list;
|
|
title_open_delim : Layout.char;
|
|
after_title : Block_line.blank list; }
|
|
|
|
let layout_for_dest dest =
|
|
let needs_angles c = Ascii.is_control c || c = ' ' in
|
|
let angled_dest = String.exists needs_angles dest in
|
|
{ indent = 0; angled_dest; before_dest = [];
|
|
after_dest = []; title_open_delim = '\"'; after_title = [] }
|
|
|
|
let default_layout =
|
|
{ indent = 0; angled_dest = false; before_dest = [];
|
|
after_dest = []; title_open_delim = '\"'; after_title = [] }
|
|
|
|
type t =
|
|
{ layout : layout;
|
|
label : Label.t option;
|
|
defined_label : Label.t option;
|
|
dest : string node option;
|
|
title : Block_line.tight list option; }
|
|
|
|
let make ?layout ?defined_label ?label ?dest ?title () =
|
|
let layout = match dest with
|
|
| None -> default_layout | Some (d, _) -> layout_for_dest d
|
|
in
|
|
let defined_label = match defined_label with None -> label | Some d -> d in
|
|
{ layout; label; defined_label; dest; title }
|
|
|
|
let layout ld = ld.layout
|
|
let label ld = ld.label
|
|
let defined_label ld = ld.defined_label
|
|
let dest ld = ld.dest
|
|
let title ld = ld.title
|
|
|
|
type Label.def += Def of t node
|
|
end
|
|
|
|
module Inline = struct
|
|
type t = ..
|
|
|
|
module Autolink = struct
|
|
type t = { is_email : bool; link : string node; }
|
|
let is_email a = a.is_email
|
|
let link a = a.link
|
|
let make link =
|
|
let is_email =
|
|
let l = String.concat "" ["<"; fst link; ">"] in
|
|
match Match.autolink_email l ~last:(String.length l - 1) ~start:0 with
|
|
| None -> false | Some _ -> true
|
|
in
|
|
{ is_email; link }
|
|
end
|
|
|
|
module Break = struct
|
|
type type' = [ `Hard | `Soft ]
|
|
type t =
|
|
{ layout_before : Layout.blanks node;
|
|
type' : type';
|
|
layout_after : Layout.blanks node; }
|
|
|
|
let make
|
|
?(layout_before = Layout.empty) ?(layout_after = Layout.empty) type'
|
|
=
|
|
{ layout_before; type'; layout_after }
|
|
|
|
let type' b = b.type'
|
|
let layout_before b = b.layout_before
|
|
let layout_after b = b.layout_after
|
|
end
|
|
|
|
module Code_span = struct
|
|
type t =
|
|
{ backtick_count : Layout.count;
|
|
code_layout : Block_line.tight list; }
|
|
|
|
let make ~backtick_count code_layout = { backtick_count; code_layout }
|
|
|
|
let min_backtick_count ~min counts =
|
|
let rec loop min = function
|
|
| c :: cs -> if min <> c then min else loop (c + 1) cs | [] -> min
|
|
in
|
|
loop min (List.sort Int.compare counts)
|
|
|
|
let of_string ?(meta = Meta.none) = function
|
|
| "" -> { backtick_count = 1 ; code_layout = ["", ("", meta)] }
|
|
| s ->
|
|
(* This finds out the needed backtick count, whether spaces are needed,
|
|
and treats blanks after newline as layout *)
|
|
let max = String.length s - 1 in
|
|
let need_sp = s.[0] = '`' || s.[max] = '`' in
|
|
let s = if need_sp then String.concat "" [" "; s; " "] else s in
|
|
let backtick_counts, code_layout =
|
|
let rec loop bt_counts acc max btc start k = match k > max with
|
|
| true ->
|
|
(* assert (btc = 0) because of [need_sp] *)
|
|
bt_counts,
|
|
if acc = [] then ["", (s, meta)] else
|
|
List.rev (Block_line.flush_tight ~meta s start max acc)
|
|
| false ->
|
|
if s.[k] = '`'
|
|
then loop bt_counts acc max (btc + 1) start (k + 1) else
|
|
let bt_counts = if btc > 0 then btc :: bt_counts else bt_counts in
|
|
if not (s.[k] = '\n' || s.[k] = '\r')
|
|
then loop bt_counts acc max 0 start (k + 1) else
|
|
let acc = Block_line.flush_tight ~meta s start (k - 1) acc in
|
|
let start =
|
|
if k + 1 <= max && s.[k] = '\r' && s.[k + 1] = '\n'
|
|
then k + 2 else k + 1
|
|
in
|
|
loop bt_counts acc max 0 start start
|
|
in
|
|
loop [] [] max 0 0 0
|
|
in
|
|
let backtick_count = min_backtick_count ~min:1 backtick_counts in
|
|
{ backtick_count; code_layout }
|
|
|
|
let backtick_count cs = cs.backtick_count
|
|
let code_layout cs = cs.code_layout
|
|
let code cs =
|
|
(* Extract code, see https://spec.commonmark.org/0.30/#code-spans *)
|
|
let sp c = Char.equal c ' ' in
|
|
let s = List.map Block_line.tight_to_string cs.code_layout in
|
|
let s = String.concat " " s in
|
|
if s = "" then "" else
|
|
if s.[0] = ' ' && s.[String.length s - 1] = ' ' &&
|
|
not (String.for_all sp s)
|
|
then String.sub s 1 (String.length s - 2) else s
|
|
end
|
|
|
|
module Emphasis = struct
|
|
type inline = t
|
|
type t = { delim : Layout.char; inline : inline }
|
|
let make ?(delim = '*') inline = { delim; inline }
|
|
let inline e = e.inline
|
|
let delim e = e.delim
|
|
end
|
|
|
|
module Link = struct
|
|
type inline = t
|
|
|
|
type reference_layout = [ `Collapsed | `Full | `Shortcut ]
|
|
type reference =
|
|
[ `Inline of Link_definition.t node
|
|
| `Ref of reference_layout * Label.t * Label.t ]
|
|
|
|
type t = { text : inline; reference : reference; }
|
|
|
|
let make text reference = { text; reference }
|
|
let text l = l.text
|
|
let reference l = l.reference
|
|
let referenced_label l = match l.reference with
|
|
| `Inline _ -> None | `Ref (_, _, k) -> Some k
|
|
|
|
let reference_definition defs l = match l.reference with
|
|
| `Inline ld -> Some (Link_definition.Def ld)
|
|
| `Ref (_, _, def) -> Label.Map.find_opt (Label.key def) defs
|
|
|
|
let is_unsafe l =
|
|
let allowed_data_url l =
|
|
let allowed = ["image/gif"; "image/png"; "image/jpeg"; "image/webp"] in
|
|
(* Extract mediatype from data:[<mediatype>][;base64],<data> *)
|
|
match String.index_from_opt l 4 ',' with
|
|
| None -> false
|
|
| Some j ->
|
|
let k = match String.index_from_opt l 4 ';' with
|
|
| None -> j | Some k -> k
|
|
in
|
|
let t = String.sub l 5 (min j k - 5) in
|
|
List.mem t allowed
|
|
in
|
|
Ascii.caseless_starts_with ~prefix:"javascript:" l ||
|
|
Ascii.caseless_starts_with ~prefix:"vbscript:" l ||
|
|
Ascii.caseless_starts_with ~prefix:"file:" l ||
|
|
(Ascii.caseless_starts_with ~prefix:"data:" l && not (allowed_data_url l))
|
|
end
|
|
|
|
module Raw_html = struct
|
|
type t = Block_line.tight list
|
|
end
|
|
|
|
module Text = struct
|
|
type t = string
|
|
end
|
|
|
|
type t +=
|
|
| Autolink of Autolink.t node
|
|
| Break of Break.t node
|
|
| Code_span of Code_span.t node
|
|
| Emphasis of Emphasis.t node
|
|
| Image of Link.t node
|
|
| Inlines of t list node
|
|
| Link of Link.t node
|
|
| Raw_html of Raw_html.t node
|
|
| Strong_emphasis of Emphasis.t node
|
|
| Text of Text.t node
|
|
|
|
let empty = Inlines ([], Meta.none)
|
|
|
|
let err_unknown = "Unknown Cmarkit.Inline.t type extension"
|
|
|
|
(* Extensions *)
|
|
|
|
module Strikethrough = struct
|
|
type nonrec t = t
|
|
let make = Fun.id
|
|
let inline = Fun.id
|
|
end
|
|
|
|
module Math_span = struct
|
|
type t = { display : bool; tex_layout : Block_line.tight list; }
|
|
let make ~display tex_layout = { display; tex_layout }
|
|
let display ms = ms.display
|
|
let tex_layout ms = ms.tex_layout
|
|
let tex ms =
|
|
let s = List.map Block_line.tight_to_string ms.tex_layout in
|
|
String.concat " "s
|
|
end
|
|
|
|
type t +=
|
|
| Ext_strikethrough of Strikethrough.t node
|
|
| Ext_math_span of Math_span.t node
|
|
|
|
(* Functions on inlines *)
|
|
|
|
let is_empty = function
|
|
| Text ("", _) | Inlines ([], _) -> true | _ -> false
|
|
|
|
let ext_none _ = invalid_arg err_unknown
|
|
let meta ?(ext = ext_none) = function
|
|
| Autolink (_, m) | Break (_, m) | Code_span (_, m) | Emphasis (_, m)
|
|
| Image (_, m) | Inlines (_, m) | Link (_, m) | Raw_html (_, m)
|
|
| Strong_emphasis (_, m) | Text (_, m) -> m
|
|
| Ext_strikethrough (_, m) -> m | Ext_math_span (_, m) -> m
|
|
| i -> ext i
|
|
|
|
let rec normalize ?(ext = ext_none) = function
|
|
| Autolink _ | Break _ | Code_span _ | Raw_html _ | Text _
|
|
| Inlines ([], _) | Ext_math_span _ as i -> i
|
|
| Image (l, m) -> Image ({ l with text = normalize ~ext l.text }, m)
|
|
| Link (l, m) -> Link ({ l with text = normalize ~ext l.text }, m)
|
|
| Inlines ([i], _) -> i
|
|
| Emphasis (e, m) ->
|
|
Emphasis ({ e with inline = normalize ~ext e.inline}, m)
|
|
| Strong_emphasis (e, m) ->
|
|
Strong_emphasis ({ e with inline = normalize ~ext e.inline}, m)
|
|
| Inlines (i :: is, m) ->
|
|
let rec loop acc = function
|
|
| Inlines (is', m) :: is -> loop acc (List.rev_append (List.rev is') is)
|
|
| Text (t', m') as i' :: is ->
|
|
begin match acc with
|
|
| Text (t, m) :: acc ->
|
|
let tl = Textloc.span (Meta.textloc m) (Meta.textloc m') in
|
|
let i = Text (t ^ t', Meta.with_textloc ~keep_id:true m tl) in
|
|
loop (i :: acc) is
|
|
| _ -> loop (normalize ~ext i' :: acc) is
|
|
end
|
|
| i :: is -> loop (normalize ~ext i :: acc) is
|
|
| [] -> List.rev acc
|
|
in
|
|
let is = loop [normalize ~ext i] is in
|
|
(match is with [i] -> i | _ -> Inlines (is, m))
|
|
| Ext_strikethrough (i, m) -> Ext_strikethrough (normalize ~ext i, m)
|
|
| i -> ext i
|
|
|
|
let ext_none ~break_on_soft = ext_none
|
|
let to_plain_text ?(ext = ext_none) ~break_on_soft i =
|
|
let push s acc = (s :: List.hd acc) :: List.tl acc in
|
|
let newline acc = [] :: (List.rev (List.hd acc)) :: List.tl acc in
|
|
let rec loop ~break_on_soft acc = function
|
|
| Autolink (a, _) :: is ->
|
|
let acc = push (String.concat "" ["<"; fst a.link; ">"]) acc in
|
|
loop ~break_on_soft acc is
|
|
| Break ({ type' = `Hard }, _) :: is ->
|
|
loop ~break_on_soft (newline acc) is
|
|
| Break ({ type' = `Soft }, _) :: is ->
|
|
let acc = if break_on_soft then newline acc else (push " " acc) in
|
|
loop ~break_on_soft acc is
|
|
| Code_span (cs, _) :: is ->
|
|
loop ~break_on_soft (push (Code_span.code cs) acc) is
|
|
| Emphasis ({ inline }, _) :: is | Strong_emphasis ({ inline }, _) :: is ->
|
|
loop ~break_on_soft acc (inline :: is)
|
|
| Inlines (is', _) :: is ->
|
|
loop ~break_on_soft acc (List.rev_append (List.rev is') is)
|
|
| Link (l, _) :: is | Image (l, _) :: is ->
|
|
loop ~break_on_soft acc (l.text :: is)
|
|
| Raw_html _ :: is ->
|
|
loop ~break_on_soft acc is
|
|
| Text (t, _) :: is ->
|
|
loop ~break_on_soft (push t acc) is
|
|
| Ext_strikethrough (i, _) :: is ->
|
|
loop ~break_on_soft acc (i :: is)
|
|
| Ext_math_span (m, _) :: is ->
|
|
loop ~break_on_soft (push (Math_span.tex m) acc) is
|
|
| i :: is ->
|
|
loop ~break_on_soft acc (ext ~break_on_soft i :: is)
|
|
| [] ->
|
|
List.rev ((List.rev (List.hd acc)) :: List.tl acc)
|
|
in
|
|
loop ~break_on_soft ([] :: []) [i]
|
|
|
|
let id ?buf ?ext i =
|
|
let text = to_plain_text ?ext ~break_on_soft:false i in
|
|
let s = String.concat "\n" (List.map (String.concat "") text) in
|
|
let b = match buf with
|
|
| Some b -> Buffer.reset b; b | None -> Buffer.create 256
|
|
in
|
|
let[@inline] collapse_blanks b ~prev_byte =
|
|
(* Collapses non initial white *)
|
|
if Ascii.is_blank prev_byte && Buffer.length b <> 0
|
|
then Buffer.add_char b '-'
|
|
in
|
|
let rec loop b s max ~prev_byte k =
|
|
if k > max then Buffer.contents b else
|
|
match s.[k] with
|
|
| ' ' | '\t' as prev_byte -> loop b s max ~prev_byte (k + 1)
|
|
| '_' | '-' as c ->
|
|
collapse_blanks b ~prev_byte;
|
|
Buffer.add_char b c;
|
|
loop b s max ~prev_byte:c (k + 1)
|
|
| c ->
|
|
let () = collapse_blanks b ~prev_byte in
|
|
let d = String.get_utf_8_uchar s k in
|
|
let u = Uchar.utf_decode_uchar d in
|
|
let u = match Uchar.to_int u with 0x0000 -> Uchar.rep | _ -> u in
|
|
let k' = k + Uchar.utf_decode_length d in
|
|
if Cmarkit_data.is_unicode_punctuation u
|
|
then loop b s max ~prev_byte:'\x00' k' else
|
|
let () = match Cmarkit_data.unicode_case_fold u with
|
|
| None -> Buffer.add_utf_8_uchar b u
|
|
| Some fold -> Buffer.add_string b fold
|
|
in
|
|
let prev_byte = s.[k] in
|
|
loop b s max ~prev_byte k'
|
|
in
|
|
loop b s (String.length s - 1) ~prev_byte:'\x00' 0
|
|
end
|
|
|
|
(* Blocks *)
|
|
|
|
module Block = struct
|
|
type t = ..
|
|
|
|
module Blank_line = struct
|
|
type t = Layout.blanks
|
|
end
|
|
|
|
module Block_quote = struct
|
|
type nonrec t = { indent : Layout.indent; block : t; }
|
|
let make ?(indent = 0) block = { indent; block }
|
|
let indent bq = bq.indent
|
|
let block bq = bq.block
|
|
end
|
|
|
|
module Code_block = struct
|
|
type fenced_layout =
|
|
{ indent : Layout.indent;
|
|
opening_fence : Layout.string node;
|
|
closing_fence : Layout.string node option; }
|
|
|
|
let default_fenced_layout =
|
|
{ indent = 0;
|
|
opening_fence = Layout.empty;
|
|
closing_fence = Some Layout.empty }
|
|
|
|
type layout = [ `Indented | `Fenced of fenced_layout ]
|
|
type t =
|
|
{ layout : layout;
|
|
info_string : string node option;
|
|
code : string node list; }
|
|
|
|
let make ?(layout = `Fenced default_fenced_layout) ?info_string code =
|
|
let layout = match info_string, layout with
|
|
| Some _, `Indented -> `Fenced default_fenced_layout
|
|
| _, layout -> layout
|
|
in
|
|
{ layout; info_string; code }
|
|
|
|
let layout cb = cb.layout
|
|
let info_string cb = cb.info_string
|
|
let code cb = cb.code
|
|
|
|
let make_fence cb =
|
|
let rec loop char counts = function
|
|
| [] -> counts
|
|
| (c, _) :: cs ->
|
|
let max = String.length c - 1 in
|
|
let k = ref 0 in
|
|
while (!k <= max && c.[!k] = char) do incr k done;
|
|
loop char (if !k <> 0 then !k :: counts else counts) cs
|
|
in
|
|
let char = match cb.info_string with
|
|
| Some (i, _) when String.exists (Char.equal '`') i -> '~'
|
|
| None | Some _ -> '`'
|
|
in
|
|
let counts = loop char [] cb.code in
|
|
char,
|
|
Inline.Code_span.min_backtick_count (* not char specific *) ~min:3 counts
|
|
|
|
let language_of_info_string s =
|
|
let rec next_white s max i =
|
|
if i > max || Ascii.is_white s.[i] then i else
|
|
next_white s max (i + 1)
|
|
in
|
|
if s = "" then None else
|
|
let max = String.length s - 1 in
|
|
let white = next_white s max 0 in
|
|
let rem_first = Match.first_non_blank s ~last:max ~start:white in
|
|
let lang = String.sub s 0 white in
|
|
if lang = "" then None else
|
|
Some (lang, String.sub s rem_first (max - rem_first + 1))
|
|
|
|
let is_math_block = function
|
|
| None -> false | Some (i, _) -> match language_of_info_string i with
|
|
| Some ("math", _) -> true
|
|
| Some _ | None -> false
|
|
end
|
|
|
|
module Heading = struct
|
|
type atx_layout =
|
|
{ indent : Layout.indent;
|
|
after_opening : Layout.blanks;
|
|
closing : Layout.string; }
|
|
|
|
let default_atx_layout = { indent = 0; after_opening = ""; closing = "" }
|
|
|
|
type setext_layout =
|
|
{ leading_indent : Layout.indent;
|
|
trailing_blanks : Layout.blanks;
|
|
underline_indent : Layout.indent;
|
|
underline_count : Layout.count node;
|
|
underline_blanks : Layout.blanks; }
|
|
|
|
type layout = [ `Atx of atx_layout | `Setext of setext_layout ]
|
|
type id = [ `Auto of string | `Id of string ]
|
|
type t = { layout : layout; level : int; inline : Inline.t; id : id option }
|
|
|
|
let make ?id ?(layout = `Atx default_atx_layout) ~level inline =
|
|
let max = match layout with `Atx _ -> 6 | `Setext _ -> 2 in
|
|
let level = Int.max 1 (Int.min level max) in
|
|
{layout; level; inline; id}
|
|
|
|
let layout h = h.layout
|
|
let level h = h.level
|
|
let inline h = h.inline
|
|
let id h = h.id
|
|
end
|
|
|
|
module Html_block = struct
|
|
type t = string node list
|
|
end
|
|
|
|
module List_item = struct
|
|
type block = t
|
|
type t =
|
|
{ before_marker : Layout.indent;
|
|
marker : Layout.string node;
|
|
after_marker : Layout.indent;
|
|
block : block;
|
|
ext_task_marker : Uchar.t node option }
|
|
|
|
let make
|
|
?(before_marker = 0) ?(marker = Layout.empty) ?(after_marker = 1)
|
|
?ext_task_marker block
|
|
=
|
|
{ before_marker; marker; after_marker; block; ext_task_marker }
|
|
|
|
let block i = i.block
|
|
let before_marker i = i.before_marker
|
|
let marker i = i.marker
|
|
let after_marker i = i.after_marker
|
|
let ext_task_marker i = i.ext_task_marker
|
|
let task_status_of_task_marker u = match Uchar.to_int u with
|
|
| 0x0020 -> `Unchecked
|
|
| 0x0078 (* x *) | 0x0058 (* X *) | 0x2713 (* ✓ *) | 0x2714 (* ✔ *)
|
|
| 0x10102 (* 𐄂 *) | 0x1F5F8 (* 🗸*) -> `Checked
|
|
| 0x007E (* ~ *) -> `Cancelled
|
|
| _ -> `Other u
|
|
end
|
|
|
|
module List' = struct
|
|
type type' = [ `Unordered of Layout.char | `Ordered of int * Layout.char ]
|
|
type t =
|
|
{ type' : type';
|
|
tight : bool;
|
|
items : List_item.t node list; }
|
|
|
|
let make ?(tight = true) type' items = { type'; tight; items }
|
|
|
|
let type' l = l.type'
|
|
let tight l = l.tight
|
|
let items l = l.items
|
|
end
|
|
|
|
module Paragraph = struct
|
|
type t =
|
|
{ leading_indent : Layout.indent;
|
|
inline : Inline.t;
|
|
trailing_blanks : Layout.blanks; }
|
|
|
|
let make ?(leading_indent = 0) ?(trailing_blanks = "") inline =
|
|
{ leading_indent; inline; trailing_blanks }
|
|
|
|
let inline p = p.inline
|
|
let leading_indent p = p.leading_indent
|
|
let trailing_blanks p = p.trailing_blanks
|
|
end
|
|
|
|
module Thematic_break = struct
|
|
type t = { indent : Layout.indent; layout : Layout.string }
|
|
let make ?(indent = 0) ?(layout = "---") () = { indent; layout }
|
|
let indent t = t.indent
|
|
let layout t = t.layout
|
|
end
|
|
|
|
type t +=
|
|
| Blank_line of Layout.blanks node
|
|
| Block_quote of Block_quote.t node
|
|
| Blocks of t list node
|
|
| Code_block of Code_block.t node
|
|
| Heading of Heading.t node
|
|
| Html_block of Html_block.t node
|
|
| Link_reference_definition of Link_definition.t node
|
|
| List of List'.t node
|
|
| Paragraph of Paragraph.t node
|
|
| Thematic_break of Thematic_break.t node
|
|
|
|
let empty = Blocks ([], Meta.none)
|
|
|
|
(* Extensions *)
|
|
|
|
module Table = struct
|
|
type align = [ `Left | `Center | `Right ]
|
|
type sep = align option * Layout.count
|
|
type cell_layout = Layout.blanks * Layout.blanks
|
|
type row =
|
|
[ `Header of (Inline.t * cell_layout) list
|
|
| `Sep of sep node list
|
|
| `Data of (Inline.t * cell_layout) list ]
|
|
|
|
type t =
|
|
{ indent : Layout.indent;
|
|
col_count : int;
|
|
rows : (row node * Layout.blanks) list }
|
|
|
|
let col_count rows =
|
|
let rec loop c = function
|
|
| (((`Header cols | `Data cols), _), _) :: rs ->
|
|
loop (Int.max (List.length cols) c) rs
|
|
| (((`Sep cols), _), _) :: rs ->
|
|
loop (Int.max (List.length cols) c) rs
|
|
| [] -> c
|
|
in
|
|
loop 0 rows
|
|
|
|
let make ?(indent = 0) rows = { indent; col_count = col_count rows; rows }
|
|
let indent t = t.indent
|
|
let col_count t = t.col_count
|
|
let rows t = t.rows
|
|
|
|
let parse_sep_row cs =
|
|
let rec loop acc = function
|
|
| [] -> Some (List.rev acc)
|
|
| (Inline.Text (s, meta), ("", "")) :: cs ->
|
|
if s = "" then None else
|
|
let max = String.length s - 1 in
|
|
let first_colon = s.[0] = ':' and last_colon = s.[max] = ':' in
|
|
let first = if first_colon then 1 else 0 in
|
|
let last = if last_colon then max - 1 else max in
|
|
begin
|
|
match
|
|
for i = first to last do if s.[i] <> '-' then raise Exit; done
|
|
with
|
|
| exception Exit -> None
|
|
| () ->
|
|
let count = last - first + 1 in
|
|
let sep = match first_colon, last_colon with
|
|
| false, false -> None
|
|
| true, true -> Some `Center
|
|
| true, false -> Some `Left
|
|
| false, true -> Some `Right
|
|
in
|
|
loop (((sep, count), meta) :: acc) cs
|
|
end
|
|
| _ -> None
|
|
in
|
|
loop [] cs
|
|
end
|
|
|
|
module Footnote = struct
|
|
type nonrec t =
|
|
{ indent : Layout.indent;
|
|
label : Label.t;
|
|
defined_label : Label.t option;
|
|
block : t }
|
|
|
|
let make ?(indent = 0) ?defined_label:d label block =
|
|
let defined_label = match d with None -> Some label | Some d -> d in
|
|
{ indent; label; defined_label; block }
|
|
|
|
let indent fn = fn.indent
|
|
let label fn = fn.label
|
|
let defined_label fn = fn.defined_label
|
|
let block fn = fn.block
|
|
|
|
type Label.def += Def of t node
|
|
let stub label defined_label =
|
|
Def ({ indent = 0; label; defined_label; block = empty}, Meta.none)
|
|
end
|
|
|
|
type t +=
|
|
| Ext_math_block of Code_block.t node
|
|
| Ext_table of Table.t node
|
|
| Ext_footnote_definition of Footnote.t node
|
|
|
|
(* Functions on blocks *)
|
|
|
|
let err_unknown = "Unknown Cmarkit.Block.t type extension"
|
|
|
|
let ext_none _ = invalid_arg err_unknown
|
|
let meta ?(ext = ext_none) = function
|
|
| Blank_line (_, m) | Block_quote (_, m) | Blocks (_, m) | Code_block (_, m)
|
|
| Heading (_, m) | Html_block (_, m) | Link_reference_definition (_, m)
|
|
| List (_, m) | Paragraph (_, m) | Thematic_break (_, m)
|
|
| Ext_math_block (_, m) | Ext_table (_, m)
|
|
| Ext_footnote_definition (_, m) -> m
|
|
| b -> ext b
|
|
|
|
let rec normalize ?(ext = ext_none) = function
|
|
| Blank_line _ | Code_block _ | Heading _ | Html_block _
|
|
| Link_reference_definition _ | Paragraph _ | Thematic_break _
|
|
| Blocks ([], _) | Ext_math_block _ | Ext_table _ as b -> b
|
|
| Block_quote (b, m) ->
|
|
let b = { b with block = normalize ~ext b.block } in
|
|
Block_quote (b, m)
|
|
| List (l, m) ->
|
|
let item (i, meta) =
|
|
let block = List_item.block i in
|
|
{ i with List_item.block = normalize ~ext block }, meta
|
|
in
|
|
List ({ l with items = List.map item l.items }, m)
|
|
| Blocks (b :: bs, m) ->
|
|
let rec loop acc = function
|
|
| Blocks (bs', m) :: bs -> loop acc (List.rev_append (List.rev bs') bs)
|
|
| b :: bs -> loop (normalize ~ext b :: acc) bs
|
|
| [] -> List.rev acc
|
|
in
|
|
let bs = loop [normalize ~ext b] bs in
|
|
(match bs with [b] -> b | _ -> Blocks (bs, m))
|
|
| Ext_footnote_definition (fn, m) ->
|
|
let fn = { fn with block = normalize ~ext fn.block } in
|
|
Ext_footnote_definition (fn, m)
|
|
| b -> ext b
|
|
|
|
let rec defs
|
|
?(ext = fun b defs -> invalid_arg err_unknown) ?(init = Label.Map.empty)
|
|
= function
|
|
| Blank_line _ | Code_block _ | Heading _ | Html_block _
|
|
| Paragraph _ | Thematic_break _
|
|
| Ext_math_block _ | Ext_table _ -> init
|
|
| Block_quote (b, _) -> defs ~ext ~init (Block_quote.block b)
|
|
| Blocks (bs, _) -> List.fold_left (fun init b -> defs ~ext ~init b) init bs
|
|
| List (l, _) ->
|
|
let add init (i, _) = defs ~ext ~init (List_item.block i) in
|
|
List.fold_left add init l.items
|
|
| Link_reference_definition ld ->
|
|
begin match Link_definition.defined_label (fst ld) with
|
|
| None -> init
|
|
| Some def ->
|
|
Label.Map.add (Label.key def) (Link_definition.Def ld) init
|
|
end
|
|
| Ext_footnote_definition fn ->
|
|
let init = match Footnote.defined_label (fst fn) with
|
|
| None -> init
|
|
| Some def -> Label.Map.add (Label.key def) (Footnote.Def fn) init
|
|
in
|
|
defs ~ext ~init (Footnote.block (fst fn))
|
|
| b -> ext init b
|
|
end
|
|
|
|
(* Parsing *)
|
|
|
|
(* Closer indexes.
|
|
|
|
They map closing delimiters to the position where they
|
|
start. Shortcuts forward searches in inline parsing. See
|
|
Inline_struct. *)
|
|
|
|
module Pos_set = Set.Make (Int) (* Sets of positions. *)
|
|
module Closer = struct
|
|
type t =
|
|
| Backticks of int (* run length *)
|
|
| Right_brack
|
|
| Right_paren (* Only for ruling out pathological cases. *)
|
|
| Emphasis_marks of char
|
|
| Strikethrough_marks
|
|
| Math_span_marks of int (* run length *)
|
|
|
|
let compare = Stdlib.compare
|
|
end
|
|
|
|
module Closer_index = struct
|
|
include Map.Make (Closer)
|
|
type nonrec t = Pos_set.t t
|
|
|
|
let add cl pos cidx =
|
|
let add = function
|
|
| None -> Some (Pos_set.singleton pos)
|
|
| Some occs -> Some (Pos_set.add pos occs)
|
|
in
|
|
update cl add cidx
|
|
|
|
let closer_pos cl ~after cidx = match find_opt cl cidx with
|
|
| None -> None
|
|
| Some occs -> Pos_set.find_first_opt (fun pos -> pos > after) occs
|
|
|
|
let closer_exists cl ~after cidx = match closer_pos cl ~after cidx with
|
|
| None -> false | Some _ -> true
|
|
end
|
|
|
|
(* Columns. That notion is needed to handle tab stops.
|
|
See https://spec.commonmark.org/current/#tabs *)
|
|
|
|
type col = int
|
|
let[@inline] next_tab_stop col = (col + 4) land (lnot 3)
|
|
|
|
(* Parser abstraction *)
|
|
|
|
type parser =
|
|
{ file : Textloc.fpath (* input file name *);
|
|
i : string (* input string *);
|
|
buf : Buffer.t (* scratch buffer. *);
|
|
exts : bool; (* parse extensions if [true]. *)
|
|
nolocs : bool; (* do not compute locations if [true]. *)
|
|
nolayout : bool; (* do not compute layout fields if [true]. *)
|
|
heading_auto_ids : bool; (* compute heading ids. *)
|
|
nested_links : bool;
|
|
mutable defs : Label.defs;
|
|
resolver : Label.resolver;
|
|
mutable cidx : Closer_index.t; (* For inline parsing. *)
|
|
(* Current line (only used during block parsing) *)
|
|
mutable current_line_pos : Textloc.line_pos;
|
|
mutable current_line_last_char :
|
|
(* first char of line - 1 on empty lines *) Textloc.byte_pos;
|
|
mutable current_char : Textloc.byte_pos;
|
|
mutable current_char_col : col;
|
|
mutable next_non_blank :
|
|
(* current_line_last_char + 1 if none. *) Textloc.byte_pos;
|
|
mutable next_non_blank_col : col;
|
|
mutable tab_consumed_cols :
|
|
(* number of cols consumed from the tab if i.[current_char] is '\t' *)
|
|
col; }
|
|
|
|
let parser
|
|
?(defs = Label.Map.empty) ?(resolver = Label.default_resolver)
|
|
?(nested_links = false) ?(heading_auto_ids = false) ?(layout = false)
|
|
?(locs = false) ?(file = Textloc.file_none) ~strict i
|
|
=
|
|
let nolocs = not locs and nolayout = not layout and exts = not strict in
|
|
{ file; i; buf = Buffer.create 512; exts; nolocs; nolayout;
|
|
heading_auto_ids; nested_links; defs; resolver; cidx = Closer_index.empty;
|
|
current_line_pos = 1, 0; current_line_last_char = -1; current_char = 0;
|
|
current_char_col = 0; next_non_blank = 0; next_non_blank_col = 0;
|
|
tab_consumed_cols = 0; }
|
|
|
|
let find_label_defining_key p key = match Label.Map.find_opt key p.defs with
|
|
| Some (Link_definition.Def ld) -> Link_definition.defined_label (fst ld)
|
|
| Some (Block.Footnote.Def fn) -> Block.Footnote.defined_label (fst fn)
|
|
| None -> None
|
|
| _ -> assert false
|
|
|
|
let set_label_def p l def = p.defs <- Label.Map.add (Label.key l) def p.defs
|
|
let def_label p l =
|
|
p.resolver (`Def (find_label_defining_key p (Label.key l), l))
|
|
|
|
let find_def_for_ref ~image p ref =
|
|
let kind = if image then `Image else `Link in
|
|
let def = find_label_defining_key p (Label.key ref) in
|
|
p.resolver (`Ref (kind, ref, def))
|
|
|
|
let debug_span p s = String.sub p.i s.first (s.last - s.first + 1)
|
|
let debug_line p =
|
|
let first = snd p.current_line_pos and last = p.current_line_last_char in
|
|
String.sub p.i first (last - first + 1)
|
|
|
|
let current_line_span p ~first ~last =
|
|
{ line_pos = p.current_line_pos; first; last }
|
|
|
|
(* Making metas and text locations. This is centralized here to be able
|
|
to disable their creation which has a non-negligible impact on
|
|
performance. *)
|
|
|
|
let meta p textloc = if p.nolocs then Meta.none else Meta.make ~textloc ()
|
|
|
|
let textloc_of_span p span =
|
|
if p.nolocs then Textloc.none else
|
|
let first_byte = span.first and last_byte = span.last in
|
|
let first_line = span.line_pos and last_line = span.line_pos in
|
|
Textloc.v ~file:p.file ~first_byte ~last_byte ~first_line ~last_line
|
|
|
|
let textloc_of_lines p ~first ~last ~first_line ~last_line =
|
|
if p.nolocs then Textloc.none else
|
|
let first_byte = first and first_line = first_line.line_pos in
|
|
let last_byte = last and last_line = last_line.line_pos in
|
|
Textloc.v ~file:p.file ~first_byte ~last_byte ~first_line ~last_line
|
|
|
|
let meta_of_spans p ~first:first_line ~last:last_line =
|
|
if p.nolocs then Meta.none else
|
|
let first = first_line.first and last = last_line.last in
|
|
meta p (textloc_of_lines p ~first ~last ~first_line ~last_line)
|
|
|
|
let meta_of_metas p ~first ~last =
|
|
if p.nolocs then Meta.none else
|
|
meta p (Textloc.span (Meta.textloc first) (Meta.textloc last))
|
|
|
|
let clean_raw_span ?pad p span =
|
|
Text.utf_8_clean_raw ?pad p.buf p.i ~first:span.first ~last:span.last,
|
|
meta p (textloc_of_span p span)
|
|
|
|
let clean_unref_span p span =
|
|
Text.utf_8_clean_unref p.buf p.i ~first:span.first ~last:span.last,
|
|
meta p (textloc_of_span p span)
|
|
|
|
let clean_unesc_unref_span p span =
|
|
Text.utf_8_clean_unesc_unref p.buf p.i ~first:span.first ~last:span.last,
|
|
meta p (textloc_of_span p span)
|
|
|
|
let layout_clean_raw_span ?pad p span =
|
|
if p.nolayout then Layout.empty else clean_raw_span ?pad p span
|
|
|
|
let layout_clean_raw_span' ?pad p span =
|
|
(* Like [layout_raw_span] but no meta *)
|
|
if p.nolayout then "" else
|
|
Text.utf_8_clean_raw ?pad p.buf p.i ~first:span.first ~last:span.last
|
|
|
|
let _tight_block_lines xxx_span p ~rev_spans =
|
|
let rec loop p acc = function
|
|
| [] -> acc
|
|
| [_, fst_line] -> ("", xxx_span p fst_line) :: acc
|
|
| (line_start, span) :: spans ->
|
|
let acc =
|
|
let layout =
|
|
if p.nolayout || span.first <= line_start then "" else
|
|
Text.utf_8_clean_raw p.buf p.i ~first:line_start
|
|
~last:(span.first - 1)
|
|
in
|
|
(layout, xxx_span p span) :: acc
|
|
in
|
|
loop p acc spans
|
|
in
|
|
loop p [] rev_spans
|
|
|
|
let tight_block_lines p ~rev_spans =
|
|
_tight_block_lines clean_unesc_unref_span p ~rev_spans
|
|
|
|
let raw_tight_block_lines p ~rev_spans =
|
|
_tight_block_lines clean_raw_span p ~rev_spans
|
|
|
|
let first_non_blank_in_span p s = Match.first_non_blank_in_span p.i s
|
|
let first_non_blank_over_nl ~next_line p lines line ~start =
|
|
match Match.first_non_blank_over_nl ~next_line p.i lines ~line ~start with
|
|
| `None -> None
|
|
| `This_line non_blank ->
|
|
let layout =
|
|
if non_blank = start then [] else
|
|
[clean_raw_span p { line with first = start ; last = non_blank - 1}]
|
|
in
|
|
Some (lines, line, layout, non_blank)
|
|
| `Next_line (lines, newline, non_blank) ->
|
|
let first_layout = clean_raw_span p { line with first = start } in
|
|
let next_layout = clean_raw_span p { newline with last = non_blank -1 } in
|
|
let layout = [first_layout; next_layout] in
|
|
Some (lines, newline, layout, non_blank)
|
|
|
|
(* Inline structure parsing *)
|
|
|
|
module Inline_struct = struct
|
|
|
|
(* Tokens for parsing inlines.
|
|
|
|
The list of tokens of a paragraph are the points to consider to
|
|
parse it into inlines. Tokens gradually become [Inline] tokens
|
|
containing parsed inlines. Between two tokens there is implicit
|
|
textual data. This data gradually becomes part of [Inline] tokens
|
|
or, at the end of of the parsing process, becomes [Text] inlines.
|
|
|
|
The token list also represents newlines explicitly, either via
|
|
the [Newline] token or via the [Inline] token since inlines may
|
|
start on a line and up on another one. *)
|
|
|
|
type emphasis_marks =
|
|
{ start : byte_pos;
|
|
char : char;
|
|
count : int;
|
|
may_open : bool;
|
|
may_close : bool }
|
|
|
|
type strikethrough_marks =
|
|
{ start : byte_pos;
|
|
may_open : bool;
|
|
may_close : bool }
|
|
|
|
type math_span_marks =
|
|
{ start : byte_pos;
|
|
count : int;
|
|
may_open : bool;
|
|
may_close : bool; }
|
|
|
|
type token =
|
|
| Autolink_or_html_start of { start : byte_pos }
|
|
| Backticks of
|
|
{ start : byte_pos;
|
|
count : int;
|
|
escaped : bool }
|
|
| Emphasis_marks of emphasis_marks
|
|
| Inline of
|
|
{ start : byte_pos;
|
|
inline : Inline.t;
|
|
endline : line_span;
|
|
next : byte_pos }
|
|
| Link_start of
|
|
{ start : byte_pos;
|
|
image : bool }
|
|
| Newline of
|
|
{ start : (* points on spaces or \ on the broken line *) byte_pos;
|
|
break_type : Inline.Break.type';
|
|
newline : line_span; }
|
|
| Right_brack of { start : byte_pos }
|
|
| Right_paren of { start : byte_pos } (* Only used for closer index *)
|
|
| Strikethrough_marks of strikethrough_marks
|
|
| Math_span_marks of math_span_marks
|
|
|
|
let token_start = function
|
|
| Autolink_or_html_start { start } | Backticks { start }
|
|
| Emphasis_marks { start } | Inline { start } -> start | Link_start { start }
|
|
| Newline { start } | Right_brack { start } -> start
|
|
| Right_paren { start } -> start
|
|
| Strikethrough_marks { start } -> start
|
|
| Math_span_marks { start } -> start
|
|
|
|
let has_backticks ~count ~after cidx =
|
|
Closer_index.closer_exists (Closer.Backticks count) ~after cidx
|
|
|
|
let has_right_brack ~after cidx =
|
|
Closer_index.closer_exists Closer.Right_brack ~after cidx
|
|
|
|
let has_right_paren ~after cidx =
|
|
Closer_index.closer_exists Closer.Right_paren ~after cidx
|
|
|
|
let emphasis_closer_pos ~char ~after cidx =
|
|
Closer_index.closer_pos (Closer.Emphasis_marks char) ~after cidx
|
|
|
|
let has_emphasis_closer ~char ~after cidx =
|
|
Closer_index.closer_exists (Closer.Emphasis_marks char) ~after cidx
|
|
|
|
let has_strikethrough_closer ~after cidx =
|
|
Closer_index.closer_exists Closer.Strikethrough_marks ~after cidx
|
|
|
|
let has_math_span_closer ~count ~after cidx =
|
|
Closer_index.closer_exists (Closer.Math_span_marks count) ~after cidx
|
|
|
|
let rev_token_list_and_make_closer_index toks =
|
|
let rec loop cidx acc = function
|
|
| Backticks { start; count; _ } as t :: toks ->
|
|
let cidx = Closer_index.add (Closer.Backticks count) start cidx in
|
|
loop cidx (t :: acc) toks
|
|
| Right_brack { start } as t :: toks ->
|
|
let cidx = Closer_index.add Closer.Right_brack start cidx in
|
|
loop cidx (t :: acc) toks
|
|
| Right_paren { start } :: toks ->
|
|
let cidx = Closer_index.add Closer.Right_paren start cidx in
|
|
loop cidx (* we don't use the token for parsing *) acc toks
|
|
| Emphasis_marks { start; char; may_close = true } as t :: toks ->
|
|
let cidx = Closer_index.add (Closer.Emphasis_marks char) start cidx in
|
|
loop cidx (t :: acc) toks
|
|
| Strikethrough_marks { start; may_close = true } as t :: toks ->
|
|
let cidx = Closer_index.add Closer.Strikethrough_marks start cidx in
|
|
loop cidx (t :: acc) toks
|
|
| Math_span_marks { start; count; may_close = true } as t :: toks ->
|
|
let cidx = Closer_index.add (Closer.Math_span_marks count) start cidx in
|
|
loop cidx (t :: acc) toks
|
|
| t :: toks -> loop cidx (t :: acc) toks
|
|
| [] -> cidx, acc
|
|
in
|
|
loop Closer_index.empty [] toks
|
|
|
|
let rec rev_tokens_and_shorten_last_line ~to_last:last acc = function
|
|
(* Used to make the text delimitation precise for nested inlines *)
|
|
| Newline ({ newline; _ } as nl) :: toks ->
|
|
let t = Newline { nl with newline = { newline with last }} in
|
|
List.rev_append toks (t :: acc)
|
|
| Inline ({ endline; _ } as i) :: toks ->
|
|
let t = Inline { i with endline = { endline with last }} in
|
|
List.rev_append toks (t :: acc)
|
|
| t :: toks -> rev_tokens_and_shorten_last_line ~to_last:last (t :: acc) toks
|
|
| [] -> acc
|
|
|
|
let rec drop_stop_after_right_brack = function
|
|
| Right_brack _ :: toks -> toks
|
|
| _ :: toks -> drop_stop_after_right_brack toks
|
|
| [] -> []
|
|
|
|
let rec drop_until ~start = function
|
|
| t :: toks when token_start t < start -> drop_until ~start toks
|
|
| toks -> toks
|
|
|
|
let rec next_line = function
|
|
(* N.B. when we use this function considering Inline tokens is not needed. *)
|
|
| [] -> None
|
|
| Newline { newline; _ } :: toks -> Some (toks, newline)
|
|
| _ :: toks -> next_line toks
|
|
|
|
(* Tokenization *)
|
|
|
|
let newline_token s prev_line newline =
|
|
(* https://spec.commonmark.org/current/#softbreak *)
|
|
(* https://spec.commonmark.org/current/#hard-line-breaks *)
|
|
let start (* includes spaces or '\\' on prev line *), break_type =
|
|
let first = prev_line.first and last = prev_line.last in
|
|
let non_space = Match.rev_drop_spaces s ~first ~start:last in
|
|
if non_space = last && s.[non_space] = '\\' then (non_space, `Hard) else
|
|
let start = non_space + 1 in
|
|
(start, if last - start + 1 >= 2 then `Hard else `Soft)
|
|
in
|
|
Newline { start; break_type; newline }
|
|
|
|
let add_backtick_token acc s line ~prev_bslash ~start =
|
|
let last = Match.run_of ~char:'`' s ~last:line.last ~start:(start + 1) in
|
|
let count = last - start + 1 and escaped = prev_bslash in
|
|
Backticks {start; count; escaped} :: acc, last + 1
|
|
|
|
let try_add_image_link_start_token acc s line ~start =
|
|
let next = start + 1 in
|
|
if next > line.last || s.[next] <> '[' then acc, next else
|
|
Link_start { start; image = true } :: acc, next + 1
|
|
|
|
let try_add_emphasis_token acc s line ~start =
|
|
let first = line.first and last = line.last and char = s.[start] in
|
|
let run_last = Match.run_of ~char ~last s ~start:(start + 1) in
|
|
let count = run_last - start + 1 in
|
|
let prev_uchar = Match.prev_uchar s ~first ~before:start in
|
|
let next_uchar = Match.next_uchar s ~last ~after:run_last in
|
|
let prev_white = Cmarkit_data.is_unicode_whitespace prev_uchar in
|
|
let next_white = Cmarkit_data.is_unicode_whitespace next_uchar in
|
|
let prev_punct = Cmarkit_data.is_unicode_punctuation prev_uchar in
|
|
let next_punct = Cmarkit_data.is_unicode_punctuation next_uchar in
|
|
let is_left_flanking =
|
|
not next_white && (not next_punct || (prev_white || prev_punct))
|
|
in
|
|
let is_right_flanking =
|
|
not prev_white && (not prev_punct || (next_white || next_punct))
|
|
in
|
|
let next = run_last + 1 in
|
|
if not is_left_flanking && not is_right_flanking then acc, next else
|
|
let may_open =
|
|
(char = '*' && is_left_flanking) ||
|
|
(char = '_' && is_left_flanking && (not is_right_flanking || prev_punct))
|
|
in
|
|
let may_close =
|
|
(char = '*' && is_right_flanking) ||
|
|
(char = '_' && is_right_flanking && (not is_left_flanking || next_punct))
|
|
in
|
|
if not may_open && not may_close then acc, next else
|
|
Emphasis_marks { start; char; count; may_open; may_close } :: acc, next
|
|
|
|
let try_add_strikethrough_marks_token acc s line ~start =
|
|
let first = line.first and last = line.last and char = s.[start] in
|
|
let run_last = Match.run_of ~char ~last s ~start:(start + 1) in
|
|
let count = run_last - start + 1 in
|
|
let next = run_last + 1 in
|
|
if count <> 2 then acc, next else
|
|
let prev_uchar = Match.prev_uchar s ~first ~before:start in
|
|
let next_uchar = Match.next_uchar s ~last ~after:run_last in
|
|
let may_close = not (Cmarkit_data.is_unicode_whitespace prev_uchar) in
|
|
let may_open = not (Cmarkit_data.is_unicode_whitespace next_uchar) in
|
|
if not may_open && not may_close then acc, next else
|
|
Strikethrough_marks { start; may_open; may_close } :: acc, next
|
|
|
|
let try_add_math_span_marks_token acc s line ~start =
|
|
let first = line.first and last = line.last and char = s.[start] in
|
|
let run_last = Match.run_of ~char ~last s ~start:(start + 1) in
|
|
let count = run_last - start + 1 in
|
|
let next = run_last + 1 in
|
|
if count > 2 then acc, next else
|
|
let may_open, may_close =
|
|
if count <> 1 then true, true else
|
|
let prev_uchar = Match.prev_uchar s ~first ~before:start in
|
|
let next_uchar = Match.next_uchar s ~last ~after:run_last in
|
|
let may_close = not (Cmarkit_data.is_unicode_whitespace prev_uchar) in
|
|
let may_open = not (Cmarkit_data.is_unicode_whitespace next_uchar) in
|
|
may_open, may_close
|
|
in
|
|
if not may_open && not may_close then acc, next else
|
|
Math_span_marks { start; count; may_open; may_close } :: acc, next
|
|
|
|
let tokenize ~exts s lines =
|
|
(* For inlines this is where we conditionalize for extensions. All code
|
|
paths after that no longer check for p.exts: there just won't be
|
|
extension data to process if [exts] was not [true] here. *)
|
|
let rec loop ~exts s lines line ~prev_bslash acc k =
|
|
if k > line.last then match lines with
|
|
| [] -> rev_token_list_and_make_closer_index acc
|
|
| newline :: lines ->
|
|
let t = newline_token s line newline in
|
|
loop ~exts s lines newline ~prev_bslash:false (t :: acc) newline.first
|
|
else
|
|
if s.[k] = '\\'
|
|
then loop ~exts s lines line ~prev_bslash:(not prev_bslash) acc (k+1) else
|
|
let acc, next = match s.[k] with
|
|
| '`' -> add_backtick_token acc s line ~prev_bslash ~start:k
|
|
| c when prev_bslash -> acc, k + 1
|
|
| '*' | '_' -> try_add_emphasis_token acc s line ~start:k
|
|
| ']' -> Right_brack { start = k } :: acc, k + 1
|
|
| '[' -> Link_start { start = k; image = false } :: acc, k + 1
|
|
| '!' -> try_add_image_link_start_token acc s line ~start:k
|
|
| '<' -> Autolink_or_html_start { start = k } :: acc, k + 1
|
|
| ')' -> Right_paren { start = k } :: acc, k + 1
|
|
| '~' when exts -> try_add_strikethrough_marks_token acc s line ~start:k
|
|
| '$' when exts -> try_add_math_span_marks_token acc s line ~start:k
|
|
| _ -> acc, k + 1
|
|
in
|
|
loop ~exts s lines line ~prev_bslash:false acc next
|
|
in
|
|
let line = List.hd lines and lines = List.tl lines in
|
|
let cidx, toks = loop ~exts s lines line ~prev_bslash:false [] line.first in
|
|
cidx, toks, line
|
|
|
|
(* Making inlines and inline tokens *)
|
|
|
|
let break_inline p line ~start ~break_type:type' ~newline =
|
|
let layout_before = { line with first = start } in
|
|
let layout_after =
|
|
let non_blank = first_non_blank_in_span p newline in
|
|
{ newline with last = non_blank - 1 }
|
|
in
|
|
let m = meta_of_spans p ~first:layout_before ~last:layout_after in
|
|
let layout_before = layout_clean_raw_span p layout_before in
|
|
let layout_after = layout_clean_raw_span p layout_after in
|
|
Inline.Break ({ layout_before; type'; layout_after }, m)
|
|
|
|
let try_add_text_inline p line ~first ~last acc =
|
|
if first > last then acc else
|
|
let first = match first = line.first with
|
|
| true -> first_non_blank_in_span p line (* strip leading blanks *)
|
|
| false -> first
|
|
in
|
|
Inline.Text (clean_unesc_unref_span p { line with first; last }) :: acc
|
|
|
|
let inlines_inline p ~first ~last ~first_line ~last_line = function
|
|
| [i] -> i
|
|
| is ->
|
|
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
|
|
Inline.Inlines (is, meta p textloc)
|
|
|
|
let code_span_token p ~count ~first ~last ~first_line ~last_line rev_spans =
|
|
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
|
|
let code_layout = raw_tight_block_lines p ~rev_spans in
|
|
let meta = meta p textloc in
|
|
let cs = Inline.Code_span ({ backtick_count = count; code_layout }, meta) in
|
|
Inline { start = first; inline = cs; endline = last_line; next = last + 1 }
|
|
|
|
let autolink_token p line ~first ~last ~is_email =
|
|
let meta = meta p (textloc_of_span p { line with first; last }) in
|
|
let link = { line with first = first + 1; last = last - 1 } in
|
|
let link = clean_unref_span p link in
|
|
let inline = Inline.Autolink ({ is_email; link }, meta) in
|
|
Inline { start = first; inline; endline = line; next = last + 1 }
|
|
|
|
let raw_html_token p ~first ~last ~first_line ~last_line rev_spans =
|
|
let raw = raw_tight_block_lines p ~rev_spans in
|
|
let textloc =
|
|
let first = Meta.textloc (snd (snd (List.hd raw))) in
|
|
let last = snd (List.hd rev_spans) in
|
|
let last_byte = last.last and last_line = last.line_pos in
|
|
Textloc.set_last first ~last_byte ~last_line
|
|
in
|
|
let inline = Inline.Raw_html (raw, meta p textloc) in
|
|
Inline { start = first; inline; endline = last_line; next = last + 1 }
|
|
|
|
let link_token p ~first ~last ~first_line ~last_line ~image link =
|
|
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
|
|
let link = link, meta p textloc in
|
|
let inline = if image then Inline.Image link else Inline.Link link in
|
|
Inline { start = first; inline; endline = last_line; next = last + 1 }
|
|
|
|
let emphasis_token p ~first ~last ~first_line ~last_line ~strong emph =
|
|
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
|
|
let delim = p.i.[first] in
|
|
let e = { Inline.Emphasis.delim; inline = emph}, meta p textloc in
|
|
let i = if strong then Inline.Strong_emphasis e else Inline.Emphasis e in
|
|
Inline { start = first; inline = i ; endline = last_line; next = last + 1 }
|
|
|
|
let ext_strikethrough_token p ~first ~last ~first_line ~last_line s =
|
|
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
|
|
let inline = Inline.Ext_strikethrough (s, meta p textloc) in
|
|
Inline { start = first; inline; endline = last_line; next = last + 1 }
|
|
|
|
let ext_math_span_token p ~count ~first ~last ~first_line ~last_line rspans =
|
|
let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in
|
|
let tex_layout = raw_tight_block_lines p ~rev_spans:rspans in
|
|
let meta = meta p textloc in
|
|
let ms = Inline.Math_span.make ~display:(count = 2) tex_layout in
|
|
let inline = Inline.Ext_math_span (ms, meta) in
|
|
Inline { start = first; inline; endline = last_line; next = last + 1 }
|
|
|
|
(* Parsers *)
|
|
|
|
let try_code p toks start_line ~start:cstart ~count ~escaped =
|
|
(* https://spec.commonmark.org/current/#code-span *)
|
|
if escaped || not (has_backticks ~count ~after:cstart p.cidx) then None else
|
|
let rec match_backticks toks line ~count spans k = match toks with
|
|
| [] -> None
|
|
| Backticks { start; count = c; _ } :: toks ->
|
|
if c <> count then match_backticks toks line ~count spans k else
|
|
let span = line.first, { line with first = k; last = start - 1} in
|
|
let spans = span :: spans in
|
|
let first = cstart and last = start + count - 1 in
|
|
let first_line = start_line and last_line = line in
|
|
let t =
|
|
code_span_token p ~count ~first ~last ~first_line ~last_line spans
|
|
in
|
|
Some (toks, line, t)
|
|
| Newline { newline } :: toks ->
|
|
let spans = (line.first, { line with first = k }) :: spans in
|
|
let k = first_non_blank_in_span p newline in
|
|
match_backticks toks newline ~count spans k
|
|
| _ :: toks -> match_backticks toks line ~count spans k
|
|
in
|
|
let first = cstart + count in
|
|
match_backticks toks { start_line with first } ~count [] first
|
|
|
|
let try_math_span p toks start_line ~start:cstart ~count =
|
|
if not (has_math_span_closer ~count ~after:cstart p.cidx) then None else
|
|
let rec match_math_marks toks line ~count spans k = match toks with
|
|
| [] -> None
|
|
| Math_span_marks { start; count = c; may_close; _ } :: toks ->
|
|
if c <> count || not may_close
|
|
then match_math_marks toks line ~count spans k else
|
|
let span = line.first, { line with first = k; last = start - 1 } in
|
|
let spans = span :: spans in
|
|
let first = cstart and last = start + count - 1 in
|
|
let first_line = start_line and last_line = line in
|
|
let t =
|
|
ext_math_span_token p ~count ~first ~last ~first_line ~last_line
|
|
spans
|
|
in
|
|
Some (toks, line, t)
|
|
| Newline { newline } :: toks ->
|
|
let spans = (line.first, { line with first = k }) :: spans in
|
|
let k = first_non_blank_in_span p newline in
|
|
match_math_marks toks newline ~count spans k
|
|
| _ :: toks -> match_math_marks toks line ~count spans k
|
|
in
|
|
let first = cstart + count in
|
|
match_math_marks toks { start_line with first } ~count [] first
|
|
|
|
let try_autolink_or_html p toks line ~start =
|
|
match Match.autolink_uri p.i ~last:line.last ~start with
|
|
| Some last ->
|
|
let t = autolink_token p line ~first:start ~last ~is_email:false in
|
|
let toks = drop_until ~start:(last + 1) toks in
|
|
Some (toks, line, t)
|
|
| None ->
|
|
match Match.autolink_email p.i ~last:line.last ~start with
|
|
| Some last ->
|
|
let t = autolink_token p line ~first:start ~last ~is_email:true in
|
|
let toks = drop_until ~start:(last + 1) toks in
|
|
Some (toks, line, t)
|
|
| None ->
|
|
match Match.raw_html ~next_line p.i toks ~line ~start with
|
|
| None -> None
|
|
| Some (toks, last_line, spans, last) ->
|
|
let first = start and first_line = line in
|
|
let t = raw_html_token p ~first ~last ~first_line ~last_line spans in
|
|
let toks = drop_until ~start:(last + 1) toks in
|
|
Some (toks, last_line, t)
|
|
|
|
let label_of_rev_spans p ~key rev_spans =
|
|
let meta =
|
|
if p.nolocs || rev_spans = [] then Meta.none else
|
|
let first = snd (List.hd (List.rev rev_spans)) in
|
|
let last = snd (List.hd rev_spans) in
|
|
meta_of_spans p ~first ~last
|
|
in
|
|
let text = tight_block_lines p ~rev_spans in
|
|
{ Label.meta; key; text }
|
|
|
|
let try_full_reflink_remainder p toks line ~image ~start (* is label's [ *) =
|
|
(* https://spec.commonmark.org/current/#full-reference-link *)
|
|
match Match.link_label p.buf ~next_line p.i toks ~line ~start with
|
|
| None -> None
|
|
| Some (toks, line, rev_spans, last, key) ->
|
|
let ref = label_of_rev_spans p ~key rev_spans in
|
|
let toks = drop_stop_after_right_brack toks in
|
|
match find_def_for_ref p ~image ref with
|
|
| None -> Some None
|
|
| Some def -> Some (Some (toks, line, `Ref (`Full, ref, def), last))
|
|
|
|
let try_shortcut_reflink p toks line ~image ~start (* is starting [ or ! *) =
|
|
(* https://spec.commonmark.org/current/#shortcut-reference-link *)
|
|
let start = if image then start + 1 (* [ *) else start in
|
|
match Match.link_label p.buf ~next_line p.i toks ~line ~start with
|
|
| None -> None
|
|
| Some (toks, line, rev_spans, last, key) ->
|
|
let ref = label_of_rev_spans p ~key rev_spans in
|
|
let toks = drop_stop_after_right_brack toks in
|
|
match find_def_for_ref p ~image ref with
|
|
| None -> None
|
|
| Some def -> Some (toks, line, `Ref (`Shortcut, ref, def), last)
|
|
|
|
let try_collapsed_reflink p toks line ~image ~start (* is starting [ or ! *) =
|
|
(* https://spec.commonmark.org/current/#collapsed-reference-link *)
|
|
let start = if image then start + 1 (* [ *) else start in
|
|
match Match.link_label p.buf ~next_line p.i toks ~line ~start with
|
|
| None -> None
|
|
| Some (toks, line, rev_spans, last, key) ->
|
|
let ref = label_of_rev_spans p ~key rev_spans in
|
|
let last = last + 2 in (* adjust for ][] *)
|
|
let toks = drop_stop_after_right_brack toks in
|
|
let toks = drop_stop_after_right_brack toks in
|
|
match find_def_for_ref p ~image ref with
|
|
| None -> None
|
|
| Some def -> Some (toks, line, `Ref (`Collapsed, ref, def), last)
|
|
|
|
let try_inline_link_remainder p toks start_line ~image ~start:st (* is ( *) =
|
|
(* https://spec.commonmark.org/current/#inline-link *)
|
|
if not (has_right_paren ~after:st p.cidx) then None else
|
|
let first_non_blank_over_nl = first_non_blank_over_nl ~next_line in
|
|
match first_non_blank_over_nl p toks start_line ~start:(st + 1) with
|
|
| None -> None
|
|
| Some (toks, line, before_dest, start) ->
|
|
let toks, line, angled_dest, dest, start =
|
|
match Match.link_destination p.i ~last:line.last ~start with
|
|
| None -> toks, line, false, None, start
|
|
| Some (angled, first, last) ->
|
|
let dest = clean_unesc_unref_span p { line with first; last } in
|
|
let next = if angled then last + 2 else last + 1 in
|
|
toks, line, angled, Some dest, next
|
|
in
|
|
let toks, line, after_dest, title_open_delim, title, start =
|
|
match first_non_blank_over_nl p toks line ~start with
|
|
| None ->
|
|
toks, line, [], '\"', None, start
|
|
| Some (_, _, _, start') when start' = start ->
|
|
toks, line, [], '\"', None, start
|
|
| Some (toks, line, after_destination, start) ->
|
|
match Match.link_title ~next_line p.i toks ~line ~start with
|
|
| None -> toks, line, after_destination, '\"', None, start
|
|
| Some (toks, line, rev_spans, last) ->
|
|
let title = tight_block_lines p ~rev_spans in
|
|
toks, line, after_destination, p.i.[start],
|
|
Some title, last + 1
|
|
in
|
|
let toks, line, after_title, last =
|
|
match first_non_blank_over_nl p toks line ~start with
|
|
| None -> toks, line, [], start
|
|
| Some (toks, line, after_title, start as v) -> v
|
|
in
|
|
if last > line.last || p.i.[last] <> ')' then None else
|
|
let layout =
|
|
{ Link_definition.indent = 0; angled_dest; before_dest;
|
|
after_dest; title_open_delim; after_title; }
|
|
in
|
|
let label = None and defined_label = None in
|
|
let ld = { Link_definition.layout; label; defined_label; dest; title }in
|
|
let textloc =
|
|
let first = st and last = start in
|
|
textloc_of_lines p ~first ~last ~first_line:start_line ~last_line:line
|
|
in
|
|
let ld = (ld, meta p textloc) in
|
|
let toks = drop_until ~start:(last + 1) toks in
|
|
Some (toks, line, `Inline ld, last)
|
|
|
|
let find_link_text_tokens p toks start_line ~start =
|
|
(* XXX The repetition with first_pass is annoying here.
|
|
we should figure out something for that not to happen. *)
|
|
(* https://spec.commonmark.org/current/#link-text *)
|
|
let rec loop toks line nest acc = match toks with
|
|
| Right_brack { start = last } :: toks when nest = 0 ->
|
|
let acc = rev_tokens_and_shorten_last_line ~to_last:(last - 1) [] acc in
|
|
Some (toks, line, acc, last)
|
|
| Backticks { start; count; escaped } :: toks ->
|
|
begin match try_code p toks line ~start ~count ~escaped with
|
|
| None -> loop toks line nest acc
|
|
| Some (toks, line, t) -> loop toks line nest (t :: acc)
|
|
end
|
|
| Math_span_marks { start; count; may_open; } :: toks ->
|
|
if not may_open then loop toks line nest acc else
|
|
begin match try_math_span p toks line ~start ~count with
|
|
| None -> loop toks line nest acc
|
|
| Some (toks, line, t) -> loop toks line nest (t :: acc)
|
|
end
|
|
| Autolink_or_html_start { start } :: toks ->
|
|
begin match try_autolink_or_html p toks line ~start with
|
|
| None -> loop toks line nest acc
|
|
| Some (toks, line, t) -> loop toks line nest (t :: acc)
|
|
end
|
|
| Right_brack _ as t :: toks -> loop toks line (nest - 1) (t :: acc)
|
|
| Link_start _ as t :: toks -> loop toks line (nest + 1) (t :: acc)
|
|
| Newline { newline; _ } as t :: toks -> loop toks newline nest (t :: acc)
|
|
| Inline { endline; _ } as t :: toks -> loop toks endline nest (t :: acc)
|
|
| t :: toks -> loop toks line nest (t :: acc)
|
|
| [] -> None
|
|
in
|
|
loop toks start_line 0 []
|
|
|
|
let try_link_def
|
|
p ~start ~start_toks ~start_line ~toks ~line ~text_last ~image text
|
|
=
|
|
let next = text_last + 1 in
|
|
let link =
|
|
if next > line.last
|
|
then try_shortcut_reflink p start_toks start_line ~image ~start else
|
|
match p.i.[next] with
|
|
| '(' ->
|
|
(match try_inline_link_remainder p toks line ~image ~start:next with
|
|
| None -> try_shortcut_reflink p start_toks start_line ~image ~start
|
|
| Some _ as v -> v)
|
|
| '[' ->
|
|
let next' = next + 1 in
|
|
if next' <= line.last && p.i.[next'] = ']'
|
|
then try_collapsed_reflink p start_toks start_line ~image ~start else
|
|
let r = try_full_reflink_remainder p toks line ~image ~start:next in
|
|
begin match r with
|
|
| None -> try_shortcut_reflink p start_toks start_line ~image ~start
|
|
| Some None -> None (* Example 570 *)
|
|
| Some (Some _ as v) -> v
|
|
end
|
|
| c ->
|
|
try_shortcut_reflink p start_toks start_line ~image ~start
|
|
in
|
|
match link with
|
|
| None -> None
|
|
| Some (toks, endline, reference, last) ->
|
|
let first = start in
|
|
let text =
|
|
let first_line = start_line and last_line = line in
|
|
inlines_inline p text ~first ~last:text_last ~first_line ~last_line
|
|
in
|
|
let link = { Inline.Link.text; reference } in
|
|
let first_line = start_line and last_line = endline in
|
|
let t = link_token p ~image ~first ~last ~first_line ~last_line link in
|
|
let had_link = not image && not p.nested_links in
|
|
Some (toks, endline, t, had_link)
|
|
|
|
(* The following sequence of mutually recursive functions define
|
|
inline parsing. We have three passes over a paragraph's token
|
|
list see the [parse_tokens] function below. *)
|
|
|
|
let rec try_link p start_toks start_line ~image ~start =
|
|
if not (has_right_brack ~after:start p.cidx) then None else
|
|
match find_link_text_tokens p start_toks start_line ~start with
|
|
| None -> None
|
|
| Some (toks, line, text_toks, text_last (* with ] delim *)) ->
|
|
let text, had_link =
|
|
let text_start =
|
|
let first = start + (if image then 2 else 1) in
|
|
let last =
|
|
if start_line == line then text_last - 1 else start_line.last
|
|
in
|
|
{ start_line with first; last }
|
|
in
|
|
parse_tokens p text_toks text_start
|
|
in
|
|
if had_link && not image
|
|
then None (* Could try to keep render *) else
|
|
try_link_def
|
|
p ~start ~start_toks ~start_line ~toks ~line ~text_last ~image text
|
|
|
|
and first_pass p toks line =
|
|
(* Parse inline atoms and links. Links are parsed here otherwise
|
|
link reference data gets parsed as atoms. *)
|
|
let rec loop p toks line ~had_link acc = match toks with
|
|
| [] -> List.rev acc, had_link
|
|
| Backticks { start; count; escaped } :: toks ->
|
|
begin match try_code p toks line ~start ~count ~escaped with
|
|
| None -> loop p toks line ~had_link acc
|
|
| Some (toks, line, t) -> loop p toks line ~had_link (t :: acc)
|
|
end
|
|
| Math_span_marks { start; count; may_open; } :: toks ->
|
|
if not may_open then loop p toks line ~had_link acc else
|
|
begin match try_math_span p toks line ~start ~count with
|
|
| None -> loop p toks line ~had_link acc
|
|
| Some (toks, line, t) -> loop p toks line ~had_link (t :: acc)
|
|
end
|
|
| Autolink_or_html_start { start } :: toks ->
|
|
begin match try_autolink_or_html p toks line ~start with
|
|
| None -> loop p toks line ~had_link acc
|
|
| Some (toks, line, t) -> loop p toks line ~had_link (t :: acc)
|
|
end
|
|
| Link_start { start; image } :: toks ->
|
|
begin match try_link p toks line ~image ~start with
|
|
| None -> loop p toks line ~had_link acc
|
|
| Some (toks, line, t, had_link) ->
|
|
loop p toks line ~had_link (t :: acc)
|
|
end
|
|
| Right_brack start :: toks -> loop p toks line ~had_link acc
|
|
| Newline { newline = l } as t :: toks -> loop p toks l ~had_link (t :: acc)
|
|
| t :: toks -> loop p toks line ~had_link (t :: acc)
|
|
in
|
|
loop p toks line ~had_link:false []
|
|
|
|
(* Second pass *)
|
|
|
|
and find_emphasis_text p toks line ~opener =
|
|
let marks_match ~marks ~opener =
|
|
(opener.char = marks.char) &&
|
|
(not (marks.may_open || opener.may_close) ||
|
|
marks.count mod 3 = 0 || (opener.count + marks.count) mod 3 != 0)
|
|
in
|
|
let marks_has_precedence p ~marks ~opener =
|
|
if marks.char = opener.char (* Rule 16 *) then true else (* Rule 15 *)
|
|
emphasis_closer_pos ~char:marks.char ~after:marks.start p.cidx <
|
|
emphasis_closer_pos ~char:opener.char ~after:marks.start p.cidx
|
|
in
|
|
let rec loop p toks line acc ~opener = match toks with
|
|
| [] -> Either.Left (List.rev acc) (* No match but keep nested work done *)
|
|
| Emphasis_marks marks as t :: toks ->
|
|
let after = marks.start in
|
|
if marks.may_close && marks_match ~marks ~opener then
|
|
let used = if marks.count >= 2 && opener.count >= 2 then 2 else 1 in
|
|
let to_last = marks.start - 1 in
|
|
let acc = rev_tokens_and_shorten_last_line ~to_last [] acc in
|
|
Either.Right (toks, line, used, acc, marks)
|
|
else if marks.may_open && marks_has_precedence p ~marks ~opener then
|
|
match try_emphasis p toks line ~opener:marks with
|
|
| Either.Left toks -> loop p toks line acc ~opener
|
|
| Either.Right (toks, line) -> loop p toks line acc ~opener
|
|
else if has_emphasis_closer ~char:opener.char ~after p.cidx then
|
|
loop p toks line (t :: acc) ~opener
|
|
else (Either.Left (List.rev_append (t :: acc) toks))
|
|
| Newline { newline = l } as t :: toks -> loop p toks l (t :: acc) ~opener
|
|
| Inline { endline = l } as t :: toks -> loop p toks l (t :: acc) ~opener
|
|
| t :: toks -> loop p toks line (t :: acc) ~opener
|
|
in
|
|
loop p toks line [] ~opener
|
|
|
|
and try_emphasis p start_toks start_line ~opener =
|
|
let start = opener.start in
|
|
if not (has_emphasis_closer ~char:opener.char ~after:start p.cidx)
|
|
then Either.Left start_toks else
|
|
match find_emphasis_text p start_toks start_line ~opener with
|
|
| Either.Left _ as r -> r
|
|
| Either.Right (toks, line, used, emph_toks, closer) ->
|
|
let text_first = start + opener.count in
|
|
let text_last = closer.start - 1 (* XXX prev line ? *) in
|
|
let first = text_first - used in
|
|
let last = closer.start + used - 1 in
|
|
let first_line = start_line and last_line = line in
|
|
let emph =
|
|
let text_start =
|
|
let last =
|
|
if start_line == line then text_last else start_line.last
|
|
in
|
|
{ start_line with first = text_first; last }
|
|
in
|
|
(* No need to redo first pass *)
|
|
let emph_toks = second_pass p emph_toks text_start in
|
|
let text = last_pass p emph_toks text_start in
|
|
inlines_inline p text ~first ~last:text_last ~first_line ~last_line
|
|
in
|
|
let toks =
|
|
let count = closer.count - used in
|
|
if count = 0 then toks else
|
|
Emphasis_marks { closer with start = last + 1; count } :: toks
|
|
in
|
|
let toks =
|
|
let strong = used = 2 in
|
|
emphasis_token p ~first ~last ~first_line ~last_line ~strong emph ::
|
|
toks
|
|
in
|
|
let toks =
|
|
let count = opener.count - used in
|
|
if count = 0 then toks else
|
|
Emphasis_marks { opener with count } :: toks
|
|
in
|
|
Either.Right (toks, line)
|
|
|
|
and find_strikethrough_text p toks start_line =
|
|
let rec loop p toks line acc = match toks with
|
|
| [] -> Either.Left (List.rev acc) (* No match but keep nested work done *)
|
|
| Strikethrough_marks marks :: toks ->
|
|
if marks.may_close then
|
|
let to_last = marks.start - 1 in
|
|
let acc = rev_tokens_and_shorten_last_line ~to_last [] acc in
|
|
Either.Right (toks, line, acc, marks)
|
|
else if marks.may_open then
|
|
match try_strikethrough p toks line ~opener:marks with
|
|
| Either.Left toks -> loop p toks line acc
|
|
| Either.Right (toks, line) -> loop p toks line acc
|
|
else assert false
|
|
| Newline { newline = l } as t :: toks -> loop p toks l (t :: acc)
|
|
| Inline { endline = l } as t :: toks -> loop p toks l (t :: acc)
|
|
| t :: toks -> loop p toks line (t :: acc)
|
|
in
|
|
loop p toks start_line []
|
|
|
|
and try_strikethrough p start_toks start_line ~opener =
|
|
let start = opener.start in
|
|
if not (has_strikethrough_closer ~after:start p.cidx)
|
|
then Either.Left start_toks else
|
|
match find_strikethrough_text p start_toks start_line with
|
|
| Either.Left _ as r -> r
|
|
| Either.Right (toks, line, stroken_toks, closer) ->
|
|
let first_line = start_line and last_line = line in
|
|
let text =
|
|
let first = start + 2 in
|
|
let last = closer.start - 1 in
|
|
let text_start =
|
|
let last =
|
|
if start_line == line then last else start_line.last
|
|
in
|
|
{ start_line with first; last }
|
|
in
|
|
(* No need to redo first pass *)
|
|
let emph_toks = second_pass p stroken_toks text_start in
|
|
let text = last_pass p emph_toks text_start in
|
|
inlines_inline p text ~first ~last ~first_line ~last_line
|
|
in
|
|
let toks =
|
|
let first = opener.start and last = closer.start + 1 in
|
|
ext_strikethrough_token p ~first ~last ~first_line ~last_line text
|
|
:: toks
|
|
in
|
|
Either.Right (toks, line)
|
|
|
|
and second_pass p toks line =
|
|
let rec loop p toks line acc = match toks with
|
|
| [] -> List.rev acc
|
|
| Emphasis_marks ({ may_open } as opener) :: toks ->
|
|
if not may_open then loop p toks line acc else
|
|
begin match try_emphasis p toks line ~opener with
|
|
| Either.Left toks -> loop p toks line acc
|
|
| Either.Right (toks, line) -> loop p toks line acc
|
|
end
|
|
| Strikethrough_marks ({ may_open } as opener) :: toks ->
|
|
if not may_open then loop p toks line acc else
|
|
begin match try_strikethrough p toks line ~opener with
|
|
| Either.Left toks -> loop p toks line acc
|
|
| Either.Right (toks, line) -> loop p toks line acc
|
|
end
|
|
| Newline { newline } as t :: toks -> loop p toks newline (t :: acc)
|
|
| Inline { endline } as t :: toks -> loop p toks endline (t :: acc)
|
|
| t :: toks -> loop p toks line (t :: acc)
|
|
in
|
|
loop p toks line []
|
|
|
|
(* Last pass *)
|
|
|
|
and last_pass p toks line =
|
|
(* Only [Inline] and [Newline] tokens remain. We fold over them to
|
|
convert them to [inline] values and [Break]s. [Text] inlines
|
|
are created for data between them. *)
|
|
let rec loop toks line acc k = match toks with
|
|
| [] ->
|
|
List.rev (try_add_text_inline p line ~first:k ~last:line.last acc)
|
|
| Newline { start; break_type; newline } :: toks ->
|
|
let acc = try_add_text_inline p line ~first:k ~last:(start - 1) acc in
|
|
let break = break_inline p line ~start ~break_type ~newline in
|
|
loop toks newline (break :: acc) newline.first
|
|
| Inline { start; inline; endline; next } :: toks ->
|
|
let acc = try_add_text_inline p line ~first:k ~last:(start - 1) acc in
|
|
let acc = match inline with
|
|
| Inline.Inlines (is, _meta_stub) -> List.rev_append (List.rev is) acc
|
|
| i -> i :: acc
|
|
in
|
|
loop toks endline acc next
|
|
| (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _
|
|
| Emphasis_marks _ | Right_paren _ | Strikethrough_marks _
|
|
| Math_span_marks _) :: _ ->
|
|
assert false
|
|
in
|
|
loop toks line [] line.first
|
|
|
|
and parse_tokens p toks first_line =
|
|
let toks, had_link = first_pass p toks first_line in
|
|
let toks = second_pass p toks first_line in
|
|
last_pass p toks first_line, had_link
|
|
|
|
let strip_paragraph p lines =
|
|
(* Remove initial and final blanks. Initial blank removal on
|
|
other paragraph lines is done during the inline parsing
|
|
and integrated into the AST for layout preservation. *)
|
|
let last, trailing_blanks =
|
|
let line = List.hd lines in
|
|
let first = line.first and start = line.last in
|
|
let non_blank = Match.last_non_blank p.i ~first ~start in
|
|
{ line with last = non_blank},
|
|
layout_clean_raw_span' p { line with first = non_blank + 1; }
|
|
in
|
|
let lines = List.rev (last :: List.tl lines) in
|
|
let first, leading_indent =
|
|
let line = List.hd lines in
|
|
let non_blank = first_non_blank_in_span p line in
|
|
{ line with first = non_blank },
|
|
non_blank - line.first
|
|
in
|
|
let lines = first :: List.tl lines in
|
|
let meta = meta_of_spans p ~first ~last in
|
|
(leading_indent, trailing_blanks), meta, lines
|
|
|
|
let parse p lines =
|
|
let layout, meta, lines = strip_paragraph p lines in
|
|
let cidx, toks, first_line = tokenize ~exts:p.exts p.i lines in
|
|
p.cidx <- cidx;
|
|
let is, _had_link = parse_tokens p toks first_line in
|
|
let inline = match is with [i] -> i | is -> Inline.Inlines (is, meta) in
|
|
layout, inline
|
|
|
|
(* Parsing table rows *)
|
|
|
|
let get_blanks p line ~before k =
|
|
let nb = Match.first_non_blank p.i ~last:(before - 1) ~start:k in
|
|
layout_clean_raw_span' p { line with first = k; last = nb - 1 }, nb
|
|
|
|
let make_col p = function
|
|
| [] -> assert false
|
|
| [i] -> i
|
|
| is ->
|
|
let last = Inline.meta (List.hd is) in
|
|
let is = List.rev is in
|
|
let first = Inline.meta (List.hd is) in
|
|
let meta = meta_of_metas p ~first ~last in
|
|
Inline.Inlines (is, meta)
|
|
|
|
let find_pipe p line ~before k =
|
|
let text p ~first ~last =
|
|
Inline.Text (clean_unesc_unref_span p { line with first; last })
|
|
in
|
|
let n = Match.first_non_escaped_char '|' p.i ~last:(before - 1) ~start:k in
|
|
if n = before then `Not_found (text p ~first:k ~last:(n - 1)) else
|
|
let nb = Match.last_non_blank p.i ~first:k ~start:(n - 1) in
|
|
let after =
|
|
layout_clean_raw_span' p { line with first = nb + 1; last = n - 1 }
|
|
in
|
|
let text = if nb < k then None else Some (text p ~first:k ~last:nb) in
|
|
`Found (text, after, n + 1)
|
|
|
|
let start_col p line ~before k =
|
|
let bbefore, k = get_blanks p line ~before k in
|
|
if k >= before then `Start (bbefore, []) else
|
|
match find_pipe p line ~before k with
|
|
| `Not_found text -> `Start (bbefore, [text])
|
|
| `Found (text, bafter, k) ->
|
|
let text = match text with
|
|
| Some text -> text
|
|
| None ->
|
|
let l = textloc_of_span p { line with first = k; last = k - 1 }in
|
|
(Inline.Inlines ([], meta p l))
|
|
in
|
|
`Col ((text, (bbefore, bafter)), k)
|
|
|
|
let rec finish_col p line blanks_before is toks k = match toks with
|
|
| [] ->
|
|
begin match find_pipe p line ~before:(line.last + 1) k with
|
|
| `Found (text, after, k) ->
|
|
let is = match text with Some t -> t :: is | None -> is in
|
|
(make_col p is, (blanks_before, after)), [], k
|
|
| `Not_found _ -> assert false
|
|
end
|
|
| Inline { start; inline; next } :: toks when k >= start ->
|
|
finish_col p line blanks_before (inline :: is) toks next
|
|
| Inline { start; inline; next } :: toks as toks' ->
|
|
begin match find_pipe p line ~before:start k with
|
|
| `Not_found text ->
|
|
let is = inline :: text :: is in
|
|
finish_col p line blanks_before is toks next
|
|
| `Found (text, after, k) ->
|
|
let is = match text with Some t -> t :: is | None -> is in
|
|
(make_col p is, (blanks_before, after)), toks', k
|
|
end
|
|
| (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _
|
|
| Emphasis_marks _ | Right_paren _ | Strikethrough_marks _
|
|
| Math_span_marks _ | Newline _ ) :: _ ->
|
|
assert false
|
|
|
|
let rec parse_cols p line acc toks k = match toks with
|
|
| [] ->
|
|
if k > line.last then (List.rev acc) else
|
|
begin match start_col p line ~before:(line.last + 1) k with
|
|
| `Col (col, k) -> parse_cols p line (col :: acc) [] k
|
|
| `Start _ -> assert false
|
|
end
|
|
| Inline { start; inline; next } :: toks as toks' ->
|
|
begin match start_col p line ~before:start k with
|
|
| `Col (col, k) -> parse_cols p line (col :: acc) toks' k
|
|
| `Start (before, is) ->
|
|
let is = inline :: is in
|
|
let col, toks, k = finish_col p line before is toks next in
|
|
parse_cols p line (col :: acc) toks k
|
|
end
|
|
| (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _
|
|
| Emphasis_marks _ | Right_paren _ | Strikethrough_marks _
|
|
| Math_span_marks _ | Newline _ ) :: _ ->
|
|
assert false
|
|
|
|
let parse_table_row p line =
|
|
let cidx, toks, first_line = tokenize ~exts:p.exts p.i [line] in
|
|
p.cidx <- cidx;
|
|
let toks, _had_link = first_pass p toks first_line in
|
|
let toks = second_pass p toks first_line in
|
|
(* We now have modified last pass, inner inlines will have gone through
|
|
the regular [last_pass] which is fine since we are only interested
|
|
in creating the toplevel text nodes further splited on (unescaped)
|
|
[\]. *)
|
|
parse_cols p line [] toks line.first
|
|
end
|
|
|
|
(* Block structure parsing. *)
|
|
|
|
module Block_struct = struct
|
|
|
|
(* Moving on the line in the indentation space (columns) and over container
|
|
markers. *)
|
|
|
|
let[@inline] current_col p = p.current_char_col + p.tab_consumed_cols
|
|
let[@inline] current_indent p = p.next_non_blank_col - current_col p
|
|
let[@inline] end_of_line p = p.current_char > p.current_line_last_char
|
|
let[@inline] only_blanks p = p.next_non_blank > p.current_line_last_char
|
|
let[@inline] has_next_non_blank p =
|
|
p.next_non_blank <= p.current_line_last_char
|
|
|
|
let update_next_non_blank p =
|
|
let rec loop p s last k col =
|
|
if k > last then (p.next_non_blank <- k; p.next_non_blank_col <- col) else
|
|
match s.[k] with
|
|
| ' ' -> loop p s last (k + 1) (col + 1)
|
|
| '\t' -> loop p s last (k + 1) (next_tab_stop col)
|
|
| _ -> p.next_non_blank <- k; p.next_non_blank_col <- col;
|
|
in
|
|
loop p p.i p.current_line_last_char p.current_char p.current_char_col
|
|
|
|
let accept_cols ~count p =
|
|
let rec loop p count k col =
|
|
if count = 0 then (p.current_char <- k; p.current_char_col <- col) else
|
|
if p.i.[k] <> '\t' then loop p (count - 1) (k + 1) (col + 1) else
|
|
let col' = next_tab_stop col in
|
|
let tab_cols = col' - (col + p.tab_consumed_cols) in
|
|
if tab_cols > count
|
|
then (p.tab_consumed_cols <- count; loop p 0 k col)
|
|
else (p.tab_consumed_cols <- 0; loop p (count - tab_cols) (k + 1) col')
|
|
in
|
|
loop p count p.current_char p.current_char_col;
|
|
update_next_non_blank p
|
|
|
|
let match_and_accept_block_quote p =
|
|
(* https://spec.commonmark.org/current/#block-quote-marker *)
|
|
if end_of_line p || p.i.[p.current_char] <> '>' then false else
|
|
let next_is_blank =
|
|
let next = p.current_char + 1 in
|
|
next <= p.current_line_last_char && Ascii.is_blank p.i.[next]
|
|
in
|
|
let count = if next_is_blank then (* we eat a space *) 2 else 1 in
|
|
accept_cols ~count p; true
|
|
|
|
let accept_list_marker_and_indent p ~marker_size ~last =
|
|
(* Returns min indent after marker for list item *)
|
|
accept_cols ~count:marker_size p;
|
|
let indent = current_indent p in
|
|
let min_indent =
|
|
if only_blanks p || indent > 4 (* indented code *)
|
|
then 1
|
|
else min indent 4
|
|
in
|
|
accept_cols ~count:min_indent p;
|
|
min_indent
|
|
|
|
let accept_code_indent p ~count =
|
|
(* Returns padding for partially consumed tab and content first char *)
|
|
accept_cols p ~count;
|
|
if p.tab_consumed_cols = 0 then 0, p.current_char else
|
|
let col' = next_tab_stop p.current_char_col in
|
|
let pad = col' - (p.current_char_col + p.tab_consumed_cols) in
|
|
pad, p.current_char (* is '\t' *) + 1
|
|
|
|
(* These data types are only used during parsing, to find out the
|
|
block structure. All the lists (blocks, lines) are in reverse
|
|
order. We don't extract data from the input here. We just store
|
|
line spans. See:
|
|
https://spec.commonmark.org/current/#phase-1-block-structure *)
|
|
|
|
type space_pad = int (* number of space characters to pad content with. *)
|
|
type indented_code_line =
|
|
{ pad : space_pad;
|
|
code : line_span;
|
|
is_blank : bool }
|
|
|
|
type fence =
|
|
{ indent : Layout.indent;
|
|
opening_fence : line_span;
|
|
fence : Char.t * int (* fence length *);
|
|
info_string : line_span option (* we drop the trailing blanks *);
|
|
closing_fence : line_span option; }
|
|
|
|
type fenced_code_block =
|
|
{ fence : fence;
|
|
code : (space_pad * line_span) list }
|
|
|
|
type code_block =
|
|
[ `Indented of indented_code_line list | `Fenced of fenced_code_block ]
|
|
|
|
type atx =
|
|
{ indent : Layout.indent;
|
|
level : Match.heading_level;
|
|
after_open : byte_pos;
|
|
heading : line_span;
|
|
layout_after : line_span }
|
|
|
|
type setext =
|
|
{ level : Match.heading_level;
|
|
heading_lines : line_span list;
|
|
underline : (* Indent, underline char count, blanks *)
|
|
Layout.indent * line_span * line_span; }
|
|
|
|
type heading = [ `Atx of atx | `Setext of setext ]
|
|
|
|
type html_block =
|
|
{ end_cond : Match.html_block_end_cond option;
|
|
html : line_span list }
|
|
|
|
type paragraph = { maybe_ref : bool; lines : line_span list }
|
|
|
|
type t =
|
|
| Block_quote of Layout.indent * t list
|
|
| Blank_line of space_pad * line_span
|
|
| Code_block of code_block
|
|
| Heading of heading
|
|
| Html_block of html_block
|
|
| List of list'
|
|
| Linkref_def of Link_definition.t node
|
|
| Paragraph of paragraph
|
|
| Thematic_break of Layout.indent * line_span (* including trailing blanks *)
|
|
| Ext_table of Layout.indent * (line_span * line_span (* trail blanks *)) list
|
|
| Ext_footnote of Layout.indent * (Label.t * Label.t option) * t list
|
|
|
|
and list_item =
|
|
{ before_marker : Layout.indent;
|
|
marker : line_span;
|
|
after_marker : Layout.indent;
|
|
ext_task_marker : (Uchar.t * line_span) option;
|
|
blocks : t list }
|
|
|
|
and list' =
|
|
{ last_blank : bool; (* last added line was blank and not first line
|
|
of item *)
|
|
loose : bool; (* inter-item looseness, intra-item is computed later *)
|
|
item_min_indent : int; (* last item minimal indent *)
|
|
list_type : Block.List'.type';
|
|
items : list_item list; }
|
|
|
|
let block_is_blank_line = function Blank_line _ -> true | _ -> false
|
|
|
|
(* Making blocks from the current line status *)
|
|
|
|
let blank_line p =
|
|
let first = p.current_char and last = p.current_line_last_char in
|
|
Blank_line (0, current_line_span p ~first ~last)
|
|
|
|
let thematic_break p ~indent ~last:_ =
|
|
let last = p.current_line_last_char (* let's keep everything *) in
|
|
let break = current_line_span p ~first:p.current_char ~last in
|
|
Thematic_break (indent, break)
|
|
|
|
let atx_heading p ~indent ~level ~after_open ~first_content ~last_content =
|
|
let heading = current_line_span p ~first:first_content ~last:last_content in
|
|
let layout_after =
|
|
let first = last_content + 1 and last = p.current_line_last_char in
|
|
current_line_span p ~first ~last
|
|
in
|
|
Heading (`Atx { indent; level; after_open; heading; layout_after })
|
|
|
|
let setext_heading p ~indent ~level ~last_underline heading_lines =
|
|
let u = current_line_span p ~first:p.current_char ~last:last_underline in
|
|
let blanks =
|
|
let first = last_underline + 1 and last = p.current_line_last_char in
|
|
current_line_span p ~first ~last
|
|
in
|
|
let underline = indent, u, blanks in
|
|
Heading (`Setext {level; heading_lines; underline})
|
|
|
|
let indented_code_block p = (* Has a side-effect on [p] *)
|
|
let pad, first = accept_code_indent p ~count:4 in
|
|
let code = current_line_span p ~first ~last:p.current_line_last_char in
|
|
Code_block (`Indented [{pad; code; is_blank = false}])
|
|
|
|
let fenced_code_block p ~indent ~fence_first ~fence_last ~info =
|
|
let info_string, layout_last = match info with
|
|
| None -> None, p.current_line_last_char
|
|
| Some (first, last) -> Some (current_line_span p ~first ~last), first - 1
|
|
in
|
|
let opening_fence =
|
|
current_line_span p ~first:fence_first ~last:layout_last
|
|
in
|
|
let fence = p.i.[fence_first], (fence_last - fence_first + 1) in
|
|
let closing_fence = None in
|
|
let fence = { indent; opening_fence; fence; info_string; closing_fence } in
|
|
Code_block (`Fenced {fence; code = []})
|
|
|
|
let html_block p ~end_cond ~indent_start =
|
|
let first = indent_start and last = p.current_line_last_char in
|
|
let end_cond = (* Check if the same line matches the end condition. *)
|
|
if Match.html_block_end p.i ~end_cond ~last ~start:p.current_char
|
|
then None (* We are already closed *) else Some end_cond
|
|
in
|
|
Html_block { end_cond; html = [current_line_span p ~first ~last] }
|
|
|
|
let paragraph p ~start =
|
|
let last = p.current_line_last_char in
|
|
let maybe_ref = Match.could_be_link_reference_definition p.i ~last ~start in
|
|
Paragraph { maybe_ref; lines = [current_line_span p ~first:start ~last]}
|
|
|
|
let add_paragraph_line p ~indent_start par bs =
|
|
let first = indent_start and last = p.current_line_last_char in
|
|
let lines = current_line_span p ~first ~last :: par.lines in
|
|
Paragraph { par with lines } :: bs
|
|
|
|
let table_row p ~first ~last =
|
|
current_line_span p ~first ~last,
|
|
current_line_span p ~first:(last + 1) ~last:p.current_line_last_char
|
|
|
|
let table p ~indent ~last =
|
|
let row = table_row p ~first:p.current_char ~last in
|
|
Ext_table (indent, [row])
|
|
|
|
(* Link reference definition parsing
|
|
|
|
This is invoked when we close a paragraph and works on the paragraph
|
|
lines. *)
|
|
|
|
let parse_link_reference_definition p lines =
|
|
(* Has no side effect on [p], parsing occurs on [lines] spans. *)
|
|
(* https://spec.commonmark.org/current/#link-reference-definitions *)
|
|
let none () = raise_notrace Exit in
|
|
let next_line = function line :: lines -> Some (lines, line) | [] -> None in
|
|
try
|
|
let lines, line = match next_line lines with
|
|
| None -> none () | Some v -> v
|
|
in
|
|
let start = first_non_blank_in_span p line in
|
|
let indent = start - line.first in
|
|
let meta_first = { line with first = start } in
|
|
let lines, line, label, start =
|
|
match Match.link_label p.buf ~next_line p.i lines ~line ~start with
|
|
| None -> none ()
|
|
| Some (lines, line, rev_spans, last, key) ->
|
|
let colon = last + 1 in
|
|
if colon > line.last || p.i.[colon] <> ':' then none () else
|
|
let label = Inline_struct.label_of_rev_spans p ~key rev_spans in
|
|
lines, line, label, colon + 1
|
|
in
|
|
let lines, line, before_dest, start =
|
|
match first_non_blank_over_nl ~next_line p lines line ~start with
|
|
| None -> none () | Some v -> v
|
|
in
|
|
let angled_dest, dest, start, meta_last =
|
|
match Match.link_destination p.i ~last:line.last ~start with
|
|
| None -> none ()
|
|
| Some (angled, first, last) ->
|
|
let dest = clean_unesc_unref_span p { line with first; last } in
|
|
let next = if angled then last + 2 else last + 1 in
|
|
angled, Some dest, next, { line with last = last }
|
|
in
|
|
let lines, after_dest, title_open_delim, title, after_title, meta_last =
|
|
match first_non_blank_over_nl ~next_line p lines line ~start with
|
|
| None -> lines, [], '\"', None, [], meta_last
|
|
| Some (_, _, _, st) when st = start (* need some space *) -> none ()
|
|
| Some (lines', line', after_dest, start') ->
|
|
let no_newline = line'.line_pos = line.line_pos in
|
|
let title =
|
|
Match.link_title ~next_line p.i lines' ~line:line' ~start:start'
|
|
in
|
|
match title with
|
|
| None ->
|
|
if no_newline then none () (* garbage after dest *) else
|
|
lines, [], '\"', None, [], meta_last
|
|
| Some (lines', line', rev_spans, last) ->
|
|
let after_title =
|
|
let last = line'.last and start = last + 1 in
|
|
let nb = Match.first_non_blank p.i ~last ~start in
|
|
if nb <= line'.last
|
|
then None
|
|
else
|
|
Some [layout_clean_raw_span p { line' with first = start; }]
|
|
in
|
|
match after_title with
|
|
| None when no_newline -> none ()
|
|
| None -> (lines, [], '\"', None, [], meta_last)
|
|
| Some after_title ->
|
|
let t = tight_block_lines p ~rev_spans in
|
|
lines', after_dest, p.i.[start'], Some t,
|
|
after_title,
|
|
{ line' with last }
|
|
in
|
|
let meta = meta_of_spans p ~first:meta_first ~last:meta_last in
|
|
let layout =
|
|
{ Link_definition.indent; angled_dest; before_dest;
|
|
after_dest; title_open_delim; after_title }
|
|
in
|
|
let defined_label = def_label p label in
|
|
let label = Some label in
|
|
let ld =
|
|
{ Link_definition.layout; label; defined_label; dest; title }, meta
|
|
in
|
|
begin match defined_label with
|
|
| None -> () | Some def -> set_label_def p def (Link_definition.Def ld)
|
|
end;
|
|
Some (ld, lines)
|
|
with
|
|
| Exit -> None
|
|
|
|
let maybe_add_link_reference_definitions p lines prevs =
|
|
let rec loop p prevs = function
|
|
| [] -> prevs
|
|
| ls ->
|
|
match parse_link_reference_definition p ls with
|
|
| None ->
|
|
(* Link defs can't interrupt a paragraph so we are good now. *)
|
|
Paragraph { maybe_ref = false; lines = List.rev ls } :: prevs
|
|
| Some (ld, ls) -> loop p (Linkref_def ld :: prevs) ls
|
|
in
|
|
loop p prevs (List.rev lines)
|
|
|
|
(* Closing blocks and finishing the document. *)
|
|
|
|
let close_indented_code_block p lines bs =
|
|
(* Removes trailing blank lines and add them as blank lines *)
|
|
let rec loop blanks lines bs = match lines with
|
|
| { pad; code; is_blank = true} :: lines ->
|
|
loop (Blank_line (pad, code) :: blanks) lines bs
|
|
| [] -> (* likely assert (false) *) List.rev_append blanks bs
|
|
| ls -> List.rev_append blanks ((Code_block (`Indented ls)) :: bs)
|
|
in
|
|
loop [] lines bs
|
|
|
|
let close_paragraph p par bs =
|
|
if not par.maybe_ref then Paragraph par :: bs else
|
|
maybe_add_link_reference_definitions p par.lines bs
|
|
|
|
let rec close_last_block p = function
|
|
| Code_block (`Indented ls) :: bs -> close_indented_code_block p ls bs
|
|
| Paragraph par :: bs -> close_paragraph p par bs
|
|
| List l :: bs -> close_list p l bs
|
|
| Ext_footnote (i, l, blocks) :: bs -> close_footnote p i l blocks bs
|
|
| bs -> bs
|
|
|
|
and close_list p l bs =
|
|
let i = List.hd l.items in
|
|
let blocks = close_last_block p i.blocks in
|
|
(* The final blank line extraction of the list item entails less blank
|
|
line churn for CommonMark rendering but we don't do it on empty list
|
|
items. *)
|
|
match blocks with
|
|
| Blank_line _ as bl :: (_ :: _ as blocks) ->
|
|
let items = { i with blocks } :: List.tl l.items in
|
|
bl :: List { l with items } :: bs
|
|
| blocks ->
|
|
let items = { i with blocks } :: List.tl l.items in
|
|
List { l with items } :: bs
|
|
|
|
and close_footnote p indent label blocks bs =
|
|
let blocks = close_last_block p blocks in
|
|
(* Like for lists above we do blank line extraction (except if blocks
|
|
is only a blank line) *)
|
|
let blanks, blocks =
|
|
let rec loop acc = function
|
|
| Blank_line _ as bl :: (_ :: _ as blocks) -> loop (bl :: acc) blocks
|
|
| blocks -> acc, blocks
|
|
in
|
|
loop [] blocks
|
|
in
|
|
List.rev_append blanks (Ext_footnote (indent, label, blocks) :: bs)
|
|
|
|
let close_last_list_item p l =
|
|
let item = List.hd l.items in
|
|
let item = { item with blocks = close_last_block p item.blocks } in
|
|
{ l with items = item :: List.tl l.items }
|
|
|
|
let end_doc_close_fenced_code_block p fenced bs = match fenced.code with
|
|
| (_, l) :: code when l.first > l.last (* empty line *) ->
|
|
Blank_line (0, l) :: Code_block (`Fenced { fenced with code }) :: bs
|
|
| _ -> Code_block (`Fenced fenced) :: bs
|
|
|
|
let end_doc_close_html p h bs = match h.html with
|
|
| l :: html when l.first > l.last (* empty line *) ->
|
|
Blank_line (0, l) :: Html_block { end_cond = None; html } :: bs
|
|
| _ ->
|
|
Html_block { h with end_cond = None } :: bs
|
|
|
|
let rec end_doc p = function
|
|
| Block_quote (indent, bq) :: bs -> Block_quote (indent, end_doc p bq) :: bs
|
|
| List list :: bs -> close_list p list bs
|
|
| Paragraph par :: bs -> close_paragraph p par bs
|
|
| Code_block (`Indented ls) :: bs -> close_indented_code_block p ls bs
|
|
| Code_block (`Fenced f) :: bs -> end_doc_close_fenced_code_block p f bs
|
|
| Html_block html :: bs -> end_doc_close_html p html bs
|
|
| Ext_footnote (i, l, blocks) :: bs -> close_footnote p i l blocks bs
|
|
| (Thematic_break _ | Heading _ | Blank_line _ | Linkref_def _
|
|
| Ext_table _ ) :: _ | [] as bs -> bs
|
|
|
|
(* Adding lines to blocks *)
|
|
|
|
let match_line_type ~no_setext ~indent p =
|
|
(* Effects on [p]'s column advance *)
|
|
if only_blanks p then Match.Blank_line else
|
|
if indent >= 4 then Indented_code_block_line else begin
|
|
accept_cols ~count:indent p;
|
|
if end_of_line p then Match.Blank_line else
|
|
let start = p.current_char and last = p.current_line_last_char in
|
|
match p.i.[start] with
|
|
(* Early dispatch shaves a few ms but may not be worth doing vs
|
|
testing all the cases in sequences. *)
|
|
| '>' ->
|
|
if match_and_accept_block_quote p then Match.Block_quote_line else
|
|
Paragraph_line
|
|
| '=' when not no_setext ->
|
|
let r = Match.setext_heading_underline p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| '-' ->
|
|
let r =
|
|
if no_setext then Match.Nomatch else
|
|
Match.setext_heading_underline p.i ~last ~start
|
|
in
|
|
if r <> Nomatch then r else
|
|
let r = Match.thematic_break p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
let r = Match.list_marker p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| '#' ->
|
|
let r = Match.atx_heading p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| '+' | '*' | '0' .. '9' ->
|
|
let r = Match.thematic_break p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
let r = Match.list_marker p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| '_' ->
|
|
let r = Match.thematic_break p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| '~' | '`' ->
|
|
let r = Match.fenced_code_block_start p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| '<' ->
|
|
let r = Match.html_block_start p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| '|' when p.exts ->
|
|
let r = Match.ext_table_row p.i ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| '[' when p.exts ->
|
|
let line_pos = p.current_line_pos in
|
|
let r = Match.ext_footnote_label p.buf p.i ~line_pos ~last ~start in
|
|
if r <> Nomatch then r else
|
|
Paragraph_line
|
|
| _ ->
|
|
Paragraph_line
|
|
end
|
|
|
|
let list_marker_can_interrupt_paragraph p = function
|
|
| `Ordered (1, _), marker_last | `Unordered _, marker_last ->
|
|
let last = p.current_line_last_char and start = marker_last + 1 in
|
|
let non_blank = Match.first_non_blank p.i ~last ~start in
|
|
non_blank <= p.current_line_last_char (* line is not blank *)
|
|
| _ -> false
|
|
|
|
let same_list_type t0 t1 = match t0, t1 with
|
|
| `Ordered (_, c0), `Ordered (_, c1)
|
|
| `Unordered c0, `Unordered c1 when Char.equal c0 c1 -> true
|
|
| _ -> false
|
|
|
|
let rec add_open_blocks_with_line_class p ~indent_start ~indent bs = function
|
|
| Match.Blank_line -> blank_line p :: bs
|
|
| Indented_code_block_line -> indented_code_block p :: bs
|
|
| Block_quote_line -> Block_quote (indent, add_open_blocks p []) :: bs
|
|
| Thematic_break_line last -> thematic_break p ~indent ~last :: bs
|
|
| List_marker_line m -> list p ~indent m bs
|
|
| Atx_heading_line (level, after_open, first_content, last_content) ->
|
|
atx_heading p ~indent ~level ~after_open ~first_content ~last_content ::
|
|
bs
|
|
| Fenced_code_block_line (fence_first, fence_last, info) ->
|
|
fenced_code_block p ~indent ~fence_first ~fence_last ~info :: bs
|
|
| Html_block_line end_cond -> html_block p ~end_cond ~indent_start :: bs
|
|
| Paragraph_line -> paragraph p ~start:indent_start :: bs
|
|
| Ext_table_row last -> table p ~indent ~last :: bs
|
|
| Ext_footnote_label (rev_spans, last, key) ->
|
|
footnote p ~indent ~last rev_spans key :: bs
|
|
| Setext_underline_line _ | Nomatch ->
|
|
(* This function should be called with a line type that comes out
|
|
of match_line_type ~no_setext:true *)
|
|
assert false
|
|
|
|
and add_open_blocks p bs =
|
|
let indent_start = p.current_char and indent = current_indent p in
|
|
let ltype = match_line_type ~no_setext:true ~indent p in
|
|
add_open_blocks_with_line_class p ~indent_start ~indent bs ltype
|
|
|
|
and footnote p ~indent ~last rev_spans key =
|
|
let label = Inline_struct.label_of_rev_spans p ~key rev_spans in
|
|
let defined_label = match def_label p label with
|
|
| None -> None
|
|
| Some def as l -> set_label_def p def (Block.Footnote.stub label l); l
|
|
in
|
|
accept_cols p ~count:(last - p.current_char + 1);
|
|
Ext_footnote (indent, (label, defined_label), add_open_blocks p [])
|
|
|
|
and list_item ~indent p (list_type, last) =
|
|
let before_marker = indent and marker_size = last - p.current_char + 1 in
|
|
let marker = current_line_span p ~first:p.current_char ~last in
|
|
let after_marker = accept_list_marker_and_indent p ~marker_size ~last in
|
|
let ext_task_marker, ext_task_marker_size = match p.exts with
|
|
| false -> None, 0
|
|
| true ->
|
|
let start = p.current_char and last = p.current_line_last_char in
|
|
match Match.ext_task_marker p.i ~last ~start with
|
|
| None -> None, 0
|
|
| Some (u, last) ->
|
|
accept_cols p ~count:(last - start + 1);
|
|
let last = match last = p.current_line_last_char with
|
|
| true -> (* empty line *) last
|
|
| false -> (* remove space for locs *) last - 1
|
|
in
|
|
Some (u, current_line_span p ~first:start ~last), 4
|
|
in
|
|
let min = indent + marker_size + after_marker + ext_task_marker_size in
|
|
min, { before_marker; marker; after_marker; ext_task_marker;
|
|
blocks = add_open_blocks p [] }
|
|
|
|
and list ~indent p (list_type, _ as m) bs =
|
|
let item_min_indent, item = list_item ~indent p m in
|
|
List { last_blank = false; loose = false;
|
|
item_min_indent; list_type; items = [item] } :: bs
|
|
|
|
let try_add_to_list ~indent p (lt, _ as m) l bs =
|
|
let item_min_indent, item = list_item ~indent p m in
|
|
if same_list_type lt l.list_type then
|
|
let l = close_last_list_item p l and last_blank = false in
|
|
let list_type = l.list_type in
|
|
List { last_blank; loose = l.last_blank; item_min_indent; list_type;
|
|
items = item :: l.items } :: bs
|
|
else
|
|
let bs = close_list p l bs and last_blank = false in
|
|
List { last_blank; loose = false; item_min_indent; list_type = lt;
|
|
items = [item] } :: bs
|
|
|
|
let try_add_to_paragraph p par bs =
|
|
let indent_start = p.current_char and indent = current_indent p in
|
|
match match_line_type ~no_setext:false ~indent p with
|
|
(* These can't interrupt paragraphs *)
|
|
| Html_block_line `End_blank_7
|
|
| Indented_code_block_line
|
|
| Ext_table_row _ | Ext_footnote_label _
|
|
| Paragraph_line ->
|
|
add_paragraph_line p ~indent_start par bs
|
|
| List_marker_line m when not (list_marker_can_interrupt_paragraph p m) ->
|
|
add_paragraph_line p ~indent_start par bs
|
|
| Blank_line ->
|
|
blank_line p :: close_paragraph p par bs
|
|
| Block_quote_line ->
|
|
Block_quote (indent, add_open_blocks p []) :: (close_paragraph p par bs)
|
|
| Setext_underline_line (level, last_underline) ->
|
|
let bs = close_paragraph p par bs in
|
|
begin match bs with
|
|
| Paragraph { lines; _ } :: bs ->
|
|
setext_heading p ~indent ~level ~last_underline lines :: bs
|
|
| bs -> paragraph p ~start:indent_start :: bs
|
|
end
|
|
| Thematic_break_line last ->
|
|
thematic_break p ~indent ~last :: (close_paragraph p par bs)
|
|
| List_marker_line m ->
|
|
list p ~indent m (close_paragraph p par bs)
|
|
| Atx_heading_line (level, after_open, first_content, last_content) ->
|
|
let bs = close_paragraph p par bs in
|
|
atx_heading p ~indent ~level ~after_open ~first_content ~last_content ::
|
|
bs
|
|
| Fenced_code_block_line (fence_first, fence_last, info) ->
|
|
let bs = close_paragraph p par bs in
|
|
fenced_code_block p ~indent ~fence_first ~fence_last ~info :: bs
|
|
| Html_block_line end_cond ->
|
|
html_block p ~end_cond ~indent_start :: (close_paragraph p par bs)
|
|
| Nomatch -> assert false
|
|
|
|
let try_add_to_indented_code_block p ls bs =
|
|
if current_indent p < 4 then
|
|
if has_next_non_blank p
|
|
then add_open_blocks p (close_indented_code_block p ls bs) else
|
|
(* Blank but white is not data, make an empty span *)
|
|
let first = p.current_line_last_char + 1 in
|
|
let last = p.current_line_last_char in
|
|
let code = current_line_span p ~first ~last in
|
|
let l = { pad = 0; code; is_blank = true } in
|
|
Code_block (`Indented (l :: ls)) :: bs
|
|
else
|
|
let pad, first = accept_code_indent p ~count:4 in
|
|
let last = p.current_line_last_char in
|
|
let is_blank = only_blanks p in
|
|
let l = { pad; code = current_line_span p ~first ~last; is_blank } in
|
|
Code_block (`Indented (l :: ls)) :: bs
|
|
|
|
let try_add_to_fenced_code_block p f bs = match f with
|
|
| { fence = { closing_fence = Some _; _}; _ } -> (* block is closed *)
|
|
add_open_blocks p ((Code_block (`Fenced f)) :: bs)
|
|
| { fence = { indent; fence; _} ; code = ls} as b ->
|
|
let start = p.current_char and last = p.current_line_last_char in
|
|
match Match.fenced_code_block_continue ~fence p.i ~last ~start with
|
|
| `Code ->
|
|
let strip = Int.min indent (current_indent p) in
|
|
let pad, first = accept_code_indent p ~count:strip in
|
|
let code = (pad, current_line_span p ~first ~last) :: ls in
|
|
Code_block (`Fenced { b with code }) :: bs
|
|
| `Close (first, _fence_last) ->
|
|
let close = current_line_span p ~first ~last (* with layout *)in
|
|
let fence = { b.fence with closing_fence = Some close } in
|
|
Code_block (`Fenced { b with fence }) :: bs
|
|
|
|
let try_add_to_html_block p b bs = match b.end_cond with
|
|
| None -> add_open_blocks p (Html_block { b with end_cond = None} :: bs)
|
|
| Some end_cond ->
|
|
let start = p.current_char and last = p.current_line_last_char in
|
|
let l = current_line_span p ~first:start ~last in
|
|
if not (Match.html_block_end p.i ~end_cond ~last ~start)
|
|
then Html_block { b with html = l :: b.html } :: bs else
|
|
match end_cond with
|
|
| `End_blank | `End_blank_7 ->
|
|
blank_line p :: Html_block { b with end_cond = None } :: bs
|
|
| _ ->
|
|
Html_block { end_cond = None; html = l :: b.html } :: bs
|
|
|
|
let rec try_lazy_continuation p ~indent_start = function
|
|
| Paragraph par :: bs -> Some (add_paragraph_line p ~indent_start par bs)
|
|
| Block_quote (indent, bq) :: bs ->
|
|
begin match try_lazy_continuation p ~indent_start bq with
|
|
| None -> None
|
|
| Some bq -> Some (Block_quote (indent, bq) :: bs)
|
|
end
|
|
| List l :: bs ->
|
|
let i = List.hd l.items in
|
|
begin match try_lazy_continuation p ~indent_start i.blocks with
|
|
| None -> None
|
|
| Some blocks ->
|
|
let items = { i with blocks } :: (List.tl l.items) in
|
|
Some (List { l with items; last_blank = false } :: bs)
|
|
end
|
|
| _ -> None
|
|
|
|
let try_add_to_table p ind rows bs =
|
|
let indent_start = p.current_char and indent = current_indent p in
|
|
match match_line_type ~indent ~no_setext:true p with
|
|
| Ext_table_row last ->
|
|
let row = table_row p ~first:p.current_char ~last in
|
|
Ext_table (ind, row :: rows) :: bs
|
|
| ltype ->
|
|
let bs = Ext_table (ind, rows) :: bs in
|
|
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
|
|
|
|
let rec try_add_to_block_quote p indent_layout bq bs =
|
|
let indent_start = p.current_char and indent = current_indent p in
|
|
match match_line_type ~indent ~no_setext:true p with
|
|
| Block_quote_line -> Block_quote (indent_layout, add_line p bq) :: bs
|
|
| (Indented_code_block_line (* Looks like a *) | Paragraph_line) as ltype ->
|
|
begin match try_lazy_continuation p ~indent_start bq with
|
|
| Some bq -> Block_quote (indent_layout, bq) :: bs
|
|
| None ->
|
|
let bs = Block_quote (indent_layout, close_last_block p bq) :: bs in
|
|
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
|
|
end
|
|
| ltype ->
|
|
let bs = Block_quote (indent_layout, close_last_block p bq) :: bs in
|
|
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
|
|
|
|
and try_add_to_footnote p fn_indent label blocks bs =
|
|
let indent_start = p.current_char and indent = current_indent p in
|
|
if indent < fn_indent + 1 (* position of ^ *) then begin
|
|
match match_line_type ~indent ~no_setext:true p with
|
|
| (Indented_code_block_line (* Looks like a *) | Paragraph_line) as lt ->
|
|
begin match try_lazy_continuation p ~indent_start blocks with
|
|
| Some blocks -> Ext_footnote (fn_indent, label, blocks) :: bs
|
|
| None ->
|
|
let blocks = close_last_block p blocks in
|
|
let bs = (close_footnote p fn_indent label blocks) bs in
|
|
add_open_blocks_with_line_class p ~indent ~indent_start bs lt
|
|
end
|
|
| Blank_line ->
|
|
Ext_footnote (fn_indent, label, add_line p blocks) :: bs
|
|
| ltype ->
|
|
let blocks = close_last_block p blocks in
|
|
let bs = close_footnote p fn_indent label blocks bs in
|
|
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
|
|
end else begin
|
|
accept_cols p ~count:(fn_indent + 1);
|
|
Ext_footnote (fn_indent, label, add_line p blocks) :: bs
|
|
end
|
|
|
|
and try_add_to_list_item p list bs =
|
|
let indent_start = p.current_char and indent = current_indent p in
|
|
if indent >= list.item_min_indent then begin
|
|
let last_blank = only_blanks p in
|
|
let item = List.hd list.items and items = List.tl list.items in
|
|
if list.last_blank && not last_blank &&
|
|
List.for_all block_is_blank_line item.blocks
|
|
then
|
|
(* Item can only start with a single blank line, if we are
|
|
here it's not a new item so the list ends *)
|
|
add_open_blocks p (List list :: bs)
|
|
else begin
|
|
accept_cols ~count:list.item_min_indent p;
|
|
let item = { item with blocks = add_line p item.blocks } in
|
|
List { list with items = item :: items; last_blank } :: bs
|
|
end
|
|
end else match match_line_type ~indent ~no_setext:true p with
|
|
| Blank_line ->
|
|
let item = List.hd list.items and items = List.tl list.items in
|
|
let item = { item with blocks = add_line p item.blocks } in
|
|
List { list with items = item :: items; last_blank = true } :: bs
|
|
| Indented_code_block_line | Paragraph_line as ltype ->
|
|
let item = List.hd list.items and items = List.tl list.items in
|
|
begin match try_lazy_continuation p ~indent_start item.blocks with
|
|
| Some blocks ->
|
|
let items = { item with blocks } :: items in
|
|
List { list with items; last_blank = false } :: bs
|
|
| None ->
|
|
let bs = close_list p list bs in
|
|
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
|
|
end
|
|
| List_marker_line m ->
|
|
try_add_to_list p ~indent m list bs
|
|
| ltype ->
|
|
let bs = close_list p list bs in
|
|
add_open_blocks_with_line_class p ~indent ~indent_start bs ltype
|
|
|
|
and add_line p = function
|
|
| Paragraph par :: bs -> try_add_to_paragraph p par bs
|
|
| ((Thematic_break _ | Heading _ | Blank_line _ | Linkref_def _) :: _)
|
|
| [] as bs -> add_open_blocks p bs
|
|
| List list :: bs -> try_add_to_list_item p list bs
|
|
| Code_block (`Indented ls) :: bs -> try_add_to_indented_code_block p ls bs
|
|
| Code_block (`Fenced f) :: bs -> try_add_to_fenced_code_block p f bs
|
|
| Block_quote (ind, bq) :: bs -> try_add_to_block_quote p ind bq bs
|
|
| Html_block html :: bs -> try_add_to_html_block p html bs
|
|
| Ext_table (ind, rows) :: bs -> try_add_to_table p ind rows bs
|
|
| Ext_footnote (i, l, blocks) :: bs -> try_add_to_footnote p i l blocks bs
|
|
|
|
(* Parsing *)
|
|
|
|
let get_first_line p =
|
|
let max = String.length p.i - 1 in
|
|
let k = ref 0 in
|
|
let last_char =
|
|
while !k <= max && p.i.[!k] <> '\n' && p.i.[!k] <> '\r' do incr k done;
|
|
!k - 1 (* if the line is empty we have -1 *)
|
|
in
|
|
p.current_line_last_char <- last_char;
|
|
update_next_non_blank p;
|
|
(* Return first used newline (or "\n" if there is none) *)
|
|
if !k > max || p.i.[!k] = '\n' then "\n" else
|
|
let next = !k + 1 in
|
|
if next <= max && p.i.[next] = '\n' then "\r\n" else "\r"
|
|
|
|
let get_next_line p =
|
|
let max = String.length p.i - 1 in
|
|
if p.current_line_last_char = max then false else
|
|
let first_char =
|
|
let nl = p.current_line_last_char + 1 in
|
|
if p.i.[nl] = '\n' then nl + 1 else (* assert (p.i.[nl] = '\r') *)
|
|
let next = nl + 1 in
|
|
if next <= max && p.i.[next] = '\n' then next + 1 else next
|
|
in
|
|
let last_char =
|
|
let k = ref first_char in
|
|
while !k <= max && p.i.[!k] <> '\n' && p.i.[!k] <> '\r' do incr k done;
|
|
!k - 1 (* if the line is empty we have last_char = first_char - 1 *)
|
|
in
|
|
p.current_line_pos <- (fst p.current_line_pos + 1), first_char;
|
|
p.current_line_last_char <- last_char;
|
|
p.current_char <- first_char;
|
|
p.current_char_col <- 0;
|
|
p.tab_consumed_cols <- 0;
|
|
update_next_non_blank p;
|
|
true
|
|
|
|
let parse p =
|
|
let meta p =
|
|
let first_byte = 0 and last_byte = p.current_line_last_char in
|
|
let first_line = 1, first_byte and last_line = p.current_line_pos in
|
|
let file = p.file in
|
|
meta p (Textloc.v ~file ~first_byte ~last_byte ~first_line ~last_line)
|
|
in
|
|
let rec loop p bs =
|
|
let bs = add_line p bs in
|
|
if get_next_line p then loop p bs else (end_doc p bs), meta p
|
|
in
|
|
let nl = get_first_line p in
|
|
nl, loop p []
|
|
end
|
|
|
|
(* Building the final AST, invokes inline parsing. *)
|
|
|
|
let block_struct_to_blank_line p pad span =
|
|
Block.Blank_line (clean_raw_span p ~pad span)
|
|
|
|
let block_struct_to_code_block p = function
|
|
| `Indented (ls : Block_struct.indented_code_line list) (* non-empty *) ->
|
|
let line p { Block_struct.pad; code; _} = clean_raw_span ~pad p code in
|
|
let layout = `Indented and info_string = None in
|
|
let last = (List.hd ls).code in
|
|
let code = List.rev_map (line p) ls in
|
|
let meta =
|
|
let last_line = last.line_pos and last_byte = last.last in
|
|
let start = Meta.textloc (snd (List.hd code)) in
|
|
meta p (Textloc.set_last start ~last_byte ~last_line)
|
|
in
|
|
Block.Code_block ({layout; info_string; code}, meta)
|
|
| `Fenced { Block_struct.fence; code = ls } ->
|
|
let layout =
|
|
let opening_fence = layout_clean_raw_span p fence.opening_fence in
|
|
let closing_fence =
|
|
Option.map (layout_clean_raw_span p) fence.closing_fence
|
|
in
|
|
{ Block.Code_block.indent = fence.indent; opening_fence; closing_fence }
|
|
in
|
|
let info_string = Option.map (clean_unesc_unref_span p) fence.info_string in
|
|
let code = List.rev_map (fun (pad, l) -> clean_raw_span p ~pad l) ls in
|
|
let meta =
|
|
let first = fence.opening_fence in
|
|
let last = match fence.closing_fence with
|
|
| Some last -> last
|
|
| None -> match ls with [] -> first | (_, last_line) :: _ -> last_line
|
|
in
|
|
meta_of_spans p ~first ~last
|
|
in
|
|
let cb = {Block.Code_block.layout = `Fenced layout; info_string; code} in
|
|
if p.exts && Block.Code_block.is_math_block info_string
|
|
then Block.Ext_math_block (cb, meta)
|
|
else Block.Code_block (cb, meta)
|
|
|
|
let block_struct_to_heading p = function
|
|
| `Atx { Block_struct.indent; level; after_open; heading; layout_after } ->
|
|
let after_opening =
|
|
let first = after_open and last = heading.first - 1 in
|
|
layout_clean_raw_span' p { heading with first; last }
|
|
in
|
|
let closing = layout_clean_raw_span' p layout_after in
|
|
let layout = `Atx { Block.Heading.indent; after_opening; closing } in
|
|
let meta =
|
|
meta p (textloc_of_span p { heading with first = after_open - level })
|
|
in
|
|
let _layout, inline = Inline_struct.parse p [heading] in
|
|
let id = match p.heading_auto_ids with
|
|
| false -> None
|
|
| true -> Some (`Auto (Inline.id ~buf:p.buf inline))
|
|
in
|
|
Block.Heading ({layout; level; inline; id}, meta)
|
|
| `Setext { Block_struct.level; heading_lines; underline } ->
|
|
let (leading_indent, trailing_blanks), inline =
|
|
Inline_struct.parse p heading_lines
|
|
in
|
|
let underline_indent, u, blanks = underline in
|
|
let underline_blanks = layout_clean_raw_span' p blanks in
|
|
let underline_count = u.last - u.first + 1, meta p (textloc_of_span p u) in
|
|
let layout =
|
|
{ Block.Heading.leading_indent; trailing_blanks; underline_indent;
|
|
underline_count; underline_blanks }
|
|
in
|
|
let meta =
|
|
let last_line = u.line_pos and last_byte = u.last in
|
|
let start = Meta.textloc (Inline.meta inline) in
|
|
meta p (Textloc.set_last start ~last_byte ~last_line)
|
|
in
|
|
let id = match p.heading_auto_ids with
|
|
| false -> None
|
|
| true -> Some (`Auto (Inline.id ~buf:p.buf inline))
|
|
in
|
|
Block.Heading ({ layout = `Setext layout; level; inline; id }, meta)
|
|
|
|
let block_struct_to_html_block p (b : Block_struct.html_block) =
|
|
let last = List.hd b.html in
|
|
let last_byte = last.last and last_line = last.line_pos in
|
|
let lines = List.rev_map (clean_raw_span p) b.html in
|
|
let start_loc = Meta.textloc (snd (List.hd lines)) in
|
|
let meta = meta p (Textloc.set_last start_loc ~last_byte ~last_line) in
|
|
Block.Html_block (lines, meta)
|
|
|
|
let block_struct_to_paragraph p par =
|
|
let layout, inline = Inline_struct.parse p par.Block_struct.lines in
|
|
let leading_indent, trailing_blanks = layout in
|
|
let meta = Inline.meta inline in
|
|
Block.Paragraph ({ leading_indent; inline; trailing_blanks }, meta)
|
|
|
|
let block_struct_to_thematic_break p indent span =
|
|
let layout, meta = (* not layout because of loc *) clean_raw_span p span in
|
|
Block.Thematic_break ({ indent; layout }, meta)
|
|
|
|
let block_struct_to_table p indent rows =
|
|
let rec loop p col_count last_was_sep acc = function
|
|
| (row, blanks) :: rs ->
|
|
let meta = meta p (textloc_of_span p row) in
|
|
let row' = { row with first = row.first + 1; last = row.last } in
|
|
let cols = Inline_struct.parse_table_row p row' in
|
|
let col_count = Int.max col_count (List.length cols) in
|
|
let r, last_was_sep = match Block.Table.parse_sep_row cols with
|
|
| Some seps -> ((`Sep seps), meta), true
|
|
| None ->
|
|
((if last_was_sep then `Header cols else `Data cols), meta), false
|
|
in
|
|
let acc = (r, layout_clean_raw_span' p blanks) :: acc in
|
|
if rs = [] then row, col_count, acc else
|
|
loop p col_count last_was_sep acc rs
|
|
| [] -> assert false
|
|
in
|
|
let last = fst (List.hd rows) in
|
|
let first, col_count, rows = loop p 0 false [] rows in
|
|
let meta = meta_of_spans p ~first ~last in
|
|
Block.Ext_table ({ indent; col_count; rows }, meta)
|
|
|
|
let rec block_struct_to_block_quote p indent bs =
|
|
let add_block p acc b = block_struct_to_block p b :: acc in
|
|
let last = block_struct_to_block p (List.hd bs) in
|
|
let block = List.fold_left (add_block p) [last] (List.tl bs) in
|
|
let block = match block with
|
|
| [b] -> b
|
|
| quote ->
|
|
let first = Block.meta (List.hd quote) and last = Block.meta last in
|
|
Block.Blocks (quote, meta_of_metas p ~first ~last)
|
|
in
|
|
Block.Block_quote ({indent; block}, Block.meta block)
|
|
|
|
and block_struct_to_footnote_definition p indent (label, defined_label) bs =
|
|
let add_block p acc b = block_struct_to_block p b :: acc in
|
|
let last = block_struct_to_block p (List.hd bs) in
|
|
let block = List.fold_left (add_block p) [last] (List.tl bs) in
|
|
let last = Block.meta last in
|
|
let block = match block with
|
|
| [b] -> b
|
|
| bs ->
|
|
let first = Block.meta (List.hd bs) in
|
|
Block.Blocks (bs, meta_of_metas p ~first ~last)
|
|
in
|
|
let loc =
|
|
let labelloc = Label.textloc label in
|
|
let lastloc = Meta.textloc last in
|
|
let loc = Textloc.span labelloc lastloc in
|
|
let first_byte = Textloc.first_byte loc - 1 in
|
|
Textloc.set_first loc ~first_byte ~first_line:(Textloc.first_line loc)
|
|
in
|
|
let fn = { Block.Footnote.indent; label; defined_label; block }, meta p loc in
|
|
begin match defined_label with
|
|
| None -> () | Some def -> set_label_def p def (Block.Footnote.Def fn)
|
|
end;
|
|
Block.Ext_footnote_definition fn
|
|
|
|
and block_struct_to_list_item p (i : Block_struct.list_item) =
|
|
let rec loop bstate tight acc = function
|
|
| Block_struct.Blank_line _ as bl :: bs ->
|
|
let bstate = if bstate = `Trail_blank then `Trail_blank else `Blank in
|
|
loop bstate tight (block_struct_to_block p bl :: acc) bs
|
|
| Block_struct.List
|
|
{ items = { blocks = Block_struct.Blank_line _ :: _ } :: _ } as l :: bs
|
|
->
|
|
loop bstate false (block_struct_to_block p l :: acc) bs
|
|
| b :: bs ->
|
|
let tight = tight && not (bstate = `Blank) in
|
|
loop `Non_blank tight (block_struct_to_block p b :: acc) bs
|
|
| [] -> tight, acc
|
|
in
|
|
let last_meta, (tight, blocks) = match i.blocks with
|
|
| [Block_struct.Blank_line _ as blank] ->
|
|
let bl = block_struct_to_block p blank in
|
|
Block.meta bl, (true, [bl])
|
|
| Block_struct.Blank_line _ as blank :: bs ->
|
|
let bl = block_struct_to_block p blank in
|
|
(Block.meta bl), loop `Trail_blank true [bl] bs
|
|
| b :: bs ->
|
|
let b = block_struct_to_block p b in
|
|
(Block.meta b), loop `Non_blank true [b] bs
|
|
| [] -> assert false
|
|
in
|
|
let block = match blocks with
|
|
| [i] -> i
|
|
| is ->
|
|
let first = Block.meta (List.hd is) in
|
|
Block.Blocks (is, meta_of_metas p ~first ~last:last_meta)
|
|
in
|
|
let before_marker = i.before_marker and after_marker = i.after_marker in
|
|
let marker = (* not layout to get loc *) clean_raw_span p i.marker in
|
|
let ext_task_marker = match i.ext_task_marker with
|
|
| None -> None
|
|
| Some (u, span) -> Some (u, meta p (textloc_of_span p span))
|
|
in
|
|
let meta = meta_of_metas p ~first:(snd marker) ~last:last_meta in
|
|
let i =
|
|
{ Block.List_item.before_marker; marker; after_marker; block;
|
|
ext_task_marker }
|
|
in
|
|
(i, meta), tight
|
|
|
|
and block_struct_to_list p list =
|
|
let rec loop p tight acc = function
|
|
| [] -> tight, acc
|
|
| item :: items ->
|
|
let item, item_tight = block_struct_to_list_item p item in
|
|
loop p (tight && item_tight) (item :: acc) items
|
|
in
|
|
let items = list.Block_struct.items in
|
|
let last, tight = block_struct_to_list_item p (List.hd items) in
|
|
let tight, items = loop p (not list.loose && tight) [last] (List.tl items) in
|
|
let meta = meta_of_metas p ~first:(snd (List.hd items)) ~last:(snd last) in
|
|
Block.List ({ type' = list.Block_struct.list_type; tight; items }, meta)
|
|
|
|
and block_struct_to_block p = function
|
|
| Block_struct.Block_quote (ind, bs) -> block_struct_to_block_quote p ind bs
|
|
| Block_struct.List list -> block_struct_to_list p list
|
|
| Block_struct.Paragraph par -> block_struct_to_paragraph p par
|
|
| Block_struct.Thematic_break (i, br) -> block_struct_to_thematic_break p i br
|
|
| Block_struct.Code_block cb -> block_struct_to_code_block p cb
|
|
| Block_struct.Heading h -> block_struct_to_heading p h
|
|
| Block_struct.Html_block html -> block_struct_to_html_block p html
|
|
| Block_struct.Blank_line (pad, span) -> block_struct_to_blank_line p pad span
|
|
| Block_struct.Linkref_def r -> Block.Link_reference_definition r
|
|
| Block_struct.Ext_table (i, rows) -> block_struct_to_table p i rows
|
|
| Block_struct.Ext_footnote (i, labels, bs) ->
|
|
block_struct_to_footnote_definition p i labels bs
|
|
|
|
let block_struct_to_doc p (doc, meta) =
|
|
match List.rev_map (block_struct_to_block p) doc with
|
|
| [b] -> b | bs -> Block.Blocks (bs, meta)
|
|
|
|
(* Documents *)
|
|
|
|
module Doc = struct
|
|
type t = { nl : Layout.string; block : Block.t; defs : Label.defs }
|
|
let make ?(nl = "\n") ?(defs = Label.Map.empty) block = { nl; block; defs }
|
|
let empty = make (Block.Blocks ([], Meta.none))
|
|
let nl d = d.nl
|
|
let block d = d.block
|
|
let defs d = d.defs
|
|
let of_string
|
|
?defs ?resolver ?nested_links ?heading_auto_ids ?layout ?locs ?file
|
|
?(strict = true) s
|
|
=
|
|
let p =
|
|
parser ?defs ?resolver ?nested_links ?heading_auto_ids ?layout ?locs
|
|
?file ~strict s
|
|
in
|
|
let nl, doc = Block_struct.parse p in
|
|
let block = block_struct_to_doc p doc in
|
|
make ~nl block ~defs:p.defs
|
|
|
|
let unicode_version = Cmarkit_data.unicode_version
|
|
let commonmark_version = "0.30"
|
|
end
|
|
|
|
(* Maps and folds *)
|
|
|
|
module Mapper = struct
|
|
type 'a filter_map = 'a option
|
|
type 'a result = [ `Default | `Map of 'a filter_map ]
|
|
let default = `Default
|
|
let delete = `Map None
|
|
let ret v = `Map (Some v)
|
|
|
|
type t =
|
|
{ inline_ext_default : Inline.t map;
|
|
block_ext_default : Block.t map;
|
|
inline : Inline.t mapper;
|
|
block : Block.t mapper }
|
|
and 'a map = t -> 'a -> 'a filter_map
|
|
and 'a mapper = t -> 'a -> 'a result
|
|
|
|
let none _ _ = `Default
|
|
let ext_inline_none _ _ = invalid_arg Inline.err_unknown
|
|
let ext_block_none _ _ = invalid_arg Block.err_unknown
|
|
let make
|
|
?(inline_ext_default = ext_inline_none)
|
|
?(block_ext_default = ext_block_none)
|
|
?(inline = none) ?(block = none) ()
|
|
=
|
|
{ inline_ext_default; block_ext_default; inline; block }
|
|
|
|
let inline_mapper m = m.inline
|
|
let block_mapper m = m.block
|
|
let inline_ext_default m = m.inline_ext_default
|
|
let block_ext_default m = m.block_ext_default
|
|
|
|
let ( let* ) = Option.bind
|
|
|
|
let rec map_inline m i = match m.inline m i with
|
|
| `Map i -> i
|
|
| `Default ->
|
|
let open Inline in
|
|
match i with
|
|
| Autolink _ | Break _ | Code_span _ | Raw_html _
|
|
| Text _ | Ext_math_span _ as i -> Some i
|
|
| Image (l, meta) ->
|
|
let text = Option.value ~default:Inline.empty (map_inline m l.text) in
|
|
Some (Image ({ l with text }, meta))
|
|
| Link (l, meta) ->
|
|
let* text = map_inline m l.text in
|
|
Some (Link ({ l with text }, meta))
|
|
| Emphasis (e, meta) ->
|
|
let* inline = map_inline m e.inline in
|
|
Some (Emphasis ({ e with inline }, meta))
|
|
| Strong_emphasis (e, meta) ->
|
|
let* inline = map_inline m e.inline in
|
|
Some (Strong_emphasis ({ e with inline}, meta))
|
|
| Inlines (is, meta) ->
|
|
(match List.filter_map (map_inline m) is with
|
|
| [] -> None | is -> Some (Inlines (is, meta)))
|
|
| Ext_strikethrough (s, meta) ->
|
|
let* inline = map_inline m s in
|
|
Some (Ext_strikethrough (inline, meta))
|
|
| ext -> m.inline_ext_default m ext
|
|
|
|
let rec map_block m b = match m.block m b with
|
|
| `Map b -> b
|
|
| `Default ->
|
|
let open Block in
|
|
match b with
|
|
| Blank_line _ | Code_block _ | Html_block _
|
|
| Link_reference_definition _ | Thematic_break _
|
|
| Ext_math_block _ as b -> Some b
|
|
| Heading (h, meta) ->
|
|
let inline = match map_inline m (Block.Heading.inline h) with
|
|
| None -> (* Can be empty *) Inline.Inlines ([], Meta.none)
|
|
| Some i -> i
|
|
in
|
|
Some (Heading ({ h with inline}, meta))
|
|
| Block_quote (b, meta) ->
|
|
let block = match map_block m b.block with
|
|
| None -> (* Can be empty *) Blocks ([], Meta.none) | Some b -> b
|
|
in
|
|
Some (Block_quote ({ b with block}, meta))
|
|
| Blocks (bs, meta) ->
|
|
(match List.filter_map (map_block m) bs with
|
|
| [] -> None | bs -> Some (Blocks (bs, meta)))
|
|
| List (l, meta) ->
|
|
let map_list_item m (i, meta) =
|
|
let* block = map_block m (List_item.block i) in
|
|
Some ({ i with block }, meta)
|
|
in
|
|
(match List.filter_map (map_list_item m) l.items with
|
|
| [] -> None | items -> Some (List ({ l with items }, meta)))
|
|
| Paragraph (p, meta) ->
|
|
let* inline = map_inline m (Paragraph.inline p) in
|
|
Some (Paragraph ({ p with inline }, meta))
|
|
| Ext_table (t, meta) ->
|
|
let map_col m (i, layout) = match map_inline m i with
|
|
| None -> None | Some i -> Some (i, layout)
|
|
in
|
|
let map_row (((r, meta), blanks) as row) = match r with
|
|
| `Header is ->
|
|
(`Header (List.filter_map (map_col m) is), meta), blanks
|
|
| `Sep _ -> row
|
|
| `Data is ->
|
|
(`Data (List.filter_map (map_col m) is), meta), blanks
|
|
in
|
|
let rows = List.map map_row t.rows in
|
|
Some (Ext_table ({ t with Table.rows }, meta))
|
|
| Ext_footnote_definition (fn, meta) ->
|
|
let block = match map_block m fn.block with
|
|
| None -> (* Can be empty *) Blocks ([], Meta.none) | Some b -> b
|
|
in
|
|
Some (Ext_footnote_definition ({ fn with block}, meta))
|
|
| ext -> m.block_ext_default m ext
|
|
|
|
let map_doc m d =
|
|
let map_block m b = Option.value ~default:Block.empty (map_block m b) in
|
|
(* XXX something better for defs should be devised here. *)
|
|
let map_def m = function
|
|
| Block.Footnote.Def (fn, meta) ->
|
|
let block = map_block m (Block.Footnote.block fn) in
|
|
Block.Footnote.Def ({ fn with block }, meta)
|
|
| def -> def
|
|
in
|
|
let block = map_block m (Doc.block d) in
|
|
let defs = Label.Map.map (map_def m) (Doc.defs d) in
|
|
{ d with Doc.block; defs }
|
|
end
|
|
|
|
module Folder = struct
|
|
type 'a result = [ `Default | `Fold of 'a ]
|
|
let default = `Default
|
|
let ret v = `Fold v
|
|
|
|
type ('a, 'b) fold = 'b t -> 'b -> 'a -> 'b
|
|
and ('a, 'b) folder = 'b t -> 'b -> 'a -> 'b result
|
|
and 'a t =
|
|
{ inline_ext_default : (Inline.t, 'a) fold;
|
|
block_ext_default : (Block.t, 'a) fold;
|
|
inline : (Inline.t, 'a) folder;
|
|
block : (Block.t, 'a) folder; }
|
|
|
|
let none _ _ _ = `Default
|
|
let ext_inline_none _ _ _ = invalid_arg Inline.err_unknown
|
|
let ext_block_none _ _ _ = invalid_arg Block.err_unknown
|
|
let make
|
|
?(inline_ext_default = ext_inline_none)
|
|
?(block_ext_default = ext_block_none)
|
|
?(inline = none) ?(block = none) ()
|
|
=
|
|
{ inline_ext_default; block_ext_default; inline; block }
|
|
|
|
let inline_folder f = f.inline
|
|
let block_folder f = f.block
|
|
let inline_ext_default f = f.inline_ext_default
|
|
let block_ext_default f = f.block_ext_default
|
|
|
|
let rec fold_inline f acc i = match f.inline f acc i with
|
|
| `Fold acc -> acc
|
|
| `Default ->
|
|
let open Inline in
|
|
match i with
|
|
| Autolink _ | Break _ | Code_span _ | Raw_html _ | Text _
|
|
| Ext_math_span _ -> acc
|
|
| Image (l, _) | Link (l, _) -> fold_inline f acc l.text
|
|
| Emphasis ({ inline }, _) -> fold_inline f acc inline
|
|
| Strong_emphasis ({ inline }, _) -> fold_inline f acc inline
|
|
| Inlines (is, _) -> List.fold_left (fold_inline f) acc is
|
|
| Ext_strikethrough (inline, _) -> fold_inline f acc inline
|
|
| ext -> f.inline_ext_default f acc ext
|
|
|
|
let rec fold_block f acc b = match f.block f acc b with
|
|
| `Fold acc -> acc
|
|
| `Default ->
|
|
let open Block in
|
|
match b with
|
|
| Blank_line _ | Code_block _ | Html_block _
|
|
| Link_reference_definition _ | Thematic_break _ | Ext_math_block _ -> acc
|
|
| Heading (h, _) -> fold_inline f acc (Block.Heading.inline h)
|
|
| Block_quote (bq, _) -> fold_block f acc bq.block
|
|
| Blocks (bs, _) -> List.fold_left (fold_block f) acc bs
|
|
| List (l, _) ->
|
|
let fold_list_item m acc (i, _) =
|
|
fold_block m acc (Block.List_item.block i)
|
|
in
|
|
List.fold_left (fold_list_item f) acc l.items
|
|
| Paragraph (p, _) -> fold_inline f acc (Block.Paragraph.inline p)
|
|
| Ext_table (t, _) ->
|
|
let fold_row acc ((r, _), _) = match r with
|
|
| (`Header is | `Data is) ->
|
|
List.fold_left (fun acc (i, _) -> fold_inline f acc i) acc is
|
|
| `Sep _ -> acc
|
|
in
|
|
List.fold_left fold_row acc t.Table.rows
|
|
| Ext_footnote_definition (fn, _) -> fold_block f acc fn.block
|
|
| ext -> f.block_ext_default f acc ext
|
|
|
|
let fold_doc f acc d = fold_block f acc (Doc.block d)
|
|
end
|
|
|
|
(*---------------------------------------------------------------------------
|
|
Copyright (c) 2021 The cmarkit programmers
|
|
|
|
Permission to use, copy, modify, and/or distribute this software for any
|
|
purpose with or without fee is hereby granted, provided that the above
|
|
copyright notice and this permission notice appear in all copies.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
---------------------------------------------------------------------------*)
|