linol/thirdparty/lsp/ocaml-lsp-server/vendor/cmarkit/cmarkit_base.ml

1360 lines
52 KiB
OCaml

(*---------------------------------------------------------------------------
Copyright (c) 2021 The cmarkit programmers. All rights reserved.
Distributed under the ISC license, see terms at the end of the file.
---------------------------------------------------------------------------*)
(* N.B. The doc strings of the .mli can help understanding these internal
functions. *)
let sub_includes ~affix s ~first ~last =
let get = String.get in
let len_a = String.length affix in
let len_s = last - first + 1 in
if len_a > len_s then false else
let max_idx_a = len_a - 1 in
let max_idx_s = first + (len_s - len_a) in
let rec loop i k =
if i > max_idx_s then false else
if k > max_idx_a then true else
if k > 0
then if get affix k = get s (i + k) then loop i (k + 1) else loop (i + 1) 0
else if get affix 0 = get s i then loop i 1 else loop (i + 1) 0
in
loop first 0
let unsafe_get = String.unsafe_get
module String_set = Set.Make (String)
(* Heterogeneous dictionaries *)
module Dict = struct
(* Type identifiers, can be deleted once we require 5.1 *)
module Type = struct
type (_, _) eq = Equal : ('a, 'a) eq
module Id = struct
type _ id = ..
module type ID = sig type t type _ id += Id : t id end
type 'a t = (module ID with type t = 'a)
let make (type a) () : a t =
(module struct type t = a type _ id += Id : t id end)
let provably_equal
(type a b) ((module A) : a t) ((module B) : b t) : (a, b) eq option
=
match A.Id with B.Id -> Some Equal | _ -> None
let uid (type a) ((module A) : a t) =
Obj.Extension_constructor.id (Obj.Extension_constructor.of_val A.Id)
end
end
module M = Map.Make (Int)
type 'a key = 'a Type.Id.t
type binding = B : 'a key * 'a -> binding
type t = binding M.t
let key = Type.Id.make
let empty = M.empty
let mem k m = M.mem (Type.Id.uid k) m
let add k v m = M.add (Type.Id.uid k) (B (k, v)) m
let tag k m = add k () m
let remove k m = M.remove (Type.Id.uid k) m
let find : type a. a key -> t -> a option =
fun k m -> match M.find_opt (Type.Id.uid k) m with
| None -> None
| Some B (k', v) ->
match Type.Id.provably_equal k k' with
| None -> assert false | Some Type.Equal -> Some v
end
(* Text locations *)
module Textloc = struct
(* File paths *)
type fpath = string
let file_none = "-"
let pp_path = Format.pp_print_string
(* Byte positions *)
type byte_pos = int (* zero-based *)
let byte_pos_none = -1
(* Lines *)
type line_num = int (* one-based *)
let line_num_none = -1
(* Line positions
We keep the byte position of the first element on the line. This
first element may not exist and be equal to the text length if
the input ends with a newline. Editors expect tools to compute
visual columns (not a very good idea). By keeping these byte
positions we can approximate columns by subtracting the line byte
position data byte location. This will only be correct on
US-ASCII data. *)
type line_pos = line_num * byte_pos
let line_pos_first = 1, 0
let line_pos_none = line_num_none, byte_pos_none
(* Text locations *)
type t =
{ file : fpath;
first_byte : byte_pos; last_byte : byte_pos;
first_line : line_pos; last_line : line_pos }
let v ~file ~first_byte ~last_byte ~first_line ~last_line =
{ file; first_byte; last_byte; first_line; last_line }
let file l = l.file
let first_byte l = l.first_byte
let last_byte l = l.last_byte
let first_line l = l.first_line
let last_line l = l.last_line
let none =
let first_byte = byte_pos_none and last_byte = byte_pos_none in
let first_line = line_pos_none and last_line = line_pos_none in
v ~file:file_none ~first_byte ~last_byte ~first_line ~last_line
(* Predicates and comparisons *)
let is_none l = l.first_byte < 0
let is_empty l = l.first_byte > l.last_byte
let equal l0 l1 =
String.equal l0.file l1.file &&
Int.equal l0.first_byte l1.first_byte &&
Int.equal l0.last_byte l1.last_byte
let compare l0 l1 =
let c = String.compare l0.file l1.file in
if c <> 0 then c else
let c = Int.compare l0.first_byte l1.first_byte in
if c <> 0 then c else
Int.compare l0.last_byte l1.last_byte
(* Shrink and stretch *)
let set_first l ~first_byte ~first_line = { l with first_byte; first_line }
let set_last l ~last_byte ~last_line = { l with last_byte; last_line }
[@@@warning "-6"]
let to_first l = v l.file l.first_byte l.first_byte l.first_line l.first_line
let to_last l = v l.file l.last_byte l.last_byte l.last_line l.last_line
let before l = v l.file l.first_byte byte_pos_none l.first_line line_pos_none
let after l =
v l.file (l.first_byte + 1) byte_pos_none l.last_line line_pos_none
[@@@warning "+6"]
let span l0 l1 =
let first_byte, first_line =
if l0.first_byte < l1.first_byte
then l0.first_byte, l0.first_line
else l1.first_byte, l1.first_line
in
let last_byte, last_line, file =
if l0.last_byte < l1.last_byte
then l1.last_byte, l1.last_line, l1.file
else l0.last_byte, l0.last_line, l0.file
in
v ~file ~first_byte ~first_line ~last_byte ~last_line
[@@@warning "-6"]
let reloc ~first ~last =
v last.file first.first_byte last.last_byte first.first_line last.last_line
[@@@warning "+6"]
(* Formatters *)
let pf = Format.fprintf
let pp_ocaml ppf l = match is_none l with
| true -> pf ppf "File \"%a\"" pp_path l.file
| false ->
let pp_lines ppf l = match fst l.first_line = fst l.last_line with
| true -> pf ppf "line %d" (fst l.first_line)
| false -> pf ppf "lines %d-%d" (fst l.first_line) (fst l.last_line)
in
(* "characters" represent positions (insertion points) not columns *)
let pos_s = l.first_byte - snd l.first_line in
let pos_e = l.last_byte - snd l.last_line + 1 in
if pos_s = 0 && pos_e = 0
then pf ppf "File \"%a\", %a" pp_path l.file pp_lines l
else pf ppf "File \"%a\", %a, characters %d-%d"
pp_path l.file pp_lines l pos_s pos_e
let pp_gnu ppf l = match is_none l with
| true -> pf ppf "%a:" pp_path l.file
| false ->
let pp_lines ppf l =
let col_s = l.first_byte - snd l.first_line + 1 in
let col_e = l.last_byte - snd l.last_line + 1 in
match fst l.first_line = fst l.last_line with
| true -> pf ppf "%d.%d-%d" (fst l.first_line) col_s col_e
| false ->
pf ppf "%d.%d-%d.%d"
(fst l.first_line) col_s (fst l.last_line) col_e
in
pf ppf "%a:%a" pp_path l.file pp_lines l
let pp = pp_gnu
let pp_dump ppf l =
pf ppf "file:%s bytes:%d-%d lines:%d-%d lines-bytes:%d-%d]"
l.file l.first_byte l.last_byte (fst l.first_line) (fst l.last_line)
(snd l.first_line) (snd l.last_line)
end
type line_span =
{ line_pos : Textloc.line_pos;
first : Textloc.byte_pos;
last : Textloc.byte_pos }
type line_start = Textloc.byte_pos
type rev_spans = (line_start * line_span) list
type 'a next_line = 'a -> ('a * line_span) option
(* Node meta data *)
module Meta = struct
type id = int
type t = { textloc : Textloc.t; id : id; dict : Dict.t }
let new_id = let id = Atomic.make 0 in fun () -> Atomic.fetch_and_add id 1
let make ?(textloc = Textloc.none) () =
{ textloc; id = new_id (); dict = Dict.empty }
let none = make ()
let id m = m.id
let textloc m = m.textloc
let with_textloc ~keep_id m textloc = match keep_id with
| true -> { m with textloc }
| false -> { m with textloc; id = new_id () }
let equal m0 m1 = Int.equal m0.id m1.id
let compare m0 m1 = Int.compare m0.id m1.id
let is_none m = equal none m
type 'a key = 'a Dict.key
let key = Dict.key
let mem k m = Dict.mem k m.dict
let add k v m = { m with dict = Dict.add k v m.dict }
let tag k m = add k () m
let remove k m = { m with dict = Dict.remove k m.dict }
let find k m = Dict.find k m.dict
end
(* US-ASCII processing *)
module Ascii = struct
let is_control = function '\x00' .. '\x1F' | '\x7F' -> true | _ -> false
let is_letter = function 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false
let is_upper = function 'A' .. 'Z' -> true | _ -> false
let is_lower = function 'a' .. 'z' -> true | _ -> false
let is_digit = function '0' .. '9' -> true | _ -> false
let is_hex_digit = function
| '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' -> true | _ -> false
let hex_digit_to_int = function
| '0' .. '9' as c -> Char.code c - 0x30
| 'A' .. 'F' as c -> Char.code c - 0x37
| 'a' .. 'f' as c -> Char.code c - 0x57
| _ -> assert false
let is_alphanum = function
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true | _ -> false
let is_white = function
| '\x20' | '\x09' | '\x0A' | '\x0B' | '\x0C' | '\x0D' -> true | _ -> false
let is_punct = function
(* https://spec.commonmark.org/current/#ascii-punctuation-character *)
| '!' | '\"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+'
| ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@'
| '[' | '\\' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~' -> true
| _ -> false
let is_blank = function ' ' | '\t' -> true | _ -> false
let caseless_starts_with ~prefix s =
let get = String.get in
let len_a = String.length prefix in
let len_s = String.length s in
if len_a > len_s then false else
let max_idx_a = len_a - 1 in
let rec loop s i max =
if i > max then true else
let c = match get s i with
| 'A' .. 'Z' as c -> Char.(unsafe_chr (code c + 32)) | c -> c
in
if get prefix i <> c then false else loop s (i + 1) max
in
loop s 0 max_idx_a
let match' ~sub s ~start =
(* assert (start + String.length sub - 1 < String.length s) *)
try
for i = 0 to String.length sub - 1
do if s.[start + i] <> sub.[i] then raise_notrace Exit done; true
with
| Exit -> false
let caseless_match ~sub s ~start =
(* assert (start + String.length sub - 1 < String.length s) *)
try
for i = 0 to String.length sub - 1 do
let c = match s.[start + i] with
| 'A' .. 'Z' as c -> Char.(unsafe_chr (code c + 32)) | c -> c
in
if c <> sub.[i] then raise_notrace Exit
done;
true
with
| Exit -> false
let lowercase_sub s first len =
let b = Bytes.create len in
for i = 0 to len - 1 do
let c = match s.[first + i] with
| 'A' .. 'Z' as c -> Char.(unsafe_chr (code c + 32)) | c -> c
in
Bytes.set b i c
done;
Bytes.unsafe_to_string b
end
module Text = struct
let _utf_8_clean_unesc_unref ~do_unesc buf s ~first ~last =
(* This unescapes CommonMark escapes if [do_unesc] is true,
resolves entity and character references and replaces U+0000 or
UTF-8 decoding errors by U+FFFD *)
let get = String.get in
let flush buf s last start k =
if start <= last then Buffer.add_substring buf s start (k - start)
in
let rec try_entity_hex ~do_unesc buf s last start num_start k u =
(* https://spec.commonmark.org/current/\
#hexadecimal-numeric-character-references *)
if k > last || k > num_start + 6
then resolve ~do_unesc buf s last start k else
match get s k with
| ';' ->
let next = k + 1 in
if k = num_start then resolve ~do_unesc buf s last start next else
let u =
if Uchar.is_valid u && u <> 0 then Uchar.unsafe_of_int u else
Uchar.rep
in
flush buf s last start (num_start - 3 (* don't include &#(x|X) *));
Buffer.add_utf_8_uchar buf u;
resolve ~do_unesc buf s last next next
| c when Ascii.is_hex_digit c ->
let u = u * 16 + (Ascii.hex_digit_to_int c) in
try_entity_hex ~do_unesc buf s last start num_start (k + 1) u
| _ ->
resolve ~do_unesc buf s last start k
and try_entity_dec ~do_unesc buf s last start num_start k u =
if k > last || k > num_start + 7
then resolve ~do_unesc buf s last start k else
match get s k with
| ';' ->
let next = k + 1 in
if k = num_start then resolve ~do_unesc buf s last start next else
let u =
if Uchar.is_valid u && u <> 0 then Uchar.unsafe_of_int u else
Uchar.rep
in
flush buf s last start (num_start - 2 (* don't include &# *));
Buffer.add_utf_8_uchar buf u;
resolve ~do_unesc buf s last next next
| c when Ascii.is_digit c ->
let u = u * 10 + (Char.code c - 0x30) in
try_entity_dec ~do_unesc buf s last start num_start (k + 1) u
| _ ->
resolve ~do_unesc buf s last start k
and try_entity_named ~do_unesc buf s last start name_start k =
(* https://spec.commonmark.org/current/\
#entity-and-numeric-character-references *)
if k > last then resolve ~do_unesc buf s last start k else
match get s k with
| ';' ->
let name = String.sub s name_start (k - name_start) in
begin match Cmarkit_data.html_entity name with
| None -> resolve ~do_unesc buf s last start (k + 1)
| Some rep ->
let next = k + 1 in
flush buf s last start (name_start - 1 (* don't include & *)) ;
Buffer.add_string buf rep;
resolve ~do_unesc buf s last next next
end
| c when Ascii.is_alphanum c ->
try_entity_named ~do_unesc buf s last start name_start (k + 1)
| _ ->
resolve ~do_unesc buf s last start k
and resolve ~do_unesc buf s last start k =
if k > last then (flush buf s last start k; Buffer.contents buf) else
let next = k + 1 in
match get s k with
| '\x00' ->
flush buf s last start k; Buffer.add_utf_8_uchar buf Uchar.rep;
resolve ~do_unesc buf s last next next
| '\\' when do_unesc ->
if next > last then resolve ~do_unesc buf s last start next else
let nc = get s next in
if not (Ascii.is_punct nc)
then resolve ~do_unesc buf s last start next else
let next' = next + 1 in
(flush buf s last start k; Buffer.add_char buf nc;
resolve ~do_unesc buf s last next' next')
| '&' ->
if k + 2 > last then resolve ~do_unesc buf s last start next else
begin match get s next with
| c when Ascii.is_letter c ->
try_entity_named ~do_unesc buf s last start next next
| '#' ->
let next = next + 1 in
begin match get s next with
| c when Ascii.is_digit c ->
try_entity_dec ~do_unesc buf s last start next next 0
| 'x' | 'X' ->
let next = next + 1 in
try_entity_hex ~do_unesc buf s last start next next 0
| _ -> resolve ~do_unesc buf s last start next
end
| _ -> resolve ~do_unesc buf s last start next
end
| '\x01' .. '\x7F' -> resolve ~do_unesc buf s last start next
| b ->
let d = String.get_utf_8_uchar s k in
let next = k + Uchar.utf_decode_length d in
match Uchar.utf_decode_is_valid d with
| true -> resolve ~do_unesc buf s last start next
| false ->
flush buf s last start k;
Buffer.add_utf_8_uchar buf Uchar.rep;
resolve ~do_unesc buf s last next next
in
let rec check ~do_unesc buf s last start k =
if k > last then String.sub s first (last - start + 1) else
match unsafe_get s k with
| '\\' when do_unesc ->
Buffer.reset buf; resolve ~do_unesc buf s last start k
| '&' | '\x00' ->
Buffer.reset buf; resolve ~do_unesc buf s last start k
| '\x01' .. '\x7F' ->
check ~do_unesc buf s last start (k + 1)
| _ ->
let d = String.get_utf_8_uchar s k in
if Uchar.utf_decode_is_valid d
then check ~do_unesc buf s last start (k + Uchar.utf_decode_length d)
else (Buffer.reset buf; resolve ~do_unesc buf s last start k)
in
if first > last then "" else
let max = String.length s - 1 in
let last = if last > max then max else last in
let first = if first < 0 then 0 else first in
check ~do_unesc buf s last first first
let utf_8_clean_unesc_unref buf s ~first ~last =
_utf_8_clean_unesc_unref ~do_unesc:true buf s ~first ~last
let utf_8_clean_unref buf s ~first ~last =
_utf_8_clean_unesc_unref ~do_unesc:false buf s ~first ~last
let utf_8_clean_raw ?(pad = 0) buf s ~first ~last =
let get = String.get in
let padit buf pad = for i = 1 to pad do Buffer.add_char buf ' ' done in
let clean buf s last first dirty =
let flush buf s last start k =
if start <= last then Buffer.add_substring buf s start (k - start);
in
let rec loop buf s last start k =
if k > last then (flush buf s last start k; Buffer.contents buf) else
match get s k with
| '\x01' .. '\x7F' (* US-ASCII *) -> loop buf s last start (k + 1)
| '\x00' ->
let next = k + 1 in
flush buf s last start k; Buffer.add_utf_8_uchar buf Uchar.rep;
loop buf s last next next
| _ ->
let d = String.get_utf_8_uchar s k in
let next = k + Uchar.utf_decode_length d in
match Uchar.utf_decode_is_valid d with
| true -> loop buf s last start next
| false ->
flush buf s last start k; Buffer.add_utf_8_uchar buf Uchar.rep;
loop buf s last next next
in
flush buf s last first dirty; loop buf s last dirty dirty
in
let rec check buf s last first k =
if k > last then String.sub s first (last - first + 1) else
match get s k with
| '\x01' .. '\x7F' (* US-ASCII *) -> check buf s last first (k + 1)
| '\x00' -> (Buffer.reset buf; clean buf s last first k)
| _ ->
let d = String.get_utf_8_uchar s k in
if Uchar.utf_decode_is_valid d
then check buf s last first (k + Uchar.utf_decode_length d)
else (Buffer.reset buf; clean buf s last first k)
in
if first > last then
if pad = 0 then "" else
(Buffer.reset buf; padit buf pad; Buffer.contents buf)
else
let max = String.length s - 1 in
let last = if last > max then max else last in
let first = if first < 0 then 0 else first in
if pad = 0 then check buf s last first first else
(Buffer.reset buf; padit buf pad; clean buf s last first first)
end
(* Unicode matching *)
let prev_uchar s ~first ~before =
let rec find_utf_8_starter s ~first ~start =
if start < first then first else match s.[start] with
| '\x00' .. '\x7F' | '\xC2' .. '\xDF'
| '\xE0' .. '\xEF' | '\xF0' .. '\xF4' -> start
| _ -> find_utf_8_starter s ~first ~start:(start - 1)
in
if before <= first then Uchar.of_int 0x0020 (* something white *) else
let k = find_utf_8_starter s ~first ~start:(before - 1) in
Uchar.utf_decode_uchar (String.get_utf_8_uchar s k)
let next_uchar s ~last ~after =
if after >= last then Uchar.of_int 0x0020 (* something white *) else
Uchar.utf_decode_uchar (String.get_utf_8_uchar s (after + 1))
(* Result types *)
type indent = int
type byte_pos = Textloc.byte_pos
type first = Textloc.byte_pos
type last = Textloc.byte_pos
type next = Textloc.byte_pos
type heading_level = int
(* Runs, blanks and indents *)
let rec run_of ~char s ~last ~start =
if start > last || s.[start] <> char then start - 1 else
run_of ~char s ~last ~start:(start + 1)
let rec first_non_blank s ~last ~start =
if start > last then last + 1 else match s.[start] with
| ' ' | '\t' -> first_non_blank s ~last ~start:(start + 1)
| _ -> start
let first_non_blank_in_span s sp =
first_non_blank s ~last:sp.last ~start:sp.first
let rec last_non_blank s ~first ~start =
if start < first then first - 1 else match s.[start] with
| ' ' | '\t' -> last_non_blank s ~first ~start:(start - 1)
| _ -> start
let rec rev_drop_spaces s ~first ~start =
if start < first then first - 1 else
if s.[start] = ' ' then rev_drop_spaces s ~first ~start:(start - 1) else start
let push_span ~line first' last' = function
| (line_start, { line_pos; first; last }) :: acc
when (fst line_pos) = (fst line.line_pos) -> (* Merge if on same line *)
(line_start, { line with first; last = last' }) :: acc
| acc ->
(line.first, { line with first = first'; last = last' }) :: acc
let accept_to ~char ~next_line s lines ~line spans ~after =
(* Includes final [char] in spans *)
let rec loop ~char ~next_line s lines line start acc k =
if k > line.last then match next_line lines with
| None -> None
| Some (lines, newline) ->
let acc = push_span ~line start line.last acc in
let start = first_non_blank_in_span s newline in
loop ~char ~next_line s lines newline start acc start
else
if s.[k] = char
then Some (lines, line, push_span ~line start k acc, k)
else loop ~char ~next_line s lines line start acc (k + 1)
in
loop ~char ~next_line s lines line after spans (after + 1)
let accept_upto ~char ~next_line s lines ~line acc ~after =
(* Does not not include final [char] in spans and continues on
backslashed [char]. *)
let rec loop ~char ~next_line s lines line ~prev_bslash start acc k =
if k > line.last then match next_line lines with
| None -> None
| Some (lines, newline) ->
if newline.first > newline.last (* empty *) then None else
let acc = push_span ~line start line.last acc in
let start = first_non_blank_in_span s newline in
let prev_bslash = false in
loop ~char ~next_line s lines newline ~prev_bslash start acc start
else
if s.[k] = char && not prev_bslash
then Some (lines, line, push_span ~line start (k - 1) acc, k) else
let prev_bslash = s.[k] = '\\' && not prev_bslash (* \\ is not *) in
loop ~char ~next_line s lines line ~prev_bslash start acc (k + 1)
in
let start = after + 1 in
loop ~char ~next_line s lines line ~prev_bslash:false start acc start
let first_non_blank_over_nl ~next_line s lines ~line ~start =
let nb = first_non_blank s ~last:line.last ~start in
if nb <= line.last then `This_line nb else
match next_line lines with
| None -> `None
| Some (lines, newline) ->
let nb = first_non_blank_in_span s newline in
if nb > newline.last then `None else `Next_line (lines, newline, nb)
let first_non_blank_over_nl' ~next_line s lines ~line spans ~start =
(* Same as [first_non_blank_over_nl] but pushes skipped data on [spans]. *)
match first_non_blank_over_nl ~next_line s lines ~line ~start with
| `None -> None
| `This_line nb ->
let line = { line with first = start } (* no layout *) in
let spans = push_span ~line start (nb - 1) spans in
Some (lines, line, spans, nb - 1)
| `Next_line (lines, newline, nb) ->
let line = { line with first = start } (* no layout *) in
let spans = push_span ~line start line.last spans in
Some (lines, newline, spans, nb - 1)
let first_non_escaped_char c s ~last ~start =
let rec loop c s ~last ~start k =
if k > last then k else
if s.[k] = c && (k = start || s.[k - 1] <> '\\') then k else
loop c s ~last ~start (k + 1)
in
loop c s ~last ~start start
(* Autolinks *)
let autolink_email s ~last ~start =
(* https://spec.commonmark.org/current/#email-address
Via the ABNF "<" email ">" with email defined by:
https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address *)
let is_atext_plus_dot = function
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9'
| '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '/' | '=' | '?'
| '^' | '_' | '`' | '{' | '|' | '}' | '~' | '.' -> true
| _ -> false
in
let is_let_dig = Ascii.is_alphanum in
let is_let_dig_hyp c = Ascii.is_alphanum c || c = '-' in
let rec label_seq s last k =
let rec loop s last c k =
if k > last then None else
if is_let_dig_hyp s.[k] && c <= 63 then loop s last (c + 1) (k + 1) else
if c > 63 || not (is_let_dig s.[k - 1]) then None else
match s.[k] with
| '>' -> Some k
| '.' -> label_seq s last (k + 1)
| c -> None
in
if k > last || not (is_let_dig s.[k]) then None else
loop s last 1 (k + 1)
in
let rec atext_seq s last k =
if k > last then None else
if is_atext_plus_dot s.[k] then atext_seq s last (k + 1) else
if s.[k] = '@' && is_atext_plus_dot s.[k - 1]
then label_seq s last (k + 1)
else None
in
if start > last || s.[start] <> '<' then None else
atext_seq s last (start + 1)
let autolink_uri s ~last ~start =
(* https://spec.commonmark.org/current/#uri-autolink *)
let is_scheme_letter = function
| 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '+' | '.' | '-' -> true | _ -> false
in
let is_uri_char = function
|'\x00' .. '\x1F' | '\x7F' | ' ' | '<' | '>' -> false | _ -> true
in
let rec rest s last k =
if k > last then None else
if is_uri_char s.[k] then rest s last (k + 1) else
if s.[k] = '>' then Some k else None
in
let rec scheme s last c k =
if k > last then None else
if is_scheme_letter s.[k] && c <= 32 then scheme s last (c + 1) (k + 1) else
if s.[k] = ':' && 2 <= c && c <= 32 then rest s last (k + 1) else None
in
let next = start + 1 in
if next > last || s.[start] <> '<' || not (Ascii.is_letter s.[next])
then None else scheme s last 1 (next + 1)
(* Raw HTML *)
let tag_name s ~last ~start : last option =
(* https://spec.commonmark.org/current/#tag-name *)
let rec loop s last k =
if k > last || not (Ascii.is_alphanum s.[k] || s.[k] = '-')
then Some (k - 1) else loop s last (k + 1)
in
if start > last || not (Ascii.is_letter s.[start]) then None else
loop s last (start + 1)
let attribute_name s ~last ~start : next option =
(* https://spec.commonmark.org/current/#attribute-name *)
let is_start = function
| c when Ascii.is_letter c -> true | '_' | ':' -> true | _ -> false
in
let is_cont = function
| c when Ascii.is_alphanum c -> true | '_' | '.' | ':' | '-' -> true
| _ -> false
in
let rec loop s last k =
if k > last || not (is_cont s.[k])
then Some (k - 1) else loop s last (k + 1)
in
if start > last || not (is_start s.[start]) then None else
loop s last (start + 1)
let attribute_value ~next_line s lines ~line spans ~start =
(* https://spec.commonmark.org/current/#attribute-value *)
if start > line.last then None else match s.[start] with
| '\'' | '\"' as char ->
(* https://spec.commonmark.org/current/#double-quoted-attribute-value
https://spec.commonmark.org/current/#unquoted-attribute-value *)
accept_to ~char ~next_line s lines ~line spans ~after:start
| c ->
(* https://spec.commonmark.org/current/#unquoted-attribute-value *)
let cont = function
| ' ' | '\t' | '\"' | '\'' | '=' | '<' | '>' | '`' -> false | _ -> true
in
let rec loop s last k =
if k > last || not (cont s.[k]) then
let last = k - 1 in
Some (lines, line, push_span ~line start last spans, last)
else loop s last (k + 1)
in
loop s line.last (start + 1)
let attribute ~next_line s lines ~line spans ~start =
(* https://spec.commonmark.org/current/#attribute *)
(* https://spec.commonmark.org/current/#attribute-value-specification *)
match attribute_name s ~last:line.last ~start with
| None -> None
| Some end_name ->
let spans = push_span ~line start end_name spans in
let start = end_name + 1 in
match first_non_blank_over_nl' ~next_line s lines ~line spans ~start with
| None -> None
| Some (lines', line', spans', last_blank) ->
let nb = last_blank + 1 in
if s.[nb] <> '='
then Some (lines, line, spans, end_name) (* no value *) else
let spans' = push_span ~line nb nb spans' in
let start = nb + 1 in
match
first_non_blank_over_nl'
~next_line s lines' ~line:line' spans' ~start
with
| None -> None
| Some (lines, line, spans, last_blank) ->
let start = last_blank + 1 in
attribute_value ~next_line s lines ~line spans ~start
let open_tag ~next_line s lines ~line ~start:tag_start = (* tag_start has < *)
(* https://spec.commonmark.org/current/#open-tag *)
match tag_name s ~last:line.last ~start:(tag_start + 1) with
| None -> None
| Some tag_end_name ->
let rec loop ~next_line s lines ~line spans ~start =
match
first_non_blank_over_nl' ~next_line s lines ~line spans ~start
with
| None -> None
| Some (lines, line, spans, last_blank) ->
let next = last_blank + 1 in
match s.[next] with
| '>' ->
Some (lines, line, push_span ~line next next spans, next)
| '/' ->
let last = next + 1 in
if last > line.last || s.[last] <> '>' then None else
Some (lines, line, push_span ~line next last spans, last)
| c ->
if next = start then None else
match attribute ~next_line s lines ~line spans ~start:next with
| None -> None
| Some (lines, line, spans, last) ->
loop ~next_line s lines ~line spans ~start:(last + 1)
in
let start = tag_end_name + 1 in
let span = { line with first = tag_start; last = tag_end_name} in
let spans = [tag_start, span] in
loop ~next_line s lines ~line spans ~start
let closing_tag ~next_line s ls ~line ~start:tag_start = (* start is on </ *)
(* https://spec.commonmark.org/current/#closing-tag *)
match tag_name s ~last:line.last ~start:(tag_start + 2) with
| None -> None
| Some tag_name_end ->
let span = { line with first = tag_start; last = tag_name_end} in
let spans = [tag_start, span] in
let start = tag_name_end + 1 in
match first_non_blank_over_nl' ~next_line s ls ~line spans ~start with
| None -> None
| Some (lines, line, spans, last_blank) ->
let last = last_blank + 1 in
if s.[last] <> '>' then None else
Some (lines, line, push_span ~line last last spans, last)
let declaration ~next_line s lines ~line ~start = (* start is on <!{letter} *)
(* https://spec.commonmark.org/current/#declaration *)
accept_to ~char:'>' ~next_line s lines ~line [] ~after:start
let processing_instruction ~next_line s lines ~line ~start = (* start is on <?*)
(* https://spec.commonmark.org/current/#processing-instruction *)
let rec loop ~next_line s lines line start acc k =
if k > line.last then match next_line lines with
| None -> None
| Some (lines, newline) ->
let acc = push_span ~line start line.last acc in
let start = first_non_blank_in_span s newline in
loop ~next_line s lines newline start acc start
else
if s.[k] <> '?' then loop ~next_line s lines line start acc (k + 1) else
let last = k + 1 in
if last <= line.last && s.[last] = '>' (* ?> *)
then Some (lines, line, push_span ~line start last acc, last)
else loop ~next_line s lines line start acc last
in
loop ~next_line s lines line start [] (start + 2)
let html_comment ~next_line s lines ~line ~start = (* start is on <!- *)
(* https://spec.commonmark.org/current/#html-comment *)
let rec loop ~next_line s lines line start acc k =
if k > line.last then match next_line lines with
| None -> None
| Some (lines, newline) ->
let acc = push_span ~line start line.last acc in
let start = first_non_blank_in_span s newline in
loop ~next_line s lines newline start acc start
else
if s.[k] = '-' && s.[k - 1] <> '-' then
let last = k + 2 in
if last <= line.last && s.[k + 1] = '-' then
if s.[last] = '>' (* --> and we do not end with - *)
then Some (lines, line, push_span ~line start last acc, last)
else None (* -- in the input *)
else loop ~next_line s lines line start acc (k + 1)
else loop ~next_line s lines line start acc (k + 1)
in
(* Check we have at least <!-- and not <!--> or <!---> *)
if (start + 3 > line.last) || not (s.[start + 3] = '-') ||
(start + 4 <= line.last && s.[start + 4] = '>') ||
(start + 5 <= line.last && s.[start + 4] = '-' && s.[start + 5] = '>')
then None else loop ~next_line s lines line start [] (start + 4)
let cdata_section ~next_line s lines ~line ~start = (* start is on <![ *)
(* https://spec.commonmark.org/current/#cdata-section *)
let rec loop ~next_line s lines line start acc k =
if k > line.last then match next_line lines with
| None -> None
| Some (lines, newline) ->
let acc = push_span ~line start line.last acc in
let start = first_non_blank_in_span s newline in
loop ~next_line s lines newline start acc start
else
if s.[k] <> ']' then loop ~next_line s lines line start acc (k + 1) else
let last = k + 2 in
if last <= line.last && s.[k + 1] = ']' && s.[last] = '>' (* ]> *)
then Some (lines, line, push_span ~line start last acc, last)
else loop ~next_line s lines line start acc (k + 1)
in
if start + 8 > line.last || (* not CDATA[ *)
not (s.[start + 3] = 'C' && s.[start + 4] = 'D' && s.[start + 5] = 'A' &&
s.[start + 6] = 'T' && s.[start + 7] = 'A' && s.[start + 8] = '[')
then None else loop ~next_line s lines line start [] (start + 9)
let raw_html ~next_line s lines ~line ~start =
(* https://spec.commonmark.org/current/#html-tag *)
let next = start + 1 and last = line.last in
if next > last || s.[start] <> '<' then None else match s.[next] with
| '/' -> closing_tag ~next_line s lines ~line ~start
| '?' -> processing_instruction ~next_line s lines ~line ~start
| '!' ->
let next = next + 1 in
if next > last then None else
begin match s.[next] with
| '-' -> html_comment ~next_line s lines ~line ~start
| '[' -> cdata_section ~next_line s lines ~line ~start
| c when Ascii.is_letter c -> declaration ~next_line s lines ~line ~start
| _ -> None
end
| c -> open_tag ~next_line s lines ~line ~start
(* Links *)
let link_destination s ~last ~start =
let delimited s ~last ~start = (* start has '<' *)
(* https://spec.commonmark.org/current/#link-destination 1st *)
let rec loop s start last prev_byte k =
if k > last then None else match s.[k] with
| '\n' | '\r' -> None
| '\\' when prev_byte = '\\' -> loop s start last '\x00' (k + 1)
| '<' when prev_byte <> '\\' -> None
| '>' when prev_byte <> '\\' -> Some (true, (start + 1), k - 1)
| c -> loop s start last c (k + 1)
in
loop s start last '\x00' (start + 1)
in
let not_delimited s ~last ~start =
(* https://spec.commonmark.org/current/#link-destination 2nd *)
let rec loop s start last prev_byte bal k =
if k > last then (if bal = 0 then Some (false, start, k - 1) else None)
else match s.[k] with
| '\\' when prev_byte = '\\' -> loop s start last '\x00' bal (k + 1)
| '(' as c when prev_byte <> '\\' -> loop s start last c (bal + 1) (k + 1)
| ')' as c when prev_byte <> '\\' ->
let bal = bal - 1 in
if bal < 0
then Some (false, start, k - 1) (* hit inline link closing ')' *)
else loop s start last c bal (k + 1)
| ' ' -> if k <> start && bal = 0 then Some (false, start, k-1) else None
| c when Ascii.is_control c ->
if k <> start && bal = 0 then Some (false, start, k - 1) else None
| c -> loop s start last c bal (k + 1)
in
loop s start last '\x00' 0 start
in
if start > last then None else
if s.[start] = '<'
then delimited s ~last ~start
else not_delimited s ~last ~start
let link_title ~next_line s lines ~line ~start =
(* https://spec.commonmark.org/current/#link-title *)
let rec paren ~next_line s lines ~line ~prev_bslash start acc k =
if k > line.last then match next_line lines with
| None -> None
| Some (lines, newline) ->
if newline.first > newline.last (* empty *) then None else
let acc = push_span ~line start line.last acc in
let start = first_non_blank_in_span s newline in
let prev_bslash = false in
paren ~next_line s lines ~line:newline ~prev_bslash start acc start
else
if s.[k] = '(' && not prev_bslash then None else
if s.[k] = ')' && not prev_bslash
then Some (lines, line, push_span ~line start (k - 1) acc, k) else
let prev_bslash = s.[k] = '\\' && not prev_bslash in
paren ~next_line s lines ~line ~prev_bslash start acc (k + 1)
in
if start > line.last then None else match s.[start] with
| '\"' | '\'' as char ->
accept_upto ~char ~next_line s lines ~line [] ~after:start
| '(' ->
let start = start + 1 and prev_bslash = false in
paren ~next_line s lines ~line ~prev_bslash start [] start
| _ -> None
let link_label b ~next_line s lines ~line ~start =
(* https://spec.commonmark.org/current/#link-label *)
let rec loop b ~next_line s lines ~line ~prev_byte start acc count k =
if k > line.last then match next_line lines with
| None -> None
| Some (lines, newline) ->
if newline.first > newline.last (* empty *) then None else
let acc = push_span ~line start line.last acc in
let start = first_non_blank_in_span s newline in
let () = if Buffer.length b <> 0 then Buffer.add_char b ' ' in
let prev_byte = '\x00' in
loop b ~next_line s lines ~line:newline ~prev_byte start acc count start
else
if count > 999 then None else match s.[k] with
| '\\' when prev_byte = '\\' ->
Buffer.add_char b '\\';
let prev_byte = '\x00' in
loop b ~next_line s lines ~line ~prev_byte start acc (count + 1) (k + 1)
| ']' when prev_byte <> '\\' ->
let key = Buffer.contents b in
if String.for_all Ascii.is_blank key then None else
let acc = push_span ~line start (k - 1) acc in
Some (lines, line, acc, k, key)
| '[' when prev_byte <> '\\' -> None
| ' ' | '\t' as prev_byte ->
loop b ~next_line s lines ~line ~prev_byte start acc (count + 1) (k + 1)
| c ->
let () =
(* Collapses non initial white *)
if Ascii.is_blank prev_byte && Buffer.length b <> 0
then Buffer.add_char b ' '
in
let d = String.get_utf_8_uchar s k in
let u = Uchar.utf_decode_uchar d in
let u = match Uchar.to_int u with 0x0000 -> Uchar.rep | _ -> u in
let k' = k + Uchar.utf_decode_length d in
let () = match Cmarkit_data.unicode_case_fold u with
| None -> Buffer.add_utf_8_uchar b u
| Some fold -> Buffer.add_string b fold
in
let prev_byte = s.[k] in
loop b ~next_line s lines ~line ~prev_byte start acc (count + 1) k'
in
if start > line.last || s.[start] <> '[' then None else
let start = start + 1 in
(Buffer.reset b;
loop b ~next_line s lines ~line ~prev_byte:'\x00' start [] 0 start)
(* Leaf blocks
The matching functions assume the indentation has been stripped. *)
type html_block_end_cond =
[ `End_str of string | `End_cond_1 | `End_blank | `End_blank_7 ]
type line_type =
| Atx_heading_line of heading_level * byte_pos * first * last
| Blank_line
| Block_quote_line
| Fenced_code_block_line of first * last * (first * last) option
| Html_block_line of html_block_end_cond
| Indented_code_block_line
| List_marker_line of ([ `Ordered of int * char | `Unordered of char ] * last)
| Paragraph_line
| Setext_underline_line of heading_level * last
| Thematic_break_line of last
| Ext_table_row of last
| Ext_footnote_label of rev_spans * last * string
| Nomatch
let thematic_break s ~last ~start =
(* https://spec.commonmark.org/current/#thematic-breaks *)
let rec loop s last count prev k =
if k > last
then (if count < 3 then Nomatch else Thematic_break_line prev) else
if s.[k] = s.[prev] then loop s last (count + 1) k (k + 1) else
if s.[k] = ' ' || s.[k] = '\t' then loop s last count prev (k + 1) else
Nomatch
in
if start > last then Nomatch else match s.[start] with
| '-' | '_' | '*' -> loop s last 1 start (start + 1)
| _ -> Nomatch
let atx_heading s ~last ~start =
(* https://spec.commonmark.org/current/#atx-headings *)
let rec skip_hashes s last k =
if k > last then k else
if s.[k] = '#' then skip_hashes s last (k + 1) else k
in
let find_end s last k = (* blank on k, last + 1 if blank* [#+] blank* *)
let after_blank = first_non_blank s ~last ~start:(k + 1) in
if after_blank > last then after_blank else
if s.[after_blank] <> '#' then after_blank else
let after_hash = skip_hashes s last (after_blank + 1) in
let after_blank = first_non_blank s ~last ~start:after_hash in
if after_blank > last || after_blank = after_hash then after_blank else
after_blank - 1 (* this could be the beginning of the end, trigger again *)
in
let rec content s last k =
if k > last then k - 1 else
if not (s.[k] = ' ' || s.[k] = '\t') then content s last (k + 1) else
let end' = find_end s last k in
if end' > last then (k - 1) else content s last end'
in
let rec level s last acc k =
if k > last then Atx_heading_line (acc, k, k, last) else
if s.[k] = '#' then
if acc < 6 then level s last (acc + 1) (k + 1) else Nomatch
else
let first = first_non_blank s ~last ~start:k in
if first > last
then Atx_heading_line (acc, k, last + 1, last) (* empty cases *) else
if first = k then Nomatch (* need a blank *) else
let last =
if s.[first] <> '#' then content s last (first + 1) else
let end' = find_end s last (first - 1 (* start on blank *)) in
if end' > last then first - 1 else content s last end'
in
Atx_heading_line (acc, k, first, last)
in
if start > last || s.[start] <> '#' then Nomatch else
level s last 1 (start + 1)
let setext_heading_underline s ~last ~start =
(* https://spec.commonmark.org/current/#setext-heading *)
let level c = if c = '=' then 1 else 2 in
let rec underline s last start k =
if k > last then Setext_underline_line (level s.[start], k - 1) else
if s.[k] = s.[start] then underline s last start (k + 1) else
if not (s.[k] = ' ' || s.[k] = '\t') then Nomatch else
let end_blank = first_non_blank s ~last ~start:(k + 1) in
if end_blank > last
then Setext_underline_line (level s.[start], k - 1)
else Nomatch
in
if start > last then Nomatch else
if not (s.[start] = '-' || s.[start] = '=') then Nomatch else
underline s last start (start + 1)
let fenced_code_block_start s ~last ~start =
(* https://spec.commonmark.org/current/#code-fence *)
let rec info s last nobt info_first k =
if k > last then Some (info_first, last) else
if nobt && s.[k] = '`' then raise_notrace Exit else
if not (s.[k] = ' ' || s.[k] = '\t')
then info s last nobt info_first (k + 1) else
let after_blank = first_non_blank s ~last ~start:k in
if after_blank > last then Some (info_first, k - 1) else
info s last nobt info_first after_blank
in
let rec fence s last fence_first k =
if k <= last && s.[k] = s.[fence_first]
then fence s last fence_first (k + 1) else
let fence_last = k - 1 in
let fcount = fence_last - fence_first + 1 in
if fcount < 3 then Nomatch else
let info =
let after_blank = first_non_blank s ~last ~start:k in
if after_blank > last then None else
info s last (s.[fence_first] = '`') after_blank after_blank
in
Fenced_code_block_line (fence_first, fence_last, info)
in
let rec loop s first last k =
if k > last then Nomatch else
if k - first + 1 < 4 && s.[k] = ' ' then loop s first last (k + 1) else
if not (s.[k] = '~' || s.[k] = '`') then Nomatch else
try fence s last k (k + 1) with
| Exit (* backtick fence and info *) -> Nomatch
in
if start > last then Nomatch else loop s start last start
let fenced_code_block_continue ~fence:(fc, fcount) s ~last ~start =
(* https://spec.commonmark.org/current/#code-fence *)
let rec fence s last fence_first k =
if k <= last && s.[k] = fc then fence s last fence_first (k + 1) else
let fence_last = k - 1 in
if fence_last - fence_first + 1 < fcount then raise Exit (* not closing *)
else
let after_blank = first_non_blank s ~last ~start:k in
if after_blank > last then `Close (fence_first, fence_last) else
raise Exit
in
let rec loop s first last k =
if k > last then `Code (* short blank line *) else
if k - first + 1 < 4 && s.[k] = ' ' then loop s first last (k + 1) else
if s.[k] <> fc then `Code else
try fence s last k (k + 1) with Exit -> `Code
in
if start > last then `Code else loop s start last start
let html_start_cond_1_set =
String_set.of_list ["pre"; "script"; "style"; "textarea"]
let html_start_cond_6_set =
String_set.of_list
[ "address"; "article"; "aside"; "base"; "basefont"; "blockquote"; "body";
"caption"; "center"; "col"; "colgroup"; "dd"; "details"; "dialog"; "dir";
"div"; "dl"; "dt"; "fieldset"; "figcaption"; "figure"; "footer"; "form";
"frame"; "frameset"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6"; "head"; "header";
"hr"; "html"; "iframe"; "legend"; "li"; "link"; "main"; "menu";
"menuitem"; "nav"; "noframes"; "ol"; "optgroup"; "option"; "p"; "param";
"section"; "source"; "summary"; "table"; "tbody"; "td"; "tfoot"; "th";
"thead"; "title"; "tr"; "track"; "ul" ]
let html_block_start_5 s ~last ~start = (* 3 first chars checked *)
let next = start + 3 and sub = "CDATA[" in
if start + 8 > last || not (Ascii.match' ~sub s ~start:next) then Nomatch else
Html_block_line (`End_str "]]>") (* 5 *)
let html_block_start_2 s ~last ~start = (* 3 first chars checked *)
let next = start + 3 in
if next > last || s.[next] <> '-' then Nomatch else
Html_block_line (`End_str "-->") (* 2 *)
let html_block_start_7_open_tag s ~last ~start =
(* Has to be on the same line we fake one and use the inline parser *)
let line = { line_pos = Textloc.line_pos_none; first = start; last } in
let next_line () = None in
match open_tag ~next_line s () ~line ~start with
| None -> Nomatch
| Some (_, _, _, tag_end) ->
let next = first_non_blank s ~last ~start:(tag_end + 1) in
if next > last then Html_block_line `End_blank_7 else Nomatch
let html_block_start_7_close_tag s ~last ~start =
(* Has to be on the same line we fake one and use the inline parser *)
let line = { line_pos = Textloc.line_pos_none; first = start; last } in
let next_line () = None in
match closing_tag ~next_line s () ~line ~start with
| None -> Nomatch
| Some (_, _, _, tag_end) ->
let next = first_non_blank s ~last ~start:(tag_end + 1) in
if next > last then Html_block_line `End_blank_7 else Nomatch
let html_block_start s ~last ~start =
(* https://spec.commonmark.org/current/#html-blocks *)
let next = start + 1 in
if next > last || s.[start] <> '<' then Nomatch else
match s.[next] with
| '?' -> Html_block_line (`End_str "?>") (* 3 *)
| '!' ->
let next = next + 1 in
if next > last then Nomatch else
begin match s.[next] with
| '[' -> html_block_start_5 s ~last ~start
| '-' -> html_block_start_2 s ~last ~start
| c when Ascii.is_letter c -> Html_block_line (`End_str ">") (* 4 *)
| _ -> Nomatch
end
| c when Ascii.is_letter c || c = '/' ->
let tag_first = if c = '/' then next + 1 else next in
let tag_last =
let rec find_tag_end s last i =
if i > last || not (Ascii.is_letter s.[i]) then i - 1 else
find_tag_end s last (i + 1)
in
find_tag_end s last tag_first
in
let tag = Ascii.lowercase_sub s tag_first (tag_last - tag_first + 1) in
let is_open_end =
let n = tag_last + 1 in
n > last || s.[n] = ' ' || s.[n] = '\t' || s.[n] = '>'
in
let is_open_close_end =
is_open_end ||
(tag_last + 2 <= last && s.[tag_last + 1] = '/' &&
s.[tag_last + 2] = '>')
in
if c <> '/' then begin
if String_set.mem tag html_start_cond_1_set && is_open_end
then Html_block_line `End_cond_1 (* 1 *) else
if String_set.mem tag html_start_cond_6_set && is_open_close_end
then Html_block_line `End_blank (* 6 *) else
html_block_start_7_open_tag s ~last ~start
end else begin
if String_set.mem tag html_start_cond_6_set && is_open_close_end
then Html_block_line `End_blank (* 6 *) else
html_block_start_7_close_tag s ~last ~start
end
| _ -> Nomatch
let html_block_end_cond_1 s ~last ~start =
(* https://spec.commonmark.org/current/#html-blocks end condition 1. *)
let rec loop s last k =
if k + 3 > last then false else
if s.[k] <> '<' || s.[k + 1] <> '/' then loop s last (k + 1) else
let next = k + 2 in
let is_end_tag = match s.[next] with
| 'p' -> Ascii.caseless_match ~sub:"pre>" s ~start:next
| 's' ->
if s.[k + 3] = 't'
then Ascii.caseless_match ~sub:"style>" s ~start:next
else Ascii.caseless_match ~sub:"script>" s ~start:next
| 't' -> Ascii.caseless_match ~sub:"textarea>" s ~start:next
| _ -> false
in
if is_end_tag then true else loop s last (k + 1)
in
loop s last start
let html_block_end ~end_cond s ~last ~start = match end_cond with
| `End_str str -> sub_includes ~affix:str s ~first:start ~last
| `End_cond_1 -> html_block_end_cond_1 s ~last ~start
| `End_blank | `End_blank_7 -> first_non_blank s ~last ~start = last + 1
let ext_table_row s ~last ~start =
if start > last || s.[start] <> '|' then Nomatch else
let first = start + 1 in
let last_nb = last_non_blank s ~first ~start:last in
let before = last_nb - 1 in
if last_nb < first || s.[last_nb] <> '|' ||
(before >= first && s.[before] = '\\')
then Nomatch else Ext_table_row last_nb
let ext_footnote_label buf s ~line_pos ~last ~start =
if start + 1 > last || s.[start] <> '[' || s.[start + 1] <> '^'
then Nomatch else
let rbrack = first_non_escaped_char ']' s ~last ~start:(start + 2) in
let colon = rbrack + 1 in
if colon > last || s.[colon] <> ':' || colon - start + 1 < 5 then Nomatch else
(* Get the normalized label *)
let line = { line_pos; first = start; last } in
let next_line () = None in
match link_label buf ~next_line s () ~line ~start with
| None -> (* should not happen *) Nomatch
| Some (_, _, rev_spans, _, key) ->
Ext_footnote_label (rev_spans, colon, key)
let could_be_link_reference_definition s ~last ~start =
(* https://spec.commonmark.org/current/#link-reference-definition *)
let rec loop s first last k =
if k > last then false else
if k - first + 1 < 4 && s.[k] = ' ' then loop s first last (k + 1) else
s.[k] = '['
in
if start > last then false else loop s start last start
(* Container blocks *)
let list_marker s ~last ~start =
(* https://spec.commonmark.org/current/#list-marker *)
if start > last then Nomatch else match s.[start] with
| '-' | '+' | '*' as c ->
let next = start + 1 in
if next > last || Ascii.is_blank s.[next]
then List_marker_line (`Unordered c, start)
else Nomatch
| '0' .. '9' as c ->
let[@inline] digit c = Char.code c - 0x30 in
let rec loop s last count acc k =
if k > last || count > 9 then Nomatch else
match s.[k] with
| '0' .. '9' as c ->
loop s last (count + 1) (acc * 10 + digit c) (k + 1)
| '.' | ')' as c ->
let next = k + 1 in
if next > last || Ascii.is_blank s.[next]
then List_marker_line (`Ordered (acc, c), k) else Nomatch
| _ -> Nomatch
in
loop s last 1 (digit c) (start + 1)
| _ -> Nomatch
let ext_task_marker s ~last ~start =
if start + 1 > last then None else
if s.[start] <> '[' then None else
let next = start + 1 in
let u = String.get_utf_8_uchar s next in
if not (Uchar.utf_decode_is_valid u) then None else
let next = next + Uchar.utf_decode_length u in
if next > last then None else
if s.[next] <> ']' then None else
let next = next + 1 in
if next > last
then Some (Uchar.utf_decode_uchar u, last)
else if s.[next] <> ' ' then None else
Some (Uchar.utf_decode_uchar u, next)
(*---------------------------------------------------------------------------
Copyright (c) 2021 The cmarkit programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
---------------------------------------------------------------------------*)