linol/thirdparty/lsp/ocaml-lsp-server/vendor/cmarkit/cmarkit_base.mli

401 lines
15 KiB
OCaml

(*---------------------------------------------------------------------------
Copyright (c) 2021 The cmarkit programmers. All rights reserved.
Distributed under the ISC license, see terms at the end of the file.
---------------------------------------------------------------------------*)
(** Low-level internal tools. *)
(** Heterogeneous dictionaries.
Used by {!Cmarkit.Meta}. *)
module Dict : sig
type 'a key
val key : unit -> 'a key
type t
val empty : t
val mem : 'a key -> t -> bool
val add : 'a key -> 'a -> t -> t
val tag : unit key -> t -> t
val remove : 'a key -> t -> t
val find : 'a key -> t -> 'a option
end
(** Text locations.
See {!Cmarkit.Textloc} for documentation. *)
module Textloc : sig
type fpath = string
val file_none : fpath
type byte_pos = int
val byte_pos_none : byte_pos
type line_num = int
val line_num_none : line_num
type line_pos = line_num * byte_pos
val line_pos_first : line_pos
val line_pos_none : line_pos
type t
val none : t
val v :
file:fpath -> first_byte:byte_pos -> last_byte:byte_pos ->
first_line:line_pos -> last_line:line_pos -> t
val file : t -> fpath
val first_byte : t -> byte_pos
val last_byte : t -> byte_pos
val first_line : t -> line_pos
val last_line : t -> line_pos
val is_none : t -> bool
val is_empty : t -> bool
val equal : t -> t -> bool
val compare : t -> t -> int
val set_first : t -> first_byte:byte_pos -> first_line:line_pos -> t
val set_last : t -> last_byte:byte_pos -> last_line:line_pos -> t
val to_first : t -> t
val to_last : t -> t
val before : t -> t
val after : t -> t
val span : t -> t -> t
val reloc : first:t -> last:t -> t
val pp_ocaml : Format.formatter -> t -> unit
val pp_gnu : Format.formatter -> t -> unit
val pp : Format.formatter -> t -> unit
val pp_dump : Format.formatter -> t -> unit
end
(** Node metadata.
See {!Cmarkit.Meta} for documentation. *)
module Meta : sig
type id = int
type t
val none : t
val make : ?textloc:Textloc.t -> unit -> t
val id : t -> id
val textloc : t -> Textloc.t
val with_textloc : keep_id:bool -> t -> Textloc.t -> t
val equal : t -> t -> bool
val compare : t -> t -> int
val is_none : t -> bool
type 'a key
val key : unit -> 'a key
val mem : 'a key -> t -> bool
val add : 'a key -> 'a -> t -> t
val tag : unit key -> t -> t
val remove : 'a key -> t -> t
val find : 'a key -> t -> 'a option
end
type line_span =
{ line_pos : Textloc.line_pos;
first : Textloc.byte_pos;
last : Textloc.byte_pos }
(** The type for line spans. A line position, the first and last
bytes of the span. If the former is greater than the latter,
the span is empty. *)
type line_start = Textloc.byte_pos
(** The type for denoting a line start inside
a CommonMark container (i.e. may not match the text line's first
character). *)
type rev_spans = (line_start * line_span) list
(** A reversed list of line spans, tupled with the byte position on
where the line starts (inside a CommonMark container). The
[line_start] is the start of line in the container, the
[line_span] has the actual data. The characters in the
\[[line_start];[line_span.first - 1]\] are blanks. *)
type 'a next_line = 'a -> ('a * line_span) option
(** The type for getting a new line of input. This is used by certain
multi-line matchers (e.g. raw HTML). *)
(** {1:ascii US-ASCII matching} *)
(** US-ASCII matching. *)
module Ascii : sig
val is_control : char -> bool
val is_letter : char -> bool
val is_upper : char -> bool
val is_lower : char -> bool
val is_digit : char -> bool
val is_hex_digit : char -> bool
val hex_digit_to_int : char -> int
val is_alphanum : char -> bool
val is_white : char -> bool
val is_punct : char -> bool
val is_blank : char -> bool
val caseless_starts_with : prefix:string -> string -> bool
end
(** {1:uchar Unicode matching} *)
val prev_uchar : string -> first:int -> before:int -> Uchar.t
(** [prev_uchar s ~first ~before] is the first Unicode character before
byte position [before] in the range \[[first];[before-1]\]. If
[before <= first], U+0020 is returned (a Unicode whitespace character). *)
val next_uchar : string -> last:int -> after:int -> Uchar.t
(** [next_uchar s ~last ~after] is the next Unicode character after
byte position [after] in the range \[[after+1];[last]\]. If [after
>= last], U+0020 is returned (a Unicode whitespace character). *)
(** {1:content Textual content} *)
(** Textual content.
Ensures UTF-8 validity, unescapes, resolves numeric and named character
references. *)
module Text : sig
val utf_8_clean_unesc_unref :
Buffer.t -> string -> first:int -> last:int -> string
(** [utf_8_clean_unesc_unref b s ~first ~last] unescapes CommonMark
escapes, resolves HTML entity and character references in the
given span and replaces U+0000 and UTF-8 decoding errors by
{!Uchar.rep}. [b] is used as scratch space. If [last > first]
or [first] and [last] are not valid indices of [s] is [""]. *)
val utf_8_clean_unref :
Buffer.t -> string -> first:int -> last:int -> string
(** [utf_8_clean_unref b s ~first ~last] is like
{!utf_8_clean_unesc_unref} but does not unsescape. *)
val utf_8_clean_raw :
?pad:int -> Buffer.t -> string -> first:int -> last:int -> string
(** [utf_8_clean_raw b s ~first ~last] replaces U+0000 and UTF-8
decoding errors by {!Uchar.rep}. [b] is used as scratch space.
[pad] (defaults to [0]) specifies a number of U+0020 spaces to prepend.
If [last > first] or [first] and [last] are not valid indices of
[s] is either [""] or the padded string. *)
end
(** {1:result Result types} *)
type indent = int
(** The type for indentation magnitude. *)
type byte_pos = Textloc.byte_pos
(** The type for positions. *)
type first = Textloc.byte_pos
(** The type for the first first byte position of a parsed construct. *)
type last = Textloc.byte_pos
(** The type for the last byte position of a parsed construct. *)
type next = Textloc.byte_pos
(** The type for a byte position after a parsed construct. The byte position
may be invalid (end of input range) or on the newline. *)
type heading_level = int
(** The type for heading levels. *)
(** {1:blanks Newlines, runs, blanks and indents} *)
val run_of : char:char -> string -> last:byte_pos -> start:byte_pos -> last
(** [run_of ~char s ~last ~start] is the last byte position of a
consecutive run of [char] in the range \[[start];[last]\] or
[start - 1] if [start] is not [char]. *)
val first_non_blank : string -> last:byte_pos -> start:byte_pos -> byte_pos
(** [first_non_blank s ~last ~start] is the first byte position in the
range \[[start];[last]\] that is not blank and [last + 1] if there
is none. *)
val first_non_blank_in_span : string -> line_span -> byte_pos
(** [first_non_blank_in_span s span] is
[first_non_blank s ~last:span.last ~start:span.first]. *)
val last_non_blank : string -> first:byte_pos -> start:byte_pos -> byte_pos
(** [last_non_blank s ~first ~start] is the last position in the
range \[[first];[start]\] that is non blank and [first - 1] if
there is none. *)
val rev_drop_spaces : string -> first:byte_pos -> start:byte_pos -> byte_pos
(** [rev_drop_spaces s ~first ~start] is the last position in the
range \[[first];[start]\] that is not U+0020 and [first - 1] if
there is none. *)
val first_non_blank_over_nl :
next_line: 'a next_line -> string -> 'a -> line:line_span -> start:int ->
[ `None
| `This_line of byte_pos
| `Next_line of 'a * line_span * byte_pos ]
(** [first_non_blank_over_nl ~next_line s ~line ~start] is the first
byte position starting with [start] that is not blank in [line] or
on the next line as determined by [next_line]. Returns [`None] if
there is no such position. *)
val first_non_escaped_char :
char -> string -> last:byte_pos -> start:byte_pos -> byte_pos
(** [first_non_escaped_char c s ~last ~start] is the first byte position
in the range \[[start];[last]\] that has [c] unescaped and [last + 1]
if there is none. *)
(** {1:autolinks Autolinks} *)
val autolink_email : string -> last:byte_pos -> start:byte_pos -> last option
(** [autolink_email s ~last ~start] matches an email autolink starting at
[start] in the range \[[start];[last]\] (assumed on the same line). *)
val autolink_uri : string -> last:byte_pos -> start:byte_pos -> last option
(** [autolink_uri s ~last ~start] matches an URI autolink starting at
[start] in the range \[[start];[last]\] (assumed on the same line). *)
(** {1:raw_html Raw HTML} *)
val raw_html :
next_line:'a next_line -> string -> 'a -> line:line_span -> start:byte_pos ->
('a * line_span * rev_spans * last) option
(** [raw_html ~next_line s lines ~line ~start] matches raw HTML on
line [line] starting at [start]. [next_line] is used to get new
lines on [lines]. Returns [Some (lines, last_line, spans,
last_byte)] with [lines] the lines after consuming the raw HTML,
[last_line] the line where it stops [spans] the byte ranges of [s]
that make up the raw HTML in reverse order and [last_byte] the
last byte included in it (guaranteed to be on [last_line]). *)
(** {1:link Links} *)
val link_destination :
string -> last:byte_pos -> start:byte_pos -> (bool * first * last) option
(** [link_destination s ~last ~start] matches a link destination
starting at [start] in the range \[[start];[last]\] (assumed on
the same line). This is [Some (delimited, first, last)] with the
data in \[[first];[last]\] the destination data. [delimited] is
[true] if [first-1] is '<' and [last + 1] is '>'. *)
val link_title :
next_line:'a next_line -> string -> 'a -> line:line_span -> start:byte_pos ->
('a * line_span * rev_spans * last) option
(** [link_title ~next_line s lines ~line ~last] is a link title on line [line]
starting at [start]. Returns [Some (lines, last_line, spans, last)] with
[lines] the lines after consuming the title, [last_line] the line where
it stops, [spans] the byte ranges of [s] that make up the title in reverse
order, [last] is on the closing delimiter and guaranteed to be on
[last_line]. *)
val link_label :
Buffer.t -> next_line:'a next_line -> string -> 'a -> line:line_span ->
start:byte_pos -> ('a * line_span * rev_spans * last * string) option
(** [link_label buf ~next_line s lines ~line ~start] matches a link label
on line [line] starting at [start]. The byte ranges have the label's
content, the string is the normalized label. [buf] is used as scratch
space. *)
(** {1:leaf_block Leaf blocks}
Unless otherwise noted [start] is always after leading blanks. *)
type html_block_end_cond =
[ `End_str of string | `End_cond_1 | `End_blank | `End_blank_7 ]
(** The type for HTML block end conditions. *)
type line_type =
| Atx_heading_line of heading_level * byte_pos (* after # *) * first * last
| Blank_line
| Block_quote_line
| Fenced_code_block_line of first * last * (first * last) option
| Html_block_line of html_block_end_cond
| Indented_code_block_line
| List_marker_line of ([ `Ordered of int * char | `Unordered of char ] * last)
| Paragraph_line
| Setext_underline_line of heading_level * last
| Thematic_break_line of last
| Ext_table_row of last
| Ext_footnote_label of rev_spans * last * string
| Nomatch (* built-in [None] to avoid option allocs *)
val thematic_break : string -> last:byte_pos -> start:byte_pos -> line_type
(** [thematic_break s ~last ~start] matches a thematic break in the range
in the range \[[start];[last]\]. The returned position is the last
non-blank. *)
val atx_heading :
string -> last:byte_pos -> start:byte_pos -> line_type
(** [atx_heading s ~first ~last] is an ATX heading in the range
\[[start];[last]\]. *)
val setext_heading_underline :
string -> last:byte_pos -> start:byte_pos -> line_type
(** [setext_heading_underline s ~last ~start] is a setext heading
underline in the range \[[start];[last]\]. The returned position
is the last underline char. *)
val fenced_code_block_start :
string -> last:byte_pos -> start:byte_pos -> line_type
(** [fenced_code_block_start s ~last ~start] is the start of a fenced
code block line in the range \[[start];[last]\]. The first span is
the fence and the second one is the info string (if any). *)
val fenced_code_block_continue :
fence:char * int -> string -> last:byte_pos -> start:byte_pos ->
[ `Close of first * last | `Code ]
(** [fenced_code_block_continue ~fence s ~last ~start] indicates
whether the fence code continues or closes in the the range
\[[start];[last]\] given the opening [open] which indicates the
indent, fence char and number of fence chars. *)
val html_block_start :
string -> last:byte_pos -> start:byte_pos -> line_type
(** [html_block_start s ~last ~start] matches the start of an HTML
block starting at [start] in the range \[[start];[last]\] and on
success returns the condition to end it. *)
val html_block_end :
end_cond:html_block_end_cond -> string -> last:byte_pos -> start:byte_pos ->
bool
(** [html_block ~end_code s ~last ~start] is [true] if the HTML block
end with [end_code] in the the range \[[start];[last]\] *)
val ext_table_row : string -> last:byte_pos -> start:byte_pos -> line_type
(** [ext_table s ~last ~start] matches a table row in the range
\[[start];[last]\]. The returned position is the rightmost [|]. *)
val ext_footnote_label :
Buffer.t -> string -> line_pos:Textloc.line_pos -> last:byte_pos ->
start:byte_pos -> line_type
(** [ext_footnote_label s ~last ~start] matches a footnote label the range
\[[start];[last]\]. The returned position is the rightmost [:].
This remains on the same line. *)
val could_be_link_reference_definition :
string -> last:byte_pos -> start:byte_pos -> bool
(** [could_be_link_reference_definition s ~last ~start] is [true] if
in the range \[[start];[last]\] could hold a link reference definition. *)
(** {1:container Container blocks} *)
val list_marker :
string -> last:byte_pos -> start:byte_pos -> line_type
(** [list_marker s ~last ~start] is a list marker in the range
\[[start];[last]\]. This checks there's at least one space
following unless the item is empty. *)
val ext_task_marker :
string -> last:byte_pos -> start:byte_pos -> (Uchar.t * last) option
(** [ext_task_marker s ~last ~start] is a list task item marker in the
range \[[start];[last]\]. *)
(*---------------------------------------------------------------------------
Copyright (c) 2021 The cmarkit programmers
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
---------------------------------------------------------------------------*)