mirror of
https://github.com/c-cube/ocaml-containers.git
synced 2025-12-07 03:35:30 -05:00
91 lines
2.9 KiB
OCaml
91 lines
2.9 KiB
OCaml
{
|
|
open CCShims_
|
|
type token =
|
|
| ATOM of string
|
|
| LIST_OPEN
|
|
| LIST_CLOSE
|
|
| SEXP_COMMENT
|
|
| EOI
|
|
|
|
(* location + message *)
|
|
exception Error of int * int * string
|
|
|
|
let error lexbuf msg =
|
|
let start = Lexing.lexeme_start_p lexbuf in
|
|
let line = start.Lexing.pos_lnum in
|
|
let col = start.Lexing.pos_cnum - start.Lexing.pos_bol in
|
|
raise (Error (line,col,msg))
|
|
|
|
type unescape_state =
|
|
| Not_escaped
|
|
| Escaped
|
|
| Escaped_int_1 of int
|
|
| Escaped_int_2 of int
|
|
|
|
let char_equal (a : char) b = Stdlib.(=) a b
|
|
|
|
(* remove quotes + unescape *)
|
|
let remove_quotes lexbuf s =
|
|
assert (char_equal s.[0] '"' && char_equal s.[String.length s - 1] '"');
|
|
let buf = Buffer.create (String.length s) in
|
|
let st = ref Not_escaped in
|
|
for i = 1 to String.length s-2 do
|
|
match !st, s.[i] with
|
|
| Escaped, '\\' -> Buffer.add_char buf '\\'; st := Not_escaped
|
|
| Not_escaped, '\\' -> st := Escaped
|
|
| Escaped, 'n' -> Buffer.add_char buf '\n'; st := Not_escaped
|
|
| Escaped, 'r' -> Buffer.add_char buf '\r'; st := Not_escaped
|
|
| Escaped, 't' -> Buffer.add_char buf '\t'; st := Not_escaped
|
|
| Escaped, 'b' -> Buffer.add_char buf '\b'; st := Not_escaped
|
|
| Escaped, '"' -> Buffer.add_char buf '"'; st := Not_escaped
|
|
| Escaped, ('0'..'9' as c) ->
|
|
st := Escaped_int_1 (Char.code c - Char.code '0')
|
|
| Escaped_int_1 i, ('0'..'9' as c) ->
|
|
st := Escaped_int_2 (10*i+Char.code c - Char.code '0')
|
|
| Escaped_int_2 i, ('0'..'9' as c) ->
|
|
let n = 10*i+Char.code c - Char.code '0' in
|
|
if n < 256 then (
|
|
Buffer.add_char buf (Char.chr n);
|
|
) else (
|
|
(* non-ascii unicode code point: encode to utf8 on the fly *)
|
|
let c =
|
|
try Uchar.of_int n
|
|
with _ ->
|
|
failwith (Printf.sprintf "CCSexp: invalid unicode codepont '%d'" n)
|
|
in
|
|
CCUtf8_string.uchar_to_bytes c (Buffer.add_char buf)
|
|
);
|
|
st := Not_escaped
|
|
| (Escaped | Escaped_int_1 _ | Escaped_int_2 _), c ->
|
|
error lexbuf (Printf.sprintf "wrong escape `%c`" c)
|
|
| Not_escaped, c -> Buffer.add_char buf c;
|
|
done;
|
|
Buffer.contents buf
|
|
|
|
}
|
|
|
|
let newline = '\n' | "\r\n"
|
|
let white = [' ' '\r' '\t'] | newline
|
|
|
|
let comment_line = ';' [^ '\n']*
|
|
let printable_char = [^ '\n']
|
|
|
|
let id = [^ ')' '(' '"' ' ' '\t' '\r' '\n']+
|
|
let num = ['0'-'9']
|
|
let string_item =
|
|
([^ '"' '\\'] | "\\\"" | "\\\\" | "\\b" | "\\n" | "\\t" | "\\r" | '\\' num num num )
|
|
let string = '"' string_item* '"'
|
|
|
|
rule token = parse
|
|
| "#;" { SEXP_COMMENT }
|
|
| comment_line { token lexbuf }
|
|
| newline { Lexing.new_line lexbuf; token lexbuf }
|
|
| white { token lexbuf }
|
|
| eof { EOI }
|
|
| '(' { LIST_OPEN }
|
|
| ')' { LIST_CLOSE }
|
|
| id { ATOM (Lexing.lexeme lexbuf) }
|
|
| string { ATOM (remove_quotes lexbuf (Lexing.lexeme lexbuf)) }
|
|
| _ as c
|
|
{ error lexbuf (Printf.sprintf "lexing failed on char `%c`" c) }
|
|
|