diff --git a/src/sexp/CCSexp.ml b/src/sexp/CCSexp.ml
index bc83efa0..5934ab37 100644
--- a/src/sexp/CCSexp.ml
+++ b/src/sexp/CCSexp.ml
@@ -205,6 +205,7 @@ let parse_string s : t or_error =
 (*$T
   CCResult.to_opt (parse_string "(abc d/e/f \"hello \\\" () world\" )") <> None
   CCResult.to_opt (parse_string "(abc ( d e ffff ) \"hello/world\")") <> None
+  CCResult.to_opt (parse_string "\"\\123\\bcoucou\"") <> None
 *)
 
 (*$inject
diff --git a/src/sexp/CCSexp_lex.mll b/src/sexp/CCSexp_lex.mll
index 4470a75b..c67e66eb 100644
--- a/src/sexp/CCSexp_lex.mll
+++ b/src/sexp/CCSexp_lex.mll
@@ -14,23 +14,47 @@
     let col = start.Lexing.pos_cnum - start.Lexing.pos_bol in
     raise (Error (line,col,msg))
 
+  (* state of the unescaping automaton run by [remove_quotes] *)
+  type unescape_state =
+    | Not_escaped (* not inside an escape sequence *)
+    | Escaped (* a backslash was just read *)
+    | Escaped_int_1 of int (* backslash + 1 decimal digit read so far *)
+    | Escaped_int_2 of int (* backslash + 2 decimal digits read so far *)
+
   (* remove quotes + unescape *)
+  (* [s] is the whole lexeme, surrounding double quotes included.
+     A backslash must be followed by a backslash, a double quote, one of
+     n, r, t, b, or three decimal digits encoding a char code <= 255;
+     any other escape is reported with [error]. *)
   let remove_quotes lexbuf s =
     assert (s.[0] = '"' && s.[String.length s - 1] = '"');
     let buf = Buffer.create (String.length s) in
-    let escaped = ref false in
+    let st = ref Not_escaped in
     for i = 1 to String.length s-2 do
-      match s.[i] with
-      | '\\' when !escaped -> Buffer.add_char buf '\\'; escaped := false
-      | '\\' -> escaped := true
-      | 'n' when !escaped -> Buffer.add_char buf '\n'; escaped := false
-      | 'r' when !escaped -> Buffer.add_char buf '\r'; escaped := false
-      | 't' when !escaped -> Buffer.add_char buf '\t'; escaped := false
-      | '"' when !escaped -> Buffer.add_char buf '"'; escaped := false
-      | c when !escaped -> error lexbuf (Printf.sprintf "wrong escape `%c`" c)
-      | c -> Buffer.add_char buf c;
+      match !st, s.[i] with
+      | Escaped, '\\' -> Buffer.add_char buf '\\'; st := Not_escaped
+      | Not_escaped, '\\' -> st := Escaped
+      | Escaped, 'n' -> Buffer.add_char buf '\n'; st := Not_escaped
+      | Escaped, 'r' -> Buffer.add_char buf '\r'; st := Not_escaped
+      | Escaped, 't' -> Buffer.add_char buf '\t'; st := Not_escaped
+      | Escaped, 'b' -> Buffer.add_char buf '\b'; st := Not_escaped
+      | Escaped, '"' -> Buffer.add_char buf '"'; st := Not_escaped
+      | Escaped, ('0'..'9' as c) ->
+        st := Escaped_int_1 (Char.code c - Char.code '0')
+      | Escaped_int_1 i, ('0'..'9' as c) ->
+        st := Escaped_int_2 (10*i+Char.code c - Char.code '0')
+      | Escaped_int_2 i, ('0'..'9' as c) ->
+        let n = 10*i+Char.code c - Char.code '0' in
+        (* [Char.chr] raises [Invalid_argument] above 255: fail as a lexing error *)
+        if n > 255 then error lexbuf (Printf.sprintf "wrong escape code `%d` (max 255)" n);
+        Buffer.add_char buf (Char.chr n);
+        st := Not_escaped
+      | (Escaped | Escaped_int_1 _ | Escaped_int_2 _), c ->
+        error lexbuf (Printf.sprintf "wrong escape `%c`" c)
+      | Not_escaped, c -> Buffer.add_char buf c;
     done;
     Buffer.contents buf
+
 }
 
 let newline = '\n' | "\r\n"
@@ -40,7 +64,10 @@ let comment_line = ';' [^ '\n']*
 let comment_line = ';' [^ '\n']*
 let printable_char = [^ '\n']
 let id = [^ ')' '(' '"' ' ' '\t' '\r' '\n']+
-let string = '"' ([^ '"' '\\'] | "\\\"" | "\\\\" | "\\n" | "\\t" | "\\r")* '"'
+let num = ['0'-'9']
+let string_item =
+  ([^ '"' '\\'] | "\\\"" | "\\\\" | "\\b" | "\\n" | "\\t" | "\\r" | '\\' num num num )
+let string = '"' string_item* '"'
 
 rule token = parse
   | comment_line { token lexbuf }