From 5581e7b5a8574f45f0ebe690ac53de0290221992 Mon Sep 17 00:00:00 2001
From: Darren Ldl
Date: Tue, 8 Dec 2020 20:53:51 +1100
Subject: [PATCH] Added fuzzing for CCUtf8_string.uchar_to_bytes

---
 ...char_to_bytes_is_same_as_simple_version.ml | 168 ++++++++++++++++++
 fuzz/dune | 1 +
 2 files changed, 169 insertions(+)
 create mode 100644 fuzz/ccutf8_string_uchar_to_bytes_is_same_as_simple_version.ml

diff --git a/fuzz/ccutf8_string_uchar_to_bytes_is_same_as_simple_version.ml b/fuzz/ccutf8_string_uchar_to_bytes_is_same_as_simple_version.ml
new file mode 100644
index 00000000..af930e6c
--- /dev/null
+++ b/fuzz/ccutf8_string_uchar_to_bytes_is_same_as_simple_version.ml
@@ -0,0 +1,168 @@
+let simple_uchar_to_string (c : Uchar.t) : string = (* Reference UTF-8 encoder: spells out the 1 to 4 byte encoding of [c] as explicit lists of booleans, so the fuzz test at the bottom of this file can compare it with CCUtf8_string.uchar_to_bytes. Fails with Failure if the integer value of [c] is above 0x10FFFF. *)
+  let c = Uchar.to_int c in (* from here on [c] is the integer code point *)
+  let bits = (* 64 booleans, most significant first: bits.(i) is bit (63 - i) of [c] as seen through an arithmetic right shift *)
+    Array.make 64 false (* placeholder contents; mapi below recomputes every cell from its index *)
+    |> Array.mapi (fun i _ ->
+        ((Int.shift_right c (63 - i)) land 0x1) <> 0 (* NOTE(review): only shift amounts 0..20 are ever read back through get_start_from_right; larger shifts are computed but unused. Int.shift_right is presumably unspecified past Sys.int_size - confirm if narrow-int targets matter. *)
+      )
+  in
+  let char_of_bit_list bits = (* packs a list of exactly 8 booleans, most significant bit first, into one char *)
+    let bits = Array.of_list bits in
+    assert (Array.length bits = 8); (* every inner list built below has 8 entries *)
+    let res = ref 0 in
+    for i=0 to 7 do (* bits.(0) contributes 0x80, bits.(7) contributes 0x01 *)
+      if bits.(i) then
+        res := !res lor (0x1 lsl (7-i))
+    done;
+    Char.chr !res
+  in
+  let get_start_from_right i = (* bit [i] of the code point, counting from the least significant bit *)
+    Array.get bits (63 - i)
+  in
+  let chars = (* one list of 8 booleans per output byte; the layout is chosen by the magnitude of [c] *)
+    if c <= 0x7F then ( (* one byte: 0xxxxxxx *)
+      [
+        [
+          false;
+          get_start_from_right 6;
+          get_start_from_right 5;
+          get_start_from_right 4;
+          get_start_from_right 3;
+          get_start_from_right 2;
+          get_start_from_right 1;
+          get_start_from_right 0;
+        ]
+      ]
+    )
+    else if c <= 0x7FF then ( (* two bytes: 110xxxxx 10xxxxxx *)
+      [
+        [ (* lead byte: prefix 110, then bits 10..6 *)
+          true;
+          true;
+          false;
+          get_start_from_right 10;
+          get_start_from_right 9;
+          get_start_from_right 8;
+          get_start_from_right 7;
+          get_start_from_right 6;
+        ];
+        [ (* continuation byte: prefix 10, then bits 5..0 *)
+          true;
+          false;
+          get_start_from_right 5;
+          get_start_from_right 4;
+          get_start_from_right 3;
+          get_start_from_right 2;
+          get_start_from_right 1;
+          get_start_from_right 0;
+        ];
+      ]
+    )
+    else if c <= 0xFFFF then ( (* three bytes: 1110xxxx 10xxxxxx 10xxxxxx *)
+      [
+        [ (* lead byte: prefix 1110, then bits 15..12 *)
+          true;
+          true;
+          true;
+          false;
+          get_start_from_right 15;
+          get_start_from_right 14;
+          get_start_from_right 13;
+          get_start_from_right 12;
+        ];
+        [ (* continuation byte: prefix 10, then bits 11..6 *)
+          true; false; (* NOTE(review): the SOURCE line wrap falls between these two entries and carries no + marker; the hunk header counts 168 added lines, which suggests they sat on separate lines. Confirm against the original patch. *)
+          get_start_from_right 11;
+          get_start_from_right 10;
+          get_start_from_right 9;
+          get_start_from_right 8;
+          get_start_from_right 7;
+          get_start_from_right 6;
+        ];
+        [ (* continuation byte: prefix 10, then bits 5..0 *)
+          true;
+          false;
+          get_start_from_right 5;
+          get_start_from_right 4;
+          get_start_from_right 3;
+          get_start_from_right 2;
+          get_start_from_right 1;
+          get_start_from_right 0;
+        ];
+      ]
+    )
+    else if c <= 0x10FFFF then ( (* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx *)
+      [
+        [ (* lead byte: prefix 11110, then bits 20..18 *)
+          true;
+          true;
+          true;
+          true;
+          false;
+          get_start_from_right 20;
+          get_start_from_right 19;
+          get_start_from_right 18;
+        ];
+        [ (* continuation byte: prefix 10, then bits 17..12 *)
+          true;
+          false;
+          get_start_from_right 17;
+          get_start_from_right 16;
+          get_start_from_right 15;
+          get_start_from_right 14;
+          get_start_from_right 13;
+          get_start_from_right 12;
+        ];
+        [ (* continuation byte: prefix 10, then bits 11..6 *)
+          true;
+          false;
+          get_start_from_right 11;
+          get_start_from_right 10;
+          get_start_from_right 9;
+          get_start_from_right 8;
+          get_start_from_right 7;
+          get_start_from_right 6;
+        ];
+        [ (* continuation byte: prefix 10, then bits 5..0 *)
+          true;
+          false;
+          get_start_from_right 5;
+          get_start_from_right 4;
+          get_start_from_right 3;
+          get_start_from_right 2;
+          get_start_from_right 1;
+          get_start_from_right 0;
+        ];
+      ]
+    )
+    else (
+      failwith "Unexpected case" (* reached only when the integer value is above 0x10FFFF *)
+    )
+  in
+  chars (* convert each bit list to a char, then join the chars into the result string *)
+  |> List.map char_of_bit_list
+  |> List.to_seq
+  |> String.of_seq
+
+let () = (* registers one Crowbar property: for each generated code point, the bytes emitted by CCUtf8_string.uchar_to_bytes must equal the reference encoding above *)
+  Crowbar.add_test ~name:"ccutf8_string_uchar_to_bytes_is_same_as_simple_version" [ Crowbar.range (succ 0x10FFFF) ] (* presumably range n yields ints in [0, n), hence the succ to include 0x10FFFF - confirm against the Crowbar docs *)
+    (fun c ->
+      Crowbar.guard (Uchar.is_valid c); (* skip generated ints that are not acceptable to Uchar.of_int *)
+      let c = Uchar.of_int c in
+      let simple_answer =
+        simple_uchar_to_string c
+      in
+      let answer = (* collect the chars handed to the callback, newest first, then reverse them into a string *)
+        let buf = ref [] in
+        CCUtf8_string.uchar_to_bytes c (fun c -> (* this inner [c] is the emitted char and shadows the Uchar *)
+          buf := c :: !buf;
+        );
+        !buf
+        |> List.rev
+        |> List.to_seq
+        |> String.of_seq
+      in
+      Crowbar.check_eq (* NOTE(review): no printer is passed, so a failure may not show the two strings; confirm whether check_eq takes an optional pp argument *)
+        simple_answer answer
+    )
diff --git a/fuzz/dune b/fuzz/dune
index dc5d9758..cdc34acc 100644
--- a/fuzz/dune
+++ b/fuzz/dune
@@ -1,6 +1,7 @@
 (executables
  (flags (-w "+a-4-9-29-37-40-42-44-48-50-32" -g))
  (names ccsexp_parse_string_does_not_crash
+        ccutf8_string_uchar_to_bytes_is_same_as_simple_version
         )
  (libraries crowbar containers