From 26af1f12970a073adb5541a7bc5e2f43346b86fe Mon Sep 17 00:00:00 2001 From: Simon Cruanes Date: Wed, 23 Jun 2021 14:03:42 -0400 Subject: [PATCH] feat(ccutf8string): add `{make,empty,of_uchar}` --- src/core/CCUtf8_string.ml | 28 ++++++++++++++++++++++++++++ src/core/CCUtf8_string.mli | 15 +++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/src/core/CCUtf8_string.ml b/src/core/CCUtf8_string.ml index c36fbf52..abc4666d 100644 --- a/src/core/CCUtf8_string.ml +++ b/src/core/CCUtf8_string.ml @@ -17,6 +17,7 @@ let pp = Format.pp_print_string include String +let empty = "" let to_string x = x (** State for decoding *) @@ -239,6 +240,23 @@ let of_iter i : t = i (fun c -> uchar_to_bytes c (Buffer.add_char buf)); Buffer.contents buf +let make n c = (* [n] copies of [c]; [empty] when [n=0] *) + if n=0 then empty + else ( + let n_bytes = uchar_num_bytes c in + let buf = Bytes.create (n * n_bytes) in + (* emit the bytes of [c] at the beginning of the buffer; [i] is the write cursor *) + let i = ref 0 in + uchar_to_bytes c (fun b -> Bytes.set buf !i b; incr i); + (* now copy that [n_bytes]-long prefix into each of the n-1 remaining slots *) + for j = 1 to n-1 do + Bytes.blit buf 0 buf (n_bytes * j) n_bytes; + done; + Bytes.unsafe_to_string buf (* no copy: nothing below writes to [buf] *) + ) + +let[@inline] of_uchar c : t = make 1 c (* one-character string, built via [make] *) + let of_list l : t = let len = List.fold_left (fun n c -> n + uchar_num_bytes c) 0 l in if len > Sys.max_string_length then ( @@ -455,3 +473,13 @@ let of_string s = if is_valid s then Some s else None assert_equal 1 (n_bytes (of_list [c])) done *) + +(*$QR + Q.(small_list arb_uchar) (fun l -> + of_list l = concat empty (List.map of_uchar l)) + *) + +(*$QR + Q.(pair small_nat arb_uchar) (fun (i,c) -> + make i c = concat empty (CCList.init i (fun _ -> of_uchar c))) + *) diff --git a/src/core/CCUtf8_string.mli b/src/core/CCUtf8_string.mli index 37a23ba4..fc12ec56 100644 --- a/src/core/CCUtf8_string.mli +++ b/src/core/CCUtf8_string.mli @@ -77,9 +77,24 @@ val filter_map : (uchar -> uchar option) -> t -> t val flat_map : (uchar -> t) -> t -> t +val empty : t +(** Empty string. 
+ @since NEXT_RELEASE *) + val append : t -> t -> t +(** Append two strings together. *) val concat : t -> t list -> t +(** [concat sep l] concatenates each string in [l], inserting [sep] + in between each string. Similar to {!String.concat}. *) + +val of_uchar : uchar -> t +(** [of_uchar c] is the string containing only the unicode character [c]. + @since NEXT_RELEASE *) + +val make : int -> uchar -> t +(** [make n c] makes a new string with [n] copies of [c] in it; [make 0 c] is {!empty}. + @since NEXT_RELEASE *) val of_seq : uchar Seq.t -> t (** Build a string from unicode codepoints