From b95e2de65b81e86c4f5be9665b1b9f0b9538051e Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 4 Jan 2025 09:18:51 -0500 Subject: [PATCH 1/4] Added functions to the Char module to check common character properties. --- src/core/CCChar.ml | 9 +++++++++ src/core/CCChar.mli | 26 ++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/core/CCChar.ml b/src/core/CCChar.ml index 5936830a..402f886e 100644 --- a/src/core/CCChar.ml +++ b/src/core/CCChar.ml @@ -23,3 +23,12 @@ module Infix = struct end include Infix + +let is_uppercase_ascii c = c > '\064' && c < '\091' +let is_lowercase_ascii c = c > '\096' && c < '\123' + +let is_letter_ascii c = + (is_lowercase_ascii [@inlined]) c || (is_uppercase_ascii [@inlined]) c + +let is_digit_ascii c = c > '\047' && c < '\058' +let is_whitespace_ascii c = c = '\032' || (c > '\008' && c < '\014') diff --git a/src/core/CCChar.mli b/src/core/CCChar.mli index d18eb48e..28fd4bcf 100644 --- a/src/core/CCChar.mli +++ b/src/core/CCChar.mli @@ -40,6 +40,32 @@ val pp_buf : Buffer.t -> t -> unit val pp : Format.formatter -> t -> unit (** Renamed from [print] since 2.0. *) +val is_uppercase_ascii : t -> bool +(** [is_uppercase_ascii c] is true exactly when [c] is an + uppercase ASCII character, i.e. ['\064'] < [c] < ['\091']. + @since 3.16 *) + +val is_lowercase_ascii : t -> bool +(** [is_lowercase_ascii c] is true exactly when [c] is a + lowercase ASCII character, i.e. ['\097'] < [c] < ['\123']. + @since 3.16 *) + +val is_letter_ascii : t -> bool +(** [is_letter_ascii c] is true exactly when [c] is an ASCII + letter, i.e. [is_uppercase_ascii c || is_lowercase_ascii c]. + @since 3.16 *) + +val is_digit_ascii : t -> bool +(** [is_digit_ascii c] is true exactly when [c] is an + ASCII digit, i.e. ['\047'] < [c] < ['\058']. + @since 3.16 *) + +val is_whitespace_ascii : t -> bool +(** [is_whitespace_ascii c] is true exactly when [c] is an ASCII + whitespace character as defined by Unicode, i.e. either [c = ' '] + or ['\008'] < [c] < ['\014']. + @since 3.16 *) + (** {2 Infix Operators} @since 3.3 *) From 1486cbf5a164bb9d7885171e3f42369c4b40a288 Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 4 Jan 2025 10:00:36 -0500 Subject: [PATCH 2/4] Added tests for `CCChar` predicates. --- tests/core/t_char.ml | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/core/t_char.ml b/tests/core/t_char.ml index d8c5d971..7c7691fe 100644 --- a/tests/core/t_char.ml +++ b/tests/core/t_char.ml @@ -8,3 +8,43 @@ eq None (of_int 257);; q (Q.string_of_size (Q.Gen.return 1)) (fun s -> Stdlib.( = ) (to_string s.[0]) s) +;; + +q (Q.int_range 65 90 |> Q.map Char.chr) CCChar.is_uppercase_ascii;; + +q + (Q.int_range 0 64 |> Q.map Char.chr) + (fun c -> not @@ CCChar.is_uppercase_ascii c) +;; + +q + (Q.int_range 91 127 |> Q.map Char.chr) + (fun c -> not @@ CCChar.is_uppercase_ascii c) +;; + +q (Q.int_range 97 122 |> Q.map Char.chr) CCChar.is_lowercase_ascii;; + +q + (Q.int_range 0 96 |> Q.map Char.chr) + (fun c -> not @@ CCChar.is_lowercase_ascii c) +;; + +q + (Q.int_range 123 127 |> Q.map Char.chr) + (fun c -> not @@ CCChar.is_lowercase_ascii c) +;; + +q (Q.int_range 48 57 |> Q.map Char.chr) CCChar.is_digit_ascii;; +q (Q.int_range 0 47 |> Q.map Char.chr) (fun c -> not @@ CCChar.is_digit_ascii c) +;; + +q + (Q.int_range 58 127 |> Q.map Char.chr) + (fun c -> not @@ CCChar.is_digit_ascii c) +;; + +eq true (String.for_all CCChar.is_whitespace_ascii "\n\t \010\011\012\013");; + +eq false + (String.for_all CCChar.is_whitespace_ascii + "Hello!--NOthina\055kag$$$%^bch\008h") From bace9fe20940d8afeea12f7660db2e16ece7b3f7 Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 4 Jan 2025 10:11:03 -0500 Subject: [PATCH 3/4] Fixed tests to work with older OCaml versions that lack `String.for_all`. --- tests/core/t_char.ml | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/tests/core/t_char.ml b/tests/core/t_char.ml index 7c7691fe..665f4f3f 100644 --- a/tests/core/t_char.ml +++ b/tests/core/t_char.ml @@ -43,8 +43,41 @@ q (fun c -> not @@ CCChar.is_digit_ascii c) ;; -eq true (String.for_all CCChar.is_whitespace_ascii "\n\t \010\011\012\013");; +eq true + (Stdlib.List.for_all CCChar.is_whitespace_ascii + [ '\n'; '\t'; ' '; '\010'; '\011'; '\012'; '\013' ]) +;; eq false - (String.for_all CCChar.is_whitespace_ascii - "Hello!--NOthina\055kag$$$%^bch\008h") + (Stdlib.List.exists CCChar.is_whitespace_ascii + [ + 'H'; + 'e'; + 'l'; + 'l'; + 'o'; + '!'; + '-'; + '-'; + 'N'; + 'O'; + 't'; + 'h'; + 'i'; + 'n'; + 'a'; + '\055'; + 'k'; + 'a'; + 'g'; + '$'; + '$'; + '$'; + '%'; + '^'; + 'b'; + 'c'; + 'h'; + '\008'; + 'h'; + ]) From 2dcaa12fb78fec1b5ef97d7e46a11a4e6248d96a Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 4 Jan 2025 11:19:57 -0500 Subject: [PATCH 4/4] Fixed docstring typo. --- src/core/CCChar.mli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/CCChar.mli b/src/core/CCChar.mli index 28fd4bcf..34dc9a81 100644 --- a/src/core/CCChar.mli +++ b/src/core/CCChar.mli @@ -47,7 +47,7 @@ val is_uppercase_ascii : t -> bool val is_lowercase_ascii : t -> bool (** [is_lowercase_ascii c] is true exactly when [c] is a - lowercase ASCII character, i.e. ['\097'] < [c] < ['\123']. + lowercase ASCII character, i.e. ['\096'] < [c] < ['\123']. @since 3.16 *) val is_letter_ascii : t -> bool