git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: Achu Luma <ach.lumap@gmail.com>, git@vger.kernel.org
Cc: gitster@pobox.com, chriscool@tuxfamily.org,
	christian.couder@gmail.com, phillip.wood@dunelm.org.uk,
	steadmon@google.com, me@ttaylorr.com,
	Phillip Wood <phillip.wood123@gmail.com>
Subject: Re: [Outreachy][PATCH v4] Port helper/test-ctype.c to unit-tests/t-ctype.c
Date: Sun, 7 Jan 2024 13:45:59 +0100	[thread overview]
Message-ID: <a087f57c-ce72-45c7-8182-f38d0aca9030@web.de> (raw)
In-Reply-To: <20240105161413.10422-1-ach.lumap@gmail.com>

Am 05.01.24 um 17:14 schrieb Achu Luma:
> In the recent codebase update (8bf6fbd00d (Merge branch
> 'js/doc-unit-tests', 2023-12-09)), a new unit testing framework was
> merged, providing a standardized approach for testing C code. Prior to
> this update, some unit tests relied on the test helper mechanism,
> lacking a dedicated unit testing framework. It's more natural to perform
> these unit tests using the new unit test framework.
>
> This commit migrates the unit tests for C character classification
> functions (isdigit(), isspace(), etc) from the legacy approach
> using the test-tool command `test-tool ctype` in t/helper/test-ctype.c
> to the new unit testing framework (t/unit-tests/test-lib.h).
>
> The migration involves refactoring the tests to utilize the testing
> macros provided by the framework (TEST() and check_*()).
>
> Mentored-by: Christian Couder <chriscool@tuxfamily.org>
> Helped-by: René Scharfe <l.s.r@web.de>
> Helped-by: Phillip Wood <phillip.wood123@gmail.com>
> Helped-by: Taylor Blau <me@ttaylorr.com>
> Signed-off-by: Achu Luma <ach.lumap@gmail.com>
> ---

[snip]

> diff --git a/t/helper/test-ctype.c b/t/helper/test-ctype.c
> deleted file mode 100644
> index e5659df40b..0000000000
> --- a/t/helper/test-ctype.c
> +++ /dev/null
> @@ -1,70 +0,0 @@
> -#include "test-tool.h"
> -
> -static int rc;
> -
> -static void report_error(const char *class, int ch)
> -{
> -	printf("%s classifies char %d (0x%02x) wrongly\n", class, ch, ch);
> -	rc = 1;
> -}
> -
> -static int is_in(const char *s, int ch)
> -{
> -	/*
> -	 * We can't find NUL using strchr. Accept it as the first
> -	 * character in the spec -- there are no empty classes.
> -	 */
> -	if (ch == '\0')
> -		return ch == *s;
> -	if (*s == '\0')
> -		s++;
> -	return !!strchr(s, ch);
> -}
> -
> -#define TEST_CLASS(t,s) {			\
> -	int i;					\
> -	for (i = 0; i < 256; i++) {		\
> -		if (is_in(s, i) != t(i))	\
> -			report_error(#t, i);	\
> -	}					\
> -	if (t(EOF))				\
> -		report_error(#t, EOF);		\
> -}
> -
> -#define DIGIT "0123456789"
> -#define LOWER "abcdefghijklmnopqrstuvwxyz"
> -#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> -#define PUNCT "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
> -#define ASCII \
> -	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
> -	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
> -	"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" \
> -	"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
> -	"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
> -	"\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" \
> -	"\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
> -	"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
> -#define CNTRL \
> -	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
> -	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
> -	"\x7f"
> -
> -int cmd__ctype(int argc UNUSED, const char **argv UNUSED)
> -{
> -	TEST_CLASS(isdigit, DIGIT);
> -	TEST_CLASS(isspace, " \n\r\t");
> -	TEST_CLASS(isalpha, LOWER UPPER);
> -	TEST_CLASS(isalnum, LOWER UPPER DIGIT);
> -	TEST_CLASS(is_glob_special, "*?[\\");
> -	TEST_CLASS(is_regex_special, "$()*+.?[\\^{|");
> -	TEST_CLASS(is_pathspec_magic, "!\"#%&',-/:;<=>@_`~");
> -	TEST_CLASS(isascii, ASCII);
> -	TEST_CLASS(islower, LOWER);
> -	TEST_CLASS(isupper, UPPER);
> -	TEST_CLASS(iscntrl, CNTRL);
> -	TEST_CLASS(ispunct, PUNCT);
> -	TEST_CLASS(isxdigit, DIGIT "abcdefABCDEF");
> -	TEST_CLASS(isprint, LOWER UPPER DIGIT PUNCT " ");
> -
> -	return rc;
> -}

[snip]

> diff --git a/t/unit-tests/t-ctype.c b/t/unit-tests/t-ctype.c
> new file mode 100644
> index 0000000000..3a338df541
> --- /dev/null
> +++ b/t/unit-tests/t-ctype.c
> @@ -0,0 +1,78 @@
> +#include "test-lib.h"
> +
> +static int is_in(const char *s, int ch)
> +{
> +	/*
> +	 * We can't find NUL using strchr. Accept it as the first
> +	 * character in the spec -- there are no empty classes.
> +	 */
> +	if (ch == '\0')
> +		return ch == *s;
> +	if (*s == '\0')
> +		s++;
> +	return !!strchr(s, ch);
> +}
> +
> +/* Macro to test a character type */
> +#define TEST_CTYPE_FUNC(func, string) \
> +static void test_ctype_##func(void) { \
> +	for (int i = 0; i < 256; i++) { \
> +		if (!check_int(func(i), ==, is_in(string, i))) \
> +			test_msg("       i: 0x%02x", i); \
> +	} \
> +}
> +
> +#define TEST_CHAR_CLASS(class) TEST(test_ctype_##class(), #class " works")
> +
> +#define DIGIT "0123456789"
> +#define LOWER "abcdefghijklmnopqrstuvwxyz"
> +#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> +#define PUNCT "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
> +#define ASCII \
> +	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
> +	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
> +	"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" \
> +	"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
> +	"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
> +	"\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" \
> +	"\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
> +	"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
> +#define CNTRL \
> +	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
> +	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
> +	"\x7f"
> +
> +TEST_CTYPE_FUNC(isdigit, DIGIT)
> +TEST_CTYPE_FUNC(isspace, " \n\r\t")
> +TEST_CTYPE_FUNC(isalpha, LOWER UPPER)
> +TEST_CTYPE_FUNC(isalnum, LOWER UPPER DIGIT)
> +TEST_CTYPE_FUNC(is_glob_special, "*?[\\")
> +TEST_CTYPE_FUNC(is_regex_special, "$()*+.?[\\^{|")
> +TEST_CTYPE_FUNC(is_pathspec_magic, "!\"#%&',-/:;<=>@_`~")
> +TEST_CTYPE_FUNC(isascii, ASCII)
> +TEST_CTYPE_FUNC(islower, LOWER)
> +TEST_CTYPE_FUNC(isupper, UPPER)
> +TEST_CTYPE_FUNC(iscntrl, CNTRL)
> +TEST_CTYPE_FUNC(ispunct, PUNCT)
> +TEST_CTYPE_FUNC(isxdigit, DIGIT "abcdefABCDEF")
> +TEST_CTYPE_FUNC(isprint, LOWER UPPER DIGIT PUNCT " ")
> +
> +int cmd_main(int argc, const char **argv) {
> +	/* Run all character type tests */
> +	TEST_CHAR_CLASS(isspace);
> +	TEST_CHAR_CLASS(isdigit);
> +	TEST_CHAR_CLASS(isalpha);
> +	TEST_CHAR_CLASS(isalnum);
> +	TEST_CHAR_CLASS(is_glob_special);
> +	TEST_CHAR_CLASS(is_regex_special);
> +	TEST_CHAR_CLASS(is_pathspec_magic);
> +	TEST_CHAR_CLASS(isascii);
> +	TEST_CHAR_CLASS(islower);
> +	TEST_CHAR_CLASS(isupper);
> +	TEST_CHAR_CLASS(iscntrl);
> +	TEST_CHAR_CLASS(ispunct);
> +	TEST_CHAR_CLASS(isxdigit);
> +	TEST_CHAR_CLASS(isprint);
> +
> +	return test_done();
> +}
> --
> 2.42.0.windows.2
>

Quite an improvement over v3!  Now you only need to repeat the class
names once.

Can we do any better?  We could simply have one test per character per
class like this:

#define TEST_CHAR_CLASS(class, expect) \
	for (int i = 0; i < 256; i++) \
		TEST(check_int(class(i), ==, is_in(expect, i)),	\
		     "%s(0x%02x) works", #class, i)

Which would be used like this:

	TEST_CHAR_CLASS(isspace, " \n\r\t");

With that there is no need to define any functions anymore.  We also
don't need any custom output, as the test name includes the character
code.  Downside: We'd have thousands of tests.  But is that actually
a downside or is that how the unit test framework is supposed to be
used?

If we need to aggregate the results by class for some reason, we
could use strings, like we already do for defining the expected class
members.  We need special handling for NUL, as that character
terminates C strings, but we can put all other characters into a
string and then use check_str:

#define TEST_CHAR_CLASS(class, expect) \
	do { \
		int expect_nul = expect[0] == '\0'; \
		char expect_rest[256] = {0}; \
		char actual_rest[256] = {0}; \
		for (int i = 1, j = 0; i < 256; i++) \
			if (strchr(&expect[expect_nul], i)) \
				expect_rest[j++] = i; \
		for (int i = 1, j = 0; i < 256; i++) \
			if (class(i) == 1) \
				actual_rest[j++] = i; \
		TEST(check_int(class(0), ==, expect_nul) && \
		     check_str(actual_rest, expect_rest), \
		     #class " works"); \
	} while (0)

check_str escapes non-printable characters when reporting a mismatch,
so this shouldn't mess up your terminal.

By the way: Like the original code these checks are stricter than
required by the C standard in requiring the result to be 1 instead of
just true (any non-zero value).  Perhaps they should be relaxed.  But
that's a tangent and independent of the convergence to a unit test.

René


  reply	other threads:[~2024-01-07 12:46 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-21 23:15 [PATCH] Port helper/test-ctype.c to unit-tests/t-ctype.c Achu Luma
2023-12-26 18:45 ` Junio C Hamano
2023-12-27 10:57   ` Christian Couder
2023-12-27 11:57     ` René Scharfe
2023-12-27 14:40       ` Phillip Wood
2023-12-27 23:48       ` Junio C Hamano
2023-12-28 16:05         ` René Scharfe
2024-01-02 18:55   ` Taylor Blau
2023-12-30  0:09 ` [Outreachy][PATCH v2] " Achu Luma
2024-01-01 10:40   ` [Outreachy][PATCH v3] " Achu Luma
2024-01-01 16:41     ` René Scharfe
2024-01-02 16:35       ` Junio C Hamano
2024-01-05 16:14     ` [Outreachy][PATCH v4] " Achu Luma
2024-01-07 12:45       ` René Scharfe [this message]
2024-01-08 22:32         ` Junio C Hamano
2024-01-09 10:35       ` Phillip Wood
2024-01-09 17:12         ` Junio C Hamano
2024-01-12 10:27       ` [Outreachy][PATCH v5] " Achu Luma
2024-01-15 10:39         ` Phillip Wood
2024-01-16 15:38           ` Junio C Hamano
2024-01-16 19:27             ` René Scharfe
2024-01-16 19:45               ` Christian Couder
2024-01-16 19:52               ` Junio C Hamano
2024-01-17  5:37               ` Josh Steadmon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a087f57c-ce72-45c7-8182-f38d0aca9030@web.de \
    --to=l.s.r@web.de \
    --cc=ach.lumap@gmail.com \
    --cc=chriscool@tuxfamily.org \
    --cc=christian.couder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=me@ttaylorr.com \
    --cc=phillip.wood123@gmail.com \
    --cc=phillip.wood@dunelm.org.uk \
    --cc=steadmon@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).