All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
To: linux-fsdevel@vger.kernel.org
Cc: jra@google.com, tytso@mit.edu, olaf@sgi.com,
	darrick.wong@oracle.com, kernel@lists.collabora.co.uk,
	Gabriel Krisman Bertazi <krisman@collabora.co.uk>
Subject: [PATCH 07/15] nls: Add new interface for string comparisons
Date: Wed,  9 May 2018 03:47:58 -0300	[thread overview]
Message-ID: <20180509064800.28658-8-krisman@collabora.co.uk> (raw)
In-Reply-To: <20180509064800.28658-1-krisman@collabora.co.uk>

The existing stricmp() interface is limited because it does not accept a
separate length parameter for each string being compared.  This is a
problem for charsets doing normalization or full casefold comparison,
since strings of different sizes can still match.  To resolve this
problem, this patch implements a new interface that lets charsets do
the comparison themselves, if needed.

The original stricmp() is left in the code until all callers are
converted, but it is now implemented on top of the new interface.

Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.co.uk>
---
 include/linux/nls.h | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/include/linux/nls.h b/include/linux/nls.h
index 3766fbe6efc3..1a653b866a64 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -3,6 +3,7 @@
 #define _LINUX_NLS_H
 
 #include <linux/init.h>
+#include <linux/string.h>
 
 /* Unicode has changed over the years.  Unicode code points no longer
  * fit into 16 bits; as of Unicode 5 valid code points range from 0
@@ -21,11 +22,18 @@ typedef u16 wchar_t;
 
 /* Arbitrary Unicode character */
 typedef u32 unicode_t;
+struct nls_table;
 
 struct nls_ops {
 	int (*uni2char) (wchar_t uni, unsigned char *out, int boundlen);
 	int (*char2uni) (const unsigned char *rawstring, int boundlen,
 			 wchar_t *uni);
+	int (*strncmp)(const struct nls_table *charset,
+		       const unsigned char *str1, size_t len1,
+		       const unsigned char *str2, size_t len2);
+	int (*strncasecmp)(const struct nls_table *charset,
+			   const unsigned char *str1, size_t len1,
+			   const unsigned char *str2, size_t len2);
 };
 
 struct nls_table {
@@ -105,10 +113,17 @@ static inline unsigned char nls_toupper(struct nls_table *t, unsigned char c)
 	return nc ? nc : c;
 }
 
-static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1,
-		const unsigned char *s2, int len)
+static inline int nls_strncasecmp(struct nls_table *t,
+				  const unsigned char *s1, size_t len1,
+				  const unsigned char *s2, size_t len2)
 {
-	while (len--) {
+	if (t->ops->strncasecmp)
+		return t->ops->strncasecmp(t, s1, len1, s2, len2);
+
+	if (len1 != len2)
+		return 1;
+
+	while (len1--) {
 		if (nls_tolower(t, *s1++) != nls_tolower(t, *s2++))
 			return 1;
 	}
@@ -116,6 +131,27 @@ static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1,
 	return 0;
 }
 
+static inline int nls_strncmp(struct nls_table *t,
+			      const unsigned char *s1, size_t len1,
+			      const unsigned char *s2, size_t len2)
+{
+	if (t->ops->strncmp)
+		return t->ops->strncmp(t, s1, len1, s2, len2);
+
+	if (len1 != len2)
+		return 1;
+
+	/* strnicmp did not return negative values. So let's keep the
+	 * abi for now */
+	return !!memcmp(s1, s2, len1);
+}
+
+static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1,
+		const unsigned char *s2, int len)
+{
+	return nls_strncasecmp(t, s1, len, s2, len);
+}
+
 /*
  * nls_nullsize - return length of null character for codepage
  * @codepage - codepage for which to return length of NULL terminator
-- 
2.17.0

  parent reply	other threads:[~2018-05-09  6:49 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-09  6:47 [PATCH 00/15] NLS refactor and UTF-8 normalization Gabriel Krisman Bertazi
2018-05-09  6:47 ` [PATCH 01/15] nls: Wrap uni2char/char2uni callers Gabriel Krisman Bertazi
2018-05-09  6:47 ` [PATCH 02/15] nls: Wrap charset field access Gabriel Krisman Bertazi
2018-05-09  6:47 ` [PATCH 03/15] nls: Wrap charset hooks in ops structure Gabriel Krisman Bertazi
2018-05-09  6:47 ` [PATCH 04/15] nls: Split default charset from NLS core Gabriel Krisman Bertazi
2018-05-09 14:52   ` kbuild test robot
2018-05-15  2:45     ` Gabriel Krisman Bertazi
2018-05-09  6:47 ` [PATCH 05/15] nls: Split struct nls_charset from struct nls_table Gabriel Krisman Bertazi
2018-05-09 14:30   ` kbuild test robot
2018-05-15  2:41     ` Gabriel Krisman Bertazi
2018-05-09  6:47 ` [PATCH 06/15] nls: Add support for multiple versions of an encoding Gabriel Krisman Bertazi
2018-05-09  6:47 ` Gabriel Krisman Bertazi [this message]
2018-05-09  6:47 ` [PATCH 08/15] nls: Let charsets define the behavior of tolower/toupper Gabriel Krisman Bertazi
2018-05-09  6:48 ` [PATCH 09/15] nls: Add optional normalization and casefold hooks Gabriel Krisman Bertazi
2018-05-09  6:55 ` [PATCH 10/15] nls: utf8norm: Add unicode character database files Gabriel Krisman Bertazi
2018-05-09  6:55   ` [PATCH 11/15] scripts: add trie generator for UTF-8 Gabriel Krisman Bertazi
2018-05-09  6:55   ` [PATCH 12/15] nls: utf8norm: Introduce code for UTF-8 normalization Gabriel Krisman Bertazi
2018-05-09 17:02     ` kbuild test robot
2018-05-09 18:46       ` Gabriel Krisman Bertazi
2018-05-09  6:55   ` [PATCH 13/15] nls: utf8norm: reduce the size of utf8data[] Gabriel Krisman Bertazi
2018-05-09  6:55   ` [PATCH 14/15] nls: utf8norm: Integrate utf8norm code with NLS subsystem Gabriel Krisman Bertazi
2018-05-09  6:55   ` [PATCH 15/15] nls: utf8norm: Introduce test module for utf8norm implementation Gabriel Krisman Bertazi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180509064800.28658-8-krisman@collabora.co.uk \
    --to=krisman@collabora.co.uk \
    --cc=darrick.wong@oracle.com \
    --cc=jra@google.com \
    --cc=kernel@lists.collabora.co.uk \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=olaf@sgi.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.