linux-cifs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Pali Rohár" <pali@kernel.org>
To: linux-fsdevel@vger.kernel.org,
	linux-ntfs-dev@lists.sourceforge.net, linux-cifs@vger.kernel.org,
	jfs-discussion@lists.sourceforge.net,
	linux-kernel@vger.kernel.org,
	"Alexander Viro" <viro@zeniv.linux.org.uk>,
	"Jan Kara" <jack@suse.cz>,
	"OGAWA Hirofumi" <hirofumi@mail.parknet.co.jp>,
	"Theodore Y . Ts'o" <tytso@mit.edu>,
	"Luis de Bethencourt" <luisbg@kernel.org>,
	"Salah Triki" <salah.triki@gmail.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Dave Kleikamp" <shaggy@kernel.org>,
	"Anton Altaparmakov" <anton@tuxera.com>,
	"Pavel Machek" <pavel@ucw.cz>, "Marek Behún" <marek.behun@nic.cz>,
	"Christoph Hellwig" <hch@infradead.org>
Subject: [RFC PATCH 17/20] ntfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option
Date: Sun,  8 Aug 2021 18:24:50 +0200	[thread overview]
Message-ID: <20210808162453.1653-18-pali@kernel.org> (raw)
In-Reply-To: <20210808162453.1653-1-pali@kernel.org>

NLS table for utf8 is broken and cannot be fixed.

So instead of broken utf8 nls functions char2uni() and uni2char() use
functions utf8s_to_utf16s() and utf16s_to_utf8s() which implements correct
conversion between UTF-16 and UTF-8.

These functions implements also correct processing of UTF-16 surrogate
pairs and therefore after this change ntfs driver would be able to correctly
handle also file names with 4-byte UTF-8 sequences.

When iochatset=utf8 is used then set vol->nls_map to NULL and use it for
distinguish between the fact if NLS table or native UTF-8 functions should
be used.

Signed-off-by: Pali Rohár <pali@kernel.org>
---
 fs/ntfs/dir.c    |  6 ++++--
 fs/ntfs/inode.c  |  5 ++++-
 fs/ntfs/super.c  | 41 ++++++++++++++++++++++++-----------------
 fs/ntfs/unistr.c | 27 ++++++++++++++++++++++++---
 4 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index cd96083a12c8..035582b92aa2 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1034,7 +1034,8 @@ static inline int ntfs_filldir(ntfs_volume *vol,
 	}
 	name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name,
 			ie->key.file_name.file_name_length, &name,
-			NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
+			NTFS_MAX_NAME_LEN *
+			(vol->nls_map ? NLS_MAX_CHARSET_SIZE : 4) + 1);
 	if (name_len <= 0) {
 		ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
 				(long long)MREF_LE(ie->data.dir.indexed_file));
@@ -1118,7 +1119,8 @@ static int ntfs_readdir(struct file *file, struct dir_context *actor)
 	 * Allocate a buffer to store the current name being processed
 	 * converted to format determined by current NLS.
 	 */
-	name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
+	name = kmalloc(NTFS_MAX_NAME_LEN *
+		       (vol->nls_map ? NLS_MAX_CHARSET_SIZE : 4) + 1, GFP_NOFS);
 	if (unlikely(!name)) {
 		err = -ENOMEM;
 		goto err_out;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 3676f185b4a0..1437944be66d 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2303,7 +2303,10 @@ int ntfs_show_options(struct seq_file *sf, struct dentry *root)
 		seq_printf(sf, ",fmask=0%o", vol->fmask);
 		seq_printf(sf, ",dmask=0%o", vol->dmask);
 	}
-	seq_printf(sf, ",iocharset=%s", vol->nls_map->charset);
+	if (vol->nls_map)
+		seq_printf(sf, ",iocharset=%s", vol->nls_map->charset);
+	else
+		seq_puts(sf, ",iocharset=utf8");
 	if (NVolCaseSensitive(vol))
 		seq_printf(sf, ",case_sensitive");
 	if (NVolShowSystemFiles(vol))
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 69c7871b742e..358f5e9e3c46 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -84,7 +84,7 @@ static int simple_getbool(char *s, bool *setval)
  *
  * Parse the recognized options in @opt for the ntfs volume described by @vol.
  */
-static bool parse_options(ntfs_volume *vol, char *opt)
+static bool parse_options(ntfs_volume *vol, char *opt, int remount)
 {
 	char *p, *v, *ov;
 	static char *utf8 = "utf8";
@@ -95,6 +95,7 @@ static bool parse_options(ntfs_volume *vol, char *opt)
 	int mft_zone_multiplier = -1, on_errors = -1;
 	int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
 	struct nls_table *nls_map = NULL;
+	int have_iocharset = 0;
 
 	/* I am lazy... (-8 */
 #define NTFS_GETOPT_WITH_DEFAULT(option, variable, default_value)	\
@@ -196,12 +197,16 @@ static bool parse_options(ntfs_volume *vol, char *opt)
 				goto needs_arg;
 use_utf8:
 			unload_nls(nls_map);
-			nls_map = load_nls(v);
-			if (!nls_map) {
-				ntfs_error(vol->sb, "NLS character set "
-					   "%s not found.", v);
-				return false;
+			nls_map = NULL;
+			if (strcmp(v, "utf8") != 0) {
+				nls_map = load_nls(v);
+				if (!nls_map) {
+					ntfs_error(vol->sb, "NLS character set "
+						   "%s not found.", v);
+					return false;
+				}
 			}
+			have_iocharset = 1;
 		} else if (!strcmp(p, "utf8")) {
 			bool val = false;
 			ntfs_warning(vol->sb, "Option utf8 is no longer "
@@ -241,25 +246,27 @@ static bool parse_options(ntfs_volume *vol, char *opt)
 			return false;
 		}
 	}
-	if (nls_map) {
-		if (vol->nls_map && vol->nls_map != nls_map) {
+	if (have_iocharset) {
+		if (remount && vol->nls_map != nls_map) {
 			ntfs_error(vol->sb, "Cannot change NLS character set "
 					"on remount.");
 			return false;
-		} /* else (!vol->nls_map) */
-		ntfs_debug("Using NLS character set %s.", nls_map->charset);
-		vol->nls_map = nls_map;
-	} else /* (!nls_map) */ {
-		if (!vol->nls_map) {
+		} else (!remount) {
+			ntfs_debug("Using NLS character set %s.",
+					nls_map ? nls_map->charset : "utf8");
+			vol->nls_map = nls_map;
+		}
+	} else if (!remount) {
+		if (strcmp(CONFIG_NLS_DEFAULT, "utf8") != 0) {
 			vol->nls_map = load_nls_default();
 			if (!vol->nls_map) {
 				ntfs_error(vol->sb, "Failed to load default "
 						"NLS character set.");
 				return false;
 			}
-			ntfs_debug("Using default NLS character set (%s).",
-					vol->nls_map->charset);
 		}
+		ntfs_debug("Using default NLS character set (%s).",
+				vol->nls_map ? vol->nls_map->charset : "utf8");
 	}
 	if (mft_zone_multiplier != -1) {
 		if (vol->mft_zone_multiplier && vol->mft_zone_multiplier !=
@@ -534,7 +541,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
 
 	// TODO: Deal with *flags.
 
-	if (!parse_options(vol, opt))
+	if (!parse_options(vol, opt, 1))
 		return -EINVAL;
 
 	ntfs_debug("Done.");
@@ -2731,7 +2738,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 	NVolSetSparseEnabled(vol);
 
 	/* Important to get the mount options dealt with now. */
-	if (!parse_options(vol, (char*)opt))
+	if (!parse_options(vol, (char*)opt, 0))
 		goto err_out_now;
 
 	/* We support sector sizes up to the PAGE_SIZE. */
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 75a7f73bccdd..f29d83fb09bb 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -254,6 +254,16 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
 	if (likely(ins)) {
 		ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS);
 		if (likely(ucs)) {
+			if (!nls) {
+				wc_len = utf8s_to_utf16s(ins, ins_len,
+						UTF16_LITTLE_ENDIAN, ucs,
+						NTFS_MAX_NAME_LEN);
+				if (wc_len < 0 || wc_len >= NTFS_MAX_NAME_LEN)
+					goto name_err;
+				ucs[wc_len] = 0;
+				*outs = ucs;
+				return o;
+			}
 			for (i = o = 0; i < ins_len; i += wc_len) {
 				wc_len = nls->char2uni(ins + i, ins_len - i,
 						&wc);
@@ -283,7 +293,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
 	if (wc_len < 0) {
 		ntfs_error(vol->sb, "Name using character set %s contains "
 				"characters that cannot be converted to "
-				"Unicode.", nls->charset);
+				"Unicode.", nls ? nls->charset : "utf8");
 		i = -EILSEQ;
 	} else /* if (o >= NTFS_MAX_NAME_LEN) */ {
 		ntfs_error(vol->sb, "Name is too long (maximum length for a "
@@ -335,11 +345,22 @@ int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
 			goto conversion_err;
 		}
 		if (!ns) {
-			ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
+			ns_len = ins_len * (nls ? NLS_MAX_CHARSET_SIZE : 4);
 			ns = kmalloc(ns_len + 1, GFP_NOFS);
 			if (!ns)
 				goto mem_err_out;
 		}
+		if (!nls) {
+			o = utf16s_to_utf8s(ins, ins_len, UTF16_LITTLE_ENDIAN,
+					ns, ns_len);
+			if (o >= ns_len) {
+				wc = -ENAMETOOLONG;
+				goto conversion_err;
+			}
+			ns[o] = 0;
+			*outs = ns;
+			return o;
+		}
 		for (i = o = 0; i < ins_len; i++) {
 retry:			wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
 					ns_len - o);
@@ -373,7 +394,7 @@ retry:			wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
 	ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
 			"converted to character set %s.  You might want to "
 			"try to use the mount option iocharset=utf8.",
-			nls->charset);
+			nls ? nls->charset : "utf8");
 	if (ns != *outs)
 		kfree(ns);
 	if (wc != -ENAMETOOLONG)
-- 
2.20.1


  parent reply	other threads:[~2021-08-08 16:25 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-08 16:24 [RFC PATCH 00/20] fs: Remove usage of broken nls_utf8 and drop it Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 01/20] fat: Fix iocharset=utf8 mount option Pali Rohár
2021-08-15  3:42   ` OGAWA Hirofumi
2021-08-15  9:42     ` Pali Rohár
2021-08-15 11:23       ` OGAWA Hirofumi
2021-08-23  3:51   ` Kari Argillander
2021-08-08 16:24 ` [RFC PATCH 02/20] hfsplus: Add iocharset= mount option as alias for nls= Pali Rohár
2021-08-09 17:51   ` Viacheslav Dubeyko
2021-08-09 20:49   ` Kari Argillander
2021-08-09 21:25     ` Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 03/20] udf: Fix iocharset=utf8 mount option Pali Rohár
2021-08-12 14:17   ` Jan Kara
2021-08-12 15:51     ` Pali Rohár
2021-08-13 13:48       ` Jan Kara
2021-08-19  8:34         ` Pali Rohár
2021-08-19 10:41           ` Jan Kara
2021-08-08 16:24 ` [RFC PATCH 04/20] isofs: joliet: " Pali Rohár
2021-08-12 14:18   ` Jan Kara
2021-08-08 16:24 ` [RFC PATCH 05/20] ntfs: Undeprecate iocharset= " Pali Rohár
2021-08-09 20:52   ` Kari Argillander
2021-08-19  1:21   ` Kari Argillander
2021-08-19  8:12     ` Pali Rohár
2021-08-19 10:23       ` Kari Argillander
2021-08-19 22:04         ` Pali Rohár
2021-08-19 23:18           ` Kari Argillander
2021-08-08 16:24 ` [RFC PATCH 06/20] ntfs: Fix error processing when load_nls() fails Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 07/20] befs: Fix printing iocharset= mount option Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 08/20] befs: Rename enum value Opt_charset to Opt_iocharset to match " Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 09/20] befs: Fix error processing when load_nls() fails Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 10/20] befs: Allow to use native UTF-8 mode Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 11/20] hfs: Explicitly set hsb->nls_disk when hsb->nls_io is set Pali Rohár
2021-08-09 17:31   ` Viacheslav Dubeyko
2021-08-09 17:37     ` Matthew Wilcox
2021-08-09 17:47       ` Pali Rohár
2021-08-09 20:43         ` Steve French
2021-08-09 18:00       ` Viacheslav Dubeyko
2021-08-08 16:24 ` [RFC PATCH 12/20] hfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option Pali Rohár
2021-08-09 17:49   ` Viacheslav Dubeyko
2022-09-25 12:06     ` Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 13/20] hfsplus: " Pali Rohár
2021-08-09 17:42   ` Viacheslav Dubeyko
2022-09-25 12:12     ` Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 14/20] jfs: Remove custom iso8859-1 implementation Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 15/20] jfs: Fix buffer overflow in jfs_strfromUCS_le() function Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 16/20] jfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option Pali Rohár
2021-08-08 16:24 ` Pali Rohár [this message]
2021-08-08 16:24 ` [RFC PATCH 18/20] cifs: " Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 19/20] cifs: Remove usage of load_nls_default() calls Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 20/20] nls: Drop broken nls_utf8 module Pali Rohár
2021-09-03 21:26 ` [RFC PATCH 00/20] fs: Remove usage of broken nls_utf8 and drop it Kari Argillander
2021-09-03 21:37   ` Pali Rohár
2021-09-03 22:06     ` Kari Argillander

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210808162453.1653-18-pali@kernel.org \
    --to=pali@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=anton@tuxera.com \
    --cc=hch@infradead.org \
    --cc=hirofumi@mail.parknet.co.jp \
    --cc=jack@suse.cz \
    --cc=jfs-discussion@lists.sourceforge.net \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-ntfs-dev@lists.sourceforge.net \
    --cc=luisbg@kernel.org \
    --cc=marek.behun@nic.cz \
    --cc=pavel@ucw.cz \
    --cc=salah.triki@gmail.com \
    --cc=shaggy@kernel.org \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).