From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev,
Marios Makassikis <mmakassikis@freebox.fr>,
Namjae Jeon <linkinjeon@kernel.org>,
Steve French <stfrench@microsoft.com>,
Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.6 05/49] ksmbd: add support for surrogate pair conversion
Date: Wed, 3 Jan 2024 17:55:25 +0100 [thread overview]
Message-ID: <20240103164835.759886589@linuxfoundation.org> (raw)
In-Reply-To: <20240103164834.970234661@linuxfoundation.org>
6.6-stable review patch. If anyone has any objections, please let me know.
------------------
From: Namjae Jeon <linkinjeon@kernel.org>
[ Upstream commit 0c180317c654a494fe429adbf7bc9b0793caf9e2 ]
ksmbd is missing supporting to convert filename included surrogate pair
characters. It triggers a "file or folder does not exist" error in
Windows client.
[Steps to Reproduce for bug]
1. Create surrogate pair file
touch $(echo -e '\xf0\x9d\x9f\xa3')
touch $(echo -e '\xf0\x9d\x9f\xa4')
2. Try to open these files in ksmbd share through Windows client.
This patch update unicode functions not to consider about surrogate pair
(and IVS).
Reviewed-by: Marios Makassikis <mmakassikis@freebox.fr>
Tested-by: Marios Makassikis <mmakassikis@freebox.fr>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
fs/smb/server/unicode.c | 187 +++++++++++++++++++++++++++++-----------
1 file changed, 138 insertions(+), 49 deletions(-)
diff --git a/fs/smb/server/unicode.c b/fs/smb/server/unicode.c
index 393dd4a7432b6..43ed29ee44ead 100644
--- a/fs/smb/server/unicode.c
+++ b/fs/smb/server/unicode.c
@@ -13,46 +13,10 @@
#include "unicode.h"
#include "smb_common.h"
-/*
- * smb_utf16_bytes() - how long will a string be after conversion?
- * @from: pointer to input string
- * @maxbytes: don't go past this many bytes of input string
- * @codepage: destination codepage
- *
- * Walk a utf16le string and return the number of bytes that the string will
- * be after being converted to the given charset, not including any null
- * termination required. Don't walk past maxbytes in the source buffer.
- *
- * Return: string length after conversion
- */
-static int smb_utf16_bytes(const __le16 *from, int maxbytes,
- const struct nls_table *codepage)
-{
- int i;
- int charlen, outlen = 0;
- int maxwords = maxbytes / 2;
- char tmp[NLS_MAX_CHARSET_SIZE];
- __u16 ftmp;
-
- for (i = 0; i < maxwords; i++) {
- ftmp = get_unaligned_le16(&from[i]);
- if (ftmp == 0)
- break;
-
- charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
- if (charlen > 0)
- outlen += charlen;
- else
- outlen++;
- }
-
- return outlen;
-}
-
/*
* cifs_mapchar() - convert a host-endian char to proper char in codepage
* @target: where converted character should be copied
- * @src_char: 2 byte host-endian source character
+ * @from: host-endian source string
* @cp: codepage to which character should be converted
* @mapchar: should character be mapped according to mapchars mount option?
*
@@ -63,10 +27,13 @@ static int smb_utf16_bytes(const __le16 *from, int maxbytes,
* Return: string length after conversion
*/
static int
-cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
bool mapchar)
{
int len = 1;
+ __u16 src_char;
+
+ src_char = *from;
if (!mapchar)
goto cp_convert;
@@ -104,12 +71,66 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
cp_convert:
len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
- if (len <= 0) {
- *target = '?';
- len = 1;
- }
+ if (len <= 0)
+ goto surrogate_pair;
goto out;
+
+surrogate_pair:
+ /* convert SURROGATE_PAIR and IVS */
+ if (strcmp(cp->charset, "utf8"))
+ goto unknown;
+ len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
+ if (len <= 0)
+ goto unknown;
+ return len;
+
+unknown:
+ *target = '?';
+ len = 1;
+ goto out;
+}
+
+/*
+ * smb_utf16_bytes() - compute converted string length
+ * @from: pointer to input string
+ * @maxbytes: input string length
+ * @codepage: destination codepage
+ *
+ * Walk a utf16le string and return the number of bytes that the string will
+ * be after being converted to the given charset, not including any null
+ * termination required. Don't walk past maxbytes in the source buffer.
+ *
+ * Return: string length after conversion
+ */
+static int smb_utf16_bytes(const __le16 *from, int maxbytes,
+ const struct nls_table *codepage)
+{
+ int i, j;
+ int charlen, outlen = 0;
+ int maxwords = maxbytes / 2;
+ char tmp[NLS_MAX_CHARSET_SIZE];
+ __u16 ftmp[3];
+
+ for (i = 0; i < maxwords; i++) {
+ ftmp[0] = get_unaligned_le16(&from[i]);
+ if (ftmp[0] == 0)
+ break;
+ for (j = 1; j <= 2; j++) {
+ if (i + j < maxwords)
+ ftmp[j] = get_unaligned_le16(&from[i + j]);
+ else
+ ftmp[j] = 0;
+ }
+
+ charlen = cifs_mapchar(tmp, ftmp, codepage, 0);
+ if (charlen > 0)
+ outlen += charlen;
+ else
+ outlen++;
+ }
+
+ return outlen;
}
/*
@@ -139,12 +160,12 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
const struct nls_table *codepage, bool mapchar)
{
- int i, charlen, safelen;
+ int i, j, charlen, safelen;
int outlen = 0;
int nullsize = nls_nullsize(codepage);
int fromwords = fromlen / 2;
char tmp[NLS_MAX_CHARSET_SIZE];
- __u16 ftmp;
+ __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
/*
* because the chars can be of varying widths, we need to take care
@@ -155,9 +176,15 @@ static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
for (i = 0; i < fromwords; i++) {
- ftmp = get_unaligned_le16(&from[i]);
- if (ftmp == 0)
+ ftmp[0] = get_unaligned_le16(&from[i]);
+ if (ftmp[0] == 0)
break;
+ for (j = 1; j <= 2; j++) {
+ if (i + j < fromwords)
+ ftmp[j] = get_unaligned_le16(&from[i + j]);
+ else
+ ftmp[j] = 0;
+ }
/*
* check to see if converting this character might make the
@@ -172,6 +199,19 @@ static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
/* put converted char into 'to' buffer */
charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
outlen += charlen;
+
+ /*
+ * charlen (=bytes of UTF-8 for 1 character)
+ * 4bytes UTF-8(surrogate pair) is charlen=4
+ * (4bytes UTF-16 code)
+ * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
+ * (2 UTF-8 pairs divided to 2 UTF-16 pairs)
+ */
+ if (charlen == 4)
+ i++;
+ else if (charlen >= 5)
+ /* 5-6bytes UTF-8 */
+ i += 2;
}
/* properly null-terminate string */
@@ -306,6 +346,9 @@ int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
char src_char;
__le16 dst_char;
wchar_t tmp;
+ wchar_t wchar_to[6]; /* UTF-16 */
+ int ret;
+ unicode_t u;
if (!mapchars)
return smb_strtoUTF16(target, source, srclen, cp);
@@ -348,11 +391,57 @@ int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
* if no match, use question mark, which at least in
* some cases serves as wild card
*/
- if (charlen < 1) {
- dst_char = cpu_to_le16(0x003f);
- charlen = 1;
+ if (charlen > 0)
+ goto ctoUTF16;
+
+ /* convert SURROGATE_PAIR */
+ if (strcmp(cp->charset, "utf8"))
+ goto unknown;
+ if (*(source + i) & 0x80) {
+ charlen = utf8_to_utf32(source + i, 6, &u);
+ if (charlen < 0)
+ goto unknown;
+ } else
+ goto unknown;
+ ret = utf8s_to_utf16s(source + i, charlen,
+ UTF16_LITTLE_ENDIAN,
+ wchar_to, 6);
+ if (ret < 0)
+ goto unknown;
+
+ i += charlen;
+ dst_char = cpu_to_le16(*wchar_to);
+ if (charlen <= 3)
+ /* 1-3bytes UTF-8 to 2bytes UTF-16 */
+ put_unaligned(dst_char, &target[j]);
+ else if (charlen == 4) {
+ /*
+ * 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
+ * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
+ * (charlen=3+4 or 4+4)
+ */
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 1));
+ j++;
+ put_unaligned(dst_char, &target[j]);
+ } else if (charlen >= 5) {
+ /* 5-6bytes UTF-8 to 6bytes UTF-16 */
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 1));
+ j++;
+ put_unaligned(dst_char, &target[j]);
+ dst_char = cpu_to_le16(*(wchar_to + 2));
+ j++;
+ put_unaligned(dst_char, &target[j]);
}
+ continue;
+
+unknown:
+ dst_char = cpu_to_le16(0x003f);
+ charlen = 1;
}
+
+ctoUTF16:
/*
* character may take more than one byte in the source string,
* but will take exactly two bytes in the target string
--
2.43.0
next prev parent reply other threads:[~2024-01-03 17:13 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-03 16:55 [PATCH 6.6 00/49] 6.6.10-rc1 review Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 01/49] ksmbd: Remove unused field in ksmbd_user struct Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 02/49] ksmbd: reorganize ksmbd_iov_pin_rsp() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 03/49] ksmbd: fix kernel-doc comment of ksmbd_vfs_setxattr() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 04/49] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev() Greg Kroah-Hartman
2024-01-03 16:55 ` Greg Kroah-Hartman [this message]
2024-01-03 16:55 ` [PATCH 6.6 06/49] ksmbd: no need to wait for binded connection termination at logoff Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 07/49] ksmbd: fix kernel-doc comment of ksmbd_vfs_kern_path_locked() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 08/49] ksmbd: prevent memory leak on error return Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 09/49] ksmbd: separately allocate ci per dentry Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 10/49] ksmbd: move oplock handling after unlock parent dir Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 11/49] ksmbd: release interim response after sending status pending response Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 12/49] ksmbd: move setting SMB2_FLAGS_ASYNC_COMMAND and AsyncId Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 13/49] ksmbd: dont update ->op_state as OPLOCK_STATE_NONE on error Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 14/49] ksmbd: set epoch in create context v2 lease Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 15/49] ksmbd: set v2 lease capability Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 16/49] ksmbd: downgrade RWH lease caching state to RH for directory Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 17/49] ksmbd: send v2 lease break notification " Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 18/49] ksmbd: lazy v2 lease break on smb2_write() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 19/49] ksmbd: avoid duplicate opinfo_put() call on error of smb21_lease_break_ack() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 20/49] fs: new accessor methods for atime and mtime Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 21/49] client: convert to new timestamp accessors Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 22/49] fs: cifs: Fix atime update check Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 23/49] virtio_ring: fix syncs DMA memory with different direction Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 24/49] kexec: fix KEXEC_FILE dependencies Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 25/49] kexec: select CRYPTO from KEXEC_FILE instead of depending on it Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 26/49] linux/export: Fix alignment for 64-bit ksymtab entries Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 27/49] linux/export: Ensure natural alignment of kcrctab array Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 28/49] mptcp: refactor sndbuf auto-tuning Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 29/49] mptcp: fix possible NULL pointer dereference on close Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 30/49] mptcp: fix inconsistent state on fastopen race Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 31/49] block: renumber QUEUE_FLAG_HW_WC Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 32/49] platform/x86/intel/pmc: Add suspend callback Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 33/49] platform/x86/intel/pmc: Allow reenabling LTRs Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 34/49] platform/x86/intel/pmc: Move GBE LTR ignore to suspend callback Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 35/49] ksmbd: fix slab-out-of-bounds in smb_strndup_from_utf16() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 36/49] platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe Greg Kroah-Hartman
2024-01-04 9:01 ` Shinichiro Kawasaki
2024-01-03 16:55 ` [PATCH 6.6 37/49] maple_tree: do not preallocate nodes for slot stores Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 38/49] selftests: secretmem: floor the memory size to the multiple of page_size Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 39/49] mm/filemap: avoid buffered read/write race to read inconsistent data Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 40/49] mm: migrate high-order folios in swap cache correctly Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 41/49] mm/memory-failure: cast index to loff_t before shifting it Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 42/49] mm/memory-failure: check the mapcount of the precise page Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 43/49] Revert "nvme-fc: fix race between error recovery and creating association" Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 44/49] ring-buffer: Fix wake ups when buffer_percent is set to 100 Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 45/49] ftrace: Fix modification of direct_function hash while in use Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 46/49] tracing: Fix blocked reader of snapshot buffer Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 47/49] wifi: cfg80211: fix CQM for non-range use Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 48/49] wifi: nl80211: fix deadlock in nl80211_set_cqm_rssi (6.6.x) Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 49/49] netfilter: nf_tables: skip set commit for deleted/destroyed sets Greg Kroah-Hartman
2024-01-03 17:44 ` [PATCH 6.6 00/49] 6.6.10-rc1 review Nam Cao
2024-01-03 18:57 ` SeongJae Park
2024-01-03 22:04 ` Florian Fainelli
2024-01-03 23:35 ` Kelsey Steele
2024-01-04 0:18 ` Shuah Khan
2024-01-04 2:24 ` Takeshi Ogasawara
2024-01-04 4:10 ` Daniel Díaz
2024-01-04 7:15 ` Daniel Díaz
2024-01-04 7:58 ` Greg Kroah-Hartman
2024-01-04 8:21 ` Johannes Berg
2024-01-04 12:39 ` Naresh Kamboju
2024-01-04 12:58 ` Greg Kroah-Hartman
2024-01-04 5:20 ` Bagas Sanjaya
2024-01-04 7:55 ` Luna Jernberg
2024-01-04 7:57 ` Greg Kroah-Hartman
2024-01-04 10:26 ` Ron Economos
2024-01-04 11:53 ` Harshit Mogalapalli
2024-01-04 16:52 ` Jon Hunter
2024-01-04 17:12 ` Allen
2024-01-05 1:04 ` Guenter Roeck
2024-01-05 2:43 ` Namjae Jeon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240103164835.759886589@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=linkinjeon@kernel.org \
--cc=mmakassikis@freebox.fr \
--cc=patches@lists.linux.dev \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
--cc=stfrench@microsoft.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).